* [PATCH v8] io_uring: batch completion in prior_task_list
@ 2021-12-08 5:21 Hao Xu
2021-12-08 18:10 ` Pavel Begunkov
2021-12-08 18:35 ` Jens Axboe
0 siblings, 2 replies; 3+ messages in thread
From: Hao Xu @ 2021-12-08 5:21 UTC (permalink / raw)
To: Jens Axboe; +Cc: io-uring, Pavel Begunkov, Joseph Qi
In previous patches, we have already gathered some task_work items with
io_req_task_complete() as their callback in prior_task_list. Let's
complete them in batch when we cannot grab the uring lock; in this way
we batch the req_complete_post path.
Signed-off-by: Hao Xu <[email protected]>
---
v4->v5
- change the implementation of merge_wq_list
v5->v6
- change the logic of handling prior task list to:
1) grabbed uring_lock: leverage the inline completion infra
2) otherwise: batch __req_complete_post() calls to save
completion_lock operations.
v6->v7
- add Pavel's fix of wrong spin unlock
- remove a patch and rebase work
v7->v8
- the previous fix in v7 was incomplete, fix it (Pavel's comment)
- code cleanup (Jens' comment)
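(For orientation while reading the diff, a condensed sketch of the v5->v6
logic follows. It is not literal patch code: ctx switching, refcounting and
the final flush are omitted, the authoritative version is
handle_prev_tw_list() in the hunks below.)

	/* per-ctx: try the uring lock; if contended, batch under completion_lock */
	*uring_locked = mutex_trylock(&(*ctx)->uring_lock);
	if (!*uring_locked)
		spin_lock(&(*ctx)->completion_lock);

	/* per-request dispatch */
	if (*uring_locked)
		/* inline completion infra, flushed later via io_submit_flush_completions() */
		req->io_task_work.func(req, uring_locked);
	else
		/* completion_lock already held, post directly */
		__io_req_complete_post(req, req->result, io_put_kbuf(req));

	/* once per ctx when the uring lock was not taken: commit + wake waiters */
	io_commit_cqring(ctx);
	spin_unlock(&ctx->completion_lock);
	io_cqring_ev_posted(ctx);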
fs/io_uring.c | 71 +++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 60 insertions(+), 11 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 21738ed7521e..92dc33519466 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2225,7 +2225,49 @@ static void ctx_flush_and_put(struct io_ring_ctx *ctx, bool *locked)
percpu_ref_put(&ctx->refs);
}
-static void handle_tw_list(struct io_wq_work_node *node, struct io_ring_ctx **ctx, bool *locked)
+static inline void ctx_commit_and_unlock(struct io_ring_ctx *ctx)
+{
+ io_commit_cqring(ctx);
+ spin_unlock(&ctx->completion_lock);
+ io_cqring_ev_posted(ctx);
+}
+
+static void handle_prev_tw_list(struct io_wq_work_node *node,
+ struct io_ring_ctx **ctx, bool *uring_locked)
+{
+ if (*ctx && !*uring_locked)
+ spin_lock(&(*ctx)->completion_lock);
+
+ do {
+ struct io_wq_work_node *next = node->next;
+ struct io_kiocb *req = container_of(node, struct io_kiocb,
+ io_task_work.node);
+
+ if (req->ctx != *ctx) {
+ if (unlikely(!*uring_locked && *ctx))
+ ctx_commit_and_unlock(*ctx);
+
+ ctx_flush_and_put(*ctx, uring_locked);
+ *ctx = req->ctx;
+ /* if not contended, grab and improve batching */
+ *uring_locked = mutex_trylock(&(*ctx)->uring_lock);
+ percpu_ref_get(&(*ctx)->refs);
+ if (unlikely(!*uring_locked))
+ spin_lock(&(*ctx)->completion_lock);
+ }
+ if (likely(*uring_locked))
+ req->io_task_work.func(req, uring_locked);
+ else
+ __io_req_complete_post(req, req->result, io_put_kbuf(req));
+ node = next;
+ } while (node);
+
+ if (unlikely(!*uring_locked))
+ ctx_commit_and_unlock(*ctx);
+}
+
+static void handle_tw_list(struct io_wq_work_node *node,
+ struct io_ring_ctx **ctx, bool *locked)
{
do {
struct io_wq_work_node *next = node->next;
@@ -2246,31 +2288,38 @@ static void handle_tw_list(struct io_wq_work_node *node, struct io_ring_ctx **ct
static void tctx_task_work(struct callback_head *cb)
{
- bool locked = false;
+ bool uring_locked = false;
struct io_ring_ctx *ctx = NULL;
struct io_uring_task *tctx = container_of(cb, struct io_uring_task,
task_work);
while (1) {
- struct io_wq_work_node *node;
+ struct io_wq_work_node *node1, *node2;
- if (!tctx->prior_task_list.first &&
- !tctx->task_list.first && locked)
+ if (!tctx->task_list.first &&
+ !tctx->prior_task_list.first && uring_locked)
io_submit_flush_completions(ctx);
spin_lock_irq(&tctx->task_lock);
- node= wq_list_merge(&tctx->prior_task_list, &tctx->task_list);
- if (!node)
+ node1 = tctx->prior_task_list.first;
+ node2 = tctx->task_list.first;
+ INIT_WQ_LIST(&tctx->task_list);
+ INIT_WQ_LIST(&tctx->prior_task_list);
+ if (!node2 && !node1)
tctx->task_running = false;
spin_unlock_irq(&tctx->task_lock);
- if (!node)
+ if (!node2 && !node1)
break;
- handle_tw_list(node, &ctx, &locked);
+ if (node1)
+ handle_prev_tw_list(node1, &ctx, &uring_locked);
+
+ if (node2)
+ handle_tw_list(node2, &ctx, &uring_locked);
cond_resched();
}
- ctx_flush_and_put(ctx, &locked);
+ ctx_flush_and_put(ctx, &uring_locked);
}
static void io_req_task_work_add(struct io_kiocb *req, bool priority)
@@ -2759,7 +2808,7 @@ static void io_complete_rw(struct kiocb *kiocb, long res)
return;
req->result = res;
req->io_task_work.func = io_req_task_complete;
- io_req_task_work_add(req, true);
+ io_req_task_work_add(req, !!(req->ctx->flags & IORING_SETUP_SQPOLL));
}
static void io_complete_rw_iopoll(struct kiocb *kiocb, long res)
--
2.25.1
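(Also for reference, a condensed view of how the reworked tctx_task_work()
loop above drains the two lists separately instead of merging them, so the
priority list can take the batched path; the flush and task_running handling
are omitted here.)

	spin_lock_irq(&tctx->task_lock);
	node1 = tctx->prior_task_list.first;	/* priority tw, e.g. io_req_task_complete() */
	node2 = tctx->task_list.first;		/* regular tw */
	INIT_WQ_LIST(&tctx->prior_task_list);
	INIT_WQ_LIST(&tctx->task_list);
	spin_unlock_irq(&tctx->task_lock);

	if (node1)
		handle_prev_tw_list(node1, &ctx, &uring_locked);	/* batched completions */
	if (node2)
		handle_tw_list(node2, &ctx, &uring_locked);		/* unchanged path */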
* Re: [PATCH v8] io_uring: batch completion in prior_task_list
2021-12-08 5:21 [PATCH v8] io_uring: batch completion in prior_task_list Hao Xu
@ 2021-12-08 18:10 ` Pavel Begunkov
2021-12-08 18:35 ` Jens Axboe
1 sibling, 0 replies; 3+ messages in thread
From: Pavel Begunkov @ 2021-12-08 18:10 UTC (permalink / raw)
To: Hao Xu, Jens Axboe; +Cc: io-uring, Joseph Qi
On 12/8/21 05:21, Hao Xu wrote:
> In previous patches, we have already gathered some task_work items with
> io_req_task_complete() as their callback in prior_task_list. Let's
> complete them in batch when we cannot grab the uring lock; in this way
> we batch the req_complete_post path.
Works fine now, don't see any problem
Reviewed-by: Pavel Begunkov <[email protected]>
--
Pavel Begunkov
* Re: [PATCH v8] io_uring: batch completion in prior_task_list
2021-12-08 5:21 [PATCH v8] io_uring: batch completion in prior_task_list Hao Xu
2021-12-08 18:10 ` Pavel Begunkov
@ 2021-12-08 18:35 ` Jens Axboe
1 sibling, 0 replies; 3+ messages in thread
From: Jens Axboe @ 2021-12-08 18:35 UTC (permalink / raw)
To: Hao Xu; +Cc: Joseph Qi, Pavel Begunkov, io-uring
On Wed, 8 Dec 2021 13:21:25 +0800, Hao Xu wrote:
> In previous patches, we have already gathered some task_work items with
> io_req_task_complete() as their callback in prior_task_list. Let's
> complete them in batch when we cannot grab the uring lock; in this way
> we batch the req_complete_post path.
>
>
Applied, thanks!
[1/1] io_uring: batch completion in prior_task_list
commit: f28c240e7152462f0750a8939db28d985ecf7c67
Best regards,
--
Jens Axboe