* [PATCH v8] io_uring: batch completion in prior_task_list
@ 2021-12-08  5:21 Hao Xu
  2021-12-08 18:10 ` Pavel Begunkov
  2021-12-08 18:35 ` Jens Axboe
From: Hao Xu @ 2021-12-08  5:21 UTC (permalink / raw)
  To: Jens Axboe; +Cc: io-uring, Pavel Begunkov, Joseph Qi

In previous patches we have already gathered the task_work items that
use io_req_task_complete() as their callback in prior_task_list. Let's
complete them in batch when we cannot grab the uring lock, so that the
io_req_complete_post() path is batched as well.
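
To make "batch" concrete: on the path where the uring lock is not held,
the completion lock is now taken once for the whole list, each request
is posted with __io_req_complete_post(), and the CQ ring is committed
and the event posted a single time. A simplified sketch (illustration
only; the real code is handle_prev_tw_list()/ctx_commit_and_unlock()
below, which also deals with requests from different ctxs):

	spin_lock(&ctx->completion_lock);
	while (node) {
		struct io_kiocb *req = container_of(node, struct io_kiocb,
						    io_task_work.node);

		node = node->next;
		/* post the CQE under the already held completion_lock */
		__io_req_complete_post(req, req->result, io_put_kbuf(req));
	}
	/* one commit + one wakeup for the whole batch */
	io_commit_cqring(ctx);
	spin_unlock(&ctx->completion_lock);
	io_cqring_ev_posted(ctx);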

Signed-off-by: Hao Xu <[email protected]>
---

v4->v5
- change the implementation of merge_wq_list

v5->v6
- change the logic of handling the prior task list (sketched below) to:
  1) uring_lock grabbed: leverage the inline completion infra
  2) otherwise: batch __io_req_complete_post() calls to save
     completion_lock operations.
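
  In pseudo-form, the per-request dispatch becomes (simplified from
  handle_prev_tw_list() in the patch below; see there for the ctx
  switching and locking details):

	if (likely(*uring_locked))
		/* 1): goes into the ctx's inline completion batch,
		 * flushed later by io_submit_flush_completions()
		 */
		req->io_task_work.func(req, uring_locked);
	else
		/* 2): CQE posted under completion_lock, committed
		 * once for the whole batch
		 */
		__io_req_complete_post(req, req->result, io_put_kbuf(req));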

v6->v7
- add Pavel's fix for a wrong spin unlock
- drop a patch and rebase the remaining work

v7->v8
- the previous fix in v7 was incomplete; fix it (Pavel's comment)
- code cleanup (Jens' comment)

 fs/io_uring.c | 71 +++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 60 insertions(+), 11 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 21738ed7521e..92dc33519466 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2225,7 +2225,49 @@ static void ctx_flush_and_put(struct io_ring_ctx *ctx, bool *locked)
 	percpu_ref_put(&ctx->refs);
 }
 
-static void handle_tw_list(struct io_wq_work_node *node, struct io_ring_ctx **ctx, bool *locked)
+static inline void ctx_commit_and_unlock(struct io_ring_ctx *ctx)
+{
+	io_commit_cqring(ctx);
+	spin_unlock(&ctx->completion_lock);
+	io_cqring_ev_posted(ctx);
+}
+
+static void handle_prev_tw_list(struct io_wq_work_node *node,
+				struct io_ring_ctx **ctx, bool *uring_locked)
+{
+	if (*ctx && !*uring_locked)
+		spin_lock(&(*ctx)->completion_lock);
+
+	do {
+		struct io_wq_work_node *next = node->next;
+		struct io_kiocb *req = container_of(node, struct io_kiocb,
+						    io_task_work.node);
+
+		if (req->ctx != *ctx) {
+			if (unlikely(!*uring_locked && *ctx))
+				ctx_commit_and_unlock(*ctx);
+
+			ctx_flush_and_put(*ctx, uring_locked);
+			*ctx = req->ctx;
+			/* if not contended, grab and improve batching */
+			*uring_locked = mutex_trylock(&(*ctx)->uring_lock);
+			percpu_ref_get(&(*ctx)->refs);
+			if (unlikely(!*uring_locked))
+				spin_lock(&(*ctx)->completion_lock);
+		}
+		if (likely(*uring_locked))
+			req->io_task_work.func(req, uring_locked);
+		else
+			__io_req_complete_post(req, req->result, io_put_kbuf(req));
+		node = next;
+	} while (node);
+
+	if (unlikely(!*uring_locked))
+		ctx_commit_and_unlock(*ctx);
+}
+
+static void handle_tw_list(struct io_wq_work_node *node,
+			   struct io_ring_ctx **ctx, bool *locked)
 {
 	do {
 		struct io_wq_work_node *next = node->next;
@@ -2246,31 +2288,38 @@ static void handle_tw_list(struct io_wq_work_node *node, struct io_ring_ctx **ct
 
 static void tctx_task_work(struct callback_head *cb)
 {
-	bool locked = false;
+	bool uring_locked = false;
 	struct io_ring_ctx *ctx = NULL;
 	struct io_uring_task *tctx = container_of(cb, struct io_uring_task,
 						  task_work);
 
 	while (1) {
-		struct io_wq_work_node *node;
+		struct io_wq_work_node *node1, *node2;
 
-		if (!tctx->prior_task_list.first &&
-		    !tctx->task_list.first && locked)
+		if (!tctx->task_list.first &&
+		    !tctx->prior_task_list.first && uring_locked)
 			io_submit_flush_completions(ctx);
 
 		spin_lock_irq(&tctx->task_lock);
-		node= wq_list_merge(&tctx->prior_task_list, &tctx->task_list);
-		if (!node)
+		node1 = tctx->prior_task_list.first;
+		node2 = tctx->task_list.first;
+		INIT_WQ_LIST(&tctx->task_list);
+		INIT_WQ_LIST(&tctx->prior_task_list);
+		if (!node2 && !node1)
 			tctx->task_running = false;
 		spin_unlock_irq(&tctx->task_lock);
-		if (!node)
+		if (!node2 && !node1)
 			break;
 
-		handle_tw_list(node, &ctx, &locked);
+		if (node1)
+			handle_prev_tw_list(node1, &ctx, &uring_locked);
+
+		if (node2)
+			handle_tw_list(node2, &ctx, &uring_locked);
 		cond_resched();
 	}
 
-	ctx_flush_and_put(ctx, &locked);
+	ctx_flush_and_put(ctx, &uring_locked);
 }
 
 static void io_req_task_work_add(struct io_kiocb *req, bool priority)
@@ -2759,7 +2808,7 @@ static void io_complete_rw(struct kiocb *kiocb, long res)
 		return;
 	req->result = res;
 	req->io_task_work.func = io_req_task_complete;
-	io_req_task_work_add(req, true);
+	io_req_task_work_add(req, !!(req->ctx->flags & IORING_SETUP_SQPOLL));
 }
 
 static void io_complete_rw_iopoll(struct kiocb *kiocb, long res)
-- 
2.25.1



* Re: [PATCH v8] io_uring: batch completion in prior_task_list
  2021-12-08  5:21 [PATCH v8] io_uring: batch completion in prior_task_list Hao Xu
@ 2021-12-08 18:10 ` Pavel Begunkov
  2021-12-08 18:35 ` Jens Axboe
From: Pavel Begunkov @ 2021-12-08 18:10 UTC (permalink / raw)
  To: Hao Xu, Jens Axboe; +Cc: io-uring, Joseph Qi

On 12/8/21 05:21, Hao Xu wrote:
> In previous patches we have already gathered the task_work items that
> use io_req_task_complete() as their callback in prior_task_list. Let's
> complete them in batch when we cannot grab the uring lock, so that the
> io_req_complete_post() path is batched as well.

Works fine now, don't see any problem

Reviewed-by: Pavel Begunkov <[email protected]>

> 
> Signed-off-by: Hao Xu <[email protected]>
> ---
> 
> v4->v5
> - change the implementation of merge_wq_list
> 
> v5->v6
> - change the logic of handling the prior task list to:
>    1) uring_lock grabbed: leverage the inline completion infra
>    2) otherwise: batch __io_req_complete_post() calls to save
>       completion_lock operations.
> 
> v6->v7
> - add Pavel's fix for a wrong spin unlock
> - drop a patch and rebase the remaining work
> 
> v7->v8
> - the previous fix in v7 was incomplete; fix it (Pavel's comment)
> - code cleanup (Jens' comment)
> 
>   fs/io_uring.c | 71 +++++++++++++++++++++++++++++++++++++++++++--------
>   1 file changed, 60 insertions(+), 11 deletions(-)
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 21738ed7521e..92dc33519466 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -2225,7 +2225,49 @@ static void ctx_flush_and_put(struct io_ring_ctx *ctx, bool *locked)
>   	percpu_ref_put(&ctx->refs);
>   }
>   
> -static void handle_tw_list(struct io_wq_work_node *node, struct io_ring_ctx **ctx, bool *locked)
> +static inline void ctx_commit_and_unlock(struct io_ring_ctx *ctx)
> +{
> +	io_commit_cqring(ctx);
> +	spin_unlock(&ctx->completion_lock);
> +	io_cqring_ev_posted(ctx);
> +}
> +
> +static void handle_prev_tw_list(struct io_wq_work_node *node,
> +				struct io_ring_ctx **ctx, bool *uring_locked)
> +{
> +	if (*ctx && !*uring_locked)
> +		spin_lock(&(*ctx)->completion_lock);
> +
> +	do {
> +		struct io_wq_work_node *next = node->next;
> +		struct io_kiocb *req = container_of(node, struct io_kiocb,
> +						    io_task_work.node);
> +
> +		if (req->ctx != *ctx) {
> +			if (unlikely(!*uring_locked && *ctx))
> +				ctx_commit_and_unlock(*ctx);
> +
> +			ctx_flush_and_put(*ctx, uring_locked);
> +			*ctx = req->ctx;
> +			/* if not contended, grab and improve batching */
> +			*uring_locked = mutex_trylock(&(*ctx)->uring_lock);
> +			percpu_ref_get(&(*ctx)->refs);
> +			if (unlikely(!*uring_locked))
> +				spin_lock(&(*ctx)->completion_lock);
> +		}
> +		if (likely(*uring_locked))
> +			req->io_task_work.func(req, uring_locked);
> +		else
> +			__io_req_complete_post(req, req->result, io_put_kbuf(req));
> +		node = next;
> +	} while (node);
> +
> +	if (unlikely(!*uring_locked))
> +		ctx_commit_and_unlock(*ctx);
> +}
> +
> +static void handle_tw_list(struct io_wq_work_node *node,
> +			   struct io_ring_ctx **ctx, bool *locked)
>   {
>   	do {
>   		struct io_wq_work_node *next = node->next;
> @@ -2246,31 +2288,38 @@ static void handle_tw_list(struct io_wq_work_node *node, struct io_ring_ctx **ct
>   
>   static void tctx_task_work(struct callback_head *cb)
>   {
> -	bool locked = false;
> +	bool uring_locked = false;
>   	struct io_ring_ctx *ctx = NULL;
>   	struct io_uring_task *tctx = container_of(cb, struct io_uring_task,
>   						  task_work);
>   
>   	while (1) {
> -		struct io_wq_work_node *node;
> +		struct io_wq_work_node *node1, *node2;
>   
> -		if (!tctx->prior_task_list.first &&
> -		    !tctx->task_list.first && locked)
> +		if (!tctx->task_list.first &&
> +		    !tctx->prior_task_list.first && uring_locked)
>   			io_submit_flush_completions(ctx);
>   
>   		spin_lock_irq(&tctx->task_lock);
> -		node= wq_list_merge(&tctx->prior_task_list, &tctx->task_list);
> -		if (!node)
> +		node1 = tctx->prior_task_list.first;
> +		node2 = tctx->task_list.first;
> +		INIT_WQ_LIST(&tctx->task_list);
> +		INIT_WQ_LIST(&tctx->prior_task_list);
> +		if (!node2 && !node1)
>   			tctx->task_running = false;
>   		spin_unlock_irq(&tctx->task_lock);
> -		if (!node)
> +		if (!node2 && !node1)
>   			break;
>   
> -		handle_tw_list(node, &ctx, &locked);
> +		if (node1)
> +			handle_prev_tw_list(node1, &ctx, &uring_locked);
> +
> +		if (node2)
> +			handle_tw_list(node2, &ctx, &uring_locked);
>   		cond_resched();
>   	}
>   
> -	ctx_flush_and_put(ctx, &locked);
> +	ctx_flush_and_put(ctx, &uring_locked);
>   }
>   
>   static void io_req_task_work_add(struct io_kiocb *req, bool priority)
> @@ -2759,7 +2808,7 @@ static void io_complete_rw(struct kiocb *kiocb, long res)
>   		return;
>   	req->result = res;
>   	req->io_task_work.func = io_req_task_complete;
> -	io_req_task_work_add(req, true);
> +	io_req_task_work_add(req, !!(req->ctx->flags & IORING_SETUP_SQPOLL));
>   }
>   
>   static void io_complete_rw_iopoll(struct kiocb *kiocb, long res)
> 

-- 
Pavel Begunkov


* Re: [PATCH v8] io_uring: batch completion in prior_task_list
  2021-12-08  5:21 [PATCH v8] io_uring: batch completion in prior_task_list Hao Xu
  2021-12-08 18:10 ` Pavel Begunkov
@ 2021-12-08 18:35 ` Jens Axboe
From: Jens Axboe @ 2021-12-08 18:35 UTC (permalink / raw)
  To: Hao Xu; +Cc: Joseph Qi, Pavel Begunkov, io-uring

On Wed, 8 Dec 2021 13:21:25 +0800, Hao Xu wrote:
> In previous patches we have already gathered the task_work items that
> use io_req_task_complete() as their callback in prior_task_list. Let's
> complete them in batch when we cannot grab the uring lock, so that the
> io_req_complete_post() path is batched as well.
> 
> 

Applied, thanks!

[1/1] io_uring: batch completion in prior_task_list
      commit: f28c240e7152462f0750a8939db28d985ecf7c67

Best regards,
-- 
Jens Axboe


