public inbox for [email protected]
From: Chengming Zhou <[email protected]>
To: Ming Lei <[email protected]>, Jens Axboe <[email protected]>,
	[email protected], [email protected]
Cc: David Howells <[email protected]>
Subject: Re: [PATCH] io_uring: fix IO hang in io_wq_put_and_exit from do_exit()
Date: Fri, 1 Sep 2023 09:50:02 +0800
Message-ID: <[email protected]>
In-Reply-To: <[email protected]>

On 2023/8/31 15:42, Ming Lei wrote:
> io_wq_put_and_exit() is called from do_exit(), but not all requests in
> io_wq are cancelled by io_uring_cancel_generic(), which is also called
> from do_exit(). Meanwhile, the io_wq IO code path may share resources
> with the normal iopoll code path.
> 
> So if any HIPRI request is pending in io_wq_submit_work(), that request
> may not get the resource it needs to make progress, given that iopoll
> isn't possible in io_wq_put_and_exit().
> 
> The issue can be triggered when terminating 't/io_uring -n4 /dev/nullb0'
> with default null_blk parameters.
> 
> Fix it by always cancelling all requests in io_wq from io_uring_cancel_generic().
> This is reasonable because io_wq is destroyed immediately after the
> requests are cancelled. Based on a patch from Chengming.

Thanks for this work. I'm still learning this code, so maybe some
silly questions below.

> 
> Closes: https://lore.kernel.org/linux-block/[email protected]/
> Reported-by: David Howells <[email protected]>
> Cc: Chengming Zhou <[email protected]>
> Signed-off-by: Ming Lei <[email protected]>
> ---
>  io_uring/io_uring.c | 40 ++++++++++++++++++++++++++++------------
>  1 file changed, 28 insertions(+), 12 deletions(-)
> 
> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
> index e7675355048d..18d5ab969c29 100644
> --- a/io_uring/io_uring.c
> +++ b/io_uring/io_uring.c
> @@ -144,7 +144,7 @@ struct io_defer_entry {
>  
>  static bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
>  					 struct task_struct *task,
> -					 bool cancel_all);
> +					 bool cancel_all, bool *wq_cancelled);
>  
>  static void io_queue_sqe(struct io_kiocb *req);
>  
> @@ -3049,7 +3049,7 @@ static __cold void io_ring_exit_work(struct work_struct *work)
>  		if (ctx->flags & IORING_SETUP_DEFER_TASKRUN)
>  			io_move_task_work_from_local(ctx);
>  
> -		while (io_uring_try_cancel_requests(ctx, NULL, true))
> +		while (io_uring_try_cancel_requests(ctx, NULL, true, NULL))
>  			cond_resched();
>  
>  		if (ctx->sq_data) {
> @@ -3231,12 +3231,13 @@ static __cold bool io_uring_try_cancel_iowq(struct io_ring_ctx *ctx)
>  
>  static __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
>  						struct task_struct *task,
> -						bool cancel_all)
> +						bool cancel_all, bool *wq_cancelled)
>  {
> -	struct io_task_cancel cancel = { .task = task, .all = cancel_all, };
> +	struct io_task_cancel cancel = { .task = task, .all = true, };
>  	struct io_uring_task *tctx = task ? task->io_uring : NULL;
>  	enum io_wq_cancel cret;
>  	bool ret = false;
> +	bool wq_active = false;
>  
>  	/* set it so io_req_local_work_add() would wake us up */
>  	if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
> @@ -3249,7 +3250,7 @@ static __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
>  		return false;
>  
>  	if (!task) {
> -		ret |= io_uring_try_cancel_iowq(ctx);
> +		wq_active = io_uring_try_cancel_iowq(ctx);
>  	} else if (tctx && tctx->io_wq) {
>  		/*
>  		 * Cancels requests of all rings, not only @ctx, but
> @@ -3257,11 +3258,20 @@ static __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
>  		 */
>  		cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_task_cb,
>  				       &cancel, true);
> -		ret |= (cret != IO_WQ_CANCEL_NOTFOUND);
> +		wq_active = (cret != IO_WQ_CANCEL_NOTFOUND);
>  	}
> +	ret |= wq_active;
> +	if (wq_cancelled)
> +		*wq_cancelled = !wq_active;

Here it seems "wq_cancelled" means there is no pending or running work anymore.

Why not just use the return value "loop" instead of introducing this new "wq_cancelled"?

If the return value "loop" is true, we know there are still requests that need
to be cancelled, so we loop the cancel process until no requests remain.

Ah, I guess you may want to cover one case: !wq_active && loop == true
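To make the question concrete, the shape I had in mind is roughly the
pre-patch control flow (an untested sketch; the names come from the
quoted hunks, the exact structure around them is my assumption):

	do {
		bool loop = false;

		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
			loop |= io_uring_try_cancel_requests(ctx, current,
							     cancel_all);
		if (!loop)
			break;		/* nothing left that we could cancel */
		cond_resched();
	} while (1);

That is basically what the removed "if (loop)" check already did, so I'm
trying to understand what the extra flag buys on top of it.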

>  
> -	/* SQPOLL thread does its own polling */
> -	if ((!(ctx->flags & IORING_SETUP_SQPOLL) && cancel_all) ||
> +	/*
> +	 * SQPOLL thread does its own polling
> +	 *
> +	 * io_wq may share IO resources (such as requests) with iopoll, so
> +	 * iopoll requests have to be reaped to provide forward
> +	 * progress to io_wq cancelling
> +	 */
> +	if (!(ctx->flags & IORING_SETUP_SQPOLL) ||
>  	    (ctx->sq_data && ctx->sq_data->thread == current)) {
>  		while (!wq_list_empty(&ctx->iopoll_list)) {
>  			io_iopoll_try_reap_events(ctx);
> @@ -3313,11 +3323,12 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
>  	atomic_inc(&tctx->in_cancel);
>  	do {
>  		bool loop = false;
> +		bool wq_cancelled;
>  
>  		io_uring_drop_tctx_refs(current);
>  		/* read completions before cancelations */
>  		inflight = tctx_inflight(tctx, !cancel_all);
> -		if (!inflight)
> +		if (!inflight && !tctx->io_wq)
>  			break;
>  

I think this inflight check should be put after the cancel loop, because
the cancel loop makes sure there are no requests left to cancel; then we
can loop on the inflight check to make sure all inflight requests complete.
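
Roughly this ordering is what I mean (again an untested sketch, heavily
simplified from the quoted function; the elided parts stay exactly as
they are in your patch):

	do {
		bool loop = false;

		io_uring_drop_tctx_refs(current);

		/* cancel pass first, over all rings as in your patch */
		...

		if (loop) {
			cond_resched();
			continue;
		}

		/* only now check whether anything is still inflight */
		inflight = tctx_inflight(tctx, !cancel_all);
		if (!inflight)
			break;

		/* and wait for the remaining requests to complete */
		prepare_to_wait(&tctx->wait, &wait, TASK_INTERRUPTIBLE);
		io_run_task_work();
		...
	} while (1);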

>  		if (!sqd) {
> @@ -3326,20 +3337,25 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
>  				if (node->ctx->sq_data)
>  					continue;
>  				loop |= io_uring_try_cancel_requests(node->ctx,
> -							current, cancel_all);
> +							current, cancel_all,
> +							&wq_cancelled);
>  			}
>  		} else {
>  			list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
>  				loop |= io_uring_try_cancel_requests(ctx,
>  								     current,
> -								     cancel_all);
> +								     cancel_all,
> +								     &wq_cancelled);
>  		}
>  
> -		if (loop) {
> +		if (!wq_cancelled || (inflight && loop)) {
>  			cond_resched();
>  			continue;
>  		}

So here we would just need to check "loop", and continue cancelling if loop is true?
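That is, just keeping the original form of the check here (sketch, taken
from the pre-patch code in the hunk above):

	if (loop) {
		cond_resched();
		continue;
	}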

Thanks!

>  
> +		if (!inflight)
> +			break;
> +
>  		prepare_to_wait(&tctx->wait, &wait, TASK_INTERRUPTIBLE);
>  		io_run_task_work();
>  		io_uring_drop_tctx_refs(current);


Thread overview: 7+ messages
2023-08-31  7:42 [PATCH] io_uring: fix IO hang in io_wq_put_and_exit from do_exit() Ming Lei
2023-08-31 17:59 ` Jens Axboe
2023-09-01  1:56   ` Ming Lei
2023-09-01  1:50 ` Chengming Zhou [this message]
2023-09-01  2:09   ` Ming Lei
2023-09-01  2:17     ` Chengming Zhou
2023-09-01  8:56       ` Ming Lei
