From: Pavel Begunkov <[email protected]>
To: Jens Axboe <[email protected]>, [email protected]
Subject: Re: [PATCH 1/1] io-wq: forcefully cancel on io-wq destroy
Date: Tue, 30 Mar 2021 19:40:36 +0100 [thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <822eeb713e57efe8960a7f3a7c11dbef1fcbf4e4.1617129472.git.asml.silence@gmail.com>
On 30/03/2021 19:38, Pavel Begunkov wrote:
> [ 491.222908] INFO: task thread-exit:2490 blocked for more than 122 seconds.
> [ 491.222957] Call Trace:
> [ 491.222967] __schedule+0x36b/0x950
> [ 491.222985] schedule+0x68/0xe0
> [ 491.222994] schedule_timeout+0x209/0x2a0
> [ 491.223003] ? tlb_flush_mmu+0x28/0x140
> [ 491.223013] wait_for_completion+0x8b/0xf0
> [ 491.223023] io_wq_destroy_manager+0x24/0x60
> [ 491.223037] io_wq_put_and_exit+0x18/0x30
> [ 491.223045] io_uring_clean_tctx+0x76/0xa0
> [ 491.223061] __io_uring_files_cancel+0x1b9/0x2e0
> [ 491.223068] ? blk_finish_plug+0x26/0x40
> [ 491.223085] do_exit+0xc0/0xb40
> [ 491.223099] ? syscall_trace_enter.isra.0+0x1a1/0x1e0
> [ 491.223109] __x64_sys_exit+0x1b/0x20
> [ 491.223117] do_syscall_64+0x38/0x50
> [ 491.223131] entry_SYSCALL_64_after_hwframe+0x44/0xae
> [ 491.223177] INFO: task iou-mgr-2490:2491 blocked for more than 122 seconds.
> [ 491.223194] Call Trace:
> [ 491.223198] __schedule+0x36b/0x950
> [ 491.223206] ? pick_next_task_fair+0xcf/0x3e0
> [ 491.223218] schedule+0x68/0xe0
> [ 491.223225] schedule_timeout+0x209/0x2a0
> [ 491.223236] wait_for_completion+0x8b/0xf0
> [ 491.223246] io_wq_manager+0xf1/0x1d0
> [ 491.223255] ? recalc_sigpending+0x1c/0x60
> [ 491.223265] ? io_wq_cpu_online+0x40/0x40
> [ 491.223272] ret_from_fork+0x22/0x30
>
> When an io-wq worker exits and sees IO_WQ_BIT_EXIT, it does not cancel
> the remaining requests but tries to execute them, hence we may wait for
> the exiting task for a long time until someone pushes it, e.g. with
> SIGKILL. Actively cancel pending work items on io-wq destruction.
The trace is from a slightly modified thread-exit test, and it doesn't
hang forever: it can be killed with ctrl+c or similar. Also,
predictably breaks the thread-exit test.
>
> note: io_run_cancel() moved up without any changes.
>
> Signed-off-by: Pavel Begunkov <[email protected]>
> ---
> fs/io-wq.c | 50 +++++++++++++++++++++++++++++++++++---------------
> 1 file changed, 35 insertions(+), 15 deletions(-)
>
> diff --git a/fs/io-wq.c b/fs/io-wq.c
> index 7434eb40ca8c..5fa5e0fd40d6 100644
> --- a/fs/io-wq.c
> +++ b/fs/io-wq.c
> @@ -342,6 +342,20 @@ static void io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
> spin_unlock(&wq->hash->wait.lock);
> }
>
> +static struct io_wq_work *io_get_work_all(struct io_wqe *wqe)
> + __must_hold(wqe->lock)
> +{
> + struct io_wq_work_list *list = &wqe->work_list;
> + struct io_wq_work_node *node = list->first;
> + int i;
> +
> + list->first = list->last = NULL;
> + for (i = 0; i < IO_WQ_NR_HASH_BUCKETS; i++)
> + wqe->hash_tail[i] = NULL;
> +
> + return node ? container_of(node, struct io_wq_work, list) : NULL;
> +}
> +
> static struct io_wq_work *io_get_next_work(struct io_wqe *wqe)
> __must_hold(wqe->lock)
> {
> @@ -410,6 +424,17 @@ static void io_assign_current_work(struct io_worker *worker,
>
> static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work);
>
> +static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
> +{
> + struct io_wq *wq = wqe->wq;
> +
> + do {
> + work->flags |= IO_WQ_WORK_CANCEL;
> + wq->do_work(work);
> + work = wq->free_work(work);
> + } while (work);
> +}
> +
> static void io_worker_handle_work(struct io_worker *worker)
> __releases(wqe->lock)
> {
> @@ -518,11 +543,17 @@ static int io_wqe_worker(void *data)
> }
>
> if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
> + struct io_wq_work *work, *next;
> +
> raw_spin_lock_irq(&wqe->lock);
> - if (!wq_list_empty(&wqe->work_list))
> - io_worker_handle_work(worker);
> - else
> - raw_spin_unlock_irq(&wqe->lock);
> + work = io_get_work_all(wqe);
> + raw_spin_unlock_irq(&wqe->lock);
> +
> + while (work) {
> + next = wq_next_work(work);
> + io_run_cancel(work, wqe);
> + work = next;
> + }
> }
>
> io_worker_exit(worker);
> @@ -748,17 +779,6 @@ static int io_wq_manager(void *data)
> do_exit(0);
> }
>
> -static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
> -{
> - struct io_wq *wq = wqe->wq;
> -
> - do {
> - work->flags |= IO_WQ_WORK_CANCEL;
> - wq->do_work(work);
> - work = wq->free_work(work);
> - } while (work);
> -}
> -
> static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work)
> {
> unsigned int hash;
>
--
Pavel Begunkov
prev parent reply other threads:[~2021-03-30 18:45 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-03-30 18:38 [PATCH 1/1] io-wq: forcefully cancel on io-wq destroy Pavel Begunkov
2021-03-30 18:40 ` Pavel Begunkov [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox