From: Pavel Begunkov <[email protected]>
To: [email protected]
Cc: Jens Axboe <[email protected]>,
Pavel Begunkov <[email protected]>,
[email protected],
[email protected]
Subject: [PATCH] io-wq: remove worker to owner tw dependency
Date: Fri, 29 Oct 2021 13:11:33 +0100 [thread overview]
Message-ID: <142a716f4ed936feae868959059154362bfa8c19.1635509451.git.asml.silence@gmail.com> (raw)
INFO: task iou-wrk-6609:6612 blocked for more than 143 seconds.
Not tainted 5.15.0-rc5-syzkaller #0
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
task:iou-wrk-6609 state:D stack:27944 pid: 6612 ppid: 6526 flags:0x00004006
Call Trace:
context_switch kernel/sched/core.c:4940 [inline]
__schedule+0xb44/0x5960 kernel/sched/core.c:6287
schedule+0xd3/0x270 kernel/sched/core.c:6366
schedule_timeout+0x1db/0x2a0 kernel/time/timer.c:1857
do_wait_for_common kernel/sched/completion.c:85 [inline]
__wait_for_common kernel/sched/completion.c:106 [inline]
wait_for_common kernel/sched/completion.c:117 [inline]
wait_for_completion+0x176/0x280 kernel/sched/completion.c:138
io_worker_exit fs/io-wq.c:183 [inline]
io_wqe_worker+0x66d/0xc40 fs/io-wq.c:597
ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295
io-wq worker may submit a task_work to the master task and upon
io_worker_exit() wait for the tw to get executed. The problem appears
when the master task is waiting in coredump.c:
468 freezer_do_not_count();
469 wait_for_completion(&core_state->startup);
470 freezer_count();
Apparently having some dependency on children threads getting everything
stuck. Workaround it by cancelling the taks_work callback that causes it
before going into io_worker_exit() waiting.
p.s. probably a better option is to not submit tw elevating the refcount
in the first place, but let's leave this excercise for the future.
Cc: [email protected]
Reported-and-tested-by: [email protected]
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io-wq.c | 46 +++++++++++++++++++++++++++++++++++++---------
1 file changed, 37 insertions(+), 9 deletions(-)
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 422a7ed6a9bd..8fcac49b7b13 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -140,6 +140,7 @@ static void io_wqe_dec_running(struct io_worker *worker);
static bool io_acct_cancel_pending_work(struct io_wqe *wqe,
struct io_wqe_acct *acct,
struct io_cb_cancel_data *match);
+static void create_worker_cb(struct callback_head *cb);
static bool io_worker_get(struct io_worker *worker)
{
@@ -174,9 +175,44 @@ static void io_worker_ref_put(struct io_wq *wq)
complete(&wq->worker_done);
}
+static void io_worker_cancel_cb(struct io_worker *worker)
+{
+ struct io_wqe_acct *acct = io_wqe_get_acct(worker);
+ struct io_wqe *wqe = worker->wqe;
+ struct io_wq *wq = wqe->wq;
+
+ atomic_dec(&acct->nr_running);
+ raw_spin_lock(&worker->wqe->lock);
+ acct->nr_workers--;
+ raw_spin_unlock(&worker->wqe->lock);
+ io_worker_ref_put(wq);
+ clear_bit_unlock(0, &worker->create_state);
+ io_worker_release(worker);
+}
+
+static bool io_task_worker_match(struct callback_head *cb, void *data)
+{
+ struct io_worker *worker;
+
+ if (cb->func != create_worker_cb)
+ return false;
+ worker = container_of(cb, struct io_worker, create_work);
+ return worker == data;
+}
+
static void io_worker_exit(struct io_worker *worker)
{
struct io_wqe *wqe = worker->wqe;
+ struct io_wq *wq = wqe->wq;
+
+ while (1) {
+ struct callback_head *cb = task_work_cancel_match(wq->task,
+ io_task_worker_match, worker);
+
+ if (!cb)
+ break;
+ io_worker_cancel_cb(worker);
+ }
if (refcount_dec_and_test(&worker->ref))
complete(&worker->ref_done);
@@ -1150,17 +1186,9 @@ static void io_wq_exit_workers(struct io_wq *wq)
while ((cb = task_work_cancel_match(wq->task, io_task_work_match, wq)) != NULL) {
struct io_worker *worker;
- struct io_wqe_acct *acct;
worker = container_of(cb, struct io_worker, create_work);
- acct = io_wqe_get_acct(worker);
- atomic_dec(&acct->nr_running);
- raw_spin_lock(&worker->wqe->lock);
- acct->nr_workers--;
- raw_spin_unlock(&worker->wqe->lock);
- io_worker_ref_put(wq);
- clear_bit_unlock(0, &worker->create_state);
- io_worker_release(worker);
+ io_worker_cancel_cb(worker);
}
rcu_read_lock();
--
2.33.1
next reply other threads:[~2021-10-29 12:12 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-10-29 12:11 Pavel Begunkov [this message]
2021-10-29 12:48 ` [PATCH] io-wq: remove worker to owner tw dependency Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=142a716f4ed936feae868959059154362bfa8c19.1635509451.git.asml.silence@gmail.com \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox