From: Jens Axboe <[email protected]>
To: Linus Torvalds <[email protected]>
Cc: io-uring <[email protected]>
Subject: Re: [GIT PULL] io_uring fix for 5.16-rc6
Date: Fri, 17 Dec 2021 13:48:30 -0700
Message-ID: <[email protected]>
In-Reply-To: <[email protected]>
[-- Attachment #1: Type: text/plain, Size: 1955 bytes --]
On 12/17/21 1:11 PM, Jens Axboe wrote:
> On 12/17/21 12:45 PM, Linus Torvalds wrote:
>> On Fri, Dec 17, 2021 at 9:00 AM Jens Axboe <[email protected]> wrote:
>>>
>>> Just a single fix, fixing an issue with the worker creation change that
>>> was merged last week.
>>
>> Hmm. I've pulled, but looking at the result, this is a classic no-no.
>>
>> You can't just randomly drop and re-take a lock and say it's "safe".
>>
>> Because I don't think it's necessarily safe at all.
>>
>> When you drop the wqe->lock in the middle of io_wqe_dec_running to
>> create a new worker, it means - for example - that "io_worker_exit()"
>> can now run immediately on the new worker as far as I can tell.
>>
>> So one io_worker_exit() may literally race with another one, where
>> both are inside that io_wqe_dec_running() at the same time. And then
>> they both end up doing
>>
>> worker->flags = 0;
>> current->flags &= ~PF_IO_WORKER;
>>
>> afterwards in the caller, and not necessarily in the original order.
>> And then they'll both possibly do
>>
>> kfree_rcu(worker, rcu);
>>
>> which sounds like a disaster.
>
> The worker itself calls io_worker_exit(), so that cannot happen from
> within io_wqe_dec_running() for the existing one. And that's really all
> we care about. The new worker can come and go and we don't really
> care about it; we know we're running within another worker.
>
> That said, I totally do agree that this pattern is not a great one
> and should be avoided if at all possible. This one should be solvable by
> passing back "do the cancel" information from
> io_queue_worker_create(), but that also gets a bit ugly in terms of
> essentially having three return states...
>
> I'll have a think about how to do this in a saner fashion that's more
> obviously correct.
Something like this gets rid of it, but I'm not a huge fan of patch 1.
We could also make it an enum return, but that also gets a bit weird
imho.
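
To make the intent of the two patches below a bit more concrete, here is a
minimal standalone sketch of the pattern they move to (userspace pthreads,
not the actual io-wq code; dec_running_locked() and do_cancel() are made-up
stand-ins for io_wqe_dec_running() and io_wq_cancel_tw_create()): the helper
runs entirely under the lock and only reports whether the follow-up is
needed, and the caller performs it after it has dropped the lock itself.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int pending_work;

/*
 * Called with 'lock' held and never drops it. Returns true if the caller
 * should run the slow follow-up (the "cancel") once the lock is released.
 */
static bool dec_running_locked(void)
{
	return --pending_work == 0;
}

/* Stand-in for the follow-up work; safe to call without the lock held. */
static void do_cancel(void)
{
	printf("cancel ran outside the lock\n");
}

static void worker_sleeping(void)
{
	bool cancel;

	pthread_mutex_lock(&lock);
	cancel = dec_running_locked();
	pthread_mutex_unlock(&lock);

	/* Follow-up happens from a context where no lock is held. */
	if (cancel)
		do_cancel();
}

int main(void)
{
	pending_work = 1;
	worker_sleeping();
	return 0;
}

The point being that the lock is only ever taken and released in one place,
so nothing can sneak in while a helper has temporarily dropped it.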
--
Jens Axboe
[-- Attachment #2: 0001-io-wq-enable-io_queue_worker_create-worker-freeing-o.patch --]
[-- Type: text/x-patch, Size: 2429 bytes --]
From 259d17e8752041ee0311e098d9e64718cccd2f67 Mon Sep 17 00:00:00 2001
From: Jens Axboe <[email protected]>
Date: Fri, 17 Dec 2021 13:42:40 -0700
Subject: [PATCH 1/2] io-wq: enable io_queue_worker_create() worker freeing on
error
Rather than pass this information back to the caller, pass in whether or not
we should kfree the worker on error.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io-wq.c | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 5c4f582d6549..f261fb700cfc 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -336,9 +336,10 @@ static void create_worker_cb(struct callback_head *cb)
io_worker_release(worker);
}
-static bool io_queue_worker_create(struct io_worker *worker,
+static void io_queue_worker_create(struct io_worker *worker,
struct io_wqe_acct *acct,
- task_work_func_t func)
+ task_work_func_t func,
+ bool free_worker_on_error)
{
struct io_wqe *wqe = worker->wqe;
struct io_wq *wq = wqe->wq;
@@ -370,8 +371,7 @@ static bool io_queue_worker_create(struct io_worker *worker,
*/
if (test_bit(IO_WQ_BIT_EXIT, &wq->state))
io_wq_cancel_tw_create(wq);
- io_worker_ref_put(wq);
- return true;
+ goto fail_wq_put;
}
io_worker_ref_put(wq);
clear_bit_unlock(0, &worker->create_state);
@@ -379,8 +379,10 @@ static bool io_queue_worker_create(struct io_worker *worker,
io_worker_release(worker);
fail:
atomic_dec(&acct->nr_running);
+fail_wq_put:
io_worker_ref_put(wq);
- return false;
+ if (free_worker_on_error)
+ kfree(worker);
}
static void io_wqe_dec_running(struct io_worker *worker)
@@ -396,7 +398,7 @@ static void io_wqe_dec_running(struct io_worker *worker)
atomic_inc(&acct->nr_running);
atomic_inc(&wqe->wq->worker_refs);
raw_spin_unlock(&wqe->lock);
- io_queue_worker_create(worker, acct, create_worker_cb);
+ io_queue_worker_create(worker, acct, create_worker_cb, false);
raw_spin_lock(&wqe->lock);
}
}
@@ -790,8 +792,7 @@ static void io_workqueue_create(struct work_struct *work)
struct io_worker *worker = container_of(work, struct io_worker, work);
struct io_wqe_acct *acct = io_wqe_get_acct(worker);
- if (!io_queue_worker_create(worker, acct, create_worker_cont))
- kfree(worker);
+ io_queue_worker_create(worker, acct, create_worker_cont, true);
}
static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
--
2.34.1
[-- Attachment #3: 0002-io-wq-pass-back-cancel-information-from-worker-creat.patch --]
[-- Type: text/x-patch, Size: 4994 bytes --]
From 39caf24bc3645a5c608c070652e3a5e9385232c7 Mon Sep 17 00:00:00 2001
From: Jens Axboe <[email protected]>
Date: Fri, 17 Dec 2021 13:44:22 -0700
Subject: [PATCH 2/2] io-wq: pass back cancel information from worker creation
path
Don't call cancel directly deep inside the worker creation path; pass back
whether to cancel to the caller, which can then do so from a saner
context. We have two paths that currently do this, and one does so while
holding a lock we may need on cancelation.
Fixes: d800c65c2d4e ("io-wq: drop wqe lock before creating new worker")
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io-wq.c | 46 +++++++++++++++++++++++++++++++++-------------
1 file changed, 33 insertions(+), 13 deletions(-)
diff --git a/fs/io-wq.c b/fs/io-wq.c
index f261fb700cfc..139eecd89e72 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -137,7 +137,7 @@ struct io_cb_cancel_data {
};
static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index);
-static void io_wqe_dec_running(struct io_worker *worker);
+static bool io_wqe_dec_running(struct io_worker *worker);
static bool io_acct_cancel_pending_work(struct io_wqe *wqe,
struct io_wqe_acct *acct,
struct io_cb_cancel_data *match);
@@ -206,6 +206,7 @@ static void io_worker_exit(struct io_worker *worker)
{
struct io_wqe *wqe = worker->wqe;
struct io_wq *wq = wqe->wq;
+ bool cancel;
while (1) {
struct callback_head *cb = task_work_cancel_match(wq->task,
@@ -224,12 +225,15 @@ static void io_worker_exit(struct io_worker *worker)
hlist_nulls_del_rcu(&worker->nulls_node);
list_del_rcu(&worker->all_list);
preempt_disable();
- io_wqe_dec_running(worker);
+ cancel = io_wqe_dec_running(worker);
worker->flags = 0;
current->flags &= ~PF_IO_WORKER;
preempt_enable();
raw_spin_unlock(&wqe->lock);
+ if (cancel)
+ io_wq_cancel_tw_create(wq);
+
kfree_rcu(worker, rcu);
io_worker_ref_put(wqe->wq);
do_exit(0);
@@ -336,13 +340,17 @@ static void create_worker_cb(struct callback_head *cb)
io_worker_release(worker);
}
-static void io_queue_worker_create(struct io_worker *worker,
+/*
+ * Returns true if the caller should call io_wq_cancel_tw_create
+ */
+static bool io_queue_worker_create(struct io_worker *worker,
struct io_wqe_acct *acct,
task_work_func_t func,
bool free_worker_on_error)
{
struct io_wqe *wqe = worker->wqe;
struct io_wq *wq = wqe->wq;
+ bool ret = false;
/* raced with exit, just ignore create call */
if (test_bit(IO_WQ_BIT_EXIT, &wq->state))
@@ -370,7 +378,7 @@ static void io_queue_worker_create(struct io_worker *worker,
* work item after we canceled in io_wq_exit_workers().
*/
if (test_bit(IO_WQ_BIT_EXIT, &wq->state))
- io_wq_cancel_tw_create(wq);
+ ret = true;
goto fail_wq_put;
}
io_worker_ref_put(wq);
@@ -383,24 +391,28 @@ static void io_queue_worker_create(struct io_worker *worker,
io_worker_ref_put(wq);
if (free_worker_on_error)
kfree(worker);
+ return ret;
}
-static void io_wqe_dec_running(struct io_worker *worker)
+/*
+ * Returns true if the caller should call io_wq_cancel_tw_create
+ */
+static bool io_wqe_dec_running(struct io_worker *worker)
__must_hold(wqe->lock)
{
struct io_wqe_acct *acct = io_wqe_get_acct(worker);
struct io_wqe *wqe = worker->wqe;
if (!(worker->flags & IO_WORKER_F_UP))
- return;
+ return false;
if (atomic_dec_and_test(&acct->nr_running) && io_acct_run_queue(acct)) {
atomic_inc(&acct->nr_running);
atomic_inc(&wqe->wq->worker_refs);
- raw_spin_unlock(&wqe->lock);
- io_queue_worker_create(worker, acct, create_worker_cb, false);
- raw_spin_lock(&wqe->lock);
+ return io_queue_worker_create(worker, acct, create_worker_cb, false);
}
+
+ return false;
}
/*
@@ -691,6 +703,8 @@ void io_wq_worker_running(struct task_struct *tsk)
void io_wq_worker_sleeping(struct task_struct *tsk)
{
struct io_worker *worker = tsk->pf_io_worker;
+ struct io_wqe *wqe = worker->wqe;
+ bool cancel;
if (!worker)
return;
@@ -701,9 +715,11 @@ void io_wq_worker_sleeping(struct task_struct *tsk)
worker->flags &= ~IO_WORKER_F_RUNNING;
- raw_spin_lock(&worker->wqe->lock);
- io_wqe_dec_running(worker);
- raw_spin_unlock(&worker->wqe->lock);
+ raw_spin_lock(&wqe->lock);
+ cancel = io_wqe_dec_running(worker);
+ raw_spin_unlock(&wqe->lock);
+ if (cancel)
+ io_wq_cancel_tw_create(wqe->wq);
}
static void io_init_new_worker(struct io_wqe *wqe, struct io_worker *worker,
@@ -791,8 +807,12 @@ static void io_workqueue_create(struct work_struct *work)
{
struct io_worker *worker = container_of(work, struct io_worker, work);
struct io_wqe_acct *acct = io_wqe_get_acct(worker);
+ struct io_wq *wq = worker->wqe->wq;
+ bool cancel;
- io_queue_worker_create(worker, acct, create_worker_cont, true);
+ cancel = io_queue_worker_create(worker, acct, create_worker_cont, true);
+ if (cancel)
+ io_wq_cancel_tw_create(wq);
}
static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
--
2.34.1