* [PATCH] io-wq: check for wq exit after adding new worker task_work
@ 2021-12-10 15:35 Jens Axboe
2021-12-10 18:38 ` Hao Xu
0 siblings, 1 reply; 2+ messages in thread
From: Jens Axboe @ 2021-12-10 15:35 UTC (permalink / raw)
To: io-uring
We check IO_WQ_BIT_EXIT before attempting to create a new worker, and
wq exit cancels pending work if we have any. But it's possible to have
a race between the two, where creation checks exit finding it not set,
but we're in the process of exiting. The exit side will cancel pending
creation task_work, but there's a gap where we add task_work after we've
canceled existing creations at exit time.
Fix this by checking the EXIT bit after adding the creation task_work.
If it's set, run the same cancelation that exit does.
Reported-by: [email protected]
Signed-off-by: Jens Axboe <[email protected]>
---
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 35da9d90df76..8d2bb818a3bb 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -142,6 +142,7 @@ static bool io_acct_cancel_pending_work(struct io_wqe *wqe,
struct io_wqe_acct *acct,
struct io_cb_cancel_data *match);
static void create_worker_cb(struct callback_head *cb);
+static void io_wq_cancel_tw_create(struct io_wq *wq);
static bool io_worker_get(struct io_worker *worker)
{
@@ -357,10 +358,22 @@ static bool io_queue_worker_create(struct io_worker *worker,
test_and_set_bit_lock(0, &worker->create_state))
goto fail_release;
+ atomic_inc(&wq->worker_refs);
init_task_work(&worker->create_work, func);
worker->create_index = acct->index;
- if (!task_work_add(wq->task, &worker->create_work, TWA_SIGNAL))
+ if (!task_work_add(wq->task, &worker->create_work, TWA_SIGNAL)) {
+ /*
+ * EXIT may have been set after checking it above, check after
+ * adding the task_work and remove any creation item if it is
+ * now set. wq exit does that too, but we can have added this
+ * work item after we canceled in io_wq_exit_workers().
+ */
+ if (test_bit(IO_WQ_BIT_EXIT, &wq->state))
+ io_wq_cancel_tw_create(wq);
+ io_worker_ref_put(wq);
return true;
+ }
+ io_worker_ref_put(wq);
clear_bit_unlock(0, &worker->create_state);
fail_release:
io_worker_release(worker);
@@ -1196,13 +1209,9 @@ void io_wq_exit_start(struct io_wq *wq)
set_bit(IO_WQ_BIT_EXIT, &wq->state);
}
-static void io_wq_exit_workers(struct io_wq *wq)
+static void io_wq_cancel_tw_create(struct io_wq *wq)
{
struct callback_head *cb;
- int node;
-
- if (!wq->task)
- return;
while ((cb = task_work_cancel_match(wq->task, io_task_work_match, wq)) != NULL) {
struct io_worker *worker;
@@ -1210,6 +1219,16 @@ static void io_wq_exit_workers(struct io_wq *wq)
worker = container_of(cb, struct io_worker, create_work);
io_worker_cancel_cb(worker);
}
+}
+
+static void io_wq_exit_workers(struct io_wq *wq)
+{
+ int node;
+
+ if (!wq->task)
+ return;
+
+ io_wq_cancel_tw_create(wq);
rcu_read_lock();
for_each_node(node) {
--
Jens Axboe
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH] io-wq: check for wq exit after adding new worker task_work
2021-12-10 15:35 [PATCH] io-wq: check for wq exit after adding new worker task_work Jens Axboe
@ 2021-12-10 18:38 ` Hao Xu
0 siblings, 0 replies; 2+ messages in thread
From: Hao Xu @ 2021-12-10 18:38 UTC (permalink / raw)
To: Jens Axboe, io-uring
在 2021/12/10 下午11:35, Jens Axboe 写道:
> We check IO_WQ_BIT_EXIT before attempting to create a new worker, and
> wq exit cancels pending work if we have any. But it's possible to have
> a race between the two, where creation checks exit finding it not set,
> but we're in the process of exiting. The exit side will cancel pending
> creation task_work, but there's a gap where we add task_work after we've
> canceled existing creations at exit time.
>
> Fix this by checking the EXIT bit post adding the creation task_work.
> If it's set, run the same cancelation that exit does.
>
> Reported-by: [email protected]
> Signed-off-by: Jens Axboe <[email protected]>
>
> ---
Looks good.
Reviewed-by: Hao Xu <[email protected]>
>
> diff --git a/fs/io-wq.c b/fs/io-wq.c
> index 35da9d90df76..8d2bb818a3bb 100644
> --- a/fs/io-wq.c
> +++ b/fs/io-wq.c
> @@ -142,6 +142,7 @@ static bool io_acct_cancel_pending_work(struct io_wqe *wqe,
> struct io_wqe_acct *acct,
> struct io_cb_cancel_data *match);
> static void create_worker_cb(struct callback_head *cb);
> +static void io_wq_cancel_tw_create(struct io_wq *wq);
>
> static bool io_worker_get(struct io_worker *worker)
> {
> @@ -357,10 +358,22 @@ static bool io_queue_worker_create(struct io_worker *worker,
> test_and_set_bit_lock(0, &worker->create_state))
> goto fail_release;
>
> + atomic_inc(&wq->worker_refs);
> init_task_work(&worker->create_work, func);
> worker->create_index = acct->index;
> - if (!task_work_add(wq->task, &worker->create_work, TWA_SIGNAL))
> + if (!task_work_add(wq->task, &worker->create_work, TWA_SIGNAL)) {
> + /*
> + * EXIT may have been set after checking it above, check after
> + * adding the task_work and remove any creation item if it is
> + * now set. wq exit does that too, but we can have added this
> + * work item after we canceled in io_wq_exit_workers().
> + */
> + if (test_bit(IO_WQ_BIT_EXIT, &wq->state))
> + io_wq_cancel_tw_create(wq);
> + io_worker_ref_put(wq);
> return true;
> + }
> + io_worker_ref_put(wq);
> clear_bit_unlock(0, &worker->create_state);
> fail_release:
> io_worker_release(worker);
> @@ -1196,13 +1209,9 @@ void io_wq_exit_start(struct io_wq *wq)
> set_bit(IO_WQ_BIT_EXIT, &wq->state);
> }
>
> -static void io_wq_exit_workers(struct io_wq *wq)
> +static void io_wq_cancel_tw_create(struct io_wq *wq)
> {
> struct callback_head *cb;
> - int node;
> -
> - if (!wq->task)
> - return;
>
> while ((cb = task_work_cancel_match(wq->task, io_task_work_match, wq)) != NULL) {
> struct io_worker *worker;
> @@ -1210,6 +1219,16 @@ static void io_wq_exit_workers(struct io_wq *wq)
> worker = container_of(cb, struct io_worker, create_work);
> io_worker_cancel_cb(worker);
> }
> +}
> +
> +static void io_wq_exit_workers(struct io_wq *wq)
> +{
> + int node;
> +
> + if (!wq->task)
> + return;
> +
> + io_wq_cancel_tw_create(wq);
>
> rcu_read_lock();
> for_each_node(node) {
>
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2021-12-10 18:38 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-10 15:35 [PATCH] io-wq: check for wq exit after adding new worker task_work Jens Axboe
2021-12-10 18:38 ` Hao Xu
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.