* [PATCH 1/3] io-wq: fix no lock protection of acct->nr_worker
2021-08-08 10:12 [PATCH v2 0/3] code clean and nr_worker fixes Hao Xu
@ 2021-08-08 10:12 ` Hao Xu
2021-08-08 10:12 ` [PATCH 2/3] io-wq: fix lack of acct->nr_workers < acct->max_workers judgement Hao Xu
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Hao Xu @ 2021-08-08 10:12 UTC (permalink / raw)
To: Jens Axboe; +Cc: io-uring, Pavel Begunkov, Joseph Qi
There is an acct->nr_worker visit without lock protection. Think about
the case: two callers call io_wqe_wake_worker(), one is the original
context and the other one is an io-worker(by calling
io_wqe_enqueue(wqe, linked)), on two cpus paralelly, this may cause
nr_worker to be larger than max_worker.
Let's fix it by adding lock for it, and let's do nr_workers++ before
create_io_worker. There may be a edge cause that the first caller fails
to create an io-worker, but the second caller doesn't know it and then
quit creating io-worker as well:
say nr_worker = max_worker - 1
cpu 0 cpu 1
io_wqe_wake_worker() io_wqe_wake_worker()
nr_worker < max_worker
nr_worker++
create_io_worker() nr_worker == max_worker
failed return
return
But the chance of this case is very slim.
Fixes: 685fe7feedb9 ("io-wq: eliminate the need for a manager thread")
Signed-off-by: Hao Xu <[email protected]>
---
fs/io-wq.c | 24 ++++++++++++++++++------
1 file changed, 18 insertions(+), 6 deletions(-)
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 50dc93ffc153..6788666c65de 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -247,10 +247,20 @@ static void io_wqe_wake_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
ret = io_wqe_activate_free_worker(wqe);
rcu_read_unlock();
- if (!ret && acct->nr_workers < acct->max_workers) {
- atomic_inc(&acct->nr_running);
- atomic_inc(&wqe->wq->worker_refs);
- create_io_worker(wqe->wq, wqe, acct->index);
+ if (!ret) {
+ bool need_create = false;
+
+ raw_spin_lock_irq(&wqe->lock);
+ if (acct->nr_workers < acct->max_workers) {
+ acct->nr_workers++;
+ need_create = true;
+ }
+ raw_spin_unlock_irq(&wqe->lock);
+ if (need_create) {
+ atomic_inc(&acct->nr_running);
+ atomic_inc(&wqe->wq->worker_refs);
+ create_io_worker(wqe->wq, wqe, acct->index);
+ }
}
}
@@ -635,6 +645,9 @@ static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
kfree(worker);
fail:
atomic_dec(&acct->nr_running);
+ raw_spin_lock_irq(&wqe->lock);
+ acct->nr_workers--;
+ raw_spin_unlock_irq(&wqe->lock);
io_worker_ref_put(wq);
return;
}
@@ -650,9 +663,8 @@ static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
worker->flags |= IO_WORKER_F_FREE;
if (index == IO_WQ_ACCT_BOUND)
worker->flags |= IO_WORKER_F_BOUND;
- if (!acct->nr_workers && (worker->flags & IO_WORKER_F_BOUND))
+ if ((acct->nr_workers == 1) && (worker->flags & IO_WORKER_F_BOUND))
worker->flags |= IO_WORKER_F_FIXED;
- acct->nr_workers++;
raw_spin_unlock_irq(&wqe->lock);
wake_up_new_task(tsk);
}
--
2.24.4
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 2/3] io-wq: fix lack of acct->nr_workers < acct->max_workers judgement
2021-08-08 10:12 [PATCH v2 0/3] code clean and nr_worker fixes Hao Xu
2021-08-08 10:12 ` [PATCH 1/3] io-wq: fix no lock protection of acct->nr_worker Hao Xu
@ 2021-08-08 10:12 ` Hao Xu
2021-08-08 10:12 ` [PATCH 3/3] io-wq: fix IO_WORKER_F_FIXED issue in create_io_worker() Hao Xu
2021-08-08 10:31 ` [PATCH v2 0/3] code clean and nr_worker fixes Hao Xu
3 siblings, 0 replies; 5+ messages in thread
From: Hao Xu @ 2021-08-08 10:12 UTC (permalink / raw)
To: Jens Axboe; +Cc: io-uring, Pavel Begunkov, Joseph Qi
There should be this judgement before we create an io-worker
Fixes: 685fe7feedb9 ("io-wq: eliminate the need for a manager thread")
Signed-off-by: Hao Xu <[email protected]>
---
fs/io-wq.c | 20 ++++++++++++++++++--
1 file changed, 18 insertions(+), 2 deletions(-)
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 6788666c65de..d8684b4d0654 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -281,10 +281,26 @@ static void create_worker_cb(struct callback_head *cb)
{
struct create_worker_data *cwd;
struct io_wq *wq;
+ struct io_wqe *wqe;
+ struct io_wqe_acct *acct;
+ bool need_create = false;
cwd = container_of(cb, struct create_worker_data, work);
- wq = cwd->wqe->wq;
- create_io_worker(wq, cwd->wqe, cwd->index);
+ wqe = cwd->wqe;
+ wq = wqe->wq;
+ acct = &wqe->acct[cwd->index];
+ raw_spin_lock_irq(&wqe->lock);
+ if (acct->nr_workers < acct->max_workers) {
+ acct->nr_workers++;
+ need_create = true;
+ }
+ raw_spin_unlock_irq(&wqe->lock);
+ if (need_create) {
+ create_io_worker(wq, wqe, cwd->index);
+ } else {
+ atomic_dec(&acct->nr_running);
+ io_worker_ref_put(wq);
+ }
kfree(cwd);
}
--
2.24.4
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 3/3] io-wq: fix IO_WORKER_F_FIXED issue in create_io_worker()
2021-08-08 10:12 [PATCH v2 0/3] code clean and nr_worker fixes Hao Xu
2021-08-08 10:12 ` [PATCH 1/3] io-wq: fix no lock protection of acct->nr_worker Hao Xu
2021-08-08 10:12 ` [PATCH 2/3] io-wq: fix lack of acct->nr_workers < acct->max_workers judgement Hao Xu
@ 2021-08-08 10:12 ` Hao Xu
2021-08-08 10:31 ` [PATCH v2 0/3] code clean and nr_worker fixes Hao Xu
3 siblings, 0 replies; 5+ messages in thread
From: Hao Xu @ 2021-08-08 10:12 UTC (permalink / raw)
To: Jens Axboe; +Cc: io-uring, Pavel Begunkov, Joseph Qi
There may be cases like:
A B
spin_lock(wqe->lock)
nr_workers is 0
nr_workers++
spin_unlock(wqe->lock)
spin_lock(wqe->lock)
nr_wokers is 1
nr_workers++
spin_unlock(wqe->lock)
create_io_worker()
acct->worker is 1
create_io_worker()
acct->worker is 1
There should be one worker marked IO_WORKER_F_FIXED, but no one is.
Fix this by introduce a new agrument for create_io_worker() to indicate
if it is the first worker.
Signed-off-by: Hao Xu <[email protected]>
---
fs/io-wq.c | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
diff --git a/fs/io-wq.c b/fs/io-wq.c
index d8684b4d0654..d0128b72d174 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -129,7 +129,7 @@ struct io_cb_cancel_data {
bool cancel_all;
};
-static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index);
+static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index, bool first);
static void io_wqe_dec_running(struct io_worker *worker);
static bool io_worker_get(struct io_worker *worker)
@@ -248,10 +248,12 @@ static void io_wqe_wake_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
rcu_read_unlock();
if (!ret) {
- bool need_create = false;
+ bool need_create = false, first = false;
raw_spin_lock_irq(&wqe->lock);
if (acct->nr_workers < acct->max_workers) {
+ if (!acct->nr_workers)
+ first = true;
acct->nr_workers++;
need_create = true;
}
@@ -259,7 +261,7 @@ static void io_wqe_wake_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
if (need_create) {
atomic_inc(&acct->nr_running);
atomic_inc(&wqe->wq->worker_refs);
- create_io_worker(wqe->wq, wqe, acct->index);
+ create_io_worker(wqe->wq, wqe, acct->index, first);
}
}
}
@@ -283,7 +285,7 @@ static void create_worker_cb(struct callback_head *cb)
struct io_wq *wq;
struct io_wqe *wqe;
struct io_wqe_acct *acct;
- bool need_create = false;
+ bool need_create = false, first = false;
cwd = container_of(cb, struct create_worker_data, work);
wqe = cwd->wqe;
@@ -291,12 +293,14 @@ static void create_worker_cb(struct callback_head *cb)
acct = &wqe->acct[cwd->index];
raw_spin_lock_irq(&wqe->lock);
if (acct->nr_workers < acct->max_workers) {
+ if (!acct->nr_workers)
+ first = true;
acct->nr_workers++;
need_create = true;
}
raw_spin_unlock_irq(&wqe->lock);
if (need_create) {
- create_io_worker(wq, wqe, cwd->index);
+ create_io_worker(wq, wqe, cwd->index, first);
} else {
atomic_dec(&acct->nr_running);
io_worker_ref_put(wq);
@@ -638,7 +642,7 @@ void io_wq_worker_sleeping(struct task_struct *tsk)
raw_spin_unlock_irq(&worker->wqe->lock);
}
-static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
+static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index, bool first)
{
struct io_wqe_acct *acct = &wqe->acct[index];
struct io_worker *worker;
@@ -679,7 +683,7 @@ static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
worker->flags |= IO_WORKER_F_FREE;
if (index == IO_WQ_ACCT_BOUND)
worker->flags |= IO_WORKER_F_BOUND;
- if ((acct->nr_workers == 1) && (worker->flags & IO_WORKER_F_BOUND))
+ if (first && (worker->flags & IO_WORKER_F_BOUND))
worker->flags |= IO_WORKER_F_FIXED;
raw_spin_unlock_irq(&wqe->lock);
wake_up_new_task(tsk);
--
2.24.4
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH v2 0/3] code clean and nr_worker fixes
2021-08-08 10:12 [PATCH v2 0/3] code clean and nr_worker fixes Hao Xu
` (2 preceding siblings ...)
2021-08-08 10:12 ` [PATCH 3/3] io-wq: fix IO_WORKER_F_FIXED issue in create_io_worker() Hao Xu
@ 2021-08-08 10:31 ` Hao Xu
3 siblings, 0 replies; 5+ messages in thread
From: Hao Xu @ 2021-08-08 10:31 UTC (permalink / raw)
To: Jens Axboe; +Cc: io-uring, Pavel Begunkov, Joseph Qi
在 2021/8/8 下午6:12, Hao Xu 写道:
Since the old patches have been merged, I'll resend fixes based on the
new code baseline. Please ignore this patchset.
> v1-->v2
> - fix bug of creating io-wokers unconditionally
> - fix bug of no nr_running and worker_ref decrementing when fails
> - fix bug of setting IO_WORKER_BOUND_FIXED incorrectly.
>
> Hao Xu (3):
> io-wq: fix no lock protection of acct->nr_worker
> io-wq: fix lack of acct->nr_workers < acct->max_workers judgement
> io-wq: fix IO_WORKER_F_FIXED issue in create_io_worker()
>
> fs/io-wq.c | 52 ++++++++++++++++++++++++++++++++++++++++++----------
> 1 file changed, 42 insertions(+), 10 deletions(-)
>
^ permalink raw reply [flat|nested] 5+ messages in thread