* [PATCHSET 0/6] Fixes for gaps in async cancelations
From: Jens Axboe @ 2022-01-19 2:42 UTC
To: io-uring
Hi,
Based on the report from Florian, I found a few gaps in how we handle
async work lookup and cancelations. I've tried to break this down into
patches that are as simple as possible; the first one is just an
unrelated cleanup in the same area that I came across. The functional
bits are near the end of the series.
--
Jens Axboe
* [PATCH 1/6] io-wq: remove useless 'work' argument to __io_worker_busy()
From: Jens Axboe @ 2022-01-19 2:42 UTC
To: io-uring; +Cc: Jens Axboe
We no longer use 'work' in the busy logic, so remove the dead argument.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io-wq.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 5c4f582d6549..f8a5f172b9eb 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -405,8 +405,7 @@ static void io_wqe_dec_running(struct io_worker *worker)
* Worker will start processing some work. Move it to the busy list, if
* it's currently on the freelist
*/
-static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
- struct io_wq_work *work)
+static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker)
__must_hold(wqe->lock)
{
if (worker->flags & IO_WORKER_F_FREE) {
@@ -556,7 +555,7 @@ static void io_worker_handle_work(struct io_worker *worker)
*/
work = io_get_next_work(acct, worker);
if (work)
- __io_worker_busy(wqe, worker, work);
+ __io_worker_busy(wqe, worker);
raw_spin_unlock(&wqe->lock);
if (!work)
--
2.34.1
* [PATCH 2/6] io-wq: make io_worker lock a raw spinlock
From: Jens Axboe @ 2022-01-19 2:42 UTC
To: io-uring; +Cc: Jens Axboe
In preparation for nesting it under the wqe lock (which is raw, since
it is acquired from the scheduler side), change the io_worker lock from
a normal spinlock to a raw spinlock.
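
As a rough sketch, this is the nesting that later patches in this
series rely on; with a plain spinlock_t, worker->lock would be a
sleeping lock under PREEMPT_RT and could not nest inside the raw
wqe->lock:

    raw_spin_lock(&wqe->lock);      /* raw: also taken from the scheduler side */
    raw_spin_lock(&worker->lock);   /* nested inside it, so it must be raw too */
    /* ... inspect or update per-worker state ... */
    raw_spin_unlock(&worker->lock);
    raw_spin_unlock(&wqe->lock);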
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io-wq.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/fs/io-wq.c b/fs/io-wq.c
index f8a5f172b9eb..c369910de793 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -48,7 +48,7 @@ struct io_worker {
struct io_wqe *wqe;
struct io_wq_work *cur_work;
- spinlock_t lock;
+ raw_spinlock_t lock;
struct completion ref_done;
@@ -528,9 +528,9 @@ static void io_assign_current_work(struct io_worker *worker,
cond_resched();
}
- spin_lock(&worker->lock);
+ raw_spin_lock(&worker->lock);
worker->cur_work = work;
- spin_unlock(&worker->lock);
+ raw_spin_unlock(&worker->lock);
}
static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work);
@@ -814,7 +814,7 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
refcount_set(&worker->ref, 1);
worker->wqe = wqe;
- spin_lock_init(&worker->lock);
+ raw_spin_lock_init(&worker->lock);
init_completion(&worker->ref_done);
if (index == IO_WQ_ACCT_BOUND)
@@ -980,13 +980,13 @@ static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
* Hold the lock to avoid ->cur_work going out of scope, caller
* may dereference the passed in work.
*/
- spin_lock(&worker->lock);
+ raw_spin_lock(&worker->lock);
if (worker->cur_work &&
match->fn(worker->cur_work, match->data)) {
set_notify_signal(worker->task);
match->nr_running++;
}
- spin_unlock(&worker->lock);
+ raw_spin_unlock(&worker->lock);
return match->nr_running && !match->cancel_all;
}
--
2.34.1
* [PATCH 3/6] io-wq: invoke work cancelation with wqe->lock held
From: Jens Axboe @ 2022-01-19 2:42 UTC
To: io-uring; +Cc: Jens Axboe
io_wqe_cancel_pending_work() currently grabs the wqe->lock internally;
have the callers grab it upfront instead. For the running work
cancelation, grab the lock around it as well.
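
Callers then follow roughly this pattern (as in the hunks below):

    raw_spin_lock(&wqe->lock);
    io_wqe_cancel_pending_work(wqe, &match);
    raw_spin_unlock(&wqe->lock);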
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io-wq.c | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/fs/io-wq.c b/fs/io-wq.c
index c369910de793..a92fbdc8bea3 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -1038,17 +1038,16 @@ static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
{
int i;
retry:
- raw_spin_lock(&wqe->lock);
for (i = 0; i < IO_WQ_ACCT_NR; i++) {
struct io_wqe_acct *acct = io_get_acct(wqe, i == 0);
if (io_acct_cancel_pending_work(wqe, acct, match)) {
+ raw_spin_lock(&wqe->lock);
if (match->cancel_all)
goto retry;
- return;
+ break;
}
}
- raw_spin_unlock(&wqe->lock);
}
static void io_wqe_cancel_running_work(struct io_wqe *wqe,
@@ -1077,7 +1076,9 @@ enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
for_each_node(node) {
struct io_wqe *wqe = wq->wqes[node];
+ raw_spin_lock(&wqe->lock);
io_wqe_cancel_pending_work(wqe, &match);
+ raw_spin_unlock(&wqe->lock);
if (match.nr_pending && !match.cancel_all)
return IO_WQ_CANCEL_OK;
}
@@ -1091,7 +1092,9 @@ enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
for_each_node(node) {
struct io_wqe *wqe = wq->wqes[node];
+ raw_spin_lock(&wqe->lock);
io_wqe_cancel_running_work(wqe, &match);
+ raw_spin_unlock(&wqe->lock);
if (match.nr_running && !match.cancel_all)
return IO_WQ_CANCEL_RUNNING;
}
@@ -1262,7 +1265,9 @@ static void io_wq_destroy(struct io_wq *wq)
.fn = io_wq_work_match_all,
.cancel_all = true,
};
+ raw_spin_lock(&wqe->lock);
io_wqe_cancel_pending_work(wqe, &match);
+ raw_spin_unlock(&wqe->lock);
free_cpumask_var(wqe->cpu_mask);
kfree(wqe);
}
--
2.34.1
* [PATCH 4/6] io-wq: perform both unstarted and started work cancelations in one go
From: Jens Axboe @ 2022-01-19 2:42 UTC
To: io-uring; +Cc: Jens Axboe
Rather than splitting these into two separate lookups and matches,
combine them into one loop. This becomes important once we need to
guarantee that there is no window in which a pending work item isn't
discoverable in either state.
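
The combined per-node loop then looks roughly like this (as in the
diff below):

    for_each_node(node) {
            struct io_wqe *wqe = wq->wqes[node];

            raw_spin_lock(&wqe->lock);
            io_wqe_cancel_pending_work(wqe, &match);
            if (match.nr_pending && !match.cancel_all) {
                    raw_spin_unlock(&wqe->lock);
                    return IO_WQ_CANCEL_OK;
            }
            io_wqe_cancel_running_work(wqe, &match);
            raw_spin_unlock(&wqe->lock);
            if (match.nr_running && !match.cancel_all)
                    return IO_WQ_CANCEL_RUNNING;
    }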
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io-wq.c | 24 +++++++++++-------------
1 file changed, 11 insertions(+), 13 deletions(-)
diff --git a/fs/io-wq.c b/fs/io-wq.c
index a92fbdc8bea3..db150186ce94 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -1072,27 +1072,25 @@ enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
* First check pending list, if we're lucky we can just remove it
* from there. CANCEL_OK means that the work is returned as-new,
* no completion will be posted for it.
- */
- for_each_node(node) {
- struct io_wqe *wqe = wq->wqes[node];
-
- raw_spin_lock(&wqe->lock);
- io_wqe_cancel_pending_work(wqe, &match);
- raw_spin_unlock(&wqe->lock);
- if (match.nr_pending && !match.cancel_all)
- return IO_WQ_CANCEL_OK;
- }
-
- /*
- * Now check if a free (going busy) or busy worker has the work
+ *
+ * Then check if a free (going busy) or busy worker has the work
* currently running. If we find it there, we'll return CANCEL_RUNNING
* as an indication that we attempt to signal cancellation. The
* completion will run normally in this case.
+ *
+ * Do both of these while holding the wqe->lock, to ensure that
+ * we'll find a work item regardless of state.
*/
for_each_node(node) {
struct io_wqe *wqe = wq->wqes[node];
raw_spin_lock(&wqe->lock);
+ io_wqe_cancel_pending_work(wqe, &match);
+ if (match.nr_pending && !match.cancel_all) {
+ raw_spin_unlock(&wqe->lock);
+ return IO_WQ_CANCEL_OK;
+ }
+
io_wqe_cancel_running_work(wqe, &match);
raw_spin_unlock(&wqe->lock);
if (match.nr_running && !match.cancel_all)
--
2.34.1
* [PATCH 5/6] io-wq: add intermediate work step between pending list and active work
From: Jens Axboe @ 2022-01-19 2:42 UTC
To: io-uring; +Cc: Jens Axboe, Florian Fischer
We have a gap between when a worker removes an item from the work list
and when it gets assigned as the worker's active work. In that window,
the work item cannot be found by cancelations. It's a small window, but
it does exist.

Add a temporary pointer for a work item that is no longer on the
pending work list, but also not yet the active work. This is needed
because the wqe lock must be dropped between grabbing the work item and
marking it as active, while keeping signal-based cancelations properly
ordered across that window.
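
As a rough sketch, the resulting ordering in io_worker_handle_work()
becomes (names as in the diff below):

    raw_spin_lock(&wqe->lock);
    work = io_get_next_work(acct, worker);
    if (work) {
            __io_worker_busy(wqe, worker);

            /* make the item discoverable before wqe->lock is dropped */
            raw_spin_lock(&worker->lock);
            worker->next_work = work;
            raw_spin_unlock(&worker->lock);
    }
    raw_spin_unlock(&wqe->lock);
    ...
    io_assign_current_work(worker, work);   /* sets ->cur_work, clears ->next_work */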
Reported-by: Florian Fischer <[email protected]>
Link: https://lore.kernel.org/io-uring/20220118151337.fac6cthvbnu7icoc@pasture/
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io-wq.c | 34 +++++++++++++++++++++++++++++-----
1 file changed, 29 insertions(+), 5 deletions(-)
diff --git a/fs/io-wq.c b/fs/io-wq.c
index db150186ce94..1efb134c98b7 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -48,6 +48,7 @@ struct io_worker {
struct io_wqe *wqe;
struct io_wq_work *cur_work;
+ struct io_wq_work *next_work;
raw_spinlock_t lock;
struct completion ref_done;
@@ -530,6 +531,7 @@ static void io_assign_current_work(struct io_worker *worker,
raw_spin_lock(&worker->lock);
worker->cur_work = work;
+ worker->next_work = NULL;
raw_spin_unlock(&worker->lock);
}
@@ -554,9 +556,20 @@ static void io_worker_handle_work(struct io_worker *worker)
* clear the stalled flag.
*/
work = io_get_next_work(acct, worker);
- if (work)
+ if (work) {
__io_worker_busy(wqe, worker);
+ /*
+ * Make sure cancelation can find this, even before
+ * it becomes the active work. That avoids a window
+ * where the work has been removed from our general
+ * work list, but isn't yet discoverable as the
+ * current work item for this worker.
+ */
+ raw_spin_lock(&worker->lock);
+ worker->next_work = work;
+ raw_spin_unlock(&worker->lock);
+ }
raw_spin_unlock(&wqe->lock);
if (!work)
break;
@@ -972,6 +985,19 @@ void io_wq_hash_work(struct io_wq_work *work, void *val)
work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT));
}
+static bool __io_wq_worker_cancel(struct io_worker *worker,
+ struct io_cb_cancel_data *match,
+ struct io_wq_work *work)
+{
+ if (work && match->fn(work, match->data)) {
+ work->flags |= IO_WQ_WORK_CANCEL;
+ set_notify_signal(worker->task);
+ return true;
+ }
+
+ return false;
+}
+
static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
{
struct io_cb_cancel_data *match = data;
@@ -981,11 +1007,9 @@ static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
* may dereference the passed in work.
*/
raw_spin_lock(&worker->lock);
- if (worker->cur_work &&
- match->fn(worker->cur_work, match->data)) {
- set_notify_signal(worker->task);
+ if (__io_wq_worker_cancel(worker, match, worker->cur_work) ||
+ __io_wq_worker_cancel(worker, match, worker->next_work))
match->nr_running++;
- }
raw_spin_unlock(&worker->lock);
return match->nr_running && !match->cancel_all;
--
2.34.1
* [PATCH 6/6] io_uring: perform poll removal even if async work removal is successful
From: Jens Axboe @ 2022-01-19 2:42 UTC
To: io-uring; +Cc: Jens Axboe, Florian Fischer
An active work item can have poll armed, hence it's not enough to just
do the async work removal and return that value if it's anything other
than "not found". Rather than making poll removal special, just fall
through and do the remaining type lookups and removals.
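
As a rough sketch, io_try_cancel_userdata() then proceeds like this
(matching the diff below):

    ret = io_async_cancel_one(req->task->io_uring, sqe_addr, ctx);
    /* fall through even for -EALREADY, poll may still need unarming */
    if (!ret)
            return 0;

    spin_lock(&ctx->completion_lock);
    ret = io_poll_cancel(ctx, sqe_addr, false);
    if (ret != -ENOENT)
            goto out;
    spin_lock_irq(&ctx->timeout_lock);
    ret = io_timeout_cancel(ctx, sqe_addr);
    spin_unlock_irq(&ctx->timeout_lock);
out:
    spin_unlock(&ctx->completion_lock);
    return ret;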
Reported-by: Florian Fischer <[email protected]>
Link: https://lore.kernel.org/io-uring/20220118151337.fac6cthvbnu7icoc@pasture/
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 422d6de48688..e54c4127422e 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6386,16 +6386,21 @@ static int io_try_cancel_userdata(struct io_kiocb *req, u64 sqe_addr)
WARN_ON_ONCE(!io_wq_current_is_worker() && req->task != current);
ret = io_async_cancel_one(req->task->io_uring, sqe_addr, ctx);
- if (ret != -ENOENT)
- return ret;
+ /*
+ * Fall-through even for -EALREADY, as we may have poll armed
+ * that need unarming.
+ */
+ if (!ret)
+ return 0;
spin_lock(&ctx->completion_lock);
+ ret = io_poll_cancel(ctx, sqe_addr, false);
+ if (ret != -ENOENT)
+ goto out;
+
spin_lock_irq(&ctx->timeout_lock);
ret = io_timeout_cancel(ctx, sqe_addr);
spin_unlock_irq(&ctx->timeout_lock);
- if (ret != -ENOENT)
- goto out;
- ret = io_poll_cancel(ctx, sqe_addr, false);
out:
spin_unlock(&ctx->completion_lock);
return ret;
--
2.34.1