From: Jens Axboe <[email protected]>
To: [email protected], [email protected]
Subject: Re: Canceled read requests never completed
Date: Tue, 18 Jan 2022 16:36:59 -0700
Message-ID: <[email protected]>
In-Reply-To: <20220118200549.qybt7fgfqznscidx@pasture>
On 1/18/22 1:05 PM, Florian Fischer wrote:
>>> After reading the io_uring_enter(2) man page, an IORING_OP_ASYNC_CANCEL return value of -EALREADY apparently
>>> may not cause the request to terminate. At least that is our interpretation of "…res field will contain -EALREADY.
>>> In this case, the request may or may not terminate."
>>
>> I took a look at this, and my theory is that the request cancelation
>> ends up happening right in between when the work item is moved from
>> the work list to the worker itself. The way the async queue works,
>> the work item is sitting in a list until it gets assigned by a worker.
>> When that assignment happens, it's removed from the general work list
>> and then assigned to the worker itself. There's a small gap there where
>> the work cannot be found in the general list, and isn't yet findable in
>> the worker itself either.
>>
>> Do you always see -ENOENT from the cancel when you get the hang
>> condition?
>
> No, we also (and actually more commonly) observe cancel returning
> -EALREADY and the canceled read request never gets completed.
>
> As shown in the log snippet I included below.
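
For reference, the man page text quoted at the top distinguishes three results for the cancel request's CQE. Below is a minimal sketch of how a consumer might tell them apart (a hypothetical helper using liburing, not code from Florian's program):

#include <errno.h>
#include <stdio.h>
#include <liburing.h>

/* Interpret the CQE posted for an IORING_OP_ASYNC_CANCEL request,
   per the io_uring_enter(2) semantics quoted above. */
void report_cancel_result(const struct io_uring_cqe *cqe)
{
	switch (cqe->res) {
	case 0:
		/* Target found and canceled; it still posts its own CQE,
		   normally with res == -ECANCELED. */
		puts("canceled, wait for the target's own completion");
		break;
	case -ENOENT:
		/* No request matching the given user_data was found. */
		puts("no matching request");
		break;
	case -EALREADY:
		/* Target was already executing; it may or may not terminate. */
		puts("already running, completion not guaranteed");
		break;
	default:
		printf("cancel failed: %d\n", cqe->res);
		break;
	}
}

The -EALREADY case is the one at issue in this thread: the cancel found the read already running, but the read's own completion then never arrives.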
I think there are a couple of different cases here. Can you try the
below patch? It's against current -git.
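
For context, a rough sketch of the kind of test that exercises both cases (a guess at the shape of the reproducer, not Florian's actual program): queue a read on an empty pipe with IOSQE_ASYNC so it is punted to io-wq, then cancel it by user_data and collect both CQEs. Without a fix, the read's CQE can be missing when the cancel completed with -EALREADY. Note that depending on the liburing version, io_uring_prep_cancel() takes the user_data as a pointer or as a 64-bit integer.

#include <liburing.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define READ_TAG   ((void *) 1)
#define CANCEL_TAG ((void *) 2)

int main(void)
{
	struct io_uring_cqe *cqe;
	struct io_uring_sqe *sqe;
	struct io_uring ring;
	char buf[64];
	int fds[2], i;

	if (pipe(fds) || io_uring_queue_init(8, &ring, 0))
		exit(1);

	/* Nothing is ever written to fds[1], so this read blocks.
	   IOSQE_ASYNC punts it straight to io-wq, the path the patch touches. */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_read(sqe, fds[0], buf, sizeof(buf), 0);
	io_uring_sqe_set_flags(sqe, IOSQE_ASYNC);
	io_uring_sqe_set_data(sqe, READ_TAG);
	io_uring_submit(&ring);

	/* Now try to cancel it by user_data. */
	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_cancel(sqe, READ_TAG, 0);
	io_uring_sqe_set_data(sqe, CANCEL_TAG);
	io_uring_submit(&ring);

	/* Two completions are expected: the cancel's, and the read's
	   (normally with -ECANCELED). The reported hang is the read's CQE
	   never arriving after the cancel returned -EALREADY. */
	for (i = 0; i < 2; i++) {
		if (io_uring_wait_cqe(&ring, &cqe))
			break;
		printf("%s: res %d\n",
		       io_uring_cqe_get_data(cqe) == READ_TAG ? "read" : "cancel",
		       cqe->res);
		io_uring_cqe_seen(&ring, cqe);
	}

	io_uring_queue_exit(&ring);
	return 0;
}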
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 5c4f582d6549..e8f8a5ee93c1 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -48,7 +48,8 @@ struct io_worker {
struct io_wqe *wqe;
struct io_wq_work *cur_work;
- spinlock_t lock;
+ struct io_wq_work *next_work;
+ raw_spinlock_t lock;
struct completion ref_done;
@@ -529,9 +530,11 @@ static void io_assign_current_work(struct io_worker *worker,
cond_resched();
}
- spin_lock(&worker->lock);
+ WARN_ON_ONCE(work != worker->next_work);
+ raw_spin_lock(&worker->lock);
worker->cur_work = work;
- spin_unlock(&worker->lock);
+ worker->next_work = NULL;
+ raw_spin_unlock(&worker->lock);
}
static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work);
@@ -555,9 +558,20 @@ static void io_worker_handle_work(struct io_worker *worker)
* clear the stalled flag.
*/
work = io_get_next_work(acct, worker);
- if (work)
+ if (work) {
__io_worker_busy(wqe, worker, work);
+ /*
+ * Make sure cancelation can find this, even before
+ * it becomes the active work. That avoids a window
+ * where the work has been removed from our general
+ * work list, but isn't yet discoverable as the
+ * current work item for this worker.
+ */
+ raw_spin_lock(&worker->lock);
+ worker->next_work = work;
+ raw_spin_unlock(&worker->lock);
+ }
raw_spin_unlock(&wqe->lock);
if (!work)
break;
@@ -815,7 +829,7 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
refcount_set(&worker->ref, 1);
worker->wqe = wqe;
- spin_lock_init(&worker->lock);
+ raw_spin_lock_init(&worker->lock);
init_completion(&worker->ref_done);
if (index == IO_WQ_ACCT_BOUND)
@@ -973,6 +987,19 @@ void io_wq_hash_work(struct io_wq_work *work, void *val)
work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT));
}
+static bool __io_wq_worker_cancel(struct io_worker *worker,
+ struct io_cb_cancel_data *match,
+ struct io_wq_work *work)
+{
+ if (work && match->fn(work, match->data)) {
+ work->flags |= IO_WQ_WORK_CANCEL;
+ set_notify_signal(worker->task);
+ return true;
+ }
+
+ return false;
+}
+
static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
{
struct io_cb_cancel_data *match = data;
@@ -981,13 +1008,11 @@ static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
* Hold the lock to avoid ->cur_work going out of scope, caller
* may dereference the passed in work.
*/
- spin_lock(&worker->lock);
- if (worker->cur_work &&
- match->fn(worker->cur_work, match->data)) {
- set_notify_signal(worker->task);
+ raw_spin_lock(&worker->lock);
+ if (__io_wq_worker_cancel(worker, match, worker->cur_work) ||
+ __io_wq_worker_cancel(worker, match, worker->next_work))
match->nr_running++;
- }
- spin_unlock(&worker->lock);
+ raw_spin_unlock(&worker->lock);
return match->nr_running && !match->cancel_all;
}
@@ -1039,17 +1064,16 @@ static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
{
int i;
retry:
- raw_spin_lock(&wqe->lock);
for (i = 0; i < IO_WQ_ACCT_NR; i++) {
struct io_wqe_acct *acct = io_get_acct(wqe, i == 0);
if (io_acct_cancel_pending_work(wqe, acct, match)) {
+ raw_spin_lock(&wqe->lock);
if (match->cancel_all)
goto retry;
- return;
+ break;
}
}
- raw_spin_unlock(&wqe->lock);
}
static void io_wqe_cancel_running_work(struct io_wqe *wqe,
@@ -1078,7 +1102,9 @@ enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
for_each_node(node) {
struct io_wqe *wqe = wq->wqes[node];
+ raw_spin_lock(&wqe->lock);
io_wqe_cancel_pending_work(wqe, &match);
+ raw_spin_unlock(&wqe->lock);
if (match.nr_pending && !match.cancel_all)
return IO_WQ_CANCEL_OK;
}
@@ -1092,7 +1118,15 @@ enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
for_each_node(node) {
struct io_wqe *wqe = wq->wqes[node];
+ raw_spin_lock(&wqe->lock);
+ io_wqe_cancel_pending_work(wqe, &match);
+ if (match.nr_pending && !match.cancel_all) {
+ raw_spin_unlock(&wqe->lock);
+ return IO_WQ_CANCEL_OK;
+ }
+
io_wqe_cancel_running_work(wqe, &match);
+ raw_spin_unlock(&wqe->lock);
if (match.nr_running && !match.cancel_all)
return IO_WQ_CANCEL_RUNNING;
}
@@ -1263,7 +1297,9 @@ static void io_wq_destroy(struct io_wq *wq)
.fn = io_wq_work_match_all,
.cancel_all = true,
};
+ raw_spin_lock(&wqe->lock);
io_wqe_cancel_pending_work(wqe, &match);
+ raw_spin_unlock(&wqe->lock);
free_cpumask_var(wqe->cpu_mask);
kfree(wqe);
}
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 422d6de48688..49f115a2dec4 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6386,7 +6386,8 @@ static int io_try_cancel_userdata(struct io_kiocb *req, u64 sqe_addr)
WARN_ON_ONCE(!io_wq_current_is_worker() && req->task != current);
ret = io_async_cancel_one(req->task->io_uring, sqe_addr, ctx);
- if (ret != -ENOENT)
+ //if (ret != -ENOENT)
+ if (ret == 0)
return ret;
spin_lock(&ctx->completion_lock);
@@ -6892,7 +6893,8 @@ static void io_wq_submit_work(struct io_wq_work *work)
io_queue_linked_timeout(timeout);
/* either cancelled or io-wq is dying, so don't touch tctx->iowq */
- if (work->flags & IO_WQ_WORK_CANCEL) {
+ if (work->flags & IO_WQ_WORK_CANCEL ||
+ test_tsk_thread_flag(current, TIF_NOTIFY_SIGNAL)) {
io_req_task_queue_fail(req, -ECANCELED);
return;
}
--
Jens Axboe