From: Jens Axboe <[email protected]>
To: io-uring <[email protected]>
Subject: [PATCH] io_uring: make ctx->timeout_lock a raw spinlock
Date: Tue, 17 Dec 2024 08:57:05 -0700 [thread overview]
Message-ID: <[email protected]> (raw)
Chase reports that their tester complaints about a locking context
mismatch:
=============================
[ BUG: Invalid wait context ]
6.13.0-rc1-gf137f14b7ccb-dirty #9 Not tainted
-----------------------------
syz.1.25198/182604 is trying to lock:
ffff88805e66a358 (&ctx->timeout_lock){-.-.}-{3:3}, at: spin_lock_irq
include/linux/spinlock.h:376 [inline]
ffff88805e66a358 (&ctx->timeout_lock){-.-.}-{3:3}, at:
io_match_task_safe io_uring/io_uring.c:218 [inline]
ffff88805e66a358 (&ctx->timeout_lock){-.-.}-{3:3}, at:
io_match_task_safe+0x187/0x250 io_uring/io_uring.c:204
other info that might help us debug this:
context-{5:5}
1 lock held by syz.1.25198/182604:
#0: ffff88802b7d48c0 (&acct->lock){+.+.}-{2:2}, at:
io_acct_cancel_pending_work+0x2d/0x6b0 io_uring/io-wq.c:1049
stack backtrace:
CPU: 0 UID: 0 PID: 182604 Comm: syz.1.25198 Not tainted
6.13.0-rc1-gf137f14b7ccb-dirty #9
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
Call Trace:
<TASK>
__dump_stack lib/dump_stack.c:94 [inline]
dump_stack_lvl+0x82/0xd0 lib/dump_stack.c:120
print_lock_invalid_wait_context kernel/locking/lockdep.c:4826 [inline]
check_wait_context kernel/locking/lockdep.c:4898 [inline]
__lock_acquire+0x883/0x3c80 kernel/locking/lockdep.c:5176
lock_acquire.part.0+0x11b/0x370 kernel/locking/lockdep.c:5849
__raw_spin_lock_irq include/linux/spinlock_api_smp.h:119 [inline]
_raw_spin_lock_irq+0x36/0x50 kernel/locking/spinlock.c:170
spin_lock_irq include/linux/spinlock.h:376 [inline]
io_match_task_safe io_uring/io_uring.c:218 [inline]
io_match_task_safe+0x187/0x250 io_uring/io_uring.c:204
io_acct_cancel_pending_work+0xb8/0x6b0 io_uring/io-wq.c:1052
io_wq_cancel_pending_work io_uring/io-wq.c:1074 [inline]
io_wq_cancel_cb+0xb0/0x390 io_uring/io-wq.c:1112
io_uring_try_cancel_requests+0x15e/0xd70 io_uring/io_uring.c:3062
io_uring_cancel_generic+0x6ec/0x8c0 io_uring/io_uring.c:3140
io_uring_files_cancel include/linux/io_uring.h:20 [inline]
do_exit+0x494/0x27a0 kernel/exit.c:894
do_group_exit+0xb3/0x250 kernel/exit.c:1087
get_signal+0x1d77/0x1ef0 kernel/signal.c:3017
arch_do_signal_or_restart+0x79/0x5b0 arch/x86/kernel/signal.c:337
exit_to_user_mode_loop kernel/entry/common.c:111 [inline]
exit_to_user_mode_prepare include/linux/entry-common.h:329 [inline]
__syscall_exit_to_user_mode_work kernel/entry/common.c:207 [inline]
syscall_exit_to_user_mode+0x150/0x2a0 kernel/entry/common.c:218
do_syscall_64+0xd8/0x250 arch/x86/entry/common.c:89
entry_SYSCALL_64_after_hwframe+0x77/0x7f
which is because io_uring has ctx->timeout_lock nesting inside the
io-wq acct lock, the latter of which is used from inside the scheduler
and hence is a raw spinlock, while the former is a "normal" spinlock
and can hence be sleeping on PREEMPT_RT.
Change ctx->timeout_lock to be a raw spinlock to solve this nesting
dependency on PREEMPT_RT=y.
Reported-by: chase xd <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
---
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 011860ade268..fd4cdb0860a2 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -345,7 +345,7 @@ struct io_ring_ctx {
/* timeouts */
struct {
- spinlock_t timeout_lock;
+ raw_spinlock_t timeout_lock;
struct list_head timeout_list;
struct list_head ltimeout_list;
unsigned cq_last_tm_flush;
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 06ff41484e29..605625e932eb 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -215,9 +215,9 @@ bool io_match_task_safe(struct io_kiocb *head, struct io_uring_task *tctx,
struct io_ring_ctx *ctx = head->ctx;
/* protect against races with linked timeouts */
- spin_lock_irq(&ctx->timeout_lock);
+ raw_spin_lock_irq(&ctx->timeout_lock);
matched = io_match_linked(head);
- spin_unlock_irq(&ctx->timeout_lock);
+ raw_spin_unlock_irq(&ctx->timeout_lock);
} else {
matched = io_match_linked(head);
}
@@ -333,7 +333,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
init_waitqueue_head(&ctx->cq_wait);
init_waitqueue_head(&ctx->poll_wq);
spin_lock_init(&ctx->completion_lock);
- spin_lock_init(&ctx->timeout_lock);
+ raw_spin_lock_init(&ctx->timeout_lock);
INIT_WQ_LIST(&ctx->iopoll_list);
INIT_LIST_HEAD(&ctx->io_buffers_comp);
INIT_LIST_HEAD(&ctx->defer_list);
@@ -498,10 +498,10 @@ static void io_prep_async_link(struct io_kiocb *req)
if (req->flags & REQ_F_LINK_TIMEOUT) {
struct io_ring_ctx *ctx = req->ctx;
- spin_lock_irq(&ctx->timeout_lock);
+ raw_spin_lock_irq(&ctx->timeout_lock);
io_for_each_link(cur, req)
io_prep_async_work(cur);
- spin_unlock_irq(&ctx->timeout_lock);
+ raw_spin_unlock_irq(&ctx->timeout_lock);
} else {
io_for_each_link(cur, req)
io_prep_async_work(cur);
diff --git a/io_uring/timeout.c b/io_uring/timeout.c
index f3d502717aeb..bbe58638eca7 100644
--- a/io_uring/timeout.c
+++ b/io_uring/timeout.c
@@ -74,10 +74,10 @@ static void io_timeout_complete(struct io_kiocb *req, struct io_tw_state *ts)
if (!io_timeout_finish(timeout, data)) {
if (io_req_post_cqe(req, -ETIME, IORING_CQE_F_MORE)) {
/* re-arm timer */
- spin_lock_irq(&ctx->timeout_lock);
+ raw_spin_lock_irq(&ctx->timeout_lock);
list_add(&timeout->list, ctx->timeout_list.prev);
hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
- spin_unlock_irq(&ctx->timeout_lock);
+ raw_spin_unlock_irq(&ctx->timeout_lock);
return;
}
}
@@ -109,7 +109,7 @@ __cold void io_flush_timeouts(struct io_ring_ctx *ctx)
u32 seq;
struct io_timeout *timeout, *tmp;
- spin_lock_irq(&ctx->timeout_lock);
+ raw_spin_lock_irq(&ctx->timeout_lock);
seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
@@ -134,7 +134,7 @@ __cold void io_flush_timeouts(struct io_ring_ctx *ctx)
io_kill_timeout(req, 0);
}
ctx->cq_last_tm_flush = seq;
- spin_unlock_irq(&ctx->timeout_lock);
+ raw_spin_unlock_irq(&ctx->timeout_lock);
}
static void io_req_tw_fail_links(struct io_kiocb *link, struct io_tw_state *ts)
@@ -200,9 +200,9 @@ void io_disarm_next(struct io_kiocb *req)
} else if (req->flags & REQ_F_LINK_TIMEOUT) {
struct io_ring_ctx *ctx = req->ctx;
- spin_lock_irq(&ctx->timeout_lock);
+ raw_spin_lock_irq(&ctx->timeout_lock);
link = io_disarm_linked_timeout(req);
- spin_unlock_irq(&ctx->timeout_lock);
+ raw_spin_unlock_irq(&ctx->timeout_lock);
if (link)
io_req_queue_tw_complete(link, -ECANCELED);
}
@@ -238,11 +238,11 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
struct io_ring_ctx *ctx = req->ctx;
unsigned long flags;
- spin_lock_irqsave(&ctx->timeout_lock, flags);
+ raw_spin_lock_irqsave(&ctx->timeout_lock, flags);
list_del_init(&timeout->list);
atomic_set(&req->ctx->cq_timeouts,
atomic_read(&req->ctx->cq_timeouts) + 1);
- spin_unlock_irqrestore(&ctx->timeout_lock, flags);
+ raw_spin_unlock_irqrestore(&ctx->timeout_lock, flags);
if (!(data->flags & IORING_TIMEOUT_ETIME_SUCCESS))
req_set_fail(req);
@@ -285,9 +285,9 @@ int io_timeout_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)
{
struct io_kiocb *req;
- spin_lock_irq(&ctx->timeout_lock);
+ raw_spin_lock_irq(&ctx->timeout_lock);
req = io_timeout_extract(ctx, cd);
- spin_unlock_irq(&ctx->timeout_lock);
+ raw_spin_unlock_irq(&ctx->timeout_lock);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -330,7 +330,7 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
struct io_ring_ctx *ctx = req->ctx;
unsigned long flags;
- spin_lock_irqsave(&ctx->timeout_lock, flags);
+ raw_spin_lock_irqsave(&ctx->timeout_lock, flags);
prev = timeout->head;
timeout->head = NULL;
@@ -345,7 +345,7 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
}
list_del(&timeout->list);
timeout->prev = prev;
- spin_unlock_irqrestore(&ctx->timeout_lock, flags);
+ raw_spin_unlock_irqrestore(&ctx->timeout_lock, flags);
req->io_task_work.func = io_req_task_link_timeout;
io_req_task_work_add(req);
@@ -472,12 +472,12 @@ int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags)
} else {
enum hrtimer_mode mode = io_translate_timeout_mode(tr->flags);
- spin_lock_irq(&ctx->timeout_lock);
+ raw_spin_lock_irq(&ctx->timeout_lock);
if (tr->ltimeout)
ret = io_linked_timeout_update(ctx, tr->addr, &tr->ts, mode);
else
ret = io_timeout_update(ctx, tr->addr, &tr->ts, mode);
- spin_unlock_irq(&ctx->timeout_lock);
+ raw_spin_unlock_irq(&ctx->timeout_lock);
}
if (ret < 0)
@@ -572,7 +572,7 @@ int io_timeout(struct io_kiocb *req, unsigned int issue_flags)
struct list_head *entry;
u32 tail, off = timeout->off;
- spin_lock_irq(&ctx->timeout_lock);
+ raw_spin_lock_irq(&ctx->timeout_lock);
/*
* sqe->off holds how many events that need to occur for this
@@ -611,7 +611,7 @@ int io_timeout(struct io_kiocb *req, unsigned int issue_flags)
list_add(&timeout->list, entry);
data->timer.function = io_timeout_fn;
hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode);
- spin_unlock_irq(&ctx->timeout_lock);
+ raw_spin_unlock_irq(&ctx->timeout_lock);
return IOU_ISSUE_SKIP_COMPLETE;
}
@@ -620,7 +620,7 @@ void io_queue_linked_timeout(struct io_kiocb *req)
struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
struct io_ring_ctx *ctx = req->ctx;
- spin_lock_irq(&ctx->timeout_lock);
+ raw_spin_lock_irq(&ctx->timeout_lock);
/*
* If the back reference is NULL, then our linked request finished
* before we got a chance to setup the timer
@@ -633,7 +633,7 @@ void io_queue_linked_timeout(struct io_kiocb *req)
data->mode);
list_add_tail(&timeout->list, &ctx->ltimeout_list);
}
- spin_unlock_irq(&ctx->timeout_lock);
+ raw_spin_unlock_irq(&ctx->timeout_lock);
/* drop submission reference */
io_put_req(req);
}
@@ -668,7 +668,7 @@ __cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct io_uring_task *tctx
* timeout_lockfirst to keep locking ordering.
*/
spin_lock(&ctx->completion_lock);
- spin_lock_irq(&ctx->timeout_lock);
+ raw_spin_lock_irq(&ctx->timeout_lock);
list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
struct io_kiocb *req = cmd_to_io_kiocb(timeout);
@@ -676,7 +676,7 @@ __cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct io_uring_task *tctx
io_kill_timeout(req, -ECANCELED))
canceled++;
}
- spin_unlock_irq(&ctx->timeout_lock);
+ raw_spin_unlock_irq(&ctx->timeout_lock);
spin_unlock(&ctx->completion_lock);
return canceled != 0;
}
--
Jens Axboe
reply other threads:[~2024-12-17 15:57 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox