From: Jens Axboe <[email protected]>
To: [email protected]
Cc: Jens Axboe <[email protected]>
Subject: [PATCH 13/27] io_uring: SQPOLL parking fixes
Date: Wed, 10 Mar 2021 15:43:44 -0700 [thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>
We keep running into weird dependency issues between the sqd lock and
the parking state. Disentangle the SQPOLL thread from the last bits of
the kthread parking inheritance, and just replace the parking state,
and two associated locks, with a single rw mutex. The SQPOLL thread
keeps the mutex for read all the time, except if someone has marked us
needing to park. Then we drop/re-acquire and try again.
This greatly simplifies the parking state machine (by just getting rid
of it), and makes it a lot more obvious how it works - if you need to
modify the ctx list, then you simply park the thread which will grab
the lock for writing.
Fold in fix from Hillf Danton on not setting STOP on a fatal signal.
Fixes: e54945ae947f ("io_uring: SQPOLL stop error handling fixes")
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 133 +++++++++++++-------------------------------------
1 file changed, 34 insertions(+), 99 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 7cf96be691d8..2a3542b487ff 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -258,12 +258,11 @@ enum {
struct io_sq_data {
refcount_t refs;
- struct mutex lock;
+ struct rw_semaphore rw_lock;
/* ctx's that are using this sqd */
struct list_head ctx_list;
struct list_head ctx_new_list;
- struct mutex ctx_lock;
struct task_struct *thread;
struct wait_queue_head wait;
@@ -274,7 +273,6 @@ struct io_sq_data {
unsigned long state;
struct completion startup;
- struct completion parked;
struct completion exited;
};
@@ -6638,45 +6636,6 @@ static void io_sqd_init_new(struct io_sq_data *sqd)
io_sqd_update_thread_idle(sqd);
}
-static bool io_sq_thread_should_stop(struct io_sq_data *sqd)
-{
- return test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
-}
-
-static bool io_sq_thread_should_park(struct io_sq_data *sqd)
-{
- return test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
-}
-
-static void io_sq_thread_parkme(struct io_sq_data *sqd)
-{
- for (;;) {
- /*
- * TASK_PARKED is a special state; we must serialize against
- * possible pending wakeups to avoid store-store collisions on
- * task->state.
- *
- * Such a collision might possibly result in the task state
- * changin from TASK_PARKED and us failing the
- * wait_task_inactive() in kthread_park().
- */
- set_special_state(TASK_PARKED);
- if (!test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state))
- break;
-
- /*
- * Thread is going to call schedule(), do not preempt it,
- * or the caller of kthread_park() may spend more time in
- * wait_task_inactive().
- */
- preempt_disable();
- complete(&sqd->parked);
- schedule_preempt_disabled();
- preempt_enable();
- }
- __set_current_state(TASK_RUNNING);
-}
-
static int io_sq_thread(void *data)
{
struct io_sq_data *sqd = data;
@@ -6697,17 +6656,16 @@ static int io_sq_thread(void *data)
wait_for_completion(&sqd->startup);
- while (!io_sq_thread_should_stop(sqd)) {
+ down_read(&sqd->rw_lock);
+
+ while (!test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state)) {
int ret;
bool cap_entries, sqt_spin, needs_sched;
- /*
- * Any changes to the sqd lists are synchronized through the
- * thread parking. This synchronizes the thread vs users,
- * the users are synchronized on the sqd->ctx_lock.
- */
- if (io_sq_thread_should_park(sqd)) {
- io_sq_thread_parkme(sqd);
+ if (test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state)) {
+ up_read(&sqd->rw_lock);
+ cond_resched();
+ down_read(&sqd->rw_lock);
continue;
}
if (unlikely(!list_empty(&sqd->ctx_new_list))) {
@@ -6752,12 +6710,14 @@ static int io_sq_thread(void *data)
}
}
- if (needs_sched && !io_sq_thread_should_park(sqd)) {
+ if (needs_sched && !test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state)) {
list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
io_ring_set_wakeup_flag(ctx);
+ up_read(&sqd->rw_lock);
schedule();
try_to_freeze();
+ down_read(&sqd->rw_lock);
list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
io_ring_clear_wakeup_flag(ctx);
}
@@ -6768,28 +6728,16 @@ static int io_sq_thread(void *data)
list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
io_uring_cancel_sqpoll(ctx);
+ up_read(&sqd->rw_lock);
io_run_task_work();
- /*
- * Ensure that we park properly if racing with someone trying to park
- * while we're exiting. If we fail to grab the lock, check park and
- * park if necessary. The ordering with the park bit and the lock
- * ensures that we catch this reliably.
- */
- if (!mutex_trylock(&sqd->lock)) {
- if (io_sq_thread_should_park(sqd))
- io_sq_thread_parkme(sqd);
- mutex_lock(&sqd->lock);
- }
-
+ down_write(&sqd->rw_lock);
sqd->thread = NULL;
- list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
+ list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
io_ring_set_wakeup_flag(ctx);
- }
-
+ up_write(&sqd->rw_lock);
complete(&sqd->exited);
- mutex_unlock(&sqd->lock);
do_exit(0);
}
@@ -7088,44 +7036,40 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
}
static void io_sq_thread_unpark(struct io_sq_data *sqd)
- __releases(&sqd->lock)
+ __releases(&sqd->rw_lock)
{
if (sqd->thread == current)
return;
clear_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
- if (sqd->thread)
- wake_up_state(sqd->thread, TASK_PARKED);
- mutex_unlock(&sqd->lock);
+ up_write(&sqd->rw_lock);
}
static void io_sq_thread_park(struct io_sq_data *sqd)
- __acquires(&sqd->lock)
+ __acquires(&sqd->rw_lock)
{
if (sqd->thread == current)
return;
set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
- mutex_lock(&sqd->lock);
- if (sqd->thread) {
+ down_write(&sqd->rw_lock);
+ /* set again for consistency, in case concurrent parks are happening */
+ set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
+ if (sqd->thread)
wake_up_process(sqd->thread);
- wait_for_completion(&sqd->parked);
- }
}
static void io_sq_thread_stop(struct io_sq_data *sqd)
{
if (test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state))
return;
- mutex_lock(&sqd->lock);
- if (sqd->thread) {
- set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
- WARN_ON_ONCE(test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state));
- wake_up_process(sqd->thread);
- mutex_unlock(&sqd->lock);
- wait_for_completion(&sqd->exited);
- WARN_ON_ONCE(sqd->thread);
- } else {
- mutex_unlock(&sqd->lock);
+ down_write(&sqd->rw_lock);
+ if (!sqd->thread) {
+ up_write(&sqd->rw_lock);
+ return;
}
+ set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
+ wake_up_process(sqd->thread);
+ up_write(&sqd->rw_lock);
+ wait_for_completion(&sqd->exited);
}
static void io_put_sq_data(struct io_sq_data *sqd)
@@ -7142,18 +7086,13 @@ static void io_sq_thread_finish(struct io_ring_ctx *ctx)
if (sqd) {
complete(&sqd->startup);
- if (sqd->thread) {
+ if (sqd->thread)
wait_for_completion(&ctx->sq_thread_comp);
- io_sq_thread_park(sqd);
- }
- mutex_lock(&sqd->ctx_lock);
+ io_sq_thread_park(sqd);
list_del(&ctx->sqd_list);
io_sqd_update_thread_idle(sqd);
- mutex_unlock(&sqd->ctx_lock);
-
- if (sqd->thread)
- io_sq_thread_unpark(sqd);
+ io_sq_thread_unpark(sqd);
io_put_sq_data(sqd);
ctx->sq_data = NULL;
@@ -7202,11 +7141,9 @@ static struct io_sq_data *io_get_sq_data(struct io_uring_params *p)
refcount_set(&sqd->refs, 1);
INIT_LIST_HEAD(&sqd->ctx_list);
INIT_LIST_HEAD(&sqd->ctx_new_list);
- mutex_init(&sqd->ctx_lock);
- mutex_init(&sqd->lock);
+ init_rwsem(&sqd->rw_lock);
init_waitqueue_head(&sqd->wait);
init_completion(&sqd->startup);
- init_completion(&sqd->parked);
init_completion(&sqd->exited);
return sqd;
}
@@ -7880,9 +7817,7 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
ctx->sq_creds = get_current_cred();
ctx->sq_data = sqd;
io_sq_thread_park(sqd);
- mutex_lock(&sqd->ctx_lock);
list_add(&ctx->sqd_list, &sqd->ctx_new_list);
- mutex_unlock(&sqd->ctx_lock);
io_sq_thread_unpark(sqd);
ctx->sq_thread_idle = msecs_to_jiffies(p->sq_thread_idle);
--
2.30.2
next prev parent reply other threads:[~2021-03-10 22:45 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-03-10 22:43 [PATCHSET for 5.12] Patches queued up for 5.12 Jens Axboe
2021-03-10 22:43 ` [PATCH 01/27] io-wq: fix race in freeing 'wq' and worker access Jens Axboe
2021-03-10 22:43 ` [PATCH 02/27] io-wq: always track creds for async issue Jens Axboe
2021-03-10 22:43 ` [PATCH 03/27] io_uring: make del_task_file more forgiving Jens Axboe
2021-03-10 22:43 ` [PATCH 04/27] io_uring: introduce ctx to tctx back map Jens Axboe
2021-03-10 22:43 ` [PATCH 05/27] io_uring: do ctx initiated file note removal Jens Axboe
2021-03-10 22:43 ` [PATCH 06/27] io_uring: don't take task ring-file notes Jens Axboe
2021-03-10 22:43 ` [PATCH 07/27] io_uring: index io_uring->xa by ctx not file Jens Axboe
2021-03-10 22:43 ` [PATCH 08/27] io_uring: warn when ring exit takes too long Jens Axboe
2021-03-10 22:43 ` [PATCH 09/27] io_uring: cancel reqs of all iowq's on ring exit Jens Axboe
2021-03-10 22:43 ` [PATCH 10/27] io-wq: warn on creating manager while exiting Jens Axboe
2021-03-10 22:43 ` [PATCH 11/27] io_uring: run __io_sq_thread() with the initial creds from io_uring_setup() Jens Axboe
2021-03-10 22:43 ` [PATCH 12/27] io_uring: kill io_sq_thread_fork() and return -EOWNERDEAD if the sq_thread is gone Jens Axboe
2021-03-10 22:43 ` Jens Axboe [this message]
2021-03-10 22:43 ` [PATCH 14/27] io_uring: fix unrelated ctx reqs cancellation Jens Axboe
2021-03-10 22:43 ` [PATCH 15/27] io_uring: clean R_DISABLED startup mess Jens Axboe
2021-03-10 22:43 ` [PATCH 16/27] io_uring: Convert personality_idr to XArray Jens Axboe
2021-03-10 22:43 ` [PATCH 17/27] io-wq: remove unused 'user' member of io_wq Jens Axboe
2021-03-10 22:43 ` [PATCH 18/27] io_uring: fix io_sq_offload_create error handling Jens Axboe
2021-03-10 22:43 ` [PATCH 19/27] io_uring: add io_disarm_next() helper Jens Axboe
2021-03-10 22:43 ` [PATCH 20/27] io_uring: fix complete_post races for linked req Jens Axboe
2021-03-10 22:43 ` [PATCH 21/27] io-wq: fix ref leak for req in case of exit cancelations Jens Axboe
2021-03-10 22:43 ` [PATCH 22/27] io_uring: move all io_kiocb init early in io_init_req() Jens Axboe
2021-03-10 22:43 ` [PATCH 23/27] io_uring: remove unneeded variable 'ret' Jens Axboe
2021-03-10 22:43 ` [PATCH 24/27] io_uring: always wait for sqd exited when stopping SQPOLL thread Jens Axboe
2021-03-10 22:43 ` [PATCH 25/27] kernel: make IO threads unfreezable by default Jens Axboe
2021-03-10 22:43 ` [PATCH 26/27] io_uring: fix invalid ctx->sq_thread_idle Jens Axboe
2021-03-10 22:43 ` [PATCH 27/27] io_uring: remove indirect ctx into sqo injection Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox