* [PATCH 1/5] task_work: allow TWA_SIGNAL without a rescheduling IPI
2022-04-22 21:42 [PATCHSET next 0/5] Add support for non-IPI task_work Jens Axboe
@ 2022-04-22 21:42 ` Jens Axboe
2022-04-22 21:42 ` [PATCH 2/5] io_uring: serialize ctx->rings->sq_flags with cmpxchg() Jens Axboe
` (3 subsequent siblings)
4 siblings, 0 replies; 9+ messages in thread
From: Jens Axboe @ 2022-04-22 21:42 UTC (permalink / raw)
To: io-uring; +Cc: Jens Axboe
Some use cases don't always need an IPI when sending a TWA_SIGNAL
notification. Add TWA_SIGNAL_NO_IPI, which is just like TWA_SIGNAL,
except it doesn't send an IPI to the target task. It merely sets
TIF_NOTIFY_SIGNAL and wakes up the task.
Signed-off-by: Jens Axboe <[email protected]>
---
include/linux/sched/signal.h | 13 +++++++++++--
include/linux/task_work.h | 1 +
kernel/task_work.c | 15 ++++++++++-----
3 files changed, 22 insertions(+), 7 deletions(-)
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 3c8b34876744..66b689f6cfcb 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -355,14 +355,23 @@ static inline void clear_notify_signal(void)
smp_mb__after_atomic();
}
+/*
+ * Returns 'true' if kick_process() is needed to force a transition from
+ * user -> kernel to guarantee expedient run of TWA_SIGNAL based task_work.
+ */
+static inline bool __set_notify_signal(struct task_struct *task)
+{
+ return !test_and_set_tsk_thread_flag(task, TIF_NOTIFY_SIGNAL) &&
+ !wake_up_state(task, TASK_INTERRUPTIBLE);
+}
+
/*
* Called to break out of interruptible wait loops, and enter the
* exit_to_user_mode_loop().
*/
static inline void set_notify_signal(struct task_struct *task)
{
- if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_SIGNAL) &&
- !wake_up_state(task, TASK_INTERRUPTIBLE))
+ if (__set_notify_signal(task))
kick_process(task);
}
diff --git a/include/linux/task_work.h b/include/linux/task_work.h
index 897494b597ba..795ef5a68429 100644
--- a/include/linux/task_work.h
+++ b/include/linux/task_work.h
@@ -17,6 +17,7 @@ enum task_work_notify_mode {
TWA_NONE,
TWA_RESUME,
TWA_SIGNAL,
+ TWA_SIGNAL_NO_IPI,
};
static inline bool task_work_pending(struct task_struct *task)
diff --git a/kernel/task_work.c b/kernel/task_work.c
index c59e1a49bc40..fa8fdd04aa17 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -13,11 +13,13 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */
*
* Queue @work for task_work_run() below and notify the @task if @notify
* is @TWA_RESUME or @TWA_SIGNAL. @TWA_SIGNAL works like signals, in that the
- * it will interrupt the targeted task and run the task_work. @TWA_RESUME
- * work is run only when the task exits the kernel and returns to user mode,
- * or before entering guest mode. Fails if the @task is exiting/exited and thus
- * it can't process this @work. Otherwise @work->func() will be called when the
- * @task goes through one of the aforementioned transitions, or exits.
+ * it will interrupt the targeted task and run the task_work. @TWA_SIGNAL_NO_IPI
+ * works like @TWA_SIGNAL, except it doesn't send a reschedule IPI to force the
+ * targeted task to reschedule and run task_work. @TWA_RESUME work is run only
+ * when the task exits the kernel and returns to user mode, or before entering
+ * guest mode. Fails if the @task is exiting/exited and thus it can't process
+ * this @work. Otherwise @work->func() will be called when the @task goes
+ * through one of the aforementioned transitions, or exits.
*
* If the targeted task is exiting, then an error is returned and the work item
* is not queued. It's up to the caller to arrange for an alternative mechanism
@@ -53,6 +55,9 @@ int task_work_add(struct task_struct *task, struct callback_head *work,
case TWA_SIGNAL:
set_notify_signal(task);
break;
+ case TWA_SIGNAL_NO_IPI:
+ __set_notify_signal(task);
+ break;
default:
WARN_ON_ONCE(1);
break;
--
2.35.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 2/5] io_uring: serialize ctx->rings->sq_flags with cmpxchg()
2022-04-22 21:42 [PATCHSET next 0/5] Add support for non-IPI task_work Jens Axboe
2022-04-22 21:42 ` [PATCH 1/5] task_work: allow TWA_SIGNAL without a rescheduling IPI Jens Axboe
@ 2022-04-22 21:42 ` Jens Axboe
2022-04-23 8:06 ` Pavel Begunkov
2022-04-22 21:42 ` [PATCH 3/5] io-wq: use __set_notify_signal() to wake workers Jens Axboe
` (2 subsequent siblings)
4 siblings, 1 reply; 9+ messages in thread
From: Jens Axboe @ 2022-04-22 21:42 UTC (permalink / raw)
To: io-uring; +Cc: Jens Axboe
Rather than require ctx->completion_lock for ensuring that we don't
clobber the flags, use try_cmpxchg() instead. This removes the need
to grab the completion_lock, in preparation for needing to set or
clear sq_flags when we don't know the status of this lock.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 54 ++++++++++++++++++++++++++++++---------------------
1 file changed, 32 insertions(+), 22 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 626bf840bed2..38e58fe4963d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1999,6 +1999,34 @@ static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
io_cqring_wake(ctx);
}
+static void io_ring_sq_flag_clear(struct io_ring_ctx *ctx, unsigned int flag)
+{
+ struct io_rings *rings = ctx->rings;
+ unsigned int oldf, newf;
+
+ do {
+ oldf = READ_ONCE(rings->sq_flags);
+
+ if (!(oldf & flag))
+ break;
+ newf = oldf & ~flag;
+ } while (!try_cmpxchg(&rings->sq_flags, &oldf, newf));
+}
+
+static void io_ring_sq_flag_set(struct io_ring_ctx *ctx, unsigned int flag)
+{
+ struct io_rings *rings = ctx->rings;
+ unsigned int oldf, newf;
+
+ do {
+ oldf = READ_ONCE(rings->sq_flags);
+
+ if (oldf & flag)
+ break;
+ newf = oldf | flag;
+ } while (!try_cmpxchg(&rings->sq_flags, &oldf, newf));
+}
+
/* Returns true if there are no backlogged entries after the flush */
static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
{
@@ -2030,8 +2058,7 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
all_flushed = list_empty(&ctx->cq_overflow_list);
if (all_flushed) {
clear_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq);
- WRITE_ONCE(ctx->rings->sq_flags,
- ctx->rings->sq_flags & ~IORING_SQ_CQ_OVERFLOW);
+ io_ring_sq_flag_clear(ctx, IORING_SQ_CQ_OVERFLOW);
}
io_commit_cqring(ctx);
@@ -8105,23 +8132,6 @@ static inline bool io_sqd_events_pending(struct io_sq_data *sqd)
return READ_ONCE(sqd->state);
}
-static inline void io_ring_set_wakeup_flag(struct io_ring_ctx *ctx)
-{
- /* Tell userspace we may need a wakeup call */
- spin_lock(&ctx->completion_lock);
- WRITE_ONCE(ctx->rings->sq_flags,
- ctx->rings->sq_flags | IORING_SQ_NEED_WAKEUP);
- spin_unlock(&ctx->completion_lock);
-}
-
-static inline void io_ring_clear_wakeup_flag(struct io_ring_ctx *ctx)
-{
- spin_lock(&ctx->completion_lock);
- WRITE_ONCE(ctx->rings->sq_flags,
- ctx->rings->sq_flags & ~IORING_SQ_NEED_WAKEUP);
- spin_unlock(&ctx->completion_lock);
-}
-
static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
{
unsigned int to_submit;
@@ -8237,7 +8247,7 @@ static int io_sq_thread(void *data)
bool needs_sched = true;
list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
- io_ring_set_wakeup_flag(ctx);
+ io_ring_sq_flag_set(ctx, IORING_SQ_NEED_WAKEUP);
if ((ctx->flags & IORING_SETUP_IOPOLL) &&
!wq_list_empty(&ctx->iopoll_list)) {
@@ -8263,7 +8273,7 @@ static int io_sq_thread(void *data)
mutex_lock(&sqd->lock);
}
list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
- io_ring_clear_wakeup_flag(ctx);
+ io_ring_sq_flag_clear(ctx, IORING_SQ_NEED_WAKEUP);
}
finish_wait(&sqd->wait, &wait);
@@ -8273,7 +8283,7 @@ static int io_sq_thread(void *data)
io_uring_cancel_generic(true, sqd);
sqd->thread = NULL;
list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
- io_ring_set_wakeup_flag(ctx);
+ io_ring_sq_flag_set(ctx, IORING_SQ_NEED_WAKEUP);
io_run_task_work();
mutex_unlock(&sqd->lock);
--
2.35.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH 2/5] io_uring: serialize ctx->rings->sq_flags with cmpxchg()
2022-04-22 21:42 ` [PATCH 2/5] io_uring: serialize ctx->rings->sq_flags with cmpxchg() Jens Axboe
@ 2022-04-23 8:06 ` Pavel Begunkov
2022-04-23 13:07 ` Jens Axboe
0 siblings, 1 reply; 9+ messages in thread
From: Pavel Begunkov @ 2022-04-23 8:06 UTC (permalink / raw)
To: Jens Axboe, io-uring
On 4/22/22 22:42, Jens Axboe wrote:
> Rather than require ctx->completion_lock for ensuring that we don't
> clobber the flags, use try_cmpxchg() instead. This removes the need
> to grab the completion_lock, in preparation for needing to set or
> clear sq_flags when we don't know the status of this lock.
>
> Signed-off-by: Jens Axboe <[email protected]>
> ---
> fs/io_uring.c | 54 ++++++++++++++++++++++++++++++---------------------
> 1 file changed, 32 insertions(+), 22 deletions(-)
>
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 626bf840bed2..38e58fe4963d 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -1999,6 +1999,34 @@ static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
> io_cqring_wake(ctx);
> }
>
> +static void io_ring_sq_flag_clear(struct io_ring_ctx *ctx, unsigned int flag)
> +{
> + struct io_rings *rings = ctx->rings;
> + unsigned int oldf, newf;
> +
> + do {
> + oldf = READ_ONCE(rings->sq_flags);
> +
> + if (!(oldf & flag))
> + break;
> + newf = oldf & ~flag;
> + } while (!try_cmpxchg(&rings->sq_flags, &oldf, newf));
> +}
> +
> +static void io_ring_sq_flag_set(struct io_ring_ctx *ctx, unsigned int flag)
> +{
> + struct io_rings *rings = ctx->rings;
> + unsigned int oldf, newf;
> +
> + do {
> + oldf = READ_ONCE(rings->sq_flags);
> +
> + if (oldf & flag)
> + break;
> + newf = oldf | flag;
> + } while (!try_cmpxchg(&rings->sq_flags, &oldf, newf));
> +}
atomic and/or might be a better fit
--
Pavel Begunkov
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 2/5] io_uring: serialize ctx->rings->sq_flags with cmpxchg()
2022-04-23 8:06 ` Pavel Begunkov
@ 2022-04-23 13:07 ` Jens Axboe
2022-04-23 13:28 ` Jens Axboe
0 siblings, 1 reply; 9+ messages in thread
From: Jens Axboe @ 2022-04-23 13:07 UTC (permalink / raw)
To: Pavel Begunkov, io-uring
On 4/23/22 2:06 AM, Pavel Begunkov wrote:
> On 4/22/22 22:42, Jens Axboe wrote:
>> Rather than require ctx->completion_lock for ensuring that we don't
>> clobber the flags, use try_cmpxchg() instead. This removes the need
>> to grab the completion_lock, in preparation for needing to set or
>> clear sq_flags when we don't know the status of this lock.
>>
>> Signed-off-by: Jens Axboe <[email protected]>
>> ---
>> fs/io_uring.c | 54 ++++++++++++++++++++++++++++++---------------------
>> 1 file changed, 32 insertions(+), 22 deletions(-)
>>
>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>> index 626bf840bed2..38e58fe4963d 100644
>> --- a/fs/io_uring.c
>> +++ b/fs/io_uring.c
>> @@ -1999,6 +1999,34 @@ static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
>> io_cqring_wake(ctx);
>> }
>> +static void io_ring_sq_flag_clear(struct io_ring_ctx *ctx, unsigned int flag)
>> +{
>> + struct io_rings *rings = ctx->rings;
>> + unsigned int oldf, newf;
>> +
>> + do {
>> + oldf = READ_ONCE(rings->sq_flags);
>> +
>> + if (!(oldf & flag))
>> + break;
>> + newf = oldf & ~flag;
>> + } while (!try_cmpxchg(&rings->sq_flags, &oldf, newf));
>> +}
>> +
>> +static void io_ring_sq_flag_set(struct io_ring_ctx *ctx, unsigned int flag)
>> +{
>> + struct io_rings *rings = ctx->rings;
>> + unsigned int oldf, newf;
>> +
>> + do {
>> + oldf = READ_ONCE(rings->sq_flags);
>> +
>> + if (oldf & flag)
>> + break;
>> + newf = oldf | flag;
>> + } while (!try_cmpxchg(&rings->sq_flags, &oldf, newf));
>> +}
>
> atomic and/or might be a better fit
That would indeed be great, but the type is fixed. Not sure if all archs
end up with a plain int as the type?
--
Jens Axboe
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 2/5] io_uring: serialize ctx->rings->sq_flags with cmpxchg()
2022-04-23 13:07 ` Jens Axboe
@ 2022-04-23 13:28 ` Jens Axboe
0 siblings, 0 replies; 9+ messages in thread
From: Jens Axboe @ 2022-04-23 13:28 UTC (permalink / raw)
To: Pavel Begunkov, io-uring
On 4/23/22 7:07 AM, Jens Axboe wrote:
> On 4/23/22 2:06 AM, Pavel Begunkov wrote:
>> On 4/22/22 22:42, Jens Axboe wrote:
>>> Rather than require ctx->completion_lock for ensuring that we don't
>>> clobber the flags, use try_cmpxchg() instead. This removes the need
>>> to grab the completion_lock, in preparation for needing to set or
>>> clear sq_flags when we don't know the status of this lock.
>>>
>>> Signed-off-by: Jens Axboe <[email protected]>
>>> ---
>>> fs/io_uring.c | 54 ++++++++++++++++++++++++++++++---------------------
>>> 1 file changed, 32 insertions(+), 22 deletions(-)
>>>
>>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>>> index 626bf840bed2..38e58fe4963d 100644
>>> --- a/fs/io_uring.c
>>> +++ b/fs/io_uring.c
>>> @@ -1999,6 +1999,34 @@ static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
>>> io_cqring_wake(ctx);
>>> }
>>> +static void io_ring_sq_flag_clear(struct io_ring_ctx *ctx, unsigned int flag)
>>> +{
>>> + struct io_rings *rings = ctx->rings;
>>> + unsigned int oldf, newf;
>>> +
>>> + do {
>>> + oldf = READ_ONCE(rings->sq_flags);
>>> +
>>> + if (!(oldf & flag))
>>> + break;
>>> + newf = oldf & ~flag;
>>> + } while (!try_cmpxchg(&rings->sq_flags, &oldf, newf));
>>> +}
>>> +
>>> +static void io_ring_sq_flag_set(struct io_ring_ctx *ctx, unsigned int flag)
>>> +{
>>> + struct io_rings *rings = ctx->rings;
>>> + unsigned int oldf, newf;
>>> +
>>> + do {
>>> + oldf = READ_ONCE(rings->sq_flags);
>>> +
>>> + if (oldf & flag)
>>> + break;
>>> + newf = oldf | flag;
>>> + } while (!try_cmpxchg(&rings->sq_flags, &oldf, newf));
>>> +}
>>
>> atomic and/or might be a better fit
>
> That would indeed be great, but the type is fixed. Not sure if all archs
> end up with a plain int as the type?
Looks like it actually is defined generically, so this could work.
--
Jens Axboe
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 3/5] io-wq: use __set_notify_signal() to wake workers
2022-04-22 21:42 [PATCHSET next 0/5] Add support for non-IPI task_work Jens Axboe
2022-04-22 21:42 ` [PATCH 1/5] task_work: allow TWA_SIGNAL without a rescheduling IPI Jens Axboe
2022-04-22 21:42 ` [PATCH 2/5] io_uring: serialize ctx->rings->sq_flags with cmpxchg() Jens Axboe
@ 2022-04-22 21:42 ` Jens Axboe
2022-04-22 21:42 ` [PATCH 4/5] io_uring: set task_work notify method at init time Jens Axboe
2022-04-22 21:42 ` [PATCH 5/5] io_uring: use TWA_SIGNAL_NO_IPI if IORING_SETUP_NO_RESCHED is used Jens Axboe
4 siblings, 0 replies; 9+ messages in thread
From: Jens Axboe @ 2022-04-22 21:42 UTC (permalink / raw)
To: io-uring; +Cc: Jens Axboe
The only difference between set_notify_signal() and __set_notify_signal()
is that the former checks if it needs to deliver an IPI to force a
reschedule. As the io-wq workers never leave the kernel, an IPI is never
needed, they simply need a wakeup.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io-wq.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 32aeb2c581c5..824623bcf1a5 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -871,7 +871,7 @@ static bool io_wq_for_each_worker(struct io_wqe *wqe,
static bool io_wq_worker_wake(struct io_worker *worker, void *data)
{
- set_notify_signal(worker->task);
+ __set_notify_signal(worker->task);
wake_up_process(worker->task);
return false;
}
@@ -991,7 +991,7 @@ static bool __io_wq_worker_cancel(struct io_worker *worker,
{
if (work && match->fn(work, match->data)) {
work->flags |= IO_WQ_WORK_CANCEL;
- set_notify_signal(worker->task);
+ __set_notify_signal(worker->task);
return true;
}
--
2.35.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 4/5] io_uring: set task_work notify method at init time
2022-04-22 21:42 [PATCHSET next 0/5] Add support for non-IPI task_work Jens Axboe
` (2 preceding siblings ...)
2022-04-22 21:42 ` [PATCH 3/5] io-wq: use __set_notify_signal() to wake workers Jens Axboe
@ 2022-04-22 21:42 ` Jens Axboe
2022-04-22 21:42 ` [PATCH 5/5] io_uring: use TWA_SIGNAL_NO_IPI if IORING_SETUP_NO_RESCHED is used Jens Axboe
4 siblings, 0 replies; 9+ messages in thread
From: Jens Axboe @ 2022-04-22 21:42 UTC (permalink / raw)
To: io-uring; +Cc: Jens Axboe
While doing so, switch SQPOLL to TWA_SIGNAL_NO_IPI as well, as that
just does a task wakeup and then we can remove the special wakeup we
have in task_work_add.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 23 +++++++++++------------
1 file changed, 11 insertions(+), 12 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 38e58fe4963d..20297fe4300b 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -366,6 +366,7 @@ struct io_ring_ctx {
struct io_rings *rings;
unsigned int flags;
+ enum task_work_notify_mode notify_method;
unsigned int compat: 1;
unsigned int drain_next: 1;
unsigned int restricted: 1;
@@ -2650,8 +2651,8 @@ static void tctx_task_work(struct callback_head *cb)
static void io_req_task_work_add(struct io_kiocb *req, bool priority)
{
struct task_struct *tsk = req->task;
+ struct io_ring_ctx *ctx = req->ctx;
struct io_uring_task *tctx = tsk->io_uring;
- enum task_work_notify_mode notify;
struct io_wq_work_node *node;
unsigned long flags;
bool running;
@@ -2674,18 +2675,8 @@ static void io_req_task_work_add(struct io_kiocb *req, bool priority)
if (running)
return;
- /*
- * SQPOLL kernel thread doesn't need notification, just a wakeup. For
- * all other cases, use TWA_SIGNAL unconditionally to ensure we're
- * processing task_work. There's no reliable way to tell if TWA_RESUME
- * will do the job.
- */
- notify = (req->ctx->flags & IORING_SETUP_SQPOLL) ? TWA_NONE : TWA_SIGNAL;
- if (likely(!task_work_add(tsk, &tctx->task_work, notify))) {
- if (notify == TWA_NONE)
- wake_up_process(tsk);
+ if (likely(!task_work_add(tsk, &tctx->task_work, ctx->notify_method)))
return;
- }
spin_lock_irqsave(&tctx->task_lock, flags);
tctx->task_running = false;
@@ -11360,6 +11351,14 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
if (!capable(CAP_IPC_LOCK))
ctx->user = get_uid(current_user());
+ /*
+ * For SQPOLL, we just need a wakeup, always.
+ */
+ if (ctx->flags & IORING_SETUP_SQPOLL)
+ ctx->notify_method = TWA_SIGNAL_NO_IPI;
+ else
+ ctx->notify_method = TWA_SIGNAL;
+
/*
* This is just grabbed for accounting purposes. When a process exits,
* the mm is exited and dropped before the files, hence we need to hang
--
2.35.1
^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 5/5] io_uring: use TWA_SIGNAL_NO_IPI if IORING_SETUP_NO_RESCHED is used
2022-04-22 21:42 [PATCHSET next 0/5] Add support for non-IPI task_work Jens Axboe
` (3 preceding siblings ...)
2022-04-22 21:42 ` [PATCH 4/5] io_uring: set task_work notify method at init time Jens Axboe
@ 2022-04-22 21:42 ` Jens Axboe
4 siblings, 0 replies; 9+ messages in thread
From: Jens Axboe @ 2022-04-22 21:42 UTC (permalink / raw)
To: io-uring; +Cc: Jens Axboe
If this is set, io_uring will never use an IPI to deliver a task_work
notification. This can be used in the common case where a single task or
thread communicates with the ring, and doesn't rely on
io_uring_cqe_peek().
This provides a noticeable win in performance, both from eliminating
the IPI itself, but also from avoiding interrupting the submitting
task unnecessarily.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 8 +++++---
include/uapi/linux/io_uring.h | 1 +
2 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 20297fe4300b..43634cd5c79d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -11352,9 +11352,10 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
ctx->user = get_uid(current_user());
/*
- * For SQPOLL, we just need a wakeup, always.
+ * For SQPOLL, we just need a wakeup, always. For !SQPOLL, if
+ * NO_RESCHED is set, then IPIs are never needed by the app.
*/
- if (ctx->flags & IORING_SETUP_SQPOLL)
+ if (ctx->flags & (IORING_SETUP_SQPOLL|IORING_SETUP_NO_RESCHED))
ctx->notify_method = TWA_SIGNAL_NO_IPI;
else
ctx->notify_method = TWA_SIGNAL;
@@ -11456,7 +11457,8 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL |
IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE |
IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ |
- IORING_SETUP_R_DISABLED | IORING_SETUP_SUBMIT_ALL))
+ IORING_SETUP_R_DISABLED | IORING_SETUP_SUBMIT_ALL |
+ IORING_SETUP_NO_RESCHED))
return -EINVAL;
return io_uring_create(entries, &p, params);
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 980d82eb196e..8a32230aa6f4 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -102,6 +102,7 @@ enum {
#define IORING_SETUP_ATTACH_WQ (1U << 5) /* attach to existing wq */
#define IORING_SETUP_R_DISABLED (1U << 6) /* start with ring disabled */
#define IORING_SETUP_SUBMIT_ALL (1U << 7) /* continue submit on error */
+#define IORING_SETUP_NO_RESCHED (1U << 8) /* work doesn't need resched */
enum {
IORING_OP_NOP,
--
2.35.1
^ permalink raw reply related [flat|nested] 9+ messages in thread