From: Pavel Begunkov <[email protected]>
To: [email protected]
Cc: [email protected]
Subject: [RFC 3/3] io_uring: allow waiting loop to ignore some CQEs
Date: Sun, 10 Nov 2024 14:56:22 +0000 [thread overview]
Message-ID: <a15bb014ecc67b004c2bd2283758c5ab3987e54a.1731205010.git.asml.silence@gmail.com> (raw)
In-Reply-To: <[email protected]>
The user might not care about getting the results of certain requests, but
their completions will still wake up the task (i.e. via task_work) and
trigger the waiting loop to terminate.
IOSQE_SET_F_HINT_SILENT attempts to de-prioritise such completions.
The completion will eventually be posted; however, the execution of the
request can and likely will be delayed to batch it with other requests.
It's an incomplete prototype: it only works with DEFER_TASKRUN, fails to
apply the optimisation for task_works queued before the waiting loop
starts, and its interaction with IOSQE_SET_F_HINT_IGNORE_INLINE is likely
broken.
Signed-off-by: Pavel Begunkov <[email protected]>
---
include/uapi/linux/io_uring.h | 1 +
io_uring/io_uring.c | 43 +++++++++++++++++++++++------------
io_uring/register.c | 3 ++-
3 files changed, 31 insertions(+), 16 deletions(-)
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index e6d10fba8ae2..6dff0ee4e20c 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -901,6 +901,7 @@ struct io_uring_recvmsg_out {
enum {
IOSQE_SET_F_HINT_IGNORE_INLINE = 1,
+ IOSQE_SET_F_HINT_SILENT = 2,
};
struct io_uring_ioset_reg {
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 6e89435c243d..2e1af10fd4f2 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1270,6 +1270,7 @@ static inline void io_req_local_work_add(struct io_kiocb *req,
{
unsigned nr_wait, nr_tw, nr_tw_prev;
struct llist_node *head;
+ bool ignore = req->ioset->flags & IOSQE_SET_F_HINT_SILENT;
/* See comment above IO_CQ_WAKE_INIT */
BUILD_BUG_ON(IO_CQ_WAKE_FORCE <= IORING_MAX_CQ_ENTRIES);
@@ -1297,13 +1298,17 @@ static inline void io_req_local_work_add(struct io_kiocb *req,
nr_tw_prev = READ_ONCE(first_req->nr_tw);
}
- /*
- * Theoretically, it can overflow, but that's fine as one of
- * previous adds should've tried to wake the task.
- */
- nr_tw = nr_tw_prev + 1;
- if (!(flags & IOU_F_TWQ_LAZY_WAKE))
- nr_tw = IO_CQ_WAKE_FORCE;
+ nr_tw = nr_tw_prev;
+
+ if (!ignore) {
+ /*
+ * Theoretically, it can overflow, but that's fine as
+ * one of previous adds should've tried to wake the task.
+ */
+ nr_tw += 1;
+ if (!(flags & IOU_F_TWQ_LAZY_WAKE))
+ nr_tw = IO_CQ_WAKE_FORCE;
+ }
req->nr_tw = nr_tw;
req->io_task_work.node.next = head;
@@ -1325,6 +1330,9 @@ static inline void io_req_local_work_add(struct io_kiocb *req,
io_eventfd_signal(ctx);
}
+ if (ignore)
+ return;
+
nr_wait = atomic_read(&ctx->cq_wait_nr);
/* not enough or no one is waiting */
if (nr_tw < nr_wait)
@@ -1405,7 +1413,7 @@ static bool io_run_local_work_continue(struct io_ring_ctx *ctx, int events,
}
static int __io_run_local_work(struct io_ring_ctx *ctx, struct io_tw_state *ts,
- int min_events)
+ int min_events, struct io_wait_queue *waitq)
{
struct llist_node *node;
unsigned int loops = 0;
@@ -1425,6 +1433,10 @@ static int __io_run_local_work(struct io_ring_ctx *ctx, struct io_tw_state *ts,
struct llist_node *next = node->next;
struct io_kiocb *req = container_of(node, struct io_kiocb,
io_task_work.node);
+
+ if (req->ioset->flags & IOSQE_SET_F_HINT_SILENT)
+ waitq->cq_tail++;
+
INDIRECT_CALL_2(req->io_task_work.func,
io_poll_task_func, io_req_rw_complete,
req, ts);
@@ -1450,16 +1462,17 @@ static inline int io_run_local_work_locked(struct io_ring_ctx *ctx,
if (llist_empty(&ctx->work_llist))
return 0;
- return __io_run_local_work(ctx, &ts, min_events);
+ return __io_run_local_work(ctx, &ts, min_events, NULL);
}
-static int io_run_local_work(struct io_ring_ctx *ctx, int min_events)
+static int io_run_local_work(struct io_ring_ctx *ctx, int min_events,
+ struct io_wait_queue *waitq)
{
struct io_tw_state ts = {};
int ret;
mutex_lock(&ctx->uring_lock);
- ret = __io_run_local_work(ctx, &ts, min_events);
+ ret = __io_run_local_work(ctx, &ts, min_events, waitq);
mutex_unlock(&ctx->uring_lock);
return ret;
}
@@ -2643,7 +2656,7 @@ int io_run_task_work_sig(struct io_ring_ctx *ctx)
{
if (!llist_empty(&ctx->work_llist)) {
__set_current_state(TASK_RUNNING);
- if (io_run_local_work(ctx, INT_MAX) > 0)
+ if (io_run_local_work(ctx, INT_MAX, NULL) > 0)
return 0;
}
if (io_run_task_work() > 0)
@@ -2806,7 +2819,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
if (!io_allowed_run_tw(ctx))
return -EEXIST;
if (!llist_empty(&ctx->work_llist))
- io_run_local_work(ctx, min_events);
+ io_run_local_work(ctx, min_events, NULL);
io_run_task_work();
if (unlikely(test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq)))
@@ -2877,7 +2890,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
* now rather than let the caller do another wait loop.
*/
if (!llist_empty(&ctx->work_llist))
- io_run_local_work(ctx, nr_wait);
+ io_run_local_work(ctx, nr_wait, &iowq);
io_run_task_work();
/*
@@ -3389,7 +3402,7 @@ static __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) &&
io_allowed_defer_tw_run(ctx))
- ret |= io_run_local_work(ctx, INT_MAX) > 0;
+ ret |= io_run_local_work(ctx, INT_MAX, NULL) > 0;
ret |= io_cancel_defer_files(ctx, tctx, cancel_all);
mutex_lock(&ctx->uring_lock);
ret |= io_poll_remove_all(ctx, tctx, cancel_all);
diff --git a/io_uring/register.c b/io_uring/register.c
index f87ec7b773bd..5462c49bebd3 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -92,7 +92,8 @@ static int io_update_ioset(struct io_ring_ctx *ctx,
{
if (!(ctx->flags & IORING_SETUP_IOSET))
return -EINVAL;
- if (reg->flags & ~IOSQE_SET_F_HINT_IGNORE_INLINE)
+ if (reg->flags & ~(IOSQE_SET_F_HINT_IGNORE_INLINE |
+ IOSQE_SET_F_HINT_SILENT))
return -EINVAL;
if (reg->__resv[0] || reg->__resv[1] || reg->__resv[2])
return -EINVAL;
--
2.46.0
prev parent reply other threads:[~2024-11-10 14:55 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-10 14:56 [RFC 0/3] request parameter set api and wait termination tuning Pavel Begunkov
2024-11-10 14:56 ` [RFC 1/3] io_uring: introduce request parameter sets Pavel Begunkov
2024-11-10 14:56 ` [RFC 2/3] io_uring: add support for ignoring inline completions for waits Pavel Begunkov
2024-11-10 14:56 ` Pavel Begunkov [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=a15bb014ecc67b004c2bd2283758c5ab3987e54a.1731205010.git.asml.silence@gmail.com \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox