From: Jens Axboe <[email protected]>
To: [email protected]
Cc: Jens Axboe <[email protected]>
Subject: [PATCH 4/4] io_uring: wire up min batch wake timeout
Date: Thu, 15 Feb 2024 09:06:59 -0700 [thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>
Expose min_wait_usec in io_uring_getevents_arg, replacing the pad member
that is currently in there. The value is in usecs, which is explained in
the name as well.
Note that if min_wait_usec and a normal timeout is used in conjunction,
the normal timeout is still relative to the base time. For example, if
min_wait_usec is set to 100 and the normal timeout is 1000, the max
total time waited is still 1000. This also means that if the normal
timeout is shorter than min_wait_usec, then only the min_wait_usec will
take effect.
See previous commit for an explanation of how this works.
IORING_FEAT_MIN_TIMEOUT is added as a feature flag for this, as
applications doing submit_and_wait_timeout() style operations will
generally not see the -EINVAL from the wait side as they return the
number of IOs submitted. Only if no IOs are submitted will the -EINVAL
bubble back up to the application.
Signed-off-by: Jens Axboe <[email protected]>
---
include/uapi/linux/io_uring.h | 3 ++-
io_uring/io_uring.c | 21 +++++++++++++--------
2 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 7bd10201a02b..dbefda14d087 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -522,6 +522,7 @@ struct io_uring_params {
#define IORING_FEAT_CQE_SKIP (1U << 11)
#define IORING_FEAT_LINKED_FILE (1U << 12)
#define IORING_FEAT_REG_REG_RING (1U << 13)
+#define IORING_FEAT_MIN_TIMEOUT (1U << 14)
/*
* io_uring_register(2) opcodes and arguments
@@ -738,7 +739,7 @@ enum {
struct io_uring_getevents_arg {
__u64 sigmask;
__u32 sigmask_sz;
- __u32 pad;
+ __u32 min_wait_usec;
__u64 ts;
};
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index e72261f280a7..8dd5eb647b43 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2647,7 +2647,8 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
*/
static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
const sigset_t __user *sig, size_t sigsz,
- struct __kernel_timespec __user *uts)
+ struct __kernel_timespec __user *uts,
+ ktime_t min_time)
{
struct io_wait_queue iowq;
struct io_rings *rings = ctx->rings;
@@ -2684,7 +2685,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
iowq.cq_min_tail = READ_ONCE(ctx->rings->cq.tail);
iowq.cq_tail = READ_ONCE(ctx->rings->cq.head) + min_events;
- iowq.min_timeout = KTIME_MAX;
+ iowq.min_timeout = min_time;
iowq.timeout = KTIME_MAX;
start_time = ktime_get_ns();
@@ -3634,10 +3635,12 @@ static int io_validate_ext_arg(unsigned flags, const void __user *argp, size_t a
static int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz,
struct __kernel_timespec __user **ts,
- const sigset_t __user **sig)
+ const sigset_t __user **sig, ktime_t *min_time)
{
struct io_uring_getevents_arg arg;
+ *min_time = KTIME_MAX;
+
/*
* If EXT_ARG isn't set, then we have no timespec and the argp pointer
* is just a pointer to the sigset_t.
@@ -3656,8 +3659,8 @@ static int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz
return -EINVAL;
if (copy_from_user(&arg, argp, sizeof(arg)))
return -EFAULT;
- if (arg.pad)
- return -EINVAL;
+ if (arg.min_wait_usec)
+ *min_time = arg.min_wait_usec * NSEC_PER_USEC;
*sig = u64_to_user_ptr(arg.sigmask);
*argsz = arg.sigmask_sz;
*ts = u64_to_user_ptr(arg.ts);
@@ -3769,13 +3772,14 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
} else {
const sigset_t __user *sig;
struct __kernel_timespec __user *ts;
+ ktime_t min_time;
- ret2 = io_get_ext_arg(flags, argp, &argsz, &ts, &sig);
+ ret2 = io_get_ext_arg(flags, argp, &argsz, &ts, &sig, &min_time);
if (likely(!ret2)) {
min_complete = min(min_complete,
ctx->cq_entries);
ret2 = io_cqring_wait(ctx, min_complete, sig,
- argsz, ts);
+ argsz, ts, min_time);
}
}
@@ -4058,7 +4062,8 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
IORING_FEAT_POLL_32BITS | IORING_FEAT_SQPOLL_NONFIXED |
IORING_FEAT_EXT_ARG | IORING_FEAT_NATIVE_WORKERS |
IORING_FEAT_RSRC_TAGS | IORING_FEAT_CQE_SKIP |
- IORING_FEAT_LINKED_FILE | IORING_FEAT_REG_REG_RING;
+ IORING_FEAT_LINKED_FILE | IORING_FEAT_REG_REG_RING |
+ IORING_FEAT_MIN_TIMEOUT;
if (copy_to_user(params, p, sizeof(*p))) {
ret = -EFAULT;
--
2.43.0
next prev parent reply other threads:[~2024-02-15 16:10 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-02-15 16:06 [PATCHSET v2 0/4] Add support for batched min timeout Jens Axboe
2024-02-15 16:06 ` [PATCH 1/4] io_uring: move schedule wait logic into helper Jens Axboe
2024-02-15 16:06 ` [PATCH 2/4] io_uring: implement our own schedule timeout handling Jens Axboe
2024-02-15 16:06 ` [PATCH 3/4] io_uring: add support for batch wait timeout Jens Axboe
2024-02-15 16:06 ` Jens Axboe [this message]
-- strict thread matches above, loose matches on Subject: below --
2024-02-13 19:03 [PATCHSET 0/4] Add support for batched min timeout Jens Axboe
2024-02-13 19:03 ` [PATCH 4/4] io_uring: wire up min batch wake timeout Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox