From: Pavel Begunkov <[email protected]>
To: [email protected]
Cc: Jens Axboe <[email protected]>,
[email protected], Lewis Baker <[email protected]>
Subject: [PATCH v2 4/4] io_uring: user registered clockid for wait timeouts
Date: Wed, 7 Aug 2024 15:18:14 +0100 [thread overview]
Message-ID: <98f2bc8a3c36cdf8f0e6a275245e81e903459703.1723039801.git.asml.silence@gmail.com> (raw)
In-Reply-To: <[email protected]>
Add a new registration opcode IORING_REGISTER_CLOCK, which allows the
user to select which clock id it wants to use with CQ waiting timeouts.
It only allows a subset of all posix clocks and currently supports
CLOCK_MONOTONIC and CLOCK_BOOTTIME.
Suggested-by: Lewis Baker <[email protected]>
Signed-off-by: Pavel Begunkov <[email protected]>
---
include/linux/io_uring_types.h | 3 +++
include/uapi/linux/io_uring.h | 7 +++++++
io_uring/io_uring.c | 8 ++++++--
io_uring/io_uring.h | 8 ++++++++
io_uring/napi.c | 2 +-
io_uring/register.c | 31 +++++++++++++++++++++++++++++++
6 files changed, 56 insertions(+), 3 deletions(-)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 3315005df117..4b9ba523978d 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -239,6 +239,9 @@ struct io_ring_ctx {
struct io_rings *rings;
struct percpu_ref refs;
+ clockid_t clockid;
+ enum tk_offsets clock_offset;
+
enum task_work_notify_mode notify_method;
unsigned sq_thread_idle;
} ____cacheline_aligned_in_smp;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index afc901502804..48c440edf674 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -596,6 +596,8 @@ enum io_uring_register_op {
IORING_REGISTER_NAPI = 27,
IORING_UNREGISTER_NAPI = 28,
+ IORING_REGISTER_CLOCK = 29,
+
/* this goes last */
IORING_REGISTER_LAST,
@@ -676,6 +678,11 @@ struct io_uring_restriction {
__u32 resv2[3];
};
+struct io_uring_clock_register {
+ __u32 clockid;
+ __u32 __resv[3];
+};
+
struct io_uring_buf {
__u64 addr;
__u32 len;
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 5282f9887440..20229e72b65c 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2377,7 +2377,8 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
ret = 0;
if (iowq->timeout == KTIME_MAX)
schedule();
- else if (!schedule_hrtimeout(&iowq->timeout, HRTIMER_MODE_ABS))
+ else if (!schedule_hrtimeout_range_clock(&iowq->timeout, 0,
+ HRTIMER_MODE_ABS, ctx->clockid))
ret = -ETIME;
current->in_iowait = 0;
return ret;
@@ -2422,7 +2423,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
iowq.timeout = timespec64_to_ktime(ts);
if (!(flags & IORING_ENTER_ABS_TIMER))
- iowq.timeout = ktime_add(iowq.timeout, ktime_get());
+ iowq.timeout = ktime_add(iowq.timeout, io_get_time(ctx));
}
if (sig) {
@@ -3424,6 +3425,9 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
if (!ctx)
return -ENOMEM;
+ ctx->clockid = CLOCK_MONOTONIC;
+ ctx->clock_offset = 0;
+
if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) &&
!(ctx->flags & IORING_SETUP_IOPOLL) &&
!(ctx->flags & IORING_SETUP_SQPOLL))
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index c2acf6180845..9935819f12b7 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -437,6 +437,14 @@ static inline bool io_file_can_poll(struct io_kiocb *req)
return false;
}
+static inline ktime_t io_get_time(struct io_ring_ctx *ctx)
+{
+ if (ctx->clockid == CLOCK_MONOTONIC)
+ return ktime_get();
+
+ return ktime_get_with_offset(ctx->clock_offset);
+}
+
enum {
IO_CHECK_CQ_OVERFLOW_BIT,
IO_CHECK_CQ_DROPPED_BIT,
diff --git a/io_uring/napi.c b/io_uring/napi.c
index d39ae20a3db8..1c3e7562e90b 100644
--- a/io_uring/napi.c
+++ b/io_uring/napi.c
@@ -285,7 +285,7 @@ void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq)
iowq->napi_busy_poll_dt = READ_ONCE(ctx->napi_busy_poll_dt);
if (iowq->timeout != KTIME_MAX) {
- ktime_t dt = ktime_sub(iowq->timeout, ktime_get());
+ ktime_t dt = ktime_sub(iowq->timeout, io_get_time(ctx));
iowq->napi_busy_poll_dt = min_t(u64, iowq->napi_busy_poll_dt, dt);
}
diff --git a/io_uring/register.c b/io_uring/register.c
index e3c20be5a198..57cb85c42526 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -335,6 +335,31 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
return ret;
}
+static int io_register_clock(struct io_ring_ctx *ctx,
+ struct io_uring_clock_register __user *arg)
+{
+ struct io_uring_clock_register reg;
+
+ if (copy_from_user(®, arg, sizeof(reg)))
+ return -EFAULT;
+ if (memchr_inv(®.__resv, 0, sizeof(reg.__resv)))
+ return -EINVAL;
+
+ switch (reg.clockid) {
+ case CLOCK_MONOTONIC:
+ ctx->clock_offset = 0;
+ break;
+ case CLOCK_BOOTTIME:
+ ctx->clock_offset = TK_OFFS_BOOT;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ctx->clockid = reg.clockid;
+ return 0;
+}
+
static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
void __user *arg, unsigned nr_args)
__releases(ctx->uring_lock)
@@ -511,6 +536,12 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
break;
ret = io_unregister_napi(ctx, arg);
break;
+ case IORING_REGISTER_CLOCK:
+ ret = -EINVAL;
+ if (!arg || nr_args)
+ break;
+ ret = io_register_clock(ctx, arg);
+ break;
default:
ret = -EINVAL;
break;
--
2.45.2
next prev parent reply other threads:[~2024-08-07 14:17 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-08-07 14:18 [PATCH v2 0/4] clodkid and abs mode CQ wait timeouts Pavel Begunkov
2024-08-07 14:18 ` [PATCH v2 1/4] io_uring/napi: refactor __io_napi_busy_loop() Pavel Begunkov
2024-08-07 14:18 ` [PATCH v2 2/4] io_uring/napi: postpone napi timeout adjustment Pavel Begunkov
2024-08-07 14:18 ` [PATCH v2 3/4] io_uring: add absolute mode wait timeouts Pavel Begunkov
2024-08-07 14:18 ` Pavel Begunkov [this message]
2024-08-12 18:13 ` [PATCH v2 0/4] clodkid and abs mode CQ " Jens Axboe
2024-08-12 18:30 ` Jens Axboe
2024-08-13 0:50 ` Pavel Begunkov
2024-08-13 0:59 ` Jens Axboe
2024-08-13 1:32 ` Pavel Begunkov
2024-08-13 2:09 ` Jens Axboe
2024-08-13 2:38 ` Pavel Begunkov
2024-08-13 3:28 ` Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=98f2bc8a3c36cdf8f0e6a275245e81e903459703.1723039801.git.asml.silence@gmail.com \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox