public inbox for [email protected]
 help / color / mirror / Atom feed
From: Pavel Begunkov <[email protected]>
To: [email protected]
Cc: Jens Axboe <[email protected]>,
	[email protected], Lewis Baker <[email protected]>
Subject: [PATCH v2 4/4] io_uring: user registered clockid for wait timeouts
Date: Wed,  7 Aug 2024 15:18:14 +0100	[thread overview]
Message-ID: <98f2bc8a3c36cdf8f0e6a275245e81e903459703.1723039801.git.asml.silence@gmail.com> (raw)
In-Reply-To: <[email protected]>

Add a new registration opcode IORING_REGISTER_CLOCK, which allows the
user to select which clock id it wants to use with CQ waiting timeouts.
It only allows a subset of all posix clocks and currently supports
CLOCK_MONOTONIC and CLOCK_BOOTTIME.

Suggested-by: Lewis Baker <[email protected]>
Signed-off-by: Pavel Begunkov <[email protected]>
---
 include/linux/io_uring_types.h |  3 +++
 include/uapi/linux/io_uring.h  |  7 +++++++
 io_uring/io_uring.c            |  8 ++++++--
 io_uring/io_uring.h            |  8 ++++++++
 io_uring/napi.c                |  2 +-
 io_uring/register.c            | 31 +++++++++++++++++++++++++++++++
 6 files changed, 56 insertions(+), 3 deletions(-)

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 3315005df117..4b9ba523978d 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -239,6 +239,9 @@ struct io_ring_ctx {
 		struct io_rings		*rings;
 		struct percpu_ref	refs;
 
+		clockid_t		clockid;
+		enum tk_offsets		clock_offset;
+
 		enum task_work_notify_mode	notify_method;
 		unsigned			sq_thread_idle;
 	} ____cacheline_aligned_in_smp;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index afc901502804..48c440edf674 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -596,6 +596,8 @@ enum io_uring_register_op {
 	IORING_REGISTER_NAPI			= 27,
 	IORING_UNREGISTER_NAPI			= 28,
 
+	IORING_REGISTER_CLOCK			= 29,
+
 	/* this goes last */
 	IORING_REGISTER_LAST,
 
@@ -676,6 +678,11 @@ struct io_uring_restriction {
 	__u32 resv2[3];
 };
 
+struct io_uring_clock_register {
+	__u32	clockid;
+	__u32	__resv[3];
+};
+
 struct io_uring_buf {
 	__u64	addr;
 	__u32	len;
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 5282f9887440..20229e72b65c 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2377,7 +2377,8 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
 	ret = 0;
 	if (iowq->timeout == KTIME_MAX)
 		schedule();
-	else if (!schedule_hrtimeout(&iowq->timeout, HRTIMER_MODE_ABS))
+	else if (!schedule_hrtimeout_range_clock(&iowq->timeout, 0,
+						 HRTIMER_MODE_ABS, ctx->clockid))
 		ret = -ETIME;
 	current->in_iowait = 0;
 	return ret;
@@ -2422,7 +2423,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
 
 		iowq.timeout = timespec64_to_ktime(ts);
 		if (!(flags & IORING_ENTER_ABS_TIMER))
-			iowq.timeout = ktime_add(iowq.timeout, ktime_get());
+			iowq.timeout = ktime_add(iowq.timeout, io_get_time(ctx));
 	}
 
 	if (sig) {
@@ -3424,6 +3425,9 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
 	if (!ctx)
 		return -ENOMEM;
 
+	ctx->clockid = CLOCK_MONOTONIC;
+	ctx->clock_offset = 0;
+
 	if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) &&
 	    !(ctx->flags & IORING_SETUP_IOPOLL) &&
 	    !(ctx->flags & IORING_SETUP_SQPOLL))
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index c2acf6180845..9935819f12b7 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -437,6 +437,14 @@ static inline bool io_file_can_poll(struct io_kiocb *req)
 	return false;
 }
 
+static inline ktime_t io_get_time(struct io_ring_ctx *ctx)
+{
+	if (ctx->clockid == CLOCK_MONOTONIC)
+		return ktime_get();
+
+	return ktime_get_with_offset(ctx->clock_offset);
+}
+
 enum {
 	IO_CHECK_CQ_OVERFLOW_BIT,
 	IO_CHECK_CQ_DROPPED_BIT,
diff --git a/io_uring/napi.c b/io_uring/napi.c
index d39ae20a3db8..1c3e7562e90b 100644
--- a/io_uring/napi.c
+++ b/io_uring/napi.c
@@ -285,7 +285,7 @@ void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq)
 
 	iowq->napi_busy_poll_dt = READ_ONCE(ctx->napi_busy_poll_dt);
 	if (iowq->timeout != KTIME_MAX) {
-		ktime_t dt = ktime_sub(iowq->timeout, ktime_get());
+		ktime_t dt = ktime_sub(iowq->timeout, io_get_time(ctx));
 
 		iowq->napi_busy_poll_dt = min_t(u64, iowq->napi_busy_poll_dt, dt);
 	}
diff --git a/io_uring/register.c b/io_uring/register.c
index e3c20be5a198..57cb85c42526 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -335,6 +335,31 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
 	return ret;
 }
 
+static int io_register_clock(struct io_ring_ctx *ctx,
+			     struct io_uring_clock_register __user *arg)
+{
+	struct io_uring_clock_register reg;
+
+	if (copy_from_user(&reg, arg, sizeof(reg)))
+		return -EFAULT;
+	if (memchr_inv(&reg.__resv, 0, sizeof(reg.__resv)))
+		return -EINVAL;
+
+	switch (reg.clockid) {
+	case CLOCK_MONOTONIC:
+		ctx->clock_offset = 0;
+		break;
+	case CLOCK_BOOTTIME:
+		ctx->clock_offset = TK_OFFS_BOOT;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	ctx->clockid = reg.clockid;
+	return 0;
+}
+
 static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 			       void __user *arg, unsigned nr_args)
 	__releases(ctx->uring_lock)
@@ -511,6 +536,12 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 			break;
 		ret = io_unregister_napi(ctx, arg);
 		break;
+	case IORING_REGISTER_CLOCK:
+		ret = -EINVAL;
+		if (!arg || nr_args)
+			break;
+		ret = io_register_clock(ctx, arg);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
-- 
2.45.2


  parent reply	other threads:[~2024-08-07 14:17 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-08-07 14:18 [PATCH v2 0/4] clodkid and abs mode CQ wait timeouts Pavel Begunkov
2024-08-07 14:18 ` [PATCH v2 1/4] io_uring/napi: refactor __io_napi_busy_loop() Pavel Begunkov
2024-08-07 14:18 ` [PATCH v2 2/4] io_uring/napi: postpone napi timeout adjustment Pavel Begunkov
2024-08-07 14:18 ` [PATCH v2 3/4] io_uring: add absolute mode wait timeouts Pavel Begunkov
2024-08-07 14:18 ` Pavel Begunkov [this message]
2024-08-12 18:13 ` [PATCH v2 0/4] clodkid and abs mode CQ " Jens Axboe
2024-08-12 18:30   ` Jens Axboe
2024-08-13  0:50     ` Pavel Begunkov
2024-08-13  0:59       ` Jens Axboe
2024-08-13  1:32         ` Pavel Begunkov
2024-08-13  2:09           ` Jens Axboe
2024-08-13  2:38             ` Pavel Begunkov
2024-08-13  3:28               ` Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=98f2bc8a3c36cdf8f0e6a275245e81e903459703.1723039801.git.asml.silence@gmail.com \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox