* [PATCH 0/3] Implement absolute value wait timeouts
@ 2024-07-30 20:29 Pavel Begunkov
2024-07-30 20:29 ` [PATCH 1/3] io_uring/napi: refactor __io_napi_busy_loop() Pavel Begunkov
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: Pavel Begunkov @ 2024-07-30 20:29 UTC (permalink / raw)
To: io-uring; +Cc: Jens Axboe, asml.silence, Lewis Baker
Patches 1-2 are cleaning up timing adjustments for napi busy
polling, and Patch 3 implements the feature.
Note, if we proceed with removing the busy polling adjustments
by the wait timeout, it'd make sense to merge that first and
then I'll resend the series.
Some tests I'll be sending later are here:
https://github.com/isilence/liburing.git abs-timeout
Pavel Begunkov (3):
io_uring/napi: refactor __io_napi_busy_loop()
io_uring/napi: delay napi timeout adjustment
io_uring: add absolute mode wait timeouts
include/uapi/linux/io_uring.h | 1 +
io_uring/io_uring.c | 14 ++++++-------
io_uring/napi.c | 37 ++++++++++-------------------------
io_uring/napi.h | 16 ---------------
4 files changed, 18 insertions(+), 50 deletions(-)
--
2.45.2
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH 1/3] io_uring/napi: refactor __io_napi_busy_loop()
2024-07-30 20:29 [PATCH 0/3] Implement absolute value wait timeouts Pavel Begunkov
@ 2024-07-30 20:29 ` Pavel Begunkov
2024-07-30 20:29 ` [PATCH 2/3] io_uring/napi: delay napi timeout adjustment Pavel Begunkov
2024-07-30 20:29 ` [PATCH 3/3] io_uring: add absolute mode wait timeouts Pavel Begunkov
2 siblings, 0 replies; 4+ messages in thread
From: Pavel Begunkov @ 2024-07-30 20:29 UTC (permalink / raw)
To: io-uring; +Cc: Jens Axboe, asml.silence, Lewis Baker
We don't need to set ->napi_prefer_busy_poll if we're not going to poll,
so do the checks first and all polling preparation after.
Signed-off-by: Pavel Begunkov <[email protected]>
---
io_uring/napi.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/io_uring/napi.c b/io_uring/napi.c
index 4fd6bb331e1e..a670f49e30ef 100644
--- a/io_uring/napi.c
+++ b/io_uring/napi.c
@@ -301,10 +301,11 @@ void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iow
*/
void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq)
{
- iowq->napi_prefer_busy_poll = READ_ONCE(ctx->napi_prefer_busy_poll);
+ if ((ctx->flags & IORING_SETUP_SQPOLL) || !ctx->napi_enabled)
+ return;
- if (!(ctx->flags & IORING_SETUP_SQPOLL) && ctx->napi_enabled)
- io_napi_blocking_busy_loop(ctx, iowq);
+ iowq->napi_prefer_busy_poll = READ_ONCE(ctx->napi_prefer_busy_poll);
+ io_napi_blocking_busy_loop(ctx, iowq);
}
/*
--
2.45.2
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/3] io_uring/napi: delay napi timeout adjustment
2024-07-30 20:29 [PATCH 0/3] Implement absolute value wait timeouts Pavel Begunkov
2024-07-30 20:29 ` [PATCH 1/3] io_uring/napi: refactor __io_napi_busy_loop() Pavel Begunkov
@ 2024-07-30 20:29 ` Pavel Begunkov
2024-07-30 20:29 ` [PATCH 3/3] io_uring: add absolute mode wait timeouts Pavel Begunkov
2 siblings, 0 replies; 4+ messages in thread
From: Pavel Begunkov @ 2024-07-30 20:29 UTC (permalink / raw)
To: io-uring; +Cc: Jens Axboe, asml.silence, Lewis Baker
Instead of adjusting busy polling time in io_cqring_wait(), rely on the
deadline value and delay the check until io_napi_busy_loop_timeout()
is called inside the napi busy polling loop. There is a side effect
of comparing a cpu local clock with ktime_get(), however waiting
timeouts are usually long enough not to care and napi.c is already
careless about mixing time flavours, i.e. io_napi_blocking_busy_loop()
gets a cpu local time before disabling preemption.
Signed-off-by: Pavel Begunkov <[email protected]>
---
io_uring/io_uring.c | 1 -
io_uring/napi.c | 30 ++++++------------------------
io_uring/napi.h | 16 ----------------
3 files changed, 6 insertions(+), 41 deletions(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 3942db160f18..9ec07f76ad19 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2423,7 +2423,6 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
dt = timespec64_to_ktime(ts);
iowq.timeout = ktime_add(dt, ktime_get());
- io_napi_adjust_timeout(ctx, &iowq, dt);
}
if (sig) {
diff --git a/io_uring/napi.c b/io_uring/napi.c
index a670f49e30ef..c5c1177e2fb4 100644
--- a/io_uring/napi.c
+++ b/io_uring/napi.c
@@ -109,12 +109,15 @@ static inline void io_napi_remove_stale(struct io_ring_ctx *ctx, bool is_stale)
}
static inline bool io_napi_busy_loop_timeout(ktime_t start_time,
- ktime_t bp)
+ struct io_wait_queue *iowq)
{
+ ktime_t bp = iowq->napi_busy_poll_dt;
+
if (bp) {
ktime_t end_time = ktime_add(start_time, bp);
ktime_t now = net_to_ktime(busy_loop_current_time());
+ end_time = min(end_time, iowq->timeout);
return ktime_after(now, end_time);
}
@@ -130,8 +133,7 @@ static bool io_napi_busy_loop_should_end(void *data,
return true;
if (io_should_wake(iowq) || io_has_work(iowq->ctx))
return true;
- if (io_napi_busy_loop_timeout(net_to_ktime(start_time),
- iowq->napi_busy_poll_dt))
+ if (io_napi_busy_loop_timeout(net_to_ktime(start_time), iowq))
return true;
return false;
@@ -271,27 +273,6 @@ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
return 0;
}
-/*
- * __io_napi_adjust_timeout() - adjust busy loop timeout
- * @ctx: pointer to io-uring context structure
- * @iowq: pointer to io wait queue
- * @ts: pointer to timespec or NULL
- *
- * Adjust the busy loop timeout according to timespec and busy poll timeout.
- * If the specified NAPI timeout is bigger than the wait timeout, then adjust
- * the NAPI timeout accordingly.
- */
-void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iowq,
- ktime_t to_wait)
-{
- ktime_t poll_dt = READ_ONCE(ctx->napi_busy_poll_dt);
-
- if (to_wait)
- poll_dt = min(poll_dt, to_wait);
-
- iowq->napi_busy_poll_dt = poll_dt;
-}
-
/*
* __io_napi_busy_loop() - execute busy poll loop
* @ctx: pointer to io-uring context structure
@@ -304,6 +285,7 @@ void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq)
if ((ctx->flags & IORING_SETUP_SQPOLL) || !ctx->napi_enabled)
return;
+ iowq->napi_busy_poll_dt = READ_ONCE(ctx->napi_busy_poll_dt);
iowq->napi_prefer_busy_poll = READ_ONCE(ctx->napi_prefer_busy_poll);
io_napi_blocking_busy_loop(ctx, iowq);
}
diff --git a/io_uring/napi.h b/io_uring/napi.h
index 88f1c21d5548..87e30b4f8d9e 100644
--- a/io_uring/napi.h
+++ b/io_uring/napi.h
@@ -17,8 +17,6 @@ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg);
void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock);
-void __io_napi_adjust_timeout(struct io_ring_ctx *ctx,
- struct io_wait_queue *iowq, ktime_t to_wait);
void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq);
int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx);
@@ -27,15 +25,6 @@ static inline bool io_napi(struct io_ring_ctx *ctx)
return !list_empty(&ctx->napi_list);
}
-static inline void io_napi_adjust_timeout(struct io_ring_ctx *ctx,
- struct io_wait_queue *iowq,
- ktime_t to_wait)
-{
- if (!io_napi(ctx))
- return;
- __io_napi_adjust_timeout(ctx, iowq, to_wait);
-}
-
static inline void io_napi_busy_loop(struct io_ring_ctx *ctx,
struct io_wait_queue *iowq)
{
@@ -86,11 +75,6 @@ static inline bool io_napi(struct io_ring_ctx *ctx)
static inline void io_napi_add(struct io_kiocb *req)
{
}
-static inline void io_napi_adjust_timeout(struct io_ring_ctx *ctx,
- struct io_wait_queue *iowq,
- ktime_t to_wait)
-{
-}
static inline void io_napi_busy_loop(struct io_ring_ctx *ctx,
struct io_wait_queue *iowq)
{
--
2.45.2
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 3/3] io_uring: add absolute mode wait timeouts
2024-07-30 20:29 [PATCH 0/3] Implement absolute value wait timeouts Pavel Begunkov
2024-07-30 20:29 ` [PATCH 1/3] io_uring/napi: refactor __io_napi_busy_loop() Pavel Begunkov
2024-07-30 20:29 ` [PATCH 2/3] io_uring/napi: delay napi timeout adjustment Pavel Begunkov
@ 2024-07-30 20:29 ` Pavel Begunkov
2 siblings, 0 replies; 4+ messages in thread
From: Pavel Begunkov @ 2024-07-30 20:29 UTC (permalink / raw)
To: io-uring; +Cc: Jens Axboe, asml.silence, Lewis Baker
In addition to current relative timeouts for the waiting loop, where the
timespec argument specifies the maximum time it can wait for, add
support for the absolute mode, with the value carrying a CLOCK_MONOTONIC
absolute time by which we should return control back to the user.
Link: https://github.com/axboe/liburing/issues/1162
Suggested-by: Lewis Baker <[email protected]>
Signed-off-by: Pavel Begunkov <[email protected]>
---
include/uapi/linux/io_uring.h | 1 +
io_uring/io_uring.c | 13 +++++++------
2 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 2aaf7ee256ac..afc901502804 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -507,6 +507,7 @@ struct io_cqring_offsets {
#define IORING_ENTER_SQ_WAIT (1U << 2)
#define IORING_ENTER_EXT_ARG (1U << 3)
#define IORING_ENTER_REGISTERED_RING (1U << 4)
+#define IORING_ENTER_ABS_TIMER (1U << 5)
/*
* Passed in for io_uring_setup(2). Copied back with updated info on success
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 9ec07f76ad19..5940bd8f5630 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2387,7 +2387,7 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
* Wait until events become available, if we don't already have some. The
* application must reap them itself, as they reside on the shared cq ring.
*/
-static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
+static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
const sigset_t __user *sig, size_t sigsz,
struct __kernel_timespec __user *uts)
{
@@ -2416,13 +2416,13 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
if (uts) {
struct timespec64 ts;
- ktime_t dt;
if (get_timespec64(&ts, uts))
return -EFAULT;
- dt = timespec64_to_ktime(ts);
- iowq.timeout = ktime_add(dt, ktime_get());
+ iowq.timeout = timespec64_to_ktime(ts);
+ if (!(flags & IORING_ENTER_ABS_TIMER))
+ iowq.timeout = ktime_add(iowq.timeout, ktime_get());
}
if (sig) {
@@ -3153,7 +3153,8 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
if (unlikely(flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP |
IORING_ENTER_SQ_WAIT | IORING_ENTER_EXT_ARG |
- IORING_ENTER_REGISTERED_RING)))
+ IORING_ENTER_REGISTERED_RING |
+ IORING_ENTER_ABS_TIMER)))
return -EINVAL;
/*
@@ -3252,7 +3253,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
min_complete = min(min_complete,
ctx->cq_entries);
- ret2 = io_cqring_wait(ctx, min_complete, sig,
- argsz, ts);
+ ret2 = io_cqring_wait(ctx, min_complete, flags, sig,
+ argsz, ts);
}
}
--
2.45.2
^ permalink raw reply related [flat|nested] 4+ messages in thread
end of thread, other threads:[~2024-07-30 20:29 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-07-30 20:29 [PATCH 0/3] Implement absolute value wait timeouts Pavel Begunkov
2024-07-30 20:29 ` [PATCH 1/3] io_uring/napi: refactor __io_napi_busy_loop() Pavel Begunkov
2024-07-30 20:29 ` [PATCH 2/3] io_uring/napi: delay napi timeout adjustment Pavel Begunkov
2024-07-30 20:29 ` [PATCH 3/3] io_uring: add absolute mode wait timeouts Pavel Begunkov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox