* [PATCH 1/2] io_uring/napi: use ktime in busy polling
From: Pavel Begunkov @ 2024-07-26 14:24 UTC
To: io-uring; +Cc: Jens Axboe, asml.silence
It's more natural to use ktime/ns instead of keeping around usec,
especially since we're comparing it against user-provided timers,
so convert the napi busy poll internal handling to ktime. It's also
nicer since the type (ktime_t vs unsigned long) now tells the unit
of measure.
Keep everything as ktime, which we convert to/from microseconds for
IORING_[UN]REGISTER_NAPI. The net/ busy polling code appears to work
in usec, however it's not real usec: a shift by 10 is used to derive
it from nsecs, see busy_loop_current_time(), so it's easy to recover
the truncated nsec value and get back better precision.
Note, we can further improve it later by removing the truncation and
maybe convincing net/ to use ktime/ns instead.
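To make the truncation concrete, here's a small illustrative userspace
sketch of the round trip (the helper names are made up; in the kernel
the forward direction lives in busy_loop_current_time() and the reverse
is the net_to_ktime() helper added by this patch):

#include <stdint.h>
#include <stdio.h>

/* net/ approximates usec as ns >> 10, i.e. ns / 1024 rather than ns / 1000 */
static uint64_t pseudo_usec(uint64_t ns)
{
        return ns >> 10;
}

/* shifting back recovers nsec, truncated to 1024 ns granularity */
static uint64_t pseudo_usec_to_ns(uint64_t t)
{
        return t << 10;
}

int main(void)
{
        uint64_t ns = 123456789;
        uint64_t t = pseudo_usec(ns);

        /* prints: 123456789 -> 120563 -> 123456512 */
        printf("%llu -> %llu -> %llu\n",
               (unsigned long long)ns, (unsigned long long)t,
               (unsigned long long)pseudo_usec_to_ns(t));
        return 0;
}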
Signed-off-by: Pavel Begunkov <[email protected]>
---
include/linux/io_uring_types.h | 2 +-
io_uring/io_uring.h | 2 +-
io_uring/napi.c | 48 +++++++++++++++++++---------------
io_uring/napi.h | 2 +-
4 files changed, 30 insertions(+), 24 deletions(-)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index e62aa9f0629f..3315005df117 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -404,7 +404,7 @@ struct io_ring_ctx {
spinlock_t napi_lock; /* napi_list lock */
/* napi busy poll default timeout */
- unsigned int napi_busy_poll_to;
+ ktime_t napi_busy_poll_dt;
bool napi_prefer_busy_poll;
bool napi_enabled;
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index e1ce908f0679..c2acf6180845 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -43,7 +43,7 @@ struct io_wait_queue {
ktime_t timeout;
#ifdef CONFIG_NET_RX_BUSY_POLL
- unsigned int napi_busy_poll_to;
+ ktime_t napi_busy_poll_dt;
bool napi_prefer_busy_poll;
#endif
};
diff --git a/io_uring/napi.c b/io_uring/napi.c
index 327e5f3a8abe..6bdb267e9c33 100644
--- a/io_uring/napi.c
+++ b/io_uring/napi.c
@@ -33,6 +33,12 @@ static struct io_napi_entry *io_napi_hash_find(struct hlist_head *hash_list,
return NULL;
}
+static inline ktime_t net_to_ktime(unsigned long t)
+{
+ /* napi approximating usecs, reverse busy_loop_current_time */
+ return ns_to_ktime(t << 10);
+}
+
void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock)
{
struct hlist_head *hash_list;
@@ -102,14 +108,14 @@ static inline void io_napi_remove_stale(struct io_ring_ctx *ctx, bool is_stale)
__io_napi_remove_stale(ctx);
}
-static inline bool io_napi_busy_loop_timeout(unsigned long start_time,
- unsigned long bp_usec)
+static inline bool io_napi_busy_loop_timeout(ktime_t start_time,
+ ktime_t bp)
{
- if (bp_usec) {
- unsigned long end_time = start_time + bp_usec;
- unsigned long now = busy_loop_current_time();
+ if (bp) {
+ ktime_t end_time = ktime_add(start_time, bp);
+ ktime_t now = net_to_ktime(busy_loop_current_time());
- return time_after(now, end_time);
+ return ktime_after(now, end_time);
}
return true;
@@ -124,7 +130,8 @@ static bool io_napi_busy_loop_should_end(void *data,
return true;
if (io_should_wake(iowq) || io_has_work(iowq->ctx))
return true;
- if (io_napi_busy_loop_timeout(start_time, iowq->napi_busy_poll_to))
+ if (io_napi_busy_loop_timeout(net_to_ktime(start_time),
+ iowq->napi_busy_poll_dt))
return true;
return false;
@@ -181,10 +188,12 @@ static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
*/
void io_napi_init(struct io_ring_ctx *ctx)
{
+ u64 sys_dt = READ_ONCE(sysctl_net_busy_poll) * NSEC_PER_USEC;
+
INIT_LIST_HEAD(&ctx->napi_list);
spin_lock_init(&ctx->napi_lock);
ctx->napi_prefer_busy_poll = false;
- ctx->napi_busy_poll_to = READ_ONCE(sysctl_net_busy_poll);
+ ctx->napi_busy_poll_dt = ns_to_ktime(sys_dt);
}
/*
@@ -217,7 +226,7 @@ void io_napi_free(struct io_ring_ctx *ctx)
int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
{
const struct io_uring_napi curr = {
- .busy_poll_to = ctx->napi_busy_poll_to,
+ .busy_poll_to = ktime_to_us(ctx->napi_busy_poll_dt),
.prefer_busy_poll = ctx->napi_prefer_busy_poll
};
struct io_uring_napi napi;
@@ -232,7 +241,7 @@ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
if (copy_to_user(arg, &curr, sizeof(curr)))
return -EFAULT;
- WRITE_ONCE(ctx->napi_busy_poll_to, napi.busy_poll_to);
+ WRITE_ONCE(ctx->napi_busy_poll_dt, napi.busy_poll_to * NSEC_PER_USEC);
WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi.prefer_busy_poll);
WRITE_ONCE(ctx->napi_enabled, true);
return 0;
@@ -249,14 +258,14 @@ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
{
const struct io_uring_napi curr = {
- .busy_poll_to = ctx->napi_busy_poll_to,
+ .busy_poll_to = ktime_to_us(ctx->napi_busy_poll_dt),
.prefer_busy_poll = ctx->napi_prefer_busy_poll
};
if (arg && copy_to_user(arg, &curr, sizeof(curr)))
return -EFAULT;
- WRITE_ONCE(ctx->napi_busy_poll_to, 0);
+ WRITE_ONCE(ctx->napi_busy_poll_dt, 0);
WRITE_ONCE(ctx->napi_prefer_busy_poll, false);
WRITE_ONCE(ctx->napi_enabled, false);
return 0;
@@ -275,23 +284,20 @@ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iowq,
struct timespec64 *ts)
{
- unsigned int poll_to = READ_ONCE(ctx->napi_busy_poll_to);
+ ktime_t poll_dt = READ_ONCE(ctx->napi_busy_poll_dt);
if (ts) {
struct timespec64 poll_to_ts;
- poll_to_ts = ns_to_timespec64(1000 * (s64)poll_to);
+ poll_to_ts = ns_to_timespec64(ktime_to_ns(poll_dt));
if (timespec64_compare(ts, &poll_to_ts) < 0) {
s64 poll_to_ns = timespec64_to_ns(ts);
- if (poll_to_ns > 0) {
- u64 val = poll_to_ns + 999;
- do_div(val, 1000);
- poll_to = val;
- }
+ if (poll_to_ns > 0)
+ poll_dt = ns_to_ktime(poll_to_ns);
}
}
- iowq->napi_busy_poll_to = poll_to;
+ iowq->napi_busy_poll_dt = poll_dt;
}
/*
@@ -320,7 +326,7 @@ int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx)
LIST_HEAD(napi_list);
bool is_stale = false;
- if (!READ_ONCE(ctx->napi_busy_poll_to))
+ if (!READ_ONCE(ctx->napi_busy_poll_dt))
return 0;
if (list_empty_careful(&ctx->napi_list))
return 0;
diff --git a/io_uring/napi.h b/io_uring/napi.h
index 6fc0393d0dbe..babbee36cd3e 100644
--- a/io_uring/napi.h
+++ b/io_uring/napi.h
@@ -55,7 +55,7 @@ static inline void io_napi_add(struct io_kiocb *req)
struct io_ring_ctx *ctx = req->ctx;
struct socket *sock;
- if (!READ_ONCE(ctx->napi_busy_poll_to))
+ if (!READ_ONCE(ctx->napi_busy_poll_dt))
return;
sock = sock_from_file(req->file);
--
2.45.2
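The usec-based uapi is unchanged by this patch; only the internal
representation moves to ktime. A minimal liburing sketch of setting the
timeout (assuming a liburing build that provides io_uring_register_napi();
busy_poll_to is in microseconds per the uapi):

#include <liburing.h>
#include <stdio.h>

int main(void)
{
        struct io_uring ring;
        struct io_uring_napi napi = {
                .busy_poll_to = 50,     /* usec, converted to ktime internally */
                .prefer_busy_poll = 1,
        };

        if (io_uring_queue_init(8, &ring, 0) < 0)
                return 1;

        /* issues IORING_REGISTER_NAPI; returns 0 or -errno */
        if (io_uring_register_napi(&ring, &napi) < 0)
                fprintf(stderr, "register napi failed\n");

        io_uring_queue_exit(&ring);
        return 0;
}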
* [PATCH 2/2] io_uring/napi: pass ktime to io_napi_adjust_timeout
From: Pavel Begunkov @ 2024-07-26 14:24 UTC
To: io-uring; +Cc: Jens Axboe, asml.silence
Pass the waiting time for __io_napi_adjust_timeout() as ktime and get
rid of all the timespec64 conversions. It's especially simple since the
caller already has a ktime.
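In effect the adjustment collapses to taking the minimum of the
configured busy poll window and the caller's remaining wait time. An
illustrative standalone restatement (the function name is made up; the
real logic is in __io_napi_adjust_timeout() below):

typedef long long ktime_t;      /* nanoseconds, as in the kernel */

static ktime_t napi_poll_window(ktime_t poll_dt, ktime_t to_wait)
{
        /* to_wait == 0 means no user timeout: keep the full window */
        if (to_wait && to_wait < poll_dt)
                return to_wait;
        return poll_dt;
}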
Signed-off-by: Pavel Begunkov <[email protected]>
---
io_uring/io_uring.c | 6 ++++--
io_uring/napi.c | 14 +++-----------
io_uring/napi.h | 8 ++++----
3 files changed, 11 insertions(+), 17 deletions(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 2626424f5d73..3942db160f18 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2416,12 +2416,14 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
if (uts) {
struct timespec64 ts;
+ ktime_t dt;
if (get_timespec64(&ts, uts))
return -EFAULT;
- iowq.timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns());
- io_napi_adjust_timeout(ctx, &iowq, &ts);
+ dt = timespec64_to_ktime(ts);
+ iowq.timeout = ktime_add(dt, ktime_get());
+ io_napi_adjust_timeout(ctx, &iowq, dt);
}
if (sig) {
diff --git a/io_uring/napi.c b/io_uring/napi.c
index 6bdb267e9c33..4fd6bb331e1e 100644
--- a/io_uring/napi.c
+++ b/io_uring/napi.c
@@ -282,20 +282,12 @@ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
* the NAPI timeout accordingly.
*/
void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iowq,
- struct timespec64 *ts)
+ ktime_t to_wait)
{
ktime_t poll_dt = READ_ONCE(ctx->napi_busy_poll_dt);
- if (ts) {
- struct timespec64 poll_to_ts;
-
- poll_to_ts = ns_to_timespec64(ktime_to_ns(poll_dt));
- if (timespec64_compare(ts, &poll_to_ts) < 0) {
- s64 poll_to_ns = timespec64_to_ns(ts);
- if (poll_to_ns > 0)
- poll_dt = ns_to_ktime(poll_to_ns);
- }
- }
+ if (to_wait)
+ poll_dt = min(poll_dt, to_wait);
iowq->napi_busy_poll_dt = poll_dt;
}
diff --git a/io_uring/napi.h b/io_uring/napi.h
index babbee36cd3e..88f1c21d5548 100644
--- a/io_uring/napi.h
+++ b/io_uring/napi.h
@@ -18,7 +18,7 @@ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg);
void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock);
void __io_napi_adjust_timeout(struct io_ring_ctx *ctx,
- struct io_wait_queue *iowq, struct timespec64 *ts);
+ struct io_wait_queue *iowq, ktime_t to_wait);
void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq);
int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx);
@@ -29,11 +29,11 @@ static inline bool io_napi(struct io_ring_ctx *ctx)
static inline void io_napi_adjust_timeout(struct io_ring_ctx *ctx,
struct io_wait_queue *iowq,
- struct timespec64 *ts)
+ ktime_t to_wait)
{
if (!io_napi(ctx))
return;
- __io_napi_adjust_timeout(ctx, iowq, ts);
+ __io_napi_adjust_timeout(ctx, iowq, to_wait);
}
static inline void io_napi_busy_loop(struct io_ring_ctx *ctx,
@@ -88,7 +88,7 @@ static inline void io_napi_add(struct io_kiocb *req)
}
static inline void io_napi_adjust_timeout(struct io_ring_ctx *ctx,
struct io_wait_queue *iowq,
- struct timespec64 *ts)
+ ktime_t to_wait)
{
}
static inline void io_napi_busy_loop(struct io_ring_ctx *ctx,
--
2.45.2