From: Olivier Langlois <[email protected]>
To: Jens Axboe <[email protected]>,Pavel Begunkov
<[email protected]>,[email protected]
Subject: [PATCH 2/2] io_uring/napi: add static napi tracking strategy
Date: Tue, 13 Aug 2024 13:11:14 -0400 [thread overview]
Message-ID: <5fc9dd07e48a7178f547ed1b2aaa0814607fa246.1723567469.git.olivier@trillion01.com> (raw)
In-Reply-To: <[email protected]>
Add the static napi tracking strategy, which allows the user to manually
manage the list of napi ids to busy poll and relieves the ring from
dynamically updating the list.
Signed-off-by: Olivier Langlois <[email protected]>
---
include/uapi/linux/io_uring.h | 30 ++++++-
io_uring/napi.c | 162 ++++++++++++++++++++++++++++------
2 files changed, 163 insertions(+), 29 deletions(-)
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 2aaf7ee256ac..f72471b19af2 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -728,12 +728,38 @@ struct io_uring_buf_status {
__u32 resv[8];
};
+enum io_uring_napi_op {
+ /* register/unregister backward compatible opcode */
+ IO_URING_NAPI_REGISTER_OP = 0,
+
+ /* opcodes to update napi_list when static tracking is used */
+ IO_URING_NAPI_STATIC_ADD_ID = 1,
+ IO_URING_NAPI_STATIC_DEL_ID = 2
+};
+
+enum io_uring_napi_tracking_strategy {
+ IO_URING_NAPI_TRACKING_DYNAMIC = 0,
+ IO_URING_NAPI_TRACKING_STATIC = 1
+};
+
/* argument for IORING_(UN)REGISTER_NAPI */
struct io_uring_napi {
__u32 busy_poll_to;
__u8 prefer_busy_poll;
- __u8 pad[3];
- __u64 resv;
+
+ /* an io_uring_napi_op value */
+ __u8 opcode;
+ __u8 pad[2];
+
+ /*
+ * for IO_URING_NAPI_REGISTER_OP, it is an
+ * io_uring_napi_tracking_strategy value.
+ *
+ * for IO_URING_NAPI_STATIC_ADD_ID/IO_URING_NAPI_STATIC_DEL_ID
+ * it is the napi id to add/del from napi_list.
+ */
+ __u32 op_param;
+ __u32 resv;
};
/*
diff --git a/io_uring/napi.c b/io_uring/napi.c
index 75ac850af0c0..b66ff15fcc72 100644
--- a/io_uring/napi.c
+++ b/io_uring/napi.c
@@ -38,37 +38,29 @@ static inline ktime_t net_to_ktime(unsigned long t)
return ns_to_ktime(t << 10);
}
-static inline void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock)
+static int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id)
{
struct hlist_head *hash_list;
- unsigned int napi_id;
- struct sock *sk;
struct io_napi_entry *e;
- sk = sock->sk;
- if (!sk)
- return;
-
- napi_id = READ_ONCE(sk->sk_napi_id);
-
/* Non-NAPI IDs can be rejected. */
if (napi_id < MIN_NAPI_ID)
- return;
+ return -EINVAL;
hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))];
rcu_read_lock();
e = io_napi_hash_find(hash_list, napi_id);
if (e) {
- e->timeout = jiffies + NAPI_TIMEOUT;
+ WRITE_ONCE(e->timeout, jiffies + NAPI_TIMEOUT);
rcu_read_unlock();
- return;
+ return -EEXIST;
}
rcu_read_unlock();
e = kmalloc(sizeof(*e), GFP_NOWAIT);
if (!e)
- return;
+ return -ENOMEM;
e->napi_id = napi_id;
e->timeout = jiffies + NAPI_TIMEOUT;
@@ -77,23 +69,62 @@ static inline void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock)
if (unlikely(io_napi_hash_find(hash_list, napi_id))) {
spin_unlock(&ctx->napi_lock);
kfree(e);
- return;
+ return -EEXIST;
}
hlist_add_tail_rcu(&e->node, hash_list);
- list_add_tail(&e->list, &ctx->napi_list);
+ list_add_tail_rcu(&e->list, &ctx->napi_list);
spin_unlock(&ctx->napi_lock);
+ return 0;
+}
+
+static int __io_napi_del_id(struct io_ring_ctx *ctx, unsigned int napi_id)
+{
+ struct hlist_head *hash_list;
+ struct io_napi_entry *e;
+
+ /* Non-NAPI IDs can be rejected. */
+ if (napi_id < MIN_NAPI_ID)
+ return -EINVAL;
+
+ hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))];
+ spin_lock(&ctx->napi_lock);
+ e = io_napi_hash_find(hash_list, napi_id);
+ if (unlikely(!e)) {
+ spin_unlock(&ctx->napi_lock);
+ return -ENOENT;
+ }
+
+ list_del_rcu(&e->list);
+ hash_del_rcu(&e->node);
+ kfree_rcu(e, rcu);
+ spin_unlock(&ctx->napi_lock);
+ return 0;
+}
+
+static inline void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock)
+{
+ unsigned int napi_id;
+ struct sock *sk;
+
+ sk = sock->sk;
+ if (!sk)
+ return;
+
+ napi_id = READ_ONCE(sk->sk_napi_id);
+ __io_napi_add_id(ctx, napi_id);
}
static void __io_napi_remove_stale(struct io_ring_ctx *ctx)
{
+ struct hlist_node *tmp;
struct io_napi_entry *e;
unsigned int i;
spin_lock(&ctx->napi_lock);
- hash_for_each(ctx->napi_ht, i, e, node) {
+ hash_for_each_safe(ctx->napi_ht, i, tmp, e, node) {
if (time_after(jiffies, e->timeout)) {
- list_del(&e->list);
+ list_del_rcu(&e->list);
hash_del_rcu(&e->node);
kfree_rcu(e, rcu);
}
@@ -201,23 +232,68 @@ static bool dynamic_tracking_do_busy_loop(struct io_ring_ctx *ctx,
return is_stale;
}
-static void dynamic_tracking_show_fdinfo(struct io_ring_ctx *ctx,
- struct seq_file *m)
+static void common_tracking_show_fdinfo(struct io_ring_ctx *ctx,
+ struct seq_file *m,
+ const char *tracking_strategy)
{
seq_puts(m, "NAPI:\tenabled\n");
- seq_printf(m, "napi_busy_poll_to:\t%u\n", ctx->napi_busy_poll_to);
+ seq_printf(m, "napi tracking:\t%s\n", tracking_strategy);
+ seq_printf(m, "napi_busy_poll_to:\t%llu\n", ctx->napi_busy_poll_dt);
if (ctx->napi_prefer_busy_poll)
seq_puts(m, "napi_prefer_busy_poll:\ttrue\n");
else
seq_puts(m, "napi_prefer_busy_poll:\tfalse\n");
}
+static void dynamic_tracking_show_fdinfo(struct io_ring_ctx *ctx,
+ struct seq_file *m)
+{
+ common_tracking_show_fdinfo(ctx, m, "dynamic");
+}
+
static struct io_napi_tracking_ops dynamic_tracking_ops = {
.add_id = dynamic_tracking_add_id,
.do_busy_loop = dynamic_tracking_do_busy_loop,
- .show_fdinfo = dynamic_tracking_show_fdinfo,
+ .show_fdinfo = dynamic_tracking_show_fdinfo,
+};
+
+/*
+ * never report stale entries
+ */
+static bool static_tracking_do_busy_loop(struct io_ring_ctx *ctx,
+ void *loop_end_arg)
+{
+ struct io_napi_entry *e;
+ bool (*loop_end)(void *, unsigned long) = NULL;
+
+ if (loop_end_arg)
+ loop_end = io_napi_busy_loop_should_end;
+
+ list_for_each_entry_rcu(e, &ctx->napi_list, list) {
+ napi_busy_loop_rcu(e->napi_id, loop_end, loop_end_arg,
+ ctx->napi_prefer_busy_poll, BUSY_POLL_BUDGET);
+ }
+
+ return false;
+}
+
+static void static_tracking_show_fdinfo(struct io_ring_ctx *ctx,
+ struct seq_file *m)
+{
+ common_tracking_show_fdinfo(ctx, m, "static");
+}
+
+static struct io_napi_tracking_ops static_tracking_ops = {
+ .add_id = NULL,
+ .do_busy_loop = static_tracking_do_busy_loop,
+ .show_fdinfo = static_tracking_show_fdinfo,
};
+static inline u32 io_napi_get_tracking(struct io_ring_ctx *ctx)
+{
+ return ctx->napi_ops == &static_tracking_ops;
+}
+
static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
struct io_wait_queue *iowq)
{
@@ -273,9 +349,30 @@ void io_napi_free(struct io_ring_ctx *ctx)
hash_del_rcu(&e->node);
kfree_rcu(e, rcu);
}
+ INIT_LIST_HEAD_RCU(&ctx->napi_list);
spin_unlock(&ctx->napi_lock);
}
+static int io_napi_register_napi(struct io_ring_ctx *ctx,
+ struct io_uring_napi *napi)
+{
+ switch (napi->op_param) {
+ case IO_URING_NAPI_TRACKING_DYNAMIC:
+ WRITE_ONCE(ctx->napi_ops, &dynamic_tracking_ops);
+ break;
+ case IO_URING_NAPI_TRACKING_STATIC:
+ WRITE_ONCE(ctx->napi_ops, &static_tracking_ops);
+ /* clean the napi list for manual setup */
+ io_napi_free(ctx);
+ break;
+ default:
+ return -EINVAL;
+ }
+ WRITE_ONCE(ctx->napi_busy_poll_dt, napi->busy_poll_to * NSEC_PER_USEC);
+ WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi->prefer_busy_poll);
+ return 0;
+}
+
/*
* io_napi_register() - Register napi with io-uring
* @ctx: pointer to io-uring context structure
@@ -287,7 +384,8 @@ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
{
const struct io_uring_napi curr = {
.busy_poll_to = ktime_to_us(ctx->napi_busy_poll_dt),
- .prefer_busy_poll = ctx->napi_prefer_busy_poll
+ .prefer_busy_poll = ctx->napi_prefer_busy_poll,
+ .op_param = io_napi_get_tracking(ctx)
};
struct io_uring_napi napi;
@@ -295,16 +393,26 @@ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
return -EINVAL;
if (copy_from_user(&napi, arg, sizeof(napi)))
return -EFAULT;
- if (napi.pad[0] || napi.pad[1] || napi.pad[2] || napi.resv)
+ if (napi.pad[0] || napi.pad[1] || napi.resv)
return -EINVAL;
if (copy_to_user(arg, &curr, sizeof(curr)))
return -EFAULT;
- WRITE_ONCE(ctx->napi_busy_poll_dt, napi.busy_poll_to * NSEC_PER_USEC);
- WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi.prefer_busy_poll);
- WRITE_ONCE(ctx->napi_ops, &dynamic_tracking_ops);
- return 0;
+ switch (napi.opcode) {
+ case IO_URING_NAPI_REGISTER_OP:
+ return io_napi_register_napi(ctx, &napi);
+ case IO_URING_NAPI_STATIC_ADD_ID:
+ if (curr.op_param != IO_URING_NAPI_TRACKING_STATIC)
+ return -EINVAL;
+ return __io_napi_add_id(ctx, napi.op_param);
+ case IO_URING_NAPI_STATIC_DEL_ID:
+ if (curr.op_param != IO_URING_NAPI_TRACKING_STATIC)
+ return -EINVAL;
+ return __io_napi_del_id(ctx, napi.op_param);
+ default:
+ return -EINVAL;
+ }
}
/*
--
2.46.0
next prev parent reply other threads:[~2024-08-13 17:11 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-08-13 16:44 [PATCH 0/2] abstract napi tracking strategy Olivier Langlois
2024-08-13 17:10 ` [PATCH 1/2] io_uring/napi: Introduce io_napi_tracking_ops Olivier Langlois
2024-08-14 11:44 ` Olivier Langlois
2024-08-14 13:17 ` Jens Axboe
2024-08-13 17:11 ` Olivier Langlois [this message]
2024-08-13 18:33 ` [PATCH 0/2] abstract napi tracking strategy Jens Axboe
2024-08-13 21:25 ` Olivier Langlois
2024-08-13 21:44 ` Jens Axboe
2024-08-15 22:17 ` Olivier Langlois
2024-08-15 22:44 ` Olivier Langlois
2024-08-16 14:26 ` Pavel Begunkov
2024-09-16 18:29 ` Olivier Langlois
2024-08-13 22:36 ` Pavel Begunkov
2024-08-14 13:28 ` Pavel Begunkov
2024-08-13 21:34 ` Olivier Langlois
2024-08-13 21:45 ` Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=5fc9dd07e48a7178f547ed1b2aaa0814607fa246.1723567469.git.olivier@trillion01.com \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox