From: Olivier Langlois <[email protected]>
To: Jens Axboe <[email protected]>,Pavel Begunkov
<[email protected]>,[email protected]
Subject: [PATCH 1/2] io_uring/napi: Introduce io_napi_tracking_ops
Date: Tue, 13 Aug 2024 13:10:55 -0400 [thread overview]
Message-ID: <bfbb03a7ad6256b68d08429c0888a05032a1b182.1723567469.git.olivier@trillion01.com> (raw)
In-Reply-To: <[email protected]>
The long-term goal is to lay out a framework that can offer
different napi tracking strategies to the user. The obvious first
alternative strategy is static tracking, where the user manually
updates the napi_list, removing the overhead of io_uring managing the
list dynamically.
Signed-off-by: Olivier Langlois <[email protected]>
---
include/linux/io_uring_types.h | 12 +++++-
io_uring/fdinfo.c | 4 ++
io_uring/napi.c | 76 ++++++++++++++++++++++++++++++----
io_uring/napi.h | 11 +----
4 files changed, 86 insertions(+), 17 deletions(-)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 3315005df117..c1d1b28f8cca 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -217,6 +217,16 @@ struct io_alloc_cache {
size_t elem_size;
};
+#ifdef CONFIG_NET_RX_BUSY_POLL
+struct io_napi_tracking_ops {
+ void (*add_id)(struct io_kiocb *req);
+ bool (*do_busy_loop)(struct io_ring_ctx *ctx,
+ void *loop_end_arg);
+ void (*show_fdinfo)(struct io_ring_ctx *ctx,
+ struct seq_file *m);
+};
+#endif
+
struct io_ring_ctx {
/* const or read-mostly hot data */
struct {
@@ -402,11 +412,11 @@ struct io_ring_ctx {
#ifdef CONFIG_NET_RX_BUSY_POLL
struct list_head napi_list; /* track busy poll napi_id */
spinlock_t napi_lock; /* napi_list lock */
+ struct io_napi_tracking_ops *napi_ops;
/* napi busy poll default timeout */
ktime_t napi_busy_poll_dt;
bool napi_prefer_busy_poll;
- bool napi_enabled;
DECLARE_HASHTABLE(napi_ht, 4);
#endif
diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c
index b1e0e0d85349..fa773687a684 100644
--- a/io_uring/fdinfo.c
+++ b/io_uring/fdinfo.c
@@ -223,5 +223,9 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
}
spin_unlock(&ctx->completion_lock);
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ ctx->napi_ops->show_fdinfo(ctx, m);
+#endif
+
}
#endif
diff --git a/io_uring/napi.c b/io_uring/napi.c
index 1de1d4d62925..75ac850af0c0 100644
--- a/io_uring/napi.c
+++ b/io_uring/napi.c
@@ -38,7 +38,7 @@ static inline ktime_t net_to_ktime(unsigned long t)
return ns_to_ktime(t << 10);
}
-void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock)
+static inline void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock)
{
struct hlist_head *hash_list;
unsigned int napi_id;
@@ -136,8 +136,52 @@ static bool io_napi_busy_loop_should_end(void *data,
return false;
}
-static bool __io_napi_do_busy_loop(struct io_ring_ctx *ctx,
- void *loop_end_arg)
+/*
+ * Does not perform any busy polling but still checks whether list entries
+ * are stale if the list is not empty. This could happen when napi is
+ * unregistered after having been enabled for some time.
+ */
+static bool no_tracking_do_busy_loop(struct io_ring_ctx *ctx,
+ void *loop_end_arg)
+{
+ struct io_napi_entry *e;
+ bool is_stale = false;
+
+ list_for_each_entry_rcu(e, &ctx->napi_list, list) {
+ if (time_after(jiffies, e->timeout))
+ is_stale = true;
+ }
+
+ return is_stale;
+}
+
+static void no_tracking_show_fdinfo(struct io_ring_ctx *ctx,
+ struct seq_file *m)
+{
+ seq_puts(m, "NAPI:\tdisabled\n");
+}
+
+/*
+ * default ops for a newly created ring for which NAPI busy poll is not enabled
+ */
+static struct io_napi_tracking_ops no_tracking_ops = {
+ .add_id = NULL,
+ .do_busy_loop = no_tracking_do_busy_loop,
+ .show_fdinfo = no_tracking_show_fdinfo,
+};
+
+static void dynamic_tracking_add_id(struct io_kiocb *req)
+{
+ struct io_ring_ctx *ctx = req->ctx;
+ struct socket *sock;
+
+ sock = sock_from_file(req->file);
+ if (sock)
+ __io_napi_add(ctx, sock);
+}
+
+static bool dynamic_tracking_do_busy_loop(struct io_ring_ctx *ctx,
+ void *loop_end_arg)
{
struct io_napi_entry *e;
bool (*loop_end)(void *, unsigned long) = NULL;
@@ -157,6 +201,23 @@ static bool __io_napi_do_busy_loop(struct io_ring_ctx *ctx,
return is_stale;
}
+/* show_fdinfo op for dynamic tracking: prints the ring's NAPI settings to @m */
+static void dynamic_tracking_show_fdinfo(struct io_ring_ctx *ctx,
+ struct seq_file *m)
+{
+ seq_puts(m, "NAPI:\tenabled\n");
+ /* the timeout lives in ctx->napi_busy_poll_dt (ktime_t), hence %lld */
+ seq_printf(m, "napi_busy_poll_dt:\t%lld\n", ctx->napi_busy_poll_dt);
+ seq_printf(m, "napi_prefer_busy_poll:\t%s\n",
+ ctx->napi_prefer_busy_poll ? "true" : "false");
+}
+
+static struct io_napi_tracking_ops dynamic_tracking_ops = {
+ .add_id = dynamic_tracking_add_id,
+ .do_busy_loop = dynamic_tracking_do_busy_loop,
+ .show_fdinfo = dynamic_tracking_show_fdinfo,
+};
+
static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
struct io_wait_queue *iowq)
{
@@ -172,7 +233,7 @@ static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
rcu_read_lock();
do {
- is_stale = __io_napi_do_busy_loop(ctx, loop_end_arg);
+ is_stale = ctx->napi_ops->do_busy_loop(ctx, loop_end_arg);
} while (!io_napi_busy_loop_should_end(iowq, start_time) && !loop_end_arg);
rcu_read_unlock();
@@ -193,6 +254,7 @@ void io_napi_init(struct io_ring_ctx *ctx)
spin_lock_init(&ctx->napi_lock);
ctx->napi_prefer_busy_poll = false;
ctx->napi_busy_poll_dt = ns_to_ktime(sys_dt);
+ ctx->napi_ops = &no_tracking_ops;
}
/*
@@ -241,7 +303,7 @@ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
WRITE_ONCE(ctx->napi_busy_poll_dt, napi.busy_poll_to * NSEC_PER_USEC);
WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi.prefer_busy_poll);
- WRITE_ONCE(ctx->napi_enabled, true);
+ WRITE_ONCE(ctx->napi_ops, &dynamic_tracking_ops);
return 0;
}
@@ -265,7 +327,7 @@ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
WRITE_ONCE(ctx->napi_busy_poll_dt, 0);
WRITE_ONCE(ctx->napi_prefer_busy_poll, false);
- WRITE_ONCE(ctx->napi_enabled, false);
+ WRITE_ONCE(ctx->napi_ops, &no_tracking_ops);
return 0;
}
@@ -321,7 +383,7 @@ int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx)
return 0;
rcu_read_lock();
- is_stale = __io_napi_do_busy_loop(ctx, NULL);
+ is_stale = ctx->napi_ops->do_busy_loop(ctx, NULL);
rcu_read_unlock();
io_napi_remove_stale(ctx, is_stale);
diff --git a/io_uring/napi.h b/io_uring/napi.h
index 27b88c3eb428..3d68d8e7b108 100644
--- a/io_uring/napi.h
+++ b/io_uring/napi.h
@@ -15,8 +15,6 @@ void io_napi_free(struct io_ring_ctx *ctx);
int io_register_napi(struct io_ring_ctx *ctx, void __user *arg);
int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg);
-void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock);
-
void __io_napi_adjust_timeout(struct io_ring_ctx *ctx,
struct io_wait_queue *iowq, ktime_t to_wait);
void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq);
@@ -53,14 +51,9 @@ static inline void io_napi_busy_loop(struct io_ring_ctx *ctx,
static inline void io_napi_add(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
- struct socket *sock;
-
- if (!READ_ONCE(ctx->napi_enabled))
- return;
- sock = sock_from_file(req->file);
- if (sock)
- __io_napi_add(ctx, sock);
+ if (ctx->napi_ops->add_id)
+ ctx->napi_ops->add_id(req);
}
#else
--
2.46.0
next prev parent reply other threads:[~2024-08-13 17:10 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-08-13 16:44 [PATCH 0/2] abstract napi tracking strategy Olivier Langlois
2024-08-13 17:10 ` Olivier Langlois [this message]
2024-08-14 11:44 ` [PATCH 1/2] io_uring/napi: Introduce io_napi_tracking_ops Olivier Langlois
2024-08-14 13:17 ` Jens Axboe
2024-08-13 17:11 ` [PATCH 2/2] io_uring/napi: add static napi tracking strategy Olivier Langlois
2024-08-13 18:33 ` [PATCH 0/2] abstract " Jens Axboe
2024-08-13 21:25 ` Olivier Langlois
2024-08-13 21:44 ` Jens Axboe
2024-08-15 22:17 ` Olivier Langlois
2024-08-15 22:44 ` Olivier Langlois
2024-08-16 14:26 ` Pavel Begunkov
2024-09-16 18:29 ` Olivier Langlois
2024-08-13 22:36 ` Pavel Begunkov
2024-08-14 13:28 ` Pavel Begunkov
2024-08-13 21:34 ` Olivier Langlois
2024-08-13 21:45 ` Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=bfbb03a7ad6256b68d08429c0888a05032a1b182.1723567469.git.olivier@trillion01.com \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox