public inbox for [email protected]
 help / color / mirror / Atom feed
From: Olivier Langlois <[email protected]>
To: Jens Axboe <[email protected]>,Pavel Begunkov
	<[email protected]>,[email protected]
Subject: [PATCH 1/2] io_uring/napi: Introduce io_napi_tracking_ops
Date: Tue, 13 Aug 2024 13:10:55 -0400	[thread overview]
Message-ID: <bfbb03a7ad6256b68d08429c0888a05032a1b182.1723567469.git.olivier@trillion01.com> (raw)
In-Reply-To: <[email protected]>

The long-term goal is to lay out a framework that can offer
different napi tracking strategies to the user. The obvious first
alternative strategy is static tracking, where the user would manually
update the napi_list to remove the overhead of io_uring managing the
list dynamically.

Signed-off-by: Olivier Langlois <[email protected]>
---
 include/linux/io_uring_types.h | 12 +++++-
 io_uring/fdinfo.c              |  4 ++
 io_uring/napi.c                | 76 ++++++++++++++++++++++++++++++----
 io_uring/napi.h                | 11 +----
 4 files changed, 86 insertions(+), 17 deletions(-)

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 3315005df117..c1d1b28f8cca 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -217,6 +217,16 @@ struct io_alloc_cache {
 	size_t			elem_size;
 };
 
+#ifdef CONFIG_NET_RX_BUSY_POLL
+struct io_napi_tracking_ops {
+	void (*add_id)(struct io_kiocb *req);
+	bool (*do_busy_loop)(struct io_ring_ctx *ctx,
+			     void *loop_end_arg);
+	void (*show_fdinfo)(struct io_ring_ctx *ctx,
+			    struct seq_file *m);
+};
+#endif
+
 struct io_ring_ctx {
 	/* const or read-mostly hot data */
 	struct {
@@ -402,11 +412,11 @@ struct io_ring_ctx {
 #ifdef CONFIG_NET_RX_BUSY_POLL
 	struct list_head	napi_list;	/* track busy poll napi_id */
 	spinlock_t		napi_lock;	/* napi_list lock */
+	struct io_napi_tracking_ops *napi_ops;
 
 	/* napi busy poll default timeout */
 	ktime_t			napi_busy_poll_dt;
 	bool			napi_prefer_busy_poll;
-	bool			napi_enabled;
 
 	DECLARE_HASHTABLE(napi_ht, 4);
 #endif
diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c
index b1e0e0d85349..fa773687a684 100644
--- a/io_uring/fdinfo.c
+++ b/io_uring/fdinfo.c
@@ -223,5 +223,9 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
 	}
 
 	spin_unlock(&ctx->completion_lock);
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	ctx->napi_ops->show_fdinfo(ctx, m);
+#endif
+
 }
 #endif
diff --git a/io_uring/napi.c b/io_uring/napi.c
index 1de1d4d62925..75ac850af0c0 100644
--- a/io_uring/napi.c
+++ b/io_uring/napi.c
@@ -38,7 +38,7 @@ static inline ktime_t net_to_ktime(unsigned long t)
 	return ns_to_ktime(t << 10);
 }
 
-void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock)
+static inline void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock)
 {
 	struct hlist_head *hash_list;
 	unsigned int napi_id;
@@ -136,8 +136,52 @@ static bool io_napi_busy_loop_should_end(void *data,
 	return false;
 }
 
-static bool __io_napi_do_busy_loop(struct io_ring_ctx *ctx,
-				   void *loop_end_arg)
+/*
+ * Does not perform any busy polling, but still checks whether list entries
+ * are stale if the list is not empty. This can happen when napi is
+ * unregistered after having been enabled for some time.
+ */
+static bool no_tracking_do_busy_loop(struct io_ring_ctx *ctx,
+				     void *loop_end_arg)
+{
+	struct io_napi_entry *e;
+	bool is_stale = false;
+
+	list_for_each_entry_rcu(e, &ctx->napi_list, list) {
+		if (time_after(jiffies, e->timeout))
+			is_stale = true;
+	}
+
+	return is_stale;
+}
+
+static void no_tracking_show_fdinfo(struct io_ring_ctx *ctx,
+				    struct seq_file *m)
+{
+	seq_puts(m, "NAPI:\tdisabled\n");
+}
+
+/*
+ * default ops for a newly created ring for which NAPI busy poll is not enabled
+ */
+static struct io_napi_tracking_ops no_tracking_ops = {
+	.add_id = NULL,
+	.do_busy_loop = no_tracking_do_busy_loop,
+	.show_fdinfo = no_tracking_show_fdinfo,
+};
+
+static void dynamic_tracking_add_id(struct io_kiocb *req)
+{
+	struct io_ring_ctx *ctx = req->ctx;
+	struct socket *sock;
+
+	sock = sock_from_file(req->file);
+	if (sock)
+		__io_napi_add(ctx, sock);
+}
+
+static bool dynamic_tracking_do_busy_loop(struct io_ring_ctx *ctx,
+					  void *loop_end_arg)
 {
 	struct io_napi_entry *e;
 	bool (*loop_end)(void *, unsigned long) = NULL;
@@ -157,6 +201,23 @@ static bool __io_napi_do_busy_loop(struct io_ring_ctx *ctx,
 	return is_stale;
 }
 
+static void dynamic_tracking_show_fdinfo(struct io_ring_ctx *ctx,
+					 struct seq_file *m)
+{
+	seq_puts(m, "NAPI:\tenabled\n");
+	seq_printf(m, "napi_busy_poll_to:\t%u\n", ctx->napi_busy_poll_to);
+	if (ctx->napi_prefer_busy_poll)
+		seq_puts(m, "napi_prefer_busy_poll:\ttrue\n");
+	else
+		seq_puts(m, "napi_prefer_busy_poll:\tfalse\n");
+}
+
+static struct io_napi_tracking_ops dynamic_tracking_ops = {
+	.add_id = dynamic_tracking_add_id,
+	.do_busy_loop = dynamic_tracking_do_busy_loop,
+	.show_fdinfo = dynamic_tracking_show_fdinfo,
+};
+
 static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
 				       struct io_wait_queue *iowq)
 {
@@ -172,7 +233,7 @@ static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
 
 	rcu_read_lock();
 	do {
-		is_stale = __io_napi_do_busy_loop(ctx, loop_end_arg);
+		is_stale = ctx->napi_ops->do_busy_loop(ctx, loop_end_arg);
 	} while (!io_napi_busy_loop_should_end(iowq, start_time) && !loop_end_arg);
 	rcu_read_unlock();
 
@@ -193,6 +254,7 @@ void io_napi_init(struct io_ring_ctx *ctx)
 	spin_lock_init(&ctx->napi_lock);
 	ctx->napi_prefer_busy_poll = false;
 	ctx->napi_busy_poll_dt = ns_to_ktime(sys_dt);
+	ctx->napi_ops = &no_tracking_ops;
 }
 
 /*
@@ -241,7 +303,7 @@ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
 
 	WRITE_ONCE(ctx->napi_busy_poll_dt, napi.busy_poll_to * NSEC_PER_USEC);
 	WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi.prefer_busy_poll);
-	WRITE_ONCE(ctx->napi_enabled, true);
+	WRITE_ONCE(ctx->napi_ops, &dynamic_tracking_ops);
 	return 0;
 }
 
@@ -265,7 +327,7 @@ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
 
 	WRITE_ONCE(ctx->napi_busy_poll_dt, 0);
 	WRITE_ONCE(ctx->napi_prefer_busy_poll, false);
-	WRITE_ONCE(ctx->napi_enabled, false);
+	WRITE_ONCE(ctx->napi_ops, &no_tracking_ops);
 	return 0;
 }
 
@@ -321,7 +383,7 @@ int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx)
 		return 0;
 
 	rcu_read_lock();
-	is_stale = __io_napi_do_busy_loop(ctx, NULL);
+	is_stale = ctx->napi_ops->do_busy_loop(ctx, NULL);
 	rcu_read_unlock();
 
 	io_napi_remove_stale(ctx, is_stale);
diff --git a/io_uring/napi.h b/io_uring/napi.h
index 27b88c3eb428..3d68d8e7b108 100644
--- a/io_uring/napi.h
+++ b/io_uring/napi.h
@@ -15,8 +15,6 @@ void io_napi_free(struct io_ring_ctx *ctx);
 int io_register_napi(struct io_ring_ctx *ctx, void __user *arg);
 int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg);
 
-void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock);
-
 void __io_napi_adjust_timeout(struct io_ring_ctx *ctx,
 		struct io_wait_queue *iowq, ktime_t to_wait);
 void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq);
@@ -53,14 +51,9 @@ static inline void io_napi_busy_loop(struct io_ring_ctx *ctx,
 static inline void io_napi_add(struct io_kiocb *req)
 {
 	struct io_ring_ctx *ctx = req->ctx;
-	struct socket *sock;
-
-	if (!READ_ONCE(ctx->napi_enabled))
-		return;
 
-	sock = sock_from_file(req->file);
-	if (sock)
-		__io_napi_add(ctx, sock);
+	if (ctx->napi_ops->add_id)
+		ctx->napi_ops->add_id(req);
 }
 
 #else
-- 
2.46.0


  reply	other threads:[~2024-08-13 17:10 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-08-13 16:44 [PATCH 0/2] abstract napi tracking strategy Olivier Langlois
2024-08-13 17:10 ` Olivier Langlois [this message]
2024-08-14 11:44   ` [PATCH 1/2] io_uring/napi: Introduce io_napi_tracking_ops Olivier Langlois
2024-08-14 13:17     ` Jens Axboe
2024-08-13 17:11 ` [PATCH 2/2] io_uring/napi: add static napi tracking strategy Olivier Langlois
2024-08-13 18:33 ` [PATCH 0/2] abstract " Jens Axboe
2024-08-13 21:25   ` Olivier Langlois
2024-08-13 21:44     ` Jens Axboe
2024-08-15 22:17       ` Olivier Langlois
2024-08-15 22:44         ` Olivier Langlois
2024-08-16 14:26           ` Pavel Begunkov
2024-09-16 18:29             ` Olivier Langlois
2024-08-13 22:36     ` Pavel Begunkov
2024-08-14 13:28       ` Pavel Begunkov
2024-08-13 21:34   ` Olivier Langlois
2024-08-13 21:45     ` Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=bfbb03a7ad6256b68d08429c0888a05032a1b182.1723567469.git.olivier@trillion01.com \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox