public inbox for io-uring@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] io_uring/napi: clear tracked NAPI entries on unregister
@ 2026-05-03 17:56 Yufan Chen
  2026-05-04  5:10 ` Jens Axboe
  0 siblings, 1 reply; 2+ messages in thread
From: Yufan Chen @ 2026-05-03 17:56 UTC (permalink / raw)
  To: Jens Axboe; +Cc: io-uring, linux-kernel, Yufan Chen

From: Yufan Chen <ericterminal@gmail.com>

IORING_UNREGISTER_NAPI disables NAPI busy polling, but it currently
leaves any previously tracked NAPI IDs on the ring context. The normal
wait path only checks whether the list is empty before entering the busy
poll helper, so an unregistered ring can still observe stale entries and
run an unexpected busy poll pass.

Make unregister switch the context to inactive and free the tracked
entries. Do the same inactive transition while changing the tracking
strategy, and recheck the expected tracking mode under napi_lock before
inserting a newly learned NAPI ID. This prevents a racing poll path from
repopulating the list after unregister or reconfiguration.

Also make the busy poll dispatcher ignore inactive mode explicitly.

Signed-off-by: Yufan Chen <ericterminal@gmail.com>
---
 io_uring/napi.c | 27 ++++++++++++++++++++-------
 io_uring/napi.h |  8 +++++---
 2 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/io_uring/napi.c b/io_uring/napi.c
index 8d68366a4b9..bfc77144591 100644
--- a/io_uring/napi.c
+++ b/io_uring/napi.c
@@ -38,7 +38,8 @@ static inline ktime_t net_to_ktime(unsigned long t)
 	return ns_to_ktime(t << 10);
 }
 
-int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id)
+int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id,
+		     unsigned int mode)
 {
 	struct hlist_head *hash_list;
 	struct io_napi_entry *e;
@@ -69,6 +70,11 @@ int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id)
 	 * kfree()
 	 */
 	spin_lock(&ctx->napi_lock);
+	if (unlikely(READ_ONCE(ctx->napi_track_mode) != mode)) {
+		spin_unlock(&ctx->napi_lock);
+		kfree(e);
+		return -EINVAL;
+	}
 	if (unlikely(io_napi_hash_find(hash_list, napi_id))) {
 		spin_unlock(&ctx->napi_lock);
 		kfree(e);
@@ -196,9 +202,14 @@ __io_napi_do_busy_loop(struct io_ring_ctx *ctx,
 		       bool (*loop_end)(void *, unsigned long),
 		       void *loop_end_arg)
 {
-	if (READ_ONCE(ctx->napi_track_mode) == IO_URING_NAPI_TRACKING_STATIC)
+	switch (READ_ONCE(ctx->napi_track_mode)) {
+	case IO_URING_NAPI_TRACKING_STATIC:
 		return static_tracking_do_busy_loop(ctx, loop_end, loop_end_arg);
-	return dynamic_tracking_do_busy_loop(ctx, loop_end, loop_end_arg);
+	case IO_URING_NAPI_TRACKING_DYNAMIC:
+		return dynamic_tracking_do_busy_loop(ctx, loop_end, loop_end_arg);
+	default:
+		return false;
+	}
 }
 
 static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx,
@@ -273,13 +284,13 @@ static int io_napi_register_napi(struct io_ring_ctx *ctx,
 	default:
 		return -EINVAL;
 	}
-	/* clean the napi list for new settings */
+	WRITE_ONCE(ctx->napi_track_mode, IO_URING_NAPI_TRACKING_INACTIVE);
 	io_napi_free(ctx);
-	WRITE_ONCE(ctx->napi_track_mode, napi->op_param);
 	/* cap NAPI at 10 msec of spin time */
 	napi->busy_poll_to = min(10000, napi->busy_poll_to);
 	WRITE_ONCE(ctx->napi_busy_poll_dt, napi->busy_poll_to * NSEC_PER_USEC);
 	WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi->prefer_busy_poll);
+	WRITE_ONCE(ctx->napi_track_mode, napi->op_param);
 	return 0;
 }
 
@@ -315,7 +326,8 @@ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
 	case IO_URING_NAPI_STATIC_ADD_ID:
 		if (curr.op_param != IO_URING_NAPI_TRACKING_STATIC)
 			return -EINVAL;
-		return __io_napi_add_id(ctx, napi.op_param);
+		return __io_napi_add_id(ctx, napi.op_param,
+					IO_URING_NAPI_TRACKING_STATIC);
 	case IO_URING_NAPI_STATIC_DEL_ID:
 		if (curr.op_param != IO_URING_NAPI_TRACKING_STATIC)
 			return -EINVAL;
@@ -343,9 +355,10 @@ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
 	if (arg && copy_to_user(arg, &curr, sizeof(curr)))
 		return -EFAULT;
 
+	WRITE_ONCE(ctx->napi_track_mode, IO_URING_NAPI_TRACKING_INACTIVE);
 	WRITE_ONCE(ctx->napi_busy_poll_dt, 0);
 	WRITE_ONCE(ctx->napi_prefer_busy_poll, false);
-	WRITE_ONCE(ctx->napi_track_mode, IO_URING_NAPI_TRACKING_INACTIVE);
+	io_napi_free(ctx);
 	return 0;
 }
 
diff --git a/io_uring/napi.h b/io_uring/napi.h
index fa742f42e09..e0aecccc506 100644
--- a/io_uring/napi.h
+++ b/io_uring/napi.h
@@ -15,7 +15,8 @@ void io_napi_free(struct io_ring_ctx *ctx);
 int io_register_napi(struct io_ring_ctx *ctx, void __user *arg);
 int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg);
 
-int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id);
+int __io_napi_add_id(struct io_ring_ctx *ctx, unsigned int napi_id,
+		     unsigned int mode);
 
 void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq);
 int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx);
@@ -43,13 +44,14 @@ static inline void io_napi_add(struct io_kiocb *req)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 	struct socket *sock;
+	unsigned int mode = IO_URING_NAPI_TRACKING_DYNAMIC;
 
-	if (READ_ONCE(ctx->napi_track_mode) != IO_URING_NAPI_TRACKING_DYNAMIC)
+	if (READ_ONCE(ctx->napi_track_mode) != mode)
 		return;
 
 	sock = sock_from_file(req->file);
 	if (sock && sock->sk)
-		__io_napi_add_id(ctx, READ_ONCE(sock->sk->sk_napi_id));
+		__io_napi_add_id(ctx, READ_ONCE(sock->sk->sk_napi_id), mode);
 }
 
 #else
-- 
2.47.3


^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2026-05-04  5:10 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-05-03 17:56 [PATCH] io_uring/napi: clear tracked NAPI entries on unregister Yufan Chen
2026-05-04  5:10 ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox