From: David Wei <dw@davidwei.uk>
To: io-uring@vger.kernel.org, netdev@vger.kernel.org
Cc: Jens Axboe <axboe@kernel.dk>, Pavel Begunkov <asml.silence@gmail.com>
Subject: [PATCH v4 09/12] io_uring/zcrx: reverse ifq refcount
Date: Mon,  3 Nov 2025 15:41:07 -0800
Message-ID: <20251103234110.127790-10-dw@davidwei.uk>
In-Reply-To: <20251103234110.127790-1-dw@davidwei.uk>

Add two refcounts to struct io_zcrx_ifq to reverse the refcounting
relationship, i.e. rings now reference ifqs instead of the other way
around. As a result, remove the ctx->refs reference that an ifq holds
on a ring via the page pool memory provider.

The first ref is ifq->refs, held by internal users of an ifq, namely
rings and the page pool memory provider associated with the ifq. It
keeps the ifq around until the page pool is destroyed.

The second ref is ifq->user_refs, held by userspace-facing users such
as rings. For now, only the ring that created the ifq holds a ref, but
with ifq sharing added, multiple rings will hold refs.
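
For illustration only, a hypothetical sketch of how a second ring would
attach to a shared ifq once sharing lands; the attach path is not part
of this patch, only the two counters are:

	/* hypothetical: a second ring attaching to an existing ifq */
	refcount_inc(&ifq->refs);	/* internal user: the new ring */
	refcount_inc(&ifq->user_refs);	/* userspace-facing user */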

ifq->refs will be 1 larger than ifq->user_refs, with the extra ref held
by the page pool. Once ifq->user_refs falls to 0, the ifq is cleaned
up, including destroying the page pool. Once the page pool is
destroyed, ifq->refs falls to 0 and the ifq is freed.
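
To make the lifecycle concrete, a sketch of the single-ring case using
only helpers from this patch (the pp callbacks are invoked by the page
pool core, not called directly):

	ifq = io_zcrx_ifq_alloc(ctx);		/* refs = 1, user_refs = 1 */
	io_pp_zc_init(pp);			/* page pool takes a ref: refs = 2 */

	/* the owning ring unregisters */
	if (refcount_dec_and_test(&ifq->user_refs)) {	/* user_refs 1 -> 0 */
		io_close_queue(ifq);		/* triggers page pool destruction */
		io_zcrx_scrub(ifq);
	}
	io_put_zcrx_ifq(ifq);			/* refs 2 -> 1 */

	io_pp_zc_destroy(pp);			/* refs 1 -> 0, ifq freed */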

Since ifqs no longer hold refs on the ring ctx, there is no need to
split ifq cleanup into two phases: io_shutdown_zcrx_ifqs() in
io_ring_exit_work() while waiting for ctx->refs to drop to 0, and
io_unregister_zcrx_ifqs() after. Remove io_shutdown_zcrx_ifqs().

Signed-off-by: David Wei <dw@davidwei.uk>
Co-developed-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/io_uring.c |  5 -----
 io_uring/zcrx.c     | 36 +++++++++++++++++-------------------
 io_uring/zcrx.h     |  8 +++-----
 3 files changed, 20 insertions(+), 29 deletions(-)

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 7d42748774f8..8af5efda9c11 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -3042,11 +3042,6 @@ static __cold void io_ring_exit_work(struct work_struct *work)
 			io_cqring_overflow_kill(ctx);
 			mutex_unlock(&ctx->uring_lock);
 		}
-		if (!xa_empty(&ctx->zcrx_ctxs)) {
-			mutex_lock(&ctx->uring_lock);
-			io_shutdown_zcrx_ifqs(ctx);
-			mutex_unlock(&ctx->uring_lock);
-		}
 
 		if (ctx->flags & IORING_SETUP_DEFER_TASKRUN)
 			io_move_task_work_from_local(ctx);
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index bb5cc6ec5b9b..00498e3dcbd3 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -479,9 +479,10 @@ static struct io_zcrx_ifq *io_zcrx_ifq_alloc(struct io_ring_ctx *ctx)
 		return NULL;
 
 	ifq->if_rxq = -1;
-	ifq->ctx = ctx;
 	spin_lock_init(&ifq->rq_lock);
 	mutex_init(&ifq->pp_lock);
+	refcount_set(&ifq->refs, 1);
+	refcount_set(&ifq->user_refs, 1);
 	return ifq;
 }
 
@@ -537,6 +538,12 @@ static void io_zcrx_ifq_free(struct io_zcrx_ifq *ifq)
 	kfree(ifq);
 }
 
+static void io_put_zcrx_ifq(struct io_zcrx_ifq *ifq)
+{
+	if (refcount_dec_and_test(&ifq->refs))
+		io_zcrx_ifq_free(ifq);
+}
+
 struct io_mapped_region *io_zcrx_get_region(struct io_ring_ctx *ctx,
 					    unsigned int id)
 {
@@ -611,6 +618,7 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 	ifq = io_zcrx_ifq_alloc(ctx);
 	if (!ifq)
 		return -ENOMEM;
+
 	if (ctx->user) {
 		get_uid(ctx->user);
 		ifq->user = ctx->user;
@@ -733,19 +741,6 @@ static void io_zcrx_scrub(struct io_zcrx_ifq *ifq)
 	}
 }
 
-void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx)
-{
-	struct io_zcrx_ifq *ifq;
-	unsigned long index;
-
-	lockdep_assert_held(&ctx->uring_lock);
-
-	xa_for_each(&ctx->zcrx_ctxs, index, ifq) {
-		io_zcrx_scrub(ifq);
-		io_close_queue(ifq);
-	}
-}
-
 void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx)
 {
 	struct io_zcrx_ifq *ifq;
@@ -762,7 +757,12 @@ void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx)
 		}
 		if (!ifq)
 			break;
-		io_zcrx_ifq_free(ifq);
+
+		if (refcount_dec_and_test(&ifq->user_refs)) {
+			io_close_queue(ifq);
+			io_zcrx_scrub(ifq);
+		}
+		io_put_zcrx_ifq(ifq);
 	}
 
 	xa_destroy(&ctx->zcrx_ctxs);
@@ -913,15 +913,13 @@ static int io_pp_zc_init(struct page_pool *pp)
 	if (ret)
 		return ret;
 
-	percpu_ref_get(&ifq->ctx->refs);
+	refcount_inc(&ifq->refs);
 	return 0;
 }
 
 static void io_pp_zc_destroy(struct page_pool *pp)
 {
-	struct io_zcrx_ifq *ifq = io_pp_to_ifq(pp);
-
-	percpu_ref_put(&ifq->ctx->refs);
+	io_put_zcrx_ifq(io_pp_to_ifq(pp));
 }
 
 static int io_pp_nl_fill(void *mp_priv, struct sk_buff *rsp,
diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h
index 2396436643e5..9014a1fd0f61 100644
--- a/io_uring/zcrx.h
+++ b/io_uring/zcrx.h
@@ -39,7 +39,6 @@ struct io_zcrx_area {
 };
 
 struct io_zcrx_ifq {
-	struct io_ring_ctx		*ctx;
 	struct io_zcrx_area		*area;
 	unsigned			niov_shift;
 	struct user_struct		*user;
@@ -55,6 +54,9 @@ struct io_zcrx_ifq {
 	struct device			*dev;
 	struct net_device		*netdev;
 	netdevice_tracker		netdev_tracker;
+	refcount_t			refs;
+	/* counts userspace facing users like io_uring */
+	refcount_t			user_refs;
 
 	/*
 	 * Page pool and net configuration lock, can be taken deeper in the
@@ -69,7 +71,6 @@ int io_zcrx_ctrl(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_arg);
 int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 			 struct io_uring_zcrx_ifq_reg __user *arg);
 void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx);
-void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx);
 int io_zcrx_recv(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
 		 struct socket *sock, unsigned int flags,
 		 unsigned issue_flags, unsigned int *len);
@@ -84,9 +85,6 @@ static inline int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 static inline void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx)
 {
 }
-static inline void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx)
-{
-}
 static inline int io_zcrx_recv(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
 			       struct socket *sock, unsigned int flags,
 			       unsigned issue_flags, unsigned int *len)
-- 
2.47.3

