* [PATCH v4 1/8] io_uring/memmap: remove unneeded io_ring_ctx arg
  2025-10-28 17:46 [PATCH v4 0/8] io_uring zcrx ifq sharing David Wei
@ 2025-10-28 17:46 ` David Wei
  2025-10-28 17:46 ` [PATCH v4 2/8] io_uring/memmap: refactor io_free_region() to take user_struct param David Wei
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: David Wei @ 2025-10-28 17:46 UTC (permalink / raw)
  To: io-uring, netdev; +Cc: Jens Axboe, Pavel Begunkov
Remove the unused io_ring_ctx arg from io_region_pin_pages() and
io_region_allocate_pages().
Signed-off-by: David Wei <dw@davidwei.uk>
---
 io_uring/memmap.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/io_uring/memmap.c b/io_uring/memmap.c
index aa388ecd4754..d1318079c337 100644
--- a/io_uring/memmap.c
+++ b/io_uring/memmap.c
@@ -131,9 +131,8 @@ static int io_region_init_ptr(struct io_mapped_region *mr)
 	return 0;
 }
 
-static int io_region_pin_pages(struct io_ring_ctx *ctx,
-				struct io_mapped_region *mr,
-				struct io_uring_region_desc *reg)
+static int io_region_pin_pages(struct io_mapped_region *mr,
+			       struct io_uring_region_desc *reg)
 {
 	unsigned long size = mr->nr_pages << PAGE_SHIFT;
 	struct page **pages;
@@ -150,8 +149,7 @@ static int io_region_pin_pages(struct io_ring_ctx *ctx,
 	return 0;
 }
 
-static int io_region_allocate_pages(struct io_ring_ctx *ctx,
-				    struct io_mapped_region *mr,
+static int io_region_allocate_pages(struct io_mapped_region *mr,
 				    struct io_uring_region_desc *reg,
 				    unsigned long mmap_offset)
 {
@@ -219,9 +217,9 @@ int io_create_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr,
 	mr->nr_pages = nr_pages;
 
 	if (reg->flags & IORING_MEM_REGION_TYPE_USER)
-		ret = io_region_pin_pages(ctx, mr, reg);
+		ret = io_region_pin_pages(mr, reg);
 	else
-		ret = io_region_allocate_pages(ctx, mr, reg, mmap_offset);
+		ret = io_region_allocate_pages(mr, reg, mmap_offset);
 	if (ret)
 		goto out_free;
 
-- 
2.47.3
^ permalink raw reply related	[flat|nested] 12+ messages in thread
* [PATCH v4 2/8] io_uring/memmap: refactor io_free_region() to take user_struct param
  2025-10-28 17:46 [PATCH v4 0/8] io_uring zcrx ifq sharing David Wei
  2025-10-28 17:46 ` [PATCH v4 1/8] io_uring/memmap: remove unneeded io_ring_ctx arg David Wei
@ 2025-10-28 17:46 ` David Wei
  2025-10-28 17:46 ` [PATCH v4 3/8] io_uring/rsrc: refactor io_{un}account_mem() to take {user,mm}_struct param David Wei
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: David Wei @ 2025-10-28 17:46 UTC (permalink / raw)
  To: io-uring, netdev; +Cc: Jens Axboe, Pavel Begunkov
Refactor io_free_region() to take user_struct directly, instead of
accessing it from the ring ctx.
Signed-off-by: David Wei <dw@davidwei.uk>
---
 io_uring/io_uring.c | 6 +++---
 io_uring/kbuf.c     | 4 ++--
 io_uring/memmap.c   | 8 ++++----
 io_uring/memmap.h   | 2 +-
 io_uring/register.c | 6 +++---
 io_uring/zcrx.c     | 2 +-
 6 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 200b6c4bb2cc..7d42748774f8 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2798,8 +2798,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
 
 static void io_rings_free(struct io_ring_ctx *ctx)
 {
-	io_free_region(ctx, &ctx->sq_region);
-	io_free_region(ctx, &ctx->ring_region);
+	io_free_region(ctx->user, &ctx->sq_region);
+	io_free_region(ctx->user, &ctx->ring_region);
 	ctx->rings = NULL;
 	ctx->sq_sqes = NULL;
 }
@@ -2884,7 +2884,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
 	io_eventfd_unregister(ctx);
 	io_free_alloc_caches(ctx);
 	io_destroy_buffers(ctx);
-	io_free_region(ctx, &ctx->param_region);
+	io_free_region(ctx->user, &ctx->param_region);
 	mutex_unlock(&ctx->uring_lock);
 	if (ctx->sq_creds)
 		put_cred(ctx->sq_creds);
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index c034c90396bc..8a329556f8df 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -428,7 +428,7 @@ static int io_remove_buffers_legacy(struct io_ring_ctx *ctx,
 static void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
 {
 	if (bl->flags & IOBL_BUF_RING)
-		io_free_region(ctx, &bl->region);
+		io_free_region(ctx->user, &bl->region);
 	else
 		io_remove_buffers_legacy(ctx, bl, -1U);
 
@@ -672,7 +672,7 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
 	io_buffer_add_list(ctx, bl, reg.bgid);
 	return 0;
 fail:
-	io_free_region(ctx, &bl->region);
+	io_free_region(ctx->user, &bl->region);
 	kfree(bl);
 	return ret;
 }
diff --git a/io_uring/memmap.c b/io_uring/memmap.c
index d1318079c337..b1054fe94568 100644
--- a/io_uring/memmap.c
+++ b/io_uring/memmap.c
@@ -88,7 +88,7 @@ enum {
 	IO_REGION_F_SINGLE_REF			= 4,
 };
 
-void io_free_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr)
+void io_free_region(struct user_struct *user, struct io_mapped_region *mr)
 {
 	if (mr->pages) {
 		long nr_refs = mr->nr_pages;
@@ -105,8 +105,8 @@ void io_free_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr)
 	}
 	if ((mr->flags & IO_REGION_F_VMAP) && mr->ptr)
 		vunmap(mr->ptr);
-	if (mr->nr_pages && ctx->user)
-		__io_unaccount_mem(ctx->user, mr->nr_pages);
+	if (mr->nr_pages && user)
+		__io_unaccount_mem(user, mr->nr_pages);
 
 	memset(mr, 0, sizeof(*mr));
 }
@@ -228,7 +228,7 @@ int io_create_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr,
 		goto out_free;
 	return 0;
 out_free:
-	io_free_region(ctx, mr);
+	io_free_region(ctx->user, mr);
 	return ret;
 }
 
diff --git a/io_uring/memmap.h b/io_uring/memmap.h
index 58002976e0c3..a7c476f499d5 100644
--- a/io_uring/memmap.h
+++ b/io_uring/memmap.h
@@ -16,7 +16,7 @@ unsigned long io_uring_get_unmapped_area(struct file *file, unsigned long addr,
 					 unsigned long flags);
 int io_uring_mmap(struct file *file, struct vm_area_struct *vma);
 
-void io_free_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr);
+void io_free_region(struct user_struct *user, struct io_mapped_region *mr);
 int io_create_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr,
 		     struct io_uring_region_desc *reg,
 		     unsigned long mmap_offset);
diff --git a/io_uring/register.c b/io_uring/register.c
index 1a3e05be6e7b..023f5e7a18da 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -381,8 +381,8 @@ struct io_ring_ctx_rings {
 static void io_register_free_rings(struct io_ring_ctx *ctx,
 				   struct io_ring_ctx_rings *r)
 {
-	io_free_region(ctx, &r->sq_region);
-	io_free_region(ctx, &r->ring_region);
+	io_free_region(ctx->user, &r->sq_region);
+	io_free_region(ctx->user, &r->ring_region);
 }
 
 #define swap_old(ctx, o, n, field)		\
@@ -604,7 +604,7 @@ static int io_register_mem_region(struct io_ring_ctx *ctx, void __user *uarg)
 	if (ret)
 		return ret;
 	if (copy_to_user(rd_uptr, &rd, sizeof(rd))) {
-		io_free_region(ctx, &region);
+		io_free_region(ctx->user, &region);
 		return -EFAULT;
 	}
 
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index a816f5902091..d15453884004 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -378,7 +378,7 @@ static int io_allocate_rbuf_ring(struct io_zcrx_ifq *ifq,
 
 static void io_free_rbuf_ring(struct io_zcrx_ifq *ifq)
 {
-	io_free_region(ifq->ctx, &ifq->region);
+	io_free_region(ifq->ctx->user, &ifq->region);
 	ifq->rq_ring = NULL;
 	ifq->rqes = NULL;
 }
-- 
2.47.3
^ permalink raw reply related	[flat|nested] 12+ messages in thread
* [PATCH v4 3/8] io_uring/rsrc: refactor io_{un}account_mem() to take {user,mm}_struct param
  2025-10-28 17:46 [PATCH v4 0/8] io_uring zcrx ifq sharing David Wei
  2025-10-28 17:46 ` [PATCH v4 1/8] io_uring/memmap: remove unneeded io_ring_ctx arg David Wei
  2025-10-28 17:46 ` [PATCH v4 2/8] io_uring/memmap: refactor io_free_region() to take user_struct param David Wei
@ 2025-10-28 17:46 ` David Wei
  2025-10-28 17:46 ` [PATCH v4 4/8] io_uring/zcrx: add io_zcrx_ifq arg to io_zcrx_free_area() David Wei
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: David Wei @ 2025-10-28 17:46 UTC (permalink / raw)
  To: io-uring, netdev; +Cc: Jens Axboe, Pavel Begunkov
Refactor io_{un}account_mem() to take user_struct and mm_struct
directly, instead of accessing them from the ring ctx.
Signed-off-by: David Wei <dw@davidwei.uk>
---
 io_uring/rsrc.c | 26 ++++++++++++++------------
 io_uring/rsrc.h |  6 ++++--
 io_uring/zcrx.c |  5 +++--
 3 files changed, 21 insertions(+), 16 deletions(-)
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index d787c16dc1c3..59135fe84082 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -56,27 +56,29 @@ int __io_account_mem(struct user_struct *user, unsigned long nr_pages)
 	return 0;
 }
 
-void io_unaccount_mem(struct io_ring_ctx *ctx, unsigned long nr_pages)
+void io_unaccount_mem(struct user_struct *user, struct mm_struct *mm_account,
+		      unsigned long nr_pages)
 {
-	if (ctx->user)
-		__io_unaccount_mem(ctx->user, nr_pages);
+	if (user)
+		__io_unaccount_mem(user, nr_pages);
 
-	if (ctx->mm_account)
-		atomic64_sub(nr_pages, &ctx->mm_account->pinned_vm);
+	if (mm_account)
+		atomic64_sub(nr_pages, &mm_account->pinned_vm);
 }
 
-int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages)
+int io_account_mem(struct user_struct *user, struct mm_struct *mm_account,
+		   unsigned long nr_pages)
 {
 	int ret;
 
-	if (ctx->user) {
-		ret = __io_account_mem(ctx->user, nr_pages);
+	if (user) {
+		ret = __io_account_mem(user, nr_pages);
 		if (ret)
 			return ret;
 	}
 
-	if (ctx->mm_account)
-		atomic64_add(nr_pages, &ctx->mm_account->pinned_vm);
+	if (mm_account)
+		atomic64_add(nr_pages, &mm_account->pinned_vm);
 
 	return 0;
 }
@@ -145,7 +147,7 @@ static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf *imu)
 	}
 
 	if (imu->acct_pages)
-		io_unaccount_mem(ctx, imu->acct_pages);
+		io_unaccount_mem(ctx->user, ctx->mm_account, imu->acct_pages);
 	imu->release(imu->priv);
 	io_free_imu(ctx, imu);
 }
@@ -684,7 +686,7 @@ static int io_buffer_account_pin(struct io_ring_ctx *ctx, struct page **pages,
 	if (!imu->acct_pages)
 		return 0;
 
-	ret = io_account_mem(ctx, imu->acct_pages);
+	ret = io_account_mem(ctx->user, ctx->mm_account, imu->acct_pages);
 	if (ret)
 		imu->acct_pages = 0;
 	return ret;
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index a3ca6ba66596..d603f6a47f5e 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -120,8 +120,10 @@ int io_files_update(struct io_kiocb *req, unsigned int issue_flags);
 int io_files_update_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
 
 int __io_account_mem(struct user_struct *user, unsigned long nr_pages);
-int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages);
-void io_unaccount_mem(struct io_ring_ctx *ctx, unsigned long nr_pages);
+int io_account_mem(struct user_struct *user, struct mm_struct *mm_account,
+		   unsigned long nr_pages);
+void io_unaccount_mem(struct user_struct *user, struct mm_struct *mm_account,
+		      unsigned long nr_pages);
 
 static inline void __io_unaccount_mem(struct user_struct *user,
 				      unsigned long nr_pages)
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index d15453884004..30d3a7b3c407 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -200,7 +200,7 @@ static int io_import_umem(struct io_zcrx_ifq *ifq,
 	}
 
 	mem->account_pages = io_count_account_pages(pages, nr_pages);
-	ret = io_account_mem(ifq->ctx, mem->account_pages);
+	ret = io_account_mem(ifq->ctx->user, ifq->ctx->mm_account, mem->account_pages);
 	if (ret < 0)
 		mem->account_pages = 0;
 
@@ -389,7 +389,8 @@ static void io_zcrx_free_area(struct io_zcrx_area *area)
 	io_release_area_mem(&area->mem);
 
 	if (area->mem.account_pages)
-		io_unaccount_mem(area->ifq->ctx, area->mem.account_pages);
+		io_unaccount_mem(area->ifq->ctx->user, area->ifq->ctx->mm_account,
+				 area->mem.account_pages);
 
 	kvfree(area->freelist);
 	kvfree(area->nia.niovs);
-- 
2.47.3
^ permalink raw reply related	[flat|nested] 12+ messages in thread
* [PATCH v4 4/8] io_uring/zcrx: add io_zcrx_ifq arg to io_zcrx_free_area()
  2025-10-28 17:46 [PATCH v4 0/8] io_uring zcrx ifq sharing David Wei
                   ` (2 preceding siblings ...)
  2025-10-28 17:46 ` [PATCH v4 3/8] io_uring/rsrc: refactor io_{un}account_mem() to take {user,mm}_struct param David Wei
@ 2025-10-28 17:46 ` David Wei
  2025-10-28 17:46 ` [PATCH v4 5/8] io_uring/zcrx: add user_struct and mm_struct to io_zcrx_ifq David Wei
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: David Wei @ 2025-10-28 17:46 UTC (permalink / raw)
  To: io-uring, netdev; +Cc: Jens Axboe, Pavel Begunkov
Add an io_zcrx_ifq arg to io_zcrx_free_area(). This is a quality-of-life
change to reduce line widths.
Signed-off-by: David Wei <dw@davidwei.uk>
---
 io_uring/zcrx.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 30d3a7b3c407..5c90404283ff 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -383,9 +383,10 @@ static void io_free_rbuf_ring(struct io_zcrx_ifq *ifq)
 	ifq->rqes = NULL;
 }
 
-static void io_zcrx_free_area(struct io_zcrx_area *area)
+static void io_zcrx_free_area(struct io_zcrx_ifq *ifq,
+			      struct io_zcrx_area *area)
 {
-	io_zcrx_unmap_area(area->ifq, area);
+	io_zcrx_unmap_area(ifq, area);
 	io_release_area_mem(&area->mem);
 
 	if (area->mem.account_pages)
@@ -464,7 +465,7 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
 		return 0;
 err:
 	if (area)
-		io_zcrx_free_area(area);
+		io_zcrx_free_area(ifq, area);
 	return ret;
 }
 
@@ -523,7 +524,7 @@ static void io_zcrx_ifq_free(struct io_zcrx_ifq *ifq)
 	io_close_queue(ifq);
 
 	if (ifq->area)
-		io_zcrx_free_area(ifq->area);
+		io_zcrx_free_area(ifq, ifq->area);
 	if (ifq->dev)
 		put_device(ifq->dev);
 
-- 
2.47.3
^ permalink raw reply related	[flat|nested] 12+ messages in thread
* [PATCH v4 5/8] io_uring/zcrx: add user_struct and mm_struct to io_zcrx_ifq
  2025-10-28 17:46 [PATCH v4 0/8] io_uring zcrx ifq sharing David Wei
                   ` (3 preceding siblings ...)
  2025-10-28 17:46 ` [PATCH v4 4/8] io_uring/zcrx: add io_zcrx_ifq arg to io_zcrx_free_area() David Wei
@ 2025-10-28 17:46 ` David Wei
  2025-10-28 17:46 ` [PATCH v4 6/8] io_uring/zcrx: move io_unregister_zcrx_ifqs() down David Wei
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: David Wei @ 2025-10-28 17:46 UTC (permalink / raw)
  To: io-uring, netdev; +Cc: Jens Axboe, Pavel Begunkov
In preparation for removing ifq->ctx and making ifq lifetime independent
of ring ctx, add user_struct and mm_struct to io_zcrx_ifq.
In the ifq cleanup path, these are the only fields used from the main
ring ctx to do accounting. Taking a copy in the ifq allows ifq->ctx to
be removed later, including the ctx->refs held by the ifq.
Signed-off-by: David Wei <dw@davidwei.uk>
---
 io_uring/zcrx.c | 24 ++++++++++++++++++------
 io_uring/zcrx.h |  2 ++
 2 files changed, 20 insertions(+), 6 deletions(-)
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 5c90404283ff..774efbce8cb6 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -200,7 +200,7 @@ static int io_import_umem(struct io_zcrx_ifq *ifq,
 	}
 
 	mem->account_pages = io_count_account_pages(pages, nr_pages);
-	ret = io_account_mem(ifq->ctx->user, ifq->ctx->mm_account, mem->account_pages);
+	ret = io_account_mem(ifq->user, ifq->mm_account, mem->account_pages);
 	if (ret < 0)
 		mem->account_pages = 0;
 
@@ -344,7 +344,8 @@ static void io_zcrx_get_niov_uref(struct net_iov *niov)
 	atomic_inc(io_get_user_counter(niov));
 }
 
-static int io_allocate_rbuf_ring(struct io_zcrx_ifq *ifq,
+static int io_allocate_rbuf_ring(struct io_ring_ctx *ctx,
+				 struct io_zcrx_ifq *ifq,
 				 struct io_uring_zcrx_ifq_reg *reg,
 				 struct io_uring_region_desc *rd,
 				 u32 id)
@@ -362,7 +363,7 @@ static int io_allocate_rbuf_ring(struct io_zcrx_ifq *ifq,
 	mmap_offset = IORING_MAP_OFF_ZCRX_REGION;
 	mmap_offset += id << IORING_OFF_PBUF_SHIFT;
 
-	ret = io_create_region(ifq->ctx, &ifq->region, rd, mmap_offset);
+	ret = io_create_region(ctx, &ifq->region, rd, mmap_offset);
 	if (ret < 0)
 		return ret;
 
@@ -378,7 +379,7 @@ static int io_allocate_rbuf_ring(struct io_zcrx_ifq *ifq,
 
 static void io_free_rbuf_ring(struct io_zcrx_ifq *ifq)
 {
-	io_free_region(ifq->ctx->user, &ifq->region);
+	io_free_region(ifq->user, &ifq->region);
 	ifq->rq_ring = NULL;
 	ifq->rqes = NULL;
 }
@@ -390,7 +391,7 @@ static void io_zcrx_free_area(struct io_zcrx_ifq *ifq,
 	io_release_area_mem(&area->mem);
 
 	if (area->mem.account_pages)
-		io_unaccount_mem(area->ifq->ctx->user, area->ifq->ctx->mm_account,
+		io_unaccount_mem(ifq->user, ifq->mm_account,
 				 area->mem.account_pages);
 
 	kvfree(area->freelist);
@@ -525,6 +526,9 @@ static void io_zcrx_ifq_free(struct io_zcrx_ifq *ifq)
 
 	if (ifq->area)
 		io_zcrx_free_area(ifq, ifq->area);
+	free_uid(ifq->user);
+	if (ifq->mm_account)
+		mmdrop(ifq->mm_account);
 	if (ifq->dev)
 		put_device(ifq->dev);
 
@@ -588,6 +592,14 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 	ifq = io_zcrx_ifq_alloc(ctx);
 	if (!ifq)
 		return -ENOMEM;
+	if (ctx->user) {
+		get_uid(ctx->user);
+		ifq->user = ctx->user;
+	}
+	if (ctx->mm_account) {
+		mmgrab(ctx->mm_account);
+		ifq->mm_account = ctx->mm_account;
+	}
 	ifq->rq_entries = reg.rq_entries;
 
 	scoped_guard(mutex, &ctx->mmap_lock) {
@@ -597,7 +609,7 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 			goto ifq_free;
 	}
 
-	ret = io_allocate_rbuf_ring(ifq, &reg, &rd, id);
+	ret = io_allocate_rbuf_ring(ctx, ifq, &reg, &rd, id);
 	if (ret)
 		goto err;
 
diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h
index 33ef61503092..8d828dc9b0e4 100644
--- a/io_uring/zcrx.h
+++ b/io_uring/zcrx.h
@@ -42,6 +42,8 @@ struct io_zcrx_ifq {
 	struct io_ring_ctx		*ctx;
 	struct io_zcrx_area		*area;
 	unsigned			niov_shift;
+	struct user_struct		*user;
+	struct mm_struct		*mm_account;
 
 	spinlock_t			rq_lock ____cacheline_aligned_in_smp;
 	struct io_uring			*rq_ring;
-- 
2.47.3
^ permalink raw reply related	[flat|nested] 12+ messages in thread
* [PATCH v4 6/8] io_uring/zcrx: move io_unregister_zcrx_ifqs() down
  2025-10-28 17:46 [PATCH v4 0/8] io_uring zcrx ifq sharing David Wei
                   ` (4 preceding siblings ...)
  2025-10-28 17:46 ` [PATCH v4 5/8] io_uring/zcrx: add user_struct and mm_struct to io_zcrx_ifq David Wei
@ 2025-10-28 17:46 ` David Wei
  2025-10-28 17:46 ` [PATCH v4 7/8] io_uring/zcrx: add refcount to ifq and remove ifq->ctx David Wei
  2025-10-28 17:46 ` [PATCH v4 8/8] io_uring/zcrx: share an ifq between rings David Wei
  7 siblings, 0 replies; 12+ messages in thread
From: David Wei @ 2025-10-28 17:46 UTC (permalink / raw)
  To: io-uring, netdev; +Cc: Jens Axboe, Pavel Begunkov
In preparation for removing the ref on ctx->refs held by an ifq and
removing io_shutdown_zcrx_ifqs(), move io_unregister_zcrx_ifqs() down
such that it can call io_zcrx_scrub().
Signed-off-by: David Wei <dw@davidwei.uk>
---
 io_uring/zcrx.c | 44 ++++++++++++++++++++++----------------------
 1 file changed, 22 insertions(+), 22 deletions(-)
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 774efbce8cb6..b3f3d55d2f63 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -662,28 +662,6 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 	return ret;
 }
 
-void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx)
-{
-	struct io_zcrx_ifq *ifq;
-
-	lockdep_assert_held(&ctx->uring_lock);
-
-	while (1) {
-		scoped_guard(mutex, &ctx->mmap_lock) {
-			unsigned long id = 0;
-
-			ifq = xa_find(&ctx->zcrx_ctxs, &id, ULONG_MAX, XA_PRESENT);
-			if (ifq)
-				xa_erase(&ctx->zcrx_ctxs, id);
-		}
-		if (!ifq)
-			break;
-		io_zcrx_ifq_free(ifq);
-	}
-
-	xa_destroy(&ctx->zcrx_ctxs);
-}
-
 static struct net_iov *__io_zcrx_get_free_niov(struct io_zcrx_area *area)
 {
 	unsigned niov_idx;
@@ -749,6 +727,28 @@ void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx)
 	}
 }
 
+void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx)
+{
+	struct io_zcrx_ifq *ifq;
+
+	lockdep_assert_held(&ctx->uring_lock);
+
+	while (1) {
+		scoped_guard(mutex, &ctx->mmap_lock) {
+			unsigned long id = 0;
+
+			ifq = xa_find(&ctx->zcrx_ctxs, &id, ULONG_MAX, XA_PRESENT);
+			if (ifq)
+				xa_erase(&ctx->zcrx_ctxs, id);
+		}
+		if (!ifq)
+			break;
+		io_zcrx_ifq_free(ifq);
+	}
+
+	xa_destroy(&ctx->zcrx_ctxs);
+}
+
 static inline u32 io_zcrx_rqring_entries(struct io_zcrx_ifq *ifq)
 {
 	u32 entries;
-- 
2.47.3
^ permalink raw reply related	[flat|nested] 12+ messages in thread
* [PATCH v4 7/8] io_uring/zcrx: add refcount to ifq and remove ifq->ctx
  2025-10-28 17:46 [PATCH v4 0/8] io_uring zcrx ifq sharing David Wei
                   ` (5 preceding siblings ...)
  2025-10-28 17:46 ` [PATCH v4 6/8] io_uring/zcrx: move io_unregister_zcrx_ifqs() down David Wei
@ 2025-10-28 17:46 ` David Wei
  2025-10-29 15:22   ` Pavel Begunkov
  2025-10-28 17:46 ` [PATCH v4 8/8] io_uring/zcrx: share an ifq between rings David Wei
  7 siblings, 1 reply; 12+ messages in thread
From: David Wei @ 2025-10-28 17:46 UTC (permalink / raw)
  To: io-uring, netdev; +Cc: Jens Axboe, Pavel Begunkov
Add a refcount to struct io_zcrx_ifq to track the number of rings that
share it. For now, this is only ever 1 i.e. not shared.
This refcount replaces the ref that the ifq holds on ctx->refs via the
page pool memory provider. That ref was used to keep the ifq around until
the ring ctx is freed, i.e. until ctx->refs falls to 0. But with the ifq
now being refcounted directly by the ring, and ifq->ctx removed, this is
no longer necessary.
Since ifqs now no longer hold refs to ring ctx, there isn't a need to
split the cleanup of ifqs into two: io_shutdown_zcrx_ifqs() in
io_ring_exit_work() while waiting for ctx->refs to drop to 0, and
io_unregister_zcrx_ifqs() after. Remove io_shutdown_zcrx_ifqs().
So an ifq now behaves like a normal refcounted object; the last ref from
a ring will free the ifq.
Signed-off-by: David Wei <dw@davidwei.uk>
---
 io_uring/io_uring.c |  5 -----
 io_uring/zcrx.c     | 24 +++++-------------------
 io_uring/zcrx.h     |  6 +-----
 3 files changed, 6 insertions(+), 29 deletions(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 7d42748774f8..8af5efda9c11 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -3042,11 +3042,6 @@ static __cold void io_ring_exit_work(struct work_struct *work)
 			io_cqring_overflow_kill(ctx);
 			mutex_unlock(&ctx->uring_lock);
 		}
-		if (!xa_empty(&ctx->zcrx_ctxs)) {
-			mutex_lock(&ctx->uring_lock);
-			io_shutdown_zcrx_ifqs(ctx);
-			mutex_unlock(&ctx->uring_lock);
-		}
 
 		if (ctx->flags & IORING_SETUP_DEFER_TASKRUN)
 			io_move_task_work_from_local(ctx);
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index b3f3d55d2f63..6324dfa61ce0 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -479,7 +479,6 @@ static struct io_zcrx_ifq *io_zcrx_ifq_alloc(struct io_ring_ctx *ctx)
 		return NULL;
 
 	ifq->if_rxq = -1;
-	ifq->ctx = ctx;
 	spin_lock_init(&ifq->rq_lock);
 	mutex_init(&ifq->pp_lock);
 	return ifq;
@@ -592,6 +591,7 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 	ifq = io_zcrx_ifq_alloc(ctx);
 	if (!ifq)
 		return -ENOMEM;
+	refcount_set(&ifq->refs, 1);
 	if (ctx->user) {
 		get_uid(ctx->user);
 		ifq->user = ctx->user;
@@ -714,19 +714,6 @@ static void io_zcrx_scrub(struct io_zcrx_ifq *ifq)
 	}
 }
 
-void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx)
-{
-	struct io_zcrx_ifq *ifq;
-	unsigned long index;
-
-	lockdep_assert_held(&ctx->uring_lock);
-
-	xa_for_each(&ctx->zcrx_ctxs, index, ifq) {
-		io_zcrx_scrub(ifq);
-		io_close_queue(ifq);
-	}
-}
-
 void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx)
 {
 	struct io_zcrx_ifq *ifq;
@@ -743,7 +730,10 @@ void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx)
 		}
 		if (!ifq)
 			break;
-		io_zcrx_ifq_free(ifq);
+		if (refcount_dec_and_test(&ifq->refs)) {
+			io_zcrx_scrub(ifq);
+			io_zcrx_ifq_free(ifq);
+		}
 	}
 
 	xa_destroy(&ctx->zcrx_ctxs);
@@ -894,15 +884,11 @@ static int io_pp_zc_init(struct page_pool *pp)
 	if (ret)
 		return ret;
 
-	percpu_ref_get(&ifq->ctx->refs);
 	return 0;
 }
 
 static void io_pp_zc_destroy(struct page_pool *pp)
 {
-	struct io_zcrx_ifq *ifq = io_pp_to_ifq(pp);
-
-	percpu_ref_put(&ifq->ctx->refs);
 }
 
 static int io_pp_nl_fill(void *mp_priv, struct sk_buff *rsp,
diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h
index 8d828dc9b0e4..5951f127298c 100644
--- a/io_uring/zcrx.h
+++ b/io_uring/zcrx.h
@@ -39,9 +39,9 @@ struct io_zcrx_area {
 };
 
 struct io_zcrx_ifq {
-	struct io_ring_ctx		*ctx;
 	struct io_zcrx_area		*area;
 	unsigned			niov_shift;
+	refcount_t			refs;
 	struct user_struct		*user;
 	struct mm_struct		*mm_account;
 
@@ -70,7 +70,6 @@ int io_zcrx_return_bufs(struct io_ring_ctx *ctx,
 int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 			 struct io_uring_zcrx_ifq_reg __user *arg);
 void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx);
-void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx);
 int io_zcrx_recv(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
 		 struct socket *sock, unsigned int flags,
 		 unsigned issue_flags, unsigned int *len);
@@ -85,9 +84,6 @@ static inline int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 static inline void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx)
 {
 }
-static inline void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx)
-{
-}
 static inline int io_zcrx_recv(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
 			       struct socket *sock, unsigned int flags,
 			       unsigned issue_flags, unsigned int *len)
-- 
2.47.3
^ permalink raw reply related	[flat|nested] 12+ messages in thread
* Re: [PATCH v4 7/8] io_uring/zcrx: add refcount to ifq and remove ifq->ctx
  2025-10-28 17:46 ` [PATCH v4 7/8] io_uring/zcrx: add refcount to ifq and remove ifq->ctx David Wei
@ 2025-10-29 15:22   ` Pavel Begunkov
  2025-10-29 16:16     ` Pavel Begunkov
  0 siblings, 1 reply; 12+ messages in thread
From: Pavel Begunkov @ 2025-10-29 15:22 UTC (permalink / raw)
  To: David Wei, io-uring, netdev; +Cc: Jens Axboe
On 10/28/25 17:46, David Wei wrote:
> Add a refcount to struct io_zcrx_ifq to track the number of rings that
> share it. For now, this is only ever 1 i.e. not shared.
> 
> This refcount replaces the ref that the ifq holds on ctx->refs via the
> page pool memory provider. This was used to keep the ifq around until
> the ring ctx is being freed i.e. ctx->refs fall to 0. But with ifq now
> being refcounted directly by the ring, and ifq->ctx removed, this is no
> longer necessary.
> 
> Since ifqs now no longer hold refs to ring ctx, there isn't a need to
> split the cleanup of ifqs into two: io_shutdown_zcrx_ifqs() in
> io_ring_exit_work() while waiting for ctx->refs to drop to 0, and
> io_unregister_zcrx_ifqs() after. Remove io_shutdown_zcrx_ifqs().
> 
> So an ifq now behaves like a normal refcounted object; the last ref from
> a ring will free the ifq.
> 
> Signed-off-by: David Wei <dw@davidwei.uk>
> ---
>   io_uring/io_uring.c |  5 -----
>   io_uring/zcrx.c     | 24 +++++-------------------
>   io_uring/zcrx.h     |  6 +-----
>   3 files changed, 6 insertions(+), 29 deletions(-)
> 
> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
> index 7d42748774f8..8af5efda9c11 100644
> --- a/io_uring/io_uring.c
> +++ b/io_uring/io_uring.c
> @@ -3042,11 +3042,6 @@ static __cold void io_ring_exit_work(struct work_struct *work)
>   			io_cqring_overflow_kill(ctx);
>   			mutex_unlock(&ctx->uring_lock);
>   		}
> -		if (!xa_empty(&ctx->zcrx_ctxs)) {
> -			mutex_lock(&ctx->uring_lock);
> -			io_shutdown_zcrx_ifqs(ctx);
> -			mutex_unlock(&ctx->uring_lock);
> -		}
>   
>   		if (ctx->flags & IORING_SETUP_DEFER_TASKRUN)
>   			io_move_task_work_from_local(ctx);
> diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
> index b3f3d55d2f63..6324dfa61ce0 100644
> --- a/io_uring/zcrx.c
> +++ b/io_uring/zcrx.c
> @@ -479,7 +479,6 @@ static struct io_zcrx_ifq *io_zcrx_ifq_alloc(struct io_ring_ctx *ctx)
>   		return NULL;
>   
>   	ifq->if_rxq = -1;
> -	ifq->ctx = ctx;
>   	spin_lock_init(&ifq->rq_lock);
>   	mutex_init(&ifq->pp_lock);
>   	return ifq;
> @@ -592,6 +591,7 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
>   	ifq = io_zcrx_ifq_alloc(ctx);
>   	if (!ifq)
>   		return -ENOMEM;
> +	refcount_set(&ifq->refs, 1);
>   	if (ctx->user) {
>   		get_uid(ctx->user);
>   		ifq->user = ctx->user;
> @@ -714,19 +714,6 @@ static void io_zcrx_scrub(struct io_zcrx_ifq *ifq)
>   	}
>   }
>   
> -void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx)
> -{
> -	struct io_zcrx_ifq *ifq;
> -	unsigned long index;
> -
> -	lockdep_assert_held(&ctx->uring_lock);
> -
> -	xa_for_each(&ctx->zcrx_ctxs, index, ifq) {
> -		io_zcrx_scrub(ifq);
> -		io_close_queue(ifq);
> -	}
> -}
> -
>   void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx)
>   {
>   	struct io_zcrx_ifq *ifq;
> @@ -743,7 +730,10 @@ void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx)
>   		}
>   		if (!ifq)
>   			break;
> -		io_zcrx_ifq_free(ifq);
> +		if (refcount_dec_and_test(&ifq->refs)) {
> +			io_zcrx_scrub(ifq);
> +			io_zcrx_ifq_free(ifq);
> +		}
>   	}
>   
>   	xa_destroy(&ctx->zcrx_ctxs);
> @@ -894,15 +884,11 @@ static int io_pp_zc_init(struct page_pool *pp)
>   	if (ret)
>   		return ret;
>   
> -	percpu_ref_get(&ifq->ctx->refs);
>   	return 0;
refcount_inc();
>   }
>   
>   static void io_pp_zc_destroy(struct page_pool *pp)
>   {
> -	struct io_zcrx_ifq *ifq = io_pp_to_ifq(pp);
> -
> -	percpu_ref_put(&ifq->ctx->refs);
refcount_dec_and_test + destroy. Otherwise, seems like
nothing protects it from going away under pp.
-- 
Pavel Begunkov
^ permalink raw reply	[flat|nested] 12+ messages in thread
* Re: [PATCH v4 7/8] io_uring/zcrx: add refcount to ifq and remove ifq->ctx
  2025-10-29 15:22   ` Pavel Begunkov
@ 2025-10-29 16:16     ` Pavel Begunkov
  2025-10-30 15:24       ` Pavel Begunkov
  0 siblings, 1 reply; 12+ messages in thread
From: Pavel Begunkov @ 2025-10-29 16:16 UTC (permalink / raw)
  To: David Wei, io-uring, netdev; +Cc: Jens Axboe
On 10/29/25 15:22, Pavel Begunkov wrote:
> On 10/28/25 17:46, David Wei wrote:
>> Add a refcount to struct io_zcrx_ifq to track the number of rings that
>> share it. For now, this is only ever 1 i.e. not shared.
>>
>> This refcount replaces the ref that the ifq holds on ctx->refs via the
>> page pool memory provider. This was used to keep the ifq around until
>> the ring ctx is being freed i.e. ctx->refs fall to 0. But with ifq now
>> being refcounted directly by the ring, and ifq->ctx removed, this is no
>> longer necessary.
>>
>> Since ifqs now no longer hold refs to ring ctx, there isn't a need to
>> split the cleanup of ifqs into two: io_shutdown_zcrx_ifqs() in
>> io_ring_exit_work() while waiting for ctx->refs to drop to 0, and
>> io_unregister_zcrx_ifqs() after. Remove io_shutdown_zcrx_ifqs().
>>
>> So an ifq now behaves like a normal refcounted object; the last ref from
>> a ring will free the ifq.
>>
>> Signed-off-by: David Wei <dw@davidwei.uk>
>> ---
>>   io_uring/io_uring.c |  5 -----
>>   io_uring/zcrx.c     | 24 +++++-------------------
>>   io_uring/zcrx.h     |  6 +-----
>>   3 files changed, 6 insertions(+), 29 deletions(-)
>>
>> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
>> index 7d42748774f8..8af5efda9c11 100644
>> --- a/io_uring/io_uring.c
>> +++ b/io_uring/io_uring.c
>> @@ -3042,11 +3042,6 @@ static __cold void io_ring_exit_work(struct work_struct *work)
>>               io_cqring_overflow_kill(ctx);
>>               mutex_unlock(&ctx->uring_lock);
>>           }
>> -        if (!xa_empty(&ctx->zcrx_ctxs)) {
>> -            mutex_lock(&ctx->uring_lock);
>> -            io_shutdown_zcrx_ifqs(ctx);
>> -            mutex_unlock(&ctx->uring_lock);
>> -        }
>>           if (ctx->flags & IORING_SETUP_DEFER_TASKRUN)
>>               io_move_task_work_from_local(ctx);
>> diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
>> index b3f3d55d2f63..6324dfa61ce0 100644
>> --- a/io_uring/zcrx.c
>> +++ b/io_uring/zcrx.c
>> @@ -479,7 +479,6 @@ static struct io_zcrx_ifq *io_zcrx_ifq_alloc(struct io_ring_ctx *ctx)
>>           return NULL;
>>       ifq->if_rxq = -1;
>> -    ifq->ctx = ctx;
>>       spin_lock_init(&ifq->rq_lock);
>>       mutex_init(&ifq->pp_lock);
>>       return ifq;
>> @@ -592,6 +591,7 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
>>       ifq = io_zcrx_ifq_alloc(ctx);
>>       if (!ifq)
>>           return -ENOMEM;
>> +    refcount_set(&ifq->refs, 1);
>>       if (ctx->user) {
>>           get_uid(ctx->user);
>>           ifq->user = ctx->user;
>> @@ -714,19 +714,6 @@ static void io_zcrx_scrub(struct io_zcrx_ifq *ifq)
>>       }
>>   }
>> -void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx)
>> -{
>> -    struct io_zcrx_ifq *ifq;
>> -    unsigned long index;
>> -
>> -    lockdep_assert_held(&ctx->uring_lock);
>> -
>> -    xa_for_each(&ctx->zcrx_ctxs, index, ifq) {
>> -        io_zcrx_scrub(ifq);
>> -        io_close_queue(ifq);
>> -    }
>> -}
>> -
>>   void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx)
>>   {
>>       struct io_zcrx_ifq *ifq;
>> @@ -743,7 +730,10 @@ void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx)
>>           }
>>           if (!ifq)
>>               break;
>> -        io_zcrx_ifq_free(ifq);
>> +        if (refcount_dec_and_test(&ifq->refs)) {
>> +            io_zcrx_scrub(ifq);
>> +            io_zcrx_ifq_free(ifq);
>> +        }
>>       }
>>       xa_destroy(&ctx->zcrx_ctxs);
>> @@ -894,15 +884,11 @@ static int io_pp_zc_init(struct page_pool *pp)
>>       if (ret)
>>           return ret;
>> -    percpu_ref_get(&ifq->ctx->refs);
>>       return 0;
> 
> refcount_inc();
Which would add another ref cycle problem, the same that IIRC
was solved with two step shutdown + release. I'll take a closer
look.
-- 
Pavel Begunkov
^ permalink raw reply	[flat|nested] 12+ messages in thread
* Re: [PATCH v4 7/8] io_uring/zcrx: add refcount to ifq and remove ifq->ctx
  2025-10-29 16:16     ` Pavel Begunkov
@ 2025-10-30 15:24       ` Pavel Begunkov
  0 siblings, 0 replies; 12+ messages in thread
From: Pavel Begunkov @ 2025-10-30 15:24 UTC (permalink / raw)
  To: David Wei, io-uring, netdev; +Cc: Jens Axboe
On 10/29/25 16:16, Pavel Begunkov wrote:
> On 10/29/25 15:22, Pavel Begunkov wrote:
>> On 10/28/25 17:46, David Wei wrote:
...
>>>   void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx)
>>>   {
>>>       struct io_zcrx_ifq *ifq;
>>> @@ -743,7 +730,10 @@ void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx)
>>>           }
>>>           if (!ifq)
>>>               break;
>>> -        io_zcrx_ifq_free(ifq);
>>> +        if (refcount_dec_and_test(&ifq->refs)) {
>>> +            io_zcrx_scrub(ifq);
>>> +            io_zcrx_ifq_free(ifq);
>>> +        }
>>>       }
>>>       xa_destroy(&ctx->zcrx_ctxs);
>>> @@ -894,15 +884,11 @@ static int io_pp_zc_init(struct page_pool *pp)
>>>       if (ret)
>>>           return ret;
>>> -    percpu_ref_get(&ifq->ctx->refs);
>>>       return 0;
>>
>> refcount_inc();
> 
> Which would add another ref cycle problem, the same that IIRC
> was solved with two step shutdown + release. I'll take a closer
> look.
The simplest solution is to keep the two-level release and
split refcounting for sharing. It's still better as now
shutdown can be folded into the io_uring ifq unregistration
helper.
https://github.com/isilence/linux.git zcrx/zcrx-sharing
https://github.com/isilence/liburing.git zcrx/zcrx-sharing
I fixed up synchronisation and drafted the export/import via
a file part, take a look.
-- 
Pavel Begunkov
^ permalink raw reply	[flat|nested] 12+ messages in thread
* [PATCH v4 8/8] io_uring/zcrx: share an ifq between rings
  2025-10-28 17:46 [PATCH v4 0/8] io_uring zcrx ifq sharing David Wei
                   ` (6 preceding siblings ...)
  2025-10-28 17:46 ` [PATCH v4 7/8] io_uring/zcrx: add refcount to ifq and remove ifq->ctx David Wei
@ 2025-10-28 17:46 ` David Wei
  7 siblings, 0 replies; 12+ messages in thread
From: David Wei @ 2025-10-28 17:46 UTC (permalink / raw)
  To: io-uring, netdev; +Cc: Jens Axboe, Pavel Begunkov
Add a way to share an ifq from a src ring that is real (i.e. bound to a
HW RX queue) with other rings. This is done by passing a new flag
IORING_ZCRX_IFQ_REG_SHARE in the registration struct
io_uring_zcrx_ifq_reg, alongside the fd of the src ring and its ifq id
to be shared.
Signed-off-by: David Wei <dw@davidwei.uk>
---
 include/uapi/linux/io_uring.h |  4 +++
 io_uring/zcrx.c               | 65 ++++++++++++++++++++++++++++++++++-
 2 files changed, 68 insertions(+), 1 deletion(-)
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 04797a9b76bc..4da4552a4215 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -1063,6 +1063,10 @@ struct io_uring_zcrx_area_reg {
 	__u64	__resv2[2];
 };
 
+enum io_uring_zcrx_ifq_reg_flags {
+	IORING_ZCRX_IFQ_REG_SHARE	= 1,
+};
+
 /*
  * Argument for IORING_REGISTER_ZCRX_IFQ
  */
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 6324dfa61ce0..094bd595d517 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -22,10 +22,10 @@
 #include <uapi/linux/io_uring.h>
 
 #include "io_uring.h"
-#include "kbuf.h"
 #include "memmap.h"
 #include "zcrx.h"
 #include "rsrc.h"
+#include "register.h"
 
 #define IO_ZCRX_AREA_SUPPORTED_FLAGS	(IORING_ZCRX_AREA_DMABUF)
 
@@ -546,6 +546,67 @@ struct io_mapped_region *io_zcrx_get_region(struct io_ring_ctx *ctx,
 	return ifq ? &ifq->region : NULL;
 }
 
+static int io_share_zcrx_ifq(struct io_ring_ctx *ctx,
+			     struct io_uring_zcrx_ifq_reg __user *arg,
+			     struct io_uring_zcrx_ifq_reg *reg)
+{
+	struct io_ring_ctx *src_ctx;
+	struct io_zcrx_ifq *src_ifq;
+	struct file *file;
+	int src_fd, ret;
+	u32 src_id, id;
+
+	src_fd = reg->if_idx;
+	src_id = reg->if_rxq;
+
+	file = io_uring_register_get_file(src_fd, false);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	src_ctx = file->private_data;
+	if (src_ctx == ctx)
+		return -EBADFD;
+
+	mutex_unlock(&ctx->uring_lock);
+	mutex_lock(&src_ctx->uring_lock);
+
+	src_ifq = xa_load(&src_ctx->zcrx_ctxs, src_id);
+	if (!src_ifq) {
+		mutex_unlock(&src_ctx->uring_lock);
+		fput(file);
+		mutex_lock(&ctx->uring_lock);
+		return -EINVAL;
+	}
+
+	refcount_inc(&src_ifq->refs);
+	mutex_unlock(&src_ctx->uring_lock);
+	fput(file);
+	mutex_lock(&ctx->uring_lock);
+
+	scoped_guard(mutex, &ctx->mmap_lock) {
+		ret = xa_alloc(&ctx->zcrx_ctxs, &id, NULL, xa_limit_31b, GFP_KERNEL);
+		if (ret)
+			return ret;
+	}
+
+	reg->zcrx_id = id;
+	if (copy_to_user(arg, reg, sizeof(*reg))) {
+		ret = -EFAULT;
+		goto err;
+	}
+
+	scoped_guard(mutex, &ctx->mmap_lock) {
+		ret = -ENOMEM;
+		if (xa_store(&ctx->zcrx_ctxs, id, src_ifq, GFP_KERNEL))
+			goto err;
+	}
+	return 0;
+err:
+	scoped_guard(mutex, &ctx->mmap_lock)
+		xa_erase(&ctx->zcrx_ctxs, id);
+	return ret;
+}
+
 int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 			  struct io_uring_zcrx_ifq_reg __user *arg)
 {
@@ -571,6 +632,8 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 		return -EINVAL;
 	if (copy_from_user(&reg, arg, sizeof(reg)))
 		return -EFAULT;
+	if (reg.flags & IORING_ZCRX_IFQ_REG_SHARE)
+		return io_share_zcrx_ifq(ctx, arg, &reg);
 	if (copy_from_user(&rd, u64_to_user_ptr(reg.region_ptr), sizeof(rd)))
 		return -EFAULT;
 	if (!mem_is_zero(&reg.__resv, sizeof(reg.__resv)) ||
-- 
2.47.3
^ permalink raw reply related	[flat|nested] 12+ messages in thread