public inbox for io-uring@vger.kernel.org
 help / color / mirror / Atom feed
From: Pavel Begunkov <asml.silence@gmail.com>
To: io-uring@vger.kernel.org
Cc: asml.silence@gmail.com, axboe@kernel.dk, netdev@vger.kernel.org
Subject: [PATCH io_uring for-6.18 12/20] io_uring/zcrx: make niov size variable
Date: Tue, 16 Sep 2025 15:27:55 +0100	[thread overview]
Message-ID: <5d29fc35107408e1d23d4d9e853cccc4e270b0f1.1758030357.git.asml.silence@gmail.com> (raw)
In-Reply-To: <cover.1758030357.git.asml.silence@gmail.com>

Instead of using PAGE_SIZE for the niov size, add a niov_shift field to
ifq, and patch up all important places. The copy fallback still assumes
PAGE_SIZE, so it will waste some memory for now.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 io_uring/zcrx.c | 30 ++++++++++++++++++++----------
 io_uring/zcrx.h |  1 +
 2 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 764723bf04d6..85832f60d68a 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -45,15 +45,18 @@ static inline struct io_zcrx_area *io_zcrx_iov_to_area(const struct net_iov *nio
 static inline struct page *io_zcrx_iov_page(const struct net_iov *niov)
 {
 	struct io_zcrx_area *area = io_zcrx_iov_to_area(niov);
+	unsigned niov_pages_shift;
 
 	lockdep_assert(!area->mem.is_dmabuf);
 
-	return area->mem.pages[net_iov_idx(niov)];
+	niov_pages_shift = area->ifq->niov_shift - PAGE_SHIFT;
+	return area->mem.pages[net_iov_idx(niov) << niov_pages_shift];
 }
 
 static int io_populate_area_dma(struct io_zcrx_ifq *ifq,
 				struct io_zcrx_area *area)
 {
+	unsigned niov_size = 1U << ifq->niov_shift;
 	struct sg_table *sgt = area->mem.sgt;
 	struct scatterlist *sg;
 	unsigned i, niov_idx = 0;
@@ -62,13 +65,16 @@ static int io_populate_area_dma(struct io_zcrx_ifq *ifq,
 		dma_addr_t dma = sg_dma_address(sg);
 		unsigned long sg_len = sg_dma_len(sg);
 
+		if (WARN_ON_ONCE(sg_len % niov_size))
+			return -EINVAL;
+
 		while (sg_len && niov_idx < area->nia.num_niovs) {
 			struct net_iov *niov = &area->nia.niovs[niov_idx];
 
 			if (net_mp_niov_set_dma_addr(niov, dma))
 				return -EFAULT;
-			sg_len -= PAGE_SIZE;
-			dma += PAGE_SIZE;
+			sg_len -= niov_size;
+			dma += niov_size;
 			niov_idx++;
 		}
 	}
@@ -284,18 +290,21 @@ static int io_zcrx_map_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
 	return ret;
 }
 
-static void io_zcrx_sync_for_device(const struct page_pool *pool,
+static void io_zcrx_sync_for_device(struct page_pool *pool,
 				    struct net_iov *niov)
 {
 #if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC)
 	dma_addr_t dma_addr;
 
+	unsigned niov_size;
+
 	if (!dma_dev_need_sync(pool->p.dev))
 		return;
 
+	niov_size = 1U << io_pp_to_ifq(pool)->niov_shift;
 	dma_addr = page_pool_get_dma_addr_netmem(net_iov_to_netmem(niov));
 	__dma_sync_single_for_device(pool->p.dev, dma_addr + pool->p.offset,
-				     PAGE_SIZE, pool->p.dma_dir);
+				     niov_size, pool->p.dma_dir);
 #endif
 }
 
@@ -413,7 +422,8 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
 	if (ret)
 		goto err;
 
-	nr_iovs = area->mem.size >> PAGE_SHIFT;
+	ifq->niov_shift = PAGE_SHIFT;
+	nr_iovs = area->mem.size >> ifq->niov_shift;
 	area->nia.num_niovs = nr_iovs;
 
 	ret = -ENOMEM;
@@ -764,7 +774,7 @@ static void io_zcrx_ring_refill(struct page_pool *pp,
 		unsigned niov_idx, area_idx;
 
 		area_idx = rqe->off >> IORING_ZCRX_AREA_SHIFT;
-		niov_idx = (rqe->off & ~IORING_ZCRX_AREA_MASK) >> PAGE_SHIFT;
+		niov_idx = (rqe->off & ~IORING_ZCRX_AREA_MASK) >> ifq->niov_shift;
 
 		if (unlikely(rqe->__pad || area_idx))
 			continue;
@@ -854,8 +864,8 @@ static int io_pp_zc_init(struct page_pool *pp)
 		return -EINVAL;
 	if (WARN_ON_ONCE(!pp->dma_map))
 		return -EOPNOTSUPP;
-	if (pp->p.order != 0)
-		return -EOPNOTSUPP;
+	if (pp->p.order + PAGE_SHIFT != ifq->niov_shift)
+		return -EINVAL;
 	if (pp->p.dma_dir != DMA_FROM_DEVICE)
 		return -EOPNOTSUPP;
 
@@ -930,7 +940,7 @@ static bool io_zcrx_queue_cqe(struct io_kiocb *req, struct net_iov *niov,
 		cqe->flags |= IORING_CQE_F_32;
 
 	area = io_zcrx_iov_to_area(niov);
-	offset = off + (net_iov_idx(niov) << PAGE_SHIFT);
+	offset = off + (net_iov_idx(niov) << ifq->niov_shift);
 	rcqe = (struct io_uring_zcrx_cqe *)(cqe + 1);
 	rcqe->off = offset + ((u64)area->area_id << IORING_ZCRX_AREA_SHIFT);
 	rcqe->__pad = 0;
diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h
index 27d7cf28a04e..7604f1f85ccb 100644
--- a/io_uring/zcrx.h
+++ b/io_uring/zcrx.h
@@ -41,6 +41,7 @@ struct io_zcrx_area {
 struct io_zcrx_ifq {
 	struct io_ring_ctx		*ctx;
 	struct io_zcrx_area		*area;
+	unsigned			niov_shift;
 
 	spinlock_t			rq_lock ____cacheline_aligned_in_smp;
 	struct io_uring			*rq_ring;
-- 
2.49.0


  parent reply	other threads:[~2025-09-16 14:27 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-09-16 14:27 [PATCH io_uring for-6.18 00/20] zcrx for-6.18 updates Pavel Begunkov
2025-09-16 14:27 ` [PATCH io_uring for-6.18 01/20] io_uring/zcrx: improve rqe cache alignment Pavel Begunkov
2025-09-16 14:27 ` [PATCH io_uring for-6.18 02/20] io_uring/zcrx: replace memchar_inv with is_zero Pavel Begunkov
2025-09-16 14:27 ` [PATCH io_uring for-6.18 03/20] io_uring/zcrx: use page_pool_unref_and_test() Pavel Begunkov
2025-09-16 14:27 ` [PATCH io_uring for-6.18 04/20] io_uring/zcrx: remove extra io_zcrx_drop_netdev Pavel Begunkov
2025-09-16 14:27 ` [PATCH io_uring for-6.18 05/20] io_uring/zcrx: don't pass slot to io_zcrx_create_area Pavel Begunkov
2025-09-16 14:27 ` [PATCH io_uring for-6.18 06/20] io_uring/zcrx: move area reg checks into io_import_area Pavel Begunkov
2025-09-16 14:27 ` [PATCH io_uring for-6.18 07/20] io_uring/zcrx: check all niovs filled with dma addresses Pavel Begunkov
2025-09-16 14:27 ` [PATCH io_uring for-6.18 08/20] io_uring/zcrx: pass ifq to io_zcrx_alloc_fallback() Pavel Begunkov
2025-09-16 14:27 ` [PATCH io_uring for-6.18 09/20] io_uring/zcrx: deduplicate area mapping Pavel Begunkov
2025-09-16 14:27 ` [PATCH io_uring for-6.18 10/20] io_uring/zcrx: remove dmabuf_offset Pavel Begunkov
2025-09-16 14:27 ` [PATCH io_uring for-6.18 11/20] io_uring/zcrx: set sgt for umem area Pavel Begunkov
2025-09-16 14:27 ` Pavel Begunkov [this message]
2025-09-16 14:27 ` [PATCH io_uring for-6.18 13/20] io_uring/zcrx: rename dma lock Pavel Begunkov
2025-09-16 14:27 ` [PATCH io_uring for-6.18 14/20] io_uring/zcrx: protect netdev with pp_lock Pavel Begunkov
2025-09-16 14:27 ` [PATCH io_uring for-6.18 15/20] io_uring/zcrx: reduce netmem scope in refill Pavel Begunkov
2025-09-16 14:27 ` [PATCH io_uring for-6.18 16/20] io_uring/zcrx: use guards for the refill lock Pavel Begunkov
2025-09-16 14:28 ` [PATCH io_uring for-6.18 17/20] io_uring/zcrx: don't adjust free cache space Pavel Begunkov
2025-09-16 14:28 ` [PATCH io_uring for-6.18 18/20] io_uring/zcrx: introduce io_parse_rqe() Pavel Begunkov
2025-09-16 14:28 ` [PATCH io_uring for-6.18 19/20] io_uring/zcrx: allow synchronous buffer return Pavel Begunkov
2025-09-16 14:28 ` [PATCH io_uring for-6.18 20/20] io_uring/zcrx: account niov arrays to cgroup Pavel Begunkov
2025-09-16 18:37 ` [PATCH io_uring for-6.18 00/20] zcrx for-6.18 updates Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5d29fc35107408e1d23d4d9e853cccc4e270b0f1.1758030357.git.asml.silence@gmail.com \
    --to=asml.silence@gmail.com \
    --cc=axboe@kernel.dk \
    --cc=io-uring@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox