public inbox for io-uring@vger.kernel.org
* [PATCH 0/1] io_uring: zcrx large buffers support
@ 2026-01-24 10:36 Pavel Begunkov
  2026-01-24 10:36 ` [PATCH 1/1] io_uring/zcrx: implement large rx buffer support Pavel Begunkov
  0 siblings, 1 reply; 5+ messages in thread
From: Pavel Begunkov @ 2026-01-24 10:36 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence, axboe, netdev

A single patch enabling large rx buffer support for zcrx by propagating
the user-specified buffer length to the net layer and sanitising the
parameters. It depends on net/ changes that can be found in a 6.19-rc5
based branch [1], which has already been pulled into the net tree.

[1] https://github.com/isilence/linux.git tags/net-queue-rx-buf-len-v9

For convenience, all changes, including both the net and io_uring parts, can be found in:

https://github.com/isilence/linux.git zcrx/for-next

Link to the netdev series:

https://lore.kernel.org/netdev/cover.1768493907.git.asml.silence@gmail.com/

Pavel Begunkov (1):
  io_uring/zcrx: implement large rx buffer support

 include/uapi/linux/io_uring.h |  2 +-
 io_uring/zcrx.c               | 39 ++++++++++++++++++++++++++++++-----
 2 files changed, 35 insertions(+), 6 deletions(-)

-- 
2.52.0


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1/1] io_uring/zcrx: implement large rx buffer support
  2026-01-24 10:36 [PATCH 0/1] io_uring: zcrx large buffers support Pavel Begunkov
@ 2026-01-24 10:36 ` Pavel Begunkov
  2026-01-24 15:32   ` Jens Axboe
  2026-01-24 15:34   ` Jens Axboe
  0 siblings, 2 replies; 5+ messages in thread
From: Pavel Begunkov @ 2026-01-24 10:36 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence, axboe, netdev

Some network cards support receive buffers larger than 4K, which can be
vastly beneficial for performance: benchmarks for this patch showed up
to a 30% CPU utilisation improvement with 32K buffers compared to 4K.

Allow zcrx users to specify the buffer size in struct
io_uring_zcrx_ifq_reg::rx_buf_len. If it is set to zero, zcrx will use
a default value. zcrx checks the memory backing the area and fails
registration if it can't be split into physically contiguous chunks of
the required size. This is more restrictive than strictly necessary, as
only the DMA addresses need to be contiguous, but relaxing that is
beyond the scope of this series.
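
For illustration, a minimal userspace sketch of requesting 32K buffers
at registration time. This is not part of the patch: the helper name
and the rq_entries / 32K values are arbitrary, error handling is
minimal, and the area / refill-ring setup is assumed to happen
elsewhere.

  #include <errno.h>
  #include <stdint.h>
  #include <string.h>
  #include <unistd.h>
  #include <sys/syscall.h>
  #include <linux/io_uring.h>

  /* Hypothetical helper: register a zcrx ifq asking for 32K rx buffers */
  static int register_zcrx_32k(int ring_fd, uint32_t ifindex, uint32_t rxq,
                               struct io_uring_zcrx_area_reg *area,
                               struct io_uring_region_desc *rd)
  {
          struct io_uring_zcrx_ifq_reg reg;

          memset(&reg, 0, sizeof(reg));
          reg.if_idx = ifindex;
          reg.if_rxq = rxq;
          reg.rq_entries = 4096;
          reg.area_ptr = (uint64_t)(uintptr_t)area;
          reg.region_ptr = (uint64_t)(uintptr_t)rd;
          /* New field: 0 keeps the default (page size), must be a power of 2 */
          reg.rx_buf_len = 32 * 1024;

          if (syscall(__NR_io_uring_register, ring_fd,
                      IORING_REGISTER_ZCRX_IFQ, &reg, 1) < 0)
                  return -errno;

          /* the kernel copies the updated reg back on success */
          return (int)reg.rx_buf_len;
  }

On success the kernel writes the buffer size it actually chose back
into rx_buf_len, so a caller passing zero can discover the default.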

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 include/uapi/linux/io_uring.h |  2 +-
 io_uring/zcrx.c               | 39 ++++++++++++++++++++++++++++++-----
 2 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 475094c7a668..ec13ff37db39 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -1094,7 +1094,7 @@ struct io_uring_zcrx_ifq_reg {
 
 	struct io_uring_zcrx_offsets offsets;
 	__u32	zcrx_id;
-	__u32	__resv2;
+	__u32	rx_buf_len;
 	__u64	__resv[3];
 };
 
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index b99cf2c6670a..b5166c9118e5 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -15,6 +15,7 @@
 #include <net/netlink.h>
 #include <net/netdev_queues.h>
 #include <net/netdev_rx_queue.h>
+#include <net/netdev_queues.h>
 #include <net/tcp.h>
 #include <net/rps.h>
 
@@ -55,6 +56,18 @@ static inline struct page *io_zcrx_iov_page(const struct net_iov *niov)
 	return area->mem.pages[net_iov_idx(niov) << niov_pages_shift];
 }
 
+static int io_area_max_shift(struct io_zcrx_mem *mem)
+{
+	struct sg_table *sgt = mem->sgt;
+	struct scatterlist *sg;
+	unsigned shift = -1U;
+	unsigned i;
+
+	for_each_sgtable_dma_sg(sgt, sg, i)
+		shift = min(shift, __ffs(sg->length));
+	return shift;
+}
+
 static int io_populate_area_dma(struct io_zcrx_ifq *ifq,
 				struct io_zcrx_area *area)
 {
@@ -416,12 +429,21 @@ static int io_zcrx_append_area(struct io_zcrx_ifq *ifq,
 }
 
 static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
-			       struct io_uring_zcrx_area_reg *area_reg)
+			       struct io_uring_zcrx_area_reg *area_reg,
+			       struct io_uring_zcrx_ifq_reg *reg)
 {
+	int buf_size_shift = PAGE_SHIFT;
 	struct io_zcrx_area *area;
 	unsigned nr_iovs;
 	int i, ret;
 
+	if (reg->rx_buf_len) {
+		if (!is_power_of_2(reg->rx_buf_len) ||
+		     reg->rx_buf_len < PAGE_SIZE)
+			return -EINVAL;
+		buf_size_shift = ilog2(reg->rx_buf_len);
+	}
+
 	ret = -ENOMEM;
 	area = kzalloc(sizeof(*area), GFP_KERNEL);
 	if (!area)
@@ -432,7 +454,12 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
 	if (ret)
 		goto err;
 
-	ifq->niov_shift = PAGE_SHIFT;
+	if (buf_size_shift > io_area_max_shift(&area->mem)) {
+		ret = -ERANGE;
+		goto err;
+	}
+
+	ifq->niov_shift = buf_size_shift;
 	nr_iovs = area->mem.size >> ifq->niov_shift;
 	area->nia.num_niovs = nr_iovs;
 
@@ -742,8 +769,7 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 		return -EINVAL;
 	if (copy_from_user(&reg, arg, sizeof(reg)))
 		return -EFAULT;
-	if (!mem_is_zero(&reg.__resv, sizeof(reg.__resv)) ||
-	    reg.__resv2 || reg.zcrx_id)
+	if (!mem_is_zero(&reg.__resv, sizeof(reg.__resv)) || reg.zcrx_id)
 		return -EINVAL;
 	if (reg.flags & ZCRX_REG_IMPORT)
 		return import_zcrx(ctx, arg, &reg);
@@ -800,10 +826,11 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 	}
 	get_device(ifq->dev);
 
-	ret = io_zcrx_create_area(ifq, &area);
+	ret = io_zcrx_create_area(ifq, &area, &reg);
 	if (ret)
 		goto netdev_put_unlock;
 
+	mp_param.rx_page_size = 1U << ifq->niov_shift;
 	mp_param.mp_ops = &io_uring_pp_zc_ops;
 	mp_param.mp_priv = ifq;
 	ret = __net_mp_open_rxq(ifq->netdev, reg.if_rxq, &mp_param, NULL);
@@ -821,6 +848,8 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 			goto err;
 	}
 
+	reg.rx_buf_len = 1U << ifq->niov_shift;
+
 	if (copy_to_user(arg, &reg, sizeof(reg)) ||
 	    copy_to_user(u64_to_user_ptr(reg.region_ptr), &rd, sizeof(rd)) ||
 	    copy_to_user(u64_to_user_ptr(reg.area_ptr), &area, sizeof(area))) {
-- 
2.52.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 1/1] io_uring/zcrx: implement large rx buffer support
  2026-01-24 10:36 ` [PATCH 1/1] io_uring/zcrx: implement large rx buffer support Pavel Begunkov
@ 2026-01-24 15:32   ` Jens Axboe
  2026-01-24 16:31     ` Pavel Begunkov
  2026-01-24 15:34   ` Jens Axboe
  1 sibling, 1 reply; 5+ messages in thread
From: Jens Axboe @ 2026-01-24 15:32 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring; +Cc: netdev

On 1/24/26 3:36 AM, Pavel Begunkov wrote:
> diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
> index b99cf2c6670a..b5166c9118e5 100644
> --- a/io_uring/zcrx.c
> +++ b/io_uring/zcrx.c
> @@ -15,6 +15,7 @@
>  #include <net/netlink.h>
>  #include <net/netdev_queues.h>
>  #include <net/netdev_rx_queue.h>
> +#include <net/netdev_queues.h>
>  #include <net/tcp.h>
>  #include <net/rps.h>

Duplicate header? Rest of the patch looks fine to me, I'll just kill it
while applying.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 1/1] io_uring/zcrx: implement large rx buffer support
  2026-01-24 10:36 ` [PATCH 1/1] io_uring/zcrx: implement large rx buffer support Pavel Begunkov
  2026-01-24 15:32   ` Jens Axboe
@ 2026-01-24 15:34   ` Jens Axboe
  1 sibling, 0 replies; 5+ messages in thread
From: Jens Axboe @ 2026-01-24 15:34 UTC (permalink / raw)
  To: io-uring, Pavel Begunkov; +Cc: netdev


On Sat, 24 Jan 2026 10:36:17 +0000, Pavel Begunkov wrote:
> Some network cards support receive buffers larger than 4K, which can be
> vastly beneficial for performance: benchmarks for this patch showed up
> to a 30% CPU utilisation improvement with 32K buffers compared to 4K.
> 
> Allow zcrx users to specify the buffer size in struct
> io_uring_zcrx_ifq_reg::rx_buf_len. If it is set to zero, zcrx will use
> a default value. zcrx checks the memory backing the area and fails
> registration if it can't be split into physically contiguous chunks of
> the required size. This is more restrictive than strictly necessary, as
> only the DMA addresses need to be contiguous, but relaxing that is
> beyond the scope of this series.
> 
> [...]

Applied, thanks!

[1/1] io_uring/zcrx: implement large rx buffer support
      commit: 795663b4d160ba652959f1a46381c5e8b1342a53

Best regards,
-- 
Jens Axboe




^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 1/1] io_uring/zcrx: implement large rx buffer support
  2026-01-24 15:32   ` Jens Axboe
@ 2026-01-24 16:31     ` Pavel Begunkov
  0 siblings, 0 replies; 5+ messages in thread
From: Pavel Begunkov @ 2026-01-24 16:31 UTC (permalink / raw)
  To: Jens Axboe, io-uring; +Cc: netdev

On 1/24/26 15:32, Jens Axboe wrote:
> On 1/24/26 3:36 AM, Pavel Begunkov wrote:
>> diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
>> index b99cf2c6670a..b5166c9118e5 100644
>> --- a/io_uring/zcrx.c
>> +++ b/io_uring/zcrx.c
>> @@ -15,6 +15,7 @@
>>   #include <net/netlink.h>
>>   #include <net/netdev_queues.h>
>>   #include <net/netdev_rx_queue.h>
>> +#include <net/netdev_queues.h>
>>   #include <net/tcp.h>
>>   #include <net/rps.h>
> 
> Duplicate header? Rest of the patch looks fine to me, I'll just kill it
> while applying.

Looks like it, probably slipped through during rebases

-- 
Pavel Begunkov


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2026-01-24 16:31 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-01-24 10:36 [PATCH 0/1] io_uring: zcrx large buffers support Pavel Begunkov
2026-01-24 10:36 ` [PATCH 1/1] io_uring/zcrx: implement large rx buffer support Pavel Begunkov
2026-01-24 15:32   ` Jens Axboe
2026-01-24 16:31     ` Pavel Begunkov
2026-01-24 15:34   ` Jens Axboe
