* [PATCH review-only 1/4] io_uring/zcrx: fully clean area on error in io_import_umem()
2026-02-17 10:58 [RFC io_uring review-only 0/4] zcrx mapping cleanups and device-less instances Pavel Begunkov
@ 2026-02-17 10:58 ` Pavel Begunkov
2026-02-17 10:58 ` [PATCH review-only 2/4] io_uring/zcrx: always dma map in advance Pavel Begunkov
` (3 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Pavel Begunkov @ 2026-02-17 10:58 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence, axboe, netdev
When accounting fails, io_import_umem() sets the page array, etc. and
returns an error expecting that the error handling code will take care
of the rest. To make the next patch simpler, only return a fully
initialised area from the function.
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
io_uring/zcrx.c | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 60e12eb5d4f3..117d578224f5 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -207,22 +207,26 @@ static int io_import_umem(struct io_zcrx_ifq *ifq,
ret = sg_alloc_table_from_pages(&mem->page_sg_table, pages, nr_pages,
0, (unsigned long)nr_pages << PAGE_SHIFT,
GFP_KERNEL_ACCOUNT);
- if (ret) {
- unpin_user_pages(pages, nr_pages);
- kvfree(pages);
- return ret;
- }
+ if (ret)
+ goto out_err;
mem->account_pages = io_count_account_pages(pages, nr_pages);
ret = io_account_mem(ifq->user, ifq->mm_account, mem->account_pages);
- if (ret < 0)
+ if (ret < 0) {
mem->account_pages = 0;
+ goto out_err;
+ }
mem->sgt = &mem->page_sg_table;
mem->pages = pages;
mem->nr_folios = nr_pages;
mem->size = area_reg->len;
return ret;
+out_err:
+ sg_free_table(&mem->page_sg_table);
+ unpin_user_pages(pages, nr_pages);
+ kvfree(pages);
+ return ret;
}
static void io_release_area_mem(struct io_zcrx_mem *mem)
--
2.52.0
^ permalink raw reply related [flat|nested] 6+ messages in thread* [PATCH review-only 2/4] io_uring/zcrx: always dma map in advance
2026-02-17 10:58 [RFC io_uring review-only 0/4] zcrx mapping cleanups and device-less instances Pavel Begunkov
2026-02-17 10:58 ` [PATCH review-only 1/4] io_uring/zcrx: fully clean area on error in io_import_umem() Pavel Begunkov
@ 2026-02-17 10:58 ` Pavel Begunkov
2026-02-17 10:58 ` [PATCH review-only 3/4] io_uring/zcrx: extract netdev+area init into a helper Pavel Begunkov
` (2 subsequent siblings)
4 siblings, 0 replies; 6+ messages in thread
From: Pavel Begunkov @ 2026-02-17 10:58 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence, axboe, netdev
zcrx was originally establishing dma mappings at a late stage when it
was being bound to a page pool. Dma-buf couldn't work this way, so it's
initialised during area creation.
It's messy having them do it at different spots, just move everything to
the area creation time.
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
io_uring/zcrx.c | 44 +++++++++++++++-----------------------------
1 file changed, 15 insertions(+), 29 deletions(-)
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 117d578224f5..290db098cfe7 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -194,6 +194,7 @@ static int io_import_umem(struct io_zcrx_ifq *ifq,
{
struct page **pages;
int nr_pages, ret;
+ bool mapped = false;
if (area_reg->dmabuf_fd)
return -EINVAL;
@@ -210,6 +211,12 @@ static int io_import_umem(struct io_zcrx_ifq *ifq,
if (ret)
goto out_err;
+ ret = dma_map_sgtable(ifq->dev, &mem->page_sg_table,
+ DMA_FROM_DEVICE, IO_DMA_ATTR);
+ if (ret < 0)
+ goto out_err;
+ mapped = true;
+
mem->account_pages = io_count_account_pages(pages, nr_pages);
ret = io_account_mem(ifq->user, ifq->mm_account, mem->account_pages);
if (ret < 0) {
@@ -223,6 +230,9 @@ static int io_import_umem(struct io_zcrx_ifq *ifq,
mem->size = area_reg->len;
return ret;
out_err:
+ if (mapped)
+ dma_unmap_sgtable(ifq->dev, &mem->page_sg_table,
+ DMA_FROM_DEVICE, IO_DMA_ATTR);
sg_free_table(&mem->page_sg_table);
unpin_user_pages(pages, nr_pages);
kvfree(pages);
@@ -288,30 +298,6 @@ static void io_zcrx_unmap_area(struct io_zcrx_ifq *ifq,
}
}
-static int io_zcrx_map_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
-{
- int ret;
-
- guard(mutex)(&ifq->pp_lock);
- if (area->is_mapped)
- return 0;
-
- if (!area->mem.is_dmabuf) {
- ret = dma_map_sgtable(ifq->dev, &area->mem.page_sg_table,
- DMA_FROM_DEVICE, IO_DMA_ATTR);
- if (ret < 0)
- return ret;
- }
-
- ret = io_populate_area_dma(ifq, area);
- if (ret && !area->mem.is_dmabuf)
- dma_unmap_sgtable(ifq->dev, &area->mem.page_sg_table,
- DMA_FROM_DEVICE, IO_DMA_ATTR);
- if (ret == 0)
- area->is_mapped = true;
- return ret;
-}
-
static void io_zcrx_sync_for_device(struct page_pool *pool,
struct net_iov *niov)
{
@@ -460,6 +446,7 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
ret = io_import_area(ifq, &area->mem, area_reg);
if (ret)
goto err;
+ area->is_mapped = true;
if (buf_size_shift > io_area_max_shift(&area->mem)) {
ret = -ERANGE;
@@ -495,6 +482,10 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
niov->type = NET_IOV_IOURING;
}
+ ret = io_populate_area_dma(ifq, area);
+ if (ret)
+ goto err;
+
area->free_count = nr_iovs;
/* we're only supporting one area per ifq for now */
area->area_id = 0;
@@ -1036,7 +1027,6 @@ static bool io_pp_zc_release_netmem(struct page_pool *pp, netmem_ref netmem)
static int io_pp_zc_init(struct page_pool *pp)
{
struct io_zcrx_ifq *ifq = io_pp_to_ifq(pp);
- int ret;
if (WARN_ON_ONCE(!ifq))
return -EINVAL;
@@ -1049,10 +1039,6 @@ static int io_pp_zc_init(struct page_pool *pp)
if (pp->p.dma_dir != DMA_FROM_DEVICE)
return -EOPNOTSUPP;
- ret = io_zcrx_map_area(ifq, ifq->area);
- if (ret)
- return ret;
-
refcount_inc(&ifq->refs);
return 0;
}
--
2.52.0
^ permalink raw reply related [flat|nested] 6+ messages in thread* [PATCH review-only 3/4] io_uring/zcrx: extract netdev+area init into a helper
2026-02-17 10:58 [RFC io_uring review-only 0/4] zcrx mapping cleanups and device-less instances Pavel Begunkov
2026-02-17 10:58 ` [PATCH review-only 1/4] io_uring/zcrx: fully clean area on error in io_import_umem() Pavel Begunkov
2026-02-17 10:58 ` [PATCH review-only 2/4] io_uring/zcrx: always dma map in advance Pavel Begunkov
@ 2026-02-17 10:58 ` Pavel Begunkov
2026-02-17 10:58 ` [PATCH review-only 4/4] io_uring/zcrx: implement device-less mode for zcrx Pavel Begunkov
2026-02-17 16:12 ` [RFC io_uring review-only 0/4] zcrx mapping cleanups and device-less instances Jens Axboe
4 siblings, 0 replies; 6+ messages in thread
From: Pavel Begunkov @ 2026-02-17 10:58 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence, axboe, netdev
In preparation for the following patches, add a function that is responsible
for looking up a netdev, creating an area, DMA mapping it and opening a
queue.
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
io_uring/zcrx.c | 70 +++++++++++++++++++++++++++++--------------------
1 file changed, 42 insertions(+), 28 deletions(-)
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 290db098cfe7..4db3df6d7658 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -742,10 +742,49 @@ static int import_zcrx(struct io_ring_ctx *ctx,
return ret;
}
+static int zcrx_register_netdev(struct io_zcrx_ifq *ifq,
+ struct io_uring_zcrx_ifq_reg *reg,
+ struct io_uring_zcrx_area_reg *area)
+{
+ struct pp_memory_provider_params mp_param = {};
+ unsigned if_rxq = reg->if_rxq;
+ int ret;
+
+ ifq->netdev = netdev_get_by_index_lock(current->nsproxy->net_ns,
+ reg->if_idx);
+ if (!ifq->netdev)
+ return -ENODEV;
+
+ netdev_hold(ifq->netdev, &ifq->netdev_tracker, GFP_KERNEL);
+
+ ifq->dev = netdev_queue_get_dma_dev(ifq->netdev, if_rxq);
+ if (!ifq->dev) {
+ ret = -EOPNOTSUPP;
+ goto netdev_put_unlock;
+ }
+ get_device(ifq->dev);
+
+ ret = io_zcrx_create_area(ifq, area, reg);
+ if (ret)
+ goto netdev_put_unlock;
+
+ mp_param.rx_page_size = 1U << ifq->niov_shift;
+ mp_param.mp_ops = &io_uring_pp_zc_ops;
+ mp_param.mp_priv = ifq;
+ ret = __net_mp_open_rxq(ifq->netdev, if_rxq, &mp_param, NULL);
+ if (ret)
+ goto netdev_put_unlock;
+
+ ifq->if_rxq = if_rxq;
+ ret = 0;
+netdev_put_unlock:
+ netdev_unlock(ifq->netdev);
+ return ret;
+}
+
int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
struct io_uring_zcrx_ifq_reg __user *arg)
{
- struct pp_memory_provider_params mp_param = {};
struct io_uring_zcrx_area_reg area;
struct io_uring_zcrx_ifq_reg reg;
struct io_uring_region_desc rd;
@@ -812,32 +851,9 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
if (ret)
goto err;
- ifq->netdev = netdev_get_by_index_lock(current->nsproxy->net_ns, reg.if_idx);
- if (!ifq->netdev) {
- ret = -ENODEV;
- goto err;
- }
- netdev_hold(ifq->netdev, &ifq->netdev_tracker, GFP_KERNEL);
-
- ifq->dev = netdev_queue_get_dma_dev(ifq->netdev, reg.if_rxq);
- if (!ifq->dev) {
- ret = -EOPNOTSUPP;
- goto netdev_put_unlock;
- }
- get_device(ifq->dev);
-
- ret = io_zcrx_create_area(ifq, &area, ®);
- if (ret)
- goto netdev_put_unlock;
-
- mp_param.rx_page_size = 1U << ifq->niov_shift;
- mp_param.mp_ops = &io_uring_pp_zc_ops;
- mp_param.mp_priv = ifq;
- ret = __net_mp_open_rxq(ifq->netdev, reg.if_rxq, &mp_param, NULL);
+ ret = zcrx_register_netdev(ifq, ®, &area);
if (ret)
- goto netdev_put_unlock;
- netdev_unlock(ifq->netdev);
- ifq->if_rxq = reg.if_rxq;
+ goto err;
reg.zcrx_id = id;
@@ -857,8 +873,6 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
goto err;
}
return 0;
-netdev_put_unlock:
- netdev_unlock(ifq->netdev);
err:
scoped_guard(mutex, &ctx->mmap_lock)
xa_erase(&ctx->zcrx_ctxs, id);
--
2.52.0
^ permalink raw reply related [flat|nested] 6+ messages in thread* [PATCH review-only 4/4] io_uring/zcrx: implement device-less mode for zcrx
2026-02-17 10:58 [RFC io_uring review-only 0/4] zcrx mapping cleanups and device-less instances Pavel Begunkov
` (2 preceding siblings ...)
2026-02-17 10:58 ` [PATCH review-only 3/4] io_uring/zcrx: extract netdev+area init into a helper Pavel Begunkov
@ 2026-02-17 10:58 ` Pavel Begunkov
2026-02-17 16:12 ` [RFC io_uring review-only 0/4] zcrx mapping cleanups and device-less instances Jens Axboe
4 siblings, 0 replies; 6+ messages in thread
From: Pavel Begunkov @ 2026-02-17 10:58 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence, axboe, netdev
Allow creating a zcrx instance without attaching it to a net device.
All data will be copied through the fallback path. The user is also
expected to use ZCRX_CTRL_FLUSH_RQ to handle overflows as it normally
should even with a netdev, but it becomes even more relevant as there
will likely be no one to automatically pick up buffers.
Apart from that, it follows the zcrx uapi for the I/O path, and is
useful for testing, experimentation, and potentially for the copy
receive path in the future if improved.
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
include/uapi/linux/io_uring/zcrx.h | 9 ++++++-
io_uring/zcrx.c | 41 ++++++++++++++++++++----------
io_uring/zcrx.h | 2 +-
3 files changed, 36 insertions(+), 16 deletions(-)
diff --git a/include/uapi/linux/io_uring/zcrx.h b/include/uapi/linux/io_uring/zcrx.h
index 3163a4b8aeb0..103d65e690eb 100644
--- a/include/uapi/linux/io_uring/zcrx.h
+++ b/include/uapi/linux/io_uring/zcrx.h
@@ -49,7 +49,14 @@ struct io_uring_zcrx_area_reg {
};
enum zcrx_reg_flags {
- ZCRX_REG_IMPORT = 1,
+ ZCRX_REG_IMPORT = 1,
+
+ /*
+ * Register a zcrx instance without a net device. All data will be
+ * copied. The refill queue entries might not be automatically
+ * consumed and need to be flushed, see ZCRX_CTRL_FLUSH_RQ.
+ */
+ ZCRX_REG_NODEV = 2,
};
enum zcrx_features {
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 4db3df6d7658..3d377523ff7e 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -127,10 +127,10 @@ static int io_import_dmabuf(struct io_zcrx_ifq *ifq,
int dmabuf_fd = area_reg->dmabuf_fd;
int i, ret;
+ if (!ifq->dev)
+ return -EINVAL;
if (off)
return -EINVAL;
- if (WARN_ON_ONCE(!ifq->dev))
- return -EFAULT;
if (!IS_ENABLED(CONFIG_DMA_SHARED_BUFFER))
return -EINVAL;
@@ -211,11 +211,13 @@ static int io_import_umem(struct io_zcrx_ifq *ifq,
if (ret)
goto out_err;
- ret = dma_map_sgtable(ifq->dev, &mem->page_sg_table,
- DMA_FROM_DEVICE, IO_DMA_ATTR);
- if (ret < 0)
- goto out_err;
- mapped = true;
+ if (ifq->dev) {
+ ret = dma_map_sgtable(ifq->dev, &mem->page_sg_table,
+ DMA_FROM_DEVICE, IO_DMA_ATTR);
+ if (ret < 0)
+ goto out_err;
+ mapped = true;
+ }
mem->account_pages = io_count_account_pages(pages, nr_pages);
ret = io_account_mem(ifq->user, ifq->mm_account, mem->account_pages);
@@ -446,7 +448,8 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
ret = io_import_area(ifq, &area->mem, area_reg);
if (ret)
goto err;
- area->is_mapped = true;
+ if (ifq->dev)
+ area->is_mapped = true;
if (buf_size_shift > io_area_max_shift(&area->mem)) {
ret = -ERANGE;
@@ -482,9 +485,11 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
niov->type = NET_IOV_IOURING;
}
- ret = io_populate_area_dma(ifq, area);
- if (ret)
- goto err;
+ if (ifq->dev) {
+ ret = io_populate_area_dma(ifq, area);
+ if (ret)
+ goto err;
+ }
area->free_count = nr_iovs;
/* we're only supporting one area per ifq for now */
@@ -816,6 +821,8 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
return -EFAULT;
if (reg.if_rxq == -1 || !reg.rq_entries)
return -EINVAL;
+ if ((reg.if_rxq || reg.if_idx) && (reg.flags & ZCRX_REG_NODEV))
+ return -EINVAL;
if (reg.rq_entries > IO_RQ_MAX_ENTRIES) {
if (!(ctx->flags & IORING_SETUP_CLAMP))
return -EINVAL;
@@ -851,9 +858,15 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
if (ret)
goto err;
- ret = zcrx_register_netdev(ifq, ®, &area);
- if (ret)
- goto err;
+ if (!(reg.flags & ZCRX_REG_NODEV)) {
+ ret = zcrx_register_netdev(ifq, ®, &area);
+ if (ret)
+ goto err;
+ } else {
+ ret = io_zcrx_create_area(ifq, &area, ®);
+ if (ret)
+ goto err;
+ }
reg.zcrx_id = id;
diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h
index 0ddcf0ee8861..db427f4a55b6 100644
--- a/io_uring/zcrx.h
+++ b/io_uring/zcrx.h
@@ -8,7 +8,7 @@
#include <net/page_pool/types.h>
#include <net/net_trackers.h>
-#define ZCRX_SUPPORTED_REG_FLAGS (ZCRX_REG_IMPORT)
+#define ZCRX_SUPPORTED_REG_FLAGS (ZCRX_REG_IMPORT | ZCRX_REG_NODEV)
#define ZCRX_FEATURES (ZCRX_FEATURE_RX_PAGE_SIZE)
struct io_zcrx_mem {
--
2.52.0
^ permalink raw reply related [flat|nested] 6+ messages in thread* Re: [RFC io_uring review-only 0/4] zcrx mapping cleanups and device-less instances
2026-02-17 10:58 [RFC io_uring review-only 0/4] zcrx mapping cleanups and device-less instances Pavel Begunkov
` (3 preceding siblings ...)
2026-02-17 10:58 ` [PATCH review-only 4/4] io_uring/zcrx: implement device-less mode for zcrx Pavel Begunkov
@ 2026-02-17 16:12 ` Jens Axboe
4 siblings, 0 replies; 6+ messages in thread
From: Jens Axboe @ 2026-02-17 16:12 UTC (permalink / raw)
To: Pavel Begunkov, io-uring; +Cc: netdev
On 2/17/26 3:58 AM, Pavel Begunkov wrote:
> First two patches move user memory DMA map creation to an earlier point,
> which makes it more uniform among different memory types and easier to
> manage. Patches 3 and 4 introduce device-less zcrx instances
> for testing purposes, which always copy data via the fallback path.
>
> note, based on two other recently sent patches splitting out a uapi
> file and defining constants in zcrx.h
All look good to me.
--
Jens Axboe
^ permalink raw reply [flat|nested] 6+ messages in thread