public inbox for io-uring@vger.kernel.org
 help / color / mirror / Atom feed
From: Pavel Begunkov <asml.silence@gmail.com>
To: io-uring@vger.kernel.org
Cc: asml.silence@gmail.com, axboe@kernel.dk, netdev@vger.kernel.org
Subject: [PATCH 05/10] io_uring/zcrx: add sync refill queue flushing
Date: Thu, 13 Nov 2025 10:46:13 +0000	[thread overview]
Message-ID: <287ddbe37aaad197cd64e10f6e41ed7c35d79e38.1763029704.git.asml.silence@gmail.com> (raw)
In-Reply-To: <cover.1763029704.git.asml.silence@gmail.com>

Add a zcrx interface via IORING_REGISTER_ZCRX_CTRL that forces the
kernel to flush / consume entries from the refill queue. Just as with
the IORING_REGISTER_ZCRX_REFILL attempt, the motivation is to address
cases where the refill queue becomes full, and the user can't return
buffers and needs to stash them. It's still a slow path, and the user
should size the refill queue appropriately, but it should be helpful for
handling temporary traffic spikes and other unpredictable conditions.

The interface is simpler compared to ZCRX_REFILL as it doesn't need
temporary refill entry arrays and gives natural batching, whereas
ZCRX_REFILL requires even more user logic to be somewhat efficient.

Also, add a structure for the operation. It's not currently used but
can serve for future improvements like limiting the number of buffers to
process, etc.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 include/uapi/linux/io_uring.h | 10 ++++-
 io_uring/zcrx.c               | 74 +++++++++++++++++++++++++++++++++--
 2 files changed, 80 insertions(+), 4 deletions(-)

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 5b7851704efe..7e20a555b697 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -1086,13 +1086,21 @@ struct io_uring_zcrx_ifq_reg {
 };
 
 enum zcrx_ctrl_op {
+	ZCRX_CTRL_FLUSH_RQ,
+
 	__ZCRX_CTRL_LAST,
 };
 
+struct zcrx_ctrl_flush_rq {
+	__u64		__resv[6];
+};
+
 struct zcrx_ctrl {
 	__u32	zcrx_id;
 	__u32	op; /* see enum zcrx_ctrl_op */
-	__u64	__resv[8];
+	__u64	__resv[2];
+
+	struct zcrx_ctrl_flush_rq	zc_flush;
 };
 
 #ifdef __cplusplus
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 0b5f4320c7a9..08c103af69bc 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -941,6 +941,71 @@ static const struct memory_provider_ops io_uring_pp_zc_ops = {
 	.uninstall		= io_pp_uninstall,
 };
 
+static unsigned zcrx_parse_rq(netmem_ref *netmem_array, unsigned nr,
+			      struct io_zcrx_ifq *zcrx)
+{
+	unsigned int mask = zcrx->rq_entries - 1;
+	unsigned int i;
+
+	guard(spinlock_bh)(&zcrx->rq_lock);
+
+	nr = min(nr, io_zcrx_rqring_entries(zcrx));
+	for (i = 0; i < nr; i++) {
+		struct io_uring_zcrx_rqe *rqe = io_zcrx_get_rqe(zcrx, mask);
+		struct net_iov *niov;
+
+		if (!io_parse_rqe(rqe, zcrx, &niov))
+			break;
+		netmem_array[i] = net_iov_to_netmem(niov);
+	}
+
+	smp_store_release(&zcrx->rq_ring->head, zcrx->cached_rq_head);
+	return i;
+}
+
+#define ZCRX_FLUSH_BATCH 32
+
+static void zcrx_return_buffers(netmem_ref *netmems, unsigned nr)
+{
+	unsigned i;
+
+	for (i = 0; i < nr; i++) {
+		netmem_ref netmem = netmems[i];
+		struct net_iov *niov = netmem_to_net_iov(netmem);
+
+		if (!io_zcrx_put_niov_uref(niov))
+			continue;
+		if (!page_pool_unref_and_test(netmem))
+			continue;
+		io_zcrx_return_niov(niov);
+	}
+}
+
+static int zcrx_flush_rq(struct io_ring_ctx *ctx, struct io_zcrx_ifq *zcrx,
+			 struct zcrx_ctrl *ctrl)
+{
+	struct zcrx_ctrl_flush_rq *frq = &ctrl->zc_flush;
+	netmem_ref netmems[ZCRX_FLUSH_BATCH];
+	unsigned total = 0;
+	unsigned nr;
+
+	if (!mem_is_zero(&frq->__resv, sizeof(frq->__resv)))
+		return -EINVAL;
+
+	do {
+		nr = zcrx_parse_rq(netmems, ZCRX_FLUSH_BATCH, zcrx);
+
+		zcrx_return_buffers(netmems, nr);
+		total += nr;
+
+		if (fatal_signal_pending(current))
+			break;
+		cond_resched();
+	} while (nr == ZCRX_FLUSH_BATCH && total < zcrx->rq_entries);
+
+	return 0;
+}
+
 int io_zcrx_ctrl(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args)
 {
 	struct zcrx_ctrl ctrl;
@@ -956,10 +1021,13 @@ int io_zcrx_ctrl(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args)
 	zcrx = xa_load(&ctx->zcrx_ctxs, ctrl.zcrx_id);
 	if (!zcrx)
 		return -ENXIO;
-	if (ctrl.op >= __ZCRX_CTRL_LAST)
-		return -EOPNOTSUPP;
 
-	return -EINVAL;
+	switch (ctrl.op) {
+	case ZCRX_CTRL_FLUSH_RQ:
+		return zcrx_flush_rq(ctx, zcrx, &ctrl);
+	}
+
+	return -EOPNOTSUPP;
 }
 
 static bool io_zcrx_queue_cqe(struct io_kiocb *req, struct net_iov *niov,
-- 
2.49.0


  parent reply	other threads:[~2025-11-13 10:46 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-11-13 10:46 [PATCH 00/10] io_uring for-6.19 zcrx updates Pavel Begunkov
2025-11-13 10:46 ` [PATCH 01/10] io_uring/zcrx: convert to use netmem_desc Pavel Begunkov
2025-11-13 10:46 ` [PATCH 02/10] io_uring/zcrx: use folio_nr_pages() instead of shift operation Pavel Begunkov
2025-11-13 10:46 ` [PATCH 03/10] io_uring/zcrx: elide passing msg flags Pavel Begunkov
2025-11-13 10:46 ` [PATCH 04/10] io_uring/zcrx: introduce IORING_REGISTER_ZCRX_CTRL Pavel Begunkov
2025-11-13 10:46 ` Pavel Begunkov [this message]
2025-11-13 10:46 ` [PATCH 06/10] io_uring/zcrx: count zcrx users Pavel Begunkov
2025-11-13 10:46 ` [PATCH 07/10] io_uring/zcrx: move io_zcrx_scrub() and dependencies up Pavel Begunkov
2025-11-13 10:46 ` [PATCH 08/10] io_uring/zcrx: export zcrx via a file Pavel Begunkov
2025-11-13 10:46 ` [PATCH 09/10] io_uring/zcrx: add io_fill_zcrx_offsets() Pavel Begunkov
2025-11-13 10:46 ` [PATCH 10/10] io_uring/zcrx: share an ifq between rings Pavel Begunkov
2025-11-13 18:38 ` [PATCH 00/10] io_uring for-6.19 zcrx updates Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=287ddbe37aaad197cd64e10f6e41ed7c35d79e38.1763029704.git.asml.silence@gmail.com \
    --to=asml.silence@gmail.com \
    --cc=axboe@kernel.dk \
    --cc=io-uring@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox