From: Pavel Begunkov <asml.silence@gmail.com>
To: io-uring@vger.kernel.org
Cc: asml.silence@gmail.com, axboe@kernel.dk, netdev@vger.kernel.org
Subject: [PATCH io_uring for-6.18 19/20] io_uring/zcrx: allow synchronous buffer return
Date: Tue, 16 Sep 2025 15:28:02 +0100
Message-ID: <58e9280cb02c97e52d9a2f15944f7a9e4d344927.1758030357.git.asml.silence@gmail.com>
In-Reply-To: <cover.1758030357.git.asml.silence@gmail.com>

Returning buffers via a ring is performant and convenient, but it
becomes a problem if the user misconfigures the ring size and the ring
fills up. Add a synchronous way to return buffers to the page pool via
a new register opcode. It's intended to be a reliable slow path for
refilling.
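
As a rough userspace sketch (not part of this patch), the new opcode
could be driven as below. zcrx_sync_refill() is a hypothetical helper;
it assumes UAPI headers with this patch applied and an rqes[] array
filled the same way entries would be written into the refill ring,
i.e. from the offsets reported in zcrx completions:

#include <stdint.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/io_uring.h>

static int zcrx_sync_refill(int ring_fd, __u32 zcrx_id,
			    const struct io_uring_zcrx_rqe *rqes, __u32 nr)
{
	struct io_uring_zcrx_sync_refill zr;

	memset(&zr, 0, sizeof(zr));	/* reserved fields must be zero */
	zr.zcrx_id = zcrx_id;
	zr.nr_entries = nr;		/* capped at 64K by the kernel */
	zr.rqes = (__u64)(uintptr_t)rqes;

	/*
	 * nr_args must be 0 for this opcode. On success the kernel
	 * returns the number of entries it processed; via the libc
	 * syscall() wrapper a failure shows up as -1 with errno set.
	 */
	return syscall(__NR_io_uring_register, ring_fd,
		       IORING_REGISTER_ZCRX_REFILL, &zr, 0);
}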

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 include/uapi/linux/io_uring.h | 12 +++++++
 io_uring/register.c           |  3 ++
 io_uring/zcrx.c               | 68 +++++++++++++++++++++++++++++++++++
 io_uring/zcrx.h               |  7 ++++
 4 files changed, 90 insertions(+)

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 1ce17c535944..a0cc1cc0dd01 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -689,6 +689,9 @@ enum io_uring_register_op {
 	/* query various aspects of io_uring, see linux/io_uring/query.h */
 	IORING_REGISTER_QUERY			= 35,
 
+	/* return zcrx buffers back into circulation */
+	IORING_REGISTER_ZCRX_REFILL		= 36,
+
 	/* this goes last */
 	IORING_REGISTER_LAST,
 
@@ -1070,6 +1073,15 @@ struct io_uring_zcrx_ifq_reg {
 	__u64	__resv[3];
 };
 
+struct io_uring_zcrx_sync_refill {
+	__u32		zcrx_id;
+	/* the number of entries to return */
+	__u32		nr_entries;
+	/* pointer to an array of struct io_uring_zcrx_rqe */
+	__u64		rqes;
+	__u64		__resv[2];
+};
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/io_uring/register.c b/io_uring/register.c
index 96e9cac12823..43f04c47522c 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -833,6 +833,9 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 	case IORING_REGISTER_QUERY:
 		ret = io_query(ctx, arg, nr_args);
 		break;
+	case IORING_REGISTER_ZCRX_REFILL:
+		ret = io_zcrx_return_bufs(ctx, arg, nr_args);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 81d4aa75a69f..07a114f9a542 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -927,6 +927,74 @@ static const struct memory_provider_ops io_uring_pp_zc_ops = {
 	.uninstall		= io_pp_uninstall,
 };
 
+#define IO_ZCRX_MAX_SYS_REFILL_BUFS		(1 << 16)
+#define IO_ZCRX_SYS_REFILL_BATCH		32
+
+static void io_return_buffers(struct io_zcrx_ifq *ifq,
+			      struct io_uring_zcrx_rqe *rqes, unsigned nr)
+{
+	int i;
+
+	for (i = 0; i < nr; i++) {
+		struct net_iov *niov;
+		netmem_ref netmem;
+
+		if (!io_parse_rqe(&rqes[i], ifq, &niov))
+			continue;
+
+		scoped_guard(spinlock_bh, &ifq->rq_lock) {
+			if (!io_zcrx_put_niov_uref(niov))
+				continue;
+		}
+
+		netmem = net_iov_to_netmem(niov);
+		if (!page_pool_unref_and_test(netmem))
+			continue;
+		io_zcrx_return_niov(niov);
+	}
+}
+
+int io_zcrx_return_bufs(struct io_ring_ctx *ctx,
+			void __user *arg, unsigned nr_arg)
+{
+	struct io_uring_zcrx_rqe rqes[IO_ZCRX_SYS_REFILL_BATCH];
+	struct io_uring_zcrx_rqe __user *user_rqes;
+	struct io_uring_zcrx_sync_refill zr;
+	struct io_zcrx_ifq *ifq;
+	unsigned nr, i;
+
+	if (nr_arg)
+		return -EINVAL;
+	if (copy_from_user(&zr, arg, sizeof(zr)))
+		return -EFAULT;
+	if (!zr.nr_entries || zr.nr_entries > IO_ZCRX_MAX_SYS_REFILL_BUFS)
+		return -EINVAL;
+	if (!mem_is_zero(&zr.__resv, sizeof(zr.__resv)))
+		return -EINVAL;
+
+	ifq = xa_load(&ctx->zcrx_ctxs, zr.zcrx_id);
+	if (!ifq)
+		return -EINVAL;
+	nr = zr.nr_entries;
+	user_rqes = u64_to_user_ptr(zr.rqes);
+
+	for (i = 0; i < nr;) {
+		unsigned batch = min(nr - i, IO_ZCRX_SYS_REFILL_BATCH);
+		size_t size = batch * sizeof(rqes[0]);
+
+		if (copy_from_user(rqes, user_rqes + i, size))
+			return i ? i : -EFAULT;
+		io_return_buffers(ifq, rqes, batch);
+
+		i += batch;
+
+		if (fatal_signal_pending(current))
+			return i;
+		cond_resched();
+	}
+	return nr;
+}
+
 static bool io_zcrx_queue_cqe(struct io_kiocb *req, struct net_iov *niov,
 			      struct io_zcrx_ifq *ifq, int off, int len)
 {
diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h
index a48871b5adad..33ef61503092 100644
--- a/io_uring/zcrx.h
+++ b/io_uring/zcrx.h
@@ -63,6 +63,8 @@ struct io_zcrx_ifq {
 };
 
 #if defined(CONFIG_IO_URING_ZCRX)
+int io_zcrx_return_bufs(struct io_ring_ctx *ctx,
+			void __user *arg, unsigned nr_arg);
 int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 			 struct io_uring_zcrx_ifq_reg __user *arg);
 void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx);
@@ -95,6 +97,11 @@ static inline struct io_mapped_region *io_zcrx_get_region(struct io_ring_ctx *ct
 {
 	return NULL;
 }
+static inline int io_zcrx_return_bufs(struct io_ring_ctx *ctx,
+				      void __user *arg, unsigned nr_arg)
+{
+	return -EOPNOTSUPP;
+}
 #endif
 
 int io_recvzc(struct io_kiocb *req, unsigned int issue_flags);
-- 
2.49.0


Thread overview: 22+ messages
2025-09-16 14:27 [PATCH io_uring for-6.18 00/20] zcrx for-6.18 updates Pavel Begunkov
2025-09-16 14:27 ` [PATCH io_uring for-6.18 01/20] io_uring/zcrx: improve rqe cache alignment Pavel Begunkov
2025-09-16 14:27 ` [PATCH io_uring for-6.18 02/20] io_uring/zcrx: replace memchar_inv with is_zero Pavel Begunkov
2025-09-16 14:27 ` [PATCH io_uring for-6.18 03/20] io_uring/zcrx: use page_pool_unref_and_test() Pavel Begunkov
2025-09-16 14:27 ` [PATCH io_uring for-6.18 04/20] io_uring/zcrx: remove extra io_zcrx_drop_netdev Pavel Begunkov
2025-09-16 14:27 ` [PATCH io_uring for-6.18 05/20] io_uring/zcrx: don't pass slot to io_zcrx_create_area Pavel Begunkov
2025-09-16 14:27 ` [PATCH io_uring for-6.18 06/20] io_uring/zcrx: move area reg checks into io_import_area Pavel Begunkov
2025-09-16 14:27 ` [PATCH io_uring for-6.18 07/20] io_uring/zcrx: check all niovs filled with dma addresses Pavel Begunkov
2025-09-16 14:27 ` [PATCH io_uring for-6.18 08/20] io_uring/zcrx: pass ifq to io_zcrx_alloc_fallback() Pavel Begunkov
2025-09-16 14:27 ` [PATCH io_uring for-6.18 09/20] io_uring/zcrx: deduplicate area mapping Pavel Begunkov
2025-09-16 14:27 ` [PATCH io_uring for-6.18 10/20] io_uring/zcrx: remove dmabuf_offset Pavel Begunkov
2025-09-16 14:27 ` [PATCH io_uring for-6.18 11/20] io_uring/zcrx: set sgt for umem area Pavel Begunkov
2025-09-16 14:27 ` [PATCH io_uring for-6.18 12/20] io_uring/zcrx: make niov size variable Pavel Begunkov
2025-09-16 14:27 ` [PATCH io_uring for-6.18 13/20] io_uring/zcrx: rename dma lock Pavel Begunkov
2025-09-16 14:27 ` [PATCH io_uring for-6.18 14/20] io_uring/zcrx: protect netdev with pp_lock Pavel Begunkov
2025-09-16 14:27 ` [PATCH io_uring for-6.18 15/20] io_uring/zcrx: reduce netmem scope in refill Pavel Begunkov
2025-09-16 14:27 ` [PATCH io_uring for-6.18 16/20] io_uring/zcrx: use guards for the refill lock Pavel Begunkov
2025-09-16 14:28 ` [PATCH io_uring for-6.18 17/20] io_uring/zcrx: don't adjust free cache space Pavel Begunkov
2025-09-16 14:28 ` [PATCH io_uring for-6.18 18/20] io_uring/zcrx: introduce io_parse_rqe() Pavel Begunkov
2025-09-16 14:28 ` Pavel Begunkov [this message]
2025-09-16 14:28 ` [PATCH io_uring for-6.18 20/20] io_uring/zcrx: account niov arrays to cgroup Pavel Begunkov
2025-09-16 18:37 ` [PATCH io_uring for-6.18 00/20] zcrx for-6.18 updates Jens Axboe
