public inbox for io-uring@vger.kernel.org
 help / color / mirror / Atom feed
From: Ming Lei <ming.lei@redhat.com>
To: Jens Axboe <axboe@kernel.dk>
Cc: io-uring@vger.kernel.org, Pavel Begunkov <asml.silence@gmail.com>,
	Caleb Sander Mateos <csander@purestorage.com>,
	Stefan Metzmacher <metze@samba.org>,
	Ming Lei <ming.lei@redhat.com>
Subject: [PATCH V2 08/13] io_uring: bpf: add uring_bpf_memcpy() kfunc
Date: Tue,  6 Jan 2026 18:11:17 +0800	[thread overview]
Message-ID: <20260106101126.4064990-9-ming.lei@redhat.com> (raw)
In-Reply-To: <20260106101126.4064990-1-ming.lei@redhat.com>

Add uring_bpf_memcpy() kfunc that copies data between io_uring BPF
buffers. This kfunc supports all 5 buffer types defined in
io_bpf_buf_desc:

- IO_BPF_BUF_USER: plain userspace buffer
- IO_BPF_BUF_FIXED: fixed buffer (absolute address within buffer)
- IO_BPF_BUF_VEC: vectored userspace buffer (iovec array)
- IO_BPF_BUF_KFIXED: kernel fixed buffer (offset-based addressing)
- IO_BPF_BUF_KVEC: kernel vectored buffer

Add helper functions for buffer import:
- io_bpf_import_fixed_buf(): handles FIXED/KFIXED types with proper
  node reference counting
- io_bpf_import_kvec_buf(): handles KVEC using __io_prep_reg_iovec()
  and __io_import_reg_vec()
- io_bpf_import_buffer(): unified dispatcher for all buffer types
- io_bpf_copy_iters(): page-based copy between iov_iters

The kfunc properly manages buffer node references and submit lock
for registered buffer access.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 io_uring/bpf_op.c | 320 +++++++++++++++++++++++++++++++++++++++++++++-
 io_uring/bpf_op.h |   3 +-
 2 files changed, 321 insertions(+), 2 deletions(-)

diff --git a/io_uring/bpf_op.c b/io_uring/bpf_op.c
index d6f146abe304..3c577aa3dfc4 100644
--- a/io_uring/bpf_op.c
+++ b/io_uring/bpf_op.c
@@ -10,9 +10,11 @@
 #include <linux/btf.h>
 #include <linux/btf_ids.h>
 #include <linux/filter.h>
+#include <linux/uio.h>
 #include <uapi/linux/io_uring.h>
 #include "io_uring.h"
 #include "register.h"
+#include "rsrc.h"
 #include "bpf_op.h"
 
 static inline unsigned char uring_bpf_get_op(u32 op_flags)
@@ -47,6 +49,7 @@ int io_uring_bpf_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 
 	data->opf = opf;
 	data->ops = ops;
+	data->issue_flags = 0;
 	ret = ops->prep_fn(data, sqe);
 	if (!ret) {
 		/* Only increment refcount on success (uring_lock already held) */
@@ -74,7 +77,13 @@ static int __io_uring_bpf_issue(struct io_kiocb *req)
 
 int io_uring_bpf_issue(struct io_kiocb *req, unsigned int issue_flags)
 {
-	return __io_uring_bpf_issue(req);
+	struct uring_bpf_data *data = io_kiocb_to_cmd(req, struct uring_bpf_data);
+	int ret;
+
+	data->issue_flags = issue_flags;
+	ret = __io_uring_bpf_issue(req);
+	data->issue_flags = 0;
+	return ret;
 }
 
 void io_uring_bpf_fail(struct io_kiocb *req)
@@ -291,6 +300,235 @@ static struct bpf_struct_ops bpf_uring_bpf_ops = {
 	.owner = THIS_MODULE,
 };
 
+/*
+ * Helper to copy data between two iov_iters using page extraction.
+ * Extracts pages from source iterator and copies them to destination.
+ * Returns number of bytes copied or negative error code.
+ */
+static ssize_t io_bpf_copy_iters(struct iov_iter *src, struct iov_iter *dst,
+				 size_t len)
+{
+#define MAX_PAGES_PER_LOOP 32
+	struct page *pages[MAX_PAGES_PER_LOOP];
+	size_t total_copied = 0;
+	bool need_unpin;
+
+	need_unpin = iov_iter_extract_will_pin(src);
+
+	while (len > 0) {
+		struct page **page_array = pages;
+		size_t offset, copied = 0;
+		ssize_t extracted;
+		unsigned int nr_pages;
+		size_t chunk_len;
+		int i;
+
+		chunk_len = min_t(size_t, len, MAX_PAGES_PER_LOOP * PAGE_SIZE);
+		extracted = iov_iter_extract_pages(src, &page_array, chunk_len,
+						   MAX_PAGES_PER_LOOP, 0, &offset);
+		if (extracted <= 0) {
+			if (total_copied > 0)
+				break;
+			return extracted < 0 ? extracted : -EFAULT;
+		}
+
+		nr_pages = DIV_ROUND_UP(offset + extracted, PAGE_SIZE);
+
+		for (i = 0; i < nr_pages && copied < extracted; i++) {
+			size_t page_offset = (i == 0) ? offset : 0;
+			size_t page_len = min_t(size_t, extracted - copied,
+						PAGE_SIZE - page_offset);
+			size_t n;
+
+			n = copy_page_to_iter(page_array[i], page_offset, page_len, dst);
+			copied += n;
+			if (n < page_len)
+				break;
+		}
+
+		if (need_unpin)
+			unpin_user_pages(page_array, nr_pages);
+
+		total_copied += copied;
+		len -= copied;
+
+		if (copied < extracted)
+			break;
+	}
+
+	return total_copied;
+#undef MAX_PAGES_PER_LOOP
+}
+
+/*
+ * Helper to import fixed buffer (FIXED or KFIXED).
+ * Must be called with submit lock held.
+ *
+ * FIXED: addr is absolute userspace address within buffer
+ * KFIXED: addr is offset from buffer start
+ *
+ * Returns node with incremented refcount on success, ERR_PTR on failure.
+ */
+static struct io_rsrc_node *io_bpf_import_fixed_buf(struct io_ring_ctx *ctx,
+						    struct iov_iter *iter,
+						    const struct io_bpf_buf_desc *desc,
+						    int ddir)
+{
+	struct io_rsrc_node *node;
+	struct io_mapped_ubuf *imu;
+	int ret;
+
+	node = io_rsrc_node_lookup(&ctx->buf_table, desc->buf_index);
+	if (!node)
+		return ERR_PTR(-EFAULT);
+
+	imu = node->buf;
+	if (!(imu->dir & (1 << ddir)))
+		return ERR_PTR(-EFAULT);
+
+	node->refs++;
+
+	ret = io_import_fixed(ddir, iter, imu, desc->addr, desc->len);
+	if (ret) {
+		node->refs--;
+		return ERR_PTR(ret);
+	}
+
+	return node;
+}
+
+/*
+ * Helper to import registered vectored buffer (KVEC).
+ * Must be called with submit lock held.
+ *
+ * addr: userspace iovec pointer
+ * len: number of iovecs
+ * buf_index: registered buffer index
+ *
+ * Returns node with incremented refcount on success, ERR_PTR on failure.
+ * Caller must call io_vec_free(vec) after use.
+ */
+static struct io_rsrc_node *io_bpf_import_reg_vec(struct io_ring_ctx *ctx,
+						   struct iov_iter *iter,
+						   const struct io_bpf_buf_desc *desc,
+						   int ddir, struct iou_vec *vec)
+{
+	struct io_rsrc_node *node;
+	struct io_mapped_ubuf *imu;
+	int ret;
+
+	node = io_rsrc_node_lookup(&ctx->buf_table, desc->buf_index);
+	if (!node)
+		return ERR_PTR(-EFAULT);
+
+	imu = node->buf;
+	if (!(imu->dir & (1 << ddir)))
+		return ERR_PTR(-EFAULT);
+
+	node->refs++;
+
+	/* Prepare iovec from userspace */
+	ret = __io_prep_reg_iovec(vec, u64_to_user_ptr(desc->addr),
+				  desc->len, ctx->compat, NULL);
+	if (ret)
+		goto err;
+
+	/* Import vectored buffer from registered buffer */
+	ret = __io_import_reg_vec(ddir, iter, imu, vec, desc->len, NULL);
+	if (ret)
+		goto err;
+
+	return node;
+err:
+	node->refs--;
+	return ERR_PTR(ret);
+}
+
+/*
+ * Helper to import a vectored user buffer (VEC) into iou_vec.
+ * Allocates space in vec and copies iovec from userspace.
+ *
+ * Returns 0 on success, negative error code on failure.
+ * Caller must call io_vec_free(vec) after use.
+ */
+static int io_bpf_import_vec_buf(struct io_ring_ctx *ctx,
+				 struct iov_iter *iter,
+				 const struct io_bpf_buf_desc *desc,
+				 int ddir, struct iou_vec *vec)
+{
+	unsigned nr_vecs = desc->len;
+	struct iovec *iov;
+	size_t total_len = 0;
+	void *res;
+	int ret, i;
+
+	if (nr_vecs > vec->nr) {
+		ret = io_vec_realloc(vec, nr_vecs);
+		if (ret)
+			return ret;
+	}
+
+	iov = vec->iovec;
+	res = iovec_from_user(u64_to_user_ptr(desc->addr), nr_vecs,
+			      nr_vecs, iov, ctx->compat);
+	if (IS_ERR(res))
+		return PTR_ERR(res);
+
+	for (i = 0; i < nr_vecs; i++)
+		total_len += iov[i].iov_len;
+
+	iov_iter_init(iter, ddir, iov, nr_vecs, total_len);
+	return 0;
+}
+
+/*
+ * Helper to import a buffer into an iov_iter based on io_bpf_buf_desc.
+ * Supports all 5 buffer types: USER, FIXED, VEC, KFIXED, KVEC.
+ * Must be called with submit lock held for FIXED/KFIXED/KVEC types.
+ *
+ * @ctx: ring context
+ * @iter: output iterator
+ * @desc: buffer descriptor
+ * @ddir: direction (ITER_SOURCE for source, ITER_DEST for destination)
+ * @vec: iou_vec for VEC/KVEC types (caller must call io_vec_free after use)
+ *
+ * Returns node pointer (may be NULL for USER/VEC), or ERR_PTR on failure.
+ * Caller must drop node reference when done if non-NULL.
+ */
+static struct io_rsrc_node *io_bpf_import_buffer(struct io_ring_ctx *ctx,
+						 struct iov_iter *iter,
+						 const struct io_bpf_buf_desc *desc,
+						 int ddir, struct iou_vec *vec)
+{
+	int ret;
+
+	switch (desc->type) {
+	case IO_BPF_BUF_USER:
+		/* Plain user buffer */
+		ret = import_ubuf(ddir, u64_to_user_ptr(desc->addr),
+				  desc->len, iter);
+		return ret ? ERR_PTR(ret) : NULL;
+
+	case IO_BPF_BUF_FIXED:
+	case IO_BPF_BUF_KFIXED:
+		/* FIXED: addr is absolute address within buffer */
+		/* KFIXED: addr is offset from buffer start */
+		return io_bpf_import_fixed_buf(ctx, iter, desc, ddir);
+
+	case IO_BPF_BUF_VEC:
+		/* Vectored user buffer - addr is iovec ptr, len is nr_vecs */
+		ret = io_bpf_import_vec_buf(ctx, iter, desc, ddir, vec);
+		return ret ? ERR_PTR(ret) : NULL;
+
+	case IO_BPF_BUF_REG_VEC:
+		/* Registered vectored buffer */
+		return io_bpf_import_reg_vec(ctx, iter, desc, ddir, vec);
+
+	default:
+		return ERR_PTR(-EINVAL);
+	}
+}
+
 __bpf_kfunc_start_defs();
 __bpf_kfunc void uring_bpf_set_result(struct uring_bpf_data *data, int res)
 {
@@ -300,10 +538,90 @@ __bpf_kfunc void uring_bpf_set_result(struct uring_bpf_data *data, int res)
 		req_set_fail(req);
 	io_req_set_res(req, res, 0);
 }
+
+/**
+ * uring_bpf_memcpy - Copy data between io_uring BPF buffers
+ * @data: BPF request data containing request context
+ * @dst: Destination buffer descriptor
+ * @src: Source buffer descriptor
+ *
+ * Copies data from source buffer to destination buffer.
+ * Supports all 5 buffer types: USER, FIXED, VEC, KFIXED, REG_VEC.
+ * The copy length is min of actual buffer sizes (for VEC types,
+ * total bytes across all vectors, not nr_vecs).
+ *
+ * Returns: Number of bytes copied on success, negative error code on failure
+ */
+__bpf_kfunc ssize_t uring_bpf_memcpy(const struct uring_bpf_data *data,
+				     struct io_bpf_buf_desc *dst,
+				     struct io_bpf_buf_desc *src)
+{
+	struct io_kiocb *req = cmd_to_io_kiocb((void *)data);
+	struct io_ring_ctx *ctx = req->ctx;
+	unsigned int issue_flags = data->issue_flags;
+	struct io_rsrc_node *src_node, *dst_node;
+	struct iov_iter src_iter, dst_iter;
+	struct iou_vec src_vec = {};
+	struct iou_vec dst_vec = {};
+	ssize_t ret;
+	size_t len;
+
+	/* Validate buffer types */
+	if (src->type > IO_BPF_BUF_REG_VEC || dst->type > IO_BPF_BUF_REG_VEC)
+		return -EINVAL;
+
+	io_ring_submit_lock(ctx, issue_flags);
+
+	/* Import source buffer */
+	src_node = io_bpf_import_buffer(ctx, &src_iter, src, ITER_SOURCE,
+					&src_vec);
+	if (IS_ERR(src_node)) {
+		ret = PTR_ERR(src_node);
+		goto unlock;
+	}
+
+	/* Import destination buffer */
+	dst_node = io_bpf_import_buffer(ctx, &dst_iter, dst, ITER_DEST,
+					&dst_vec);
+	if (IS_ERR(dst_node)) {
+		ret = PTR_ERR(dst_node);
+		goto put_src;
+	}
+
+	/*
+	 * Calculate copy length from actual iterator sizes.
+	 * For VEC types, desc->len is nr_vecs, not total bytes.
+	 */
+	len = min(iov_iter_count(&src_iter), iov_iter_count(&dst_iter));
+	if (!len) {
+		ret = 0;
+		goto put_dst;
+	}
+	if (len > MAX_RW_COUNT) {
+		ret = -EINVAL;
+		goto put_dst;
+	}
+
+	/* Copy data between iterators */
+	ret = io_bpf_copy_iters(&src_iter, &dst_iter, len);
+
+put_dst:
+	io_vec_free(&dst_vec);
+	if (dst_node)
+		io_put_rsrc_node(ctx, dst_node);
+put_src:
+	io_vec_free(&src_vec);
+	if (src_node)
+		io_put_rsrc_node(ctx, src_node);
+unlock:
+	io_ring_submit_unlock(ctx, issue_flags);
+	return ret;
+}
 __bpf_kfunc_end_defs();
 
 BTF_KFUNCS_START(uring_bpf_kfuncs)
 BTF_ID_FLAGS(func, uring_bpf_set_result)
+BTF_ID_FLAGS(func, uring_bpf_memcpy)
 BTF_KFUNCS_END(uring_bpf_kfuncs)
 
 static const struct btf_kfunc_id_set uring_kfunc_set = {
diff --git a/io_uring/bpf_op.h b/io_uring/bpf_op.h
index 9de0606f5d25..6004fb906983 100644
--- a/io_uring/bpf_op.h
+++ b/io_uring/bpf_op.h
@@ -13,10 +13,11 @@ struct uring_bpf_data {
 	void				*req_data;  /* not for bpf prog */
 	const struct uring_bpf_ops	*ops;
 	u32				opf;
+	u32				issue_flags; /* io_uring issue flags */
 
 	/* writeable for bpf prog */
 	u8              pdu[64 - sizeof(void *) -
-		sizeof(struct uring_bpf_ops *) - sizeof(u32)];
+		sizeof(struct uring_bpf_ops *) - 2 * sizeof(u32)];
 };
 
 typedef int (*uring_bpf_prep_t)(struct uring_bpf_data *data,
-- 
2.47.0


  parent reply	other threads:[~2026-01-06 10:12 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-01-06 10:11 [PATCH v2 0/13] io_uring: add IORING_OP_BPF for extending io_uring Ming Lei
2026-01-06 10:11 ` [PATCH V2 01/13] io_uring: make io_import_fixed() global Ming Lei
2026-01-06 10:11 ` [PATCH V2 02/13] io_uring: refactor io_prep_reg_iovec() for BPF kfunc use Ming Lei
2026-01-06 10:11 ` [PATCH V2 03/13] io_uring: refactor io_import_reg_vec() " Ming Lei
2026-01-06 10:11 ` [PATCH V2 04/13] io_uring: prepare for extending io_uring with bpf Ming Lei
2026-01-06 10:11 ` [PATCH V2 05/13] io_uring: bpf: extend io_uring with bpf struct_ops Ming Lei
2026-01-06 10:11 ` [PATCH V2 06/13] io_uring: bpf: implement struct_ops registration Ming Lei
2026-01-06 10:11 ` [PATCH V2 07/13] io_uring: bpf: add BPF buffer descriptor for IORING_OP_BPF Ming Lei
2026-01-06 10:11 ` Ming Lei [this message]
2026-01-06 10:11 ` [PATCH V2 09/13] selftests/io_uring: update mini liburing Ming Lei
2026-01-06 10:11 ` [PATCH V2 10/13] selftests/io_uring: add BPF struct_ops and kfunc tests Ming Lei
2026-01-06 10:11 ` [PATCH V2 11/13] selftests/io_uring: add bpf_memcpy selftest for uring_bpf_memcpy() kfunc Ming Lei
2026-01-06 10:11 ` [PATCH V2 12/13] selftests/io_uring: add copy_user_to_fixed() and copy_fixed_to_user() bpf_memcpy tests Ming Lei
2026-01-06 10:11 ` [PATCH V2 13/13] selftests/io_uring: add copy_user_to_reg_vec() and copy_reg_vec_to_user() " Ming Lei

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260106101126.4064990-9-ming.lei@redhat.com \
    --to=ming.lei@redhat.com \
    --cc=asml.silence@gmail.com \
    --cc=axboe@kernel.dk \
    --cc=csander@purestorage.com \
    --cc=io-uring@vger.kernel.org \
    --cc=metze@samba.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox