public inbox for [email protected]
 help / color / mirror / Atom feed
From: Pavel Begunkov <[email protected]>
To: [email protected]
Cc: [email protected]
Subject: [PATCH v4 2/9] io_uring: add infra for importing vectored reg buffers
Date: Fri,  7 Mar 2025 16:00:30 +0000	[thread overview]
Message-ID: <60bd246b1249476a6996407c1dbc38ef6febad14.1741362889.git.asml.silence@gmail.com> (raw)
In-Reply-To: <[email protected]>

Add io_import_reg_vec(), which will be responsible for importing
vectored registered buffers. The function might reallocate the vector,
but it'd try to do the conversion in place first, which is why it's
required of the user to pad the iovec to the right border of the cache.

Overlapping also depends on struct iovec being larger than bvec, which
is not the case on e.g. 32 bit architectures. Don't try to complicate
this case and make sure vectors never overlap, it'll be improved later.

Signed-off-by: Pavel Begunkov <[email protected]>
---
 include/linux/io_uring_types.h |   7 +-
 io_uring/rsrc.c                | 128 +++++++++++++++++++++++++++++++++
 io_uring/rsrc.h                |   5 ++
 3 files changed, 138 insertions(+), 2 deletions(-)

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 9101f12d21ef..cc84f6e5a64c 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -111,8 +111,11 @@ struct io_uring_task {
 };
 
 struct iou_vec {
-	struct iovec		*iovec;
-	unsigned		nr;
+	union {
+		struct iovec	*iovec;
+		struct bio_vec	*bvec;
+	};
+	unsigned		nr; /* number of struct iovec it can hold */
 };
 
 struct io_uring {
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index bac509f85c80..71fe47facd4c 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -1269,3 +1269,131 @@ void io_vec_free(struct iou_vec *iv)
 	iv->iovec = NULL;
 	iv->nr = 0;
 }
+
+int io_vec_realloc(struct iou_vec *iv, unsigned nr_entries)
+{
+	gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
+	struct iovec *iov;
+
+	iov = kmalloc_array(nr_entries, sizeof(iov[0]), gfp);
+	if (!iov)
+		return -ENOMEM;
+
+	io_vec_free(iv);
+	iv->iovec = iov;
+	iv->nr = nr_entries;
+	return 0;
+}
+
+static int io_vec_fill_bvec(int ddir, struct iov_iter *iter,
+				struct io_mapped_ubuf *imu,
+				struct iovec *iovec, unsigned nr_iovs,
+				struct iou_vec *vec)
+{
+	unsigned long folio_size = 1 << imu->folio_shift;
+	unsigned long folio_mask = folio_size - 1;
+	u64 folio_addr = imu->ubuf & ~folio_mask;
+	struct bio_vec *res_bvec = vec->bvec;
+	size_t total_len = 0;
+	unsigned bvec_idx = 0;
+	unsigned iov_idx;
+
+	for (iov_idx = 0; iov_idx < nr_iovs; iov_idx++) {
+		size_t iov_len = iovec[iov_idx].iov_len;
+		u64 buf_addr = (u64)(uintptr_t)iovec[iov_idx].iov_base;
+		struct bio_vec *src_bvec;
+		size_t offset;
+		u64 buf_end;
+
+		if (unlikely(check_add_overflow(buf_addr, (u64)iov_len, &buf_end)))
+			return -EFAULT;
+		if (unlikely(buf_addr < imu->ubuf || buf_end > (imu->ubuf + imu->len)))
+			return -EFAULT;
+		if (unlikely(!iov_len))
+			return -EFAULT;
+		if (unlikely(check_add_overflow(total_len, iov_len, &total_len)))
+			return -EOVERFLOW;
+
+		/* by using folio address it also accounts for bvec offset */
+		offset = buf_addr - folio_addr;
+		src_bvec = imu->bvec + (offset >> imu->folio_shift);
+		offset &= folio_mask;
+
+		for (; iov_len; offset = 0, bvec_idx++, src_bvec++) {
+			size_t seg_size = min_t(size_t, iov_len,
+						folio_size - offset);
+
+			bvec_set_page(&res_bvec[bvec_idx],
+				      src_bvec->bv_page, seg_size, offset);
+			iov_len -= seg_size;
+		}
+	}
+	if (total_len > MAX_RW_COUNT)
+		return -EINVAL;
+
+	iov_iter_bvec(iter, ddir, res_bvec, bvec_idx, total_len);
+	return 0;
+}
+
+static int io_estimate_bvec_size(struct iovec *iov, unsigned nr_iovs,
+				 struct io_mapped_ubuf *imu)
+{
+	unsigned shift = imu->folio_shift;
+	size_t max_segs = 0;
+	unsigned i;
+
+	for (i = 0; i < nr_iovs; i++)
+		max_segs += (iov[i].iov_len >> shift) + 2;
+	return max_segs;
+}
+
+int io_import_reg_vec(int ddir, struct iov_iter *iter,
+			struct io_kiocb *req, struct iou_vec *vec,
+			unsigned nr_iovs, unsigned iovec_off,
+			unsigned issue_flags)
+{
+	struct io_rsrc_node *node;
+	struct io_mapped_ubuf *imu;
+	struct iovec *iov;
+	unsigned nr_segs;
+
+	node = io_find_buf_node(req, issue_flags);
+	if (!node)
+		return -EFAULT;
+	imu = node->buf;
+	if (imu->is_kbuf)
+		return -EOPNOTSUPP;
+	if (!(imu->dir & (1 << ddir)))
+		return -EFAULT;
+
+	iov = vec->iovec + iovec_off;
+	nr_segs = io_estimate_bvec_size(iov, nr_iovs, imu);
+
+	if (sizeof(struct bio_vec) > sizeof(struct iovec)) {
+		size_t bvec_bytes;
+
+		bvec_bytes = nr_segs * sizeof(struct bio_vec);
+		nr_segs = (bvec_bytes + sizeof(*iov) - 1) / sizeof(*iov);
+		nr_segs += nr_iovs;
+	}
+
+	if (WARN_ON_ONCE(iovec_off + nr_iovs != vec->nr) ||
+	    nr_segs > vec->nr) {
+		struct iou_vec tmp_vec = {};
+		int ret;
+
+		ret = io_vec_realloc(&tmp_vec, nr_segs);
+		if (ret)
+			return ret;
+
+		iovec_off = tmp_vec.nr - nr_iovs;
+		memcpy(tmp_vec.iovec + iovec_off, iov, sizeof(*iov) * nr_iovs);
+		io_vec_free(vec);
+
+		*vec = tmp_vec;
+		iov = vec->iovec + iovec_off;
+		req->flags |= REQ_F_NEED_CLEANUP;
+	}
+
+	return io_vec_fill_bvec(ddir, iter, imu, iov, nr_iovs, vec);
+}
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index f35e1a07619a..0d5c18296130 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -61,6 +61,10 @@ struct io_rsrc_node *io_find_buf_node(struct io_kiocb *req,
 int io_import_reg_buf(struct io_kiocb *req, struct iov_iter *iter,
 			u64 buf_addr, size_t len, int ddir,
 			unsigned issue_flags);
+int io_import_reg_vec(int ddir, struct iov_iter *iter,
+			struct io_kiocb *req, struct iou_vec *vec,
+			unsigned nr_iovs, unsigned iovec_off,
+			unsigned issue_flags);
 
 int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg);
 int io_sqe_buffers_unregister(struct io_ring_ctx *ctx);
@@ -146,6 +150,7 @@ static inline void __io_unaccount_mem(struct user_struct *user,
 }
 
 void io_vec_free(struct iou_vec *iv);
+int io_vec_realloc(struct iou_vec *iv, unsigned nr_entries);
 
 static inline void io_vec_reset_iovec(struct iou_vec *iv,
 				      struct iovec *iovec, unsigned nr)
-- 
2.48.1


  parent reply	other threads:[~2025-03-07 15:59 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-03-07 16:00 [PATCH v4 0/9] Add support for vectored registered buffers Pavel Begunkov
2025-03-07 16:00 ` [PATCH v4 1/9] io_uring: introduce struct iou_vec Pavel Begunkov
2025-03-07 16:00 ` Pavel Begunkov [this message]
2025-03-07 16:00 ` [PATCH v4 3/9] io_uring/rw: implement vectored registered rw Pavel Begunkov
2025-03-07 16:00 ` [PATCH v4 4/9] io_uring/rw: defer reg buf vec import Pavel Begunkov
2025-03-07 16:00 ` [PATCH v4 5/9] io_uring/net: combine msghdr copy Pavel Begunkov
2025-03-07 16:00 ` [PATCH v4 6/9] io_uring/net: pull vec alloc out of msghdr import Pavel Begunkov
2025-03-07 16:00 ` [PATCH v4 7/9] io_uring/net: convert to struct iou_vec Pavel Begunkov
2025-03-07 16:00 ` [PATCH v4 8/9] io_uring/net: implement vectored reg bufs for zctx Pavel Begunkov
2025-03-07 16:00 ` [PATCH v4 9/9] io_uring: cap cached iovec/bvec size Pavel Begunkov
2025-03-07 16:40 ` [PATCH v4 0/9] Add support for vectored registered buffers Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=60bd246b1249476a6996407c1dbc38ef6febad14.1741362889.git.asml.silence@gmail.com \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox