* [PATCH v3 0/9] Add support for vectored registered buffers
@ 2025-03-07 15:49 Pavel Begunkov
2025-03-07 15:49 ` [PATCH v3 1/9] io_uring: introduce struct iou_vec Pavel Begunkov
` (8 more replies)
0 siblings, 9 replies; 10+ messages in thread
From: Pavel Begunkov @ 2025-03-07 15:49 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence
Add registered buffer support for vectored io_uring operations. That
allows passing an iovec, all entries of which must belong to and
point into the same registered buffer specified by sqe->buf_index.
The series covers zerocopy sendmsg and reads / writes. Reads and
writes are implemented as new opcodes, while zerocopy sendmsg
reuses IORING_RECVSEND_FIXED_BUF for the API.
Results are aligned to what one would expect from registered buffers:
t/io_uring + nullblk, single segment 16K:
34 -> 46 GiB/s
examples/send-zerocopy.c default send size (64KB):
82558 -> 123855 MB/s
The series is placed on top of 6.15 + zcrx + epoll.
v3:
Handle 32 bit where bvec is larger than iovec
v2:
Nowarn alloc
Cap bvec caching
Check length overflow
Reject 0 len segments
Check op direction
Other minor changes
Pavel Begunkov (9):
io_uring: introduce struct iou_vec
io_uring: add infra for importing vectored reg buffers
io_uring/rw: implement vectored registered rw
io_uring/rw: defer reg buf vec import
io_uring/net: combine msghdr copy
io_uring/net: pull vec alloc out of msghdr import
io_uring/net: convert to struct iou_vec
io_uring/net: implement vectored reg bufs for zctx
io_uring: cap cached iovec/bvec size
include/linux/io_uring_types.h | 11 ++
include/uapi/linux/io_uring.h | 2 +
io_uring/alloc_cache.h | 9 --
io_uring/net.c | 180 +++++++++++++++++++++------------
io_uring/net.h | 6 +-
io_uring/opdef.c | 39 +++++++
io_uring/rsrc.c | 137 +++++++++++++++++++++++++
io_uring/rsrc.h | 24 +++++
io_uring/rw.c | 99 ++++++++++++++++--
io_uring/rw.h | 6 +-
10 files changed, 421 insertions(+), 92 deletions(-)
--
2.48.1
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH v3 1/9] io_uring: introduce struct iou_vec
2025-03-07 15:49 [PATCH v3 0/9] Add support for vectored registered buffers Pavel Begunkov
@ 2025-03-07 15:49 ` Pavel Begunkov
2025-03-07 15:49 ` [PATCH v3 2/9] io_uring: add infra for importing vectored reg buffers Pavel Begunkov
` (7 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Pavel Begunkov @ 2025-03-07 15:49 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence
I need a convenient way to pass around and work with an iovec+size
pair, so put them into a structure and make use of it in rw.c.
Signed-off-by: Pavel Begunkov <[email protected]>
---
include/linux/io_uring_types.h | 5 +++++
io_uring/rsrc.c | 9 +++++++++
io_uring/rsrc.h | 17 +++++++++++++++++
io_uring/rw.c | 17 +++++++----------
io_uring/rw.h | 4 ++--
5 files changed, 40 insertions(+), 12 deletions(-)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 35fc241c4672..9101f12d21ef 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -110,6 +110,11 @@ struct io_uring_task {
} ____cacheline_aligned_in_smp;
};
+struct iou_vec {
+ struct iovec *iovec;
+ unsigned nr;
+};
+
struct io_uring {
u32 head;
u32 tail;
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 28783f1dde00..bac509f85c80 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -1260,3 +1260,12 @@ int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg)
fput(file);
return ret;
}
+
+void io_vec_free(struct iou_vec *iv)
+{
+ if (!iv->iovec)
+ return;
+ kfree(iv->iovec);
+ iv->iovec = NULL;
+ iv->nr = 0;
+}
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index 284e300e63fb..ff78ead6bc75 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -4,6 +4,7 @@
#include <linux/io_uring_types.h>
#include <linux/lockdep.h>
+#include <linux/io_uring_types.h>
enum {
IORING_RSRC_FILE = 0,
@@ -145,4 +146,20 @@ static inline void __io_unaccount_mem(struct user_struct *user,
atomic_long_sub(nr_pages, &user->locked_vm);
}
+void io_vec_free(struct iou_vec *iv);
+
+static inline void io_vec_reset_iovec(struct iou_vec *iv,
+ struct iovec *iovec, unsigned nr)
+{
+ io_vec_free(iv);
+ iv->iovec = iovec;
+ iv->nr = nr;
+}
+
+static inline void io_alloc_cache_vec_kasan(struct iou_vec *iv)
+{
+ if (IS_ENABLED(CONFIG_KASAN))
+ io_vec_free(iv);
+}
+
#endif
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 5ee9f8949e8b..ad7f647d48e9 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -87,9 +87,9 @@ static int io_import_vec(int ddir, struct io_kiocb *req,
int ret, nr_segs;
struct iovec *iov;
- if (io->free_iovec) {
- nr_segs = io->free_iov_nr;
- iov = io->free_iovec;
+ if (io->vec.iovec) {
+ nr_segs = io->vec.nr;
+ iov = io->vec.iovec;
} else {
nr_segs = 1;
iov = &io->fast_iov;
@@ -101,9 +101,7 @@ static int io_import_vec(int ddir, struct io_kiocb *req,
return ret;
if (iov) {
req->flags |= REQ_F_NEED_CLEANUP;
- io->free_iov_nr = io->iter.nr_segs;
- kfree(io->free_iovec);
- io->free_iovec = iov;
+ io_vec_reset_iovec(&io->vec, iov, io->iter.nr_segs);
}
return 0;
}
@@ -151,7 +149,7 @@ static void io_rw_recycle(struct io_kiocb *req, unsigned int issue_flags)
if (unlikely(issue_flags & IO_URING_F_UNLOCKED))
return;
- io_alloc_cache_kasan(&rw->free_iovec, &rw->free_iov_nr);
+ io_alloc_cache_vec_kasan(&rw->vec);
if (io_alloc_cache_put(&req->ctx->rw_cache, rw)) {
req->async_data = NULL;
req->flags &= ~REQ_F_ASYNC_DATA;
@@ -201,7 +199,7 @@ static int io_rw_alloc_async(struct io_kiocb *req)
rw = io_uring_alloc_async_data(&ctx->rw_cache, req);
if (!rw)
return -ENOMEM;
- if (rw->free_iovec)
+ if (rw->vec.iovec)
req->flags |= REQ_F_NEED_CLEANUP;
rw->bytes_done = 0;
return 0;
@@ -1327,7 +1325,6 @@ void io_rw_cache_free(const void *entry)
{
struct io_async_rw *rw = (struct io_async_rw *) entry;
- if (rw->free_iovec)
- kfree(rw->free_iovec);
+ io_vec_free(&rw->vec);
kfree(rw);
}
diff --git a/io_uring/rw.h b/io_uring/rw.h
index bf121b81ebe8..529fd2f96a7f 100644
--- a/io_uring/rw.h
+++ b/io_uring/rw.h
@@ -9,13 +9,13 @@ struct io_meta_state {
};
struct io_async_rw {
+ struct iou_vec vec;
size_t bytes_done;
- struct iovec *free_iovec;
+
struct_group(clear,
struct iov_iter iter;
struct iov_iter_state iter_state;
struct iovec fast_iov;
- int free_iov_nr;
/*
* wpq is for buffered io, while meta fields are used with
* direct io
--
2.48.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v3 2/9] io_uring: add infra for importing vectored reg buffers
2025-03-07 15:49 [PATCH v3 0/9] Add support for vectored registered buffers Pavel Begunkov
2025-03-07 15:49 ` [PATCH v3 1/9] io_uring: introduce struct iou_vec Pavel Begunkov
@ 2025-03-07 15:49 ` Pavel Begunkov
2025-03-07 15:49 ` [PATCH v3 3/9] io_uring/rw: implement vectored registered rw Pavel Begunkov
` (6 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Pavel Begunkov @ 2025-03-07 15:49 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence
Add io_import_reg_vec(), which will be responsible for importing
vectored registered buffers. The function might reallocate the vector,
but it'd try to do the conversion in place first, which is why it's
required of the user to pad the iovec to the right border of the cache.
Overlapping also depends on struct iovec being larger than bvec, which
is not the case on e.g. 32 bit architectures. Don't try to complicate
this case and make sure vectors never overlap, it'll be improved later.
Signed-off-by: Pavel Begunkov <[email protected]>
---
include/linux/io_uring_types.h | 7 +-
io_uring/rsrc.c | 128 +++++++++++++++++++++++++++++++++
io_uring/rsrc.h | 5 ++
3 files changed, 138 insertions(+), 2 deletions(-)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 9101f12d21ef..cc84f6e5a64c 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -111,8 +111,11 @@ struct io_uring_task {
};
struct iou_vec {
- struct iovec *iovec;
- unsigned nr;
+ union {
+ struct iovec *iovec;
+ struct bio_vec *bvec;
+ };
+ unsigned nr; /* number of struct iovec it can hold */
};
struct io_uring {
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index bac509f85c80..8327c0ffca68 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -1269,3 +1269,131 @@ void io_vec_free(struct iou_vec *iv)
iv->iovec = NULL;
iv->nr = 0;
}
+
+int io_vec_realloc(struct iou_vec *iv, unsigned nr_entries)
+{
+ gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
+ struct iovec *iov;
+
+ iov = kmalloc_array(nr_entries, sizeof(iov[0]), gfp);
+ if (!iov)
+ return -ENOMEM;
+
+ io_vec_free(iv);
+ iv->iovec = iov;
+ iv->nr = nr_entries;
+ return 0;
+}
+
+static int io_vec_fill_bvec(int ddir, struct iov_iter *iter,
+ struct io_mapped_ubuf *imu,
+ struct iovec *iovec, unsigned nr_iovs,
+ struct iou_vec *vec)
+{
+ unsigned long folio_size = 1 << imu->folio_shift;
+ unsigned long folio_mask = folio_size - 1;
+ u64 folio_addr = imu->ubuf & ~folio_mask;
+ struct bio_vec *res_bvec = vec->bvec;
+ size_t total_len = 0;
+ unsigned bvec_idx = 0;
+ unsigned iov_idx;
+
+ for (iov_idx = 0; iov_idx < nr_iovs; iov_idx++) {
+ size_t iov_len = iovec[iov_idx].iov_len;
+ u64 buf_addr = (u64)iovec[iov_idx].iov_base;
+ struct bio_vec *src_bvec;
+ size_t offset;
+ u64 buf_end;
+
+ if (unlikely(check_add_overflow(buf_addr, (u64)iov_len, &buf_end)))
+ return -EFAULT;
+ if (unlikely(buf_addr < imu->ubuf || buf_end > (imu->ubuf + imu->len)))
+ return -EFAULT;
+ if (unlikely(!iov_len))
+ return -EFAULT;
+ if (unlikely(check_add_overflow(total_len, iov_len, &total_len)))
+ return -EOVERFLOW;
+
+ /* by using folio address it also accounts for bvec offset */
+ offset = buf_addr - folio_addr;
+ src_bvec = imu->bvec + (offset >> imu->folio_shift);
+ offset &= folio_mask;
+
+ for (; iov_len; offset = 0, bvec_idx++, src_bvec++) {
+ size_t seg_size = min_t(size_t, iov_len,
+ folio_size - offset);
+
+ bvec_set_page(&res_bvec[bvec_idx],
+ src_bvec->bv_page, seg_size, offset);
+ iov_len -= seg_size;
+ }
+ }
+ if (total_len > MAX_RW_COUNT)
+ return -EINVAL;
+
+ iov_iter_bvec(iter, ddir, res_bvec, bvec_idx, total_len);
+ return 0;
+}
+
+static int io_estimate_bvec_size(struct iovec *iov, unsigned nr_iovs,
+ struct io_mapped_ubuf *imu)
+{
+ unsigned shift = imu->folio_shift;
+ size_t max_segs = 0;
+ unsigned i;
+
+ for (i = 0; i < nr_iovs; i++)
+ max_segs += (iov[i].iov_len >> shift) + 2;
+ return max_segs;
+}
+
+int io_import_reg_vec(int ddir, struct iov_iter *iter,
+ struct io_kiocb *req, struct iou_vec *vec,
+ unsigned nr_iovs, unsigned iovec_off,
+ unsigned issue_flags)
+{
+ struct io_rsrc_node *node;
+ struct io_mapped_ubuf *imu;
+ struct iovec *iov;
+ unsigned nr_segs;
+
+ node = io_find_buf_node(req, issue_flags);
+ if (!node)
+ return -EFAULT;
+ imu = node->buf;
+ if (imu->is_kbuf)
+ return -EOPNOTSUPP;
+ if (!(imu->dir & (1 << ddir)))
+ return -EFAULT;
+
+ iov = vec->iovec + iovec_off;
+ nr_segs = io_estimate_bvec_size(iov, nr_iovs, imu);
+
+ if (sizeof(struct bio_vec) > sizeof(struct iovec)) {
+ size_t bvec_bytes;
+
+ bvec_bytes = nr_segs * sizeof(struct bio_vec);
+ nr_segs = (bvec_bytes + sizeof(*iov) - 1) / sizeof(*iov);
+ nr_segs += nr_iovs;
+ }
+
+ if (WARN_ON_ONCE(iovec_off + nr_iovs != vec->nr) ||
+ nr_segs > vec->nr) {
+ struct iou_vec tmp_vec = {};
+ int ret;
+
+ ret = io_vec_realloc(&tmp_vec, nr_segs);
+ if (ret)
+ return ret;
+
+ iovec_off = tmp_vec.nr - nr_iovs;
+ memcpy(tmp_vec.iovec + iovec_off, iov, sizeof(*iov) * nr_iovs);
+ io_vec_free(vec);
+
+ *vec = tmp_vec;
+ iov = vec->iovec + iovec_off;
+ req->flags |= REQ_F_NEED_CLEANUP;
+ }
+
+ return io_vec_fill_bvec(ddir, iter, imu, iov, nr_iovs, vec);
+}
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index ff78ead6bc75..f1496f7d844f 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -62,6 +62,10 @@ struct io_rsrc_node *io_find_buf_node(struct io_kiocb *req,
int io_import_reg_buf(struct io_kiocb *req, struct iov_iter *iter,
u64 buf_addr, size_t len, int ddir,
unsigned issue_flags);
+int io_import_reg_vec(int ddir, struct iov_iter *iter,
+ struct io_kiocb *req, struct iou_vec *vec,
+ unsigned nr_iovs, unsigned iovec_off,
+ unsigned issue_flags);
int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg);
int io_sqe_buffers_unregister(struct io_ring_ctx *ctx);
@@ -147,6 +151,7 @@ static inline void __io_unaccount_mem(struct user_struct *user,
}
void io_vec_free(struct iou_vec *iv);
+int io_vec_realloc(struct iou_vec *iv, unsigned nr_entries);
static inline void io_vec_reset_iovec(struct iou_vec *iv,
struct iovec *iovec, unsigned nr)
--
2.48.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v3 3/9] io_uring/rw: implement vectored registered rw
2025-03-07 15:49 [PATCH v3 0/9] Add support for vectored registered buffers Pavel Begunkov
2025-03-07 15:49 ` [PATCH v3 1/9] io_uring: introduce struct iou_vec Pavel Begunkov
2025-03-07 15:49 ` [PATCH v3 2/9] io_uring: add infra for importing vectored reg buffers Pavel Begunkov
@ 2025-03-07 15:49 ` Pavel Begunkov
2025-03-07 15:49 ` [PATCH v3 4/9] io_uring/rw: defer reg buf vec import Pavel Begunkov
` (5 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Pavel Begunkov @ 2025-03-07 15:49 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence
Implement registered buffer vectored reads with new opcodes
IORING_OP_WRITEV_FIXED and IORING_OP_READV_FIXED.
Signed-off-by: Pavel Begunkov <[email protected]>
---
include/uapi/linux/io_uring.h | 2 ++
io_uring/opdef.c | 39 +++++++++++++++++++++++++++
io_uring/rw.c | 51 +++++++++++++++++++++++++++++++++++
io_uring/rw.h | 2 ++
4 files changed, 94 insertions(+)
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 3d99bf9bbf61..9e5eec7490bb 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -281,6 +281,8 @@ enum io_uring_op {
IORING_OP_LISTEN,
IORING_OP_RECV_ZC,
IORING_OP_EPOLL_WAIT,
+ IORING_OP_READV_FIXED,
+ IORING_OP_WRITEV_FIXED,
/* this goes last, obviously */
IORING_OP_LAST,
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index db77df513d55..7fd173197b1e 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -540,6 +540,35 @@ const struct io_issue_def io_issue_defs[] = {
.prep = io_eopnotsupp_prep,
#endif
},
+ [IORING_OP_READV_FIXED] = {
+ .needs_file = 1,
+ .unbound_nonreg_file = 1,
+ .pollin = 1,
+ .plug = 1,
+ .audit_skip = 1,
+ .ioprio = 1,
+ .iopoll = 1,
+ .iopoll_queue = 1,
+ .vectored = 1,
+ .async_size = sizeof(struct io_async_rw),
+ .prep = io_prep_readv_fixed,
+ .issue = io_read,
+ },
+ [IORING_OP_WRITEV_FIXED] = {
+ .needs_file = 1,
+ .hash_reg_file = 1,
+ .unbound_nonreg_file = 1,
+ .pollout = 1,
+ .plug = 1,
+ .audit_skip = 1,
+ .ioprio = 1,
+ .iopoll = 1,
+ .iopoll_queue = 1,
+ .vectored = 1,
+ .async_size = sizeof(struct io_async_rw),
+ .prep = io_prep_writev_fixed,
+ .issue = io_write,
+ },
};
const struct io_cold_def io_cold_defs[] = {
@@ -775,6 +804,16 @@ const struct io_cold_def io_cold_defs[] = {
[IORING_OP_EPOLL_WAIT] = {
.name = "EPOLL_WAIT",
},
+ [IORING_OP_READV_FIXED] = {
+ .name = "READV_FIXED",
+ .cleanup = io_readv_writev_cleanup,
+ .fail = io_rw_fail,
+ },
+ [IORING_OP_WRITEV_FIXED] = {
+ .name = "WRITEV_FIXED",
+ .cleanup = io_readv_writev_cleanup,
+ .fail = io_rw_fail,
+ },
};
const char *io_uring_get_opcode(u8 opcode)
diff --git a/io_uring/rw.c b/io_uring/rw.c
index ad7f647d48e9..4c4229f41aaa 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -381,6 +381,57 @@ int io_prep_write_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return __io_prep_rw(req, sqe, ITER_SOURCE);
}
+static int io_rw_prep_reg_vec(struct io_kiocb *req, int ddir)
+{
+ struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
+ struct io_async_rw *io = req->async_data;
+ const struct iovec __user *uvec;
+ size_t uvec_segs = rw->len;
+ struct iovec *iov;
+ int iovec_off, ret;
+ void *res;
+
+ if (uvec_segs > io->vec.nr) {
+ ret = io_vec_realloc(&io->vec, uvec_segs);
+ if (ret)
+ return ret;
+ req->flags |= REQ_F_NEED_CLEANUP;
+ }
+ /* pad iovec to the right */
+ iovec_off = io->vec.nr - uvec_segs;
+ iov = io->vec.iovec + iovec_off;
+ uvec = u64_to_user_ptr(rw->addr);
+ res = iovec_from_user(uvec, uvec_segs, uvec_segs, iov,
+ io_is_compat(req->ctx));
+ if (IS_ERR(res))
+ return PTR_ERR(res);
+
+ ret = io_import_reg_vec(ddir, &io->iter, req, &io->vec,
+ uvec_segs, iovec_off, 0);
+ iov_iter_save_state(&io->iter, &io->iter_state);
+ return ret;
+}
+
+int io_prep_readv_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ int ret;
+
+ ret = __io_prep_rw(req, sqe, ITER_DEST);
+ if (unlikely(ret))
+ return ret;
+ return io_rw_prep_reg_vec(req, ITER_DEST);
+}
+
+int io_prep_writev_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ int ret;
+
+ ret = __io_prep_rw(req, sqe, ITER_SOURCE);
+ if (unlikely(ret))
+ return ret;
+ return io_rw_prep_reg_vec(req, ITER_SOURCE);
+}
+
/*
* Multishot read is prepared just like a normal read/write request, only
* difference is that we set the MULTISHOT flag.
diff --git a/io_uring/rw.h b/io_uring/rw.h
index 529fd2f96a7f..81d6d9a8cf69 100644
--- a/io_uring/rw.h
+++ b/io_uring/rw.h
@@ -32,6 +32,8 @@ struct io_async_rw {
int io_prep_read_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe);
int io_prep_write_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_prep_readv_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_prep_writev_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe);
int io_prep_readv(struct io_kiocb *req, const struct io_uring_sqe *sqe);
int io_prep_writev(struct io_kiocb *req, const struct io_uring_sqe *sqe);
int io_prep_read(struct io_kiocb *req, const struct io_uring_sqe *sqe);
--
2.48.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v3 4/9] io_uring/rw: defer reg buf vec import
2025-03-07 15:49 [PATCH v3 0/9] Add support for vectored registered buffers Pavel Begunkov
` (2 preceding siblings ...)
2025-03-07 15:49 ` [PATCH v3 3/9] io_uring/rw: implement vectored registered rw Pavel Begunkov
@ 2025-03-07 15:49 ` Pavel Begunkov
2025-03-07 15:49 ` [PATCH v3 5/9] io_uring/net: combine msghdr copy Pavel Begunkov
` (4 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Pavel Begunkov @ 2025-03-07 15:49 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence
Import registered buffers for vectored reads and writes later at issue
time as we now do for other fixed ops.
Signed-off-by: Pavel Begunkov <[email protected]>
---
include/linux/io_uring_types.h | 3 +++
io_uring/rw.c | 42 +++++++++++++++++++++++++++-------
2 files changed, 37 insertions(+), 8 deletions(-)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index cc84f6e5a64c..0e87e292bfb5 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -502,6 +502,7 @@ enum {
REQ_F_BUFFERS_COMMIT_BIT,
REQ_F_BUF_NODE_BIT,
REQ_F_HAS_METADATA_BIT,
+ REQ_F_IMPORT_BUFFER_BIT,
/* not a real bit, just to check we're not overflowing the space */
__REQ_F_LAST_BIT,
@@ -584,6 +585,8 @@ enum {
REQ_F_BUF_NODE = IO_REQ_FLAG(REQ_F_BUF_NODE_BIT),
/* request has read/write metadata assigned */
REQ_F_HAS_METADATA = IO_REQ_FLAG(REQ_F_HAS_METADATA_BIT),
+ /* resolve padded iovec to registered buffers */
+ REQ_F_IMPORT_BUFFER = IO_REQ_FLAG(REQ_F_IMPORT_BUFFER_BIT),
};
typedef void (*io_req_tw_func_t)(struct io_kiocb *req, io_tw_token_t tw);
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 4c4229f41aaa..e62f4ce34171 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -381,7 +381,25 @@ int io_prep_write_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return __io_prep_rw(req, sqe, ITER_SOURCE);
}
-static int io_rw_prep_reg_vec(struct io_kiocb *req, int ddir)
+static int io_rw_import_reg_vec(struct io_kiocb *req,
+ struct io_async_rw *io,
+ int ddir, unsigned int issue_flags)
+{
+ struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
+ unsigned uvec_segs = rw->len;
+ unsigned iovec_off = io->vec.nr - uvec_segs;
+ int ret;
+
+ ret = io_import_reg_vec(ddir, &io->iter, req, &io->vec,
+ uvec_segs, iovec_off, issue_flags);
+ if (unlikely(ret))
+ return ret;
+ iov_iter_save_state(&io->iter, &io->iter_state);
+ req->flags &= ~REQ_F_IMPORT_BUFFER;
+ return 0;
+}
+
+static int io_rw_prep_reg_vec(struct io_kiocb *req)
{
struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
struct io_async_rw *io = req->async_data;
@@ -406,10 +424,8 @@ static int io_rw_prep_reg_vec(struct io_kiocb *req, int ddir)
if (IS_ERR(res))
return PTR_ERR(res);
- ret = io_import_reg_vec(ddir, &io->iter, req, &io->vec,
- uvec_segs, iovec_off, 0);
- iov_iter_save_state(&io->iter, &io->iter_state);
- return ret;
+ req->flags |= REQ_F_IMPORT_BUFFER;
+ return 0;
}
int io_prep_readv_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe)
@@ -419,7 +435,7 @@ int io_prep_readv_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe)
ret = __io_prep_rw(req, sqe, ITER_DEST);
if (unlikely(ret))
return ret;
- return io_rw_prep_reg_vec(req, ITER_DEST);
+ return io_rw_prep_reg_vec(req);
}
int io_prep_writev_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe)
@@ -429,7 +445,7 @@ int io_prep_writev_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe)
ret = __io_prep_rw(req, sqe, ITER_SOURCE);
if (unlikely(ret))
return ret;
- return io_rw_prep_reg_vec(req, ITER_SOURCE);
+ return io_rw_prep_reg_vec(req);
}
/*
@@ -906,7 +922,11 @@ static int __io_read(struct io_kiocb *req, unsigned int issue_flags)
ssize_t ret;
loff_t *ppos;
- if (io_do_buffer_select(req)) {
+ if (req->flags & REQ_F_IMPORT_BUFFER) {
+ ret = io_rw_import_reg_vec(req, io, ITER_DEST, issue_flags);
+ if (unlikely(ret))
+ return ret;
+ } else if (io_do_buffer_select(req)) {
ret = io_import_rw_buffer(ITER_DEST, req, io, issue_flags);
if (unlikely(ret < 0))
return ret;
@@ -1117,6 +1137,12 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
ssize_t ret, ret2;
loff_t *ppos;
+ if (req->flags & REQ_F_IMPORT_BUFFER) {
+ ret = io_rw_import_reg_vec(req, io, ITER_SOURCE, issue_flags);
+ if (unlikely(ret))
+ return ret;
+ }
+
ret = io_rw_init_file(req, FMODE_WRITE, WRITE);
if (unlikely(ret))
return ret;
--
2.48.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v3 5/9] io_uring/net: combine msghdr copy
2025-03-07 15:49 [PATCH v3 0/9] Add support for vectored registered buffers Pavel Begunkov
` (3 preceding siblings ...)
2025-03-07 15:49 ` [PATCH v3 4/9] io_uring/rw: defer reg buf vec import Pavel Begunkov
@ 2025-03-07 15:49 ` Pavel Begunkov
2025-03-07 15:49 ` [PATCH v3 6/9] io_uring/net: pull vec alloc out of msghdr import Pavel Begunkov
` (3 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Pavel Begunkov @ 2025-03-07 15:49 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence
Call the compat version from inside of io_msg_copy_hdr() and don't
duplicate it in callers.
Signed-off-by: Pavel Begunkov <[email protected]>
---
io_uring/net.c | 46 +++++++++++++++++++---------------------------
1 file changed, 19 insertions(+), 27 deletions(-)
diff --git a/io_uring/net.c b/io_uring/net.c
index 905d1ee01201..33076bd22c16 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -287,6 +287,24 @@ static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
struct user_msghdr __user *umsg = sr->umsg;
int ret;
+ iomsg->msg.msg_name = &iomsg->addr;
+ iomsg->msg.msg_iter.nr_segs = 0;
+
+ if (io_is_compat(req->ctx)) {
+ struct compat_msghdr cmsg;
+
+ ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ddir, save_addr);
+ if (ret)
+ return ret;
+
+ memset(msg, 0, sizeof(*msg));
+ msg->msg_namelen = cmsg.msg_namelen;
+ msg->msg_controllen = cmsg.msg_controllen;
+ msg->msg_iov = compat_ptr(cmsg.msg_iov);
+ msg->msg_iovlen = cmsg.msg_iovlen;
+ return 0;
+ }
+
ret = io_copy_msghdr_from_user(msg, umsg);
if (unlikely(ret))
return ret;
@@ -323,18 +341,6 @@ static int io_sendmsg_copy_hdr(struct io_kiocb *req,
struct user_msghdr msg;
int ret;
- iomsg->msg.msg_name = &iomsg->addr;
- iomsg->msg.msg_iter.nr_segs = 0;
-
- if (io_is_compat(req->ctx)) {
- struct compat_msghdr cmsg;
-
- ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_SOURCE,
- NULL);
- sr->msg_control = iomsg->msg.msg_control_user;
- return ret;
- }
-
ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_SOURCE, NULL);
/* save msg_control as sys_sendmsg() overwrites it */
sr->msg_control = iomsg->msg.msg_control_user;
@@ -710,21 +716,7 @@ static int io_recvmsg_copy_hdr(struct io_kiocb *req,
struct user_msghdr msg;
int ret;
- iomsg->msg.msg_name = &iomsg->addr;
- iomsg->msg.msg_iter.nr_segs = 0;
-
- if (io_is_compat(req->ctx)) {
- struct compat_msghdr cmsg;
-
- ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_DEST,
- &iomsg->uaddr);
- memset(&msg, 0, sizeof(msg));
- msg.msg_namelen = cmsg.msg_namelen;
- msg.msg_controllen = cmsg.msg_controllen;
- } else {
- ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST, &iomsg->uaddr);
- }
-
+ ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST, &iomsg->uaddr);
if (unlikely(ret))
return ret;
return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen,
--
2.48.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v3 6/9] io_uring/net: pull vec alloc out of msghdr import
2025-03-07 15:49 [PATCH v3 0/9] Add support for vectored registered buffers Pavel Begunkov
` (4 preceding siblings ...)
2025-03-07 15:49 ` [PATCH v3 5/9] io_uring/net: combine msghdr copy Pavel Begunkov
@ 2025-03-07 15:49 ` Pavel Begunkov
2025-03-07 15:49 ` [PATCH v3 7/9] io_uring/net: convert to struct iou_vec Pavel Begunkov
` (2 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Pavel Begunkov @ 2025-03-07 15:49 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence
I'll need more control over iovec management, so move
io_net_import_vec() out of io_msg_copy_hdr().
Signed-off-by: Pavel Begunkov <[email protected]>
---
io_uring/net.c | 23 +++++++++++++++--------
1 file changed, 15 insertions(+), 8 deletions(-)
diff --git a/io_uring/net.c b/io_uring/net.c
index 33076bd22c16..cbb889b85cfc 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -253,12 +253,8 @@ static int io_compat_msg_copy_hdr(struct io_kiocb *req,
return -EFAULT;
sr->len = tmp_iov.iov_len;
}
-
- return 0;
}
-
- return io_net_import_vec(req, iomsg, (struct iovec __user *)uiov,
- msg->msg_iovlen, ddir);
+ return 0;
}
static int io_copy_msghdr_from_user(struct user_msghdr *msg,
@@ -328,10 +324,8 @@ static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
return -EFAULT;
sr->len = tmp_iov.iov_len;
}
- return 0;
}
-
- return io_net_import_vec(req, iomsg, msg->msg_iov, msg->msg_iovlen, ddir);
+ return 0;
}
static int io_sendmsg_copy_hdr(struct io_kiocb *req,
@@ -342,6 +336,12 @@ static int io_sendmsg_copy_hdr(struct io_kiocb *req,
int ret;
ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_SOURCE, NULL);
+ if (unlikely(ret))
+ return ret;
+
+ if (!(req->flags & REQ_F_BUFFER_SELECT))
+ ret = io_net_import_vec(req, iomsg, msg.msg_iov, msg.msg_iovlen,
+ ITER_SOURCE);
/* save msg_control as sys_sendmsg() overwrites it */
sr->msg_control = iomsg->msg.msg_control_user;
return ret;
@@ -719,6 +719,13 @@ static int io_recvmsg_copy_hdr(struct io_kiocb *req,
ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST, &iomsg->uaddr);
if (unlikely(ret))
return ret;
+
+ if (!(req->flags & REQ_F_BUFFER_SELECT)) {
+ ret = io_net_import_vec(req, iomsg, msg.msg_iov, msg.msg_iovlen,
+ ITER_DEST);
+ if (unlikely(ret))
+ return ret;
+ }
return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen,
msg.msg_controllen);
}
--
2.48.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v3 7/9] io_uring/net: convert to struct iou_vec
2025-03-07 15:49 [PATCH v3 0/9] Add support for vectored registered buffers Pavel Begunkov
` (5 preceding siblings ...)
2025-03-07 15:49 ` [PATCH v3 6/9] io_uring/net: pull vec alloc out of msghdr import Pavel Begunkov
@ 2025-03-07 15:49 ` Pavel Begunkov
2025-03-07 15:49 ` [PATCH v3 8/9] io_uring/net: implement vectored reg bufs for zctx Pavel Begunkov
2025-03-07 15:49 ` [PATCH v3 9/9] io_uring: cap cached iovec/bvec size Pavel Begunkov
8 siblings, 0 replies; 10+ messages in thread
From: Pavel Begunkov @ 2025-03-07 15:49 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence
Convert net.c to use struct iou_vec.
Signed-off-by: Pavel Begunkov <[email protected]>
---
io_uring/alloc_cache.h | 9 --------
io_uring/net.c | 51 ++++++++++++++++++------------------------
io_uring/net.h | 6 ++---
3 files changed, 25 insertions(+), 41 deletions(-)
diff --git a/io_uring/alloc_cache.h b/io_uring/alloc_cache.h
index 7f68eff2e7f3..d33ce159ef33 100644
--- a/io_uring/alloc_cache.h
+++ b/io_uring/alloc_cache.h
@@ -16,15 +16,6 @@ bool io_alloc_cache_init(struct io_alloc_cache *cache,
void *io_cache_alloc_new(struct io_alloc_cache *cache, gfp_t gfp);
-static inline void io_alloc_cache_kasan(struct iovec **iov, int *nr)
-{
- if (IS_ENABLED(CONFIG_KASAN)) {
- kfree(*iov);
- *iov = NULL;
- *nr = 0;
- }
-}
-
static inline bool io_alloc_cache_put(struct io_alloc_cache *cache,
void *entry)
{
diff --git a/io_uring/net.c b/io_uring/net.c
index cbb889b85cfc..a4b39343f345 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -136,11 +136,8 @@ static bool io_net_retry(struct socket *sock, int flags)
static void io_netmsg_iovec_free(struct io_async_msghdr *kmsg)
{
- if (kmsg->free_iov) {
- kfree(kmsg->free_iov);
- kmsg->free_iov_nr = 0;
- kmsg->free_iov = NULL;
- }
+ if (kmsg->vec.iovec)
+ io_vec_free(&kmsg->vec);
}
static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
@@ -154,7 +151,7 @@ static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
}
/* Let normal cleanup path reap it if we fail adding to the cache */
- io_alloc_cache_kasan(&hdr->free_iov, &hdr->free_iov_nr);
+ io_alloc_cache_vec_kasan(&hdr->vec);
if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr)) {
req->async_data = NULL;
req->flags &= ~REQ_F_ASYNC_DATA;
@@ -171,7 +168,7 @@ static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req)
return NULL;
/* If the async data was cached, we might have an iov cached inside. */
- if (hdr->free_iov)
+ if (hdr->vec.iovec)
req->flags |= REQ_F_NEED_CLEANUP;
return hdr;
}
@@ -182,10 +179,7 @@ static void io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg
{
if (iov) {
req->flags |= REQ_F_NEED_CLEANUP;
- kmsg->free_iov_nr = kmsg->msg.msg_iter.nr_segs;
- if (kmsg->free_iov)
- kfree(kmsg->free_iov);
- kmsg->free_iov = iov;
+ io_vec_reset_iovec(&kmsg->vec, iov, kmsg->msg.msg_iter.nr_segs);
}
}
@@ -208,9 +202,9 @@ static int io_net_import_vec(struct io_kiocb *req, struct io_async_msghdr *iomsg
struct iovec *iov;
int ret, nr_segs;
- if (iomsg->free_iov) {
- nr_segs = iomsg->free_iov_nr;
- iov = iomsg->free_iov;
+ if (iomsg->vec.iovec) {
+ nr_segs = iomsg->vec.nr;
+ iov = iomsg->vec.iovec;
} else {
nr_segs = 1;
iov = &iomsg->fast_iov;
@@ -468,7 +462,7 @@ static int io_bundle_nbufs(struct io_async_msghdr *kmsg, int ret)
if (iter_is_ubuf(&kmsg->msg.msg_iter))
return 1;
- iov = kmsg->free_iov;
+ iov = kmsg->vec.iovec;
if (!iov)
iov = &kmsg->fast_iov;
@@ -584,9 +578,9 @@ static int io_send_select_buffer(struct io_kiocb *req, unsigned int issue_flags,
.nr_iovs = 1,
};
- if (kmsg->free_iov) {
- arg.nr_iovs = kmsg->free_iov_nr;
- arg.iovs = kmsg->free_iov;
+ if (kmsg->vec.iovec) {
+ arg.nr_iovs = kmsg->vec.nr;
+ arg.iovs = kmsg->vec.iovec;
arg.mode = KBUF_MODE_FREE;
}
@@ -599,9 +593,9 @@ static int io_send_select_buffer(struct io_kiocb *req, unsigned int issue_flags,
if (unlikely(ret < 0))
return ret;
- if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
- kmsg->free_iov_nr = ret;
- kmsg->free_iov = arg.iovs;
+ if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) {
+ kmsg->vec.nr = ret;
+ kmsg->vec.iovec = arg.iovs;
req->flags |= REQ_F_NEED_CLEANUP;
}
sr->len = arg.out_len;
@@ -1085,9 +1079,9 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg
.mode = KBUF_MODE_EXPAND,
};
- if (kmsg->free_iov) {
- arg.nr_iovs = kmsg->free_iov_nr;
- arg.iovs = kmsg->free_iov;
+ if (kmsg->vec.iovec) {
+ arg.nr_iovs = kmsg->vec.nr;
+ arg.iovs = kmsg->vec.iovec;
arg.mode |= KBUF_MODE_FREE;
}
@@ -1106,9 +1100,9 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg
}
iov_iter_init(&kmsg->msg.msg_iter, ITER_DEST, arg.iovs, ret,
arg.out_len);
- if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) {
- kmsg->free_iov_nr = ret;
- kmsg->free_iov = arg.iovs;
+ if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->vec.iovec) {
+ kmsg->vec.nr = ret;
+ kmsg->vec.iovec = arg.iovs;
req->flags |= REQ_F_NEED_CLEANUP;
}
} else {
@@ -1874,8 +1868,7 @@ void io_netmsg_cache_free(const void *entry)
{
struct io_async_msghdr *kmsg = (struct io_async_msghdr *) entry;
- if (kmsg->free_iov)
- io_netmsg_iovec_free(kmsg);
+ io_vec_free(&kmsg->vec);
kfree(kmsg);
}
#endif
diff --git a/io_uring/net.h b/io_uring/net.h
index b804c2b36e60..43e5ce5416b7 100644
--- a/io_uring/net.h
+++ b/io_uring/net.h
@@ -2,12 +2,12 @@
#include <linux/net.h>
#include <linux/uio.h>
+#include <linux/io_uring_types.h>
struct io_async_msghdr {
#if defined(CONFIG_NET)
- struct iovec *free_iov;
- /* points to an allocated iov, if NULL we use fast_iov instead */
- int free_iov_nr;
+ struct iou_vec vec;
+
struct_group(clear,
int namelen;
struct iovec fast_iov;
--
2.48.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v3 8/9] io_uring/net: implement vectored reg bufs for zctx
2025-03-07 15:49 [PATCH v3 0/9] Add support for vectored registered buffers Pavel Begunkov
` (6 preceding siblings ...)
2025-03-07 15:49 ` [PATCH v3 7/9] io_uring/net: convert to struct iou_vec Pavel Begunkov
@ 2025-03-07 15:49 ` Pavel Begunkov
2025-03-07 15:49 ` [PATCH v3 9/9] io_uring: cap cached iovec/bvec size Pavel Begunkov
8 siblings, 0 replies; 10+ messages in thread
From: Pavel Begunkov @ 2025-03-07 15:49 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence
Add support for vectored registered buffers for send zc.
Signed-off-by: Pavel Begunkov <[email protected]>
---
io_uring/net.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 55 insertions(+), 4 deletions(-)
diff --git a/io_uring/net.c b/io_uring/net.c
index a4b39343f345..5e27c22e1d58 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -395,6 +395,44 @@ static int io_sendmsg_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe
return io_sendmsg_copy_hdr(req, kmsg);
}
+static int io_sendmsg_zc_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
+ struct io_async_msghdr *kmsg = req->async_data;
+ struct user_msghdr msg;
+ int ret, iovec_off;
+ struct iovec *iov;
+ void *res;
+
+ if (!(sr->flags & IORING_RECVSEND_FIXED_BUF))
+ return io_sendmsg_setup(req, sqe);
+
+ sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
+
+ ret = io_msg_copy_hdr(req, kmsg, &msg, ITER_SOURCE, NULL);
+ if (unlikely(ret))
+ return ret;
+ sr->msg_control = kmsg->msg.msg_control_user;
+
+ if (msg.msg_iovlen > kmsg->vec.nr || WARN_ON_ONCE(!kmsg->vec.iovec)) {
+ ret = io_vec_realloc(&kmsg->vec, msg.msg_iovlen);
+ if (ret)
+ return ret;
+ req->flags |= REQ_F_NEED_CLEANUP;
+ }
+ iovec_off = kmsg->vec.nr - msg.msg_iovlen;
+ iov = kmsg->vec.iovec + iovec_off;
+
+ res = iovec_from_user(msg.msg_iov, msg.msg_iovlen, kmsg->vec.nr, iov,
+ io_is_compat(req->ctx));
+ if (IS_ERR(res))
+ return PTR_ERR(res);
+
+ kmsg->msg.msg_iter.nr_segs = msg.msg_iovlen;
+ req->flags |= REQ_F_IMPORT_BUFFER;
+ return ret;
+}
+
#define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE)
int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
@@ -1333,8 +1371,6 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (req->opcode != IORING_OP_SEND_ZC) {
if (unlikely(sqe->addr2 || sqe->file_index))
return -EINVAL;
- if (unlikely(zc->flags & IORING_RECVSEND_FIXED_BUF))
- return -EINVAL;
}
zc->len = READ_ONCE(sqe->len);
@@ -1350,7 +1386,7 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return -ENOMEM;
if (req->opcode != IORING_OP_SENDMSG_ZC)
return io_send_setup(req, sqe);
- return io_sendmsg_setup(req, sqe);
+ return io_sendmsg_zc_setup(req, sqe);
}
static int io_sg_from_iter_iovec(struct sk_buff *skb,
@@ -1506,6 +1542,22 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
unsigned flags;
int ret, min_ret = 0;
+ kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
+
+ if (req->flags & REQ_F_IMPORT_BUFFER) {
+ unsigned uvec_segs = kmsg->msg.msg_iter.nr_segs;
+ unsigned iovec_off = kmsg->vec.nr - uvec_segs;
+ int ret;
+
+ ret = io_import_reg_vec(ITER_SOURCE, &kmsg->msg.msg_iter, req,
+ &kmsg->vec, uvec_segs, iovec_off,
+ issue_flags);
+ if (unlikely(ret))
+ return ret;
+ kmsg->msg.sg_from_iter = io_sg_from_iter;
+ req->flags &= ~REQ_F_IMPORT_BUFFER;
+ }
+
sock = sock_from_file(req->file);
if (unlikely(!sock))
return -ENOTSOCK;
@@ -1524,7 +1576,6 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
kmsg->msg.msg_control_user = sr->msg_control;
kmsg->msg.msg_ubuf = &io_notif_to_data(sr->notif)->uarg;
- kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
if (unlikely(ret < min_ret)) {
--
2.48.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v3 9/9] io_uring: cap cached iovec/bvec size
2025-03-07 15:49 [PATCH v3 0/9] Add support for vectored registered buffers Pavel Begunkov
` (7 preceding siblings ...)
2025-03-07 15:49 ` [PATCH v3 8/9] io_uring/net: implement vectored reg bufs for zctx Pavel Begunkov
@ 2025-03-07 15:49 ` Pavel Begunkov
8 siblings, 0 replies; 10+ messages in thread
From: Pavel Begunkov @ 2025-03-07 15:49 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence
Bvecs can be large, so put an arbitrary limit on the maximum vector size that
can be cached.
Signed-off-by: Pavel Begunkov <[email protected]>
---
io_uring/net.c | 3 +++
io_uring/rsrc.h | 2 ++
io_uring/rw.c | 3 +++
3 files changed, 8 insertions(+)
diff --git a/io_uring/net.c b/io_uring/net.c
index 5e27c22e1d58..ce104d04b1e4 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -152,6 +152,9 @@ static void io_netmsg_recycle(struct io_kiocb *req, unsigned int issue_flags)
/* Let normal cleanup path reap it if we fail adding to the cache */
io_alloc_cache_vec_kasan(&hdr->vec);
+ if (hdr->vec.nr > IO_VEC_CACHE_SOFT_CAP)
+ io_vec_free(&hdr->vec);
+
if (io_alloc_cache_put(&req->ctx->netmsg_cache, hdr)) {
req->async_data = NULL;
req->flags &= ~REQ_F_ASYNC_DATA;
diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h
index f1496f7d844f..0bfcdba12617 100644
--- a/io_uring/rsrc.h
+++ b/io_uring/rsrc.h
@@ -6,6 +6,8 @@
#include <linux/lockdep.h>
#include <linux/io_uring_types.h>
+#define IO_VEC_CACHE_SOFT_CAP 256
+
enum {
IORING_RSRC_FILE = 0,
IORING_RSRC_BUFFER = 1,
diff --git a/io_uring/rw.c b/io_uring/rw.c
index e62f4ce34171..bf35599d1078 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -150,6 +150,9 @@ static void io_rw_recycle(struct io_kiocb *req, unsigned int issue_flags)
return;
io_alloc_cache_vec_kasan(&rw->vec);
+ if (rw->vec.nr > IO_VEC_CACHE_SOFT_CAP)
+ io_vec_free(&rw->vec);
+
if (io_alloc_cache_put(&req->ctx->rw_cache, rw)) {
req->async_data = NULL;
req->flags &= ~REQ_F_ASYNC_DATA;
--
2.48.1
^ permalink raw reply related [flat|nested] 10+ messages in thread
end of thread, other threads:[~2025-03-07 15:48 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-03-07 15:49 [PATCH v3 0/9] Add support for vectored registered buffers Pavel Begunkov
2025-03-07 15:49 ` [PATCH v3 1/9] io_uring: introduce struct iou_vec Pavel Begunkov
2025-03-07 15:49 ` [PATCH v3 2/9] io_uring: add infra for importing vectored reg buffers Pavel Begunkov
2025-03-07 15:49 ` [PATCH v3 3/9] io_uring/rw: implement vectored registered rw Pavel Begunkov
2025-03-07 15:49 ` [PATCH v3 4/9] io_uring/rw: defer reg buf vec import Pavel Begunkov
2025-03-07 15:49 ` [PATCH v3 5/9] io_uring/net: combine msghdr copy Pavel Begunkov
2025-03-07 15:49 ` [PATCH v3 6/9] io_uring/net: pull vec alloc out of msghdr import Pavel Begunkov
2025-03-07 15:49 ` [PATCH v3 7/9] io_uring/net: convert to struct iou_vec Pavel Begunkov
2025-03-07 15:49 ` [PATCH v3 8/9] io_uring/net: implement vectored reg bufs for zctx Pavel Begunkov
2025-03-07 15:49 ` [PATCH v3 9/9] io_uring: cap cached iovec/bvec size Pavel Begunkov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox