* [RFC 0/4] pre-mapped rw attributes
@ 2024-12-30 13:30 Pavel Begunkov
2024-12-30 13:30 ` [PATCH 1/4] io_uring: add structure for registered arguments Pavel Begunkov
` (3 more replies)
0 siblings, 4 replies; 5+ messages in thread
From: Pavel Begunkov @ 2024-12-30 13:30 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence, Anuj Gupta, Kanchan Joshi
warning: not properly tested
Follow up on the discussion about optimising copy_from_user() for
read/write attributes. The idea here is to use registered regions
(see IORING_REGISTER_MEM_REGION) for that purpose pretty much in
the same way registered wait arguments work.
Putting it simply, a region is a user provided chunk of memory
that has been registered and pre-mapped into io_uring / kernel,
but it has more modes like mmap'ing kernel memory. For attributes
the user passes an offset into a region, and the kernel can read
from it directly without copy_from_user().
The other alternative is to store attributes into the upper half
of SQE128, but then we might run out of space in SQE for larger
and/or compound attributes. It'd also require SQE128, which has a
(perhaps minor) downside when other types of requests don't need it.
Pavel Begunkov (4):
io_uring: add structure for registered arguments
io_uring: add registered request arguments
io_uring/rw: use READ_ONCE with rw attributes
io_uring/rw: pre-mapped rw attributes
include/linux/io_uring_types.h | 11 ++++++++--
include/uapi/linux/io_uring.h | 4 +++-
io_uring/io_uring.c | 23 +++------------------
io_uring/io_uring.h | 16 +++++++++++++++
io_uring/register.c | 7 +++++--
io_uring/rw.c | 37 ++++++++++++++++++++++++----------
6 files changed, 62 insertions(+), 36 deletions(-)
--
2.47.1
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 1/4] io_uring: add structure for registered arguments
2024-12-30 13:30 [RFC 0/4] pre-mapped rw attributes Pavel Begunkov
@ 2024-12-30 13:30 ` Pavel Begunkov
2024-12-30 13:30 ` [PATCH 2/4] io_uring: add registered request arguments Pavel Begunkov
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Pavel Begunkov @ 2024-12-30 13:30 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence, Anuj Gupta, Kanchan Joshi
A preparation patch making infra for wait arguments a bit more general
to use in the following patches.
Signed-off-by: Pavel Begunkov <[email protected]>
---
include/linux/io_uring_types.h | 9 +++++++--
io_uring/io_uring.c | 23 +++--------------------
io_uring/io_uring.h | 16 ++++++++++++++++
io_uring/register.c | 4 ++--
4 files changed, 28 insertions(+), 24 deletions(-)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 493a8f7fa8e4..49008f00d064 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -83,6 +83,11 @@ struct io_mapped_region {
unsigned flags;
};
+struct io_reg_args {
+ void *ptr;
+ size_t size;
+};
+
/*
* Arbitrary limit, can be raised if need be
*/
@@ -332,8 +337,8 @@ struct io_ring_ctx {
struct io_ev_fd __rcu *io_ev_fd;
unsigned cq_extra;
- void *cq_wait_arg;
- size_t cq_wait_size;
+ struct io_reg_args wait_args;
+
} ____cacheline_aligned_in_smp;
/*
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 5535a72b0ce1..e2b6b256fc9a 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -3178,25 +3178,6 @@ void __io_uring_cancel(bool cancel_all)
io_uring_cancel_generic(cancel_all, NULL);
}
-static struct io_uring_reg_wait *io_get_ext_arg_reg(struct io_ring_ctx *ctx,
- const struct io_uring_getevents_arg __user *uarg)
-{
- unsigned long size = sizeof(struct io_uring_reg_wait);
- unsigned long offset = (uintptr_t)uarg;
- unsigned long end;
-
- if (unlikely(offset % sizeof(long)))
- return ERR_PTR(-EFAULT);
-
- /* also protects from NULL ->cq_wait_arg as the size would be 0 */
- if (unlikely(check_add_overflow(offset, size, &end) ||
- end > ctx->cq_wait_size))
- return ERR_PTR(-EFAULT);
-
- offset = array_index_nospec(offset, ctx->cq_wait_size - size);
- return ctx->cq_wait_arg + offset;
-}
-
static int io_validate_ext_arg(struct io_ring_ctx *ctx, unsigned flags,
const void __user *argp, size_t argsz)
{
@@ -3233,7 +3214,9 @@ static int io_get_ext_arg(struct io_ring_ctx *ctx, unsigned flags,
if (ext_arg->argsz != sizeof(struct io_uring_reg_wait))
return -EINVAL;
- w = io_get_ext_arg_reg(ctx, argp);
+
+ w = io_args_get_ptr(&ctx->wait_args, (uintptr_t)argp,
+ sizeof(struct io_uring_reg_wait));
if (IS_ERR(w))
return PTR_ERR(w);
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 032758b28d78..a18da74f18e8 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -514,4 +514,20 @@ static inline bool io_has_work(struct io_ring_ctx *ctx)
return test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq) ||
io_local_work_pending(ctx);
}
+
+static inline void *io_args_get_ptr(struct io_reg_args *args,
+ unsigned long offset, size_t size)
+{
+ unsigned long end;
+
+ if (unlikely(offset % sizeof(long)))
+ return ERR_PTR(-EFAULT);
+
+ /* also protects from NULL as the size would be 0 */
+ if (unlikely(check_add_overflow(offset, size, &end) || end > args->size))
+ return ERR_PTR(-EFAULT);
+
+ return args->ptr + array_index_nospec(offset, args->size - size);
+}
+
#endif
diff --git a/io_uring/register.c b/io_uring/register.c
index f1698c18c7cb..b926eb053408 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -604,8 +604,8 @@ static int io_register_mem_region(struct io_ring_ctx *ctx, void __user *uarg)
}
if (reg.flags & IORING_MEM_REGION_REG_WAIT_ARG) {
- ctx->cq_wait_arg = io_region_get_ptr(&ctx->param_region);
- ctx->cq_wait_size = rd.size;
+ ctx->wait_args.ptr = io_region_get_ptr(&ctx->param_region);
+ ctx->wait_args.size = rd.size;
}
return 0;
}
--
2.47.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 2/4] io_uring: add registered request arguments
2024-12-30 13:30 [RFC 0/4] pre-mapped rw attributes Pavel Begunkov
2024-12-30 13:30 ` [PATCH 1/4] io_uring: add structure for registered arguments Pavel Begunkov
@ 2024-12-30 13:30 ` Pavel Begunkov
2024-12-30 13:30 ` [PATCH 3/4] io_uring/rw: use READ_ONCE with rw attributes Pavel Begunkov
2024-12-30 13:30 ` [PATCH 4/4] io_uring/rw: pre-mapped " Pavel Begunkov
3 siblings, 0 replies; 5+ messages in thread
From: Pavel Begunkov @ 2024-12-30 13:30 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence, Anuj Gupta, Kanchan Joshi
Similarly to registered wait arguments, we want to have a pre-mapped
space for various request arguments. Use the same parameter region;
however, as ->wait_args has different lifetime rules, add a new instance
of struct io_reg_args.
Signed-off-by: Pavel Begunkov <[email protected]>
---
include/linux/io_uring_types.h | 2 ++
io_uring/register.c | 3 +++
2 files changed, 5 insertions(+)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 49008f00d064..cd6642855533 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -299,6 +299,8 @@ struct io_ring_ctx {
struct io_submit_state submit_state;
+ struct io_reg_args sqe_args;
+
/*
* Modifications are protected by ->uring_lock and ->mmap_lock.
* The flags, buf_pages and buf_nr_pages fields should be stable
diff --git a/io_uring/register.c b/io_uring/register.c
index b926eb053408..d2232b90a81d 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -607,6 +607,9 @@ static int io_register_mem_region(struct io_ring_ctx *ctx, void __user *uarg)
ctx->wait_args.ptr = io_region_get_ptr(&ctx->param_region);
ctx->wait_args.size = rd.size;
}
+
+ ctx->sqe_args.ptr = io_region_get_ptr(&ctx->param_region);
+ ctx->sqe_args.size = rd.size;
return 0;
}
--
2.47.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 3/4] io_uring/rw: use READ_ONCE with rw attributes
2024-12-30 13:30 [RFC 0/4] pre-mapped rw attributes Pavel Begunkov
2024-12-30 13:30 ` [PATCH 1/4] io_uring: add structure for registered arguments Pavel Begunkov
2024-12-30 13:30 ` [PATCH 2/4] io_uring: add registered request arguments Pavel Begunkov
@ 2024-12-30 13:30 ` Pavel Begunkov
2024-12-30 13:30 ` [PATCH 4/4] io_uring/rw: pre-mapped " Pavel Begunkov
3 siblings, 0 replies; 5+ messages in thread
From: Pavel Begunkov @ 2024-12-30 13:30 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence, Anuj Gupta, Kanchan Joshi
In preparation for pre-mapped attributes, read struct io_uring_attr_pi
with READ_ONCE and use an intermediate pointer.
Signed-off-by: Pavel Begunkov <[email protected]>
---
io_uring/rw.c | 22 ++++++++++++++--------
1 file changed, 14 insertions(+), 8 deletions(-)
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 75f70935ccf4..dc1acaf95db1 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -264,23 +264,29 @@ static inline void io_meta_restore(struct io_async_rw *io, struct kiocb *kiocb)
static int io_prep_rw_pi(struct io_kiocb *req, struct io_rw *rw, int ddir,
u64 attr_ptr, u64 attr_type_mask)
{
- struct io_uring_attr_pi pi_attr;
+ struct io_uring_attr_pi __pi_attr;
+ struct io_uring_attr_pi *pi_attr;
struct io_async_rw *io;
+ void __user *pi_addr;
+ size_t pi_len;
int ret;
- if (copy_from_user(&pi_attr, u64_to_user_ptr(attr_ptr),
+ if (copy_from_user(&__pi_attr, u64_to_user_ptr(attr_ptr),
sizeof(pi_attr)))
return -EFAULT;
+ pi_attr = &__pi_attr;
- if (pi_attr.rsvd)
+ if (pi_attr->rsvd)
return -EINVAL;
io = req->async_data;
- io->meta.flags = pi_attr.flags;
- io->meta.app_tag = pi_attr.app_tag;
- io->meta.seed = pi_attr.seed;
- ret = import_ubuf(ddir, u64_to_user_ptr(pi_attr.addr),
- pi_attr.len, &io->meta.iter);
+ io->meta.flags = READ_ONCE(pi_attr->flags);
+ io->meta.app_tag = READ_ONCE(pi_attr->app_tag);
+ io->meta.seed = READ_ONCE(pi_attr->seed);
+
+ pi_addr = u64_to_user_ptr(READ_ONCE(pi_attr->addr));
+ pi_len = READ_ONCE(pi_attr->len);
+ ret = import_ubuf(ddir, pi_addr, pi_len, &io->meta.iter);
if (unlikely(ret < 0))
return ret;
req->flags |= REQ_F_HAS_METADATA;
--
2.47.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 4/4] io_uring/rw: pre-mapped rw attributes
2024-12-30 13:30 [RFC 0/4] pre-mapped rw attributes Pavel Begunkov
` (2 preceding siblings ...)
2024-12-30 13:30 ` [PATCH 3/4] io_uring/rw: use READ_ONCE with rw attributes Pavel Begunkov
@ 2024-12-30 13:30 ` Pavel Begunkov
3 siblings, 0 replies; 5+ messages in thread
From: Pavel Begunkov @ 2024-12-30 13:30 UTC (permalink / raw)
To: io-uring; +Cc: asml.silence, Anuj Gupta, Kanchan Joshi
Instead of copy_from_user()'ing request attributes, allow them to be
grabbed from a pre-registered parameter region like we do
with registered wait arguments.
Suggested-by: Anuj Gupta <[email protected]>
Signed-off-by: Pavel Begunkov <[email protected]>
---
include/uapi/linux/io_uring.h | 4 +++-
io_uring/rw.c | 19 ++++++++++++++-----
2 files changed, 17 insertions(+), 6 deletions(-)
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 38f0d6b10eaf..ec6e6fd37d1c 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -112,7 +112,9 @@ struct io_uring_sqe {
};
/* sqe->attr_type_mask flags */
-#define IORING_RW_ATTR_FLAG_PI (1U << 0)
+#define IORING_RW_ATTR_FLAG_PI (1UL << 0)
+#define IORING_RW_ATTR_REGISTERED (1UL << 63)
+
/* PI attribute information */
struct io_uring_attr_pi {
__u16 flags;
diff --git a/io_uring/rw.c b/io_uring/rw.c
index dc1acaf95db1..b1db4595788b 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -271,10 +271,17 @@ static int io_prep_rw_pi(struct io_kiocb *req, struct io_rw *rw, int ddir,
size_t pi_len;
int ret;
- if (copy_from_user(&__pi_attr, u64_to_user_ptr(attr_ptr),
- sizeof(pi_attr)))
- return -EFAULT;
- pi_attr = &__pi_attr;
+ if (attr_type_mask & IORING_RW_ATTR_REGISTERED) {
+ pi_attr = io_args_get_ptr(&req->ctx->sqe_args, attr_ptr,
+ sizeof(pi_attr));
+ if (IS_ERR(pi_attr))
+ return PTR_ERR(pi_attr);
+ } else {
+ if (copy_from_user(&__pi_attr, u64_to_user_ptr(attr_ptr),
+ sizeof(pi_attr)))
+ return -EFAULT;
+ pi_attr = &__pi_attr;
+ }
if (pi_attr->rsvd)
return -EINVAL;
@@ -294,6 +301,8 @@ static int io_prep_rw_pi(struct io_kiocb *req, struct io_rw *rw, int ddir,
return ret;
}
+#define IO_RW_ATTR_ALLOWED_MASK (IORING_RW_ATTR_FLAG_PI | IORING_RW_ATTR_REGISTERED)
+
static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
int ddir, bool do_import)
{
@@ -332,7 +341,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
u64 attr_ptr;
/* only PI attribute is supported currently */
- if (attr_type_mask != IORING_RW_ATTR_FLAG_PI)
+ if (attr_type_mask & IO_RW_ATTR_ALLOWED_MASK)
return -EINVAL;
attr_ptr = READ_ONCE(sqe->attr_ptr);
--
2.47.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
end of thread, other threads:[~2024-12-30 13:29 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-12-30 13:30 [RFC 0/4] pre-mapped rw attributes Pavel Begunkov
2024-12-30 13:30 ` [PATCH 1/4] io_uring: add structure for registered arguments Pavel Begunkov
2024-12-30 13:30 ` [PATCH 2/4] io_uring: add registered request arguments Pavel Begunkov
2024-12-30 13:30 ` [PATCH 3/4] io_uring/rw: use READ_ONCE with rw attributes Pavel Begunkov
2024-12-30 13:30 ` [PATCH 4/4] io_uring/rw: pre-mapped " Pavel Begunkov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox