public inbox for [email protected]
 help / color / mirror / Atom feed
* [RFC 0/4] pre-mapped rw attributes
@ 2024-12-30 13:30 Pavel Begunkov
  2024-12-30 13:30 ` [PATCH 1/4] io_uring: add structure for registered arguments Pavel Begunkov
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Pavel Begunkov @ 2024-12-30 13:30 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence, Anuj Gupta, Kanchan Joshi

warning: not properly tested

Follow up on the discussion about optimising copy_from_user() for
read/write attributes. The idea here is to use registered regions
(see IORING_REGISTER_MEM_REGION) for that purpose pretty much in
the same way registered wait arguments work.

Putting it simply, a region is a user provided chunk of memory
that has been registered and pre-mapped into io_uring / kernel,
but it has more modes like mmap'ing kernel memory. For attributes
the user passes an offset into a region, and the kernel can read
from it directly without copy_from_user().

The other alternative is to store attributes into the upper half
of SQE128, but then we might run out of space in SQE for larger
and/or compound attributes. It'd also require SQE128, which has a
(perhaps minor) downside when other types of requests don't need it.

Pavel Begunkov (4):
  io_uring: add structure for registered arguments
  io_uring: add registered request arguments
  io_uring/rw: use READ_ONCE with rw attributes
  io_uring/rw: pre-mapped rw attributes

 include/linux/io_uring_types.h | 11 ++++++++--
 include/uapi/linux/io_uring.h  |  4 +++-
 io_uring/io_uring.c            | 23 +++------------------
 io_uring/io_uring.h            | 16 +++++++++++++++
 io_uring/register.c            |  7 +++++--
 io_uring/rw.c                  | 37 ++++++++++++++++++++++++----------
 6 files changed, 62 insertions(+), 36 deletions(-)

-- 
2.47.1


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1/4] io_uring: add structure for registered arguments
  2024-12-30 13:30 [RFC 0/4] pre-mapped rw attributes Pavel Begunkov
@ 2024-12-30 13:30 ` Pavel Begunkov
  2024-12-30 13:30 ` [PATCH 2/4] io_uring: add registered request arguments Pavel Begunkov
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Pavel Begunkov @ 2024-12-30 13:30 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence, Anuj Gupta, Kanchan Joshi

A preparation patch making infra for wait arguments a bit more general
to use in following patches.

Signed-off-by: Pavel Begunkov <[email protected]>
---
 include/linux/io_uring_types.h |  9 +++++++--
 io_uring/io_uring.c            | 23 +++--------------------
 io_uring/io_uring.h            | 16 ++++++++++++++++
 io_uring/register.c            |  4 ++--
 4 files changed, 28 insertions(+), 24 deletions(-)

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 493a8f7fa8e4..49008f00d064 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -83,6 +83,11 @@ struct io_mapped_region {
 	unsigned		flags;
 };
 
+struct io_reg_args {
+	void			*ptr;
+	size_t			size;
+};
+
 /*
  * Arbitrary limit, can be raised if need be
  */
@@ -332,8 +337,8 @@ struct io_ring_ctx {
 		struct io_ev_fd	__rcu	*io_ev_fd;
 		unsigned		cq_extra;
 
-		void			*cq_wait_arg;
-		size_t			cq_wait_size;
+		struct io_reg_args	wait_args;
+
 	} ____cacheline_aligned_in_smp;
 
 	/*
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 5535a72b0ce1..e2b6b256fc9a 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -3178,25 +3178,6 @@ void __io_uring_cancel(bool cancel_all)
 	io_uring_cancel_generic(cancel_all, NULL);
 }
 
-static struct io_uring_reg_wait *io_get_ext_arg_reg(struct io_ring_ctx *ctx,
-			const struct io_uring_getevents_arg __user *uarg)
-{
-	unsigned long size = sizeof(struct io_uring_reg_wait);
-	unsigned long offset = (uintptr_t)uarg;
-	unsigned long end;
-
-	if (unlikely(offset % sizeof(long)))
-		return ERR_PTR(-EFAULT);
-
-	/* also protects from NULL ->cq_wait_arg as the size would be 0 */
-	if (unlikely(check_add_overflow(offset, size, &end) ||
-		     end > ctx->cq_wait_size))
-		return ERR_PTR(-EFAULT);
-
-	offset = array_index_nospec(offset, ctx->cq_wait_size - size);
-	return ctx->cq_wait_arg + offset;
-}
-
 static int io_validate_ext_arg(struct io_ring_ctx *ctx, unsigned flags,
 			       const void __user *argp, size_t argsz)
 {
@@ -3233,7 +3214,9 @@ static int io_get_ext_arg(struct io_ring_ctx *ctx, unsigned flags,
 
 		if (ext_arg->argsz != sizeof(struct io_uring_reg_wait))
 			return -EINVAL;
-		w = io_get_ext_arg_reg(ctx, argp);
+
+		w = io_args_get_ptr(&ctx->wait_args, (uintptr_t)argp,
+				    sizeof(struct io_uring_reg_wait));
 		if (IS_ERR(w))
 			return PTR_ERR(w);
 
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 032758b28d78..a18da74f18e8 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -514,4 +514,20 @@ static inline bool io_has_work(struct io_ring_ctx *ctx)
 	return test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq) ||
 	       io_local_work_pending(ctx);
 }
+
+static inline void *io_args_get_ptr(struct io_reg_args *args,
+				    unsigned long offset, size_t size)
+{
+	unsigned long end;
+
+	if (unlikely(offset % sizeof(long)))
+		return ERR_PTR(-EFAULT);
+
+	/* also protects from NULL as the size would be 0 */
+	if (unlikely(check_add_overflow(offset, size, &end) || end > args->size))
+		return ERR_PTR(-EFAULT);
+
+	return args->ptr + array_index_nospec(offset, args->size - size);
+}
+
 #endif
diff --git a/io_uring/register.c b/io_uring/register.c
index f1698c18c7cb..b926eb053408 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -604,8 +604,8 @@ static int io_register_mem_region(struct io_ring_ctx *ctx, void __user *uarg)
 	}
 
 	if (reg.flags & IORING_MEM_REGION_REG_WAIT_ARG) {
-		ctx->cq_wait_arg = io_region_get_ptr(&ctx->param_region);
-		ctx->cq_wait_size = rd.size;
+		ctx->wait_args.ptr = io_region_get_ptr(&ctx->param_region);
+		ctx->wait_args.size = rd.size;
 	}
 	return 0;
 }
-- 
2.47.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/4] io_uring: add registered request arguments
  2024-12-30 13:30 [RFC 0/4] pre-mapped rw attributes Pavel Begunkov
  2024-12-30 13:30 ` [PATCH 1/4] io_uring: add structure for registered arguments Pavel Begunkov
@ 2024-12-30 13:30 ` Pavel Begunkov
  2024-12-30 13:30 ` [PATCH 3/4] io_uring/rw: use READ_ONCE with rw attributes Pavel Begunkov
  2024-12-30 13:30 ` [PATCH 4/4] io_uring/rw: pre-mapped " Pavel Begunkov
  3 siblings, 0 replies; 5+ messages in thread
From: Pavel Begunkov @ 2024-12-30 13:30 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence, Anuj Gupta, Kanchan Joshi

Similarly to registered wait arguments we want to have a pre-mapped
space for various request arguments. Use the same parameter region,
however as ->wait_args has different lifetime rules, add a new instance
of struct io_reg_args.

Signed-off-by: Pavel Begunkov <[email protected]>
---
 include/linux/io_uring_types.h | 2 ++
 io_uring/register.c            | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 49008f00d064..cd6642855533 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -299,6 +299,8 @@ struct io_ring_ctx {
 
 		struct io_submit_state	submit_state;
 
+		struct io_reg_args	sqe_args;
+
 		/*
 		 * Modifications are protected by ->uring_lock and ->mmap_lock.
 		 * The flags, buf_pages and buf_nr_pages fields should be stable
diff --git a/io_uring/register.c b/io_uring/register.c
index b926eb053408..d2232b90a81d 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -607,6 +607,9 @@ static int io_register_mem_region(struct io_ring_ctx *ctx, void __user *uarg)
 		ctx->wait_args.ptr = io_region_get_ptr(&ctx->param_region);
 		ctx->wait_args.size = rd.size;
 	}
+
+	ctx->sqe_args.ptr = io_region_get_ptr(&ctx->param_region);
+	ctx->sqe_args.size = rd.size;
 	return 0;
 }
 
-- 
2.47.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 3/4] io_uring/rw: use READ_ONCE with rw attributes
  2024-12-30 13:30 [RFC 0/4] pre-mapped rw attributes Pavel Begunkov
  2024-12-30 13:30 ` [PATCH 1/4] io_uring: add structure for registered arguments Pavel Begunkov
  2024-12-30 13:30 ` [PATCH 2/4] io_uring: add registered request arguments Pavel Begunkov
@ 2024-12-30 13:30 ` Pavel Begunkov
  2024-12-30 13:30 ` [PATCH 4/4] io_uring/rw: pre-mapped " Pavel Begunkov
  3 siblings, 0 replies; 5+ messages in thread
From: Pavel Begunkov @ 2024-12-30 13:30 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence, Anuj Gupta, Kanchan Joshi

In preparation for pre-mapped attributes, read struct io_uring_attr_pi
with READ_ONCE and use an intermediate pointer.

Signed-off-by: Pavel Begunkov <[email protected]>
---
 io_uring/rw.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/io_uring/rw.c b/io_uring/rw.c
index 75f70935ccf4..dc1acaf95db1 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -264,23 +264,29 @@ static inline void io_meta_restore(struct io_async_rw *io, struct kiocb *kiocb)
 static int io_prep_rw_pi(struct io_kiocb *req, struct io_rw *rw, int ddir,
 			 u64 attr_ptr, u64 attr_type_mask)
 {
-	struct io_uring_attr_pi pi_attr;
+	struct io_uring_attr_pi __pi_attr;
+	struct io_uring_attr_pi *pi_attr;
 	struct io_async_rw *io;
+	void __user *pi_addr;
+	size_t pi_len;
 	int ret;
 
-	if (copy_from_user(&pi_attr, u64_to_user_ptr(attr_ptr),
+	if (copy_from_user(&__pi_attr, u64_to_user_ptr(attr_ptr),
 	    sizeof(pi_attr)))
 		return -EFAULT;
+	pi_attr = &__pi_attr;
 
-	if (pi_attr.rsvd)
+	if (pi_attr->rsvd)
 		return -EINVAL;
 
 	io = req->async_data;
-	io->meta.flags = pi_attr.flags;
-	io->meta.app_tag = pi_attr.app_tag;
-	io->meta.seed = pi_attr.seed;
-	ret = import_ubuf(ddir, u64_to_user_ptr(pi_attr.addr),
-			  pi_attr.len, &io->meta.iter);
+	io->meta.flags = READ_ONCE(pi_attr->flags);
+	io->meta.app_tag = READ_ONCE(pi_attr->app_tag);
+	io->meta.seed = READ_ONCE(pi_attr->seed);
+
+	pi_addr = u64_to_user_ptr(READ_ONCE(pi_attr->addr));
+	pi_len = READ_ONCE(pi_attr->len);
+	ret = import_ubuf(ddir, pi_addr, pi_len, &io->meta.iter);
 	if (unlikely(ret < 0))
 		return ret;
 	req->flags |= REQ_F_HAS_METADATA;
-- 
2.47.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 4/4] io_uring/rw: pre-mapped rw attributes
  2024-12-30 13:30 [RFC 0/4] pre-mapped rw attributes Pavel Begunkov
                   ` (2 preceding siblings ...)
  2024-12-30 13:30 ` [PATCH 3/4] io_uring/rw: use READ_ONCE with rw attributes Pavel Begunkov
@ 2024-12-30 13:30 ` Pavel Begunkov
  3 siblings, 0 replies; 5+ messages in thread
From: Pavel Begunkov @ 2024-12-30 13:30 UTC (permalink / raw)
  To: io-uring; +Cc: asml.silence, Anuj Gupta, Kanchan Joshi

Instead of copy_from_user()'ing request attributes, allow them to be
grabbed from a pre-registered parameter region like we do
with registered wait arguments.

Suggested-by: Anuj Gupta <[email protected]>
Signed-off-by: Pavel Begunkov <[email protected]>
---
 include/uapi/linux/io_uring.h |  4 +++-
 io_uring/rw.c                 | 19 ++++++++++++++-----
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 38f0d6b10eaf..ec6e6fd37d1c 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -112,7 +112,9 @@ struct io_uring_sqe {
 };
 
 /* sqe->attr_type_mask flags */
-#define IORING_RW_ATTR_FLAG_PI	(1U << 0)
+#define IORING_RW_ATTR_FLAG_PI		(1UL << 0)
+#define IORING_RW_ATTR_REGISTERED	(1UL << 63)
+
 /* PI attribute information */
 struct io_uring_attr_pi {
 		__u16	flags;
diff --git a/io_uring/rw.c b/io_uring/rw.c
index dc1acaf95db1..b1db4595788b 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -271,10 +271,17 @@ static int io_prep_rw_pi(struct io_kiocb *req, struct io_rw *rw, int ddir,
 	size_t pi_len;
 	int ret;
 
-	if (copy_from_user(&__pi_attr, u64_to_user_ptr(attr_ptr),
-	    sizeof(pi_attr)))
-		return -EFAULT;
-	pi_attr = &__pi_attr;
+	if (attr_type_mask & IORING_RW_ATTR_REGISTERED) {
+		pi_attr = io_args_get_ptr(&req->ctx->sqe_args, attr_ptr,
+					  sizeof(pi_attr));
+		if (IS_ERR(pi_attr))
+			return PTR_ERR(pi_attr);
+	} else {
+		if (copy_from_user(&__pi_attr, u64_to_user_ptr(attr_ptr),
+		    sizeof(pi_attr)))
+			return -EFAULT;
+		pi_attr = &__pi_attr;
+	}
 
 	if (pi_attr->rsvd)
 		return -EINVAL;
@@ -294,6 +301,8 @@ static int io_prep_rw_pi(struct io_kiocb *req, struct io_rw *rw, int ddir,
 	return ret;
 }
 
+#define IO_RW_ATTR_ALLOWED_MASK (IORING_RW_ATTR_FLAG_PI | IORING_RW_ATTR_REGISTERED)
+
 static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 		      int ddir, bool do_import)
 {
@@ -332,7 +341,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 		u64 attr_ptr;
 
 		/* only PI attribute is supported currently */
-		if (attr_type_mask != IORING_RW_ATTR_FLAG_PI)
+		if (attr_type_mask & IO_RW_ATTR_ALLOWED_MASK)
 			return -EINVAL;
 
 		attr_ptr = READ_ONCE(sqe->attr_ptr);
-- 
2.47.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2024-12-30 13:29 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-12-30 13:30 [RFC 0/4] pre-mapped rw attributes Pavel Begunkov
2024-12-30 13:30 ` [PATCH 1/4] io_uring: add structure for registered arguments Pavel Begunkov
2024-12-30 13:30 ` [PATCH 2/4] io_uring: add registered request arguments Pavel Begunkov
2024-12-30 13:30 ` [PATCH 3/4] io_uring/rw: use READ_ONCE with rw attributes Pavel Begunkov
2024-12-30 13:30 ` [PATCH 4/4] io_uring/rw: pre-mapped " Pavel Begunkov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox