public inbox for io-uring@vger.kernel.org
* [PATCH 3/4] io_uring/uring_cmd: get rid of io_uring_cmd_prep_setup()
  2025-06-05 19:40 [PATCHSET RFC v2 0/4] uring_cmd copy avoidance Jens Axboe
@ 2025-06-05 19:40 ` Jens Axboe
  2025-06-06 17:37   ` Caleb Sander Mateos
  0 siblings, 1 reply; 14+ messages in thread
From: Jens Axboe @ 2025-06-05 19:40 UTC (permalink / raw)
  To: io-uring; +Cc: csander, Jens Axboe

It's a pretty pointless helper, just allocates and copies data. Fold it
into io_uring_cmd_prep().

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/uring_cmd.c | 34 +++++++++++++---------------------
 1 file changed, 13 insertions(+), 21 deletions(-)

diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index 929cad6ee326..e204f4941d72 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -181,8 +181,7 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, u64 res2,
 }
 EXPORT_SYMBOL_GPL(io_uring_cmd_done);
 
-static int io_uring_cmd_prep_setup(struct io_kiocb *req,
-				   const struct io_uring_sqe *sqe)
+int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
 	struct io_async_cmd *ac;
@@ -190,6 +189,18 @@ static int io_uring_cmd_prep_setup(struct io_kiocb *req,
 	/* see io_uring_cmd_get_async_data() */
 	BUILD_BUG_ON(offsetof(struct io_async_cmd, data) != 0);
 
+	if (sqe->__pad1)
+		return -EINVAL;
+
+	ioucmd->flags = READ_ONCE(sqe->uring_cmd_flags);
+	if (ioucmd->flags & ~IORING_URING_CMD_MASK)
+		return -EINVAL;
+
+	if (ioucmd->flags & IORING_URING_CMD_FIXED)
+		req->buf_index = READ_ONCE(sqe->buf_index);
+
+	ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
+
 	ac = io_uring_alloc_async_data(&req->ctx->cmd_cache, req);
 	if (!ac)
 		return -ENOMEM;
@@ -207,25 +218,6 @@ static int io_uring_cmd_prep_setup(struct io_kiocb *req,
 	return 0;
 }
 
-int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
-{
-	struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
-
-	if (sqe->__pad1)
-		return -EINVAL;
-
-	ioucmd->flags = READ_ONCE(sqe->uring_cmd_flags);
-	if (ioucmd->flags & ~IORING_URING_CMD_MASK)
-		return -EINVAL;
-
-	if (ioucmd->flags & IORING_URING_CMD_FIXED)
-		req->buf_index = READ_ONCE(sqe->buf_index);
-
-	ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
-
-	return io_uring_cmd_prep_setup(req, sqe);
-}
-
 int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
-- 
2.49.0



* Re: [PATCH 3/4] io_uring/uring_cmd: get rid of io_uring_cmd_prep_setup()
  2025-06-05 19:40 ` [PATCH 3/4] io_uring/uring_cmd: get rid of io_uring_cmd_prep_setup() Jens Axboe
@ 2025-06-06 17:37   ` Caleb Sander Mateos
  0 siblings, 0 replies; 14+ messages in thread
From: Caleb Sander Mateos @ 2025-06-06 17:37 UTC (permalink / raw)
  To: Jens Axboe; +Cc: io-uring

On Thu, Jun 5, 2025 at 12:47 PM Jens Axboe <axboe@kernel.dk> wrote:
>
> It's a pretty pointless helper, just allocates and copies data. Fold it
> into io_uring_cmd_prep().
>
> Signed-off-by: Jens Axboe <axboe@kernel.dk>

Reviewed-by: Caleb Sander Mateos <csander@purestorage.com>


* [PATCH 3/4] io_uring/uring_cmd: get rid of io_uring_cmd_prep_setup()
  2025-06-06 21:54 [PATCHSET v3 0/4] uring_cmd copy avoidance Jens Axboe
@ 2025-06-06 21:54 ` Jens Axboe
  2025-06-08 19:57   ` Anuj gupta
  0 siblings, 1 reply; 14+ messages in thread
From: Jens Axboe @ 2025-06-06 21:54 UTC (permalink / raw)
  To: io-uring; +Cc: csander, Jens Axboe

It's a pretty pointless helper, just allocates and copies data. Fold it
into io_uring_cmd_prep().

Reviewed-by: Caleb Sander Mateos <csander@purestorage.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/uring_cmd.c | 34 +++++++++++++---------------------
 1 file changed, 13 insertions(+), 21 deletions(-)

diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index 929cad6ee326..e204f4941d72 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -181,8 +181,7 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, u64 res2,
 }
 EXPORT_SYMBOL_GPL(io_uring_cmd_done);
 
-static int io_uring_cmd_prep_setup(struct io_kiocb *req,
-				   const struct io_uring_sqe *sqe)
+int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
 	struct io_async_cmd *ac;
@@ -190,6 +189,18 @@ static int io_uring_cmd_prep_setup(struct io_kiocb *req,
 	/* see io_uring_cmd_get_async_data() */
 	BUILD_BUG_ON(offsetof(struct io_async_cmd, data) != 0);
 
+	if (sqe->__pad1)
+		return -EINVAL;
+
+	ioucmd->flags = READ_ONCE(sqe->uring_cmd_flags);
+	if (ioucmd->flags & ~IORING_URING_CMD_MASK)
+		return -EINVAL;
+
+	if (ioucmd->flags & IORING_URING_CMD_FIXED)
+		req->buf_index = READ_ONCE(sqe->buf_index);
+
+	ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
+
 	ac = io_uring_alloc_async_data(&req->ctx->cmd_cache, req);
 	if (!ac)
 		return -ENOMEM;
@@ -207,25 +218,6 @@ static int io_uring_cmd_prep_setup(struct io_kiocb *req,
 	return 0;
 }
 
-int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
-{
-	struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
-
-	if (sqe->__pad1)
-		return -EINVAL;
-
-	ioucmd->flags = READ_ONCE(sqe->uring_cmd_flags);
-	if (ioucmd->flags & ~IORING_URING_CMD_MASK)
-		return -EINVAL;
-
-	if (ioucmd->flags & IORING_URING_CMD_FIXED)
-		req->buf_index = READ_ONCE(sqe->buf_index);
-
-	ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
-
-	return io_uring_cmd_prep_setup(req, sqe);
-}
-
 int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
-- 
2.49.0



* Re: [PATCH 3/4] io_uring/uring_cmd: get rid of io_uring_cmd_prep_setup()
  2025-06-06 21:54 ` [PATCH 3/4] io_uring/uring_cmd: get rid of io_uring_cmd_prep_setup() Jens Axboe
@ 2025-06-08 19:57   ` Anuj gupta
  0 siblings, 0 replies; 14+ messages in thread
From: Anuj gupta @ 2025-06-08 19:57 UTC (permalink / raw)
  To: Jens Axboe; +Cc: io-uring, csander

Reviewed-by: Anuj Gupta <anuj20.g@samsung.com>


* [PATCHSET v4 0/4] uring_cmd copy avoidance
@ 2025-06-09 17:36 Jens Axboe
  2025-06-09 17:36 ` [PATCH 1/4] io_uring: add IO_URING_F_INLINE issue flag Jens Axboe
                   ` (3 more replies)
  0 siblings, 4 replies; 14+ messages in thread
From: Jens Axboe @ 2025-06-09 17:36 UTC (permalink / raw)
  To: io-uring; +Cc: csander

Hi,

Currently uring_cmd unconditionally copies the SQE at prep time, as it
has no other choice - the SQE data must remain stable after submit.
This can burn excessive memory bandwidth on that copy, as passthrough
will often use 128b SQEs, and it raises efficiency concerns as those
copies can also eat up quite a lot of CPU cycles.
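
For context, the sort of passthrough submission being measured looks
roughly like the sketch below (illustrative only, not part of this
series; it assumes liburing and an NVMe generic char device, the
device path, nsid, and LBA values are made up, and error handling is
trimmed). The point is that with IORING_SETUP_SQE128, every submission
carries a full 128b SQE, which is exactly what uring_cmd prep
currently memcpy()s:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <liburing.h>
#include <linux/nvme_ioctl.h>

int main(void)
{
	char buf[512] __attribute__((aligned(512)));
	struct nvme_uring_cmd *cmd;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	struct io_uring ring;
	int fd;

	fd = open("/dev/ng0n1", O_RDONLY);
	if (fd < 0)
		return 1;
	if (io_uring_queue_init(64, &ring,
				IORING_SETUP_SQE128 | IORING_SETUP_CQE32))
		return 1;

	sqe = io_uring_get_sqe(&ring);
	memset(sqe, 0, 2 * sizeof(*sqe));	/* 128b SQE */
	sqe->opcode = IORING_OP_URING_CMD;
	sqe->fd = fd;
	sqe->cmd_op = NVME_URING_CMD_IO;

	/* the NVMe command itself lives in the back half of the big SQE */
	cmd = (struct nvme_uring_cmd *)sqe->cmd;
	cmd->opcode = 0x02;			/* nvme_cmd_read */
	cmd->nsid = 1;				/* made-up namespace */
	cmd->addr = (__u64)(uintptr_t)buf;
	cmd->data_len = sizeof(buf);
	cmd->cdw10 = 0;				/* start LBA, low 32 bits */
	cmd->cdw12 = 0;				/* NLB - 1: one 512b block */

	io_uring_submit(&ring);
	io_uring_wait_cqe(&ring, &cqe);
	printf("res=%d\n", cqe->res);
	io_uring_cqe_seen(&ring, cqe);
	io_uring_queue_exit(&ring);
	close(fd);
	return 0;
}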

As a quick test, with the current -git kernel on a box with 23 NVMe
drives doing passthrough IO, memcpy() is the highest cycle user at
9.05%, all of it from the uring_cmd prep path. The test case is a 512b
random read, which runs at 91-92M IOPS.

With these patches, memcpy() is gone from the profiles, and it runs
at 98-99M IOPS, or about 7-8% faster.

Before:

IOPS=91.12M, BW=44.49GiB/s, IOS/call=32/32
IOPS=91.16M, BW=44.51GiB/s, IOS/call=32/32
IOPS=91.18M, BW=44.52GiB/s, IOS/call=31/32
IOPS=91.92M, BW=44.88GiB/s, IOS/call=32/32
IOPS=91.88M, BW=44.86GiB/s, IOS/call=32/32
IOPS=91.82M, BW=44.83GiB/s, IOS/call=32/31
IOPS=91.52M, BW=44.69GiB/s, IOS/call=32/32

with the top perf report -g --no-children being:

+    9.07%  io_uring  [kernel.kallsyms]  [k] memcpy

and after:

# bash run-peak-pass.sh
[...]
IOPS=99.30M, BW=48.49GiB/s, IOS/call=32/32
IOPS=99.27M, BW=48.47GiB/s, IOS/call=31/32
IOPS=99.60M, BW=48.63GiB/s, IOS/call=32/32
IOPS=99.68M, BW=48.67GiB/s, IOS/call=32/31
IOPS=99.80M, BW=48.73GiB/s, IOS/call=31/32
IOPS=99.84M, BW=48.75GiB/s, IOS/call=32/32

with memcpy not even showing up in the profiles. If you do the actual
math of 100M requests per second and 128b of copying per IOP (128 B *
100M/s = 12.8 GB/s), then it's almost 12GiB/sec of reduced memory
bandwidth.

Even for lower IOPS production testing, Caleb reports that memcpy()
overhead is in the realm of 1.1% of CPU time.

I think this approach is saner, and in fact it can be extended to
reduce over-eager copies in other spots. For now I just did uring_cmd,
and verified that the memcpys are still gone from my test.

Can also be found here:

https://git.kernel.dk/cgit/linux/log/?h=uring_cmd.2

 include/linux/io_uring_types.h |  5 ++++
 io_uring/io_uring.c            | 39 ++++++++++++++++++++++++------
 io_uring/opdef.c               |  1 +
 io_uring/opdef.h               |  1 +
 io_uring/uring_cmd.c           | 43 +++++++++++++++-------------------
 io_uring/uring_cmd.h           |  1 +
 6 files changed, 59 insertions(+), 31 deletions(-)

Since v3:
- Add REQ_F_SQE_COPY flag, so that ->sqe_copy() can only be called once
  and so that the IO_URING_F_INLINE WARN_ON_ONCE()/-EFAULT check
  actually works.
- Call io_req_sqe_copy() for any link member
- Update patch 4 commit message
- Rebase on 6.16-rc1

-- 
Jens Axboe



* [PATCH 1/4] io_uring: add IO_URING_F_INLINE issue flag
  2025-06-09 17:36 [PATCHSET v4 0/4] uring_cmd copy avoidance Jens Axboe
@ 2025-06-09 17:36 ` Jens Axboe
  2025-06-09 21:53   ` Caleb Sander Mateos
  2025-06-09 17:36 ` [PATCH 2/4] io_uring: add struct io_cold_def->sqe_copy() method Jens Axboe
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 14+ messages in thread
From: Jens Axboe @ 2025-06-09 17:36 UTC (permalink / raw)
  To: io-uring; +Cc: csander, Jens Axboe

Set when the execution of the request is done inline from the system
call itself. Any deferred issue will never have this flag set.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h |  2 ++
 io_uring/io_uring.c            | 12 +++++++-----
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 2922635986f5..054c43c02c96 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -26,6 +26,8 @@ enum io_uring_cmd_flags {
 	IO_URING_F_MULTISHOT		= 4,
 	/* executed by io-wq */
 	IO_URING_F_IOWQ			= 8,
+	/* executed inline from syscall */
+	IO_URING_F_INLINE		= 16,
 	/* int's last bit, sign checks are usually faster than a bit test */
 	IO_URING_F_NONBLOCK		= INT_MIN,
 
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index cf759c172083..0f9f6a173e66 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -147,7 +147,7 @@ static bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
 					 bool cancel_all,
 					 bool is_sqpoll_thread);
 
-static void io_queue_sqe(struct io_kiocb *req);
+static void io_queue_sqe(struct io_kiocb *req, unsigned int extra_flags);
 static void __io_req_caches_free(struct io_ring_ctx *ctx);
 
 static __read_mostly DEFINE_STATIC_KEY_FALSE(io_key_has_sqarray);
@@ -1377,7 +1377,7 @@ void io_req_task_submit(struct io_kiocb *req, io_tw_token_t tw)
 	else if (req->flags & REQ_F_FORCE_ASYNC)
 		io_queue_iowq(req);
 	else
-		io_queue_sqe(req);
+		io_queue_sqe(req, 0);
 }
 
 void io_req_task_queue_fail(struct io_kiocb *req, int ret)
@@ -1957,12 +1957,14 @@ static void io_queue_async(struct io_kiocb *req, int ret)
 	}
 }
 
-static inline void io_queue_sqe(struct io_kiocb *req)
+static inline void io_queue_sqe(struct io_kiocb *req, unsigned int extra_flags)
 	__must_hold(&req->ctx->uring_lock)
 {
+	unsigned int issue_flags = IO_URING_F_NONBLOCK |
+				   IO_URING_F_COMPLETE_DEFER | extra_flags;
 	int ret;
 
-	ret = io_issue_sqe(req, IO_URING_F_NONBLOCK|IO_URING_F_COMPLETE_DEFER);
+	ret = io_issue_sqe(req, issue_flags);
 
 	/*
 	 * We async punt it if the file wasn't marked NOWAIT, or if the file
@@ -2218,7 +2220,7 @@ static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 		return 0;
 	}
 
-	io_queue_sqe(req);
+	io_queue_sqe(req, IO_URING_F_INLINE);
 	return 0;
 }
 
-- 
2.49.0
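
As an aside, a hypothetical consumer of the new flag could look like
the sketch below - io_foo and its helpers are made-up names, purely to
illustrate the inline vs deferred distinction, not code from this
series:

static int io_foo_issue(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_foo *foo = io_kiocb_to_cmd(req, struct io_foo);

	if (issue_flags & IO_URING_F_INLINE) {
		/*
		 * Issued directly from the io_uring_enter() syscall, so the
		 * SQE memory in the SQ ring is still valid and can be read
		 * directly.
		 */
		return io_foo_issue_fast(foo);
	}

	/* io-wq or task_work issue: rely only on data copied at prep time */
	return io_foo_issue_deferred(foo);
}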



* [PATCH 2/4] io_uring: add struct io_cold_def->sqe_copy() method
  2025-06-09 17:36 [PATCHSET v4 0/4] uring_cmd copy avoidance Jens Axboe
  2025-06-09 17:36 ` [PATCH 1/4] io_uring: add IO_URING_F_INLINE issue flag Jens Axboe
@ 2025-06-09 17:36 ` Jens Axboe
  2025-06-09 21:54   ` Caleb Sander Mateos
  2025-06-09 17:36 ` [PATCH 3/4] io_uring/uring_cmd: get rid of io_uring_cmd_prep_setup() Jens Axboe
  2025-06-09 17:36 ` [PATCH 4/4] io_uring/uring_cmd: implement ->sqe_copy() to avoid unnecessary copies Jens Axboe
  3 siblings, 1 reply; 14+ messages in thread
From: Jens Axboe @ 2025-06-09 17:36 UTC (permalink / raw)
  To: io-uring; +Cc: csander, Jens Axboe

Will be called by the core of io_uring, if inline issue is not going
to be tried for a request. Opcodes can define this handler to defer
copying of SQE data that should remain stable.

Only called if IO_URING_F_INLINE is set. If it isn't set, then there's a
bug in the core handling of this, and -EFAULT will be returned instead
to terminate the request. This will trigger a WARN_ON_ONCE(). Don't
expect this to ever trigger, and down the line this can be removed.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h |  3 +++
 io_uring/io_uring.c            | 27 +++++++++++++++++++++++++--
 io_uring/opdef.h               |  1 +
 3 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 054c43c02c96..a0331ab80b2d 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -504,6 +504,7 @@ enum {
 	REQ_F_BUF_NODE_BIT,
 	REQ_F_HAS_METADATA_BIT,
 	REQ_F_IMPORT_BUFFER_BIT,
+	REQ_F_SQE_COPY_BIT,
 
 	/* not a real bit, just to check we're not overflowing the space */
 	__REQ_F_LAST_BIT,
@@ -593,6 +594,8 @@ enum {
 	 * For SEND_ZC, whether to import buffers (i.e. the first issue).
 	 */
 	REQ_F_IMPORT_BUFFER	= IO_REQ_FLAG(REQ_F_IMPORT_BUFFER_BIT),
+	/* ->sqe_copy() has been called, if necessary */
+	REQ_F_SQE_COPY		= IO_REQ_FLAG(REQ_F_SQE_COPY_BIT),
 };
 
 typedef void (*io_req_tw_func_t)(struct io_kiocb *req, io_tw_token_t tw);
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 0f9f6a173e66..3768d426c2ad 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1935,14 +1935,34 @@ struct file *io_file_get_normal(struct io_kiocb *req, int fd)
 	return file;
 }
 
-static void io_queue_async(struct io_kiocb *req, int ret)
+static int io_req_sqe_copy(struct io_kiocb *req, unsigned int issue_flags)
+{
+	const struct io_cold_def *def = &io_cold_defs[req->opcode];
+
+	if (req->flags & REQ_F_SQE_COPY)
+		return 0;
+	req->flags |= REQ_F_SQE_COPY;
+	if (!def->sqe_copy)
+		return 0;
+	if (WARN_ON_ONCE(!(issue_flags & IO_URING_F_INLINE)))
+		return -EFAULT;
+	def->sqe_copy(req);
+	return 0;
+}
+
+static void io_queue_async(struct io_kiocb *req, unsigned int issue_flags, int ret)
 	__must_hold(&req->ctx->uring_lock)
 {
 	if (ret != -EAGAIN || (req->flags & REQ_F_NOWAIT)) {
+fail:
 		io_req_defer_failed(req, ret);
 		return;
 	}
 
+	ret = io_req_sqe_copy(req, issue_flags);
+	if (unlikely(ret))
+		goto fail;
+
 	switch (io_arm_poll_handler(req, 0)) {
 	case IO_APOLL_READY:
 		io_kbuf_recycle(req, 0);
@@ -1971,7 +1991,7 @@ static inline void io_queue_sqe(struct io_kiocb *req, unsigned int extra_flags)
 	 * doesn't support non-blocking read/write attempts
 	 */
 	if (unlikely(ret))
-		io_queue_async(req, ret);
+		io_queue_async(req, issue_flags, ret);
 }
 
 static void io_queue_sqe_fallback(struct io_kiocb *req)
@@ -1986,6 +2006,8 @@ static void io_queue_sqe_fallback(struct io_kiocb *req)
 		req->flags |= REQ_F_LINK;
 		io_req_defer_failed(req, req->cqe.res);
 	} else {
+		/* can't fail with IO_URING_F_INLINE */
+		io_req_sqe_copy(req, IO_URING_F_INLINE);
 		if (unlikely(req->ctx->drain_active))
 			io_drain_req(req);
 		else
@@ -2197,6 +2219,7 @@ static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	 */
 	if (unlikely(link->head)) {
 		trace_io_uring_link(req, link->last);
+		io_req_sqe_copy(req, IO_URING_F_INLINE);
 		link->last->link = req;
 		link->last = req;
 
diff --git a/io_uring/opdef.h b/io_uring/opdef.h
index 719a52104abe..c2f0907ed78c 100644
--- a/io_uring/opdef.h
+++ b/io_uring/opdef.h
@@ -38,6 +38,7 @@ struct io_issue_def {
 struct io_cold_def {
 	const char		*name;
 
+	void (*sqe_copy)(struct io_kiocb *);
 	void (*cleanup)(struct io_kiocb *);
 	void (*fail)(struct io_kiocb *);
 };
-- 
2.49.0



* [PATCH 3/4] io_uring/uring_cmd: get rid of io_uring_cmd_prep_setup()
  2025-06-09 17:36 [PATCHSET v4 0/4] uring_cmd copy avoidance Jens Axboe
  2025-06-09 17:36 ` [PATCH 1/4] io_uring: add IO_URING_F_INLINE issue flag Jens Axboe
  2025-06-09 17:36 ` [PATCH 2/4] io_uring: add struct io_cold_def->sqe_copy() method Jens Axboe
@ 2025-06-09 17:36 ` Jens Axboe
  2025-06-09 17:36 ` [PATCH 4/4] io_uring/uring_cmd: implement ->sqe_copy() to avoid unnecessary copies Jens Axboe
  3 siblings, 0 replies; 14+ messages in thread
From: Jens Axboe @ 2025-06-09 17:36 UTC (permalink / raw)
  To: io-uring; +Cc: csander, Jens Axboe, Anuj Gupta

It's a pretty pointless helper, just allocates and copies data. Fold it
into io_uring_cmd_prep().

Reviewed-by: Caleb Sander Mateos <csander@purestorage.com>
Reviewed-by: Anuj Gupta <anuj20.g@samsung.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/uring_cmd.c | 34 +++++++++++++---------------------
 1 file changed, 13 insertions(+), 21 deletions(-)

diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index 929cad6ee326..e204f4941d72 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -181,8 +181,7 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, u64 res2,
 }
 EXPORT_SYMBOL_GPL(io_uring_cmd_done);
 
-static int io_uring_cmd_prep_setup(struct io_kiocb *req,
-				   const struct io_uring_sqe *sqe)
+int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
 	struct io_async_cmd *ac;
@@ -190,6 +189,18 @@ static int io_uring_cmd_prep_setup(struct io_kiocb *req,
 	/* see io_uring_cmd_get_async_data() */
 	BUILD_BUG_ON(offsetof(struct io_async_cmd, data) != 0);
 
+	if (sqe->__pad1)
+		return -EINVAL;
+
+	ioucmd->flags = READ_ONCE(sqe->uring_cmd_flags);
+	if (ioucmd->flags & ~IORING_URING_CMD_MASK)
+		return -EINVAL;
+
+	if (ioucmd->flags & IORING_URING_CMD_FIXED)
+		req->buf_index = READ_ONCE(sqe->buf_index);
+
+	ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
+
 	ac = io_uring_alloc_async_data(&req->ctx->cmd_cache, req);
 	if (!ac)
 		return -ENOMEM;
@@ -207,25 +218,6 @@ static int io_uring_cmd_prep_setup(struct io_kiocb *req,
 	return 0;
 }
 
-int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
-{
-	struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
-
-	if (sqe->__pad1)
-		return -EINVAL;
-
-	ioucmd->flags = READ_ONCE(sqe->uring_cmd_flags);
-	if (ioucmd->flags & ~IORING_URING_CMD_MASK)
-		return -EINVAL;
-
-	if (ioucmd->flags & IORING_URING_CMD_FIXED)
-		req->buf_index = READ_ONCE(sqe->buf_index);
-
-	ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
-
-	return io_uring_cmd_prep_setup(req, sqe);
-}
-
 int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
-- 
2.49.0



* [PATCH 4/4] io_uring/uring_cmd: implement ->sqe_copy() to avoid unnecessary copies
  2025-06-09 17:36 [PATCHSET v4 0/4] uring_cmd copy avoidance Jens Axboe
                   ` (2 preceding siblings ...)
  2025-06-09 17:36 ` [PATCH 3/4] io_uring/uring_cmd: get rid of io_uring_cmd_prep_setup() Jens Axboe
@ 2025-06-09 17:36 ` Jens Axboe
  2025-06-09 21:54   ` Caleb Sander Mateos
  3 siblings, 1 reply; 14+ messages in thread
From: Jens Axboe @ 2025-06-09 17:36 UTC (permalink / raw)
  To: io-uring; +Cc: csander, Jens Axboe

uring_cmd currently copies the full SQE at prep time, just in case it
needs it to be stable. However, for inline completions or requests that
get queued up on the device side, there's no need to ever copy the SQE.
This is particularly important, as various use cases of uring_cmd will
be using 128b sized SQEs.

Opt in to using ->sqe_copy() to let the core of io_uring decide when to
copy SQEs. This callback will only be called if it is safe to do so.

Reviewed-by: Caleb Sander Mateos <csander@purestorage.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/opdef.c     |  1 +
 io_uring/uring_cmd.c | 21 ++++++++++++---------
 io_uring/uring_cmd.h |  1 +
 3 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index 6e0882b051f9..287f9a23b816 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -759,6 +759,7 @@ const struct io_cold_def io_cold_defs[] = {
 	},
 	[IORING_OP_URING_CMD] = {
 		.name			= "URING_CMD",
+		.sqe_copy		= io_uring_cmd_sqe_copy,
 		.cleanup		= io_uring_cmd_cleanup,
 	},
 	[IORING_OP_SEND_ZC] = {
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index e204f4941d72..a99dc2f9c4b5 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -205,17 +205,20 @@ int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	if (!ac)
 		return -ENOMEM;
 	ac->data.op_data = NULL;
+	ioucmd->sqe = sqe;
+	return 0;
+}
+
+void io_uring_cmd_sqe_copy(struct io_kiocb *req)
+{
+	struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
+	struct io_async_cmd *ac = req->async_data;
 
-	/*
-	 * Unconditionally cache the SQE for now - this is only needed for
-	 * requests that go async, but prep handlers must ensure that any
-	 * sqe data is stable beyond prep. Since uring_cmd is special in
-	 * that it doesn't read in per-op data, play it safe and ensure that
-	 * any SQE data is stable beyond prep. This can later get relaxed.
-	 */
-	memcpy(ac->sqes, sqe, uring_sqe_size(req->ctx));
+	/* already copied, nothing to do */
+	if (ioucmd->sqe == ac->sqes)
+		return;
+	memcpy(ac->sqes, ioucmd->sqe, uring_sqe_size(req->ctx));
 	ioucmd->sqe = ac->sqes;
-	return 0;
 }
 
 int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
diff --git a/io_uring/uring_cmd.h b/io_uring/uring_cmd.h
index e6a5142c890e..a6dad47afc6b 100644
--- a/io_uring/uring_cmd.h
+++ b/io_uring/uring_cmd.h
@@ -11,6 +11,7 @@ struct io_async_cmd {
 
 int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags);
 int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+void io_uring_cmd_sqe_copy(struct io_kiocb *req);
 void io_uring_cmd_cleanup(struct io_kiocb *req);
 
 bool io_uring_try_cancel_uring_cmd(struct io_ring_ctx *ctx,
-- 
2.49.0



* Re: [PATCH 1/4] io_uring: add IO_URING_F_INLINE issue flag
  2025-06-09 17:36 ` [PATCH 1/4] io_uring: add IO_URING_F_INLINE issue flag Jens Axboe
@ 2025-06-09 21:53   ` Caleb Sander Mateos
  0 siblings, 0 replies; 14+ messages in thread
From: Caleb Sander Mateos @ 2025-06-09 21:53 UTC (permalink / raw)
  To: Jens Axboe; +Cc: io-uring

On Mon, Jun 9, 2025 at 10:39 AM Jens Axboe <axboe@kernel.dk> wrote:
>
> Set when the execution of the request is done inline from the system
> call itself. Any deferred issue will never have this flag set.
>
> Signed-off-by: Jens Axboe <axboe@kernel.dk>

Reviewed-by: Caleb Sander Mateos <csander@purestorage.com>


* Re: [PATCH 2/4] io_uring: add struct io_cold_def->sqe_copy() method
  2025-06-09 17:36 ` [PATCH 2/4] io_uring: add struct io_cold_def->sqe_copy() method Jens Axboe
@ 2025-06-09 21:54   ` Caleb Sander Mateos
  2025-06-10 13:32     ` Jens Axboe
  0 siblings, 1 reply; 14+ messages in thread
From: Caleb Sander Mateos @ 2025-06-09 21:54 UTC (permalink / raw)
  To: Jens Axboe; +Cc: io-uring

On Mon, Jun 9, 2025 at 10:39 AM Jens Axboe <axboe@kernel.dk> wrote:
>
> Will be called by the core of io_uring, if inline issue is not going
> to be tried for a request. Opcodes can define this handler to defer
> copying of SQE data that should remain stable.
>
> Only called if IO_URING_F_INLINE is set. If it isn't set, then there's a
> bug in the core handling of this, and -EFAULT will be returned instead
> to terminate the request. This will trigger a WARN_ON_ONCE(). Don't
> expect this to ever trigger, and down the line this can be removed.
>
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> ---
>  include/linux/io_uring_types.h |  3 +++
>  io_uring/io_uring.c            | 27 +++++++++++++++++++++++++--
>  io_uring/opdef.h               |  1 +
>  3 files changed, 29 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
> index 054c43c02c96..a0331ab80b2d 100644
> --- a/include/linux/io_uring_types.h
> +++ b/include/linux/io_uring_types.h
> @@ -504,6 +504,7 @@ enum {
>         REQ_F_BUF_NODE_BIT,
>         REQ_F_HAS_METADATA_BIT,
>         REQ_F_IMPORT_BUFFER_BIT,
> +       REQ_F_SQE_COPY_BIT,

naming nit: I would interpret "copy" as "needs copy", which is the
opposite of what the bit represents. How about changing "COPY" to
"COPIED"?

>
>         /* not a real bit, just to check we're not overflowing the space */
>         __REQ_F_LAST_BIT,
> @@ -593,6 +594,8 @@ enum {
>          * For SEND_ZC, whether to import buffers (i.e. the first issue).
>          */
>         REQ_F_IMPORT_BUFFER     = IO_REQ_FLAG(REQ_F_IMPORT_BUFFER_BIT),
> +       /* ->sqe_copy() has been called, if necessary */
> +       REQ_F_SQE_COPY          = IO_REQ_FLAG(REQ_F_SQE_COPY_BIT),
>  };
>
>  typedef void (*io_req_tw_func_t)(struct io_kiocb *req, io_tw_token_t tw);
> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
> index 0f9f6a173e66..3768d426c2ad 100644
> --- a/io_uring/io_uring.c
> +++ b/io_uring/io_uring.c
> @@ -1935,14 +1935,34 @@ struct file *io_file_get_normal(struct io_kiocb *req, int fd)
>         return file;
>  }
>
> -static void io_queue_async(struct io_kiocb *req, int ret)
> +static int io_req_sqe_copy(struct io_kiocb *req, unsigned int issue_flags)
> +{
> +       const struct io_cold_def *def = &io_cold_defs[req->opcode];
> +
> +       if (req->flags & REQ_F_SQE_COPY)
> +               return 0;
> +       req->flags |= REQ_F_SQE_COPY;
> +       if (!def->sqe_copy)
> +               return 0;
> +       if (WARN_ON_ONCE(!(issue_flags & IO_URING_F_INLINE)))
> +               return -EFAULT;
> +       def->sqe_copy(req);
> +       return 0;
> +}
> +
> +static void io_queue_async(struct io_kiocb *req, unsigned int issue_flags, int ret)
>         __must_hold(&req->ctx->uring_lock)
>  {
>         if (ret != -EAGAIN || (req->flags & REQ_F_NOWAIT)) {
> +fail:
>                 io_req_defer_failed(req, ret);
>                 return;
>         }
>
> +       ret = io_req_sqe_copy(req, issue_flags);
> +       if (unlikely(ret))
> +               goto fail;
> +
>         switch (io_arm_poll_handler(req, 0)) {
>         case IO_APOLL_READY:
>                 io_kbuf_recycle(req, 0);
> @@ -1971,7 +1991,7 @@ static inline void io_queue_sqe(struct io_kiocb *req, unsigned int extra_flags)
>          * doesn't support non-blocking read/write attempts
>          */
>         if (unlikely(ret))
> -               io_queue_async(req, ret);
> +               io_queue_async(req, issue_flags, ret);
>  }
>
>  static void io_queue_sqe_fallback(struct io_kiocb *req)
> @@ -1986,6 +2006,8 @@ static void io_queue_sqe_fallback(struct io_kiocb *req)
>                 req->flags |= REQ_F_LINK;
>                 io_req_defer_failed(req, req->cqe.res);
>         } else {
> +               /* can't fail with IO_URING_F_INLINE */
> +               io_req_sqe_copy(req, IO_URING_F_INLINE);

I think this is currently correct. I would mildly prefer for the
callers to explicitly pass IO_URING_F_INLINE to make it harder for
non-inline callers to be added in the future. But maybe it's not worth
the effort to pass issue_flags through multiple layers of function
calls.

Reviewed-by: Caleb Sander Mateos <csander@purestorage.com>



>                 if (unlikely(req->ctx->drain_active))
>                         io_drain_req(req);
>                 else
> @@ -2197,6 +2219,7 @@ static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
>          */
>         if (unlikely(link->head)) {
>                 trace_io_uring_link(req, link->last);
> +               io_req_sqe_copy(req, IO_URING_F_INLINE);
>                 link->last->link = req;
>                 link->last = req;
>
> diff --git a/io_uring/opdef.h b/io_uring/opdef.h
> index 719a52104abe..c2f0907ed78c 100644
> --- a/io_uring/opdef.h
> +++ b/io_uring/opdef.h
> @@ -38,6 +38,7 @@ struct io_issue_def {
>  struct io_cold_def {
>         const char              *name;
>
> +       void (*sqe_copy)(struct io_kiocb *);
>         void (*cleanup)(struct io_kiocb *);
>         void (*fail)(struct io_kiocb *);
>  };
> --
> 2.49.0
>


* Re: [PATCH 4/4] io_uring/uring_cmd: implement ->sqe_copy() to avoid unnecessary copies
  2025-06-09 17:36 ` [PATCH 4/4] io_uring/uring_cmd: implement ->sqe_copy() to avoid unnecessary copies Jens Axboe
@ 2025-06-09 21:54   ` Caleb Sander Mateos
  2025-06-10 13:35     ` Jens Axboe
  0 siblings, 1 reply; 14+ messages in thread
From: Caleb Sander Mateos @ 2025-06-09 21:54 UTC (permalink / raw)
  To: Jens Axboe; +Cc: io-uring

On Mon, Jun 9, 2025 at 10:39 AM Jens Axboe <axboe@kernel.dk> wrote:
>
> uring_cmd currently copies the full SQE at prep time, just in case it
> needs it to be stable. However, for inline completions or requests that
> get queued up on the device side, there's no need to ever copy the SQE.
> This is particularly important, as various use cases of uring_cmd will
> be using 128b sized SQEs.
>
> Opt in to using ->sqe_copy() to let the core of io_uring decide when to
> copy SQEs. This callback will only be called if it is safe to do so.
>
> Reviewed-by: Caleb Sander Mateos <csander@purestorage.com>
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> ---
>  io_uring/opdef.c     |  1 +
>  io_uring/uring_cmd.c | 21 ++++++++++++---------
>  io_uring/uring_cmd.h |  1 +
>  3 files changed, 14 insertions(+), 9 deletions(-)
>
> diff --git a/io_uring/opdef.c b/io_uring/opdef.c
> index 6e0882b051f9..287f9a23b816 100644
> --- a/io_uring/opdef.c
> +++ b/io_uring/opdef.c
> @@ -759,6 +759,7 @@ const struct io_cold_def io_cold_defs[] = {
>         },
>         [IORING_OP_URING_CMD] = {
>                 .name                   = "URING_CMD",
> +               .sqe_copy               = io_uring_cmd_sqe_copy,
>                 .cleanup                = io_uring_cmd_cleanup,
>         },
>         [IORING_OP_SEND_ZC] = {
> diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
> index e204f4941d72..a99dc2f9c4b5 100644
> --- a/io_uring/uring_cmd.c
> +++ b/io_uring/uring_cmd.c
> @@ -205,17 +205,20 @@ int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>         if (!ac)
>                 return -ENOMEM;
>         ac->data.op_data = NULL;
> +       ioucmd->sqe = sqe;
> +       return 0;
> +}
> +
> +void io_uring_cmd_sqe_copy(struct io_kiocb *req)
> +{
> +       struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
> +       struct io_async_cmd *ac = req->async_data;
>
> -       /*
> -        * Unconditionally cache the SQE for now - this is only needed for
> -        * requests that go async, but prep handlers must ensure that any
> -        * sqe data is stable beyond prep. Since uring_cmd is special in
> -        * that it doesn't read in per-op data, play it safe and ensure that
> -        * any SQE data is stable beyond prep. This can later get relaxed.
> -        */
> -       memcpy(ac->sqes, sqe, uring_sqe_size(req->ctx));
> +       /* already copied, nothing to do */
> +       if (ioucmd->sqe == ac->sqes)

REQ_F_SQE_COPY should prevent this from ever happening, right? Could
we make it a WARN_ON()? Or drop it entirely?

Best,
Caleb


> +               return;
> +       memcpy(ac->sqes, ioucmd->sqe, uring_sqe_size(req->ctx));
>         ioucmd->sqe = ac->sqes;
> -       return 0;
>  }
>
>  int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
> diff --git a/io_uring/uring_cmd.h b/io_uring/uring_cmd.h
> index e6a5142c890e..a6dad47afc6b 100644
> --- a/io_uring/uring_cmd.h
> +++ b/io_uring/uring_cmd.h
> @@ -11,6 +11,7 @@ struct io_async_cmd {
>
>  int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags);
>  int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
> +void io_uring_cmd_sqe_copy(struct io_kiocb *req);
>  void io_uring_cmd_cleanup(struct io_kiocb *req);
>
>  bool io_uring_try_cancel_uring_cmd(struct io_ring_ctx *ctx,
> --
> 2.49.0
>


* Re: [PATCH 2/4] io_uring: add struct io_cold_def->sqe_copy() method
  2025-06-09 21:54   ` Caleb Sander Mateos
@ 2025-06-10 13:32     ` Jens Axboe
  0 siblings, 0 replies; 14+ messages in thread
From: Jens Axboe @ 2025-06-10 13:32 UTC (permalink / raw)
  To: Caleb Sander Mateos; +Cc: io-uring

On 6/9/25 3:54 PM, Caleb Sander Mateos wrote:
> On Mon, Jun 9, 2025 at 10:39 AM Jens Axboe <axboe@kernel.dk> wrote:
>>
>> Will be called by the core of io_uring, if inline issue is not going
>> to be tried for a request. Opcodes can define this handler to defer
>> copying of SQE data that should remain stable.
>>
>> Only called if IO_URING_F_INLINE is set. If it isn't set, then there's a
>> bug in the core handling of this, and -EFAULT will be returned instead
>> to terminate the request. This will trigger a WARN_ON_ONCE(). Don't
>> expect this to ever trigger, and down the line this can be removed.
>>
>> Signed-off-by: Jens Axboe <axboe@kernel.dk>
>> ---
>>  include/linux/io_uring_types.h |  3 +++
>>  io_uring/io_uring.c            | 27 +++++++++++++++++++++++++--
>>  io_uring/opdef.h               |  1 +
>>  3 files changed, 29 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
>> index 054c43c02c96..a0331ab80b2d 100644
>> --- a/include/linux/io_uring_types.h
>> +++ b/include/linux/io_uring_types.h
>> @@ -504,6 +504,7 @@ enum {
>>         REQ_F_BUF_NODE_BIT,
>>         REQ_F_HAS_METADATA_BIT,
>>         REQ_F_IMPORT_BUFFER_BIT,
>> +       REQ_F_SQE_COPY_BIT,
> 
> naming nit: I would interpret "copy" as "needs copy", which is the
> opposite of what the bit represents. How about changing "COPY" to
> "COPIED"?

That is more descriptive, I'll make that change.

>>  static void io_queue_sqe_fallback(struct io_kiocb *req)
>> @@ -1986,6 +2006,8 @@ static void io_queue_sqe_fallback(struct io_kiocb *req)
>>                 req->flags |= REQ_F_LINK;
>>                 io_req_defer_failed(req, req->cqe.res);
>>         } else {
>> +               /* can't fail with IO_URING_F_INLINE */
>> +               io_req_sqe_copy(req, IO_URING_F_INLINE);
> 
> I think this is currently correct. I would mildly prefer for the
> callers to explicitly pass IO_URING_F_INLINE to make it harder for
> non-inline callers to be added in the future. But maybe it's not worth
> the effort to pass issue_flags through multiple layers of function
> calls.

I did ponder that and decided against it, as it'd be a lot of flag
passing just for that. I'll keep it as-is for now.

-- 
Jens Axboe


* Re: [PATCH 4/4] io_uring/uring_cmd: implement ->sqe_copy() to avoid unnecessary copies
  2025-06-09 21:54   ` Caleb Sander Mateos
@ 2025-06-10 13:35     ` Jens Axboe
  0 siblings, 0 replies; 14+ messages in thread
From: Jens Axboe @ 2025-06-10 13:35 UTC (permalink / raw)
  To: Caleb Sander Mateos; +Cc: io-uring

On 6/9/25 3:54 PM, Caleb Sander Mateos wrote:
> On Mon, Jun 9, 2025 at 10:39 AM Jens Axboe <axboe@kernel.dk> wrote:
>>
>> uring_cmd currently copies the full SQE at prep time, just in case it
>> needs it to be stable. However, for inline completions or requests that
>> get queued up on the device side, there's no need to ever copy the SQE.
>> This is particularly important, as various use cases of uring_cmd will
>> be using 128b sized SQEs.
>>
>> Opt in to using ->sqe_copy() to let the core of io_uring decide when to
>> copy SQEs. This callback will only be called if it is safe to do so.
>>
>> Reviewed-by: Caleb Sander Mateos <csander@purestorage.com>
>> Signed-off-by: Jens Axboe <axboe@kernel.dk>
>> ---
>>  io_uring/opdef.c     |  1 +
>>  io_uring/uring_cmd.c | 21 ++++++++++++---------
>>  io_uring/uring_cmd.h |  1 +
>>  3 files changed, 14 insertions(+), 9 deletions(-)
>>
>> diff --git a/io_uring/opdef.c b/io_uring/opdef.c
>> index 6e0882b051f9..287f9a23b816 100644
>> --- a/io_uring/opdef.c
>> +++ b/io_uring/opdef.c
>> @@ -759,6 +759,7 @@ const struct io_cold_def io_cold_defs[] = {
>>         },
>>         [IORING_OP_URING_CMD] = {
>>                 .name                   = "URING_CMD",
>> +               .sqe_copy               = io_uring_cmd_sqe_copy,
>>                 .cleanup                = io_uring_cmd_cleanup,
>>         },
>>         [IORING_OP_SEND_ZC] = {
>> diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
>> index e204f4941d72..a99dc2f9c4b5 100644
>> --- a/io_uring/uring_cmd.c
>> +++ b/io_uring/uring_cmd.c
>> @@ -205,17 +205,20 @@ int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>>         if (!ac)
>>                 return -ENOMEM;
>>         ac->data.op_data = NULL;
>> +       ioucmd->sqe = sqe;
>> +       return 0;
>> +}
>> +
>> +void io_uring_cmd_sqe_copy(struct io_kiocb *req)
>> +{
>> +       struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
>> +       struct io_async_cmd *ac = req->async_data;
>>
>> -       /*
>> -        * Unconditionally cache the SQE for now - this is only needed for
>> -        * requests that go async, but prep handlers must ensure that any
>> -        * sqe data is stable beyond prep. Since uring_cmd is special in
>> -        * that it doesn't read in per-op data, play it safe and ensure that
>> -        * any SQE data is stable beyond prep. This can later get relaxed.
>> -        */
>> -       memcpy(ac->sqes, sqe, uring_sqe_size(req->ctx));
>> +       /* already copied, nothing to do */
>> +       if (ioucmd->sqe == ac->sqes)
> 
> REQ_F_SQE_COPY should prevent this from ever happening, right? Could
> we make it a WARN_ON()? Or drop it entirely?

It should in the current form of the patchset, now that the copy is
gone inside uring_cmd and we have the SQE_COPIED flag. I'll make it
a WARN_ON_ONCE() instead.
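
Something along these lines, sketched just for illustration and
untested:

void io_uring_cmd_sqe_copy(struct io_kiocb *req)
{
	struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
	struct io_async_cmd *ac = req->async_data;

	/* REQ_F_SQE_COPIED should prevent this from being called twice */
	if (WARN_ON_ONCE(ioucmd->sqe == ac->sqes))
		return;
	memcpy(ac->sqes, ioucmd->sqe, uring_sqe_size(req->ctx));
	ioucmd->sqe = ac->sqes;
}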

-- 
Jens Axboe


