public inbox for [email protected]
 help / color / mirror / Atom feed
From: Ming Lei <[email protected]>
To: Jens Axboe <[email protected]>,
	[email protected], [email protected]
Cc: [email protected],
	Miklos Szeredi <[email protected]>,
	ZiyangZhang <[email protected]>,
	Xiaoguang Wang <[email protected]>,
	Bernd Schubert <[email protected]>,
	Pavel Begunkov <[email protected]>,
	Stefan Hajnoczi <[email protected]>,
	Ming Lei <[email protected]>
Subject: [PATCH V4 03/17] io_uring: support normal SQE for fused command
Date: Fri, 24 Mar 2023 21:57:54 +0800	[thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>

So far, the slave SQE has been stored in the second 64 bytes of the master
SQE, which requires SQE128 to be enabled. Relax this limit by allowing the
slave SQE to be fetched from the SQ directly.

IORING_URING_CMD_FUSED_SPLIT_SQE has to be set for this usage, and
userspace has to place the slave SQE right after the master SQE.

However, I am not sure whether this approach is useful, given that a fused
command needs at least two SQEs to run I/O in the fast path, and SQE128
matches this use case perfectly.

Signed-off-by: Ming Lei <[email protected]>
---
 include/uapi/linux/io_uring.h |  8 ++++++-
 io_uring/fused_cmd.c          | 42 ++++++++++++++++++++++++++++-------
 io_uring/io_uring.c           | 22 ++++++++++++------
 io_uring/io_uring.h           |  1 +
 4 files changed, 57 insertions(+), 16 deletions(-)

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 9762a2989747..6f25ca85639f 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -233,9 +233,15 @@ enum io_uring_op {
  * sqe->uring_cmd_flags
  * IORING_URING_CMD_FIXED	use registered buffer; pass this flag
  *				along with setting sqe->buf_index.
+ *
+ * IORING_URING_CMD_FUSED_SPLIT_SQE fused command only, slave sqe is
+ * 				    provided from another new sqe; without
+ * 				    setting the flag, slave sqe is from
+ * 				    2nd 64byte of this sqe, so SQE128 has
+ * 				    to be enabled
  */
 #define IORING_URING_CMD_FIXED	(1U << 0)
-
+#define IORING_URING_CMD_FUSED_SPLIT_SQE	(1U << 1)
 
 /*
  * sqe->fsync_flags
diff --git a/io_uring/fused_cmd.c b/io_uring/fused_cmd.c
index ff3921f6a5df..4cfe02e316f9 100644
--- a/io_uring/fused_cmd.c
+++ b/io_uring/fused_cmd.c
@@ -43,24 +43,45 @@ static inline void io_fused_cmd_update_link_flags(struct io_kiocb *req,
 		req->flags |= REQ_F_LINK;
 }
 
+static const struct io_uring_sqe *fused_cmd_get_slave_sqe(
+		struct io_ring_ctx *ctx, const struct io_uring_sqe *master,
+		bool split_sqe)
+{
+	if (unlikely(!(ctx->flags & IORING_SETUP_SQE128) && !split_sqe))
+		return NULL;
+
+	if (split_sqe) {
+		const struct io_uring_sqe *sqe;
+
+		if (unlikely(!io_get_slave_sqe(ctx, &sqe)))
+			return NULL;
+		return sqe;
+	}
+
+	return master + 1;
+}
+
 int io_fused_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	__must_hold(&req->ctx->uring_lock)
 {
 	struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
-	const struct io_uring_sqe *slave_sqe = sqe + 1;
+	const struct io_uring_sqe *slave_sqe;
 	struct io_ring_ctx *ctx = req->ctx;
 	struct io_kiocb *slave;
 	u8 slave_op;
 	int ret;
-
-	if (unlikely(!(ctx->flags & IORING_SETUP_SQE128)))
-		return -EINVAL;
+	bool split_sqe;
 
 	if (unlikely(sqe->__pad1))
 		return -EINVAL;
 
 	ioucmd->flags = READ_ONCE(sqe->uring_cmd_flags);
-	if (unlikely(ioucmd->flags))
+	if (unlikely(ioucmd->flags & ~IORING_URING_CMD_FUSED_SPLIT_SQE))
+		return -EINVAL;
+
+	split_sqe = ioucmd->flags & IORING_URING_CMD_FUSED_SPLIT_SQE;
+	slave_sqe = fused_cmd_get_slave_sqe(ctx, sqe, split_sqe);
+	if (unlikely(!slave_sqe))
 		return -EINVAL;
 
 	slave_op = READ_ONCE(slave_sqe->opcode);
@@ -71,8 +92,12 @@ int io_fused_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
 	req->fused_cmd_kbuf = NULL;
 
-	/* take one extra reference for the slave request */
-	io_get_task_refs(1);
+	/*
+	 * Take one extra reference for the slave request built from
+	 * builtin SQE since io_uring core code doesn't grab it for us
+	 */
+	if (!split_sqe)
+		io_get_task_refs(1);
 
 	ret = -ENOMEM;
 	if (unlikely(!io_alloc_req(ctx, &slave)))
@@ -96,7 +121,8 @@ int io_fused_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 fail_free_req:
 	io_free_req(slave);
 fail:
-	current->io_uring->cached_refs += 1;
+	if (!split_sqe)
+		current->io_uring->cached_refs += 1;
 	return ret;
 }
 
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index e5e43637d313..b0008d380686 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2414,7 +2414,8 @@ static void io_commit_sqring(struct io_ring_ctx *ctx)
  * used, it's important that those reads are done through READ_ONCE() to
  * prevent a re-load down the line.
  */
-static bool io_get_sqe(struct io_ring_ctx *ctx, const struct io_uring_sqe **sqe)
+static inline bool io_get_sqe(struct io_ring_ctx *ctx,
+		const struct io_uring_sqe **sqe)
 {
 	unsigned head, mask = ctx->sq_entries - 1;
 	unsigned sq_idx = ctx->cached_sq_head++ & mask;
@@ -2443,19 +2444,25 @@ static bool io_get_sqe(struct io_ring_ctx *ctx, const struct io_uring_sqe **sqe)
 	return false;
 }
 
+bool io_get_slave_sqe(struct io_ring_ctx *ctx, const struct io_uring_sqe **sqe)
+{
+	return io_get_sqe(ctx, sqe);
+}
+
 int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
 	__must_hold(&ctx->uring_lock)
 {
 	unsigned int entries = io_sqring_entries(ctx);
-	unsigned int left;
+	unsigned old_head = ctx->cached_sq_head;
+	unsigned int left = 0;
 	int ret;
 
 	if (unlikely(!entries))
 		return 0;
 	/* make sure SQ entry isn't read before tail */
-	ret = left = min3(nr, ctx->sq_entries, entries);
-	io_get_task_refs(left);
-	io_submit_state_start(&ctx->submit_state, left);
+	ret = min3(nr, ctx->sq_entries, entries);
+	io_get_task_refs(ret);
+	io_submit_state_start(&ctx->submit_state, ret);
 
 	do {
 		const struct io_uring_sqe *sqe;
@@ -2474,11 +2481,12 @@ int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
 		 */
 		if (unlikely(io_submit_sqe(ctx, req, sqe)) &&
 		    !(ctx->flags & IORING_SETUP_SUBMIT_ALL)) {
-			left--;
+			left = 1;
 			break;
 		}
-	} while (--left);
+	} while ((ctx->cached_sq_head - old_head) < ret);
 
+	left = ret - (ctx->cached_sq_head - old_head) - left;
 	if (unlikely(left)) {
 		ret -= left;
 		/* try again if it submitted nothing and can't allocate a req */
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 637e12e4fb9f..ee22e65c4aef 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -78,6 +78,7 @@ bool __io_alloc_req_refill(struct io_ring_ctx *ctx);
 bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task,
 			bool cancel_all);
 
+bool io_get_slave_sqe(struct io_ring_ctx *ctx, const struct io_uring_sqe **sqe);
 int io_init_slave_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 		const struct io_uring_sqe *sqe);
 
-- 
2.39.2


  parent reply	other threads:[~2023-03-24 13:59 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <CGME20230324135916epcas5p37aad4c49c76c05567a484377d8909092@epcas5p3.samsung.com>
2023-03-24 13:57 ` [PATCH V4 00/17] io_uring/ublk: add IORING_OP_FUSED_CMD Ming Lei
2023-03-24 13:57   ` [PATCH V4 01/17] io_uring: increase io_kiocb->flags into 64bit Ming Lei
2023-03-24 13:57   ` [PATCH V4 02/17] io_uring: add IORING_OP_FUSED_CMD Ming Lei
2023-03-24 13:57   ` Ming Lei [this message]
2023-03-24 13:57   ` [PATCH V4 04/17] io_uring: support OP_READ/OP_WRITE for fused slave request Ming Lei
2023-03-24 13:57   ` [PATCH V4 05/17] io_uring: support OP_SEND_ZC/OP_RECV " Ming Lei
2023-03-24 13:57   ` [PATCH V4 06/17] block: ublk_drv: mark device as LIVE before adding disk Ming Lei
2023-03-24 13:57   ` [PATCH V4 07/17] block: ublk_drv: add common exit handling Ming Lei
2023-03-24 13:57   ` [PATCH V4 08/17] block: ublk_drv: don't consider flush request in map/unmap io Ming Lei
2023-03-24 13:58   ` [PATCH V4 09/17] block: ublk_drv: add two helpers to clean up map/unmap request Ming Lei
2023-03-24 13:58   ` [PATCH V4 10/17] block: ublk_drv: clean up several helpers Ming Lei
2023-03-24 13:58   ` [PATCH V4 11/17] block: ublk_drv: cleanup 'struct ublk_map_data' Ming Lei
2023-03-24 13:58   ` [PATCH V4 12/17] block: ublk_drv: cleanup ublk_copy_user_pages Ming Lei
2023-03-24 13:58   ` [PATCH V4 13/17] block: ublk_drv: grab request reference when the request is handled by userspace Ming Lei
2023-03-24 13:58   ` [PATCH V4 14/17] block: ublk_drv: support to copy any part of request pages Ming Lei
2023-03-24 13:58   ` [PATCH V4 15/17] block: ublk_drv: add read()/write() support for ublk char device Ming Lei
2023-03-24 13:58   ` [PATCH V4 16/17] block: ublk_drv: don't check buffer in case of zero copy Ming Lei
2023-03-24 13:58   ` [PATCH V4 17/17] block: ublk_drv: apply io_uring FUSED_CMD for supporting " Ming Lei
2023-03-28  0:36   ` [PATCH V4 00/17] io_uring/ublk: add IORING_OP_FUSED_CMD Dan Williams
2023-03-28  1:16     ` Ming Lei
2023-03-28  1:29       ` Jens Axboe
2023-03-28  1:35         ` Ming Lei
2023-03-28  1:31       ` Dan Williams
2023-03-28  2:02         ` Ming Lei
2023-03-28  6:32           ` Dan Williams
2023-03-28  3:13       ` Gao Xiang
2023-03-28  3:33         ` Ming Lei
2023-03-28  5:10   ` Kanchan Joshi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox