public inbox for [email protected]
 help / color / mirror / Atom feed
From: Ziyang Zhang <[email protected]>
To: Jens Axboe <[email protected]>, Ming Lei <[email protected]>
Cc: [email protected], [email protected],
	Xiaoguang Wang <[email protected]>,
	Gabriel Krisman Bertazi <[email protected]>,
	[email protected],
	Ziyang Zhang <[email protected]>
Subject: [PATCH] ublk_drv: add one new ublk command: UBLK_IO_NEED_GET_DATA
Date: Mon, 18 Jul 2022 19:44:51 +0800	[thread overview]
Message-ID: <[email protected]> (raw)

Add one new ublk command: UBLK_IO_NEED_GET_DATA.

It is designed for a user application who wants to allocate IO buffer
and set IO buffer address only after it receives an IO request.

1. Background:
For now, ublk requires the user to set IO buffer address in advance(with
last UBLK_IO_COMMIT_AND_FETCH_REQ command)so the user has to
pre-allocate IO buffer.

For READ requests, this work flow looks good because the data copy
happens after user application gets a cqe and the kernel copies data.
So user application can allocate IO buffer, copy data to be read into
it, and issues a sqe with the newly allocated IO buffer.

However, for WRITE requests, this work flow looks weird because
the data copy happens in a task_work before the user application gets one
cqe. So it is inconvenient for users who allocates(or fetch from a
memory pool)buffer after it gets one request(and know the actual data
size).

2. Design:
Consider add a new feature flag: UBLK_F_NEED_GET_DATA.

If user sets this new flag(through libublksrv) and pass it to kernel
driver, ublk kernel driver should returns a cqe with
UBLK_IO_RES_NEED_GET_DATA after a new blk-mq WRITE request comes.

A user application now can allocate data buffer for writing and pass its
address in UBLK_IO_NEED_GET_DATA command after ublk kernel driver returns
cqe with UBLK_IO_RES_NEED_GET_DATA.

After the kernel side gets the sqe (UBLK_IO_NEED_GET_DATA command), it
now copies(address pinned, indeed) data to be written from bio vectors
to newly returned IO data buffer.

Finally, the kernel side returns UBLK_IO_RES_OK and ublksrv handles the
IO request as usual.The new feature: UBLK_F_NEED_GET_DATA is enabled on
demand ublksrv still can pre-allocate data buffers with task_work.

3. Evaluation:
We modify ublksrv to support UBLK_F_NEED_GET_DATA and the modification
will be PR-ed to Ming Lei's github repository soon if this patch is
okay.

We have tested write latency with:
  (1)  No UBLK_F_NEED_GET_DATA(the old commit) as baseline
  (2)  UBLK_F_NEED_GET_DATA enabled/disabled
on demo_null and demo_event of newest ublksrv project.

Config of fio:bs=4k, iodepth=1, numjobs=1, rw=write/randwrite, direct=1,
ioengine=libaio.

Here is the comparison of lat(usec) in fio:

demo_null:
write:        28.74(baseline) -- 28.77(disable) -- 57.20(enable)
randwrite:    27.81(baseline) -- 28.51(disable) -- 54.81(enable)

demo_event:
write:        46.45(baseline) -- 43.31(disable) -- 75.50(enable)
randwrite:    45.39(baseline) -- 43.66(disable) -- 76.02(enable)

Looks like:
  (1) UBLK_F_NEED_GET_DATA does not introduce additional overhead when
      comparing baseline and disable.
  (2) enabling UBLK_F_NEED_GET_DATA adds about two times more latency
      than disabling it. And it is reasonable since the IO goes through
      the total ublk IO stack(ubd_drv <--> ublksrv) once again.
  (3) demo_null and demo_event are both null targets. And I think this
      overhead is not too heavy if real data handling backend is used.

Signed-off-by: ZiyangZhang <[email protected]>
---
 drivers/block/ublk_drv.c      | 77 +++++++++++++++++++++++++++++++----
 include/uapi/linux/ublk_cmd.h | 19 +++++++++
 2 files changed, 88 insertions(+), 8 deletions(-)

diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 2c1b01d7f27d..0d56ae680cfe 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -83,6 +83,8 @@ struct ublk_uring_cmd_pdu {
  */
 #define UBLK_IO_FLAG_ABORTED 0x04
 
+#define UBLK_IO_FLAG_NEED_GET_DATA 0x08
+
 struct ublk_io {
 	/* userspace buffer address from io cmd */
 	__u64	addr;
@@ -160,11 +162,19 @@ static struct lock_class_key ublk_bio_compl_lkclass;
 static inline bool ublk_can_use_task_work(const struct ublk_queue *ubq)
 {
 	if (IS_BUILTIN(CONFIG_BLK_DEV_UBLK) &&
+			!(ubq->flags & UBLK_F_NEED_GET_DATA) &&
 			!(ubq->flags & UBLK_F_URING_CMD_COMP_IN_TASK))
 		return true;
 	return false;
 }
 
+static inline bool ublk_need_get_data(const struct ublk_queue *ubq)
+{
+	if (ubq->flags & UBLK_F_NEED_GET_DATA)
+		return true;
+	return false;
+}
+
 static struct ublk_device *ublk_get_device(struct ublk_device *ub)
 {
 	if (kobject_get_unless_zero(&ub->cdev_dev.kobj))
@@ -514,6 +524,21 @@ static void __ublk_fail_req(struct ublk_io *io, struct request *req)
 	}
 }
 
+static void ubq_complete_io_cmd(struct ublk_io *io, int res)
+{
+	/* mark this cmd owned by ublksrv */
+	io->flags |= UBLK_IO_FLAG_OWNED_BY_SRV;
+
+	/*
+	 * clear ACTIVE since we are done with this sqe/cmd slot
+	 * We can only accept io cmd in case of being not active.
+	 */
+	io->flags &= ~UBLK_IO_FLAG_ACTIVE;
+
+	/* tell ublksrv one io request is coming */
+	io_uring_cmd_done(io->cmd, res, 0);
+}
+
 #define UBLK_REQUEUE_DELAY_MS	3
 
 static inline void __ublk_rq_task_work(struct request *req)
@@ -536,6 +561,20 @@ static inline void __ublk_rq_task_work(struct request *req)
 		return;
 	}
 
+	if (ublk_need_get_data(ubq) &&
+			(req_op(req) == REQ_OP_WRITE ||
+			req_op(req) == REQ_OP_FLUSH) &&
+			!(io->flags & UBLK_IO_FLAG_NEED_GET_DATA)) {
+
+		pr_devel("%s: ublk_need_get_data. op %d, qid %d tag %d io_flags %x\n",
+				__func__, io->cmd->cmd_op, ubq->q_id, req->tag, io->flags);
+
+		io->flags |= UBLK_IO_FLAG_NEED_GET_DATA;
+
+		ubq_complete_io_cmd(io, UBLK_IO_RES_NEED_GET_DATA);
+		return;
+	}
+
 	mapped_bytes = ublk_map_io(ubq, req, io);
 
 	/* partially mapped, update io descriptor */
@@ -558,17 +597,13 @@ static inline void __ublk_rq_task_work(struct request *req)
 			mapped_bytes >> 9;
 	}
 
-	/* mark this cmd owned by ublksrv */
-	io->flags |= UBLK_IO_FLAG_OWNED_BY_SRV;
-
 	/*
-	 * clear ACTIVE since we are done with this sqe/cmd slot
-	 * We can only accept io cmd in case of being not active.
+	 * Anyway, we have handled UBLK_IO_NEED_GET_DATA for WRITE/FLUSH requests,
+	 * or we did nothing for other types requests.
 	 */
-	io->flags &= ~UBLK_IO_FLAG_ACTIVE;
+	io->flags &= ~UBLK_IO_FLAG_NEED_GET_DATA;
 
-	/* tell ublksrv one io request is coming */
-	io_uring_cmd_done(io->cmd, UBLK_IO_RES_OK, 0);
+	ubq_complete_io_cmd(io, UBLK_IO_RES_OK);
 }
 
 static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd)
@@ -860,6 +895,16 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq)
 	mutex_unlock(&ub->mutex);
 }
 
+static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id,
+		int tag, struct io_uring_cmd *cmd)
+{
+	struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
+	struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag);
+
+	pdu->req = req;
+	io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb);
+}
+
 static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
 {
 	struct ublksrv_io_cmd *ub_cmd = (struct ublksrv_io_cmd *)cmd->cmd;
@@ -898,6 +943,14 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
 		goto out;
 	}
 
+	/*
+	 * ensure that the user issues UBLK_IO_NEED_GET_DATA
+	 * if the driver have set the UBLK_IO_FLAG_NEED_GET_DATA.
+	 */
+	if ((io->flags & UBLK_IO_FLAG_NEED_GET_DATA)
+			&& (cmd_op != UBLK_IO_NEED_GET_DATA))
+		goto out;
+
 	switch (cmd_op) {
 	case UBLK_IO_FETCH_REQ:
 		/* UBLK_IO_FETCH_REQ is only allowed before queue is setup */
@@ -931,6 +984,14 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
 		io->cmd = cmd;
 		ublk_commit_completion(ub, ub_cmd);
 		break;
+	case UBLK_IO_NEED_GET_DATA:
+		if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))
+			goto out;
+		io->addr = ub_cmd->addr;
+		io->cmd = cmd;
+		io->flags |= UBLK_IO_FLAG_ACTIVE;
+		ublk_handle_need_get_data(ub, ub_cmd->q_id, ub_cmd->tag, cmd);
+		break;
 	default:
 		goto out;
 	}
diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h
index a3f5e7c21807..711170f961ac 100644
--- a/include/uapi/linux/ublk_cmd.h
+++ b/include/uapi/linux/ublk_cmd.h
@@ -28,12 +28,22 @@
  *      this IO request, request's handling result is committed to ublk
  *      driver, meantime FETCH_REQ is piggyback, and FETCH_REQ has to be
  *      handled before completing io request.
+ *
+ * NEED_GET_DATA: only used for write requests to set io addr and copy data
+ *      When NEED_GET_DATA is set, ublksrv has to issue UBLK_IO_NEED_GET_DATA
+ *      command after ublk driver returns UBLK_IO_RES_NEED_GET_DATA.
+ *
+ *      It is only used if ublksrv set UBLK_F_NEED_GET_DATA flag
+ *      while starting a ublk device.
  */
 #define	UBLK_IO_FETCH_REQ		0x20
 #define	UBLK_IO_COMMIT_AND_FETCH_REQ	0x21
 
+#define UBLK_IO_NEED_GET_DATA	0x22
+
 /* only ABORT means that no re-fetch */
 #define UBLK_IO_RES_OK			0
+#define UBLK_IO_RES_NEED_GET_DATA	1
 #define UBLK_IO_RES_ABORT		(-ENODEV)
 
 #define UBLKSRV_CMD_BUF_OFFSET	0
@@ -54,6 +64,15 @@
  */
 #define UBLK_F_URING_CMD_COMP_IN_TASK	(1UL << 1)
 
+/*
+ * User should issue io cmd again for write requests to
+ * set io buffer address and copy data from bio vectors
+ * to the userspace io buffer.
+ *
+ * In this mode, task_work is not used.
+ */
+#define UBLK_F_NEED_GET_DATA (1UL << 3)
+
 /* device state */
 #define UBLK_S_DEV_DEAD	0
 #define UBLK_S_DEV_LIVE	1
-- 
2.34.1

             reply	other threads:[~2022-07-18 11:44 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-18 11:44 Ziyang Zhang [this message]
2022-07-19 11:09 ` [PATCH] ublk_drv: add one new ublk command: UBLK_IO_NEED_GET_DATA Ming Lei

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=9993fb25-ecd1-4682-99b9-e83472583897@linux.alibaba.com \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox