From: Keith Busch <[email protected]>
To: Ming Lei <[email protected]>
Cc: Keith Busch <[email protected]>,
[email protected], [email protected],
[email protected], [email protected],
[email protected], [email protected]
Subject: Re: [PATCHv5 09/11] ublk: zc register/unregister bvec
Date: Wed, 26 Feb 2025 10:10:31 -0700 [thread overview]
Message-ID: <Z79LB3T5Aa6RoaDo@kbusch-mbp> (raw)
In-Reply-To: <Z77Nq_5ZGxUjxkau@fedora>
On Wed, Feb 26, 2025 at 04:15:39PM +0800, Ming Lei wrote:
> On Mon, Feb 24, 2025 at 01:31:14PM -0800, Keith Busch wrote:
> > From: Keith Busch <[email protected]>
> >
> > Provide new operations for the user to request mapping an active request
> > to an io uring instance's buf_table. The user has to provide the index
> > it wants to install the buffer.
> >
> > A reference count is taken on the request to ensure it can't be
> > completed while it is active in a ring's buf_table.
> >
> > Signed-off-by: Keith Busch <[email protected]>
> > ---
>
> Looks IO_LINK doesn't work, and UNREG_BUF cqe can be received from userspace.
You can link the register, but should do the unregister with COMMIT
command on the frontend when the backend is complete. This doesn't need
the triple SQE requirement.
I was going to share with the next version, but since you bring it up
now, here's the reference patch for ublksrv using links:
---
diff --git a/include/ublk_cmd.h b/include/ublk_cmd.h
index 0150003..07439be 100644
--- a/include/ublk_cmd.h
+++ b/include/ublk_cmd.h
@@ -94,6 +94,10 @@
_IOWR('u', UBLK_IO_COMMIT_AND_FETCH_REQ, struct ublksrv_io_cmd)
#define UBLK_U_IO_NEED_GET_DATA \
_IOWR('u', UBLK_IO_NEED_GET_DATA, struct ublksrv_io_cmd)
+#define UBLK_U_IO_REGISTER_IO_BUF \
+ _IOWR('u', 0x23, struct ublksrv_io_cmd)
+#define UBLK_U_IO_UNREGISTER_IO_BUF \
+ _IOWR('u', 0x24, struct ublksrv_io_cmd)
/* only ABORT means that no re-fetch */
#define UBLK_IO_RES_OK 0
diff --git a/include/ublksrv_tgt.h b/include/ublksrv_tgt.h
index 1deee2b..c331963 100644
--- a/include/ublksrv_tgt.h
+++ b/include/ublksrv_tgt.h
@@ -99,6 +99,7 @@ struct ublk_io_tgt {
co_handle_type co;
const struct io_uring_cqe *tgt_io_cqe;
int queued_tgt_io; /* obsolete */
+ bool needs_unregister;
};
static inline struct ublk_io_tgt *__ublk_get_io_tgt_data(const struct ublk_io_data *io)
diff --git a/lib/ublksrv.c b/lib/ublksrv.c
index 16a9e13..7205247 100644
--- a/lib/ublksrv.c
+++ b/lib/ublksrv.c
@@ -619,6 +619,15 @@ skip_alloc_buf:
goto fail;
}
+ if (ctrl_dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY) {
+ ret = io_uring_register_buffers_sparse(&q->ring, q->q_depth);
+ if (ret) {
+ ublk_err("ublk dev %d queue %d register spare buffers failed %d",
+ q->dev->ctrl_dev->dev_info.dev_id, q->q_id, ret);
+ goto fail;
+ }
+ }
+
io_uring_register_ring_fd(&q->ring);
/*
diff --git a/tgt_loop.cpp b/tgt_loop.cpp
index 0f16676..91f8c81 100644
--- a/tgt_loop.cpp
+++ b/tgt_loop.cpp
@@ -246,12 +246,70 @@ static inline int loop_fallocate_mode(const struct ublksrv_io_desc *iod)
return mode;
}
+static inline void io_uring_prep_buf_register(struct io_uring_sqe *sqe,
+ int dev_fd, int tag, int q_id, __u64 index)
+{
+ struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd;
+
+ io_uring_prep_read(sqe, dev_fd, 0, 0, 0);
+ sqe->opcode = IORING_OP_URING_CMD;
+ sqe->flags |= IOSQE_IO_LINK | IOSQE_CQE_SKIP_SUCCESS | IOSQE_FIXED_FILE;
+ sqe->cmd_op = UBLK_U_IO_REGISTER_IO_BUF;
+
+ cmd->tag = tag;
+ cmd->addr = index;
+ cmd->q_id = q_id;
+}
+
+static inline void io_uring_prep_buf_unregister(struct io_uring_sqe *sqe,
+ int dev_fd, int tag, int q_id, __u64 index)
+{
+ struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd;
+
+ io_uring_prep_read(sqe, dev_fd, 0, 0, 0);
+ sqe->opcode = IORING_OP_URING_CMD;
+ sqe->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_FIXED_FILE;
+ sqe->cmd_op = UBLK_U_IO_UNREGISTER_IO_BUF;
+
+ cmd->tag = tag;
+ cmd->addr = index;
+ cmd->q_id = q_id;
+}
+
+static void loop_unregister(const struct ublksrv_queue *q, int tag)
+{
+ struct io_uring_sqe *sqe;
+
+ ublk_get_sqe_pair(q->ring_ptr, &sqe, NULL);
+ io_uring_prep_buf_unregister(sqe, 0, tag, q->q_id, tag);
+}
+
static void loop_queue_tgt_read(const struct ublksrv_queue *q,
- const struct ublksrv_io_desc *iod, int tag)
+ const struct ublk_io_data *data, int tag)
{
+ struct ublk_io_tgt *io = __ublk_get_io_tgt_data(data);
+ const struct ublksrv_io_desc *iod = data->iod;
+ const struct ublksrv_ctrl_dev_info *info =
+ ublksrv_ctrl_get_dev_info(ublksrv_get_ctrl_dev(q->dev));
unsigned ublk_op = ublksrv_get_op(iod);
- if (user_copy) {
+ if (info->flags & UBLK_F_SUPPORT_ZERO_COPY) {
+ struct io_uring_sqe *reg;
+ struct io_uring_sqe *read;
+
+ ublk_get_sqe_pair(q->ring_ptr, &reg, &read);
+
+ io_uring_prep_buf_register(reg, 0, tag, q->q_id, tag);
+
+ io_uring_prep_read_fixed(read, 1 /*fds[1]*/,
+ 0,
+ iod->nr_sectors << 9,
+ iod->start_sector << 9,
+ tag);
+ io_uring_sqe_set_flags(read, IOSQE_FIXED_FILE);
+ read->user_data = build_user_data(tag, ublk_op, 0, 1);
+ io->needs_unregister = true;
+ } else if (user_copy) {
struct io_uring_sqe *sqe, *sqe2;
__u64 pos = ublk_pos(q->q_id, tag, 0);
void *buf = ublksrv_queue_get_io_buf(q, tag);
@@ -284,11 +342,31 @@ static void loop_queue_tgt_read(const struct ublksrv_queue *q,
}
static void loop_queue_tgt_write(const struct ublksrv_queue *q,
- const struct ublksrv_io_desc *iod, int tag)
+ const struct ublk_io_data *data, int tag)
{
+ const struct ublksrv_io_desc *iod = data->iod;
+ const struct ublksrv_ctrl_dev_info *info =
+ ublksrv_ctrl_get_dev_info(ublksrv_get_ctrl_dev(q->dev));
unsigned ublk_op = ublksrv_get_op(iod);
- if (user_copy) {
+ if (info->flags & UBLK_F_SUPPORT_ZERO_COPY) {
+ struct ublk_io_tgt *io = __ublk_get_io_tgt_data(data);
+ struct io_uring_sqe *reg;
+ struct io_uring_sqe *write;
+
+ ublk_get_sqe_pair(q->ring_ptr, &reg, &write);
+ io_uring_prep_buf_register(reg, 0, tag, q->q_id, tag);
+
+ io_uring_prep_write_fixed(write, 1 /*fds[1]*/,
+ 0,
+ iod->nr_sectors << 9,
+ iod->start_sector << 9,
+ tag);
+ io_uring_sqe_set_flags(write, IOSQE_FIXED_FILE);
+ write->user_data = build_user_data(tag, ublk_op, 0, 1);
+
+ io->needs_unregister = true;
+ } else if (user_copy) {
struct io_uring_sqe *sqe, *sqe2;
__u64 pos = ublk_pos(q->q_id, tag, 0);
void *buf = ublksrv_queue_get_io_buf(q, tag);
@@ -352,10 +430,10 @@ static int loop_queue_tgt_io(const struct ublksrv_queue *q,
sqe->user_data = build_user_data(tag, ublk_op, 0, 1);
break;
case UBLK_IO_OP_READ:
- loop_queue_tgt_read(q, iod, tag);
+ loop_queue_tgt_read(q, data, tag);
break;
case UBLK_IO_OP_WRITE:
- loop_queue_tgt_write(q, iod, tag);
+ loop_queue_tgt_write(q, data, tag);
break;
default:
return -EINVAL;
@@ -387,6 +465,10 @@ static co_io_job __loop_handle_io_async(const struct ublksrv_queue *q,
if (io->tgt_io_cqe->res == -EAGAIN)
goto again;
+ if (io->needs_unregister) {
+ io->needs_unregister = false;
+ loop_unregister(q, tag);
+ }
ublksrv_complete_io(q, tag, io->tgt_io_cqe->res);
} else if (ret < 0) {
ublk_err( "fail to queue io %d, ret %d\n", tag, tag);
diff --git a/ublksrv_tgt.cpp b/ublksrv_tgt.cpp
index 8f9cf28..f3ebe14 100644
--- a/ublksrv_tgt.cpp
+++ b/ublksrv_tgt.cpp
@@ -723,7 +723,7 @@ static int cmd_dev_add(int argc, char *argv[])
data.tgt_type = optarg;
break;
case 'z':
- data.flags |= UBLK_F_SUPPORT_ZERO_COPY;
+ data.flags |= UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_USER_COPY;
break;
case 'q':
data.nr_hw_queues = strtol(optarg, NULL, 10);
--
next prev parent reply other threads:[~2025-02-26 17:10 UTC|newest]
Thread overview: 51+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-02-24 21:31 [PATCHv5 00/11] ublk zero copy support Keith Busch
2025-02-24 21:31 ` [PATCHv5 01/11] io_uring/rsrc: remove redundant check for valid imu Keith Busch
2025-02-25 8:37 ` Ming Lei
2025-02-25 13:13 ` Pavel Begunkov
2025-02-24 21:31 ` [PATCHv5 02/11] io_uring/nop: reuse req->buf_index Keith Busch
2025-02-24 23:30 ` Jens Axboe
2025-02-25 0:02 ` Keith Busch
2025-02-25 8:43 ` Ming Lei
2025-02-25 13:13 ` Pavel Begunkov
2025-02-24 21:31 ` [PATCHv5 03/11] io_uring/net: reuse req->buf_index for sendzc Keith Busch
2025-02-25 8:44 ` Ming Lei
2025-02-25 13:14 ` Pavel Begunkov
2025-02-24 21:31 ` [PATCHv5 04/11] io_uring/nvme: pass issue_flags to io_uring_cmd_import_fixed() Keith Busch
2025-02-25 8:52 ` Ming Lei
2025-02-24 21:31 ` [PATCHv5 05/11] io_uring: combine buffer lookup and import Keith Busch
2025-02-25 8:55 ` Ming Lei
2025-02-24 21:31 ` [PATCHv5 06/11] io_uring/rw: move fixed buffer import to issue path Keith Busch
2025-02-25 9:26 ` Ming Lei
2025-02-25 13:57 ` Pavel Begunkov
2025-02-25 20:57 ` Caleb Sander Mateos
2025-02-25 21:16 ` Keith Busch
2025-02-24 21:31 ` [PATCHv5 07/11] io_uring: add support for kernel registered bvecs Keith Busch
2025-02-25 9:40 ` Ming Lei
2025-02-25 17:32 ` Keith Busch
2025-02-25 22:47 ` Ming Lei
2025-02-25 22:55 ` Keith Busch
2025-02-25 14:00 ` Pavel Begunkov
2025-02-25 14:05 ` Pavel Begunkov
2025-02-25 20:58 ` Caleb Sander Mateos
2025-02-24 21:31 ` [PATCHv5 08/11] nvme: map uring_cmd data even if address is 0 Keith Busch
2025-02-25 9:41 ` Ming Lei
2025-02-24 21:31 ` [PATCHv5 09/11] ublk: zc register/unregister bvec Keith Busch
2025-02-25 11:00 ` Ming Lei
2025-02-25 16:35 ` Keith Busch
2025-02-25 22:56 ` Ming Lei
2025-02-25 16:19 ` Pavel Begunkov
2025-02-25 16:27 ` Keith Busch
2025-02-25 16:42 ` Pavel Begunkov
2025-02-25 16:52 ` Keith Busch
2025-02-27 4:16 ` Ming Lei
2025-02-25 21:14 ` Caleb Sander Mateos
2025-02-26 8:15 ` Ming Lei
2025-02-26 17:10 ` Keith Busch [this message]
2025-02-27 4:19 ` Ming Lei
2025-02-24 21:31 ` [PATCHv5 10/11] io_uring: add abstraction for buf_table rsrc data Keith Busch
2025-02-25 16:04 ` Pavel Begunkov
2025-02-24 21:31 ` [PATCHv5 11/11] io_uring: cache nodes and mapped buffers Keith Busch
2025-02-25 13:11 ` Pavel Begunkov
2025-02-25 14:10 ` [PATCHv5 00/11] ublk zero copy support Pavel Begunkov
2025-02-25 14:47 ` Jens Axboe
2025-02-25 15:07 ` (subset) " Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=Z79LB3T5Aa6RoaDo@kbusch-mbp \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox