From: Ming Lei <ming.lei@redhat.com>
To: Jens Axboe <axboe@kernel.dk>,
io-uring@vger.kernel.org, Pavel Begunkov <asml.silence@gmail.com>,
linux-block@vger.kernel.org
Cc: Uday Shankar <ushankar@purestorage.com>,
Caleb Sander Mateos <csander@purestorage.com>,
Keith Busch <kbusch@kernel.org>, Ming Lei <ming.lei@redhat.com>
Subject: [RFC PATCH 6/7] ublk: register buffer to specified io_uring & buf index via UBLK_F_AUTO_BUF_REG
Date: Mon, 28 Apr 2025 17:44:17 +0800 [thread overview]
Message-ID: <20250428094420.1584420-7-ming.lei@redhat.com> (raw)
In-Reply-To: <20250428094420.1584420-1-ming.lei@redhat.com>
Add UBLK_F_AUTO_BUF_REG for supporting to register buffer automatically
to specified io_uring context and buffer index.
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
drivers/block/ublk_drv.c | 56 ++++++++++++++++++++++++++++-------
include/uapi/linux/ublk_cmd.h | 38 ++++++++++++++++++++++++
2 files changed, 84 insertions(+), 10 deletions(-)
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 1fd20e481a60..e82618442749 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -66,7 +66,8 @@
| UBLK_F_USER_COPY \
| UBLK_F_ZONED \
| UBLK_F_USER_RECOVERY_FAIL_IO \
- | UBLK_F_UPDATE_SIZE)
+ | UBLK_F_UPDATE_SIZE \
+ | UBLK_F_AUTO_BUF_REG)
#define UBLK_F_ALL_RECOVERY_FLAGS (UBLK_F_USER_RECOVERY \
| UBLK_F_USER_RECOVERY_REISSUE \
@@ -146,7 +147,10 @@ struct ublk_uring_cmd_pdu {
struct ublk_io {
/* userspace buffer address from io cmd */
- __u64 addr;
+ union {
+ __u64 addr;
+ struct ublk_auto_buf_reg buf;
+ };
unsigned int flags;
int res;
@@ -626,7 +630,7 @@ static inline bool ublk_support_zero_copy(const struct ublk_queue *ubq)
static inline bool ublk_support_auto_buf_reg(const struct ublk_queue *ubq)
{
- return false;
+ return ubq->flags & UBLK_F_AUTO_BUF_REG;
}
static inline bool ublk_support_user_copy(const struct ublk_queue *ubq)
@@ -1177,6 +1181,16 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq,
blk_mq_end_request(rq, BLK_STS_IOERR);
}
+
+static inline void ublk_init_auto_buf_reg(const struct ublk_io *io,
+ struct io_buf_data *data)
+{
+ data->index = io->buf.index;
+ data->ring_fd = io->buf.ring_fd;
+ data->has_fd = true;
+ data->registered_fd = io->buf.flags & UBLK_AUTO_BUF_REGISTERED_RING;
+}
+
static bool ublk_auto_buf_reg(struct ublk_queue *ubq, struct request *req,
struct ublk_io *io, unsigned int issue_flags)
{
@@ -1187,6 +1201,9 @@ static bool ublk_auto_buf_reg(struct ublk_queue *ubq, struct request *req,
};
int ret;
+ if (ublk_support_auto_buf_reg(ubq))
+ ublk_init_auto_buf_reg(io, &data);
+
/* one extra reference is dropped by ublk_io_release */
ublk_init_req_ref(ubq, req, 2);
ret = io_buffer_register_bvec(io->cmd, &data, issue_flags);
@@ -2045,7 +2062,7 @@ static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq,
*/
if (!buf_addr && !ublk_need_get_data(ubq))
goto out;
- } else if (buf_addr) {
+ } else if (buf_addr && !ublk_support_auto_buf_reg(ubq)) {
/* User copy requires addr to be unset */
ret = -EINVAL;
goto out;
@@ -2058,13 +2075,17 @@ static int ublk_fetch(struct io_uring_cmd *cmd, struct ublk_queue *ubq,
return ret;
}
-static void ublk_auto_buf_unreg(struct ublk_io *io, struct io_uring_cmd *cmd,
+static void ublk_auto_buf_unreg(const struct ublk_queue *ubq,
+ struct ublk_io *io, struct io_uring_cmd *cmd,
struct request *req, unsigned int issue_flags)
{
struct io_buf_data data = {
.index = req->tag,
};
+ if (ublk_support_auto_buf_reg(ubq))
+ ublk_init_auto_buf_reg(io, &data);
+
WARN_ON_ONCE(io_buffer_unregister_bvec(cmd, &data, issue_flags));
io->flags &= ~UBLK_IO_FLAG_AUTO_BUF_REG;
}
@@ -2088,7 +2109,8 @@ static int ublk_commit_and_fetch(const struct ublk_queue *ubq,
if (!ub_cmd->addr && (!ublk_need_get_data(ubq) ||
req_op(req) == REQ_OP_READ))
return -EINVAL;
- } else if (req_op(req) != REQ_OP_ZONE_APPEND && ub_cmd->addr) {
+ } else if ((req_op(req) != REQ_OP_ZONE_APPEND &&
+ !ublk_support_auto_buf_reg(ubq)) && ub_cmd->addr) {
/*
* User copy requires addr to be unset when command is
* not zone append
@@ -2097,7 +2119,7 @@ static int ublk_commit_and_fetch(const struct ublk_queue *ubq,
}
if (io->flags & UBLK_IO_FLAG_AUTO_BUF_REG)
- ublk_auto_buf_unreg(io, cmd, req, issue_flags);
+ ublk_auto_buf_unreg(ubq, io, cmd, req, issue_flags);
ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
@@ -2788,6 +2810,11 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
else if (!(info.flags & UBLK_F_UNPRIVILEGED_DEV))
return -EPERM;
+ /* F_AUTO_BUF_REG and F_SUPPORT_ZERO_COPY can't co-exist */
+ if ((info.flags & UBLK_F_AUTO_BUF_REG) &&
+ (info.flags & UBLK_F_SUPPORT_ZERO_COPY))
+ return -EINVAL;
+
/* forbid nonsense combinations of recovery flags */
switch (info.flags & UBLK_F_ALL_RECOVERY_FLAGS) {
case 0:
@@ -2817,8 +2844,11 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
* For USER_COPY, we depends on userspace to fill request
* buffer by pwrite() to ublk char device, which can't be
* used for unprivileged device
+ *
+ * Same with zero copy or auto buffer register.
*/
- if (info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY))
+ if (info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY |
+ UBLK_F_AUTO_BUF_REG))
return -EINVAL;
}
@@ -2876,17 +2906,22 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_cmd *header)
UBLK_F_URING_CMD_COMP_IN_TASK;
/* GET_DATA isn't needed any more with USER_COPY or ZERO COPY */
- if (ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY))
+ if (ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY |
+ UBLK_F_AUTO_BUF_REG))
ub->dev_info.flags &= ~UBLK_F_NEED_GET_DATA;
/*
* Zoned storage support requires reuse `ublksrv_io_cmd->addr` for
* returning write_append_lba, which is only allowed in case of
* user copy or zero copy
+ *
+ * UBLK_F_AUTO_BUF_REG can't be enabled for zoned because it need
+ * the space for getting ring_fd and buffer index.
*/
if (ublk_dev_is_zoned(ub) &&
(!IS_ENABLED(CONFIG_BLK_DEV_ZONED) || !(ub->dev_info.flags &
- (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY)))) {
+ (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY)) ||
+ (ub->dev_info.flags & UBLK_F_AUTO_BUF_REG))) {
ret = -EINVAL;
goto out_free_dev_number;
}
@@ -3403,6 +3438,7 @@ static int __init ublk_init(void)
BUILD_BUG_ON((u64)UBLKSRV_IO_BUF_OFFSET +
UBLKSRV_IO_BUF_TOTAL_SIZE < UBLKSRV_IO_BUF_OFFSET);
+ BUILD_BUG_ON(sizeof(struct ublk_auto_buf_reg) != sizeof(__u64));
init_waitqueue_head(&ublk_idr_wq);
diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h
index be5c6c6b16e0..3d7c8c69cf06 100644
--- a/include/uapi/linux/ublk_cmd.h
+++ b/include/uapi/linux/ublk_cmd.h
@@ -219,6 +219,30 @@
*/
#define UBLK_F_UPDATE_SIZE (1ULL << 10)
+/*
+ * request buffer is registered automatically to ublk server specified
+ * io_uring context before delivering this io command to ublk server,
+ * meantime it is un-registered automatically when completing this io
+ * command.
+ *
+ * For using this feature:
+ *
+ * - ublk server has to create sparse buffer table
+ *
+ * - pass io_ring context FD from `ublksrv_io_cmd.buf.ring_fd`, and the FD
+ * can be registered io_ring FD if `UBLK_AUTO_BUF_REGISTERED_RING` is set
+ * in `ublksrv_io_cmd.flags`, or plain FD
+ *
+ * - pass buffer index from `ublksrv_io_cmd.buf.index`
+ *
+ * This way avoids extra cost from two uring_cmd, but also simplifies backend
+ * implementation, such as, the dependency on IO_REGISTER_IO_BUF and
+ * IO_UNREGISTER_IO_BUF becomes not necessary.
+ *
+ * This feature isn't available for UBLK_F_ZONED
+ */
+#define UBLK_F_AUTO_BUF_REG (1ULL << 11)
+
/* device state */
#define UBLK_S_DEV_DEAD 0
#define UBLK_S_DEV_LIVE 1
@@ -339,6 +363,14 @@ static inline __u32 ublksrv_get_flags(const struct ublksrv_io_desc *iod)
return iod->op_flags >> 8;
}
+struct ublk_auto_buf_reg {
+ __s32 ring_fd;
+ __u16 index;
+#define UBLK_AUTO_BUF_REGISTERED_RING (1 << 0)
+ __u8 flags;
+ __u8 _pad;
+};
+
/* issued to ublk driver via /dev/ublkcN */
struct ublksrv_io_cmd {
__u16 q_id;
@@ -363,6 +395,12 @@ struct ublksrv_io_cmd {
*/
__u64 addr;
__u64 zone_append_lba;
+
+ /*
+ * for AUTO_BUF_REG feature, F_ZONED can't be supported,
+ * and ->addr isn't used for zero copy
+ */
+ struct ublk_auto_buf_reg auto_buf;
};
};
--
2.47.0
next prev parent reply other threads:[~2025-04-28 9:45 UTC|newest]
Thread overview: 29+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-04-28 9:44 [RFC PATCH 0/7] ublk: support to register bvec buffer automatically Ming Lei
2025-04-28 9:44 ` [RFC PATCH 1/7] io_uring: add 'struct io_buf_data' for register/unregister bvec buffer Ming Lei
2025-04-29 0:35 ` Caleb Sander Mateos
2025-04-28 9:44 ` [RFC PATCH 2/7] io_uring: add helper __io_buffer_[un]register_bvec Ming Lei
2025-04-29 0:36 ` Caleb Sander Mateos
2025-04-28 9:44 ` [RFC PATCH 3/7] io_uring: support to register bvec buffer to specified io_uring Ming Lei
2025-04-28 10:28 ` Pavel Begunkov
2025-04-29 0:46 ` Caleb Sander Mateos
2025-04-29 0:47 ` Ming Lei
2025-04-30 8:25 ` Pavel Begunkov
2025-04-30 14:44 ` Ming Lei
2025-04-29 0:43 ` Caleb Sander Mateos
2025-04-30 15:34 ` Ming Lei
2025-05-02 1:31 ` Caleb Sander Mateos
2025-05-02 15:59 ` Ming Lei
2025-05-02 21:21 ` Caleb Sander Mateos
2025-05-03 1:00 ` Ming Lei
2025-05-03 18:55 ` Caleb Sander Mateos
2025-05-06 2:45 ` Ming Lei
2025-04-28 9:44 ` [RFC PATCH 4/7] ublk: convert to refcount_t Ming Lei
2025-04-28 17:13 ` Caleb Sander Mateos
2025-04-28 9:44 ` [RFC PATCH 5/7] ublk: prepare for supporting to register request buffer automatically Ming Lei
2025-04-29 0:50 ` Caleb Sander Mateos
2025-04-28 9:44 ` Ming Lei [this message]
2025-04-29 0:52 ` [RFC PATCH 6/7] ublk: register buffer to specified io_uring & buf index via UBLK_F_AUTO_BUF_REG Caleb Sander Mateos
2025-04-30 15:45 ` Ming Lei
2025-04-30 16:30 ` Caleb Sander Mateos
2025-05-02 14:09 ` Ming Lei
2025-04-28 9:44 ` [RFC PATCH 7/7] selftests: ublk: support UBLK_F_AUTO_BUF_REG Ming Lei
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250428094420.1584420-7-ming.lei@redhat.com \
--to=ming.lei@redhat.com \
--cc=asml.silence@gmail.com \
--cc=axboe@kernel.dk \
--cc=csander@purestorage.com \
--cc=io-uring@vger.kernel.org \
--cc=kbusch@kernel.org \
--cc=linux-block@vger.kernel.org \
--cc=ushankar@purestorage.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox