From: Ming Lei <[email protected]>
To: Jens Axboe <[email protected]>
Cc: [email protected], [email protected],
[email protected],
Gabriel Krisman Bertazi <[email protected]>,
ZiyangZhang <[email protected]>,
Xiaoguang Wang <[email protected]>,
Ming Lei <[email protected]>
Subject: [PATCH V4 2/2] ublk_drv: add UBLK_IO_REFETCH_REQ for supporting to build as module
Date: Mon, 11 Jul 2022 10:20:24 +0800 [thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>
Add a UBLK_IO_REFETCH_REQ command for fetching the incoming io request
in the ubq daemon context, so we can avoid calling task_work_add() and
it becomes fine to build the ublk driver as a module.

This costs a bit of iops, but only ~5% on ublk/null, given io_uring
provides pretty good batching for issuing & completing.

One thing to be careful about is the race between ->queue_rq() and
abort handling, which is avoided by quiescing the queue while aborting
it. Apart from that, abort handling becomes much easier with
UBLK_IO_REFETCH_REQ, since the abort handler is strictly exclusive with
anything done in ubq daemon kernel context.
Signed-off-by: Ming Lei <[email protected]>
---
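Not for applying, just an illustration of the expected ublksrv-side flow:
when the driver completes the previous FETCH/COMMIT_AND_FETCH command with
UBLK_IO_RES_REFETCH, the server re-issues the same slot as
UBLK_IO_REFETCH_REQ. This is a rough sketch only: it assumes a liburing
new enough to expose IORING_OP_URING_CMD and the sqe 'cmd' area, and the
tag<->user_data helpers and io_buf_of() are made-up placeholders. Only
UBLK_IO_RES_REFETCH, UBLK_IO_REFETCH_REQ and struct ublksrv_io_cmd come
from ublk_cmd.h.

#include <string.h>
#include <stdint.h>
#include <liburing.h>
#include <linux/ublk_cmd.h>

/* hypothetical helpers: how ublksrv encodes the tag in user_data,
 * and where the per-tag io buffer lives */
extern int user_data_to_tag(__u64 user_data);
extern __u64 tag_to_user_data(int tag);
extern void *io_buf_of(int q_id, int tag);

static void handle_io_cqe(struct io_uring *ring, int dev_fd, int q_id,
			  struct io_uring_cqe *cqe)
{
	int tag = user_data_to_tag(cqe->user_data);

	if (cqe->res != UBLK_IO_RES_REFETCH)
		return;

	/* re-fetch this slot: same char dev fd, same q_id/tag/addr */
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
	struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)&sqe->cmd;

	/* NULL check of sqe omitted in this sketch */
	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = IORING_OP_URING_CMD;
	sqe->fd = dev_fd;			/* /dev/ublkcN */
	sqe->cmd_op = UBLK_IO_REFETCH_REQ;
	cmd->q_id = q_id;
	cmd->tag = tag;
	cmd->addr = (__u64)(uintptr_t)io_buf_of(q_id, tag);
	sqe->user_data = tag_to_user_data(tag);
	/* submitted later, batched with other sqes via io_uring_submit() */
}

The buffer address goes in cmd->addr just like in the initial FETCH_REQ,
so the driver's refetch handler can map the request data exactly as the
task_work path does.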
drivers/block/Kconfig | 2 +-
drivers/block/ublk_drv.c | 121 ++++++++++++++++++++++++++--------
include/uapi/linux/ublk_cmd.h | 17 +++++
3 files changed, 113 insertions(+), 27 deletions(-)
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index d218089cdbec..2ba77fd960c2 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -409,7 +409,7 @@ config BLK_DEV_RBD
If unsure, say N.
config BLK_DEV_UBLK
- bool "Userspace block driver"
+ tristate "Userspace block driver"
select IO_URING
help
io uring based userspace block driver.
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 0076418e6fad..98482f8d1a77 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -92,6 +92,7 @@ struct ublk_queue {
int q_id;
int q_depth;
+ unsigned long flags;
struct task_struct *ubq_daemon;
char *io_cmd_buf;
@@ -141,6 +142,15 @@ struct ublk_device {
struct work_struct stop_work;
};
+#define ublk_use_task_work(ubq) \
+({ \
+ bool ret = false; \
+ if (IS_BUILTIN(CONFIG_BLK_DEV_UBLK) && \
+ !((ubq)->flags & UBLK_F_NEED_REFETCH)) \
+ ret = true; \
+ ret; \
+})
+
static dev_t ublk_chr_devt;
static struct class *ublk_chr_class;
@@ -429,6 +439,26 @@ static int ublk_setup_iod(struct ublk_queue *ubq, struct request *req)
return BLK_STS_OK;
}
+static bool ubq_daemon_is_dying(struct ublk_queue *ubq)
+{
+ return ubq->ubq_daemon->flags & PF_EXITING;
+}
+
+static void ubq_complete_io_cmd(struct ublk_io *io, int res)
+{
+ /* mark this cmd owned by ublksrv */
+ io->flags |= UBLK_IO_FLAG_OWNED_BY_SRV;
+
+ /*
+ * clear ACTIVE since we are done with this sqe/cmd slot
+ * We can only accept io cmd in case of being not active.
+ */
+ io->flags &= ~UBLK_IO_FLAG_ACTIVE;
+
+ /* tell ublksrv one io request is coming */
+ io_uring_cmd_done(io->cmd, res, 0);
+}
+
static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
@@ -456,30 +486,38 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
* If we can't add the task work, something must be wrong, schedule
* monitor work immediately.
*/
- if (task_work_add(ubq->ubq_daemon, &data->work, notify_mode)) {
- struct ublk_device *ub = rq->q->queuedata;
+ if (ublk_use_task_work(ubq)) {
+ if (task_work_add(ubq->ubq_daemon, &data->work, notify_mode)) {
+ struct ublk_device *ub = rq->q->queuedata;
- mod_delayed_work(system_wq, &ub->monitor_work, 0);
- return BLK_STS_IOERR;
+ mod_delayed_work(system_wq, &ub->monitor_work, 0);
+ return BLK_STS_IOERR;
+ }
+ } else {
+ if (ubq_daemon_is_dying(ubq)) {
+ struct ublk_device *ub = rq->q->queuedata;
+
+ mod_delayed_work(system_wq, &ub->monitor_work, 0);
+ return BLK_STS_IOERR;
+ }
+
+ ubq_complete_io_cmd(&ubq->ios[rq->tag], UBLK_IO_RES_REFETCH);
}
return BLK_STS_OK;
}
-static bool ubq_daemon_is_dying(struct ublk_queue *ubq)
-{
- return ubq->ubq_daemon->flags & PF_EXITING;
-}
-
static void ublk_commit_rqs(struct blk_mq_hw_ctx *hctx)
{
struct ublk_queue *ubq = hctx->driver_data;
- __set_notify_signal(ubq->ubq_daemon);
- if (ubq_daemon_is_dying(ubq)) {
- struct ublk_device *ub = hctx->queue->queuedata;
+ if (ublk_use_task_work(ubq)) {
+ __set_notify_signal(ubq->ubq_daemon);
+ if (ubq_daemon_is_dying(ubq)) {
+ struct ublk_device *ub = hctx->queue->queuedata;
- mod_delayed_work(system_wq, &ub->monitor_work, 0);
+ mod_delayed_work(system_wq, &ub->monitor_work, 0);
+ }
}
}
@@ -604,17 +642,7 @@ static void ublk_rq_task_work_fn(struct callback_head *work)
return;
}
- /* mark this cmd owned by ublksrv */
- io->flags |= UBLK_IO_FLAG_OWNED_BY_SRV;
-
- /*
- * clear ACTIVE since we are done with this sqe/cmd slot
- * We can only accept io cmd in case of being not active.
- */
- io->flags &= ~UBLK_IO_FLAG_ACTIVE;
-
- /* tell ublksrv one io request is coming */
- io_uring_cmd_done(io->cmd, ret, 0);
+ ubq_complete_io_cmd(io, ret);
/*
* in case task is exiting, our partner has gone, so schedule monitor
@@ -737,13 +765,26 @@ static void ublk_commit_completion(struct ublk_device *ub,
* Focus on aborting any in-flight request scheduled to run via task work
*/
static void __ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq)
+ __releases(&ubq->abort_lock)
{
bool task_exiting = !!(ubq->ubq_daemon->flags & PF_EXITING);
int i;
+ bool quiesced = false;
if (!task_exiting)
goto out;
+ /*
+ * quiesce the queue so that we avoid racing with ublk_queue_rq()
+ * wrt. dealing with io flags
+ */
+ if (ubq->flags & UBLK_F_NEED_REFETCH) {
+ spin_unlock(&ubq->abort_lock);
+ blk_mq_quiesce_queue(ub->ub_queue);
+ spin_lock(&ubq->abort_lock);
+ quiesced = true;
+ }
+
for (i = 0; i < ubq->q_depth; i++) {
struct ublk_io *io = &ubq->ios[i];
@@ -759,6 +800,8 @@ static void __ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq)
__ublk_fail_req(io, rq);
}
}
+ if (quiesced)
+ blk_mq_unquiesce_queue(ub->ub_queue);
out:
ubq->abort_work_pending = false;
ublk_put_device(ub);
@@ -792,8 +835,12 @@ static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq)
if (!ubq->abort_work_pending) {
ubq->abort_work_pending = true;
put_dev = false;
- if (task_work_add(ubq->ubq_daemon, &ubq->abort_work,
- TWA_SIGNAL)) {
+ if (ublk_use_task_work(ubq)) {
+ if (task_work_add(ubq->ubq_daemon,
+ &ubq->abort_work, TWA_SIGNAL)) {
+ __ublk_abort_queue(ub, ubq);
+ }
+ } else {
__ublk_abort_queue(ub, ubq);
}
} else {
@@ -872,6 +919,16 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq)
mutex_unlock(&ub->mutex);
}
+static void ublk_handle_refetch(struct ublk_device *ub,
+ struct ublk_queue *ubq, int tag)
+{
+ struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id],
+ tag);
+ struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
+
+ ublk_rq_task_work_fn(&data->work);
+}
+
static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
{
struct ublksrv_io_cmd *ub_cmd = (struct ublksrv_io_cmd *)cmd->cmd;
@@ -943,6 +1000,15 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
io->cmd = cmd;
ublk_commit_completion(ub, ub_cmd);
break;
+ case UBLK_IO_REFETCH_REQ:
+ if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))
+ goto out;
+ io->addr = ub_cmd->addr;
+ io->cmd = cmd;
+ io->flags |= UBLK_IO_FLAG_ACTIVE;
+ io->flags &= ~UBLK_IO_FLAG_OWNED_BY_SRV;
+ ublk_handle_refetch(ub, ubq, tag);
+ break;
default:
goto out;
}
@@ -983,6 +1049,7 @@ static int ublk_init_queue(struct ublk_device *ub, int q_id)
void *ptr;
int size;
+ ubq->flags = ub->dev_info.flags[0];
ubq->q_id = q_id;
ubq->q_depth = ub->dev_info.queue_depth;
size = ublk_queue_cmd_buf_size(ub, q_id);
@@ -1381,6 +1448,8 @@ static int ublk_ctrl_add_dev(const struct ublksrv_ctrl_dev_info *info,
/* update device id */
ub->dev_info.dev_id = ub->ub_number;
+ if (IS_MODULE(CONFIG_BLK_DEV_UBLK))
+ ub->dev_info.flags[0] |= UBLK_F_NEED_REFETCH;
ret = ublk_add_dev(ub);
if (!ret) {
diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h
index 4f0c16ec875e..fe4a2c7c8349 100644
--- a/include/uapi/linux/ublk_cmd.h
+++ b/include/uapi/linux/ublk_cmd.h
@@ -28,12 +28,19 @@
* this IO request, request's handling result is committed to ublk
* driver, meantime FETCH_REQ is piggyback, and FETCH_REQ has to be
* handled before completing io request.
+ *
+ * REFETCH_REQ: issued via sqe(URING_CMD) after the ublk driver returns
+ * UBLK_IO_RES_REFETCH; its only purpose is to fetch the io request data
+ * from the userspace ublk server context in case task_work_add()
+ * isn't available because the ublk driver is built as a module
*/
#define UBLK_IO_FETCH_REQ 0x20
#define UBLK_IO_COMMIT_AND_FETCH_REQ 0x21
+#define UBLK_IO_REFETCH_REQ 0x22
/* only ABORT means that no re-fetch */
#define UBLK_IO_RES_OK 0
+#define UBLK_IO_RES_REFETCH 1
#define UBLK_IO_RES_ABORT (-ENODEV)
#define UBLKSRV_CMD_BUF_OFFSET 0
@@ -48,6 +55,16 @@
*/
#define UBLK_F_SUPPORT_ZERO_COPY (1UL << 0)
+/*
+ * When NEED_REFETCH is set, ublksrv has to issue the UBLK_IO_REFETCH_REQ
+ * command after the ublk driver returns UBLK_IO_RES_REFETCH.
+ *
+ * This flag is negotiated while handling UBLK_CMD_ADD_DEV. If ublksrv
+ * sets it, the ublk driver can't clear it. But if the ublk driver sets
+ * it back to ublksrv, ublksrv has to handle it correctly.
+ */
+#define UBLK_F_NEED_REFETCH (1UL << 1)
+
/* device state */
#define UBLK_S_DEV_DEAD 0
#define UBLK_S_DEV_LIVE 1
--
2.31.1
Thread overview: 13+ messages
2022-07-11 2:20 [PATCH V4 0/2] ublk: add io_uring based userspace block driver Ming Lei
2022-07-11 2:20 ` [PATCH V4 1/2] " Ming Lei
2022-07-11 2:20 ` Ming Lei [this message]
2022-07-11 20:06 ` [PATCH V4 2/2] ublk_drv: add UBLK_IO_REFETCH_REQ for supporting to build as module Gabriel Krisman Bertazi
2022-07-12 2:26 ` Ziyang Zhang
2022-07-12 2:46 ` Ming Lei
2022-07-12 2:33 ` Ming Lei
2022-07-12 10:08 ` Ming Lei
2022-07-11 11:58 ` [PATCH V4 0/2] ublk: add io_uring based userspace block driver Xiaoguang Wang
[not found] ` <[email protected]>
2022-07-11 14:03 ` Ming Lei
2022-07-12 8:44 ` Xiaoguang Wang
2022-07-12 11:30 ` Ming Lei
2022-07-12 15:16 ` Xiaoguang Wang