From: Pavel Begunkov <[email protected]>
To: [email protected]
Cc: Jens Axboe <[email protected]>,
[email protected], Conrad Meyer <[email protected]>,
[email protected], [email protected]
Subject: [RFC 5/5] block: implement io_uring discard cmd
Date: Wed, 14 Aug 2024 11:45:54 +0100 [thread overview]
Message-ID: <6ecd7ab3386f63f1656dc766c1b5b038ff5353c2.1723601134.git.asml.silence@gmail.com> (raw)
In-Reply-To: <[email protected]>
Add a ->uring_cmd callback for block device files and use it to implement
asynchronous discard. Normally, it first tries to execute the command
from a non-blocking context, which we limit to a single bio because
otherwise one of the sub-bios may need to wait for other bios, and we don't
want to deal with partial IO. If the non-blocking attempt fails, we'll retry
it in a blocking context.
Suggested-by: Conrad Meyer <[email protected]>
Signed-off-by: Pavel Begunkov <[email protected]>
---
block/blk.h | 1 +
block/fops.c | 2 +
block/ioctl.c | 94 +++++++++++++++++++++++++++++++++++++++++
include/uapi/linux/fs.h | 2 +
4 files changed, 99 insertions(+)
diff --git a/block/blk.h b/block/blk.h
index e180863f918b..5178c5ba6852 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -571,6 +571,7 @@ blk_mode_t file_to_blk_mode(struct file *file);
int truncate_bdev_range(struct block_device *bdev, blk_mode_t mode,
loff_t lstart, loff_t lend);
long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg);
+int blkdev_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags);
long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg);
extern const struct address_space_operations def_blk_aops;
diff --git a/block/fops.c b/block/fops.c
index 9825c1713a49..8154b10b5abf 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -17,6 +17,7 @@
#include <linux/fs.h>
#include <linux/iomap.h>
#include <linux/module.h>
+#include <linux/io_uring/cmd.h>
#include "blk.h"
static inline struct inode *bdev_file_inode(struct file *file)
@@ -873,6 +874,7 @@ const struct file_operations def_blk_fops = {
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
.fallocate = blkdev_fallocate,
+ .uring_cmd = blkdev_uring_cmd,
.fop_flags = FOP_BUFFER_RASYNC,
};
diff --git a/block/ioctl.c b/block/ioctl.c
index c7a3e6c6f5fa..f7f9c4c6d6b5 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -11,6 +11,8 @@
#include <linux/blktrace_api.h>
#include <linux/pr.h>
#include <linux/uaccess.h>
+#include <linux/pagemap.h>
+#include <linux/io_uring/cmd.h>
#include "blk.h"
static int blkpg_do_ioctl(struct block_device *bdev,
@@ -744,4 +746,96 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
return ret;
}
+
+struct blk_cmd {
+ blk_status_t status; /* first non-zero bio status seen, else BLK_STS_OK */
+ bool nowait; /* issue_flags had IO_URING_F_NONBLOCK set */
+};
+
+static void blk_cmd_complete(struct io_uring_cmd *cmd, unsigned int issue_flags)
+{
+ struct blk_cmd *bc = io_uring_cmd_to_pdu(cmd, struct blk_cmd);
+ int res = blk_status_to_errno(bc->status);
+
+ if (res == -EAGAIN && bc->nowait) /* the non-blocking attempt failed */
+ io_uring_cmd_issue_blocking(cmd); /* retry it in a blocking context */
+ else
+ io_uring_cmd_done(cmd, res, 0, issue_flags); /* report res as the result */
+}
+
+static void bio_cmd_end(struct bio *bio)
+{
+ struct io_uring_cmd *cmd = bio->bi_private; /* set by blkdev_cmd_discard() */
+ struct blk_cmd *bc = io_uring_cmd_to_pdu(cmd, struct blk_cmd);
+
+ if (unlikely(bio->bi_status) && !bc->status) /* record only the first error */
+ bc->status = bio->bi_status;
+
+ io_uring_cmd_do_in_task_lazy(cmd, blk_cmd_complete); /* hand off to blk_cmd_complete() */
+ bio_put(bio);
+}
+
+static int blkdev_cmd_discard(struct io_uring_cmd *cmd,
+			      struct block_device *bdev,
+			      uint64_t start, uint64_t len, bool nowait)
+{
+	sector_t sector = start >> SECTOR_SHIFT;	/* start, in sectors */
+	sector_t nr_sects = len >> SECTOR_SHIFT;	/* len, in sectors */
+	struct bio *prev = NULL, *bio;
+	int err;
+
+	err = blk_validate_discard(bdev, file_to_blk_mode(cmd->file),
+				   start, len);
+	if (err)
+		return err;
+	err = filemap_invalidate_pages(bdev->bd_mapping, start,
+				       start + len - 1, nowait);
+	if (err)
+		return err;
+	/* nowait permits a single bio only; otherwise bail out with -EAGAIN. */
+	while ((bio = blk_alloc_discard_bio(bdev, &sector, &nr_sects,
+					    GFP_KERNEL))) { /* NOTE(review): GFP_KERNEL even if nowait - confirm */
+		if (nowait) {
+			if (unlikely(nr_sects)) { /* sectors left over after this bio */
+				bio_put(bio);
+				return -EAGAIN;
+			}
+			bio->bi_opf |= REQ_NOWAIT;
+		}
+		prev = bio_chain_and_submit(prev, bio);
+	}
+	if (!prev) /* no bio was allocated at all */
+		return -EFAULT;
+	/* Attach the command and end_io handler to the last bio, then submit. */
+	prev->bi_private = cmd;
+	prev->bi_end_io = bio_cmd_end;
+	submit_bio(prev);
+	return -EIOCBQUEUED;
+}
+
+int blkdev_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
+{
+ struct block_device *bdev = I_BDEV(cmd->file->f_mapping->host);
+ struct blk_cmd *bc = io_uring_cmd_to_pdu(cmd, struct blk_cmd);
+ const struct io_uring_sqe *sqe = cmd->sqe;
+ u32 cmd_op = cmd->cmd_op;
+ uint64_t start, len;
+
+ if (unlikely(sqe->ioprio || sqe->__pad1 || sqe->len ||
+ sqe->rw_flags || sqe->file_index))
+ return -EINVAL; /* these SQE fields must all be zero */
+
+ bc->status = BLK_STS_OK; /* no bio error recorded yet */
+ bc->nowait = issue_flags & IO_URING_F_NONBLOCK;
+
+ start = READ_ONCE(sqe->addr); /* range start is taken from sqe->addr */
+ len = READ_ONCE(sqe->addr3); /* range length is taken from sqe->addr3 */
+
+ switch (cmd_op) {
+ case BLOCK_URING_CMD_DISCARD:
+ return blkdev_cmd_discard(cmd, bdev, start, len, bc->nowait);
+ }
+ return -EINVAL; /* cmd_op is not a command handled here */
+}
+
#endif
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 753971770733..0016e38ed33c 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -208,6 +208,8 @@ struct fsxattr {
* (see uapi/linux/blkzoned.h)
*/
+#define BLOCK_URING_CMD_DISCARD 0
+
#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */
#define FIBMAP _IO(0x00,1) /* bmap access */
#define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */
--
2.45.2
next prev parent reply other threads:[~2024-08-14 10:45 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-08-14 10:45 [RFC 0/5] implement asynchronous BLKDISCARD via io_uring Pavel Begunkov
2024-08-14 10:45 ` [RFC 1/5] io_uring/cmd: expose iowq to cmds Pavel Begunkov
2024-08-14 10:45 ` [RFC 2/5] io_uring/cmd: give inline space in request " Pavel Begunkov
2024-08-14 10:45 ` [RFC 3/5] filemap: introduce filemap_invalidate_pages Pavel Begunkov
2024-08-14 10:45 ` [RFC 4/5] block: introduce blk_validate_discard() Pavel Begunkov
2024-08-14 10:45 ` Pavel Begunkov [this message]
2024-08-15 1:42 ` [RFC 5/5] block: implement io_uring discard cmd Ming Lei
2024-08-15 14:33 ` Jens Axboe
2024-08-15 17:11 ` Pavel Begunkov
2024-08-15 23:44 ` Ming Lei
2024-08-16 1:24 ` Jens Axboe
2024-08-16 1:45 ` Ming Lei
2024-08-16 1:59 ` Pavel Begunkov
2024-08-16 2:08 ` Ming Lei
2024-08-16 2:16 ` Pavel Begunkov
2024-08-19 20:02 ` Jens Axboe
2024-08-19 20:01 ` Jens Axboe
2024-08-20 2:36 ` Ming Lei
2024-08-20 16:30 ` Jens Axboe
2024-08-20 17:19 ` Pavel Begunkov
2024-08-21 2:55 ` Ming Lei
2024-08-15 14:42 ` Jens Axboe
2024-08-15 15:50 ` [RFC 0/5] implement asynchronous BLKDISCARD via io_uring Jens Axboe
2024-08-15 17:26 ` Pavel Begunkov
2024-08-15 16:15 ` Martin K. Petersen
2024-08-15 17:12 ` Pavel Begunkov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=6ecd7ab3386f63f1656dc766c1b5b038ff5353c2.1723601134.git.asml.silence@gmail.com \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox