public inbox for [email protected]
 help / color / mirror / Atom feed
From: Pavel Begunkov <[email protected]>
To: [email protected]
Cc: Jens Axboe <[email protected]>,
	[email protected], Conrad Meyer <[email protected]>,
	[email protected], [email protected]
Subject: [RFC 5/5] block: implement io_uring discard cmd
Date: Wed, 14 Aug 2024 11:45:54 +0100	[thread overview]
Message-ID: <6ecd7ab3386f63f1656dc766c1b5b038ff5353c2.1723601134.git.asml.silence@gmail.com> (raw)
In-Reply-To: <[email protected]>

Add ->uring_cmd callback for block device files and use it to implement
asynchronous discard. Normally, it first tries to execute the command
from non-blocking context, which we limit to a single bio because
otherwise one of sub-bios may need to wait for other bios, and we don't
want to deal with partial IO. If non-blocking attempt fails, we'll retry
it in a blocking context.

Suggested-by: Conrad Meyer <[email protected]>
Signed-off-by: Pavel Begunkov <[email protected]>
---
 block/blk.h             |  1 +
 block/fops.c            |  2 +
 block/ioctl.c           | 94 +++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/fs.h |  2 +
 4 files changed, 99 insertions(+)

diff --git a/block/blk.h b/block/blk.h
index e180863f918b..5178c5ba6852 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -571,6 +571,7 @@ blk_mode_t file_to_blk_mode(struct file *file);
 int truncate_bdev_range(struct block_device *bdev, blk_mode_t mode,
 		loff_t lstart, loff_t lend);
 long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg);
+int blkdev_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags);
 long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg);
 
 extern const struct address_space_operations def_blk_aops;
diff --git a/block/fops.c b/block/fops.c
index 9825c1713a49..8154b10b5abf 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -17,6 +17,7 @@
 #include <linux/fs.h>
 #include <linux/iomap.h>
 #include <linux/module.h>
+#include <linux/io_uring/cmd.h>
 #include "blk.h"
 
 static inline struct inode *bdev_file_inode(struct file *file)
@@ -873,6 +874,7 @@ const struct file_operations def_blk_fops = {
 	.splice_read	= filemap_splice_read,
 	.splice_write	= iter_file_splice_write,
 	.fallocate	= blkdev_fallocate,
+	.uring_cmd	= blkdev_uring_cmd,
 	.fop_flags	= FOP_BUFFER_RASYNC,
 };
 
diff --git a/block/ioctl.c b/block/ioctl.c
index c7a3e6c6f5fa..f7f9c4c6d6b5 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -11,6 +11,8 @@
 #include <linux/blktrace_api.h>
 #include <linux/pr.h>
 #include <linux/uaccess.h>
+#include <linux/pagemap.h>
+#include <linux/io_uring/cmd.h>
 #include "blk.h"
 
 static int blkpg_do_ioctl(struct block_device *bdev,
@@ -744,4 +746,96 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 
 	return ret;
 }
+
+struct blk_cmd {
+	blk_status_t status;
+	bool nowait;
+};
+
+static void blk_cmd_complete(struct io_uring_cmd *cmd, unsigned int issue_flags)
+{
+	struct blk_cmd *bc = io_uring_cmd_to_pdu(cmd, struct blk_cmd);
+	int res = blk_status_to_errno(bc->status);
+
+	if (res == -EAGAIN && bc->nowait)
+		io_uring_cmd_issue_blocking(cmd);
+	else
+		io_uring_cmd_done(cmd, res, 0, issue_flags);
+}
+
+static void bio_cmd_end(struct bio *bio)
+{
+	struct io_uring_cmd *cmd = bio->bi_private;
+	struct blk_cmd *bc = io_uring_cmd_to_pdu(cmd, struct blk_cmd);
+
+	if (unlikely(bio->bi_status) && !bc->status)
+		bc->status = bio->bi_status;
+
+	io_uring_cmd_do_in_task_lazy(cmd, blk_cmd_complete);
+	bio_put(bio);
+}
+
+static int blkdev_cmd_discard(struct io_uring_cmd *cmd,
+			      struct block_device *bdev,
+			      uint64_t start, uint64_t len, bool nowait)
+{
+	sector_t sector = start >> SECTOR_SHIFT;
+	sector_t nr_sects = len >> SECTOR_SHIFT;
+	struct bio *prev = NULL, *bio;
+	int err;
+
+	err = blk_validate_discard(bdev, file_to_blk_mode(cmd->file),
+				   start, len);
+	if (err)
+		return err;
+	err = filemap_invalidate_pages(bdev->bd_mapping, start,
+					start + len - 1, nowait);
+	if (err)
+		return err;
+
+	while ((bio = blk_alloc_discard_bio(bdev, &sector, &nr_sects,
+					    GFP_KERNEL))) {
+		if (nowait) {
+			if (unlikely(nr_sects)) {
+				bio_put(bio);
+				return -EAGAIN;
+			}
+			bio->bi_opf |= REQ_NOWAIT;
+		}
+		prev = bio_chain_and_submit(prev, bio);
+	}
+	if (!prev)
+		return -EFAULT;
+
+	prev->bi_private = cmd;
+	prev->bi_end_io = bio_cmd_end;
+	submit_bio(prev);
+	return -EIOCBQUEUED;
+}
+
+int blkdev_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
+{
+	struct block_device *bdev = I_BDEV(cmd->file->f_mapping->host);
+	struct blk_cmd *bc = io_uring_cmd_to_pdu(cmd, struct blk_cmd);
+	const struct io_uring_sqe *sqe = cmd->sqe;
+	u32 cmd_op = cmd->cmd_op;
+	uint64_t start, len;
+
+	if (unlikely(sqe->ioprio || sqe->__pad1 || sqe->len ||
+		     sqe->rw_flags || sqe->file_index))
+		return -EINVAL;
+
+	bc->status = BLK_STS_OK;
+	bc->nowait = issue_flags & IO_URING_F_NONBLOCK;
+
+	start = READ_ONCE(sqe->addr);
+	len = READ_ONCE(sqe->addr3);
+
+	switch (cmd_op) {
+	case BLOCK_URING_CMD_DISCARD:
+		return blkdev_cmd_discard(cmd, bdev, start, len, bc->nowait);
+	}
+	return -EINVAL;
+}
+
 #endif
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 753971770733..0016e38ed33c 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -208,6 +208,8 @@ struct fsxattr {
  * (see uapi/linux/blkzoned.h)
  */
 
+#define BLOCK_URING_CMD_DISCARD			0
+
 #define BMAP_IOCTL 1		/* obsolete - kept for compatibility */
 #define FIBMAP	   _IO(0x00,1)	/* bmap access */
 #define FIGETBSZ   _IO(0x00,2)	/* get the block size used for bmap */
-- 
2.45.2


  parent reply	other threads:[~2024-08-14 10:45 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-08-14 10:45 [RFC 0/5] implement asynchronous BLKDISCARD via io_uring Pavel Begunkov
2024-08-14 10:45 ` [RFC 1/5] io_uring/cmd: expose iowq to cmds Pavel Begunkov
2024-08-14 10:45 ` [RFC 2/5] io_uring/cmd: give inline space in request " Pavel Begunkov
2024-08-14 10:45 ` [RFC 3/5] filemap: introduce filemap_invalidate_pages Pavel Begunkov
2024-08-14 10:45 ` [RFC 4/5] block: introduce blk_validate_discard() Pavel Begunkov
2024-08-14 10:45 ` Pavel Begunkov [this message]
2024-08-15  1:42   ` [RFC 5/5] block: implement io_uring discard cmd Ming Lei
2024-08-15 14:33     ` Jens Axboe
2024-08-15 17:11       ` Pavel Begunkov
2024-08-15 23:44         ` Ming Lei
2024-08-16  1:24           ` Jens Axboe
2024-08-16  1:45             ` Ming Lei
2024-08-16  1:59               ` Pavel Begunkov
2024-08-16  2:08                 ` Ming Lei
2024-08-16  2:16                   ` Pavel Begunkov
2024-08-19 20:02                     ` Jens Axboe
2024-08-19 20:01               ` Jens Axboe
2024-08-20  2:36                 ` Ming Lei
2024-08-20 16:30                   ` Jens Axboe
2024-08-20 17:19                     ` Pavel Begunkov
2024-08-21  2:55                       ` Ming Lei
2024-08-15 14:42   ` Jens Axboe
2024-08-15 15:50 ` [RFC 0/5] implement asynchronous BLKDISCARD via io_uring Jens Axboe
2024-08-15 17:26   ` Pavel Begunkov
2024-08-15 16:15 ` Martin K. Petersen
2024-08-15 17:12   ` Pavel Begunkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=6ecd7ab3386f63f1656dc766c1b5b038ff5353c2.1723601134.git.asml.silence@gmail.com \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox