From: Pavel Begunkov <[email protected]>
To: Mark Harmstone <[email protected]>,
[email protected], [email protected]
Subject: Re: [PATCH 6/6] btrfs: add io_uring interface for encoded reads
Date: Fri, 6 Sep 2024 15:41:49 +0100 [thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>
On 8/23/24 17:27, Mark Harmstone wrote:
> Adds an io_uring interface for asynchronous encoded reads, using the
> same interface as for the ioctl. To use this you would use an SQE opcode
> of IORING_OP_URING_CMD, the cmd_op would be BTRFS_IOC_ENCODED_READ, and
> addr would point to the userspace address of the
> btrfs_ioctl_encoded_io_args struct. As with the ioctl, you need to have
> CAP_SYS_ADMIN for this to work.
>
> Signed-off-by: Mark Harmstone <[email protected]>
> ---
...
> diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
> index a5d786c6d7d4..62e5757d3ba2 100644
> --- a/fs/btrfs/btrfs_inode.h
> +++ b/fs/btrfs/btrfs_inode.h
> @@ -620,6 +620,8 @@ struct btrfs_encoded_read_private {
> struct btrfs_ioctl_encoded_io_args args;
> struct file *file;
> void __user *copy_out;
> + struct io_uring_cmd *cmd;
> + unsigned int issue_flags;
> };
>
...
> +static inline struct btrfs_encoded_read_private *btrfs_uring_encoded_read_pdu(
> + struct io_uring_cmd *cmd)
> +{
> + return *(struct btrfs_encoded_read_private **)cmd->pdu;
> +}
> +static void btrfs_finish_uring_encoded_read(struct io_uring_cmd *cmd,
> + unsigned int issue_flags)
> +{
> + struct btrfs_encoded_read_private *priv;
> + ssize_t ret;
> +
> + priv = btrfs_uring_encoded_read_pdu(cmd);
> + ret = btrfs_encoded_read_finish(priv, -EIOCBQUEUED);
> +
> + io_uring_cmd_done(priv->cmd, ret, 0, priv->issue_flags);
s/priv->issue_flags/issue_flags/
You can't use flags that you got somewhere before from a completely
different execution context.
> +
> + kfree(priv);
> +}
> +
> +static void btrfs_encoded_read_uring_endio(struct btrfs_bio *bbio)
> +{
> + struct btrfs_encoded_read_private *priv = bbio->private;
Might be cleaner if you combine it with btrfs_encoded_read_ioctl_endio
btrfs_encoded_read_endio()
{
...
if (!atomic_dec_return(&priv->pending)) {
if (priv->cmd)
io_uring_cmd_complete_in_task();
else
wake_up();
...
}
> +
> + if (bbio->bio.bi_status) {
> + /*
> + * The memory barrier implied by the atomic_dec_return() here
> + * pairs with the memory barrier implied by the
> + * atomic_dec_return() or io_wait_event() in
> + * btrfs_encoded_read_regular_fill_pages() to ensure that this
> + * write is observed before the load of status in
> + * btrfs_encoded_read_regular_fill_pages().
> + */
> + WRITE_ONCE(priv->status, bbio->bio.bi_status);
> + }
> + if (!atomic_dec_return(&priv->pending)) {
> + io_uring_cmd_complete_in_task(priv->cmd,
> + btrfs_finish_uring_encoded_read);
> + }
> + bio_put(&bbio->bio);
> +}
> +
> static void _btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
> u64 file_offset, u64 disk_bytenr,
> u64 disk_io_size,
> @@ -9106,11 +9148,16 @@ static void _btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
> struct btrfs_fs_info *fs_info = inode->root->fs_info;
> unsigned long i = 0;
> struct btrfs_bio *bbio;
> + btrfs_bio_end_io_t cb;
>
> - init_waitqueue_head(&priv->wait);
> + if (priv->cmd) {
> + cb = btrfs_encoded_read_uring_endio;
> + } else {
> + init_waitqueue_head(&priv->wait);
> + cb = btrfs_encoded_read_ioctl_endio;
> + }
>
> - bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, fs_info,
> - btrfs_encoded_read_endio, priv);
> + bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, fs_info, cb, priv);
> bbio->bio.bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
> bbio->inode = inode;
>
> @@ -9122,7 +9169,7 @@ static void _btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
> btrfs_submit_bio(bbio, 0);
>
> bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, fs_info,
> - btrfs_encoded_read_endio, priv);
> + cb, priv);
> bbio->bio.bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
> bbio->inode = inode;
> continue;
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index c1886209933a..85a512a9ca64 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
...
> +static void btrfs_uring_encoded_read(struct io_uring_cmd *cmd,
> + unsigned int issue_flags)
> +{
> + ssize_t ret;
> + void __user *argp = (void __user *)(uintptr_t)cmd->sqe->addr;
u64_to_user_ptr()
> + struct btrfs_encoded_read_private *priv;
> + bool compat = issue_flags & IO_URING_F_COMPAT;
> +
> + priv = kmalloc(sizeof(*priv), GFP_KERNEL);
> + if (!priv) {
> + ret = -ENOMEM;
> + goto out_uring;
> + }
> +
> + ret = btrfs_prepare_encoded_read(priv, cmd->file, compat, argp);
> + if (ret)
> + goto out_finish;
> +
> + if (iov_iter_count(&priv->iter) == 0) {
> + ret = 0;
> + goto out_finish;
> + }
> +
> + *(struct btrfs_encoded_read_private **)cmd->pdu = priv;
> + priv->cmd = cmd;
> + priv->issue_flags = issue_flags;
You shouldn't be stashing issue_flags, it almost always wrong.
Looks btrfs_finish_uring_encoded_read() is the only user, and
with my comment above you can kill priv->issue_flags
> + ret = btrfs_encoded_read(priv);
> +
> + if (ret == -EIOCBQUEUED && atomic_dec_return(&priv->pending))
> + return;
Is gating on -EIOCBQUEUED just an optimisation? I.e. it's fine
to swap checks
if (atomic_dec_return(&priv->pending) && ret == -EIOCBQUEUED)
but we know that unless it returned -EIOCBQUEUED nobody
should've touched ->pending.
> +
> +out_finish:
> + ret = btrfs_encoded_read_finish(priv, ret);
> + kfree(priv);
> +
> +out_uring:
> + io_uring_cmd_done(cmd, ret, 0, issue_flags);
> +}
> +
--
Pavel Begunkov
next prev parent reply other threads:[~2024-09-06 14:41 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-08-23 16:27 [PATCH 0/6] btrfs: add io_uring for encoded reads Mark Harmstone
2024-08-23 16:27 ` [PATCH 1/6] btrfs: remove iocb from btrfs_encoded_read Mark Harmstone
2024-08-27 1:12 ` David Sterba
2024-08-23 16:27 ` [PATCH 2/6] btrfs: store encoded read state in struct btrfs_encoded_read_private Mark Harmstone
2024-08-26 15:22 ` David Sterba
2024-08-27 1:03 ` David Sterba
2024-09-06 15:19 ` Pavel Begunkov
2024-08-23 16:27 ` [PATCH 3/6] btrfs: add btrfs_encoded_read_finish Mark Harmstone
2024-08-23 16:27 ` [PATCH 4/6] btrfs: add btrfs_prepare_encoded_read Mark Harmstone
2024-08-23 16:27 ` [PATCH 5/6] btrfs: move wait out of btrfs_encoded_read Mark Harmstone
2024-09-06 15:11 ` Pavel Begunkov
2024-08-23 16:27 ` [PATCH 6/6] btrfs: add io_uring interface for encoded reads Mark Harmstone
2024-09-06 14:41 ` Pavel Begunkov [this message]
2024-09-06 15:33 ` Pavel Begunkov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox