* [PATCH 1/6] btrfs: remove iocb from btrfs_encoded_read
2024-08-23 16:27 [PATCH 0/6] btrfs: add io_uring for encoded reads Mark Harmstone
@ 2024-08-23 16:27 ` Mark Harmstone
2024-08-27 1:12 ` David Sterba
2024-08-23 16:27 ` [PATCH 2/6] btrfs: store encoded read state in struct btrfs_encoded_read_private Mark Harmstone
` (4 subsequent siblings)
5 siblings, 1 reply; 14+ messages in thread
From: Mark Harmstone @ 2024-08-23 16:27 UTC (permalink / raw)
To: io-uring, linux-btrfs; +Cc: Mark Harmstone
We initialize a struct kiocb for encoded reads, but this never gets
passed outside the filesystem. Simplify things by replacing it with a
file and offset pair.
Signed-off-by: Mark Harmstone <[email protected]>
---
fs/btrfs/btrfs_inode.h | 3 ++-
fs/btrfs/inode.c | 59 ++++++++++++++++++++----------------------
fs/btrfs/ioctl.c | 6 +----
3 files changed, 31 insertions(+), 37 deletions(-)
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 3056c8aed8ef..affe70929234 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -605,7 +605,8 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
u64 file_offset, u64 disk_bytenr,
u64 disk_io_size,
struct page **pages);
-ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
+ssize_t btrfs_encoded_read(struct file *file, loff_t offset,
+ struct iov_iter *iter,
struct btrfs_ioctl_encoded_io_args *encoded);
ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
const struct btrfs_ioctl_encoded_io_args *encoded);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b1b6564ab68f..a0cc029d95ed 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -8993,7 +8993,7 @@ int btrfs_encoded_io_compression_from_extent(struct btrfs_fs_info *fs_info,
}
static ssize_t btrfs_encoded_read_inline(
- struct kiocb *iocb,
+ struct btrfs_inode *inode, loff_t pos,
struct iov_iter *iter, u64 start,
u64 lockend,
struct extent_state **cached_state,
@@ -9001,7 +9001,6 @@ static ssize_t btrfs_encoded_read_inline(
struct btrfs_ioctl_encoded_io_args *encoded,
bool *unlocked)
{
- struct btrfs_inode *inode = BTRFS_I(file_inode(iocb->ki_filp));
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_io_tree *io_tree = &inode->io_tree;
@@ -9034,7 +9033,7 @@ static ssize_t btrfs_encoded_read_inline(
ptr = btrfs_file_extent_inline_start(item);
encoded->len = min_t(u64, extent_start + ram_bytes,
- inode->vfs_inode.i_size) - iocb->ki_pos;
+ inode->vfs_inode.i_size) - pos;
ret = btrfs_encoded_io_compression_from_extent(fs_info,
btrfs_file_extent_compression(leaf, item));
if (ret < 0)
@@ -9051,12 +9050,12 @@ static ssize_t btrfs_encoded_read_inline(
}
count = inline_size;
encoded->unencoded_len = ram_bytes;
- encoded->unencoded_offset = iocb->ki_pos - extent_start;
+ encoded->unencoded_offset = pos - extent_start;
} else {
count = min_t(u64, count, encoded->len);
encoded->len = count;
encoded->unencoded_len = count;
- ptr += iocb->ki_pos - extent_start;
+ ptr += pos - extent_start;
}
tmp = kmalloc(count, GFP_NOFS);
@@ -9151,15 +9150,14 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
return blk_status_to_errno(READ_ONCE(priv.status));
}
-static ssize_t btrfs_encoded_read_regular(struct kiocb *iocb,
- struct iov_iter *iter,
+static ssize_t btrfs_encoded_read_regular(struct btrfs_inode *inode,
+ loff_t offset, struct iov_iter *iter,
u64 start, u64 lockend,
struct extent_state **cached_state,
u64 disk_bytenr, u64 disk_io_size,
size_t count, bool compressed,
bool *unlocked)
{
- struct btrfs_inode *inode = BTRFS_I(file_inode(iocb->ki_filp));
struct extent_io_tree *io_tree = &inode->io_tree;
struct page **pages;
unsigned long nr_pages, i;
@@ -9190,8 +9188,8 @@ static ssize_t btrfs_encoded_read_regular(struct kiocb *iocb,
i = 0;
page_offset = 0;
} else {
- i = (iocb->ki_pos - start) >> PAGE_SHIFT;
- page_offset = (iocb->ki_pos - start) & (PAGE_SIZE - 1);
+ i = (offset - start) >> PAGE_SHIFT;
+ page_offset = (offset - start) & (PAGE_SIZE - 1);
}
cur = 0;
while (cur < count) {
@@ -9217,10 +9215,11 @@ static ssize_t btrfs_encoded_read_regular(struct kiocb *iocb,
return ret;
}
-ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
+ssize_t btrfs_encoded_read(struct file *file, loff_t offset,
+ struct iov_iter *iter,
struct btrfs_ioctl_encoded_io_args *encoded)
{
- struct btrfs_inode *inode = BTRFS_I(file_inode(iocb->ki_filp));
+ struct btrfs_inode *inode = BTRFS_I(file_inode(file));
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct extent_io_tree *io_tree = &inode->io_tree;
ssize_t ret;
@@ -9230,17 +9229,17 @@ ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
struct extent_map *em;
bool unlocked = false;
- file_accessed(iocb->ki_filp);
+ file_accessed(file);
btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED);
- if (iocb->ki_pos >= inode->vfs_inode.i_size) {
+ if (offset >= inode->vfs_inode.i_size) {
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
return 0;
}
- start = ALIGN_DOWN(iocb->ki_pos, fs_info->sectorsize);
+ start = ALIGN_DOWN(offset, fs_info->sectorsize);
/*
- * We don't know how long the extent containing iocb->ki_pos is, but if
+ * We don't know how long the extent containing offset is, but if
* it's compressed we know that it won't be longer than this.
*/
lockend = start + BTRFS_MAX_UNCOMPRESSED - 1;
@@ -9277,10 +9276,11 @@ ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
*/
free_extent_map(em);
em = NULL;
- ret = btrfs_encoded_read_inline(iocb, iter, start, lockend,
- &cached_state, extent_start,
- count, encoded, &unlocked);
- goto out;
+ ret = btrfs_encoded_read_inline(inode, offset, iter, start,
+ lockend, &cached_state,
+ extent_start, count, encoded,
+ &unlocked);
+ goto out_em;
}
/*
@@ -9288,7 +9288,7 @@ ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
* that.
*/
encoded->len = min_t(u64, extent_map_end(em),
- inode->vfs_inode.i_size) - iocb->ki_pos;
+ inode->vfs_inode.i_size) - offset;
if (em->disk_bytenr == EXTENT_MAP_HOLE ||
(em->flags & EXTENT_FLAG_PREALLOC)) {
disk_bytenr = EXTENT_MAP_HOLE;
@@ -9308,7 +9308,7 @@ ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
disk_io_size = em->disk_num_bytes;
count = em->disk_num_bytes;
encoded->unencoded_len = em->ram_bytes;
- encoded->unencoded_offset = iocb->ki_pos - (em->start - em->offset);
+ encoded->unencoded_offset = offset - (em->start - em->offset);
ret = btrfs_encoded_io_compression_from_extent(fs_info,
extent_map_compression(em));
if (ret < 0)
@@ -9322,8 +9322,8 @@ ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
* Don't read beyond what we locked. This also limits the page
* allocations that we'll do.
*/
- disk_io_size = min(lockend + 1, iocb->ki_pos + encoded->len) - start;
- count = start + disk_io_size - iocb->ki_pos;
+ disk_io_size = min(lockend + 1, offset + encoded->len) - start;
+ count = start + disk_io_size - offset;
encoded->len = count;
encoded->unencoded_len = count;
disk_io_size = ALIGN(disk_io_size, fs_info->sectorsize);
@@ -9339,16 +9339,13 @@ ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
if (ret != count)
ret = -EFAULT;
} else {
- ret = btrfs_encoded_read_regular(iocb, iter, start, lockend,
- &cached_state, disk_bytenr,
- disk_io_size, count,
- encoded->compression,
+ ret = btrfs_encoded_read_regular(inode, offset, iter, start,
+ lockend, &cached_state,
+ disk_bytenr, disk_io_size,
+ count, encoded->compression,
&unlocked);
}
-out:
- if (ret >= 0)
- iocb->ki_pos += encoded->len;
out_em:
free_extent_map(em);
out_unlock_extent:
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index e0a664b8a46a..406ed70814f5 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -4520,7 +4520,6 @@ static int btrfs_ioctl_encoded_read(struct file *file, void __user *argp,
struct iovec *iov = iovstack;
struct iov_iter iter;
loff_t pos;
- struct kiocb kiocb;
ssize_t ret;
if (!capable(CAP_SYS_ADMIN)) {
@@ -4571,10 +4570,7 @@ static int btrfs_ioctl_encoded_read(struct file *file, void __user *argp,
if (ret < 0)
goto out_iov;
- init_sync_kiocb(&kiocb, file);
- kiocb.ki_pos = pos;
-
- ret = btrfs_encoded_read(&kiocb, &iter, &args);
+ ret = btrfs_encoded_read(file, pos, &iter, &args);
if (ret >= 0) {
fsnotify_access(file);
if (copy_to_user(argp + copy_end,
--
2.44.2
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 2/6] btrfs: store encoded read state in struct btrfs_encoded_read_private
2024-08-23 16:27 [PATCH 0/6] btrfs: add io_uring for encoded reads Mark Harmstone
2024-08-23 16:27 ` [PATCH 1/6] btrfs: remove iocb from btrfs_encoded_read Mark Harmstone
@ 2024-08-23 16:27 ` Mark Harmstone
2024-08-26 15:22 ` David Sterba
` (2 more replies)
2024-08-23 16:27 ` [PATCH 3/6] btrfs: add btrfs_encoded_read_finish Mark Harmstone
` (3 subsequent siblings)
5 siblings, 3 replies; 14+ messages in thread
From: Mark Harmstone @ 2024-08-23 16:27 UTC (permalink / raw)
To: io-uring, linux-btrfs; +Cc: Mark Harmstone
Move the various stack variables needed for encoded reads into struct
btrfs_encoded_read_private, so that we can split it into several
functions.
Signed-off-by: Mark Harmstone <[email protected]>
---
fs/btrfs/btrfs_inode.h | 20 ++++-
fs/btrfs/inode.c | 170 +++++++++++++++++++++--------------------
fs/btrfs/ioctl.c | 60 ++++++++-------
3 files changed, 135 insertions(+), 115 deletions(-)
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index affe70929234..5cd4308bd337 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -605,9 +605,23 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
u64 file_offset, u64 disk_bytenr,
u64 disk_io_size,
struct page **pages);
-ssize_t btrfs_encoded_read(struct file *file, loff_t offset,
- struct iov_iter *iter,
- struct btrfs_ioctl_encoded_io_args *encoded);
+
+struct btrfs_encoded_read_private {
+ wait_queue_head_t wait;
+ atomic_t pending;
+ blk_status_t status;
+ unsigned long nr_pages;
+ struct page **pages;
+ struct extent_state *cached_state;
+ size_t count;
+ struct iovec iovstack[UIO_FASTIOV];
+ struct iovec *iov;
+ struct iov_iter iter;
+ struct btrfs_ioctl_encoded_io_args args;
+ struct file *file;
+};
+
+ssize_t btrfs_encoded_read(struct btrfs_encoded_read_private *priv);
ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
const struct btrfs_ioctl_encoded_io_args *encoded);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a0cc029d95ed..c1292e58366a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -9078,12 +9078,6 @@ static ssize_t btrfs_encoded_read_inline(
return ret;
}
-struct btrfs_encoded_read_private {
- wait_queue_head_t wait;
- atomic_t pending;
- blk_status_t status;
-};
-
static void btrfs_encoded_read_endio(struct btrfs_bio *bbio)
{
struct btrfs_encoded_read_private *priv = bbio->private;
@@ -9104,33 +9098,31 @@ static void btrfs_encoded_read_endio(struct btrfs_bio *bbio)
bio_put(&bbio->bio);
}
-int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
- u64 file_offset, u64 disk_bytenr,
- u64 disk_io_size, struct page **pages)
+static void _btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
+ u64 file_offset, u64 disk_bytenr,
+ u64 disk_io_size,
+ struct btrfs_encoded_read_private *priv)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
- struct btrfs_encoded_read_private priv = {
- .pending = ATOMIC_INIT(1),
- };
unsigned long i = 0;
struct btrfs_bio *bbio;
- init_waitqueue_head(&priv.wait);
+ init_waitqueue_head(&priv->wait);
bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, fs_info,
- btrfs_encoded_read_endio, &priv);
+ btrfs_encoded_read_endio, priv);
bbio->bio.bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
bbio->inode = inode;
do {
size_t bytes = min_t(u64, disk_io_size, PAGE_SIZE);
- if (bio_add_page(&bbio->bio, pages[i], bytes, 0) < bytes) {
- atomic_inc(&priv.pending);
+ if (bio_add_page(&bbio->bio, priv->pages[i], bytes, 0) < bytes) {
+ atomic_inc(&priv->pending);
btrfs_submit_bio(bbio, 0);
bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, fs_info,
- btrfs_encoded_read_endio, &priv);
+ btrfs_encoded_read_endio, priv);
bbio->bio.bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
bbio->inode = inode;
continue;
@@ -9141,8 +9133,21 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
disk_io_size -= bytes;
} while (disk_io_size);
- atomic_inc(&priv.pending);
+ atomic_inc(&priv->pending);
btrfs_submit_bio(bbio, 0);
+}
+
+int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
+ u64 file_offset, u64 disk_bytenr,
+ u64 disk_io_size, struct page **pages)
+{
+ struct btrfs_encoded_read_private priv = {
+ .pending = ATOMIC_INIT(1),
+ .pages = pages,
+ };
+
+ _btrfs_encoded_read_regular_fill_pages(inode, file_offset, disk_bytenr,
+ disk_io_size, &priv);
if (atomic_dec_return(&priv.pending))
io_wait_event(priv.wait, !atomic_read(&priv.pending));
@@ -9150,54 +9155,56 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
return blk_status_to_errno(READ_ONCE(priv.status));
}
-static ssize_t btrfs_encoded_read_regular(struct btrfs_inode *inode,
- loff_t offset, struct iov_iter *iter,
+static ssize_t btrfs_encoded_read_regular(struct btrfs_encoded_read_private *priv,
u64 start, u64 lockend,
- struct extent_state **cached_state,
u64 disk_bytenr, u64 disk_io_size,
- size_t count, bool compressed,
bool *unlocked)
{
+ struct btrfs_inode *inode = BTRFS_I(file_inode(priv->file));
struct extent_io_tree *io_tree = &inode->io_tree;
- struct page **pages;
- unsigned long nr_pages, i;
+ unsigned long i;
u64 cur;
size_t page_offset;
ssize_t ret;
- nr_pages = DIV_ROUND_UP(disk_io_size, PAGE_SIZE);
- pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
- if (!pages)
+ priv->nr_pages = DIV_ROUND_UP(disk_io_size, PAGE_SIZE);
+ priv->pages = kcalloc(priv->nr_pages, sizeof(struct page *), GFP_NOFS);
+ if (!priv->pages)
return -ENOMEM;
- ret = btrfs_alloc_page_array(nr_pages, pages, false);
+ ret = btrfs_alloc_page_array(priv->nr_pages, priv->pages, false);
if (ret) {
ret = -ENOMEM;
goto out;
}
- ret = btrfs_encoded_read_regular_fill_pages(inode, start, disk_bytenr,
- disk_io_size, pages);
+ _btrfs_encoded_read_regular_fill_pages(inode, start, disk_bytenr,
+ disk_io_size, priv);
+
+ if (atomic_dec_return(&priv->pending))
+ io_wait_event(priv->wait, !atomic_read(&priv->pending));
+
+ ret = blk_status_to_errno(READ_ONCE(priv->status));
if (ret)
goto out;
- unlock_extent(io_tree, start, lockend, cached_state);
+ unlock_extent(io_tree, start, lockend, &priv->cached_state);
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
*unlocked = true;
- if (compressed) {
+ if (priv->args.compression) {
i = 0;
page_offset = 0;
} else {
- i = (offset - start) >> PAGE_SHIFT;
- page_offset = (offset - start) & (PAGE_SIZE - 1);
+ i = (priv->args.offset - start) >> PAGE_SHIFT;
+ page_offset = (priv->args.offset - start) & (PAGE_SIZE - 1);
}
cur = 0;
- while (cur < count) {
- size_t bytes = min_t(size_t, count - cur,
+ while (cur < priv->count) {
+ size_t bytes = min_t(size_t, priv->count - cur,
PAGE_SIZE - page_offset);
- if (copy_page_to_iter(pages[i], page_offset, bytes,
- iter) != bytes) {
+ if (copy_page_to_iter(priv->pages[i], page_offset, bytes,
+ &priv->iter) != bytes) {
ret = -EFAULT;
goto out;
}
@@ -9205,42 +9212,40 @@ static ssize_t btrfs_encoded_read_regular(struct btrfs_inode *inode,
cur += bytes;
page_offset = 0;
}
- ret = count;
+ ret = priv->count;
out:
- for (i = 0; i < nr_pages; i++) {
- if (pages[i])
- __free_page(pages[i]);
+ for (i = 0; i < priv->nr_pages; i++) {
+ if (priv->pages[i])
+ __free_page(priv->pages[i]);
}
- kfree(pages);
+ kfree(priv->pages);
return ret;
}
-ssize_t btrfs_encoded_read(struct file *file, loff_t offset,
- struct iov_iter *iter,
- struct btrfs_ioctl_encoded_io_args *encoded)
+ssize_t btrfs_encoded_read(struct btrfs_encoded_read_private *priv)
{
- struct btrfs_inode *inode = BTRFS_I(file_inode(file));
+ struct btrfs_inode *inode = BTRFS_I(file_inode(priv->file));
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct extent_io_tree *io_tree = &inode->io_tree;
ssize_t ret;
- size_t count = iov_iter_count(iter);
u64 start, lockend, disk_bytenr, disk_io_size;
- struct extent_state *cached_state = NULL;
struct extent_map *em;
bool unlocked = false;
- file_accessed(file);
+ priv->count = iov_iter_count(&priv->iter);
+
+ file_accessed(priv->file);
btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED);
- if (offset >= inode->vfs_inode.i_size) {
+ if (priv->args.offset >= inode->vfs_inode.i_size) {
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
return 0;
}
- start = ALIGN_DOWN(offset, fs_info->sectorsize);
+ start = ALIGN_DOWN(priv->args.offset, fs_info->sectorsize);
/*
- * We don't know how long the extent containing offset is, but if
- * it's compressed we know that it won't be longer than this.
+ * We don't know how long the extent containing priv->args.offset is,
+ * but if it's compressed we know that it won't be longer than this.
*/
lockend = start + BTRFS_MAX_UNCOMPRESSED - 1;
@@ -9251,13 +9256,13 @@ ssize_t btrfs_encoded_read(struct file *file, loff_t offset,
lockend - start + 1);
if (ret)
goto out_unlock_inode;
- lock_extent(io_tree, start, lockend, &cached_state);
+ lock_extent(io_tree, start, lockend, &priv->cached_state);
ordered = btrfs_lookup_ordered_range(inode, start,
lockend - start + 1);
if (!ordered)
break;
btrfs_put_ordered_extent(ordered);
- unlock_extent(io_tree, start, lockend, &cached_state);
+ unlock_extent(io_tree, start, lockend, &priv->cached_state);
cond_resched();
}
@@ -9276,10 +9281,11 @@ ssize_t btrfs_encoded_read(struct file *file, loff_t offset,
*/
free_extent_map(em);
em = NULL;
- ret = btrfs_encoded_read_inline(inode, offset, iter, start,
- lockend, &cached_state,
- extent_start, count, encoded,
- &unlocked);
+ ret = btrfs_encoded_read_inline(inode, priv->args.offset,
+ &priv->iter, start,
+ lockend, &priv->cached_state,
+ extent_start, priv->count,
+ &priv->args, &unlocked);
goto out_em;
}
@@ -9287,62 +9293,60 @@ ssize_t btrfs_encoded_read(struct file *file, loff_t offset,
* We only want to return up to EOF even if the extent extends beyond
* that.
*/
- encoded->len = min_t(u64, extent_map_end(em),
- inode->vfs_inode.i_size) - offset;
+ priv->args.len = min_t(u64, extent_map_end(em),
+ inode->vfs_inode.i_size) - priv->args.offset;
if (em->disk_bytenr == EXTENT_MAP_HOLE ||
(em->flags & EXTENT_FLAG_PREALLOC)) {
disk_bytenr = EXTENT_MAP_HOLE;
- count = min_t(u64, count, encoded->len);
- encoded->len = count;
- encoded->unencoded_len = count;
+ priv->count = min_t(u64, priv->count, priv->args.len);
+ priv->args.len = priv->count;
+ priv->args.unencoded_len = priv->count;
} else if (extent_map_is_compressed(em)) {
disk_bytenr = em->disk_bytenr;
/*
* Bail if the buffer isn't large enough to return the whole
* compressed extent.
*/
- if (em->disk_num_bytes > count) {
+ if (em->disk_num_bytes > priv->count) {
ret = -ENOBUFS;
goto out_em;
}
disk_io_size = em->disk_num_bytes;
- count = em->disk_num_bytes;
- encoded->unencoded_len = em->ram_bytes;
- encoded->unencoded_offset = offset - (em->start - em->offset);
+ priv->count = em->disk_num_bytes;
+ priv->args.unencoded_len = em->ram_bytes;
+ priv->args.unencoded_offset = priv->args.offset - (em->start - em->offset);
ret = btrfs_encoded_io_compression_from_extent(fs_info,
extent_map_compression(em));
if (ret < 0)
goto out_em;
- encoded->compression = ret;
+ priv->args.compression = ret;
} else {
disk_bytenr = extent_map_block_start(em) + (start - em->start);
- if (encoded->len > count)
- encoded->len = count;
+ if (priv->args.len > priv->count)
+ priv->args.len = priv->count;
/*
* Don't read beyond what we locked. This also limits the page
* allocations that we'll do.
*/
- disk_io_size = min(lockend + 1, offset + encoded->len) - start;
- count = start + disk_io_size - offset;
- encoded->len = count;
- encoded->unencoded_len = count;
+ disk_io_size = min(lockend + 1, priv->args.offset + priv->args.len) - start;
+ priv->count = start + disk_io_size - priv->args.offset;
+ priv->args.len = priv->count;
+ priv->args.unencoded_len = priv->count;
disk_io_size = ALIGN(disk_io_size, fs_info->sectorsize);
}
free_extent_map(em);
em = NULL;
if (disk_bytenr == EXTENT_MAP_HOLE) {
- unlock_extent(io_tree, start, lockend, &cached_state);
+ unlock_extent(io_tree, start, lockend, &priv->cached_state);
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
unlocked = true;
- ret = iov_iter_zero(count, iter);
- if (ret != count)
+ ret = iov_iter_zero(priv->count, &priv->iter);
+ if (ret != priv->count)
ret = -EFAULT;
} else {
- ret = btrfs_encoded_read_regular(inode, offset, iter, start,
- lockend, &cached_state,
+ ret = btrfs_encoded_read_regular(priv, start, lockend,
disk_bytenr, disk_io_size,
- count, encoded->compression,
&unlocked);
}
@@ -9350,7 +9354,7 @@ ssize_t btrfs_encoded_read(struct file *file, loff_t offset,
free_extent_map(em);
out_unlock_extent:
if (!unlocked)
- unlock_extent(io_tree, start, lockend, &cached_state);
+ unlock_extent(io_tree, start, lockend, &priv->cached_state);
out_unlock_inode:
if (!unlocked)
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 406ed70814f5..770bd609f386 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -4512,19 +4512,19 @@ static int _btrfs_ioctl_send(struct btrfs_inode *inode, void __user *argp, bool
static int btrfs_ioctl_encoded_read(struct file *file, void __user *argp,
bool compat)
{
- struct btrfs_ioctl_encoded_io_args args = { 0 };
size_t copy_end_kernel = offsetofend(struct btrfs_ioctl_encoded_io_args,
flags);
size_t copy_end;
- struct iovec iovstack[UIO_FASTIOV];
- struct iovec *iov = iovstack;
- struct iov_iter iter;
loff_t pos;
ssize_t ret;
+ struct btrfs_encoded_read_private priv = {
+ .pending = ATOMIC_INIT(1),
+ .file = file,
+ };
if (!capable(CAP_SYS_ADMIN)) {
ret = -EPERM;
- goto out_acct;
+ goto out;
}
if (compat) {
@@ -4535,53 +4535,55 @@ static int btrfs_ioctl_encoded_read(struct file *file, void __user *argp,
flags);
if (copy_from_user(&args32, argp, copy_end)) {
ret = -EFAULT;
- goto out_acct;
+ goto out;
}
- args.iov = compat_ptr(args32.iov);
- args.iovcnt = args32.iovcnt;
- args.offset = args32.offset;
- args.flags = args32.flags;
+ priv.args.iov = compat_ptr(args32.iov);
+ priv.args.iovcnt = args32.iovcnt;
+ priv.args.offset = args32.offset;
+ priv.args.flags = args32.flags;
#else
return -ENOTTY;
#endif
} else {
copy_end = copy_end_kernel;
- if (copy_from_user(&args, argp, copy_end)) {
+ if (copy_from_user(&priv.args, argp, copy_end)) {
ret = -EFAULT;
- goto out_acct;
+ goto out;
}
}
- if (args.flags != 0) {
+ if (priv.args.flags != 0) {
ret = -EINVAL;
- goto out_acct;
+ goto out;
}
- ret = import_iovec(ITER_DEST, args.iov, args.iovcnt, ARRAY_SIZE(iovstack),
- &iov, &iter);
- if (ret < 0)
- goto out_acct;
+ priv.iov = priv.iovstack;
+ ret = import_iovec(ITER_DEST, priv.args.iov, priv.args.iovcnt,
+ ARRAY_SIZE(priv.iovstack), &priv.iov, &priv.iter);
+ if (ret < 0) {
+ priv.iov = NULL;
+ goto out;
+ }
- if (iov_iter_count(&iter) == 0) {
+ if (iov_iter_count(&priv.iter) == 0) {
ret = 0;
- goto out_iov;
+ goto out;
}
- pos = args.offset;
- ret = rw_verify_area(READ, file, &pos, args.len);
+ pos = priv.args.offset;
+ ret = rw_verify_area(READ, file, &pos, priv.args.len);
if (ret < 0)
- goto out_iov;
+ goto out;
- ret = btrfs_encoded_read(file, pos, &iter, &args);
+ ret = btrfs_encoded_read(&priv);
if (ret >= 0) {
fsnotify_access(file);
if (copy_to_user(argp + copy_end,
- (char *)&args + copy_end_kernel,
- sizeof(args) - copy_end_kernel))
+ (char *)&priv.args + copy_end_kernel,
+ sizeof(priv.args) - copy_end_kernel))
ret = -EFAULT;
}
-out_iov:
- kfree(iov);
-out_acct:
+out:
+ kfree(priv.iov);
if (ret > 0)
add_rchar(current, ret);
inc_syscr(current);
--
2.44.2
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH 2/6] btrfs: store encoded read state in struct btrfs_encoded_read_private
2024-08-23 16:27 ` [PATCH 2/6] btrfs: store encoded read state in struct btrfs_encoded_read_private Mark Harmstone
@ 2024-08-26 15:22 ` David Sterba
2024-08-27 1:03 ` David Sterba
2024-09-06 15:19 ` Pavel Begunkov
2 siblings, 0 replies; 14+ messages in thread
From: David Sterba @ 2024-08-26 15:22 UTC (permalink / raw)
To: Mark Harmstone; +Cc: io-uring, linux-btrfs
On Fri, Aug 23, 2024 at 05:27:44PM +0100, Mark Harmstone wrote:
> Move the various stack variables needed for encoded reads into struct
> btrfs_encoded_read_private, so that we can split it into several
> functions.
>
> Signed-off-by: Mark Harmstone <[email protected]>
> ---
> fs/btrfs/btrfs_inode.h | 20 ++++-
> fs/btrfs/inode.c | 170 +++++++++++++++++++++--------------------
> fs/btrfs/ioctl.c | 60 ++++++++-------
> 3 files changed, 135 insertions(+), 115 deletions(-)
>
> diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
> index affe70929234..5cd4308bd337 100644
> --- a/fs/btrfs/btrfs_inode.h
> +++ b/fs/btrfs/btrfs_inode.h
> @@ -605,9 +605,23 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
> u64 file_offset, u64 disk_bytenr,
> u64 disk_io_size,
> struct page **pages);
> -ssize_t btrfs_encoded_read(struct file *file, loff_t offset,
> - struct iov_iter *iter,
> - struct btrfs_ioctl_encoded_io_args *encoded);
> +
> +struct btrfs_encoded_read_private {
> + wait_queue_head_t wait;
> + atomic_t pending;
> + blk_status_t status;
> + unsigned long nr_pages;
> + struct page **pages;
> + struct extent_state *cached_state;
> + size_t count;
> + struct iovec iovstack[UIO_FASTIOV];
> + struct iovec *iov;
> + struct iov_iter iter;
> + struct btrfs_ioctl_encoded_io_args args;
> + struct file *file;
> +};
> +
> +ssize_t btrfs_encoded_read(struct btrfs_encoded_read_private *priv);
> ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
> const struct btrfs_ioctl_encoded_io_args *encoded);
>
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index a0cc029d95ed..c1292e58366a 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -9078,12 +9078,6 @@ static ssize_t btrfs_encoded_read_inline(
> return ret;
> }
>
> -struct btrfs_encoded_read_private {
> - wait_queue_head_t wait;
> - atomic_t pending;
> - blk_status_t status;
> -};
> -
> static void btrfs_encoded_read_endio(struct btrfs_bio *bbio)
> {
> struct btrfs_encoded_read_private *priv = bbio->private;
> @@ -9104,33 +9098,31 @@ static void btrfs_encoded_read_endio(struct btrfs_bio *bbio)
> bio_put(&bbio->bio);
> }
>
> -int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
> - u64 file_offset, u64 disk_bytenr,
> - u64 disk_io_size, struct page **pages)
> +static void _btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
Please don't use underscores in function names
https://btrfs.readthedocs.io/en/latest/dev/Development-notes.html#function-declarations
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 2/6] btrfs: store encoded read state in struct btrfs_encoded_read_private
2024-08-23 16:27 ` [PATCH 2/6] btrfs: store encoded read state in struct btrfs_encoded_read_private Mark Harmstone
2024-08-26 15:22 ` David Sterba
@ 2024-08-27 1:03 ` David Sterba
2024-09-06 15:19 ` Pavel Begunkov
2 siblings, 0 replies; 14+ messages in thread
From: David Sterba @ 2024-08-27 1:03 UTC (permalink / raw)
To: Mark Harmstone; +Cc: io-uring, linux-btrfs
On Fri, Aug 23, 2024 at 05:27:44PM +0100, Mark Harmstone wrote:
> Move the various stack variables needed for encoded reads into struct
> btrfs_encoded_read_private, so that we can split it into several
> functions.
Moving local variables makes sense in some cases but I don't see a
reason to move all of them.
> Signed-off-by: Mark Harmstone <[email protected]>
> ---
> fs/btrfs/btrfs_inode.h | 20 ++++-
> fs/btrfs/inode.c | 170 +++++++++++++++++++++--------------------
> fs/btrfs/ioctl.c | 60 ++++++++-------
> 3 files changed, 135 insertions(+), 115 deletions(-)
>
> diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
> index affe70929234..5cd4308bd337 100644
> --- a/fs/btrfs/btrfs_inode.h
> +++ b/fs/btrfs/btrfs_inode.h
> @@ -605,9 +605,23 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
> u64 file_offset, u64 disk_bytenr,
> u64 disk_io_size,
> struct page **pages);
> -ssize_t btrfs_encoded_read(struct file *file, loff_t offset,
> - struct iov_iter *iter,
> - struct btrfs_ioctl_encoded_io_args *encoded);
> +
> +struct btrfs_encoded_read_private {
> + wait_queue_head_t wait;
> + atomic_t pending;
> + blk_status_t status;
> + unsigned long nr_pages;
> + struct page **pages;
> + struct extent_state *cached_state;
The cached state is used as a local variable that is not reused by other
functions that also take the private structure, so what's the reason to
store it here?
> + size_t count;
> + struct iovec iovstack[UIO_FASTIOV];
> + struct iovec *iov;
Same, this is used in the leaf functions and not passed around.
> + struct iov_iter iter;
> + struct btrfs_ioctl_encoded_io_args args;
> + struct file *file;
> +};
As a coding pattern, the structure should store data that would be
otherwise passed as parameters or repeatedly derived from the
parameters.
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 2/6] btrfs: store encoded read state in struct btrfs_encoded_read_private
2024-08-23 16:27 ` [PATCH 2/6] btrfs: store encoded read state in struct btrfs_encoded_read_private Mark Harmstone
2024-08-26 15:22 ` David Sterba
2024-08-27 1:03 ` David Sterba
@ 2024-09-06 15:19 ` Pavel Begunkov
2 siblings, 0 replies; 14+ messages in thread
From: Pavel Begunkov @ 2024-09-06 15:19 UTC (permalink / raw)
To: Mark Harmstone, io-uring, linux-btrfs
On 8/23/24 17:27, Mark Harmstone wrote:
> Move the various stack variables needed for encoded reads into struct
> btrfs_encoded_read_private, so that we can split it into several
> functions.
>
> Signed-off-by: Mark Harmstone <[email protected]>
> ---
> fs/btrfs/btrfs_inode.h | 20 ++++-
> fs/btrfs/inode.c | 170 +++++++++++++++++++++--------------------
> fs/btrfs/ioctl.c | 60 ++++++++-------
> 3 files changed, 135 insertions(+), 115 deletions(-)
>
> diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
> index affe70929234..5cd4308bd337 100644
...
>
> -ssize_t btrfs_encoded_read(struct file *file, loff_t offset,
> - struct iov_iter *iter,
> - struct btrfs_ioctl_encoded_io_args *encoded)
> +ssize_t btrfs_encoded_read(struct btrfs_encoded_read_private *priv)
> {
> - struct btrfs_inode *inode = BTRFS_I(file_inode(file));
> + struct btrfs_inode *inode = BTRFS_I(file_inode(priv->file));
> struct btrfs_fs_info *fs_info = inode->root->fs_info;
> struct extent_io_tree *io_tree = &inode->io_tree;
> ssize_t ret;
> - size_t count = iov_iter_count(iter);
> u64 start, lockend, disk_bytenr, disk_io_size;
> - struct extent_state *cached_state = NULL;
> struct extent_map *em;
> bool unlocked = false;
>
> - file_accessed(file);
> + priv->count = iov_iter_count(&priv->iter);
> +
> + file_accessed(priv->file);
>
> btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED);
Request submission should usually be short and not block
on IO or any locks that might wait for IO.
bool nowait = issue_flags & IO_URING_F_NONBLOCK;
btrfs_encoded_read(..., nowait) {
f = BTRFS_ILOCK_SHARED;
if (nowait)
f |= BTRFS_ILOCK_TRY;
if (btrfs_inode_lock(inode, f))
return -EAGAIN; // io_uring will retry from a blocking context
}
so it might need something like this here as well as for
filemap waiting and possibly other places.
>
> - if (offset >= inode->vfs_inode.i_size) {
> + if (priv->args.offset >= inode->vfs_inode.i_size) {
> btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
> return 0;
> }
> - start = ALIGN_DOWN(offset, fs_info->sectorsize);
> + start = ALIGN_DOWN(priv->args.offset, fs_info->sectorsize);
> /*
> - * We don't know how long the extent containing offset is, but if
> - * it's compressed we know that it won't be longer than this.
> + * We don't know how long the extent containing priv->args.offset is,
> + * but if it's compressed we know that it won't be longer than this.
> */
> lockend = start + BTRFS_MAX_UNCOMPRESSED - 1;
>
...
--
Pavel Begunkov
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 3/6] btrfs: add btrfs_encoded_read_finish
2024-08-23 16:27 [PATCH 0/6] btrfs: add io_uring for encoded reads Mark Harmstone
2024-08-23 16:27 ` [PATCH 1/6] btrfs: remove iocb from btrfs_encoded_read Mark Harmstone
2024-08-23 16:27 ` [PATCH 2/6] btrfs: store encoded read state in struct btrfs_encoded_read_private Mark Harmstone
@ 2024-08-23 16:27 ` Mark Harmstone
2024-08-23 16:27 ` [PATCH 4/6] btrfs: add btrfs_prepare_encoded_read Mark Harmstone
` (2 subsequent siblings)
5 siblings, 0 replies; 14+ messages in thread
From: Mark Harmstone @ 2024-08-23 16:27 UTC (permalink / raw)
To: io-uring, linux-btrfs; +Cc: Mark Harmstone
Move the end of btrfs_ioctl_encoded_read, responsible for copying to
userspace and cleanup, into its own function.
Signed-off-by: Mark Harmstone <[email protected]>
---
fs/btrfs/btrfs_inode.h | 1 +
fs/btrfs/inode.c | 29 +++++++++++-----------------
fs/btrfs/ioctl.c | 43 ++++++++++++++++++++++++++++++------------
3 files changed, 43 insertions(+), 30 deletions(-)
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 5cd4308bd337..f4d77c3bb544 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -619,6 +619,7 @@ struct btrfs_encoded_read_private {
struct iov_iter iter;
struct btrfs_ioctl_encoded_io_args args;
struct file *file;
+ void __user *copy_out;
};
ssize_t btrfs_encoded_read(struct btrfs_encoded_read_private *priv);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c1292e58366a..1e53977a4854 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -9169,13 +9169,13 @@ static ssize_t btrfs_encoded_read_regular(struct btrfs_encoded_read_private *pri
priv->nr_pages = DIV_ROUND_UP(disk_io_size, PAGE_SIZE);
priv->pages = kcalloc(priv->nr_pages, sizeof(struct page *), GFP_NOFS);
- if (!priv->pages)
+ if (!priv->pages) {
+ priv->nr_pages = 0;
return -ENOMEM;
+ }
ret = btrfs_alloc_page_array(priv->nr_pages, priv->pages, false);
- if (ret) {
- ret = -ENOMEM;
- goto out;
- }
+ if (ret)
+ return -ENOMEM;
_btrfs_encoded_read_regular_fill_pages(inode, start, disk_bytenr,
disk_io_size, priv);
@@ -9185,7 +9185,7 @@ static ssize_t btrfs_encoded_read_regular(struct btrfs_encoded_read_private *pri
ret = blk_status_to_errno(READ_ONCE(priv->status));
if (ret)
- goto out;
+ return ret;
unlock_extent(io_tree, start, lockend, &priv->cached_state);
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
@@ -9204,22 +9204,15 @@ static ssize_t btrfs_encoded_read_regular(struct btrfs_encoded_read_private *pri
PAGE_SIZE - page_offset);
if (copy_page_to_iter(priv->pages[i], page_offset, bytes,
- &priv->iter) != bytes) {
- ret = -EFAULT;
- goto out;
- }
+ &priv->iter) != bytes)
+ return -EFAULT;
+
i++;
cur += bytes;
page_offset = 0;
}
- ret = priv->count;
-out:
- for (i = 0; i < priv->nr_pages; i++) {
- if (priv->pages[i])
- __free_page(priv->pages[i]);
- }
- kfree(priv->pages);
- return ret;
+
+ return priv->count;
}
ssize_t btrfs_encoded_read(struct btrfs_encoded_read_private *priv)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 770bd609f386..3fa661322c26 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -4509,6 +4509,34 @@ static int _btrfs_ioctl_send(struct btrfs_inode *inode, void __user *argp, bool
return ret;
}
+static ssize_t btrfs_encoded_read_finish(struct btrfs_encoded_read_private *priv,
+ ssize_t ret)
+{
+ size_t copy_end_kernel = offsetofend(struct btrfs_ioctl_encoded_io_args,
+ flags);
+ unsigned long i;
+
+ if (ret >= 0) {
+ fsnotify_access(priv->file);
+ if (copy_to_user(priv->copy_out,
+ (char *)&priv->args + copy_end_kernel,
+ sizeof(priv->args) - copy_end_kernel))
+ ret = -EFAULT;
+ }
+
+ for (i = 0; i < priv->nr_pages; i++) {
+ if (priv->pages[i])
+ __free_page(priv->pages[i]);
+ }
+ kfree(priv->pages);
+ kfree(priv->iov);
+
+ if (ret > 0)
+ add_rchar(current, ret);
+ inc_syscr(current);
+ return ret;
+}
+
static int btrfs_ioctl_encoded_read(struct file *file, void __user *argp,
bool compat)
{
@@ -4573,21 +4601,12 @@ static int btrfs_ioctl_encoded_read(struct file *file, void __user *argp,
if (ret < 0)
goto out;
+ priv.copy_out = argp + copy_end;
+
ret = btrfs_encoded_read(&priv);
- if (ret >= 0) {
- fsnotify_access(file);
- if (copy_to_user(argp + copy_end,
- (char *)&priv.args + copy_end_kernel,
- sizeof(priv.args) - copy_end_kernel))
- ret = -EFAULT;
- }
out:
- kfree(priv.iov);
- if (ret > 0)
- add_rchar(current, ret);
- inc_syscr(current);
- return ret;
+ return btrfs_encoded_read_finish(&priv, ret);
}
static int btrfs_ioctl_encoded_write(struct file *file, void __user *argp, bool compat)
--
2.44.2
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 4/6] btrfs: add btrfs_prepare_encoded_read
2024-08-23 16:27 [PATCH 0/6] btrfs: add io_uring for encoded reads Mark Harmstone
` (2 preceding siblings ...)
2024-08-23 16:27 ` [PATCH 3/6] btrfs: add btrfs_encoded_read_finish Mark Harmstone
@ 2024-08-23 16:27 ` Mark Harmstone
2024-08-23 16:27 ` [PATCH 5/6] btrfs: move wait out of btrfs_encoded_read Mark Harmstone
2024-08-23 16:27 ` [PATCH 6/6] btrfs: add io_uring interface for encoded reads Mark Harmstone
5 siblings, 0 replies; 14+ messages in thread
From: Mark Harmstone @ 2024-08-23 16:27 UTC (permalink / raw)
To: io-uring, linux-btrfs; +Cc: Mark Harmstone
Move the beginning of btrfs_ioctl_encoded_read, responsible for
initialization of priv and validation, into its own function.
Signed-off-by: Mark Harmstone <[email protected]>
---
fs/btrfs/ioctl.c | 84 ++++++++++++++++++++++++++----------------------
1 file changed, 46 insertions(+), 38 deletions(-)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 3fa661322c26..d2658508e055 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -4537,23 +4537,23 @@ static ssize_t btrfs_encoded_read_finish(struct btrfs_encoded_read_private *priv
return ret;
}
-static int btrfs_ioctl_encoded_read(struct file *file, void __user *argp,
- bool compat)
+static ssize_t btrfs_prepare_encoded_read(struct btrfs_encoded_read_private *priv,
+ struct file *file, bool compat,
+ void __user *argp)
{
size_t copy_end_kernel = offsetofend(struct btrfs_ioctl_encoded_io_args,
flags);
size_t copy_end;
loff_t pos;
ssize_t ret;
- struct btrfs_encoded_read_private priv = {
- .pending = ATOMIC_INIT(1),
- .file = file,
- };
- if (!capable(CAP_SYS_ADMIN)) {
- ret = -EPERM;
- goto out;
- }
+ memset(priv, 0, sizeof(*priv));
+
+ atomic_set(&priv->pending, 1);
+ priv->file = file;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
if (compat) {
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
@@ -4561,47 +4561,55 @@ static int btrfs_ioctl_encoded_read(struct file *file, void __user *argp,
copy_end = offsetofend(struct btrfs_ioctl_encoded_io_args_32,
flags);
- if (copy_from_user(&args32, argp, copy_end)) {
- ret = -EFAULT;
- goto out;
- }
- priv.args.iov = compat_ptr(args32.iov);
- priv.args.iovcnt = args32.iovcnt;
- priv.args.offset = args32.offset;
- priv.args.flags = args32.flags;
+ if (copy_from_user(&args32, argp, copy_end))
+ return -EFAULT;
+
+ priv->args.iov = compat_ptr(args32.iov);
+ priv->args.iovcnt = args32.iovcnt;
+ priv->args.offset = args32.offset;
+ priv->args.flags = args32.flags;
#else
return -ENOTTY;
#endif
} else {
copy_end = copy_end_kernel;
- if (copy_from_user(&priv.args, argp, copy_end)) {
- ret = -EFAULT;
- goto out;
- }
- }
- if (priv.args.flags != 0) {
- ret = -EINVAL;
- goto out;
+ if (copy_from_user(&priv->args, argp, copy_end))
+ return -EFAULT;
}
- priv.iov = priv.iovstack;
- ret = import_iovec(ITER_DEST, priv.args.iov, priv.args.iovcnt,
- ARRAY_SIZE(priv.iovstack), &priv.iov, &priv.iter);
+ if (priv->args.flags != 0)
+ return -EINVAL;
+
+ priv->iov = priv->iovstack;
+ ret = import_iovec(ITER_DEST, priv->args.iov, priv->args.iovcnt,
+ ARRAY_SIZE(priv->iovstack), &priv->iov, &priv->iter);
if (ret < 0) {
- priv.iov = NULL;
- goto out;
+ priv->iov = NULL;
+ return ret;
}
- if (iov_iter_count(&priv.iter) == 0) {
- ret = 0;
- goto out;
- }
- pos = priv.args.offset;
- ret = rw_verify_area(READ, file, &pos, priv.args.len);
+ pos = priv->args.offset;
+ ret = rw_verify_area(READ, priv->file, &pos, priv->args.len);
if (ret < 0)
+ return ret;
+
+ priv->copy_out = argp + copy_end;
+
+ return 0;
+}
+
+static int btrfs_ioctl_encoded_read(struct file *file, void __user *argp,
+ bool compat)
+{
+ ssize_t ret;
+ struct btrfs_encoded_read_private priv;
+
+ ret = btrfs_prepare_encoded_read(&priv, file, compat, argp);
+ if (ret)
goto out;
- priv.copy_out = argp + copy_end;
+ if (iov_iter_count(&priv.iter) == 0)
+ goto out;
ret = btrfs_encoded_read(&priv);
--
2.44.2
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 5/6] btrfs: move wait out of btrfs_encoded_read
2024-08-23 16:27 [PATCH 0/6] btrfs: add io_uring for encoded reads Mark Harmstone
` (3 preceding siblings ...)
2024-08-23 16:27 ` [PATCH 4/6] btrfs: add btrfs_prepare_encoded_read Mark Harmstone
@ 2024-08-23 16:27 ` Mark Harmstone
2024-09-06 15:11 ` Pavel Begunkov
2024-08-23 16:27 ` [PATCH 6/6] btrfs: add io_uring interface for encoded reads Mark Harmstone
5 siblings, 1 reply; 14+ messages in thread
From: Mark Harmstone @ 2024-08-23 16:27 UTC (permalink / raw)
To: io-uring, linux-btrfs; +Cc: Mark Harmstone
Makes it so that if btrfs_encoded_read has launched a bio, rather than
waiting for it to complete, it leaves the extent and inode locked and returns
-EIOCBQUEUED. The caller is responsible for waiting on the bio, and
calling the completion code in the new btrfs_encoded_read_regular_end.
Signed-off-by: Mark Harmstone <[email protected]>
---
fs/btrfs/btrfs_inode.h | 1 +
fs/btrfs/inode.c | 81 +++++++++++++++++++++++-------------------
fs/btrfs/ioctl.c | 8 +++++
3 files changed, 54 insertions(+), 36 deletions(-)
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index f4d77c3bb544..a5d786c6d7d4 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -623,6 +623,7 @@ struct btrfs_encoded_read_private {
};
ssize_t btrfs_encoded_read(struct btrfs_encoded_read_private *priv);
+ssize_t btrfs_encoded_read_regular_end(struct btrfs_encoded_read_private *priv);
ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
const struct btrfs_ioctl_encoded_io_args *encoded);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1e53977a4854..1bd4c74e8c51 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -8999,7 +8999,7 @@ static ssize_t btrfs_encoded_read_inline(
struct extent_state **cached_state,
u64 extent_start, size_t count,
struct btrfs_ioctl_encoded_io_args *encoded,
- bool *unlocked)
+ bool *need_unlock)
{
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -9067,7 +9067,7 @@ static ssize_t btrfs_encoded_read_inline(
btrfs_release_path(path);
unlock_extent(io_tree, start, lockend, cached_state);
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
- *unlocked = true;
+ *need_unlock = false;
ret = copy_to_iter(tmp, count, iter);
if (ret != count)
@@ -9155,42 +9155,26 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
return blk_status_to_errno(READ_ONCE(priv.status));
}
-static ssize_t btrfs_encoded_read_regular(struct btrfs_encoded_read_private *priv,
- u64 start, u64 lockend,
- u64 disk_bytenr, u64 disk_io_size,
- bool *unlocked)
+ssize_t btrfs_encoded_read_regular_end(struct btrfs_encoded_read_private *priv)
{
- struct btrfs_inode *inode = BTRFS_I(file_inode(priv->file));
- struct extent_io_tree *io_tree = &inode->io_tree;
+ u64 cur, start, lockend;
unsigned long i;
- u64 cur;
size_t page_offset;
- ssize_t ret;
-
- priv->nr_pages = DIV_ROUND_UP(disk_io_size, PAGE_SIZE);
- priv->pages = kcalloc(priv->nr_pages, sizeof(struct page *), GFP_NOFS);
- if (!priv->pages) {
- priv->nr_pages = 0;
- return -ENOMEM;
- }
- ret = btrfs_alloc_page_array(priv->nr_pages, priv->pages, false);
- if (ret)
- return -ENOMEM;
+ struct btrfs_inode *inode = BTRFS_I(file_inode(priv->file));
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ struct extent_io_tree *io_tree = &inode->io_tree;
+ int ret;
- _btrfs_encoded_read_regular_fill_pages(inode, start, disk_bytenr,
- disk_io_size, priv);
+ start = ALIGN_DOWN(priv->args.offset, fs_info->sectorsize);
+ lockend = start + BTRFS_MAX_UNCOMPRESSED - 1;
- if (atomic_dec_return(&priv->pending))
- io_wait_event(priv->wait, !atomic_read(&priv->pending));
+ unlock_extent(io_tree, start, lockend, &priv->cached_state);
+ btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
ret = blk_status_to_errno(READ_ONCE(priv->status));
if (ret)
return ret;
- unlock_extent(io_tree, start, lockend, &priv->cached_state);
- btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
- *unlocked = true;
-
if (priv->args.compression) {
i = 0;
page_offset = 0;
@@ -9215,6 +9199,30 @@ static ssize_t btrfs_encoded_read_regular(struct btrfs_encoded_read_private *pri
return priv->count;
}
+static ssize_t btrfs_encoded_read_regular(struct btrfs_encoded_read_private *priv,
+ u64 disk_bytenr, u64 disk_io_size)
+{
+ struct btrfs_inode *inode = BTRFS_I(file_inode(priv->file));
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ u64 start = ALIGN_DOWN(priv->args.offset, fs_info->sectorsize);
+ ssize_t ret;
+
+ priv->nr_pages = DIV_ROUND_UP(disk_io_size, PAGE_SIZE);
+ priv->pages = kcalloc(priv->nr_pages, sizeof(struct page *), GFP_NOFS);
+ if (!priv->pages) {
+ priv->nr_pages = 0;
+ return -ENOMEM;
+ }
+ ret = btrfs_alloc_page_array(priv->nr_pages, priv->pages, false);
+ if (ret)
+ return -ENOMEM;
+
+ _btrfs_encoded_read_regular_fill_pages(inode, start, disk_bytenr,
+ disk_io_size, priv);
+
+ return -EIOCBQUEUED;
+}
+
ssize_t btrfs_encoded_read(struct btrfs_encoded_read_private *priv)
{
struct btrfs_inode *inode = BTRFS_I(file_inode(priv->file));
@@ -9223,7 +9231,7 @@ ssize_t btrfs_encoded_read(struct btrfs_encoded_read_private *priv)
ssize_t ret;
u64 start, lockend, disk_bytenr, disk_io_size;
struct extent_map *em;
- bool unlocked = false;
+ bool need_unlock = true;
priv->count = iov_iter_count(&priv->iter);
@@ -9278,7 +9286,7 @@ ssize_t btrfs_encoded_read(struct btrfs_encoded_read_private *priv)
&priv->iter, start,
lockend, &priv->cached_state,
extent_start, priv->count,
- &priv->args, &unlocked);
+ &priv->args, &need_unlock);
goto out_em;
}
@@ -9333,23 +9341,24 @@ ssize_t btrfs_encoded_read(struct btrfs_encoded_read_private *priv)
if (disk_bytenr == EXTENT_MAP_HOLE) {
unlock_extent(io_tree, start, lockend, &priv->cached_state);
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
- unlocked = true;
+ need_unlock = false;
ret = iov_iter_zero(priv->count, &priv->iter);
if (ret != priv->count)
ret = -EFAULT;
} else {
- ret = btrfs_encoded_read_regular(priv, start, lockend,
- disk_bytenr, disk_io_size,
- &unlocked);
+ ret = btrfs_encoded_read_regular(priv, disk_bytenr, disk_io_size);
+
+ if (ret == -EIOCBQUEUED)
+ need_unlock = false;
}
out_em:
free_extent_map(em);
out_unlock_extent:
- if (!unlocked)
+ if (need_unlock)
unlock_extent(io_tree, start, lockend, &priv->cached_state);
out_unlock_inode:
- if (!unlocked)
+ if (need_unlock)
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
return ret;
}
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index d2658508e055..c1886209933a 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -4516,6 +4516,9 @@ static ssize_t btrfs_encoded_read_finish(struct btrfs_encoded_read_private *priv
flags);
unsigned long i;
+ if (ret == -EIOCBQUEUED)
+ ret = btrfs_encoded_read_regular_end(priv);
+
if (ret >= 0) {
fsnotify_access(priv->file);
if (copy_to_user(priv->copy_out,
@@ -4613,6 +4616,11 @@ static int btrfs_ioctl_encoded_read(struct file *file, void __user *argp,
ret = btrfs_encoded_read(&priv);
+ if (ret == -EIOCBQUEUED) {
+ if (atomic_dec_return(&priv.pending))
+ io_wait_event(priv.wait, !atomic_read(&priv.pending));
+ }
+
out:
return btrfs_encoded_read_finish(&priv, ret);
}
--
2.44.2
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH 5/6] btrfs: move wait out of btrfs_encoded_read
2024-08-23 16:27 ` [PATCH 5/6] btrfs: move wait out of btrfs_encoded_read Mark Harmstone
@ 2024-09-06 15:11 ` Pavel Begunkov
0 siblings, 0 replies; 14+ messages in thread
From: Pavel Begunkov @ 2024-09-06 15:11 UTC (permalink / raw)
To: Mark Harmstone, io-uring, linux-btrfs
On 8/23/24 17:27, Mark Harmstone wrote:
> Makes it so that if btrfs_encoded_read has launched a bio, rather than
> waiting for it to complete it leaves the extent and inode locked and returns
> -EIOCBQUEUED. The caller is responsible for waiting on the bio, and
> calling the completion code in the new btrfs_encoded_read_regular_end.
>
> Signed-off-by: Mark Harmstone <[email protected]>
> ---
> fs/btrfs/btrfs_inode.h | 1 +
> fs/btrfs/inode.c | 81 +++++++++++++++++++++++-------------------
> fs/btrfs/ioctl.c | 8 +++++
> 3 files changed, 54 insertions(+), 36 deletions(-)
>
> diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
> index f4d77c3bb544..a5d786c6d7d4 100644
> --- a/fs/btrfs/btrfs_inode.h
> +++ b/fs/btrfs/btrfs_inode.h
> @@ -623,6 +623,7 @@ struct btrfs_encoded_read_private {
> };
>
> ssize_t btrfs_encoded_read(struct btrfs_encoded_read_private *priv);
> +ssize_t btrfs_encoded_read_regular_end(struct btrfs_encoded_read_private *priv);
> ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
> const struct btrfs_ioctl_encoded_io_args *encoded);
>
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 1e53977a4854..1bd4c74e8c51 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -8999,7 +8999,7 @@ static ssize_t btrfs_encoded_read_inline(
> struct extent_state **cached_state,
> u64 extent_start, size_t count,
> struct btrfs_ioctl_encoded_io_args *encoded,
> - bool *unlocked)
> + bool *need_unlock)
> {
> struct btrfs_root *root = inode->root;
> struct btrfs_fs_info *fs_info = root->fs_info;
> @@ -9067,7 +9067,7 @@ static ssize_t btrfs_encoded_read_inline(
> btrfs_release_path(path);
> unlock_extent(io_tree, start, lockend, cached_state);
> btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
> - *unlocked = true;
> + *need_unlock = false;
>
> ret = copy_to_iter(tmp, count, iter);
> if (ret != count)
> @@ -9155,42 +9155,26 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
> return blk_status_to_errno(READ_ONCE(priv.status));
> }
>
> -static ssize_t btrfs_encoded_read_regular(struct btrfs_encoded_read_private *priv,
> - u64 start, u64 lockend,
> - u64 disk_bytenr, u64 disk_io_size,
> - bool *unlocked)
> +ssize_t btrfs_encoded_read_regular_end(struct btrfs_encoded_read_private *priv)
> - struct btrfs_inode *inode = BTRFS_I(file_inode(priv->file));
> - struct extent_io_tree *io_tree = &inode->io_tree;
> + u64 cur, start, lockend;
> unsigned long i;
> - u64 cur;
> size_t page_offset;
> - ssize_t ret;
> -
> - priv->nr_pages = DIV_ROUND_UP(disk_io_size, PAGE_SIZE);
> - priv->pages = kcalloc(priv->nr_pages, sizeof(struct page *), GFP_NOFS);
> - if (!priv->pages) {
> - priv->nr_pages = 0;
> - return -ENOMEM;
> - }
> - ret = btrfs_alloc_page_array(priv->nr_pages, priv->pages, false);
> - if (ret)
> - return -ENOMEM;
> + struct btrfs_inode *inode = BTRFS_I(file_inode(priv->file));
> + struct btrfs_fs_info *fs_info = inode->root->fs_info;
> + struct extent_io_tree *io_tree = &inode->io_tree;
> + int ret;
> - _btrfs_encoded_read_regular_fill_pages(inode, start, disk_bytenr,
> - disk_io_size, priv);
> + start = ALIGN_DOWN(priv->args.offset, fs_info->sectorsize);
> + lockend = start + BTRFS_MAX_UNCOMPRESSED - 1;
>
> - if (atomic_dec_return(&priv->pending))
> - io_wait_event(priv->wait, !atomic_read(&priv->pending));
> + unlock_extent(io_tree, start, lockend, &priv->cached_state);
> + btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
AFAIS, btrfs_encoded_read_regular_end is here to be used asynchronously
in a later patch, specifically via bio callback -> io_uring tw callback,
and that io_uring tw callback might not get run in any reasonable amount
of time as it's controlled by the user space.
So, it takes the inode lock in one context (the task executing the
request) and releases it in another, which is usually not allowed.
And it also holds the locks potentially semi-indefinitely (only
guaranteed to be released when io_uring and/or the task die). There is
also the unlock_extent() range, which I'd assume we don't want to keep
locked for too long.
It's in general bad practice to have is_locked variables, especially
when they're not on the stack, even more so when it's implicit like
regular_read() {
(ret == -EIOCBQUEUED)
need_unlock = false; // delay it
}
btrfs_encoded_read_finish() {
if (ret == -EIOCBQUEUED)
unlock();
}
That's just too easy to miss when you do anything with the code
afterwards.
I hope Josef and the btrfs folks can comment on what the right way
forward would be here: does the inode lock have to be held for the
entire duration of the IO?
>
> ret = blk_status_to_errno(READ_ONCE(priv->status));
> if (ret)
> return ret;
>
> - unlock_extent(io_tree, start, lockend, &priv->cached_state);
> - btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
> - *unlocked = true;
> -
> if (priv->args.compression) {
> i = 0;
> page_offset = 0;
> @@ -9215,6 +9199,30 @@ static ssize_t btrfs_encoded_read_regular(struct btrfs_encoded_read_private *pri
> return priv->count;
> }
--
Pavel Begunkov
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 6/6] btrfs: add io_uring interface for encoded reads
2024-08-23 16:27 [PATCH 0/6] btrfs: add io_uring for encoded reads Mark Harmstone
` (4 preceding siblings ...)
2024-08-23 16:27 ` [PATCH 5/6] btrfs: move wait out of btrfs_encoded_read Mark Harmstone
@ 2024-08-23 16:27 ` Mark Harmstone
2024-09-06 14:41 ` Pavel Begunkov
2024-09-06 15:33 ` Pavel Begunkov
5 siblings, 2 replies; 14+ messages in thread
From: Mark Harmstone @ 2024-08-23 16:27 UTC (permalink / raw)
To: io-uring, linux-btrfs; +Cc: Mark Harmstone
Adds an io_uring interface for asynchronous encoded reads, using the
same interface as for the ioctl. To use this you would use an SQE opcode
of IORING_OP_URING_CMD, the cmd_op would be BTRFS_IOC_ENCODED_READ, and
addr would point to the userspace address of the
btrfs_ioctl_encoded_io_args struct. As with the ioctl, you need to have
CAP_SYS_ADMIN for this to work.
Signed-off-by: Mark Harmstone <[email protected]>
---
fs/btrfs/btrfs_inode.h | 2 ++
fs/btrfs/file.c | 1 +
fs/btrfs/inode.c | 57 ++++++++++++++++++++++++++++++++++++----
fs/btrfs/ioctl.c | 59 ++++++++++++++++++++++++++++++++++++++++--
fs/btrfs/ioctl.h | 3 +++
5 files changed, 115 insertions(+), 7 deletions(-)
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index a5d786c6d7d4..62e5757d3ba2 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -620,6 +620,8 @@ struct btrfs_encoded_read_private {
struct btrfs_ioctl_encoded_io_args args;
struct file *file;
void __user *copy_out;
+ struct io_uring_cmd *cmd;
+ unsigned int issue_flags;
};
ssize_t btrfs_encoded_read(struct btrfs_encoded_read_private *priv);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 9914419f3b7d..2db76422d126 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -3769,6 +3769,7 @@ const struct file_operations btrfs_file_operations = {
.compat_ioctl = btrfs_compat_ioctl,
#endif
.remap_file_range = btrfs_remap_file_range,
+ .uring_cmd = btrfs_uring_cmd,
.fop_flags = FOP_BUFFER_RASYNC | FOP_BUFFER_WASYNC,
};
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1bd4c74e8c51..e4458168c340 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -34,6 +34,7 @@
#include <linux/iomap.h>
#include <asm/unaligned.h>
#include <linux/fsverity.h>
+#include <linux/io_uring/cmd.h>
#include "misc.h"
#include "ctree.h"
#include "disk-io.h"
@@ -9078,7 +9079,7 @@ static ssize_t btrfs_encoded_read_inline(
return ret;
}
-static void btrfs_encoded_read_endio(struct btrfs_bio *bbio)
+static void btrfs_encoded_read_ioctl_endio(struct btrfs_bio *bbio)
{
struct btrfs_encoded_read_private *priv = bbio->private;
@@ -9098,6 +9099,47 @@ static void btrfs_encoded_read_endio(struct btrfs_bio *bbio)
bio_put(&bbio->bio);
}
+static inline struct btrfs_encoded_read_private *btrfs_uring_encoded_read_pdu(
+ struct io_uring_cmd *cmd)
+{
+ return *(struct btrfs_encoded_read_private **)cmd->pdu;
+}
+static void btrfs_finish_uring_encoded_read(struct io_uring_cmd *cmd,
+ unsigned int issue_flags)
+{
+ struct btrfs_encoded_read_private *priv;
+ ssize_t ret;
+
+ priv = btrfs_uring_encoded_read_pdu(cmd);
+ ret = btrfs_encoded_read_finish(priv, -EIOCBQUEUED);
+
+ io_uring_cmd_done(priv->cmd, ret, 0, priv->issue_flags);
+
+ kfree(priv);
+}
+
+static void btrfs_encoded_read_uring_endio(struct btrfs_bio *bbio)
+{
+ struct btrfs_encoded_read_private *priv = bbio->private;
+
+ if (bbio->bio.bi_status) {
+ /*
+ * The memory barrier implied by the atomic_dec_return() here
+ * pairs with the memory barrier implied by the
+ * atomic_dec_return() or io_wait_event() in
+ * btrfs_encoded_read_regular_fill_pages() to ensure that this
+ * write is observed before the load of status in
+ * btrfs_encoded_read_regular_fill_pages().
+ */
+ WRITE_ONCE(priv->status, bbio->bio.bi_status);
+ }
+ if (!atomic_dec_return(&priv->pending)) {
+ io_uring_cmd_complete_in_task(priv->cmd,
+ btrfs_finish_uring_encoded_read);
+ }
+ bio_put(&bbio->bio);
+}
+
static void _btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
u64 file_offset, u64 disk_bytenr,
u64 disk_io_size,
@@ -9106,11 +9148,16 @@ static void _btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
struct btrfs_fs_info *fs_info = inode->root->fs_info;
unsigned long i = 0;
struct btrfs_bio *bbio;
+ btrfs_bio_end_io_t cb;
- init_waitqueue_head(&priv->wait);
+ if (priv->cmd) {
+ cb = btrfs_encoded_read_uring_endio;
+ } else {
+ init_waitqueue_head(&priv->wait);
+ cb = btrfs_encoded_read_ioctl_endio;
+ }
- bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, fs_info,
- btrfs_encoded_read_endio, priv);
+ bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, fs_info, cb, priv);
bbio->bio.bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
bbio->inode = inode;
@@ -9122,7 +9169,7 @@ static void _btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
btrfs_submit_bio(bbio, 0);
bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, fs_info,
- btrfs_encoded_read_endio, priv);
+ cb, priv);
bbio->bio.bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
bbio->inode = inode;
continue;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index c1886209933a..85a512a9ca64 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -29,6 +29,7 @@
#include <linux/fileattr.h>
#include <linux/fsverity.h>
#include <linux/sched/xacct.h>
+#include <linux/io_uring/cmd.h>
#include "ctree.h"
#include "disk-io.h"
#include "export.h"
@@ -4509,8 +4510,8 @@ static int _btrfs_ioctl_send(struct btrfs_inode *inode, void __user *argp, bool
return ret;
}
-static ssize_t btrfs_encoded_read_finish(struct btrfs_encoded_read_private *priv,
- ssize_t ret)
+ssize_t btrfs_encoded_read_finish(struct btrfs_encoded_read_private *priv,
+ ssize_t ret)
{
size_t copy_end_kernel = offsetofend(struct btrfs_ioctl_encoded_io_args,
flags);
@@ -4725,6 +4726,60 @@ static int btrfs_ioctl_encoded_write(struct file *file, void __user *argp, bool
return ret;
}
+static void btrfs_uring_encoded_read(struct io_uring_cmd *cmd,
+ unsigned int issue_flags)
+{
+ ssize_t ret;
+ void __user *argp = (void __user *)(uintptr_t)cmd->sqe->addr;
+ struct btrfs_encoded_read_private *priv;
+ bool compat = issue_flags & IO_URING_F_COMPAT;
+
+ priv = kmalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv) {
+ ret = -ENOMEM;
+ goto out_uring;
+ }
+
+ ret = btrfs_prepare_encoded_read(priv, cmd->file, compat, argp);
+ if (ret)
+ goto out_finish;
+
+ if (iov_iter_count(&priv->iter) == 0) {
+ ret = 0;
+ goto out_finish;
+ }
+
+ *(struct btrfs_encoded_read_private **)cmd->pdu = priv;
+ priv->cmd = cmd;
+ priv->issue_flags = issue_flags;
+ ret = btrfs_encoded_read(priv);
+
+ if (ret == -EIOCBQUEUED && atomic_dec_return(&priv->pending))
+ return;
+
+out_finish:
+ ret = btrfs_encoded_read_finish(priv, ret);
+ kfree(priv);
+
+out_uring:
+ io_uring_cmd_done(cmd, ret, 0, issue_flags);
+}
+
+int btrfs_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
+{
+ switch (cmd->cmd_op) {
+ case BTRFS_IOC_ENCODED_READ:
+#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
+ case BTRFS_IOC_ENCODED_READ_32:
+#endif
+ btrfs_uring_encoded_read(cmd, issue_flags);
+ return -EIOCBQUEUED;
+ }
+
+ io_uring_cmd_done(cmd, -EINVAL, 0, issue_flags);
+ return -EIOCBQUEUED;
+}
+
long btrfs_ioctl(struct file *file, unsigned int
cmd, unsigned long arg)
{
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 19cd26b0244a..9d1522de79d3 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -22,5 +22,8 @@ void btrfs_sync_inode_flags_to_i_flags(struct inode *inode);
int __pure btrfs_is_empty_uuid(const u8 *uuid);
void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_balance_args *bargs);
+int btrfs_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags);
+ssize_t btrfs_encoded_read_finish(struct btrfs_encoded_read_private *priv,
+ ssize_t ret);
#endif
--
2.44.2
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH 6/6] btrfs: add io_uring interface for encoded reads
2024-08-23 16:27 ` [PATCH 6/6] btrfs: add io_uring interface for encoded reads Mark Harmstone
@ 2024-09-06 14:41 ` Pavel Begunkov
2024-09-06 15:33 ` Pavel Begunkov
1 sibling, 0 replies; 14+ messages in thread
From: Pavel Begunkov @ 2024-09-06 14:41 UTC (permalink / raw)
To: Mark Harmstone, io-uring, linux-btrfs
On 8/23/24 17:27, Mark Harmstone wrote:
> Adds an io_uring interface for asynchronous encoded reads, using the
> same interface as for the ioctl. To use this you would use an SQE opcode
> of IORING_OP_URING_CMD, the cmd_op would be BTRFS_IOC_ENCODED_READ, and
> addr would point to the userspace address of the
> btrfs_ioctl_encoded_io_args struct. As with the ioctl, you need to have
> CAP_SYS_ADMIN for this to work.
>
> Signed-off-by: Mark Harmstone <[email protected]>
> ---
...
> diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
> index a5d786c6d7d4..62e5757d3ba2 100644
> --- a/fs/btrfs/btrfs_inode.h
> +++ b/fs/btrfs/btrfs_inode.h
> @@ -620,6 +620,8 @@ struct btrfs_encoded_read_private {
> struct btrfs_ioctl_encoded_io_args args;
> struct file *file;
> void __user *copy_out;
> + struct io_uring_cmd *cmd;
> + unsigned int issue_flags;
> };
>
...
> +static inline struct btrfs_encoded_read_private *btrfs_uring_encoded_read_pdu(
> + struct io_uring_cmd *cmd)
> +{
> + return *(struct btrfs_encoded_read_private **)cmd->pdu;
> +}
> +static void btrfs_finish_uring_encoded_read(struct io_uring_cmd *cmd,
> + unsigned int issue_flags)
> +{
> + struct btrfs_encoded_read_private *priv;
> + ssize_t ret;
> +
> + priv = btrfs_uring_encoded_read_pdu(cmd);
> + ret = btrfs_encoded_read_finish(priv, -EIOCBQUEUED);
> +
> + io_uring_cmd_done(priv->cmd, ret, 0, priv->issue_flags);
s/priv->issue_flags/issue_flags/
You can't use flags that you got somewhere before from a completely
different execution context.
> +
> + kfree(priv);
> +}
> +
> +static void btrfs_encoded_read_uring_endio(struct btrfs_bio *bbio)
> +{
> + struct btrfs_encoded_read_private *priv = bbio->private;
Might be cleaner if you combine it with btrfs_encoded_read_ioctl_endio
btrfs_encoded_read_endio()
{
...
if (!atomic_dec_return(&priv->pending)) {
if (priv->cmd)
io_uring_cmd_complete_in_task();
else
wake_up();
...
}
> +
> + if (bbio->bio.bi_status) {
> + /*
> + * The memory barrier implied by the atomic_dec_return() here
> + * pairs with the memory barrier implied by the
> + * atomic_dec_return() or io_wait_event() in
> + * btrfs_encoded_read_regular_fill_pages() to ensure that this
> + * write is observed before the load of status in
> + * btrfs_encoded_read_regular_fill_pages().
> + */
> + WRITE_ONCE(priv->status, bbio->bio.bi_status);
> + }
> + if (!atomic_dec_return(&priv->pending)) {
> + io_uring_cmd_complete_in_task(priv->cmd,
> + btrfs_finish_uring_encoded_read);
> + }
> + bio_put(&bbio->bio);
> +}
> +
> static void _btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
> u64 file_offset, u64 disk_bytenr,
> u64 disk_io_size,
> @@ -9106,11 +9148,16 @@ static void _btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
> struct btrfs_fs_info *fs_info = inode->root->fs_info;
> unsigned long i = 0;
> struct btrfs_bio *bbio;
> + btrfs_bio_end_io_t cb;
>
> - init_waitqueue_head(&priv->wait);
> + if (priv->cmd) {
> + cb = btrfs_encoded_read_uring_endio;
> + } else {
> + init_waitqueue_head(&priv->wait);
> + cb = btrfs_encoded_read_ioctl_endio;
> + }
>
> - bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, fs_info,
> - btrfs_encoded_read_endio, priv);
> + bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, fs_info, cb, priv);
> bbio->bio.bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
> bbio->inode = inode;
>
> @@ -9122,7 +9169,7 @@ static void _btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
> btrfs_submit_bio(bbio, 0);
>
> bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, fs_info,
> - btrfs_encoded_read_endio, priv);
> + cb, priv);
> bbio->bio.bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
> bbio->inode = inode;
> continue;
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index c1886209933a..85a512a9ca64 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
...
> +static void btrfs_uring_encoded_read(struct io_uring_cmd *cmd,
> + unsigned int issue_flags)
> +{
> + ssize_t ret;
> + void __user *argp = (void __user *)(uintptr_t)cmd->sqe->addr;
u64_to_user_ptr()
> + struct btrfs_encoded_read_private *priv;
> + bool compat = issue_flags & IO_URING_F_COMPAT;
> +
> + priv = kmalloc(sizeof(*priv), GFP_KERNEL);
> + if (!priv) {
> + ret = -ENOMEM;
> + goto out_uring;
> + }
> +
> + ret = btrfs_prepare_encoded_read(priv, cmd->file, compat, argp);
> + if (ret)
> + goto out_finish;
> +
> + if (iov_iter_count(&priv->iter) == 0) {
> + ret = 0;
> + goto out_finish;
> + }
> +
> + *(struct btrfs_encoded_read_private **)cmd->pdu = priv;
> + priv->cmd = cmd;
> + priv->issue_flags = issue_flags;
You shouldn't be stashing issue_flags, it almost always wrong.
Looks btrfs_finish_uring_encoded_read() is the only user, and
with my comment above you can kill priv->issue_flags
> + ret = btrfs_encoded_read(priv);
> +
> + if (ret == -EIOCBQUEUED && atomic_dec_return(&priv->pending))
> + return;
Is gating on -EIOCBQUEUED just an optimisation? I.e. it's fine
to swap checks
if (atomic_dec_return(&priv->pending) && ret == -EIOCBQUEUED)
but we know that unless it returned -EIOCBQUEUED nobody
should've touched ->pending.
> +
> +out_finish:
> + ret = btrfs_encoded_read_finish(priv, ret);
> + kfree(priv);
> +
> +out_uring:
> + io_uring_cmd_done(cmd, ret, 0, issue_flags);
> +}
> +
--
Pavel Begunkov
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 6/6] btrfs: add io_uring interface for encoded reads
2024-08-23 16:27 ` [PATCH 6/6] btrfs: add io_uring interface for encoded reads Mark Harmstone
2024-09-06 14:41 ` Pavel Begunkov
@ 2024-09-06 15:33 ` Pavel Begunkov
1 sibling, 0 replies; 14+ messages in thread
From: Pavel Begunkov @ 2024-09-06 15:33 UTC (permalink / raw)
To: Mark Harmstone, io-uring, linux-btrfs
On 8/23/24 17:27, Mark Harmstone wrote:
> Adds an io_uring interface for asynchronous encoded reads, using the
> same interface as for the ioctl. To use this you would use an SQE opcode
> of IORING_OP_URING_CMD, the cmd_op would be BTRFS_IOC_ENCODED_READ, and
> addr would point to the userspace address of the
> btrfs_ioctl_encoded_io_args struct. As with the ioctl, you need to have
> CAP_SYS_ADMIN for this to work.
>
> Signed-off-by: Mark Harmstone <[email protected]>
> ---
...
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 1bd4c74e8c51..e4458168c340 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -34,6 +34,7 @@
> #include <linux/iomap.h>
> #include <asm/unaligned.h>
> #include <linux/fsverity.h>
> +#include <linux/io_uring/cmd.h>
> #include "misc.h"
> #include "ctree.h"
> #include "disk-io.h"
> @@ -9078,7 +9079,7 @@ static ssize_t btrfs_encoded_read_inline(
> return ret;
> }
>
> -static void btrfs_encoded_read_endio(struct btrfs_bio *bbio)
> +static void btrfs_encoded_read_ioctl_endio(struct btrfs_bio *bbio)
> {
> struct btrfs_encoded_read_private *priv = bbio->private;
>
> @@ -9098,6 +9099,47 @@ static void btrfs_encoded_read_endio(struct btrfs_bio *bbio)
> bio_put(&bbio->bio);
> }
>
> +static inline struct btrfs_encoded_read_private *btrfs_uring_encoded_read_pdu(
> + struct io_uring_cmd *cmd)
> +{
> + return *(struct btrfs_encoded_read_private **)cmd->pdu;
> +}
> +static void btrfs_finish_uring_encoded_read(struct io_uring_cmd *cmd,
> + unsigned int issue_flags)
> +{
> + struct btrfs_encoded_read_private *priv;
> + ssize_t ret;
> +
> + priv = btrfs_uring_encoded_read_pdu(cmd);
> + ret = btrfs_encoded_read_finish(priv, -EIOCBQUEUED);
tw callback -> btrfs_encoded_read_finish() -> copy_to_user()
That's usually fine, except in cases where the task and/or io_uring is
dying and the callback does not execute from a user task context. This
is the same problem as with fuse: we can pass a flag from io_uring into
the callback telling btrfs that it should terminate the request and not
rely on mm or any other task-related pieces.
> +
> + io_uring_cmd_done(priv->cmd, ret, 0, priv->issue_flags);
> +
> + kfree(priv);
> +}
> +
...
--
Pavel Begunkov
^ permalink raw reply [flat|nested] 14+ messages in thread