* [PATCH] btrfs: add io_uring interface for encoded writes
@ 2024-11-12 16:29 Mark Harmstone
2024-11-12 21:01 ` Jens Axboe
0 siblings, 1 reply; 6+ messages in thread
From: Mark Harmstone @ 2024-11-12 16:29 UTC (permalink / raw)
To: linux-btrfs, io-uring; +Cc: Mark Harmstone
Add an io_uring interface for encoded writes, with the same parameters
as the BTRFS_IOC_ENCODED_WRITE ioctl.
As with the encoded reads code, there's a test program for this at
https://github.com/maharmstone/io_uring-encoded, and I'll get this
worked into an fstest.
How io_uring works is that it initially calls btrfs_uring_cmd with the
IO_URING_F_NONBLOCK flag set, and if we return -EAGAIN it tries again in
a kthread with the flag cleared.
Ideally we'd honour this and call try_lock etc., but there's still a lot
of work to be done to create non-blocking versions of all the functions
in our write path. Instead, just validate the input in
btrfs_uring_encoded_write() on the first pass and return -EAGAIN, with a
view to properly optimizing the happy path later on.
Signed-off-by: Mark Harmstone <[email protected]>
---
fs/btrfs/ioctl.c | 116 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 116 insertions(+)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 6478216305c7..37578270686a 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -5030,6 +5030,116 @@ static int btrfs_uring_encoded_read(struct io_uring_cmd *cmd, unsigned int issue
return ret;
}
+static int btrfs_uring_encoded_write(struct io_uring_cmd *cmd, unsigned int issue_flags)
+{
+ struct btrfs_ioctl_encoded_io_args args;
+ struct iovec iovstack[UIO_FASTIOV];
+ struct iovec *iov = iovstack;
+ struct iov_iter iter;
+ loff_t pos;
+ struct kiocb kiocb;
+ struct file *file;
+ ssize_t ret;
+ void __user *sqe_addr;
+
+ if (!capable(CAP_SYS_ADMIN)) {
+ ret = -EPERM;
+ goto out_acct;
+ }
+
+ file = cmd->file;
+ sqe_addr = u64_to_user_ptr(READ_ONCE(cmd->sqe->addr));
+
+ if (!(file->f_mode & FMODE_WRITE)) {
+ ret = -EBADF;
+ goto out_acct;
+ }
+
+ if (issue_flags & IO_URING_F_COMPAT) {
+#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
+ struct btrfs_ioctl_encoded_io_args_32 args32;
+
+ if (copy_from_user(&args32, sqe_addr, sizeof(args32))) {
+ ret = -EFAULT;
+ goto out_acct;
+ }
+ args.iov = compat_ptr(args32.iov);
+ args.iovcnt = args32.iovcnt;
+ args.offset = args32.offset;
+ args.flags = args32.flags;
+ args.len = args32.len;
+ args.unencoded_len = args32.unencoded_len;
+ args.unencoded_offset = args32.unencoded_offset;
+ args.compression = args32.compression;
+ args.encryption = args32.encryption;
+ memcpy(args.reserved, args32.reserved, sizeof(args.reserved));
+#else
+ return -ENOTTY;
+#endif
+ } else {
+ if (copy_from_user(&args, sqe_addr, sizeof(args))) {
+ ret = -EFAULT;
+ goto out_acct;
+ }
+ }
+
+ ret = -EINVAL;
+ if (args.flags != 0)
+ goto out_acct;
+ if (memchr_inv(args.reserved, 0, sizeof(args.reserved)))
+ goto out_acct;
+ if (args.compression == BTRFS_ENCODED_IO_COMPRESSION_NONE &&
+ args.encryption == BTRFS_ENCODED_IO_ENCRYPTION_NONE)
+ goto out_acct;
+ if (args.compression >= BTRFS_ENCODED_IO_COMPRESSION_TYPES ||
+ args.encryption >= BTRFS_ENCODED_IO_ENCRYPTION_TYPES)
+ goto out_acct;
+ if (args.unencoded_offset > args.unencoded_len)
+ goto out_acct;
+ if (args.len > args.unencoded_len - args.unencoded_offset)
+ goto out_acct;
+
+ if (issue_flags & IO_URING_F_NONBLOCK) {
+ ret = -EAGAIN;
+ goto out_acct;
+ }
+
+ ret = import_iovec(ITER_SOURCE, args.iov, args.iovcnt, ARRAY_SIZE(iovstack),
+ &iov, &iter);
+ if (ret < 0)
+ goto out_acct;
+
+ if (iov_iter_count(&iter) == 0) {
+ ret = 0;
+ goto out_iov;
+ }
+ pos = args.offset;
+ ret = rw_verify_area(WRITE, file, &pos, args.len);
+ if (ret < 0)
+ goto out_iov;
+
+ init_sync_kiocb(&kiocb, file);
+ ret = kiocb_set_rw_flags(&kiocb, 0, WRITE);
+ if (ret)
+ goto out_iov;
+ kiocb.ki_pos = pos;
+
+ file_start_write(file);
+
+ ret = btrfs_do_write_iter(&kiocb, &iter, &args);
+ if (ret > 0)
+ fsnotify_modify(file);
+
+ file_end_write(file);
+out_iov:
+ kfree(iov);
+out_acct:
+ if (ret > 0)
+ add_wchar(current, ret);
+ inc_syscw(current);
+ return ret;
+}
+
int btrfs_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
{
switch (cmd->cmd_op) {
@@ -5038,6 +5148,12 @@ int btrfs_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
case BTRFS_IOC_ENCODED_READ_32:
#endif
return btrfs_uring_encoded_read(cmd, issue_flags);
+
+ case BTRFS_IOC_ENCODED_WRITE:
+#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
+ case BTRFS_IOC_ENCODED_WRITE_32:
+#endif
+ return btrfs_uring_encoded_write(cmd, issue_flags);
}
return -EINVAL;
--
2.45.2
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH] btrfs: add io_uring interface for encoded writes
2024-11-12 16:29 [PATCH] btrfs: add io_uring interface for encoded writes Mark Harmstone
@ 2024-11-12 21:01 ` Jens Axboe
2024-11-12 21:11 ` Jens Axboe
` (2 more replies)
0 siblings, 3 replies; 6+ messages in thread
From: Jens Axboe @ 2024-11-12 21:01 UTC (permalink / raw)
To: Mark Harmstone, linux-btrfs, io-uring
On 11/12/24 9:29 AM, Mark Harmstone wrote:
> Add an io_uring interface for encoded writes, with the same parameters
> as the BTRFS_IOC_ENCODED_WRITE ioctl.
>
> As with the encoded reads code, there's a test program for this at
> https://github.com/maharmstone/io_uring-encoded, and I'll get this
> worked into an fstest.
>
> How io_uring works is that it initially calls btrfs_uring_cmd with the
> IO_URING_F_NONBLOCK flag set, and if we return -EAGAIN it tries again in
> a kthread with the flag cleared.
^^^^^^^^
Not a kernel thread, it's an io worker. The distinction may seem
irrelevant, but it's really not - io workers inherit all the properties
of the original task.
> Ideally we'd honour this and call try_lock etc., but there's still a lot
> of work to be done to create non-blocking versions of all the functions
> in our write path. Instead, just validate the input in
> btrfs_uring_encoded_write() on the first pass and return -EAGAIN, with a
> view to properly optimizing the happy path later on.
But you need to ensure stable state after the first issue, regardless of
how you handle it. I don't have the other patches handy, but whatever
you copy from userspace before you return -EAGAIN, you should not be
copying again. By the time you get the 2nd invocation from io-wq, no
copying should be taking place, you should be using the state you
already ensured was stable for the non-blocking issue.
Maybe this is all handled by the caller of btrfs_uring_encoded_write()
already? As far as looking at the code below, it just looks like it
copies everything, then returns -EAGAIN, then copies it again later? Yes
uring_cmd will make the sqe itself stable, but:
sqe_addr = u64_to_user_ptr(READ_ONCE(cmd->sqe->addr));
the userspace btrfs_ioctl_encoded_io_args that sqe->addr points too
should remain stable as well. If not, consider userspace doing:
some_func()
{
struct btrfs_ioctl_encoded_io_args args;
fill_in_args(&args);
sqe = io_uring_get_sqe(ring);
sqe->addr = &args;
io_uring_submit(); <- initial invocation here
}
main_func()
{
some_func();
- io-wq invocation perhaps here
wait_on_cqes();
}
where io-wq will be reading garbage as args went out of scope, unless
some_func() used a stable/heap struct that isn't freed until completion.
some_func() can obviously wait on the cqe, but at that point you'd be
using it as a sync interface, and there's little point.
This is why io_kiocb->async_data exists. uring_cmd is already using that
for the sqe, I think you'd want to add a 2nd "void *op_data" or
something in there, and have the uring_cmd alloc cache get clear that to
NULL and have uring_cmd alloc cache put kfree() it if it's non-NULL.
We'd also need to move the uring_cache struct into
include/linux/io_uring_types.h so that btrfs can get to it (and probably
rename it to something saner, uring_cmd_async_data for example).
static int btrfs_uring_encoded_write(struct io_uring_cmd *cmd, unsigned int issue_flags)
{
struct io_kiocb *req = cmd_to_io_kiocb(cmd);
struct uring_cmd_async_data *data = req->async_data;
struct btrfs_ioctl_encoded_io_args *args;
if (!data->op_data) {
data->op_data = kmalloc(sizeof(*args), GFP_NOIO);
if (!data->op_data)
return -ENOMEM;
if (copy_from_user(data->op_data, sqe_addr, sizeof(*args))
return -EFAULT;
}
...
}
and have it be stable, then moving your copying into a helper rather
than inline in btrfs_uring_encoded_write() (it probably should be
regardless). Ignored the compat above, it's just pseudo code.
Anyway, hope that helps. I'll be happy to do the uring_cmd bit for you,
but it really should be pretty straight forward.
I'm also pondering if the encoded read side suffers from the same issue?
--
Jens Axboe
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] btrfs: add io_uring interface for encoded writes
2024-11-12 21:01 ` Jens Axboe
@ 2024-11-12 21:11 ` Jens Axboe
2024-11-20 15:50 ` Mark Harmstone
2024-11-12 21:19 ` Jens Axboe
2024-11-15 17:29 ` Mark Harmstone
2 siblings, 1 reply; 6+ messages in thread
From: Jens Axboe @ 2024-11-12 21:11 UTC (permalink / raw)
To: Mark Harmstone, linux-btrfs, io-uring
[-- Attachment #1: Type: text/plain, Size: 1238 bytes --]
On 11/12/24 2:01 PM, Jens Axboe wrote:
> This is why io_kiocb->async_data exists. uring_cmd is already using that
> for the sqe, I think you'd want to add a 2nd "void *op_data" or
> something in there, and have the uring_cmd alloc cache get clear that to
> NULL and have uring_cmd alloc cache put kfree() it if it's non-NULL.
>
> We'd also need to move the uring_cache struct into
> include/linux/io_uring_types.h so that btrfs can get to it (and probably
> rename it to something saner, uring_cmd_async_data for example).
Here are two patches that implement that basic thing on the io_uring
uring_cmd side. With that, you can then do:
> static int btrfs_uring_encoded_write(struct io_uring_cmd *cmd, unsigned int issue_flags)
> {
> struct io_kiocb *req = cmd_to_io_kiocb(cmd);
> struct uring_cmd_async_data *data = req->async_data;
> struct btrfs_ioctl_encoded_io_args *args;
>
> if (!data->op_data) {
> data->op_data = kmalloc(sizeof(*args), GFP_NOIO);
> if (!data->op_data)
> return -ENOMEM;
> if (copy_from_user(data->op_data, sqe_addr, sizeof(*args))
> return -EFAULT;
> }
> ...
> }
and have it be both stable, and not need to worry about freeing it
either. Hope that helps. Totally untested...
--
Jens Axboe
[-- Attachment #2: 0002-io_uring-cmd-add-per-op-data-to-struct-io_uring_cmd_.patch --]
[-- Type: text/x-patch, Size: 2077 bytes --]
From f06038764982dcf0440e2eef8073700becd8203d Mon Sep 17 00:00:00 2001
From: Jens Axboe <[email protected]>
Date: Tue, 12 Nov 2024 14:09:12 -0700
Subject: [PATCH 2/2] io_uring/cmd: add per-op data to struct io_uring_cmd_data
In case an op handler for ->uring_cmd() needs stable storage for user
data, it can allocate io_uring_cmd_data->op_data and use it for the
duration of the request. When the request gets cleaned up, uring_cmd
will free it automatically.
Signed-off-by: Jens Axboe <[email protected]>
---
include/linux/io_uring/cmd.h | 1 +
io_uring/uring_cmd.c | 13 +++++++++++--
2 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h
index 9541b8924e89..cb3a4c4f263c 100644
--- a/include/linux/io_uring/cmd.h
+++ b/include/linux/io_uring/cmd.h
@@ -20,6 +20,7 @@ struct io_uring_cmd {
struct io_uring_cmd_data {
struct io_uring_sqe sqes[2];
+ void *op_data;
};
static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe)
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index a7a11cb5e22e..e676dae73a0f 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -23,12 +23,16 @@ static struct io_uring_cmd_data *io_uring_async_get(struct io_kiocb *req)
cache = io_alloc_cache_get(&ctx->uring_cache);
if (cache) {
+ cache->op_data = NULL;
req->flags |= REQ_F_ASYNC_DATA;
req->async_data = cache;
return cache;
}
- if (!io_alloc_async_data(req))
- return req->async_data;
+ if (!io_alloc_async_data(req)) {
+ cache = req->async_data;
+ cache->op_data = NULL;
+ return cache;
+ }
return NULL;
}
@@ -37,6 +41,11 @@ static void io_req_uring_cleanup(struct io_kiocb *req, unsigned int issue_flags)
struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
struct io_uring_cmd_data *cache = req->async_data;
+ if (cache->op_data) {
+ kfree(cache->op_data);
+ cache->op_data = NULL;
+ }
+
if (issue_flags & IO_URING_F_UNLOCKED)
return;
if (io_alloc_cache_put(&req->ctx->uring_cache, cache)) {
--
2.45.2
[-- Attachment #3: 0001-io_uring-cmd-rename-struct-uring_cache-to-io_uring_c.patch --]
[-- Type: text/x-patch, Size: 3769 bytes --]
From f444c9e2a6f1380419a9c1863f9048ae08a1b5c0 Mon Sep 17 00:00:00 2001
From: Jens Axboe <[email protected]>
Date: Tue, 12 Nov 2024 14:05:27 -0700
Subject: [PATCH 1/2] io_uring/cmd: rename struct uring_cache to
io_uring_cmd_data
In preparation for making this more generically available for
->uring_cmd() usage that needs stable command data, rename it and move
it to io_uring/cmd.h instead.
Signed-off-by: Jens Axboe <[email protected]>
---
include/linux/io_uring/cmd.h | 4 ++++
io_uring/io_uring.c | 2 +-
io_uring/uring_cmd.c | 10 +++++-----
io_uring/uring_cmd.h | 4 ----
4 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h
index 578a3fdf5c71..9541b8924e89 100644
--- a/include/linux/io_uring/cmd.h
+++ b/include/linux/io_uring/cmd.h
@@ -18,6 +18,10 @@ struct io_uring_cmd {
u8 pdu[32]; /* available inline for free use */
};
+struct io_uring_cmd_data {
+ struct io_uring_sqe sqes[2];
+};
+
static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe)
{
return sqe->cmd;
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 076171977d5e..f75bc2959f60 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -321,7 +321,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
ret |= io_alloc_cache_init(&ctx->rw_cache, IO_ALLOC_CACHE_MAX,
sizeof(struct io_async_rw));
ret |= io_alloc_cache_init(&ctx->uring_cache, IO_ALLOC_CACHE_MAX,
- sizeof(struct uring_cache));
+ sizeof(struct io_uring_cmd_data));
spin_lock_init(&ctx->msg_lock);
ret |= io_alloc_cache_init(&ctx->msg_cache, IO_ALLOC_CACHE_MAX,
sizeof(struct io_kiocb));
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index e9d99d3ecc34..a7a11cb5e22e 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -16,10 +16,10 @@
#include "rsrc.h"
#include "uring_cmd.h"
-static struct uring_cache *io_uring_async_get(struct io_kiocb *req)
+static struct io_uring_cmd_data *io_uring_async_get(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
- struct uring_cache *cache;
+ struct io_uring_cmd_data *cache;
cache = io_alloc_cache_get(&ctx->uring_cache);
if (cache) {
@@ -35,7 +35,7 @@ static struct uring_cache *io_uring_async_get(struct io_kiocb *req)
static void io_req_uring_cleanup(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
- struct uring_cache *cache = req->async_data;
+ struct io_uring_cmd_data *cache = req->async_data;
if (issue_flags & IO_URING_F_UNLOCKED)
return;
@@ -179,7 +179,7 @@ static int io_uring_cmd_prep_setup(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
- struct uring_cache *cache;
+ struct io_uring_cmd_data *cache;
cache = io_uring_async_get(req);
if (unlikely(!cache))
@@ -256,7 +256,7 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
ret = file->f_op->uring_cmd(ioucmd, issue_flags);
if (ret == -EAGAIN) {
- struct uring_cache *cache = req->async_data;
+ struct io_uring_cmd_data *cache = req->async_data;
if (ioucmd->sqe != (void *) cache)
memcpy(cache, ioucmd->sqe, uring_sqe_size(req->ctx));
diff --git a/io_uring/uring_cmd.h b/io_uring/uring_cmd.h
index 7dba0f1efc58..f6837ee0955b 100644
--- a/io_uring/uring_cmd.h
+++ b/io_uring/uring_cmd.h
@@ -1,9 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-struct uring_cache {
- struct io_uring_sqe sqes[2];
-};
-
int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags);
int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
--
2.45.2
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH] btrfs: add io_uring interface for encoded writes
2024-11-12 21:01 ` Jens Axboe
2024-11-12 21:11 ` Jens Axboe
@ 2024-11-12 21:19 ` Jens Axboe
2024-11-15 17:29 ` Mark Harmstone
2 siblings, 0 replies; 6+ messages in thread
From: Jens Axboe @ 2024-11-12 21:19 UTC (permalink / raw)
To: Mark Harmstone, linux-btrfs, io-uring
On 11/12/24 2:01 PM, Jens Axboe wrote:
> I'm also pondering if the encoded read side suffers from the same issue?
It does, it needs the same fixup.
--
Jens Axboe
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] btrfs: add io_uring interface for encoded writes
2024-11-12 21:01 ` Jens Axboe
2024-11-12 21:11 ` Jens Axboe
2024-11-12 21:19 ` Jens Axboe
@ 2024-11-15 17:29 ` Mark Harmstone
2 siblings, 0 replies; 6+ messages in thread
From: Mark Harmstone @ 2024-11-15 17:29 UTC (permalink / raw)
To: Jens Axboe, [email protected], [email protected]
On 12/11/24 21:01, Jens Axboe wrote:
> >
> On 11/12/24 9:29 AM, Mark Harmstone wrote:
>> Add an io_uring interface for encoded writes, with the same parameters
>> as the BTRFS_IOC_ENCODED_WRITE ioctl.
>>
>> As with the encoded reads code, there's a test program for this at
>> https://github.com/maharmstone/io_uring-encoded, and I'll get this
>> worked into an fstest.
>>
>> How io_uring works is that it initially calls btrfs_uring_cmd with the
>> IO_URING_F_NONBLOCK flag set, and if we return -EAGAIN it tries again in
>> a kthread with the flag cleared.
> ^^^^^^^^
>
> Not a kernel thread, it's an io worker. The distinction may seem
> irrelevant, but it's really not - io workers inherit all the properties
> of the original task.
>
>> Ideally we'd honour this and call try_lock etc., but there's still a lot
>> of work to be done to create non-blocking versions of all the functions
>> in our write path. Instead, just validate the input in
>> btrfs_uring_encoded_write() on the first pass and return -EAGAIN, with a
>> view to properly optimizing the happy path later on.
>
> But you need to ensure stable state after the first issue, regardless of
> how you handle it. I don't have the other patches handy, but whatever
> you copy from userspace before you return -EAGAIN, you should not be
> copying again. By the time you get the 2nd invocation from io-wq, no
> copying should be taking place, you should be using the state you
> already ensured was stable for the non-blocking issue.
>
> Maybe this is all handled by the caller of btrfs_uring_encoded_write()
> already? As far as looking at the code below, it just looks like it
> copies everything, then returns -EAGAIN, then copies it again later? Yes
> uring_cmd will make the sqe itself stable, but:
>
> sqe_addr = u64_to_user_ptr(READ_ONCE(cmd->sqe->addr));
>
> the userspace btrfs_ioctl_encoded_io_args that sqe->addr points too
> should remain stable as well. If not, consider userspace doing:
>
> some_func()
> {
> struct btrfs_ioctl_encoded_io_args args;
>
> fill_in_args(&args);
> sqe = io_uring_get_sqe(ring);
> sqe->addr = &args;
> io_uring_submit(); <- initial invocation here
> }
>
> main_func()
> {
> some_func();
> - io-wq invocation perhaps here
> wait_on_cqes();
> }
>
> where io-wq will be reading garbage as args went out of scope, unless
> some_func() used a stable/heap struct that isn't freed until completion.
> some_func() can obviously wait on the cqe, but at that point you'd be
> using it as a sync interface, and there's little point.
>
> This is why io_kiocb->async_data exists. uring_cmd is already using that
> for the sqe, I think you'd want to add a 2nd "void *op_data" or
> something in there, and have the uring_cmd alloc cache get clear that to
> NULL and have uring_cmd alloc cache put kfree() it if it's non-NULL.
>
> We'd also need to move the uring_cache struct into
> include/linux/io_uring_types.h so that btrfs can get to it (and probably
> rename it to something saner, uring_cmd_async_data for example).
>
> static int btrfs_uring_encoded_write(struct io_uring_cmd *cmd, unsigned int issue_flags)
> {
> struct io_kiocb *req = cmd_to_io_kiocb(cmd);
> struct uring_cmd_async_data *data = req->async_data;
> struct btrfs_ioctl_encoded_io_args *args;
>
> if (!data->op_data) {
> data->op_data = kmalloc(sizeof(*args), GFP_NOIO);
> if (!data->op_data)
> return -ENOMEM;
> if (copy_from_user(data->op_data, sqe_addr, sizeof(*args))
> return -EFAULT;
> }
> ...
> }
>
> and have it be stable, then moving your copying into a helper rather
> than inline in btrfs_uring_encoded_write() (it probably should be
> regardless). Ignored the compat above, it's just pseudo code.
>
> Anyway, hope that helps. I'll be happy to do the uring_cmd bit for you,
> but it really should be pretty straight forward.
>
> I'm also pondering if the encoded read side suffers from the same issue?
>
Thanks Jens, that makes sense to me.
Mark
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] btrfs: add io_uring interface for encoded writes
2024-11-12 21:11 ` Jens Axboe
@ 2024-11-20 15:50 ` Mark Harmstone
0 siblings, 0 replies; 6+ messages in thread
From: Mark Harmstone @ 2024-11-20 15:50 UTC (permalink / raw)
To: Jens Axboe, [email protected], [email protected]
On 12/11/24 21:11, Jens Axboe wrote:
> >
> On 11/12/24 2:01 PM, Jens Axboe wrote:
>> This is why io_kiocb->async_data exists. uring_cmd is already using that
>> for the sqe, I think you'd want to add a 2nd "void *op_data" or
>> something in there, and have the uring_cmd alloc cache get clear that to
>> NULL and have uring_cmd alloc cache put kfree() it if it's non-NULL.
>>
>> We'd also need to move the uring_cache struct into
>> include/linux/io_uring_types.h so that btrfs can get to it (and probably
>> rename it to something saner, uring_cmd_async_data for example).
>
> Here are two patches that implement that basic thing on the io_uring
> uring_cmd side. With that, you can then do:
>
>> static int btrfs_uring_encoded_write(struct io_uring_cmd *cmd, unsigned int issue_flags)
>> {
>> struct io_kiocb *req = cmd_to_io_kiocb(cmd);
>> struct uring_cmd_async_data *data = req->async_data;
>> struct btrfs_ioctl_encoded_io_args *args;
>>
>> if (!data->op_data) {
>> data->op_data = kmalloc(sizeof(*args), GFP_NOIO);
>> if (!data->op_data)
>> return -ENOMEM;
>> if (copy_from_user(data->op_data, sqe_addr, sizeof(*args))
>> return -EFAULT;
>> }
>> ...
>> }
>
> and have it be both stable, and not need to worry about freeing it
> either. Hope that helps. Totally untested...
>
This works, but I get a KASAN crash because
io_issue_defs[IORING_OP_URING_CMD].async_size is now the wrong size.
I'll send a patch for this.
Mark
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2024-11-20 15:50 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-11-12 16:29 [PATCH] btrfs: add io_uring interface for encoded writes Mark Harmstone
2024-11-12 21:01 ` Jens Axboe
2024-11-12 21:11 ` Jens Axboe
2024-11-20 15:50 ` Mark Harmstone
2024-11-12 21:19 ` Jens Axboe
2024-11-15 17:29 ` Mark Harmstone
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox