From: Joanne Koong <joannelkoong@gmail.com>
To: Caleb Sander Mateos <csander@purestorage.com>
Cc: miklos@szeredi.hu, axboe@kernel.dk,
linux-fsdevel@vger.kernel.org, bschubert@ddn.com,
asml.silence@gmail.com, io-uring@vger.kernel.org,
xiaobing.li@samsung.com, kernel-team@meta.com
Subject: Re: [PATCH v2 8/8] fuse: support io-uring registered buffers
Date: Tue, 28 Oct 2025 16:56:54 -0700 [thread overview]
Message-ID: <CAJnrk1ZuxeZ__7PmzSO=KA-NjxZhq2V-QFg8U1JS2d5KmDwHvw@mail.gmail.com> (raw)
In-Reply-To: <CADUfDZrhAORbO5dz41F-bFWxNJAoYGX2JsHgPugi3JZVoWcYvg@mail.gmail.com>
On Mon, Oct 27, 2025 at 6:42 PM Caleb Sander Mateos
<csander@purestorage.com> wrote:
>
> On Mon, Oct 27, 2025 at 3:29 PM Joanne Koong <joannelkoong@gmail.com> wrote:
> >
> > Add support for io-uring registered buffers for fuse daemons
> > communicating through the io-uring interface. Daemons may register
> > buffers ahead of time, which will eliminate the overhead of
> > pinning/unpinning user pages and translating virtual addresses for every
> > server-kernel interaction.
> >
> > To support page-aligned payloads, the buffer is structured such that the
> > payload is at the front of the buffer and the fuse_uring_req_header is
> > offset from the end of the buffer.
> >
> > To be backwards compatible, fuse uring still needs to support non-registered
> > buffers as well.
> >
> > Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
> > ---
> > fs/fuse/dev_uring.c | 200 +++++++++++++++++++++++++++++++++---------
> > fs/fuse/dev_uring_i.h | 27 +++++-
> > 2 files changed, 183 insertions(+), 44 deletions(-)
> >
> > diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
> > index c6b22b14b354..f501bc81f331 100644
> > --- a/fs/fuse/dev_uring.c
> > +++ b/fs/fuse/dev_uring.c
> > @@ -580,6 +580,22 @@ static int fuse_uring_out_header_has_err(struct fuse_out_header *oh,
> > return err;
> > }
> >
> > +static void *get_kernel_ring_header(struct fuse_ring_ent *ent,
> > + enum fuse_uring_header_type type)
> > +{
> > + switch (type) {
> > + case FUSE_URING_HEADER_IN_OUT:
> > + return &ent->headers->in_out;
> > + case FUSE_URING_HEADER_OP:
> > + return &ent->headers->op_in;
> > + case FUSE_URING_HEADER_RING_ENT:
> > + return &ent->headers->ring_ent_in_out;
> > + }
> > +
> > + WARN_ON_ONCE(1);
> > + return NULL;
> > +}
> > +
> > static void __user *get_user_ring_header(struct fuse_ring_ent *ent,
> > enum fuse_uring_header_type type)
> > {
> > @@ -600,16 +616,22 @@ static int copy_header_to_ring(struct fuse_ring_ent *ent,
> > enum fuse_uring_header_type type,
> > const void *header, size_t header_size)
> > {
> > - void __user *ring = get_user_ring_header(ent, type);
> > + if (ent->fixed_buffer) {
> > + void *ring = get_kernel_ring_header(ent, type);
> >
> > - if (!ring)
> > - return -EINVAL;
> > + if (!ring)
> > + return -EINVAL;
> > + memcpy(ring, header, header_size);
> > + } else {
> > + void __user *ring = get_user_ring_header(ent, type);
> >
> > - if (copy_to_user(ring, header, header_size)) {
> > - pr_info_ratelimited("Copying header to ring failed.\n");
> > - return -EFAULT;
> > + if (!ring)
> > + return -EINVAL;
> > + if (copy_to_user(ring, header, header_size)) {
> > + pr_info_ratelimited("Copying header to ring failed.\n");
> > + return -EFAULT;
> > + }
> > }
> > -
> > return 0;
> > }
> >
> > @@ -617,14 +639,21 @@ static int copy_header_from_ring(struct fuse_ring_ent *ent,
> > enum fuse_uring_header_type type,
> > void *header, size_t header_size)
> > {
> > - const void __user *ring = get_user_ring_header(ent, type);
> > + if (ent->fixed_buffer) {
> > + const void *ring = get_kernel_ring_header(ent, type);
> >
> > - if (!ring)
> > - return -EINVAL;
> > + if (!ring)
> > + return -EINVAL;
> > + memcpy(header, ring, header_size);
> > + } else {
> > + const void __user *ring = get_user_ring_header(ent, type);
> >
> > - if (copy_from_user(header, ring, header_size)) {
> > - pr_info_ratelimited("Copying header from ring failed.\n");
> > - return -EFAULT;
> > + if (!ring)
> > + return -EINVAL;
> > + if (copy_from_user(header, ring, header_size)) {
> > + pr_info_ratelimited("Copying header from ring failed.\n");
> > + return -EFAULT;
> > + }
> > }
> >
> > return 0;
> > @@ -637,11 +666,15 @@ static int setup_fuse_copy_state(struct fuse_ring *ring, struct fuse_req *req,
> > {
> > int err;
> >
> > - err = import_ubuf(rw, ent->user_payload, ring->max_payload_sz,
> > - iter);
> > - if (err) {
> > - pr_info_ratelimited("fuse: Import of user buffer failed\n");
> > - return err;
> > + if (ent->fixed_buffer) {
> > + *iter = ent->payload_iter;
> > + } else {
> > + err = import_ubuf(rw, ent->user_payload, ring->max_payload_sz,
> > + iter);
> > + if (err) {
> > + pr_info_ratelimited("fuse: Import of user buffer failed\n");
> > + return err;
> > + }
> > }
> >
> > fuse_copy_init(cs, rw == ITER_DEST, iter);
> > @@ -754,6 +787,62 @@ static int fuse_uring_copy_to_ring(struct fuse_ring_ent *ent,
> > sizeof(req->in.h));
> > }
> >
> > +/*
> > + * Prepare fixed buffer for access. Sets up the payload iter and kmaps the
> > + * header.
> > + *
> > + * Callers must call fuse_uring_unmap_buffer() in the same scope to release the
> > + * header mapping.
> > + *
> > + * For non-fixed buffers, this is a no-op.
> > + */
> > +static int fuse_uring_map_buffer(struct fuse_ring_ent *ent)
> > +{
> > + size_t header_size = sizeof(struct fuse_uring_req_header);
> > + struct iov_iter iter;
> > + struct page *header_page;
> > + size_t count, start;
> > + ssize_t copied;
> > + int err;
> > +
> > + if (!ent->fixed_buffer)
> > + return 0;
> > +
> > + err = io_uring_cmd_import_fixed_full(ITER_DEST, &iter, ent->cmd, 0);
> > + if (err)
> > + return err;
> > +
> > + count = iov_iter_count(&iter);
> > + if (count < header_size || count & (PAGE_SIZE - 1))
> > + return -EINVAL;
> > +
> > + /* Adjust the payload iter to protect the header from any overwrites */
> > + ent->payload_iter = iter;
> > + iov_iter_truncate(&ent->payload_iter, count - header_size);
> > +
> > + /* Set up the headers */
> > + iov_iter_advance(&iter, count - header_size);
> > + copied = iov_iter_get_pages2(&iter, &header_page, header_size, 1, &start);
> > + if (copied < header_size)
> > + return -EFAULT;
> > + ent->headers = kmap_local_page(header_page) + start;
> > +
> > + /*
> > + * We can release the acquired reference on the header page immediately
> > + * since the page is pinned and io_uring_cmd_import_fixed_full()
> > + * prevents it from being unpinned while we are using it.
> > + */
> > + put_page(header_page);
> > +
> > + return 0;
> > +}
> > +
> > +static void fuse_uring_unmap_buffer(struct fuse_ring_ent *ent)
> > +{
> > + if (ent->fixed_buffer)
> > + kunmap_local(ent->headers);
> > +}
> > +
> > static int fuse_uring_prepare_send(struct fuse_ring_ent *ent,
> > struct fuse_req *req)
> > {
> > @@ -932,6 +1021,7 @@ static int fuse_uring_commit_fetch(struct io_uring_cmd *cmd, int issue_flags,
> > unsigned int qid = READ_ONCE(cmd_req->qid);
> > struct fuse_pqueue *fpq;
> > struct fuse_req *req;
> > + bool next_req;
> >
> > err = -ENOTCONN;
> > if (!ring)
> > @@ -982,6 +1072,13 @@ static int fuse_uring_commit_fetch(struct io_uring_cmd *cmd, int issue_flags,
> >
> > /* without the queue lock, as other locks are taken */
> > fuse_uring_prepare_cancel(cmd, issue_flags, ent);
> > +
> > + err = fuse_uring_map_buffer(ent);
> > + if (err) {
> > + fuse_uring_req_end(ent, req, err);
> > + return err;
> > + }
> > +
> > fuse_uring_commit(ent, req, issue_flags);
> >
> > /*
> > @@ -990,7 +1087,9 @@ static int fuse_uring_commit_fetch(struct io_uring_cmd *cmd, int issue_flags,
> > * and fetching is done in one step vs legacy fuse, which has separated
> > * read (fetch request) and write (commit result).
> > */
> > - if (fuse_uring_get_next_fuse_req(ent, queue))
> > + next_req = fuse_uring_get_next_fuse_req(ent, queue);
> > + fuse_uring_unmap_buffer(ent);
> > + if (next_req)
> > fuse_uring_send(ent, cmd, 0, issue_flags);
> > return 0;
> > }
> > @@ -1086,39 +1185,49 @@ fuse_uring_create_ring_ent(struct io_uring_cmd *cmd,
> > struct iovec iov[FUSE_URING_IOV_SEGS];
> > int err;
> >
> > + err = -ENOMEM;
> > + ent = kzalloc(sizeof(*ent), GFP_KERNEL_ACCOUNT);
> > + if (!ent)
> > + return ERR_PTR(err);
> > +
> > + INIT_LIST_HEAD(&ent->list);
> > +
> > + ent->queue = queue;
> > +
> > + if (READ_ONCE(cmd->sqe->uring_cmd_flags) & IORING_URING_CMD_FIXED) {
>
> Just use cmd->flags. That avoids having to deal with any possibility
> of userspace changing sqe->uring_cmd_flags between the multiple loads
> of it.
Awesome, I'll switch this to just use cmd->flags.
Thank you for looking at the patches.
>
> > + ent->fixed_buffer = true;
> > + atomic_inc(&ring->queue_refs);
> > + return ent;
> > + }
> > +
> > err = fuse_uring_get_iovec_from_sqe(cmd->sqe, iov);
> > if (err) {
> > pr_info_ratelimited("Failed to get iovec from sqe, err=%d\n",
> > err);
> > - return ERR_PTR(err);
> > + goto error;
> > }
> >
> > err = -EINVAL;
> > if (iov[0].iov_len < sizeof(struct fuse_uring_req_header)) {
> > pr_info_ratelimited("Invalid header len %zu\n", iov[0].iov_len);
> > - return ERR_PTR(err);
> > + goto error;
> > }
> >
> > payload_size = iov[1].iov_len;
> > if (payload_size < ring->max_payload_sz) {
> > pr_info_ratelimited("Invalid req payload len %zu\n",
> > payload_size);
> > - return ERR_PTR(err);
> > + goto error;
> > }
> > -
> > - err = -ENOMEM;
> > - ent = kzalloc(sizeof(*ent), GFP_KERNEL_ACCOUNT);
> > - if (!ent)
> > - return ERR_PTR(err);
> > -
> > - INIT_LIST_HEAD(&ent->list);
> > -
> > - ent->queue = queue;
> > ent->user_headers = iov[0].iov_base;
> > ent->user_payload = iov[1].iov_base;
> >
> > atomic_inc(&ring->queue_refs);
> > return ent;
> > +
> > +error:
> > + kfree(ent);
> > + return ERR_PTR(err);
> > }
> >
> > /*
> > @@ -1249,20 +1358,29 @@ static void fuse_uring_send_in_task(struct io_uring_cmd *cmd,
> > {
> > struct fuse_ring_ent *ent = uring_cmd_to_ring_ent(cmd);
> > struct fuse_ring_queue *queue = ent->queue;
> > + bool send_ent = true;
> > int err;
> >
> > - if (!(issue_flags & IO_URING_F_TASK_DEAD)) {
> > - err = fuse_uring_prepare_send(ent, ent->fuse_req);
> > - if (err) {
> > - if (!fuse_uring_get_next_fuse_req(ent, queue))
> > - return;
> > - err = 0;
> > - }
> > - } else {
> > - err = -ECANCELED;
> > + if (issue_flags & IO_URING_F_TASK_DEAD) {
> > + fuse_uring_send(ent, cmd, -ECANCELED, issue_flags);
> > + return;
> > + }
> > +
> > + err = fuse_uring_map_buffer(ent);
> > + if (err) {
> > + fuse_uring_req_end(ent, ent->fuse_req, err);
> > + return;
> > + }
> > +
> > + err = fuse_uring_prepare_send(ent, ent->fuse_req);
> > + if (err) {
> > + send_ent = fuse_uring_get_next_fuse_req(ent, queue);
> > + err = 0;
> > }
> > + fuse_uring_unmap_buffer(ent);
> >
> > - fuse_uring_send(ent, cmd, err, issue_flags);
> > + if (send_ent)
> > + fuse_uring_send(ent, cmd, err, issue_flags);
> > }
> >
> > static struct fuse_ring_queue *fuse_uring_task_to_queue(struct fuse_ring *ring)
> > diff --git a/fs/fuse/dev_uring_i.h b/fs/fuse/dev_uring_i.h
> > index 381fd0b8156a..fe14acccd6a6 100644
> > --- a/fs/fuse/dev_uring_i.h
> > +++ b/fs/fuse/dev_uring_i.h
> > @@ -7,6 +7,7 @@
> > #ifndef _FS_FUSE_DEV_URING_I_H
> > #define _FS_FUSE_DEV_URING_I_H
> >
> > +#include <linux/uio.h>
> > #include "fuse_i.h"
> >
> > #ifdef CONFIG_FUSE_IO_URING
> > @@ -38,9 +39,29 @@ enum fuse_ring_req_state {
> >
> > /** A fuse ring entry, part of the ring queue */
> > struct fuse_ring_ent {
> > - /* userspace buffer */
> > - struct fuse_uring_req_header __user *user_headers;
> > - void __user *user_payload;
> > + /*
> > + * If true, the buffer was pre-registered by the daemon and the
> > + * pages backing it are pinned in kernel memory. The fixed buffer layout
> > + * is: [payload][header at end]. Use payload_iter and headers for
> > + * copying to/from the ring.
> > + *
> > + * Otherwise, use user_headers and user_payload which point to userspace
> > + * addresses representing the ring memory.
> > + */
> > + bool fixed_buffer;
>
> Could use cmd->flags instead of adding this field. It's an extra
> indirection vs. space tradeoff, I guess.
>
> Best,
> Caleb
>
> > +
> > + union {
> > + /* fixed_buffer == false */
> > + struct {
> > + struct fuse_uring_req_header __user *user_headers;
> > + void __user *user_payload;
> > + };
> > + /* fixed_buffer == true */
> > + struct {
> > + struct fuse_uring_req_header *headers;
> > + struct iov_iter payload_iter;
> > + };
> > + };
> >
> > /* the ring queue that owns the request */
> > struct fuse_ring_queue *queue;
> > --
> > 2.47.3
> >
next prev parent reply other threads:[~2025-10-28 23:57 UTC|newest]
Thread overview: 37+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-27 22:27 [PATCH v2 0/8] fuse: support io-uring registered buffers Joanne Koong
2025-10-27 22:28 ` [PATCH v2 1/8] io_uring/uring_cmd: add io_uring_cmd_import_fixed_full() Joanne Koong
2025-10-28 1:28 ` Caleb Sander Mateos
2025-10-29 14:01 ` Pavel Begunkov
2025-10-29 18:37 ` Joanne Koong
2025-10-29 19:59 ` Bernd Schubert
2025-10-30 17:42 ` Pavel Begunkov
2025-10-30 18:06 ` Pavel Begunkov
2025-10-30 22:23 ` Bernd Schubert
2025-10-30 23:50 ` Joanne Koong
2025-10-31 10:27 ` Bernd Schubert
2025-10-31 21:19 ` Joanne Koong
2025-10-30 23:13 ` Joanne Koong
2025-10-27 22:28 ` [PATCH v2 2/8] fuse: refactor io-uring logic for getting next fuse request Joanne Koong
2025-10-30 23:07 ` Bernd Schubert
2025-10-27 22:28 ` [PATCH v2 3/8] fuse: refactor io-uring header copying to ring Joanne Koong
2025-10-30 23:15 ` Bernd Schubert
2025-10-30 23:52 ` Joanne Koong
2025-10-27 22:28 ` [PATCH v2 4/8] fuse: refactor io-uring header copying from ring Joanne Koong
2025-10-27 22:28 ` [PATCH v2 5/8] fuse: use enum types for header copying Joanne Koong
2025-11-05 23:01 ` Bernd Schubert
2025-11-06 21:59 ` Joanne Koong
2025-11-07 22:11 ` Bernd Schubert
2025-10-27 22:28 ` [PATCH v2 6/8] fuse: add user_ prefix to userspace headers and payload fields Joanne Koong
2025-10-28 1:32 ` Caleb Sander Mateos
2025-10-28 23:56 ` Joanne Koong
2025-11-06 13:35 ` Bernd Schubert
2025-10-27 22:28 ` [PATCH v2 7/8] fuse: refactor setting up copy state for payload copying Joanne Koong
2025-11-06 16:53 ` Bernd Schubert
2025-11-06 22:01 ` Joanne Koong
2025-10-27 22:28 ` [PATCH v2 8/8] fuse: support io-uring registered buffers Joanne Koong
2025-10-28 1:42 ` Caleb Sander Mateos
2025-10-28 23:56 ` Joanne Koong [this message]
2025-11-06 19:48 ` Bernd Schubert
2025-11-06 23:09 ` Joanne Koong
2025-11-07 22:16 ` Bernd Schubert
2025-11-07 22:23 ` Bernd Schubert
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='CAJnrk1ZuxeZ__7PmzSO=KA-NjxZhq2V-QFg8U1JS2d5KmDwHvw@mail.gmail.com' \
--to=joannelkoong@gmail.com \
--cc=asml.silence@gmail.com \
--cc=axboe@kernel.dk \
--cc=bschubert@ddn.com \
--cc=csander@purestorage.com \
--cc=io-uring@vger.kernel.org \
--cc=kernel-team@meta.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=miklos@szeredi.hu \
--cc=xiaobing.li@samsung.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox