public inbox for io-uring@vger.kernel.org
 help / color / mirror / Atom feed
From: Joanne Koong <joannelkoong@gmail.com>
To: Caleb Sander Mateos <csander@purestorage.com>
Cc: miklos@szeredi.hu, axboe@kernel.dk,
	linux-fsdevel@vger.kernel.org,  bschubert@ddn.com,
	asml.silence@gmail.com, io-uring@vger.kernel.org,
	 xiaobing.li@samsung.com, kernel-team@meta.com
Subject: Re: [PATCH v2 8/8] fuse: support io-uring registered buffers
Date: Tue, 28 Oct 2025 16:56:54 -0700	[thread overview]
Message-ID: <CAJnrk1ZuxeZ__7PmzSO=KA-NjxZhq2V-QFg8U1JS2d5KmDwHvw@mail.gmail.com> (raw)
In-Reply-To: <CADUfDZrhAORbO5dz41F-bFWxNJAoYGX2JsHgPugi3JZVoWcYvg@mail.gmail.com>

On Mon, Oct 27, 2025 at 6:42 PM Caleb Sander Mateos
<csander@purestorage.com> wrote:
>
> On Mon, Oct 27, 2025 at 3:29 PM Joanne Koong <joannelkoong@gmail.com> wrote:
> >
> > Add support for io-uring registered buffers for fuse daemons
> > communicating through the io-uring interface. Daemons may register
> > buffers ahead of time, which will eliminate the overhead of
> > pinning/unpinning user pages and translating virtual addresses for every
> > server-kernel interaction.
> >
> > To support page-aligned payloads, the buffer is structured such that the
> > payload is at the front of the buffer and the fuse_uring_req_header is
> > offset from the end of the buffer.
> >
> > To be backwards compatible, fuse uring still needs to support non-registered
> > buffers as well.
> >
> > Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
> > ---
> >  fs/fuse/dev_uring.c   | 200 +++++++++++++++++++++++++++++++++---------
> >  fs/fuse/dev_uring_i.h |  27 +++++-
> >  2 files changed, 183 insertions(+), 44 deletions(-)
> >
> > diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
> > index c6b22b14b354..f501bc81f331 100644
> > --- a/fs/fuse/dev_uring.c
> > +++ b/fs/fuse/dev_uring.c
> > @@ -580,6 +580,22 @@ static int fuse_uring_out_header_has_err(struct fuse_out_header *oh,
> >         return err;
> >  }
> >
> > +static void *get_kernel_ring_header(struct fuse_ring_ent *ent,
> > +                                   enum fuse_uring_header_type type)
> > +{
> > +       switch (type) {
> > +       case FUSE_URING_HEADER_IN_OUT:
> > +               return &ent->headers->in_out;
> > +       case FUSE_URING_HEADER_OP:
> > +               return &ent->headers->op_in;
> > +       case FUSE_URING_HEADER_RING_ENT:
> > +               return &ent->headers->ring_ent_in_out;
> > +       }
> > +
> > +       WARN_ON_ONCE(1);
> > +       return NULL;
> > +}
> > +
> >  static void __user *get_user_ring_header(struct fuse_ring_ent *ent,
> >                                          enum fuse_uring_header_type type)
> >  {
> > @@ -600,16 +616,22 @@ static int copy_header_to_ring(struct fuse_ring_ent *ent,
> >                                enum fuse_uring_header_type type,
> >                                const void *header, size_t header_size)
> >  {
> > -       void __user *ring = get_user_ring_header(ent, type);
> > +       if (ent->fixed_buffer) {
> > +               void *ring = get_kernel_ring_header(ent, type);
> >
> > -       if (!ring)
> > -               return -EINVAL;
> > +               if (!ring)
> > +                       return -EINVAL;
> > +               memcpy(ring, header, header_size);
> > +       } else {
> > +               void __user *ring = get_user_ring_header(ent, type);
> >
> > -       if (copy_to_user(ring, header, header_size)) {
> > -               pr_info_ratelimited("Copying header to ring failed.\n");
> > -               return -EFAULT;
> > +               if (!ring)
> > +                       return -EINVAL;
> > +               if (copy_to_user(ring, header, header_size)) {
> > +                       pr_info_ratelimited("Copying header to ring failed.\n");
> > +                       return -EFAULT;
> > +               }
> >         }
> > -
> >         return 0;
> >  }
> >
> > @@ -617,14 +639,21 @@ static int copy_header_from_ring(struct fuse_ring_ent *ent,
> >                                  enum fuse_uring_header_type type,
> >                                  void *header, size_t header_size)
> >  {
> > -       const void __user *ring = get_user_ring_header(ent, type);
> > +       if (ent->fixed_buffer) {
> > +               const void *ring = get_kernel_ring_header(ent, type);
> >
> > -       if (!ring)
> > -               return -EINVAL;
> > +               if (!ring)
> > +                       return -EINVAL;
> > +               memcpy(header, ring, header_size);
> > +       } else {
> > +               const void __user *ring = get_user_ring_header(ent, type);
> >
> > -       if (copy_from_user(header, ring, header_size)) {
> > -               pr_info_ratelimited("Copying header from ring failed.\n");
> > -               return -EFAULT;
> > +               if (!ring)
> > +                       return -EINVAL;
> > +               if (copy_from_user(header, ring, header_size)) {
> > +                       pr_info_ratelimited("Copying header from ring failed.\n");
> > +                       return -EFAULT;
> > +               }
> >         }
> >
> >         return 0;
> > @@ -637,11 +666,15 @@ static int setup_fuse_copy_state(struct fuse_ring *ring, struct fuse_req *req,
> >  {
> >         int err;
> >
> > -       err = import_ubuf(rw, ent->user_payload, ring->max_payload_sz,
> > -                         iter);
> > -       if (err) {
> > -               pr_info_ratelimited("fuse: Import of user buffer failed\n");
> > -               return err;
> > +       if (ent->fixed_buffer) {
> > +               *iter = ent->payload_iter;
> > +       } else {
> > +               err = import_ubuf(rw, ent->user_payload, ring->max_payload_sz,
> > +                                 iter);
> > +               if (err) {
> > +                       pr_info_ratelimited("fuse: Import of user buffer failed\n");
> > +                       return err;
> > +               }
> >         }
> >
> >         fuse_copy_init(cs, rw == ITER_DEST, iter);
> > @@ -754,6 +787,62 @@ static int fuse_uring_copy_to_ring(struct fuse_ring_ent *ent,
> >                                    sizeof(req->in.h));
> >  }
> >
> > +/*
> > + * Prepare fixed buffer for access. Sets up the payload iter and kmaps the
> > + * header.
> > + *
> > + * Callers must call fuse_uring_unmap_buffer() in the same scope to release the
> > + * header mapping.
> > + *
> > + * For non-fixed buffers, this is a no-op.
> > + */
> > +static int fuse_uring_map_buffer(struct fuse_ring_ent *ent)
> > +{
> > +       size_t header_size = sizeof(struct fuse_uring_req_header);
> > +       struct iov_iter iter;
> > +       struct page *header_page;
> > +       size_t count, start;
> > +       ssize_t copied;
> > +       int err;
> > +
> > +       if (!ent->fixed_buffer)
> > +               return 0;
> > +
> > +       err = io_uring_cmd_import_fixed_full(ITER_DEST, &iter, ent->cmd, 0);
> > +       if (err)
> > +               return err;
> > +
> > +       count = iov_iter_count(&iter);
> > +       if (count < header_size || count & (PAGE_SIZE - 1))
> > +               return -EINVAL;
> > +
> > +       /* Adjust the payload iter to protect the header from any overwrites */
> > +       ent->payload_iter = iter;
> > +       iov_iter_truncate(&ent->payload_iter, count - header_size);
> > +
> > +       /* Set up the headers */
> > +       iov_iter_advance(&iter, count - header_size);
> > +       copied = iov_iter_get_pages2(&iter, &header_page, header_size, 1, &start);
> > +       if (copied < header_size)
> > +               return -EFAULT;
> > +       ent->headers = kmap_local_page(header_page) + start;
> > +
> > +       /*
> > +        * We can release the acquired reference on the header page immediately
> > +        * since the page is pinned and io_uring_cmd_import_fixed_full()
> > +        * prevents it from being unpinned while we are using it.
> > +        */
> > +       put_page(header_page);
> > +
> > +       return 0;
> > +}
> > +
> > +static void fuse_uring_unmap_buffer(struct fuse_ring_ent *ent)
> > +{
> > +       if (ent->fixed_buffer)
> > +               kunmap_local(ent->headers);
> > +}
> > +
> >  static int fuse_uring_prepare_send(struct fuse_ring_ent *ent,
> >                                    struct fuse_req *req)
> >  {
> > @@ -932,6 +1021,7 @@ static int fuse_uring_commit_fetch(struct io_uring_cmd *cmd, int issue_flags,
> >         unsigned int qid = READ_ONCE(cmd_req->qid);
> >         struct fuse_pqueue *fpq;
> >         struct fuse_req *req;
> > +       bool next_req;
> >
> >         err = -ENOTCONN;
> >         if (!ring)
> > @@ -982,6 +1072,13 @@ static int fuse_uring_commit_fetch(struct io_uring_cmd *cmd, int issue_flags,
> >
> >         /* without the queue lock, as other locks are taken */
> >         fuse_uring_prepare_cancel(cmd, issue_flags, ent);
> > +
> > +       err = fuse_uring_map_buffer(ent);
> > +       if (err) {
> > +               fuse_uring_req_end(ent, req, err);
> > +               return err;
> > +       }
> > +
> >         fuse_uring_commit(ent, req, issue_flags);
> >
> >         /*
> > @@ -990,7 +1087,9 @@ static int fuse_uring_commit_fetch(struct io_uring_cmd *cmd, int issue_flags,
> >          * and fetching is done in one step vs legacy fuse, which has separated
> >          * read (fetch request) and write (commit result).
> >          */
> > -       if (fuse_uring_get_next_fuse_req(ent, queue))
> > +       next_req = fuse_uring_get_next_fuse_req(ent, queue);
> > +       fuse_uring_unmap_buffer(ent);
> > +       if (next_req)
> >                 fuse_uring_send(ent, cmd, 0, issue_flags);
> >         return 0;
> >  }
> > @@ -1086,39 +1185,49 @@ fuse_uring_create_ring_ent(struct io_uring_cmd *cmd,
> >         struct iovec iov[FUSE_URING_IOV_SEGS];
> >         int err;
> >
> > +       err = -ENOMEM;
> > +       ent = kzalloc(sizeof(*ent), GFP_KERNEL_ACCOUNT);
> > +       if (!ent)
> > +               return ERR_PTR(err);
> > +
> > +       INIT_LIST_HEAD(&ent->list);
> > +
> > +       ent->queue = queue;
> > +
> > +       if (READ_ONCE(cmd->sqe->uring_cmd_flags) & IORING_URING_CMD_FIXED) {
>
> Just use cmd->flags. That avoids having to deal with any possibility
> of userspace changing sqe->uring_cmd_flags between the multiple loads
> of it.

Awesome, I'll switch this to just use cmd->flags.

Thank you for looking at the patches.
>
> > +               ent->fixed_buffer = true;
> > +               atomic_inc(&ring->queue_refs);
> > +               return ent;
> > +       }
> > +
> >         err = fuse_uring_get_iovec_from_sqe(cmd->sqe, iov);
> >         if (err) {
> >                 pr_info_ratelimited("Failed to get iovec from sqe, err=%d\n",
> >                                     err);
> > -               return ERR_PTR(err);
> > +               goto error;
> >         }
> >
> >         err = -EINVAL;
> >         if (iov[0].iov_len < sizeof(struct fuse_uring_req_header)) {
> >                 pr_info_ratelimited("Invalid header len %zu\n", iov[0].iov_len);
> > -               return ERR_PTR(err);
> > +               goto error;
> >         }
> >
> >         payload_size = iov[1].iov_len;
> >         if (payload_size < ring->max_payload_sz) {
> >                 pr_info_ratelimited("Invalid req payload len %zu\n",
> >                                     payload_size);
> > -               return ERR_PTR(err);
> > +               goto error;
> >         }
> > -
> > -       err = -ENOMEM;
> > -       ent = kzalloc(sizeof(*ent), GFP_KERNEL_ACCOUNT);
> > -       if (!ent)
> > -               return ERR_PTR(err);
> > -
> > -       INIT_LIST_HEAD(&ent->list);
> > -
> > -       ent->queue = queue;
> >         ent->user_headers = iov[0].iov_base;
> >         ent->user_payload = iov[1].iov_base;
> >
> >         atomic_inc(&ring->queue_refs);
> >         return ent;
> > +
> > +error:
> > +       kfree(ent);
> > +       return ERR_PTR(err);
> >  }
> >
> >  /*
> > @@ -1249,20 +1358,29 @@ static void fuse_uring_send_in_task(struct io_uring_cmd *cmd,
> >  {
> >         struct fuse_ring_ent *ent = uring_cmd_to_ring_ent(cmd);
> >         struct fuse_ring_queue *queue = ent->queue;
> > +       bool send_ent = true;
> >         int err;
> >
> > -       if (!(issue_flags & IO_URING_F_TASK_DEAD)) {
> > -               err = fuse_uring_prepare_send(ent, ent->fuse_req);
> > -               if (err) {
> > -                       if (!fuse_uring_get_next_fuse_req(ent, queue))
> > -                               return;
> > -                       err = 0;
> > -               }
> > -       } else {
> > -               err = -ECANCELED;
> > +       if (issue_flags & IO_URING_F_TASK_DEAD) {
> > +               fuse_uring_send(ent, cmd, -ECANCELED, issue_flags);
> > +               return;
> > +       }
> > +
> > +       err = fuse_uring_map_buffer(ent);
> > +       if (err) {
> > +               fuse_uring_req_end(ent, ent->fuse_req, err);
> > +               return;
> > +       }
> > +
> > +       err = fuse_uring_prepare_send(ent, ent->fuse_req);
> > +       if (err) {
> > +               send_ent = fuse_uring_get_next_fuse_req(ent, queue);
> > +               err = 0;
> >         }
> > +       fuse_uring_unmap_buffer(ent);
> >
> > -       fuse_uring_send(ent, cmd, err, issue_flags);
> > +       if (send_ent)
> > +               fuse_uring_send(ent, cmd, err, issue_flags);
> >  }
> >
> >  static struct fuse_ring_queue *fuse_uring_task_to_queue(struct fuse_ring *ring)
> > diff --git a/fs/fuse/dev_uring_i.h b/fs/fuse/dev_uring_i.h
> > index 381fd0b8156a..fe14acccd6a6 100644
> > --- a/fs/fuse/dev_uring_i.h
> > +++ b/fs/fuse/dev_uring_i.h
> > @@ -7,6 +7,7 @@
> >  #ifndef _FS_FUSE_DEV_URING_I_H
> >  #define _FS_FUSE_DEV_URING_I_H
> >
> > +#include <linux/uio.h>
> >  #include "fuse_i.h"
> >
> >  #ifdef CONFIG_FUSE_IO_URING
> > @@ -38,9 +39,29 @@ enum fuse_ring_req_state {
> >
> >  /** A fuse ring entry, part of the ring queue */
> >  struct fuse_ring_ent {
> > -       /* userspace buffer */
> > -       struct fuse_uring_req_header __user *user_headers;
> > -       void __user *user_payload;
> > +       /*
> > +        * If true, the buffer was pre-registered by the daemon and the
> > +        * pages backing it are pinned in kernel memory. The fixed buffer layout
> > +        * is: [payload][header at end]. Use payload_iter and headers for
> > +        * copying to/from the ring.
> > +        *
> > +        * Otherwise, use user_headers and user_payload which point to userspace
> > +        * addresses representing the ring memory.
> > +        */
> > +       bool fixed_buffer;
>
> Could use cmd->flags instead of adding this field. It's an extra
> indirection vs. space tradeoff, I guess.
>
> Best,
> Caleb
>
> > +
> > +       union {
> > +               /* fixed_buffer == false */
> > +               struct {
> > +                       struct fuse_uring_req_header __user *user_headers;
> > +                       void __user *user_payload;
> > +               };
> > +               /* fixed_buffer == true */
> > +               struct {
> > +                       struct fuse_uring_req_header *headers;
> > +                       struct iov_iter payload_iter;
> > +               };
> > +       };
> >
> >         /* the ring queue that owns the request */
> >         struct fuse_ring_queue *queue;
> > --
> > 2.47.3
> >

  reply	other threads:[~2025-10-28 23:57 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-27 22:27 [PATCH v2 0/8] fuse: support io-uring registered buffers Joanne Koong
2025-10-27 22:28 ` [PATCH v2 1/8] io_uring/uring_cmd: add io_uring_cmd_import_fixed_full() Joanne Koong
2025-10-28  1:28   ` Caleb Sander Mateos
2025-10-29 14:01   ` Pavel Begunkov
2025-10-29 18:37     ` Joanne Koong
2025-10-29 19:59       ` Bernd Schubert
2025-10-30 17:42         ` Pavel Begunkov
2025-10-30 18:06       ` Pavel Begunkov
2025-10-30 22:23         ` Bernd Schubert
2025-10-30 23:50           ` Joanne Koong
2025-10-31 10:27             ` Bernd Schubert
2025-10-31 21:19               ` Joanne Koong
2025-10-30 23:13         ` Joanne Koong
2025-10-27 22:28 ` [PATCH v2 2/8] fuse: refactor io-uring logic for getting next fuse request Joanne Koong
2025-10-30 23:07   ` Bernd Schubert
2025-10-27 22:28 ` [PATCH v2 3/8] fuse: refactor io-uring header copying to ring Joanne Koong
2025-10-30 23:15   ` Bernd Schubert
2025-10-30 23:52     ` Joanne Koong
2025-10-27 22:28 ` [PATCH v2 4/8] fuse: refactor io-uring header copying from ring Joanne Koong
2025-10-27 22:28 ` [PATCH v2 5/8] fuse: use enum types for header copying Joanne Koong
2025-11-05 23:01   ` Bernd Schubert
2025-11-06 21:59     ` Joanne Koong
2025-11-07 22:11       ` Bernd Schubert
2025-10-27 22:28 ` [PATCH v2 6/8] fuse: add user_ prefix to userspace headers and payload fields Joanne Koong
2025-10-28  1:32   ` Caleb Sander Mateos
2025-10-28 23:56     ` Joanne Koong
2025-11-06 13:35   ` Bernd Schubert
2025-10-27 22:28 ` [PATCH v2 7/8] fuse: refactor setting up copy state for payload copying Joanne Koong
2025-11-06 16:53   ` Bernd Schubert
2025-11-06 22:01     ` Joanne Koong
2025-10-27 22:28 ` [PATCH v2 8/8] fuse: support io-uring registered buffers Joanne Koong
2025-10-28  1:42   ` Caleb Sander Mateos
2025-10-28 23:56     ` Joanne Koong [this message]
2025-11-06 19:48   ` Bernd Schubert
2025-11-06 23:09     ` Joanne Koong
2025-11-07 22:16       ` Bernd Schubert
2025-11-07 22:23         ` Bernd Schubert

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CAJnrk1ZuxeZ__7PmzSO=KA-NjxZhq2V-QFg8U1JS2d5KmDwHvw@mail.gmail.com' \
    --to=joannelkoong@gmail.com \
    --cc=asml.silence@gmail.com \
    --cc=axboe@kernel.dk \
    --cc=bschubert@ddn.com \
    --cc=csander@purestorage.com \
    --cc=io-uring@vger.kernel.org \
    --cc=kernel-team@meta.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=miklos@szeredi.hu \
    --cc=xiaobing.li@samsung.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox