From: Caleb Sander Mateos <csander@purestorage.com>
To: Joanne Koong <joannelkoong@gmail.com>
Cc: miklos@szeredi.hu, axboe@kernel.dk,
linux-fsdevel@vger.kernel.org, bschubert@ddn.com,
asml.silence@gmail.com, io-uring@vger.kernel.org,
xiaobing.li@samsung.com, kernel-team@meta.com
Subject: Re: [PATCH v2 8/8] fuse: support io-uring registered buffers
Date: Mon, 27 Oct 2025 18:42:10 -0700 [thread overview]
Message-ID: <CADUfDZrhAORbO5dz41F-bFWxNJAoYGX2JsHgPugi3JZVoWcYvg@mail.gmail.com> (raw)
In-Reply-To: <20251027222808.2332692-9-joannelkoong@gmail.com>
On Mon, Oct 27, 2025 at 3:29 PM Joanne Koong <joannelkoong@gmail.com> wrote:
>
> Add support for io-uring registered buffers for fuse daemons
> communicating through the io-uring interface. Daemons may register
> buffers ahead of time, which will eliminate the overhead of
> pinning/unpinning user pages and translating virtual addresses for every
> server-kernel interaction.
>
> To support page-aligned payloads, the buffer is structured such that the
> payload is at the front of the buffer and the fuse_uring_req_header is
> offset from the end of the buffer.
>
> To be backwards compatible, fuse uring still needs to support non-registered
> buffers as well.
>
> Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
> ---
> fs/fuse/dev_uring.c | 200 +++++++++++++++++++++++++++++++++---------
> fs/fuse/dev_uring_i.h | 27 +++++-
> 2 files changed, 183 insertions(+), 44 deletions(-)
>
> diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
> index c6b22b14b354..f501bc81f331 100644
> --- a/fs/fuse/dev_uring.c
> +++ b/fs/fuse/dev_uring.c
> @@ -580,6 +580,22 @@ static int fuse_uring_out_header_has_err(struct fuse_out_header *oh,
> return err;
> }
>
> +static void *get_kernel_ring_header(struct fuse_ring_ent *ent,
> + enum fuse_uring_header_type type)
> +{
> + switch (type) {
> + case FUSE_URING_HEADER_IN_OUT:
> + return &ent->headers->in_out;
> + case FUSE_URING_HEADER_OP:
> + return &ent->headers->op_in;
> + case FUSE_URING_HEADER_RING_ENT:
> + return &ent->headers->ring_ent_in_out;
> + }
> +
> + WARN_ON_ONCE(1);
> + return NULL;
> +}
> +
> static void __user *get_user_ring_header(struct fuse_ring_ent *ent,
> enum fuse_uring_header_type type)
> {
> @@ -600,16 +616,22 @@ static int copy_header_to_ring(struct fuse_ring_ent *ent,
> enum fuse_uring_header_type type,
> const void *header, size_t header_size)
> {
> - void __user *ring = get_user_ring_header(ent, type);
> + if (ent->fixed_buffer) {
> + void *ring = get_kernel_ring_header(ent, type);
>
> - if (!ring)
> - return -EINVAL;
> + if (!ring)
> + return -EINVAL;
> + memcpy(ring, header, header_size);
> + } else {
> + void __user *ring = get_user_ring_header(ent, type);
>
> - if (copy_to_user(ring, header, header_size)) {
> - pr_info_ratelimited("Copying header to ring failed.\n");
> - return -EFAULT;
> + if (!ring)
> + return -EINVAL;
> + if (copy_to_user(ring, header, header_size)) {
> + pr_info_ratelimited("Copying header to ring failed.\n");
> + return -EFAULT;
> + }
> }
> -
> return 0;
> }
>
> @@ -617,14 +639,21 @@ static int copy_header_from_ring(struct fuse_ring_ent *ent,
> enum fuse_uring_header_type type,
> void *header, size_t header_size)
> {
> - const void __user *ring = get_user_ring_header(ent, type);
> + if (ent->fixed_buffer) {
> + const void *ring = get_kernel_ring_header(ent, type);
>
> - if (!ring)
> - return -EINVAL;
> + if (!ring)
> + return -EINVAL;
> + memcpy(header, ring, header_size);
> + } else {
> + const void __user *ring = get_user_ring_header(ent, type);
>
> - if (copy_from_user(header, ring, header_size)) {
> - pr_info_ratelimited("Copying header from ring failed.\n");
> - return -EFAULT;
> + if (!ring)
> + return -EINVAL;
> + if (copy_from_user(header, ring, header_size)) {
> + pr_info_ratelimited("Copying header from ring failed.\n");
> + return -EFAULT;
> + }
> }
>
> return 0;
> @@ -637,11 +666,15 @@ static int setup_fuse_copy_state(struct fuse_ring *ring, struct fuse_req *req,
> {
> int err;
>
> - err = import_ubuf(rw, ent->user_payload, ring->max_payload_sz,
> - iter);
> - if (err) {
> - pr_info_ratelimited("fuse: Import of user buffer failed\n");
> - return err;
> + if (ent->fixed_buffer) {
> + *iter = ent->payload_iter;
> + } else {
> + err = import_ubuf(rw, ent->user_payload, ring->max_payload_sz,
> + iter);
> + if (err) {
> + pr_info_ratelimited("fuse: Import of user buffer failed\n");
> + return err;
> + }
> }
>
> fuse_copy_init(cs, rw == ITER_DEST, iter);
> @@ -754,6 +787,62 @@ static int fuse_uring_copy_to_ring(struct fuse_ring_ent *ent,
> sizeof(req->in.h));
> }
>
> +/*
> + * Prepare fixed buffer for access. Sets up the payload iter and kmaps the
> + * header.
> + *
> + * Callers must call fuse_uring_unmap_buffer() in the same scope to release the
> + * header mapping.
> + *
> + * For non-fixed buffers, this is a no-op.
> + */
> +static int fuse_uring_map_buffer(struct fuse_ring_ent *ent)
> +{
> + size_t header_size = sizeof(struct fuse_uring_req_header);
> + struct iov_iter iter;
> + struct page *header_page;
> + size_t count, start;
> + ssize_t copied;
> + int err;
> +
> + if (!ent->fixed_buffer)
> + return 0;
> +
> + err = io_uring_cmd_import_fixed_full(ITER_DEST, &iter, ent->cmd, 0);
> + if (err)
> + return err;
> +
> + count = iov_iter_count(&iter);
> + if (count < header_size || count & (PAGE_SIZE - 1))
> + return -EINVAL;
> +
> + /* Adjust the payload iter to protect the header from any overwrites */
> + ent->payload_iter = iter;
> + iov_iter_truncate(&ent->payload_iter, count - header_size);
> +
> + /* Set up the headers */
> + iov_iter_advance(&iter, count - header_size);
> + copied = iov_iter_get_pages2(&iter, &header_page, header_size, 1, &start);
> + if (copied < header_size)
> + return -EFAULT;
> + ent->headers = kmap_local_page(header_page) + start;
> +
> + /*
> + * We can release the acquired reference on the header page immediately
> + * since the page is pinned and io_uring_cmd_import_fixed_full()
> + * prevents it from being unpinned while we are using it.
> + */
> + put_page(header_page);
> +
> + return 0;
> +}
> +
> +static void fuse_uring_unmap_buffer(struct fuse_ring_ent *ent)
> +{
> + if (ent->fixed_buffer)
> + kunmap_local(ent->headers);
> +}
> +
> static int fuse_uring_prepare_send(struct fuse_ring_ent *ent,
> struct fuse_req *req)
> {
> @@ -932,6 +1021,7 @@ static int fuse_uring_commit_fetch(struct io_uring_cmd *cmd, int issue_flags,
> unsigned int qid = READ_ONCE(cmd_req->qid);
> struct fuse_pqueue *fpq;
> struct fuse_req *req;
> + bool next_req;
>
> err = -ENOTCONN;
> if (!ring)
> @@ -982,6 +1072,13 @@ static int fuse_uring_commit_fetch(struct io_uring_cmd *cmd, int issue_flags,
>
> /* without the queue lock, as other locks are taken */
> fuse_uring_prepare_cancel(cmd, issue_flags, ent);
> +
> + err = fuse_uring_map_buffer(ent);
> + if (err) {
> + fuse_uring_req_end(ent, req, err);
> + return err;
> + }
> +
> fuse_uring_commit(ent, req, issue_flags);
>
> /*
> @@ -990,7 +1087,9 @@ static int fuse_uring_commit_fetch(struct io_uring_cmd *cmd, int issue_flags,
> * and fetching is done in one step vs legacy fuse, which has separated
> * read (fetch request) and write (commit result).
> */
> - if (fuse_uring_get_next_fuse_req(ent, queue))
> + next_req = fuse_uring_get_next_fuse_req(ent, queue);
> + fuse_uring_unmap_buffer(ent);
> + if (next_req)
> fuse_uring_send(ent, cmd, 0, issue_flags);
> return 0;
> }
> @@ -1086,39 +1185,49 @@ fuse_uring_create_ring_ent(struct io_uring_cmd *cmd,
> struct iovec iov[FUSE_URING_IOV_SEGS];
> int err;
>
> + err = -ENOMEM;
> + ent = kzalloc(sizeof(*ent), GFP_KERNEL_ACCOUNT);
> + if (!ent)
> + return ERR_PTR(err);
> +
> + INIT_LIST_HEAD(&ent->list);
> +
> + ent->queue = queue;
> +
> + if (READ_ONCE(cmd->sqe->uring_cmd_flags) & IORING_URING_CMD_FIXED) {
Just use cmd->flags. That avoids having to deal with any possibility
of userspace changing sqe-> uring_cmd_flags between the multiple loads
of it.
> + ent->fixed_buffer = true;
> + atomic_inc(&ring->queue_refs);
> + return ent;
> + }
> +
> err = fuse_uring_get_iovec_from_sqe(cmd->sqe, iov);
> if (err) {
> pr_info_ratelimited("Failed to get iovec from sqe, err=%d\n",
> err);
> - return ERR_PTR(err);
> + goto error;
> }
>
> err = -EINVAL;
> if (iov[0].iov_len < sizeof(struct fuse_uring_req_header)) {
> pr_info_ratelimited("Invalid header len %zu\n", iov[0].iov_len);
> - return ERR_PTR(err);
> + goto error;
> }
>
> payload_size = iov[1].iov_len;
> if (payload_size < ring->max_payload_sz) {
> pr_info_ratelimited("Invalid req payload len %zu\n",
> payload_size);
> - return ERR_PTR(err);
> + goto error;
> }
> -
> - err = -ENOMEM;
> - ent = kzalloc(sizeof(*ent), GFP_KERNEL_ACCOUNT);
> - if (!ent)
> - return ERR_PTR(err);
> -
> - INIT_LIST_HEAD(&ent->list);
> -
> - ent->queue = queue;
> ent->user_headers = iov[0].iov_base;
> ent->user_payload = iov[1].iov_base;
>
> atomic_inc(&ring->queue_refs);
> return ent;
> +
> +error:
> + kfree(ent);
> + return ERR_PTR(err);
> }
>
> /*
> @@ -1249,20 +1358,29 @@ static void fuse_uring_send_in_task(struct io_uring_cmd *cmd,
> {
> struct fuse_ring_ent *ent = uring_cmd_to_ring_ent(cmd);
> struct fuse_ring_queue *queue = ent->queue;
> + bool send_ent = true;
> int err;
>
> - if (!(issue_flags & IO_URING_F_TASK_DEAD)) {
> - err = fuse_uring_prepare_send(ent, ent->fuse_req);
> - if (err) {
> - if (!fuse_uring_get_next_fuse_req(ent, queue))
> - return;
> - err = 0;
> - }
> - } else {
> - err = -ECANCELED;
> + if (issue_flags & IO_URING_F_TASK_DEAD) {
> + fuse_uring_send(ent, cmd, -ECANCELED, issue_flags);
> + return;
> + }
> +
> + err = fuse_uring_map_buffer(ent);
> + if (err) {
> + fuse_uring_req_end(ent, ent->fuse_req, err);
> + return;
> + }
> +
> + err = fuse_uring_prepare_send(ent, ent->fuse_req);
> + if (err) {
> + send_ent = fuse_uring_get_next_fuse_req(ent, queue);
> + err = 0;
> }
> + fuse_uring_unmap_buffer(ent);
>
> - fuse_uring_send(ent, cmd, err, issue_flags);
> + if (send_ent)
> + fuse_uring_send(ent, cmd, err, issue_flags);
> }
>
> static struct fuse_ring_queue *fuse_uring_task_to_queue(struct fuse_ring *ring)
> diff --git a/fs/fuse/dev_uring_i.h b/fs/fuse/dev_uring_i.h
> index 381fd0b8156a..fe14acccd6a6 100644
> --- a/fs/fuse/dev_uring_i.h
> +++ b/fs/fuse/dev_uring_i.h
> @@ -7,6 +7,7 @@
> #ifndef _FS_FUSE_DEV_URING_I_H
> #define _FS_FUSE_DEV_URING_I_H
>
> +#include <linux/uio.h>
> #include "fuse_i.h"
>
> #ifdef CONFIG_FUSE_IO_URING
> @@ -38,9 +39,29 @@ enum fuse_ring_req_state {
>
> /** A fuse ring entry, part of the ring queue */
> struct fuse_ring_ent {
> - /* userspace buffer */
> - struct fuse_uring_req_header __user *user_headers;
> - void __user *user_payload;
> + /*
> + * If true, the buffer was pre-registered by the daemon and the
> + * pages backing it are pinned in kernel memory. The fixed buffer layout
> + * is: [payload][header at end]. Use payload_iter and headers for
> + * copying to/from the ring.
> + *
> + * Otherwise, use user_headers and user_payload which point to userspace
> + * addresses representing the ring memory.
> + */
> + bool fixed_buffer;
Could use cmd->flags instead of adding this field. It's an extra
indirection vs. space tradeoff, I guess.
Best,
Caleb
> +
> + union {
> + /* fixed_buffer == false */
> + struct {
> + struct fuse_uring_req_header __user *user_headers;
> + void __user *user_payload;
> + };
> + /* fixed_buffer == true */
> + struct {
> + struct fuse_uring_req_header *headers;
> + struct iov_iter payload_iter;
> + };
> + };
>
> /* the ring queue that owns the request */
> struct fuse_ring_queue *queue;
> --
> 2.47.3
>
next prev parent reply other threads:[~2025-10-28 1:42 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-27 22:27 [PATCH v2 0/8] fuse: support io-uring registered buffers Joanne Koong
2025-10-27 22:28 ` [PATCH v2 1/8] io_uring/uring_cmd: add io_uring_cmd_import_fixed_full() Joanne Koong
2025-10-28 1:28 ` Caleb Sander Mateos
2025-10-29 14:01 ` Pavel Begunkov
2025-10-29 18:37 ` Joanne Koong
2025-10-29 19:59 ` Bernd Schubert
2025-10-30 17:42 ` Pavel Begunkov
2025-10-30 18:06 ` Pavel Begunkov
2025-10-30 22:23 ` Bernd Schubert
2025-10-30 23:50 ` Joanne Koong
2025-10-31 10:27 ` Bernd Schubert
2025-10-31 21:19 ` Joanne Koong
2025-10-30 23:13 ` Joanne Koong
2025-10-27 22:28 ` [PATCH v2 2/8] fuse: refactor io-uring logic for getting next fuse request Joanne Koong
2025-10-30 23:07 ` Bernd Schubert
2025-10-27 22:28 ` [PATCH v2 3/8] fuse: refactor io-uring header copying to ring Joanne Koong
2025-10-30 23:15 ` Bernd Schubert
2025-10-30 23:52 ` Joanne Koong
2025-10-27 22:28 ` [PATCH v2 4/8] fuse: refactor io-uring header copying from ring Joanne Koong
2025-10-27 22:28 ` [PATCH v2 5/8] fuse: use enum types for header copying Joanne Koong
2025-11-05 23:01 ` Bernd Schubert
2025-11-06 21:59 ` Joanne Koong
2025-11-07 22:11 ` Bernd Schubert
2025-10-27 22:28 ` [PATCH v2 6/8] fuse: add user_ prefix to userspace headers and payload fields Joanne Koong
2025-10-28 1:32 ` Caleb Sander Mateos
2025-10-28 23:56 ` Joanne Koong
2025-11-06 13:35 ` Bernd Schubert
2025-10-27 22:28 ` [PATCH v2 7/8] fuse: refactor setting up copy state for payload copying Joanne Koong
2025-11-06 16:53 ` Bernd Schubert
2025-11-06 22:01 ` Joanne Koong
2025-10-27 22:28 ` [PATCH v2 8/8] fuse: support io-uring registered buffers Joanne Koong
2025-10-28 1:42 ` Caleb Sander Mateos [this message]
2025-10-28 23:56 ` Joanne Koong
2025-11-06 19:48 ` Bernd Schubert
2025-11-06 23:09 ` Joanne Koong
2025-11-07 22:16 ` Bernd Schubert
2025-11-07 22:23 ` Bernd Schubert
2025-11-23 20:12 ` Bernd Schubert
2025-11-25 1:13 ` Joanne Koong
2025-11-14 23:59 ` [PATCH v2 0/8] " Joanne Koong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=CADUfDZrhAORbO5dz41F-bFWxNJAoYGX2JsHgPugi3JZVoWcYvg@mail.gmail.com \
--to=csander@purestorage.com \
--cc=asml.silence@gmail.com \
--cc=axboe@kernel.dk \
--cc=bschubert@ddn.com \
--cc=io-uring@vger.kernel.org \
--cc=joannelkoong@gmail.com \
--cc=kernel-team@meta.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=miklos@szeredi.hu \
--cc=xiaobing.li@samsung.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox