From: Bernd Schubert <bernd@bsbernd.com>
To: Joanne Koong <joannelkoong@gmail.com>,
miklos@szeredi.hu, axboe@kernel.dk
Cc: bschubert@ddn.com, asml.silence@gmail.com,
io-uring@vger.kernel.org, csander@purestorage.com,
xiaobing.li@samsung.com, linux-fsdevel@vger.kernel.org
Subject: Re: [PATCH v3 19/25] fuse: add io-uring kernel-managed buffer ring
Date: Wed, 4 Feb 2026 00:58:03 +0100 [thread overview]
Message-ID: <4e406b1f-723b-4dc7-8e50-1a5ef6ea11b3@bsbernd.com> (raw)
In-Reply-To: <20251223003522.3055912-20-joannelkoong@gmail.com>
On 12/23/25 01:35, Joanne Koong wrote:
> Add io-uring kernel-managed buffer ring capability for fuse daemons
> communicating through the io-uring interface.
>
> This has two benefits:
> a) eliminates the overhead of pinning/unpinning user pages and
> translating virtual addresses for every server-kernel interaction
>
> b) reduces the amount of memory needed for the buffers per queue and
> allows buffers to be reused across entries. Incremental buffer
> consumption, when added, will allow a buffer to be used across multiple
> requests.
>
> Buffer ring usage is set on a per-queue basis. In order to use this, the
> daemon needs to have preregistered a kernel-managed buffer ring and a
> fixed buffer at index 0 that will hold all the headers, and set the
> "use_bufring" field during registration. The kernel-managed buffer ring
> will be pinned for the lifetime of the connection.
>
> Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
> ---
> fs/fuse/dev_uring.c | 423 ++++++++++++++++++++++++++++++++------
> fs/fuse/dev_uring_i.h | 30 ++-
> include/uapi/linux/fuse.h | 15 +-
> 3 files changed, 399 insertions(+), 69 deletions(-)
>
> diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
> index b57871f92d08..e9905f09c3ad 100644
> --- a/fs/fuse/dev_uring.c
> +++ b/fs/fuse/dev_uring.c
> @@ -10,6 +10,8 @@
> #include "fuse_trace.h"
>
> #include <linux/fs.h>
> +#include <linux/io_uring.h>
> +#include <linux/io_uring/buf.h>
> #include <linux/io_uring/cmd.h>
>
> static bool __read_mostly enable_uring;
> @@ -19,6 +21,8 @@ MODULE_PARM_DESC(enable_uring,
>
> #define FUSE_URING_IOV_SEGS 2 /* header and payload */
>
> +#define FUSE_URING_RINGBUF_GROUP 0
> +#define FUSE_URING_FIXED_HEADERS_OFFSET 0
>
> bool fuse_uring_enabled(void)
> {
> @@ -276,20 +280,46 @@ static struct fuse_ring *fuse_uring_create(struct fuse_conn *fc)
> return res;
> }
>
> -static struct fuse_ring_queue *fuse_uring_create_queue(struct fuse_ring *ring,
> - int qid)
> +static int fuse_uring_buf_ring_setup(struct io_uring_cmd *cmd,
> + struct fuse_ring_queue *queue,
> + unsigned int issue_flags)
> +{
> + int err;
> +
> + err = io_uring_cmd_buf_ring_pin(cmd, FUSE_URING_RINGBUF_GROUP,
> + issue_flags, &queue->bufring);
> + if (err)
> + return err;
> +
> + if (!io_uring_cmd_is_kmbuf_ring(cmd, FUSE_URING_RINGBUF_GROUP,
> + issue_flags)) {
> + io_uring_cmd_buf_ring_unpin(cmd,
> + FUSE_URING_RINGBUF_GROUP,
> + issue_flags);
> + return -EINVAL;
> + }
> +
> + queue->use_bufring = true;
> +
> + return 0;
> +}
> +
> +static struct fuse_ring_queue *
> +fuse_uring_create_queue(struct io_uring_cmd *cmd, struct fuse_ring *ring,
> + int qid, bool use_bufring, unsigned int issue_flags)
> {
> struct fuse_conn *fc = ring->fc;
> struct fuse_ring_queue *queue;
> struct list_head *pq;
> + int err;
>
> queue = kzalloc(sizeof(*queue), GFP_KERNEL_ACCOUNT);
> if (!queue)
> - return NULL;
> + return ERR_PTR(-ENOMEM);
> pq = kcalloc(FUSE_PQ_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL);
> if (!pq) {
> kfree(queue);
> - return NULL;
> + return ERR_PTR(-ENOMEM);
> }
>
> queue->qid = qid;
> @@ -307,6 +337,15 @@ static struct fuse_ring_queue *fuse_uring_create_queue(struct fuse_ring *ring,
> queue->fpq.processing = pq;
> fuse_pqueue_init(&queue->fpq);
>
> + if (use_bufring) {
> + err = fuse_uring_buf_ring_setup(cmd, queue, issue_flags);
> + if (err) {
> + kfree(pq);
> + kfree(queue);
> + return ERR_PTR(err);
> + }
> + }
> +
> spin_lock(&fc->lock);
> if (ring->queues[qid]) {
> spin_unlock(&fc->lock);
> @@ -584,6 +623,35 @@ static int fuse_uring_out_header_has_err(struct fuse_out_header *oh,
> return err;
> }
>
> +static int get_kernel_ring_header(struct fuse_ring_ent *ent,
> + enum fuse_uring_header_type type,
> + struct iov_iter *headers_iter)
> +{
> + size_t offset;
> +
> + switch (type) {
> + case FUSE_URING_HEADER_IN_OUT:
> + /* No offset - start of header */
> + offset = 0;
> + break;
> + case FUSE_URING_HEADER_OP:
> + offset = offsetof(struct fuse_uring_req_header, op_in);
> + break;
> + case FUSE_URING_HEADER_RING_ENT:
> + offset = offsetof(struct fuse_uring_req_header, ring_ent_in_out);
> + break;
> + default:
> + WARN_ONCE(1, "Invalid header type: %d\n", type);
> + return -EINVAL;
> + }
> +
> + *headers_iter = ent->headers_iter;
> + if (offset)
> + iov_iter_advance(headers_iter, offset);
> +
> + return 0;
> +}
> +
> static void __user *get_user_ring_header(struct fuse_ring_ent *ent,
> enum fuse_uring_header_type type)
> {
> @@ -605,17 +673,38 @@ static __always_inline int copy_header_to_ring(struct fuse_ring_ent *ent,
> const void *header,
> size_t header_size)
> {
> - void __user *ring = get_user_ring_header(ent, type);
> + bool use_bufring = ent->queue->use_bufring;
> + int err = 0;
>
> - if (!ring)
> - return -EINVAL;
> + if (use_bufring) {
> + struct iov_iter iter;
> +
> + err = get_kernel_ring_header(ent, type, &iter);
> + if (err)
> + goto done;
> +
> + if (copy_to_iter(header, header_size, &iter) != header_size)
> + err = -EFAULT;
> + } else {
> + void __user *ring = get_user_ring_header(ent, type);
> +
> + if (!ring) {
> + err = -EINVAL;
> + goto done;
> + }
>
> - if (copy_to_user(ring, header, header_size)) {
> - pr_info_ratelimited("Copying header to ring failed.\n");
> - return -EFAULT;
> + if (copy_to_user(ring, header, header_size))
> + err = -EFAULT;
> }
>
> - return 0;
> +done:
> + if (err)
> + pr_info_ratelimited("Copying header to ring failed: "
> + "header_type=%u, header_size=%zu, "
> + "use_bufring=%d\n", type, header_size,
> + use_bufring);
> +
> + return err;
> }
>
> static __always_inline int copy_header_from_ring(struct fuse_ring_ent *ent,
> @@ -623,17 +712,38 @@ static __always_inline int copy_header_from_ring(struct fuse_ring_ent *ent,
> void *header,
> size_t header_size)
> {
> - const void __user *ring = get_user_ring_header(ent, type);
> + bool use_bufring = ent->queue->use_bufring;
> + int err = 0;
>
> - if (!ring)
> - return -EINVAL;
> + if (use_bufring) {
> + struct iov_iter iter;
> +
> + err = get_kernel_ring_header(ent, type, &iter);
> + if (err)
> + goto done;
> +
> + if (copy_from_iter(header, header_size, &iter) != header_size)
> + err = -EFAULT;
> + } else {
> + const void __user *ring = get_user_ring_header(ent, type);
> +
> + if (!ring) {
> + err = -EINVAL;
> + goto done;
> + }
>
> - if (copy_from_user(header, ring, header_size)) {
> - pr_info_ratelimited("Copying header from ring failed.\n");
> - return -EFAULT;
> + if (copy_from_user(header, ring, header_size))
> + err = -EFAULT;
> }
>
> - return 0;
> +done:
> + if (err)
> + pr_info_ratelimited("Copying header from ring failed: "
> + "header_type=%u, header_size=%zu, "
> + "use_bufring=%d\n", type, header_size,
> + use_bufring);
> +
> + return err;
> }
>
> static int setup_fuse_copy_state(struct fuse_copy_state *cs,
> @@ -643,14 +753,23 @@ static int setup_fuse_copy_state(struct fuse_copy_state *cs,
> {
> int err;
>
> - err = import_ubuf(dir, ent->payload, ring->max_payload_sz, iter);
> - if (err) {
> - pr_info_ratelimited("fuse: Import of user buffer failed\n");
> - return err;
> + if (!ent->queue->use_bufring) {
> + err = import_ubuf(dir, ent->payload, ring->max_payload_sz, iter);
> + if (err) {
> + pr_info_ratelimited("fuse: Import of user buffer "
> + "failed\n");
> + return err;
> + }
> }
>
> fuse_copy_init(cs, dir == ITER_DEST, iter);
>
> + if (ent->queue->use_bufring) {
> + cs->is_kaddr = true;
> + cs->len = ent->payload_kvec.iov_len;
> + cs->kaddr = ent->payload_kvec.iov_base;
> + }
> +
> cs->is_uring = true;
> cs->req = req;
>
> @@ -762,6 +881,103 @@ static int fuse_uring_copy_to_ring(struct fuse_ring_ent *ent,
> sizeof(req->in.h));
> }
>
> +static bool fuse_uring_req_has_payload(struct fuse_req *req)
> +{
> + struct fuse_args *args = req->args;
> +
> + return args->in_numargs > 1 || args->out_numargs;
> +}
> +
> +static int fuse_uring_select_buffer(struct fuse_ring_ent *ent,
> + unsigned int issue_flags)
> + __must_hold(&queue->lock)
> +{
> + struct io_br_sel sel;
> + size_t len = 0;
> +
> + lockdep_assert_held(&ent->queue->lock);
> +
> + /* Get a buffer to use for the payload */
> + sel = io_ring_buffer_select(cmd_to_io_kiocb(ent->cmd), &len,
> + ent->queue->bufring, issue_flags);
> + if (sel.val)
> + return sel.val;
> + if (!sel.kaddr)
> + return -ENOENT;
> +
> + ent->payload_kvec.iov_base = sel.kaddr;
> + ent->payload_kvec.iov_len = len;
> + ent->ringbuf_buf_id = sel.buf_id;
> +
> + return 0;
> +}
> +
> +static void fuse_uring_clean_up_buffer(struct fuse_ring_ent *ent,
> + unsigned int issue_flags)
> + __must_hold(&queue->lock)
> +{
> + struct kvec *kvec = &ent->payload_kvec;
> +
> + lockdep_assert_held(&ent->queue->lock);
> +
> + if (!ent->queue->use_bufring || !kvec->iov_base)
> + return;
> +
> + WARN_ON_ONCE(io_uring_cmd_kmbuffer_recycle(ent->cmd,
> + FUSE_URING_RINGBUF_GROUP,
> + (u64)kvec->iov_base,
> + kvec->iov_len,
> + ent->ringbuf_buf_id,
> + issue_flags));
> +
> + memset(kvec, 0, sizeof(*kvec));
> +}
> +
> +static int fuse_uring_next_req_update_buffer(struct fuse_ring_ent *ent,
> + struct fuse_req *req,
> + unsigned int issue_flags)
> +{
> + bool buffer_selected;
> + bool has_payload;
> +
> + if (!ent->queue->use_bufring)
> + return 0;
> +
> + ent->headers_iter.data_source = false;
> +
> + buffer_selected = ent->payload_kvec.iov_base != 0;
> + has_payload = fuse_uring_req_has_payload(req);
> +
> + if (has_payload && !buffer_selected)
> + return fuse_uring_select_buffer(ent, issue_flags);
> +
> + if (!has_payload && buffer_selected)
> + fuse_uring_clean_up_buffer(ent, issue_flags);
> +
> + return 0;
> +}
> +
> +static int fuse_uring_prep_buffer(struct fuse_ring_ent *ent,
> + struct fuse_req *req, unsigned int dir,
> + unsigned issue_flags)
> +{
> + if (!ent->queue->use_bufring)
> + return 0;
> +
> + if (dir == ITER_SOURCE) {
> + ent->headers_iter.data_source = true;
> + return 0;
> + }
> +
> + ent->headers_iter.data_source = false;
> +
> + /* no payload to copy, can skip selecting a buffer */
> + if (!fuse_uring_req_has_payload(req))
> + return 0;
> +
> + return fuse_uring_select_buffer(ent, issue_flags);
> +}
> +
> static int fuse_uring_prepare_send(struct fuse_ring_ent *ent,
> struct fuse_req *req)
> {
> @@ -824,21 +1040,29 @@ static void fuse_uring_add_req_to_ring_ent(struct fuse_ring_ent *ent,
> }
>
> /* Fetch the next fuse request if available */
> -static struct fuse_req *fuse_uring_ent_assign_req(struct fuse_ring_ent *ent)
> +static struct fuse_req *fuse_uring_ent_assign_req(struct fuse_ring_ent *ent,
> + unsigned int issue_flags)
> __must_hold(&queue->lock)
> {
> struct fuse_req *req;
> struct fuse_ring_queue *queue = ent->queue;
> struct list_head *req_queue = &queue->fuse_req_queue;
> + int err;
>
> lockdep_assert_held(&queue->lock);
>
> /* get and assign the next entry while it is still holding the lock */
> req = list_first_entry_or_null(req_queue, struct fuse_req, list);
> - if (req)
> - fuse_uring_add_req_to_ring_ent(ent, req);
> + if (req) {
> + err = fuse_uring_next_req_update_buffer(ent, req, issue_flags);
> + if (!err) {
> + fuse_uring_add_req_to_ring_ent(ent, req);
> + return req;
> + }
Hmm, who/what is going to handle the request if this fails? Let's say we
have just one ring entry per queue and now it fails here - this ring
entry will go into FRRS_AVAILABLE and nothing will pull from the queue
anymore. I guess it _should_ not happen, but some protection would be good.
In order to handle it, at least one other ent needs to be in flight.
Thanks,
Bernd
next prev parent reply other threads:[~2026-02-03 23:58 UTC|newest]
Thread overview: 59+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-12-23 0:34 [PATCH v3 00/25] fuse/io-uring: add kernel-managed buffer rings and zero-copy Joanne Koong
2025-12-23 0:34 ` [PATCH v3 01/25] io_uring/kbuf: refactor io_buf_pbuf_register() logic into generic helpers Joanne Koong
2025-12-23 0:34 ` [PATCH v3 02/25] io_uring/kbuf: rename io_unregister_pbuf_ring() to io_unregister_buf_ring() Joanne Koong
2025-12-23 0:35 ` [PATCH v3 03/25] io_uring/kbuf: add support for kernel-managed buffer rings Joanne Koong
2025-12-23 0:35 ` [PATCH v3 04/25] io_uring/kbuf: add mmap " Joanne Koong
2025-12-23 0:35 ` [PATCH v3 05/25] io_uring/kbuf: support kernel-managed buffer rings in buffer selection Joanne Koong
2026-01-03 22:45 ` Caleb Sander Mateos
2026-01-09 0:56 ` Joanne Koong
2025-12-23 0:35 ` [PATCH v3 06/25] io_uring/kbuf: add buffer ring pinning/unpinning Joanne Koong
2025-12-29 21:07 ` Gabriel Krisman Bertazi
2025-12-30 1:27 ` Joanne Koong
2025-12-30 17:54 ` Gabriel Krisman Bertazi
2026-01-02 17:57 ` Joanne Koong
2026-01-08 18:40 ` Caleb Sander Mateos
2026-01-08 19:18 ` Caleb Sander Mateos
2026-01-09 1:04 ` Joanne Koong
2025-12-23 0:35 ` [PATCH v3 07/25] io_uring/kbuf: add recycling for kernel managed buffer rings Joanne Koong
2025-12-29 22:00 ` Gabriel Krisman Bertazi
2025-12-29 22:20 ` Gabriel Krisman Bertazi
2025-12-30 1:15 ` Joanne Koong
2026-01-05 18:49 ` Gabriel Krisman Bertazi
2026-01-08 20:37 ` Caleb Sander Mateos
2026-01-09 1:07 ` Joanne Koong
2025-12-23 0:35 ` [PATCH v3 08/25] io_uring: add io_uring_cmd_fixed_index_get() and io_uring_cmd_fixed_index_put() Joanne Koong
2026-01-08 19:02 ` Caleb Sander Mateos
2026-01-08 20:44 ` Caleb Sander Mateos
2026-01-09 0:55 ` Joanne Koong
2026-01-09 1:08 ` Caleb Sander Mateos
2025-12-23 0:35 ` [PATCH v3 09/25] io_uring/kbuf: add io_uring_cmd_is_kmbuf_ring() Joanne Koong
2025-12-23 0:35 ` [PATCH v3 10/25] io_uring/kbuf: export io_ring_buffer_select() Joanne Koong
2026-01-08 20:34 ` Caleb Sander Mateos
2026-01-09 0:38 ` Joanne Koong
2026-01-09 2:43 ` Caleb Sander Mateos
2025-12-23 0:35 ` [PATCH v3 11/25] io_uring/kbuf: return buffer id in buffer selection Joanne Koong
2025-12-23 0:35 ` [PATCH v3 12/25] io_uring/cmd: set selected buffer index in __io_uring_cmd_done() Joanne Koong
2025-12-23 0:35 ` [PATCH v3 13/25] fuse: refactor io-uring logic for getting next fuse request Joanne Koong
2025-12-23 0:35 ` [PATCH v3 14/25] fuse: refactor io-uring header copying to ring Joanne Koong
2026-01-11 16:03 ` Bernd Schubert
2026-01-16 22:33 ` Joanne Koong
2026-01-27 23:06 ` Bernd Schubert
2025-12-23 0:35 ` [PATCH v3 15/25] fuse: refactor io-uring header copying from ring Joanne Koong
2025-12-23 0:35 ` [PATCH v3 16/25] fuse: use enum types for header copying Joanne Koong
2025-12-23 0:35 ` [PATCH v3 17/25] fuse: refactor setting up copy state for payload copying Joanne Koong
2025-12-23 0:35 ` [PATCH v3 18/25] fuse: support buffer copying for kernel addresses Joanne Koong
2025-12-23 0:35 ` [PATCH v3 19/25] fuse: add io-uring kernel-managed buffer ring Joanne Koong
2026-02-03 23:58 ` Bernd Schubert [this message]
2025-12-23 0:35 ` [PATCH v3 20/25] io_uring/rsrc: rename io_buffer_register_bvec()/io_buffer_unregister_bvec() Joanne Koong
2026-01-08 20:52 ` Caleb Sander Mateos
2025-12-23 0:35 ` [PATCH v3 21/25] io_uring/rsrc: split io_buffer_register_request() logic Joanne Koong
2026-01-08 21:04 ` Caleb Sander Mateos
2026-01-09 0:18 ` Joanne Koong
2025-12-23 0:35 ` [PATCH v3 22/25] io_uring/rsrc: Allow buffer release callback to be optional Joanne Koong
2025-12-23 0:35 ` [PATCH v3 23/25] io_uring/rsrc: add io_buffer_register_bvec() Joanne Koong
2026-01-08 21:09 ` Caleb Sander Mateos
2026-01-09 0:10 ` Joanne Koong
2025-12-23 0:35 ` [PATCH v3 24/25] fuse: add zero-copy over io-uring Joanne Koong
2026-01-08 21:15 ` Caleb Sander Mateos
2026-01-09 0:07 ` Joanne Koong
2025-12-23 0:35 ` [PATCH v3 25/25] docs: fuse: add io-uring bufring and zero-copy documentation Joanne Koong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4e406b1f-723b-4dc7-8e50-1a5ef6ea11b3@bsbernd.com \
--to=bernd@bsbernd.com \
--cc=asml.silence@gmail.com \
--cc=axboe@kernel.dk \
--cc=bschubert@ddn.com \
--cc=csander@purestorage.com \
--cc=io-uring@vger.kernel.org \
--cc=joannelkoong@gmail.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=miklos@szeredi.hu \
--cc=xiaobing.li@samsung.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox