public inbox for io-uring@vger.kernel.org
From: Bernd Schubert <bernd@bsbernd.com>
To: Joanne Koong <joannelkoong@gmail.com>,
	miklos@szeredi.hu, axboe@kernel.dk
Cc: bschubert@ddn.com, asml.silence@gmail.com,
	io-uring@vger.kernel.org, csander@purestorage.com,
	xiaobing.li@samsung.com, linux-fsdevel@vger.kernel.org
Subject: Re: [PATCH v3 19/25] fuse: add io-uring kernel-managed buffer ring
Date: Wed, 4 Feb 2026 00:58:03 +0100
Message-ID: <4e406b1f-723b-4dc7-8e50-1a5ef6ea11b3@bsbernd.com>
In-Reply-To: <20251223003522.3055912-20-joannelkoong@gmail.com>



On 12/23/25 01:35, Joanne Koong wrote:
> Add io-uring kernel-managed buffer ring capability for fuse daemons
> communicating through the io-uring interface.
> 
> This has two benefits:
> a) eliminates the overhead of pinning/unpinning user pages and
> translating virtual addresses for every server-kernel interaction
> 
> b) reduces the amount of memory needed for the buffers per queue and
> allows buffers to be reused across entries. Incremental buffer
> consumption, when added, will allow a buffer to be used across multiple
> requests.
> 
> Buffer ring usage is set on a per-queue basis. In order to use this, the
> daemon needs to have preregistered a kernel-managed buffer ring and a
> fixed buffer at index 0 that will hold all the headers, and set the
> "use_bufring" field during registration. The kernel-managed buffer ring
> will be pinned for the lifetime of the connection.
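
Side note for other readers, to make the setup sequence concrete: if I
understand the series right, the daemon side would look roughly like the
sketch below. Untested, and IOU_PBUF_RING_KERNEL_MANAGED is just my
placeholder for whatever registration flag patch 03/25 ends up exposing -
the bgid has to match FUSE_URING_RINGBUF_GROUP (0) and the fixed buffer
holding the headers goes at index 0, as described above:

	/* kernel-managed buffer ring for the payloads;
	 * flag name below is a placeholder, not the real uapi name
	 */
	struct io_uring_buf_reg reg = {
		.ring_entries = nr_bufs,
		.bgid = 0,		/* FUSE_URING_RINGBUF_GROUP */
	};
	io_uring_register_buf_ring(&ring, &reg, IOU_PBUF_RING_KERNEL_MANAGED);

	/* one fixed buffer at index 0 that holds all request headers */
	struct iovec hdr = {
		.iov_base = headers,
		.iov_len  = nr_ents * sizeof(struct fuse_uring_req_header),
	};
	io_uring_register_buffers(&ring, &hdr, 1);

	/* then set use_bufring in the FUSE_IO_URING_CMD_REGISTER payload */
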
> 
> Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
> ---
>  fs/fuse/dev_uring.c       | 423 ++++++++++++++++++++++++++++++++------
>  fs/fuse/dev_uring_i.h     |  30 ++-
>  include/uapi/linux/fuse.h |  15 +-
>  3 files changed, 399 insertions(+), 69 deletions(-)
> 
> diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
> index b57871f92d08..e9905f09c3ad 100644
> --- a/fs/fuse/dev_uring.c
> +++ b/fs/fuse/dev_uring.c
> @@ -10,6 +10,8 @@
>  #include "fuse_trace.h"
>  
>  #include <linux/fs.h>
> +#include <linux/io_uring.h>
> +#include <linux/io_uring/buf.h>
>  #include <linux/io_uring/cmd.h>
>  
>  static bool __read_mostly enable_uring;
> @@ -19,6 +21,8 @@ MODULE_PARM_DESC(enable_uring,
>  
>  #define FUSE_URING_IOV_SEGS 2 /* header and payload */
>  
> +#define FUSE_URING_RINGBUF_GROUP 0
> +#define FUSE_URING_FIXED_HEADERS_OFFSET 0
>  
>  bool fuse_uring_enabled(void)
>  {
> @@ -276,20 +280,46 @@ static struct fuse_ring *fuse_uring_create(struct fuse_conn *fc)
>  	return res;
>  }
>  
> -static struct fuse_ring_queue *fuse_uring_create_queue(struct fuse_ring *ring,
> -						       int qid)
> +static int fuse_uring_buf_ring_setup(struct io_uring_cmd *cmd,
> +				     struct fuse_ring_queue *queue,
> +				     unsigned int issue_flags)
> +{
> +	int err;
> +
> +	err = io_uring_cmd_buf_ring_pin(cmd, FUSE_URING_RINGBUF_GROUP,
> +					issue_flags, &queue->bufring);
> +	if (err)
> +		return err;
> +
> +	if (!io_uring_cmd_is_kmbuf_ring(cmd, FUSE_URING_RINGBUF_GROUP,
> +					issue_flags)) {
> +		io_uring_cmd_buf_ring_unpin(cmd,
> +					    FUSE_URING_RINGBUF_GROUP,
> +					    issue_flags);
> +		return -EINVAL;
> +	}
> +
> +	queue->use_bufring = true;
> +
> +	return 0;
> +}
> +
> +static struct fuse_ring_queue *
> +fuse_uring_create_queue(struct io_uring_cmd *cmd, struct fuse_ring *ring,
> +			int qid, bool use_bufring, unsigned int issue_flags)
>  {
>  	struct fuse_conn *fc = ring->fc;
>  	struct fuse_ring_queue *queue;
>  	struct list_head *pq;
> +	int err;
>  
>  	queue = kzalloc(sizeof(*queue), GFP_KERNEL_ACCOUNT);
>  	if (!queue)
> -		return NULL;
> +		return ERR_PTR(-ENOMEM);
>  	pq = kcalloc(FUSE_PQ_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL);
>  	if (!pq) {
>  		kfree(queue);
> -		return NULL;
> +		return ERR_PTR(-ENOMEM);
>  	}
>  
>  	queue->qid = qid;
> @@ -307,6 +337,15 @@ static struct fuse_ring_queue *fuse_uring_create_queue(struct fuse_ring *ring,
>  	queue->fpq.processing = pq;
>  	fuse_pqueue_init(&queue->fpq);
>  
> +	if (use_bufring) {
> +		err = fuse_uring_buf_ring_setup(cmd, queue, issue_flags);
> +		if (err) {
> +			kfree(pq);
> +			kfree(queue);
> +			return ERR_PTR(err);
> +		}
> +	}
> +
>  	spin_lock(&fc->lock);
>  	if (ring->queues[qid]) {
>  		spin_unlock(&fc->lock);
> @@ -584,6 +623,35 @@ static int fuse_uring_out_header_has_err(struct fuse_out_header *oh,
>  	return err;
>  }
>  
> +static int get_kernel_ring_header(struct fuse_ring_ent *ent,
> +				  enum fuse_uring_header_type type,
> +				  struct iov_iter *headers_iter)
> +{
> +	size_t offset;
> +
> +	switch (type) {
> +	case FUSE_URING_HEADER_IN_OUT:
> +		/* No offset - start of header */
> +		offset = 0;
> +		break;
> +	case FUSE_URING_HEADER_OP:
> +		offset = offsetof(struct fuse_uring_req_header, op_in);
> +		break;
> +	case FUSE_URING_HEADER_RING_ENT:
> +		offset = offsetof(struct fuse_uring_req_header, ring_ent_in_out);
> +		break;
> +	default:
> +		WARN_ONCE(1, "Invalid header type: %d\n", type);
> +		return -EINVAL;
> +	}
> +
> +	*headers_iter = ent->headers_iter;
> +	if (offset)
> +		iov_iter_advance(headers_iter, offset);
> +
> +	return 0;
> +}
> +
>  static void __user *get_user_ring_header(struct fuse_ring_ent *ent,
>  					 enum fuse_uring_header_type type)
>  {
> @@ -605,17 +673,38 @@ static __always_inline int copy_header_to_ring(struct fuse_ring_ent *ent,
>  					       const void *header,
>  					       size_t header_size)
>  {
> -	void __user *ring = get_user_ring_header(ent, type);
> +	bool use_bufring = ent->queue->use_bufring;
> +	int err = 0;
>  
> -	if (!ring)
> -		return -EINVAL;
> +	if (use_bufring) {
> +		struct iov_iter iter;
> +
> +		err = get_kernel_ring_header(ent, type, &iter);
> +		if (err)
> +			goto done;
> +
> +		if (copy_to_iter(header, header_size, &iter) != header_size)
> +			err = -EFAULT;
> +	} else {
> +		void __user *ring = get_user_ring_header(ent, type);
> +
> +		if (!ring) {
> +			err = -EINVAL;
> +			goto done;
> +		}
>  
> -	if (copy_to_user(ring, header, header_size)) {
> -		pr_info_ratelimited("Copying header to ring failed.\n");
> -		return -EFAULT;
> +		if (copy_to_user(ring, header, header_size))
> +			err = -EFAULT;
>  	}
>  
> -	return 0;
> +done:
> +	if (err)
> +		pr_info_ratelimited("Copying header to ring failed: "
> +				    "header_type=%u, header_size=%zu, "
> +				    "use_bufring=%d\n", type, header_size,
> +				    use_bufring);
> +
> +	return err;
>  }
>  
>  static __always_inline int copy_header_from_ring(struct fuse_ring_ent *ent,
> @@ -623,17 +712,38 @@ static __always_inline int copy_header_from_ring(struct fuse_ring_ent *ent,
>  						 void *header,
>  						 size_t header_size)
>  {
> -	const void __user *ring = get_user_ring_header(ent, type);
> +	bool use_bufring = ent->queue->use_bufring;
> +	int err = 0;
>  
> -	if (!ring)
> -		return -EINVAL;
> +	if (use_bufring) {
> +		struct iov_iter iter;
> +
> +		err = get_kernel_ring_header(ent, type, &iter);
> +		if (err)
> +			goto done;
> +
> +		if (copy_from_iter(header, header_size, &iter) != header_size)
> +			err = -EFAULT;
> +	} else {
> +		const void __user *ring = get_user_ring_header(ent, type);
> +
> +		if (!ring) {
> +			err = -EINVAL;
> +			goto done;
> +		}
>  
> -	if (copy_from_user(header, ring, header_size)) {
> -		pr_info_ratelimited("Copying header from ring failed.\n");
> -		return -EFAULT;
> +		if (copy_from_user(header, ring, header_size))
> +			err = -EFAULT;
>  	}
>  
> -	return 0;
> +done:
> +	if (err)
> +		pr_info_ratelimited("Copying header from ring failed: "
> +				    "header_type=%u, header_size=%zu, "
> +				    "use_bufring=%d\n", type, header_size,
> +				    use_bufring);
> +
> +	return err;
>  }
>  
>  static int setup_fuse_copy_state(struct fuse_copy_state *cs,
> @@ -643,14 +753,23 @@ static int setup_fuse_copy_state(struct fuse_copy_state *cs,
>  {
>  	int err;
>  
> -	err = import_ubuf(dir, ent->payload, ring->max_payload_sz, iter);
> -	if (err) {
> -		pr_info_ratelimited("fuse: Import of user buffer failed\n");
> -		return err;
> +	if (!ent->queue->use_bufring) {
> +		err = import_ubuf(dir, ent->payload, ring->max_payload_sz, iter);
> +		if (err) {
> +			pr_info_ratelimited("fuse: Import of user buffer "
> +					    "failed\n");
> +			return err;
> +		}
>  	}
>  
>  	fuse_copy_init(cs, dir == ITER_DEST, iter);
>  
> +	if (ent->queue->use_bufring) {
> +		cs->is_kaddr = true;
> +		cs->len = ent->payload_kvec.iov_len;
> +		cs->kaddr = ent->payload_kvec.iov_base;
> +	}
> +
>  	cs->is_uring = true;
>  	cs->req = req;
>  
> @@ -762,6 +881,103 @@ static int fuse_uring_copy_to_ring(struct fuse_ring_ent *ent,
>  				   sizeof(req->in.h));
>  }
>  
> +static bool fuse_uring_req_has_payload(struct fuse_req *req)
> +{
> +	struct fuse_args *args = req->args;
> +
> +	return args->in_numargs > 1 || args->out_numargs;
> +}
> +
> +static int fuse_uring_select_buffer(struct fuse_ring_ent *ent,
> +				    unsigned int issue_flags)
> +	__must_hold(&queue->lock)
> +{
> +	struct io_br_sel sel;
> +	size_t len = 0;
> +
> +	lockdep_assert_held(&ent->queue->lock);
> +
> +	/* Get a buffer to use for the payload */
> +	sel = io_ring_buffer_select(cmd_to_io_kiocb(ent->cmd), &len,
> +				    ent->queue->bufring, issue_flags);
> +	if (sel.val)
> +		return sel.val;
> +	if (!sel.kaddr)
> +		return -ENOENT;
> +
> +	ent->payload_kvec.iov_base = sel.kaddr;
> +	ent->payload_kvec.iov_len = len;
> +	ent->ringbuf_buf_id = sel.buf_id;
> +
> +	return 0;
> +}
> +
> +static void fuse_uring_clean_up_buffer(struct fuse_ring_ent *ent,
> +				       unsigned int issue_flags)
> +	__must_hold(&queue->lock)
> +{
> +	struct kvec *kvec = &ent->payload_kvec;
> +
> +	lockdep_assert_held(&ent->queue->lock);
> +
> +	if (!ent->queue->use_bufring || !kvec->iov_base)
> +		return;
> +
> +	WARN_ON_ONCE(io_uring_cmd_kmbuffer_recycle(ent->cmd,
> +						   FUSE_URING_RINGBUF_GROUP,
> +						   (u64)kvec->iov_base,
> +						   kvec->iov_len,
> +						   ent->ringbuf_buf_id,
> +						   issue_flags));
> +
> +	memset(kvec, 0, sizeof(*kvec));
> +}
> +
> +static int fuse_uring_next_req_update_buffer(struct fuse_ring_ent *ent,
> +					     struct fuse_req *req,
> +					     unsigned int issue_flags)
> +{
> +	bool buffer_selected;
> +	bool has_payload;
> +
> +	if (!ent->queue->use_bufring)
> +		return 0;
> +
> +	ent->headers_iter.data_source = false;
> +
> +	buffer_selected = ent->payload_kvec.iov_base != 0;
> +	has_payload = fuse_uring_req_has_payload(req);
> +
> +	if (has_payload && !buffer_selected)
> +		return fuse_uring_select_buffer(ent, issue_flags);
> +
> +	if (!has_payload && buffer_selected)
> +		fuse_uring_clean_up_buffer(ent, issue_flags);
> +
> +	return 0;
> +}
> +
> +static int fuse_uring_prep_buffer(struct fuse_ring_ent *ent,
> +				  struct fuse_req *req, unsigned int dir,
> +				  unsigned issue_flags)
> +{
> +	if (!ent->queue->use_bufring)
> +		return 0;
> +
> +	if (dir == ITER_SOURCE) {
> +		ent->headers_iter.data_source = true;
> +		return 0;
> +	}
> +
> +	ent->headers_iter.data_source = false;
> +
> +	/* no payload to copy, can skip selecting a buffer */
> +	if (!fuse_uring_req_has_payload(req))
> +		return 0;
> +
> +	return fuse_uring_select_buffer(ent, issue_flags);
> +}
> +
>  static int fuse_uring_prepare_send(struct fuse_ring_ent *ent,
>  				   struct fuse_req *req)
>  {
> @@ -824,21 +1040,29 @@ static void fuse_uring_add_req_to_ring_ent(struct fuse_ring_ent *ent,
>  }
>  
>  /* Fetch the next fuse request if available */
> -static struct fuse_req *fuse_uring_ent_assign_req(struct fuse_ring_ent *ent)
> +static struct fuse_req *fuse_uring_ent_assign_req(struct fuse_ring_ent *ent,
> +						  unsigned int issue_flags)
>  	__must_hold(&queue->lock)
>  {
>  	struct fuse_req *req;
>  	struct fuse_ring_queue *queue = ent->queue;
>  	struct list_head *req_queue = &queue->fuse_req_queue;
> +	int err;
>  
>  	lockdep_assert_held(&queue->lock);
>  
>  	/* get and assign the next entry while it is still holding the lock */
>  	req = list_first_entry_or_null(req_queue, struct fuse_req, list);
> -	if (req)
> -		fuse_uring_add_req_to_ring_ent(ent, req);
> +	if (req) {
> +		err = fuse_uring_next_req_update_buffer(ent, req, issue_flags);
> +		if (!err) {
> +			fuse_uring_add_req_to_ring_ent(ent, req);
> +			return req;
> +		}

Hmm, who/what is going to handle the request if this fails? Let's say we
have just one ring entry per queue and it fails here - that ring entry
will go into FRRS_AVAILABLE and nothing will pull from the queue anymore.
I guess it _should_ not happen, but some protection would be good. In
order to handle it, at least one other ent would need to be in flight.
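
Maybe something roughly like this? Untested sketch only, and I did not
check whether fuse_request_end() may be called under queue->lock here -
the point is just to fail the request instead of leaving it stranded:

	req = list_first_entry_or_null(req_queue, struct fuse_req, list);
	if (req) {
		err = fuse_uring_next_req_update_buffer(ent, req, issue_flags);
		if (err) {
			/* untested idea: complete the request with the
			 * error, as no other ent might ever pull it
			 * from the queue
			 */
			list_del_init(&req->list);
			req->out.h.error = err;
			clear_bit(FR_PENDING, &req->flags);
			fuse_request_end(req);
			return NULL;
		}
		fuse_uring_add_req_to_ring_ent(ent, req);
		return req;
	}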

Thanks,
Bernd

Thread overview: 59+ messages
2025-12-23  0:34 [PATCH v3 00/25] fuse/io-uring: add kernel-managed buffer rings and zero-copy Joanne Koong
2025-12-23  0:34 ` [PATCH v3 01/25] io_uring/kbuf: refactor io_buf_pbuf_register() logic into generic helpers Joanne Koong
2025-12-23  0:34 ` [PATCH v3 02/25] io_uring/kbuf: rename io_unregister_pbuf_ring() to io_unregister_buf_ring() Joanne Koong
2025-12-23  0:35 ` [PATCH v3 03/25] io_uring/kbuf: add support for kernel-managed buffer rings Joanne Koong
2025-12-23  0:35 ` [PATCH v3 04/25] io_uring/kbuf: add mmap " Joanne Koong
2025-12-23  0:35 ` [PATCH v3 05/25] io_uring/kbuf: support kernel-managed buffer rings in buffer selection Joanne Koong
2026-01-03 22:45   ` Caleb Sander Mateos
2026-01-09  0:56     ` Joanne Koong
2025-12-23  0:35 ` [PATCH v3 06/25] io_uring/kbuf: add buffer ring pinning/unpinning Joanne Koong
2025-12-29 21:07   ` Gabriel Krisman Bertazi
2025-12-30  1:27     ` Joanne Koong
2025-12-30 17:54       ` Gabriel Krisman Bertazi
2026-01-02 17:57         ` Joanne Koong
2026-01-08 18:40         ` Caleb Sander Mateos
2026-01-08 19:18   ` Caleb Sander Mateos
2026-01-09  1:04     ` Joanne Koong
2025-12-23  0:35 ` [PATCH v3 07/25] io_uring/kbuf: add recycling for kernel managed buffer rings Joanne Koong
2025-12-29 22:00   ` Gabriel Krisman Bertazi
2025-12-29 22:20     ` Gabriel Krisman Bertazi
2025-12-30  1:15       ` Joanne Koong
2026-01-05 18:49         ` Gabriel Krisman Bertazi
2026-01-08 20:37   ` Caleb Sander Mateos
2026-01-09  1:07     ` Joanne Koong
2025-12-23  0:35 ` [PATCH v3 08/25] io_uring: add io_uring_cmd_fixed_index_get() and io_uring_cmd_fixed_index_put() Joanne Koong
2026-01-08 19:02   ` Caleb Sander Mateos
2026-01-08 20:44     ` Caleb Sander Mateos
2026-01-09  0:55       ` Joanne Koong
2026-01-09  1:08         ` Caleb Sander Mateos
2025-12-23  0:35 ` [PATCH v3 09/25] io_uring/kbuf: add io_uring_cmd_is_kmbuf_ring() Joanne Koong
2025-12-23  0:35 ` [PATCH v3 10/25] io_uring/kbuf: export io_ring_buffer_select() Joanne Koong
2026-01-08 20:34   ` Caleb Sander Mateos
2026-01-09  0:38     ` Joanne Koong
2026-01-09  2:43       ` Caleb Sander Mateos
2025-12-23  0:35 ` [PATCH v3 11/25] io_uring/kbuf: return buffer id in buffer selection Joanne Koong
2025-12-23  0:35 ` [PATCH v3 12/25] io_uring/cmd: set selected buffer index in __io_uring_cmd_done() Joanne Koong
2025-12-23  0:35 ` [PATCH v3 13/25] fuse: refactor io-uring logic for getting next fuse request Joanne Koong
2025-12-23  0:35 ` [PATCH v3 14/25] fuse: refactor io-uring header copying to ring Joanne Koong
2026-01-11 16:03   ` Bernd Schubert
2026-01-16 22:33     ` Joanne Koong
2026-01-27 23:06       ` Bernd Schubert
2025-12-23  0:35 ` [PATCH v3 15/25] fuse: refactor io-uring header copying from ring Joanne Koong
2025-12-23  0:35 ` [PATCH v3 16/25] fuse: use enum types for header copying Joanne Koong
2025-12-23  0:35 ` [PATCH v3 17/25] fuse: refactor setting up copy state for payload copying Joanne Koong
2025-12-23  0:35 ` [PATCH v3 18/25] fuse: support buffer copying for kernel addresses Joanne Koong
2025-12-23  0:35 ` [PATCH v3 19/25] fuse: add io-uring kernel-managed buffer ring Joanne Koong
2026-02-03 23:58   ` Bernd Schubert [this message]
2025-12-23  0:35 ` [PATCH v3 20/25] io_uring/rsrc: rename io_buffer_register_bvec()/io_buffer_unregister_bvec() Joanne Koong
2026-01-08 20:52   ` Caleb Sander Mateos
2025-12-23  0:35 ` [PATCH v3 21/25] io_uring/rsrc: split io_buffer_register_request() logic Joanne Koong
2026-01-08 21:04   ` Caleb Sander Mateos
2026-01-09  0:18     ` Joanne Koong
2025-12-23  0:35 ` [PATCH v3 22/25] io_uring/rsrc: Allow buffer release callback to be optional Joanne Koong
2025-12-23  0:35 ` [PATCH v3 23/25] io_uring/rsrc: add io_buffer_register_bvec() Joanne Koong
2026-01-08 21:09   ` Caleb Sander Mateos
2026-01-09  0:10     ` Joanne Koong
2025-12-23  0:35 ` [PATCH v3 24/25] fuse: add zero-copy over io-uring Joanne Koong
2026-01-08 21:15   ` Caleb Sander Mateos
2026-01-09  0:07     ` Joanne Koong
2025-12-23  0:35 ` [PATCH v3 25/25] docs: fuse: add io-uring bufring and zero-copy documentation Joanne Koong
