public inbox for io-uring@vger.kernel.org
 help / color / mirror / Atom feed
From: Joanne Koong <joannelkoong@gmail.com>
To: miklos@szeredi.hu, axboe@kernel.dk
Cc: linux-fsdevel@vger.kernel.org, bschubert@ddn.com,
	asml.silence@gmail.com, io-uring@vger.kernel.org,
	xiaobing.li@samsung.com, csander@purestorage.com,
	kernel-team@meta.com
Subject: [PATCH v2 8/8] fuse: support io-uring registered buffers
Date: Mon, 27 Oct 2025 15:28:07 -0700	[thread overview]
Message-ID: <20251027222808.2332692-9-joannelkoong@gmail.com> (raw)
In-Reply-To: <20251027222808.2332692-1-joannelkoong@gmail.com>

Add support for io-uring registered buffers for fuse daemons
communicating through the io-uring interface. Daemons may register
buffers ahead of time, which eliminates the overhead of
pinning/unpinning user pages and translating virtual addresses on every
server-kernel interaction.

To support page-aligned payloads, the buffer is structured such that the
payload is placed at the front of the buffer and the
fuse_uring_req_header is offset from the end.

To remain backwards compatible, fuse uring still needs to support
non-registered buffers as well.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
---
 fs/fuse/dev_uring.c   | 200 +++++++++++++++++++++++++++++++++---------
 fs/fuse/dev_uring_i.h |  27 +++++-
 2 files changed, 183 insertions(+), 44 deletions(-)

diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
index c6b22b14b354..f501bc81f331 100644
--- a/fs/fuse/dev_uring.c
+++ b/fs/fuse/dev_uring.c
@@ -580,6 +580,22 @@ static int fuse_uring_out_header_has_err(struct fuse_out_header *oh,
 	return err;
 }
 
+static void *get_kernel_ring_header(struct fuse_ring_ent *ent,
+				    enum fuse_uring_header_type type)
+{
+	switch (type) {
+	case FUSE_URING_HEADER_IN_OUT:
+		return &ent->headers->in_out;
+	case FUSE_URING_HEADER_OP:
+		return &ent->headers->op_in;
+	case FUSE_URING_HEADER_RING_ENT:
+		return &ent->headers->ring_ent_in_out;
+	}
+
+	WARN_ON_ONCE(1);
+	return NULL;
+}
+
 static void __user *get_user_ring_header(struct fuse_ring_ent *ent,
 					 enum fuse_uring_header_type type)
 {
@@ -600,16 +616,22 @@ static int copy_header_to_ring(struct fuse_ring_ent *ent,
 			       enum fuse_uring_header_type type,
 			       const void *header, size_t header_size)
 {
-	void __user *ring = get_user_ring_header(ent, type);
+	if (ent->fixed_buffer) {
+		void *ring = get_kernel_ring_header(ent, type);
 
-	if (!ring)
-		return -EINVAL;
+		if (!ring)
+			return -EINVAL;
+		memcpy(ring, header, header_size);
+	} else {
+		void __user *ring = get_user_ring_header(ent, type);
 
-	if (copy_to_user(ring, header, header_size)) {
-		pr_info_ratelimited("Copying header to ring failed.\n");
-		return -EFAULT;
+		if (!ring)
+			return -EINVAL;
+		if (copy_to_user(ring, header, header_size)) {
+			pr_info_ratelimited("Copying header to ring failed.\n");
+			return -EFAULT;
+		}
 	}
-
 	return 0;
 }
 
@@ -617,14 +639,21 @@ static int copy_header_from_ring(struct fuse_ring_ent *ent,
 				 enum fuse_uring_header_type type,
 				 void *header, size_t header_size)
 {
-	const void __user *ring = get_user_ring_header(ent, type);
+	if (ent->fixed_buffer) {
+		const void *ring = get_kernel_ring_header(ent, type);
 
-	if (!ring)
-		return -EINVAL;
+		if (!ring)
+			return -EINVAL;
+		memcpy(header, ring, header_size);
+	} else {
+		const void __user *ring = get_user_ring_header(ent, type);
 
-	if (copy_from_user(header, ring, header_size)) {
-		pr_info_ratelimited("Copying header from ring failed.\n");
-		return -EFAULT;
+		if (!ring)
+			return -EINVAL;
+		if (copy_from_user(header, ring, header_size)) {
+			pr_info_ratelimited("Copying header from ring failed.\n");
+			return -EFAULT;
+		}
 	}
 
 	return 0;
@@ -637,11 +666,15 @@ static int setup_fuse_copy_state(struct fuse_ring *ring, struct fuse_req *req,
 {
 	int err;
 
-	err = import_ubuf(rw, ent->user_payload, ring->max_payload_sz,
-			  iter);
-	if (err) {
-		pr_info_ratelimited("fuse: Import of user buffer failed\n");
-		return err;
+	if (ent->fixed_buffer) {
+		*iter = ent->payload_iter;
+	} else {
+		err = import_ubuf(rw, ent->user_payload, ring->max_payload_sz,
+				  iter);
+		if (err) {
+			pr_info_ratelimited("fuse: Import of user buffer failed\n");
+			return err;
+		}
 	}
 
 	fuse_copy_init(cs, rw == ITER_DEST, iter);
@@ -754,6 +787,62 @@ static int fuse_uring_copy_to_ring(struct fuse_ring_ent *ent,
 				   sizeof(req->in.h));
 }
 
+/*
+ * Prepare fixed buffer for access. Sets up the payload iter and kmaps the
+ * header.
+ *
+ * Callers must call fuse_uring_unmap_buffer() in the same scope to release the
+ * header mapping.
+ *
+ * For non-fixed buffers, this is a no-op.
+ */
+static int fuse_uring_map_buffer(struct fuse_ring_ent *ent)
+{
+	size_t header_size = sizeof(struct fuse_uring_req_header);
+	struct iov_iter iter;
+	struct page *header_page;
+	size_t count, start;
+	ssize_t copied;
+	int err;
+
+	if (!ent->fixed_buffer)
+		return 0;
+
+	err = io_uring_cmd_import_fixed_full(ITER_DEST, &iter, ent->cmd, 0);
+	if (err)
+		return err;
+
+	count = iov_iter_count(&iter);
+	if (count < header_size || count & (PAGE_SIZE - 1))
+		return -EINVAL;
+
+	/* Adjust the payload iter to protect the header from any overwrites */
+	ent->payload_iter = iter;
+	iov_iter_truncate(&ent->payload_iter, count - header_size);
+
+	/* Set up the headers */
+	iov_iter_advance(&iter, count - header_size);
+	copied = iov_iter_get_pages2(&iter, &header_page, header_size, 1, &start);
+	if (copied < header_size)
+		return -EFAULT;
+	ent->headers = kmap_local_page(header_page) + start;
+
+	/*
+	 * We can release the acquired reference on the header page immediately
+	 * since the page is pinned and io_uring_cmd_import_fixed_full()
+	 * prevents it from being unpinned while we are using it.
+	 */
+	put_page(header_page);
+
+	return 0;
+}
+
+static void fuse_uring_unmap_buffer(struct fuse_ring_ent *ent)
+{
+	if (ent->fixed_buffer)
+		kunmap_local(ent->headers);
+}
+
 static int fuse_uring_prepare_send(struct fuse_ring_ent *ent,
 				   struct fuse_req *req)
 {
@@ -932,6 +1021,7 @@ static int fuse_uring_commit_fetch(struct io_uring_cmd *cmd, int issue_flags,
 	unsigned int qid = READ_ONCE(cmd_req->qid);
 	struct fuse_pqueue *fpq;
 	struct fuse_req *req;
+	bool next_req;
 
 	err = -ENOTCONN;
 	if (!ring)
@@ -982,6 +1072,13 @@ static int fuse_uring_commit_fetch(struct io_uring_cmd *cmd, int issue_flags,
 
 	/* without the queue lock, as other locks are taken */
 	fuse_uring_prepare_cancel(cmd, issue_flags, ent);
+
+	err = fuse_uring_map_buffer(ent);
+	if (err) {
+		fuse_uring_req_end(ent, req, err);
+		return err;
+	}
+
 	fuse_uring_commit(ent, req, issue_flags);
 
 	/*
@@ -990,7 +1087,9 @@ static int fuse_uring_commit_fetch(struct io_uring_cmd *cmd, int issue_flags,
 	 * and fetching is done in one step vs legacy fuse, which has separated
 	 * read (fetch request) and write (commit result).
 	 */
-	if (fuse_uring_get_next_fuse_req(ent, queue))
+	next_req = fuse_uring_get_next_fuse_req(ent, queue);
+	fuse_uring_unmap_buffer(ent);
+	if (next_req)
 		fuse_uring_send(ent, cmd, 0, issue_flags);
 	return 0;
 }
@@ -1086,39 +1185,49 @@ fuse_uring_create_ring_ent(struct io_uring_cmd *cmd,
 	struct iovec iov[FUSE_URING_IOV_SEGS];
 	int err;
 
+	err = -ENOMEM;
+	ent = kzalloc(sizeof(*ent), GFP_KERNEL_ACCOUNT);
+	if (!ent)
+		return ERR_PTR(err);
+
+	INIT_LIST_HEAD(&ent->list);
+
+	ent->queue = queue;
+
+	if (READ_ONCE(cmd->sqe->uring_cmd_flags) & IORING_URING_CMD_FIXED) {
+		ent->fixed_buffer = true;
+		atomic_inc(&ring->queue_refs);
+		return ent;
+	}
+
 	err = fuse_uring_get_iovec_from_sqe(cmd->sqe, iov);
 	if (err) {
 		pr_info_ratelimited("Failed to get iovec from sqe, err=%d\n",
 				    err);
-		return ERR_PTR(err);
+		goto error;
 	}
 
 	err = -EINVAL;
 	if (iov[0].iov_len < sizeof(struct fuse_uring_req_header)) {
 		pr_info_ratelimited("Invalid header len %zu\n", iov[0].iov_len);
-		return ERR_PTR(err);
+		goto error;
 	}
 
 	payload_size = iov[1].iov_len;
 	if (payload_size < ring->max_payload_sz) {
 		pr_info_ratelimited("Invalid req payload len %zu\n",
 				    payload_size);
-		return ERR_PTR(err);
+		goto error;
 	}
-
-	err = -ENOMEM;
-	ent = kzalloc(sizeof(*ent), GFP_KERNEL_ACCOUNT);
-	if (!ent)
-		return ERR_PTR(err);
-
-	INIT_LIST_HEAD(&ent->list);
-
-	ent->queue = queue;
 	ent->user_headers = iov[0].iov_base;
 	ent->user_payload = iov[1].iov_base;
 
 	atomic_inc(&ring->queue_refs);
 	return ent;
+
+error:
+	kfree(ent);
+	return ERR_PTR(err);
 }
 
 /*
@@ -1249,20 +1358,29 @@ static void fuse_uring_send_in_task(struct io_uring_cmd *cmd,
 {
 	struct fuse_ring_ent *ent = uring_cmd_to_ring_ent(cmd);
 	struct fuse_ring_queue *queue = ent->queue;
+	bool send_ent = true;
 	int err;
 
-	if (!(issue_flags & IO_URING_F_TASK_DEAD)) {
-		err = fuse_uring_prepare_send(ent, ent->fuse_req);
-		if (err) {
-			if (!fuse_uring_get_next_fuse_req(ent, queue))
-				return;
-			err = 0;
-		}
-	} else {
-		err = -ECANCELED;
+	if (issue_flags & IO_URING_F_TASK_DEAD) {
+		fuse_uring_send(ent, cmd, -ECANCELED, issue_flags);
+		return;
+	}
+
+	err = fuse_uring_map_buffer(ent);
+	if (err) {
+		fuse_uring_req_end(ent, ent->fuse_req, err);
+		return;
+	}
+
+	err = fuse_uring_prepare_send(ent, ent->fuse_req);
+	if (err) {
+		send_ent = fuse_uring_get_next_fuse_req(ent, queue);
+		err = 0;
 	}
+	fuse_uring_unmap_buffer(ent);
 
-	fuse_uring_send(ent, cmd, err, issue_flags);
+	if (send_ent)
+		fuse_uring_send(ent, cmd, err, issue_flags);
 }
 
 static struct fuse_ring_queue *fuse_uring_task_to_queue(struct fuse_ring *ring)
diff --git a/fs/fuse/dev_uring_i.h b/fs/fuse/dev_uring_i.h
index 381fd0b8156a..fe14acccd6a6 100644
--- a/fs/fuse/dev_uring_i.h
+++ b/fs/fuse/dev_uring_i.h
@@ -7,6 +7,7 @@
 #ifndef _FS_FUSE_DEV_URING_I_H
 #define _FS_FUSE_DEV_URING_I_H
 
+#include <linux/uio.h>
 #include "fuse_i.h"
 
 #ifdef CONFIG_FUSE_IO_URING
@@ -38,9 +39,29 @@ enum fuse_ring_req_state {
 
 /** A fuse ring entry, part of the ring queue */
 struct fuse_ring_ent {
-	/* userspace buffer */
-	struct fuse_uring_req_header __user *user_headers;
-	void __user *user_payload;
+	/*
+	 * If true, the buffer was pre-registered by the daemon and the
+	 * pages backing it are pinned in kernel memory. The fixed buffer layout
+	 * is: [payload][header at end]. Use payload_iter and headers for
+	 * copying to/from the ring.
+	 *
+	 * Otherwise, use user_headers and user_payload which point to userspace
+	 * addresses representing the ring memory.
+	 */
+	bool fixed_buffer;
+
+	union {
+		/* fixed_buffer == false */
+		struct {
+			struct fuse_uring_req_header __user *user_headers;
+			void __user *user_payload;
+		};
+		/* fixed_buffer == true */
+		struct {
+			struct fuse_uring_req_header *headers;
+			struct iov_iter payload_iter;
+		};
+	};
 
 	/* the ring queue that owns the request */
 	struct fuse_ring_queue *queue;
-- 
2.47.3


  parent reply	other threads:[~2025-10-27 22:29 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-27 22:27 [PATCH v2 0/8] fuse: support io-uring registered buffers Joanne Koong
2025-10-27 22:28 ` [PATCH v2 1/8] io_uring/uring_cmd: add io_uring_cmd_import_fixed_full() Joanne Koong
2025-10-28  1:28   ` Caleb Sander Mateos
2025-10-29 14:01   ` Pavel Begunkov
2025-10-29 18:37     ` Joanne Koong
2025-10-29 19:59       ` Bernd Schubert
2025-10-30 17:42         ` Pavel Begunkov
2025-10-30 18:06       ` Pavel Begunkov
2025-10-30 22:23         ` Bernd Schubert
2025-10-30 23:50           ` Joanne Koong
2025-10-31 10:27             ` Bernd Schubert
2025-10-31 21:19               ` Joanne Koong
2025-10-30 23:13         ` Joanne Koong
2025-10-27 22:28 ` [PATCH v2 2/8] fuse: refactor io-uring logic for getting next fuse request Joanne Koong
2025-10-30 23:07   ` Bernd Schubert
2025-10-27 22:28 ` [PATCH v2 3/8] fuse: refactor io-uring header copying to ring Joanne Koong
2025-10-30 23:15   ` Bernd Schubert
2025-10-30 23:52     ` Joanne Koong
2025-10-27 22:28 ` [PATCH v2 4/8] fuse: refactor io-uring header copying from ring Joanne Koong
2025-10-27 22:28 ` [PATCH v2 5/8] fuse: use enum types for header copying Joanne Koong
2025-11-05 23:01   ` Bernd Schubert
2025-11-06 21:59     ` Joanne Koong
2025-11-07 22:11       ` Bernd Schubert
2025-10-27 22:28 ` [PATCH v2 6/8] fuse: add user_ prefix to userspace headers and payload fields Joanne Koong
2025-10-28  1:32   ` Caleb Sander Mateos
2025-10-28 23:56     ` Joanne Koong
2025-11-06 13:35   ` Bernd Schubert
2025-10-27 22:28 ` [PATCH v2 7/8] fuse: refactor setting up copy state for payload copying Joanne Koong
2025-11-06 16:53   ` Bernd Schubert
2025-11-06 22:01     ` Joanne Koong
2025-10-27 22:28 ` Joanne Koong [this message]
2025-10-28  1:42   ` [PATCH v2 8/8] fuse: support io-uring registered buffers Caleb Sander Mateos
2025-10-28 23:56     ` Joanne Koong
2025-11-06 19:48   ` Bernd Schubert
2025-11-06 23:09     ` Joanne Koong
2025-11-07 22:16       ` Bernd Schubert
2025-11-07 22:23         ` Bernd Schubert
2025-11-23 20:12       ` Bernd Schubert
2025-11-25  1:13         ` Joanne Koong
2025-11-14 23:59 ` [PATCH v2 0/8] " Joanne Koong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251027222808.2332692-9-joannelkoong@gmail.com \
    --to=joannelkoong@gmail.com \
    --cc=asml.silence@gmail.com \
    --cc=axboe@kernel.dk \
    --cc=bschubert@ddn.com \
    --cc=csander@purestorage.com \
    --cc=io-uring@vger.kernel.org \
    --cc=kernel-team@meta.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=miklos@szeredi.hu \
    --cc=xiaobing.li@samsung.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox