From: Bernd Schubert <[email protected]>
To: Miklos Szeredi <[email protected]>, Jens Axboe <[email protected]>,
Pavel Begunkov <[email protected]>,
[email protected]
Cc: [email protected], [email protected],
Joanne Koong <[email protected]>,
Josef Bacik <[email protected]>,
Amir Goldstein <[email protected]>,
Bernd Schubert <[email protected]>
Subject: [PATCH RFC v3 08/17] fuse: {uring} Handle SQEs - register commands
Date: Sun, 01 Sep 2024 15:37:02 +0200
Message-ID: <20240901-b4-fuse-uring-rfcv3-without-mmap-v3-8-9207f7391444@ddn.com>
In-Reply-To: <20240901-b4-fuse-uring-rfcv3-without-mmap-v3-0-9207f7391444@ddn.com>
This adds basic support for ring SQEs (with opcode=IORING_OP_URING_CMD).
For now only FUSE_URING_REQ_FETCH is handled to register queue entries.
Signed-off-by: Bernd Schubert <[email protected]>
---
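Note (not part of the commit message): a minimal daemon-side sketch of
how the new command is meant to be used, assuming liburing and a ring
created with IORING_SETUP_SQE128; the helper name and error handling
are made up:

#include <liburing.h>		/* io_uring_get_sqe(), io_uring_prep_rw() */
#include <linux/fuse.h>		/* struct fuse_uring_cmd_req (this patch) */

static int fetch_sqe_prep(struct io_uring *io_ring, int fuse_dev_fd,
			  uint16_t qid, uint16_t tag,
			  void *buf, uint32_t buf_len)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(io_ring);
	struct fuse_uring_cmd_req *req;

	if (!sqe)
		return -EAGAIN;

	/* passthrough command on the fuse device fd */
	io_uring_prep_rw(IORING_OP_URING_CMD, sqe, fuse_dev_fd, NULL, 0, 0);
	sqe->cmd_op = FUSE_URING_REQ_FETCH;

	/* struct fuse_uring_cmd_req lives in the 80-byte SQE cmd area */
	req = (struct fuse_uring_cmd_req *)sqe->cmd;
	req->buf_ptr = (uint64_t)(uintptr_t)buf;
	req->buf_len = buf_len;
	req->qid = qid;
	req->tag = tag;
	req->flags = 0;

	return 0;
}

One such SQE is prepared per (qid, tag) pair and submitted with
io_uring_submit(); the kernel keeps all of them pending (CQEs are only
posted later via io_uring_cmd_done()), which is why fuse_uring_cmd()
returns -EIOCBQUEUED.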
fs/fuse/dev.c | 3 +
fs/fuse/dev_uring.c | 231 ++++++++++++++++++++++++++++++++++++++++++++++
fs/fuse/dev_uring_i.h | 60 ++++++++++++
include/uapi/linux/fuse.h | 38 ++++++++
4 files changed, 332 insertions(+)
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index fec995818a9e..998027825481 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -2477,6 +2477,9 @@ const struct file_operations fuse_dev_operations = {
.fasync = fuse_dev_fasync,
.unlocked_ioctl = fuse_dev_ioctl,
.compat_ioctl = compat_ptr_ioctl,
+#ifdef CONFIG_FUSE_IO_URING
+ .uring_cmd = fuse_uring_cmd,
+#endif
};
EXPORT_SYMBOL_GPL(fuse_dev_operations);
diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
index 4dcb4972242e..46c2274193bf 100644
--- a/fs/fuse/dev_uring.c
+++ b/fs/fuse/dev_uring.c
@@ -29,6 +29,30 @@
#include <linux/topology.h>
#include <linux/io_uring/cmd.h>
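+
+/*
+ * Take the entry back from userspace: it has to be in FRRS_USERSPACE
+ * and is moved to FRRS_COMMIT for kernel side processing.
+ */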
+static int fuse_ring_ring_ent_unset_userspace(struct fuse_ring_ent *ent)
+{
+ if (WARN_ON_ONCE(ent->state != FRRS_USERSPACE))
+ return -EIO;
+
+ ent->state = FRRS_COMMIT;
+ list_del_init(&ent->list);
+
+ return 0;
+}
+
+/* Update conn limits according to ring values */
+static void fuse_uring_conn_cfg_limits(struct fuse_ring *ring)
+{
+ struct fuse_conn *fc = ring->fc;
+
+	/*
+	 * This is not ideal, as multiplying by nr_queues assumes the limit
+	 * is only reached when all queues are used, but even a single queue
+	 * might reach the limit.
+	 */
+ WRITE_ONCE(fc->max_background, ring->nr_queues * ring->max_nr_async);
+}
+
static void fuse_uring_queue_cfg(struct fuse_ring_queue *queue, int qid,
struct fuse_ring *ring)
{
@@ -37,6 +61,11 @@ static void fuse_uring_queue_cfg(struct fuse_ring_queue *queue, int qid,
queue->qid = qid;
queue->ring = ring;
+ spin_lock_init(&queue->lock);
+
+ INIT_LIST_HEAD(&queue->sync_ent_avail_queue);
+ INIT_LIST_HEAD(&queue->async_ent_avail_queue);
+
for (tag = 0; tag < ring->queue_depth; tag++) {
struct fuse_ring_ent *ent = &queue->ring_ent[tag];
@@ -44,6 +73,8 @@ static void fuse_uring_queue_cfg(struct fuse_ring_queue *queue, int qid,
ent->tag = tag;
ent->state = FRRS_INIT;
+
+ INIT_LIST_HEAD(&ent->list);
}
}
@@ -141,3 +172,203 @@ int fuse_uring_conn_cfg(struct file *file, void __user *argp)
kvfree(ring);
return res;
}
+
+/*
+ * Put a ring entry on the available list; it is unused for now.
+ */
+static void fuse_uring_ent_avail(struct fuse_ring_ent *ring_ent,
+ struct fuse_ring_queue *queue)
+ __must_hold(&queue->lock)
+{
+ struct fuse_ring *ring = queue->ring;
+
+ lockdep_assert_held(&queue->lock);
+
+ pr_devel("%s ring=%p qid=%d tag=%d state=%d async=%d\n", __func__,
+ ring, ring_ent->queue->qid, ring_ent->tag, ring_ent->state,
+ ring_ent->async);
+
+ if (WARN_ON(ring_ent->state != FRRS_COMMIT)) {
+ pr_warn("%s qid=%d tag=%d state=%d async=%d\n", __func__,
+ ring_ent->queue->qid, ring_ent->tag, ring_ent->state,
+ ring_ent->async);
+ return;
+ }
+
+ WARN_ON_ONCE(!list_empty(&ring_ent->list));
+
+ if (ring_ent->async)
+ list_add(&ring_ent->list, &queue->async_ent_avail_queue);
+ else
+ list_add(&ring_ent->list, &queue->sync_ent_avail_queue);
+
+ ring_ent->state = FRRS_WAIT;
+}
+
+/*
+ * fuse_uring_req_fetch command handling
+ */
+static int _fuse_uring_fetch(struct fuse_ring_ent *ring_ent,
+ struct io_uring_cmd *cmd, unsigned int issue_flags)
+__must_hold(ring_ent->queue->lock)
+{
+ struct fuse_ring_queue *queue = ring_ent->queue;
+ struct fuse_ring *ring = queue->ring;
+ int nr_ring_sqe;
+
+ lockdep_assert_held(&queue->lock);
+
+	/* register entries for sync (foreground) requests first, then async */
+	if (queue->nr_req_sync >= ring->max_nr_sync) {
+		queue->nr_req_async++;
+		ring_ent->async = 1;
+	} else {
+		queue->nr_req_sync++;
+	}
+
+ fuse_uring_ent_avail(ring_ent, queue);
+
+ if (WARN_ON_ONCE(queue->nr_req_sync +
+ queue->nr_req_async > ring->queue_depth)) {
+		/*
+		 * This should have been caught by the ring state and queue
+		 * depth checks before.
+		 */
+		pr_info("qid=%d tag=%d req cnt (sync=%d async=%d) exceeds depth=%zu\n",
+			queue->qid, ring_ent->tag, queue->nr_req_sync,
+			queue->nr_req_async, ring->queue_depth);
+ return -ERANGE;
+ }
+
+ WRITE_ONCE(ring_ent->cmd, cmd);
+
+ nr_ring_sqe = ring->queue_depth * ring->nr_queues;
+ if (atomic_inc_return(&ring->nr_sqe_init) == nr_ring_sqe) {
+ fuse_uring_conn_cfg_limits(ring);
+ ring->ready = 1;
+ }
+
+ return 0;
+}
+
+static int fuse_uring_fetch(struct fuse_ring_ent *ring_ent,
+ struct io_uring_cmd *cmd, unsigned int issue_flags)
+ __releases(ring_ent->queue->lock)
+{
+ struct fuse_ring *ring = ring_ent->queue->ring;
+ struct fuse_ring_queue *queue = ring_ent->queue;
+ int ret;
+
+	/* The entry must still be in its initial state */
+ ret = -EINVAL;
+ if (ring_ent->state != FRRS_INIT)
+ goto err;
+
+	/*
+	 * FUSE_URING_REQ_FETCH is an initialization exception; the state
+	 * has to be overridden so that
+	 * fuse_ring_ring_ent_unset_userspace() accepts the entry.
+	 */
+ ring_ent->state = FRRS_USERSPACE;
+ ret = fuse_ring_ring_ent_unset_userspace(ring_ent);
+ if (ret != 0) {
+		pr_info_ratelimited(
+			"qid=%d tag=%d register req state %d expected %d\n",
+			queue->qid, ring_ent->tag, ring_ent->state,
+			FRRS_INIT);
+ goto err;
+ }
+
+ ret = _fuse_uring_fetch(ring_ent, cmd, issue_flags);
+ if (ret)
+ goto err;
+
+	/*
+	 * The ring entry is registered now and has to be considered
+	 * during shutdown.
+	 */
+ atomic_inc(&ring->queue_refs);
+err:
+ spin_unlock(&queue->lock);
+ return ret;
+}
+
+/*
+ * Entry function from io_uring to handle the given passthrough command
+ * (opcode IORING_OP_URING_CMD)
+ */
+int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
+{
+ const struct fuse_uring_cmd_req *cmd_req = io_uring_sqe_cmd(cmd->sqe);
+ struct fuse_dev *fud;
+ struct fuse_conn *fc;
+ struct fuse_ring *ring;
+ struct fuse_ring_queue *queue;
+ struct fuse_ring_ent *ring_ent = NULL;
+ u32 cmd_op = cmd->cmd_op;
+ int ret = 0;
+
+ ret = -ENODEV;
+ fud = fuse_get_dev(cmd->file);
+ if (!fud)
+ goto out;
+ fc = fud->fc;
+
+ ring = fc->ring;
+ if (!ring)
+ goto out;
+
+ queue = fud->ring_q;
+ if (!queue)
+ goto out;
+
+ ret = -EINVAL;
+ if (queue->qid != cmd_req->qid)
+ goto out;
+
+	ret = -ERANGE;
+	/* the tag is used as an array index, so it has to be < queue_depth */
+	if (cmd_req->tag >= ring->queue_depth)
+		goto out;
+
+ ring_ent = &queue->ring_ent[cmd_req->tag];
+
+ pr_devel("%s:%d received: cmd op %d qid %d (%p) tag %d (%p)\n",
+ __func__, __LINE__, cmd_op, cmd_req->qid, queue, cmd_req->tag,
+ ring_ent);
+
+ spin_lock(&queue->lock);
+ ret = -ENOTCONN;
+ if (unlikely(fc->aborted || queue->stopped))
+ goto err_unlock;
+
+ switch (cmd_op) {
+ case FUSE_URING_REQ_FETCH:
+ ret = fuse_uring_fetch(ring_ent, cmd, issue_flags);
+ break;
+	default:
+		ret = -EINVAL;
+		pr_devel("Unknown uring command %d\n", cmd_op);
+		goto err_unlock;
+ }
+out:
+ pr_devel("uring cmd op=%d, qid=%d tag=%d ret=%d\n", cmd_op,
+ cmd_req->qid, cmd_req->tag, ret);
+
+ if (ret < 0) {
+ if (ring_ent != NULL) {
+ pr_info_ratelimited("error: uring cmd op=%d, qid=%d tag=%d ret=%d\n",
+ cmd_op, cmd_req->qid, cmd_req->tag,
+ ret);
+
+			/*
+			 * Must not change the entry state here, as userspace
+			 * might have sent random data; valid requests might
+			 * already be registered - don't confuse those.
+			 */
+ }
+ io_uring_cmd_done(cmd, ret, 0, issue_flags);
+ }
+
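+	/*
+	 * Tell io_uring that the command stays pending; on error the
+	 * completion has already been posted via io_uring_cmd_done() above.
+	 */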
+ return -EIOCBQUEUED;
+
+err_unlock:
+ spin_unlock(&queue->lock);
+ goto out;
+}
diff --git a/fs/fuse/dev_uring_i.h b/fs/fuse/dev_uring_i.h
index 26266f923321..6561f4178cac 100644
--- a/fs/fuse/dev_uring_i.h
+++ b/fs/fuse/dev_uring_i.h
@@ -19,6 +19,15 @@ enum fuse_ring_req_state {
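+	/*
+	 * Expected entry lifecycle (the commit path comes with a later
+	 * patch in this series):
+	 * FRRS_INIT -> FRRS_COMMIT (on FUSE_URING_REQ_FETCH) -> FRRS_WAIT
+	 * -> FRRS_USERSPACE (request sent to the daemon) -> FRRS_COMMIT
+	 * (on commit-and-fetch) -> FRRS_WAIT -> ...
+	 */
+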
	/* request is basically initialized */
FRRS_INIT,
+
+	/* ring entry received from userspace and being processed */
+ FRRS_COMMIT,
+
+ /* The ring request waits for a new fuse request */
+ FRRS_WAIT,
+
+ /* request is in or on the way to user space */
+ FRRS_USERSPACE,
};
/* A fuse ring entry, part of the ring queue */
@@ -31,6 +40,13 @@ struct fuse_ring_ent {
/* state the request is currently in */
enum fuse_ring_req_state state;
+
+ /* is this an async or sync entry */
+ unsigned int async : 1;
+
+ struct list_head list;
+
+ struct io_uring_cmd *cmd;
};
struct fuse_ring_queue {
@@ -43,6 +59,30 @@ struct fuse_ring_queue {
/* queue id, typically also corresponds to the cpu core */
unsigned int qid;
+	/*
+	 * queue lock, taken when any value in the queue changes _and_ also
+	 * when a ring entry state changes.
+	 */
+ spinlock_t lock;
+
+ /* available ring entries (struct fuse_ring_ent) */
+ struct list_head async_ent_avail_queue;
+ struct list_head sync_ent_avail_queue;
+
+	/*
+	 * number of registered sync request entries,
+	 * loosely bound to fuse foreground requests
+	 */
+	int nr_req_sync;
+
+	/*
+	 * number of registered async request entries,
+	 * loosely bound to fuse background requests
+	 */
+	int nr_req_async;
+
+ unsigned int stopped : 1;
+
/* size depends on queue depth */
struct fuse_ring_ent ring_ent[] ____cacheline_aligned_in_smp;
};
@@ -79,11 +119,21 @@ struct fuse_ring {
/* numa aware memory allocation */
unsigned int numa_aware : 1;
+	/* Is the ring ready to take requests */
+ unsigned int ready : 1;
+
+ /* number of SQEs initialized */
+ atomic_t nr_sqe_init;
+
+ /* Used to release the ring on stop */
+ atomic_t queue_refs;
+
struct fuse_ring_queue queues[] ____cacheline_aligned_in_smp;
};
void fuse_uring_abort_end_requests(struct fuse_ring *ring);
int fuse_uring_conn_cfg(struct file *file, void __user *argp);
+int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags);
static inline void fuse_uring_conn_destruct(struct fuse_conn *fc)
{
@@ -113,6 +163,11 @@ static inline bool fuse_uring_configured(struct fuse_conn *fc)
return false;
}
+static inline bool fuse_per_core_queue(struct fuse_conn *fc)
+{
+ return fc->ring && fc->ring->per_core_queue;
+}
+
#else /* CONFIG_FUSE_IO_URING */
struct fuse_ring;
@@ -131,6 +186,11 @@ static inline bool fuse_uring_configured(struct fuse_conn *fc)
return false;
}
+static inline bool fuse_per_core_queue(struct fuse_conn *fc)
+{
+ return false;
+}
+
#endif /* CONFIG_FUSE_IO_URING */
#endif /* _FS_FUSE_DEV_URING_I_H */
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 143ed3c1c7b3..586358e9992c 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -1247,6 +1247,12 @@ struct fuse_supp_groups {
#define FUSE_RING_HEADER_BUF_SIZE 4096
#define FUSE_RING_MIN_IN_OUT_ARG_SIZE 4096
+/*
+ * The request is of background type. The daemon side is free to use this
+ * information to handle foreground/background CQEs with different
+ * priorities.
+ */
+#define FUSE_RING_REQ_FLAG_ASYNC (1ull << 0)
+
/**
* This structure mapped onto the
*/
@@ -1272,4 +1278,36 @@ struct fuse_ring_req {
char in_out_arg[];
};
+/**
+ * SQE commands sent to the kernel
+ */
+enum fuse_uring_cmd {
+ FUSE_URING_REQ_INVALID = 0,
+
+ /* submit sqe to kernel to get a request */
+ FUSE_URING_REQ_FETCH = 1,
+
+ /* commit result and fetch next request */
+ FUSE_URING_REQ_COMMIT_AND_FETCH = 2,
+};
+
+/**
+ * This structure is passed in the 80-byte command area of the SQE
+ * (requires a ring set up with IORING_SETUP_SQE128).
+ */
+struct fuse_uring_cmd_req {
+ /* User buffer */
+ uint64_t buf_ptr;
+
+ /* length of the user buffer */
+ uint32_t buf_len;
+
+ /* queue the command is for (queue index) */
+ uint16_t qid;
+
+ /* queue entry (array index) */
+ uint16_t tag;
+
+ uint32_t flags;
+};
+
#endif /* _LINUX_FUSE_H */
--
2.43.0