From: Bernd Schubert <[email protected]>
To: Miklos Szeredi <[email protected]>
Cc: Jens Axboe <[email protected]>,
Pavel Begunkov <[email protected]>,
[email protected], [email protected],
Joanne Koong <[email protected]>,
Josef Bacik <[email protected]>,
Amir Goldstein <[email protected]>,
Ming Lei <[email protected]>, David Wei <[email protected]>,
[email protected], Bernd Schubert <[email protected]>
Subject: [PATCH RFC v6 15/16] fuse: {io-uring} Prevent mount point hang on fuse-server termination
Date: Fri, 22 Nov 2024 00:43:31 +0100 [thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>
When the fuse-server terminates while the fuse-client or kernel
still has queued URING_CMDs, these commands retain references
to the struct file used by the fuse connection. This prevents
fuse_dev_release() from being invoked, resulting in a hung mount
point.
This patch addresses the issue by making queued URING_CMDs
cancelable, allowing fuse_dev_release() to proceed as expected
and preventing the mount point from hanging.
Signed-off-by: Bernd Schubert <[email protected]>
---
fs/fuse/dev_uring.c | 103 ++++++++++++++++++++++++++++++++++++++++----------
fs/fuse/dev_uring_i.h | 12 ++++++
2 files changed, 94 insertions(+), 21 deletions(-)
diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
index d0f8f0932e1715babebbc715c1846a5052419eb9..b7a6c3946611a9fdecd4996117b45b3081ad6edd 100644
--- a/fs/fuse/dev_uring.c
+++ b/fs/fuse/dev_uring.c
@@ -23,6 +23,7 @@ MODULE_PARM_DESC(enable_uring,
struct fuse_uring_cmd_pdu {
struct fuse_ring_ent *ring_ent;
+ struct fuse_ring_queue *queue;
};
const struct fuse_iqueue_ops fuse_io_uring_ops;
@@ -221,6 +222,7 @@ static struct fuse_ring_queue *fuse_uring_create_queue(struct fuse_ring *ring,
struct fuse_conn *fc = ring->fc;
struct fuse_ring_queue *queue;
struct list_head *pq;
+ struct fuse_ring_ent *ent, *next;
queue = kzalloc(sizeof(*queue), GFP_KERNEL_ACCOUNT);
if (!queue)
@@ -249,6 +251,12 @@ static struct fuse_ring_queue *fuse_uring_create_queue(struct fuse_ring *ring,
INIT_LIST_HEAD(&queue->ent_in_userspace);
INIT_LIST_HEAD(&queue->fuse_req_queue);
INIT_LIST_HEAD(&queue->fuse_req_bg_queue);
+ INIT_LIST_HEAD(&queue->ent_released);
+
+ list_for_each_entry_safe(ent, next, &queue->ent_released, list) {
+ list_del_init(&ent->list);
+ kfree(ent);
+ }
queue->fpq.processing = pq;
fuse_pqueue_init(&queue->fpq);
@@ -281,8 +289,7 @@ static void fuse_uring_stop_fuse_req_end(struct fuse_ring_ent *ent)
/*
* Release a request/entry on connection tear down
*/
-static void fuse_uring_entry_teardown(struct fuse_ring_ent *ent,
- bool need_cmd_done)
+static void fuse_uring_entry_teardown(struct fuse_ring_ent *ent)
{
struct fuse_ring_queue *queue = ent->queue;
@@ -292,7 +299,7 @@ static void fuse_uring_entry_teardown(struct fuse_ring_ent *ent,
*/
lockdep_assert_not_held(&ent->queue->lock);
- if (need_cmd_done) {
+ if (ent->need_cmd_done) {
pr_devel("qid=%d sending cmd_done\n", queue->qid);
io_uring_cmd_done(ent->cmd, -ENOTCONN, 0,
@@ -302,8 +309,16 @@ static void fuse_uring_entry_teardown(struct fuse_ring_ent *ent,
if (ent->fuse_req)
fuse_uring_stop_fuse_req_end(ent);
- list_del_init(&ent->list);
- kfree(ent);
+ /*
+ * The entry must not be freed immediately: IO_URING_F_CANCEL accesses
+ * entries through a direct pointer, so there is a risk of a race with
+ * daemon termination (which triggers IO_URING_F_CANCEL and accesses
+ * entries without checking the list state first).
+ */
+ spin_lock(&queue->lock);
+ list_move(&ent->list, &queue->ent_released);
+ ent->state = FRRS_RELEASED;
+ spin_unlock(&queue->lock);
}
static void fuse_uring_stop_list_entries(struct list_head *head,
@@ -323,15 +338,15 @@ static void fuse_uring_stop_list_entries(struct list_head *head,
continue;
}
+ ent->need_cmd_done = ent->state != FRRS_USERSPACE;
+ ent->state = FRRS_TEARDOWN;
list_move(&ent->list, &to_teardown);
}
spin_unlock(&queue->lock);
/* no queue lock to avoid lock order issues */
list_for_each_entry_safe(ent, next, &to_teardown, list) {
- bool need_cmd_done = ent->state != FRRS_USERSPACE;
-
- fuse_uring_entry_teardown(ent, need_cmd_done);
+ fuse_uring_entry_teardown(ent);
queue_refs = atomic_dec_return(&ring->queue_refs);
if (WARN_ON_ONCE(queue_refs < 0))
@@ -442,6 +457,49 @@ void fuse_uring_stop_queues(struct fuse_ring *ring)
}
}
+/*
+ * Handle IO_URING_F_CANCEL, which typically arrives on daemon termination
+ */
+static void fuse_uring_cancel(struct io_uring_cmd *cmd,
+ unsigned int issue_flags, struct fuse_conn *fc)
+{
+ struct fuse_uring_cmd_pdu *pdu = (struct fuse_uring_cmd_pdu *)cmd->pdu;
+ struct fuse_ring_queue *queue = pdu->queue;
+ struct fuse_ring_ent *ent = pdu->ring_ent;
+ bool need_cmd_done = false;
+
+ /*
+ * Direct access to ent - it must not be destroyed as long as
+ * IO_URING_F_CANCEL might still arrive
+ */
+ spin_lock(&queue->lock);
+ if (ent->state == FRRS_WAIT) {
+ ent->state = FRRS_USERSPACE;
+ list_move(&ent->list, &queue->ent_in_userspace);
+ need_cmd_done = true;
+ }
+ spin_unlock(&queue->lock);
+
+ if (need_cmd_done)
+ io_uring_cmd_done(cmd, -ENOTCONN, 0, issue_flags);
+
+ /*
+ * releasing the last entry should trigger fuse_dev_release() if
+ * the daemon was terminated
+ */
+}
+
+static void fuse_uring_prepare_cancel(struct io_uring_cmd *cmd, int issue_flags,
+ struct fuse_ring_ent *ring_ent)
+{
+ struct fuse_uring_cmd_pdu *pdu = (struct fuse_uring_cmd_pdu *)cmd->pdu;
+
+ pdu->ring_ent = ring_ent;
+ pdu->queue = ring_ent->queue;
+
+ io_uring_cmd_mark_cancelable(cmd, issue_flags);
+}
+
/*
* Checks for errors and stores it into the request
*/
@@ -665,7 +723,8 @@ static int fuse_uring_send_next_to_ring(struct fuse_ring_ent *ring_ent)
* Put a ring request onto hold, it is no longer used for now.
*/
static void fuse_uring_ent_avail(struct fuse_ring_ent *ring_ent,
- struct fuse_ring_queue *queue)
+ struct fuse_ring_queue *queue,
+ unsigned int issue_flags)
__must_hold(&queue->lock)
{
struct fuse_ring *ring = queue->ring;
@@ -682,6 +741,7 @@ static void fuse_uring_ent_avail(struct fuse_ring_ent *ring_ent,
return;
}
+ fuse_uring_prepare_cancel(ring_ent->cmd, issue_flags, ring_ent);
list_move(&ring_ent->list, &queue->ent_avail_queue);
ring_ent->state = FRRS_WAIT;
@@ -789,7 +849,8 @@ static void fuse_uring_commit(struct fuse_ring_ent *ring_ent,
* Get the next fuse req and send it
*/
static void fuse_uring_next_fuse_req(struct fuse_ring_ent *ring_ent,
- struct fuse_ring_queue *queue)
+ struct fuse_ring_queue *queue,
+ unsigned int issue_flags)
{
int has_next, err;
int prev_state = ring_ent->state;
@@ -798,7 +859,7 @@ static void fuse_uring_next_fuse_req(struct fuse_ring_ent *ring_ent,
spin_lock(&queue->lock);
has_next = fuse_uring_ent_assign_req(ring_ent);
if (!has_next) {
- fuse_uring_ent_avail(ring_ent, queue);
+ fuse_uring_ent_avail(ring_ent, queue, issue_flags);
spin_unlock(&queue->lock);
break; /* no request left */
}
@@ -873,7 +934,7 @@ static int fuse_uring_commit_fetch(struct io_uring_cmd *cmd, int issue_flags,
* and fetching is done in one step vs legacy fuse, which has separated
* read (fetch request) and write (commit result).
*/
- fuse_uring_next_fuse_req(ring_ent, queue);
+ fuse_uring_next_fuse_req(ring_ent, queue, issue_flags);
return 0;
}
@@ -915,7 +976,7 @@ static void _fuse_uring_fetch(struct fuse_ring_ent *ring_ent,
struct fuse_iqueue *fiq = &fc->iq;
spin_lock(&queue->lock);
- fuse_uring_ent_avail(ring_ent, queue);
+ fuse_uring_ent_avail(ring_ent, queue, issue_flags);
ring_ent->cmd = cmd;
spin_unlock(&queue->lock);
@@ -1085,6 +1146,11 @@ int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
if (fc->aborted)
return err;
+ if ((unlikely(issue_flags & IO_URING_F_CANCEL))) {
+ fuse_uring_cancel(cmd, issue_flags, fc);
+ return 0;
+ }
+
switch (cmd_op) {
case FUSE_URING_REQ_FETCH:
err = fuse_uring_fetch(cmd, issue_flags, fc);
@@ -1142,7 +1208,7 @@ fuse_uring_send_req_in_task(struct io_uring_cmd *cmd,
return;
err:
- fuse_uring_next_fuse_req(ring_ent, queue);
+ fuse_uring_next_fuse_req(ring_ent, queue, issue_flags);
}
static struct fuse_ring_queue *fuse_uring_task_to_queue(struct fuse_ring *ring)
@@ -1197,14 +1263,11 @@ void fuse_uring_queue_fuse_req(struct fuse_iqueue *fiq, struct fuse_req *req)
if (ring_ent) {
struct io_uring_cmd *cmd = ring_ent->cmd;
- struct fuse_uring_cmd_pdu *pdu =
- (struct fuse_uring_cmd_pdu *)cmd->pdu;
-
err = -EIO;
if (WARN_ON_ONCE(ring_ent->state != FRRS_FUSE_REQ))
goto err;
- pdu->ring_ent = ring_ent;
+ /* pdu already set by preparing IO_URING_F_CANCEL */
io_uring_cmd_complete_in_task(cmd, fuse_uring_send_req_in_task);
}
@@ -1257,12 +1320,10 @@ bool fuse_uring_queue_bq_req(struct fuse_req *req)
list);
if (ring_ent && req) {
struct io_uring_cmd *cmd = ring_ent->cmd;
- struct fuse_uring_cmd_pdu *pdu =
- (struct fuse_uring_cmd_pdu *)cmd->pdu;
fuse_uring_add_req_to_ring_ent(ring_ent, req);
- pdu->ring_ent = ring_ent;
+ /* pdu already set by preparing IO_URING_F_CANCEL */
io_uring_cmd_complete_in_task(cmd, fuse_uring_send_req_in_task);
}
spin_unlock(&queue->lock);
diff --git a/fs/fuse/dev_uring_i.h b/fs/fuse/dev_uring_i.h
index 8426337361c72a30dca8f6fd9012ea3827160091..6af7754249623102f48a4c5c924a21b20851925f 100644
--- a/fs/fuse/dev_uring_i.h
+++ b/fs/fuse/dev_uring_i.h
@@ -28,6 +28,12 @@ enum fuse_ring_req_state {
/* The ring entry is in or on the way to user space */
FRRS_USERSPACE,
+
+ /* The ring entry is in teardown */
+ FRRS_TEARDOWN,
+
+ /* The ring entry is released, but not freed yet */
+ FRRS_RELEASED,
};
/** A fuse ring entry, part of the ring queue */
@@ -52,6 +58,9 @@ struct fuse_ring_ent {
*/
unsigned int state;
+ /* The entry needs io_uring_cmd_done for teardown */
+ unsigned int need_cmd_done;
+
struct fuse_req *fuse_req;
};
@@ -84,6 +93,9 @@ struct fuse_ring_queue {
/* entries in userspace */
struct list_head ent_in_userspace;
+ /* entries that are released */
+ struct list_head ent_released;
+
/* fuse requests waiting for an entry slot */
struct list_head fuse_req_queue;
--
2.43.0
next prev parent reply other threads:[~2024-11-22 0:16 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-21 23:43 [PATCH RFC v6 00/16] fuse: fuse-over-io-uring Bernd Schubert
2024-11-21 23:43 ` [PATCH RFC v6 01/16] fuse: rename to fuse_dev_end_requests and make non-static Bernd Schubert
2024-11-21 23:43 ` [PATCH RFC v6 02/16] fuse: Move fuse_get_dev to header file Bernd Schubert
2024-11-21 23:43 ` [PATCH RFC v6 03/16] fuse: Move request bits Bernd Schubert
2024-11-21 23:43 ` [PATCH RFC v6 04/16] fuse: Add fuse-io-uring design documentation Bernd Schubert
2024-11-21 23:43 ` [PATCH RFC v6 05/16] fuse: make args->in_args[0] to be always the header Bernd Schubert
2024-11-21 23:43 ` [PATCH RFC v6 06/16] fuse: {uring} Handle SQEs - register commands Bernd Schubert
2024-11-21 23:43 ` [PATCH RFC v6 07/16] fuse: Make fuse_copy non static Bernd Schubert
2024-11-21 23:43 ` [PATCH RFC v6 08/16] fuse: Add fuse-io-uring handling into fuse_copy Bernd Schubert
2024-11-21 23:43 ` [PATCH RFC v6 09/16] fuse: {uring} Add uring sqe commit and fetch support Bernd Schubert
2024-11-21 23:43 ` [PATCH RFC v6 10/16] fuse: {uring} Handle teardown of ring entries Bernd Schubert
2024-11-21 23:43 ` [PATCH RFC v6 11/16] fuse: {uring} Allow to queue fg requests through io-uring Bernd Schubert
2024-11-21 23:43 ` [PATCH RFC v6 12/16] fuse: {uring} Allow to queue to the ring Bernd Schubert
2024-11-21 23:43 ` [PATCH RFC v6 13/16] io_uring/cmd: let cmds to know about dying task Bernd Schubert
2024-11-21 23:43 ` [PATCH RFC v6 14/16] fuse: {uring} Handle IO_URING_F_TASK_DEAD Bernd Schubert
2024-11-21 23:43 ` Bernd Schubert [this message]
2024-11-21 23:43 ` [PATCH RFC v6 16/16] fuse: enable fuse-over-io-uring Bernd Schubert
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241122-fuse-uring-for-6-10-rfc4-v6-15-28e6cdd0e914@ddn.com \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox