public inbox for [email protected]
 help / color / mirror / Atom feed
From: Bernd Schubert <[email protected]>
To: Miklos Szeredi <[email protected]>
Cc: Jens Axboe <[email protected]>,
	Pavel Begunkov <[email protected]>,
	 [email protected], [email protected],
	 Joanne Koong <[email protected]>,
	Josef Bacik <[email protected]>,
	 Amir Goldstein <[email protected]>,
	Ming Lei <[email protected]>,  David Wei <[email protected]>,
	[email protected],  Bernd Schubert <[email protected]>
Subject: [PATCH RFC v7 15/16] fuse: {io-uring} Prevent mount point hang on fuse-server termination
Date: Wed, 27 Nov 2024 14:40:32 +0100	[thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>

When the fuse-server terminates while the fuse-client or kernel
still has queued URING_CMDs, these commands retain references
to the struct file used by the fuse connection. This prevents
fuse_dev_release() from being invoked, resulting in a hung mount
point.

This patch addresses the issue by making queued URING_CMDs
cancelable, allowing fuse_dev_release() to proceed as expected
and preventing the mount point from hanging.

Signed-off-by: Bernd Schubert <[email protected]>
---
 fs/fuse/dev_uring.c   | 109 ++++++++++++++++++++++++++++++++++++++------------
 fs/fuse/dev_uring_i.h |  12 ++++++
 2 files changed, 95 insertions(+), 26 deletions(-)

diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c
index 94dc3f56d4ab604eb4b87d3b9731567e3a214b0a..fe24e31bbfecec526f88bc5b82b0aa132357c1cc 100644
--- a/fs/fuse/dev_uring.c
+++ b/fs/fuse/dev_uring.c
@@ -26,6 +26,7 @@ bool fuse_uring_enabled(void)
 
 struct fuse_uring_cmd_pdu {
 	struct fuse_ring_ent *ring_ent;
+	struct fuse_ring_queue *queue;
 };
 
 const struct fuse_iqueue_ops fuse_io_uring_ops;
@@ -221,6 +222,7 @@ static struct fuse_ring_queue *fuse_uring_create_queue(struct fuse_ring *ring,
 	struct fuse_conn *fc = ring->fc;
 	struct fuse_ring_queue *queue;
 	struct list_head *pq;
+	struct fuse_ring_ent *ent, *next;
 
 	queue = kzalloc(sizeof(*queue), GFP_KERNEL_ACCOUNT);
 	if (!queue)
@@ -249,6 +251,12 @@ static struct fuse_ring_queue *fuse_uring_create_queue(struct fuse_ring *ring,
 	INIT_LIST_HEAD(&queue->ent_in_userspace);
 	INIT_LIST_HEAD(&queue->fuse_req_queue);
 	INIT_LIST_HEAD(&queue->fuse_req_bg_queue);
+	INIT_LIST_HEAD(&queue->ent_released);
+
+	list_for_each_entry_safe(ent, next, &queue->ent_released, list) {
+		list_del_init(&ent->list);
+		kfree(ent);
+	}
 
 	queue->fpq.processing = pq;
 	fuse_pqueue_init(&queue->fpq);
@@ -281,24 +289,27 @@ static void fuse_uring_stop_fuse_req_end(struct fuse_ring_ent *ent)
 /*
  * Release a request/entry on connection tear down
  */
-static void fuse_uring_entry_teardown(struct fuse_ring_ent *ent,
-					 bool need_cmd_done)
+static void fuse_uring_entry_teardown(struct fuse_ring_ent *ent)
 {
-	/*
-	 * fuse_request_end() might take other locks like fi->lock and
-	 * can lead to lock ordering issues
-	 */
-	lockdep_assert_not_held(&ent->queue->lock);
+	struct fuse_ring_queue *queue = ent->queue;
 
-	if (need_cmd_done)
+	if (ent->need_cmd_done)
 		io_uring_cmd_done(ent->cmd, -ENOTCONN, 0,
 				  IO_URING_F_UNLOCKED);
 
 	if (ent->fuse_req)
 		fuse_uring_stop_fuse_req_end(ent);
 
-	list_del_init(&ent->list);
-	kfree(ent);
+	/*
+	 * The entry must not be freed immediately, because IO_URING_F_CANCEL
+	 * accesses entries through a direct pointer - there is a risk of a
+	 * race with daemon termination (which triggers IO_URING_F_CANCEL and
+	 * accesses entries without checking the list state first).
+	 */
+	spin_lock(&queue->lock);
+	list_move(&ent->list, &queue->ent_released);
+	ent->state = FRRS_RELEASED;
+	spin_unlock(&queue->lock);
 }
 
 static void fuse_uring_stop_list_entries(struct list_head *head,
@@ -318,15 +329,15 @@ static void fuse_uring_stop_list_entries(struct list_head *head,
 			continue;
 		}
 
+		ent->need_cmd_done = ent->state != FRRS_USERSPACE;
+		ent->state = FRRS_TEARDOWN;
 		list_move(&ent->list, &to_teardown);
 	}
 	spin_unlock(&queue->lock);
 
 	/* no queue lock to avoid lock order issues */
 	list_for_each_entry_safe(ent, next, &to_teardown, list) {
-		bool need_cmd_done = ent->state != FRRS_USERSPACE;
-
-		fuse_uring_entry_teardown(ent, need_cmd_done);
+		fuse_uring_entry_teardown(ent);
 		queue_refs = atomic_dec_return(&ring->queue_refs);
 
 		WARN_ON_ONCE(queue_refs < 0);
@@ -434,6 +445,49 @@ void fuse_uring_stop_queues(struct fuse_ring *ring)
 	}
 }
 
+/*
+ * Handle IO_URING_F_CANCEL, which typically arrives on daemon termination
+ */
+static void fuse_uring_cancel(struct io_uring_cmd *cmd,
+			      unsigned int issue_flags, struct fuse_conn *fc)
+{
+	struct fuse_uring_cmd_pdu *pdu = (struct fuse_uring_cmd_pdu *)cmd->pdu;
+	struct fuse_ring_queue *queue = pdu->queue;
+	struct fuse_ring_ent *ent = pdu->ring_ent;
+	bool need_cmd_done = false;
+
+	/*
+	 * Direct access to ent - it must not be destroyed as long as
+	 * IO_URING_F_CANCEL might still arrive.
+	 */
+	spin_lock(&queue->lock);
+	if (ent->state == FRRS_WAIT) {
+		ent->state = FRRS_USERSPACE;
+		list_move(&ent->list, &queue->ent_in_userspace);
+		need_cmd_done = true;
+	}
+	spin_unlock(&queue->lock);
+
+	if (need_cmd_done)
+		io_uring_cmd_done(cmd, -ENOTCONN, 0, issue_flags);
+
+	/*
+	 * releasing the last entry should trigger fuse_dev_release() if
+	 * the daemon was terminated
+	 */
+}
+
+static void fuse_uring_prepare_cancel(struct io_uring_cmd *cmd, int issue_flags,
+				      struct fuse_ring_ent *ring_ent)
+{
+	struct fuse_uring_cmd_pdu *pdu = (struct fuse_uring_cmd_pdu *)cmd->pdu;
+
+	pdu->ring_ent = ring_ent;
+	pdu->queue = ring_ent->queue;
+
+	io_uring_cmd_mark_cancelable(cmd, issue_flags);
+}
+
 /*
  * Checks for errors and stores it into the request
  */
@@ -638,8 +692,10 @@ static int fuse_uring_send_next_to_ring(struct fuse_ring_ent *ring_ent)
  * Make a ring entry available for fuse_req assignment
  */
 static void fuse_uring_ent_avail(struct fuse_ring_ent *ring_ent,
-				 struct fuse_ring_queue *queue)
+				 struct fuse_ring_queue *queue,
+				 unsigned int issue_flags)
 {
+	fuse_uring_prepare_cancel(ring_ent->cmd, issue_flags, ring_ent);
 	list_move(&ring_ent->list, &queue->ent_avail_queue);
 	ring_ent->state = FRRS_WAIT;
 }
@@ -742,7 +798,8 @@ static void fuse_uring_commit(struct fuse_ring_ent *ring_ent,
  * Get the next fuse req and send it
  */
 static void fuse_uring_next_fuse_req(struct fuse_ring_ent *ring_ent,
-				    struct fuse_ring_queue *queue)
+				    struct fuse_ring_queue *queue,
+				    unsigned int issue_flags)
 {
 	int has_next, err;
 	int prev_state = ring_ent->state;
@@ -751,7 +808,7 @@ static void fuse_uring_next_fuse_req(struct fuse_ring_ent *ring_ent,
 		spin_lock(&queue->lock);
 		has_next = fuse_uring_ent_assign_req(ring_ent);
 		if (!has_next) {
-			fuse_uring_ent_avail(ring_ent, queue);
+			fuse_uring_ent_avail(ring_ent, queue, issue_flags);
 			spin_unlock(&queue->lock);
 			break; /* no request left */
 		}
@@ -826,7 +883,7 @@ static int fuse_uring_commit_fetch(struct io_uring_cmd *cmd, int issue_flags,
 	 * and fetching is done in one step vs legacy fuse, which has separated
 	 * read (fetch request) and write (commit result).
 	 */
-	fuse_uring_next_fuse_req(ring_ent, queue);
+	fuse_uring_next_fuse_req(ring_ent, queue, issue_flags);
 	return 0;
 }
 
@@ -868,7 +925,7 @@ static void _fuse_uring_fetch(struct fuse_ring_ent *ring_ent,
 	struct fuse_iqueue *fiq = &fc->iq;
 
 	spin_lock(&queue->lock);
-	fuse_uring_ent_avail(ring_ent, queue);
+	fuse_uring_ent_avail(ring_ent, queue, issue_flags);
 	ring_ent->cmd = cmd;
 	spin_unlock(&queue->lock);
 
@@ -1022,6 +1079,11 @@ int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
 	if (!fc->connected || fc->aborted)
 		return fc->aborted ? -ECONNABORTED : -ENOTCONN;
 
+	if ((unlikely(issue_flags & IO_URING_F_CANCEL))) {
+		fuse_uring_cancel(cmd, issue_flags, fc);
+		return 0;
+	}
+
 	switch (cmd_op) {
 	case FUSE_URING_REQ_FETCH:
 		err = fuse_uring_fetch(cmd, issue_flags, fc);
@@ -1074,7 +1136,7 @@ fuse_uring_send_req_in_task(struct io_uring_cmd *cmd,
 
 	return;
 err:
-	fuse_uring_next_fuse_req(ring_ent, queue);
+	fuse_uring_next_fuse_req(ring_ent, queue, issue_flags);
 }
 
 static struct fuse_ring_queue *fuse_uring_task_to_queue(struct fuse_ring *ring)
@@ -1129,14 +1191,11 @@ void fuse_uring_queue_fuse_req(struct fuse_iqueue *fiq, struct fuse_req *req)
 
 	if (ring_ent) {
 		struct io_uring_cmd *cmd = ring_ent->cmd;
-		struct fuse_uring_cmd_pdu *pdu =
-			(struct fuse_uring_cmd_pdu *)cmd->pdu;
-
 		err = -EIO;
 		if (WARN_ON_ONCE(ring_ent->state != FRRS_FUSE_REQ))
 			goto err;
 
-		pdu->ring_ent = ring_ent;
+		/* pdu already set by preparing IO_URING_F_CANCEL */
 		io_uring_cmd_complete_in_task(cmd, fuse_uring_send_req_in_task);
 	}
 
@@ -1189,12 +1248,10 @@ bool fuse_uring_queue_bq_req(struct fuse_req *req)
 				       list);
 	if (ring_ent && req) {
 		struct io_uring_cmd *cmd = ring_ent->cmd;
-		struct fuse_uring_cmd_pdu *pdu =
-			(struct fuse_uring_cmd_pdu *)cmd->pdu;
 
 		fuse_uring_add_req_to_ring_ent(ring_ent, req);
 
-		pdu->ring_ent = ring_ent;
+		/* pdu already set by preparing IO_URING_F_CANCEL */
 		io_uring_cmd_complete_in_task(cmd, fuse_uring_send_req_in_task);
 	}
 	spin_unlock(&queue->lock);
diff --git a/fs/fuse/dev_uring_i.h b/fs/fuse/dev_uring_i.h
index 0826fb1c03e2e38dedad56552ea09461965e248f..df541247f07e413923b13b6bf203f301e8c1710a 100644
--- a/fs/fuse/dev_uring_i.h
+++ b/fs/fuse/dev_uring_i.h
@@ -28,6 +28,12 @@ enum fuse_ring_req_state {
 
 	/* The ring entry is in or on the way to user space */
 	FRRS_USERSPACE,
+
+	/* The ring entry is in teardown */
+	FRRS_TEARDOWN,
+
+	/* The ring entry is released, but not freed yet */
+	FRRS_RELEASED,
 };
 
 /** A fuse ring entry, part of the ring queue */
@@ -52,6 +58,9 @@ struct fuse_ring_ent {
 	 */
 	unsigned int state;
 
+	/* The entry needs io_uring_cmd_done for teardown */
+	unsigned int need_cmd_done;
+
 	struct fuse_req *fuse_req;
 };
 
@@ -84,6 +93,9 @@ struct fuse_ring_queue {
 	/* entries in userspace */
 	struct list_head ent_in_userspace;
 
+	/* entries that are released */
+	struct list_head ent_released;
+
 	/* fuse requests waiting for an entry slot */
 	struct list_head fuse_req_queue;
 

-- 
2.43.0


  parent reply	other threads:[~2024-11-27 13:41 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-11-27 13:40 [PATCH RFC v7 00/16] fuse: fuse-over-io-uring Bernd Schubert
2024-11-27 13:40 ` [PATCH RFC v7 01/16] fuse: rename to fuse_dev_end_requests and make non-static Bernd Schubert
2024-11-28  0:19   ` Joanne Koong
2024-11-27 13:40 ` [PATCH RFC v7 02/16] fuse: Move fuse_get_dev to header file Bernd Schubert
2024-11-28  0:20   ` Joanne Koong
2024-11-27 13:40 ` [PATCH RFC v7 03/16] fuse: Move request bits Bernd Schubert
2024-11-28  0:21   ` Joanne Koong
2024-11-27 13:40 ` [PATCH RFC v7 04/16] fuse: Add fuse-io-uring design documentation Bernd Schubert
2024-12-03 12:30   ` Pavel Begunkov
2024-11-27 13:40 ` [PATCH RFC v7 05/16] fuse: make args->in_args[0] to be always the header Bernd Schubert
2024-11-28  0:27   ` Joanne Koong
2024-11-27 13:40 ` [PATCH RFC v7 06/16] fuse: {uring} Handle SQEs - register commands Bernd Schubert
2024-11-28  2:23   ` Joanne Koong
2024-11-28 18:20     ` Bernd Schubert
2024-12-03 13:24   ` Pavel Begunkov
2024-12-03 13:49     ` Bernd Schubert
2024-12-03 14:16       ` Pavel Begunkov
2024-12-03 13:38   ` Pavel Begunkov
2024-11-27 13:40 ` [PATCH RFC v7 07/16] fuse: Make fuse_copy non static Bernd Schubert
2024-11-27 13:40 ` [PATCH RFC v7 08/16] fuse: Add fuse-io-uring handling into fuse_copy Bernd Schubert
2024-11-27 13:40 ` [PATCH RFC v7 09/16] fuse: {uring} Add uring sqe commit and fetch support Bernd Schubert
2024-12-03 13:47   ` Pavel Begunkov
2024-11-27 13:40 ` [PATCH RFC v7 10/16] fuse: {uring} Handle teardown of ring entries Bernd Schubert
2024-11-27 13:40 ` [PATCH RFC v7 11/16] fuse: {uring} Allow to queue fg requests through io-uring Bernd Schubert
2024-12-03 14:09   ` Pavel Begunkov
2024-12-03 22:46     ` Bernd Schubert
2024-11-27 13:40 ` [PATCH RFC v7 12/16] fuse: {uring} Allow to queue bg " Bernd Schubert
2024-11-27 13:40 ` [PATCH RFC v7 13/16] io_uring/cmd: let cmds to know about dying task Bernd Schubert
2024-12-03 12:15   ` Pavel Begunkov
2024-12-03 12:15     ` Bernd Schubert
2024-11-27 13:40 ` [PATCH RFC v7 14/16] fuse: {uring} Handle IO_URING_F_TASK_DEAD Bernd Schubert
2024-12-03 12:20   ` Pavel Begunkov
2024-11-27 13:40 ` Bernd Schubert [this message]
2024-11-27 13:40 ` [PATCH RFC v7 16/16] fuse: enable fuse-over-io-uring Bernd Schubert
2024-11-27 13:45 ` [PATCH RFC v7 00/16] fuse: fuse-over-io-uring Bernd Schubert
2024-12-03 14:24 ` Pavel Begunkov
2024-12-03 14:32   ` Bernd Schubert

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241127-fuse-uring-for-6-10-rfc4-v7-15-934b3a69baca@ddn.com \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox