public inbox for [email protected]
 help / color / mirror / Atom feed
* [PATCH 0/4] address some hangs
@ 2020-12-30 21:34 Pavel Begunkov
  2020-12-30 21:34 ` [PATCH 1/4] io_uring: add a helper for setting a ref node Pavel Begunkov
                   ` (4 more replies)
  0 siblings, 5 replies; 9+ messages in thread
From: Pavel Begunkov @ 2020-12-30 21:34 UTC (permalink / raw)
  To: Jens Axboe, io-uring

1/4 and 2/4 are for hangs during files unregister.

The other two fix not running task works during cancellation.
Instead of patching task_work it moves io_uring_files_cancel()
before PF_EXITING, should be less intrusive. Was there a
particular reasong for doing that in exit_files()?

Pavel Begunkov (4):
  io_uring: add a helper for setting a ref node
  io_uring: fix io_sqe_files_unregister() hangs
  kernel/io_uring: cancel io_uring before task works
  io_uring: cancel requests enqueued as task_work's

 fs/file.c     |  2 --
 fs/io_uring.c | 54 ++++++++++++++++++++++++++++++++++-----------------
 kernel/exit.c |  2 ++
 3 files changed, 38 insertions(+), 20 deletions(-)

-- 
2.24.0


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 1/4] io_uring: add a helper for setting a ref node
  2020-12-30 21:34 [PATCH 0/4] address some hangs Pavel Begunkov
@ 2020-12-30 21:34 ` Pavel Begunkov
  2020-12-30 21:34 ` [PATCH 2/4] io_uring: fix io_sqe_files_unregister() hangs Pavel Begunkov
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 9+ messages in thread
From: Pavel Begunkov @ 2020-12-30 21:34 UTC (permalink / raw)
  To: Jens Axboe, io-uring; +Cc: stable

Setting a new reference node to a file data is not trivial, don't repeat
it, add and use a helper.

Cc: [email protected] # 5.6+
Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index eb4620ff638e..6372aba8d0c2 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7231,6 +7231,16 @@ static void io_file_ref_kill(struct percpu_ref *ref)
 	complete(&data->done);
 }
 
+static void io_sqe_files_set_node(struct fixed_file_data *file_data,
+				  struct fixed_file_ref_node *ref_node)
+{
+	spin_lock_bh(&file_data->lock);
+	file_data->node = ref_node;
+	list_add_tail(&ref_node->node, &file_data->ref_list);
+	spin_unlock_bh(&file_data->lock);
+	percpu_ref_get(&file_data->refs);
+}
+
 static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 {
 	struct fixed_file_data *data = ctx->file_data;
@@ -7758,11 +7768,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
 		return PTR_ERR(ref_node);
 	}
 
-	file_data->node = ref_node;
-	spin_lock_bh(&file_data->lock);
-	list_add_tail(&ref_node->node, &file_data->ref_list);
-	spin_unlock_bh(&file_data->lock);
-	percpu_ref_get(&file_data->refs);
+	io_sqe_files_set_node(file_data, ref_node);
 	return ret;
 out_fput:
 	for (i = 0; i < ctx->nr_user_files; i++) {
@@ -7918,11 +7924,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
 
 	if (needs_switch) {
 		percpu_ref_kill(&data->node->refs);
-		spin_lock_bh(&data->lock);
-		list_add_tail(&ref_node->node, &data->ref_list);
-		data->node = ref_node;
-		spin_unlock_bh(&data->lock);
-		percpu_ref_get(&ctx->file_data->refs);
+		io_sqe_files_set_node(data, ref_node);
 	} else
 		destroy_fixed_file_ref_node(ref_node);
 
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/4] io_uring: fix io_sqe_files_unregister() hangs
  2020-12-30 21:34 [PATCH 0/4] address some hangs Pavel Begunkov
  2020-12-30 21:34 ` [PATCH 1/4] io_uring: add a helper for setting a ref node Pavel Begunkov
@ 2020-12-30 21:34 ` Pavel Begunkov
  2020-12-30 21:34 ` [PATCH 3/4] kernel/io_uring: cancel io_uring before task works Pavel Begunkov
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 9+ messages in thread
From: Pavel Begunkov @ 2020-12-30 21:34 UTC (permalink / raw)
  To: Jens Axboe, io-uring; +Cc: stable

io_sqe_files_unregister() uninterruptibly waits for enqueued ref nodes,
however requests keeping them may never complete, e.g. because of some
userspace dependency. Make sure it's interruptible otherwise it would
hang forever.

Cc: [email protected] # 5.6+
Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6372aba8d0c2..ca46f314640b 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -992,6 +992,10 @@ enum io_mem_account {
 	ACCT_PINNED,
 };
 
+static void destroy_fixed_file_ref_node(struct fixed_file_ref_node *ref_node);
+static struct fixed_file_ref_node *alloc_fixed_file_ref_node(
+			struct io_ring_ctx *ctx);
+
 static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
 			     struct io_comp_state *cs);
 static void io_cqring_fill_event(struct io_kiocb *req, long res);
@@ -7244,11 +7248,15 @@ static void io_sqe_files_set_node(struct fixed_file_data *file_data,
 static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 {
 	struct fixed_file_data *data = ctx->file_data;
-	struct fixed_file_ref_node *ref_node = NULL;
+	struct fixed_file_ref_node *backup_node, *ref_node = NULL;
 	unsigned nr_tables, i;
+	int ret;
 
 	if (!data)
 		return -ENXIO;
+	backup_node = alloc_fixed_file_ref_node(ctx);
+	if (!backup_node)
+		return -ENOMEM;
 
 	spin_lock_bh(&data->lock);
 	ref_node = data->node;
@@ -7260,7 +7268,18 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 
 	/* wait for all refs nodes to complete */
 	flush_delayed_work(&ctx->file_put_work);
-	wait_for_completion(&data->done);
+	do {
+		ret = wait_for_completion_interruptible(&data->done);
+		if (!ret)
+			break;
+		ret = io_run_task_work_sig();
+		if (ret < 0) {
+			percpu_ref_resurrect(&data->refs);
+			reinit_completion(&data->done);
+			io_sqe_files_set_node(data, backup_node);
+			return ret;
+		}
+	} while (1);
 
 	__io_sqe_files_unregister(ctx);
 	nr_tables = DIV_ROUND_UP(ctx->nr_user_files, IORING_MAX_FILES_TABLE);
@@ -7271,6 +7290,7 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 	kfree(data);
 	ctx->file_data = NULL;
 	ctx->nr_user_files = 0;
+	destroy_fixed_file_ref_node(backup_node);
 	return 0;
 }
 
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 3/4] kernel/io_uring: cancel io_uring before task works
  2020-12-30 21:34 [PATCH 0/4] address some hangs Pavel Begunkov
  2020-12-30 21:34 ` [PATCH 1/4] io_uring: add a helper for setting a ref node Pavel Begunkov
  2020-12-30 21:34 ` [PATCH 2/4] io_uring: fix io_sqe_files_unregister() hangs Pavel Begunkov
@ 2020-12-30 21:34 ` Pavel Begunkov
  2021-01-02 16:01   ` Pavel Begunkov
  2020-12-30 21:34 ` [PATCH 4/4] io_uring: cancel requests enqueued as task_work's Pavel Begunkov
  2020-12-31 15:07 ` [PATCH 0/4] address some hangs Jens Axboe
  4 siblings, 1 reply; 9+ messages in thread
From: Pavel Begunkov @ 2020-12-30 21:34 UTC (permalink / raw)
  To: Jens Axboe, io-uring; +Cc: stable

For cancelling io_uring requests it needs either to be able to run
currently enqueued task_wokrs or having it shut down by that moment.
Otherwise io_uring_cancel_files() may be waiting for requests that won't
ever complete.

Go with the first way and do cancellations before setting PF_EXITING and
so before putting the task_work infrastructure into a transition state
where task_work_run() would better not be called.

Cc: [email protected] # 5.5+
Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/file.c     | 2 --
 kernel/exit.c | 2 ++
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/file.c b/fs/file.c
index c0b60961c672..dab120b71e44 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -21,7 +21,6 @@
 #include <linux/rcupdate.h>
 #include <linux/close_range.h>
 #include <net/sock.h>
-#include <linux/io_uring.h>
 
 unsigned int sysctl_nr_open __read_mostly = 1024*1024;
 unsigned int sysctl_nr_open_min = BITS_PER_LONG;
@@ -428,7 +427,6 @@ void exit_files(struct task_struct *tsk)
 	struct files_struct * files = tsk->files;
 
 	if (files) {
-		io_uring_files_cancel(files);
 		task_lock(tsk);
 		tsk->files = NULL;
 		task_unlock(tsk);
diff --git a/kernel/exit.c b/kernel/exit.c
index 3594291a8542..04029e35e69a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -63,6 +63,7 @@
 #include <linux/random.h>
 #include <linux/rcuwait.h>
 #include <linux/compat.h>
+#include <linux/io_uring.h>
 
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
@@ -776,6 +777,7 @@ void __noreturn do_exit(long code)
 		schedule();
 	}
 
+	io_uring_files_cancel(tsk->files);
 	exit_signals(tsk);  /* sets PF_EXITING */
 
 	/* sync mm's RSS info before statistics gathering */
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 4/4] io_uring: cancel requests enqueued as task_work's
  2020-12-30 21:34 [PATCH 0/4] address some hangs Pavel Begunkov
                   ` (2 preceding siblings ...)
  2020-12-30 21:34 ` [PATCH 3/4] kernel/io_uring: cancel io_uring before task works Pavel Begunkov
@ 2020-12-30 21:34 ` Pavel Begunkov
  2020-12-31 15:06   ` Jens Axboe
  2020-12-31 15:07 ` [PATCH 0/4] address some hangs Jens Axboe
  4 siblings, 1 reply; 9+ messages in thread
From: Pavel Begunkov @ 2020-12-30 21:34 UTC (permalink / raw)
  To: Jens Axboe, io-uring; +Cc: stable

Currently request cancellations are happening before PF_EXITING is set,
so it's allowed to call task_work_run(). Even though it should work as
it's not it's safer to remove PF_EXITING checks.

Cc: [email protected] # 5.5+
Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index ca46f314640b..8d4fa0031e0a 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2361,12 +2361,8 @@ static inline unsigned int io_put_rw_kbuf(struct io_kiocb *req)
 
 static inline bool io_run_task_work(void)
 {
-	/*
-	 * Not safe to run on exiting task, and the task_work handling will
-	 * not add work to such a task.
-	 */
-	if (unlikely(current->flags & PF_EXITING))
-		return false;
+	WARN_ON_ONCE(current->flags & PF_EXITING);
+
 	if (current->task_works) {
 		__set_current_state(TASK_RUNNING);
 		task_work_run();
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 4/4] io_uring: cancel requests enqueued as task_work's
  2020-12-30 21:34 ` [PATCH 4/4] io_uring: cancel requests enqueued as task_work's Pavel Begunkov
@ 2020-12-31 15:06   ` Jens Axboe
  0 siblings, 0 replies; 9+ messages in thread
From: Jens Axboe @ 2020-12-31 15:06 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring; +Cc: stable

On 12/30/20 2:34 PM, Pavel Begunkov wrote:
> Currently request cancellations are happening before PF_EXITING is set,
> so it's allowed to call task_work_run(). Even though it should work as
> it's not it's safer to remove PF_EXITING checks.
> 
> Cc: [email protected] # 5.5+
> Signed-off-by: Pavel Begunkov <[email protected]>
> ---
>  fs/io_uring.c | 8 ++------
>  1 file changed, 2 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index ca46f314640b..8d4fa0031e0a 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -2361,12 +2361,8 @@ static inline unsigned int io_put_rw_kbuf(struct io_kiocb *req)
>  
>  static inline bool io_run_task_work(void)
>  {
> -	/*
> -	 * Not safe to run on exiting task, and the task_work handling will
> -	 * not add work to such a task.
> -	 */
> -	if (unlikely(current->flags & PF_EXITING))
> -		return false;
> +	WARN_ON_ONCE(current->flags & PF_EXITING);

Should still include the return, ala:

if (WARN_ON_ONCE(current->flags & PF_EXITING))
	return;

to be on the safe side, otherwise it'll crash anyway if we do hit this
condition.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 0/4] address some hangs
  2020-12-30 21:34 [PATCH 0/4] address some hangs Pavel Begunkov
                   ` (3 preceding siblings ...)
  2020-12-30 21:34 ` [PATCH 4/4] io_uring: cancel requests enqueued as task_work's Pavel Begunkov
@ 2020-12-31 15:07 ` Jens Axboe
  2020-12-31 16:40   ` Pavel Begunkov
  4 siblings, 1 reply; 9+ messages in thread
From: Jens Axboe @ 2020-12-31 15:07 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 12/30/20 2:34 PM, Pavel Begunkov wrote:
> 1/4 and 2/4 are for hangs during files unregister.
> 
> The other two fix not running task works during cancellation.
> Instead of patching task_work it moves io_uring_files_cancel()
> before PF_EXITING, should be less intrusive. Was there a
> particular reasong for doing that in exit_files()?
> 
> Pavel Begunkov (4):
>   io_uring: add a helper for setting a ref node
>   io_uring: fix io_sqe_files_unregister() hangs
>   kernel/io_uring: cancel io_uring before task works
>   io_uring: cancel requests enqueued as task_work's
> 
>  fs/file.c     |  2 --
>  fs/io_uring.c | 54 ++++++++++++++++++++++++++++++++++-----------------
>  kernel/exit.c |  2 ++
>  3 files changed, 38 insertions(+), 20 deletions(-)

Applied 1-3, as they look good. Can you resend 4/4 with the return
added?

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 0/4] address some hangs
  2020-12-31 15:07 ` [PATCH 0/4] address some hangs Jens Axboe
@ 2020-12-31 16:40   ` Pavel Begunkov
  0 siblings, 0 replies; 9+ messages in thread
From: Pavel Begunkov @ 2020-12-31 16:40 UTC (permalink / raw)
  To: Jens Axboe, io-uring

On 31/12/2020 15:07, Jens Axboe wrote:
> On 12/30/20 2:34 PM, Pavel Begunkov wrote:
>> 1/4 and 2/4 are for hangs during files unregister.
>>
>> The other two fix not running task works during cancellation.
>> Instead of patching task_work it moves io_uring_files_cancel()
>> before PF_EXITING, should be less intrusive. Was there a
>> particular reasong for doing that in exit_files()?
>>
>> Pavel Begunkov (4):
>>   io_uring: add a helper for setting a ref node
>>   io_uring: fix io_sqe_files_unregister() hangs
>>   kernel/io_uring: cancel io_uring before task works
>>   io_uring: cancel requests enqueued as task_work's
>>
>>  fs/file.c     |  2 --
>>  fs/io_uring.c | 54 ++++++++++++++++++++++++++++++++++-----------------
>>  kernel/exit.c |  2 ++
>>  3 files changed, 38 insertions(+), 20 deletions(-)
> 
> Applied 1-3, as they look good. Can you resend 4/4 with the return
> added?

1-3 should fix the problems, so let's put off 4 for later and not
for current.

-- 
Pavel Begunkov

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 3/4] kernel/io_uring: cancel io_uring before task works
  2020-12-30 21:34 ` [PATCH 3/4] kernel/io_uring: cancel io_uring before task works Pavel Begunkov
@ 2021-01-02 16:01   ` Pavel Begunkov
  0 siblings, 0 replies; 9+ messages in thread
From: Pavel Begunkov @ 2021-01-02 16:01 UTC (permalink / raw)
  To: Jens Axboe, io-uring; +Cc: stable

On 30/12/2020 21:34, Pavel Begunkov wrote:
> For cancelling io_uring requests it needs either to be able to run
> currently enqueued task_wokrs or having it shut down by that moment.
> Otherwise io_uring_cancel_files() may be waiting for requests that won't
> ever complete.
> 
> Go with the first way and do cancellations before setting PF_EXITING and
> so before putting the task_work infrastructure into a transition state
> where task_work_run() would better not be called.

It won't do in the end, because after cancellation we don't have files
NULLed yet and SQPOLL task may use find and use them. I guess can't be
dropped because of rc, I'll think whether to revert or somehow shut
sqpoll.

> Cc: [email protected] # 5.5+
> Signed-off-by: Pavel Begunkov <[email protected]>
> ---
>  fs/file.c     | 2 --
>  kernel/exit.c | 2 ++ 
>  2 files changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/file.c b/fs/file.c
> index c0b60961c672..dab120b71e44 100644
> --- a/fs/file.c
> +++ b/fs/file.c
> @@ -21,7 +21,6 @@
>  #include <linux/rcupdate.h>
>  #include <linux/close_range.h>
>  #include <net/sock.h>
> -#include <linux/io_uring.h>
>  
>  unsigned int sysctl_nr_open __read_mostly = 1024*1024;
>  unsigned int sysctl_nr_open_min = BITS_PER_LONG;
> @@ -428,7 +427,6 @@ void exit_files(struct task_struct *tsk)
>  	struct files_struct * files = tsk->files;
>  
>  	if (files) {
> -		io_uring_files_cancel(files);
>  		task_lock(tsk);
>  		tsk->files = NULL;
>  		task_unlock(tsk);
> diff --git a/kernel/exit.c b/kernel/exit.c
> index 3594291a8542..04029e35e69a 100644
> --- a/kernel/exit.c
> +++ b/kernel/exit.c
> @@ -63,6 +63,7 @@
>  #include <linux/random.h>
>  #include <linux/rcuwait.h>
>  #include <linux/compat.h>
> +#include <linux/io_uring.h>
>  
>  #include <linux/uaccess.h>
>  #include <asm/unistd.h>
> @@ -776,6 +777,7 @@ void __noreturn do_exit(long code)
>  		schedule();
>  	}
>  
> +	io_uring_files_cancel(tsk->files);
>  	exit_signals(tsk);  /* sets PF_EXITING */
>  
>  	/* sync mm's RSS info before statistics gathering */
> 

-- 
Pavel Begunkov

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2021-01-02 16:06 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2020-12-30 21:34 [PATCH 0/4] address some hangs Pavel Begunkov
2020-12-30 21:34 ` [PATCH 1/4] io_uring: add a helper for setting a ref node Pavel Begunkov
2020-12-30 21:34 ` [PATCH 2/4] io_uring: fix io_sqe_files_unregister() hangs Pavel Begunkov
2020-12-30 21:34 ` [PATCH 3/4] kernel/io_uring: cancel io_uring before task works Pavel Begunkov
2021-01-02 16:01   ` Pavel Begunkov
2020-12-30 21:34 ` [PATCH 4/4] io_uring: cancel requests enqueued as task_work's Pavel Begunkov
2020-12-31 15:06   ` Jens Axboe
2020-12-31 15:07 ` [PATCH 0/4] address some hangs Jens Axboe
2020-12-31 16:40   ` Pavel Begunkov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox