public inbox for [email protected]
 help / color / mirror / Atom feed
* [PATCH for-next] io_uring: allow non-fixed files with SQPOLL
@ 2020-09-01 23:10 Jens Axboe
  2020-09-02  0:15 ` Jann Horn
  0 siblings, 1 reply; 3+ messages in thread
From: Jens Axboe @ 2020-09-01 23:10 UTC (permalink / raw)
  To: io-uring

The restriction of needing fixed files for SQPOLL is problematic, and
prevents/inhibits several valid uses cases.

There's no real good reason for us not to allow it, except we need to
have the sqpoll thread inherit current->files from the task that setup
the ring. We can't easily do that, since we'd introduce a circular
reference by holding on to our own file table.

If we wait for the sqpoll thread to exit when the ring fd is closed,
then we can safely reference the task files_struct without holding
a reference to it. And once we inherit that in the SQPOLL thread, we
can support non-fixed files for SQPOLL.

Signed-off-by: Jens Axboe <[email protected]>

---

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 7fcf83592046..1ac8e8bb7657 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -279,6 +279,13 @@ struct io_ring_ctx {
 	struct mm_struct	*sqo_mm;
 	wait_queue_head_t	sqo_wait;
 
+	/*
+	 * For SQPOLL usage - no reference is held to this file table, we
+	 * rely on fops->flush() and our callback there waiting for the users
+	 * to finish.
+	 */
+	struct files_struct	*sqo_files;
+
 	/*
 	 * If used, fixed file set. Writers must ensure that ->refs is dead,
 	 * readers must ensure that ->refs is alive as long as the file* is
@@ -6045,13 +6052,7 @@ static int io_file_get(struct io_submit_state *state, struct io_kiocb *req,
 static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req,
 			   int fd)
 {
-	bool fixed;
-
-	fixed = (req->flags & REQ_F_FIXED_FILE) != 0;
-	if (unlikely(!fixed && io_async_submit(req->ctx)))
-		return -EBADF;
-
-	return io_file_get(state, req, fd, &req->file, fixed);
+	return io_file_get(state, req, fd, &req->file, req->flags & REQ_F_FIXED_FILE);
 }
 
 static int io_grab_files(struct io_kiocb *req)
@@ -6621,6 +6622,10 @@ static int io_sq_thread(void *data)
 
 	old_cred = override_creds(ctx->creds);
 
+	task_lock(current);
+	current->files = ctx->sqo_files;
+	task_unlock(current);
+
 	timeout = jiffies + ctx->sq_thread_idle;
 	while (!kthread_should_park()) {
 		unsigned int to_submit;
@@ -6719,6 +6724,9 @@ static int io_sq_thread(void *data)
 	io_run_task_work();
 
 	io_sq_thread_drop_mm();
+	task_lock(current);
+	current->files = NULL;
+	task_unlock(current);
 	revert_creds(old_cred);
 
 	kthread_parkme();
@@ -7549,6 +7557,13 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
 		if (!capable(CAP_SYS_ADMIN))
 			goto err;
 
+		/*
+		 * We will exit the sqthread before current exits, so we can
+		 * avoid taking a reference here and introducing weird
+		 * circular dependencies on the files table.
+		 */
+		ctx->sqo_files = current->files;
+
 		ctx->sq_thread_idle = msecs_to_jiffies(p->sq_thread_idle);
 		if (!ctx->sq_thread_idle)
 			ctx->sq_thread_idle = HZ;
@@ -7586,6 +7601,7 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
 
 	return 0;
 err:
+	ctx->sqo_files = NULL;
 	io_finish_async(ctx);
 	return ret;
 }
@@ -8239,8 +8254,10 @@ static int io_uring_flush(struct file *file, void *data)
 	/*
 	 * If the task is going away, cancel work it may have pending
 	 */
-	if (fatal_signal_pending(current) || (current->flags & PF_EXITING))
+	if (fatal_signal_pending(current) || (current->flags & PF_EXITING)) {
+		io_sq_thread_stop(ctx);
 		io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, current, true);
+	}
 
 	return 0;
 }
@@ -8690,7 +8707,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
 	p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
 			IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
 			IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL |
-			IORING_FEAT_POLL_32BITS;
+			IORING_FEAT_POLL_32BITS | IORING_FEAT_SQPOLL_NONFIXED;
 
 	if (copy_to_user(params, p, sizeof(*p))) {
 		ret = -EFAULT;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 3e45de39e04b..02528ec8e81b 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -255,6 +255,7 @@ struct io_uring_params {
 #define IORING_FEAT_CUR_PERSONALITY	(1U << 4)
 #define IORING_FEAT_FAST_POLL		(1U << 5)
 #define IORING_FEAT_POLL_32BITS 	(1U << 6)
+#define IORING_FEAT_SQPOLL_NONFIXED	(1U << 7)
 
 /*
  * io_uring_register(2) opcodes and arguments

-- 
Jens Axboe


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH for-next] io_uring: allow non-fixed files with SQPOLL
  2020-09-01 23:10 [PATCH for-next] io_uring: allow non-fixed files with SQPOLL Jens Axboe
@ 2020-09-02  0:15 ` Jann Horn
  2020-09-02  1:09   ` Jens Axboe
  0 siblings, 1 reply; 3+ messages in thread
From: Jann Horn @ 2020-09-02  0:15 UTC (permalink / raw)
  To: Jens Axboe; +Cc: io-uring

On Wed, Sep 2, 2020 at 1:11 AM Jens Axboe <[email protected]> wrote:
> The restriction of needing fixed files for SQPOLL is problematic, and
> prevents/inhibits several valid uses cases.
>
> There's no real good reason for us not to allow it, except we need to
> have the sqpoll thread inherit current->files from the task that setup
> the ring. We can't easily do that, since we'd introduce a circular
> reference by holding on to our own file table.
>
> If we wait for the sqpoll thread to exit when the ring fd is closed,
> then we can safely reference the task files_struct without holding
> a reference to it. And once we inherit that in the SQPOLL thread, we
> can support non-fixed files for SQPOLL.
[...]
> diff --git a/fs/io_uring.c b/fs/io_uring.c
[...]
> +       /*
> +        * For SQPOLL usage - no reference is held to this file table, we
> +        * rely on fops->flush() and our callback there waiting for the users
> +        * to finish.
> +        */
> +       struct files_struct     *sqo_files;
[...]
> @@ -6621,6 +6622,10 @@ static int io_sq_thread(void *data)
>
>         old_cred = override_creds(ctx->creds);
>
> +       task_lock(current);
> +       current->files = ctx->sqo_files;
> +       task_unlock(current);
[...]
> @@ -7549,6 +7557,13 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
>                 if (!capable(CAP_SYS_ADMIN))
>                         goto err;
>
> +               /*
> +                * We will exit the sqthread before current exits, so we can
> +                * avoid taking a reference here and introducing weird
> +                * circular dependencies on the files table.
> +                */
> +               ctx->sqo_files = current->files;
[...]
> @@ -8239,8 +8254,10 @@ static int io_uring_flush(struct file *file, void *data)
>         /*
>          * If the task is going away, cancel work it may have pending
>          */
> -       if (fatal_signal_pending(current) || (current->flags & PF_EXITING))
> +       if (fatal_signal_pending(current) || (current->flags & PF_EXITING)) {
> +               io_sq_thread_stop(ctx);
>                 io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, current, true);
> +       }

What happens when the uring setup syscall fails around
io_uring_get_fd() (after the sq offload thread was started, but before
an fd was installed)? Will that also properly wait for the sq_thread
to go away before returning from the syscall (at which point the
files_struct could disappear)?

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH for-next] io_uring: allow non-fixed files with SQPOLL
  2020-09-02  0:15 ` Jann Horn
@ 2020-09-02  1:09   ` Jens Axboe
  0 siblings, 0 replies; 3+ messages in thread
From: Jens Axboe @ 2020-09-02  1:09 UTC (permalink / raw)
  To: Jann Horn; +Cc: io-uring

On 9/1/20 6:15 PM, Jann Horn wrote:
> On Wed, Sep 2, 2020 at 1:11 AM Jens Axboe <[email protected]> wrote:
>> The restriction of needing fixed files for SQPOLL is problematic, and
>> prevents/inhibits several valid uses cases.
>>
>> There's no real good reason for us not to allow it, except we need to
>> have the sqpoll thread inherit current->files from the task that setup
>> the ring. We can't easily do that, since we'd introduce a circular
>> reference by holding on to our own file table.
>>
>> If we wait for the sqpoll thread to exit when the ring fd is closed,
>> then we can safely reference the task files_struct without holding
>> a reference to it. And once we inherit that in the SQPOLL thread, we
>> can support non-fixed files for SQPOLL.
> [...]
>> diff --git a/fs/io_uring.c b/fs/io_uring.c
> [...]
>> +       /*
>> +        * For SQPOLL usage - no reference is held to this file table, we
>> +        * rely on fops->flush() and our callback there waiting for the users
>> +        * to finish.
>> +        */
>> +       struct files_struct     *sqo_files;
> [...]
>> @@ -6621,6 +6622,10 @@ static int io_sq_thread(void *data)
>>
>>         old_cred = override_creds(ctx->creds);
>>
>> +       task_lock(current);
>> +       current->files = ctx->sqo_files;
>> +       task_unlock(current);
> [...]
>> @@ -7549,6 +7557,13 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
>>                 if (!capable(CAP_SYS_ADMIN))
>>                         goto err;
>>
>> +               /*
>> +                * We will exit the sqthread before current exits, so we can
>> +                * avoid taking a reference here and introducing weird
>> +                * circular dependencies on the files table.
>> +                */
>> +               ctx->sqo_files = current->files;
> [...]
>> @@ -8239,8 +8254,10 @@ static int io_uring_flush(struct file *file, void *data)
>>         /*
>>          * If the task is going away, cancel work it may have pending
>>          */
>> -       if (fatal_signal_pending(current) || (current->flags & PF_EXITING))
>> +       if (fatal_signal_pending(current) || (current->flags & PF_EXITING)) {
>> +               io_sq_thread_stop(ctx);
>>                 io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, current, true);
>> +       }
> 
> What happens when the uring setup syscall fails around
> io_uring_get_fd() (after the sq offload thread was started, but before
> an fd was installed)? Will that also properly wait for the sq_thread
> to go away before returning from the syscall (at which point the
> files_struct could disappear)?

Thanks for taking a look, Jann - I actually meant to CC you on this one
explicitly, but forgot.

Good question, we do need to handle the error case there and wait for
the thread to exit to avoid a potential use-after-free on the file
structure of the current->files. This one-liner addition (and comment)
should ensure we do just that.


diff --git a/fs/io_uring.c b/fs/io_uring.c
index 7fcf83592046..d97601cacd7f 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -279,6 +279,13 @@ struct io_ring_ctx {
 	struct mm_struct	*sqo_mm;
 	wait_queue_head_t	sqo_wait;
 
+	/*
+	 * For SQPOLL usage - no reference is held to this file table, we
+	 * rely on fops->flush() and our callback there waiting for the users
+	 * to finish.
+	 */
+	struct files_struct	*sqo_files;
+
 	/*
 	 * If used, fixed file set. Writers must ensure that ->refs is dead,
 	 * readers must ensure that ->refs is alive as long as the file* is
@@ -6045,13 +6052,7 @@ static int io_file_get(struct io_submit_state *state, struct io_kiocb *req,
 static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req,
 			   int fd)
 {
-	bool fixed;
-
-	fixed = (req->flags & REQ_F_FIXED_FILE) != 0;
-	if (unlikely(!fixed && io_async_submit(req->ctx)))
-		return -EBADF;
-
-	return io_file_get(state, req, fd, &req->file, fixed);
+	return io_file_get(state, req, fd, &req->file, req->flags & REQ_F_FIXED_FILE);
 }
 
 static int io_grab_files(struct io_kiocb *req)
@@ -6621,6 +6622,10 @@ static int io_sq_thread(void *data)
 
 	old_cred = override_creds(ctx->creds);
 
+	task_lock(current);
+	current->files = ctx->sqo_files;
+	task_unlock(current);
+
 	timeout = jiffies + ctx->sq_thread_idle;
 	while (!kthread_should_park()) {
 		unsigned int to_submit;
@@ -6719,6 +6724,9 @@ static int io_sq_thread(void *data)
 	io_run_task_work();
 
 	io_sq_thread_drop_mm();
+	task_lock(current);
+	current->files = NULL;
+	task_unlock(current);
 	revert_creds(old_cred);
 
 	kthread_parkme();
@@ -7549,6 +7557,13 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
 		if (!capable(CAP_SYS_ADMIN))
 			goto err;
 
+		/*
+		 * We will exit the sqthread before current exits, so we can
+		 * avoid taking a reference here and introducing weird
+		 * circular dependencies on the files table.
+		 */
+		ctx->sqo_files = current->files;
+
 		ctx->sq_thread_idle = msecs_to_jiffies(p->sq_thread_idle);
 		if (!ctx->sq_thread_idle)
 			ctx->sq_thread_idle = HZ;
@@ -7586,6 +7601,7 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
 
 	return 0;
 err:
+	ctx->sqo_files = NULL;
 	io_finish_async(ctx);
 	return ret;
 }
@@ -8239,8 +8255,10 @@ static int io_uring_flush(struct file *file, void *data)
 	/*
 	 * If the task is going away, cancel work it may have pending
 	 */
-	if (fatal_signal_pending(current) || (current->flags & PF_EXITING))
+	if (fatal_signal_pending(current) || (current->flags & PF_EXITING)) {
+		io_sq_thread_stop(ctx);
 		io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, current, true);
+	}
 
 	return 0;
 }
@@ -8690,7 +8708,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
 	p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP |
 			IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
 			IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL |
-			IORING_FEAT_POLL_32BITS;
+			IORING_FEAT_POLL_32BITS | IORING_FEAT_SQPOLL_NONFIXED;
 
 	if (copy_to_user(params, p, sizeof(*p))) {
 		ret = -EFAULT;
@@ -8708,6 +8726,12 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
 	trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags);
 	return ret;
 err:
+	/*
+	 * Our wait-and-kill does do this, but we need it done before we
+	 * exit as the SQPOLL thread could already be active and the current
+	 * files could be done as soon as we exit here.
+	 */
+	io_finish_async(ctx);
 	io_ring_ctx_wait_and_kill(ctx);
 	return ret;
 }
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 3e45de39e04b..02528ec8e81b 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -255,6 +255,7 @@ struct io_uring_params {
 #define IORING_FEAT_CUR_PERSONALITY	(1U << 4)
 #define IORING_FEAT_FAST_POLL		(1U << 5)
 #define IORING_FEAT_POLL_32BITS 	(1U << 6)
+#define IORING_FEAT_SQPOLL_NONFIXED	(1U << 7)
 
 /*
  * io_uring_register(2) opcodes and arguments

-- 
Jens Axboe


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2020-09-02  1:09 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2020-09-01 23:10 [PATCH for-next] io_uring: allow non-fixed files with SQPOLL Jens Axboe
2020-09-02  0:15 ` Jann Horn
2020-09-02  1:09   ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox