public inbox for [email protected]
 help / color / mirror / Atom feed
From: Jens Axboe <[email protected]>
To: [email protected]
Cc: Jens Axboe <[email protected]>
Subject: [PATCH 12/33] io_uring: signal worker thread unshare
Date: Wed,  3 Mar 2021 17:26:39 -0700	[thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>

If the original task switches credentials or unshares any part of the
task state, then we should notify the io_uring workers so they can
re-fork as well. For credentials, this actually happens just fine for
the io-wq workers, as we grab and pass that down. For SQPOLL, we're
stuck with the original credentials, which means that it cannot be used
if the task does e.g. seteuid().

For unshare(2), the story is the same, except a task cannot do that and
expect the workers to assume the new identity.

Fix this up by just having the threads exit and re-fork if the ring task
does seteuid() (and friends), or does unshare(2) on any parts of the
task.

Signed-off-by: Jens Axboe <[email protected]>
---
 fs/io-wq.c               | 21 ++++++++++++++++-----
 fs/io-wq.h               |  1 +
 fs/io_uring.c            | 26 ++++++++++++++++++++++++--
 include/linux/io_uring.h |  9 +++++++++
 kernel/cred.c            |  2 ++
 kernel/fork.c            |  2 ++
 6 files changed, 54 insertions(+), 7 deletions(-)

diff --git a/fs/io-wq.c b/fs/io-wq.c
index 65ae35ca8dba..c24473231eee 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -744,6 +744,7 @@ static int io_wq_manager(void *data)
 {
 	struct io_wq *wq = data;
 	char buf[TASK_COMM_LEN];
+	int node;
 
 	sprintf(buf, "iou-mgr-%d", wq->task_pid);
 	set_task_comm(current, buf);
@@ -761,6 +762,12 @@ static int io_wq_manager(void *data)
 	} while (!test_bit(IO_WQ_BIT_EXIT, &wq->state));
 
 	io_wq_check_workers(wq);
+
+	rcu_read_lock();
+	for_each_node(node)
+		io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL);
+	rcu_read_unlock();
+
 	/* we might not ever have created any workers */
 	if (atomic_read(&wq->worker_refs))
 		wait_for_completion(&wq->worker_done);
@@ -1097,11 +1104,6 @@ static void io_wq_destroy(struct io_wq *wq)
 	set_bit(IO_WQ_BIT_EXIT, &wq->state);
 	io_wq_destroy_manager(wq);
 
-	rcu_read_lock();
-	for_each_node(node)
-		io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL);
-	rcu_read_unlock();
-
 	spin_lock_irq(&wq->hash->wait.lock);
 	for_each_node(node) {
 		struct io_wqe *wqe = wq->wqes[node];
@@ -1165,3 +1167,12 @@ static __init int io_wq_init(void)
 	return 0;
 }
 subsys_initcall(io_wq_init);
+
+void io_wq_unshare(struct io_wq *wq)
+{
+	refcount_inc(&wq->refs);
+	set_bit(IO_WQ_BIT_EXIT, &wq->state);
+	io_wq_destroy_manager(wq);
+	clear_bit(IO_WQ_BIT_EXIT, &wq->state);
+	io_wq_put(wq);
+}
diff --git a/fs/io-wq.h b/fs/io-wq.h
index f6ef433df8a8..57e478af1e1d 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -115,6 +115,7 @@ struct io_wq_data {
 struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data);
 void io_wq_put(struct io_wq *wq);
 void io_wq_put_and_exit(struct io_wq *wq);
+void io_wq_unshare(struct io_wq *wq);
 
 void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
 void io_wq_hash_work(struct io_wq_work *work, void *val);
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 83973f6b3c0a..f89d7375a7c3 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -8955,6 +8955,24 @@ void __io_uring_task_cancel(void)
 	io_uring_remove_task_files(tctx);
 }
 
+void __io_uring_unshare(void)
+{
+	struct io_uring_task *tctx = current->io_uring;
+	struct file *file;
+	unsigned long index;
+
+	io_wq_unshare(tctx->io_wq);
+	if (!tctx->sqpoll)
+		return;
+
+	xa_for_each(&tctx->xa, index, file) {
+		struct io_ring_ctx *ctx = file->private_data;
+
+		if (ctx->sq_data)
+			io_sq_thread_stop(ctx->sq_data);
+	}
+}
+
 static int io_uring_flush(struct file *file, void *data)
 {
 	struct io_uring_task *tctx = current->io_uring;
@@ -9170,10 +9188,14 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 		io_cqring_overflow_flush(ctx, false, NULL, NULL);
 
 		if (unlikely(ctx->sqo_exec)) {
-			ret = io_sq_thread_fork(ctx->sq_data, ctx);
+			struct io_sq_data *sqd = ctx->sq_data;
+
+			ret = io_sq_thread_fork(sqd, ctx);
+			if (ret)
+				set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
+			complete(&sqd->startup);
 			if (ret)
 				goto out;
-			ctx->sqo_exec = 0;
 		}
 		ret = -EOWNERDEAD;
 		if (unlikely(ctx->sqo_dead))
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index 51ede771cd99..bfe2fcb4f478 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -35,7 +35,13 @@ struct sock *io_uring_get_socket(struct file *file);
 void __io_uring_task_cancel(void);
 void __io_uring_files_cancel(struct files_struct *files);
 void __io_uring_free(struct task_struct *tsk);
+void __io_uring_unshare(void);
 
+static inline void io_uring_unshare(void)
+{
+	if (current->io_uring)
+		__io_uring_unshare();
+}
 static inline void io_uring_task_cancel(void)
 {
 	if (current->io_uring && !xa_empty(&current->io_uring->xa))
@@ -56,6 +62,9 @@ static inline struct sock *io_uring_get_socket(struct file *file)
 {
 	return NULL;
 }
+static inline void io_uring_unshare(void)
+{
+}
 static inline void io_uring_task_cancel(void)
 {
 }
diff --git a/kernel/cred.c b/kernel/cred.c
index 421b1149c651..324e3ee61e1d 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -16,6 +16,7 @@
 #include <linux/binfmts.h>
 #include <linux/cn_proc.h>
 #include <linux/uidgid.h>
+#include <linux/io_uring.h>
 
 #if 0
 #define kdebug(FMT, ...)						\
@@ -509,6 +510,7 @@ int commit_creds(struct cred *new)
 	/* release the old obj and subj refs both */
 	put_cred(old);
 	put_cred(old);
+	io_uring_unshare();
 	return 0;
 }
 EXPORT_SYMBOL(commit_creds);
diff --git a/kernel/fork.c b/kernel/fork.c
index d66cd1014211..5d1b00083c9e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2999,6 +2999,8 @@ int ksys_unshare(unsigned long unshare_flags)
 			commit_creds(new_cred);
 			new_cred = NULL;
 		}
+
+		io_uring_unshare();
 	}
 
 	perf_event_namespaces(current);
-- 
2.30.1


  parent reply	other threads:[~2021-03-04  1:10 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-04  0:26 [PATCHSET 0/33] Fixes queued up for 5.12 Jens Axboe
2021-03-04  0:26 ` [PATCH 01/33] io-wq: wait for worker startup when forking a new one Jens Axboe
2021-03-04  0:26 ` [PATCH 02/33] io-wq: have manager wait for all workers to exit Jens Axboe
2021-03-04  0:26 ` [PATCH 03/33] io-wq: don't ask for a new worker if we're exiting Jens Axboe
2021-03-04  0:26 ` [PATCH 04/33] io-wq: rename wq->done completion to wq->started Jens Axboe
2021-03-04  0:26 ` [PATCH 05/33] io-wq: wait for manager exit on wq destroy Jens Axboe
2021-03-04  0:26 ` [PATCH 06/33] io-wq: fix double put of 'wq' in error path Jens Axboe
2021-03-04  0:26 ` [PATCH 07/33] io_uring: SQPOLL stop error handling fixes Jens Axboe
2021-03-04  0:26 ` [PATCH 08/33] io_uring: run fallback on cancellation Jens Axboe
2021-03-04  0:26 ` [PATCH 09/33] io_uring: don't use complete_all() on SQPOLL thread exit Jens Axboe
2021-03-04  0:26 ` [PATCH 10/33] io-wq: provide an io_wq_put_and_exit() helper Jens Axboe
2021-03-04  0:26 ` [PATCH 11/33] io_uring: fix race condition in task_work add and clear Jens Axboe
2021-03-04  0:26 ` Jens Axboe [this message]
2021-03-04 12:15   ` [PATCH 12/33] io_uring: signal worker thread unshare Stefan Metzmacher
2021-03-04 14:05     ` Jens Axboe
2021-03-04  0:26 ` [PATCH 13/33] io_uring: warn on not destroyed io-wq Jens Axboe
2021-03-04  0:26 ` [PATCH 14/33] io_uring: destroy io-wq on exec Jens Axboe
2021-03-04  0:26 ` [PATCH 15/33] io_uring: remove unused argument 'tsk' from io_req_caches_free() Jens Axboe
2021-03-04  0:26 ` [PATCH 16/33] io_uring: kill unnecessary REQ_F_WORK_INITIALIZED checks Jens Axboe
2021-03-04  0:26 ` [PATCH 17/33] io_uring: move cred assignment into io_issue_sqe() Jens Axboe
2021-03-04  0:26 ` [PATCH 18/33] io_uring: kill unnecessary io_run_ctx_fallback() in io_ring_exit_work() Jens Axboe
2021-03-04  0:26 ` [PATCH 19/33] io_uring: kill io_uring_flush() Jens Axboe
2021-03-04  0:26 ` [PATCH 20/33] io_uring: fix __tctx_task_work() ctx race Jens Axboe
2021-03-04  0:26 ` [PATCH 21/33] io_uring: replace cmpxchg in fallback with xchg Jens Axboe
2021-03-04  0:26 ` [PATCH 22/33] io_uring: ensure that SQPOLL thread is started for exit Jens Axboe
2021-03-04  0:26 ` [PATCH 23/33] io_uring: ignore double poll add on the same waitqueue head Jens Axboe
2021-03-04  0:26 ` [PATCH 24/33] io_uring: kill sqo_dead and sqo submission halting Jens Axboe
2021-03-04  0:26 ` [PATCH 25/33] io_uring: remove sqo_task Jens Axboe
2021-03-04  0:26 ` [PATCH 26/33] io-wq: fix error path leak of buffered write hash map Jens Axboe
2021-03-04  0:26 ` [PATCH 27/33] io_uring: fix -EAGAIN retry with IOPOLL Jens Axboe
2021-03-04  0:26 ` [PATCH 28/33] io_uring: choose right tctx->io_wq for try cancel Jens Axboe
2021-03-04  0:26 ` [PATCH 29/33] io_uring: inline io_req_clean_work() Jens Axboe
2021-03-04  0:26 ` [PATCH 30/33] io_uring: inline __io_queue_async_work() Jens Axboe
2021-03-04  0:26 ` [PATCH 31/33] io_uring: remove extra in_idle wake up Jens Axboe
2021-03-04  0:26 ` [PATCH 32/33] io_uring: ensure that threads freeze on suspend Jens Axboe
2021-03-04  0:27 ` [PATCH 33/33] io-wq: ensure all pending work is canceled on exit Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox