public inbox for [email protected]
* [PATCHSET 0/3] Move io_kiocb from task_struct to io_uring_task
@ 2024-11-03 17:49 Jens Axboe
  2024-11-03 17:49 ` [PATCH 1/3] io_uring: move cancelations to be io_uring_task based Jens Axboe
                   ` (2 more replies)
  0 siblings, 3 replies; 17+ messages in thread
From: Jens Axboe @ 2024-11-03 17:49 UTC (permalink / raw)
  To: io-uring

Hi,

To help avoid doing task_struct dereferences in the hot path, move
struct io_kiocb from stashing a task_struct pointer to stashing an
io_uring_task pointer instead.

Patch 1 moves cancelations to key off io_uring_task rather than the
task, patch 2 removes a pair of unnecessary task reference helpers, and
patch 3 moves to using io_uring_task in io_kiocb.
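
As a rough illustration, the hot path win is dropping one level of
indirection (sketch only, not a hunk from the series):

	/* before: reach the io_uring task context via the task */
	struct io_uring_task *tctx = req->task->io_uring;

	/* after: stored directly in the request */
	struct io_uring_task *tctx = req->tctx;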

 include/linux/io_uring/cmd.h   |  2 +-
 include/linux/io_uring_types.h |  3 +-
 io_uring/cancel.c              |  2 +-
 io_uring/fdinfo.c              |  2 +-
 io_uring/futex.c               |  4 +-
 io_uring/futex.h               |  4 +-
 io_uring/io_uring.c            | 99 +++++++++++++++-------------------
 io_uring/io_uring.h            |  2 +-
 io_uring/msg_ring.c            |  4 +-
 io_uring/notif.c               |  4 +-
 io_uring/poll.c                |  7 ++-
 io_uring/poll.h                |  2 +-
 io_uring/rw.c                  |  2 +-
 io_uring/tctx.c                |  1 +
 io_uring/timeout.c             | 12 ++---
 io_uring/timeout.h             |  2 +-
 io_uring/uring_cmd.c           |  4 +-
 io_uring/uring_cmd.h           |  2 +-
 io_uring/waitid.c              |  6 +--
 io_uring/waitid.h              |  2 +-
 20 files changed, 76 insertions(+), 90 deletions(-)

-- 
Jens Axboe



* [PATCH 1/3] io_uring: move cancelations to be io_uring_task based
  2024-11-03 17:49 [PATCHSET 0/3] Move io_kiocb from task_struct to io_uring_task Jens Axboe
@ 2024-11-03 17:49 ` Jens Axboe
  2024-11-03 17:49 ` [PATCH 2/3] io_uring: remove task ref helpers Jens Axboe
  2024-11-03 17:49 ` [PATCH 3/3] io_uring: move struct io_kiocb from task_struct to io_uring_task Jens Axboe
  2 siblings, 0 replies; 17+ messages in thread
From: Jens Axboe @ 2024-11-03 17:49 UTC (permalink / raw)
  To: io-uring; +Cc: Jens Axboe

Right now the task_struct pointer is used as the key to match a task,
but in preparation for some io_kiocb changes, move it to using struct
io_uring_task instead. No functional changes intended in this patch.

Signed-off-by: Jens Axboe <[email protected]>
---
 io_uring/futex.c     |  4 ++--
 io_uring/futex.h     |  4 ++--
 io_uring/io_uring.c  | 42 +++++++++++++++++++++---------------------
 io_uring/io_uring.h  |  2 +-
 io_uring/poll.c      |  4 ++--
 io_uring/poll.h      |  2 +-
 io_uring/timeout.c   |  8 ++++----
 io_uring/timeout.h   |  2 +-
 io_uring/uring_cmd.c |  4 ++--
 io_uring/uring_cmd.h |  2 +-
 io_uring/waitid.c    |  4 ++--
 io_uring/waitid.h    |  2 +-
 12 files changed, 40 insertions(+), 40 deletions(-)

diff --git a/io_uring/futex.c b/io_uring/futex.c
index 914848f46beb..e29662f039e1 100644
--- a/io_uring/futex.c
+++ b/io_uring/futex.c
@@ -141,7 +141,7 @@ int io_futex_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
 	return -ENOENT;
 }
 
-bool io_futex_remove_all(struct io_ring_ctx *ctx, struct task_struct *task,
+bool io_futex_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx,
 			 bool cancel_all)
 {
 	struct hlist_node *tmp;
@@ -151,7 +151,7 @@ bool io_futex_remove_all(struct io_ring_ctx *ctx, struct task_struct *task,
 	lockdep_assert_held(&ctx->uring_lock);
 
 	hlist_for_each_entry_safe(req, tmp, &ctx->futex_list, hash_node) {
-		if (!io_match_task_safe(req, task, cancel_all))
+		if (!io_match_task_safe(req, tctx, cancel_all))
 			continue;
 		hlist_del_init(&req->hash_node);
 		__io_futex_cancel(ctx, req);
diff --git a/io_uring/futex.h b/io_uring/futex.h
index b8bb09873d57..d789fcf715e3 100644
--- a/io_uring/futex.h
+++ b/io_uring/futex.h
@@ -11,7 +11,7 @@ int io_futex_wake(struct io_kiocb *req, unsigned int issue_flags);
 #if defined(CONFIG_FUTEX)
 int io_futex_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
 		    unsigned int issue_flags);
-bool io_futex_remove_all(struct io_ring_ctx *ctx, struct task_struct *task,
+bool io_futex_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx,
 			 bool cancel_all);
 bool io_futex_cache_init(struct io_ring_ctx *ctx);
 void io_futex_cache_free(struct io_ring_ctx *ctx);
@@ -23,7 +23,7 @@ static inline int io_futex_cancel(struct io_ring_ctx *ctx,
 	return 0;
 }
 static inline bool io_futex_remove_all(struct io_ring_ctx *ctx,
-				       struct task_struct *task, bool cancel_all)
+				       struct io_uring_task *tctx, bool cancel_all)
 {
 	return false;
 }
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 5b421e67c031..701cbd4670d8 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -143,7 +143,7 @@ struct io_defer_entry {
 #define IO_CQ_WAKE_FORCE	(IO_CQ_WAKE_INIT >> 1)
 
 static bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
-					 struct task_struct *task,
+					 struct io_uring_task *tctx,
 					 bool cancel_all);
 
 static void io_queue_sqe(struct io_kiocb *req);
@@ -202,12 +202,12 @@ static bool io_match_linked(struct io_kiocb *head)
  * As io_match_task() but protected against racing with linked timeouts.
  * User must not hold timeout_lock.
  */
-bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task,
+bool io_match_task_safe(struct io_kiocb *head, struct io_uring_task *tctx,
 			bool cancel_all)
 {
 	bool matched;
 
-	if (task && head->task != task)
+	if (tctx && head->task->io_uring != tctx)
 		return false;
 	if (cancel_all)
 		return true;
@@ -3286,7 +3286,7 @@ static int io_uring_release(struct inode *inode, struct file *file)
 }
 
 struct io_task_cancel {
-	struct task_struct *task;
+	struct io_uring_task *tctx;
 	bool all;
 };
 
@@ -3295,11 +3295,11 @@ static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
 	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
 	struct io_task_cancel *cancel = data;
 
-	return io_match_task_safe(req, cancel->task, cancel->all);
+	return io_match_task_safe(req, cancel->tctx, cancel->all);
 }
 
 static __cold bool io_cancel_defer_files(struct io_ring_ctx *ctx,
-					 struct task_struct *task,
+					 struct io_uring_task *tctx,
 					 bool cancel_all)
 {
 	struct io_defer_entry *de;
@@ -3307,7 +3307,7 @@ static __cold bool io_cancel_defer_files(struct io_ring_ctx *ctx,
 
 	spin_lock(&ctx->completion_lock);
 	list_for_each_entry_reverse(de, &ctx->defer_list, list) {
-		if (io_match_task_safe(de->req, task, cancel_all)) {
+		if (io_match_task_safe(de->req, tctx, cancel_all)) {
 			list_cut_position(&list, &ctx->defer_list, &de->list);
 			break;
 		}
@@ -3350,11 +3350,10 @@ static __cold bool io_uring_try_cancel_iowq(struct io_ring_ctx *ctx)
 }
 
 static __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
-						struct task_struct *task,
+						struct io_uring_task *tctx,
 						bool cancel_all)
 {
-	struct io_task_cancel cancel = { .task = task, .all = cancel_all, };
-	struct io_uring_task *tctx = task ? task->io_uring : NULL;
+	struct io_task_cancel cancel = { .tctx = tctx, .all = cancel_all, };
 	enum io_wq_cancel cret;
 	bool ret = false;
 
@@ -3368,9 +3367,9 @@ static __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
 	if (!ctx->rings)
 		return false;
 
-	if (!task) {
+	if (!tctx) {
 		ret |= io_uring_try_cancel_iowq(ctx);
-	} else if (tctx && tctx->io_wq) {
+	} else if (tctx->io_wq) {
 		/*
 		 * Cancels requests of all rings, not only @ctx, but
 		 * it's fine as the task is in exit/exec.
@@ -3393,15 +3392,15 @@ static __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
 	if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) &&
 	    io_allowed_defer_tw_run(ctx))
 		ret |= io_run_local_work(ctx, INT_MAX) > 0;
-	ret |= io_cancel_defer_files(ctx, task, cancel_all);
+	ret |= io_cancel_defer_files(ctx, tctx, cancel_all);
 	mutex_lock(&ctx->uring_lock);
-	ret |= io_poll_remove_all(ctx, task, cancel_all);
-	ret |= io_waitid_remove_all(ctx, task, cancel_all);
-	ret |= io_futex_remove_all(ctx, task, cancel_all);
-	ret |= io_uring_try_cancel_uring_cmd(ctx, task, cancel_all);
+	ret |= io_poll_remove_all(ctx, tctx, cancel_all);
+	ret |= io_waitid_remove_all(ctx, tctx, cancel_all);
+	ret |= io_futex_remove_all(ctx, tctx, cancel_all);
+	ret |= io_uring_try_cancel_uring_cmd(ctx, tctx, cancel_all);
 	mutex_unlock(&ctx->uring_lock);
-	ret |= io_kill_timeouts(ctx, task, cancel_all);
-	if (task)
+	ret |= io_kill_timeouts(ctx, tctx, cancel_all);
+	if (tctx)
 		ret |= io_run_task_work() > 0;
 	else
 		ret |= flush_delayed_work(&ctx->fallback_work);
@@ -3454,12 +3453,13 @@ __cold void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd)
 				if (node->ctx->sq_data)
 					continue;
 				loop |= io_uring_try_cancel_requests(node->ctx,
-							current, cancel_all);
+							current->io_uring,
+							cancel_all);
 			}
 		} else {
 			list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
 				loop |= io_uring_try_cancel_requests(ctx,
-								     current,
+								     current->io_uring,
 								     cancel_all);
 		}
 
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 52d15ac8d209..14d73a727320 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -116,7 +116,7 @@ void io_queue_next(struct io_kiocb *req);
 void io_task_refs_refill(struct io_uring_task *tctx);
 bool __io_alloc_req_refill(struct io_ring_ctx *ctx);
 
-bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task,
+bool io_match_task_safe(struct io_kiocb *head, struct io_uring_task *tctx,
 			bool cancel_all);
 
 void io_activate_pollwq(struct io_ring_ctx *ctx);
diff --git a/io_uring/poll.c b/io_uring/poll.c
index 2d6698fb7400..7db3010b5733 100644
--- a/io_uring/poll.c
+++ b/io_uring/poll.c
@@ -714,7 +714,7 @@ int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
 /*
  * Returns true if we found and killed one or more poll requests
  */
-__cold bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
+__cold bool io_poll_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx,
 			       bool cancel_all)
 {
 	unsigned nr_buckets = 1U << ctx->cancel_table.hash_bits;
@@ -729,7 +729,7 @@ __cold bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
 		struct io_hash_bucket *hb = &ctx->cancel_table.hbs[i];
 
 		hlist_for_each_entry_safe(req, tmp, &hb->list, hash_node) {
-			if (io_match_task_safe(req, tsk, cancel_all)) {
+			if (io_match_task_safe(req, tctx, cancel_all)) {
 				hlist_del_init(&req->hash_node);
 				io_poll_cancel_req(req);
 				found = true;
diff --git a/io_uring/poll.h b/io_uring/poll.h
index b0e3745f5a29..04ede93113dc 100644
--- a/io_uring/poll.h
+++ b/io_uring/poll.h
@@ -40,7 +40,7 @@ struct io_cancel_data;
 int io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
 		   unsigned issue_flags);
 int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags);
-bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
+bool io_poll_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx,
 			bool cancel_all);
 
 void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts);
diff --git a/io_uring/timeout.c b/io_uring/timeout.c
index ed6c74f1a475..31fbea366d43 100644
--- a/io_uring/timeout.c
+++ b/io_uring/timeout.c
@@ -643,13 +643,13 @@ void io_queue_linked_timeout(struct io_kiocb *req)
 	io_put_req(req);
 }
 
-static bool io_match_task(struct io_kiocb *head, struct task_struct *task,
+static bool io_match_task(struct io_kiocb *head, struct io_uring_task *tctx,
 			  bool cancel_all)
 	__must_hold(&head->ctx->timeout_lock)
 {
 	struct io_kiocb *req;
 
-	if (task && head->task != task)
+	if (tctx && head->task->io_uring != tctx)
 		return false;
 	if (cancel_all)
 		return true;
@@ -662,7 +662,7 @@ static bool io_match_task(struct io_kiocb *head, struct task_struct *task,
 }
 
 /* Returns true if we found and killed one or more timeouts */
-__cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
+__cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct io_uring_task *tctx,
 			     bool cancel_all)
 {
 	struct io_timeout *timeout, *tmp;
@@ -677,7 +677,7 @@ __cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
 	list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
 		struct io_kiocb *req = cmd_to_io_kiocb(timeout);
 
-		if (io_match_task(req, tsk, cancel_all) &&
+		if (io_match_task(req, tctx, cancel_all) &&
 		    io_kill_timeout(req, -ECANCELED))
 			canceled++;
 	}
diff --git a/io_uring/timeout.h b/io_uring/timeout.h
index a6939f18313e..e91b32448dcf 100644
--- a/io_uring/timeout.h
+++ b/io_uring/timeout.h
@@ -24,7 +24,7 @@ static inline struct io_kiocb *io_disarm_linked_timeout(struct io_kiocb *req)
 __cold void io_flush_timeouts(struct io_ring_ctx *ctx);
 struct io_cancel_data;
 int io_timeout_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd);
-__cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
+__cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct io_uring_task *tctx,
 			     bool cancel_all);
 void io_queue_linked_timeout(struct io_kiocb *req);
 void io_disarm_next(struct io_kiocb *req);
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index 88a73d21fc0b..f88fbc9869d0 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -47,7 +47,7 @@ static void io_req_uring_cleanup(struct io_kiocb *req, unsigned int issue_flags)
 }
 
 bool io_uring_try_cancel_uring_cmd(struct io_ring_ctx *ctx,
-				   struct task_struct *task, bool cancel_all)
+				   struct io_uring_task *tctx, bool cancel_all)
 {
 	struct hlist_node *tmp;
 	struct io_kiocb *req;
@@ -61,7 +61,7 @@ bool io_uring_try_cancel_uring_cmd(struct io_ring_ctx *ctx,
 				struct io_uring_cmd);
 		struct file *file = req->file;
 
-		if (!cancel_all && req->task != task)
+		if (!cancel_all && req->task->io_uring != tctx)
 			continue;
 
 		if (cmd->flags & IORING_URING_CMD_CANCELABLE) {
diff --git a/io_uring/uring_cmd.h b/io_uring/uring_cmd.h
index a361f98664d2..7dba0f1efc58 100644
--- a/io_uring/uring_cmd.h
+++ b/io_uring/uring_cmd.h
@@ -8,4 +8,4 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags);
 int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
 
 bool io_uring_try_cancel_uring_cmd(struct io_ring_ctx *ctx,
-				   struct task_struct *task, bool cancel_all);
+				   struct io_uring_task *tctx, bool cancel_all);
diff --git a/io_uring/waitid.c b/io_uring/waitid.c
index 6362ec20abc0..9b7c23f96c47 100644
--- a/io_uring/waitid.c
+++ b/io_uring/waitid.c
@@ -184,7 +184,7 @@ int io_waitid_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
 	return -ENOENT;
 }
 
-bool io_waitid_remove_all(struct io_ring_ctx *ctx, struct task_struct *task,
+bool io_waitid_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx,
 			  bool cancel_all)
 {
 	struct hlist_node *tmp;
@@ -194,7 +194,7 @@ bool io_waitid_remove_all(struct io_ring_ctx *ctx, struct task_struct *task,
 	lockdep_assert_held(&ctx->uring_lock);
 
 	hlist_for_each_entry_safe(req, tmp, &ctx->waitid_list, hash_node) {
-		if (!io_match_task_safe(req, task, cancel_all))
+		if (!io_match_task_safe(req, tctx, cancel_all))
 			continue;
 		hlist_del_init(&req->hash_node);
 		__io_waitid_cancel(ctx, req);
diff --git a/io_uring/waitid.h b/io_uring/waitid.h
index 956a8adafe8c..d5544aaf302a 100644
--- a/io_uring/waitid.h
+++ b/io_uring/waitid.h
@@ -11,5 +11,5 @@ int io_waitid_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
 int io_waitid(struct io_kiocb *req, unsigned int issue_flags);
 int io_waitid_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
 		     unsigned int issue_flags);
-bool io_waitid_remove_all(struct io_ring_ctx *ctx, struct task_struct *task,
+bool io_waitid_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx,
 			  bool cancel_all);
-- 
2.45.2



* [PATCH 2/3] io_uring: remove task ref helpers
  2024-11-03 17:49 [PATCHSET 0/3] Move io_kiocb from task_struct to io_uring_task Jens Axboe
  2024-11-03 17:49 ` [PATCH 1/3] io_uring: move cancelations to be io_uring_task based Jens Axboe
@ 2024-11-03 17:49 ` Jens Axboe
  2024-11-03 17:49 ` [PATCH 3/3] io_uring: move struct io_kiocb from task_struct to io_uring_task Jens Axboe
  2 siblings, 0 replies; 17+ messages in thread
From: Jens Axboe @ 2024-11-03 17:49 UTC (permalink / raw)
  To: io-uring; +Cc: Jens Axboe

They are only used right where they are defined, so just open-code
them inside io_put_task().

Signed-off-by: Jens Axboe <[email protected]>
---
 io_uring/io_uring.c | 31 ++++++++++---------------------
 1 file changed, 10 insertions(+), 21 deletions(-)

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 701cbd4670d8..496f61de0f9b 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -678,30 +678,19 @@ static void io_cqring_do_overflow_flush(struct io_ring_ctx *ctx)
 	mutex_unlock(&ctx->uring_lock);
 }
 
-/* can be called by any task */
-static void io_put_task_remote(struct task_struct *task)
-{
-	struct io_uring_task *tctx = task->io_uring;
-
-	percpu_counter_sub(&tctx->inflight, 1);
-	if (unlikely(atomic_read(&tctx->in_cancel)))
-		wake_up(&tctx->wait);
-	put_task_struct(task);
-}
-
-/* used by a task to put its own references */
-static void io_put_task_local(struct task_struct *task)
-{
-	task->io_uring->cached_refs++;
-}
-
 /* must to be called somewhat shortly after putting a request */
 static inline void io_put_task(struct task_struct *task)
 {
-	if (likely(task == current))
-		io_put_task_local(task);
-	else
-		io_put_task_remote(task);
+	struct io_uring_task *tctx = task->io_uring;
+
+	if (likely(task == current)) {
+		tctx->cached_refs++;
+	} else {
+		percpu_counter_sub(&tctx->inflight, 1);
+		if (unlikely(atomic_read(&tctx->in_cancel)))
+			wake_up(&tctx->wait);
+		put_task_struct(task);
+	}
 }
 
 void io_task_refs_refill(struct io_uring_task *tctx)
-- 
2.45.2



* [PATCH 3/3] io_uring: move struct io_kiocb from task_struct to io_uring_task
  2024-11-03 17:49 [PATCHSET 0/3] Move io_kiocb from task_struct to io_uring_task Jens Axboe
  2024-11-03 17:49 ` [PATCH 1/3] io_uring: move cancelations to be io_uring_task based Jens Axboe
  2024-11-03 17:49 ` [PATCH 2/3] io_uring: remove task ref helpers Jens Axboe
@ 2024-11-03 17:49 ` Jens Axboe
  2024-11-03 21:47   ` Pavel Begunkov
  2024-11-04 15:41   ` Pavel Begunkov
  2 siblings, 2 replies; 17+ messages in thread
From: Jens Axboe @ 2024-11-03 17:49 UTC (permalink / raw)
  To: io-uring; +Cc: Jens Axboe

Rather than store the task_struct itself in struct io_kiocb, store
the io_uring-specific task context, struct io_uring_task. The lifetimes
are the same in terms of io_uring, and this avoids doing some
dereferences through the task_struct. For the hot path of putting local
task references, we can deref req->tctx instead, which we'll need anyway
in that function regardless of whether it's local or remote references.

Signed-off-by: Jens Axboe <[email protected]>
---
 include/linux/io_uring/cmd.h   |  2 +-
 include/linux/io_uring_types.h |  3 ++-
 io_uring/cancel.c              |  2 +-
 io_uring/fdinfo.c              |  2 +-
 io_uring/io_uring.c            | 34 +++++++++++++++-------------------
 io_uring/msg_ring.c            |  4 ++--
 io_uring/notif.c               |  4 ++--
 io_uring/poll.c                |  3 +--
 io_uring/rw.c                  |  2 +-
 io_uring/tctx.c                |  1 +
 io_uring/timeout.c             |  6 +++---
 io_uring/uring_cmd.c           |  2 +-
 io_uring/waitid.c              |  2 +-
 13 files changed, 32 insertions(+), 35 deletions(-)

diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h
index c189d36ad55e..578a3fdf5c71 100644
--- a/include/linux/io_uring/cmd.h
+++ b/include/linux/io_uring/cmd.h
@@ -110,7 +110,7 @@ static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
 
 static inline struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd)
 {
-	return cmd_to_io_kiocb(cmd)->task;
+	return cmd_to_io_kiocb(cmd)->tctx->task;
 }
 
 #endif /* _LINUX_IO_URING_CMD_H */
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index a87927a392f2..ad5001102c86 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -84,6 +84,7 @@ struct io_uring_task {
 	/* submission side */
 	int				cached_refs;
 	const struct io_ring_ctx 	*last;
+	struct task_struct		*task;
 	struct io_wq			*io_wq;
 	struct file			*registered_rings[IO_RINGFD_REG_MAX];
 
@@ -633,7 +634,7 @@ struct io_kiocb {
 	struct io_cqe			cqe;
 
 	struct io_ring_ctx		*ctx;
-	struct task_struct		*task;
+	struct io_uring_task		*tctx;
 
 	union {
 		/* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */
diff --git a/io_uring/cancel.c b/io_uring/cancel.c
index bbca5cb69cb5..484193567839 100644
--- a/io_uring/cancel.c
+++ b/io_uring/cancel.c
@@ -205,7 +205,7 @@ int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
 		.opcode	= cancel->opcode,
 		.seq	= atomic_inc_return(&req->ctx->cancel_seq),
 	};
-	struct io_uring_task *tctx = req->task->io_uring;
+	struct io_uring_task *tctx = req->tctx;
 	int ret;
 
 	if (cd.flags & IORING_ASYNC_CANCEL_FD) {
diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c
index 8da0d9e4533a..efbec34ccb18 100644
--- a/io_uring/fdinfo.c
+++ b/io_uring/fdinfo.c
@@ -203,7 +203,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
 
 		hlist_for_each_entry(req, &hb->list, hash_node)
 			seq_printf(m, "  op=%d, task_works=%d\n", req->opcode,
-					task_work_pending(req->task));
+					task_work_pending(req->tctx->task));
 	}
 
 	if (has_lock)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 496f61de0f9b..d9a6a8703563 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -207,7 +207,7 @@ bool io_match_task_safe(struct io_kiocb *head, struct io_uring_task *tctx,
 {
 	bool matched;
 
-	if (tctx && head->task->io_uring != tctx)
+	if (tctx && head->tctx != tctx)
 		return false;
 	if (cancel_all)
 		return true;
@@ -408,11 +408,8 @@ static void io_clean_op(struct io_kiocb *req)
 		kfree(req->apoll);
 		req->apoll = NULL;
 	}
-	if (req->flags & REQ_F_INFLIGHT) {
-		struct io_uring_task *tctx = req->task->io_uring;
-
-		atomic_dec(&tctx->inflight_tracked);
-	}
+	if (req->flags & REQ_F_INFLIGHT)
+		atomic_dec(&req->tctx->inflight_tracked);
 	if (req->flags & REQ_F_CREDS)
 		put_cred(req->creds);
 	if (req->flags & REQ_F_ASYNC_DATA) {
@@ -426,7 +423,7 @@ static inline void io_req_track_inflight(struct io_kiocb *req)
 {
 	if (!(req->flags & REQ_F_INFLIGHT)) {
 		req->flags |= REQ_F_INFLIGHT;
-		atomic_inc(&req->task->io_uring->inflight_tracked);
+		atomic_inc(&req->tctx->inflight_tracked);
 	}
 }
 
@@ -515,7 +512,7 @@ static void io_prep_async_link(struct io_kiocb *req)
 static void io_queue_iowq(struct io_kiocb *req)
 {
 	struct io_kiocb *link = io_prep_linked_timeout(req);
-	struct io_uring_task *tctx = req->task->io_uring;
+	struct io_uring_task *tctx = req->tctx;
 
 	BUG_ON(!tctx);
 	BUG_ON(!tctx->io_wq);
@@ -530,7 +527,7 @@ static void io_queue_iowq(struct io_kiocb *req)
 	 * procedure rather than attempt to run this request (or create a new
 	 * worker for it).
 	 */
-	if (WARN_ON_ONCE(!same_thread_group(req->task, current)))
+	if (WARN_ON_ONCE(!same_thread_group(tctx->task, current)))
 		atomic_or(IO_WQ_WORK_CANCEL, &req->work.flags);
 
 	trace_io_uring_queue_async_work(req, io_wq_is_hashed(&req->work));
@@ -679,17 +676,17 @@ static void io_cqring_do_overflow_flush(struct io_ring_ctx *ctx)
 }
 
 /* must to be called somewhat shortly after putting a request */
-static inline void io_put_task(struct task_struct *task)
+static inline void io_put_task(struct io_kiocb *req)
 {
-	struct io_uring_task *tctx = task->io_uring;
+	struct io_uring_task *tctx = req->tctx;
 
-	if (likely(task == current)) {
+	if (likely(tctx->task == current)) {
 		tctx->cached_refs++;
 	} else {
 		percpu_counter_sub(&tctx->inflight, 1);
 		if (unlikely(atomic_read(&tctx->in_cancel)))
 			wake_up(&tctx->wait);
-		put_task_struct(task);
+		put_task_struct(tctx->task);
 	}
 }
 
@@ -1340,7 +1337,7 @@ static inline void io_req_local_work_add(struct io_kiocb *req,
 
 static void io_req_normal_work_add(struct io_kiocb *req)
 {
-	struct io_uring_task *tctx = req->task->io_uring;
+	struct io_uring_task *tctx = req->tctx;
 	struct io_ring_ctx *ctx = req->ctx;
 
 	/* task_work already pending, we're done */
@@ -1359,7 +1356,7 @@ static void io_req_normal_work_add(struct io_kiocb *req)
 		return;
 	}
 
-	if (likely(!task_work_add(req->task, &tctx->task_work, ctx->notify_method)))
+	if (likely(!task_work_add(tctx->task, &tctx->task_work, ctx->notify_method)))
 		return;
 
 	io_fallback_tw(tctx, false);
@@ -1476,8 +1473,7 @@ static void io_req_task_cancel(struct io_kiocb *req, struct io_tw_state *ts)
 void io_req_task_submit(struct io_kiocb *req, struct io_tw_state *ts)
 {
 	io_tw_lock(req->ctx, ts);
-	/* req->task == current here, checking PF_EXITING is safe */
-	if (unlikely(req->task->flags & PF_EXITING))
+	if (unlikely(current->flags & PF_EXITING))
 		io_req_defer_failed(req, -EFAULT);
 	else if (req->flags & REQ_F_FORCE_ASYNC)
 		io_queue_iowq(req);
@@ -1561,7 +1557,7 @@ static void io_free_batch_list(struct io_ring_ctx *ctx,
 		}
 		io_put_file(req);
 		io_req_put_rsrc_nodes(req);
-		io_put_task(req->task);
+		io_put_task(req);
 
 		node = req->comp_list.next;
 		io_req_add_to_cache(req, ctx);
@@ -2181,7 +2177,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	req->flags = (__force io_req_flags_t) sqe_flags;
 	req->cqe.user_data = READ_ONCE(sqe->user_data);
 	req->file = NULL;
-	req->task = current;
+	req->tctx = current->io_uring;
 	req->cancel_seq_set = false;
 
 	if (unlikely(opcode >= IORING_OP_LAST)) {
diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c
index 99af39e1d0fb..e63af34004b7 100644
--- a/io_uring/msg_ring.c
+++ b/io_uring/msg_ring.c
@@ -89,8 +89,8 @@ static void io_msg_tw_complete(struct io_kiocb *req, struct io_tw_state *ts)
 static int io_msg_remote_post(struct io_ring_ctx *ctx, struct io_kiocb *req,
 			      int res, u32 cflags, u64 user_data)
 {
-	req->task = READ_ONCE(ctx->submitter_task);
-	if (!req->task) {
+	req->tctx = READ_ONCE(ctx->submitter_task->io_uring);
+	if (!req->tctx) {
 		kmem_cache_free(req_cachep, req);
 		return -EOWNERDEAD;
 	}
diff --git a/io_uring/notif.c b/io_uring/notif.c
index 8dfbb0bd8e4d..ee3a33510b3c 100644
--- a/io_uring/notif.c
+++ b/io_uring/notif.c
@@ -89,7 +89,7 @@ static int io_link_skb(struct sk_buff *skb, struct ubuf_info *uarg)
 
 	/* make sure all noifications can be finished in the same task_work */
 	if (unlikely(notif->ctx != prev_notif->ctx ||
-		     notif->task != prev_notif->task))
+		     notif->tctx != prev_notif->tctx))
 		return -EEXIST;
 
 	nd->head = prev_nd->head;
@@ -115,7 +115,7 @@ struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx)
 	notif->opcode = IORING_OP_NOP;
 	notif->flags = 0;
 	notif->file = NULL;
-	notif->task = current;
+	notif->tctx = current->io_uring;
 	io_get_task_refs(1);
 	notif->file_node = NULL;
 	notif->buf_node = NULL;
diff --git a/io_uring/poll.c b/io_uring/poll.c
index 7db3010b5733..56332893a4b0 100644
--- a/io_uring/poll.c
+++ b/io_uring/poll.c
@@ -224,8 +224,7 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts)
 {
 	int v;
 
-	/* req->task == current here, checking PF_EXITING is safe */
-	if (unlikely(req->task->flags & PF_EXITING))
+	if (unlikely(current->flags & PF_EXITING))
 		return -ECANCELED;
 
 	do {
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 144730344c0f..e368b9afde03 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -435,7 +435,7 @@ static bool io_rw_should_reissue(struct io_kiocb *req)
 	 * Play it safe and assume not safe to re-import and reissue if we're
 	 * not in the original thread group (or in task context).
 	 */
-	if (!same_thread_group(req->task, current) || !in_task())
+	if (!same_thread_group(req->tctx->task, current) || !in_task())
 		return false;
 	return true;
 }
diff --git a/io_uring/tctx.c b/io_uring/tctx.c
index c043fe93a3f2..503f3ff8bc4f 100644
--- a/io_uring/tctx.c
+++ b/io_uring/tctx.c
@@ -81,6 +81,7 @@ __cold int io_uring_alloc_task_context(struct task_struct *task,
 		return ret;
 	}
 
+	tctx->task = task;
 	xa_init(&tctx->xa);
 	init_waitqueue_head(&tctx->wait);
 	atomic_set(&tctx->in_cancel, 0);
diff --git a/io_uring/timeout.c b/io_uring/timeout.c
index 31fbea366d43..fd1f58f68fa1 100644
--- a/io_uring/timeout.c
+++ b/io_uring/timeout.c
@@ -305,13 +305,13 @@ static void io_req_task_link_timeout(struct io_kiocb *req, struct io_tw_state *t
 	int ret = -ENOENT;
 
 	if (prev) {
-		if (!(req->task->flags & PF_EXITING)) {
+		if (!(current->flags & PF_EXITING)) {
 			struct io_cancel_data cd = {
 				.ctx		= req->ctx,
 				.data		= prev->cqe.user_data,
 			};
 
-			ret = io_try_cancel(req->task->io_uring, &cd, 0);
+			ret = io_try_cancel(req->tctx, &cd, 0);
 		}
 		io_req_set_res(req, ret ?: -ETIME, 0);
 		io_req_task_complete(req, ts);
@@ -649,7 +649,7 @@ static bool io_match_task(struct io_kiocb *head, struct io_uring_task *tctx,
 {
 	struct io_kiocb *req;
 
-	if (tctx && head->task->io_uring != tctx)
+	if (tctx && head->tctx != tctx)
 		return false;
 	if (cancel_all)
 		return true;
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index f88fbc9869d0..40b8b777ba12 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -61,7 +61,7 @@ bool io_uring_try_cancel_uring_cmd(struct io_ring_ctx *ctx,
 				struct io_uring_cmd);
 		struct file *file = req->file;
 
-		if (!cancel_all && req->task->io_uring != tctx)
+		if (!cancel_all && req->tctx != tctx)
 			continue;
 
 		if (cmd->flags & IORING_URING_CMD_CANCELABLE) {
diff --git a/io_uring/waitid.c b/io_uring/waitid.c
index 9b7c23f96c47..daef5dd644f0 100644
--- a/io_uring/waitid.c
+++ b/io_uring/waitid.c
@@ -331,7 +331,7 @@ int io_waitid(struct io_kiocb *req, unsigned int issue_flags)
 	hlist_add_head(&req->hash_node, &ctx->waitid_list);
 
 	init_waitqueue_func_entry(&iwa->wo.child_wait, io_waitid_wait);
-	iwa->wo.child_wait.private = req->task;
+	iwa->wo.child_wait.private = req->tctx->task;
 	iw->head = &current->signal->wait_chldexit;
 	add_wait_queue(iw->head, &iwa->wo.child_wait);
 
-- 
2.45.2



* Re: [PATCH 3/3] io_uring: move struct io_kiocb from task_struct to io_uring_task
  2024-11-03 17:49 ` [PATCH 3/3] io_uring: move struct io_kiocb from task_struct to io_uring_task Jens Axboe
@ 2024-11-03 21:47   ` Pavel Begunkov
  2024-11-03 21:54     ` Jens Axboe
  2024-11-04 15:41   ` Pavel Begunkov
  1 sibling, 1 reply; 17+ messages in thread
From: Pavel Begunkov @ 2024-11-03 21:47 UTC (permalink / raw)
  To: Jens Axboe, io-uring

On 11/3/24 17:49, Jens Axboe wrote:
...
> diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h
...
>   	nd->head = prev_nd->head;
> @@ -115,7 +115,7 @@ struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx)
>   	notif->opcode = IORING_OP_NOP;
>   	notif->flags = 0;
>   	notif->file = NULL;
> -	notif->task = current;
> +	notif->tctx = current->io_uring;
>   	io_get_task_refs(1);
>   	notif->file_node = NULL;
>   	notif->buf_node = NULL;
> diff --git a/io_uring/poll.c b/io_uring/poll.c
> index 7db3010b5733..56332893a4b0 100644
> --- a/io_uring/poll.c
> +++ b/io_uring/poll.c
> @@ -224,8 +224,7 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts)
>   {
>   	int v;
>   
> -	/* req->task == current here, checking PF_EXITING is safe */
> -	if (unlikely(req->task->flags & PF_EXITING))
> +	if (unlikely(current->flags & PF_EXITING))
>   		return -ECANCELED;

Unlike what the comment says, req->task doesn't have to match current,
in which case the new check does nothing and it'll break in many very
interesting ways.

-- 
Pavel Begunkov


* Re: [PATCH 3/3] io_uring: move struct io_kiocb from task_struct to io_uring_task
  2024-11-03 21:47   ` Pavel Begunkov
@ 2024-11-03 21:54     ` Jens Axboe
  2024-11-03 22:05       ` Pavel Begunkov
  0 siblings, 1 reply; 17+ messages in thread
From: Jens Axboe @ 2024-11-03 21:54 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 11/3/24 2:47 PM, Pavel Begunkov wrote:
> On 11/3/24 17:49, Jens Axboe wrote:
> ...
>> diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h
> ...
>>       nd->head = prev_nd->head;
>> @@ -115,7 +115,7 @@ struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx)
>>       notif->opcode = IORING_OP_NOP;
>>       notif->flags = 0;
>>       notif->file = NULL;
>> -    notif->task = current;
>> +    notif->tctx = current->io_uring;
>>       io_get_task_refs(1);
>>       notif->file_node = NULL;
>>       notif->buf_node = NULL;
>> diff --git a/io_uring/poll.c b/io_uring/poll.c
>> index 7db3010b5733..56332893a4b0 100644
>> --- a/io_uring/poll.c
>> +++ b/io_uring/poll.c
>> @@ -224,8 +224,7 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts)
>>   {
>>       int v;
>>   -    /* req->task == current here, checking PF_EXITING is safe */
>> -    if (unlikely(req->task->flags & PF_EXITING))
>> +    if (unlikely(current->flags & PF_EXITING))
>>           return -ECANCELED;
> 
> Unlike what the comment says, req->task doesn't have to match current,
> in which case the new check does nothing and it'll break in many very
> interesting ways.

In which cases does it not outside of fallback?

-- 
Jens Axboe



* Re: [PATCH 3/3] io_uring: move struct io_kiocb from task_struct to io_uring_task
  2024-11-03 21:54     ` Jens Axboe
@ 2024-11-03 22:05       ` Pavel Begunkov
  2024-11-03 22:18         ` Jens Axboe
  0 siblings, 1 reply; 17+ messages in thread
From: Pavel Begunkov @ 2024-11-03 22:05 UTC (permalink / raw)
  To: Jens Axboe, io-uring

On 11/3/24 21:54, Jens Axboe wrote:
> On 11/3/24 2:47 PM, Pavel Begunkov wrote:
>> On 11/3/24 17:49, Jens Axboe wrote:
>> ...
>>> diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h
>> ...
>>>        nd->head = prev_nd->head;
>>> @@ -115,7 +115,7 @@ struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx)
>>>        notif->opcode = IORING_OP_NOP;
>>>        notif->flags = 0;
>>>        notif->file = NULL;
>>> -    notif->task = current;
>>> +    notif->tctx = current->io_uring;
>>>        io_get_task_refs(1);
>>>        notif->file_node = NULL;
>>>        notif->buf_node = NULL;
>>> diff --git a/io_uring/poll.c b/io_uring/poll.c
>>> index 7db3010b5733..56332893a4b0 100644
>>> --- a/io_uring/poll.c
>>> +++ b/io_uring/poll.c
>>> @@ -224,8 +224,7 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts)
>>>    {
>>>        int v;
>>>    -    /* req->task == current here, checking PF_EXITING is safe */
>>> -    if (unlikely(req->task->flags & PF_EXITING))
>>> +    if (unlikely(current->flags & PF_EXITING))
>>>            return -ECANCELED;
>>
>> Unlike what the comment says, req->task doesn't have to match current,
>> in which case the new check does nothing and it'll break in many very
>> interesting ways.
> 
> In which cases does it not outside of fallback?

I think it can only be fallback path
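
For context, that's the ctx->fallback_work path: task_work that can no
longer run from the original task is punted to a workqueue, and the
handlers then run from a kworker. Roughly, paraphrasing io_fallback_tw()
in io_uring.c:

	/* handlers later run from a kworker, so current != req->task */
	if (llist_add(&req->io_task_work.node, &req->ctx->fallback_llist))
		schedule_delayed_work(&req->ctx->fallback_work, 1);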

-- 
Pavel Begunkov


* Re: [PATCH 3/3] io_uring: move struct io_kiocb from task_struct to io_uring_task
  2024-11-03 22:05       ` Pavel Begunkov
@ 2024-11-03 22:18         ` Jens Axboe
  2024-11-03 22:36           ` Pavel Begunkov
  0 siblings, 1 reply; 17+ messages in thread
From: Jens Axboe @ 2024-11-03 22:18 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 11/3/24 3:05 PM, Pavel Begunkov wrote:
> On 11/3/24 21:54, Jens Axboe wrote:
>> On 11/3/24 2:47 PM, Pavel Begunkov wrote:
>>> On 11/3/24 17:49, Jens Axboe wrote:
>>> ...
>>>> diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h
>>> ...
>>>>        nd->head = prev_nd->head;
>>>> @@ -115,7 +115,7 @@ struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx)
>>>>        notif->opcode = IORING_OP_NOP;
>>>>        notif->flags = 0;
>>>>        notif->file = NULL;
>>>> -    notif->task = current;
>>>> +    notif->tctx = current->io_uring;
>>>>        io_get_task_refs(1);
>>>>        notif->file_node = NULL;
>>>>        notif->buf_node = NULL;
>>>> diff --git a/io_uring/poll.c b/io_uring/poll.c
>>>> index 7db3010b5733..56332893a4b0 100644
>>>> --- a/io_uring/poll.c
>>>> +++ b/io_uring/poll.c
>>>> @@ -224,8 +224,7 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts)
>>>>    {
>>>>        int v;
>>>>    -    /* req->task == current here, checking PF_EXITING is safe */
>>>> -    if (unlikely(req->task->flags & PF_EXITING))
>>>> +    if (unlikely(current->flags & PF_EXITING))
>>>>            return -ECANCELED;
>>>
>>> Unlike what the comment says, req->task doesn't have to match current,
>>> in which case the new check does nothing and it'll break in many very
>>> interesting ways.
>>
>> In which cases does it not outside of fallback?
> 
> I think it can only be fallback path

I think so too, that's what I was getting at. Hence I think we should just
change these PF_EXITING checks to be PF_KTHREAD instead. If we're invoked
from that kind of context, cancel.

I'll adjust.

-- 
Jens Axboe



* Re: [PATCH 3/3] io_uring: move struct io_kiocb from task_struct to io_uring_task
  2024-11-03 22:18         ` Jens Axboe
@ 2024-11-03 22:36           ` Pavel Begunkov
  2024-11-03 22:40             ` Jens Axboe
  0 siblings, 1 reply; 17+ messages in thread
From: Pavel Begunkov @ 2024-11-03 22:36 UTC (permalink / raw)
  To: Jens Axboe, io-uring

On 11/3/24 22:18, Jens Axboe wrote:
> On 11/3/24 3:05 PM, Pavel Begunkov wrote:
>> On 11/3/24 21:54, Jens Axboe wrote:
>>> On 11/3/24 2:47 PM, Pavel Begunkov wrote:
>>>> On 11/3/24 17:49, Jens Axboe wrote:
>>>> ...
>>>>> diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h
>>>> ...
>>>>>         nd->head = prev_nd->head;
>>>>> @@ -115,7 +115,7 @@ struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx)
>>>>>         notif->opcode = IORING_OP_NOP;
>>>>>         notif->flags = 0;
>>>>>         notif->file = NULL;
>>>>> -    notif->task = current;
>>>>> +    notif->tctx = current->io_uring;
>>>>>         io_get_task_refs(1);
>>>>>         notif->file_node = NULL;
>>>>>         notif->buf_node = NULL;
>>>>> diff --git a/io_uring/poll.c b/io_uring/poll.c
>>>>> index 7db3010b5733..56332893a4b0 100644
>>>>> --- a/io_uring/poll.c
>>>>> +++ b/io_uring/poll.c
>>>>> @@ -224,8 +224,7 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts)
>>>>>     {
>>>>>         int v;
>>>>>     -    /* req->task == current here, checking PF_EXITING is safe */
>>>>> -    if (unlikely(req->task->flags & PF_EXITING))
>>>>> +    if (unlikely(current->flags & PF_EXITING))
>>>>>             return -ECANCELED;
>>>>
>>>> Unlike what the comment says, req->task doesn't have to match current,
>>>> in which case the new check does nothing and it'll break in many very
>>>> interesting ways.
>>>
>>> In which cases does it not outside of fallback?
>>
>> I think it can only be fallback path
> 
> I think so too, that's what I was getting at. Hence I think we should just
> change these PF_EXITING checks to be PF_KTHREAD instead. If we're invoked
> from that kind of context, cancel.

Replacing with just a PF_KTHREAD check won't be right, you can
get here with the right task but after it has been half killed and
marked PF_EXITING.

-- 
Pavel Begunkov


* Re: [PATCH 3/3] io_uring: move struct io_kiocb from task_struct to io_uring_task
  2024-11-03 22:36           ` Pavel Begunkov
@ 2024-11-03 22:40             ` Jens Axboe
  2024-11-03 22:47               ` Pavel Begunkov
  0 siblings, 1 reply; 17+ messages in thread
From: Jens Axboe @ 2024-11-03 22:40 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 11/3/24 3:36 PM, Pavel Begunkov wrote:
> On 11/3/24 22:18, Jens Axboe wrote:
>> On 11/3/24 3:05 PM, Pavel Begunkov wrote:
>>> On 11/3/24 21:54, Jens Axboe wrote:
>>>> On 11/3/24 2:47 PM, Pavel Begunkov wrote:
>>>>> On 11/3/24 17:49, Jens Axboe wrote:
>>>>> ...
>>>>>> diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h
>>>>> ...
>>>>>>         nd->head = prev_nd->head;
>>>>>> @@ -115,7 +115,7 @@ struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx)
>>>>>>         notif->opcode = IORING_OP_NOP;
>>>>>>         notif->flags = 0;
>>>>>>         notif->file = NULL;
>>>>>> -    notif->task = current;
>>>>>> +    notif->tctx = current->io_uring;
>>>>>>         io_get_task_refs(1);
>>>>>>         notif->file_node = NULL;
>>>>>>         notif->buf_node = NULL;
>>>>>> diff --git a/io_uring/poll.c b/io_uring/poll.c
>>>>>> index 7db3010b5733..56332893a4b0 100644
>>>>>> --- a/io_uring/poll.c
>>>>>> +++ b/io_uring/poll.c
>>>>>> @@ -224,8 +224,7 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts)
>>>>>>     {
>>>>>>         int v;
>>>>>>     -    /* req->task == current here, checking PF_EXITING is safe */
>>>>>> -    if (unlikely(req->task->flags & PF_EXITING))
>>>>>> +    if (unlikely(current->flags & PF_EXITING))
>>>>>>             return -ECANCELED;
>>>>>
>>>>> Unlike what the comment says, req->task doesn't have to match current,
>>>>> in which case the new check does nothing and it'll break in many very
>>>>> interesting ways.
>>>>
>>>> In which cases does it not outside of fallback?
>>>
>>> I think it can only be fallback path
>>
>> I think so too, that's what I was getting at. Hence I think we should just
>> change these PF_EXITING checks to be PF_KTHREAD instead. If we're invoked
>> from that kind of context, cancel.
> 
> Replacing with just a PF_KTHREAD check won't be right, you can
> get here with the right task but after it has been half killed and
> marked PF_EXITING.

Right, but:

if (current->flags & (PF_EXITING | PF_KTHREAD))
	...

should be fine as it'll catch both cases with the single check.

-- 
Jens Axboe


* Re: [PATCH 3/3] io_uring: move struct io_kiocb from task_struct to io_uring_task
  2024-11-03 22:40             ` Jens Axboe
@ 2024-11-03 22:47               ` Pavel Begunkov
  2024-11-03 22:51                 ` Jens Axboe
  0 siblings, 1 reply; 17+ messages in thread
From: Pavel Begunkov @ 2024-11-03 22:47 UTC (permalink / raw)
  To: Jens Axboe, io-uring

On 11/3/24 22:40, Jens Axboe wrote:
> On 11/3/24 3:36 PM, Pavel Begunkov wrote:
>> On 11/3/24 22:18, Jens Axboe wrote:
>>> On 11/3/24 3:05 PM, Pavel Begunkov wrote:
>>>> On 11/3/24 21:54, Jens Axboe wrote:
>>>>> On 11/3/24 2:47 PM, Pavel Begunkov wrote:
>>>>>> On 11/3/24 17:49, Jens Axboe wrote:
>>>>>> ...
>>>>>>> diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h
>>>>>> ...
>>>>>>>          nd->head = prev_nd->head;
>>>>>>> @@ -115,7 +115,7 @@ struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx)
>>>>>>>          notif->opcode = IORING_OP_NOP;
>>>>>>>          notif->flags = 0;
>>>>>>>          notif->file = NULL;
>>>>>>> -    notif->task = current;
>>>>>>> +    notif->tctx = current->io_uring;
>>>>>>>          io_get_task_refs(1);
>>>>>>>          notif->file_node = NULL;
>>>>>>>          notif->buf_node = NULL;
>>>>>>> diff --git a/io_uring/poll.c b/io_uring/poll.c
>>>>>>> index 7db3010b5733..56332893a4b0 100644
>>>>>>> --- a/io_uring/poll.c
>>>>>>> +++ b/io_uring/poll.c
>>>>>>> @@ -224,8 +224,7 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts)
>>>>>>>      {
>>>>>>>          int v;
>>>>>>>      -    /* req->task == current here, checking PF_EXITING is safe */
>>>>>>> -    if (unlikely(req->task->flags & PF_EXITING))
>>>>>>> +    if (unlikely(current->flags & PF_EXITING))
>>>>>>>              return -ECANCELED;
>>>>>>
>>>>>> Unlike what the comment says, req->task doesn't have to match current,
>>>>>> in which case the new check does nothing and it'll break in many very
>>>>>> interesting ways.
>>>>>
>>>>> In which cases does it not outside of fallback?
>>>>
>>>> I think it can only be fallback path
>>>
>>> I think so too, that's what I was getting at. Hence I think we should just
>>> change these PF_EXITING checks to be PF_KTHREAD instead. If we're invoked
>>> from that kind of context, cancel.
>>
>> Replacing with just a PF_KTHREAD check won't be right, you can
>> get here with the right task but after it has been half killed and
>> marked PF_EXITING.
> 
> Right, but:
> 
> if (current->flags & (PF_EXITING | PF_KTHREAD))
> 	...
> 
> should be fine as it'll catch both cases with the single check.

Was thinking to mention it, it should be fine but feels wrong. Instead
of directly checking what we want, i.e. whether the task we want to run
the request from is dead, we are now doing "let's check if the task
is dead. Ah yes, let's also see if it's PF_KTHREAD which indirectly
implies that the task is dead because of implementation details."

Should be fine to leave that, but why not just leave the check
how it was? Even if it now requires an extra deref through tctx.
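
That is, keep the exact semantics and just reach it through the new
field (sketch):

	/* same check as before the patch, now via req->tctx */
	if (unlikely(req->tctx->task->flags & PF_EXITING))
		return -ECANCELED;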

-- 
Pavel Begunkov


* Re: [PATCH 3/3] io_uring: move struct io_kiocb from task_struct to io_uring_task
  2024-11-03 22:47               ` Pavel Begunkov
@ 2024-11-03 22:51                 ` Jens Axboe
  2024-11-03 23:17                   ` Pavel Begunkov
  0 siblings, 1 reply; 17+ messages in thread
From: Jens Axboe @ 2024-11-03 22:51 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 11/3/24 3:47 PM, Pavel Begunkov wrote:
> On 11/3/24 22:40, Jens Axboe wrote:
>> On 11/3/24 3:36 PM, Pavel Begunkov wrote:
>>> On 11/3/24 22:18, Jens Axboe wrote:
>>>> On 11/3/24 3:05 PM, Pavel Begunkov wrote:
>>>>> On 11/3/24 21:54, Jens Axboe wrote:
>>>>>> On 11/3/24 2:47 PM, Pavel Begunkov wrote:
>>>>>>> On 11/3/24 17:49, Jens Axboe wrote:
>>>>>>> ...
>>>>>>>> diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h
>>>>>>> ...
>>>>>>>>          nd->head = prev_nd->head;
>>>>>>>> @@ -115,7 +115,7 @@ struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx)
>>>>>>>>          notif->opcode = IORING_OP_NOP;
>>>>>>>>          notif->flags = 0;
>>>>>>>>          notif->file = NULL;
>>>>>>>> -    notif->task = current;
>>>>>>>> +    notif->tctx = current->io_uring;
>>>>>>>>          io_get_task_refs(1);
>>>>>>>>          notif->file_node = NULL;
>>>>>>>>          notif->buf_node = NULL;
>>>>>>>> diff --git a/io_uring/poll.c b/io_uring/poll.c
>>>>>>>> index 7db3010b5733..56332893a4b0 100644
>>>>>>>> --- a/io_uring/poll.c
>>>>>>>> +++ b/io_uring/poll.c
>>>>>>>> @@ -224,8 +224,7 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts)
>>>>>>>>      {
>>>>>>>>          int v;
>>>>>>>>      -    /* req->task == current here, checking PF_EXITING is safe */
>>>>>>>> -    if (unlikely(req->task->flags & PF_EXITING))
>>>>>>>> +    if (unlikely(current->flags & PF_EXITING))
>>>>>>>>              return -ECANCELED;
>>>>>>>
>>>>>>> Unlike what the comment says, req->task doesn't have to match current,
>>>>>>> in which case the new check does nothing and it'll break in many very
>>>>>>> interesting ways.
>>>>>>
>>>>>> In which cases does it not outside of fallback?
>>>>>
>>>>> I think it can only be fallback path
>>>>
>>>> I think so too, that's what I was getting at. Hence I think we should just
>>>> change these PF_EXITING checks to be PF_KTHREAD instead. If we're invoked
>>>> from that kind of context, cancel.
>>>
>>> Replacing with just a PF_KTHREAD check won't be right, you can
>>> get here with the right task but after it has been half killed and
>>> marked PF_EXITING.
>>
>> Right, but:
>>
>> if (current->flags & (PF_EXITING | PF_KTHREAD))
>>     ...
>>
>> should be fine as it'll catch both cases with the single check.
> 
> Was thinking to mention it, it should be fine but feels wrong. Instead
> of directly checking what we want, i.e. whether the task we want to run
> the request from is dead, we are now doing "let's check if the task
> is dead. Ah yes, let's also see if it's PF_KTHREAD which indirectly
> implies that the task is dead because of implementation details."
> 
> Should be fine to leave that, but why not just leave the check
> how it was? Even if it now requires an extra deref through tctx.

I think it'd be better with a comment, I added one that says:

/* exiting original task or fallback work, cancel */

We can retain the original check, but it's actually a data race to check
->flags from a different task. Yes for this case we're in fallback work
and the value should be long since stable, but seems prudent to just
check for the two criteria we care about. At least the comment will be
correct now ;-)

The extra deref mostly doesn't matter here, only potentially for
io_req_task_submit().
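
So io_poll_check_events() would end up with something like this (a
sketch, may still get tweaked):

	/* exiting original task or fallback work, cancel */
	if (unlikely(current->flags & (PF_EXITING | PF_KTHREAD)))
		return -ECANCELED;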

-- 
Jens Axboe


* Re: [PATCH 3/3] io_uring: move struct io_kiocb from task_struct to io_uring_task
  2024-11-03 22:51                 ` Jens Axboe
@ 2024-11-03 23:17                   ` Pavel Begunkov
  2024-11-03 23:25                     ` Jens Axboe
  0 siblings, 1 reply; 17+ messages in thread
From: Pavel Begunkov @ 2024-11-03 23:17 UTC (permalink / raw)
  To: Jens Axboe, io-uring

On 11/3/24 22:51, Jens Axboe wrote:
> On 11/3/24 3:47 PM, Pavel Begunkov wrote:
>> On 11/3/24 22:40, Jens Axboe wrote:
...
>>> Right, but:
>>>
>>> if (current->flags & (PF_EXITING | PF_KTHREAD))
>>>      ...
>>>
>>> should be fine as it'll catch both cases with the single check.
>>
>> Was thinking to mention it, it should be fine but feels wrong. Instead
>> of directly checking what we want, i.e. whether the task we want to run
>> the request from is dead, we are now doing "let's check if the task
>> is dead. Ah yes, let's also see if it's PF_KTHREAD which indirectly
>> implies that the task is dead because of implementation details."
>>
>> Should be fine to leave that, but why not just leave the check
>> how it was? Even if it now requires an extra deref through tctx.
> 
> I think it'd be better with a comment, I added one that says:
> 
> /* exiting original task or fallback work, cancel */
> 
> We can retain the original check, but it's actually a data race to check
> ->flags from a different task. Yes for this case we're in fallback work
> and the value should be long since stable, but seems prudent to just
> check for the two criteria we care about. At least the comment will be
> correct now ;-)

I don't think whack-a-mole'ing all cases is a good thing,
but at least it can get moved into a helper and be reused in
all other places.

if (io_tw_should_terminate(req, tw))
	fail;

should be more readable

-- 
Pavel Begunkov


* Re: [PATCH 3/3] io_uring: move struct io_kiocb from task_struct to io_uring_task
  2024-11-03 23:17                   ` Pavel Begunkov
@ 2024-11-03 23:25                     ` Jens Axboe
  0 siblings, 0 replies; 17+ messages in thread
From: Jens Axboe @ 2024-11-03 23:25 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 11/3/24 4:17 PM, Pavel Begunkov wrote:
> On 11/3/24 22:51, Jens Axboe wrote:
>> On 11/3/24 3:47 PM, Pavel Begunkov wrote:
>>> On 11/3/24 22:40, Jens Axboe wrote:
> ...
>>>> Right, but:
>>>>
>>>> if (current->flags & (PF_EXITING | PF_KTHREAD))
>>>>      ...
>>>>
>>>> should be fine as it'll catch both cases with the single check.
>>>
>>> Was thinking to mention it, it should be fine buf feels wrong. Instead
>>> of directly checking what we want, i.e. whether the task we want to run
>>> the request from is dead, we are now doing "let's check if the task
>>> is dead. Ah yes, let's also see if it's PF_KTHREAD which indirectly
>>> implies that the task is dead because of implementation details."
>>>
>>> Should be fine to leave that, but why not just leave the check
>>> how it was? Even if it now requires an extra deref through tctx.
>>
>> I think it'd be better with a comment, I added one that says:
>>
>> /* exiting original task or fallback work, cancel */
>>
>> We can retain the original check, but it's actually a data race to check
>> ->flags from a different task. Yes for this case we're in fallback work
>> and the value should be long since stable, but seems prudent to just
>> check for the two criteria we care about. At least the comment will be
>> correct now ;-)
> 
> I don't think whack-a-mole'ing all cases is a good thing,
> but at least it can get moved into a helper and be reused in
> all other places.
> 
> if (io_tw_should_terminate(req, tw))
>     fail;
> 
> should be more readable

There's only 3 spots, but yeah we can add a helper for this with a bit
more of a fulfilling comment. Will do that.
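
Something along these lines, perhaps (just a sketch using the name from
above; the final helper, signature, and comment may differ):

	/*
	 * Terminate the request if the original submitting task is exiting,
	 * or if this is fallback work, which runs off a kthread/kworker
	 * rather than the submitting task.
	 */
	static inline bool io_tw_should_terminate(struct io_kiocb *req,
						  struct io_tw_state *ts)
	{
		return current->flags & (PF_EXITING | PF_KTHREAD);
	}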

-- 
Jens Axboe


* Re: [PATCH 3/3] io_uring: move struct io_kiocb from task_struct to io_uring_task
  2024-11-03 17:49 ` [PATCH 3/3] io_uring: move struct io_kiocb from task_struct to io_uring_task Jens Axboe
  2024-11-03 21:47   ` Pavel Begunkov
@ 2024-11-04 15:41   ` Pavel Begunkov
  2024-11-04 16:16     ` Jens Axboe
  1 sibling, 1 reply; 17+ messages in thread
From: Pavel Begunkov @ 2024-11-04 15:41 UTC (permalink / raw)
  To: Jens Axboe, io-uring

On 11/3/24 17:49, Jens Axboe wrote:
> Rather than store the task_struct itself in struct io_kiocb, store
> the io_uring-specific task context, struct io_uring_task. The lifetimes
> are the same in terms of io_uring, and this avoids doing some
> dereferences through the task_struct. For the hot path of putting local
> task references, we can

Makes me wonder, is __io_submit_flush_completions() the only hot
place it tries to improve? It doesn't have to look into the task
there, but on the other hand we need to do it at init.
If that's costly, for DEFER_TASKRUN we can get rid of per task
counting, the task is pinned together with the ctx, and the task
exit path can count the number of requests allocated.

if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN))
	io_task_get_ref();

if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN))
	io_task_put_ref();

But can be further improved

-- 
Pavel Begunkov


* Re: [PATCH 3/3] io_uring: move struct io_kiocb from task_struct to io_uring_task
  2024-11-04 15:41   ` Pavel Begunkov
@ 2024-11-04 16:16     ` Jens Axboe
  2024-11-04 16:43       ` Pavel Begunkov
  0 siblings, 1 reply; 17+ messages in thread
From: Jens Axboe @ 2024-11-04 16:16 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 11/4/24 8:41 AM, Pavel Begunkov wrote:
> On 11/3/24 17:49, Jens Axboe wrote:
>> Rather than store the task_struct itself in struct io_kiocb, store
>> the io_uring-specific task context, struct io_uring_task. The lifetimes
>> are the same in terms of io_uring, and this avoids doing some
>> dereferences through the task_struct. For the hot path of putting local
>> task references, we can
> 
> Makes me wonder, is __io_submit_flush_completions() the only hot
> place it tries to improve? It doesn't have to look into the task
> there but on the other hand we need to do it that init.
> If that's costly, for DEFER_TASKRUN we can get rid of per task
> counting, the task is pinned together with the ctx, and the task
> exit path can count the number of requests allocated.
> 
> if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN))
>     io_task_get_ref();
> 
> if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN))
>     io_task_put_ref();
> 
> But can be further improved

Avoiding task refs would surely be useful. For SINGLE_ISSUER, no?

-- 
Jens Axboe


* Re: [PATCH 3/3] io_uring: move struct io_kiocb from task_struct to io_uring_task
  2024-11-04 16:16     ` Jens Axboe
@ 2024-11-04 16:43       ` Pavel Begunkov
  0 siblings, 0 replies; 17+ messages in thread
From: Pavel Begunkov @ 2024-11-04 16:43 UTC (permalink / raw)
  To: Jens Axboe, io-uring

On 11/4/24 16:16, Jens Axboe wrote:
> On 11/4/24 8:41 AM, Pavel Begunkov wrote:
>> On 11/3/24 17:49, Jens Axboe wrote:
>>> Rather than store the task_struct itself in struct io_kiocb, store
>>> the io_uring-specific task context, struct io_uring_task. The lifetimes
>>> are the same in terms of io_uring, and this avoids doing some
>>> dereferences through the task_struct. For the hot path of putting local
>>> task references, we can
>>
>> Makes me wonder, is __io_submit_flush_completions() the only hot
>> place it tries to improve? It doesn't have to look into the task
>> there, but on the other hand we need to do it at init.
>> If that's costly, for DEFER_TASKRUN we can get rid of per task
>> counting, the task is pinned together with the ctx, and the task
>> exit path can count the number of requests allocated.
>>
>> if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN))
>>      io_task_get_ref();
>>
>> if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN))
>>      io_task_put_ref();
>>
>> But can be further improved
> 
> Avoiding task refs would surely be useful. For SINGLE_ISSUER, no?

Perhaps, but it doesn't imply single waiter / completer task.
IOPOLL would need to be checked and possibly there might be
races with io-wq. In general, all optimisations just got
shifted into DEFER_TASKRUN and SINGLE_ISSUER is not that useful
apart from carrying the semantics.

-- 
Pavel Begunkov


end of thread

Thread overview: 17+ messages
2024-11-03 17:49 [PATCHSET 0/3] Move io_kiocb from task_struct to io_uring_task Jens Axboe
2024-11-03 17:49 ` [PATCH 1/3] io_uring: move cancelations to be io_uring_task based Jens Axboe
2024-11-03 17:49 ` [PATCH 2/3] io_uring: remove task ref helpers Jens Axboe
2024-11-03 17:49 ` [PATCH 3/3] io_uring: move struct io_kiocb from task_struct to io_uring_task Jens Axboe
2024-11-03 21:47   ` Pavel Begunkov
2024-11-03 21:54     ` Jens Axboe
2024-11-03 22:05       ` Pavel Begunkov
2024-11-03 22:18         ` Jens Axboe
2024-11-03 22:36           ` Pavel Begunkov
2024-11-03 22:40             ` Jens Axboe
2024-11-03 22:47               ` Pavel Begunkov
2024-11-03 22:51                 ` Jens Axboe
2024-11-03 23:17                   ` Pavel Begunkov
2024-11-03 23:25                     ` Jens Axboe
2024-11-04 15:41   ` Pavel Begunkov
2024-11-04 16:16     ` Jens Axboe
2024-11-04 16:43       ` Pavel Begunkov
