public inbox for [email protected]
 help / color / mirror / Atom feed
* [PATCHSET v3 0/4] Cleanup and improve MSG_RING performance
@ 2024-04-01 17:56 Jens Axboe
  2024-04-01 17:56 ` [PATCH 1/4] io_uring: add remote task_work execution helper Jens Axboe
                   ` (3 more replies)
  0 siblings, 4 replies; 6+ messages in thread
From: Jens Axboe @ 2024-04-01 17:56 UTC (permalink / raw)
  To: io-uring

Hi,

Hi,

MSG_RING rolls its own task_work handling, which there's really no need
for. Rather, it should use the generic io_uring infrastructure for this.

Add a helper for remote execution, and switch over MSG_RING to use it.
This both cleans up the code, and improves performance of this opcode
considerably.

 io_uring/io_uring.c | 30 ++++++++++++++++------
 io_uring/io_uring.h |  2 ++
 io_uring/msg_ring.c | 62 +++++++++++++++++++++------------------------
 3 files changed, 53 insertions(+), 41 deletions(-)

Changes since v2:
- Ensure fd install locking is correct

-- 
Jens Axboe



^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 1/4] io_uring: add remote task_work execution helper
  2024-04-01 17:56 [PATCHSET v3 0/4] Cleanup and improve MSG_RING performance Jens Axboe
@ 2024-04-01 17:56 ` Jens Axboe
  2024-04-02 13:53   ` Pavel Begunkov
  2024-04-01 17:56 ` [PATCH 2/4] io_uring/msg_ring: cleanup posting to IOPOLL vs !IOPOLL ring Jens Axboe
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 6+ messages in thread
From: Jens Axboe @ 2024-04-01 17:56 UTC (permalink / raw)
  To: io-uring; +Cc: Jens Axboe

All our task_work handling is targeted at the state in the io_kiocb
itself, which is what it is being used for. However, MSG_RING rolls its
own task_work handling, ignoring how that is usually done.

In preparation for switching MSG_RING to be able to use the normal
task_work handling, add io_req_task_work_add_remote() which allows the
caller to pass in the target io_ring_ctx.

Signed-off-by: Jens Axboe <[email protected]>
---
 io_uring/io_uring.c | 30 ++++++++++++++++++++++--------
 io_uring/io_uring.h |  2 ++
 2 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 9986e9bb825a..df4d9c9aeeab 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1232,9 +1232,10 @@ void tctx_task_work(struct callback_head *cb)
 	WARN_ON_ONCE(ret);
 }
 
-static inline void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
+static inline void io_req_local_work_add(struct io_kiocb *req,
+					 struct io_ring_ctx *ctx,
+					 unsigned flags)
 {
-	struct io_ring_ctx *ctx = req->ctx;
 	unsigned nr_wait, nr_tw, nr_tw_prev;
 	struct llist_node *head;
 
@@ -1300,9 +1301,10 @@ static inline void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
 	wake_up_state(ctx->submitter_task, TASK_INTERRUPTIBLE);
 }
 
-static void io_req_normal_work_add(struct io_kiocb *req)
+static void io_req_normal_work_add(struct io_kiocb *req,
+				   struct task_struct *task)
 {
-	struct io_uring_task *tctx = req->task->io_uring;
+	struct io_uring_task *tctx = task->io_uring;
 	struct io_ring_ctx *ctx = req->ctx;
 
 	/* task_work already pending, we're done */
@@ -1321,7 +1323,7 @@ static void io_req_normal_work_add(struct io_kiocb *req)
 		return;
 	}
 
-	if (likely(!task_work_add(req->task, &tctx->task_work, ctx->notify_method)))
+	if (likely(!task_work_add(task, &tctx->task_work, ctx->notify_method)))
 		return;
 
 	io_fallback_tw(tctx, false);
@@ -1331,10 +1333,22 @@ void __io_req_task_work_add(struct io_kiocb *req, unsigned flags)
 {
 	if (req->ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
 		rcu_read_lock();
-		io_req_local_work_add(req, flags);
+		io_req_local_work_add(req, req->ctx, flags);
+		rcu_read_unlock();
+	} else {
+		io_req_normal_work_add(req, req->task);
+	}
+}
+
+void io_req_task_work_add_remote(struct io_kiocb *req, struct io_ring_ctx *ctx,
+				 unsigned flags)
+{
+	if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
+		rcu_read_lock();
+		io_req_local_work_add(req, ctx, flags);
 		rcu_read_unlock();
 	} else {
-		io_req_normal_work_add(req);
+		io_req_normal_work_add(req, READ_ONCE(ctx->submitter_task));
 	}
 }
 
@@ -1348,7 +1362,7 @@ static void __cold io_move_task_work_from_local(struct io_ring_ctx *ctx)
 						    io_task_work.node);
 
 		node = node->next;
-		io_req_normal_work_add(req);
+		io_req_normal_work_add(req, req->task);
 	}
 }
 
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 1eb65324792a..4155379ee586 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -74,6 +74,8 @@ struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
 			       unsigned issue_flags);
 
 void __io_req_task_work_add(struct io_kiocb *req, unsigned flags);
+void io_req_task_work_add_remote(struct io_kiocb *req, struct io_ring_ctx *ctx,
+				 unsigned flags);
 bool io_alloc_async_data(struct io_kiocb *req);
 void io_req_task_queue(struct io_kiocb *req);
 void io_req_task_complete(struct io_kiocb *req, struct io_tw_state *ts);
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/4] io_uring/msg_ring: cleanup posting to IOPOLL vs !IOPOLL ring
  2024-04-01 17:56 [PATCHSET v3 0/4] Cleanup and improve MSG_RING performance Jens Axboe
  2024-04-01 17:56 ` [PATCH 1/4] io_uring: add remote task_work execution helper Jens Axboe
@ 2024-04-01 17:56 ` Jens Axboe
  2024-04-01 17:56 ` [PATCH 3/4] io_uring/msg_ring: split fd installing into a helper Jens Axboe
  2024-04-01 17:56 ` [PATCH 4/4] io_uring/msg_ring: improve handling of target CQE posting Jens Axboe
  3 siblings, 0 replies; 6+ messages in thread
From: Jens Axboe @ 2024-04-01 17:56 UTC (permalink / raw)
  To: io-uring; +Cc: Jens Axboe

Move the posting outside the checking and locking, it's cleaner that
way.

Signed-off-by: Jens Axboe <[email protected]>
---
 io_uring/msg_ring.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c
index cd6dcf634ba3..d1f66a40b4b4 100644
--- a/io_uring/msg_ring.c
+++ b/io_uring/msg_ring.c
@@ -147,13 +147,11 @@ static int io_msg_ring_data(struct io_kiocb *req, unsigned int issue_flags)
 	if (target_ctx->flags & IORING_SETUP_IOPOLL) {
 		if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
 			return -EAGAIN;
-		if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
-			ret = 0;
-		io_double_unlock_ctx(target_ctx);
-	} else {
-		if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
-			ret = 0;
 	}
+	if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
+		ret = 0;
+	if (target_ctx->flags & IORING_SETUP_IOPOLL)
+		io_double_unlock_ctx(target_ctx);
 	return ret;
 }
 
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 3/4] io_uring/msg_ring: split fd installing into a helper
  2024-04-01 17:56 [PATCHSET v3 0/4] Cleanup and improve MSG_RING performance Jens Axboe
  2024-04-01 17:56 ` [PATCH 1/4] io_uring: add remote task_work execution helper Jens Axboe
  2024-04-01 17:56 ` [PATCH 2/4] io_uring/msg_ring: cleanup posting to IOPOLL vs !IOPOLL ring Jens Axboe
@ 2024-04-01 17:56 ` Jens Axboe
  2024-04-01 17:56 ` [PATCH 4/4] io_uring/msg_ring: improve handling of target CQE posting Jens Axboe
  3 siblings, 0 replies; 6+ messages in thread
From: Jens Axboe @ 2024-04-01 17:56 UTC (permalink / raw)
  To: io-uring; +Cc: Jens Axboe

No functional changes in this patch, just in preparation for needing to
complete the fd install with the ctx lock already held.

Signed-off-by: Jens Axboe <[email protected]>
---
 io_uring/msg_ring.c | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c
index d1f66a40b4b4..9023b39fecef 100644
--- a/io_uring/msg_ring.c
+++ b/io_uring/msg_ring.c
@@ -173,25 +173,23 @@ static struct file *io_msg_grab_file(struct io_kiocb *req, unsigned int issue_fl
 	return file;
 }
 
-static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flags)
+static int __io_msg_install_complete(struct io_kiocb *req)
 {
 	struct io_ring_ctx *target_ctx = req->file->private_data;
 	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
 	struct file *src_file = msg->src_file;
 	int ret;
 
-	if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
-		return -EAGAIN;
-
 	ret = __io_fixed_fd_install(target_ctx, src_file, msg->dst_fd);
 	if (ret < 0)
-		goto out_unlock;
+		return ret;
 
 	msg->src_file = NULL;
 	req->flags &= ~REQ_F_NEED_CLEANUP;
 
 	if (msg->flags & IORING_MSG_RING_CQE_SKIP)
-		goto out_unlock;
+		return ret;
+
 	/*
 	 * If this fails, the target still received the file descriptor but
 	 * wasn't notified of the fact. This means that if this request
@@ -199,8 +197,20 @@ static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flag
 	 * later IORING_OP_MSG_RING delivers the message.
 	 */
 	if (!io_post_aux_cqe(target_ctx, msg->user_data, ret, 0))
-		ret = -EOVERFLOW;
-out_unlock:
+		return -EOVERFLOW;
+
+	return ret;
+}
+
+static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flags)
+{
+	struct io_ring_ctx *target_ctx = req->file->private_data;
+	int ret;
+
+	if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
+		return -EAGAIN;
+
+	ret = __io_msg_install_complete(req);
 	io_double_unlock_ctx(target_ctx);
 	return ret;
 }
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 4/4] io_uring/msg_ring: improve handling of target CQE posting
  2024-04-01 17:56 [PATCHSET v3 0/4] Cleanup and improve MSG_RING performance Jens Axboe
                   ` (2 preceding siblings ...)
  2024-04-01 17:56 ` [PATCH 3/4] io_uring/msg_ring: split fd installing into a helper Jens Axboe
@ 2024-04-01 17:56 ` Jens Axboe
  3 siblings, 0 replies; 6+ messages in thread
From: Jens Axboe @ 2024-04-01 17:56 UTC (permalink / raw)
  To: io-uring; +Cc: Jens Axboe

Use the exported helper for queueing task_work, rather than rolling our
own.

This improves peak performance of message passing by about 5x in some
basic testing, with 2 threads just sending messages to each other.
Before this change, it was capped at around 700K/sec, with the change
it's at over 4M/sec.

Signed-off-by: Jens Axboe <[email protected]>
---
 io_uring/msg_ring.c | 26 +++++++-------------------
 1 file changed, 7 insertions(+), 19 deletions(-)

diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c
index 9023b39fecef..3e1b9158798e 100644
--- a/io_uring/msg_ring.c
+++ b/io_uring/msg_ring.c
@@ -13,7 +13,6 @@
 #include "filetable.h"
 #include "msg_ring.h"
 
-
 /* All valid masks for MSG_RING */
 #define IORING_MSG_RING_MASK		(IORING_MSG_RING_CQE_SKIP | \
 					IORING_MSG_RING_FLAGS_PASS)
@@ -21,7 +20,6 @@
 struct io_msg {
 	struct file			*file;
 	struct file			*src_file;
-	struct callback_head		tw;
 	u64 user_data;
 	u32 len;
 	u32 cmd;
@@ -73,26 +71,18 @@ static inline bool io_msg_need_remote(struct io_ring_ctx *target_ctx)
 	return current != target_ctx->submitter_task;
 }
 
-static int io_msg_exec_remote(struct io_kiocb *req, task_work_func_t func)
+static int io_msg_exec_remote(struct io_kiocb *req, io_req_tw_func_t func)
 {
 	struct io_ring_ctx *ctx = req->file->private_data;
-	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
-	struct task_struct *task = READ_ONCE(ctx->submitter_task);
-
-	if (unlikely(!task))
-		return -EOWNERDEAD;
-
-	init_task_work(&msg->tw, func);
-	if (task_work_add(ctx->submitter_task, &msg->tw, TWA_SIGNAL))
-		return -EOWNERDEAD;
 
+	req->io_task_work.func = func;
+	io_req_task_work_add_remote(req, ctx, IOU_F_TWQ_LAZY_WAKE);
 	return IOU_ISSUE_SKIP_COMPLETE;
 }
 
-static void io_msg_tw_complete(struct callback_head *head)
+static void io_msg_tw_complete(struct io_kiocb *req, struct io_tw_state *ts)
 {
-	struct io_msg *msg = container_of(head, struct io_msg, tw);
-	struct io_kiocb *req = cmd_to_io_kiocb(msg);
+	struct io_msg *msg = io_kiocb_to_cmd(req, struct io_msg);
 	struct io_ring_ctx *target_ctx = req->file->private_data;
 	int ret = 0;
 
@@ -215,14 +205,12 @@ static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flag
 	return ret;
 }
 
-static void io_msg_tw_fd_complete(struct callback_head *head)
+static void io_msg_tw_fd_complete(struct io_kiocb *req, struct io_tw_state *ts)
 {
-	struct io_msg *msg = container_of(head, struct io_msg, tw);
-	struct io_kiocb *req = cmd_to_io_kiocb(msg);
 	int ret = -EOWNERDEAD;
 
 	if (!(current->flags & PF_EXITING))
-		ret = io_msg_install_complete(req, IO_URING_F_UNLOCKED);
+		ret = __io_msg_install_complete(req);
 	if (ret < 0)
 		req_set_fail(req);
 	io_req_queue_tw_complete(req, ret);
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/4] io_uring: add remote task_work execution helper
  2024-04-01 17:56 ` [PATCH 1/4] io_uring: add remote task_work execution helper Jens Axboe
@ 2024-04-02 13:53   ` Pavel Begunkov
  0 siblings, 0 replies; 6+ messages in thread
From: Pavel Begunkov @ 2024-04-02 13:53 UTC (permalink / raw)
  To: Jens Axboe, io-uring

On 4/1/24 18:56, Jens Axboe wrote:
> All our task_work handling is targeted at the state in the io_kiocb
> itself, which is what it is being used for. However, MSG_RING rolls its
> own task_work handling, ignoring how that is usually done.
> 
> In preparation for switching MSG_RING to be able to use the normal
> task_work handling, add io_req_task_work_add_remote() which allows the
> caller to pass in the target io_ring_ctx.
> 
> Signed-off-by: Jens Axboe <[email protected]>
> ---
>   io_uring/io_uring.c | 30 ++++++++++++++++++++++--------
>   io_uring/io_uring.h |  2 ++
>   2 files changed, 24 insertions(+), 8 deletions(-)
> 
> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
> index 9986e9bb825a..df4d9c9aeeab 100644
> --- a/io_uring/io_uring.c
> +++ b/io_uring/io_uring.c
> @@ -1232,9 +1232,10 @@ void tctx_task_work(struct callback_head *cb)
>   	WARN_ON_ONCE(ret);
>   }
>   
> -static inline void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
> +static inline void io_req_local_work_add(struct io_kiocb *req,
> +					 struct io_ring_ctx *ctx,
> +					 unsigned flags)
>   {
> -	struct io_ring_ctx *ctx = req->ctx;
>   	unsigned nr_wait, nr_tw, nr_tw_prev;
>   	struct llist_node *head;
>   
> @@ -1300,9 +1301,10 @@ static inline void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
>   	wake_up_state(ctx->submitter_task, TASK_INTERRUPTIBLE);
>   }
>   
> -static void io_req_normal_work_add(struct io_kiocb *req)
> +static void io_req_normal_work_add(struct io_kiocb *req,
> +				   struct task_struct *task)
>   {
> -	struct io_uring_task *tctx = req->task->io_uring;
> +	struct io_uring_task *tctx = task->io_uring;
>   	struct io_ring_ctx *ctx = req->ctx;
>   
>   	/* task_work already pending, we're done */
> @@ -1321,7 +1323,7 @@ static void io_req_normal_work_add(struct io_kiocb *req)
>   		return;
>   	}
>   
> -	if (likely(!task_work_add(req->task, &tctx->task_work, ctx->notify_method)))
> +	if (likely(!task_work_add(task, &tctx->task_work, ctx->notify_method)))
>   		return;
>   
>   	io_fallback_tw(tctx, false);
> @@ -1331,10 +1333,22 @@ void __io_req_task_work_add(struct io_kiocb *req, unsigned flags)
>   {
>   	if (req->ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
>   		rcu_read_lock();
> -		io_req_local_work_add(req, flags);
> +		io_req_local_work_add(req, req->ctx, flags);
> +		rcu_read_unlock();
> +	} else {
> +		io_req_normal_work_add(req, req->task);
> +	}
> +}
> +
> +void io_req_task_work_add_remote(struct io_kiocb *req, struct io_ring_ctx *ctx,
> +				 unsigned flags)
> +{
> +	if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
> +		rcu_read_lock();

Let's move rcu section into io_req_local_work_add().

Perhaps the easiest way is to

guard(rcu)();

> +		io_req_local_work_add(req, ctx, flags);
>   		rcu_read_unlock();
>   	} else {
> -		io_req_normal_work_add(req);
> +		io_req_normal_work_add(req, READ_ONCE(ctx->submitter_task));

->submitter_task can be null.

Why do you care about ->submitter_task? SINGLE_ISSUER allows
CQE posting and all other stuff from a random context, most
optimisations shifted into a more stricter DEFER_TASKRUN.

But let's say it's queued it to a valid task. tw run kicks in,
it splices the req, takes req->ctx, locks it and executes from
there, at which point the callback would probably assume that
the target ctx is locked and do all kinds of messy stuff
without sync. Even funnier if the original ctx is DEFER_TASKRUN,
then you have both deferred and normal tw for that ctx, and
it should never happen.

Let's not pretend that io_req_normal_work_add to a foreign
context would work and limit io_req_task_work_add_remote()
to !DEFER_TASKRUN?



>   	}
>   }
>   
> @@ -1348,7 +1362,7 @@ static void __cold io_move_task_work_from_local(struct io_ring_ctx *ctx)
>   						    io_task_work.node);
>   
>   		node = node->next;
> -		io_req_normal_work_add(req);
> +		io_req_normal_work_add(req, req->task);
>   	}
>   }
>   
> diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
> index 1eb65324792a..4155379ee586 100644
> --- a/io_uring/io_uring.h
> +++ b/io_uring/io_uring.h
> @@ -74,6 +74,8 @@ struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
>   			       unsigned issue_flags);
>   
>   void __io_req_task_work_add(struct io_kiocb *req, unsigned flags);
> +void io_req_task_work_add_remote(struct io_kiocb *req, struct io_ring_ctx *ctx,
> +				 unsigned flags);
>   bool io_alloc_async_data(struct io_kiocb *req);
>   void io_req_task_queue(struct io_kiocb *req);
>   void io_req_task_complete(struct io_kiocb *req, struct io_tw_state *ts);

-- 
Pavel Begunkov

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2024-04-02 13:53 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-01 17:56 [PATCHSET v3 0/4] Cleanup and improve MSG_RING performance Jens Axboe
2024-04-01 17:56 ` [PATCH 1/4] io_uring: add remote task_work execution helper Jens Axboe
2024-04-02 13:53   ` Pavel Begunkov
2024-04-01 17:56 ` [PATCH 2/4] io_uring/msg_ring: cleanup posting to IOPOLL vs !IOPOLL ring Jens Axboe
2024-04-01 17:56 ` [PATCH 3/4] io_uring/msg_ring: split fd installing into a helper Jens Axboe
2024-04-01 17:56 ` [PATCH 4/4] io_uring/msg_ring: improve handling of target CQE posting Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox