* [PATCH RFC for-next 1/8] io_uring: remove priority tw list optimisation
From: Dylan Yudaken @ 2022-06-20 16:18 UTC
To: axboe, asml.silence, io-uring; +Cc: Kernel-team, Dylan Yudaken
This optimisation has some built-in assumptions that make it easy to
introduce bugs. It also does not have clear wins that make it worth keeping.
Signed-off-by: Dylan Yudaken <[email protected]>
---
io_uring/io_uring.c | 77 +++++++--------------------------------------
io_uring/io_uring.h | 1 -
io_uring/rw.c | 2 +-
io_uring/tctx.c | 1 -
io_uring/tctx.h | 1 -
5 files changed, 12 insertions(+), 70 deletions(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index afda42246d12..cc524d33748d 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -986,44 +986,6 @@ static void ctx_flush_and_put(struct io_ring_ctx *ctx, bool *locked)
percpu_ref_put(&ctx->refs);
}
-static void handle_prev_tw_list(struct io_wq_work_node *node,
- struct io_ring_ctx **ctx, bool *uring_locked)
-{
- if (*ctx && !*uring_locked)
- spin_lock(&(*ctx)->completion_lock);
-
- do {
- struct io_wq_work_node *next = node->next;
- struct io_kiocb *req = container_of(node, struct io_kiocb,
- io_task_work.node);
-
- prefetch(container_of(next, struct io_kiocb, io_task_work.node));
-
- if (req->ctx != *ctx) {
- if (unlikely(!*uring_locked && *ctx))
- io_cq_unlock_post(*ctx);
-
- ctx_flush_and_put(*ctx, uring_locked);
- *ctx = req->ctx;
- /* if not contended, grab and improve batching */
- *uring_locked = mutex_trylock(&(*ctx)->uring_lock);
- percpu_ref_get(&(*ctx)->refs);
- if (unlikely(!*uring_locked))
- io_cq_lock(*ctx);
- }
- if (likely(*uring_locked)) {
- req->io_task_work.func(req, uring_locked);
- } else {
- req->cqe.flags = io_put_kbuf_comp(req);
- __io_req_complete_post(req);
- }
- node = next;
- } while (node);
-
- if (unlikely(!*uring_locked))
- io_cq_unlock_post(*ctx);
-}
-
static void handle_tw_list(struct io_wq_work_node *node,
struct io_ring_ctx **ctx, bool *locked)
{
@@ -1054,27 +1016,20 @@ void tctx_task_work(struct callback_head *cb)
task_work);
while (1) {
- struct io_wq_work_node *node1, *node2;
+ struct io_wq_work_node *node;
spin_lock_irq(&tctx->task_lock);
- node1 = tctx->prio_task_list.first;
- node2 = tctx->task_list.first;
+ node = tctx->task_list.first;
INIT_WQ_LIST(&tctx->task_list);
- INIT_WQ_LIST(&tctx->prio_task_list);
- if (!node2 && !node1)
+ if (!node)
tctx->task_running = false;
spin_unlock_irq(&tctx->task_lock);
- if (!node2 && !node1)
+ if (!node)
break;
-
- if (node1)
- handle_prev_tw_list(node1, &ctx, &uring_locked);
- if (node2)
- handle_tw_list(node2, &ctx, &uring_locked);
+ handle_tw_list(node, &ctx, &uring_locked);
cond_resched();
- if (data_race(!tctx->task_list.first) &&
- data_race(!tctx->prio_task_list.first) && uring_locked)
+ if (data_race(!tctx->task_list.first) && uring_locked)
io_submit_flush_completions(ctx);
}
@@ -1086,8 +1041,7 @@ void tctx_task_work(struct callback_head *cb)
}
static void __io_req_task_work_add(struct io_kiocb *req,
- struct io_uring_task *tctx,
- struct io_wq_work_list *list)
+ struct io_uring_task *tctx)
{
struct io_ring_ctx *ctx = req->ctx;
struct io_wq_work_node *node;
@@ -1095,7 +1049,7 @@ static void __io_req_task_work_add(struct io_kiocb *req,
bool running;
spin_lock_irqsave(&tctx->task_lock, flags);
- wq_list_add_tail(&req->io_task_work.node, list);
+ wq_list_add_tail(&req->io_task_work.node, &tctx->task_list);
running = tctx->task_running;
if (!running)
tctx->task_running = true;
@@ -1113,7 +1067,8 @@ static void __io_req_task_work_add(struct io_kiocb *req,
spin_lock_irqsave(&tctx->task_lock, flags);
tctx->task_running = false;
- node = wq_list_merge(&tctx->prio_task_list, &tctx->task_list);
+ node = tctx->task_list.first;
+ INIT_WQ_LIST(&tctx->task_list);
spin_unlock_irqrestore(&tctx->task_lock, flags);
while (node) {
@@ -1129,17 +1084,7 @@ void io_req_task_work_add(struct io_kiocb *req)
{
struct io_uring_task *tctx = req->task->io_uring;
- __io_req_task_work_add(req, tctx, &tctx->task_list);
-}
-
-void io_req_task_prio_work_add(struct io_kiocb *req)
-{
- struct io_uring_task *tctx = req->task->io_uring;
-
- if (req->ctx->flags & IORING_SETUP_SQPOLL)
- __io_req_task_work_add(req, tctx, &tctx->prio_task_list);
- else
- __io_req_task_work_add(req, tctx, &tctx->task_list);
+ __io_req_task_work_add(req, tctx);
}
static void io_req_tw_post(struct io_kiocb *req, bool *locked)
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 7a00bbe85d35..864e552c76a5 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -35,7 +35,6 @@ struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
bool io_is_uring_fops(struct file *file);
bool io_alloc_async_data(struct io_kiocb *req);
void io_req_task_work_add(struct io_kiocb *req);
-void io_req_task_prio_work_add(struct io_kiocb *req);
void io_req_tw_post_queue(struct io_kiocb *req, s32 res, u32 cflags);
void io_req_task_queue(struct io_kiocb *req);
void io_queue_iowq(struct io_kiocb *req, bool *dont_use);
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 9166d8166b82..caf72f50c341 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -215,7 +215,7 @@ static void io_complete_rw(struct kiocb *kiocb, long res)
return;
io_req_set_res(req, res, 0);
req->io_task_work.func = io_req_task_complete;
- io_req_task_prio_work_add(req);
+ io_req_task_work_add(req);
}
static void io_complete_rw_iopoll(struct kiocb *kiocb, long res)
diff --git a/io_uring/tctx.c b/io_uring/tctx.c
index 9b30fb0d3603..7a68ba9beec3 100644
--- a/io_uring/tctx.c
+++ b/io_uring/tctx.c
@@ -88,7 +88,6 @@ __cold int io_uring_alloc_task_context(struct task_struct *task,
task->io_uring = tctx;
spin_lock_init(&tctx->task_lock);
INIT_WQ_LIST(&tctx->task_list);
- INIT_WQ_LIST(&tctx->prio_task_list);
init_task_work(&tctx->task_work, tctx_task_work);
return 0;
}
diff --git a/io_uring/tctx.h b/io_uring/tctx.h
index dead0ed00429..c8566ea5dca4 100644
--- a/io_uring/tctx.h
+++ b/io_uring/tctx.h
@@ -22,7 +22,6 @@ struct io_uring_task {
spinlock_t task_lock;
bool task_running;
struct io_wq_work_list task_list;
- struct io_wq_work_list prio_task_list;
struct callback_head task_work;
} ____cacheline_aligned_in_smp;
};
--
2.30.2
* [PATCH RFC for-next 2/8] io_uring: remove __io_req_task_work_add
From: Dylan Yudaken @ 2022-06-20 16:18 UTC
To: axboe, asml.silence, io-uring; +Cc: Kernel-team, Dylan Yudaken
This is no longer needed as there is only one caller.
Signed-off-by: Dylan Yudaken <[email protected]>
---
io_uring/io_uring.c | 11 ++---------
1 file changed, 2 insertions(+), 9 deletions(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index cc524d33748d..e1523b62103b 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1040,9 +1040,9 @@ void tctx_task_work(struct callback_head *cb)
io_uring_drop_tctx_refs(current);
}
-static void __io_req_task_work_add(struct io_kiocb *req,
- struct io_uring_task *tctx)
+void io_req_task_work_add(struct io_kiocb *req)
{
+ struct io_uring_task *tctx = req->task->io_uring;
struct io_ring_ctx *ctx = req->ctx;
struct io_wq_work_node *node;
unsigned long flags;
@@ -1080,13 +1080,6 @@ static void __io_req_task_work_add(struct io_kiocb *req,
}
}
-void io_req_task_work_add(struct io_kiocb *req)
-{
- struct io_uring_task *tctx = req->task->io_uring;
-
- __io_req_task_work_add(req, tctx);
-}
-
static void io_req_tw_post(struct io_kiocb *req, bool *locked)
{
io_req_complete_post(req);
--
2.30.2
* [PATCH RFC for-next 3/8] io_uring: lockless task list
From: Dylan Yudaken @ 2022-06-20 16:18 UTC
To: axboe, asml.silence, io-uring; +Cc: Kernel-team, Dylan Yudaken
With networking use cases we see contention on the spinlock used to
protect the task_list when multiple threads try to add completions at once.
Instead we can use a lockless list, and assume that the first caller to
add to the list is responsible for kicking off task work.
Signed-off-by: Dylan Yudaken <[email protected]>
---
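Note (not part of the commit message): the handoff rule relies on
llist_add() returning true only when the list was previously empty, so
exactly one producer ends up arming task_work. Below is a minimal
userspace sketch of that rule, using C11 atomics in place of the kernel
llist; all names in it are illustrative only.

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct node {
	struct node *next;
};

struct head {
	_Atomic(struct node *) first;
};

/* Push a node; returns true only if the list was empty before, i.e.
 * this caller is the one that must kick off processing of the list.
 */
static bool push(struct head *h, struct node *n)
{
	struct node *old = atomic_load(&h->first);

	do {
		n->next = old;
	} while (!atomic_compare_exchange_weak(&h->first, &old, n));

	return old == NULL;
}

int main(void)
{
	struct head h = { NULL };
	struct node a, b;

	printf("first push kicks: %d\n", push(&h, &a));  /* prints 1 */
	printf("second push kicks: %d\n", push(&h, &b)); /* prints 0 */
	return 0;
}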
include/linux/io_uring_types.h | 2 +-
io_uring/io_uring.c | 38 ++++++++--------------------------
io_uring/tctx.c | 3 +--
io_uring/tctx.h | 6 +++---
4 files changed, 14 insertions(+), 35 deletions(-)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 5987f8acca38..918165a20053 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -428,7 +428,7 @@ typedef void (*io_req_tw_func_t)(struct io_kiocb *req, bool *locked);
struct io_task_work {
union {
- struct io_wq_work_node node;
+ struct llist_node node;
struct llist_node fallback_node;
};
io_req_tw_func_t func;
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index e1523b62103b..985b46dfebb6 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -986,11 +986,12 @@ static void ctx_flush_and_put(struct io_ring_ctx *ctx, bool *locked)
percpu_ref_put(&ctx->refs);
}
-static void handle_tw_list(struct io_wq_work_node *node,
+
+static void handle_tw_list(struct llist_node *node,
struct io_ring_ctx **ctx, bool *locked)
{
do {
- struct io_wq_work_node *next = node->next;
+ struct llist_node *next = node->next;
struct io_kiocb *req = container_of(node, struct io_kiocb,
io_task_work.node);
@@ -1014,23 +1015,11 @@ void tctx_task_work(struct callback_head *cb)
struct io_ring_ctx *ctx = NULL;
struct io_uring_task *tctx = container_of(cb, struct io_uring_task,
task_work);
+ struct llist_node *node = llist_del_all(&tctx->task_list);
- while (1) {
- struct io_wq_work_node *node;
-
- spin_lock_irq(&tctx->task_lock);
- node = tctx->task_list.first;
- INIT_WQ_LIST(&tctx->task_list);
- if (!node)
- tctx->task_running = false;
- spin_unlock_irq(&tctx->task_lock);
- if (!node)
- break;
+ if (node) {
handle_tw_list(node, &ctx, &uring_locked);
cond_resched();
-
- if (data_race(!tctx->task_list.first) && uring_locked)
- io_submit_flush_completions(ctx);
}
ctx_flush_and_put(ctx, &uring_locked);
@@ -1044,16 +1033,10 @@ void io_req_task_work_add(struct io_kiocb *req)
{
struct io_uring_task *tctx = req->task->io_uring;
struct io_ring_ctx *ctx = req->ctx;
- struct io_wq_work_node *node;
- unsigned long flags;
+ struct llist_node *node;
bool running;
- spin_lock_irqsave(&tctx->task_lock, flags);
- wq_list_add_tail(&req->io_task_work.node, &tctx->task_list);
- running = tctx->task_running;
- if (!running)
- tctx->task_running = true;
- spin_unlock_irqrestore(&tctx->task_lock, flags);
+ running = !llist_add(&req->io_task_work.node, &tctx->task_list);
/* task_work already pending, we're done */
if (running)
@@ -1065,11 +1048,8 @@ void io_req_task_work_add(struct io_kiocb *req)
if (likely(!task_work_add(req->task, &tctx->task_work, ctx->notify_method)))
return;
- spin_lock_irqsave(&tctx->task_lock, flags);
- tctx->task_running = false;
- node = tctx->task_list.first;
- INIT_WQ_LIST(&tctx->task_list);
- spin_unlock_irqrestore(&tctx->task_lock, flags);
+
+ node = llist_del_all(&tctx->task_list);
while (node) {
req = container_of(node, struct io_kiocb, io_task_work.node);
diff --git a/io_uring/tctx.c b/io_uring/tctx.c
index 7a68ba9beec3..7f97d97fef0a 100644
--- a/io_uring/tctx.c
+++ b/io_uring/tctx.c
@@ -86,8 +86,7 @@ __cold int io_uring_alloc_task_context(struct task_struct *task,
atomic_set(&tctx->in_idle, 0);
atomic_set(&tctx->inflight_tracked, 0);
task->io_uring = tctx;
- spin_lock_init(&tctx->task_lock);
- INIT_WQ_LIST(&tctx->task_list);
+ init_llist_head(&tctx->task_list);
init_task_work(&tctx->task_work, tctx_task_work);
return 0;
}
diff --git a/io_uring/tctx.h b/io_uring/tctx.h
index c8566ea5dca4..8a33ff6e5d91 100644
--- a/io_uring/tctx.h
+++ b/io_uring/tctx.h
@@ -1,5 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/llist.h>
+
/*
* Arbitrary limit, can be raised if need be
*/
@@ -19,9 +21,7 @@ struct io_uring_task {
struct percpu_counter inflight;
struct { /* task_work */
- spinlock_t task_lock;
- bool task_running;
- struct io_wq_work_list task_list;
+ struct llist_head task_list;
struct callback_head task_work;
} ____cacheline_aligned_in_smp;
};
--
2.30.2
* [PATCH RFC for-next 4/8] io_uring: introduce llist helpers
From: Dylan Yudaken @ 2022-06-20 16:18 UTC
To: axboe, asml.silence, io-uring; +Cc: Kernel-team, Dylan Yudaken
Introduce helpers to atomically swap out all entries in an llist.
These will later be moved into common code.
Signed-off-by: Dylan Yudaken <[email protected]>
---
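Note (not part of the commit message): both helpers return the previous
first entry of the list. A rough userspace equivalent of their
semantics, written with C11 atomics purely for illustration:

#include <stdatomic.h>
#include <stddef.h>

struct llnode { struct llnode *next; };
struct llhead { _Atomic(struct llnode *) first; };

/* unconditionally install @node as the head, returning the old head */
static struct llnode *sketch_llist_xchg(struct llhead *head,
					struct llnode *node)
{
	return atomic_exchange(&head->first, node);
}

/* install @new only if the head is still @old; either way, return the
 * value that was observed at the head
 */
static struct llnode *sketch_llist_cmpxchg(struct llhead *head,
					   struct llnode *old,
					   struct llnode *new)
{
	atomic_compare_exchange_strong(&head->first, &old, new);
	return old; /* updated to the observed head on failure */
}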
io_uring/io_uring.c | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 985b46dfebb6..eb29e3f7da5c 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1009,6 +1009,36 @@ static void handle_tw_list(struct llist_node *node,
} while (node);
}
+/**
+ * io_llist_xchg - swap all entries in a lock-less list
+ * @head: the head of the lock-less list
+ * @node: new entry to set as the head of the list
+ *
+ * If the list is empty, return NULL, otherwise return the pointer to the first entry.
+ * The order of entries returned is from the newest to the oldest added one.
+ */
+static inline struct llist_node *io_llist_xchg(struct llist_head *head,
+ struct llist_node *node)
+{
+ return xchg(&head->first, node);
+}
+
+/**
+ * io_llist_cmpxchg - possibly swap all entries in a lock-less list
+ * @head: the head of the lock-less list
+ * @old: expected old value of the first entry of the list
+ * @new: new entry to set as the head of the list
+ *
+ * Perform a cmpxchg on the first entry of the list: swap in @new only if
+ * the first entry is still @old, and return the entry that was observed.
+ */
+static inline struct llist_node *io_llist_cmpxchg(struct llist_head *head,
+ struct llist_node *old,
+ struct llist_node *new)
+{
+ return cmpxchg(&head->first, old, new);
+}
+
void tctx_task_work(struct callback_head *cb)
{
bool uring_locked = false;
--
2.30.2
* [PATCH RFC for-next 5/8] io_uring: batch task_work
From: Dylan Yudaken @ 2022-06-20 16:18 UTC
To: axboe, asml.silence, io-uring; +Cc: Kernel-team, Dylan Yudaken
Batching task work up is an important performance optimisation, as
task_work_add is expensive.
In order to keep the semantics, replace the task_list with a fake node
while processing the old list, and then do a cmpxchg at the end to see if
there is more work.
Signed-off-by: Dylan Yudaken <[email protected]>
---
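Note (not part of the commit message): a self-contained userspace sketch
of the fake-node protocol, with C11 atomics standing in for the kernel
llist helpers. While a batch is being processed the list head points at
a local fake node, so concurrent adders still see a non-empty list and
do not re-arm task_work; the final cmpxchg from &fake to NULL only
succeeds if no new work arrived in the meantime. All names here are
illustrative only.

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

struct node { struct node *next; };
struct head { _Atomic(struct node *) first; };

static struct node *xchg_head(struct head *h, struct node *n)
{
	return atomic_exchange(&h->first, n);
}

static struct node *cmpxchg_head(struct head *h, struct node *old,
				 struct node *new)
{
	atomic_compare_exchange_strong(&h->first, &old, new);
	return old;
}

/* run everything up to (but not including) the parked fake node */
static void run_batch(struct node *n, struct node *fake)
{
	while (n && n != fake) {
		printf("running item %p\n", (void *)n);
		n = n->next;
	}
}

int main(void)
{
	struct head h = { NULL };
	struct node fake = { NULL };
	struct node item = { NULL };
	struct node *node;

	/* a producer queued one item before the worker ran */
	atomic_store(&h.first, &item);

	/* worker: park the fake node while draining the old batch */
	node = xchg_head(&h, &fake);
	run_batch(node, &fake);

	/* retire the fake node; go around again if new work slipped in */
	node = cmpxchg_head(&h, &fake, NULL);
	while (node != &fake) {
		node = xchg_head(&h, &fake);
		run_batch(node, &fake);
		node = cmpxchg_head(&h, &fake, NULL);
	}
	printf("done\n");
	return 0;
}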
io_uring/io_uring.c | 21 +++++++++++++--------
1 file changed, 13 insertions(+), 8 deletions(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index eb29e3f7da5c..0b4d3eb06d16 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -988,9 +988,10 @@ static void ctx_flush_and_put(struct io_ring_ctx *ctx, bool *locked)
static void handle_tw_list(struct llist_node *node,
- struct io_ring_ctx **ctx, bool *locked)
+ struct io_ring_ctx **ctx, bool *locked,
+ struct llist_node *fake)
{
- do {
+ while (node && node != fake) {
struct llist_node *next = node->next;
struct io_kiocb *req = container_of(node, struct io_kiocb,
io_task_work.node);
@@ -1006,7 +1007,7 @@ static void handle_tw_list(struct llist_node *node,
 		}
 		req->io_task_work.func(req, locked);
 		node = next;
-	} while (node);
+	}
 }
 
 /**
/**
@@ -1045,11 +1046,15 @@ void tctx_task_work(struct callback_head *cb)
struct io_ring_ctx *ctx = NULL;
struct io_uring_task *tctx = container_of(cb, struct io_uring_task,
task_work);
- struct llist_node *node = llist_del_all(&tctx->task_list);
-
- if (node) {
- handle_tw_list(node, &ctx, &uring_locked);
- cond_resched();
+ struct llist_node fake = {};
+ struct llist_node *node = io_llist_xchg(&tctx->task_list, &fake);
+
+ handle_tw_list(node, &ctx, &uring_locked, &fake);
+ node = io_llist_cmpxchg(&tctx->task_list, &fake, NULL);
+ while (node != &fake) {
+ node = io_llist_xchg(&tctx->task_list, &fake);
+ handle_tw_list(node, &ctx, &uring_locked, &fake);
+ node = io_llist_cmpxchg(&tctx->task_list, &fake, NULL);
}
ctx_flush_and_put(ctx, &uring_locked);
--
2.30.2
* [PATCH RFC for-next 6/8] io_uring: move io_uring_get_opcode out of TP_printk
From: Dylan Yudaken @ 2022-06-20 16:18 UTC
To: axboe, asml.silence, io-uring; +Cc: Kernel-team, Dylan Yudaken
The TP_printk macros are not supposed to use custom code ([1]) or else
tools such as perf cannot use these events.
Convert the opcode string representation to use the __string wiring that
the event framework provides ([2]).
[1]: https://lwn.net/Articles/379903/
[2]: https://lwn.net/Articles/381064/
Fixes: 033b87d2 ("io_uring: use the text representation of ops in trace")
Signed-off-by: Dylan Yudaken <[email protected]>
---
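Note (not part of the commit message): schematically, each event below
is converted as follows; the event name here is made up purely for
illustration. __string() reserves buffer space, __assign_str() renders
the opcode name into the trace buffer at the time the event fires, and
TP_printk() then only reads back recorded data via __get_str(), so
tools that re-parse the format string (perf, trace-cmd) can decode the
event without calling into the kernel.

TRACE_EVENT(io_uring_example,

	TP_PROTO(struct io_kiocb *req),

	TP_ARGS(req),

	TP_STRUCT__entry(
		__field( u8, opcode )
		__string( op_str, io_uring_get_opcode(req->opcode) )
	),

	TP_fast_assign(
		__entry->opcode = req->opcode;
		__assign_str(op_str, io_uring_get_opcode(req->opcode));
	),

	TP_printk("opcode %s", __get_str(op_str))
);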
include/trace/events/io_uring.h | 42 +++++++++++++++++++++++++++------
1 file changed, 35 insertions(+), 7 deletions(-)
diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h
index 5635912e1013..3bc8dec9acaa 100644
--- a/include/trace/events/io_uring.h
+++ b/include/trace/events/io_uring.h
@@ -151,6 +151,8 @@ TRACE_EVENT(io_uring_queue_async_work,
__field( unsigned int, flags )
__field( struct io_wq_work *, work )
__field( int, rw )
+
+ __string( op_str, io_uring_get_opcode(req->opcode) )
),
TP_fast_assign(
@@ -161,11 +163,13 @@ TRACE_EVENT(io_uring_queue_async_work,
__entry->opcode = req->opcode;
__entry->work = &req->work;
__entry->rw = rw;
+
+ __assign_str(op_str, io_uring_get_opcode(req->opcode));
),
TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, flags 0x%x, %s queue, work %p",
__entry->ctx, __entry->req, __entry->user_data,
- io_uring_get_opcode(__entry->opcode),
+ __get_str(op_str),
__entry->flags, __entry->rw ? "hashed" : "normal", __entry->work)
);
@@ -188,6 +192,8 @@ TRACE_EVENT(io_uring_defer,
__field( void *, req )
__field( unsigned long long, data )
__field( u8, opcode )
+
+ __string( op_str, io_uring_get_opcode(req->opcode) )
),
TP_fast_assign(
@@ -195,11 +201,13 @@ TRACE_EVENT(io_uring_defer,
__entry->req = req;
__entry->data = req->cqe.user_data;
__entry->opcode = req->opcode;
+
+ __assign_str(op_str, io_uring_get_opcode(req->opcode));
),
TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s",
__entry->ctx, __entry->req, __entry->data,
- io_uring_get_opcode(__entry->opcode))
+ __get_str(op_str))
);
/**
@@ -284,6 +292,8 @@ TRACE_EVENT(io_uring_fail_link,
__field( unsigned long long, user_data )
__field( u8, opcode )
__field( void *, link )
+
+ __string( op_str, io_uring_get_opcode(req->opcode) )
),
TP_fast_assign(
@@ -292,11 +302,13 @@ TRACE_EVENT(io_uring_fail_link,
__entry->user_data = req->cqe.user_data;
__entry->opcode = req->opcode;
__entry->link = link;
+
+ __assign_str(op_str, io_uring_get_opcode(req->opcode));
),
TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, link %p",
__entry->ctx, __entry->req, __entry->user_data,
- io_uring_get_opcode(__entry->opcode), __entry->link)
+ __get_str(op_str), __entry->link)
);
/**
@@ -370,6 +382,8 @@ TRACE_EVENT(io_uring_submit_sqe,
__field( u32, flags )
__field( bool, force_nonblock )
__field( bool, sq_thread )
+
+ __string( op_str, io_uring_get_opcode(req->opcode) )
),
TP_fast_assign(
@@ -380,11 +394,13 @@ TRACE_EVENT(io_uring_submit_sqe,
__entry->flags = req->flags;
__entry->force_nonblock = force_nonblock;
__entry->sq_thread = req->ctx->flags & IORING_SETUP_SQPOLL;
+
+ __assign_str(op_str, io_uring_get_opcode(req->opcode));
),
TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, flags 0x%x, "
"non block %d, sq_thread %d", __entry->ctx, __entry->req,
- __entry->user_data, io_uring_get_opcode(__entry->opcode),
+ __entry->user_data, __get_str(op_str),
__entry->flags, __entry->force_nonblock, __entry->sq_thread)
);
@@ -411,6 +427,8 @@ TRACE_EVENT(io_uring_poll_arm,
__field( u8, opcode )
__field( int, mask )
__field( int, events )
+
+ __string( op_str, io_uring_get_opcode(req->opcode) )
),
TP_fast_assign(
@@ -420,11 +438,13 @@ TRACE_EVENT(io_uring_poll_arm,
__entry->opcode = req->opcode;
__entry->mask = mask;
__entry->events = events;
+
+ __assign_str(op_str, io_uring_get_opcode(req->opcode));
),
TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, mask 0x%x, events 0x%x",
__entry->ctx, __entry->req, __entry->user_data,
- io_uring_get_opcode(__entry->opcode),
+ __get_str(op_str),
__entry->mask, __entry->events)
);
@@ -447,6 +467,8 @@ TRACE_EVENT(io_uring_task_add,
__field( unsigned long long, user_data )
__field( u8, opcode )
__field( int, mask )
+
+ __string( op_str, io_uring_get_opcode(req->opcode) )
),
TP_fast_assign(
@@ -455,11 +477,13 @@ TRACE_EVENT(io_uring_task_add,
__entry->user_data = req->cqe.user_data;
__entry->opcode = req->opcode;
__entry->mask = mask;
+
+ __assign_str(op_str, io_uring_get_opcode(req->opcode));
),
TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, mask %x",
__entry->ctx, __entry->req, __entry->user_data,
- io_uring_get_opcode(__entry->opcode),
+ __get_str(op_str),
__entry->mask)
);
@@ -495,6 +519,8 @@ TRACE_EVENT(io_uring_req_failed,
__field( u64, pad1 )
__field( u64, addr3 )
__field( int, error )
+
+ __string( op_str, io_uring_get_opcode(sqe->opcode) )
),
TP_fast_assign(
@@ -514,6 +540,8 @@ TRACE_EVENT(io_uring_req_failed,
__entry->pad1 = sqe->__pad2[0];
__entry->addr3 = sqe->addr3;
__entry->error = error;
+
+ __assign_str(op_str, io_uring_get_opcode(sqe->opcode));
),
TP_printk("ring %p, req %p, user_data 0x%llx, "
@@ -522,7 +550,7 @@ TRACE_EVENT(io_uring_req_failed,
"personality=%d, file_index=%d, pad=0x%llx, addr3=%llx, "
"error=%d",
__entry->ctx, __entry->req, __entry->user_data,
- io_uring_get_opcode(__entry->opcode),
+ __get_str(op_str),
__entry->flags, __entry->ioprio,
(unsigned long long)__entry->off,
(unsigned long long) __entry->addr, __entry->len,
--
2.30.2
* [PATCH RFC for-next 7/8] io_uring: add trace event for running task work
From: Dylan Yudaken @ 2022-06-20 16:19 UTC
To: axboe, asml.silence, io-uring; +Cc: Kernel-team, Dylan Yudaken
This is useful for investigating whether task_work is batching.
Signed-off-by: Dylan Yudaken <[email protected]>
---
include/trace/events/io_uring.h | 30 ++++++++++++++++++++++++++++++
1 file changed, 30 insertions(+)
diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h
index 3bc8dec9acaa..918e3a43e4b2 100644
--- a/include/trace/events/io_uring.h
+++ b/include/trace/events/io_uring.h
@@ -600,6 +600,36 @@ TRACE_EVENT(io_uring_cqe_overflow,
__entry->cflags, __entry->ocqe)
);
+/*
+ * io_uring_task_work_run - ran task work
+ *
+ * @tctx: pointer to an io_uring_task
+ * @count: how many functions it ran
+ * @loops: how many loops it ran
+ *
+ */
+TRACE_EVENT(io_uring_task_work_run,
+
+ TP_PROTO(void *tctx, unsigned int count, unsigned int loops),
+
+ TP_ARGS(tctx, count, loops),
+
+ TP_STRUCT__entry (
+ __field( void *, tctx )
+ __field( unsigned int, count )
+ __field( unsigned int, loops )
+ ),
+
+ TP_fast_assign(
+ __entry->tctx = tctx;
+ __entry->count = count;
+ __entry->loops = loops;
+ ),
+
+ TP_printk("tctx %p, count %u, loops %u",
+ __entry->tctx, __entry->count, __entry->loops)
+);
+
#endif /* _TRACE_IO_URING_H */
/* This part must be outside protection */
--
2.30.2
* [PATCH RFC for-next 8/8] io_uring: trace task_work_run
From: Dylan Yudaken @ 2022-06-20 16:19 UTC
To: axboe, asml.silence, io-uring; +Cc: Kernel-team, Dylan Yudaken
Trace task_work_run to help provide stats on how often task work is run
and what batch sizes are coming through.
Signed-off-by: Dylan Yudaken <[email protected]>
---
io_uring/io_uring.c | 19 ++++++++++++++-----
1 file changed, 14 insertions(+), 5 deletions(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 0b4d3eb06d16..8cf868315008 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -987,10 +987,12 @@ static void ctx_flush_and_put(struct io_ring_ctx *ctx, bool *locked)
 }
 
-static void handle_tw_list(struct llist_node *node,
-			   struct io_ring_ctx **ctx, bool *locked,
-			   struct llist_node *fake)
+static unsigned int handle_tw_list(struct llist_node *node,
+				   struct io_ring_ctx **ctx, bool *locked,
+				   struct llist_node *fake)
 {
+	unsigned int count = 0;
+
 	while (node && node != fake) {
 		struct llist_node *next = node->next;
 		struct io_kiocb *req = container_of(node, struct io_kiocb,
 						    io_task_work.node);
@@ -1005,7 +1007,10 @@ static void handle_tw_list(struct llist_node *node,
 		}
 		req->io_task_work.func(req, locked);
 		node = next;
+		count++;
 	}
+
+	return count;
 }
@@ -1051,12 +1056,14 @@ void tctx_task_work(struct callback_head *cb)
task_work);
struct llist_node fake = {};
struct llist_node *node = io_llist_xchg(&tctx->task_list, &fake);
+ unsigned int loops = 1;
+ unsigned int count = handle_tw_list(node, &ctx, &uring_locked, &fake);
- handle_tw_list(node, &ctx, &uring_locked, &fake);
node = io_llist_cmpxchg(&tctx->task_list, &fake, NULL);
while (node != &fake) {
+ loops++;
node = io_llist_xchg(&tctx->task_list, &fake);
- handle_tw_list(node, &ctx, &uring_locked, &fake);
+ count += handle_tw_list(node, &ctx, &uring_locked, &fake);
node = io_llist_cmpxchg(&tctx->task_list, &fake, NULL);
}
@@ -1065,6 +1072,8 @@ void tctx_task_work(struct callback_head *cb)
/* relaxed read is enough as only the task itself sets ->in_idle */
if (unlikely(atomic_read(&tctx->in_idle)))
io_uring_drop_tctx_refs(current);
+
+ trace_io_uring_task_work_run(tctx, count, loops);
}
void io_req_task_work_add(struct io_kiocb *req)
--
2.30.2