From: Xiaobing Li <[email protected]>
To: [email protected], [email protected]
Cc: [email protected], [email protected],
[email protected], [email protected], [email protected],
[email protected], [email protected],
[email protected], [email protected],
[email protected], Xiaobing Li <[email protected]>
Subject: [PATCH v3] io_uring: Statistics of the true utilization of sq threads.
Date: Wed, 15 Nov 2023 20:18:39 +0800 [thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: CGME20231115122627epcas5p37263cadafd5af20043fbb74e57fe5a4c@epcas5p3.samsung.com
v3:
1. The sq thread runs in a while(1) loop. Within that loop there may be
long periods in which it is not processing IO but the idle timeout has
not yet expired, so the sqpoll thread keeps running and keeps occupying
the CPU. The CPU time is wasted during those periods. Our goal is to
measure the portion of time the sqpoll thread actually spends
processing IO, so as to reflect the share of the CPU it uses for IO.
This can be used to help improve the actual CPU utilization in the
future.
2. "work_time" in the code is the accumulated number of jiffies the sq
thread has spent actually processing IO, that is, how long it actually
takes to process IO. "total_time" is the number of jiffies that have
elapsed from the beginning of the sq thread's loop to the current point
in time, that is, how long the thread has been running in total. (Both
values are in jiffies, which equal milliseconds only when HZ is 1000.)
The output "SqBusy" is the percentage of that total time which the sq
thread actually spent processing IO (work_time * 100 / total_time).
3. The task_pid value in the io_sq_data structure must be assigned
after the sq thread is created; otherwise the pid of its parent
process is recorded instead.
4. After many tests, we found that ctx->uring_lock does not need to be
taken before reading ctx->sq_data. Null pointer dereferences can be
avoided by checking that ctx is not NULL.
Signed-off-by: Xiaobing Li <[email protected]>
The test results are as follows:
Every 0.5s: cat /proc/281126/fdinfo/6 | grep Sq
SqMask: 0x3
SqHead: 1168417
SqTail: 1168418
CachedSqHead: 1168418
SqThread: 281126
SqThreadCpu: 55
SqBusy: 96%
---
io_uring/fdinfo.c | 31 ++++++++++++++++---------------
io_uring/sqpoll.c | 20 ++++++++++++++++----
io_uring/sqpoll.h | 2 ++
3 files changed, 34 insertions(+), 19 deletions(-)
diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c
index f04a43044d91..b9e2e339140d 100644
--- a/io_uring/fdinfo.c
+++ b/io_uring/fdinfo.c
@@ -64,6 +64,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
unsigned int sq_shift = 0;
unsigned int sq_entries, cq_entries;
int sq_pid = -1, sq_cpu = -1;
+ int sq_busy = 0;
bool has_lock;
unsigned int i;
@@ -134,6 +135,21 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
seq_printf(m, "\n");
}
+ if (ctx && (ctx->flags & IORING_SETUP_SQPOLL)) {
+ struct io_sq_data *sq = ctx->sq_data;
+
+ if (sq && sq->total_time != 0)
+ sq_busy = (int)(sq->work_time * 100 / sq->total_time);
+
+ sq_pid = sq->task_pid;
+ sq_cpu = sq->sq_cpu;
+ }
+
+ seq_printf(m, "SqThread:\t%d\n", sq_pid);
+ seq_printf(m, "SqThreadCpu:\t%d\n", sq_cpu);
+ seq_printf(m, "SqBusy:\t%d%%\n", sq_busy);
+ seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files);
+
/*
* Avoid ABBA deadlock between the seq lock and the io_uring mutex,
* since fdinfo case grabs it in the opposite direction of normal use
@@ -142,21 +158,6 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
*/
has_lock = mutex_trylock(&ctx->uring_lock);
- if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL)) {
- struct io_sq_data *sq = ctx->sq_data;
-
- if (mutex_trylock(&sq->lock)) {
- if (sq->thread) {
- sq_pid = task_pid_nr(sq->thread);
- sq_cpu = task_cpu(sq->thread);
- }
- mutex_unlock(&sq->lock);
- }
- }
-
- seq_printf(m, "SqThread:\t%d\n", sq_pid);
- seq_printf(m, "SqThreadCpu:\t%d\n", sq_cpu);
- seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files);
for (i = 0; has_lock && i < ctx->nr_user_files; i++) {
struct file *f = io_file_from_index(&ctx->file_table, i);
diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c
index bd6c2c7959a5..dc093adc1ce5 100644
--- a/io_uring/sqpoll.c
+++ b/io_uring/sqpoll.c
@@ -224,17 +224,21 @@ static int io_sq_thread(void *data)
struct io_ring_ctx *ctx;
unsigned long timeout = 0;
char buf[TASK_COMM_LEN];
+ unsigned long sq_start, sq_work_begin, sq_work_end;
DEFINE_WAIT(wait);
snprintf(buf, sizeof(buf), "iou-sqp-%d", sqd->task_pid);
set_task_comm(current, buf);
- if (sqd->sq_cpu != -1)
+ if (sqd->sq_cpu != -1) {
set_cpus_allowed_ptr(current, cpumask_of(sqd->sq_cpu));
- else
+ } else {
set_cpus_allowed_ptr(current, cpu_online_mask);
+ sqd->sq_cpu = raw_smp_processor_id();
+ }
mutex_lock(&sqd->lock);
+ sq_start = jiffies;
while (1) {
bool cap_entries, sqt_spin = false;
@@ -245,6 +249,7 @@ static int io_sq_thread(void *data)
}
cap_entries = !list_is_singular(&sqd->ctx_list);
+ sq_work_begin = jiffies;
list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
int ret = __io_sq_thread(ctx, cap_entries);
@@ -254,6 +259,11 @@ static int io_sq_thread(void *data)
if (io_run_task_work())
sqt_spin = true;
+ sq_work_end = jiffies;
+ sqd->total_time = sq_work_end - sq_start;
+ if (sqt_spin == true)
+ sqd->work_time += sq_work_end - sq_work_begin;
+
if (sqt_spin || !time_after(jiffies, timeout)) {
if (sqt_spin)
timeout = jiffies + sqd->sq_thread_idle;
@@ -261,6 +271,7 @@ static int io_sq_thread(void *data)
mutex_unlock(&sqd->lock);
cond_resched();
mutex_lock(&sqd->lock);
+ sqd->sq_cpu = raw_smp_processor_id();
}
continue;
}
@@ -294,6 +305,7 @@ static int io_sq_thread(void *data)
mutex_unlock(&sqd->lock);
schedule();
mutex_lock(&sqd->lock);
+ sqd->sq_cpu = raw_smp_processor_id();
}
list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
atomic_andnot(IORING_SQ_NEED_WAKEUP,
@@ -395,14 +407,14 @@ __cold int io_sq_offload_create(struct io_ring_ctx *ctx,
sqd->sq_cpu = -1;
}
- sqd->task_pid = current->pid;
- sqd->task_tgid = current->tgid;
+ sqd->task_tgid = current->pid;
tsk = create_io_thread(io_sq_thread, sqd, NUMA_NO_NODE);
if (IS_ERR(tsk)) {
ret = PTR_ERR(tsk);
goto err_sqpoll;
}
+ sqd->task_pid = task_pid_nr(tsk);
sqd->thread = tsk;
ret = io_uring_alloc_task_context(tsk, ctx);
wake_up_new_task(tsk);
diff --git a/io_uring/sqpoll.h b/io_uring/sqpoll.h
index 8df37e8c9149..fd6fa9587843 100644
--- a/io_uring/sqpoll.h
+++ b/io_uring/sqpoll.h
@@ -16,6 +16,8 @@ struct io_sq_data {
pid_t task_pid;
pid_t task_tgid;
+ unsigned long work_time;
+ unsigned long total_time;
unsigned long state;
struct completion exited;
};
--
2.34.1
next parent reply other threads:[~2023-11-15 12:32 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <CGME20231115122627epcas5p37263cadafd5af20043fbb74e57fe5a4c@epcas5p3.samsung.com>
2023-11-15 12:18 ` Xiaobing Li [this message]
2023-11-15 13:51 ` [PATCH v3] io_uring: Statistics of the true utilization of sq threads Jens Axboe
2023-11-20 14:26 ` Dan Carpenter
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox