public inbox for io-uring@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] io_uring: add IORING_SETUP_SQTHREAD_STATS flag to enable sqthread stats collection
@ 2025-10-20 11:30 Fengnan Chang
  2025-10-20 14:59 ` Gabriel Krisman Bertazi
  2025-10-20 15:12 ` Jens Axboe
  0 siblings, 2 replies; 6+ messages in thread
From: Fengnan Chang @ 2025-10-20 11:30 UTC (permalink / raw)
  To: axboe, xiaobing.li, asml.silence, io-uring; +Cc: Fengnan Chang, Diangang Li

In previous versions, getrusage was always called in the sqthread
to count work time, but this could incur some overhead.
This patch turns off stats by default, and introduces a new flag
IORING_SETUP_SQTHREAD_STATS that allows the user to enable the
collection of statistics in the sqthread.

./t/io_uring -p1 -d128 -b4096 -s32 -c1 -F1 -B1 -R1 -X1 -n1 ./testfile
IOPS base: 570K, patch: 590K

./t/io_uring -p1 -d128 -b4096 -s32 -c1 -F1 -B1 -R1 -X1 -n1 /dev/nvme1n1
IOPS base: 826K, patch: 889K

Signed-off-by: Fengnan Chang <changfengnan@bytedance.com>
Reviewed-by: Diangang Li <lidiangang@bytedance.com>
---
 include/uapi/linux/io_uring.h |  5 +++++
 io_uring/fdinfo.c             | 15 ++++++++++-----
 io_uring/io_uring.h           |  3 ++-
 io_uring/sqpoll.c             | 10 +++++++---
 io_uring/sqpoll.h             |  1 +
 5 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 263bed13473e..8c5cb9533950 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -231,6 +231,11 @@ enum io_uring_sqe_flags_bit {
  */
 #define IORING_SETUP_CQE_MIXED		(1U << 18)
 
+/*
+ * Enable SQPOLL thread stats collection
+ */
+#define IORING_SETUP_SQTHREAD_STATS	(1U << 19)
+
 enum io_uring_op {
 	IORING_OP_NOP,
 	IORING_OP_READV,
diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c
index ff3364531c77..4c532e414255 100644
--- a/io_uring/fdinfo.c
+++ b/io_uring/fdinfo.c
@@ -154,13 +154,16 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
 		if (tsk) {
 			get_task_struct(tsk);
 			rcu_read_unlock();
-			getrusage(tsk, RUSAGE_SELF, &sq_usage);
+			if (sq->enable_work_time_stat)
+				getrusage(tsk, RUSAGE_SELF, &sq_usage);
 			put_task_struct(tsk);
 			sq_pid = sq->task_pid;
 			sq_cpu = sq->sq_cpu;
-			sq_total_time = (sq_usage.ru_stime.tv_sec * 1000000
+			if (sq->enable_work_time_stat) {
+				sq_total_time = (sq_usage.ru_stime.tv_sec * 1000000
 					 + sq_usage.ru_stime.tv_usec);
-			sq_work_time = sq->work_time;
+				sq_work_time = sq->work_time;
+			}
 		} else {
 			rcu_read_unlock();
 		}
@@ -168,8 +171,10 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
 
 	seq_printf(m, "SqThread:\t%d\n", sq_pid);
 	seq_printf(m, "SqThreadCpu:\t%d\n", sq_cpu);
-	seq_printf(m, "SqTotalTime:\t%llu\n", sq_total_time);
-	seq_printf(m, "SqWorkTime:\t%llu\n", sq_work_time);
+	if (ctx->flags & IORING_SETUP_SQTHREAD_STATS) {
+		seq_printf(m, "SqTotalTime:\t%llu\n", sq_total_time);
+		seq_printf(m, "SqWorkTime:\t%llu\n", sq_work_time);
+	}
 	seq_printf(m, "UserFiles:\t%u\n", ctx->file_table.data.nr);
 	for (i = 0; i < ctx->file_table.data.nr; i++) {
 		struct file *f = NULL;
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 46d9141d772a..949dc7cba111 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -54,7 +54,8 @@
 			IORING_SETUP_REGISTERED_FD_ONLY |\
 			IORING_SETUP_NO_SQARRAY |\
 			IORING_SETUP_HYBRID_IOPOLL |\
-			IORING_SETUP_CQE_MIXED)
+			IORING_SETUP_CQE_MIXED |\
+			IORING_SETUP_SQTHREAD_STATS)
 
 #define IORING_ENTER_FLAGS (IORING_ENTER_GETEVENTS |\
 			IORING_ENTER_SQ_WAKEUP |\
diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c
index a3f11349ce06..46bcd4854abc 100644
--- a/io_uring/sqpoll.c
+++ b/io_uring/sqpoll.c
@@ -161,6 +161,7 @@ static struct io_sq_data *io_get_sq_data(struct io_uring_params *p,
 	mutex_init(&sqd->lock);
 	init_waitqueue_head(&sqd->wait);
 	init_completion(&sqd->exited);
+	sqd->enable_work_time_stat = false;
 	return sqd;
 }
 
@@ -317,7 +318,8 @@ static int io_sq_thread(void *data)
 		}
 
 		cap_entries = !list_is_singular(&sqd->ctx_list);
-		getrusage(current, RUSAGE_SELF, &start);
+		if (sqd->enable_work_time_stat)
+			getrusage(current, RUSAGE_SELF, &start);
 		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
 			int ret = __io_sq_thread(ctx, cap_entries);
 
@@ -333,7 +335,8 @@ static int io_sq_thread(void *data)
 
 		if (sqt_spin || !time_after(jiffies, timeout)) {
 			if (sqt_spin) {
-				io_sq_update_worktime(sqd, &start);
+				if (sqd->enable_work_time_stat)
+					io_sq_update_worktime(sqd, &start);
 				timeout = jiffies + sqd->sq_thread_idle;
 			}
 			if (unlikely(need_resched())) {
@@ -445,7 +448,8 @@ __cold int io_sq_offload_create(struct io_ring_ctx *ctx,
 			ret = PTR_ERR(sqd);
 			goto err;
 		}
-
+		if (ctx->flags & IORING_SETUP_SQTHREAD_STATS)
+			sqd->enable_work_time_stat = true;
 		ctx->sq_creds = get_current_cred();
 		ctx->sq_data = sqd;
 		ctx->sq_thread_idle = msecs_to_jiffies(p->sq_thread_idle);
diff --git a/io_uring/sqpoll.h b/io_uring/sqpoll.h
index b83dcdec9765..55f2e4d46d54 100644
--- a/io_uring/sqpoll.h
+++ b/io_uring/sqpoll.h
@@ -19,6 +19,7 @@ struct io_sq_data {
 	u64			work_time;
 	unsigned long		state;
 	struct completion	exited;
+	bool			enable_work_time_stat;
 };
 
 int io_sq_offload_create(struct io_ring_ctx *ctx, struct io_uring_params *p);
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2025-10-21 17:54 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2025-10-20 11:30 [PATCH v2] io_uring: add IORING_SETUP_SQTHREAD_STATS flag to enable sqthread stats collection Fengnan Chang
2025-10-20 14:59 ` Gabriel Krisman Bertazi
2025-10-21  8:50   ` [External] " Fengnan Chang
2025-10-20 15:12 ` Jens Axboe
2025-10-21  8:54   ` [External] " Fengnan Chang
2025-10-21 17:54     ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox