public inbox for [email protected]
 help / color / mirror / Atom feed
* [PATCH v7 RESENT] io_uring: releasing CPU resources when polling
       [not found] <CGME20240808071720epcas5p3f6f4f8abc6d4c02523dd4f64153a7cec@epcas5p3.samsung.com>
@ 2024-08-08  7:17 ` hexue
  2024-08-08 16:30   ` kernel test robot
  2024-08-09  5:17   ` kernel test robot
  0 siblings, 2 replies; 3+ messages in thread
From: hexue @ 2024-08-08  7:17 UTC (permalink / raw)
  To: axboe, asml.silence; +Cc: io-uring, linux-kernel, hexue

This patch adds a new hybrid poll at the io_uring level, and a new setup
flag "IORING_SETUP_HY_POLL" so that applications have an interface to
enable the new hybrid polling flexibly.

Using io_uring in polling mode can improve IO performance, but it spends
100% of CPU resources doing the polling.

A new hybrid poll is implemented at the io_uring layer. Once an IO is
issued, it is not polled immediately; the task blocks first and is re-run
before the IO completes, then polls to reap the IO. This poll function can
be a suboptimal solution when running on a single thread: it offers
performance lower than regular polling but higher than IRQ, and CPU
utilization is also lower than polling.

Test Result
fio-3.35, 16 poll queues, 1 thread
-------------------------------------------------------------------------
Performance
-------------------------------------------------------------------------
                write         read        randwrite  randread
regular poll BW=3939MiB/s  BW=6613MiB/s  IOPS=190K  IOPS=470K
IRQ          BW=3927MiB/s  BW=6612MiB/s  IOPS=181K  IOPS=203K
hybrid poll  BW=3937MiB/s  BW=6623MiB/s  IOPS=190K  IOPS=358K(suboptimal)
-------------------------------------------------------------------------
CPU Utilization
------------------------------------------------------
                write   read    randwrite   randread
regular poll    100%    100%    100%        100%
IRQ             50%     53%     100%        100%
hybrid poll     70%     37%     70%         85%
------------------------------------------------------

--
changes of RESENT:
- rebase code on for-6.12/io_uring

changes since v6:
- Modified IO path, distinct iopoll and uring_cmd_iopoll
- update test results

changes since v5:
- Remove cstime recorder
- Use the minimized sleep time across different drivers
- Use half of the whole runtime to do the schedule
- Consider as a suboptimal solution between
  regular poll and IRQ

changes since v4:
- Rewrote the commit
- Update the test results
- Reorganized the code based on 6.11

changes since v3:
- Simplified the commit
- Add some comments on code

changes since v2:
- Modified some formatting errors
- Move judgement to poll path

changes since v1:
- Extend hybrid poll to async polled io

Signed-off-by: hexue <[email protected]>
---
 include/linux/io_uring_types.h |   6 ++
 include/uapi/linux/io_uring.h  |   1 +
 io_uring/io_uring.c            |   3 +-
 io_uring/rw.c                  | 100 +++++++++++++++++++++++++++++----
 4 files changed, 99 insertions(+), 11 deletions(-)

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 3315005df117..35ac4a8bf6ab 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -422,6 +422,8 @@ struct io_ring_ctx {
 	unsigned short			n_sqe_pages;
 	struct page			**ring_pages;
 	struct page			**sqe_pages;
+	/* for io_uring hybrid poll*/
+	u64			available_time;
 };
 
 struct io_tw_state {
@@ -657,6 +659,10 @@ struct io_kiocb {
 		u64			extra1;
 		u64			extra2;
 	} big_cqe;
+    /* for io_uring hybrid iopoll */
+	bool		poll_state;
+	u64			iopoll_start;
+	u64			iopoll_end;
 };
 
 struct io_overflow_cqe {
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 2aaf7ee256ac..42ae868651b0 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -199,6 +199,7 @@ enum io_uring_sqe_flags_bit {
  * Removes indirection through the SQ index array.
  */
 #define IORING_SETUP_NO_SQARRAY		(1U << 16)
+#define IORING_SETUP_HY_POLL	(1U << 17)
 
 enum io_uring_op {
 	IORING_OP_NOP,
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 3942db160f18..bb3dfd749572 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -301,6 +301,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 		goto err;
 
 	ctx->flags = p->flags;
+	ctx->available_time = LLONG_MAX;
 	atomic_set(&ctx->cq_wait_nr, IO_CQ_WAKE_INIT);
 	init_waitqueue_head(&ctx->sqo_sq_wait);
 	INIT_LIST_HEAD(&ctx->sqd_list);
@@ -3603,7 +3604,7 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
 			IORING_SETUP_SQE128 | IORING_SETUP_CQE32 |
 			IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN |
 			IORING_SETUP_NO_MMAP | IORING_SETUP_REGISTERED_FD_ONLY |
-			IORING_SETUP_NO_SQARRAY))
+			IORING_SETUP_NO_SQARRAY | IORING_SETUP_HY_POLL))
 		return -EINVAL;
 
 	return io_uring_create(entries, &p, params);
diff --git a/io_uring/rw.c b/io_uring/rw.c
index c004d21e2f12..eb9791b50d36 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -772,6 +772,13 @@ static bool need_complete_io(struct io_kiocb *req)
 		S_ISBLK(file_inode(req->file)->i_mode);
 }
 
+/* Arm hybrid polling for an iopoll request: reset the one-shot sleep state
+ * and record the submission timestamp used for runtime accounting.
+ */
+static void init_hybrid_poll(struct io_kiocb *req)
+{
+	/* Make sure every req only blocks (sleeps) once */
+	req->poll_state = false;
+	req->iopoll_start = ktime_get_ns();
+}
+
 static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
 {
 	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
@@ -809,6 +816,8 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
 		kiocb->ki_flags |= IOCB_HIPRI;
 		kiocb->ki_complete = io_complete_rw_iopoll;
 		req->iopoll_completed = 0;
+		if (ctx->flags & IORING_SETUP_HY_POLL)
+			init_hybrid_poll(req);
 	} else {
 		if (kiocb->ki_flags & IOCB_HIPRI)
 			return -EINVAL;
@@ -1105,6 +1114,83 @@ void io_rw_fail(struct io_kiocb *req)
 	io_req_set_res(req, res, req->cqe.flags);
 }
 
+/*
+ * Plain (non-hybrid) iopoll dispatch: uring_cmd requests go through the
+ * file's ->uring_cmd_iopoll() handler, everything else through ->iopoll().
+ * Returns the handler's result (number of completions found, or negative
+ * error).
+ */
+static int io_uring_classic_poll(struct io_kiocb *req,
+		struct io_comp_batch *iob, unsigned int poll_flags)
+{
+	int ret;
+	struct file *file = req->file;
+
+	if (req->opcode == IORING_OP_URING_CMD) {
+		struct io_uring_cmd *ioucmd;
+
+		ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
+		ret = file->f_op->uring_cmd_iopoll(ioucmd, iob,
+						poll_flags);
+	} else {
+		struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
+
+		ret = file->f_op->iopoll(&rw->kiocb, iob, poll_flags);
+	}
+	return ret;
+}
+
+/*
+ * Sleep once before polling so the CPU is released while the IO is in
+ * flight. The sleep budget is half of the smallest completion runtime
+ * observed so far (ctx->available_time). Returns the requested sleep time
+ * in ns, or 0 if no sleep was attempted.
+ */
+static u64 io_delay(struct io_ring_ctx *ctx, struct io_kiocb *req)
+{
+	struct hrtimer_sleeper timer;
+	enum hrtimer_mode mode;
+	ktime_t kt;
+	u64 sleep_time;
+
+	/* Each req may block at most once (see init_hybrid_poll()) */
+	if (req->poll_state)
+		return 0;
+
+	/* No completion-time estimate yet: fall back to pure polling */
+	if (ctx->available_time == LLONG_MAX)
+		return 0;
+
+	/* Use half of the observed runtime to do the schedule */
+	sleep_time = ctx->available_time / 2;
+
+	kt = ktime_set(0, sleep_time);
+	req->poll_state = true;
+
+	mode = HRTIMER_MODE_REL;
+	hrtimer_init_sleeper_on_stack(&timer, CLOCK_MONOTONIC, mode);
+	hrtimer_set_expires(&timer.timer, kt);
+	set_current_state(TASK_INTERRUPTIBLE);
+	hrtimer_sleeper_start_expires(&timer, mode);
+
+	if (timer.task)
+		io_schedule();
+
+	hrtimer_cancel(&timer.timer);
+	__set_current_state(TASK_RUNNING);
+	destroy_hrtimer_on_stack(&timer.timer);
+
+	/*
+	 * NOTE(review): the sleep is TASK_INTERRUPTIBLE, so the actual time
+	 * slept may be shorter than sleep_time (e.g. on a signal); the
+	 * caller's runtime accounting assumes the full sleep — TODO confirm.
+	 */
+	return sleep_time;
+}
+
+/*
+ * Hybrid iopoll: optionally sleep once (io_delay()) before doing a classic
+ * poll, then update ctx->available_time with the smallest pure-poll runtime
+ * seen so far. Returns the classic poll result, or 0 if the runtime
+ * accounting for this request is invalid.
+ */
+static int io_uring_hybrid_poll(struct io_kiocb *req,
+				struct io_comp_batch *iob, unsigned int poll_flags)
+{
+	struct io_ring_ctx *ctx = req->ctx;
+	u64 sleep_time;
+	s64 runtime;
+	int ret;
+
+	sleep_time = io_delay(ctx, req);
+	ret = io_uring_classic_poll(req, iob, poll_flags);
+	req->iopoll_end = ktime_get_ns();
+
+	/*
+	 * runtime must be signed: the subtraction can go negative when the
+	 * sleep was cut short, and an unsigned type would make the < 0
+	 * check dead code (reported by smatch) and store a wrapped-around
+	 * huge value in ctx->available_time below.
+	 */
+	runtime = (s64)(req->iopoll_end - req->iopoll_start) - (s64)sleep_time;
+	if (runtime < 0)
+		return 0;
+
+	/* use minimized sleep time if there are different speed
+	 * drivers, it could get more completions from fast one
+	 */
+	if (ctx->available_time > runtime)
+		ctx->available_time = runtime;
+	return ret;
+}
+
 int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
 {
 	struct io_wq_work_node *pos, *start, *prev;
@@ -1132,17 +1218,11 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
 		if (READ_ONCE(req->iopoll_completed))
 			break;
 
-		if (req->opcode == IORING_OP_URING_CMD) {
-			struct io_uring_cmd *ioucmd;
-
-			ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
-			ret = file->f_op->uring_cmd_iopoll(ioucmd, &iob,
-								poll_flags);
-		} else {
-			struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
+		if (ctx->flags & IORING_SETUP_HY_POLL)
+			ret = io_uring_hybrid_poll(req, &iob, poll_flags);
+		else
+			ret = io_uring_classic_poll(req, &iob, poll_flags);
 
-			ret = file->f_op->iopoll(&rw->kiocb, &iob, poll_flags);
-		}
 		if (unlikely(ret < 0))
 			return ret;
 		else if (ret)
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH v7 RESENT] io_uring: releasing CPU resources when polling
  2024-08-08  7:17 ` [PATCH v7 RESENT] io_uring: releasing CPU resources when polling hexue
@ 2024-08-08 16:30   ` kernel test robot
  2024-08-09  5:17   ` kernel test robot
  1 sibling, 0 replies; 3+ messages in thread
From: kernel test robot @ 2024-08-08 16:30 UTC (permalink / raw)
  To: hexue, axboe, asml.silence; +Cc: oe-kbuild-all, io-uring, linux-kernel, hexue

Hi hexue,

kernel test robot noticed the following build warnings:

[auto build test WARNING on linus/master]
[also build test WARNING on v6.11-rc2 next-20240808]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/hexue/io_uring-releasing-CPU-resources-when-polling/20240808-153455
base:   linus/master
patch link:    https://lore.kernel.org/r/20240808071712.2429842-1-xue01.he%40samsung.com
patch subject: [PATCH v7 RESENT] io_uring: releasing CPU resources when polling
config: alpha-defconfig (https://download.01.org/0day-ci/archive/20240809/[email protected]/config)
compiler: alpha-linux-gcc (GCC) 13.3.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240809/[email protected]/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/

All warnings (new ones prefixed by >>):

   io_uring/rw.c: In function 'io_do_iopoll':
>> io_uring/rw.c:1210:30: warning: unused variable 'file' [-Wunused-variable]
    1210 |                 struct file *file = req->file;
         |                              ^~~~


vim +/file +1210 io_uring/rw.c

e61753df273102 hexue          2024-08-08  1193  
f3b44f92e59a80 Jens Axboe     2022-06-13  1194  int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
f3b44f92e59a80 Jens Axboe     2022-06-13  1195  {
f3b44f92e59a80 Jens Axboe     2022-06-13  1196  	struct io_wq_work_node *pos, *start, *prev;
54bdd67d0f8848 Keith Busch    2023-03-20  1197  	unsigned int poll_flags = 0;
f3b44f92e59a80 Jens Axboe     2022-06-13  1198  	DEFINE_IO_COMP_BATCH(iob);
f3b44f92e59a80 Jens Axboe     2022-06-13  1199  	int nr_events = 0;
f3b44f92e59a80 Jens Axboe     2022-06-13  1200  
f3b44f92e59a80 Jens Axboe     2022-06-13  1201  	/*
f3b44f92e59a80 Jens Axboe     2022-06-13  1202  	 * Only spin for completions if we don't have multiple devices hanging
f3b44f92e59a80 Jens Axboe     2022-06-13  1203  	 * off our complete list.
f3b44f92e59a80 Jens Axboe     2022-06-13  1204  	 */
f3b44f92e59a80 Jens Axboe     2022-06-13  1205  	if (ctx->poll_multi_queue || force_nonspin)
f3b44f92e59a80 Jens Axboe     2022-06-13  1206  		poll_flags |= BLK_POLL_ONESHOT;
f3b44f92e59a80 Jens Axboe     2022-06-13  1207  
f3b44f92e59a80 Jens Axboe     2022-06-13  1208  	wq_list_for_each(pos, start, &ctx->iopoll_list) {
f3b44f92e59a80 Jens Axboe     2022-06-13  1209  		struct io_kiocb *req = container_of(pos, struct io_kiocb, comp_list);
a1119fb0711591 Jens Axboe     2022-09-02 @1210  		struct file *file = req->file;
f3b44f92e59a80 Jens Axboe     2022-06-13  1211  		int ret;
f3b44f92e59a80 Jens Axboe     2022-06-13  1212  
f3b44f92e59a80 Jens Axboe     2022-06-13  1213  		/*
f3b44f92e59a80 Jens Axboe     2022-06-13  1214  		 * Move completed and retryable entries to our local lists.
f3b44f92e59a80 Jens Axboe     2022-06-13  1215  		 * If we find a request that requires polling, break out
f3b44f92e59a80 Jens Axboe     2022-06-13  1216  		 * and complete those lists first, if we have entries there.
f3b44f92e59a80 Jens Axboe     2022-06-13  1217  		 */
f3b44f92e59a80 Jens Axboe     2022-06-13  1218  		if (READ_ONCE(req->iopoll_completed))
f3b44f92e59a80 Jens Axboe     2022-06-13  1219  			break;
f3b44f92e59a80 Jens Axboe     2022-06-13  1220  
e61753df273102 hexue          2024-08-08  1221  		if (ctx->flags & IORING_SETUP_HY_POLL)
e61753df273102 hexue          2024-08-08  1222  			ret = io_uring_hybrid_poll(req, &iob, poll_flags);
e61753df273102 hexue          2024-08-08  1223  		else
e61753df273102 hexue          2024-08-08  1224  			ret = io_uring_classic_poll(req, &iob, poll_flags);
5756a3a7e713bc Kanchan Joshi  2022-08-23  1225  
f3b44f92e59a80 Jens Axboe     2022-06-13  1226  		if (unlikely(ret < 0))
f3b44f92e59a80 Jens Axboe     2022-06-13  1227  			return ret;
f3b44f92e59a80 Jens Axboe     2022-06-13  1228  		else if (ret)
f3b44f92e59a80 Jens Axboe     2022-06-13  1229  			poll_flags |= BLK_POLL_ONESHOT;
f3b44f92e59a80 Jens Axboe     2022-06-13  1230  
f3b44f92e59a80 Jens Axboe     2022-06-13  1231  		/* iopoll may have completed current req */
f3b44f92e59a80 Jens Axboe     2022-06-13  1232  		if (!rq_list_empty(iob.req_list) ||
f3b44f92e59a80 Jens Axboe     2022-06-13  1233  		    READ_ONCE(req->iopoll_completed))
f3b44f92e59a80 Jens Axboe     2022-06-13  1234  			break;
f3b44f92e59a80 Jens Axboe     2022-06-13  1235  	}
f3b44f92e59a80 Jens Axboe     2022-06-13  1236  
f3b44f92e59a80 Jens Axboe     2022-06-13  1237  	if (!rq_list_empty(iob.req_list))
f3b44f92e59a80 Jens Axboe     2022-06-13  1238  		iob.complete(&iob);
f3b44f92e59a80 Jens Axboe     2022-06-13  1239  	else if (!pos)
f3b44f92e59a80 Jens Axboe     2022-06-13  1240  		return 0;
f3b44f92e59a80 Jens Axboe     2022-06-13  1241  
f3b44f92e59a80 Jens Axboe     2022-06-13  1242  	prev = start;
f3b44f92e59a80 Jens Axboe     2022-06-13  1243  	wq_list_for_each_resume(pos, prev) {
f3b44f92e59a80 Jens Axboe     2022-06-13  1244  		struct io_kiocb *req = container_of(pos, struct io_kiocb, comp_list);
f3b44f92e59a80 Jens Axboe     2022-06-13  1245  
f3b44f92e59a80 Jens Axboe     2022-06-13  1246  		/* order with io_complete_rw_iopoll(), e.g. ->result updates */
f3b44f92e59a80 Jens Axboe     2022-06-13  1247  		if (!smp_load_acquire(&req->iopoll_completed))
f3b44f92e59a80 Jens Axboe     2022-06-13  1248  			break;
f3b44f92e59a80 Jens Axboe     2022-06-13  1249  		nr_events++;
f3b44f92e59a80 Jens Axboe     2022-06-13  1250  		req->cqe.flags = io_put_kbuf(req, 0);
a9165b83c1937e Jens Axboe     2024-03-18  1251  		if (req->opcode != IORING_OP_URING_CMD)
a9165b83c1937e Jens Axboe     2024-03-18  1252  			io_req_rw_cleanup(req, 0);
544d163d659d45 Pavel Begunkov 2023-01-12  1253  	}
f3b44f92e59a80 Jens Axboe     2022-06-13  1254  	if (unlikely(!nr_events))
f3b44f92e59a80 Jens Axboe     2022-06-13  1255  		return 0;
f3b44f92e59a80 Jens Axboe     2022-06-13  1256  
f3b44f92e59a80 Jens Axboe     2022-06-13  1257  	pos = start ? start->next : ctx->iopoll_list.first;
f3b44f92e59a80 Jens Axboe     2022-06-13  1258  	wq_list_cut(&ctx->iopoll_list, prev, start);
ec26c225f06f59 Pavel Begunkov 2023-08-24  1259  
ec26c225f06f59 Pavel Begunkov 2023-08-24  1260  	if (WARN_ON_ONCE(!wq_list_empty(&ctx->submit_state.compl_reqs)))
ec26c225f06f59 Pavel Begunkov 2023-08-24  1261  		return 0;
ec26c225f06f59 Pavel Begunkov 2023-08-24  1262  	ctx->submit_state.compl_reqs.first = pos;
ec26c225f06f59 Pavel Begunkov 2023-08-24  1263  	__io_submit_flush_completions(ctx);
f3b44f92e59a80 Jens Axboe     2022-06-13  1264  	return nr_events;
f3b44f92e59a80 Jens Axboe     2022-06-13  1265  }
a9165b83c1937e Jens Axboe     2024-03-18  1266  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v7 RESENT] io_uring: releasing CPU resources when polling
  2024-08-08  7:17 ` [PATCH v7 RESENT] io_uring: releasing CPU resources when polling hexue
  2024-08-08 16:30   ` kernel test robot
@ 2024-08-09  5:17   ` kernel test robot
  1 sibling, 0 replies; 3+ messages in thread
From: kernel test robot @ 2024-08-09  5:17 UTC (permalink / raw)
  To: hexue, axboe, asml.silence; +Cc: oe-kbuild-all, io-uring, linux-kernel, hexue

Hi hexue,

kernel test robot noticed the following build warnings:

[auto build test WARNING on linus/master]
[also build test WARNING on v6.11-rc2 next-20240808]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/hexue/io_uring-releasing-CPU-resources-when-polling/20240808-153455
base:   linus/master
patch link:    https://lore.kernel.org/r/20240808071712.2429842-1-xue01.he%40samsung.com
patch subject: [PATCH v7 RESENT] io_uring: releasing CPU resources when polling
config: x86_64-randconfig-161-20240809 (https://download.01.org/0day-ci/archive/20240809/[email protected]/config)
compiler: clang version 18.1.5 (https://github.com/llvm/llvm-project 617a15a9eac96088ae5e9134248d8236e34b91b1)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <[email protected]>
| Closes: https://lore.kernel.org/oe-kbuild-all/[email protected]/

smatch warnings:
io_uring/rw.c:1183 io_uring_hybrid_poll() warn: unsigned 'runtime' is never less than zero.

vim +/runtime +1183 io_uring/rw.c

  1171	
  1172	static int io_uring_hybrid_poll(struct io_kiocb *req,
  1173					struct io_comp_batch *iob, unsigned int poll_flags)
  1174	{
  1175		struct io_ring_ctx *ctx = req->ctx;
  1176		int ret;
  1177		u64 runtime, sleep_time;
  1178	
  1179		sleep_time = io_delay(ctx, req);
  1180		ret = io_uring_classic_poll(req, iob, poll_flags);
  1181		req->iopoll_end = ktime_get_ns();
  1182		runtime = req->iopoll_end - req->iopoll_start - sleep_time;
> 1183		if (runtime < 0)
  1184			return 0;
  1185	
  1186		/* use minimize sleep time if there are different speed
  1187		 * drivers, it could get more completions from fast one
  1188		 */
  1189		if (ctx->available_time > runtime)
  1190			ctx->available_time = runtime;
  1191		return ret;
  1192	}
  1193	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2024-08-09  5:18 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <CGME20240808071720epcas5p3f6f4f8abc6d4c02523dd4f64153a7cec@epcas5p3.samsung.com>
2024-08-08  7:17 ` [PATCH v7 RESENT] io_uring: releasing CPU resources when polling hexue
2024-08-08 16:30   ` kernel test robot
2024-08-09  5:17   ` kernel test robot

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox