public inbox for [email protected]
 help / color / mirror / Atom feed
* [PATCH v3] io_uring: releasing CPU resources when polling
       [not found] <CGME20240507091704epcas5p14ae67ce9a9cf6ab8c366d4a99b9a19ef@epcas5p1.samsung.com>
@ 2024-05-07  9:16 ` hexue
       [not found]   ` <CGME20240513032056epcas5p22f23ffea6848df3fd07e081a2b0bb659@epcas5p2.samsung.com>
  0 siblings, 1 reply; 2+ messages in thread
From: hexue @ 2024-05-07  9:16 UTC (permalink / raw)
  To: axboe
  Cc: asml.silence, io-uring, linux-kernel, peiwei.li, joshi.k,
	kundan.kumar, anuj20.g, ruyi.zhang, wenwen.chen, xiaobing.li,
	cliang01.li, hexue

This patch is intended to release the CPU resources of io_uring in
polling mode. When IO is issued, the program immediately starts polling
to check for completion, which wastes CPU resources while the IO
commands are still being executed on the disk.

This adds a hybrid polling feature to io_uring, which enables polling
to release a portion of CPU resources without affecting the block layer.

- Record the running time and context switching time of each
  IO, and use these times to determine whether the process should
  go on to schedule.

- Adaptive adjustment to different devices. Because the times are
  recorded in real time and each device's IO processing speed is
  different, the CPU optimization effect will vary from device to device.

- Add an interface (the IORING_SETUP_HY_POLL bit in ctx->flags) that
  lets the application choose whether or not to use this feature.

The CPU optimization of this patch under peak workload was tested as follows:
  set 8 poll queues
  the CPU utilization of the original polling is 100% per CPU; after
  optimization, the CPU utilization drops a lot (per CPU):

   read(128k, QD64, 1Job)     37%   write(128k, QD64, 1Job)     40%
   randread(4k, QD64, 16Job)  52%   randwrite(4k, QD64, 16Job)  12%

  Compared to the original polling, the performance reduction of the
  optimised version under peak workload is within 1%.

   read  0.29%     write  0.51%    randread  0.09%    randwrite  0%

Signed-off-by: hexue <[email protected]>

---

changes:
v2:
 - extend hybrid poll to async polled io

v1:
 - initial version
---
 include/linux/io_uring_types.h |  14 ++++
 include/uapi/linux/io_uring.h  |   1 +
 io_uring/io_uring.c            |   4 +-
 io_uring/io_uring.h            |   3 +
 io_uring/rw.c                  | 115 ++++++++++++++++++++++++++++++++-
 5 files changed, 135 insertions(+), 2 deletions(-)

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 854ad67a5f70..3a75b9904326 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -224,6 +224,11 @@ struct io_alloc_cache {
 	size_t			elem_size;
 };
 
+struct iopoll_info {
+	long		last_runtime;
+	long		last_irqtime;
+};
+
 struct io_ring_ctx {
 	/* const or read-mostly hot data */
 	struct {
@@ -421,6 +426,7 @@ struct io_ring_ctx {
 	unsigned short			n_sqe_pages;
 	struct page			**ring_pages;
 	struct page			**sqe_pages;
+	struct xarray		poll_array;
 };
 
 struct io_tw_state {
@@ -571,6 +577,12 @@ static inline void io_kiocb_cmd_sz_check(size_t cmd_sz)
 )
 #define cmd_to_io_kiocb(ptr)	((struct io_kiocb *) ptr)
 
+struct hy_poll_time {
+	int		poll_state;
+	struct timespec64		iopoll_start;
+	struct timespec64		iopoll_end;
+};
+
 struct io_kiocb {
 	union {
 		/*
@@ -641,6 +653,8 @@ struct io_kiocb {
 		u64			extra1;
 		u64			extra2;
 	} big_cqe;
+	/* for hybrid iopoll */
+	struct hy_poll_time		*hy_poll;
 };
 
 struct io_overflow_cqe {
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 7a673b52827b..0038cdfec18f 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -198,6 +198,7 @@ enum {
  * Removes indirection through the SQ index array.
  */
 #define IORING_SETUP_NO_SQARRAY		(1U << 16)
+#define IORING_SETUP_HY_POLL	(1U << 17)
 
 enum io_uring_op {
 	IORING_OP_NOP,
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index cd9a137ad6ce..2c14768bbe27 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -311,6 +311,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 		goto err;
 
 	ctx->flags = p->flags;
+	xa_init(&ctx->poll_array);
 	atomic_set(&ctx->cq_wait_nr, IO_CQ_WAKE_INIT);
 	init_waitqueue_head(&ctx->sqo_sq_wait);
 	INIT_LIST_HEAD(&ctx->sqd_list);
@@ -2921,6 +2922,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
 	kfree(ctx->cancel_table_locked.hbs);
 	kfree(ctx->io_bl);
 	xa_destroy(&ctx->io_bl_xa);
+	xa_destroy(&ctx->poll_array);
 	kfree(ctx);
 }
 
@@ -4050,7 +4052,7 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
 			IORING_SETUP_SQE128 | IORING_SETUP_CQE32 |
 			IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN |
 			IORING_SETUP_NO_MMAP | IORING_SETUP_REGISTERED_FD_ONLY |
-			IORING_SETUP_NO_SQARRAY))
+			IORING_SETUP_NO_SQARRAY | IORING_SETUP_HY_POLL))
 		return -EINVAL;
 
 	return io_uring_create(entries, &p, params);
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index d5495710c178..72d6a4c3b46d 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -125,6 +125,9 @@ static inline void io_req_task_work_add(struct io_kiocb *req)
 	__io_req_task_work_add(req, 0);
 }
 
+/* if sleep time less than 1us, then do not do the schedule op */
+#define MIN_SCHETIME 1000
+
 #define io_for_each_link(pos, head) \
 	for (pos = (head); pos; pos = pos->link)
 
diff --git a/io_uring/rw.c b/io_uring/rw.c
index d5e79d9bdc71..29c7ce23ed71 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -713,6 +713,46 @@ static bool need_complete_io(struct io_kiocb *req)
 		S_ISBLK(file_inode(req->file)->i_mode);
 }
 
+void init_hybrid_poll(struct io_ring_ctx *ctx, struct io_kiocb *req)
+{
+	/*
+	 * In multiple concurrency, a thread may operate several files
+	 * under different file systems, the inode numbers may be
+	 * duplicated. Each device has a different IO command processing
+	 * capability, so using device number to record the running time
+	 * of device
+	 */
+	u32 index = req->file->f_inode->i_rdev;
+	struct iopoll_info *entry = xa_load(&ctx->poll_array, index);
+	struct hy_poll_time *hpt = kmalloc(sizeof(struct hy_poll_time), GFP_KERNEL);
+
+	/* if alloc fail, go to regular poll */
+	if (!hpt) {
+		ctx->flags &= ~IORING_SETUP_HY_POLL;
+		return;
+	}
+	hpt->poll_state = 0;
+	req->hy_poll = hpt;
+
+	if (!entry) {
+		entry = kmalloc(sizeof(struct iopoll_info), GFP_KERNEL);
+		if (!entry) {
+			ctx->flags &= ~IORING_SETUP_HY_POLL;
+			return;
+		}
+		entry->last_runtime = 0;
+		entry->last_irqtime = 0;
+		xa_store(&ctx->poll_array, index, entry, GFP_KERNEL);
+	}
+
+	/*
+	 * Here we need nanosecond timestamps, some ways of reading
+	 * timestamps directly are only accurate to microseconds, so
+	 * there's no better alternative here for now
+	 */
+	ktime_get_ts64(&hpt->iopoll_start);
+}
+
 static int io_rw_init_file(struct io_kiocb *req, fmode_t mode)
 {
 	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
@@ -750,6 +790,8 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode)
 		kiocb->ki_flags |= IOCB_HIPRI;
 		kiocb->ki_complete = io_complete_rw_iopoll;
 		req->iopoll_completed = 0;
+		if (ctx->flags & IORING_SETUP_HY_POLL)
+			init_hybrid_poll(ctx, req);
 	} else {
 		if (kiocb->ki_flags & IOCB_HIPRI)
 			return -EINVAL;
@@ -1118,6 +1160,75 @@ void io_rw_fail(struct io_kiocb *req)
 	io_req_set_res(req, res, req->cqe.flags);
 }
 
+void io_delay(struct hy_poll_time *hpt, struct iopoll_info *entry)
+{
+	struct hrtimer_sleeper timer;
+	struct timespec64 tc, oldtc;
+	enum hrtimer_mode mode;
+	ktime_t kt;
+	long sleep_ti;
+
+	if (hpt->poll_state == 1)
+		return;
+
+	if (entry->last_runtime <= entry->last_irqtime)
+		return;
+
+	/*
+	 * Avoid excessive scheduling time affecting performance
+	 * by using only 25 per cent of the remaining time
+	 */
+	sleep_ti = (entry->last_runtime - entry->last_irqtime) / 4;
+
+	/*
+	 * If the time available for sleep is too short, i.e. the
+	 * total running time and the context switching loss time
+	 * are very close to each other, the scheduling operation
+	 * is not performed to avoid increasing latency
+	 */
+	if (sleep_ti < MIN_SCHETIME)
+		return;
+
+	ktime_get_ts64(&oldtc);
+	kt = ktime_set(0, sleep_ti);
+	hpt->poll_state = 1;
+
+	mode = HRTIMER_MODE_REL;
+	hrtimer_init_sleeper_on_stack(&timer, CLOCK_MONOTONIC, mode);
+	hrtimer_set_expires(&timer.timer, kt);
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	hrtimer_sleeper_start_expires(&timer, mode);
+
+	if (timer.task)
+		io_schedule();
+
+	hrtimer_cancel(&timer.timer);
+	mode = HRTIMER_MODE_ABS;
+	__set_current_state(TASK_RUNNING);
+	destroy_hrtimer_on_stack(&timer.timer);
+
+	ktime_get_ts64(&tc);
+	entry->last_irqtime = tc.tv_nsec - oldtc.tv_nsec - sleep_ti;
+}
+
+int io_uring_hybrid_poll(struct io_kiocb *req,
+				struct io_comp_batch *iob, unsigned int poll_flags)
+{
+	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
+	struct io_ring_ctx *ctx = req->ctx;
+	struct hy_poll_time *hpt = req->hy_poll;
+	u32 index = req->file->f_inode->i_rdev;
+	struct iopoll_info *entry = xa_load(&ctx->poll_array, index);
+	int ret;
+
+	io_delay(hpt, entry);
+	ret = req->file->f_op->iopoll(&rw->kiocb, iob, poll_flags);
+
+	ktime_get_ts64(&hpt->iopoll_end);
+	entry->last_runtime = hpt->iopoll_end.tv_nsec - hpt->iopoll_start.tv_nsec;
+	return ret;
+}
+
 int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
 {
 	struct io_wq_work_node *pos, *start, *prev;
@@ -1145,7 +1256,9 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
 		if (READ_ONCE(req->iopoll_completed))
 			break;
 
-		if (req->opcode == IORING_OP_URING_CMD) {
+		if (ctx->flags & IORING_SETUP_HY_POLL) {
+			ret = io_uring_hybrid_poll(req, &iob, poll_flags);
+		} else if (req->opcode == IORING_OP_URING_CMD) {
 			struct io_uring_cmd *ioucmd;
 
 			ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
-- 
2.40.1


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH v3] io_uring: releasing CPU resources when polling
       [not found]   ` <CGME20240513032056epcas5p22f23ffea6848df3fd07e081a2b0bb659@epcas5p2.samsung.com>
@ 2024-05-13  3:20     ` hexue
  0 siblings, 0 replies; 2+ messages in thread
From: hexue @ 2024-05-13  3:20 UTC (permalink / raw)
  To: axboe
  Cc: anuj20.g, asml.silence, cliang01.li, io-uring, joshi.k,
	kundan.kumar, linux-kernel, peiwei.li, ruyi.zhang, wenwen.chen,
	xiaobing.li

On 5/7/24 17:16, hexue wrote:
>This patch is intended to release the CPU resources of io_uring in
>polling mode. When IO is issued, the program immediately polls for
>check completion, which is a waste of CPU resources when IO commands
>are executed on the disk.
>
>I add the hybrid polling feature in io_uring, enables polling to
>release a portion of CPU resources without affecting block layer.
>
>- Record the running time and context switching time of each
>  IO, and use these time to determine whether a process continue
>  to schedule.
>
>- Adaptive adjustment to different devices. Due to the real-time
>  nature of time recording, each device's IO processing speed is
>  different, so the CPU optimization effect will vary.
>
>- Set a interface (ctx->flag) enables application to choose whether
>  or not to use this feature.
>
>The CPU optimization in peak workload of patch is tested as follows:
>  set 8 poll queues
>  all CPU utilization of original polling is 100% for per CPU, after
>  optimization, the CPU utilization drop a lot (per CPU);
>
>   read(128k, QD64, 1Job)     37%   write(128k, QD64, 1Job)     40%
>   randread(4k, QD64, 16Job)  52%   randwrite(4k, QD64, 16Job)  12%
>
>  Compared to original polling, the optimised performance reduction
>  with peak workload within 1%.
>
>   read  0.29%     write  0.51%    randread  0.09%    randwrite  0%
>
>Signed-off-by: hexue <[email protected]>
>
>---
>
>changes:
>v2:
> - extend hybrid poll to async polled io
>
>v1:
> - initial version
>---
> include/linux/io_uring_types.h |  14 ++++
> include/uapi/linux/io_uring.h  |   1 +
> io_uring/io_uring.c            |   4 +-
> io_uring/io_uring.h            |   3 +
> io_uring/rw.c                  | 115 ++++++++++++++++++++++++++++++++-
> 5 files changed, 135 insertions(+), 2 deletions(-)
>
>diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
>index 854ad67a5f70..3a75b9904326 100644
>--- a/include/linux/io_uring_types.h
>+++ b/include/linux/io_uring_types.h
>@@ -224,6 +224,11 @@ struct io_alloc_cache {
> 	size_t			elem_size;
> };
>
>+struct iopoll_info {
>+	long		last_runtime;
>+	long		last_irqtime;
>+};
>+
> struct io_ring_ctx {
> 	/* const or read-mostly hot data */
> 	struct {
>@@ -421,6 +426,7 @@ struct io_ring_ctx {
> 	unsigned short			n_sqe_pages;
> 	struct page			**ring_pages;
> 	struct page			**sqe_pages;
>+	struct xarray		poll_array;
> };
>
> struct io_tw_state {
>@@ -571,6 +577,12 @@ static inline void io_kiocb_cmd_sz_check(size_t cmd_sz)
> )
> #define cmd_to_io_kiocb(ptr)	((struct io_kiocb *) ptr)
>
>+struct hy_poll_time {
>+	int		poll_state;
>+	struct timespec64		iopoll_start;
>+	struct timespec64		iopoll_end;
>+};
>+
> struct io_kiocb {
> 	union {
> 		/*
>@@ -641,6 +653,8 @@ struct io_kiocb {
> 		u64			extra1;
> 		u64			extra2;
> 	} big_cqe;
>+	/* for hybrid iopoll */
>+	struct hy_poll_time		*hy_poll;
> };
>
> struct io_overflow_cqe {
>diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
>index 7a673b52827b..0038cdfec18f 100644
>--- a/include/uapi/linux/io_uring.h
>+++ b/include/uapi/linux/io_uring.h
>@@ -198,6 +198,7 @@ enum {
>  * Removes indirection through the SQ index array.
>  */
> #define IORING_SETUP_NO_SQARRAY		(1U << 16)
>+#define IORING_SETUP_HY_POLL	(1U << 17)
>
> enum io_uring_op {
> 	IORING_OP_NOP,
>diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
>index cd9a137ad6ce..2c14768bbe27 100644
>--- a/io_uring/io_uring.c
>+++ b/io_uring/io_uring.c
>@@ -311,6 +311,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
> 		goto err;
>
> 	ctx->flags = p->flags;
>+	xa_init(&ctx->poll_array);
> 	atomic_set(&ctx->cq_wait_nr, IO_CQ_WAKE_INIT);
> 	init_waitqueue_head(&ctx->sqo_sq_wait);
> 	INIT_LIST_HEAD(&ctx->sqd_list);
>@@ -2921,6 +2922,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
> 	kfree(ctx->cancel_table_locked.hbs);
> 	kfree(ctx->io_bl);
> 	xa_destroy(&ctx->io_bl_xa);
>+	xa_destroy(&ctx->poll_array);
> 	kfree(ctx);
> }
>
>@@ -4050,7 +4052,7 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
> 			IORING_SETUP_SQE128 | IORING_SETUP_CQE32 |
> 			IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN |
> 			IORING_SETUP_NO_MMAP | IORING_SETUP_REGISTERED_FD_ONLY |
>-			IORING_SETUP_NO_SQARRAY))
>+			IORING_SETUP_NO_SQARRAY | IORING_SETUP_HY_POLL))
> 		return -EINVAL;
>
> 	return io_uring_create(entries, &p, params);
>diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
>index d5495710c178..72d6a4c3b46d 100644
>--- a/io_uring/io_uring.h
>+++ b/io_uring/io_uring.h
>@@ -125,6 +125,9 @@ static inline void io_req_task_work_add(struct io_kiocb *req)
> 	__io_req_task_work_add(req, 0);
> }
>
>+/* if sleep time less than 1us, then do not do the schedule op */
>+#define MIN_SCHETIME 1000
>+
> #define io_for_each_link(pos, head) \
> 	for (pos = (head); pos; pos = pos->link)
>
>diff --git a/io_uring/rw.c b/io_uring/rw.c
>index d5e79d9bdc71..29c7ce23ed71 100644
>--- a/io_uring/rw.c
>+++ b/io_uring/rw.c
>@@ -713,6 +713,46 @@ static bool need_complete_io(struct io_kiocb *req)
> 		S_ISBLK(file_inode(req->file)->i_mode);
> }
>
>+void init_hybrid_poll(struct io_ring_ctx *ctx, struct io_kiocb *req)
>+{
>+	/*
>+	 * In multiple concurrency, a thread may operate several files
>+	 * under different file systems, the inode numbers may be
>+	 * duplicated. Each device has a different IO command processing
>+	 * capability, so using device number to record the running time
>+	 * of device
>+	 */
>+	u32 index = req->file->f_inode->i_rdev;
>+	struct iopoll_info *entry = xa_load(&ctx->poll_array, index);
>+	struct hy_poll_time *hpt = kmalloc(sizeof(struct hy_poll_time), GFP_KERNEL);
>+
>+	/* if alloc fail, go to regular poll */
>+	if (!hpt) {
>+		ctx->flags &= ~IORING_SETUP_HY_POLL;
>+		return;
>+	}
>+	hpt->poll_state = 0;
>+	req->hy_poll = hpt;
>+
>+	if (!entry) {
>+		entry = kmalloc(sizeof(struct iopoll_info), GFP_KERNEL);
>+		if (!entry) {
>+			ctx->flags &= ~IORING_SETUP_HY_POLL;
>+			return;
>+		}
>+		entry->last_runtime = 0;
>+		entry->last_irqtime = 0;
>+		xa_store(&ctx->poll_array, index, entry, GFP_KERNEL);
>+	}
>+
>+	/*
>+	 * Here we need nanosecond timestamps, some ways of reading
>+	 * timestamps directly are only accurate to microseconds, so
>+	 * there's no better alternative here for now
>+	 */
>+	ktime_get_ts64(&hpt->iopoll_start);
>+}
>+
> static int io_rw_init_file(struct io_kiocb *req, fmode_t mode)
> {
> 	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
>@@ -750,6 +790,8 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode)
> 		kiocb->ki_flags |= IOCB_HIPRI;
> 		kiocb->ki_complete = io_complete_rw_iopoll;
> 		req->iopoll_completed = 0;
>+		if (ctx->flags & IORING_SETUP_HY_POLL)
>+			init_hybrid_poll(ctx, req);
> 	} else {
> 		if (kiocb->ki_flags & IOCB_HIPRI)
> 			return -EINVAL;
>@@ -1118,6 +1160,75 @@ void io_rw_fail(struct io_kiocb *req)
> 	io_req_set_res(req, res, req->cqe.flags);
> }
>
>+void io_delay(struct hy_poll_time *hpt, struct iopoll_info *entry)
>+{
>+	struct hrtimer_sleeper timer;
>+	struct timespec64 tc, oldtc;
>+	enum hrtimer_mode mode;
>+	ktime_t kt;
>+	long sleep_ti;
>+
>+	if (hpt->poll_state == 1)
>+		return;
>+
>+	if (entry->last_runtime <= entry->last_irqtime)
>+		return;
>+
>+	/*
>+	 * Avoid excessive scheduling time affecting performance
>+	 * by using only 25 per cent of the remaining time
>+	 */
>+	sleep_ti = (entry->last_runtime - entry->last_irqtime) / 4;
>+
>+	/*
>+	 * If the time available for sleep is too short, i.e. the
>+	 * totle running time and the context switching loss time
>+	 * are very close to each other, the scheduling operation
>+	 * is not performed to avoid increasing latency
>+	 */
>+	if (sleep_ti < MIN_SCHETIME)
>+		return;
>+
>+	ktime_get_ts64(&oldtc);
>+	kt = ktime_set(0, sleep_ti);
>+	hpt->poll_state = 1;
>+
>+	mode = HRTIMER_MODE_REL;
>+	hrtimer_init_sleeper_on_stack(&timer, CLOCK_MONOTONIC, mode);
>+	hrtimer_set_expires(&timer.timer, kt);
>+	set_current_state(TASK_UNINTERRUPTIBLE);
>+	hrtimer_sleeper_start_expires(&timer, mode);
>+
>+	if (timer.task)
>+		io_schedule();
>+
>+	hrtimer_cancel(&timer.timer);
>+	mode = HRTIMER_MODE_ABS;
>+	__set_current_state(TASK_RUNNING);
>+	destroy_hrtimer_on_stack(&timer.timer);
>+
>+	ktime_get_ts64(&tc);
>+	entry->last_irqtime = tc.tv_nsec - oldtc.tv_nsec - sleep_ti;
>+}
>+
>+int io_uring_hybrid_poll(struct io_kiocb *req,
>+				struct io_comp_batch *iob, unsigned int poll_flags)
>+{
>+	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
>+	struct io_ring_ctx *ctx = req->ctx;
>+	struct hy_poll_time *hpt = req->hy_poll;
>+	u32 index = req->file->f_inode->i_rdev;
>+	struct iopoll_info *entry = xa_load(&ctx->poll_array, index);
>+	int ret;
>+
>+	io_delay(hpt, entry);
>+	ret = req->file->f_op->iopoll(&rw->kiocb, iob, poll_flags);
>+
>+	ktime_get_ts64(&hpt->iopoll_end);
>+	entry->last_runtime = hpt->iopoll_end.tv_nsec - hpt->iopoll_start.tv_nsec;
>+	return ret;
>+}
>+
> int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
> {
> 	struct io_wq_work_node *pos, *start, *prev;
>@@ -1145,7 +1256,9 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
> 		if (READ_ONCE(req->iopoll_completed))
> 			break;
>
>-		if (req->opcode == IORING_OP_URING_CMD) {
>+		if (ctx->flags & IORING_SETUP_HY_POLL) {
>+			ret = io_uring_hybrid_poll(req, &iob, poll_flags);
>+		} else if (req->opcode == IORING_OP_URING_CMD) {
> 			struct io_uring_cmd *ioucmd;
>
> 			ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);

Hi, Jens
I have revised some of the code according to your suggestions,
and added comments to the parts that were not modified.
Do you have any other comments?

--

Xue He

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2024-05-13  3:21 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <CGME20240507091704epcas5p14ae67ce9a9cf6ab8c366d4a99b9a19ef@epcas5p1.samsung.com>
2024-05-07  9:16 ` [PATCH v3] io_uring: releasing CPU resources when polling hexue
     [not found]   ` <CGME20240513032056epcas5p22f23ffea6848df3fd07e081a2b0bb659@epcas5p2.samsung.com>
2024-05-13  3:20     ` hexue

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox