public inbox for [email protected]
* [PATCHSET 0/2] Reduce poll based overhead
@ 2022-03-16 23:03 Jens Axboe
  2022-03-16 23:03 ` [PATCH 1/2] io_uring: cache req->apoll->events in req->cflags Jens Axboe
  2022-03-16 23:03 ` [PATCH 2/2] io_uring: cache poll/double-poll state with a request flag Jens Axboe
  0 siblings, 2 replies; 3+ messages in thread
From: Jens Axboe @ 2022-03-16 23:03 UTC (permalink / raw)
  To: io-uring

Hi,

Networked workloads are intensive on the poll arming side, as most
receive operations are triggered asynchronously by poll. For that
kind of poll triggering, we allocate req->apoll dynamically, and that
serves as our poll entry. This means that poll->events and poll->head
are not part of the io_kiocb cachelines, and hence are often not hot
in the completion path. When profiling these workloads,
io_poll_check_events() shows up hotter than it should, exactly because
we have to pull in this cacheline separately.

Cache state in the io_kiocb itself instead, which avoids pulling
in unnecessary data in the poll task_work path. This reduces overhead
by about 3-4%.
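
To illustrate the idea outside the kernel, here's a minimal sketch.
The struct and function names below are made up for illustration and
are not the real io_kiocb/async_poll definitions; only the access
pattern matches what the patches do:

struct poll_entry {
	unsigned int events;	/* separate allocation, cold at completion */
};

struct request {
	unsigned int cflags;	/* lives in the request itself, already hot */
	struct poll_entry *apoll;
};

/* before: the completion path has to pull in the apoll cacheline */
static unsigned int poll_events_cold(struct request *req)
{
	return req->apoll->events;
}

/* after: the events were cached in the request when poll was armed */
static unsigned int poll_events_hot(struct request *req)
{
	return req->cflags;
}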

-- 
Jens Axboe




* [PATCH 1/2] io_uring: cache req->apoll->events in req->cflags
  2022-03-16 23:03 [PATCHSET 0/2] Reduce poll based overhead Jens Axboe
@ 2022-03-16 23:03 ` Jens Axboe
  2022-03-16 23:03 ` [PATCH 2/2] io_uring: cache poll/double-poll state with a request flag Jens Axboe
  1 sibling, 0 replies; 3+ messages in thread
From: Jens Axboe @ 2022-03-16 23:03 UTC (permalink / raw)
  To: io-uring; +Cc: Jens Axboe

When we arm poll on behalf of a different type of request, like a network
receive, we allocate req->apoll as our poll entry. Running network
workloads shows io_poll_check_events() as the most expensive part of
io_uring, and it's all due to having to pull in req->apoll instead of
just the request itself, which we already have hot.

Cache poll->events in req->cflags, which isn't used until the request
completes anyway. This isn't strictly needed for regular poll, where
req->poll.events is used and thus already hot, but for the sake of
unification we do it across the board.

This saves 3-4% of overhead in certain request workloads.
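
As a standalone userspace sketch of the pattern (the names req_sketch,
apoll_events, arm_poll and is_oneshot are invented for the example and
only the idea matches the patch): a field that is dead until the
request completes doubles as a cache of the poll mask while the
request is in flight.

#include <sys/epoll.h>

struct req_sketch {
	unsigned int cflags;		/* otherwise unused until completion */
	unsigned int apoll_events;	/* stands in for req->apoll->events */
};

static void arm_poll(struct req_sketch *req, unsigned int events)
{
	req->apoll_events = events;
	req->cflags = events;		/* cache the mask in the hot struct */
}

static int is_oneshot(const struct req_sketch *req)
{
	/* hot path: no need to touch apoll_events at all */
	return !!(req->cflags & EPOLLONESHOT);
}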

Signed-off-by: Jens Axboe <[email protected]>
---
 fs/io_uring.c | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index fa4e2cb47e56..bfddad7a14ef 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5876,13 +5876,13 @@ static int io_poll_check_events(struct io_kiocb *req)
 			return -ECANCELED;
 
 		if (!req->result) {
-			struct poll_table_struct pt = { ._key = poll->events };
+			struct poll_table_struct pt = { ._key = req->cflags };
 
-			req->result = vfs_poll(req->file, &pt) & poll->events;
+			req->result = vfs_poll(req->file, &pt) & req->cflags;
 		}
 
 		/* multishot, just fill an CQE and proceed */
-		if (req->result && !(poll->events & EPOLLONESHOT)) {
+		if (req->result && !(req->cflags & EPOLLONESHOT)) {
 			__poll_t mask = mangle_poll(req->result & poll->events);
 			bool filled;
 
@@ -5953,9 +5953,16 @@ static void io_apoll_task_func(struct io_kiocb *req, bool *locked)
 		io_req_complete_failed(req, ret);
 }
 
-static void __io_poll_execute(struct io_kiocb *req, int mask)
+static void __io_poll_execute(struct io_kiocb *req, int mask, int events)
 {
 	req->result = mask;
+	/*
+	 * This is useful for poll that is armed on behalf of another
+	 * request, and where the wakeup path could be on a different
+	 * CPU. We want to avoid pulling in req->apoll->events for that
+	 * case.
+	 */
+	req->cflags = events;
 	if (req->opcode == IORING_OP_POLL_ADD)
 		req->io_task_work.func = io_poll_task_func;
 	else
@@ -5965,17 +5972,17 @@ static void __io_poll_execute(struct io_kiocb *req, int mask)
 	io_req_task_work_add(req, false);
 }
 
-static inline void io_poll_execute(struct io_kiocb *req, int res)
+static inline void io_poll_execute(struct io_kiocb *req, int res, int events)
 {
 	if (io_poll_get_ownership(req))
-		__io_poll_execute(req, res);
+		__io_poll_execute(req, res, events);
 }
 
 static void io_poll_cancel_req(struct io_kiocb *req)
 {
 	io_poll_mark_cancelled(req);
 	/* kick tw, which should complete the request */
-	io_poll_execute(req, 0);
+	io_poll_execute(req, 0, 0);
 }
 
 static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
@@ -5989,7 +5996,7 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 	if (unlikely(mask & POLLFREE)) {
 		io_poll_mark_cancelled(req);
 		/* we have to kick tw in case it's not already */
-		io_poll_execute(req, 0);
+		io_poll_execute(req, 0, poll->events);
 
 		/*
 		 * If the waitqueue is being freed early but someone is already
@@ -6020,7 +6027,7 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 			list_del_init(&poll->wait.entry);
 			poll->head = NULL;
 		}
-		__io_poll_execute(req, mask);
+		__io_poll_execute(req, mask, poll->events);
 	}
 	return 1;
 }
@@ -6124,7 +6131,7 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
 		/* can't multishot if failed, just queue the event we've got */
 		if (unlikely(ipt->error || !ipt->nr_entries))
 			poll->events |= EPOLLONESHOT;
-		__io_poll_execute(req, mask);
+		__io_poll_execute(req, mask, poll->events);
 		return 0;
 	}
 	io_add_napi(req->file, req->ctx);
@@ -6135,7 +6142,7 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
 	 */
 	v = atomic_dec_return(&req->poll_refs);
 	if (unlikely(v & IO_POLL_REF_MASK))
-		__io_poll_execute(req, 0);
+		__io_poll_execute(req, 0, poll->events);
 	return 0;
 }
 
@@ -6333,7 +6340,7 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
 		return -EINVAL;
 
 	io_req_set_refcount(req);
-	poll->events = io_poll_parse_events(sqe, flags);
+	req->cflags = poll->events = io_poll_parse_events(sqe, flags);
 	return 0;
 }
 
-- 
2.35.1



* [PATCH 2/2] io_uring: cache poll/double-poll state with a request flag
  2022-03-16 23:03 [PATCHSET 0/2] Reduce poll based overhead Jens Axboe
  2022-03-16 23:03 ` [PATCH 1/2] io_uring: cache req->apoll->events in req->cflags Jens Axboe
@ 2022-03-16 23:03 ` Jens Axboe
  1 sibling, 0 replies; 3+ messages in thread
From: Jens Axboe @ 2022-03-16 23:03 UTC (permalink / raw)
  To: io-uring; +Cc: Jens Axboe

With commit "io_uring: cache req->apoll->events in req->cflags" applied,
we now have just io_poll_remove_entries() dipping into req->apoll when
it isn't strictly necessary.

Mark poll and double-poll with a flag, so we know whether we need to
look at apoll->double_poll at all. This avoids pulling in those
cachelines when we don't need them. The common case is that the poll
wake handler has already removed these entries while they were hot,
off the completion path.
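
A sketch of the flag-gating pattern follows; the SKETCH_* flag values
and struct layout are illustrative, not the kernel's. The point is
that the already-hot flags word decides whether the poll entries get
dereferenced at all.

#define SKETCH_SINGLE_POLL	(1U << 0)
#define SKETCH_DOUBLE_POLL	(1U << 1)

struct poll_sketch { int queued; };

struct req_sketch {
	unsigned int flags;
	struct poll_sketch *single, *dbl;
};

static void remove_entries(struct req_sketch *req)
{
	/* neither entry may be queued: return without pulling in the
	 * poll/double-poll cachelines */
	if (!(req->flags & (SKETCH_SINGLE_POLL | SKETCH_DOUBLE_POLL)))
		return;
	if (req->flags & SKETCH_SINGLE_POLL)
		req->single->queued = 0;
	if (req->flags & SKETCH_DOUBLE_POLL)
		req->dbl->queued = 0;
}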

Signed-off-by: Jens Axboe <[email protected]>
---
 fs/io_uring.c | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index bfddad7a14ef..5b5f48f0f81e 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -771,6 +771,8 @@ enum {
 	REQ_F_ARM_LTIMEOUT_BIT,
 	REQ_F_ASYNC_DATA_BIT,
 	REQ_F_SKIP_LINK_CQES_BIT,
+	REQ_F_SINGLE_POLL_BIT,
+	REQ_F_DOUBLE_POLL_BIT,
 	/* keep async read/write and isreg together and in order */
 	REQ_F_SUPPORT_NOWAIT_BIT,
 	REQ_F_ISREG_BIT,
@@ -829,6 +831,10 @@ enum {
 	REQ_F_ASYNC_DATA	= BIT(REQ_F_ASYNC_DATA_BIT),
 	/* don't post CQEs while failing linked requests */
 	REQ_F_SKIP_LINK_CQES	= BIT(REQ_F_SKIP_LINK_CQES_BIT),
+	/* single poll may be active */
+	REQ_F_SINGLE_POLL	= BIT(REQ_F_SINGLE_POLL_BIT),
+	/* double poll may be active */
+	REQ_F_DOUBLE_POLL	= BIT(REQ_F_DOUBLE_POLL_BIT),
 };
 
 struct async_poll {
@@ -5823,8 +5829,12 @@ static inline void io_poll_remove_entry(struct io_poll_iocb *poll)
 
 static void io_poll_remove_entries(struct io_kiocb *req)
 {
-	struct io_poll_iocb *poll = io_poll_get_single(req);
-	struct io_poll_iocb *poll_double = io_poll_get_double(req);
+	/*
+	 * Nothing to do if neither of those flags is set. Avoid dipping
+	 * into the poll/apoll/double cachelines if we can.
+	 */
+	if (!(req->flags & (REQ_F_SINGLE_POLL | REQ_F_DOUBLE_POLL)))
+		return;
 
 	/*
 	 * While we hold the waitqueue lock and the waitqueue is nonempty,
@@ -5842,9 +5852,10 @@ static void io_poll_remove_entries(struct io_kiocb *req)
 	 * In that case, only RCU prevents the queue memory from being freed.
 	 */
 	rcu_read_lock();
-	io_poll_remove_entry(poll);
-	if (poll_double)
-		io_poll_remove_entry(poll_double);
+	if (req->flags & REQ_F_SINGLE_POLL)
+		io_poll_remove_entry(io_poll_get_single(req));
+	if (req->flags & REQ_F_DOUBLE_POLL)
+		io_poll_remove_entry(io_poll_get_double(req));
 	rcu_read_unlock();
 }
 
@@ -6026,6 +6037,7 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 		if (mask && poll->events & EPOLLONESHOT) {
 			list_del_init(&poll->wait.entry);
 			poll->head = NULL;
+			req->flags &= ~REQ_F_SINGLE_POLL;
 		}
 		__io_poll_execute(req, mask, poll->events);
 	}
@@ -6062,12 +6074,14 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
 			pt->error = -ENOMEM;
 			return;
 		}
+		req->flags |= REQ_F_DOUBLE_POLL;
 		io_init_poll_iocb(poll, first->events, first->wait.func);
 		*poll_ptr = poll;
 		if (req->opcode == IORING_OP_POLL_ADD)
 			req->flags |= REQ_F_ASYNC_DATA;
 	}
 
+	req->flags |= REQ_F_SINGLE_POLL;
 	pt->nr_entries++;
 	poll->head = head;
 	poll->wait.private = req;
-- 
2.35.1


