From: Pavel Begunkov <[email protected]>
To: [email protected]
Cc: Jens Axboe <[email protected]>, [email protected]
Subject: [PATCH for-next 6/6] io_uring: optimise submission side poll_refs
Date: Thu, 23 Jun 2022 10:34:35 +0100 [thread overview]
Message-ID: <c3b5484c1236f86e3d60a5975456a927670f11b6.1655976119.git.asml.silence@gmail.com> (raw)
In-Reply-To: <[email protected]>
The final poll_refs put in __io_arm_poll_handler() takes quite some
cycles. When we're arming from the original task context, task_work won't
be run, so in this case we can assume that we won't race with task_works
and therefore don't need to take the initial ownership ref.
One caveat is that after arming a poll we may race with it, so we have
to add a bunch of io_poll_get_ownership() calls, hidden inside
io_poll_can_finish_inline(), whenever we want to complete arming inline.
For the same reason, we can't just set REQ_F_DOUBLE_POLL in
__io_queue_proc(), and so we need to sync with the first poll entry by
taking its wq head lock.
Signed-off-by: Pavel Begunkov <[email protected]>
---
io_uring/poll.c | 88 +++++++++++++++++++++++++++++++++++++------------
1 file changed, 67 insertions(+), 21 deletions(-)
diff --git a/io_uring/poll.c b/io_uring/poll.c
index 149205eae418..69b2f4bab3b2 100644
--- a/io_uring/poll.c
+++ b/io_uring/poll.c
@@ -34,6 +34,7 @@ struct io_poll_table {
struct io_kiocb *req;
int nr_entries;
int error;
+ bool owning;
/* output value, set only if arm poll returns >0 */
__poll_t result_mask;
};
@@ -374,6 +375,27 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
return 1;
}
+static void io_poll_double_prepare(struct io_kiocb *req)
+{
+ struct wait_queue_head *head;
+ struct io_poll *poll = io_poll_get_single(req);
+
+ /* head is RCU protected, see io_poll_remove_entries() comments */
+ rcu_read_lock();
+ head = smp_load_acquire(&poll->head);
+ if (head) {
+ /*
+ * poll arm may not hold ownership and so race with
+ * io_poll_wake() by modifying req->flags. There is only one
+ * poll entry queued, serialise with it by taking its head lock.
+ */
+ spin_lock_irq(&head->lock);
+ req->flags |= REQ_F_DOUBLE_POLL;
+ spin_unlock_irq(&head->lock);
+ }
+ rcu_read_unlock();
+}
+
static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt,
struct wait_queue_head *head,
struct io_poll **poll_ptr)
@@ -405,16 +427,19 @@ static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt,
pt->error = -ENOMEM;
return;
}
+
+ io_poll_double_prepare(req);
/* mark as double wq entry */
wqe_private |= IO_WQE_F_DOUBLE;
- req->flags |= REQ_F_DOUBLE_POLL;
io_init_poll_iocb(poll, first->events, first->wait.func);
*poll_ptr = poll;
if (req->opcode == IORING_OP_POLL_ADD)
req->flags |= REQ_F_ASYNC_DATA;
+ } else {
+ /* fine to modify, there is no poll queued to race with us */
+ req->flags |= REQ_F_SINGLE_POLL;
}
- req->flags |= REQ_F_SINGLE_POLL;
pt->nr_entries++;
poll->head = head;
poll->wait.private = (void *) wqe_private;
@@ -435,6 +460,12 @@ static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
(struct io_poll **) &pt->req->async_data);
}
+static bool io_poll_can_finish_inline(struct io_kiocb *req,
+ struct io_poll_table *pt)
+{
+ return pt->owning || io_poll_get_ownership(req);
+}
+
/*
* Returns 0 when it's handed over for polling. The caller owns the requests if
* it returns non-zero, but otherwise should not touch it. Negative values
@@ -443,7 +474,8 @@ static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
*/
static int __io_arm_poll_handler(struct io_kiocb *req,
struct io_poll *poll,
- struct io_poll_table *ipt, __poll_t mask)
+ struct io_poll_table *ipt, __poll_t mask,
+ unsigned issue_flags)
{
struct io_ring_ctx *ctx = req->ctx;
int v;
@@ -452,34 +484,45 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
req->work.cancel_seq = atomic_read(&ctx->cancel_seq);
io_init_poll_iocb(poll, mask, io_poll_wake);
poll->file = req->file;
-
req->apoll_events = poll->events;
ipt->pt._key = mask;
ipt->req = req;
ipt->error = 0;
ipt->nr_entries = 0;
-
/*
- * Take the ownership to delay any tw execution up until we're done
- * with poll arming. see io_poll_get_ownership().
+ * Polling is either completed here or via task_work, so if we're in the
+ * task context we're naturally serialised with tw by merit of running
+ * the same task. When it's io-wq, take the ownership to prevent tw
+ * from running. However, when we're in the task context, skip taking
+ * it as an optimisation.
+ *
+ * Note: even though the request won't be completed/freed, without
+ * ownership we still can race with io_poll_wake().
+ * io_poll_can_finish_inline() tries to deal with that.
*/
- atomic_set(&req->poll_refs, 1);
+ ipt->owning = issue_flags & IO_URING_F_UNLOCKED;
+
+ atomic_set(&req->poll_refs, (int)ipt->owning);
mask = vfs_poll(req->file, &ipt->pt) & poll->events;
if (unlikely(ipt->error || !ipt->nr_entries)) {
io_poll_remove_entries(req);
- if (mask && !(poll->events & EPOLLET)) {
+ if (!io_poll_can_finish_inline(req, ipt)) {
+ io_poll_mark_cancelled(req);
+ return 0;
+ } else if (mask && !(poll->events & EPOLLET)) {
ipt->result_mask = mask;
return 1;
- } else {
- return ipt->error ?: -EINVAL;
}
+ return ipt->error ?: -EINVAL;
}
if (mask &&
((poll->events & (EPOLLET|EPOLLONESHOT)) == (EPOLLET|EPOLLONESHOT))) {
+ if (!io_poll_can_finish_inline(req, ipt))
+ return 0;
io_poll_remove_entries(req);
ipt->result_mask = mask;
/* no one else has access to the req, forget about the ref */
@@ -491,18 +534,21 @@ static int __io_arm_poll_handler(struct io_kiocb *req,
else
io_poll_req_insert(req);
- if (mask && (poll->events & EPOLLET)) {
+ if (mask && (poll->events & EPOLLET) &&
+ io_poll_can_finish_inline(req, ipt)) {
__io_poll_execute(req, mask);
return 0;
}
- /*
- * Release ownership. If someone tried to queue a tw while it was
- * locked, kick it off for them.
- */
- v = atomic_dec_return(&req->poll_refs);
- if (unlikely(v & IO_POLL_REF_MASK))
- __io_poll_execute(req, 0);
+ if (ipt->owning) {
+ /*
+ * Release ownership. If someone tried to queue a tw while it was
+ * locked, kick it off for them.
+ */
+ v = atomic_dec_return(&req->poll_refs);
+ if (unlikely(v & IO_POLL_REF_MASK))
+ __io_poll_execute(req, 0);
+ }
return 0;
}
@@ -585,7 +631,7 @@ int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
io_kbuf_recycle(req, issue_flags);
- ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask);
+ ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask, issue_flags);
if (ret)
return ret > 0 ? IO_APOLL_READY : IO_APOLL_ABORTED;
trace_io_uring_poll_arm(req, mask, apoll->poll.events);
@@ -817,7 +863,7 @@ int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
else
req->flags &= ~REQ_F_HASH_LOCKED;
- ret = __io_arm_poll_handler(req, poll, &ipt, poll->events);
+ ret = __io_arm_poll_handler(req, poll, &ipt, poll->events, issue_flags);
if (ret > 0) {
io_req_set_res(req, ipt.result_mask, 0);
return IOU_OK;
--
2.36.1
prev parent reply other threads:[~2022-06-23 9:35 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-06-23 9:34 [PATCH for-next 0/6] poll cleanups and optimisations Pavel Begunkov
2022-06-23 9:34 ` [PATCH for-next 1/6] io_uring: clean poll ->private flagging Pavel Begunkov
2022-06-23 9:34 ` [PATCH for-next 2/6] io_uring: remove events caching atavisms Pavel Begunkov
2022-06-23 9:34 ` [PATCH for-next 3/6] io_uring: add a helper for apoll alloc Pavel Begunkov
2022-06-23 9:34 ` [PATCH for-next 4/6] io_uring: change arm poll return values Pavel Begunkov
2022-06-23 9:34 ` [PATCH for-next 5/6] io_uring: refactor poll arm error handling Pavel Begunkov
2022-06-23 12:09 ` Jens Axboe
2022-06-23 12:16 ` Pavel Begunkov
2022-06-23 9:34 ` Pavel Begunkov [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=c3b5484c1236f86e3d60a5975456a927670f11b6.1655976119.git.asml.silence@gmail.com \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox