From: Jens Axboe <[email protected]>
To: [email protected]
Cc: Jens Axboe <[email protected]>
Subject: [PATCH 4/9] io_uring: improve poll completion performance
Date: Sat, 28 Dec 2019 12:21:13 -0700 [thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>
For busy IORING_OP_POLL_ADD workloads, we can have enough contention
on the completion lock that we fail the inline completion path quite
often as we fail the trylock on that lock. Add a list for deferred
completions that we can use in that case. This helps reduce the number
of async offloads we have to do, as if we get multiple completions in
a row, we'll piggy back on to the poll_llist instead of having to queue
our own offload.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 108 ++++++++++++++++++++++++++++++++++++++++----------
1 file changed, 88 insertions(+), 20 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6f99a52c350c..0ee9115a599d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -286,7 +286,8 @@ struct io_ring_ctx {
struct {
spinlock_t completion_lock;
- bool poll_multi_file;
+ struct llist_head poll_llist;
+
/*
* ->poll_list is protected by the ctx->uring_lock for
* io_uring instances that don't use IORING_SETUP_SQPOLL.
@@ -296,6 +297,7 @@ struct io_ring_ctx {
struct list_head poll_list;
struct hlist_head *cancel_hash;
unsigned cancel_hash_bits;
+ bool poll_multi_file;
spinlock_t inflight_lock;
struct list_head inflight_list;
@@ -453,7 +455,14 @@ struct io_kiocb {
};
struct io_async_ctx *io;
- struct file *ring_file;
+ union {
+ /*
+ * ring_file is only used in the submission path, and
+ * llist_node is only used for poll deferred completions
+ */
+ struct file *ring_file;
+ struct llist_node llist_node;
+ };
int ring_fd;
bool has_user;
bool in_async;
@@ -727,6 +736,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
mutex_init(&ctx->uring_lock);
init_waitqueue_head(&ctx->wait);
spin_lock_init(&ctx->completion_lock);
+ init_llist_head(&ctx->poll_llist);
INIT_LIST_HEAD(&ctx->poll_list);
INIT_LIST_HEAD(&ctx->defer_list);
INIT_LIST_HEAD(&ctx->timeout_list);
@@ -1322,6 +1332,20 @@ static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx)
return smp_load_acquire(&rings->sq.tail) - ctx->cached_sq_head;
}
+static inline bool io_req_multi_free(struct io_kiocb *req)
+{
+ /*
+ * If we're not using fixed files, we have to pair the completion part
+ * with the file put. Use regular completions for those, only batch
+ * free for fixed file and non-linked commands.
+ */
+ if (((req->flags & (REQ_F_FIXED_FILE|REQ_F_LINK)) == REQ_F_FIXED_FILE)
+ && !io_is_fallback_req(req) && !req->io)
+ return true;
+
+ return false;
+}
+
/*
* Find and free completed poll iocbs
*/
@@ -1341,14 +1365,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
(*nr_events)++;
if (refcount_dec_and_test(&req->refs)) {
- /* If we're not using fixed files, we have to pair the
- * completion part with the file put. Use regular
- * completions for those, only batch free for fixed
- * file and non-linked commands.
- */
- if (((req->flags & (REQ_F_FIXED_FILE|REQ_F_LINK)) ==
- REQ_F_FIXED_FILE) && !io_is_fallback_req(req) &&
- !req->io) {
+ if (io_req_multi_free(req)) {
reqs[to_free++] = req;
if (to_free == ARRAY_SIZE(reqs))
io_free_req_many(ctx, reqs, &to_free);
@@ -3082,6 +3099,44 @@ static void io_poll_complete_work(struct io_wq_work **workptr)
*workptr = &nxt->work;
}
+static void __io_poll_flush(struct io_ring_ctx *ctx, struct llist_node *nodes)
+{
+ void *reqs[IO_IOPOLL_BATCH];
+ struct io_kiocb *req, *tmp;
+ int to_free = 0;
+
+ spin_lock_irq(&ctx->completion_lock);
+ llist_for_each_entry_safe(req, tmp, nodes, llist_node) {
+ hash_del(&req->hash_node);
+ io_poll_complete(req, req->result, 0);
+
+ if (refcount_dec_and_test(&req->refs)) {
+ if (io_req_multi_free(req)) {
+ reqs[to_free++] = req;
+ if (to_free == ARRAY_SIZE(reqs))
+ io_free_req_many(ctx, reqs, &to_free);
+ } else {
+ req->flags |= REQ_F_COMP_LOCKED;
+ io_free_req(req);
+ }
+ }
+ }
+ spin_unlock_irq(&ctx->completion_lock);
+
+ io_cqring_ev_posted(ctx);
+ io_free_req_many(ctx, reqs, &to_free);
+}
+
+static void io_poll_flush(struct io_wq_work **workptr)
+{
+ struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
+ struct llist_node *nodes;
+
+ nodes = llist_del_all(&req->ctx->poll_llist);
+ if (nodes)
+ __io_poll_flush(req->ctx, nodes);
+}
+
static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
void *key)
{
@@ -3089,7 +3144,6 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
struct io_kiocb *req = container_of(poll, struct io_kiocb, poll);
struct io_ring_ctx *ctx = req->ctx;
__poll_t mask = key_to_poll(key);
- unsigned long flags;
/* for instances that support it check for an event match first: */
if (mask && !(mask & poll->events))
@@ -3103,17 +3157,31 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
* If we have a link timeout we're going to need the completion_lock
* for finalizing the request, mark us as having grabbed that already.
*/
- if (mask && spin_trylock_irqsave(&ctx->completion_lock, flags)) {
- hash_del(&req->hash_node);
- io_poll_complete(req, mask, 0);
- req->flags |= REQ_F_COMP_LOCKED;
- io_put_req(req);
- spin_unlock_irqrestore(&ctx->completion_lock, flags);
+ if (mask) {
+ unsigned long flags;
- io_cqring_ev_posted(ctx);
- } else {
- io_queue_async_work(req);
+ if (llist_empty(&ctx->poll_llist) &&
+ spin_trylock_irqsave(&ctx->completion_lock, flags)) {
+ hash_del(&req->hash_node);
+ io_poll_complete(req, mask, 0);
+ req->flags |= REQ_F_COMP_LOCKED;
+ io_put_req(req);
+ spin_unlock_irqrestore(&ctx->completion_lock, flags);
+
+ io_cqring_ev_posted(ctx);
+ req = NULL;
+ } else {
+ req->result = mask;
+ req->llist_node.next = NULL;
+ /* if the list wasn't empty, we're done */
+ if (!llist_add(&req->llist_node, &ctx->poll_llist))
+ req = NULL;
+ else
+ req->work.func = io_poll_flush;
+ }
}
+ if (req)
+ io_queue_async_work(req);
return 1;
}
--
2.24.1
next prev parent reply other threads:[~2019-12-28 19:21 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-12-28 19:21 [PATCHSET 0/9] cleanups, improvements, additions Jens Axboe
2019-12-28 19:21 ` [PATCH 1/9] io_uring: remove two unnecessary function declarations Jens Axboe
2019-12-28 19:21 ` [PATCH 2/9] io_uring: add lookup table for various opcode needs Jens Axboe
2019-12-28 19:21 ` [PATCH 3/9] io_uring: split overflow state into SQ and CQ side Jens Axboe
2019-12-28 19:21 ` Jens Axboe [this message]
2019-12-28 19:21 ` [PATCH 5/9] io_uring: add non-vectored read/write commands Jens Axboe
2019-12-28 19:21 ` [PATCH 6/9] io_uring: allow use of offset == -1 to mean file position Jens Axboe
2019-12-28 19:21 ` [PATCH 7/9] io_uring: add IORING_OP_FADVISE Jens Axboe
2019-12-28 19:21 ` [PATCH 8/9] mm: make do_madvise() available internally Jens Axboe
2019-12-28 19:21 ` [PATCH 9/9] io_uring: add IORING_OP_MADVISE Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox