From: Pavel Begunkov <[email protected]>
To: Jens Axboe <[email protected]>, io-uring <[email protected]>
Subject: Re: [PATCH v2] io-wq: handle hashed writes in chains
Date: Sun, 22 Mar 2020 21:54:07 +0300
Message-ID: <[email protected]>
In-Reply-To: <[email protected]>
On 22/03/2020 19:24, Pavel Begunkov wrote:
> On 22/03/2020 19:09, Pavel Begunkov wrote:
>> On 19/03/2020 21:56, Jens Axboe wrote:
>>> We always punt async buffered writes to an io-wq helper, as the core
>>> kernel does not have IOCB_NOWAIT support for that. Most buffered async
>>> writes complete very quickly, as it's just a copy operation. This means
>>> that doing multiple locking roundtrips on the shared wqe lock for each
>>> buffered write is wasteful. Additionally, buffered writes are hashed
>>> work items, which means that any buffered write to a given file is
>>> serialized.
>>>
>>> When looking for a new work item, build a chain of identically hashed
>>> work items, and then hand back that batch. Until the batch is done, the
>>> caller doesn't have to synchronize with the wqe or worker locks again.
>
> I have an idea, how to do it a bit better. Let me try it.
The diff below is buggy (it oopses somewhere in blk-mq for
read-write.c:read_poll_link); I'll double-check it on a fresh head.
The idea is to keep same-hashed works contiguous in @work_list, so they
can be spliced out in one go. For each hash bucket, I keep a pointer to the
last added work:
- on enqueue, a new work is inserted right after the last one
- on dequeue, the whole run [first, last] is spliced out, where @first is the
current node because of how we traverse @work_list.
It costs a bit of extra memory (an array of 32 pointers per wqe with
IO_WQ_HASH_ORDER 5, i.e. 256 bytes on 64-bit), but
- removes the extra looping
- and takes all works of a given hash, not only sequentially submitted ones.
E.g. for the following submission sequence, it will take all (hash=0) requests:
REQ(hash=0)
REQ(hash=1)
REQ(hash=0)
REQ()
REQ(hash=0)
...
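If I traced the insertion right, after these five enqueues @work_list ends up as

REQ(hash=0) -> REQ(hash=0) -> REQ(hash=0) -> REQ(hash=1) -> REQ()

i.e. all three (hash=0) works are contiguous, so a single dequeue splices the
whole run out, while REQ(hash=1) and the unhashed REQ() keep their relative
order.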
Please tell me if you see a hole in the concept. And as said, there is still
a bug somewhere.
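To sanity-check the splice semantics, here is a quick userspace model of the
two new list helpers (the structs mirror fs/io-wq.h, WRITE_ONCE is stubbed
out). It's only an illustration of the intended behaviour, not part of the
patch:

#include <assert.h>
#include <stddef.h>

#define WRITE_ONCE(x, v)	((x) = (v))	/* no concurrency in this toy */

struct io_wq_work_node {
	struct io_wq_work_node *next;
};

struct io_wq_work_list {
	struct io_wq_work_node *first;
	struct io_wq_work_node *last;
};

static void wq_list_add_after(struct io_wq_work_node *node,
			      struct io_wq_work_node *pos,
			      struct io_wq_work_list *list)
{
	struct io_wq_work_node *next = pos->next;

	pos->next = node;
	node->next = next;
	if (!next)
		list->last = node;
}

static void wq_del_node_range(struct io_wq_work_list *list,
			      struct io_wq_work_node *last,
			      struct io_wq_work_node *prev)
{
	if (!prev)
		WRITE_ONCE(list->first, last->next);
	else
		prev->next = last->next;

	if (last == list->last)
		list->last = prev;
	last->next = NULL;
}

int main(void)
{
	struct io_wq_work_node a = {0}, b = {0}, c = {0}, d = {0};
	struct io_wq_work_list list = { .first = &a, .last = &a };

	/* build a -> b -> d, then insert c after b: a -> b -> c -> d */
	wq_list_add_after(&b, &a, &list);
	wq_list_add_after(&d, &b, &list);
	wq_list_add_after(&c, &b, &list);
	assert(a.next == &b && b.next == &c && c.next == &d);
	assert(list.last == &d);

	/* splice out [b, c], prev == &a: list becomes a -> d */
	wq_del_node_range(&list, &c, &a);
	assert(a.next == &d && c.next == NULL);
	assert(list.first == &a && list.last == &d);
	/* the detached batch [b, c] stays linked for the worker */
	assert(b.next == &c);
	return 0;
}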
---
fs/io-wq.c | 49 +++++++++++++++++++++++++++++++++++++++++++------
fs/io-wq.h | 34 +++++++++++++++++++++++++++-------
2 files changed, 70 insertions(+), 13 deletions(-)
diff --git a/fs/io-wq.c b/fs/io-wq.c
index b3fb61ec0870..00d8f8b12df3 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -69,6 +69,8 @@ struct io_worker {
#define IO_WQ_HASH_ORDER 5
#endif
+#define IO_WQ_HASH_NR_BUCKETS (1u << IO_WQ_HASH_ORDER)
+
struct io_wqe_acct {
unsigned nr_workers;
unsigned max_workers;
@@ -98,6 +100,7 @@ struct io_wqe {
struct list_head all_list;
struct io_wq *wq;
+ struct io_wq_work *last_hashed[IO_WQ_HASH_NR_BUCKETS];
};
/*
@@ -400,7 +403,9 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe)
hash = io_get_work_hash(work);
if (!(wqe->hash_map & BIT(hash))) {
wqe->hash_map |= BIT(hash);
- wq_node_del(&wqe->work_list, node, prev);
+ wq_del_node_range(&wqe->work_list,
+ &wqe->last_hashed[hash]->list, prev);
+ wqe->last_hashed[hash] = NULL;
return work;
}
}
@@ -508,7 +513,11 @@ static void io_worker_handle_work(struct io_worker *worker)
/* handle a whole dependent link */
do {
- struct io_wq_work *old_work;
+ struct io_wq_work *old_work, *next_hashed = NULL;
+
+ if (work->list.next)
+ next_hashed = container_of(work->list.next,
+ struct io_wq_work, list);
io_impersonate_work(worker, work);
/*
@@ -523,12 +532,20 @@ static void io_worker_handle_work(struct io_worker *worker)
work->func(&work);
work = (old_work == work) ? NULL : work;
- assign_work = work;
- if (work && io_wq_is_hashed(work))
- assign_work = NULL;
+ assign_work = next_hashed;
+ if (!next_hashed && work && !io_wq_is_hashed(work))
+ assign_work = work;
+
io_assign_current_work(worker, assign_work);
wq->free_work(old_work);
+ if (next_hashed) {
+ if (work)
+ io_wqe_enqueue(wqe, work);
+ work = next_hashed;
+ continue;
+ }
+
if (work && !assign_work) {
io_wqe_enqueue(wqe, work);
work = NULL;
@@ -776,6 +793,26 @@ static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
} while (work);
}
+static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work)
+{
+ int hash;
+ struct io_wq_work *last_hashed;
+
+ if (!io_wq_is_hashed(work)) {
+append:
+ wq_list_add_tail(&work->list, &wqe->work_list);
+ return;
+ }
+
+ hash = io_get_work_hash(work);
+ last_hashed = wqe->last_hashed[hash];
+ wqe->last_hashed[hash] = work;
+ if (!last_hashed)
+ goto append;
+
+ wq_list_add_after(&work->list, &last_hashed->list, &wqe->work_list);
+}
+
static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
{
struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
@@ -795,7 +832,7 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
work_flags = work->flags;
spin_lock_irqsave(&wqe->lock, flags);
- wq_list_add_tail(&work->list, &wqe->work_list);
+ io_wqe_insert_work(wqe, work);
wqe->flags &= ~IO_WQE_FLAG_STALLED;
spin_unlock_irqrestore(&wqe->lock, flags);
diff --git a/fs/io-wq.h b/fs/io-wq.h
index 298b21f4a4d2..10367b6238da 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -40,17 +40,37 @@ static inline void wq_list_add_tail(struct io_wq_work_node *node,
}
}
+static inline void wq_list_add_after(struct io_wq_work_node *node,
+ struct io_wq_work_node *pos,
+ struct io_wq_work_list *list)
+{
+ struct io_wq_work_node *next = pos->next;
+
+ pos->next = node;
+ node->next = next;
+ if (!next)
+ list->last = node;
+}
+
+static inline void wq_del_node_range(struct io_wq_work_list *list,
+ struct io_wq_work_node *last,
+ struct io_wq_work_node *prev)
+{
+ if (!prev)
+ WRITE_ONCE(list->first, last->next);
+ else
+ prev->next = last->next;
+
+ if (last == list->last)
+ list->last = prev;
+ last->next = NULL;
+}
+
static inline void wq_node_del(struct io_wq_work_list *list,
struct io_wq_work_node *node,
struct io_wq_work_node *prev)
{
- if (node == list->first)
- WRITE_ONCE(list->first, node->next);
- if (node == list->last)
- list->last = prev;
- if (prev)
- prev->next = node->next;
- node->next = NULL;
+ wq_del_node_range(list, node, prev);
}
#define wq_list_for_each(pos, prv, head) \
--
2.24.0