From: Pavel Begunkov <[email protected]>
To: Jens Axboe <[email protected]>, [email protected]
Subject: [PATCH 14/17] io_uring: provide FIFO ordering for task_work
Date: Wed, 10 Feb 2021 00:03:20 +0000
Message-ID: <5c6b2f10b98c56abd3e4f78cd77f688feedad5ce.1612915326.git.asml.silence@gmail.com>
In-Reply-To: <[email protected]>
From: Jens Axboe <[email protected]>
task_work is a LIFO list, due to how it's implemented as a lockless
list. For long chains of task_work, this can be problematic, as the
first entry added is the last one processed. Reversing the list to
restore FIFO order would likewise waste a lot of CPU cycles.

Wrap the task_work so we have a single task_work entry per task, and
use that to run the queued entries in the right order.
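To make the ordering difference concrete, here is a standalone
userspace sketch (illustrative names, not kernel code): pushing onto
the head of a list, as the lockless task_work list effectively does,
hands entries back newest-first, while appending to a tail-tracked
list and draining it front to back preserves submission order.

#include <stdio.h>

struct node {
	struct node *next;
	int seq;
};

/* LIFO push, as a lockless singly linked list effectively does */
static void lifo_push(struct node **head, struct node *n)
{
	n->next = *head;
	*head = n;
}

int main(void)
{
	struct node nodes[3];
	struct node *head = NULL, *first = NULL, *last = NULL;

	for (int i = 0; i < 3; i++) {
		nodes[i].seq = i;
		lifo_push(&head, &nodes[i]);	/* task_work-style add */
	}
	printf("LIFO run order:");
	for (struct node *n = head; n; n = n->next)
		printf(" %d", n->seq);		/* 2 1 0: reversed */

	/* FIFO alternative: append at the tail, run front to back */
	for (int i = 0; i < 3; i++) {
		nodes[i].next = NULL;
		if (last)
			last->next = &nodes[i];
		else
			first = &nodes[i];
		last = &nodes[i];
	}
	printf("\nFIFO run order:");
	for (struct node *n = first; n; n = n->next)
		printf(" %d", n->seq);		/* 0 1 2: submission order */
	printf("\n");
	return 0;
}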
Signed-off-by: Jens Axboe <[email protected]>
Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io-wq.h               |   9 ----
 fs/io_uring.c            | 101 ++++++++++++++++++++++++++++++++++++---
 include/linux/io_uring.h |  14 ++++++
 3 files changed, 108 insertions(+), 16 deletions(-)
diff --git a/fs/io-wq.h b/fs/io-wq.h
index e37a0f217cc8..096f1021018e 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -27,15 +27,6 @@ enum io_wq_cancel {
IO_WQ_CANCEL_NOTFOUND, /* work not found */
};
-struct io_wq_work_node {
- struct io_wq_work_node *next;
-};
-
-struct io_wq_work_list {
- struct io_wq_work_node *first;
- struct io_wq_work_node *last;
-};
-
static inline void wq_list_add_after(struct io_wq_work_node *node,
struct io_wq_work_node *pos,
struct io_wq_work_list *list)
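(The list primitives the new code below leans on can be modeled in a
few lines. This is a simplified, illustrative sketch, not the io-wq.h
implementation: a singly linked list tracking first and last, so
appends are O(1) and deletion needs the previous node.)

#include <stddef.h>

struct sketch_node {
	struct sketch_node *next;
};

struct sketch_list {
	struct sketch_node *first;
	struct sketch_node *last;
};

/* append at the tail, preserving FIFO order */
static void sketch_add_tail(struct sketch_node *node, struct sketch_list *list)
{
	node->next = NULL;
	if (!list->first)
		list->first = node;
	else
		list->last->next = node;
	list->last = node;
}

/* delete node given its predecessor (NULL when node is the head) */
static void sketch_del(struct sketch_list *list, struct sketch_node *node,
		       struct sketch_node *prev)
{
	if (node == list->first)
		list->first = node->next;	/* unlink the head */
	else
		prev->next = node->next;	/* bridge over node */
	if (node == list->last)
		list->last = prev;
	node->next = NULL;
}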
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 58f150680c05..1d55ff827242 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -721,6 +721,11 @@ struct async_poll {
struct io_poll_iocb *double_poll;
};
+struct io_task_work {
+ struct io_wq_work_node node;
+ task_work_func_t func;
+};
+
/*
* NOTE! Each of the iocb union members has the file pointer
* as the first entry in their struct definition. So you can
@@ -779,7 +784,10 @@ struct io_kiocb {
* 2. to track reqs with ->files (see io_op_def::file_table)
*/
struct list_head inflight_entry;
- struct callback_head task_work;
+ union {
+ struct io_task_work io_task_work;
+ struct callback_head task_work;
+ };
/* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */
struct hlist_node hash_node;
struct async_poll *apoll;
@@ -2130,6 +2138,81 @@ static inline struct io_kiocb *io_req_find_next(struct io_kiocb *req)
return __io_req_find_next(req);
}
+static bool __tctx_task_work(struct io_uring_task *tctx)
+{
+ struct io_wq_work_list list;
+ struct io_wq_work_node *node;
+
+ if (wq_list_empty(&tctx->task_list))
+ return false;
+
+ spin_lock(&tctx->task_lock);
+ list = tctx->task_list;
+ INIT_WQ_LIST(&tctx->task_list);
+ spin_unlock(&tctx->task_lock);
+
+ node = list.first;
+ while (node) {
+ struct io_wq_work_node *next = node->next;
+ struct io_kiocb *req;
+
+ req = container_of(node, struct io_kiocb, io_task_work.node);
+ req->task_work.func(&req->task_work);
+ node = next;
+ }
+
+ return list.first != NULL;
+}
+
+static void tctx_task_work(struct callback_head *cb)
+{
+ struct io_uring_task *tctx = container_of(cb, struct io_uring_task, task_work);
+
+ while (__tctx_task_work(tctx))
+ cond_resched();
+
+ clear_bit(0, &tctx->task_state);
+}
+
+static int io_task_work_add(struct task_struct *tsk, struct io_kiocb *req,
+ enum task_work_notify_mode notify)
+{
+ struct io_uring_task *tctx = tsk->io_uring;
+ struct io_wq_work_node *node, *prev;
+ int ret;
+
+ WARN_ON_ONCE(!tctx);
+
+ spin_lock(&tctx->task_lock);
+ wq_list_add_tail(&req->io_task_work.node, &tctx->task_list);
+ spin_unlock(&tctx->task_lock);
+
+ /* task_work already pending, we're done */
+ if (test_bit(0, &tctx->task_state) ||
+ test_and_set_bit(0, &tctx->task_state))
+ return 0;
+
+ if (!task_work_add(tsk, &tctx->task_work, notify))
+ return 0;
+
+ /*
+ * Slow path - we failed, find and delete the work. If the work is not
+ * in the list, it got run and we're fine.
+ */
+ ret = 0;
+ spin_lock(&tctx->task_lock);
+ wq_list_for_each(node, prev, &tctx->task_list) {
+ if (&req->io_task_work.node == node) {
+ wq_list_del(&tctx->task_list, node, prev);
+ ret = 1;
+ break;
+ }
+ }
+ spin_unlock(&tctx->task_lock);
+ clear_bit(0, &tctx->task_state);
+ return ret;
+}
+
static int io_req_task_work_add(struct io_kiocb *req)
{
struct task_struct *tsk = req->task;
@@ -2150,7 +2233,7 @@ static int io_req_task_work_add(struct io_kiocb *req)
if (!(ctx->flags & IORING_SETUP_SQPOLL))
notify = TWA_SIGNAL;
- ret = task_work_add(tsk, &req->task_work, notify);
+ ret = io_task_work_add(tsk, req, notify);
if (!ret)
wake_up_process(tsk);
@@ -2158,7 +2241,7 @@ static int io_req_task_work_add(struct io_kiocb *req)
}
static void io_req_task_work_add_fallback(struct io_kiocb *req,
- void (*cb)(struct callback_head *))
+ task_work_func_t cb)
{
struct task_struct *tsk = io_wq_get_task(req->ctx->io_wq);
@@ -2217,7 +2300,7 @@ static void io_req_task_queue(struct io_kiocb *req)
{
int ret;
- init_task_work(&req->task_work, io_req_task_submit);
+ req->task_work.func = io_req_task_submit;
percpu_ref_get(&req->ctx->refs);
ret = io_req_task_work_add(req);
@@ -2348,7 +2431,7 @@ static void io_free_req_deferred(struct io_kiocb *req)
{
int ret;
- init_task_work(&req->task_work, io_put_req_deferred_cb);
+ req->task_work.func = io_put_req_deferred_cb;
ret = io_req_task_work_add(req);
if (unlikely(ret))
io_req_task_work_add_fallback(req, io_put_req_deferred_cb);
@@ -3393,7 +3476,7 @@ static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode,
req->rw.kiocb.ki_flags &= ~IOCB_WAITQ;
list_del_init(&wait->entry);
- init_task_work(&req->task_work, io_req_task_submit);
+ req->task_work.func = io_req_task_submit;
percpu_ref_get(&req->ctx->refs);
/* submit ref gets dropped, acquire a new one */
@@ -5090,7 +5173,7 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
list_del_init(&poll->wait.entry);
req->result = mask;
- init_task_work(&req->task_work, func);
+ req->task_work.func = func;
percpu_ref_get(&req->ctx->refs);
/*
@@ -8093,6 +8176,10 @@ static int io_uring_alloc_task_context(struct task_struct *task)
io_init_identity(&tctx->__identity);
tctx->identity = &tctx->__identity;
task->io_uring = tctx;
+ spin_lock_init(&tctx->task_lock);
+ INIT_WQ_LIST(&tctx->task_list);
+ tctx->task_state = 0;
+ init_task_work(&tctx->task_work, tctx_task_work);
return 0;
}
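(Taken together, io_task_work_add() and tctx_task_work() form an
arm-once producer/consumer scheme: every producer appends under the
lock, but only the one that flips the state bit schedules a runner,
and the runner splices the whole list out before processing it. A
minimal userspace sketch of that pattern, assuming POSIX threads and
C11 atomics as stand-ins for the kernel's spinlock, test_and_set_bit()
and task_work_add(); all names here are illustrative.)

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct work {
	struct work *next;
	int id;
};

struct worker_ctx {
	pthread_mutex_t lock;
	struct work *first, *last;
	atomic_bool armed;		/* mirrors bit 0 of tctx->task_state */
};

/* Producer: append in FIFO order; returns true if a runner must be woken. */
static bool queue_work(struct worker_ctx *ctx, struct work *w)
{
	pthread_mutex_lock(&ctx->lock);
	w->next = NULL;
	if (ctx->last)
		ctx->last->next = w;
	else
		ctx->first = w;
	ctx->last = w;
	pthread_mutex_unlock(&ctx->lock);

	/* arm at most one runner, like test_and_set_bit() */
	return !atomic_exchange(&ctx->armed, true);
}

/* Consumer: splice the whole list out, then run entries oldest-first. */
static void run_work(struct worker_ctx *ctx)
{
	struct work *node;

	pthread_mutex_lock(&ctx->lock);
	node = ctx->first;
	ctx->first = ctx->last = NULL;
	pthread_mutex_unlock(&ctx->lock);

	while (node) {
		struct work *next = node->next;
		printf("running work %d\n", node->id);
		node = next;
	}
	atomic_store(&ctx->armed, false);
}

int main(void)
{
	struct worker_ctx ctx = { .lock = PTHREAD_MUTEX_INITIALIZER };
	struct work w[3] = { { .id = 0 }, { .id = 1 }, { .id = 2 } };
	bool wake = false;

	for (int i = 0; i < 3; i++)
		wake |= queue_work(&ctx, &w[i]);
	if (wake)
		run_work(&ctx);		/* prints work 0, 1, 2 in order */
	return 0;
}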
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index 35b2d845704d..2eb6d19de336 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -22,6 +22,15 @@ struct io_identity {
refcount_t count;
};
+struct io_wq_work_node {
+ struct io_wq_work_node *next;
+};
+
+struct io_wq_work_list {
+ struct io_wq_work_node *first;
+ struct io_wq_work_node *last;
+};
+
struct io_uring_task {
/* submission side */
struct xarray xa;
@@ -32,6 +41,11 @@ struct io_uring_task {
struct io_identity *identity;
atomic_t in_idle;
bool sqpoll;
+
+ spinlock_t task_lock;
+ struct io_wq_work_list task_list;
+ unsigned long task_state;
+ struct callback_head task_work;
};
#if defined(CONFIG_IO_URING)
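(For reference, the union added to io_kiocb works because the list
hands back a pointer to the embedded io_task_work.node and
container_of() recovers the enclosing request, so queueing needs no
extra allocation per item. A standalone sketch with simplified,
illustrative types:)

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct work_node { struct work_node *next; };

struct request {
	int id;
	struct work_node node;	/* embedded, like io_task_work.node */
};

int main(void)
{
	struct request req = { .id = 42 };
	struct work_node *n = &req.node;	/* what the list hands back */
	struct request *back = container_of(n, struct request, node);

	printf("recovered request id=%d\n", back->id);	/* prints 42 */
	return 0;
}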
--
2.24.0