* [PATCHSET 0/2] io_uring: close lookup gap for dependent work
From: Jens Axboe @ 2019-11-20 20:09 UTC
To: io-uring; +Cc: asml.silence
As discussed earlier today on this list, there's a gap between finding
dependent work and ensuring we can look it up for cancellation purposes.
On top of that, we also currently NEVER find dependent work, due to how
we do lookups of it. That is fixed in patch 1, while patch 2 implements
the fix for the lookup gap.
Patches are against for-5.5/io_uring-post
fs/io-wq.c | 3 +++
fs/io-wq.h | 12 +++++++++++-
fs/io_uring.c | 34 +++++++++++++++++++++++++++-------
3 files changed, 41 insertions(+), 8 deletions(-)
--
Jens Axboe
* [PATCH 1/7] io-wq: wait for io_wq_create() to setup necessary workers
From: Jens Axboe @ 2019-11-20 20:09 UTC
To: io-uring; +Cc: asml.silence, Jens Axboe, syzbot+0f1cc17f85154f400465
We currently have a race where, if setup is really slow, io_wq_destroy()
can be called before setup has finished. The caller then gets stuck
waiting for the manager to set things up, but the manager has already
exited.

Fix this by doing a synchronous setup of the manager. This also fixes
the case where we'd get stuck if worker creation failed.

In practice this race window was really small, as we already wait for
the manager to start. Hence someone would have to call io_wq_destroy()
after the manager task has started, but before it entered its first
loop. The reported test case forked tons of these, which is why it
became an issue.
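For illustration, here is a minimal userspace sketch of the same pattern
using pthreads; the struct and names are hypothetical, not the io-wq
code. The creator blocks until the manager thread reports that worker
setup either completed or failed, so teardown can never observe a
half-initialized manager:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct wq {
	pthread_mutex_t lock;
	pthread_cond_t done;
	bool setup_done;
	bool setup_error;
};

static void *manager(void *data)
{
	struct wq *wq = data;
	bool ok = true;	/* pretend the fixed workers were created here */

	/* signal the creator whether setup worked, before looping */
	pthread_mutex_lock(&wq->lock);
	wq->setup_error = !ok;
	wq->setup_done = true;
	pthread_cond_signal(&wq->done);
	pthread_mutex_unlock(&wq->lock);
	return NULL;
}

int main(void)
{
	struct wq wq = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.done = PTHREAD_COND_INITIALIZER,
	};
	pthread_t t;

	pthread_create(&t, NULL, manager, &wq);

	/* like io_wq_create(): don't return until setup succeeded or failed */
	pthread_mutex_lock(&wq.lock);
	while (!wq.setup_done)
		pthread_cond_wait(&wq.done, &wq.lock);
	pthread_mutex_unlock(&wq.lock);

	printf("manager setup %s\n", wq.setup_error ? "failed" : "ok");
	pthread_join(t, NULL);
	return 0;
}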
Reported-by: [email protected]
Fixes: 771b53d033e8 ("io-wq: small threadpool implementation for io_uring")
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io-wq.c | 38 ++++++++++++++++++++++++++------------
1 file changed, 26 insertions(+), 12 deletions(-)
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 9174007ce107..1f640c489f7c 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -33,6 +33,7 @@ enum {
enum {
IO_WQ_BIT_EXIT = 0, /* wq exiting */
IO_WQ_BIT_CANCEL = 1, /* cancel work on list */
+ IO_WQ_BIT_ERROR = 2, /* error on setup */
};
enum {
@@ -562,14 +563,14 @@ void io_wq_worker_sleeping(struct task_struct *tsk)
spin_unlock_irq(&wqe->lock);
}
-static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
+static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
{
struct io_wqe_acct *acct = &wqe->acct[index];
struct io_worker *worker;
worker = kcalloc_node(1, sizeof(*worker), GFP_KERNEL, wqe->node);
if (!worker)
- return;
+ return false;
refcount_set(&worker->ref, 1);
worker->nulls_node.pprev = NULL;
@@ -581,7 +582,7 @@ static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
"io_wqe_worker-%d/%d", index, wqe->node);
if (IS_ERR(worker->task)) {
kfree(worker);
- return;
+ return false;
}
spin_lock_irq(&wqe->lock);
@@ -599,6 +600,7 @@ static void create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
atomic_inc(&wq->user->processes);
wake_up_process(worker->task);
+ return true;
}
static inline bool io_wqe_need_worker(struct io_wqe *wqe, int index)
@@ -606,9 +608,6 @@ static inline bool io_wqe_need_worker(struct io_wqe *wqe, int index)
{
struct io_wqe_acct *acct = &wqe->acct[index];
- /* always ensure we have one bounded worker */
- if (index == IO_WQ_ACCT_BOUND && !acct->nr_workers)
- return true;
/* if we have available workers or no work, no need */
if (!hlist_nulls_empty(&wqe->free_list) || !io_wqe_run_queue(wqe))
return false;
@@ -621,10 +620,19 @@ static inline bool io_wqe_need_worker(struct io_wqe *wqe, int index)
static int io_wq_manager(void *data)
{
struct io_wq *wq = data;
+ int i;
- while (!kthread_should_stop()) {
- int i;
+ /* create fixed workers */
+ for (i = 0; i < wq->nr_wqes; i++) {
+ if (create_io_worker(wq, wq->wqes[i], IO_WQ_ACCT_BOUND))
+ continue;
+ goto err;
+ }
+ refcount_set(&wq->refs, wq->nr_wqes);
+ complete(&wq->done);
+
+ while (!kthread_should_stop()) {
for (i = 0; i < wq->nr_wqes; i++) {
struct io_wqe *wqe = wq->wqes[i];
bool fork_worker[2] = { false, false };
@@ -644,6 +652,10 @@ static int io_wq_manager(void *data)
schedule_timeout(HZ);
}
+ return 0;
+err:
+ set_bit(IO_WQ_BIT_ERROR, &wq->state);
+ complete(&wq->done);
return 0;
}
@@ -982,7 +994,6 @@ struct io_wq *io_wq_create(unsigned bounded, struct mm_struct *mm,
wq->user = user;
i = 0;
- refcount_set(&wq->refs, wq->nr_wqes);
for_each_online_node(node) {
struct io_wqe *wqe;
@@ -1020,6 +1031,10 @@ struct io_wq *io_wq_create(unsigned bounded, struct mm_struct *mm,
wq->manager = kthread_create(io_wq_manager, wq, "io_wq_manager");
if (!IS_ERR(wq->manager)) {
wake_up_process(wq->manager);
+ wait_for_completion(&wq->done);
+ if (test_bit(IO_WQ_BIT_ERROR, &wq->state))
+ goto err;
+ reinit_completion(&wq->done);
return wq;
}
@@ -1041,10 +1056,9 @@ void io_wq_destroy(struct io_wq *wq)
{
int i;
- if (wq->manager) {
- set_bit(IO_WQ_BIT_EXIT, &wq->state);
+ set_bit(IO_WQ_BIT_EXIT, &wq->state);
+ if (wq->manager)
kthread_stop(wq->manager);
- }
rcu_read_lock();
for (i = 0; i < wq->nr_wqes; i++) {
--
2.24.0
* [PATCH 1/2] io_uring: allow finding next link independent of req reference count
From: Jens Axboe @ 2019-11-20 20:09 UTC
To: io-uring; +Cc: asml.silence, Jens Axboe
We currently try to start the next link when we put the request, and
only if we were going to free it. This means that the optimization to
continue executing requests from the same context often fails, as we're
not putting the final reference.

Add REQ_F_LINK_NEXT to keep track of this, and allow io_uring to find the
next request more efficiently.
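As a rough sketch of the idea (hypothetical names; a C11 atomic stands
in for the kernel's refcount_t, and the flag is only touched by the
request's owner, as in the real code): the flag records that the next
link has already been handed off, so the handoff no longer depends on
this put being the final one:

#include <stdatomic.h>
#include <stdlib.h>

#define REQ_F_LINK_NEXT	8	/* already grabbed next link */

struct req {
	atomic_uint refs;
	unsigned flags;
	struct req *next_link;
};

/* Hand off the next link at most once, regardless of refcount. */
static struct req *req_find_next(struct req *req)
{
	struct req *nxt;

	if (req->flags & REQ_F_LINK_NEXT)
		return NULL;
	req->flags |= REQ_F_LINK_NEXT;
	nxt = req->next_link;
	req->next_link = NULL;
	return nxt;
}

/* Drop a reference, but return the next link even when this put
 * wasn't the final one. */
static struct req *req_put_find_next(struct req *req)
{
	struct req *nxt = req_find_next(req);

	if (atomic_fetch_sub(&req->refs, 1) == 1)
		free(req);	/* last reference dropped */
	return nxt;
}

int main(void)
{
	struct req *a = calloc(1, sizeof(*a));
	struct req *b = calloc(1, sizeof(*b));

	atomic_init(&a->refs, 2);	/* e.g. submit + completion refs */
	atomic_init(&b->refs, 2);
	a->next_link = b;

	struct req *nxt = req_put_find_next(a);	/* non-final put, still get b */
	req_put_find_next(a);			/* final put frees a */
	if (nxt)
		req_put_find_next(nxt);
	req_put_find_next(b);
	return 0;
}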
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 20 +++++++++++++++-----
1 file changed, 15 insertions(+), 5 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 066b59ffb54e..132a890368bf 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -340,6 +340,7 @@ struct io_kiocb {
#define REQ_F_NOWAIT 1 /* must not punt to workers */
#define REQ_F_IOPOLL_COMPLETED 2 /* polled IO has completed */
#define REQ_F_FIXED_FILE 4 /* ctx owns file */
+#define REQ_F_LINK_NEXT 8 /* already grabbed next link */
#define REQ_F_IO_DRAIN 16 /* drain existing IO first */
#define REQ_F_IO_DRAINED 32 /* drain done */
#define REQ_F_LINK 64 /* linked sqes */
@@ -874,6 +875,10 @@ static void io_req_link_next(struct io_kiocb *req, struct io_kiocb **nxtptr)
struct io_kiocb *nxt;
bool wake_ev = false;
+ /* Already got next link */
+ if (req->flags & REQ_F_LINK_NEXT)
+ return;
+
/*
* The list should never be empty when we are called here. But could
* potentially happen if the chain is messed up, check to be on the
@@ -910,6 +915,7 @@ static void io_req_link_next(struct io_kiocb *req, struct io_kiocb **nxtptr)
break;
}
+ req->flags |= REQ_F_LINK_NEXT;
if (wake_ev)
io_cqring_ev_posted(ctx);
}
@@ -946,12 +952,10 @@ static void io_fail_links(struct io_kiocb *req)
io_cqring_ev_posted(ctx);
}
-static void io_free_req_find_next(struct io_kiocb *req, struct io_kiocb **nxt)
+static void io_req_find_next(struct io_kiocb *req, struct io_kiocb **nxt)
{
- if (likely(!(req->flags & REQ_F_LINK))) {
- __io_free_req(req);
+ if (likely(!(req->flags & REQ_F_LINK)))
return;
- }
/*
* If LINK is set, we have dependent requests in this chain. If we
@@ -977,7 +981,11 @@ static void io_free_req_find_next(struct io_kiocb *req, struct io_kiocb **nxt)
} else {
io_req_link_next(req, nxt);
}
+}
+static void io_free_req_find_next(struct io_kiocb *req, struct io_kiocb **nxt)
+{
+ io_req_find_next(req, nxt);
__io_free_req(req);
}
@@ -994,8 +1002,10 @@ static void io_put_req_find_next(struct io_kiocb *req, struct io_kiocb **nxtptr)
{
struct io_kiocb *nxt = NULL;
+ io_req_find_next(req, &nxt);
+
if (refcount_dec_and_test(&req->refs))
- io_free_req_find_next(req, &nxt);
+ __io_free_req(req);
if (nxt) {
if (nxtptr)
--
2.24.0
* [PATCH 2/7] io-wq: remove extra space characters
From: Jens Axboe @ 2019-11-20 20:09 UTC
To: io-uring; +Cc: asml.silence, Dan Carpenter, Jens Axboe
From: Dan Carpenter <[email protected]>
These lines are indented an extra space character.
Signed-off-by: Dan Carpenter <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io-wq.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 1f640c489f7c..81b2a456d1ce 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -328,9 +328,9 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
* If worker is moving from bound to unbound (or vice versa), then
* ensure we update the running accounting.
*/
- worker_bound = (worker->flags & IO_WORKER_F_BOUND) != 0;
- work_bound = (work->flags & IO_WQ_WORK_UNBOUND) == 0;
- if (worker_bound != work_bound) {
+ worker_bound = (worker->flags & IO_WORKER_F_BOUND) != 0;
+ work_bound = (work->flags & IO_WQ_WORK_UNBOUND) == 0;
+ if (worker_bound != work_bound) {
io_wqe_dec_running(wqe, worker);
if (work_bound) {
worker->flags |= IO_WORKER_F_BOUND;
--
2.24.0
* [PATCH 2/2] io_uring: close lookup gap for dependent next work
From: Jens Axboe @ 2019-11-20 20:09 UTC
To: io-uring; +Cc: asml.silence, Jens Axboe
When we find new work to process within the work handler, we queue the
linked timeout before we have issued the new work. This can be
problematic for very short timeouts, as we have a window where the new
work isn't visible.

Allow the work handler to store a callback function for this in the work
item, and flag it with IO_WQ_WORK_CB if the caller has done so. If that
is set, then io-wq will call the callback once it has set up the new
work item.
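The layout trick is that a work item's list linkage is dead space once
the item has been dequeued, so the same storage can carry the callback.
A hypothetical standalone sketch of that shape (not the actual io-wq
structures):

#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

struct wq_work_cb {
	void (*fn)(void *data);
	void *data;
};

enum { WQ_WORK_CB = 1u << 7 };

struct wq_work {
	union {
		struct list_head list;	/* while queued */
		struct wq_work_cb cb;	/* once dequeued, if WQ_WORK_CB set */
	};
	unsigned flags;
};

static void handle_work(struct wq_work *work)
{
	/* fire the callback only after the dependent work item has been
	 * set up and is visible for cancellation lookup */
	if (work->flags & WQ_WORK_CB)
		work->cb.fn(work->cb.data);
	/* ... then execute the work itself ... */
}

static void link_cb(void *data)
{
	printf("arming linked timeout for %p\n", data);
}

int main(void)
{
	struct wq_work w = { .flags = WQ_WORK_CB };
	int dummy;

	w.cb.fn = link_cb;
	w.cb.data = &dummy;
	handle_work(&w);
	return 0;
}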
Reported-by: Pavel Begunkov <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io-wq.c | 3 +++
fs/io-wq.h | 12 +++++++++++-
fs/io_uring.c | 14 ++++++++++++--
3 files changed, 26 insertions(+), 3 deletions(-)
diff --git a/fs/io-wq.c b/fs/io-wq.c
index b4bc377dda61..2666384aaf44 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -427,6 +427,9 @@ static void io_worker_handle_work(struct io_worker *worker)
worker->cur_work = work;
spin_unlock_irq(&worker->lock);
+ if (work->flags & IO_WQ_WORK_CB)
+ work->cb.fn(work->cb.data);
+
if ((work->flags & IO_WQ_WORK_NEEDS_FILES) &&
current->files != work->files) {
task_lock(current);
diff --git a/fs/io-wq.h b/fs/io-wq.h
index 4b29f922f80c..892989f3e41e 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -11,6 +11,7 @@ enum {
IO_WQ_WORK_NEEDS_FILES = 16,
IO_WQ_WORK_UNBOUND = 32,
IO_WQ_WORK_INTERNAL = 64,
+ IO_WQ_WORK_CB = 128,
IO_WQ_HASH_SHIFT = 24, /* upper 8 bits are used for hash key */
};
@@ -21,8 +22,17 @@ enum io_wq_cancel {
IO_WQ_CANCEL_NOTFOUND, /* work not found */
};
+struct io_wq_work;
+struct io_wq_work_cb {
+ void (*fn)(void *data);
+ void *data;
+};
+
struct io_wq_work {
- struct list_head list;
+ union {
+ struct list_head list;
+ struct io_wq_work_cb cb;
+ };
void (*func)(struct io_wq_work **);
unsigned flags;
struct files_struct *files;
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 132a890368bf..6175e2e195c0 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2664,6 +2664,13 @@ static int __io_submit_sqe(struct io_kiocb *req, struct io_kiocb **nxt,
return 0;
}
+static void io_link_work_cb(void *data)
+{
+ struct io_kiocb *link = data;
+
+ io_queue_linked_timeout(link);
+}
+
static void io_wq_submit_work(struct io_wq_work **workptr)
{
struct io_wq_work *work = *workptr;
@@ -2710,8 +2717,11 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
io_prep_async_work(nxt, &link);
*workptr = &nxt->work;
- if (link)
- io_queue_linked_timeout(link);
+ if (link) {
+ nxt->work.flags |= IO_WQ_WORK_CB;
+ nxt->work.cb.fn = io_link_work_cb;
+ nxt->work.cb.data = link;
+ }
}
}
--
2.24.0
* [PATCH 3/7] io_uring: break links for failed defer
From: Jens Axboe @ 2019-11-20 20:09 UTC
To: io-uring; +Cc: asml.silence, Jens Axboe
From: Pavel Begunkov <[email protected]>
If io_req_defer() fails, it needs to cancel the dependent link.
Signed-off-by: Pavel Begunkov <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index ebc58f896088..7b9bd6ad4fb9 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2943,6 +2943,8 @@ static void io_queue_sqe(struct io_kiocb *req)
if (ret) {
if (ret != -EIOCBQUEUED) {
io_cqring_add_event(req, ret);
+ if (req->flags & REQ_F_LINK)
+ req->flags |= REQ_F_FAIL_LINK;
io_double_put_req(req);
}
} else
@@ -2975,6 +2977,8 @@ static void io_queue_link_head(struct io_kiocb *req, struct io_kiocb *shadow)
if (ret != -EIOCBQUEUED) {
err:
io_cqring_add_event(req, ret);
+ if (req->flags & REQ_F_LINK)
+ req->flags |= REQ_F_FAIL_LINK;
io_double_put_req(req);
if (shadow)
__io_free_req(shadow);
--
2.24.0
* [PATCH 4/7] io_uring: remove redundant check
From: Jens Axboe @ 2019-11-20 20:09 UTC
To: io-uring; +Cc: asml.silence, Jens Axboe
From: Pavel Begunkov <[email protected]>
Pass any IORING_OP_LINK_TIMEOUT request further, where it will
eventually fail in io_issue_sqe().
Signed-off-by: Pavel Begunkov <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 4 ----
1 file changed, 4 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 7b9bd6ad4fb9..8d25e157b4d8 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3064,10 +3064,6 @@ static void io_submit_sqe(struct io_kiocb *req, struct io_submit_state *state,
INIT_LIST_HEAD(&req->link_list);
*link = req;
- } else if (READ_ONCE(s->sqe->opcode) == IORING_OP_LINK_TIMEOUT) {
- /* Only valid as a linked SQE */
- ret = -EINVAL;
- goto err_req;
} else {
io_queue_sqe(req);
}
--
2.24.0
* [PATCH 5/7] io_uring: Fix leaking linked timeouts
From: Jens Axboe @ 2019-11-20 20:09 UTC
To: io-uring; +Cc: asml.silence, Jens Axboe
From: Pavel Begunkov <[email protected]>
Consider a dependent link: REQ -> LINK_TIMEOUT -> LINK_TIMEOUT

1. Submission stage: the submission references for REQ and the first
LINK_TIMEOUT are dropped, leaving references of (1,1,2) respectively.

2. io_put(REQ) + FAIL_LINKS stage: io_put(REQ) drops the request's last
reference and calls io_fail_links(), which calls cancel_timeout() for
every linked timeout, dropping one reference from each. References
after: (0,0,1). That's a leak, as the second timeout is never freed.

Make it treat only the first linked timeout as such, and pass the others
through __io_double_put_req().
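To make the failing arithmetic concrete, a toy trace of the counts
above (hypothetical; the starting values assume each request holds one
submission and one completion reference, and the second timeout's
submission reference is never dropped because it is never individually
submitted):

#include <stdio.h>

int main(void)
{
	/* REQ, first LINK_TIMEOUT, second LINK_TIMEOUT */
	int refs[3] = { 2, 2, 2 };

	/* 1. submission drops the submission refs for REQ and the
	 * first linked timeout only */
	refs[0]--;
	refs[1]--;
	printf("after submission: (%d,%d,%d)\n", refs[0], refs[1], refs[2]);

	/* 2. io_put(REQ) drops REQ's last ref; io_fail_links() has
	 * cancel_timeout() drop one ref from each linked timeout */
	refs[0]--;
	refs[1]--;
	refs[2]--;
	printf("after failing links: (%d,%d,%d)\n", refs[0], refs[1], refs[2]);
	/* prints (0,0,1): the second timeout is never freed */
	return 0;
}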
Signed-off-by: Pavel Begunkov <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 8d25e157b4d8..a79ef43367b1 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -937,6 +937,7 @@ static void io_fail_links(struct io_kiocb *req)
if ((req->flags & REQ_F_LINK_TIMEOUT) &&
link->submit.sqe->opcode == IORING_OP_LINK_TIMEOUT) {
io_link_cancel_timeout(link);
+ req->flags &= ~REQ_F_LINK_TIMEOUT;
} else {
io_cqring_fill_event(link, -ECANCELED);
__io_double_put_req(link);
--
2.24.0
* [PATCH 6/7] io_uring: io_fail_links() should only consider first linked timeout
From: Jens Axboe @ 2019-11-20 20:09 UTC
To: io-uring; +Cc: asml.silence, Jens Axboe
We currently clear the linked timeout field if we cancel such a timeout,
but we should only attempt to cancel if it's the first one we see.
Others should simply be freed like other requests, as they haven't
been started yet.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index a79ef43367b1..d1085e4e8ae9 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -937,12 +937,12 @@ static void io_fail_links(struct io_kiocb *req)
if ((req->flags & REQ_F_LINK_TIMEOUT) &&
link->submit.sqe->opcode == IORING_OP_LINK_TIMEOUT) {
io_link_cancel_timeout(link);
- req->flags &= ~REQ_F_LINK_TIMEOUT;
} else {
io_cqring_fill_event(link, -ECANCELED);
__io_double_put_req(link);
}
kfree(sqe_to_free);
+ req->flags &= ~REQ_F_LINK_TIMEOUT;
}
io_commit_cqring(ctx);
--
2.24.0
* [PATCH 7/7] io_uring: Always REQ_F_FREE_SQE for allocated sqe
From: Jens Axboe @ 2019-11-20 20:09 UTC
To: io-uring; +Cc: asml.silence, Jens Axboe
From: Pavel Begunkov <[email protected]>
Always mark requests that have an allocated sqe copy, and deallocate it
in __io_free_req(). It's easier to follow and doesn't add edge cases.
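A hypothetical userspace sketch of the ownership rule this adopts:
whoever makes a private sqe copy flags it at allocation time, and the
single teardown path is the only place that frees it, so no call site
needs its own kfree():

#include <stdlib.h>
#include <string.h>

#define REQ_F_FREE_SQE	(1u << 8)	/* sqe copy owned by the request */

struct sqe { unsigned char data[64]; };

struct req {
	unsigned flags;
	const struct sqe *sqe;
};

/* Any path that copies the sqe marks ownership immediately. */
static int req_defer(struct req *req, const struct sqe *src)
{
	struct sqe *copy = malloc(sizeof(*copy));

	if (!copy)
		return -1;
	memcpy(copy, src, sizeof(*copy));
	req->sqe = copy;
	req->flags |= REQ_F_FREE_SQE;
	return 0;
}

/* The one place a copied sqe is ever freed, mirroring __io_free_req(). */
static void free_req(struct req *req)
{
	if (req->flags & REQ_F_FREE_SQE)
		free((void *)req->sqe);
	req->flags = 0;
	req->sqe = NULL;
}

int main(void)
{
	struct sqe on_ring = { { 0 } };
	struct req req = { 0 };

	if (req_defer(&req, &on_ring) == 0)
		free_req(&req);
	return 0;
}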
Signed-off-by: Pavel Begunkov <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
---
fs/io_uring.c | 49 ++++++++++++++++++++++---------------------------
1 file changed, 22 insertions(+), 27 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index d1085e4e8ae9..df7f5ce5bb06 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -829,6 +829,8 @@ static void __io_free_req(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
+ if (req->flags & REQ_F_FREE_SQE)
+ kfree(req->submit.sqe);
if (req->file && !(req->flags & REQ_F_FIXED_FILE))
fput(req->file);
if (req->flags & REQ_F_INFLIGHT) {
@@ -924,16 +926,11 @@ static void io_fail_links(struct io_kiocb *req)
spin_lock_irqsave(&ctx->completion_lock, flags);
while (!list_empty(&req->link_list)) {
- const struct io_uring_sqe *sqe_to_free = NULL;
-
link = list_first_entry(&req->link_list, struct io_kiocb, list);
list_del_init(&link->list);
trace_io_uring_fail_link(req, link);
- if (link->flags & REQ_F_FREE_SQE)
- sqe_to_free = link->submit.sqe;
-
if ((req->flags & REQ_F_LINK_TIMEOUT) &&
link->submit.sqe->opcode == IORING_OP_LINK_TIMEOUT) {
io_link_cancel_timeout(link);
@@ -941,7 +938,6 @@ static void io_fail_links(struct io_kiocb *req)
io_cqring_fill_event(link, -ECANCELED);
__io_double_put_req(link);
}
- kfree(sqe_to_free);
req->flags &= ~REQ_F_LINK_TIMEOUT;
}
@@ -1084,7 +1080,8 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
* completions for those, only batch free for fixed
* file and non-linked commands.
*/
- if (((req->flags & (REQ_F_FIXED_FILE|REQ_F_LINK)) ==
+ if (((req->flags &
+ (REQ_F_FIXED_FILE|REQ_F_LINK|REQ_F_FREE_SQE)) ==
REQ_F_FIXED_FILE) && !io_is_fallback_req(req)) {
reqs[to_free++] = req;
if (to_free == ARRAY_SIZE(reqs))
@@ -2567,6 +2564,7 @@ static int io_req_defer(struct io_kiocb *req)
}
memcpy(sqe_copy, sqe, sizeof(*sqe_copy));
+ req->flags |= REQ_F_FREE_SQE;
req->submit.sqe = sqe_copy;
trace_io_uring_defer(ctx, req, false);
@@ -2661,7 +2659,6 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
struct io_wq_work *work = *workptr;
struct io_kiocb *req = container_of(work, struct io_kiocb, work);
struct sqe_submit *s = &req->submit;
- const struct io_uring_sqe *sqe = s->sqe;
struct io_kiocb *nxt = NULL;
int ret = 0;
@@ -2697,9 +2694,6 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
io_put_req(req);
}
- /* async context always use a copy of the sqe */
- kfree(sqe);
-
/* if a dependent link is ready, pass it back */
if (!ret && nxt) {
struct io_kiocb *link;
@@ -2897,23 +2891,24 @@ static void __io_queue_sqe(struct io_kiocb *req)
struct io_uring_sqe *sqe_copy;
sqe_copy = kmemdup(s->sqe, sizeof(*sqe_copy), GFP_KERNEL);
- if (sqe_copy) {
- s->sqe = sqe_copy;
- if (req->work.flags & IO_WQ_WORK_NEEDS_FILES) {
- ret = io_grab_files(req);
- if (ret) {
- kfree(sqe_copy);
- goto err;
- }
- }
+ if (!sqe_copy)
+ goto err;
- /*
- * Queued up for async execution, worker will release
- * submit reference when the iocb is actually submitted.
- */
- io_queue_async_work(req);
- return;
+ s->sqe = sqe_copy;
+ req->flags |= REQ_F_FREE_SQE;
+
+ if (req->work.flags & IO_WQ_WORK_NEEDS_FILES) {
+ ret = io_grab_files(req);
+ if (ret)
+ goto err;
}
+
+ /*
+ * Queued up for async execution, worker will release
+ * submit reference when the iocb is actually submitted.
+ */
+ io_queue_async_work(req);
+ return;
}
err:
@@ -3008,7 +3003,6 @@ static void io_queue_link_head(struct io_kiocb *req, struct io_kiocb *shadow)
static void io_submit_sqe(struct io_kiocb *req, struct io_submit_state *state,
struct io_kiocb **link)
{
- struct io_uring_sqe *sqe_copy;
struct sqe_submit *s = &req->submit;
struct io_ring_ctx *ctx = req->ctx;
int ret;
@@ -3038,6 +3032,7 @@ static void io_submit_sqe(struct io_kiocb *req, struct io_submit_state *state,
*/
if (*link) {
struct io_kiocb *prev = *link;
+ struct io_uring_sqe *sqe_copy;
if (READ_ONCE(s->sqe->opcode) == IORING_OP_LINK_TIMEOUT) {
ret = io_timeout_setup(req);
--
2.24.0
* Re: [PATCHSET 0/2] io_uring: close lookup gap for dependent work
From: Jens Axboe @ 2019-11-20 20:11 UTC
To: io-uring; +Cc: asml.silence
On 11/20/19 1:09 PM, Jens Axboe wrote:
> As discussed earlier today on this list, there's a gap between finding
> dependent work and ensuring we can look it up for cancellation purposes.
> On top of that, we also currently NEVER find dependent work due to how
> we do lookups of it, so that is fixed in patch 1 while patch 2
> implements the fix for the lookup gap.
>
> Patches are against for-5.5/io_uring-post
>
> fs/io-wq.c | 3 +++
> fs/io-wq.h | 12 +++++++++++-
> fs/io_uring.c | 34 +++++++++++++++++++++++++++-------
> 3 files changed, 41 insertions(+), 8 deletions(-)
Gah, disregard this posting, there were older patches in that dir too.
Re-sending a correct one.
--
Jens Axboe