From: Pavel Begunkov <[email protected]>
To: Jens Axboe <[email protected]>,
[email protected], [email protected]
Cc: [email protected], [email protected]
Subject: Re: [PATCHSET 0/2] io_uring support for linked timeouts
Date: Sat, 16 Nov 2019 00:22:19 +0300 [thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>
[-- Attachment #1.1: Type: text/plain, Size: 8505 bytes --]
On 15/11/2019 22:34, Jens Axboe wrote:
> How about something like this? Should work (and be valid) to have any
> sequence of timeout links, as long as there's something in front of it.
> Commit message has more details.
If you don't mind, I'll give rewriting this a try. A bit tight
on time, so hopefully by this Sunday.
In any case, there are enough edge cases that I need to spend some
time reviewing and checking it.
>
>
> commit 437fd0500d08f32444747b861c72cd58a4b833d5
> Author: Jens Axboe <[email protected]>
> Date: Thu Nov 14 19:39:52 2019 -0700
>
> io_uring: fix sequencing issues with linked timeouts
>
> We have an issue with timeout links that are deeper in the submit chain,
> because we only handle it upfront, not from later submissions. Move the
> prep + issue of the timeout link to the async work prep handler, and do
> it normally for non-async queue. If we validate and prepare the timeout
> links upfront when we first see them, there's nothing stopping us from
> supporting any sort of nesting.
>
> Ensure that we handle the sequencing of linked timeouts correctly as
> well. The loop in io_req_link_next() isn't necessary, and it will never
> do anything, because we can't have both REQ_F_LINK_TIMEOUT set and have
> dependent links.
REQ1 -> LINKED_TIMEOUT -> REQ2 -> REQ3
Is this a valid case? Just checking that I understood this "can't have both" correctly.
If not, why not? I think there are a lot of use cases for this.
>
> Reported-by: Pavel Begunkov <[email protected]>
> Signed-off-by: Jens Axboe <[email protected]>
>
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index a0204b48866f..47d61899b8ba 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -352,6 +352,7 @@ struct io_kiocb {
> #define REQ_F_MUST_PUNT 4096 /* must be punted even for NONBLOCK */
> #define REQ_F_INFLIGHT 8192 /* on inflight list */
> #define REQ_F_COMP_LOCKED 16384 /* completion under lock */
> +#define REQ_F_FREE_SQE 32768
> u64 user_data;
> u32 result;
> u32 sequence;
> @@ -390,6 +391,8 @@ static void __io_free_req(struct io_kiocb *req);
> static void io_put_req(struct io_kiocb *req);
> static void io_double_put_req(struct io_kiocb *req);
> static void __io_double_put_req(struct io_kiocb *req);
> +static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req);
> +static void io_queue_linked_timeout(struct io_kiocb *req);
>
> static struct kmem_cache *req_cachep;
>
> @@ -524,7 +527,8 @@ static inline bool io_sqe_needs_user(const struct io_uring_sqe *sqe)
> opcode == IORING_OP_WRITE_FIXED);
> }
>
> -static inline bool io_prep_async_work(struct io_kiocb *req)
> +static inline bool io_prep_async_work(struct io_kiocb *req,
> + struct io_kiocb **link)
> {
> bool do_hashed = false;
>
> @@ -553,13 +557,17 @@ static inline bool io_prep_async_work(struct io_kiocb *req)
> req->work.flags |= IO_WQ_WORK_NEEDS_USER;
> }
>
> + *link = io_prep_linked_timeout(req);
> return do_hashed;
> }
>
> static inline void io_queue_async_work(struct io_kiocb *req)
> {
> - bool do_hashed = io_prep_async_work(req);
> struct io_ring_ctx *ctx = req->ctx;
> + struct io_kiocb *link;
> + bool do_hashed;
> +
> + do_hashed = io_prep_async_work(req, &link);
>
> trace_io_uring_queue_async_work(ctx, do_hashed, req, &req->work,
> req->flags);
> @@ -569,6 +577,9 @@ static inline void io_queue_async_work(struct io_kiocb *req)
> io_wq_enqueue_hashed(ctx->io_wq, &req->work,
> file_inode(req->file));
> }
> +
> + if (link)
> + io_queue_linked_timeout(link);
> }
>
> static void io_kill_timeout(struct io_kiocb *req)
> @@ -868,30 +879,26 @@ static void io_req_link_next(struct io_kiocb *req, struct io_kiocb **nxtptr)
> * safe side.
> */
> nxt = list_first_entry_or_null(&req->link_list, struct io_kiocb, list);
> - while (nxt) {
> + if (nxt) {
> list_del_init(&nxt->list);
> if (!list_empty(&req->link_list)) {
> INIT_LIST_HEAD(&nxt->link_list);
> list_splice(&req->link_list, &nxt->link_list);
> nxt->flags |= REQ_F_LINK;
> + } else if (req->flags & REQ_F_LINK_TIMEOUT) {
> + wake_ev = io_link_cancel_timeout(nxt);
> + nxt = NULL;
> }
>
> /*
> * If we're in async work, we can continue processing the chain
> * in this context instead of having to queue up new async work.
> */
> - if (req->flags & REQ_F_LINK_TIMEOUT) {
> - wake_ev = io_link_cancel_timeout(nxt);
> -
> - /* we dropped this link, get next */
> - nxt = list_first_entry_or_null(&req->link_list,
> - struct io_kiocb, list);
> - } else if (nxtptr && io_wq_current_is_worker()) {
> - *nxtptr = nxt;
> - break;
> - } else {
> - io_queue_async_work(nxt);
> - break;
> + if (nxt) {
> + if (nxtptr && io_wq_current_is_worker())
> + *nxtptr = nxt;
> + else
> + io_queue_async_work(nxt);
> }
> }
>
> @@ -916,6 +923,9 @@ static void io_fail_links(struct io_kiocb *req)
>
> trace_io_uring_fail_link(req, link);
>
> + if (req->flags & REQ_F_FREE_SQE)
> + kfree(link->submit.sqe);
> +
> if ((req->flags & REQ_F_LINK_TIMEOUT) &&
> link->submit.sqe->opcode == IORING_OP_LINK_TIMEOUT) {
> io_link_cancel_timeout(link);
> @@ -2651,8 +2661,12 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
>
> /* if a dependent link is ready, pass it back */
> if (!ret && nxt) {
> - io_prep_async_work(nxt);
> + struct io_kiocb *link;
> +
> + io_prep_async_work(nxt, &link);
> *workptr = &nxt->work;
> + if (link)
> + io_queue_linked_timeout(link);
> }
> }
>
> @@ -2832,7 +2846,6 @@ static int io_validate_link_timeout(struct io_kiocb *req)
> static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
> {
> struct io_kiocb *nxt;
> - int ret;
>
> if (!(req->flags & REQ_F_LINK))
> return NULL;
> @@ -2841,14 +2854,6 @@ static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
> if (!nxt || nxt->submit.sqe->opcode != IORING_OP_LINK_TIMEOUT)
> return NULL;
>
> - ret = io_validate_link_timeout(nxt);
> - if (ret) {
> - list_del_init(&nxt->list);
> - io_cqring_add_event(nxt, ret);
> - io_double_put_req(nxt);
> - return ERR_PTR(-ECANCELED);
> - }
> -
> if (nxt->submit.sqe->timeout_flags & IORING_TIMEOUT_ABS)
> nxt->timeout.data->mode = HRTIMER_MODE_ABS;
> else
> @@ -2862,16 +2867,9 @@ static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
>
> static void __io_queue_sqe(struct io_kiocb *req)
> {
> - struct io_kiocb *nxt;
> + struct io_kiocb *nxt = io_prep_linked_timeout(req);
> int ret;
>
> - nxt = io_prep_linked_timeout(req);
> - if (IS_ERR(nxt)) {
> - ret = PTR_ERR(nxt);
> - nxt = NULL;
> - goto err;
> - }
> -
> ret = __io_submit_sqe(req, NULL, true);
>
> /*
> @@ -2899,10 +2897,6 @@ static void __io_queue_sqe(struct io_kiocb *req)
> * submit reference when the iocb is actually submitted.
> */
> io_queue_async_work(req);
> -
> - if (nxt)
> - io_queue_linked_timeout(nxt);
> -
> return;
> }
> }
> @@ -2947,6 +2941,10 @@ static void io_queue_link_head(struct io_kiocb *req, struct io_kiocb *shadow)
> int need_submit = false;
> struct io_ring_ctx *ctx = req->ctx;
>
> + if (unlikely(req->flags & REQ_F_FAIL_LINK)) {
> + ret = -ECANCELED;
> + goto err;
> + }
> if (!shadow) {
> io_queue_sqe(req);
> return;
> @@ -2961,9 +2959,11 @@ static void io_queue_link_head(struct io_kiocb *req, struct io_kiocb *shadow)
> ret = io_req_defer(req);
> if (ret) {
> if (ret != -EIOCBQUEUED) {
> +err:
> io_cqring_add_event(req, ret);
> io_double_put_req(req);
> - __io_free_req(shadow);
> + if (shadow)
> + __io_free_req(shadow);
> return;
> }
> } else {
> @@ -3020,6 +3020,14 @@ static void io_submit_sqe(struct io_kiocb *req, struct io_submit_state *state,
> if (*link) {
> struct io_kiocb *prev = *link;
>
> + if (READ_ONCE(s->sqe->opcode) == IORING_OP_LINK_TIMEOUT) {
> + ret = io_validate_link_timeout(req);
> + if (ret) {
> + prev->flags |= REQ_F_FAIL_LINK;
> + goto err_req;
> + }
> + }
> +
> sqe_copy = kmemdup(s->sqe, sizeof(*sqe_copy), GFP_KERNEL);
> if (!sqe_copy) {
> ret = -EAGAIN;
>
--
Pavel Begunkov
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
next prev parent reply other threads:[~2019-11-15 21:22 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-11-05 21:11 [PATCHSET 0/2] io_uring support for linked timeouts Jens Axboe
2019-11-05 21:11 ` [PATCH 1/2] io_uring: abstract out io_async_cancel_one() helper Jens Axboe
2019-11-05 21:11 ` [PATCH 2/2] io_uring: add support for linked SQE timeouts Jens Axboe
2019-11-14 21:24 ` [PATCHSET 0/2] io_uring support for linked timeouts Pavel Begunkov
2019-11-14 22:37 ` Jens Axboe
2019-11-15 9:40 ` Pavel Begunkov
2019-11-15 14:21 ` Pavel Begunkov
2019-11-15 15:13 ` Jens Axboe
2019-11-15 17:11 ` Pavel Begunkov
2019-11-15 19:34 ` Jens Axboe
2019-11-15 21:16 ` Jens Axboe
2019-11-15 21:38 ` Pavel Begunkov
2019-11-15 22:15 ` Jens Axboe
2019-11-15 22:19 ` Pavel Begunkov
2019-11-15 22:23 ` Pavel Begunkov
2019-11-15 22:25 ` Jens Axboe
2019-11-15 21:22 ` Pavel Begunkov [this message]
2019-11-15 21:26 ` Jens Axboe
2019-11-19 21:11 ` Pavel Begunkov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox