From: Pavel Begunkov <[email protected]>
To: Jens Axboe <[email protected]>, [email protected]
Subject: Re: [PATCH 5.10] io_uring: fix overflowed cancel w/ linked ->files
Date: Wed, 4 Nov 2020 15:41:33 +0000 [thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>
On 04/11/2020 15:25, Jens Axboe wrote:
> On 11/4/20 6:39 AM, Pavel Begunkov wrote:
>> Current io_match_files() check in io_cqring_overflow_flush() is useless
>> because requests drop ->files before going to the overflow list, however
>> linked to it request do not, and we don't check them.
>>
>> Signed-off-by: Pavel Begunkov <[email protected]>
>> ---
>>
>> Jens, there will be conflicts with 5.11 patches, I'd appreciate if you
>> could ping me when/if you merge it into 5.11
>
> Just for this one:
>
> io_uring: link requests with singly linked list
>
> How does the below look, now based on top of this patch added to
> io_uring-5.10:
Yep, it looks exactly like in my 5.11!
>
>
> commit f5a63f6bb1ecfb8852ac2628d53dc3e9644f7f3f
> Author: Pavel Begunkov <[email protected]>
> Date: Tue Oct 27 23:25:37 2020 +0000
>
> io_uring: link requests with singly linked list
>
> Singly linked list for keeping linked requests is enough, because we
> almost always operate on the head and traverse forward with the
> exception of linked timeouts going 1 hop backwards.
>
> Replace ->link_list with a handmade singly linked list. Also kill
> REQ_F_LINK_HEAD in favour of checking a newly added ->list for NULL
> directly.
>
> That saves 8B in io_kiocb, is not as heavy as list fixup, makes better
> use of cache by not touching a previous request (i.e. last request of
> the link) each time on list modification and optimises cache use further
> in the following patch, and actually makes travesal easier removing in
> the end some lines. Also, keeping invariant in ->list instead of having
> REQ_F_LINK_HEAD is less error-prone.
>
> Signed-off-by: Pavel Begunkov <[email protected]>
> Signed-off-by: Jens Axboe <[email protected]>
>
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 3f6bc98a91da..a701e8fe6716 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -598,7 +598,6 @@ enum {
> REQ_F_FORCE_ASYNC_BIT = IOSQE_ASYNC_BIT,
> REQ_F_BUFFER_SELECT_BIT = IOSQE_BUFFER_SELECT_BIT,
>
> - REQ_F_LINK_HEAD_BIT,
> REQ_F_FAIL_LINK_BIT,
> REQ_F_INFLIGHT_BIT,
> REQ_F_CUR_POS_BIT,
> @@ -630,8 +629,6 @@ enum {
> /* IOSQE_BUFFER_SELECT */
> REQ_F_BUFFER_SELECT = BIT(REQ_F_BUFFER_SELECT_BIT),
>
> - /* head of a link */
> - REQ_F_LINK_HEAD = BIT(REQ_F_LINK_HEAD_BIT),
> /* fail rest of links */
> REQ_F_FAIL_LINK = BIT(REQ_F_FAIL_LINK_BIT),
> /* on inflight list */
> @@ -713,7 +710,7 @@ struct io_kiocb {
> struct task_struct *task;
> u64 user_data;
>
> - struct list_head link_list;
> + struct io_kiocb *link;
>
> /*
> * 1. used with ctx->iopoll_list with reads/writes
> @@ -1021,6 +1018,9 @@ struct sock *io_uring_get_socket(struct file *file)
> }
> EXPORT_SYMBOL(io_uring_get_socket);
>
> +#define io_for_each_link(pos, head) \
> + for (pos = (head); pos; pos = pos->link)
> +
> static inline void io_clean_op(struct io_kiocb *req)
> {
> if (req->flags & (REQ_F_NEED_CLEANUP | REQ_F_BUFFER_SELECTED |
> @@ -1505,10 +1505,8 @@ static void io_prep_async_link(struct io_kiocb *req)
> {
> struct io_kiocb *cur;
>
> - io_prep_async_work(req);
> - if (req->flags & REQ_F_LINK_HEAD)
> - list_for_each_entry(cur, &req->link_list, link_list)
> - io_prep_async_work(cur);
> + io_for_each_link(cur, req)
> + io_prep_async_work(cur);
> }
>
> static struct io_kiocb *__io_queue_async_work(struct io_kiocb *req)
> @@ -1691,20 +1689,15 @@ static inline bool __io_match_files(struct io_kiocb *req,
> req->work.identity->files == files;
> }
>
> -static bool io_match_files(struct io_kiocb *req,
> - struct files_struct *files)
> +static bool io_match_files(struct io_kiocb *head, struct files_struct *files)
> {
> - struct io_kiocb *link;
> + struct io_kiocb *req;
>
> if (!files)
> return true;
> - if (__io_match_files(req, files))
> - return true;
> - if (req->flags & REQ_F_LINK_HEAD) {
> - list_for_each_entry(link, &req->link_list, link_list) {
> - if (__io_match_files(link, files))
> - return true;
> - }
> + io_for_each_link(req, head) {
> + if (__io_match_files(req, files))
> + return true;
> }
> return false;
> }
> @@ -1971,6 +1964,14 @@ static void __io_free_req(struct io_kiocb *req)
> percpu_ref_put(&ctx->refs);
> }
>
> +static inline void io_remove_next_linked(struct io_kiocb *req)
> +{
> + struct io_kiocb *nxt = req->link;
> +
> + req->link = nxt->link;
> + nxt->link = NULL;
> +}
> +
> static void io_kill_linked_timeout(struct io_kiocb *req)
> {
> struct io_ring_ctx *ctx = req->ctx;
> @@ -1979,8 +1980,8 @@ static void io_kill_linked_timeout(struct io_kiocb *req)
> unsigned long flags;
>
> spin_lock_irqsave(&ctx->completion_lock, flags);
> - link = list_first_entry_or_null(&req->link_list, struct io_kiocb,
> - link_list);
> + link = req->link;
> +
> /*
> * Can happen if a linked timeout fired and link had been like
> * req -> link t-out -> link t-out [-> ...]
> @@ -1989,7 +1990,7 @@ static void io_kill_linked_timeout(struct io_kiocb *req)
> struct io_timeout_data *io = link->async_data;
> int ret;
>
> - list_del_init(&link->link_list);
> + io_remove_next_linked(req);
> link->timeout.head = NULL;
> ret = hrtimer_try_to_cancel(&io->timer);
> if (ret != -1) {
> @@ -2007,41 +2008,22 @@ static void io_kill_linked_timeout(struct io_kiocb *req)
> }
> }
>
> -static struct io_kiocb *io_req_link_next(struct io_kiocb *req)
> -{
> - struct io_kiocb *nxt;
> -
> - /*
> - * The list should never be empty when we are called here. But could
> - * potentially happen if the chain is messed up, check to be on the
> - * safe side.
> - */
> - if (unlikely(list_empty(&req->link_list)))
> - return NULL;
> -
> - nxt = list_first_entry(&req->link_list, struct io_kiocb, link_list);
> - list_del_init(&req->link_list);
> - if (!list_empty(&nxt->link_list))
> - nxt->flags |= REQ_F_LINK_HEAD;
> - return nxt;
> -}
>
> -/*
> - * Called if REQ_F_LINK_HEAD is set, and we fail the head request
> - */
> static void io_fail_links(struct io_kiocb *req)
> {
> + struct io_kiocb *link, *nxt;
> struct io_ring_ctx *ctx = req->ctx;
> unsigned long flags;
>
> spin_lock_irqsave(&ctx->completion_lock, flags);
> - while (!list_empty(&req->link_list)) {
> - struct io_kiocb *link = list_first_entry(&req->link_list,
> - struct io_kiocb, link_list);
> + link = req->link;
> + req->link = NULL;
>
> - list_del_init(&link->link_list);
> - trace_io_uring_fail_link(req, link);
> + while (link) {
> + nxt = link->link;
> + link->link = NULL;
>
> + trace_io_uring_fail_link(req, link);
> io_cqring_fill_event(link, -ECANCELED);
>
> /*
> @@ -2053,8 +2035,8 @@ static void io_fail_links(struct io_kiocb *req)
> io_put_req_deferred(link, 2);
> else
> io_double_put_req(link);
> + link = nxt;
> }
> -
> io_commit_cqring(ctx);
> spin_unlock_irqrestore(&ctx->completion_lock, flags);
>
> @@ -2063,7 +2045,6 @@ static void io_fail_links(struct io_kiocb *req)
>
> static struct io_kiocb *__io_req_find_next(struct io_kiocb *req)
> {
> - req->flags &= ~REQ_F_LINK_HEAD;
> if (req->flags & REQ_F_LINK_TIMEOUT)
> io_kill_linked_timeout(req);
>
> @@ -2073,15 +2054,19 @@ static struct io_kiocb *__io_req_find_next(struct io_kiocb *req)
> * dependencies to the next request. In case of failure, fail the rest
> * of the chain.
> */
> - if (likely(!(req->flags & REQ_F_FAIL_LINK)))
> - return io_req_link_next(req);
> + if (likely(!(req->flags & REQ_F_FAIL_LINK))) {
> + struct io_kiocb *nxt = req->link;
> +
> + req->link = NULL;
> + return nxt;
> + }
> io_fail_links(req);
> return NULL;
> }
>
> -static struct io_kiocb *io_req_find_next(struct io_kiocb *req)
> +static inline struct io_kiocb *io_req_find_next(struct io_kiocb *req)
> {
> - if (likely(!(req->flags & REQ_F_LINK_HEAD)))
> + if (likely(!(req->link) && !(req->flags & REQ_F_LINK_TIMEOUT)))
> return NULL;
> return __io_req_find_next(req);
> }
> @@ -2176,7 +2161,7 @@ static void io_req_task_queue(struct io_kiocb *req)
> }
> }
>
> -static void io_queue_next(struct io_kiocb *req)
> +static inline void io_queue_next(struct io_kiocb *req)
> {
> struct io_kiocb *nxt = io_req_find_next(req);
>
> @@ -2233,8 +2218,7 @@ static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req)
> io_free_req(req);
> return;
> }
> - if (req->flags & REQ_F_LINK_HEAD)
> - io_queue_next(req);
> + io_queue_next(req);
>
> if (req->task != rb->task) {
> if (rb->task) {
> @@ -6003,11 +5987,10 @@ static u32 io_get_sequence(struct io_kiocb *req)
> {
> struct io_kiocb *pos;
> struct io_ring_ctx *ctx = req->ctx;
> - u32 total_submitted, nr_reqs = 1;
> + u32 total_submitted, nr_reqs = 0;
>
> - if (req->flags & REQ_F_LINK_HEAD)
> - list_for_each_entry(pos, &req->link_list, link_list)
> - nr_reqs++;
> + io_for_each_link(pos, req)
> + nr_reqs++;
>
> total_submitted = ctx->cached_sq_head - ctx->cached_sq_dropped;
> return total_submitted - nr_reqs;
> @@ -6362,7 +6345,7 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
> * race with the completion of the linked work.
> */
> if (prev && refcount_inc_not_zero(&prev->refs))
> - list_del_init(&req->link_list);
> + io_remove_next_linked(prev);
> else
> prev = NULL;
> spin_unlock_irqrestore(&ctx->completion_lock, flags);
> @@ -6380,8 +6363,8 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
> static void __io_queue_linked_timeout(struct io_kiocb *req)
> {
> /*
> - * If the list is now empty, then our linked request finished before
> - * we got a chance to setup the timer
> + * If the back reference is NULL, then our linked request finished
> + * before we got a chance to setup the timer
> */
> if (req->timeout.head) {
> struct io_timeout_data *data = req->async_data;
> @@ -6406,16 +6389,10 @@ static void io_queue_linked_timeout(struct io_kiocb *req)
>
> static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
> {
> - struct io_kiocb *nxt;
> -
> - if (!(req->flags & REQ_F_LINK_HEAD))
> - return NULL;
> - if (req->flags & REQ_F_LINK_TIMEOUT)
> - return NULL;
> + struct io_kiocb *nxt = req->link;
>
> - nxt = list_first_entry_or_null(&req->link_list, struct io_kiocb,
> - link_list);
> - if (!nxt || nxt->opcode != IORING_OP_LINK_TIMEOUT)
> + if (!nxt || (req->flags & REQ_F_LINK_TIMEOUT) ||
> + nxt->opcode != IORING_OP_LINK_TIMEOUT)
> return NULL;
>
> nxt->timeout.head = req;
> @@ -6563,7 +6540,7 @@ static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
> return ret;
> }
> trace_io_uring_link(ctx, req, head);
> - list_add_tail(&req->link_list, &head->link_list);
> + link->last->link = req;
> link->last = req;
>
> /* last request of a link, enqueue the link */
> @@ -6577,9 +6554,6 @@ static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
> ctx->drain_next = 0;
> }
> if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) {
> - req->flags |= REQ_F_LINK_HEAD;
> - INIT_LIST_HEAD(&req->link_list);
> -
> ret = io_req_defer_prep(req, sqe);
> if (unlikely(ret))
> req->flags |= REQ_F_FAIL_LINK;
> @@ -6712,6 +6686,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
> req->file = NULL;
> req->ctx = ctx;
> req->flags = 0;
> + req->link = NULL;
> /* one is dropped after submission, the other at completion */
> refcount_set(&req->refs, 2);
> req->task = current;
> @@ -8663,14 +8638,10 @@ static bool io_match_link(struct io_kiocb *preq, struct io_kiocb *req)
> {
> struct io_kiocb *link;
>
> - if (!(preq->flags & REQ_F_LINK_HEAD))
> - return false;
> -
> - list_for_each_entry(link, &preq->link_list, link_list) {
> + io_for_each_link(link, preq->link) {
> if (link == req)
> return true;
> }
> -
> return false;
> }
>
>
--
Pavel Begunkov
next prev parent reply other threads:[~2020-11-04 15:44 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-11-04 13:39 [PATCH 5.10] io_uring: fix overflowed cancel w/ linked ->files Pavel Begunkov
2020-11-04 15:25 ` Jens Axboe
2020-11-04 15:41 ` Pavel Begunkov [this message]
2020-11-04 15:52 ` Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox