From: Pavel Begunkov <[email protected]>
To: Jens Axboe <[email protected]>, [email protected]
Cc: [email protected], 李通洲 <[email protected]>
Subject: Re: [PATCH 01/11] io_uring: allow unbreakable links
Date: Wed, 11 Dec 2019 00:10:12 +0300 [thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>
[-- Attachment #1.1: Type: text/plain, Size: 11463 bytes --]
Apart from debug code (see comments below) io_uring part of
the patchset looks good.
Reviewed-by: Pavel Begunkov <[email protected]>
On 10/12/2019 18:57, Jens Axboe wrote:
> Some commands will invariably end in a failure in the sense that the
> completion result will be less than zero. One such example is timeouts
> that don't have a completion count set, they will always complete with
> -ETIME unless cancelled.
>
> For linked commands, we sever links and fail the rest of the chain if
> the result is less than zero. Since we have commands where we know that
> will happen, add IOSQE_IO_HARDLINK as a stronger link that doesn't sever
> regardless of the completion result. Note that the link will still sever
> if we fail submitting the parent request, hard links are only resilient
> in the presence of completion results for requests that did submit
> correctly.
>
> Cc: [email protected] # v5.4
> Cc: Pavel Begunkov <[email protected]>
> Reported-by: 李通洲 <[email protected]>
> Signed-off-by: Jens Axboe <[email protected]>
> ---
> fs/io_uring.c | 96 ++++++++++++++++++++---------------
> include/uapi/linux/io_uring.h | 1 +
> 2 files changed, 56 insertions(+), 41 deletions(-)
>
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 405be10da73d..4cda61fe67da 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -377,6 +377,7 @@ struct io_kiocb {
> #define REQ_F_TIMEOUT_NOSEQ 8192 /* no timeout sequence */
> #define REQ_F_INFLIGHT 16384 /* on inflight list */
> #define REQ_F_COMP_LOCKED 32768 /* completion under lock */
> +#define REQ_F_HARDLINK 65536 /* doesn't sever on completion < 0 */
> u64 user_data;
> u32 result;
> u32 sequence;
> @@ -1292,6 +1293,12 @@ static void kiocb_end_write(struct io_kiocb *req)
> file_end_write(req->file);
> }
>
> +static inline void req_set_fail_links(struct io_kiocb *req)
> +{
> + if ((req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) == REQ_F_LINK)
> + req->flags |= REQ_F_FAIL_LINK;
> +}
> +
> static void io_complete_rw_common(struct kiocb *kiocb, long res)
> {
> struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
> @@ -1299,8 +1306,8 @@ static void io_complete_rw_common(struct kiocb *kiocb, long res)
> if (kiocb->ki_flags & IOCB_WRITE)
> kiocb_end_write(req);
>
> - if ((req->flags & REQ_F_LINK) && res != req->result)
> - req->flags |= REQ_F_FAIL_LINK;
> + if (res != req->result)
> + req_set_fail_links(req);
> io_cqring_add_event(req, res);
> }
>
> @@ -1330,8 +1337,8 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
> if (kiocb->ki_flags & IOCB_WRITE)
> kiocb_end_write(req);
>
> - if ((req->flags & REQ_F_LINK) && res != req->result)
> - req->flags |= REQ_F_FAIL_LINK;
> + if (res != req->result)
> + req_set_fail_links(req);
> req->result = res;
> if (res != -EAGAIN)
> req->flags |= REQ_F_IOPOLL_COMPLETED;
> @@ -1956,8 +1963,8 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe,
> end > 0 ? end : LLONG_MAX,
> fsync_flags & IORING_FSYNC_DATASYNC);
>
> - if (ret < 0 && (req->flags & REQ_F_LINK))
> - req->flags |= REQ_F_FAIL_LINK;
> + if (ret < 0)
> + req_set_fail_links(req);
> io_cqring_add_event(req, ret);
> io_put_req_find_next(req, nxt);
> return 0;
> @@ -2003,8 +2010,8 @@ static int io_sync_file_range(struct io_kiocb *req,
>
> ret = sync_file_range(req->rw.ki_filp, sqe_off, sqe_len, flags);
>
> - if (ret < 0 && (req->flags & REQ_F_LINK))
> - req->flags |= REQ_F_FAIL_LINK;
> + if (ret < 0)
> + req_set_fail_links(req);
> io_cqring_add_event(req, ret);
> io_put_req_find_next(req, nxt);
> return 0;
> @@ -2079,8 +2086,8 @@ static int io_sendmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe,
>
> out:
> io_cqring_add_event(req, ret);
> - if (ret < 0 && (req->flags & REQ_F_LINK))
> - req->flags |= REQ_F_FAIL_LINK;
> + if (ret < 0)
> + req_set_fail_links(req);
> io_put_req_find_next(req, nxt);
> return 0;
> #else
> @@ -2161,8 +2168,8 @@ static int io_recvmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe,
>
> out:
> io_cqring_add_event(req, ret);
> - if (ret < 0 && (req->flags & REQ_F_LINK))
> - req->flags |= REQ_F_FAIL_LINK;
> + if (ret < 0)
> + req_set_fail_links(req);
> io_put_req_find_next(req, nxt);
> return 0;
> #else
> @@ -2196,8 +2203,8 @@ static int io_accept(struct io_kiocb *req, const struct io_uring_sqe *sqe,
> }
> if (ret == -ERESTARTSYS)
> ret = -EINTR;
> - if (ret < 0 && (req->flags & REQ_F_LINK))
> - req->flags |= REQ_F_FAIL_LINK;
> + if (ret < 0)
> + req_set_fail_links(req);
> io_cqring_add_event(req, ret);
> io_put_req_find_next(req, nxt);
> return 0;
> @@ -2263,8 +2270,8 @@ static int io_connect(struct io_kiocb *req, const struct io_uring_sqe *sqe,
> if (ret == -ERESTARTSYS)
> ret = -EINTR;
> out:
> - if (ret < 0 && (req->flags & REQ_F_LINK))
> - req->flags |= REQ_F_FAIL_LINK;
> + if (ret < 0)
> + req_set_fail_links(req);
> io_cqring_add_event(req, ret);
> io_put_req_find_next(req, nxt);
this and
> return 0;
> @@ -2340,8 +2347,8 @@ static int io_poll_remove(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> spin_unlock_irq(&ctx->completion_lock);
>
> io_cqring_add_event(req, ret);
> - if (ret < 0 && (req->flags & REQ_F_LINK))
> - req->flags |= REQ_F_FAIL_LINK;
> + if (ret < 0)
> + req_set_fail_links(req);
> io_put_req(req);
> return 0;
> }
> @@ -2399,8 +2406,8 @@ static void io_poll_complete_work(struct io_wq_work **workptr)
>
> io_cqring_ev_posted(ctx);
>
> - if (ret < 0 && req->flags & REQ_F_LINK)
> - req->flags |= REQ_F_FAIL_LINK;
> + if (ret < 0)
> + req_set_fail_links(req);
> io_put_req_find_next(req, &nxt);
> if (nxt)
> *workptr = &nxt->work;
> @@ -2582,8 +2589,7 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
> spin_unlock_irqrestore(&ctx->completion_lock, flags);
>
> io_cqring_ev_posted(ctx);
> - if (req->flags & REQ_F_LINK)
> - req->flags |= REQ_F_FAIL_LINK;
> + req_set_fail_links(req);
> io_put_req(req);
> return HRTIMER_NORESTART;
> }
> @@ -2608,8 +2614,7 @@ static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data)
> if (ret == -1)
> return -EALREADY;
>
> - if (req->flags & REQ_F_LINK)
> - req->flags |= REQ_F_FAIL_LINK;
> + req_set_fail_links(req);
> io_cqring_fill_event(req, -ECANCELED);
> io_put_req(req);
> return 0;
> @@ -2640,8 +2645,8 @@ static int io_timeout_remove(struct io_kiocb *req,
> io_commit_cqring(ctx);
> spin_unlock_irq(&ctx->completion_lock);
> io_cqring_ev_posted(ctx);
> - if (ret < 0 && req->flags & REQ_F_LINK)
> - req->flags |= REQ_F_FAIL_LINK;
> + if (ret < 0)
> + req_set_fail_links(req);
> io_put_req(req);
> return 0;
> }
> @@ -2655,13 +2660,19 @@ static int io_timeout_prep(struct io_kiocb *req, struct io_async_ctx *io,
>
> if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
> return -EINVAL;
> - if (sqe->ioprio || sqe->buf_index || sqe->len != 1)
> + if (sqe->ioprio || sqe->buf_index || sqe->len != 1) {
> + printk("1\n");
debug output
> return -EINVAL;
> - if (sqe->off && is_timeout_link)
> + }
> + if (sqe->off && is_timeout_link) {
> + printk("2\n");
same
> return -EINVAL;
> + }
> flags = READ_ONCE(sqe->timeout_flags);
> - if (flags & ~IORING_TIMEOUT_ABS)
> + if (flags & ~IORING_TIMEOUT_ABS) {
> + printk("3\n");
same
> return -EINVAL;
> + }
>
> data = &io->timeout;
> data->req = req;
> @@ -2822,8 +2833,8 @@ static void io_async_find_and_cancel(struct io_ring_ctx *ctx,
> spin_unlock_irqrestore(&ctx->completion_lock, flags);
> io_cqring_ev_posted(ctx);
>
> - if (ret < 0 && (req->flags & REQ_F_LINK))
> - req->flags |= REQ_F_FAIL_LINK;
> + if (ret < 0)
> + req_set_fail_links(req);
> io_put_req_find_next(req, nxt);
> }
>
> @@ -3044,8 +3055,7 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
> io_put_req(req);
>
> if (ret) {
> - if (req->flags & REQ_F_LINK)
> - req->flags |= REQ_F_FAIL_LINK;
> + req_set_fail_links(req);
> io_cqring_add_event(req, ret);
> io_put_req(req);
> }
> @@ -3179,8 +3189,7 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
> spin_unlock_irqrestore(&ctx->completion_lock, flags);
>
> if (prev) {
> - if (prev->flags & REQ_F_LINK)
> - prev->flags |= REQ_F_FAIL_LINK;
> + req_set_fail_links(prev);
> io_async_find_and_cancel(ctx, req, prev->user_data, NULL,
> -ETIME);
> io_put_req(prev);
> @@ -3273,8 +3282,7 @@ static void __io_queue_sqe(struct io_kiocb *req)
> /* and drop final reference, if we failed */
> if (ret) {
> io_cqring_add_event(req, ret);
> - if (req->flags & REQ_F_LINK)
> - req->flags |= REQ_F_FAIL_LINK;
> + req_set_fail_links(req);
> io_put_req(req);
> }
> }
> @@ -3293,8 +3301,7 @@ static void io_queue_sqe(struct io_kiocb *req)
> if (ret) {
> if (ret != -EIOCBQUEUED) {
> io_cqring_add_event(req, ret);
> - if (req->flags & REQ_F_LINK)
> - req->flags |= REQ_F_FAIL_LINK;
> + req_set_fail_links(req);
> io_double_put_req(req);
> }
> } else
> @@ -3311,7 +3318,8 @@ static inline void io_queue_link_head(struct io_kiocb *req)
> }
>
>
> -#define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK)
> +#define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK| \
> + IOSQE_IO_HARDLINK)
>
> static bool io_submit_sqe(struct io_kiocb *req, struct io_submit_state *state,
> struct io_kiocb **link)
> @@ -3349,6 +3357,9 @@ static bool io_submit_sqe(struct io_kiocb *req, struct io_submit_state *state,
> if (req->sqe->flags & IOSQE_IO_DRAIN)
> (*link)->flags |= REQ_F_DRAIN_LINK | REQ_F_IO_DRAIN;
>
> + if (req->sqe->flags & IOSQE_IO_HARDLINK)
> + req->flags |= REQ_F_HARDLINK;
> +
> io = kmalloc(sizeof(*io), GFP_KERNEL);
> if (!io) {
> ret = -EAGAIN;
> @@ -3358,13 +3369,16 @@ static bool io_submit_sqe(struct io_kiocb *req, struct io_submit_state *state,
> ret = io_req_defer_prep(req, io);
> if (ret) {
> kfree(io);
> + /* fail even hard links since we don't submit */
> prev->flags |= REQ_F_FAIL_LINK;
> goto err_req;
> }
> trace_io_uring_link(ctx, req, prev);
> list_add_tail(&req->link_list, &prev->link_list);
> - } else if (req->sqe->flags & IOSQE_IO_LINK) {
> + } else if (req->sqe->flags & (IOSQE_IO_LINK|IOSQE_IO_HARDLINK)) {
> req->flags |= REQ_F_LINK;
> + if (req->sqe->flags & IOSQE_IO_HARDLINK)
> + req->flags |= REQ_F_HARDLINK;
>
> INIT_LIST_HEAD(&req->link_list);
> *link = req;
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index eabccb46edd1..ea231366f5fd 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -48,6 +48,7 @@ struct io_uring_sqe {
> #define IOSQE_FIXED_FILE (1U << 0) /* use fixed fileset */
> #define IOSQE_IO_DRAIN (1U << 1) /* issue after inflight IO */
> #define IOSQE_IO_LINK (1U << 2) /* links next sqe */
> +#define IOSQE_IO_HARDLINK (1U << 3) /* like LINK, but stronger */
>
> /*
> * io_uring_setup() flags
>
--
Pavel Begunkov
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
next prev parent reply other threads:[~2019-12-10 21:25 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-12-10 15:57 [PATCHSET 0/11] io_uring improvements/fixes for 5.5-rc Jens Axboe
2019-12-10 15:57 ` [PATCH 01/11] io_uring: allow unbreakable links Jens Axboe
2019-12-10 21:10 ` Pavel Begunkov [this message]
2019-12-10 21:12 ` Jens Axboe
2019-12-10 21:28 ` Pavel Begunkov
2019-12-10 22:17 ` Jens Axboe
2019-12-10 15:57 ` [PATCH 02/11] io-wq: remove worker->wait waitqueue Jens Axboe
2019-12-10 15:57 ` [PATCH 03/11] io-wq: briefly spin for new work after finishing work Jens Axboe
2019-12-10 15:57 ` [PATCH 04/11] io_uring: sqthread should grab ctx->uring_lock for submissions Jens Axboe
2019-12-10 15:57 ` [PATCH 05/11] io_uring: deferred send/recvmsg should assign iov Jens Axboe
2019-12-10 15:57 ` [PATCH 06/11] io_uring: don't dynamically allocate poll data Jens Axboe
2019-12-10 15:57 ` [PATCH 07/11] io_uring: use atomic_t for refcounts Jens Axboe
2019-12-10 22:04 ` Jann Horn
2019-12-10 22:21 ` Jens Axboe
2019-12-10 22:46 ` Kees Cook
2019-12-10 22:55 ` Jens Axboe
2019-12-11 10:20 ` Will Deacon
2019-12-11 16:56 ` Kees Cook
2019-12-11 17:00 ` Jens Axboe
2019-12-10 15:57 ` [PATCH 08/11] io_uring: run next sqe inline if possible Jens Axboe
2019-12-10 15:57 ` [PATCH 09/11] io_uring: only hash regular files for async work execution Jens Axboe
2019-12-10 15:57 ` [PATCH 10/11] net: make socket read/write_iter() honor IOCB_NOWAIT Jens Axboe
2019-12-10 19:37 ` David Miller
2019-12-10 20:43 ` Jens Axboe
2019-12-10 15:57 ` [PATCH 11/11] io_uring: add sockets to list of files that support non-blocking issue Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox