public inbox for [email protected]
 help / color / mirror / Atom feed
From: Pavel Begunkov <[email protected]>
To: Jens Axboe <[email protected]>, [email protected]
Cc: [email protected], 李通洲 <[email protected]>
Subject: Re: [PATCH 01/11] io_uring: allow unbreakable links
Date: Wed, 11 Dec 2019 00:10:12 +0300	[thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>


[-- Attachment #1.1: Type: text/plain, Size: 11463 bytes --]

Apart from debug code (see comments below) io_uring part of
the patchset looks good.

Reviewed-by: Pavel Begunkov <[email protected]>

On 10/12/2019 18:57, Jens Axboe wrote:
> Some commands will invariably end in a failure in the sense that the
> completion result will be less than zero. One such example is timeouts
> that don't have a completion count set, they will always complete with
> -ETIME unless cancelled.
> 
> For linked commands, we sever links and fail the rest of the chain if
> the result is less than zero. Since we have commands where we know that
> will happen, add IOSQE_IO_HARDLINK as a stronger link that doesn't sever
> regardless of the completion result. Note that the link will still sever
> if we fail submitting the parent request, hard links are only resilient
> in the presence of completion results for requests that did submit
> correctly.
> 
> Cc: [email protected] # v5.4
> Cc: Pavel Begunkov <[email protected]>
> Reported-by: 李通洲 <[email protected]>
> Signed-off-by: Jens Axboe <[email protected]>
> ---
>  fs/io_uring.c                 | 96 ++++++++++++++++++++---------------
>  include/uapi/linux/io_uring.h |  1 +
>  2 files changed, 56 insertions(+), 41 deletions(-)
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 405be10da73d..4cda61fe67da 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -377,6 +377,7 @@ struct io_kiocb {
>  #define REQ_F_TIMEOUT_NOSEQ	8192	/* no timeout sequence */
>  #define REQ_F_INFLIGHT		16384	/* on inflight list */
>  #define REQ_F_COMP_LOCKED	32768	/* completion under lock */
> +#define REQ_F_HARDLINK		65536	/* doesn't sever on completion < 0 */
>  	u64			user_data;
>  	u32			result;
>  	u32			sequence;
> @@ -1292,6 +1293,12 @@ static void kiocb_end_write(struct io_kiocb *req)
>  	file_end_write(req->file);
>  }
>  
> +static inline void req_set_fail_links(struct io_kiocb *req)
> +{
> +	if ((req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) == REQ_F_LINK)
> +		req->flags |= REQ_F_FAIL_LINK;
> +}
> +
>  static void io_complete_rw_common(struct kiocb *kiocb, long res)
>  {
>  	struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
> @@ -1299,8 +1306,8 @@ static void io_complete_rw_common(struct kiocb *kiocb, long res)
>  	if (kiocb->ki_flags & IOCB_WRITE)
>  		kiocb_end_write(req);
>  
> -	if ((req->flags & REQ_F_LINK) && res != req->result)
> -		req->flags |= REQ_F_FAIL_LINK;
> +	if (res != req->result)
> +		req_set_fail_links(req);
>  	io_cqring_add_event(req, res);
>  }
>  
> @@ -1330,8 +1337,8 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
>  	if (kiocb->ki_flags & IOCB_WRITE)
>  		kiocb_end_write(req);
>  
> -	if ((req->flags & REQ_F_LINK) && res != req->result)
> -		req->flags |= REQ_F_FAIL_LINK;
> +	if (res != req->result)
> +		req_set_fail_links(req);
>  	req->result = res;
>  	if (res != -EAGAIN)
>  		req->flags |= REQ_F_IOPOLL_COMPLETED;
> @@ -1956,8 +1963,8 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe,
>  				end > 0 ? end : LLONG_MAX,
>  				fsync_flags & IORING_FSYNC_DATASYNC);
>  
> -	if (ret < 0 && (req->flags & REQ_F_LINK))
> -		req->flags |= REQ_F_FAIL_LINK;
> +	if (ret < 0)
> +		req_set_fail_links(req);
>  	io_cqring_add_event(req, ret);
>  	io_put_req_find_next(req, nxt);
>  	return 0;
> @@ -2003,8 +2010,8 @@ static int io_sync_file_range(struct io_kiocb *req,
>  
>  	ret = sync_file_range(req->rw.ki_filp, sqe_off, sqe_len, flags);
>  
> -	if (ret < 0 && (req->flags & REQ_F_LINK))
> -		req->flags |= REQ_F_FAIL_LINK;
> +	if (ret < 0)
> +		req_set_fail_links(req);
>  	io_cqring_add_event(req, ret);
>  	io_put_req_find_next(req, nxt);
>  	return 0;
> @@ -2079,8 +2086,8 @@ static int io_sendmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe,
>  
>  out:
>  	io_cqring_add_event(req, ret);
> -	if (ret < 0 && (req->flags & REQ_F_LINK))
> -		req->flags |= REQ_F_FAIL_LINK;
> +	if (ret < 0)
> +		req_set_fail_links(req);
>  	io_put_req_find_next(req, nxt);
>  	return 0;
>  #else
> @@ -2161,8 +2168,8 @@ static int io_recvmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe,
>  
>  out:
>  	io_cqring_add_event(req, ret);
> -	if (ret < 0 && (req->flags & REQ_F_LINK))
> -		req->flags |= REQ_F_FAIL_LINK;
> +	if (ret < 0)
> +		req_set_fail_links(req);
>  	io_put_req_find_next(req, nxt);
>  	return 0;
>  #else
> @@ -2196,8 +2203,8 @@ static int io_accept(struct io_kiocb *req, const struct io_uring_sqe *sqe,
>  	}
>  	if (ret == -ERESTARTSYS)
>  		ret = -EINTR;
> -	if (ret < 0 && (req->flags & REQ_F_LINK))
> -		req->flags |= REQ_F_FAIL_LINK;
> +	if (ret < 0)
> +		req_set_fail_links(req);
>  	io_cqring_add_event(req, ret);
>  	io_put_req_find_next(req, nxt);
>  	return 0;
> @@ -2263,8 +2270,8 @@ static int io_connect(struct io_kiocb *req, const struct io_uring_sqe *sqe,
>  	if (ret == -ERESTARTSYS)
>  		ret = -EINTR;
>  out:
> -	if (ret < 0 && (req->flags & REQ_F_LINK))
> -		req->flags |= REQ_F_FAIL_LINK;
> +	if (ret < 0)
> +		req_set_fail_links(req);
>  	io_cqring_add_event(req, ret);
>  	io_put_req_find_next(req, nxt);this and
>  	return 0;
> @@ -2340,8 +2347,8 @@ static int io_poll_remove(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>  	spin_unlock_irq(&ctx->completion_lock);
>  
>  	io_cqring_add_event(req, ret);
> -	if (ret < 0 && (req->flags & REQ_F_LINK))
> -		req->flags |= REQ_F_FAIL_LINK;
> +	if (ret < 0)
> +		req_set_fail_links(req);
>  	io_put_req(req);
>  	return 0;
>  }
> @@ -2399,8 +2406,8 @@ static void io_poll_complete_work(struct io_wq_work **workptr)
>  
>  	io_cqring_ev_posted(ctx);
>  
> -	if (ret < 0 && req->flags & REQ_F_LINK)
> -		req->flags |= REQ_F_FAIL_LINK;
> +	if (ret < 0)
> +		req_set_fail_links(req);
>  	io_put_req_find_next(req, &nxt);
>  	if (nxt)
>  		*workptr = &nxt->work;
> @@ -2582,8 +2589,7 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
>  	spin_unlock_irqrestore(&ctx->completion_lock, flags);
>  
>  	io_cqring_ev_posted(ctx);
> -	if (req->flags & REQ_F_LINK)
> -		req->flags |= REQ_F_FAIL_LINK;
> +	req_set_fail_links(req);
>  	io_put_req(req);
>  	return HRTIMER_NORESTART;
>  }
> @@ -2608,8 +2614,7 @@ static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data)
>  	if (ret == -1)
>  		return -EALREADY;
>  
> -	if (req->flags & REQ_F_LINK)
> -		req->flags |= REQ_F_FAIL_LINK;
> +	req_set_fail_links(req);
>  	io_cqring_fill_event(req, -ECANCELED);
>  	io_put_req(req);
>  	return 0;
> @@ -2640,8 +2645,8 @@ static int io_timeout_remove(struct io_kiocb *req,
>  	io_commit_cqring(ctx);
>  	spin_unlock_irq(&ctx->completion_lock);
>  	io_cqring_ev_posted(ctx);
> -	if (ret < 0 && req->flags & REQ_F_LINK)
> -		req->flags |= REQ_F_FAIL_LINK;
> +	if (ret < 0)
> +		req_set_fail_links(req);
>  	io_put_req(req);
>  	return 0;
>  }
> @@ -2655,13 +2660,19 @@ static int io_timeout_prep(struct io_kiocb *req, struct io_async_ctx *io,
>  
>  	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
>  		return -EINVAL;
> -	if (sqe->ioprio || sqe->buf_index || sqe->len != 1)
> +	if (sqe->ioprio || sqe->buf_index || sqe->len != 1) {
> +		printk("1\n");

debug output

>  		return -EINVAL;
> -	if (sqe->off && is_timeout_link)
> +	}
> +	if (sqe->off && is_timeout_link) {
> +		printk("2\n");

same

>  		return -EINVAL;
> +	}
>  	flags = READ_ONCE(sqe->timeout_flags);
> -	if (flags & ~IORING_TIMEOUT_ABS)
> +	if (flags & ~IORING_TIMEOUT_ABS) {
> +		printk("3\n");

same

>  		return -EINVAL;
> +	}
>  
>  	data = &io->timeout;
>  	data->req = req;
> @@ -2822,8 +2833,8 @@ static void io_async_find_and_cancel(struct io_ring_ctx *ctx,
>  	spin_unlock_irqrestore(&ctx->completion_lock, flags);
>  	io_cqring_ev_posted(ctx);
>  
> -	if (ret < 0 && (req->flags & REQ_F_LINK))
> -		req->flags |= REQ_F_FAIL_LINK;
> +	if (ret < 0)
> +		req_set_fail_links(req);
>  	io_put_req_find_next(req, nxt);
>  }
>  
> @@ -3044,8 +3055,7 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
>  	io_put_req(req);
>  
>  	if (ret) {
> -		if (req->flags & REQ_F_LINK)
> -			req->flags |= REQ_F_FAIL_LINK;
> +		req_set_fail_links(req);
>  		io_cqring_add_event(req, ret);
>  		io_put_req(req);
>  	}
> @@ -3179,8 +3189,7 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
>  	spin_unlock_irqrestore(&ctx->completion_lock, flags);
>  
>  	if (prev) {
> -		if (prev->flags & REQ_F_LINK)
> -			prev->flags |= REQ_F_FAIL_LINK;
> +		req_set_fail_links(prev);
>  		io_async_find_and_cancel(ctx, req, prev->user_data, NULL,
>  						-ETIME);
>  		io_put_req(prev);
> @@ -3273,8 +3282,7 @@ static void __io_queue_sqe(struct io_kiocb *req)
>  	/* and drop final reference, if we failed */
>  	if (ret) {
>  		io_cqring_add_event(req, ret);
> -		if (req->flags & REQ_F_LINK)
> -			req->flags |= REQ_F_FAIL_LINK;
> +		req_set_fail_links(req);
>  		io_put_req(req);
>  	}
>  }
> @@ -3293,8 +3301,7 @@ static void io_queue_sqe(struct io_kiocb *req)
>  	if (ret) {
>  		if (ret != -EIOCBQUEUED) {
>  			io_cqring_add_event(req, ret);
> -			if (req->flags & REQ_F_LINK)
> -				req->flags |= REQ_F_FAIL_LINK;
> +			req_set_fail_links(req);
>  			io_double_put_req(req);
>  		}
>  	} else
> @@ -3311,7 +3318,8 @@ static inline void io_queue_link_head(struct io_kiocb *req)
>  }
>  
>  
> -#define SQE_VALID_FLAGS	(IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK)
> +#define SQE_VALID_FLAGS	(IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK|	\
> +				IOSQE_IO_HARDLINK)
>  
>  static bool io_submit_sqe(struct io_kiocb *req, struct io_submit_state *state,
>  			  struct io_kiocb **link)
> @@ -3349,6 +3357,9 @@ static bool io_submit_sqe(struct io_kiocb *req, struct io_submit_state *state,
>  		if (req->sqe->flags & IOSQE_IO_DRAIN)
>  			(*link)->flags |= REQ_F_DRAIN_LINK | REQ_F_IO_DRAIN;
>  
> +		if (req->sqe->flags & IOSQE_IO_HARDLINK)
> +			req->flags |= REQ_F_HARDLINK;
> +
>  		io = kmalloc(sizeof(*io), GFP_KERNEL);
>  		if (!io) {
>  			ret = -EAGAIN;
> @@ -3358,13 +3369,16 @@ static bool io_submit_sqe(struct io_kiocb *req, struct io_submit_state *state,
>  		ret = io_req_defer_prep(req, io);
>  		if (ret) {
>  			kfree(io);
> +			/* fail even hard links since we don't submit */
>  			prev->flags |= REQ_F_FAIL_LINK;
>  			goto err_req;
>  		}
>  		trace_io_uring_link(ctx, req, prev);
>  		list_add_tail(&req->link_list, &prev->link_list);
> -	} else if (req->sqe->flags & IOSQE_IO_LINK) {
> +	} else if (req->sqe->flags & (IOSQE_IO_LINK|IOSQE_IO_HARDLINK)) {
>  		req->flags |= REQ_F_LINK;
> +		if (req->sqe->flags & IOSQE_IO_HARDLINK)
> +			req->flags |= REQ_F_HARDLINK;
>  
>  		INIT_LIST_HEAD(&req->link_list);
>  		*link = req;
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index eabccb46edd1..ea231366f5fd 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -48,6 +48,7 @@ struct io_uring_sqe {
>  #define IOSQE_FIXED_FILE	(1U << 0)	/* use fixed fileset */
>  #define IOSQE_IO_DRAIN		(1U << 1)	/* issue after inflight IO */
>  #define IOSQE_IO_LINK		(1U << 2)	/* links next sqe */
> +#define IOSQE_IO_HARDLINK	(1U << 3)	/* like LINK, but stronger */
>  
>  /*
>   * io_uring_setup() flags
> 

-- 
Pavel Begunkov


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

  reply	other threads:[~2019-12-10 21:25 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-12-10 15:57 [PATCHSET 0/11] io_uring improvements/fixes for 5.5-rc Jens Axboe
2019-12-10 15:57 ` [PATCH 01/11] io_uring: allow unbreakable links Jens Axboe
2019-12-10 21:10   ` Pavel Begunkov [this message]
2019-12-10 21:12     ` Jens Axboe
2019-12-10 21:28       ` Pavel Begunkov
2019-12-10 22:17         ` Jens Axboe
2019-12-10 15:57 ` [PATCH 02/11] io-wq: remove worker->wait waitqueue Jens Axboe
2019-12-10 15:57 ` [PATCH 03/11] io-wq: briefly spin for new work after finishing work Jens Axboe
2019-12-10 15:57 ` [PATCH 04/11] io_uring: sqthread should grab ctx->uring_lock for submissions Jens Axboe
2019-12-10 15:57 ` [PATCH 05/11] io_uring: deferred send/recvmsg should assign iov Jens Axboe
2019-12-10 15:57 ` [PATCH 06/11] io_uring: don't dynamically allocate poll data Jens Axboe
2019-12-10 15:57 ` [PATCH 07/11] io_uring: use atomic_t for refcounts Jens Axboe
2019-12-10 22:04   ` Jann Horn
2019-12-10 22:21     ` Jens Axboe
2019-12-10 22:46       ` Kees Cook
2019-12-10 22:55         ` Jens Axboe
2019-12-11 10:20           ` Will Deacon
2019-12-11 16:56             ` Kees Cook
2019-12-11 17:00               ` Jens Axboe
2019-12-10 15:57 ` [PATCH 08/11] io_uring: run next sqe inline if possible Jens Axboe
2019-12-10 15:57 ` [PATCH 09/11] io_uring: only hash regular files for async work execution Jens Axboe
2019-12-10 15:57 ` [PATCH 10/11] net: make socket read/write_iter() honor IOCB_NOWAIT Jens Axboe
2019-12-10 19:37   ` David Miller
2019-12-10 20:43     ` Jens Axboe
2019-12-10 15:57 ` [PATCH 11/11] io_uring: add sockets to list of files that support non-blocking issue Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox