public inbox for [email protected]
 help / color / mirror / Atom feed
From: Pavel Begunkov <[email protected]>
To: Keith Busch <[email protected]>,
	[email protected], [email protected],
	[email protected]
Cc: Keith Busch <[email protected]>
Subject: Re: [PATCHv3] io_uring: set plug tags for same file
Date: Wed, 9 Aug 2023 13:37:19 +0100	[thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>

On 7/31/23 21:39, Keith Busch wrote:
> From: Keith Busch <[email protected]>
> 
> io_uring tries to optimize allocating tags by hinting to the plug how
> many it expects to need for a batch instead of allocating each tag
> individually. But io_uring submission queueus may have a mix of many
> devices for io, so the number of io's counted may be overestimated. This
> can lead to allocating too many tags, which adds overhead to finding
> that many contiguous tags, freeing up the ones we didn't use, and may
> starve out other users that can actually use them.
> 
> When starting a new batch of uring commands, count only commands that
> match the file descriptor of the first seen for this optimization. This
> avoids have to call the unlikely "blk_mq_free_plug_rqs()" at the end of
> a submission when multiple devices are used in a batch.

looks good and doesn't blow up on tests

Reviewed-by: Pavel Begunkov <[email protected]>


> Signed-off-by: Keith Busch <[email protected]>
> ---
> v2->v3
> 
>    The previous attempted to split the setup and submit further, but was
>    requested to go back to this simpler version.
> 
>   block/blk-core.c               | 49 +++++++++++++++-------------------
>   block/blk-mq.c                 |  6 +++--
>   include/linux/blkdev.h         |  6 -----
>   include/linux/io_uring_types.h |  1 +
>   io_uring/io_uring.c            | 37 ++++++++++++++++++-------
>   5 files changed, 54 insertions(+), 45 deletions(-)
> 
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 99d8b9812b18f..b8f8aa1376e60 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -1043,32 +1043,6 @@ int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork,
>   }
>   EXPORT_SYMBOL(kblockd_mod_delayed_work_on);
>   
> -void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
> -{
> -	struct task_struct *tsk = current;
> -
> -	/*
> -	 * If this is a nested plug, don't actually assign it.
> -	 */
> -	if (tsk->plug)
> -		return;
> -
> -	plug->mq_list = NULL;
> -	plug->cached_rq = NULL;
> -	plug->nr_ios = min_t(unsigned short, nr_ios, BLK_MAX_REQUEST_COUNT);
> -	plug->rq_count = 0;
> -	plug->multiple_queues = false;
> -	plug->has_elevator = false;
> -	plug->nowait = false;
> -	INIT_LIST_HEAD(&plug->cb_list);
> -
> -	/*
> -	 * Store ordering should not be needed here, since a potential
> -	 * preempt will imply a full memory barrier
> -	 */
> -	tsk->plug = plug;
> -}
> -
>   /**
>    * blk_start_plug - initialize blk_plug and track it inside the task_struct
>    * @plug:	The &struct blk_plug that needs to be initialized
> @@ -1094,7 +1068,28 @@ void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
>    */
>   void blk_start_plug(struct blk_plug *plug)
>   {
> -	blk_start_plug_nr_ios(plug, 1);
> +	struct task_struct *tsk = current;
> +
> +	/*
> +	 * If this is a nested plug, don't actually assign it.
> +	 */
> +	if (tsk->plug)
> +		return;
> +
> +	plug->mq_list = NULL;
> +	plug->cached_rq = NULL;
> +	plug->nr_ios = 1;
> +	plug->rq_count = 0;
> +	plug->multiple_queues = false;
> +	plug->has_elevator = false;
> +	plug->nowait = false;
> +	INIT_LIST_HEAD(&plug->cb_list);
> +
> +	/*
> +	 * Store ordering should not be needed here, since a potential
> +	 * preempt will imply a full memory barrier
> +	 */
> +	tsk->plug = plug;
>   }
>   EXPORT_SYMBOL(blk_start_plug);
>   
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index d50b1d62a3d92..fc75fb9ef34ed 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -523,7 +523,8 @@ static struct request *blk_mq_rq_cache_fill(struct request_queue *q,
>   		.q		= q,
>   		.flags		= flags,
>   		.cmd_flags	= opf,
> -		.nr_tags	= plug->nr_ios,
> +		.nr_tags	= min_t(unsigned int, plug->nr_ios,
> +					BLK_MAX_REQUEST_COUNT),
>   		.cached_rq	= &plug->cached_rq,
>   	};
>   	struct request *rq;
> @@ -2859,7 +2860,8 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
>   	rq_qos_throttle(q, bio);
>   
>   	if (plug) {
> -		data.nr_tags = plug->nr_ios;
> +		data.nr_tags = min_t(unsigned int, plug->nr_ios,
> +				     BLK_MAX_REQUEST_COUNT);
>   		plug->nr_ios = 1;
>   		data.cached_rq = &plug->cached_rq;
>   	}
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index ed44a997f629f..a2a022957cd96 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -984,7 +984,6 @@ struct blk_plug_cb {
>   extern struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug,
>   					     void *data, int size);
>   extern void blk_start_plug(struct blk_plug *);
> -extern void blk_start_plug_nr_ios(struct blk_plug *, unsigned short);
>   extern void blk_finish_plug(struct blk_plug *);
>   
>   void __blk_flush_plug(struct blk_plug *plug, bool from_schedule);
> @@ -1000,11 +999,6 @@ long nr_blockdev_pages(void);
>   struct blk_plug {
>   };
>   
> -static inline void blk_start_plug_nr_ios(struct blk_plug *plug,
> -					 unsigned short nr_ios)
> -{
> -}
> -
>   static inline void blk_start_plug(struct blk_plug *plug)
>   {
>   }
> diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
> index f04ce513fadba..109d4530bccbf 100644
> --- a/include/linux/io_uring_types.h
> +++ b/include/linux/io_uring_types.h
> @@ -175,6 +175,7 @@ struct io_submit_state {
>   	bool			need_plug;
>   	unsigned short		submit_nr;
>   	unsigned int		cqes_count;
> +	int			fd;
>   	struct blk_plug		plug;
>   	struct io_uring_cqe	cqes[16];
>   };
> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
> index 135da2fd0edab..36f45d234fe49 100644
> --- a/io_uring/io_uring.c
> +++ b/io_uring/io_uring.c
> @@ -2195,18 +2195,25 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
>   		return -EINVAL;
>   
>   	if (def->needs_file) {
> -		struct io_submit_state *state = &ctx->submit_state;
> -
>   		req->cqe.fd = READ_ONCE(sqe->fd);
>   
>   		/*
>   		 * Plug now if we have more than 2 IO left after this, and the
>   		 * target is potentially a read/write to block based storage.
>   		 */
> -		if (state->need_plug && def->plug) {
> -			state->plug_started = true;
> -			state->need_plug = false;
> -			blk_start_plug_nr_ios(&state->plug, state->submit_nr);
> +	        if (def->plug) {
> +			struct io_submit_state *state = &ctx->submit_state;
> +
> +			if (state->need_plug) {
> +			        state->plug_started = true;
> +			        state->need_plug = false;
> +			        state->fd = req->cqe.fd;
> +			        blk_start_plug(&state->plug);
> +			} else if (state->plug_started &&
> +			           state->fd == req->cqe.fd &&
> +			           !state->link.head) {
> +			        state->plug.nr_ios++;
> +			}
>   		}
>   	}
>   
> @@ -2267,7 +2274,8 @@ static __cold int io_submit_fail_init(const struct io_uring_sqe *sqe,
>   }
>   
>   static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
> -			 const struct io_uring_sqe *sqe)
> +			 const struct io_uring_sqe *sqe,
> +			 struct io_wq_work_list *req_list)
>   	__must_hold(&ctx->uring_lock)
>   {
>   	struct io_submit_link *link = &ctx->submit_state.link;
> @@ -2315,7 +2323,7 @@ static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
>   		return 0;
>   	}
>   
> -	io_queue_sqe(req);
> +	wq_list_add_tail(&req->comp_list, req_list);
>   	return 0;
>   }
>   
> @@ -2400,6 +2408,8 @@ int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
>   	__must_hold(&ctx->uring_lock)
>   {
>   	unsigned int entries = io_sqring_entries(ctx);
> +	struct io_wq_work_list req_list;
> +	struct io_kiocb *req;
>   	unsigned int left;
>   	int ret;
>   
> @@ -2410,9 +2420,9 @@ int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
>   	io_get_task_refs(left);
>   	io_submit_state_start(&ctx->submit_state, left);
>   
> +	INIT_WQ_LIST(&req_list);
>   	do {
>   		const struct io_uring_sqe *sqe;
> -		struct io_kiocb *req;
>   
>   		if (unlikely(!io_alloc_req(ctx, &req)))
>   			break;
> @@ -2425,13 +2435,20 @@ int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
>   		 * Continue submitting even for sqe failure if the
>   		 * ring was setup with IORING_SETUP_SUBMIT_ALL
>   		 */
> -		if (unlikely(io_submit_sqe(ctx, req, sqe)) &&
> +		if (unlikely(io_submit_sqe(ctx, req, sqe, &req_list)) &&
>   		    !(ctx->flags & IORING_SETUP_SUBMIT_ALL)) {
>   			left--;
>   			break;
>   		}
>   	} while (--left);
>   
> +	while (req_list.first) {
> +		req = container_of(req_list.first, struct io_kiocb, comp_list);
> +		req_list.first = req->comp_list.next;
> +		req->comp_list.next = NULL;
> +		io_queue_sqe(req);
> +	}
> +
>   	if (unlikely(left)) {
>   		ret -= left;
>   		/* try again if it submitted nothing and can't allocate a req */

-- 
Pavel Begunkov

  reply	other threads:[~2023-08-09 12:39 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-07-31 20:39 [PATCHv3] io_uring: set plug tags for same file Keith Busch
2023-08-09 12:37 ` Pavel Begunkov [this message]
2023-08-11 19:24 ` Jens Axboe
2023-08-14 16:50   ` Keith Busch

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox