From: Pavel Begunkov <[email protected]>
To: Keith Busch <[email protected]>,
[email protected], [email protected],
[email protected]
Cc: Keith Busch <[email protected]>
Subject: Re: [PATCHv3] io_uring: set plug tags for same file
Date: Wed, 9 Aug 2023 13:37:19 +0100
Message-ID: <[email protected]>
In-Reply-To: <[email protected]>
On 7/31/23 21:39, Keith Busch wrote:
> From: Keith Busch <[email protected]>
>
> io_uring tries to optimize tag allocation by hinting to the plug how
> many tags it expects to need for a batch instead of allocating each tag
> individually. But io_uring submission queues may have a mix of many
> devices for IO, so the number of IOs counted may be an overestimate. This
> can lead to allocating too many tags, which adds overhead to finding
> that many contiguous tags, to freeing up the ones we didn't use, and may
> starve out other users that could actually use them.
>
> When starting a new batch of uring commands, count only the commands
> that match the file descriptor of the first one seen for this
> optimization. This avoids having to call the unlikely
> "blk_mq_free_plug_rqs()" at the end of a submission when multiple
> devices are used in a batch.
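
The heuristic boils down to something like this, if I restate it in my
own sketch (made-up names, not the patch code):

struct batch_count {
	bool started;
	int fd;			/* fd of the first plugged request */
	unsigned short nr_ios;	/* tags we expect to need */
};

static void batch_count_sqe(struct batch_count *b, int fd)
{
	if (!b->started) {
		b->started = true;
		b->fd = fd;
		b->nr_ios = 1;
	} else if (fd == b->fd) {
		/* same device, safe to batch one more tag */
		b->nr_ios++;
	}
	/* different fd: leave nr_ios alone, that request gets its own tag */
}
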
Looks good and doesn't blow up on tests.
Reviewed-by: Pavel Begunkov <[email protected]>
> Signed-off-by: Keith Busch <[email protected]>
> ---
> v2->v3
>
> The previous version attempted to split the setup and submission steps
> further, but it was requested to go back to this simpler version.
>
> block/blk-core.c | 49 +++++++++++++++-------------------
> block/blk-mq.c | 6 +++--
> include/linux/blkdev.h | 6 -----
> include/linux/io_uring_types.h | 1 +
> io_uring/io_uring.c | 37 ++++++++++++++++++-------
> 5 files changed, 54 insertions(+), 45 deletions(-)
>
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 99d8b9812b18f..b8f8aa1376e60 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -1043,32 +1043,6 @@ int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork,
> }
> EXPORT_SYMBOL(kblockd_mod_delayed_work_on);
>
> -void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
> -{
> - struct task_struct *tsk = current;
> -
> - /*
> - * If this is a nested plug, don't actually assign it.
> - */
> - if (tsk->plug)
> - return;
> -
> - plug->mq_list = NULL;
> - plug->cached_rq = NULL;
> - plug->nr_ios = min_t(unsigned short, nr_ios, BLK_MAX_REQUEST_COUNT);
> - plug->rq_count = 0;
> - plug->multiple_queues = false;
> - plug->has_elevator = false;
> - plug->nowait = false;
> - INIT_LIST_HEAD(&plug->cb_list);
> -
> - /*
> - * Store ordering should not be needed here, since a potential
> - * preempt will imply a full memory barrier
> - */
> - tsk->plug = plug;
> -}
> -
> /**
> * blk_start_plug - initialize blk_plug and track it inside the task_struct
> * @plug: The &struct blk_plug that needs to be initialized
> @@ -1094,7 +1068,28 @@ void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
> */
> void blk_start_plug(struct blk_plug *plug)
> {
> - blk_start_plug_nr_ios(plug, 1);
> + struct task_struct *tsk = current;
> +
> + /*
> + * If this is a nested plug, don't actually assign it.
> + */
> + if (tsk->plug)
> + return;
> +
> + plug->mq_list = NULL;
> + plug->cached_rq = NULL;
> + plug->nr_ios = 1;
> + plug->rq_count = 0;
> + plug->multiple_queues = false;
> + plug->has_elevator = false;
> + plug->nowait = false;
> + INIT_LIST_HEAD(&plug->cb_list);
> +
> + /*
> + * Store ordering should not be needed here, since a potential
> + * preempt will imply a full memory barrier
> + */
> + tsk->plug = plug;
> }
> EXPORT_SYMBOL(blk_start_plug);
>
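Side note for anyone following along: the caller pairing is unchanged,
i.e. still

	struct blk_plug plug;

	blk_start_plug(&plug);	/* nr_ios now always starts at 1 */
	/* submit bios; io_uring bumps plug.nr_ios per same-fd request */
	blk_finish_plug(&plug);

only the nr_ios seeding moved out of the start path.
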
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index d50b1d62a3d92..fc75fb9ef34ed 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -523,7 +523,8 @@ static struct request *blk_mq_rq_cache_fill(struct request_queue *q,
> .q = q,
> .flags = flags,
> .cmd_flags = opf,
> - .nr_tags = plug->nr_ios,
> + .nr_tags = min_t(unsigned int, plug->nr_ios,
> + BLK_MAX_REQUEST_COUNT),
> .cached_rq = &plug->cached_rq,
> };
> struct request *rq;
> @@ -2859,7 +2860,8 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
> rq_qos_throttle(q, bio);
>
> if (plug) {
> - data.nr_tags = plug->nr_ios;
> + data.nr_tags = min_t(unsigned int, plug->nr_ios,
> + BLK_MAX_REQUEST_COUNT);
> plug->nr_ios = 1;
> data.cached_rq = &plug->cached_rq;
> }
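
Clamping at allocation time rather than at plug start makes sense now
that io_uring can increment nr_ios past the old cap. Roughly:

/*
 * If 100 same-fd SQEs were counted, plug->nr_ios == 100, but the batch
 * allocation still asks for at most BLK_MAX_REQUEST_COUNT tags (32 in
 * current trees, if I'm not mistaken). nr_ios is reset to 1 right
 * after, so once the cached requests are drained the remainder
 * allocate one at a time.
 */
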
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index ed44a997f629f..a2a022957cd96 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -984,7 +984,6 @@ struct blk_plug_cb {
> extern struct blk_plug_cb *blk_check_plugged(blk_plug_cb_fn unplug,
> void *data, int size);
> extern void blk_start_plug(struct blk_plug *);
> -extern void blk_start_plug_nr_ios(struct blk_plug *, unsigned short);
> extern void blk_finish_plug(struct blk_plug *);
>
> void __blk_flush_plug(struct blk_plug *plug, bool from_schedule);
> @@ -1000,11 +999,6 @@ long nr_blockdev_pages(void);
> struct blk_plug {
> };
>
> -static inline void blk_start_plug_nr_ios(struct blk_plug *plug,
> - unsigned short nr_ios)
> -{
> -}
> -
> static inline void blk_start_plug(struct blk_plug *plug)
> {
> }
> diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
> index f04ce513fadba..109d4530bccbf 100644
> --- a/include/linux/io_uring_types.h
> +++ b/include/linux/io_uring_types.h
> @@ -175,6 +175,7 @@ struct io_submit_state {
> bool need_plug;
> unsigned short submit_nr;
> unsigned int cqes_count;
> + int fd;
> struct blk_plug plug;
> struct io_uring_cqe cqes[16];
> };
> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
> index 135da2fd0edab..36f45d234fe49 100644
> --- a/io_uring/io_uring.c
> +++ b/io_uring/io_uring.c
> @@ -2195,18 +2195,25 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
> return -EINVAL;
>
> if (def->needs_file) {
> - struct io_submit_state *state = &ctx->submit_state;
> -
> req->cqe.fd = READ_ONCE(sqe->fd);
>
> /*
> * Plug now if we have more than 2 IO left after this, and the
> * target is potentially a read/write to block based storage.
> */
> - if (state->need_plug && def->plug) {
> - state->plug_started = true;
> - state->need_plug = false;
> - blk_start_plug_nr_ios(&state->plug, state->submit_nr);
> + if (def->plug) {
> + struct io_submit_state *state = &ctx->submit_state;
> +
> + if (state->need_plug) {
> + state->plug_started = true;
> + state->need_plug = false;
> + state->fd = req->cqe.fd;
> + blk_start_plug(&state->plug);
> + } else if (state->plug_started &&
> + state->fd == req->cqe.fd &&
> + !state->link.head) {
> + state->plug.nr_ios++;
> + }
> }
> }
>
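To sanity-check my reading, walking a hypothetical mixed batch through
the new logic:

/*
 * SQEs in one submit:  fd=3, fd=3, fd=7, fd=3
 *
 * sqe[0] fd=3: need_plug -> blk_start_plug(), state->fd = 3, nr_ios = 1
 * sqe[1] fd=3: matches state->fd, no link -> nr_ios = 2
 * sqe[2] fd=7: different fd               -> nr_ios stays 2
 * sqe[3] fd=3: matches state->fd          -> nr_ios = 3
 *
 * So the plug pre-allocates tags for the three fd=3 requests only;
 * the fd=7 request falls back to a plain single-tag allocation.
 */
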
> @@ -2267,7 +2274,8 @@ static __cold int io_submit_fail_init(const struct io_uring_sqe *sqe,
> }
>
> static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
> - const struct io_uring_sqe *sqe)
> + const struct io_uring_sqe *sqe,
> + struct io_wq_work_list *req_list)
> __must_hold(&ctx->uring_lock)
> {
> struct io_submit_link *link = &ctx->submit_state.link;
> @@ -2315,7 +2323,7 @@ static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
> return 0;
> }
>
> - io_queue_sqe(req);
> + wq_list_add_tail(&req->comp_list, req_list);
> return 0;
> }
>
> @@ -2400,6 +2408,8 @@ int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
> __must_hold(&ctx->uring_lock)
> {
> unsigned int entries = io_sqring_entries(ctx);
> + struct io_wq_work_list req_list;
> + struct io_kiocb *req;
> unsigned int left;
> int ret;
>
> @@ -2410,9 +2420,9 @@ int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
> io_get_task_refs(left);
> io_submit_state_start(&ctx->submit_state, left);
>
> + INIT_WQ_LIST(&req_list);
> do {
> const struct io_uring_sqe *sqe;
> - struct io_kiocb *req;
>
> if (unlikely(!io_alloc_req(ctx, &req)))
> break;
> @@ -2425,13 +2435,20 @@ int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
> * Continue submitting even for sqe failure if the
> * ring was setup with IORING_SETUP_SUBMIT_ALL
> */
> - if (unlikely(io_submit_sqe(ctx, req, sqe)) &&
> + if (unlikely(io_submit_sqe(ctx, req, sqe, &req_list)) &&
> !(ctx->flags & IORING_SETUP_SUBMIT_ALL)) {
> left--;
> break;
> }
> } while (--left);
>
> + while (req_list.first) {
> + req = container_of(req_list.first, struct io_kiocb, comp_list);
> + req_list.first = req->comp_list.next;
> + req->comp_list.next = NULL;
> + io_queue_sqe(req);
> + }
> +
> if (unlikely(left)) {
> ret -= left;
> /* try again if it submitted nothing and can't allocate a req */
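
If I'm reading the two-pass structure right, the deferral is what makes
the counting useful at all:

/*
 * Pass 1: io_submit_sqe() counts plug.nr_ios per same-fd request and
 *         parks each req on req_list instead of queueing it.
 * Pass 2: the req_list drain calls io_queue_sqe(), so the first
 *         block-layer allocation already sees the final nr_ios.
 *
 * Previously the plug was simply primed with state->submit_nr, an
 * upper bound covering every SQE regardless of target device.
 */
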
--
Pavel Begunkov