* [PATCH 1/3] io_uring: split req init from submit
From: Keith Busch @ 2023-07-28 20:14 UTC
  To: axboe, asml.silence, linux-block, io-uring; +Cc: Keith Busch

From: Keith Busch <[email protected]>

Split the req initialization and link handling from the submit. This
simplifies the submit path since everything that can fail is separate
from it, and makes it easier to create batched submissions later.
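
As a sketch, the submit loop after this change becomes the following
(condensed from the diff below; request allocation, tracing, and the
SUBMIT_ALL handling are elided):

	do {
		/* the only steps that can fail: */
		err = io_init_req(ctx, req, sqe);
		if (!err)
			err = io_setup_link(link, &req);
		if (unlikely(err))
			err = io_submit_fail_init(sqe, req, err);
		else if (req)
			io_submit_sqe(req);	/* cannot fail */
	} while (--left);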

Signed-off-by: Keith Busch <[email protected]>
---
 io_uring/io_uring.c | 66 +++++++++++++++++++++++++--------------------
 1 file changed, 37 insertions(+), 29 deletions(-)

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index d585171560ce5..818b2d1661c5e 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2279,18 +2279,20 @@ static __cold int io_submit_fail_init(const struct io_uring_sqe *sqe,
 	return 0;
 }
 
-static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
-			 const struct io_uring_sqe *sqe)
-	__must_hold(&ctx->uring_lock)
+static inline void io_submit_sqe(struct io_kiocb *req)
 {
-	struct io_submit_link *link = &ctx->submit_state.link;
-	int ret;
+	trace_io_uring_submit_req(req);
 
-	ret = io_init_req(ctx, req, sqe);
-	if (unlikely(ret))
-		return io_submit_fail_init(sqe, req, ret);
+	if (unlikely(req->flags & (REQ_F_FORCE_ASYNC | REQ_F_FAIL)))
+		io_queue_sqe_fallback(req);
+	else
+		io_queue_sqe(req);
+}
 
-	trace_io_uring_submit_req(req);
+static int io_setup_link(struct io_submit_link *link, struct io_kiocb **orig)
+{
+	struct io_kiocb *req = *orig;
+	int ret;
 
 	/*
 	 * If we already have a head request, queue this one for async
@@ -2300,35 +2302,28 @@ static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	 * conditions are true (normal request), then just queue it.
 	 */
 	if (unlikely(link->head)) {
+		*orig = NULL;
+
 		ret = io_req_prep_async(req);
 		if (unlikely(ret))
-			return io_submit_fail_init(sqe, req, ret);
+			return ret;
 
 		trace_io_uring_link(req, link->head);
 		link->last->link = req;
 		link->last = req;
-
 		if (req->flags & IO_REQ_LINK_FLAGS)
 			return 0;
+
 		/* last request of the link, flush it */
-		req = link->head;
+		*orig = link->head;
 		link->head = NULL;
-		if (req->flags & (REQ_F_FORCE_ASYNC | REQ_F_FAIL))
-			goto fallback;
-
-	} else if (unlikely(req->flags & (IO_REQ_LINK_FLAGS |
-					  REQ_F_FORCE_ASYNC | REQ_F_FAIL))) {
-		if (req->flags & IO_REQ_LINK_FLAGS) {
-			link->head = req;
-			link->last = req;
-		} else {
-fallback:
-			io_queue_sqe_fallback(req);
-		}
-		return 0;
+	} else if (unlikely(req->flags & IO_REQ_LINK_FLAGS)) {
+		link->head = req;
+		link->last = req;
+		*orig = NULL;
+		return 0;
 	}
 
-	io_queue_sqe(req);
 	return 0;
 }
 
@@ -2412,9 +2407,10 @@ static bool io_get_sqe(struct io_ring_ctx *ctx, const struct io_uring_sqe **sqe)
 int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
 	__must_hold(&ctx->uring_lock)
 {
+	struct io_submit_link *link = &ctx->submit_state.link;
 	unsigned int entries = io_sqring_entries(ctx);
 	unsigned int left;
-	int ret;
+	int ret, err;
 
 	if (unlikely(!entries))
 		return 0;
@@ -2434,12 +2430,24 @@ int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
 			break;
 		}
 
+		err = io_init_req(ctx, req, sqe);
+		if (unlikely(err))
+			goto error;
+
+		err = io_setup_link(link, &req);
+		if (unlikely(err))
+			goto error;
+
+		if (likely(req))
+			io_submit_sqe(req);
+		continue;
+error:
 		/*
 		 * Continue submitting even for sqe failure if the
 		 * ring was setup with IORING_SETUP_SUBMIT_ALL
 		 */
-		if (unlikely(io_submit_sqe(ctx, req, sqe)) &&
-		    !(ctx->flags & IORING_SETUP_SUBMIT_ALL)) {
+		err = io_submit_fail_init(sqe, req, err);
+		if (err && !(ctx->flags & IORING_SETUP_SUBMIT_ALL)) {
 			left--;
 			break;
 		}
-- 
2.34.1



* [PATCH 2/3] io_uring: split req prep and submit loops
From: Keith Busch @ 2023-07-28 20:14 UTC
  To: axboe, asml.silence, linux-block, io-uring; +Cc: Keith Busch

From: Keith Busch <[email protected]>

Do all the prep work up front, then dispatch all synchronous requests at
once. This will make it easier to count batches for plugging.
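
As a sketch, the submit loop then splits into two phases (condensed
from the diff below; SUBMIT_ALL and retry handling are elided):

	/* phase 1: init and link setup; anything that can fail or
	 * needs the async fallback is resolved here
	 */
	do {
		err = io_init_req(ctx, req, sqe);
		if (!err)
			err = io_setup_link(link, &req);
		if (unlikely(err))
			err = io_submit_fail_init(sqe, req, err);
		else if (req && (req->flags & (REQ_F_FORCE_ASYNC | REQ_F_FAIL)))
			io_queue_sqe_fallback(req);
		else if (req)
			wq_list_add_tail(&req->comp_list, &req_list);
	} while (--left);

	/* phase 2: dispatch, fast-path requests only */
	wq_list_for_each_safe(pos, next, &req_list) {
		req = container_of(pos, struct io_kiocb, comp_list);
		req->comp_list.next = NULL;
		io_submit_sqe(req);
	}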

Signed-off-by: Keith Busch <[email protected]>
---
 io_uring/io_uring.c | 26 ++++++++++++++++++--------
 io_uring/slist.h    |  4 ++++
 2 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 818b2d1661c5e..5434aef0a8ef7 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1082,6 +1082,7 @@ static void io_preinit_req(struct io_kiocb *req, struct io_ring_ctx *ctx)
 	req->ctx = ctx;
 	req->link = NULL;
 	req->async_data = NULL;
+	req->comp_list.next = NULL;
 	/* not necessary, but safer to zero */
 	req->cqe.res = 0;
 }
@@ -2282,11 +2283,7 @@ static __cold int io_submit_fail_init(const struct io_uring_sqe *sqe,
 static inline void io_submit_sqe(struct io_kiocb *req)
 {
 	trace_io_uring_submit_req(req);
-
-	if (unlikely(req->flags & (REQ_F_FORCE_ASYNC | REQ_F_FAIL)))
-		io_queue_sqe_fallback(req);
-	else
-		io_queue_sqe(req);
+	io_queue_sqe(req);
 }
 
 static int io_setup_link(struct io_submit_link *link, struct io_kiocb **orig)
@@ -2409,6 +2406,9 @@ int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
 {
 	struct io_submit_link *link = &ctx->submit_state.link;
 	unsigned int entries = io_sqring_entries(ctx);
+	struct io_wq_work_node *pos, *next;
+	struct io_wq_work_list req_list;
+	struct io_kiocb *req;
 	unsigned int left;
 	int ret, err;
 
@@ -2419,6 +2419,7 @@ int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
 	io_get_task_refs(left);
 	io_submit_state_start(&ctx->submit_state, left);
 
+	INIT_WQ_LIST(&req_list);
 	do {
 		const struct io_uring_sqe *sqe;
 		struct io_kiocb *req;
@@ -2437,9 +2438,12 @@ int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
 		err = io_setup_link(link, &req);
 		if (unlikely(err))
 			goto error;
-
-		if (likely(req))
-			io_submit_sqe(req);
+		else if (unlikely(!req))
+			continue;
+		else if (unlikely(req->flags & (REQ_F_FORCE_ASYNC | REQ_F_FAIL)))
+			io_queue_sqe_fallback(req);
+		else
+			wq_list_add_tail(&req->comp_list, &req_list);
 		continue;
 error:
 		/*
@@ -2453,6 +2457,12 @@ int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
 		}
 	} while (--left);
 
+	wq_list_for_each_safe(pos, next, &req_list) {
+		req = container_of(pos, struct io_kiocb, comp_list);
+		req->comp_list.next = NULL;
+		io_submit_sqe(req);
+	}
+
 	if (unlikely(left)) {
 		ret -= left;
 		/* try again if it submitted nothing and can't allocate a req */
diff --git a/io_uring/slist.h b/io_uring/slist.h
index 0eb194817242e..93fbb715111ca 100644
--- a/io_uring/slist.h
+++ b/io_uring/slist.h
@@ -12,6 +12,10 @@
 #define wq_list_for_each_resume(pos, prv)			\
 	for (; pos; prv = pos, pos = (pos)->next)
 
+#define wq_list_for_each_safe(pos, n, head)			\
+	for (pos = (head)->first, n = pos ? pos->next : NULL;	\
+	     pos; pos = n, n = pos ? pos->next : NULL)
+
 #define wq_list_empty(list)	(READ_ONCE((list)->first) == NULL)
 
 #define INIT_WQ_LIST(list)	do {				\
-- 
2.34.1



* [PATCH 3/3] io_uring: set plug tags for same file
From: Keith Busch @ 2023-07-28 20:14 UTC
  To: axboe, asml.silence, linux-block, io-uring; +Cc: Keith Busch

From: Keith Busch <[email protected]>

io_uring tries to optimize tag allocation by hinting to the plug how
many tags it expects to need for a batch instead of allocating each tag
individually. But io_uring submission queues may have a mix of many
devices for IO, so the number of IOs counted may be overestimated. This
can lead to allocating too many tags, which adds overhead to finding
that many contiguous tags, freeing up the ones we didn't use, and may
starve out other users that could actually use them.

When starting a new batch of uring commands, count only the commands
that match the file descriptor of the first one seen, for this
optimization. This avoids having to call the unlikely
"blk_mq_free_plug_rqs()" at the end of a submission when multiple
devices are used in a batch.
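
As a sketch, the counting in io_init_req() becomes the following
(condensed from the io_uring hunk below):

	if (def->plug) {
		struct io_submit_state *state = &ctx->submit_state;

		if (state->need_plug) {
			/* first pluggable request starts the plug and
			 * records its fd; nr_ios stays at 1
			 */
			state->plug_started = true;
			state->need_plug = false;
			state->fd = req->cqe.fd;
			blk_start_plug(&state->plug);
		} else if (state->plug_started && state->fd == req->cqe.fd &&
			   !state->link.head) {
			/* only requests on the same fd bump the hint */
			state->plug.nr_ios++;
		}
	}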

Signed-off-by: Keith Busch <[email protected]>
---
 block/blk-core.c               | 49 +++++++++++++++-------------------
 block/blk-mq.c                 |  6 +++--
 include/linux/io_uring_types.h |  1 +
 io_uring/io_uring.c            | 19 ++++++++-----
 4 files changed, 40 insertions(+), 35 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 90de50082146a..72523a983c419 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1043,32 +1043,6 @@ int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork,
 }
 EXPORT_SYMBOL(kblockd_mod_delayed_work_on);
 
-void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
-{
-	struct task_struct *tsk = current;
-
-	/*
-	 * If this is a nested plug, don't actually assign it.
-	 */
-	if (tsk->plug)
-		return;
-
-	plug->mq_list = NULL;
-	plug->cached_rq = NULL;
-	plug->nr_ios = min_t(unsigned short, nr_ios, BLK_MAX_REQUEST_COUNT);
-	plug->rq_count = 0;
-	plug->multiple_queues = false;
-	plug->has_elevator = false;
-	plug->nowait = false;
-	INIT_LIST_HEAD(&plug->cb_list);
-
-	/*
-	 * Store ordering should not be needed here, since a potential
-	 * preempt will imply a full memory barrier
-	 */
-	tsk->plug = plug;
-}
-
 /**
  * blk_start_plug - initialize blk_plug and track it inside the task_struct
  * @plug:	The &struct blk_plug that needs to be initialized
@@ -1094,7 +1068,28 @@ void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
  */
 void blk_start_plug(struct blk_plug *plug)
 {
-	blk_start_plug_nr_ios(plug, 1);
+	struct task_struct *tsk = current;
+
+	/*
+	 * If this is a nested plug, don't actually assign it.
+	 */
+	if (tsk->plug)
+		return;
+
+	plug->mq_list = NULL;
+	plug->cached_rq = NULL;
+	plug->nr_ios = 1;
+	plug->rq_count = 0;
+	plug->multiple_queues = false;
+	plug->has_elevator = false;
+	plug->nowait = false;
+	INIT_LIST_HEAD(&plug->cb_list);
+
+	/*
+	 * Store ordering should not be needed here, since a potential
+	 * preempt will imply a full memory barrier
+	 */
+	tsk->plug = plug;
 }
 EXPORT_SYMBOL(blk_start_plug);
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index f14b8669ac69f..1e18ccd7d1376 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -524,7 +524,8 @@ static struct request *blk_mq_rq_cache_fill(struct request_queue *q,
 		.q		= q,
 		.flags		= flags,
 		.cmd_flags	= opf,
-		.nr_tags	= plug->nr_ios,
+		.nr_tags	= min_t(unsigned int, plug->nr_ios,
+					BLK_MAX_REQUEST_COUNT),
 		.cached_rq	= &plug->cached_rq,
 	};
 	struct request *rq;
@@ -2867,7 +2868,8 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
 	rq_qos_throttle(q, bio);
 
 	if (plug) {
-		data.nr_tags = plug->nr_ios;
+		data.nr_tags = min_t(unsigned int, plug->nr_ios,
+				     BLK_MAX_REQUEST_COUNT);
 		plug->nr_ios = 1;
 		data.cached_rq = &plug->cached_rq;
 	}
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 598553877fc25..6d922e7749989 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -175,6 +175,7 @@ struct io_submit_state {
 	bool			need_plug;
 	unsigned short		submit_nr;
 	unsigned int		cqes_count;
+	int			fd;
 	struct blk_plug		plug;
 	struct io_uring_cqe	cqes[16];
 };
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 5434aef0a8ef7..379e41b53efde 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2209,18 +2209,25 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 		return -EINVAL;
 
 	if (def->needs_file) {
-		struct io_submit_state *state = &ctx->submit_state;
-
 		req->cqe.fd = READ_ONCE(sqe->fd);
 
 		/*
 		 * Plug now if we have more than 2 IO left after this, and the
 		 * target is potentially a read/write to block based storage.
 		 */
-		if (state->need_plug && def->plug) {
-			state->plug_started = true;
-			state->need_plug = false;
-			blk_start_plug_nr_ios(&state->plug, state->submit_nr);
+		if (def->plug) {
+			struct io_submit_state *state = &ctx->submit_state;
+
+			if (state->need_plug) {
+				state->plug_started = true;
+				state->need_plug = false;
+				state->fd = req->cqe.fd;
+				blk_start_plug(&state->plug);
+			} else if (state->plug_started &&
+				   state->fd == req->cqe.fd &&
+				   !state->link.head) {
+				state->plug.nr_ios++;
+			}
 		}
 	}
 
-- 
2.34.1



* Re: [PATCH 1/3] io_uring: split req init from submit
From: Pavel Begunkov @ 2023-07-31 12:53 UTC
  To: Keith Busch, axboe, linux-block, io-uring; +Cc: Keith Busch

On 7/28/23 21:14, Keith Busch wrote:
> From: Keith Busch <[email protected]>
> 
> Split the req initialization and link handling from the submit. This
> simplifies the submit path since everything that can fail is separate
> from it, and makes it easier to create batched submissions later.

Keith, I don't think this prep patch does us any good; I'd rather
shove the link assembling code further out of the common path. I like
the first version more (see [1]). I'd suggest merging it and doing
the cleanup after.

I'll also say that IMHO the overhead is well justified. It's not only
about having multiple nvmes; the problem also slows down cases mixing
storage with net and the rest of IO in a single ring.

[1] https://lore.kernel.org/io-uring/[email protected]/

-- 
Pavel Begunkov


* Re: [PATCH 1/3] io_uring: split req init from submit
From: Jens Axboe @ 2023-07-31 21:00 UTC
  To: Pavel Begunkov, Keith Busch, linux-block, io-uring; +Cc: Keith Busch

On 7/31/23 6:53 AM, Pavel Begunkov wrote:
> On 7/28/23 21:14, Keith Busch wrote:
>> From: Keith Busch <[email protected]>
>>
>> Split the req initialization and link handling from the submit. This
>> simplifies the submit path since everything that can fail is separate
>> from it, and makes it easier to create batched submissions later.
> 
> Keith, I don't think this prep patch does us any good; I'd rather
> shove the link assembling code further out of the common path. I like
> the first version more (see [1]). I'd suggest merging it and doing
> the cleanup after.
> 
> I'll also say that IMHO the overhead is well justified. It's not only
> about having multiple nvmes; the problem also slows down cases mixing
> storage with net and the rest of IO in a single ring.
> 
> [1] https://lore.kernel.org/io-uring/[email protected]/

The downside of that one, to me, is that it just serializes all of it
and we end up looping over the submission list twice. With alloc+init
split, at least we get some locality wins by grouping the setup side of
the requests.

-- 
Jens Axboe



* Re: [PATCH 1/3] io_uring: split req init from submit
From: Pavel Begunkov @ 2023-08-01 14:13 UTC
  To: Jens Axboe, Keith Busch, linux-block, io-uring; +Cc: Keith Busch

On 7/31/23 22:00, Jens Axboe wrote:
> On 7/31/23 6:53 AM, Pavel Begunkov wrote:
>> On 7/28/23 21:14, Keith Busch wrote:
>>> From: Keith Busch <[email protected]>
>>>
>>> Split the req initialization and link handling from the submit. This
>>> simplifies the submit path since everything that can fail is separate
>>> from it, and makes it easier to create batched submissions later.
>>
>> Keith, I don't think this prep patch does us any good; I'd rather
>> shove the link assembling code further out of the common path. I like
>> the first version more (see [1]). I'd suggest merging it and doing
>> the cleanup after.
>>
>> I'll also say that IMHO the overhead is well justified. It's not only
>> about having multiple nvmes; the problem also slows down cases mixing
>> storage with net and the rest of IO in a single ring.
>>
>> [1] https://lore.kernel.org/io-uring/[email protected]/
> 
> The downside of that one, to me, is that it just serializes all of it
> and we end up looping over the submission list twice.

Right, and there is nothing that can be done if we want to know about
all requests in advance, at least without changing uapi and/or adding
userspace hints.

> With alloc+init
> split, at least we get some locality wins by grouping the setup side of
> the requests.

I don't think I follow; what grouping do you mean? As far as I see, v1
and v2 are essentially the same, differing only in whether you have a
helper for setting up links or not; see io_setup_link() from v2. In both
cases it's executed in the same sequence:

1) init (generic init + opcode init + link setup) each request and put
   it into a temporary list.
2) go over the list and submit them one by one

And after inlining they should look pretty close.

-- 
Pavel Begunkov


* Re: [PATCH 1/3] io_uring: split req init from submit
From: Keith Busch @ 2023-08-01 15:17 UTC
  To: Pavel Begunkov; +Cc: Jens Axboe, Keith Busch, linux-block, io-uring

On Tue, Aug 01, 2023 at 03:13:59PM +0100, Pavel Begunkov wrote:
> On 7/31/23 22:00, Jens Axboe wrote:
> > On 7/31/23 6:53 AM, Pavel Begunkov wrote:
> > > On 7/28/23 21:14, Keith Busch wrote:
> > > > From: Keith Busch <[email protected]>
> > > > 
> > > > Split the req initialization and link handling from the submit. This
> > > > simplifies the submit path since everything that can fail is separate
> > > > from it, and makes it easier to create batched submissions later.
> > > 
> > > Keith, I don't think this prep patch does us any good; I'd rather
> > > shove the link assembling code further out of the common path. I like
> > > the first version more (see [1]). I'd suggest merging it and doing
> > > the cleanup after.
> > > 
> > > I'll also say that IMHO the overhead is well justified. It's not only
> > > about having multiple nvmes; the problem also slows down cases mixing
> > > storage with net and the rest of IO in a single ring.
> > > 
> > > [1] https://lore.kernel.org/io-uring/[email protected]/
> > 
> > The downside of that one, to me, is that it just serializes all of it
> > and we end up looping over the submission list twice.
> 
> Right, and there is nothing that can be done if we want to know about
> all requests in advance, at least without changing uapi and/or adding
> userspace hints.
> 
> > With alloc+init
> > split, at least we get some locality wins by grouping the setup side of
> > the requests.
> 
> I don't think I follow; what grouping do you mean? As far as I see, v1
> and v2 are essentially the same, differing only in whether you have a
> helper for setting up links or not; see io_setup_link() from v2. In both
> cases it's executed in the same sequence:
> 
> 1) init (generic init + opcode init + link setup) each request and put
>    it into a temporary list.
> 2) go over the list and submit them one by one
> 
> And after inlining they should look pretty close.

The main difference in this one compared to the original version is that
everything in the 2nd loop is just for the final dispatch. Anything that
can fail, fall back, or defer to async happens in the first loop. I'm not
sure that makes a difference in runtime, but having the 2nd loop handle
only fast-path requests was what I set out to do for this version.


* Re: [PATCH 1/3] io_uring: split req init from submit
From: Pavel Begunkov @ 2023-08-01 16:05 UTC
  To: Keith Busch; +Cc: Jens Axboe, Keith Busch, linux-block, io-uring

On 8/1/23 16:17, Keith Busch wrote:
> On Tue, Aug 01, 2023 at 03:13:59PM +0100, Pavel Begunkov wrote:
>> On 7/31/23 22:00, Jens Axboe wrote:
>>> On 7/31/23 6:53?AM, Pavel Begunkov wrote:
>>>> On 7/28/23 21:14, Keith Busch wrote:
>>>>> From: Keith Busch <[email protected]>
>>>>>
>>>>> Split the req initialization and link handling from the submit. This
>>>>> simplifies the submit path since everything that can fail is separate
>>>>> from it, and makes it easier to create batched submissions later.
>>>>
>>>> Keith, I don't think this prep patch does us any good; I'd rather
>>>> shove the link assembling code further out of the common path. I like
>>>> the first version more (see [1]). I'd suggest merging it and doing
>>>> the cleanup after.
>>>>
>>>> I'll also say that IMHO the overhead is well justified. It's not only
>>>> about having multiple nvmes; the problem also slows down cases mixing
>>>> storage with net and the rest of IO in a single ring.
>>>>
>>>> [1] https://lore.kernel.org/io-uring/[email protected]/
>>>
>>> The downside of that one, to me, is that it just serializes all of it
>>> and we end up looping over the submission list twice.
>>
>> Right, and there is nothing that can be done if we want to know about
>> all requests in advance, at least without changing uapi and/or adding
>> userspace hints.
>>
>>> With alloc+init
>>> split, at least we get some locality wins by grouping the setup side of
>>> the requests.
>>
>> I don't think I follow; what grouping do you mean? As far as I see, v1
>> and v2 are essentially the same, differing only in whether you have a
>> helper for setting up links or not; see io_setup_link() from v2. In both
>> cases it's executed in the same sequence:
>>
>> 1) init (generic init + opcode init + link setup) each request and put
>>    it into a temporary list.
>> 2) go over the list and submit them one by one
>>
>> And after inlining they should look pretty close.
> 
> The main difference in this one compared to the original version is that
> everything in the 2nd loop is just for the final dispatch. Anything that
> can fail, fall back, or defer to async happens in the first loop. I'm not
> sure that makes a difference in runtime, but having the 2nd loop handle
> only fast-path requests was what I set out to do for this version.

For performance it doesn't matter; it's a very slow path and we should
not be hitting it. And it only smears single-req submission over multiple
places; for instance, it won't be legal to use io_submit_sqe() without
those extra checks. Those are all minor points, but I don't think it's
any better than v1 in this aspect.

-- 
Pavel Begunkov

