public inbox for io-uring@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] io_uring/net: allow opportunistic initial bundle recv
@ 2024-08-06 17:49 Jens Axboe
  2024-08-08 17:56 ` Jens Axboe
  0 siblings, 1 reply; 2+ messages in thread
From: Jens Axboe @ 2024-08-06 17:49 UTC (permalink / raw)
  To: io-uring

For bundles, the initial recv operation is always just a single buffer,
as we don't yet know how much data is available in the socket. However,
this can lead to a somewhat imbalanced string of receives, where the
first recv gets a single buffer and the second gets a bunch.

Allow the initial peek operation to get up to 4 buffers, taking
advantage of the fact that there may be more data available, rather
than just doing a single buffer. This has been shown to work well across
a variety of recv workloads, as it's still cheap enough to do, while
ensuring that we do get to amortize the cost of traversing the network
stack and socket operations.

Link: https://github.com/axboe/liburing/issues/1197
Fixes: 2f9c9515bdfd ("io_uring/net: support bundles for recv")
Signed-off-by: Jens Axboe <axboe@kernel.dk>

---

diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index c95dc1736dd9..2c052996c9bf 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -209,6 +209,7 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
 	int nr_iovs = arg->nr_iovs;
 	__u16 nr_avail, tail, head;
 	struct io_uring_buf *buf;
+	int needed = 0;
 
 	tail = smp_load_acquire(&br->tail);
 	head = bl->head;
@@ -218,19 +219,22 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
 
 	buf = io_ring_head_to_buf(br, head, bl->mask);
 	if (arg->max_len) {
-		int needed;
-
 		needed = (arg->max_len + buf->len - 1) / buf->len;
 		needed = min(needed, PEEK_MAX_IMPORT);
-		if (nr_avail > needed)
-			nr_avail = needed;
+	} else if (arg->max_vecs) {
+		needed = arg->max_vecs;
 	}
 
+	if (nr_avail > needed)
+		nr_avail = needed;
+
 	/*
-	 * only alloc a bigger array if we know we have data to map, eg not
-	 * a speculative peek operation.
+	 * Alloc a bigger array if we know we have data to map, or if a
+	 * speculative peek operation tries to map more than what is
+	 * available.
 	 */
-	if (arg->mode & KBUF_MODE_EXPAND && nr_avail > nr_iovs && arg->max_len) {
+	if (arg->mode & KBUF_MODE_EXPAND && nr_avail > nr_iovs &&
+	    (arg->max_len || arg->max_vecs)) {
 		iov = kmalloc_array(nr_avail, sizeof(struct iovec), GFP_KERNEL);
 		if (unlikely(!iov))
 			return -ENOMEM;
@@ -238,7 +242,7 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
 			kfree(arg->iovs);
 		arg->iovs = iov;
 		nr_iovs = nr_avail;
-	} else if (nr_avail < nr_iovs) {
+	} else if (nr_iovs > nr_avail) {
 		nr_iovs = nr_avail;
 	}
 
diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h
index b90aca3a57fa..8248ffda3a43 100644
--- a/io_uring/kbuf.h
+++ b/io_uring/kbuf.h
@@ -53,7 +53,8 @@ struct buf_sel_arg {
 	size_t out_len;
 	size_t max_len;
 	int nr_iovs;
-	int mode;
+	unsigned short mode;
+	unsigned short max_vecs;
 };
 
 void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
diff --git a/io_uring/net.c b/io_uring/net.c
index 594490a1389b..48667f3a2388 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -1076,8 +1076,14 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg
 			arg.mode |= KBUF_MODE_FREE;
 		}
 
+		/*
+		 * Use the passed back residual if we have it, if not allow
+		 * peeking of up to 4 buffers.
+		 */
 		if (kmsg->msg.msg_inq > 0)
 			arg.max_len = min_not_zero(sr->len, kmsg->msg.msg_inq);
+		else
+			arg.max_vecs = 4;
 
 		ret = io_buffers_peek(req, &arg);
 		if (unlikely(ret < 0))

-- 
Jens Axboe


^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2024-08-08 17:56 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-08-06 17:49 [PATCH] io_uring/net: allow opportunistic initial bundle recv Jens Axboe
2024-08-08 17:56 ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox