public inbox for io-uring@vger.kernel.org
 help / color / mirror / Atom feed
From: Jens Axboe <axboe@kernel.dk>
To: io-uring@vger.kernel.org
Cc: Jens Axboe <axboe@kernel.dk>
Subject: [PATCH 3/3] io_uring/net: allow multishot receive per-invocation cap
Date: Wed,  9 Jul 2025 14:32:42 -0600	[thread overview]
Message-ID: <20250709203420.1321689-4-axboe@kernel.dk> (raw)
In-Reply-To: <20250709203420.1321689-1-axboe@kernel.dk>

If an application is handling multiple receive streams using recv
multishot, then the amount of retries and buffer peeking for multishot
and bundles can process too much per socket before moving on. This isn't
directly controllable by the application. By default, io_uring will
retry a recv MULTISHOT_MAX_RETRY (32) times, if the socket keeps having
data to receive. And if using bundles, then each bundle peek will
potentially map up to PEEK_MAX_IMPORT (256) iovecs of data. Once these
limits are hit, then a requeue operation will be done, where the request
will get retried after other pending requests have had a time to get
executed.

Add support for capping the per-invocation receive length, before a
requeue condition is considered for each receive. This is done by setting
sqe->mshot_len to the byte value. For example, if this is set to 1024,
then each receive will be requeued by 1024 bytes received.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/net.c | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/io_uring/net.c b/io_uring/net.c
index 144788bea009..c0a5bd358bd0 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -75,6 +75,7 @@ struct io_sr_msg {
 	u16				flags;
 	/* initialised and used only by !msg send variants */
 	u16				buf_group;
+	unsigned			mshot_len;
 	void __user			*msg_control;
 	/* used only for send zerocopy */
 	struct io_kiocb 		*notif;
@@ -87,9 +88,11 @@ struct io_sr_msg {
 enum sr_retry_flags {
 	IORING_RECV_RETRY	= (1U << 15),
 	IORING_RECV_PARTIAL_MAP	= (1U << 14),
+	IORING_RECV_MSHOT_CAP	= (1U << 13),
 
 	IORING_RECV_RETRY_CLEAR	= IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP,
-	IORING_RECV_INTERNAL	= IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP,
+	IORING_RECV_INTERNAL	= IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP |
+				  IORING_RECV_MSHOT_CAP,
 };
 
 /*
@@ -199,7 +202,7 @@ static inline void io_mshot_prep_retry(struct io_kiocb *req,
 	req->flags &= ~REQ_F_BL_EMPTY;
 	sr->done_io = 0;
 	sr->flags &= ~IORING_RECV_RETRY_CLEAR;
-	sr->len = 0; /* get from the provided buffer */
+	sr->len = sr->mshot_len;
 }
 
 static int io_net_import_vec(struct io_kiocb *req, struct io_async_msghdr *iomsg,
@@ -787,13 +790,14 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		sr->buf_group = req->buf_index;
 		req->buf_list = NULL;
 	}
+	sr->mshot_len = 0;
 	if (sr->flags & IORING_RECV_MULTISHOT) {
 		if (!(req->flags & REQ_F_BUFFER_SELECT))
 			return -EINVAL;
 		if (sr->msg_flags & MSG_WAITALL)
 			return -EINVAL;
-		if (req->opcode == IORING_OP_RECV && sr->len)
-			return -EINVAL;
+		if (req->opcode == IORING_OP_RECV)
+			sr->mshot_len = sr->len;
 		req->flags |= REQ_F_APOLL_MULTISHOT;
 	}
 	if (sr->flags & IORING_RECVSEND_BUNDLE) {
@@ -834,6 +838,8 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
 				      issue_flags);
 		if (sr->flags & IORING_RECV_RETRY)
 			cflags = req->cqe.flags | (cflags & CQE_F_MASK);
+		if (sr->mshot_len && *ret >= sr->mshot_len)
+			sr->flags |= IORING_RECV_MSHOT_CAP;
 		/* bundle with no more immediate buffers, we're done */
 		if (req->flags & REQ_F_BL_EMPTY)
 			goto finish;
@@ -864,10 +870,13 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
 		io_mshot_prep_retry(req, kmsg);
 		/* Known not-empty or unknown state, retry */
 		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || kmsg->msg.msg_inq < 0) {
-			if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY)
+			if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY &&
+			    !(sr->flags & IORING_RECV_MSHOT_CAP)) {
 				return false;
+			}
 			/* mshot retries exceeded, force a requeue */
 			sr->nr_multishot_loops = 0;
+			sr->flags &= ~IORING_RECV_MSHOT_CAP;
 			if (issue_flags & IO_URING_F_MULTISHOT)
 				*ret = IOU_REQUEUE;
 		}
@@ -1080,7 +1089,9 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg
 			arg.mode |= KBUF_MODE_FREE;
 		}
 
-		if (kmsg->msg.msg_inq > 1)
+		if (*len)
+			arg.max_len = *len;
+		else if (kmsg->msg.msg_inq > 1)
 			arg.max_len = min_not_zero(*len, kmsg->msg.msg_inq);
 
 		ret = io_buffers_peek(req, &arg);
-- 
2.50.0


  parent reply	other threads:[~2025-07-09 20:34 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-07-09 20:32 [PATCHSET v2] Add retry cap for multishot recv receive size Jens Axboe
2025-07-09 20:32 ` [PATCH 1/3] io_uring/net: move io_sr_msg->retry_flags to io_sr_msg->flags Jens Axboe
2025-07-09 20:32 ` [PATCH 2/3] io_uring/net: use passed in 'len' in io_recv_buf_select() Jens Axboe
2025-07-09 20:32 ` Jens Axboe [this message]
  -- strict thread matches above, loose matches on Subject: below --
2025-07-08 14:26 [PATCHSET 0/3] Add cap for multishot recv receive size Jens Axboe
2025-07-08 14:26 ` [PATCH 3/3] io_uring/net: allow multishot receive per-invocation cap Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250709203420.1321689-4-axboe@kernel.dk \
    --to=axboe@kernel.dk \
    --cc=io-uring@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox