From: Norman Maurer <norman.maurer@googlemail.com>
To: io-uring@vger.kernel.org
Cc: axboe@kernel.dk, Norman Maurer <norman_maurer@apple.com>
Subject: [PATCH] io_uring/net: Support multishot receive len cap
Date: Tue, 15 Jul 2025 15:00:26 +0200 [thread overview]
Message-ID: <20250715130026.48204-1-norman_maurer@apple.com> (raw)
At the moment its very hard to do fine grained backpressure when using
multishot as the kernel might produce a lot of completions before the
user has a chance to cancel a previous submitted multishot recv.
This change adds support to issue a multishot recv that is capped by a
len, which means the kernel will only rearm until X amount of data is
received. When the limit is reached the completion will signal to the
user that a re-arm needs to happen manually by not setting the IORING_CQE_F_MORE
flag.
---
io_uring/net.c | 38 +++++++++++++++++++++++++++++++++++---
1 file changed, 35 insertions(+), 3 deletions(-)
diff --git a/io_uring/net.c b/io_uring/net.c
index 639f111408a1..9965976c9a98 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -75,7 +75,10 @@ struct io_sr_msg {
u16 flags;
/* initialised and used only by !msg send variants */
u16 buf_group;
+ /* per-invocation mshot limit */
unsigned mshot_len;
+ /* overall mshot byte limit */
+ unsigned mshot_total_len;
void __user *msg_control;
/* used only for send zerocopy */
struct io_kiocb *notif;
@@ -89,10 +92,14 @@ enum sr_retry_flags {
IORING_RECV_RETRY = (1U << 15),
IORING_RECV_PARTIAL_MAP = (1U << 14),
IORING_RECV_MSHOT_CAP = (1U << 13),
+ IORING_RECV_MSHOT_LIM = (1U << 12),
+ IORING_RECV_MSHOT_DONE = (1U << 11),
IORING_RECV_RETRY_CLEAR = IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP,
IORING_RECV_NO_RETRY = IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP |
IORING_RECV_MSHOT_CAP,
+ IORING_RECV_INTERNAL = IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP |
+ IORING_RECV_MSHOT_CAP | IORING_RECV_MSHOT_DONE,
};
/*
@@ -765,7 +772,7 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
sr->done_io = 0;
- if (unlikely(sqe->file_index || sqe->addr2))
+ if (unlikely(sqe->addr2))
return -EINVAL;
sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
@@ -790,16 +797,25 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
sr->buf_group = req->buf_index;
req->buf_list = NULL;
}
- sr->mshot_len = 0;
+ sr->mshot_total_len = sr->mshot_len = 0;
if (sr->flags & IORING_RECV_MULTISHOT) {
if (!(req->flags & REQ_F_BUFFER_SELECT))
return -EINVAL;
if (sr->msg_flags & MSG_WAITALL)
return -EINVAL;
- if (req->opcode == IORING_OP_RECV)
+ if (req->opcode == IORING_OP_RECV) {
sr->mshot_len = sr->len;
+ sr->mshot_total_len = READ_ONCE(sqe->optlen);
+ if (sr->mshot_total_len)
+ sr->flags |= IORING_RECV_MSHOT_LIM;
+ } else if (sqe->optlen) {
+ return -EINVAL;
+ }
req->flags |= REQ_F_APOLL_MULTISHOT;
+ } else if (sqe->optlen) {
+ return -EINVAL;
}
+
if (sr->flags & IORING_RECVSEND_BUNDLE) {
if (req->opcode == IORING_OP_RECVMSG)
return -EINVAL;
@@ -831,6 +847,19 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
if (kmsg->msg.msg_inq > 0)
cflags |= IORING_CQE_F_SOCK_NONEMPTY;
+ if (*ret > 0 && sr->flags & IORING_RECV_MSHOT_LIM) {
+ /*
+ * If sr->len hits zero, the limit has been reached. Mark
+ * mshot as finished, and flag MSHOT_DONE as well to prevent
+ * a potential bundle from being retried.
+ */
+ sr->mshot_total_len -= min_t(int, *ret, sr->mshot_total_len);
+ if (!sr->mshot_total_len) {
+ sr->flags |= IORING_RECV_MSHOT_DONE;
+ mshot_finished = true;
+ }
+ }
+
if (sr->flags & IORING_RECVSEND_BUNDLE) {
size_t this_ret = *ret - sr->done_io;
@@ -1094,6 +1123,9 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg
else if (kmsg->msg.msg_inq > 1)
arg.max_len = min_not_zero(*len, (size_t) kmsg->msg.msg_inq);
+ /* if mshot limited, ensure we don't go over */
+ if (sr->flags & IORING_RECV_MSHOT_LIM)
+ arg.max_len = min_not_zero(arg.max_len, sr->mshot_total_len);
ret = io_buffers_peek(req, &arg);
if (unlikely(ret < 0))
return ret;
--
2.50.1
reply other threads:[~2025-07-15 13:00 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250715130026.48204-1-norman_maurer@apple.com \
--to=norman.maurer@googlemail.com \
--cc=axboe@kernel.dk \
--cc=io-uring@vger.kernel.org \
--cc=norman_maurer@apple.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox