* [PATCH v1 0/2] io_uring zc rx single shot recvzc
@ 2025-02-18 16:57 David Wei
2025-02-18 16:57 ` [PATCH v1 1/2] io_uring/zcrx: add " David Wei
2025-02-18 16:57 ` [PATCH v1 2/2] io_uring/zcrx: add selftest case for " David Wei
0 siblings, 2 replies; 5+ messages in thread
From: David Wei @ 2025-02-18 16:57 UTC (permalink / raw)
To: io-uring, netdev
Cc: Jens Axboe, Pavel Begunkov, Jakub Kicinski, Paolo Abeni,
David S. Miller, Eric Dumazet, Simon Horman
Currently only multishot recvzc requests are supported in io_uring zc
rx, but sometimes there is a need to do a single recvzc e.g. peeking at
some data in the socket.
In this series, add single shot recvzc request and a selftest for the
feature.
David Wei (2):
io_uring/zcrx: add single shot recvzc
io_uring/zcrx: add selftest case for single shot recvzc
io_uring/net.c | 26 ++++++++----
io_uring/zcrx.c | 17 ++++++--
io_uring/zcrx.h | 2 +-
.../selftests/drivers/net/hw/iou-zcrx.c | 42 ++++++++++++++++---
.../selftests/drivers/net/hw/iou-zcrx.py | 27 +++++++++++-
5 files changed, 94 insertions(+), 20 deletions(-)
--
2.43.5
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH v1 1/2] io_uring/zcrx: add single shot recvzc
2025-02-18 16:57 [PATCH v1 0/2] io_uring zc rx single shot recvzc David Wei
@ 2025-02-18 16:57 ` David Wei
2025-02-20 13:45 ` Pavel Begunkov
2025-02-18 16:57 ` [PATCH v1 2/2] io_uring/zcrx: add selftest case for " David Wei
1 sibling, 1 reply; 5+ messages in thread
From: David Wei @ 2025-02-18 16:57 UTC (permalink / raw)
To: io-uring, netdev
Cc: Jens Axboe, Pavel Begunkov, Jakub Kicinski, Paolo Abeni,
David S. Miller, Eric Dumazet, Simon Horman
Currently only multishot recvzc requests are supported, but sometimes
there is a need to do a single recv e.g. peeking at some data in the
socket. Add single shot recvzc requests where IORING_RECV_MULTISHOT is
_not_ set and the sqe->len field is set to the number of bytes to read
N.
There could be multiple completions containing data, like the multishot
case, since N bytes could be split across multiple frags. This is
followed by a final completion with res and cflags both set to 0 that
indicate the completion of the request, or a -res that indicate an
error.
Signed-off-by: David Wei <[email protected]>
---
io_uring/net.c | 26 ++++++++++++++++++--------
io_uring/zcrx.c | 17 ++++++++++++++---
io_uring/zcrx.h | 2 +-
3 files changed, 33 insertions(+), 12 deletions(-)
diff --git a/io_uring/net.c b/io_uring/net.c
index 000dc70d08d0..d3a9aaa52a13 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -94,6 +94,7 @@ struct io_recvzc {
struct file *file;
unsigned msg_flags;
u16 flags;
+ u32 len;
struct io_zcrx_ifq *ifq;
};
@@ -1241,7 +1242,7 @@ int io_recvzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
unsigned ifq_idx;
if (unlikely(sqe->file_index || sqe->addr2 || sqe->addr ||
- sqe->len || sqe->addr3))
+ sqe->addr3))
return -EINVAL;
ifq_idx = READ_ONCE(sqe->zcrx_ifq_idx);
@@ -1250,6 +1251,9 @@ int io_recvzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
zc->ifq = req->ctx->ifq;
if (!zc->ifq)
return -EINVAL;
+ zc->len = READ_ONCE(sqe->len);
+ if (zc->len == UINT_MAX)
+ return -EINVAL;
zc->flags = READ_ONCE(sqe->ioprio);
zc->msg_flags = READ_ONCE(sqe->msg_flags);
@@ -1257,12 +1261,14 @@ int io_recvzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return -EINVAL;
if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT))
return -EINVAL;
- /* multishot required */
- if (!(zc->flags & IORING_RECV_MULTISHOT))
- return -EINVAL;
- /* All data completions are posted as aux CQEs. */
- req->flags |= REQ_F_APOLL_MULTISHOT;
-
+ if (zc->flags & IORING_RECV_MULTISHOT) {
+ if (zc->len)
+ return -EINVAL;
+ /* All data completions are posted as aux CQEs. */
+ req->flags |= REQ_F_APOLL_MULTISHOT;
+ }
+ if (!zc->len)
+ zc->len = UINT_MAX;
return 0;
}
@@ -1281,7 +1287,7 @@ int io_recvzc(struct io_kiocb *req, unsigned int issue_flags)
return -ENOTSOCK;
ret = io_zcrx_recv(req, zc->ifq, sock, zc->msg_flags | MSG_DONTWAIT,
- issue_flags);
+ issue_flags, zc->len);
if (unlikely(ret <= 0) && ret != -EAGAIN) {
if (ret == -ERESTARTSYS)
ret = -EINTR;
@@ -1296,6 +1302,10 @@ int io_recvzc(struct io_kiocb *req, unsigned int issue_flags)
return IOU_OK;
}
+ if (zc->len != UINT_MAX) {
+ io_req_set_res(req, ret, 0);
+ return IOU_OK;
+ }
if (issue_flags & IO_URING_F_MULTISHOT)
return IOU_ISSUE_SKIP_COMPLETE;
return -EAGAIN;
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index ea099f746599..834c887743c8 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -106,6 +106,7 @@ struct io_zcrx_args {
struct io_zcrx_ifq *ifq;
struct socket *sock;
unsigned nr_skbs;
+ unsigned long len;
};
static const struct memory_provider_ops io_uring_pp_zc_ops;
@@ -826,6 +827,10 @@ io_zcrx_recv_skb(read_descriptor_t *desc, struct sk_buff *skb,
int i, copy, end, off;
int ret = 0;
+ if (args->len == 0)
+ return -EINTR;
+ len = (args->len != UINT_MAX) ? min_t(size_t, len, args->len) : len;
+
if (unlikely(args->nr_skbs++ > IO_SKBS_PER_CALL_LIMIT))
return -EAGAIN;
@@ -920,17 +925,21 @@ io_zcrx_recv_skb(read_descriptor_t *desc, struct sk_buff *skb,
out:
if (offset == start_off)
return ret;
+ args->len -= (offset - start_off);
+ if (args->len == 0)
+ desc->count = 0;
return offset - start_off;
}
static int io_zcrx_tcp_recvmsg(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
struct sock *sk, int flags,
- unsigned issue_flags)
+ unsigned issue_flags, unsigned long len)
{
struct io_zcrx_args args = {
.req = req,
.ifq = ifq,
.sock = sk->sk_socket,
+ .len = len,
};
read_descriptor_t rd_desc = {
.count = 1,
@@ -956,6 +965,8 @@ static int io_zcrx_tcp_recvmsg(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
ret = IOU_REQUEUE;
} else if (sock_flag(sk, SOCK_DONE)) {
/* Make it to retry until it finally gets 0. */
+ if (len != UINT_MAX)
+ goto out;
if (issue_flags & IO_URING_F_MULTISHOT)
ret = IOU_REQUEUE;
else
@@ -968,7 +979,7 @@ static int io_zcrx_tcp_recvmsg(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
int io_zcrx_recv(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
struct socket *sock, unsigned int flags,
- unsigned issue_flags)
+ unsigned issue_flags, unsigned long len)
{
struct sock *sk = sock->sk;
const struct proto *prot = READ_ONCE(sk->sk_prot);
@@ -977,7 +988,7 @@ int io_zcrx_recv(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
return -EPROTONOSUPPORT;
sock_rps_record_flow(sk);
- return io_zcrx_tcp_recvmsg(req, ifq, sk, flags, issue_flags);
+ return io_zcrx_tcp_recvmsg(req, ifq, sk, flags, issue_flags, len);
}
#include <linux/io_uring/net.h>
diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h
index a16bdd921f03..85da6a14543f 100644
--- a/io_uring/zcrx.h
+++ b/io_uring/zcrx.h
@@ -46,7 +46,7 @@ void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx);
void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx);
int io_zcrx_recv(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
struct socket *sock, unsigned int flags,
- unsigned issue_flags);
+ unsigned issue_flags, unsigned long len);
#else
static inline int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
struct io_uring_zcrx_ifq_reg __user *arg)
--
2.43.5
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH v1 2/2] io_uring/zcrx: add selftest case for single shot recvzc
2025-02-18 16:57 [PATCH v1 0/2] io_uring zc rx single shot recvzc David Wei
2025-02-18 16:57 ` [PATCH v1 1/2] io_uring/zcrx: add " David Wei
@ 2025-02-18 16:57 ` David Wei
1 sibling, 0 replies; 5+ messages in thread
From: David Wei @ 2025-02-18 16:57 UTC (permalink / raw)
To: io-uring, netdev
Cc: Jens Axboe, Pavel Begunkov, Jakub Kicinski, Paolo Abeni,
David S. Miller, Eric Dumazet, Simon Horman
Add a selftest case to iou-zcrx where the sender sends 4x4K = 16K and
the receiver does 4x4K single shot recvzc. Validate that the requests
are successful and the data is not corrupted.
Signed-off-by: David Wei <[email protected]>
---
.../selftests/drivers/net/hw/iou-zcrx.c | 42 ++++++++++++++++---
.../selftests/drivers/net/hw/iou-zcrx.py | 27 +++++++++++-
2 files changed, 61 insertions(+), 8 deletions(-)
diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.c b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
index 5d04dd55ae55..e7f0688991ab 100644
--- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
+++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
@@ -61,6 +61,9 @@ static int cfg_port = 8000;
static int cfg_payload_len;
static const char *cfg_ifname;
static int cfg_queue_id = -1;
+static bool cfg_oneshot;
+static int cfg_oneshot_recvs;
+static int cfg_send_size = SEND_SIZE;
static struct sockaddr_in6 cfg_addr;
static char payload[SEND_SIZE] __attribute__((aligned(PAGE_SIZE)));
@@ -196,6 +199,16 @@ static void add_recvzc(struct io_uring *ring, int sockfd)
sqe->user_data = 2;
}
+static void add_recvzc_oneshot(struct io_uring *ring, int sockfd, size_t len)
+{
+ struct io_uring_sqe *sqe;
+
+ sqe = io_uring_get_sqe(ring);
+
+ io_uring_prep_rw(IORING_OP_RECV_ZC, sqe, sockfd, NULL, len, 0);
+ sqe->user_data = 2;
+}
+
static void process_accept(struct io_uring *ring, struct io_uring_cqe *cqe)
{
if (cqe->res < 0)
@@ -204,7 +217,10 @@ static void process_accept(struct io_uring *ring, struct io_uring_cqe *cqe)
error(1, 0, "Unexpected second connection");
connfd = cqe->res;
- add_recvzc(ring, connfd);
+ if (cfg_oneshot)
+ add_recvzc_oneshot(ring, connfd, PAGE_SIZE);
+ else
+ add_recvzc(ring, connfd);
}
static void process_recvzc(struct io_uring *ring, struct io_uring_cqe *cqe)
@@ -218,7 +234,7 @@ static void process_recvzc(struct io_uring *ring, struct io_uring_cqe *cqe)
ssize_t n;
int i;
- if (cqe->res == 0 && cqe->flags == 0) {
+ if (cqe->res == 0 && cqe->flags == 0 && cfg_oneshot_recvs == 0) {
stop = true;
return;
}
@@ -226,8 +242,14 @@ static void process_recvzc(struct io_uring *ring, struct io_uring_cqe *cqe)
if (cqe->res < 0)
error(1, 0, "recvzc(): %d", cqe->res);
- if (!(cqe->flags & IORING_CQE_F_MORE))
+ if (cfg_oneshot) {
+ if (cqe->res == 0 && cqe->flags == 0 && cfg_oneshot_recvs) {
+ add_recvzc_oneshot(ring, connfd, PAGE_SIZE);
+ cfg_oneshot_recvs--;
+ }
+ } else if (!(cqe->flags & IORING_CQE_F_MORE)) {
add_recvzc(ring, connfd);
+ }
rcqe = (struct io_uring_zcrx_cqe *)(cqe + 1);
@@ -237,7 +259,7 @@ static void process_recvzc(struct io_uring *ring, struct io_uring_cqe *cqe)
for (i = 0; i < n; i++) {
if (*(data + i) != payload[(received + i)])
- error(1, 0, "payload mismatch");
+ error(1, 0, "payload mismatch at ", i);
}
received += n;
@@ -313,7 +335,7 @@ static void run_server(void)
static void run_client(void)
{
- ssize_t to_send = SEND_SIZE;
+ ssize_t to_send = cfg_send_size;
ssize_t sent = 0;
ssize_t chunk, res;
int fd;
@@ -360,7 +382,7 @@ static void parse_opts(int argc, char **argv)
usage(argv[0]);
cfg_payload_len = max_payload_len;
- while ((c = getopt(argc, argv, "46sch:p:l:i:q:")) != -1) {
+ while ((c = getopt(argc, argv, "sch:p:l:i:q:o:z:")) != -1) {
switch (c) {
case 's':
if (cfg_client)
@@ -387,6 +409,14 @@ static void parse_opts(int argc, char **argv)
case 'q':
cfg_queue_id = strtoul(optarg, NULL, 0);
break;
+ case 'o': {
+ cfg_oneshot = true;
+ cfg_oneshot_recvs = strtoul(optarg, NULL, 0);
+ break;
+ }
+ case 'z':
+ cfg_send_size = strtoul(optarg, NULL, 0);
+ break;
}
}
diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
index ea0a346c3eff..d301d9b356f7 100755
--- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
+++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
@@ -34,14 +34,37 @@ def test_zcrx(cfg) -> None:
raise KsftSkipEx('at least 2 combined channels required')
rx_ring = _get_rx_ring_entries(cfg)
- rx_cmd = f"{cfg.bin_remote} -s -p 9999 -i {cfg.ifname} -q {combined_chans - 1}"
- tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_v6} -p 9999 -l 12840"
+ try:
+ ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
+ ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
+ flow_rule_id = _set_flow_rule(cfg, combined_chans - 1)
+
+ rx_cmd = f"{cfg.bin_remote} -s -p 9999 -i {cfg.ifname} -q {combined_chans - 1}"
+ tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_v6} -p 9999 -l 12840"
+ with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
+ wait_port_listen(9999, proto="tcp", host=cfg.remote)
+ cmd(tx_cmd)
+ finally:
+ ethtool(f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote)
+ ethtool(f"-X {cfg.ifname} default", host=cfg.remote)
+ ethtool(f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote)
+
+
+def test_zcrx_oneshot(cfg) -> None:
+ cfg.require_v6()
+
+ combined_chans = _get_combined_channels(cfg)
+ if combined_chans < 2:
+ raise KsftSkipEx('at least 2 combined channels required')
+ rx_ring = _get_rx_ring_entries(cfg)
try:
ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
flow_rule_id = _set_flow_rule(cfg, combined_chans - 1)
+ rx_cmd = f"{cfg.bin_remote} -s -p 9999 -i {cfg.ifname} -q {combined_chans - 1} -o 4"
+ tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_v6} -p 9999 -l 4096 -z 16384"
with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
wait_port_listen(9999, proto="tcp", host=cfg.remote)
cmd(tx_cmd)
--
2.43.5
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH v1 1/2] io_uring/zcrx: add single shot recvzc
2025-02-18 16:57 ` [PATCH v1 1/2] io_uring/zcrx: add " David Wei
@ 2025-02-20 13:45 ` Pavel Begunkov
2025-02-20 19:29 ` David Wei
0 siblings, 1 reply; 5+ messages in thread
From: Pavel Begunkov @ 2025-02-20 13:45 UTC (permalink / raw)
To: David Wei, io-uring, netdev
Cc: Jens Axboe, Jakub Kicinski, Paolo Abeni, David S. Miller,
Eric Dumazet, Simon Horman
On 2/18/25 16:57, David Wei wrote:
> Currently only multishot recvzc requests are supported, but sometimes
> there is a need to do a single recv e.g. peeking at some data in the
> socket. Add single shot recvzc requests where IORING_RECV_MULTISHOT is
> _not_ set and the sqe->len field is set to the number of bytes to read
> N.
>
> There could be multiple completions containing data, like the multishot
> case, since N bytes could be split across multiple frags. This is
> followed by a final completion with res and cflags both set to 0 that
> indicate the completion of the request, or a -res that indicate an
> error.
>
> Signed-off-by: David Wei <[email protected]>
> ---
> io_uring/net.c | 26 ++++++++++++++++++--------
> io_uring/zcrx.c | 17 ++++++++++++++---
> io_uring/zcrx.h | 2 +-
> 3 files changed, 33 insertions(+), 12 deletions(-)
>
> diff --git a/io_uring/net.c b/io_uring/net.c
> index 000dc70d08d0..d3a9aaa52a13 100644
> --- a/io_uring/net.c
> +++ b/io_uring/net.c
> @@ -94,6 +94,7 @@ struct io_recvzc {
> struct file *file;
> unsigned msg_flags;
> u16 flags;
> + u32 len;
Something is up with the types, it's u32, for which you use
UINT_MAX, and later convert to ulong.
> struct io_zcrx_ifq *ifq;
> };
>
...
> @@ -1250,6 +1251,9 @@ int io_recvzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> zc->ifq = req->ctx->ifq;
> if (!zc->ifq)
> return -EINVAL;
> + zc->len = READ_ONCE(sqe->len);
> + if (zc->len == UINT_MAX)
> + return -EINVAL;
>
> zc->flags = READ_ONCE(sqe->ioprio);
> zc->msg_flags = READ_ONCE(sqe->msg_flags);
> @@ -1257,12 +1261,14 @@ int io_recvzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> return -EINVAL;
> if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT))
> return -EINVAL;
> - /* multishot required */
> - if (!(zc->flags & IORING_RECV_MULTISHOT))
> - return -EINVAL;
> - /* All data completions are posted as aux CQEs. */
> - req->flags |= REQ_F_APOLL_MULTISHOT;
> -
> + if (zc->flags & IORING_RECV_MULTISHOT) {
> + if (zc->len)
> + return -EINVAL;
> + /* All data completions are posted as aux CQEs. */
> + req->flags |= REQ_F_APOLL_MULTISHOT;
If you're posting "aux" cqes you have to set the flag for
synchronisation reasons. We probably can split out a "I want to post
aux cqes" flag, but it seems like you don't actually care about
multishot here but limiting the length, or limiting the length + nowait.
> + }
> + if (!zc->len)
> + zc->len = UINT_MAX;
> return 0;
> }
>
> @@ -1281,7 +1287,7 @@ int io_recvzc(struct io_kiocb *req, unsigned int issue_flags)
> return -ENOTSOCK;
>
> ret = io_zcrx_recv(req, zc->ifq, sock, zc->msg_flags | MSG_DONTWAIT,
> - issue_flags);
> + issue_flags, zc->len);
> if (unlikely(ret <= 0) && ret != -EAGAIN) {
> if (ret == -ERESTARTSYS)
> ret = -EINTR;
> @@ -1296,6 +1302,10 @@ int io_recvzc(struct io_kiocb *req, unsigned int issue_flags)
> return IOU_OK;
> }
>
> + if (zc->len != UINT_MAX) {
> + io_req_set_res(req, ret, 0);
> + return IOU_OK;
> + }
> if (issue_flags & IO_URING_F_MULTISHOT)
> return IOU_ISSUE_SKIP_COMPLETE;
> return -EAGAIN;
> diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
> index ea099f746599..834c887743c8 100644
> --- a/io_uring/zcrx.c
> +++ b/io_uring/zcrx.c
> @@ -106,6 +106,7 @@ struct io_zcrx_args {
> struct io_zcrx_ifq *ifq;
> struct socket *sock;
> unsigned nr_skbs;
> + unsigned long len;
> };
>
> static const struct memory_provider_ops io_uring_pp_zc_ops;
> @@ -826,6 +827,10 @@ io_zcrx_recv_skb(read_descriptor_t *desc, struct sk_buff *skb,
> int i, copy, end, off;
> int ret = 0;
>
> + if (args->len == 0)
> + return -EINTR;
> + len = (args->len != UINT_MAX) ? min_t(size_t, len, args->len) : len;
Just min?
> +
> if (unlikely(args->nr_skbs++ > IO_SKBS_PER_CALL_LIMIT))
> return -EAGAIN;
>
> @@ -920,17 +925,21 @@ io_zcrx_recv_skb(read_descriptor_t *desc, struct sk_buff *skb,
> out:
> if (offset == start_off)
> return ret;
> + args->len -= (offset - start_off);
Doesn't it unconditionally change the magic value UINT_MAX
you're trying to preserve?
> + if (args->len == 0)
> + desc->count = 0;
> return offset - start_off;
> }
>
> static int io_zcrx_tcp_recvmsg(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
> struct sock *sk, int flags,
> - unsigned issue_flags)
> + unsigned issue_flags, unsigned long len)
> {
> struct io_zcrx_args args = {
> .req = req,
> .ifq = ifq,
> .sock = sk->sk_socket,
> + .len = len,
> };
> read_descriptor_t rd_desc = {
> .count = 1,
> @@ -956,6 +965,8 @@ static int io_zcrx_tcp_recvmsg(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
> ret = IOU_REQUEUE;
> } else if (sock_flag(sk, SOCK_DONE)) {
> /* Make it to retry until it finally gets 0. */
> + if (len != UINT_MAX)
> + goto out;
> if (issue_flags & IO_URING_F_MULTISHOT)
> ret = IOU_REQUEUE;
> else
--
Pavel Begunkov
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH v1 1/2] io_uring/zcrx: add single shot recvzc
2025-02-20 13:45 ` Pavel Begunkov
@ 2025-02-20 19:29 ` David Wei
0 siblings, 0 replies; 5+ messages in thread
From: David Wei @ 2025-02-20 19:29 UTC (permalink / raw)
To: Pavel Begunkov, io-uring, netdev
Cc: Jens Axboe, Jakub Kicinski, Paolo Abeni, David S. Miller,
Eric Dumazet, Simon Horman
On 2025-02-20 05:45, Pavel Begunkov wrote:
> On 2/18/25 16:57, David Wei wrote:
>> Currently only multishot recvzc requests are supported, but sometimes
>> there is a need to do a single recv e.g. peeking at some data in the
>> socket. Add single shot recvzc requests where IORING_RECV_MULTISHOT is
>> _not_ set and the sqe->len field is set to the number of bytes to read
>> N.
>>
>> There could be multiple completions containing data, like the multishot
>> case, since N bytes could be split across multiple frags. This is
>> followed by a final completion with res and cflags both set to 0 that
>> indicate the completion of the request, or a -res that indicate an
>> error.
>>
>> Signed-off-by: David Wei <[email protected]>
>> ---
>> io_uring/net.c | 26 ++++++++++++++++++--------
>> io_uring/zcrx.c | 17 ++++++++++++++---
>> io_uring/zcrx.h | 2 +-
>> 3 files changed, 33 insertions(+), 12 deletions(-)
>>
>> diff --git a/io_uring/net.c b/io_uring/net.c
>> index 000dc70d08d0..d3a9aaa52a13 100644
>> --- a/io_uring/net.c
>> +++ b/io_uring/net.c
>> @@ -94,6 +94,7 @@ struct io_recvzc {
>> struct file *file;
>> unsigned msg_flags;
>> u16 flags;
>> + u32 len;
>
> Something is up with the types, it's u32, for which you use
> UINT_MAX, and later convert to ulong.
Inconsistency is my middle name.
I'll fix it up.
>
>> struct io_zcrx_ifq *ifq;
>> };
>>
> ...
>> @@ -1250,6 +1251,9 @@ int io_recvzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>> zc->ifq = req->ctx->ifq;
>> if (!zc->ifq)
>> return -EINVAL;
>> + zc->len = READ_ONCE(sqe->len);
>> + if (zc->len == UINT_MAX)
>> + return -EINVAL;
>> zc->flags = READ_ONCE(sqe->ioprio);
>> zc->msg_flags = READ_ONCE(sqe->msg_flags);
>> @@ -1257,12 +1261,14 @@ int io_recvzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>> return -EINVAL;
>> if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT))
>> return -EINVAL;
>> - /* multishot required */
>> - if (!(zc->flags & IORING_RECV_MULTISHOT))
>> - return -EINVAL;
>> - /* All data completions are posted as aux CQEs. */
>> - req->flags |= REQ_F_APOLL_MULTISHOT;
>> -
>> + if (zc->flags & IORING_RECV_MULTISHOT) {
>> + if (zc->len)
>> + return -EINVAL;
>> + /* All data completions are posted as aux CQEs. */
>> + req->flags |= REQ_F_APOLL_MULTISHOT;
>
> If you're posting "aux" cqes you have to set the flag for
> synchronisation reasons. We probably can split out a "I want to post
> aux cqes" flag, but it seems like you don't actually care about
> multishot here but limiting the length, or limiting the length + nowait.
Yeah, it's still "multishot" because there are still aux cqes for data
notifications. The requested N bytes could be in multiple frags. I'll
make sure REQ_F_APOLL_MULTISHOT is set.
>
>> + }
>> + if (!zc->len)
>> + zc->len = UINT_MAX;
>> return 0;
>> }
>> @@ -1281,7 +1287,7 @@ int io_recvzc(struct io_kiocb *req, unsigned int issue_flags)
>> return -ENOTSOCK;
>> ret = io_zcrx_recv(req, zc->ifq, sock, zc->msg_flags | MSG_DONTWAIT,
>> - issue_flags);
>> + issue_flags, zc->len);
>> if (unlikely(ret <= 0) && ret != -EAGAIN) {
>> if (ret == -ERESTARTSYS)
>> ret = -EINTR;
>> @@ -1296,6 +1302,10 @@ int io_recvzc(struct io_kiocb *req, unsigned int issue_flags)
>> return IOU_OK;
>> }
>> + if (zc->len != UINT_MAX) {
>> + io_req_set_res(req, ret, 0);
>> + return IOU_OK;
>> + }
>> if (issue_flags & IO_URING_F_MULTISHOT)
>> return IOU_ISSUE_SKIP_COMPLETE;
>> return -EAGAIN;
>> diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
>> index ea099f746599..834c887743c8 100644
>> --- a/io_uring/zcrx.c
>> +++ b/io_uring/zcrx.c
>> @@ -106,6 +106,7 @@ struct io_zcrx_args {
>> struct io_zcrx_ifq *ifq;
>> struct socket *sock;
>> unsigned nr_skbs;
>> + unsigned long len;
>> };
>> static const struct memory_provider_ops io_uring_pp_zc_ops;
>> @@ -826,6 +827,10 @@ io_zcrx_recv_skb(read_descriptor_t *desc, struct sk_buff *skb,
>> int i, copy, end, off;
>> int ret = 0;
>> + if (args->len == 0)
>> + return -EINTR;
>> + len = (args->len != UINT_MAX) ? min_t(size_t, len, args->len) : len;
>
> Just min?
Yes :facepalm:
>
>> +
>> if (unlikely(args->nr_skbs++ > IO_SKBS_PER_CALL_LIMIT))
>> return -EAGAIN;
>> @@ -920,17 +925,21 @@ io_zcrx_recv_skb(read_descriptor_t *desc, struct sk_buff *skb,
>> out:
>> if (offset == start_off)
>> return ret;
>> + args->len -= (offset - start_off);
>
> Doesn't it unconditionally change the magic value UINT_MAX
> you're trying to preserve?
Yes, you're right. I will fix this and add a test case.
>
>> + if (args->len == 0)
>> + desc->count = 0;
>> return offset - start_off;
>> }
>> static int io_zcrx_tcp_recvmsg(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
>> struct sock *sk, int flags,
>> - unsigned issue_flags)
>> + unsigned issue_flags, unsigned long len)
>> {
>> struct io_zcrx_args args = {
>> .req = req,
>> .ifq = ifq,
>> .sock = sk->sk_socket,
>> + .len = len,
>> };
>> read_descriptor_t rd_desc = {
>> .count = 1,
>> @@ -956,6 +965,8 @@ static int io_zcrx_tcp_recvmsg(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
>> ret = IOU_REQUEUE;
>> } else if (sock_flag(sk, SOCK_DONE)) {
>> /* Make it to retry until it finally gets 0. */
>> + if (len != UINT_MAX)
>> + goto out;
>> if (issue_flags & IO_URING_F_MULTISHOT)
>> ret = IOU_REQUEUE;
>> else
>
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2025-02-20 19:29 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-02-18 16:57 [PATCH v1 0/2] io_uring zc rx single shot recvzc David Wei
2025-02-18 16:57 ` [PATCH v1 1/2] io_uring/zcrx: add " David Wei
2025-02-20 13:45 ` Pavel Begunkov
2025-02-20 19:29 ` David Wei
2025-02-18 16:57 ` [PATCH v1 2/2] io_uring/zcrx: add selftest case for " David Wei
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox