* [PATCH] io_uring/net: allow filtering on IORING_OP_CONNECT
@ 2026-05-12 11:02 Shouvik Kar
2026-05-12 15:34 ` Jens Axboe
0 siblings, 1 reply; 2+ messages in thread
From: Shouvik Kar @ 2026-05-12 11:02 UTC (permalink / raw)
To: io-uring; +Cc: Jens Axboe, Pavel Begunkov, Kees Cook, Christian Brauner
This adds custom filtering for IORING_OP_CONNECT, where the target
family is always exposed, and (for AF_INET / AF_INET6) port and
address are exposed. port and v4_addr are in network byte order so
filter authors can compare against on-wire constants.
Population is skipped unless addr_len covers the fields being read, to
avoid leaking stale io_async_msghdr data on short connects.
Signed-off-by: Shouvik Kar <auxcorelabs@gmail.com>
---
include/uapi/linux/io_uring/bpf_filter.h | 16 +++++++++
io_uring/net.c | 41 ++++++++++++++++++++++++
io_uring/net.h | 7 ++++
io_uring/opdef.c | 2 ++
4 files changed, 66 insertions(+)
diff --git a/include/uapi/linux/io_uring/bpf_filter.h b/include/uapi/linux/io_uring/bpf_filter.h
index 1b461d792a7b..ce7d78ab13b3 100644
--- a/include/uapi/linux/io_uring/bpf_filter.h
+++ b/include/uapi/linux/io_uring/bpf_filter.h
@@ -27,6 +27,22 @@ struct io_uring_bpf_ctx {
__u64 mode;
__u64 resolve;
} open;
+ /*
+ * For CONNECT: fields are populated only when addr_len covers
+ * them; unpopulated fields are zero from the caller-side memset
+ * in io_uring_populate_bpf_ctx(). port and v4_addr are network
+ * byte order. Filters may only issue BPF_LD|BPF_W|BPF_ABS at
+ * 4-byte aligned offsets; load + mask for sub-word fields.
+ */
+ struct {
+ __u32 family; /* sa_family_t zero-extended */
+ __be16 port;
+ __u8 pad[2];
+ union {
+ __be32 v4_addr;
+ __u8 v6_addr[16];
+ };
+ } connect;
};
};
diff --git a/io_uring/net.c b/io_uring/net.c
index 30cd22c0b934..cceb5c1409ca 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -1674,6 +1674,47 @@ void io_socket_bpf_populate(struct io_uring_bpf_ctx *bctx, struct io_kiocb *req)
bctx->socket.protocol = sock->protocol;
}
+void io_connect_bpf_populate(struct io_uring_bpf_ctx *bctx, struct io_kiocb *req)
+{
+ struct io_connect *conn = io_kiocb_to_cmd(req, struct io_connect);
+ struct io_async_msghdr *iomsg = req->async_data;
+ struct sockaddr_storage *ss = &iomsg->addr;
+
+ /*
+ * move_addr_to_kernel() skips the copy for addr_len == 0, so
+ * iomsg->addr may hold stale data from a prior CONNECT. Bail
+ * unless addr_len covers the family discriminator.
+ */
+ if (conn->addr_len < (int)sizeof(sa_family_t))
+ return;
+
+ bctx->connect.family = ss->ss_family;
+ switch (ss->ss_family) {
+ case AF_INET: {
+ struct sockaddr_in *sin = (struct sockaddr_in *)ss;
+
+ if (conn->addr_len < (int)sizeof(*sin))
+ break;
+ bctx->connect.port = sin->sin_port;
+ bctx->connect.v4_addr = sin->sin_addr.s_addr;
+ break;
+ }
+ case AF_INET6: {
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss;
+
+ if (conn->addr_len < (int)sizeof(*sin6))
+ break;
+ bctx->connect.port = sin6->sin6_port;
+ memcpy(bctx->connect.v6_addr, &sin6->sin6_addr,
+ sizeof(bctx->connect.v6_addr));
+ break;
+ }
+ default:
+ /* family is set; per-family fields stay zero - family-only filtering */
+ break;
+ }
+}
+
int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_socket *sock = io_kiocb_to_cmd(req, struct io_socket);
diff --git a/io_uring/net.h b/io_uring/net.h
index d4d1ddce50e3..51fda715d3c0 100644
--- a/io_uring/net.h
+++ b/io_uring/net.h
@@ -46,6 +46,7 @@ int io_accept(struct io_kiocb *req, unsigned int issue_flags);
int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
int io_socket(struct io_kiocb *req, unsigned int issue_flags);
void io_socket_bpf_populate(struct io_uring_bpf_ctx *bctx, struct io_kiocb *req);
+void io_connect_bpf_populate(struct io_uring_bpf_ctx *bctx, struct io_kiocb *req);
int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
int io_connect(struct io_kiocb *req, unsigned int issue_flags);
@@ -69,4 +70,10 @@ static inline void io_socket_bpf_populate(struct io_uring_bpf_ctx *bctx,
struct io_kiocb *req)
{
}
+
+static inline void io_connect_bpf_populate(struct io_uring_bpf_ctx *bctx,
+ struct io_kiocb *req)
+{
+}
+
#endif
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index c3ef52b70811..8ea6bd274607 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -203,9 +203,11 @@ const struct io_issue_def io_issue_defs[] = {
.unbound_nonreg_file = 1,
.pollout = 1,
#if defined(CONFIG_NET)
+ .filter_pdu_size = sizeof_field(struct io_uring_bpf_ctx, connect),
.async_size = sizeof(struct io_async_msghdr),
.prep = io_connect_prep,
.issue = io_connect,
+ .filter_populate = io_connect_bpf_populate,
#else
.prep = io_eopnotsupp_prep,
#endif
--
2.53.0
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH] io_uring/net: allow filtering on IORING_OP_CONNECT
2026-05-12 11:02 [PATCH] io_uring/net: allow filtering on IORING_OP_CONNECT Shouvik Kar
@ 2026-05-12 15:34 ` Jens Axboe
0 siblings, 0 replies; 2+ messages in thread
From: Jens Axboe @ 2026-05-12 15:34 UTC (permalink / raw)
To: Shouvik Kar, io-uring; +Cc: Pavel Begunkov, Kees Cook, Christian Brauner
On 5/12/26 5:02 AM, Shouvik Kar wrote:
> This adds custom filtering for IORING_OP_CONNECT, where the target
> family is always exposed, and (for AF_INET / AF_INET6) port and
> address are exposed. port and v4_addr are in network byte order so
> filter authors can compare against on-wire constants.
>
> Skip population unless addr_len covers the populated fields, to
> avoid leaking stale io_async_msghdr data on short connects.
Looks pretty straightforward to me. Do you have a liburing test
case for this too?
--
Jens Axboe
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2026-05-12 15:34 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-05-12 11:02 [PATCH] io_uring/net: allow filtering on IORING_OP_CONNECT Shouvik Kar
2026-05-12 15:34 ` Jens Axboe
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox