[PATCHSET 0/2] cBPF filter API adjustment

public inbox for io-uring@vger.kernel.org
 help / color / mirror / Atom feed

* [PATCHSET 0/2] cBPF filter API adjustment
@ 2026-02-11 15:01 Jens Axboe
  2026-02-11 15:01 ` [PATCH 1/2] io_uring/bpf_filter: move filter size and populate helper into struct Jens Axboe
  2026-02-11 15:01 ` [PATCH 2/2] io_uring/bpf_filter: pass in expected filter payload size Jens Axboe
  0 siblings, 2 replies; 3+ messages in thread
From: Jens Axboe @ 2026-02-11 15:01 UTC (permalink / raw)
  To: io-uring; +Cc: brauner, linux-kernel

Hi,

Christian brought up a good point on the API - what if the task and
kernel differ on what the payload size is for an opcode? Currently
there are two defined payloads, inside struct io_uring_bpf_ctx:

	struct {
		__u32	family;
		__u32	type;
		__u32	protocol;
	} socket;

	struct {
		__u64	flags;
		__u64	mode;
		__u64	resolve;
	} open;

and it could be a requirement that a filter exactly matches the payload
that the kernel uses, if extensions have been made on the kernel side.
Hence this small series updates the API slightly:

struct io_uring_bpf_filter adds a pdu_size field, which userspace can
set to the size it expects. For an OPENAT/OPENAT2 filter, that would
be sizeof(struct open) above. The kernel can validate that they match,
where the mismatch policy is controlled by userspace. See patch 2 for
details. In case of a mismatch that causes an error, the kernel side
pdu_size is copied back to userspace.

Patch 1 exposes the pdu_size by shoving the filtering and pdu_size
into the issue side definitions, and patch 2 implements the above
size checking.

The liburing master branch has been updated as well for this, as
copying back the pdu_size necessitates changing the API on that side.
Test cases and man pages are updated as well.

 include/uapi/linux/io_uring/bpf_filter.h |  8 ++-
 io_uring/bpf_filter.c                    | 82 ++++++++++++++++--------
 io_uring/opdef.c                         |  6 ++
 io_uring/opdef.h                         |  6 ++
 4 files changed, 74 insertions(+), 28 deletions(-)

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH 1/2] io_uring/bpf_filter: move filter size and populate helper into struct
  2026-02-11 15:01 [PATCHSET 0/2] cBPF filter API adjustment Jens Axboe
@ 2026-02-11 15:01 ` Jens Axboe
  2026-02-11 15:01 ` [PATCH 2/2] io_uring/bpf_filter: pass in expected filter payload size Jens Axboe
  1 sibling, 0 replies; 3+ messages in thread
From: Jens Axboe @ 2026-02-11 15:01 UTC (permalink / raw)
  To: io-uring; +Cc: brauner, linux-kernel, Jens Axboe

Rather than open-code this logic in io_uring_populate_bpf_ctx() with
a switch, move it to the issue side definitions. Outside of making this
easier to extend in the future, it's also a prep patch for using the
pdu size for a given opcode filter elsewhere.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/bpf_filter.c | 17 ++++++-----------
 io_uring/opdef.c      |  6 ++++++
 io_uring/opdef.h      |  6 ++++++
 3 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/io_uring/bpf_filter.c b/io_uring/bpf_filter.c
index 3816883a45ed..8ac7d06de122 100644
--- a/io_uring/bpf_filter.c
+++ b/io_uring/bpf_filter.c
@@ -26,6 +26,8 @@ static const struct io_bpf_filter dummy_filter;
 static void io_uring_populate_bpf_ctx(struct io_uring_bpf_ctx *bctx,
 				      struct io_kiocb *req)
 {
+	const struct io_issue_def *def = &io_issue_defs[req->opcode];
+
 	bctx->opcode = req->opcode;
 	bctx->sqe_flags = (__force int) req->flags & SQE_VALID_FLAGS;
 	bctx->user_data = req->cqe.user_data;
@@ -34,19 +36,12 @@ static void io_uring_populate_bpf_ctx(struct io_uring_bpf_ctx *bctx,
 		sizeof(*bctx) - offsetof(struct io_uring_bpf_ctx, pdu_size));
 
 	/*
-	 * Opcodes can provide a handler fo populating more data into bctx,
+	 * Opcodes can provide a handler for populating more data into bctx,
 	 * for filters to use.
 	 */
-	switch (req->opcode) {
-	case IORING_OP_SOCKET:
-		bctx->pdu_size = sizeof(bctx->socket);
-		io_socket_bpf_populate(bctx, req);
-		break;
-	case IORING_OP_OPENAT:
-	case IORING_OP_OPENAT2:
-		bctx->pdu_size = sizeof(bctx->open);
-		io_openat_bpf_populate(bctx, req);
-		break;
+	if (def->filter_pdu_size) {
+		bctx->pdu_size = def->filter_pdu_size;
+		def->filter_populate(bctx, req);
 	}
 }
 
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index df52d760240e..91a23baf415e 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -221,8 +221,10 @@ const struct io_issue_def io_issue_defs[] = {
 		.issue			= io_fallocate,
 	},
 	[IORING_OP_OPENAT] = {
+		.filter_pdu_size	= sizeof_field(struct io_uring_bpf_ctx, open),
 		.prep			= io_openat_prep,
 		.issue			= io_openat,
+		.filter_populate	= io_openat_bpf_populate,
 	},
 	[IORING_OP_CLOSE] = {
 		.prep			= io_close_prep,
@@ -309,8 +311,10 @@ const struct io_issue_def io_issue_defs[] = {
 #endif
 	},
 	[IORING_OP_OPENAT2] = {
+		.filter_pdu_size	= sizeof_field(struct io_uring_bpf_ctx, open),
 		.prep			= io_openat2_prep,
 		.issue			= io_openat2,
+		.filter_populate	= io_openat_bpf_populate,
 	},
 	[IORING_OP_EPOLL_CTL] = {
 		.unbound_nonreg_file	= 1,
@@ -406,8 +410,10 @@ const struct io_issue_def io_issue_defs[] = {
 	[IORING_OP_SOCKET] = {
 		.audit_skip		= 1,
 #if defined(CONFIG_NET)
+		.filter_pdu_size	= sizeof_field(struct io_uring_bpf_ctx, socket),
 		.prep			= io_socket_prep,
 		.issue			= io_socket,
+		.filter_populate	= io_socket_bpf_populate,
 #else
 		.prep			= io_eopnotsupp_prep,
 #endif
diff --git a/io_uring/opdef.h b/io_uring/opdef.h
index aa37846880ff..faf3955dce8b 100644
--- a/io_uring/opdef.h
+++ b/io_uring/opdef.h
@@ -2,6 +2,8 @@
 #ifndef IOU_OP_DEF_H
 #define IOU_OP_DEF_H
 
+struct io_uring_bpf_ctx;
+
 struct io_issue_def {
 	/* needs req->file assigned */
 	unsigned		needs_file : 1;
@@ -33,8 +35,12 @@ struct io_issue_def {
 	/* size of async data needed, if any */
 	unsigned short		async_size;
 
+	/* bpf filter pdu size, if any */
+	unsigned short		filter_pdu_size;
+
 	int (*issue)(struct io_kiocb *, unsigned int);
 	int (*prep)(struct io_kiocb *, const struct io_uring_sqe *);
+	void (*filter_populate)(struct io_uring_bpf_ctx *, struct io_kiocb *);
 };
 
 struct io_cold_def {
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/2] io_uring/bpf_filter: pass in expected filter payload size
  2026-02-11 15:01 [PATCHSET 0/2] cBPF filter API adjustment Jens Axboe
  2026-02-11 15:01 ` [PATCH 1/2] io_uring/bpf_filter: move filter size and populate helper into struct Jens Axboe
@ 2026-02-11 15:01 ` Jens Axboe
  1 sibling, 0 replies; 3+ messages in thread
From: Jens Axboe @ 2026-02-11 15:01 UTC (permalink / raw)
  To: io-uring; +Cc: brauner, linux-kernel, Jens Axboe

It's quite possible that the payloads attached to opcodes like
IORING_OP_OPENAT/OPENAT2 or IORING_OP_SOCKET can change over time.
For example, on the openat/openat2 side, the
struct open_how argument is extensible, and could be extended in the
future to allow further arguments to be passed in.

Allow registration of a cBPF filter to give the size of the filter as
seen by userspace. If that filter is for an opcode that takes extra
payload data, allow it if the application payload expectation is the
same size as the kernel's. If that is the case, the kernel supports
filtering on the payload that the application expects. If the size
differs, the behavior depends on the IO_URING_BPF_FILTER_SZ_STRICT flag:

1) If IO_URING_BPF_FILTER_SZ_STRICT is set and the size expectation
   differs, fail the attempt to load the filter.

2) If IO_URING_BPF_FILTER_SZ_STRICT isn't set, allow the filter if
   the userspace pdu size is smaller than what the kernel offers.

3) Regardless of IO_URING_BPF_FILTER_SZ_STRICT, fail loading the filter
   if the userspace pdu size is bigger than what the kernel supports.

An attempt to load a filter that fails due to sizing will error with -EMSGSIZE.
For that error, the registration struct will have filter->pdu_size
populated with the pdu size that the kernel uses.

Reported-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring/bpf_filter.h |  8 ++-
 io_uring/bpf_filter.c                    | 65 ++++++++++++++++++------
 2 files changed, 56 insertions(+), 17 deletions(-)

diff --git a/include/uapi/linux/io_uring/bpf_filter.h b/include/uapi/linux/io_uring/bpf_filter.h
index 220351b81bc0..1b461d792a7b 100644
--- a/include/uapi/linux/io_uring/bpf_filter.h
+++ b/include/uapi/linux/io_uring/bpf_filter.h
@@ -35,13 +35,19 @@ enum {
 	 * If set, any currently unset opcode will have a deny filter attached
 	 */
 	IO_URING_BPF_FILTER_DENY_REST	= 1,
+	/*
+	 * If set, if kernel and application don't agree on pdu_size for
+	 * the given opcode, fail the registration of the filter.
+	 */
+	IO_URING_BPF_FILTER_SZ_STRICT	= 2,
 };
 
 struct io_uring_bpf_filter {
 	__u32	opcode;		/* io_uring opcode to filter */
 	__u32	flags;
 	__u32	filter_len;	/* number of BPF instructions */
-	__u32	resv;
+	__u8	pdu_size;	/* expected pdu size for opcode */
+	__u8	resv[3];
 	__u64	filter_ptr;	/* pointer to BPF filter */
 	__u64	resv2[5];
 };
diff --git a/io_uring/bpf_filter.c b/io_uring/bpf_filter.c
index 8ac7d06de122..4e1dd955c8c4 100644
--- a/io_uring/bpf_filter.c
+++ b/io_uring/bpf_filter.c
@@ -308,36 +308,69 @@ static struct io_bpf_filters *io_bpf_filter_cow(struct io_restriction *src)
 	return ERR_PTR(-EBUSY);
 }
 
-#define IO_URING_BPF_FILTER_FLAGS	IO_URING_BPF_FILTER_DENY_REST
+#define IO_URING_BPF_FILTER_FLAGS	(IO_URING_BPF_FILTER_DENY_REST | \
+					 IO_URING_BPF_FILTER_SZ_STRICT)
 
-int io_register_bpf_filter(struct io_restriction *res,
-			   struct io_uring_bpf __user *arg)
+static int io_bpf_filter_import(struct io_uring_bpf *reg,
+				struct io_uring_bpf __user *arg)
 {
-	struct io_bpf_filters *filters, *old_filters = NULL;
-	struct io_bpf_filter *filter, *old_filter;
-	struct io_uring_bpf reg;
-	struct bpf_prog *prog;
-	struct sock_fprog fprog;
+	const struct io_issue_def *def;
 	int ret;
 
-	if (copy_from_user(&reg, arg, sizeof(reg)))
+	if (copy_from_user(reg, arg, sizeof(*reg)))
 		return -EFAULT;
-	if (reg.cmd_type != IO_URING_BPF_CMD_FILTER)
+	if (reg->cmd_type != IO_URING_BPF_CMD_FILTER)
 		return -EINVAL;
-	if (reg.cmd_flags || reg.resv)
+	if (reg->cmd_flags || reg->resv)
 		return -EINVAL;
 
-	if (reg.filter.opcode >= IORING_OP_LAST)
+	if (reg->filter.opcode >= IORING_OP_LAST)
 		return -EINVAL;
-	if (reg.filter.flags & ~IO_URING_BPF_FILTER_FLAGS)
+	if (reg->filter.flags & ~IO_URING_BPF_FILTER_FLAGS)
 		return -EINVAL;
-	if (reg.filter.resv)
+	if (!mem_is_zero(reg->filter.resv, sizeof(reg->filter.resv)))
 		return -EINVAL;
-	if (!mem_is_zero(reg.filter.resv2, sizeof(reg.filter.resv2)))
+	if (!mem_is_zero(reg->filter.resv2, sizeof(reg->filter.resv2)))
 		return -EINVAL;
-	if (!reg.filter.filter_len || reg.filter.filter_len > BPF_MAXINSNS)
+	if (!reg->filter.filter_len || reg->filter.filter_len > BPF_MAXINSNS)
 		return -EINVAL;
 
+	/* Verify filter size */
+	def = &io_issue_defs[reg->filter.opcode];
+
+	/* same size, always ok */
+	ret = 0;
+	if (reg->filter.pdu_size == def->filter_pdu_size)
+		;
+	/* size differs, fail in strict mode */
+	else if (reg->filter.flags & IO_URING_BPF_FILTER_SZ_STRICT)
+		ret = -EMSGSIZE;
+	/* userspace filter is bigger, always disallow */
+	else if (reg->filter.pdu_size > def->filter_pdu_size)
+		ret = -EMSGSIZE;
+
+	/* copy back kernel filter size */
+	reg->filter.pdu_size = def->filter_pdu_size;
+	if (copy_to_user(&arg->filter, &reg->filter, sizeof(reg->filter)))
+		return -EFAULT;
+
+	return ret;
+}
+
+int io_register_bpf_filter(struct io_restriction *res,
+			   struct io_uring_bpf __user *arg)
+{
+	struct io_bpf_filters *filters, *old_filters = NULL;
+	struct io_bpf_filter *filter, *old_filter;
+	struct io_uring_bpf reg;
+	struct bpf_prog *prog;
+	struct sock_fprog fprog;
+	int ret;
+
+	ret = io_bpf_filter_import(&reg, arg);
+	if (ret)
+		return ret;
+
 	fprog.len = reg.filter.filter_len;
 	fprog.filter = u64_to_user_ptr(reg.filter.filter_ptr);
 
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2026-02-11 15:06 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-02-11 15:01 [PATCHSET 0/2] cBPF filter API adjustment Jens Axboe
2026-02-11 15:01 ` [PATCH 1/2] io_uring/bpf_filter: move filter size and populate helper into struct Jens Axboe
2026-02-11 15:01 ` [PATCH 2/2] io_uring/bpf_filter: pass in expected filter payload size Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox