From: Jens Axboe <axboe@kernel.dk>
To: io-uring@vger.kernel.org
Cc: Jens Axboe <axboe@kernel.dk>
Subject: [PATCH 3/3] io_uring: allow registration of per-task restrictions
Date: Thu, 15 Jan 2026 09:36:34 -0700 [thread overview]
Message-ID: <20260115165244.1037465-4-axboe@kernel.dk> (raw)
In-Reply-To: <20260115165244.1037465-1-axboe@kernel.dk>
Currently io_uring supports restricting operations on a per-ring basis.
To use those, the ring must be set up in a disabled state by setting
IORING_SETUP_R_DISABLED. Then restrictions can be set for the ring, and
the ring can then be enabled.
This commit adds support for IORING_REGISTER_RESTRICTIONS_TASK, which
allows registering the same kind of restrictions, but with the task
itself rather than with a specific ring. Once done, any ring created
will inherit these restrictions.
If a restriction filter is registered with a task, then it's
inherited by its children on fork. Children may only further restrict
operations, not extend them.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
include/linux/io_uring.h | 2 +-
include/linux/sched.h | 1 +
include/uapi/linux/io_uring.h | 9 +++++
io_uring/io_uring.c | 14 ++++++++
io_uring/register.c | 65 +++++++++++++++++++++++++++++++++++
io_uring/tctx.c | 26 +++++++++-----
kernel/fork.c | 4 +++
7 files changed, 111 insertions(+), 10 deletions(-)
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index 85fe4e6b275c..cfd2f4c667ee 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -25,7 +25,7 @@ static inline void io_uring_task_cancel(void)
}
static inline void io_uring_free(struct task_struct *tsk)
{
- if (tsk->io_uring)
+ if (tsk->io_uring || tsk->io_uring_restrict)
__io_uring_free(tsk);
}
#else
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d395f2810fac..9abbd11bb87c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1190,6 +1190,7 @@ struct task_struct {
#ifdef CONFIG_IO_URING
struct io_uring_task *io_uring;
+ struct io_restriction *io_uring_restrict;
#endif
/* Namespaces: */
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 0e1b0871fe5e..dcf70e064e45 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -703,6 +703,8 @@ enum io_uring_register_op {
/* register bpf filtering programs */
IORING_REGISTER_BPF_FILTER = 37,
+ IORING_REGISTER_RESTRICTIONS_TASK = 38,
+
/* this goes last */
IORING_REGISTER_LAST,
@@ -808,6 +810,13 @@ struct io_uring_restriction {
__u32 resv2[3];
};
+struct io_uring_task_restriction { /* header copied from userspace for IORING_REGISTER_RESTRICTIONS_TASK */
+ __u16 flags; /* registration rejects any non-zero value with -EINVAL */
+ __u16 nr_res; /* count passed to the restriction parser along with restrictions[] */
+ __u32 resv[3]; /* registration requires all-zero, otherwise -EINVAL */
+ __DECLARE_FLEX_ARRAY(struct io_uring_restriction, restrictions); /* entries parsed at registration */
+};
+
struct io_uring_clock_register {
__u32 clockid;
__u32 __resv[3];
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 80aeb498ec8a..f1625e4c6c7b 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -3623,6 +3623,20 @@ static __cold int io_uring_create(struct io_ctx_config *config)
else
ctx->notify_method = TWA_SIGNAL;
+ /*
+ * If the current task has restrictions enabled, then copy them to
+ * our newly created ring and mark it as registered.
+ */
+ if (current->io_uring_restrict) {
+ struct io_restriction *res = current->io_uring_restrict;
+
+ refcount_inc(&res->refs);
+ ctx->restrictions = res;
+ ctx->op_restricted = res->op_registered;
+ ctx->reg_restricted = res->reg_registered;
+ ctx->bpf_restricted = res->bpf_registered;
+ }
+
/*
* This is just grabbed for accounting purposes. When a process exits,
* the mm is exited and dropped before the files, hence we need to hang
diff --git a/io_uring/register.c b/io_uring/register.c
index cb006d53a146..00b9508b18f9 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -223,6 +223,67 @@ static __cold int io_register_restrictions(struct io_ring_ctx *ctx,
return 0;
}
+/*
+ * Register io_uring restrictions on the current task instead of on a ring.
+ *
+ * @arg points to a struct io_uring_task_restriction header whose trailing
+ * restrictions[] array holds nr_res entries; @nr_args must be 1.
+ *
+ * Returns 0 on success, -EPERM if the task already has a restriction set,
+ * -EINVAL for a bad @nr_args or non-zero flags/resv, -EFAULT if the header
+ * cannot be copied in, or the error from allocating/parsing the set.
+ */
+static int io_register_restrictions_task(void __user *arg, unsigned int nr_args)
+{
+	struct io_uring_task_restriction __user *ures = arg;
+	struct io_uring_task_restriction tres;
+	struct io_restriction *res;
+	int ret;
+
+	/* Disallow if task already has registered restrictions */
+	if (current->io_uring_restrict)
+		return -EPERM;
+	if (nr_args != 1)
+		return -EINVAL;
+
+	if (copy_from_user(&tres, arg, sizeof(tres)))
+		return -EFAULT;
+
+	/* No flags are defined yet, and reserved space must stay zeroed */
+	if (tres.flags)
+		return -EINVAL;
+	if (!mem_is_zero(tres.resv, sizeof(tres.resv)))
+		return -EINVAL;
+
+	res = io_alloc_restrictions();
+	if (IS_ERR(res))
+		return PTR_ERR(res);
+
+	ret = io_parse_restrictions(ures->restrictions, tres.nr_res, res);
+	if (ret < 0) {
+		/*
+		 * Release through the put helper rather than a raw kfree(),
+		 * matching the other paths that drop a restriction set.
+		 */
+		io_put_restrictions(res);
+		return ret;
+	}
+	/* Only publish the set on the task once it parsed cleanly */
+	current->io_uring_restrict = res;
+	return 0;
+}
+
+static int io_register_bpf_filter_task(void __user *arg, unsigned int nr_args)
+{ /* Register a BPF filter on current's restriction set; 0 on success, else the alloc/helper error or -EINVAL. */
+ struct io_restriction *res;
+ int ret;
+
+ if (nr_args != 1) /* exactly one argument is accepted */
+ return -EINVAL;
+
+ /* If no task restrictions exist, set up a new set */
+ res = current->io_uring_restrict;
+ if (!res) {
+ res = io_alloc_restrictions();
+ if (IS_ERR(res))
+ return PTR_ERR(res);
+ }
+
+ ret = io_register_bpf_filter(res, arg); /* NOTE(review): an existing set may also be referenced by forked tasks and previously created rings (both refcount_inc it) - confirm in-place registration is intended */
+ if (ret) {
+ if (res != current->io_uring_restrict) /* only drop a set that was allocated above */
+ io_put_restrictions(res);
+ return ret;
+ }
+ if (!current->io_uring_restrict) /* attach a newly allocated set only after the filter registered */
+ current->io_uring_restrict = res;
+ return 0;
+}
+
static int io_register_enable_rings(struct io_ring_ctx *ctx)
{
if (!(ctx->flags & IORING_SETUP_R_DISABLED))
@@ -955,6 +1016,10 @@ static int io_uring_register_blind(unsigned int opcode, void __user *arg,
return io_uring_register_send_msg_ring(arg, nr_args);
case IORING_REGISTER_QUERY:
return io_query(arg, nr_args);
+ case IORING_REGISTER_RESTRICTIONS_TASK:
+ return io_register_restrictions_task(arg, nr_args);
+ case IORING_REGISTER_BPF_FILTER:
+ return io_register_bpf_filter_task(arg, nr_args);
}
return -EINVAL;
}
diff --git a/io_uring/tctx.c b/io_uring/tctx.c
index 5b66755579c0..d45785dcd2e3 100644
--- a/io_uring/tctx.c
+++ b/io_uring/tctx.c
@@ -11,6 +11,8 @@
#include "io_uring.h"
#include "tctx.h"
+#include "register.h"
+#include "bpf_filter.h"
static struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx,
struct task_struct *task)
@@ -54,16 +56,22 @@ void __io_uring_free(struct task_struct *tsk)
* node is stored in the xarray. Until that gets sorted out, attempt
* an iteration here and warn if any entries are found.
*/
- xa_for_each(&tctx->xa, index, node) {
- WARN_ON_ONCE(1);
- break;
- }
- WARN_ON_ONCE(tctx->io_wq);
- WARN_ON_ONCE(tctx->cached_refs);
+ if (tctx) {
+ xa_for_each(&tctx->xa, index, node) {
+ WARN_ON_ONCE(1);
+ break;
+ }
+ WARN_ON_ONCE(tctx->io_wq);
+ WARN_ON_ONCE(tctx->cached_refs);
- percpu_counter_destroy(&tctx->inflight);
- kfree(tctx);
- tsk->io_uring = NULL;
+ percpu_counter_destroy(&tctx->inflight);
+ kfree(tctx);
+ tsk->io_uring = NULL;
+ }
+ if (tsk->io_uring_restrict) {
+ io_put_restrictions(tsk->io_uring_restrict);
+ tsk->io_uring_restrict = NULL;
+ }
}
__cold int io_uring_alloc_task_context(struct task_struct *task,
diff --git a/kernel/fork.c b/kernel/fork.c
index b1f3915d5f8e..da8fd6fd384c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -97,6 +97,7 @@
#include <linux/kasan.h>
#include <linux/scs.h>
#include <linux/io_uring.h>
+#include <linux/io_uring_types.h>
#include <linux/bpf.h>
#include <linux/stackprotector.h>
#include <linux/user_events.h>
@@ -2129,6 +2130,8 @@ __latent_entropy struct task_struct *copy_process(
#ifdef CONFIG_IO_URING
p->io_uring = NULL;
+ if (p->io_uring_restrict)
+ refcount_inc(&p->io_uring_restrict->refs);
#endif
p->default_timer_slack_ns = current->timer_slack_ns;
@@ -2525,6 +2528,7 @@ __latent_entropy struct task_struct *copy_process(
mpol_put(p->mempolicy);
#endif
bad_fork_cleanup_delayacct:
+ io_uring_free(p);
delayacct_tsk_free(p);
bad_fork_cleanup_count:
dec_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1);
--
2.51.0
prev parent reply other threads:[~2026-01-15 16:52 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-01-15 16:36 [PATCHSET RFC v3] Inherited restrictions and BPF filtering Jens Axboe
2026-01-15 16:36 ` [PATCH 1/3] io_uring: move ctx->restrictions to be dynamically allocated Jens Axboe
2026-01-15 16:36 ` [PATCH 2/3] io_uring: add support for BPF filtering for opcode restrictions Jens Axboe
2026-01-15 20:11 ` Jonathan Corbet
2026-01-15 21:02 ` Jens Axboe
2026-01-15 21:05 ` Jonathan Corbet
2026-01-15 21:08 ` Jens Axboe
2026-01-15 16:36 ` Jens Axboe [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260115165244.1037465-4-axboe@kernel.dk \
--to=axboe@kernel.dk \
--cc=io-uring@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox