* [PATCHSET for-next 0/2] Replace io_ring_ctx bitfields with flags
@ 2026-03-14 14:58 Jens Axboe
2026-03-14 14:58 ` [PATCH 1/2] io_uring: switch struct io_ring_ctx internal bitfields to flags Jens Axboe
2026-03-14 14:58 ` [PATCH 2/2] io_uring: mark known and harmless racy ctx->int_flags uses Jens Axboe
0 siblings, 2 replies; 4+ messages in thread
From: Jens Axboe @ 2026-03-14 14:58 UTC (permalink / raw)
To: io-uring
Hi,
No functional changes here, just gets rid of the bitfields in ctx
and replaces them with a single int_flags member and a set of flags.
This makes it more obvious that these are manipulated and checked
together.
include/linux/io_uring_types.h | 32 +++++++------
io_uring/eventfd.c | 4 +-
io_uring/io_uring.c | 82 +++++++++++++++++-----------------
io_uring/io_uring.h | 10 +++--
io_uring/msg_ring.c | 2 +-
io_uring/register.c | 8 ++--
io_uring/rsrc.c | 8 ++--
io_uring/tctx.c | 2 +-
io_uring/timeout.c | 4 +-
io_uring/tw.c | 2 +-
10 files changed, 81 insertions(+), 73 deletions(-)
--
Jens Axboe
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH 1/2] io_uring: switch struct io_ring_ctx internal bitfields to flags
2026-03-14 14:58 [PATCHSET for-next 0/2] Replace io_ring_ctx bitfields with flags Jens Axboe
@ 2026-03-14 14:58 ` Jens Axboe
2026-03-15 3:16 ` Gabriel Krisman Bertazi
2026-03-14 14:58 ` [PATCH 2/2] io_uring: mark known and harmless racy ctx->int_flags uses Jens Axboe
1 sibling, 1 reply; 4+ messages in thread
From: Jens Axboe @ 2026-03-14 14:58 UTC (permalink / raw)
To: io-uring; +Cc: Jens Axboe
Bitfields cannot be set and checked atomically, and this makes it more
clear that these are indeed in shared storage and must be checked and
set in a sane fashion. This is in preparation for annotating a few of
the known racy, but harmless, flags checking.
No intended functional changes in this patch.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
include/linux/io_uring_types.h | 32 +++++++------
io_uring/eventfd.c | 4 +-
io_uring/io_uring.c | 82 +++++++++++++++++-----------------
io_uring/io_uring.h | 9 ++--
io_uring/msg_ring.c | 2 +-
io_uring/register.c | 8 ++--
io_uring/rsrc.c | 8 ++--
io_uring/tctx.c | 2 +-
io_uring/timeout.c | 4 +-
io_uring/tw.c | 2 +-
10 files changed, 80 insertions(+), 73 deletions(-)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index dd1420bfcb73..b84576374c7b 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -268,24 +268,28 @@ struct io_alloc_cache {
unsigned int init_clear;
};
+enum {
+ IO_RING_F_DRAIN_NEXT = BIT(0),
+ IO_RING_F_OP_RESTRICTED = BIT(1),
+ IO_RING_F_REG_RESTRICTED = BIT(2),
+ IO_RING_F_OFF_TIMEOUT_USED = BIT(3),
+ IO_RING_F_DRAIN_ACTIVE = BIT(4),
+ IO_RING_F_HAS_EVFD = BIT(5),
+ /* all CQEs should be posted only by the submitter task */
+ IO_RING_F_TASK_COMPLETE = BIT(6),
+ IO_RING_F_LOCKLESS_CQ = BIT(7),
+ IO_RING_F_SYSCALL_IOPOLL = BIT(8),
+ IO_RING_F_POLL_ACTIVATED = BIT(9),
+ IO_RING_F_DRAIN_DISABLED = BIT(10),
+ IO_RING_F_COMPAT = BIT(11),
+ IO_RING_F_IOWQ_LIMITS_SET = BIT(12),
+};
+
struct io_ring_ctx {
/* const or read-mostly hot data */
struct {
unsigned int flags;
- unsigned int drain_next: 1;
- unsigned int op_restricted: 1;
- unsigned int reg_restricted: 1;
- unsigned int off_timeout_used: 1;
- unsigned int drain_active: 1;
- unsigned int has_evfd: 1;
- /* all CQEs should be posted only by the submitter task */
- unsigned int task_complete: 1;
- unsigned int lockless_cq: 1;
- unsigned int syscall_iopoll: 1;
- unsigned int poll_activated: 1;
- unsigned int drain_disabled: 1;
- unsigned int compat: 1;
- unsigned int iowq_limits_set : 1;
+ unsigned int int_flags;
struct task_struct *submitter_task;
struct io_rings *rings;
diff --git a/io_uring/eventfd.c b/io_uring/eventfd.c
index 7482a7dc6b38..3da028500f76 100644
--- a/io_uring/eventfd.c
+++ b/io_uring/eventfd.c
@@ -148,7 +148,7 @@ int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
spin_unlock(&ctx->completion_lock);
ev_fd->eventfd_async = eventfd_async;
- ctx->has_evfd = true;
+ ctx->int_flags |= IO_RING_F_HAS_EVFD;
refcount_set(&ev_fd->refs, 1);
atomic_set(&ev_fd->ops, 0);
rcu_assign_pointer(ctx->io_ev_fd, ev_fd);
@@ -162,7 +162,7 @@ int io_eventfd_unregister(struct io_ring_ctx *ctx)
ev_fd = rcu_dereference_protected(ctx->io_ev_fd,
lockdep_is_held(&ctx->uring_lock));
if (ev_fd) {
- ctx->has_evfd = false;
+ ctx->int_flags &= ~IO_RING_F_HAS_EVFD;
rcu_assign_pointer(ctx->io_ev_fd, NULL);
io_eventfd_put(ev_fd);
return 0;
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 9a37035e76c0..bfeb3bc3849d 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -477,17 +477,17 @@ static __cold noinline void io_queue_deferred(struct io_ring_ctx *ctx)
void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
{
- if (ctx->poll_activated)
+ if (ctx->int_flags & IO_RING_F_POLL_ACTIVATED)
io_poll_wq_wake(ctx);
- if (ctx->off_timeout_used)
+ if (ctx->int_flags & IO_RING_F_OFF_TIMEOUT_USED)
io_flush_timeouts(ctx);
- if (ctx->has_evfd)
+ if (ctx->int_flags & IO_RING_F_HAS_EVFD)
io_eventfd_signal(ctx, true);
}
static inline void __io_cq_lock(struct io_ring_ctx *ctx)
{
- if (!ctx->lockless_cq)
+ if (!(ctx->int_flags & IO_RING_F_LOCKLESS_CQ))
spin_lock(&ctx->completion_lock);
}
@@ -500,11 +500,11 @@ static inline void io_cq_lock(struct io_ring_ctx *ctx)
static inline void __io_cq_unlock_post(struct io_ring_ctx *ctx)
{
io_commit_cqring(ctx);
- if (!ctx->task_complete) {
- if (!ctx->lockless_cq)
+ if (!(ctx->int_flags & IO_RING_F_TASK_COMPLETE)) {
+ if (!(ctx->int_flags & IO_RING_F_LOCKLESS_CQ))
spin_unlock(&ctx->completion_lock);
/* IOPOLL rings only need to wake up if it's also SQPOLL */
- if (!ctx->syscall_iopoll)
+ if (!(ctx->int_flags & IO_RING_F_SYSCALL_IOPOLL))
io_cqring_wake(ctx);
}
io_commit_cqring_flush(ctx);
@@ -830,7 +830,7 @@ bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags
void io_add_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
{
lockdep_assert_held(&ctx->uring_lock);
- lockdep_assert(ctx->lockless_cq);
+ lockdep_assert(ctx->int_flags & IO_RING_F_LOCKLESS_CQ);
if (!io_fill_cqe_aux(ctx, user_data, res, cflags)) {
struct io_cqe cqe = io_init_cqe(user_data, res, cflags);
@@ -860,7 +860,7 @@ bool io_req_post_cqe(struct io_kiocb *req, s32 res, u32 cflags)
lockdep_assert(!io_wq_current_is_worker());
lockdep_assert_held(&ctx->uring_lock);
- if (!ctx->lockless_cq) {
+ if (!(ctx->int_flags & IO_RING_F_LOCKLESS_CQ)) {
spin_lock(&ctx->completion_lock);
posted = io_fill_cqe_aux(ctx, req->cqe.user_data, res, cflags);
spin_unlock(&ctx->completion_lock);
@@ -885,7 +885,7 @@ bool io_req_post_cqe32(struct io_kiocb *req, struct io_uring_cqe cqe[2])
lockdep_assert_held(&ctx->uring_lock);
cqe[0].user_data = req->cqe.user_data;
- if (!ctx->lockless_cq) {
+ if (!(ctx->int_flags & IO_RING_F_LOCKLESS_CQ)) {
spin_lock(&ctx->completion_lock);
posted = io_fill_cqe_aux32(ctx, cqe);
spin_unlock(&ctx->completion_lock);
@@ -913,7 +913,7 @@ static void io_req_complete_post(struct io_kiocb *req, unsigned issue_flags)
* Handle special CQ sync cases via task_work. DEFER_TASKRUN requires
* the submitter task context, IOPOLL protects with uring_lock.
*/
- if (ctx->lockless_cq || (req->flags & REQ_F_REISSUE)) {
+ if ((ctx->int_flags & IO_RING_F_LOCKLESS_CQ) || (req->flags & REQ_F_REISSUE)) {
defer_complete:
req->io_task_work.func = io_req_task_complete;
io_req_task_work_add(req);
@@ -1135,7 +1135,7 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx)
*/
if (!(req->flags & (REQ_F_CQE_SKIP | REQ_F_REISSUE)) &&
unlikely(!io_fill_cqe_req(ctx, req))) {
- if (ctx->lockless_cq)
+ if (ctx->int_flags & IO_RING_F_LOCKLESS_CQ)
io_cqe_overflow(ctx, &req->cqe, &req->big_cqe);
else
io_cqe_overflow_locked(ctx, &req->cqe, &req->big_cqe);
@@ -1148,7 +1148,7 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx)
INIT_WQ_LIST(&state->compl_reqs);
}
- if (unlikely(ctx->drain_active))
+ if (unlikely(ctx->int_flags & IO_RING_F_DRAIN_ACTIVE))
io_queue_deferred(ctx);
ctx->submit_state.cq_flush = false;
@@ -1344,7 +1344,7 @@ static __cold void io_drain_req(struct io_kiocb *req)
list_add_tail(&de->list, &ctx->defer_list);
io_queue_deferred(ctx);
if (!drain && list_empty(&ctx->defer_list))
- ctx->drain_active = false;
+ ctx->int_flags &= ~IO_RING_F_DRAIN_ACTIVE;
}
static bool io_assign_file(struct io_kiocb *req, const struct io_issue_def *def,
@@ -1655,7 +1655,7 @@ static void io_queue_sqe_fallback(struct io_kiocb *req)
} else {
/* can't fail with IO_URING_F_INLINE */
io_req_sqe_copy(req, IO_URING_F_INLINE);
- if (unlikely(req->ctx->drain_active))
+ if (unlikely(req->ctx->int_flags & IO_RING_F_DRAIN_ACTIVE))
io_drain_req(req);
else
io_queue_iowq(req);
@@ -1671,7 +1671,7 @@ static inline bool io_check_restriction(struct io_ring_ctx *ctx,
struct io_kiocb *req,
unsigned int sqe_flags)
{
- if (!ctx->op_restricted)
+ if (!(ctx->int_flags & IO_RING_F_OP_RESTRICTED))
return true;
if (!test_bit(req->opcode, ctx->restrictions.sqe_op))
return false;
@@ -1691,7 +1691,7 @@ static void io_init_drain(struct io_ring_ctx *ctx)
{
struct io_kiocb *head = ctx->submit_state.link.head;
- ctx->drain_active = true;
+ ctx->int_flags |= IO_RING_F_DRAIN_ACTIVE;
if (head) {
/*
* If we need to drain a request in the middle of a link, drain
@@ -1701,7 +1701,7 @@ static void io_init_drain(struct io_ring_ctx *ctx)
* link.
*/
head->flags |= REQ_F_IO_DRAIN | REQ_F_FORCE_ASYNC;
- ctx->drain_next = true;
+ ctx->int_flags |= IO_RING_F_DRAIN_NEXT;
}
}
@@ -1767,23 +1767,23 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
req->buf_index = READ_ONCE(sqe->buf_group);
}
if (sqe_flags & IOSQE_CQE_SKIP_SUCCESS)
- ctx->drain_disabled = true;
+ ctx->int_flags |= IO_RING_F_DRAIN_DISABLED;
if (sqe_flags & IOSQE_IO_DRAIN) {
- if (ctx->drain_disabled)
+ if (ctx->int_flags & IO_RING_F_DRAIN_DISABLED)
return io_init_fail_req(req, -EOPNOTSUPP);
io_init_drain(ctx);
}
}
- if (unlikely(ctx->op_restricted || ctx->drain_active || ctx->drain_next)) {
+ if (unlikely(ctx->int_flags & (IO_RING_F_OP_RESTRICTED | IO_RING_F_DRAIN_ACTIVE | IO_RING_F_DRAIN_NEXT))) {
if (!io_check_restriction(ctx, req, sqe_flags))
return io_init_fail_req(req, -EACCES);
/* knock it to the slow queue path, will be drained there */
- if (ctx->drain_active)
+ if (ctx->int_flags & IO_RING_F_DRAIN_ACTIVE)
req->flags |= REQ_F_FORCE_ASYNC;
/* if there is no link, we're at "next" request and need to drain */
- if (unlikely(ctx->drain_next) && !ctx->submit_state.link.head) {
- ctx->drain_next = false;
- ctx->drain_active = true;
+ if (unlikely(ctx->int_flags & IO_RING_F_DRAIN_NEXT) && !ctx->submit_state.link.head) {
+ ctx->int_flags &= ~IO_RING_F_DRAIN_NEXT;
+ ctx->int_flags |= IO_RING_F_DRAIN_ACTIVE;
req->flags |= REQ_F_IO_DRAIN | REQ_F_FORCE_ASYNC;
}
}
@@ -2204,7 +2204,7 @@ static __cold void io_activate_pollwq_cb(struct callback_head *cb)
poll_wq_task_work);
mutex_lock(&ctx->uring_lock);
- ctx->poll_activated = true;
+ ctx->int_flags |= IO_RING_F_POLL_ACTIVATED;
mutex_unlock(&ctx->uring_lock);
/*
@@ -2219,9 +2219,9 @@ __cold void io_activate_pollwq(struct io_ring_ctx *ctx)
{
spin_lock(&ctx->completion_lock);
/* already activated or in progress */
- if (ctx->poll_activated || ctx->poll_wq_task_work.func)
+ if ((ctx->int_flags & IO_RING_F_POLL_ACTIVATED) || ctx->poll_wq_task_work.func)
goto out;
- if (WARN_ON_ONCE(!ctx->task_complete))
+ if (WARN_ON_ONCE(!(ctx->int_flags & IO_RING_F_TASK_COMPLETE)))
goto out;
if (!ctx->submitter_task)
goto out;
@@ -2242,7 +2242,7 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
struct io_ring_ctx *ctx = file->private_data;
__poll_t mask = 0;
- if (unlikely(!ctx->poll_activated))
+ if (unlikely(!(ctx->int_flags & IO_RING_F_POLL_ACTIVATED)))
io_activate_pollwq(ctx);
/*
* provides mb() which pairs with barrier from wq_has_sleeper
@@ -2607,7 +2607,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
goto out;
}
if (flags & IORING_ENTER_GETEVENTS) {
- if (ctx->syscall_iopoll)
+ if (ctx->int_flags & IO_RING_F_SYSCALL_IOPOLL)
goto iopoll_locked;
/*
* Ignore errors, we'll soon call io_cqring_wait() and
@@ -2622,7 +2622,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
if (flags & IORING_ENTER_GETEVENTS) {
int ret2;
- if (ctx->syscall_iopoll) {
+ if (ctx->int_flags & IO_RING_F_SYSCALL_IOPOLL) {
/*
* We disallow the app entering submit/complete with
* polling, but we still need to lock the ring to
@@ -2923,9 +2923,9 @@ static void io_ctx_restriction_clone(struct io_ring_ctx *ctx,
if (dst->bpf_filters)
WRITE_ONCE(ctx->bpf_filters, dst->bpf_filters->filters);
if (dst->op_registered)
- ctx->op_restricted = 1;
+ ctx->int_flags |= IO_RING_F_OP_RESTRICTED;
if (dst->reg_registered)
- ctx->reg_restricted = 1;
+ ctx->int_flags |= IO_RING_F_REG_RESTRICTED;
}
static __cold int io_uring_create(struct io_ctx_config *config)
@@ -2952,17 +2952,18 @@ static __cold int io_uring_create(struct io_ctx_config *config)
if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) &&
!(ctx->flags & IORING_SETUP_IOPOLL))
- ctx->task_complete = true;
+ ctx->int_flags |= IO_RING_F_TASK_COMPLETE;
- if (ctx->task_complete || (ctx->flags & IORING_SETUP_IOPOLL))
- ctx->lockless_cq = true;
+ if ((ctx->int_flags & IO_RING_F_TASK_COMPLETE) ||
+ (ctx->flags & IORING_SETUP_IOPOLL))
+ ctx->int_flags |= IO_RING_F_LOCKLESS_CQ;
/*
* lazy poll_wq activation relies on ->task_complete for synchronisation
* purposes, see io_activate_pollwq()
*/
- if (!ctx->task_complete)
- ctx->poll_activated = true;
+ if (!(ctx->int_flags & IO_RING_F_TASK_COMPLETE))
+ ctx->int_flags |= IO_RING_F_POLL_ACTIVATED;
/*
* When SETUP_IOPOLL and SETUP_SQPOLL are both enabled, user
@@ -2972,9 +2973,10 @@ static __cold int io_uring_create(struct io_ctx_config *config)
*/
if (ctx->flags & IORING_SETUP_IOPOLL &&
!(ctx->flags & IORING_SETUP_SQPOLL))
- ctx->syscall_iopoll = 1;
+ ctx->int_flags |= IO_RING_F_SYSCALL_IOPOLL;
- ctx->compat = in_compat_syscall();
+ if (in_compat_syscall())
+ ctx->int_flags |= IO_RING_F_COMPAT;
if (!ns_capable_noaudit(&init_user_ns, CAP_IPC_LOCK))
ctx->user = get_uid(current_user());
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 0fa844faf287..5cb1983043cd 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -211,7 +211,7 @@ static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx)
if (ctx->flags & IORING_SETUP_IOPOLL) {
lockdep_assert_held(&ctx->uring_lock);
- } else if (!ctx->task_complete) {
+ } else if (!(ctx->int_flags & IO_RING_F_TASK_COMPLETE)) {
lockdep_assert_held(&ctx->completion_lock);
} else if (ctx->submitter_task) {
/*
@@ -228,7 +228,7 @@ static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx)
static inline bool io_is_compat(struct io_ring_ctx *ctx)
{
- return IS_ENABLED(CONFIG_COMPAT) && unlikely(ctx->compat);
+ return IS_ENABLED(CONFIG_COMPAT) && unlikely(ctx->int_flags & IO_RING_F_COMPAT);
}
static inline void io_submit_flush_completions(struct io_ring_ctx *ctx)
@@ -472,8 +472,9 @@ static inline void io_req_complete_defer(struct io_kiocb *req)
static inline void io_commit_cqring_flush(struct io_ring_ctx *ctx)
{
- if (unlikely(ctx->off_timeout_used ||
- ctx->has_evfd || ctx->poll_activated))
+ if (unlikely(ctx->int_flags & (IO_RING_F_OFF_TIMEOUT_USED |
+ IO_RING_F_HAS_EVFD |
+ IO_RING_F_POLL_ACTIVATED)))
__io_commit_cqring_flush(ctx);
}
diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c
index 57ad0085869a..3ff9098573db 100644
--- a/io_uring/msg_ring.c
+++ b/io_uring/msg_ring.c
@@ -67,7 +67,7 @@ void io_msg_ring_cleanup(struct io_kiocb *req)
static inline bool io_msg_need_remote(struct io_ring_ctx *target_ctx)
{
- return target_ctx->task_complete;
+ return target_ctx->int_flags & IO_RING_F_TASK_COMPLETE;
}
static void io_msg_tw_complete(struct io_tw_req tw_req, io_tw_token_t tw)
diff --git a/io_uring/register.c b/io_uring/register.c
index 0148735f7711..489a6feaf228 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -184,9 +184,9 @@ static __cold int io_register_restrictions(struct io_ring_ctx *ctx,
return ret;
}
if (ctx->restrictions.op_registered)
- ctx->op_restricted = 1;
+ ctx->int_flags |= IO_RING_F_OP_RESTRICTED;
if (ctx->restrictions.reg_registered)
- ctx->reg_restricted = 1;
+ ctx->int_flags |= IO_RING_F_REG_RESTRICTED;
return 0;
}
@@ -384,7 +384,7 @@ static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
for (i = 0; i < ARRAY_SIZE(new_count); i++)
if (new_count[i])
ctx->iowq_limits[i] = new_count[i];
- ctx->iowq_limits_set = true;
+ ctx->int_flags |= IO_RING_F_IOWQ_LIMITS_SET;
if (tctx && tctx->io_wq) {
ret = io_wq_max_workers(tctx->io_wq, new_count);
@@ -725,7 +725,7 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
if (ctx->submitter_task && ctx->submitter_task != current)
return -EEXIST;
- if (ctx->reg_restricted && !(ctx->flags & IORING_SETUP_R_DISABLED)) {
+ if ((ctx->int_flags & IO_RING_F_REG_RESTRICTED) && !(ctx->flags & IORING_SETUP_R_DISABLED)) {
opcode = array_index_nospec(opcode, IORING_REGISTER_LAST);
if (!test_bit(opcode, ctx->restrictions.register_op))
return -EACCES;
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 4fa59bf89bba..52554ed89b11 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -295,7 +295,7 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
u64 tag = 0;
uvec = u64_to_user_ptr(user_data);
- iov = iovec_from_user(uvec, 1, 1, &fast_iov, ctx->compat);
+ iov = iovec_from_user(uvec, 1, 1, &fast_iov, io_is_compat(ctx));
if (IS_ERR(iov)) {
err = PTR_ERR(iov);
break;
@@ -319,7 +319,7 @@ static int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
i = array_index_nospec(up->offset + done, ctx->buf_table.nr);
io_reset_rsrc_node(ctx, &ctx->buf_table, i);
ctx->buf_table.nodes[i] = node;
- if (ctx->compat)
+ if (io_is_compat(ctx))
user_data += sizeof(struct compat_iovec);
else
user_data += sizeof(struct iovec);
@@ -883,12 +883,12 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
if (arg) {
uvec = (struct iovec __user *) arg;
- iov = iovec_from_user(uvec, 1, 1, &fast_iov, ctx->compat);
+ iov = iovec_from_user(uvec, 1, 1, &fast_iov, io_is_compat(ctx));
if (IS_ERR(iov)) {
ret = PTR_ERR(iov);
break;
}
- if (ctx->compat)
+ if (io_is_compat(ctx))
arg += sizeof(struct compat_iovec);
else
arg += sizeof(struct iovec);
diff --git a/io_uring/tctx.c b/io_uring/tctx.c
index 7cbcb82aedfb..143de8e990eb 100644
--- a/io_uring/tctx.c
+++ b/io_uring/tctx.c
@@ -121,7 +121,7 @@ int __io_uring_add_tctx_node(struct io_ring_ctx *ctx)
return ret;
tctx = current->io_uring;
- if (ctx->iowq_limits_set) {
+ if (ctx->int_flags & IO_RING_F_IOWQ_LIMITS_SET) {
unsigned int limits[2] = { ctx->iowq_limits[0],
ctx->iowq_limits[1], };
diff --git a/io_uring/timeout.c b/io_uring/timeout.c
index 8eddf8add7a2..579fdddac71a 100644
--- a/io_uring/timeout.c
+++ b/io_uring/timeout.c
@@ -566,8 +566,8 @@ static int __io_timeout_prep(struct io_kiocb *req,
INIT_LIST_HEAD(&timeout->list);
timeout->off = off;
- if (unlikely(off && !req->ctx->off_timeout_used))
- req->ctx->off_timeout_used = true;
+ if (unlikely(off && !(req->ctx->int_flags & IO_RING_F_OFF_TIMEOUT_USED)))
+ req->ctx->int_flags |= IO_RING_F_OFF_TIMEOUT_USED;
/*
* for multishot reqs w/ fixed nr of repeats, repeats tracks the
* remaining nr
diff --git a/io_uring/tw.c b/io_uring/tw.c
index 2f2b4ac4b126..022fe8753c19 100644
--- a/io_uring/tw.c
+++ b/io_uring/tw.c
@@ -222,7 +222,7 @@ void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
if (!head) {
io_ctx_mark_taskrun(ctx);
- if (ctx->has_evfd)
+ if (ctx->int_flags & IO_RING_F_HAS_EVFD)
io_eventfd_signal(ctx, false);
}
--
2.53.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/2] io_uring: mark known and harmless racy ctx->int_flags uses
2026-03-14 14:58 [PATCHSET for-next 0/2] Replace io_ring_ctx bitfields with flags Jens Axboe
2026-03-14 14:58 ` [PATCH 1/2] io_uring: switch struct io_ring_ctx internal bitfields to flags Jens Axboe
@ 2026-03-14 14:58 ` Jens Axboe
1 sibling, 0 replies; 4+ messages in thread
From: Jens Axboe @ 2026-03-14 14:58 UTC (permalink / raw)
To: io-uring; +Cc: Jens Axboe
There are a few of these, where flags are read outside of the
uring_lock, yet it's harmless to race on them.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
io_uring/io_uring.c | 2 +-
io_uring/io_uring.h | 7 ++++---
io_uring/tw.c | 2 +-
3 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index bfeb3bc3849d..fb5a263706be 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2242,7 +2242,7 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait)
struct io_ring_ctx *ctx = file->private_data;
__poll_t mask = 0;
- if (unlikely(!(ctx->int_flags & IO_RING_F_POLL_ACTIVATED)))
+ if (unlikely(!(data_race(ctx->int_flags) & IO_RING_F_POLL_ACTIVATED)))
io_activate_pollwq(ctx);
/*
* provides mb() which pairs with barrier from wq_has_sleeper
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 5cb1983043cd..91cf67b5d85b 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -470,11 +470,12 @@ static inline void io_req_complete_defer(struct io_kiocb *req)
wq_list_add_tail(&req->comp_list, &state->compl_reqs);
}
+#define SHOULD_FLUSH_MASK (IO_RING_F_OFF_TIMEOUT_USED | \
+ IO_RING_F_HAS_EVFD | IO_RING_F_POLL_ACTIVATED)
+
static inline void io_commit_cqring_flush(struct io_ring_ctx *ctx)
{
- if (unlikely(ctx->int_flags & (IO_RING_F_OFF_TIMEOUT_USED |
- IO_RING_F_HAS_EVFD |
- IO_RING_F_POLL_ACTIVATED)))
+ if (unlikely(data_race(ctx->int_flags) & SHOULD_FLUSH_MASK))
__io_commit_cqring_flush(ctx);
}
diff --git a/io_uring/tw.c b/io_uring/tw.c
index 022fe8753c19..fdff81eebc95 100644
--- a/io_uring/tw.c
+++ b/io_uring/tw.c
@@ -222,7 +222,7 @@ void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
if (!head) {
io_ctx_mark_taskrun(ctx);
- if (ctx->int_flags & IO_RING_F_HAS_EVFD)
+ if (data_race(ctx->int_flags) & IO_RING_F_HAS_EVFD)
io_eventfd_signal(ctx, false);
}
--
2.53.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH 1/2] io_uring: switch struct io_ring_ctx internal bitfields to flags
2026-03-14 14:58 ` [PATCH 1/2] io_uring: switch struct io_ring_ctx internal bitfields to flags Jens Axboe
@ 2026-03-15 3:16 ` Gabriel Krisman Bertazi
0 siblings, 0 replies; 4+ messages in thread
From: Gabriel Krisman Bertazi @ 2026-03-15 3:16 UTC (permalink / raw)
To: Jens Axboe; +Cc: io-uring
Jens Axboe <axboe@kernel.dk> writes:
> Bitfields cannot be set and checked atomically, and this makes it more
> clear that these are indeed in shared storage and must be checked and
> set in a sane fashion. This is in preparation for annotating a few of
> the known racy, but harmless, flags checking.
>
> No intended functional changes in this patch.
>
> Signed-off-by: Jens Axboe <axboe@kernel.dk>
> ---
> include/linux/io_uring_types.h | 32 +++++++------
> io_uring/eventfd.c | 4 +-
> io_uring/io_uring.c | 82 +++++++++++++++++-----------------
> io_uring/io_uring.h | 9 ++--
> io_uring/msg_ring.c | 2 +-
> io_uring/register.c | 8 ++--
> io_uring/rsrc.c | 8 ++--
> io_uring/tctx.c | 2 +-
> io_uring/timeout.c | 4 +-
> io_uring/tw.c | 2 +-
> 10 files changed, 80 insertions(+), 73 deletions(-)
>
> diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
> index dd1420bfcb73..b84576374c7b 100644
> --- a/include/linux/io_uring_types.h
> +++ b/include/linux/io_uring_types.h
> @@ -268,24 +268,28 @@ struct io_alloc_cache {
> unsigned int init_clear;
> };
>
> +enum {
> + IO_RING_F_DRAIN_NEXT = BIT(0),
> + IO_RING_F_OP_RESTRICTED = BIT(1),
> + IO_RING_F_REG_RESTRICTED = BIT(2),
> + IO_RING_F_OFF_TIMEOUT_USED = BIT(3),
> + IO_RING_F_DRAIN_ACTIVE = BIT(4),
> + IO_RING_F_HAS_EVFD = BIT(5),
> + /* all CQEs should be posted only by the submitter task */
> + IO_RING_F_TASK_COMPLETE = BIT(6),
> + IO_RING_F_LOCKLESS_CQ = BIT(7),
> + IO_RING_F_SYSCALL_IOPOLL = BIT(8),
> + IO_RING_F_POLL_ACTIVATED = BIT(9),
> + IO_RING_F_DRAIN_DISABLED = BIT(10),
> + IO_RING_F_COMPAT = BIT(11),
> + IO_RING_F_IOWQ_LIMITS_SET = BIT(12),
> +};
> +
> struct io_ring_ctx {
> /* const or read-mostly hot data */
> struct {
> unsigned int flags;
> - unsigned int drain_next: 1;
> - unsigned int op_restricted: 1;
> - unsigned int reg_restricted: 1;
> - unsigned int off_timeout_used: 1;
> - unsigned int drain_active: 1;
> - unsigned int has_evfd: 1;
> - /* all CQEs should be posted only by the submitter task */
> - unsigned int task_complete: 1;
> - unsigned int lockless_cq: 1;
> - unsigned int syscall_iopoll: 1;
> - unsigned int poll_activated: 1;
> - unsigned int drain_disabled: 1;
> - unsigned int compat: 1;
> - unsigned int iowq_limits_set : 1;
> + unsigned int int_flags;
Jens,
What does the int prefix mean in this context?
--
Gabriel Krisman Bertazi
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2026-03-15 3:16 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-03-14 14:58 [PATCHSET for-next 0/2] Replace io_ring_ctx bitfields with flags Jens Axboe
2026-03-14 14:58 ` [PATCH 1/2] io_uring: switch struct io_ring_ctx internal bitfields to flags Jens Axboe
2026-03-15 3:16 ` Gabriel Krisman Bertazi
2026-03-14 14:58 ` [PATCH 2/2] io_uring: mark known and harmless racy ctx->int_flags uses Jens Axboe
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox