* [PATCH 1/3] io_uring/register: use stable SQ/CQ ring data during resize
From: Jens Axboe @ 2025-01-15 16:06 UTC
To: io-uring; +Cc: jannh, Jens Axboe
Normally the kernel would not expect an application to modify any of
the data shared with the kernel during a resize operation, but of
course the kernel cannot always assume good intent on the part of the
application.
As part of resizing the rings, existing SQEs and CQEs are copied over
to the new storage. Resizing uses the masks in the newly allocated
shared storage to index the arrays; however, malicious userspace could
modify these masks after they have been sanity checked.

Use the validated and locally stored CQ and SQ ring sizes for masking
to ensure the values are both stable and valid.
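
To make the distinction concrete, here is the pattern in isolation (a
minimal sketch, not the full function):

    /* Before: the mask is re-read from memory userspace can modify
     * after the sanity check, so a hostile store can yield an
     * out-of-bounds index.
     */
    unsigned dst_head = i & n.rings->sq_ring_mask;

    /* After: p is a kernel-local struct io_uring_params whose
     * sq_entries was validated earlier, so the mask is stable for
     * the whole copy.
     */
    unsigned dst_head = i & (p.sq_entries - 1);
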
Fixes: 79cfe9e59c2a ("io_uring/register: add IORING_REGISTER_RESIZE_RINGS")
Reported-by: Jann Horn <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
---
io_uring/register.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/io_uring/register.c b/io_uring/register.c
index fdd44914c39c..5880eb75ae44 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -514,7 +514,7 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
goto overflow;
for (i = o.rings->sq.head; i < tail; i++) {
unsigned src_head = i & (ctx->sq_entries - 1);
- unsigned dst_head = i & n.rings->sq_ring_mask;
+ unsigned dst_head = i & (p.sq_entries - 1);
n.sq_sqes[dst_head] = o.sq_sqes[src_head];
}
@@ -533,7 +533,7 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
}
for (i = o.rings->cq.head; i < tail; i++) {
unsigned src_head = i & (ctx->cq_entries - 1);
- unsigned dst_head = i & n.rings->cq_ring_mask;
+ unsigned dst_head = i & (p.cq_entries - 1);
n.rings->cqes[dst_head] = o.rings->cqes[src_head];
}
--
2.47.1
* [PATCH 2/3] io_uring/register: document io_register_resize_rings() shared mem usage
From: Jens Axboe @ 2025-01-15 16:06 UTC
To: io-uring; +Cc: jannh, Jens Axboe
It can be a bit hard to tell which parts of io_register_resize_rings()
are operating on memory shared with userspace, and which ones are not,
and anything reading from or writing to those regions should use the
READ_ONCE()/WRITE_ONCE() primitives.

Hence add those, ensuring sanity in how this memory is accessed, and
helping document its shared nature.
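
As a minimal sketch of why the accessors matter (not the full patch):

    /* A plain load may be torn or re-read by the compiler: */
    tail = o.rings->sq.tail;

    /* READ_ONCE()/WRITE_ONCE() force a single, untorn access and
     * document that the field lives in memory shared with userspace:
     */
    tail = READ_ONCE(o.rings->sq.tail);
    WRITE_ONCE(n.rings->sq.tail, tail);
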
Signed-off-by: Jens Axboe <[email protected]>
---
io_uring/register.c | 44 ++++++++++++++++++++++++++------------------
1 file changed, 26 insertions(+), 18 deletions(-)
diff --git a/io_uring/register.c b/io_uring/register.c
index 5880eb75ae44..ffcbc840032e 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -449,10 +449,18 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
if (IS_ERR(n.rings))
return PTR_ERR(n.rings);
- n.rings->sq_ring_mask = p.sq_entries - 1;
- n.rings->cq_ring_mask = p.cq_entries - 1;
- n.rings->sq_ring_entries = p.sq_entries;
- n.rings->cq_ring_entries = p.cq_entries;
+ /*
+ * At this point n.rings is shared with userspace, just like o.rings
+ * is as well. While we don't expect userspace to modify it while
+ * a resize is in progress, and it's most likely that userspace will
+ * shoot itself in the foot if it does, we can't always assume good
+ * intent... Use read/write once helpers from here on to indicate the
+ * shared nature of it.
+ */
+ WRITE_ONCE(n.rings->sq_ring_mask, p.sq_entries - 1);
+ WRITE_ONCE(n.rings->cq_ring_mask, p.cq_entries - 1);
+ WRITE_ONCE(n.rings->sq_ring_entries, p.sq_entries);
+ WRITE_ONCE(n.rings->cq_ring_entries, p.cq_entries);
if (copy_to_user(arg, &p, sizeof(p))) {
io_register_free_rings(&p, &n);
@@ -509,20 +517,20 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
* rings can't hold what is already there, then fail the operation.
*/
n.sq_sqes = ptr;
- tail = o.rings->sq.tail;
- if (tail - o.rings->sq.head > p.sq_entries)
+ tail = READ_ONCE(o.rings->sq.tail);
+ if (tail - READ_ONCE(o.rings->sq.head) > p.sq_entries)
goto overflow;
- for (i = o.rings->sq.head; i < tail; i++) {
+ for (i = READ_ONCE(o.rings->sq.head); i < tail; i++) {
unsigned src_head = i & (ctx->sq_entries - 1);
unsigned dst_head = i & (p.sq_entries - 1);
n.sq_sqes[dst_head] = o.sq_sqes[src_head];
}
- n.rings->sq.head = o.rings->sq.head;
- n.rings->sq.tail = o.rings->sq.tail;
+ WRITE_ONCE(n.rings->sq.head, READ_ONCE(o.rings->sq.head));
+ WRITE_ONCE(n.rings->sq.tail, READ_ONCE(o.rings->sq.tail));
- tail = o.rings->cq.tail;
- if (tail - o.rings->cq.head > p.cq_entries) {
+ tail = READ_ONCE(o.rings->cq.tail);
+ if (tail - READ_ONCE(o.rings->cq.head) > p.cq_entries) {
overflow:
/* restore old rings, and return -EOVERFLOW via cleanup path */
ctx->rings = o.rings;
@@ -531,21 +539,21 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
ret = -EOVERFLOW;
goto out;
}
- for (i = o.rings->cq.head; i < tail; i++) {
+ for (i = READ_ONCE(o.rings->cq.head); i < tail; i++) {
unsigned src_head = i & (ctx->cq_entries - 1);
unsigned dst_head = i & (p.cq_entries - 1);
n.rings->cqes[dst_head] = o.rings->cqes[src_head];
}
- n.rings->cq.head = o.rings->cq.head;
- n.rings->cq.tail = o.rings->cq.tail;
+ WRITE_ONCE(n.rings->cq.head, READ_ONCE(o.rings->cq.head));
+ WRITE_ONCE(n.rings->cq.tail, READ_ONCE(o.rings->cq.tail));
/* invalidate cached cqe refill */
ctx->cqe_cached = ctx->cqe_sentinel = NULL;
- n.rings->sq_dropped = o.rings->sq_dropped;
- n.rings->sq_flags = o.rings->sq_flags;
- n.rings->cq_flags = o.rings->cq_flags;
- n.rings->cq_overflow = o.rings->cq_overflow;
+ WRITE_ONCE(n.rings->sq_dropped, READ_ONCE(o.rings->sq_dropped));
+ WRITE_ONCE(n.rings->sq_flags, READ_ONCE(o.rings->sq_flags));
+ WRITE_ONCE(n.rings->cq_flags, READ_ONCE(o.rings->cq_flags));
+ WRITE_ONCE(n.rings->cq_overflow, READ_ONCE(o.rings->cq_overflow));
/* all done, store old pointers and assign new ones */
if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
--
2.47.1
* [PATCH 3/3] io_uring/register: cache old SQ/CQ head reading for copies
From: Jens Axboe @ 2025-01-15 16:06 UTC
To: io-uring; +Cc: jannh, Jens Axboe
The SQ and CQ ring heads are read twice - once to verify that they are
within bounds, and once inside the loops that copy the SQE and CQE
entries. This is technically incorrect, as the values could get
modified between the check and their use in the copy loops. While this
won't lead to anything truly nefarious, it may cause the copy loops to
run for longer than expected.

Read each ring head value once, and use the verified value in the copy
loops.
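
The SQ side then ends up reading the head exactly once (a sketch; the
CQ side is analogous):

    tail = READ_ONCE(o.rings->sq.tail);
    old_head = READ_ONCE(o.rings->sq.head);  /* single read */
    if (tail - old_head > p.sq_entries)
        goto overflow;
    for (i = old_head; i < tail; i++) {      /* reuse verified value */
        ...
    }
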
Signed-off-by: Jens Axboe <[email protected]>
---
io_uring/register.c | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/io_uring/register.c b/io_uring/register.c
index ffcbc840032e..371aec87e078 100644
--- a/io_uring/register.c
+++ b/io_uring/register.c
@@ -405,8 +405,8 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
{
struct io_ring_ctx_rings o = { }, n = { }, *to_free = NULL;
size_t size, sq_array_offset;
+ unsigned i, tail, old_head;
struct io_uring_params p;
- unsigned i, tail;
void *ptr;
int ret;
@@ -518,9 +518,10 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
*/
n.sq_sqes = ptr;
tail = READ_ONCE(o.rings->sq.tail);
- if (tail - READ_ONCE(o.rings->sq.head) > p.sq_entries)
+ old_head = READ_ONCE(o.rings->sq.head);
+ if (tail - old_head > p.sq_entries)
goto overflow;
- for (i = READ_ONCE(o.rings->sq.head); i < tail; i++) {
+ for (i = old_head; i < tail; i++) {
unsigned src_head = i & (ctx->sq_entries - 1);
unsigned dst_head = i & (p.sq_entries - 1);
@@ -530,7 +531,8 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
WRITE_ONCE(n.rings->sq.tail, READ_ONCE(o.rings->sq.tail));
tail = READ_ONCE(o.rings->cq.tail);
- if (tail - READ_ONCE(o.rings->cq.head) > p.cq_entries) {
+ old_head = READ_ONCE(o.rings->cq.head);
+ if (tail - old_head > p.cq_entries) {
overflow:
/* restore old rings, and return -EOVERFLOW via cleanup path */
ctx->rings = o.rings;
@@ -539,7 +541,7 @@ static int io_register_resize_rings(struct io_ring_ctx *ctx, void __user *arg)
ret = -EOVERFLOW;
goto out;
}
- for (i = READ_ONCE(o.rings->cq.head); i < tail; i++) {
+ for (i = old_head; i < tail; i++) {
unsigned src_head = i & (ctx->cq_entries - 1);
unsigned dst_head = i & (p.cq_entries - 1);
--
2.47.1