public inbox for io-uring@vger.kernel.org
 help / color / mirror / Atom feed
* possible deadlock in __wake_up_common_lock
@ 2024-12-09 12:03 chase xd
  2024-12-09 14:59 ` Jens Axboe
  0 siblings, 1 reply; 6+ messages in thread
From: chase xd @ 2024-12-09 12:03 UTC (permalink / raw)
  To: Jens Axboe, Pavel Begunkov, io-uring, linux-kernel

Hi, Syzkaller reports this on LTS 6.1.119; it looks like a new bug:

Syzkaller hit 'possible deadlock in __wake_up_common_lock' bug.

============================================
WARNING: possible recursive locking detected
6.1.119-dirty #3 Not tainted
--------------------------------------------
syz-executor199/6820 is trying to acquire lock:
ffff88807c386378 (&ctx->cq_wait){....}-{2:2}, at:
__wake_up_common_lock+0xb8/0x140 kernel/sched/wait.c:137

but task is already holding lock:
ffff88807c386378 (&ctx->cq_wait){....}-{2:2}, at:
__wake_up_common_lock+0xb8/0x140 kernel/sched/wait.c:137

other info that might help us debug this:
 Possible unsafe locking scenario:

       CPU0
       ----
  lock(&ctx->cq_wait);
  lock(&ctx->cq_wait);

 *** DEADLOCK ***

 May be due to missing lock nesting notation

2 locks held by syz-executor199/6820:
 #0: ffff88807c3860a8 (&ctx->uring_lock){+.+.}-{3:3}, at:
__do_sys_io_uring_enter+0x8fc/0x2130 io_uring/io_uring.c:3313
 #1: ffff88807c386378 (&ctx->cq_wait){....}-{2:2}, at:
__wake_up_common_lock+0xb8/0x140 kernel/sched/wait.c:137

stack backtrace:
CPU: 7 PID: 6820 Comm: syz-executor199 Not tainted 6.1.119-dirty #3
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
Call Trace:
 <TASK>
 __dump_stack lib/dump_stack.c:88 [inline]
 dump_stack_lvl+0x5b/0x85 lib/dump_stack.c:106
 print_deadlock_bug kernel/locking/lockdep.c:2983 [inline]
 check_deadlock kernel/locking/lockdep.c:3026 [inline]
 validate_chain kernel/locking/lockdep.c:3812 [inline]
 __lock_acquire.cold+0x219/0x3bd kernel/locking/lockdep.c:5049
 lock_acquire kernel/locking/lockdep.c:5662 [inline]
 lock_acquire+0x1e3/0x5e0 kernel/locking/lockdep.c:5627
 __raw_spin_lock_irqsave include/linux/spinlock_api_smp.h:110 [inline]
 _raw_spin_lock_irqsave+0x3d/0x60 kernel/locking/spinlock.c:162
 __wake_up_common_lock+0xb8/0x140 kernel/sched/wait.c:137
 __io_cqring_wake io_uring/io_uring.h:224 [inline]
 __io_cqring_wake io_uring/io_uring.h:211 [inline]
 io_req_local_work_add io_uring/io_uring.c:1135 [inline]
 __io_req_task_work_add+0x4a4/0xd60 io_uring/io_uring.c:1146
 io_poll_wake+0x3cb/0x550 io_uring/poll.c:465
 __wake_up_common+0x14c/0x650 kernel/sched/wait.c:107
 __wake_up_common_lock+0xd4/0x140 kernel/sched/wait.c:138
 __io_cqring_wake io_uring/io_uring.h:224 [inline]
 __io_cqring_wake io_uring/io_uring.h:211 [inline]
 io_cqring_wake io_uring/io_uring.h:231 [inline]
 io_cqring_ev_posted io_uring/io_uring.c:578 [inline]
 __io_cq_unlock_post io_uring/io_uring.c:586 [inline]
 __io_submit_flush_completions+0x778/0xba0 io_uring/io_uring.c:1346
 io_submit_flush_completions io_uring/io_uring.c:159 [inline]
 io_submit_state_end io_uring/io_uring.c:2203 [inline]
 io_submit_sqes+0xa78/0x1ce0 io_uring/io_uring.c:2317
 __do_sys_io_uring_enter+0x907/0x2130 io_uring/io_uring.c:3314
 do_syscall_x64 arch/x86/entry/common.c:51 [inline]
 do_syscall_64+0x3a/0xb0 arch/x86/entry/common.c:81
 entry_SYSCALL_64_after_hwframe+0x6e/0xd8
RIP: 0033:0x7fa54e70640d
Code: 28 c3 e8 46 1e 00 00 66 0f 1f 44 00 00 f3 0f 1e fa 48 89 f8 48
89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d
01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007ffd0ad80be8 EFLAGS: 00000246 ORIG_RAX: 00000000000001aa
RAX: ffffffffffffffda RBX: 00007ffd0ad80df8 RCX: 00007fa54e70640d
RDX: 0000000000000000 RSI: 000000000000331b RDI: 0000000000000003
RBP: 0000000000000001 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001
R13: 00007ffd0ad80de8 R14: 00007fa54e783530 R15: 0000000000000001
 </TASK>


Syzkaller reproducer:
# {Threaded:false Repeat:false RepeatTimes:0 Procs:1 Slowdown:1
Sandbox: SandboxArg:0 Leak:false NetInjection:false NetDevices:false
NetReset:false Cgroups:false BinfmtMisc:false CloseFDs:false
KCSAN:false DevlinkPCI:false NicVF:false USB:false VhciInjection:false
Wifi:false IEEE802154:false Sysctl:false Swap:false UseTmpDir:false
HandleSegv:false Repro:false Trace:false LegacyOptions:{Collide:false
Fault:false FaultCall:0 FaultNth:0}}
r0 = syz_io_uring_setup(0x100, &(0x7f0000000000)={0x0, 0x0, 0x3a40},
&(0x7f0000000180)=<r1=>0x0, &(0x7f00000001c0)=<r2=>0x0)
syz_io_uring_setup(0x255d, &(0x7f00000001c0)={0x0, 0x0, 0x40, 0x0,
0x3, 0x0, r0}, &(0x7f0000000140), &(0x7f00000024c0)=<r3=>0x0)
syz_io_uring_submit(r1, r3, &(0x7f00000000c0)=@IORING_OP_SEND={0x1a,
0x0, 0x0, 0xffffffffffffffff, 0x0, 0x0})
io_uring_register$IORING_REGISTER_ENABLE_RINGS(r0, 0xc, 0x0, 0x0)
syz_io_uring_submit(r1, r2,
&(0x7f0000000100)=@IORING_OP_READV=@use_registered_buffer={0x1, 0x0,
0x0, @fd=r0})
syz_io_uring_submit(r1, r2,
&(0x7f0000000100)=@IORING_OP_READV=@use_registered_buffer={0x1, 0x0,
0x0, @fd=r0})
io_uring_enter(r0, 0x331b, 0x0, 0x0, 0x0, 0x0)


C reproducer:
// autogenerated by syzkaller (https://github.com/google/syzkaller)

#define _GNU_SOURCE

#include <endian.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

/* Fallback io_uring syscall numbers, used only when the headers included above
 * do not already define them. */
#ifndef __NR_io_uring_enter
#define __NR_io_uring_enter 426
#endif
#ifndef __NR_io_uring_register
#define __NR_io_uring_register 427
#endif
#ifndef __NR_io_uring_setup
#define __NR_io_uring_setup 425
#endif

/*
 * Entry sizes and fixed byte offsets used to address the mmap'ed rings.
 *
 * SIZEOF_IO_URING_SQE / SIZEOF_IO_URING_CQE are the per-entry sizes that
 * syz_io_uring_setup uses for its mapping-size arithmetic (it also clears
 * IORING_SETUP_SQE128 / IORING_SETUP_CQE32 before the syscall).
 *
 * The *_OFFSET values are offsets from the start of the ring mapping.
 * syz_io_uring_submit uses SQ_TAIL_OFFSET and SQ_RING_MASK_OFFSET directly
 * rather than the sq_off values returned in struct io_uring_params; the
 * remaining offsets are not referenced by this reproducer.
 *
 * NOTE(review): presumably these match the ring layout of the kernels
 * syzkaller targets -- confirm before reusing them elsewhere.
 */
#define SIZEOF_IO_URING_SQE 64
#define SIZEOF_IO_URING_CQE 16
#define SQ_HEAD_OFFSET 0
#define SQ_TAIL_OFFSET 64
#define SQ_RING_MASK_OFFSET 256
#define SQ_RING_ENTRIES_OFFSET 264
#define SQ_FLAGS_OFFSET 276
#define SQ_DROPPED_OFFSET 272
#define CQ_HEAD_OFFSET 128
#define CQ_TAIL_OFFSET 192
#define CQ_RING_MASK_OFFSET 260
#define CQ_RING_ENTRIES_OFFSET 268
#define CQ_RING_OVERFLOW_OFFSET 284
#define CQ_FLAGS_OFFSET 280
#define CQ_CQES_OFFSET 320

/*
 * Submission-queue offsets block, embedded in struct io_uring_params as
 * sq_off. In this file only `array` is read: syz_io_uring_setup adds it to the
 * base of the ring mapping to locate the SQ index array, and uses it when
 * sizing that mapping.
 *
 * NOTE(review): presumably mirrors the kernel structure of the same name, so
 * field order and widths must not change -- confirm against the uapi header.
 */
struct io_sqring_offsets {
  uint32_t head;
  uint32_t tail;
  uint32_t ring_mask;
  uint32_t ring_entries;
  uint32_t flags;
  uint32_t dropped;
  uint32_t array; /* byte offset of the SQ index array in the ring mapping */
  uint32_t resv1;
  uint64_t resv2;
};

/*
 * Completion-queue offsets block, embedded in struct io_uring_params as
 * cq_off. In this file only `cqes` is read: syz_io_uring_setup combines it
 * with cq_entries * SIZEOF_IO_URING_CQE to size the ring mapping.
 *
 * NOTE(review): presumably mirrors the kernel structure of the same name, so
 * field order and widths must not change -- confirm against the uapi header.
 */
struct io_cqring_offsets {
  uint32_t head;
  uint32_t tail;
  uint32_t ring_mask;
  uint32_t ring_entries;
  uint32_t overflow;
  uint32_t cqes; /* byte offset of the CQE array in the ring mapping */
  uint64_t resv[2];
};

/*
 * Parameter block handed to the io_uring_setup syscall by syz_io_uring_setup.
 *
 * syz_io_uring_setup masks `flags` before the call and afterwards reads
 * sq_entries, cq_entries, sq_off.array and cq_off.cqes to size and address the
 * mappings. main() does not use these field names; it fills the block through
 * raw addresses, including the 32-bit word at byte offset 0x18 (resv[0] in
 * this definition).
 *
 * NOTE(review): presumably mirrors the kernel structure of the same name
 * (where offset 0x18 may carry a different field name) -- confirm against the
 * uapi header before changing the layout.
 */
struct io_uring_params {
  uint32_t sq_entries;     /* offset 0x00 */
  uint32_t cq_entries;     /* offset 0x04 */
  uint32_t flags;          /* offset 0x08 */
  uint32_t sq_thread_cpu;  /* offset 0x0c */
  uint32_t sq_thread_idle; /* offset 0x10 */
  uint32_t features;       /* offset 0x14 */
  uint32_t resv[4];        /* offset 0x18 */
  struct io_sqring_offsets sq_off;
  struct io_cqring_offsets cq_off;
};

/* mmap offsets that syz_io_uring_setup passes to select the ring region and
 * the SQE array of an io_uring fd, followed by the two setup flags it masks
 * out of params->flags before calling io_uring_setup. */
#define IORING_OFF_SQ_RING 0
#define IORING_OFF_SQES 0x10000000ULL
#define IORING_SETUP_SQE128 (1U << 10)
#define IORING_SETUP_CQE32 (1U << 11)

/*
 * Create an io_uring instance, map its ring region and SQE array, and
 * initialise the SQ index array to the identity mapping (array[i] = i).
 *
 * a0: requested number of entries, passed to io_uring_setup.
 * a1: struct io_uring_params*; IORING_SETUP_SQE128 and IORING_SETUP_CQE32 are
 *     cleared from ->flags first because the size arithmetic below uses the
 *     fixed SIZEOF_IO_URING_SQE / SIZEOF_IO_URING_CQE entry sizes.
 * a2: void** that receives the address of the ring mapping.
 * a3: void** that receives the address of the SQE array mapping.
 *
 * Returns the ring fd on success. Returns -1 if io_uring_setup or either mmap
 * fails; in that case nothing is stored through a2/a3 and any fd or mapping
 * created along the way is released again.
 */
static long syz_io_uring_setup(volatile long a0, volatile long a1,
                               volatile long a2, volatile long a3)
{
  uint32_t entries = (uint32_t)a0;
  struct io_uring_params* setup_params = (struct io_uring_params*)a1;
  void** ring_ptr_out = (void**)a2;
  void** sqes_ptr_out = (void**)a3;
  setup_params->flags &= ~(IORING_SETUP_CQE32 | IORING_SETUP_SQE128);

  /* Keep the syscall result signed. Holding it in a uint32_t turns -1 into
   * 4294967295, which no longer equals -1 once widened to a 64-bit long, so
   * callers testing `res != -1` would never notice the failure. */
  long fd_io_uring = syscall(__NR_io_uring_setup, entries, setup_params);
  if (fd_io_uring < 0)
    return -1;

  /* One mapping covers both rings, so size it for whichever ends later. */
  uint32_t sq_ring_sz =
      setup_params->sq_off.array + setup_params->sq_entries * sizeof(uint32_t);
  uint32_t cq_ring_sz = setup_params->cq_off.cqes +
                        setup_params->cq_entries * SIZEOF_IO_URING_CQE;
  uint32_t ring_sz = sq_ring_sz > cq_ring_sz ? sq_ring_sz : cq_ring_sz;
  void* ring =
      mmap(0, ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
           (int)fd_io_uring, IORING_OFF_SQ_RING);
  if (ring == MAP_FAILED) {
    close((int)fd_io_uring);
    return -1;
  }

  uint32_t sqes_sz = setup_params->sq_entries * SIZEOF_IO_URING_SQE;
  void* sqes = mmap(0, sqes_sz, PROT_READ | PROT_WRITE,
                    MAP_SHARED | MAP_POPULATE, (int)fd_io_uring,
                    IORING_OFF_SQES);
  if (sqes == MAP_FAILED) {
    munmap(ring, ring_sz);
    close((int)fd_io_uring);
    return -1;
  }

  *ring_ptr_out = ring;
  *sqes_ptr_out = sqes;

  /* Fill the index array, but never past the number of SQ entries the kernel
   * actually granted: that can be lower than the request when the caller's
   * flags ask for clamping, and the array only has sq_entries slots. */
  uint32_t* array = (uint32_t*)((uintptr_t)ring + setup_params->sq_off.array);
  uint32_t fill = entries < setup_params->sq_entries
                      ? entries
                      : setup_params->sq_entries;
  for (uint32_t index = 0; index < fill; index++)
    array[index] = index;
  return fd_io_uring;
}

/*
 * Copy one prepared SQE into the SQE array and advance the SQ tail.
 *
 * a0: base address of a ring mapping (as produced by syz_io_uring_setup).
 * a1: base address of an SQE array mapping.
 * a2: address of a SIZEOF_IO_URING_SQE-byte SQE image to copy in.
 *
 * The destination slot is the current tail masked with the ring mask read
 * from the mapping. The SQ index array is not touched here. Always returns 0.
 */
static long syz_io_uring_submit(volatile long a0, volatile long a1,
                                volatile long a2)
{
  char* const ring_base = (char*)a0;
  char* const sqes_base = (char*)a1;
  char* const sqe_image = (char*)a2;

  const uint32_t mask = *(uint32_t*)(ring_base + SQ_RING_MASK_OFFSET);
  uint32_t* const tail = (uint32_t*)(ring_base + SQ_TAIL_OFFSET);

  /* Slot index is the tail wrapped by the ring mask. */
  const uint32_t slot = *tail & mask;
  memcpy(sqes_base + slot * SIZEOF_IO_URING_SQE, sqe_image,
         SIZEOF_IO_URING_SQE);

  /* The tail is read again after the copy, then stored with release ordering
   * so the copied SQE bytes are ordered before the new tail value. */
  const uint32_t bumped = *tail + 1;
  __atomic_store_n(tail, bumped, __ATOMIC_RELEASE);
  return 0;
}

/* Resources captured by main() for later calls:
 *   r[0] fd of the first ring (starts as all-ones, i.e. -1 when used as an fd)
 *   r[1] address of the first ring's ring mapping
 *   r[2] address of the first ring's SQE array mapping
 *   r[3] address of the second ring's SQE array mapping */
uint64_t r[4] = {0xffffffffffffffff, 0x0, 0x0, 0x0};

/*
 * Replays the syzkaller program quoted earlier in this report: set up two
 * rings, queue one SEND and two READV SQEs on the first ring, enable the
 * rings, then call io_uring_enter on the first ring. Return values of the
 * individual steps are not checked beyond the `res != -1` captures.
 */
int main(void)
{
  /* Fixed-address scratch area: 16 MiB read/write/exec at 0x20000000, with one
   * inaccessible (prot=0) page mapped directly before and directly after it.
   * Every raw address used below lies inside this area. */
  syscall(__NR_mmap, /*addr=*/0x1ffff000ul, /*len=*/0x1000ul, /*prot=*/0ul,
          /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul, /*fd=*/-1,
          /*offset=*/0ul);
  syscall(__NR_mmap, /*addr=*/0x20000000ul, /*len=*/0x1000000ul,
          /*prot=PROT_WRITE|PROT_READ|PROT_EXEC*/ 7ul,
          /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul, /*fd=*/-1,
          /*offset=*/0ul);
  syscall(__NR_mmap, /*addr=*/0x21000000ul, /*len=*/0x1000ul, /*prot=*/0ul,
          /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul, /*fd=*/-1,
          /*offset=*/0ul);
  intptr_t res = 0;
  /* io_uring_params for the first ring, at 0x20000000: cq_entries = 0,
   * flags = 0x3a40, sq_thread_cpu = 0, sq_thread_idle = 0, the word at offset
   * 0x18 = -1, and the 12 bytes after it zeroed. */
  *(uint32_t*)0x20000004 = 0;
  *(uint32_t*)0x20000008 = 0x3a40;
  *(uint32_t*)0x2000000c = 0;
  *(uint32_t*)0x20000010 = 0;
  *(uint32_t*)0x20000018 = -1;
  memset((void*)0x2000001c, 0, 12);
  res = -1;
  /* First ring: 0x100 entries. The ring and SQE mapping addresses are stored
   * at 0x20000180 and 0x200001c0 and then captured into r[1] / r[2]. */
  res = syz_io_uring_setup(/*entries=*/0x100, /*params=*/0x20000000,
                           /*ring_ptr=*/0x20000180, /*sqes_ptr=*/0x200001c0);
  if (res != -1) {
    r[0] = res;
    r[1] = *(uint64_t*)0x20000180;
    r[2] = *(uint64_t*)0x200001c0;
  }
  /* io_uring_params for the second ring, at 0x200001c0: cq_entries = 0,
   * flags = 0x40, sq_thread_cpu = 0, sq_thread_idle = 3, and the word at
   * offset 0x18 = r[0] (the first ring's fd). The first word (sq_entries) is
   * not written here; if the first setup succeeded it still holds the low half
   * of the SQE pointer stored at this same address above. */
  *(uint32_t*)0x200001c4 = 0;
  *(uint32_t*)0x200001c8 = 0x40;
  *(uint32_t*)0x200001cc = 0;
  *(uint32_t*)0x200001d0 = 3;
  *(uint32_t*)0x200001d8 = r[0];
  memset((void*)0x200001dc, 0, 12);
  res = -1;
  /* Second ring: 0x255d entries. Its fd is not kept; only its SQE mapping
   * address is captured, into r[3]. */
  res = syz_io_uring_setup(/*entries=*/0x255d, /*params=*/0x200001c0,
                           /*ring_ptr=*/0x20000140, /*sqes_ptr=*/0x200024c0);
  if (res != -1)
    r[3] = *(uint64_t*)0x200024c0;
  /* 64-byte SQE image at 0x200000c0: first byte 0x1a (IORING_OP_SEND in the
   * syz program), 32-bit word at offset 4 = -1, everything else zero. */
  *(uint8_t*)0x200000c0 = 0x1a;
  *(uint8_t*)0x200000c1 = 0;
  *(uint16_t*)0x200000c2 = 0;
  *(uint32_t*)0x200000c4 = -1;
  *(uint64_t*)0x200000c8 = 0;
  *(uint64_t*)0x200000d0 = 0;
  *(uint32_t*)0x200000d8 = 0;
  *(uint32_t*)0x200000dc = 0;
  *(uint64_t*)0x200000e0 = 0;
  *(uint16_t*)0x200000e8 = 0;
  *(uint16_t*)0x200000ea = 0;
  memset((void*)0x200000ec, 0, 20);
  /* Note the mismatch: the tail that is advanced belongs to the first ring
   * (r[1]), but the SQE bytes are copied into the second ring's SQE array
   * (r[3]). */
  syz_io_uring_submit(/*ring_ptr=*/r[1], /*sqes_ptr=*/r[3], /*sqe=*/0x200000c0);
  /* Register opcode 0xc is IORING_REGISTER_ENABLE_RINGS in the syz program. */
  syscall(__NR_io_uring_register, /*fd=*/r[0], /*opcode=*/0xcul, /*arg=*/0ul,
          /*nr_args=*/0ul);
  /* SQE image at 0x20000100: first byte 1 (IORING_OP_READV in the syz
   * program), 32-bit word at offset 4 = r[0] (the first ring's own fd),
   * everything else zero. Queued on the first ring (r[1] / r[2]). */
  *(uint8_t*)0x20000100 = 1;
  *(uint8_t*)0x20000101 = 0;
  *(uint16_t*)0x20000102 = 0;
  *(uint32_t*)0x20000104 = r[0];
  *(uint64_t*)0x20000108 = 0;
  *(uint64_t*)0x20000110 = 0;
  *(uint32_t*)0x20000118 = 0;
  *(uint32_t*)0x2000011c = 0;
  *(uint64_t*)0x20000120 = 0;
  *(uint16_t*)0x20000128 = 0;
  *(uint16_t*)0x2000012a = 0;
  memset((void*)0x2000012c, 0, 20);
  syz_io_uring_submit(/*ring_ptr=*/r[1], /*sqes_ptr=*/r[2], /*sqe=*/0x20000100);
  /* The same READV image is rebuilt and queued a second time. */
  *(uint8_t*)0x20000100 = 1;
  *(uint8_t*)0x20000101 = 0;
  *(uint16_t*)0x20000102 = 0;
  *(uint32_t*)0x20000104 = r[0];
  *(uint64_t*)0x20000108 = 0;
  *(uint64_t*)0x20000110 = 0;
  *(uint32_t*)0x20000118 = 0;
  *(uint32_t*)0x2000011c = 0;
  *(uint64_t*)0x20000120 = 0;
  *(uint16_t*)0x20000128 = 0;
  *(uint16_t*)0x2000012a = 0;
  memset((void*)0x2000012c, 0, 20);
  syz_io_uring_submit(/*ring_ptr=*/r[1], /*sqes_ptr=*/r[2], /*sqe=*/0x20000100);
  /* Submit on the first ring. The backtrace in the report above runs through
   * io_uring_enter -> io_submit_sqes. */
  syscall(__NR_io_uring_enter, /*fd=*/r[0], /*to_submit=*/0x331b,
          /*min_complete=*/0, /*flags=*/0ul, /*sigmask=*/0ul, /*size=*/0ul);
  return 0;
}

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2024-12-13 14:10 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-12-09 12:03 possible deadlock in __wake_up_common_lock chase xd
2024-12-09 14:59 ` Jens Axboe
2024-12-11 11:46   ` chase xd
2024-12-12 14:28     ` Jens Axboe
2024-12-13  9:36       ` chase xd
2024-12-13 14:10         ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox