* [PATCH v2] io_uring/io-wq: Use set_bit() and test_bit() at worker->flags
@ 2024-05-07 15:05 Breno Leitao
2024-05-07 15:09 ` Jens Axboe
0 siblings, 1 reply; 3+ messages in thread
From: Breno Leitao @ 2024-05-07 15:05 UTC (permalink / raw)
To: Jens Axboe, Pavel Begunkov
Cc: christophe.jaillet, open list:IO_URING, open list
Utilize set_bit() and test_bit() on worker->flags within io_uring/io-wq
to address potential data races.
The io_worker->flags field may be accessed concurrently from several code
paths, leading to data races. When KCSAN is enabled, it reports a data race
between the io_worker_handle_work() and io_wq_activate_free_worker()
functions.
BUG: KCSAN: data-race in io_worker_handle_work / io_wq_activate_free_worker
write to 0xffff8885c4246404 of 4 bytes by task 49071 on cpu 28:
io_worker_handle_work (io_uring/io-wq.c:434 io_uring/io-wq.c:569)
io_wq_worker (io_uring/io-wq.c:?)
<snip>
read to 0xffff8885c4246404 of 4 bytes by task 49024 on cpu 5:
io_wq_activate_free_worker (io_uring/io-wq.c:? io_uring/io-wq.c:285)
io_wq_enqueue (io_uring/io-wq.c:947)
io_queue_iowq (io_uring/io_uring.c:524)
io_req_task_submit (io_uring/io_uring.c:1511)
io_handle_tw_list (io_uring/io_uring.c:1198)
<snip>
Line numbers against commit 18daea77cca6 ("Merge tag 'for-linus' of
git://git.kernel.org/pub/scm/virt/kvm/kvm").
These races involve writes and reads to the same memory location by
different tasks running on different CPUs. To mitigate this, refactor
the code to use atomic bit operations such as set_bit(), test_bit(), and
clear_bit() instead of plain "and" and "or" operations. Because these
helpers take a bit number rather than a mask, the IO_WORKER_F_* constants
are changed from mask values to bit indices. This ensures
thread-safe manipulation of worker flags.
Also, move `create_index` to avoid holes in the structure.
Signed-off-by: Breno Leitao <[email protected]>
---
Changelog:
v2:
* Moved `create_index` to avoid holes in the struct.
* Use set_mask_bits() as suggested by Christophe JAILLET.
v1:
* https://lore.kernel.org/all/[email protected]/
---
io_uring/io-wq.c | 46 +++++++++++++++++++++++-----------------------
1 file changed, 23 insertions(+), 23 deletions(-)
diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c
index 522196dfb0ff..296b8301813a 100644
--- a/io_uring/io-wq.c
+++ b/io_uring/io-wq.c
@@ -25,10 +25,10 @@
#define WORKER_IDLE_TIMEOUT (5 * HZ)
enum {
- IO_WORKER_F_UP = 1, /* up and active */
- IO_WORKER_F_RUNNING = 2, /* account as running */
- IO_WORKER_F_FREE = 4, /* worker on free list */
- IO_WORKER_F_BOUND = 8, /* is doing bounded work */
+ IO_WORKER_F_UP = 0, /* up and active */
+ IO_WORKER_F_RUNNING = 1, /* account as running */
+ IO_WORKER_F_FREE = 2, /* worker on free list */
+ IO_WORKER_F_BOUND = 3, /* is doing bounded work */
};
enum {
@@ -44,7 +44,8 @@ enum {
*/
struct io_worker {
refcount_t ref;
- unsigned flags;
+ int create_index;
+ unsigned long flags;
struct hlist_nulls_node nulls_node;
struct list_head all_list;
struct task_struct *task;
@@ -58,7 +59,6 @@ struct io_worker {
unsigned long create_state;
struct callback_head create_work;
- int create_index;
union {
struct rcu_head rcu;
@@ -165,7 +165,7 @@ static inline struct io_wq_acct *io_work_get_acct(struct io_wq *wq,
static inline struct io_wq_acct *io_wq_get_acct(struct io_worker *worker)
{
- return io_get_acct(worker->wq, worker->flags & IO_WORKER_F_BOUND);
+ return io_get_acct(worker->wq, test_bit(IO_WORKER_F_BOUND, &worker->flags));
}
static void io_worker_ref_put(struct io_wq *wq)
@@ -225,7 +225,7 @@ static void io_worker_exit(struct io_worker *worker)
wait_for_completion(&worker->ref_done);
raw_spin_lock(&wq->lock);
- if (worker->flags & IO_WORKER_F_FREE)
+ if (test_bit(IO_WORKER_F_FREE, &worker->flags))
hlist_nulls_del_rcu(&worker->nulls_node);
list_del_rcu(&worker->all_list);
raw_spin_unlock(&wq->lock);
@@ -410,7 +410,7 @@ static void io_wq_dec_running(struct io_worker *worker)
struct io_wq_acct *acct = io_wq_get_acct(worker);
struct io_wq *wq = worker->wq;
- if (!(worker->flags & IO_WORKER_F_UP))
+ if (!test_bit(IO_WORKER_F_UP, &worker->flags))
return;
if (!atomic_dec_and_test(&acct->nr_running))
@@ -430,8 +430,8 @@ static void io_wq_dec_running(struct io_worker *worker)
*/
static void __io_worker_busy(struct io_wq *wq, struct io_worker *worker)
{
- if (worker->flags & IO_WORKER_F_FREE) {
- worker->flags &= ~IO_WORKER_F_FREE;
+ if (test_bit(IO_WORKER_F_FREE, &worker->flags)) {
+ clear_bit(IO_WORKER_F_FREE, &worker->flags);
raw_spin_lock(&wq->lock);
hlist_nulls_del_init_rcu(&worker->nulls_node);
raw_spin_unlock(&wq->lock);
@@ -444,8 +444,8 @@ static void __io_worker_busy(struct io_wq *wq, struct io_worker *worker)
static void __io_worker_idle(struct io_wq *wq, struct io_worker *worker)
__must_hold(wq->lock)
{
- if (!(worker->flags & IO_WORKER_F_FREE)) {
- worker->flags |= IO_WORKER_F_FREE;
+ if (!test_bit(IO_WORKER_F_FREE, &worker->flags)) {
+ set_bit(IO_WORKER_F_FREE, &worker->flags);
hlist_nulls_add_head_rcu(&worker->nulls_node, &wq->free_list);
}
}
@@ -631,7 +631,7 @@ static int io_wq_worker(void *data)
bool exit_mask = false, last_timeout = false;
char buf[TASK_COMM_LEN];
- worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
+ set_mask_bits(&worker->flags, 0, IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
snprintf(buf, sizeof(buf), "iou-wrk-%d", wq->task->pid);
set_task_comm(current, buf);
@@ -695,11 +695,11 @@ void io_wq_worker_running(struct task_struct *tsk)
if (!worker)
return;
- if (!(worker->flags & IO_WORKER_F_UP))
+ if (!test_bit(IO_WORKER_F_UP, &worker->flags))
return;
- if (worker->flags & IO_WORKER_F_RUNNING)
+ if (test_bit(IO_WORKER_F_RUNNING, &worker->flags))
return;
- worker->flags |= IO_WORKER_F_RUNNING;
+ set_bit(IO_WORKER_F_RUNNING, &worker->flags);
io_wq_inc_running(worker);
}
@@ -713,12 +713,12 @@ void io_wq_worker_sleeping(struct task_struct *tsk)
if (!worker)
return;
- if (!(worker->flags & IO_WORKER_F_UP))
+ if (!test_bit(IO_WORKER_F_UP, &worker->flags))
return;
- if (!(worker->flags & IO_WORKER_F_RUNNING))
+ if (!test_bit(IO_WORKER_F_RUNNING, &worker->flags))
return;
- worker->flags &= ~IO_WORKER_F_RUNNING;
+ clear_bit(IO_WORKER_F_RUNNING, &worker->flags);
io_wq_dec_running(worker);
}
@@ -732,7 +732,7 @@ static void io_init_new_worker(struct io_wq *wq, struct io_worker *worker,
raw_spin_lock(&wq->lock);
hlist_nulls_add_head_rcu(&worker->nulls_node, &wq->free_list);
list_add_tail_rcu(&worker->all_list, &wq->all_list);
- worker->flags |= IO_WORKER_F_FREE;
+ set_bit(IO_WORKER_F_FREE, &worker->flags);
raw_spin_unlock(&wq->lock);
wake_up_new_task(tsk);
}
@@ -838,7 +838,7 @@ static bool create_io_worker(struct io_wq *wq, int index)
init_completion(&worker->ref_done);
if (index == IO_WQ_ACCT_BOUND)
- worker->flags |= IO_WORKER_F_BOUND;
+ set_bit(IO_WORKER_F_BOUND, &worker->flags);
tsk = create_io_thread(io_wq_worker, worker, NUMA_NO_NODE);
if (!IS_ERR(tsk)) {
@@ -924,8 +924,8 @@ static bool io_wq_work_match_item(struct io_wq_work *work, void *data)
void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work)
{
struct io_wq_acct *acct = io_work_get_acct(wq, work);
+ unsigned long work_flags = work->flags;
struct io_cb_cancel_data match;
- unsigned work_flags = work->flags;
bool do_create;
/*
--
2.43.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH v2] io_uring/io-wq: Use set_bit() and test_bit() at worker->flags
2024-05-07 15:05 [PATCH v2] io_uring/io-wq: Use set_bit() and test_bit() at worker->flags Breno Leitao
@ 2024-05-07 15:09 ` Jens Axboe
2024-05-07 16:34 ` Christophe JAILLET
0 siblings, 1 reply; 3+ messages in thread
From: Jens Axboe @ 2024-05-07 15:09 UTC (permalink / raw)
To: Breno Leitao, Pavel Begunkov
Cc: christophe.jaillet, open list:IO_URING, open list
On 5/7/24 9:05 AM, Breno Leitao wrote:
> @@ -631,7 +631,7 @@ static int io_wq_worker(void *data)
> bool exit_mask = false, last_timeout = false;
> char buf[TASK_COMM_LEN];
>
> - worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
> + set_mask_bits(&worker->flags, 0, IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
This takes a mask, no? I think this should be:
set_mask_bits(&worker->flags, 0, BIT(IO_WORKER_F_UP) | BIT(IO_WORKER_F_RUNNING));
Hmm?
--
Jens Axboe
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH v2] io_uring/io-wq: Use set_bit() and test_bit() at worker->flags
2024-05-07 15:09 ` Jens Axboe
@ 2024-05-07 16:34 ` Christophe JAILLET
0 siblings, 0 replies; 3+ messages in thread
From: Christophe JAILLET @ 2024-05-07 16:34 UTC (permalink / raw)
To: Jens Axboe, Breno Leitao, Pavel Begunkov; +Cc: open list:IO_URING, open list
Le 07/05/2024 à 17:09, Jens Axboe a écrit :
> On 5/7/24 9:05 AM, Breno Leitao wrote:
>> @@ -631,7 +631,7 @@ static int io_wq_worker(void *data)
>> bool exit_mask = false, last_timeout = false;
>> char buf[TASK_COMM_LEN];
>>
>> - worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
>> + set_mask_bits(&worker->flags, 0, IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
>
> This takes a mask, no? I think this should be:
>
> set_mask_bits(&worker->flags, 0, BIT(IO_WORKER_F_UP) | BIT(IO_WORKER_F_RUNNING));
>
> Hmm?
>
Because of that:
enum {
- IO_WORKER_F_UP = 1, /* up and active */
- IO_WORKER_F_RUNNING = 2, /* account as running */
- IO_WORKER_F_FREE = 4, /* worker on free list */
- IO_WORKER_F_BOUND = 8, /* is doing bounded work */
+ IO_WORKER_F_UP = 0, /* up and active */
+ IO_WORKER_F_RUNNING = 1, /* account as running */
+ IO_WORKER_F_FREE = 2, /* worker on free list */
+ IO_WORKER_F_BOUND = 3, /* is doing bounded work */
};
yes, now, BIT() is needed.
CJ
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2024-05-07 17:08 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-05-07 15:05 [PATCH v2] io_uring/io-wq: Use set_bit() and test_bit() at worker->flags Breno Leitao
2024-05-07 15:09 ` Jens Axboe
2024-05-07 16:34 ` Christophe JAILLET
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox