* [PATCH 0/2] add proper memory barrier for IOPOLL mode
@ 2020-06-15 9:24 Xiaoguang Wang
2020-06-15 9:24 ` [PATCH 1/2] io_uring: don't fail links for EAGAIN error in " Xiaoguang Wang
2020-06-15 9:24 ` [PATCH 2/2] io_uring: add memory barrier to synchronize io_kiocb's result and iopoll_completed Xiaoguang Wang
0 siblings, 2 replies; 11+ messages in thread
From: Xiaoguang Wang @ 2020-06-15 9:24 UTC (permalink / raw)
To: io-uring; +Cc: axboe, joseph.qi, Xiaoguang Wang
The first patch makes io_uring not fail links for an io request that
returns an EAGAIN error; the second patch adds a proper memory barrier to
synchronize io_kiocb's result and iopoll_completed.
Xiaoguang Wang (2):
io_uring: don't fail links for EAGAIN error in IOPOLL mode
io_uring: add memory barrier to synchronize io_kiocb's result and
iopoll_completed
fs/io_uring.c | 55 +++++++++++++++++++++++++++------------------------
1 file changed, 29 insertions(+), 26 deletions(-)
--
2.17.2
* [PATCH 1/2] io_uring: don't fail links for EAGAIN error in IOPOLL mode
2020-06-15 9:24 [PATCH 0/2] add proper memory barrier for IOPOLL mode Xiaoguang Wang
@ 2020-06-15 9:24 ` Xiaoguang Wang
2020-06-15 9:24 ` [PATCH 2/2] io_uring: add memory barrier to synchronize io_kiocb's result and iopoll_completed Xiaoguang Wang
1 sibling, 0 replies; 11+ messages in thread
From: Xiaoguang Wang @ 2020-06-15 9:24 UTC (permalink / raw)
To: io-uring; +Cc: axboe, joseph.qi, Xiaoguang Wang
In IOPOLL mode, for an EAGAIN error we'll try to submit the io request
again using io-wq, so don't fail the rest of the links if this io
request has links.
Signed-off-by: Xiaoguang Wang <[email protected]>
---
fs/io_uring.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 61fca5afaac8..a3119fecfe40 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1974,7 +1974,7 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
if (kiocb->ki_flags & IOCB_WRITE)
kiocb_end_write(req);
- if (res != req->result)
+ if (res != -EAGAIN && res != req->result)
req_set_fail_links(req);
req->result = res;
if (res != -EAGAIN)
--
2.17.2
* [PATCH 2/2] io_uring: add memory barrier to synchronize io_kiocb's result and iopoll_completed
2020-06-15 9:24 [PATCH 0/2] add proper memory barrier for IOPOLL mode Xiaoguang Wang
2020-06-15 9:24 ` [PATCH 1/2] io_uring: don't fail links for EAGAIN error in " Xiaoguang Wang
@ 2020-06-15 9:24 ` Xiaoguang Wang
2020-06-15 14:36 ` Jens Axboe
1 sibling, 1 reply; 11+ messages in thread
From: Xiaoguang Wang @ 2020-06-15 9:24 UTC (permalink / raw)
To: io-uring; +Cc: axboe, joseph.qi, Xiaoguang Wang
In io_complete_rw_iopoll(), the stores to io_kiocb's result and
iopoll_completed are two independent store operations. To ensure that
once iopoll_completed is true, req->result is also visible to the cpu
executing io_do_iopoll(), a proper memory barrier is needed.
And in io_do_iopoll(), we check whether req->result is -EAGAIN; if it
is, we need to issue this io request using io-wq again. In order to
issue just a single smp_rmb() on the completion side, move the re-submit
work to io_iopoll_complete().
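In short, the intended pairing is roughly the following (a simplified
sketch of the two sides; the real change is in the diff below):

    /* completion side: io_complete_rw_iopoll() */
    WRITE_ONCE(req->result, res);
    smp_wmb();                              /* publish ->result before the flag */
    WRITE_ONCE(req->iopoll_completed, 1);

    /* reaping side: io_do_iopoll() -> io_iopoll_complete() */
    if (READ_ONCE(req->iopoll_completed)) {
            smp_rmb();                      /* pairs with the smp_wmb() above */
            res = READ_ONCE(req->result);   /* -EAGAIN check sees the final value */
    }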
Signed-off-by: Xiaoguang Wang <[email protected]>
---
fs/io_uring.c | 53 +++++++++++++++++++++++++++------------------------
1 file changed, 28 insertions(+), 25 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index a3119fecfe40..5ba4d770081f 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1728,6 +1728,18 @@ static int io_put_kbuf(struct io_kiocb *req)
return cflags;
}
+static void io_iopoll_queue(struct list_head *again)
+{
+ struct io_kiocb *req;
+
+ do {
+ req = list_first_entry(again, struct io_kiocb, list);
+ list_del(&req->list);
+ refcount_inc(&req->refs);
+ io_queue_async_work(req);
+ } while (!list_empty(again));
+}
+
/*
* Find and free completed poll iocbs
*/
@@ -1736,11 +1748,20 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
{
struct req_batch rb;
struct io_kiocb *req;
+ LIST_HEAD(again);
+
+ /* order with ->result store in io_complete_rw_iopoll() */
+ smp_rmb();
rb.to_free = rb.need_iter = 0;
while (!list_empty(done)) {
int cflags = 0;
+ if (READ_ONCE(req->result) == -EAGAIN) {
+ req->iopoll_completed = 0;
+ list_move_tail(&req->list, &again);
+ continue;
+ }
req = list_first_entry(done, struct io_kiocb, list);
list_del(&req->list);
@@ -1759,18 +1780,9 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
if (ctx->flags & IORING_SETUP_SQPOLL)
io_cqring_ev_posted(ctx);
io_free_req_many(ctx, &rb);
-}
-static void io_iopoll_queue(struct list_head *again)
-{
- struct io_kiocb *req;
-
- do {
- req = list_first_entry(again, struct io_kiocb, list);
- list_del(&req->list);
- refcount_inc(&req->refs);
- io_queue_async_work(req);
- } while (!list_empty(again));
+ if (!list_empty(&again))
+ io_iopoll_queue(&again);
}
static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
@@ -1778,7 +1790,6 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
{
struct io_kiocb *req, *tmp;
LIST_HEAD(done);
- LIST_HEAD(again);
bool spin;
int ret;
@@ -1804,13 +1815,6 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
if (!list_empty(&done))
break;
- if (req->result == -EAGAIN) {
- list_move_tail(&req->list, &again);
- continue;
- }
- if (!list_empty(&again))
- break;
-
ret = kiocb->ki_filp->f_op->iopoll(kiocb, spin);
if (ret < 0)
break;
@@ -1823,9 +1827,6 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
if (!list_empty(&done))
io_iopoll_complete(ctx, nr_events, &done);
- if (!list_empty(&again))
- io_iopoll_queue(&again);
-
return ret;
}
@@ -1976,9 +1977,11 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
if (res != -EAGAIN && res != req->result)
req_set_fail_links(req);
- req->result = res;
- if (res != -EAGAIN)
- WRITE_ONCE(req->iopoll_completed, 1);
+
+ WRITE_ONCE(req->result, res);
+ /* order with io_iopoll_complete() checking ->result */
+ smp_wmb();
+ WRITE_ONCE(req->iopoll_completed, 1);
}
/*
--
2.17.2
* Re: [PATCH 2/2] io_uring: add memory barrier to synchronize io_kiocb's result and iopoll_completed
2020-06-15 9:24 ` [PATCH 2/2] io_uring: add memory barrier to synchronize io_kiocb's result and iopoll_completed Xiaoguang Wang
@ 2020-06-15 14:36 ` Jens Axboe
2020-06-15 14:48 ` Xiaoguang Wang
0 siblings, 1 reply; 11+ messages in thread
From: Jens Axboe @ 2020-06-15 14:36 UTC (permalink / raw)
To: Xiaoguang Wang, io-uring; +Cc: joseph.qi
On 6/15/20 3:24 AM, Xiaoguang Wang wrote:
> In io_complete_rw_iopoll(), the stores to io_kiocb's result and
> iopoll_completed are two independent store operations. To ensure that
> once iopoll_completed is true, req->result is also visible to the cpu
> executing io_do_iopoll(), a proper memory barrier is needed.
>
> And in io_do_iopoll(), we check whether req->result is -EAGAIN; if it
> is, we need to issue this io request using io-wq again. In order to
> issue just a single smp_rmb() on the completion side, move the re-submit
> work to io_iopoll_complete().
Did you actually test this one?
> @@ -1736,11 +1748,20 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
> {
> struct req_batch rb;
> struct io_kiocb *req;
> + LIST_HEAD(again);
> +
> + /* order with ->result store in io_complete_rw_iopoll() */
> + smp_rmb();
>
> rb.to_free = rb.need_iter = 0;
> while (!list_empty(done)) {
> int cflags = 0;
>
> + if (READ_ONCE(req->result) == -EAGAIN) {
> + req->iopoll_completed = 0;
> + list_move_tail(&req->list, &again);
> + continue;
> + }
> req = list_first_entry(done, struct io_kiocb, list);
> list_del(&req->list);
>
You're using 'req' here before you initialize it...
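Presumably you want to pull the entry off the list before touching it,
something like:

	req = list_first_entry(done, struct io_kiocb, list);
	if (READ_ONCE(req->result) == -EAGAIN) {
		req->iopoll_completed = 0;
		list_move_tail(&req->list, &again);
		continue;
	}
	list_del(&req->list);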
--
Jens Axboe
* Re: [PATCH 2/2] io_uring: add memory barrier to synchronize io_kiocb's result and iopoll_completed
2020-06-15 14:36 ` Jens Axboe
@ 2020-06-15 14:48 ` Xiaoguang Wang
2020-06-15 15:02 ` Jens Axboe
0 siblings, 1 reply; 11+ messages in thread
From: Xiaoguang Wang @ 2020-06-15 14:48 UTC (permalink / raw)
To: Jens Axboe, io-uring; +Cc: joseph.qi
hi,
> On 6/15/20 3:24 AM, Xiaoguang Wang wrote:
>> In io_complete_rw_iopoll(), the stores to io_kiocb's result and
>> iopoll_completed are two independent store operations. To ensure that
>> once iopoll_completed is true, req->result is also visible to the cpu
>> executing io_do_iopoll(), a proper memory barrier is needed.
>>
>> And in io_do_iopoll(), we check whether req->result is -EAGAIN; if it
>> is, we need to issue this io request using io-wq again. In order to
>> issue just a single smp_rmb() on the completion side, move the re-submit
>> work to io_iopoll_complete().
>
> Did you actually test this one?
I only run test cases in liburing/test in a vm.
>
>> @@ -1736,11 +1748,20 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
>> {
>> struct req_batch rb;
>> struct io_kiocb *req;
>> + LIST_HEAD(again);
>> +
>> + /* order with ->result store in io_complete_rw_iopoll() */
>> + smp_rmb();
>>
>> rb.to_free = rb.need_iter = 0;
>> while (!list_empty(done)) {
>> int cflags = 0;
>>
>> + if (READ_ONCE(req->result) == -EAGAIN) {
>> + req->iopoll_completed = 0;
>> + list_move_tail(&req->list, &again);
>> + continue;
>> + }
>> req = list_first_entry(done, struct io_kiocb, list);
>> list_del(&req->list);
>>
>
> You're using 'req' here before you initialize it...
Sorry, next time when I submit patches, I'll construct test cases which
will cover my code changes.
Regards,
Xiaoguang Wang
>
* Re: [PATCH 2/2] io_uring: add memory barrier to synchronize io_kiocb's result and iopoll_completed
2020-06-15 14:48 ` Xiaoguang Wang
@ 2020-06-15 15:02 ` Jens Axboe
2020-06-15 15:09 ` Pavel Begunkov
2020-06-15 15:32 ` Xiaoguang Wang
0 siblings, 2 replies; 11+ messages in thread
From: Jens Axboe @ 2020-06-15 15:02 UTC (permalink / raw)
To: Xiaoguang Wang, io-uring; +Cc: joseph.qi
On 6/15/20 8:48 AM, Xiaoguang Wang wrote:
> hi,
>
>> On 6/15/20 3:24 AM, Xiaoguang Wang wrote:
>>> In io_complete_rw_iopoll(), the stores to io_kiocb's result and
>>> iopoll_completed are two independent store operations. To ensure that
>>> once iopoll_completed is true, req->result is also visible to the cpu
>>> executing io_do_iopoll(), a proper memory barrier is needed.
>>>
>>> And in io_do_iopoll(), we check whether req->result is -EAGAIN; if it
>>> is, we need to issue this io request using io-wq again. In order to
>>> issue just a single smp_rmb() on the completion side, move the re-submit
>>> work to io_iopoll_complete().
>>
>> Did you actually test this one?
> I only run test cases in liburing/test in a vm.
>
>>
>>> @@ -1736,11 +1748,20 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
>>> {
>>> struct req_batch rb;
>>> struct io_kiocb *req;
>>> + LIST_HEAD(again);
>>> +
>>> + /* order with ->result store in io_complete_rw_iopoll() */
>>> + smp_rmb();
>>>
>>> rb.to_free = rb.need_iter = 0;
>>> while (!list_empty(done)) {
>>> int cflags = 0;
>>>
>>> + if (READ_ONCE(req->result) == -EAGAIN) {
>>> + req->iopoll_completed = 0;
>>> + list_move_tail(&req->list, &again);
>>> + continue;
>>> + }
>>> req = list_first_entry(done, struct io_kiocb, list);
>>> list_del(&req->list);
>>>
>>
>> You're using 'req' here before you initialize it...
> Sorry, next time when I submit patches, I'll construct test cases which
> will cover my code changes.
I'm surprised the compiler didn't complain, or that the regular testing
didn't barf on it.
Don't think you need a new test case for this, the iopoll test case
should cover it, if you limit the queue depth on the device by
setting /sys/block/<dev>/queue/nr_requests to 2 or something like
that.
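E.g.:

  # echo 2 > /sys/block/<dev>/queue/nr_requests

before running test/iopoll.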
--
Jens Axboe
* Re: [PATCH 2/2] io_uring: add memory barrier to synchronize io_kiocb's result and iopoll_completed
2020-06-15 15:02 ` Jens Axboe
@ 2020-06-15 15:09 ` Pavel Begunkov
2020-06-15 15:32 ` Xiaoguang Wang
1 sibling, 0 replies; 11+ messages in thread
From: Pavel Begunkov @ 2020-06-15 15:09 UTC (permalink / raw)
To: Jens Axboe, Xiaoguang Wang, io-uring; +Cc: joseph.qi
On 15/06/2020 18:02, Jens Axboe wrote:
> On 6/15/20 8:48 AM, Xiaoguang Wang wrote:
>> hi,
>>
>>> On 6/15/20 3:24 AM, Xiaoguang Wang wrote:
>>>> In io_complete_rw_iopoll(), the stores to io_kiocb's result and
>>>> iopoll_completed are two independent store operations. To ensure that
>>>> once iopoll_completed is true, req->result is also visible to the cpu
>>>> executing io_do_iopoll(), a proper memory barrier is needed.
>>>>
>>>> And in io_do_iopoll(), we check whether req->result is -EAGAIN; if it
>>>> is, we need to issue this io request using io-wq again. In order to
>>>> issue just a single smp_rmb() on the completion side, move the re-submit
>>>> work to io_iopoll_complete().
>>>
>>> Did you actually test this one?
>> I only run test cases in liburing/test in a vm.
>>
>>>
>>>> @@ -1736,11 +1748,20 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
>>>> {
>>>> struct req_batch rb;
>>>> struct io_kiocb *req;
>>>> + LIST_HEAD(again);
>>>> +
>>>> + /* order with ->result store in io_complete_rw_iopoll() */
>>>> + smp_rmb();
>>>>
>>>> rb.to_free = rb.need_iter = 0;
>>>> while (!list_empty(done)) {
>>>> int cflags = 0;
>>>>
>>>> + if (READ_ONCE(req->result) == -EAGAIN) {
>>>> + req->iopoll_completed = 0;
>>>> + list_move_tail(&req->list, &again);
>>>> + continue;
>>>> + }
>>>> req = list_first_entry(done, struct io_kiocb, list);
>>>> list_del(&req->list);
>>>>
>>>
>>> You're using 'req' here before you initialize it...
>> Sorry, next time when I submit patches, I'll construct test cases which
>> will cover my code changes.
>
> I'm surprised the compiler didn't complain, or that the regular testing
> didn't barf on it.
>
> Don't think you need a new test case for this, the iopoll test case
> should cover it, if you limit the queue depth on the device by
> setting /sys/block/<dev>/queue/nr_requests to 2 or something like
> that.
Hmm, nice hint. I hooked a dirty ->iopoll in null_blk with fault
injection for that
--
Pavel Begunkov
* Re: [PATCH 2/2] io_uring: add memory barrier to synchronize io_kiocb's result and iopoll_completed
2020-06-15 15:02 ` Jens Axboe
2020-06-15 15:09 ` Pavel Begunkov
@ 2020-06-15 15:32 ` Xiaoguang Wang
2020-06-15 15:35 ` Jens Axboe
1 sibling, 1 reply; 11+ messages in thread
From: Xiaoguang Wang @ 2020-06-15 15:32 UTC (permalink / raw)
To: Jens Axboe, io-uring; +Cc: joseph.qi
hi,
> On 6/15/20 8:48 AM, Xiaoguang Wang wrote:
>> hi,
>>
>>> On 6/15/20 3:24 AM, Xiaoguang Wang wrote:
>>>> In io_complete_rw_iopoll(), the stores to io_kiocb's result and
>>>> iopoll_completed are two independent store operations. To ensure that
>>>> once iopoll_completed is true, req->result is also visible to the cpu
>>>> executing io_do_iopoll(), a proper memory barrier is needed.
>>>>
>>>> And in io_do_iopoll(), we check whether req->result is -EAGAIN; if it
>>>> is, we need to issue this io request using io-wq again. In order to
>>>> issue just a single smp_rmb() on the completion side, move the re-submit
>>>> work to io_iopoll_complete().
>>>
>>> Did you actually test this one?
>> I only run test cases in liburing/test in a vm.
>>
>>>
>>>> @@ -1736,11 +1748,20 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
>>>> {
>>>> struct req_batch rb;
>>>> struct io_kiocb *req;
>>>> + LIST_HEAD(again);
>>>> +
>>>> + /* order with ->result store in io_complete_rw_iopoll() */
>>>> + smp_rmb();
>>>>
>>>> rb.to_free = rb.need_iter = 0;
>>>> while (!list_empty(done)) {
>>>> int cflags = 0;
>>>>
>>>> + if (READ_ONCE(req->result) == -EAGAIN) {
>>>> + req->iopoll_completed = 0;
>>>> + list_move_tail(&req->list, &again);
>>>> + continue;
>>>> + }
>>>> req = list_first_entry(done, struct io_kiocb, list);
>>>> list_del(&req->list);
>>>>
>>>
>>> You're using 'req' here before you initialize it...
>> Sorry, next time when I submit patches, I'll construct test cases which
>> will cover my code changes.
>
> I'm surprised the compiler didn't complain, or that the regular testing
> didn't barf on it.
I'm also surprised, will try to find the reason.
And indeed the iopoll test case failed, but the command below displayed nothing:
[lege@localhost test]$ sudo ./iopoll
Then I wrongly considered this test case to have passed.
dmesg shows errors:
[ 127.806945] ==================================================================
[ 127.806983] BUG: KASAN: use-after-free in io_iopoll_complete+0xbb/0x980
[ 127.806989] Read of size 4 at addr ffff8886e3e98808 by task io_uring-sq/1643
[ 127.806999] CPU: 16 PID: 1643 Comm: io_uring-sq Not tainted 5.7.0+ #501
[ 127.807013] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.1-0-g0551a4be2c-prebuilt.qemu-project.org 04/01/2014
[ 127.807021] Call Trace:
[ 127.807040] dump_stack+0x75/0xa0
[ 127.807047] ? io_iopoll_complete+0xbb/0x980
[ 127.807062] print_address_description.constprop.6+0x1a/0x220
[ 127.807086] ? _raw_write_lock_irqsave+0xd0/0xd0
[ 127.807092] ? io_free_req_many.part.79+0x208/0x2a0
[ 127.807107] ? __rcu_read_unlock+0x37/0x200
[ 127.807112] ? io_iopoll_complete+0xbb/0x980
[ 127.807117] ? io_iopoll_complete+0xbb/0x980
[ 127.807122] kasan_report.cold.9+0x1f/0x42
[ 127.807128] ? io_iopoll_complete+0xbb/0x980
[ 127.807133] io_iopoll_complete+0xbb/0x980
[ 127.807138] ? io_timeout_fn+0x140/0x140
[ 127.807150] ? __switch_to+0x2e9/0x5a0
[ 127.807157] io_iopoll_getevents+0x287/0x310
[ 127.807163] ? io_iopoll_complete+0x980/0x980
[ 127.807172] ? finish_wait+0xcb/0xf0
[ 127.807179] io_sq_thread+0x1c1/0x600
[ 127.807185] ? __ia32_sys_io_uring_enter+0x450/0x450
[ 127.807194] ? preempt_count_add+0x77/0xd0
[ 127.807200] ? _raw_spin_lock_irqsave+0x84/0xd0
[ 127.807206] ? _raw_write_lock_irqsave+0xd0/0xd0
[ 127.807210] ? finish_wait+0xf0/0xf0
[ 127.807215] ? preempt_count_sub+0x18/0xc0
[ 127.807224] ? __kthread_parkme+0xaf/0xd0
[ 127.807231] ? __ia32_sys_io_uring_enter+0x450/0x450
[ 127.807235] kthread+0x1e4/0x210
[ 127.807241] ? kthread_create_on_node+0xa0/0xa0
[ 127.807246] ret_from_fork+0x22/0x30
>
> Don't think you need a new test case for this, the iopoll test case
> should cover it, if you limit the queue depth on the device by
> setting /sys/block/<dev>/queue/nr_requests to 2 or something like
> that.
Thanks, I'll try later.
Regards,
Xiaoguang Wang
>
* Re: [PATCH 2/2] io_uring: add memory barrier to synchronize io_kiocb's result and iopoll_completed
2020-06-15 15:32 ` Xiaoguang Wang
@ 2020-06-15 15:35 ` Jens Axboe
2020-06-15 16:51 ` Jens Axboe
0 siblings, 1 reply; 11+ messages in thread
From: Jens Axboe @ 2020-06-15 15:35 UTC (permalink / raw)
To: Xiaoguang Wang, io-uring; +Cc: joseph.qi
On 6/15/20 9:32 AM, Xiaoguang Wang wrote:
> hi,
>
>> On 6/15/20 8:48 AM, Xiaoguang Wang wrote:
>>> hi,
>>>
>>>> On 6/15/20 3:24 AM, Xiaoguang Wang wrote:
>>>>> In io_complete_rw_iopoll(), the stores to io_kiocb's result and
>>>>> iopoll_completed are two independent store operations. To ensure that
>>>>> once iopoll_completed is true, req->result is also visible to the cpu
>>>>> executing io_do_iopoll(), a proper memory barrier is needed.
>>>>>
>>>>> And in io_do_iopoll(), we check whether req->result is -EAGAIN; if it
>>>>> is, we need to issue this io request using io-wq again. In order to
>>>>> issue just a single smp_rmb() on the completion side, move the re-submit
>>>>> work to io_iopoll_complete().
>>>>
>>>> Did you actually test this one?
>>> I only run test cases in liburing/test in a vm.
>>>
>>>>
>>>>> @@ -1736,11 +1748,20 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
>>>>> {
>>>>> struct req_batch rb;
>>>>> struct io_kiocb *req;
>>>>> + LIST_HEAD(again);
>>>>> +
>>>>> + /* order with ->result store in io_complete_rw_iopoll() */
>>>>> + smp_rmb();
>>>>>
>>>>> rb.to_free = rb.need_iter = 0;
>>>>> while (!list_empty(done)) {
>>>>> int cflags = 0;
>>>>>
>>>>> + if (READ_ONCE(req->result) == -EAGAIN) {
>>>>> + req->iopoll_completed = 0;
>>>>> + list_move_tail(&req->list, &again);
>>>>> + continue;
>>>>> + }
>>>>> req = list_first_entry(done, struct io_kiocb, list);
>>>>> list_del(&req->list);
>>>>>
>>>>
>>>> You're using 'req' here before you initialize it...
>>> Sorry, next time when I submit patches, I'll construct test cases which
>>> will cover my code changes.
>>
>> I'm surprised the compiler didn't complain, or that the regular testing
>> didn't barf on it.
> I'm also surprised, will try to find the reason.
> And indeed the iopoll test case failed, but the command below displayed nothing:
> [lege@localhost test]$ sudo ./iopoll
> Then I wrongly considered this test case to have passed.
>
> dmesg shows errors:
> [ 127.806945] ==================================================================
> [ 127.806983] BUG: KASAN: use-after-free in io_iopoll_complete+0xbb/0x980
> [ 127.806989] Read of size 4 at addr ffff8886e3e98808 by task io_uring-sq/1643
>
> [ 127.806999] CPU: 16 PID: 1643 Comm: io_uring-sq Not tainted 5.7.0+ #501
> [ 127.807013] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.1-0-g0551a4be2c-prebuilt.qemu-project.org 04/01/2014
> [ 127.807021] Call Trace:
> [ 127.807040] dump_stack+0x75/0xa0
> [ 127.807047] ? io_iopoll_complete+0xbb/0x980
> [ 127.807062] print_address_description.constprop.6+0x1a/0x220
> [ 127.807086] ? _raw_write_lock_irqsave+0xd0/0xd0
> [ 127.807092] ? io_free_req_many.part.79+0x208/0x2a0
> [ 127.807107] ? __rcu_read_unlock+0x37/0x200
> [ 127.807112] ? io_iopoll_complete+0xbb/0x980
> [ 127.807117] ? io_iopoll_complete+0xbb/0x980
> [ 127.807122] kasan_report.cold.9+0x1f/0x42
> [ 127.807128] ? io_iopoll_complete+0xbb/0x980
> [ 127.807133] io_iopoll_complete+0xbb/0x980
> [ 127.807138] ? io_timeout_fn+0x140/0x140
> [ 127.807150] ? __switch_to+0x2e9/0x5a0
> [ 127.807157] io_iopoll_getevents+0x287/0x310
> [ 127.807163] ? io_iopoll_complete+0x980/0x980
> [ 127.807172] ? finish_wait+0xcb/0xf0
> [ 127.807179] io_sq_thread+0x1c1/0x600
> [ 127.807185] ? __ia32_sys_io_uring_enter+0x450/0x450
> [ 127.807194] ? preempt_count_add+0x77/0xd0
> [ 127.807200] ? _raw_spin_lock_irqsave+0x84/0xd0
> [ 127.807206] ? _raw_write_lock_irqsave+0xd0/0xd0
> [ 127.807210] ? finish_wait+0xf0/0xf0
> [ 127.807215] ? preempt_count_sub+0x18/0xc0
> [ 127.807224] ? __kthread_parkme+0xaf/0xd0
> [ 127.807231] ? __ia32_sys_io_uring_enter+0x450/0x450
> [ 127.807235] kthread+0x1e4/0x210
> [ 127.807241] ? kthread_create_on_node+0xa0/0xa0
> [ 127.807246] ret_from_fork+0x22/0x30
There you go, so it did fail, just didn't register as a failure. I should
probably add a dmesg check for the liburing tests, and fail a test if
we trigger a WARNING or BUG condition. I'll look into that.
--
Jens Axboe
* Re: [PATCH 2/2] io_uring: add memory barrier to synchronize io_kiocb's result and iopoll_completed
2020-06-15 15:35 ` Jens Axboe
@ 2020-06-15 16:51 ` Jens Axboe
2020-06-15 17:53 ` Xiaoguang Wang
0 siblings, 1 reply; 11+ messages in thread
From: Jens Axboe @ 2020-06-15 16:51 UTC (permalink / raw)
To: Xiaoguang Wang, io-uring; +Cc: joseph.qi
On 6/15/20 9:35 AM, Jens Axboe wrote:
> On 6/15/20 9:32 AM, Xiaoguang Wang wrote:
>> hi,
>>
>>> On 6/15/20 8:48 AM, Xiaoguang Wang wrote:
>>>> hi,
>>>>
>>>>> On 6/15/20 3:24 AM, Xiaoguang Wang wrote:
>>>>>> In io_complete_rw_iopoll(), the stores to io_kiocb's result and
>>>>>> iopoll_completed are two independent store operations. To ensure that
>>>>>> once iopoll_completed is true, req->result is also visible to the cpu
>>>>>> executing io_do_iopoll(), a proper memory barrier is needed.
>>>>>>
>>>>>> And in io_do_iopoll(), we check whether req->result is -EAGAIN; if it
>>>>>> is, we need to issue this io request using io-wq again. In order to
>>>>>> issue just a single smp_rmb() on the completion side, move the re-submit
>>>>>> work to io_iopoll_complete().
>>>>>
>>>>> Did you actually test this one?
>>>> I only run test cases in liburing/test in a vm.
>>>>
>>>>>
>>>>>> @@ -1736,11 +1748,20 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
>>>>>> {
>>>>>> struct req_batch rb;
>>>>>> struct io_kiocb *req;
>>>>>> + LIST_HEAD(again);
>>>>>> +
>>>>>> + /* order with ->result store in io_complete_rw_iopoll() */
>>>>>> + smp_rmb();
>>>>>>
>>>>>> rb.to_free = rb.need_iter = 0;
>>>>>> while (!list_empty(done)) {
>>>>>> int cflags = 0;
>>>>>>
>>>>>> + if (READ_ONCE(req->result) == -EAGAIN) {
>>>>>> + req->iopoll_completed = 0;
>>>>>> + list_move_tail(&req->list, &again);
>>>>>> + continue;
>>>>>> + }
>>>>>> req = list_first_entry(done, struct io_kiocb, list);
>>>>>> list_del(&req->list);
>>>>>>
>>>>>
>>>>> You're using 'req' here before you initialize it...
>>>> Sorry, next time when I submit patches, I'll construct test cases which
>>>> will cover my code changes.
>>>
>>> I'm surprised the compiler didn't complain, or that the regular testing
>>> didn't barf on it.
>> I'm also surprised, will try to find the reason.
>> And indeed the iopoll test case failed, but the command below displayed nothing:
>> [lege@localhost test]$ sudo ./iopoll
>> Then I wrongly considered this test case to have passed.
>>
>> dmesg shows errors:
>> [ 127.806945] ==================================================================
>> [ 127.806983] BUG: KASAN: use-after-free in io_iopoll_complete+0xbb/0x980
>> [ 127.806989] Read of size 4 at addr ffff8886e3e98808 by task io_uring-sq/1643
>>
>> [ 127.806999] CPU: 16 PID: 1643 Comm: io_uring-sq Not tainted 5.7.0+ #501
>> [ 127.807013] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.1-0-g0551a4be2c-prebuilt.qemu-project.org 04/01/2014
>> [ 127.807021] Call Trace:
>> [ 127.807040] dump_stack+0x75/0xa0
>> [ 127.807047] ? io_iopoll_complete+0xbb/0x980
>> [ 127.807062] print_address_description.constprop.6+0x1a/0x220
>> [ 127.807086] ? _raw_write_lock_irqsave+0xd0/0xd0
>> [ 127.807092] ? io_free_req_many.part.79+0x208/0x2a0
>> [ 127.807107] ? __rcu_read_unlock+0x37/0x200
>> [ 127.807112] ? io_iopoll_complete+0xbb/0x980
>> [ 127.807117] ? io_iopoll_complete+0xbb/0x980
>> [ 127.807122] kasan_report.cold.9+0x1f/0x42
>> [ 127.807128] ? io_iopoll_complete+0xbb/0x980
>> [ 127.807133] io_iopoll_complete+0xbb/0x980
>> [ 127.807138] ? io_timeout_fn+0x140/0x140
>> [ 127.807150] ? __switch_to+0x2e9/0x5a0
>> [ 127.807157] io_iopoll_getevents+0x287/0x310
>> [ 127.807163] ? io_iopoll_complete+0x980/0x980
>> [ 127.807172] ? finish_wait+0xcb/0xf0
>> [ 127.807179] io_sq_thread+0x1c1/0x600
>> [ 127.807185] ? __ia32_sys_io_uring_enter+0x450/0x450
>> [ 127.807194] ? preempt_count_add+0x77/0xd0
>> [ 127.807200] ? _raw_spin_lock_irqsave+0x84/0xd0
>> [ 127.807206] ? _raw_write_lock_irqsave+0xd0/0xd0
>> [ 127.807210] ? finish_wait+0xf0/0xf0
>> [ 127.807215] ? preempt_count_sub+0x18/0xc0
>> [ 127.807224] ? __kthread_parkme+0xaf/0xd0
>> [ 127.807231] ? __ia32_sys_io_uring_enter+0x450/0x450
>> [ 127.807235] kthread+0x1e4/0x210
>> [ 127.807241] ? kthread_create_on_node+0xa0/0xa0
>> [ 127.807246] ret_from_fork+0x22/0x30
>
> There you go, so it did fail, just didn't register as a failure. I should
> probably add a dmesg check for the liburing tests, and fail a test if
> we trigger a WARNING or BUG condition. I'll look into that.
I pushed a commit to liburing so that it should now catch dmesg
errors logged while running a test.
--
Jens Axboe
* Re: [PATCH 2/2] io_uring: add memory barrier to synchronize io_kiocb's result and iopoll_completed
2020-06-15 16:51 ` Jens Axboe
@ 2020-06-15 17:53 ` Xiaoguang Wang
0 siblings, 0 replies; 11+ messages in thread
From: Xiaoguang Wang @ 2020-06-15 17:53 UTC (permalink / raw)
To: Jens Axboe, io-uring; +Cc: joseph.qi
hi,
> On 6/15/20 9:35 AM, Jens Axboe wrote:
>> On 6/15/20 9:32 AM, Xiaoguang Wang wrote:
>>> hi,
>>>
>>>> On 6/15/20 8:48 AM, Xiaoguang Wang wrote:
>>>>> hi,
>>>>>
>>>>>> On 6/15/20 3:24 AM, Xiaoguang Wang wrote:
>>>>>>> In io_complete_rw_iopoll(), the stores to io_kiocb's result and
>>>>>>> iopoll_completed are two independent store operations. To ensure that
>>>>>>> once iopoll_completed is true, req->result is also visible to the cpu
>>>>>>> executing io_do_iopoll(), a proper memory barrier is needed.
>>>>>>>
>>>>>>> And in io_do_iopoll(), we check whether req->result is -EAGAIN; if it
>>>>>>> is, we need to issue this io request using io-wq again. In order to
>>>>>>> issue just a single smp_rmb() on the completion side, move the re-submit
>>>>>>> work to io_iopoll_complete().
>>>>>>
>>>>>> Did you actually test this one?
>>>>> I only run test cases in liburing/test in a vm.
>>>>>
>>>>>>
>>>>>>> @@ -1736,11 +1748,20 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
>>>>>>> {
>>>>>>> struct req_batch rb;
>>>>>>> struct io_kiocb *req;
>>>>>>> + LIST_HEAD(again);
>>>>>>> +
>>>>>>> + /* order with ->result store in io_complete_rw_iopoll() */
>>>>>>> + smp_rmb();
>>>>>>>
>>>>>>> rb.to_free = rb.need_iter = 0;
>>>>>>> while (!list_empty(done)) {
>>>>>>> int cflags = 0;
>>>>>>>
>>>>>>> + if (READ_ONCE(req->result) == -EAGAIN) {
>>>>>>> + req->iopoll_completed = 0;
>>>>>>> + list_move_tail(&req->list, &again);
>>>>>>> + continue;
>>>>>>> + }
>>>>>>> req = list_first_entry(done, struct io_kiocb, list);
>>>>>>> list_del(&req->list);
>>>>>>>
>>>>>>
>>>>>> You're using 'req' here before you initialize it...
>>>>> Sorry, next time when I submit patches, I'll construct test cases which
>>>>> will cover my code changes.
>>>>
>>>> I'm surprised the compiler didn't complain, or that the regular testing
>>>> didn't barf on it.
>>> I'm also surprised, will try to find the reason.
>>> And indeed the iopoll test case failed, but the command below displayed nothing:
>>> [lege@localhost test]$ sudo ./iopoll
>>> Then I wrongly considered this test case to have passed.
>>>
>>> dmesg shows errors:
>>> [ 127.806945] ==================================================================
>>> [ 127.806983] BUG: KASAN: use-after-free in io_iopoll_complete+0xbb/0x980
>>> [ 127.806989] Read of size 4 at addr ffff8886e3e98808 by task io_uring-sq/1643
>>>
>>> [ 127.806999] CPU: 16 PID: 1643 Comm: io_uring-sq Not tainted 5.7.0+ #501
>>> [ 127.807013] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.1-0-g0551a4be2c-prebuilt.qemu-project.org 04/01/2014
>>> [ 127.807021] Call Trace:
>>> [ 127.807040] dump_stack+0x75/0xa0
>>> [ 127.807047] ? io_iopoll_complete+0xbb/0x980
>>> [ 127.807062] print_address_description.constprop.6+0x1a/0x220
>>> [ 127.807086] ? _raw_write_lock_irqsave+0xd0/0xd0
>>> [ 127.807092] ? io_free_req_many.part.79+0x208/0x2a0
>>> [ 127.807107] ? __rcu_read_unlock+0x37/0x200
>>> [ 127.807112] ? io_iopoll_complete+0xbb/0x980
>>> [ 127.807117] ? io_iopoll_complete+0xbb/0x980
>>> [ 127.807122] kasan_report.cold.9+0x1f/0x42
>>> [ 127.807128] ? io_iopoll_complete+0xbb/0x980
>>> [ 127.807133] io_iopoll_complete+0xbb/0x980
>>> [ 127.807138] ? io_timeout_fn+0x140/0x140
>>> [ 127.807150] ? __switch_to+0x2e9/0x5a0
>>> [ 127.807157] io_iopoll_getevents+0x287/0x310
>>> [ 127.807163] ? io_iopoll_complete+0x980/0x980
>>> [ 127.807172] ? finish_wait+0xcb/0xf0
>>> [ 127.807179] io_sq_thread+0x1c1/0x600
>>> [ 127.807185] ? __ia32_sys_io_uring_enter+0x450/0x450
>>> [ 127.807194] ? preempt_count_add+0x77/0xd0
>>> [ 127.807200] ? _raw_spin_lock_irqsave+0x84/0xd0
>>> [ 127.807206] ? _raw_write_lock_irqsave+0xd0/0xd0
>>> [ 127.807210] ? finish_wait+0xf0/0xf0
>>> [ 127.807215] ? preempt_count_sub+0x18/0xc0
>>> [ 127.807224] ? __kthread_parkme+0xaf/0xd0
>>> [ 127.807231] ? __ia32_sys_io_uring_enter+0x450/0x450
>>> [ 127.807235] kthread+0x1e4/0x210
>>> [ 127.807241] ? kthread_create_on_node+0xa0/0xa0
>>> [ 127.807246] ret_from_fork+0x22/0x30
>>
>> There you go, so it did fail, just didn't register as a failure. I should
>> probably add a dmesg check for the liburing tests, and fail a test if
>> we trigger a WARNING or BUG condition. I'll look into that.
>
> I pushed a commit to liburing so that it should now catch dmesg
> errors logged while running a test.
Cool, thanks.
Will run new tests, and if there are no failures, I'll send a V2 soon.
Regards,
Xiaoguang Wang
>