* [PATCH 1/1] io_uring: fix exiting io_req_task_work_add leaks
From: Pavel Begunkov @ 2021-07-01 12:26 UTC
To: Jens Axboe, io-uring
If a caller enters io_req_task_work_add() without seeing PF_EXITING, it
will set the ->task_state bit and try task_work_add(), which may fail by
that point because the task is exiting. If that happens, the function
tries to cancel the request.

However, in the meanwhile other io_req_task_work_add() callers may come
in, see the bit already set and leave their requests in the list, where
they will never be executed.
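For illustration, a rough sequence of the leak with the old code (a
simplified sketch, not literal code; a request is queued on
tctx->task_list before the ->task_state bit is checked):

  1. io_req_task_work_add(A) queues A, wins test_and_set_bit(0,
     &tctx->task_state), then its task_work_add() fails because the
     task is already exiting.
  2. Meanwhile io_req_task_work_add(B) queues B, sees the bit already
     set and returns, assuming the pending task_work will run B.
  3. The old slow path only unlinks A from the list, clears the bit and
     returns an error, so the caller cancels A; B is left on the list
     and never executed.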
Don't propagate an error; instead clear the bit first and then fall back
all requests that we can splice from the list. The callback functions
have to be able to deal with PF_EXITING, so poll and apoll were modified
via changing io_poll_rewait().
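The resulting failure path looks roughly like this (condensed from the
patch below; the bit is cleared before the list is spliced, presumably so
that a request queued after the splice finds the bit clear and retries
the whole add path itself):

fail:
	clear_bit(0, &tctx->task_state);

	spin_lock_irqsave(&tctx->task_lock, flags);
	node = tctx->task_list.first;
	INIT_WQ_LIST(&tctx->task_list);
	spin_unlock_irqrestore(&tctx->task_lock, flags);

	/* run every spliced request via the ctx fallback work instead
	 * of leaking it
	 */
	while (node) {
		req = container_of(node, struct io_kiocb, io_task_work.node);
		node = node->next;
		if (llist_add(&req->io_task_work.fallback_node,
			      &req->ctx->fallback_llist))
			schedule_delayed_work(&req->ctx->fallback_work, 1);
	}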
Reported-by: Jens Axboe <[email protected]>
Signed-off-by: Pavel Begunkov <[email protected]>
---
Jens, can you try whether it helps with the leak you mentioned? I can't
see it myself. As with the previous one, you would need to remove the
PF_EXITING check, and it should in theory be safe to do.
fs/io_uring.c | 70 ++++++++++++++++++---------------------------------
1 file changed, 24 insertions(+), 46 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5b840bb1e8ec..881856088990 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1952,17 +1952,13 @@ static void tctx_task_work(struct callback_head *cb)
ctx_flush_and_put(ctx);
}
-static int io_req_task_work_add(struct io_kiocb *req)
+static void io_req_task_work_add(struct io_kiocb *req)
{
struct task_struct *tsk = req->task;
struct io_uring_task *tctx = tsk->io_uring;
enum task_work_notify_mode notify;
- struct io_wq_work_node *node, *prev;
+ struct io_wq_work_node *node;
unsigned long flags;
- int ret = 0;
-
- if (unlikely(tsk->flags & PF_EXITING))
- return -ESRCH;
WARN_ON_ONCE(!tctx);
@@ -1973,7 +1969,9 @@ static int io_req_task_work_add(struct io_kiocb *req)
/* task_work already pending, we're done */
if (test_bit(0, &tctx->task_state) ||
test_and_set_bit(0, &tctx->task_state))
- return 0;
+ return;
+ if (unlikely(tsk->flags & PF_EXITING))
+ goto fail;
/*
* SQPOLL kernel thread doesn't need notification, just a wakeup. For
@@ -1982,36 +1980,24 @@ static int io_req_task_work_add(struct io_kiocb *req)
* will do the job.
*/
notify = (req->ctx->flags & IORING_SETUP_SQPOLL) ? TWA_NONE : TWA_SIGNAL;
-
if (!task_work_add(tsk, &tctx->task_work, notify)) {
wake_up_process(tsk);
- return 0;
+ return;
}
-
- /*
- * Slow path - we failed, find and delete work. if the work is not
- * in the list, it got run and we're fine.
- */
+fail:
+ clear_bit(0, &tctx->task_state);
spin_lock_irqsave(&tctx->task_lock, flags);
- wq_list_for_each(node, prev, &tctx->task_list) {
- if (&req->io_task_work.node == node) {
- wq_list_del(&tctx->task_list, node, prev);
- ret = 1;
- break;
- }
- }
+ node = tctx->task_list.first;
+ INIT_WQ_LIST(&tctx->task_list);
spin_unlock_irqrestore(&tctx->task_lock, flags);
- clear_bit(0, &tctx->task_state);
- return ret;
-}
-static void io_req_task_work_add_fallback(struct io_kiocb *req,
- io_req_tw_func_t cb)
-{
- req->io_task_work.func = cb;
- if (llist_add(&req->io_task_work.fallback_node,
- &req->ctx->fallback_llist))
- schedule_delayed_work(&req->ctx->fallback_work, 1);
+ while (node) {
+ req = container_of(node, struct io_kiocb, io_task_work.node);
+ node = node->next;
+ if (llist_add(&req->io_task_work.fallback_node,
+ &req->ctx->fallback_llist))
+ schedule_delayed_work(&req->ctx->fallback_work, 1);
+ }
}
static void io_req_task_cancel(struct io_kiocb *req)
@@ -2041,17 +2027,13 @@ static void io_req_task_queue_fail(struct io_kiocb *req, int ret)
{
req->result = ret;
req->io_task_work.func = io_req_task_cancel;
-
- if (unlikely(io_req_task_work_add(req)))
- io_req_task_work_add_fallback(req, io_req_task_cancel);
+ io_req_task_work_add(req);
}
static void io_req_task_queue(struct io_kiocb *req)
{
req->io_task_work.func = io_req_task_submit;
-
- if (unlikely(io_req_task_work_add(req)))
- io_req_task_queue_fail(req, -ECANCELED);
+ io_req_task_work_add(req);
}
static inline void io_queue_next(struct io_kiocb *req)
@@ -2165,8 +2147,7 @@ static inline void io_put_req(struct io_kiocb *req)
static void io_free_req_deferred(struct io_kiocb *req)
{
req->io_task_work.func = io_free_req;
- if (unlikely(io_req_task_work_add(req)))
- io_req_task_work_add_fallback(req, io_free_req);
+ io_req_task_work_add(req);
}
static inline void io_put_req_deferred(struct io_kiocb *req, int refs)
@@ -4823,8 +4804,6 @@ struct io_poll_table {
static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
__poll_t mask, io_req_tw_func_t func)
{
- int ret;
-
/* for instances that support it check for an event match first: */
if (mask && !(mask & poll->events))
return 0;
@@ -4842,11 +4821,7 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
* of executing it. We can't safely execute it anyway, as we may not
* have the needed state needed for it anyway.
*/
- ret = io_req_task_work_add(req);
- if (unlikely(ret)) {
- WRITE_ONCE(poll->canceled, true);
- io_req_task_work_add_fallback(req, func);
- }
+ io_req_task_work_add(req);
return 1;
}
@@ -4855,6 +4830,9 @@ static bool io_poll_rewait(struct io_kiocb *req, struct io_poll_iocb *poll)
{
struct io_ring_ctx *ctx = req->ctx;
+ if (unlikely(req->task->flags & PF_EXITING))
+ WRITE_ONCE(poll->canceled, true);
+
if (!req->result && !READ_ONCE(poll->canceled)) {
struct poll_table_struct pt = { ._key = poll->events };
--
2.32.0
* Re: [PATCH 1/1] io_uring: fix exiting io_req_task_work_add leaks
From: Jens Axboe @ 2021-07-01 13:45 UTC
To: Pavel Begunkov, io-uring
On 7/1/21 6:26 AM, Pavel Begunkov wrote:
> If a caller enters io_req_task_work_add() without seeing PF_EXITING, it
> will set the ->task_state bit and try task_work_add(), which may fail by
> that point because the task is exiting. If that happens, the function
> tries to cancel the request.
>
> However, in the meanwhile other io_req_task_work_add() callers may come
> in, see the bit already set and leave their requests in the list, where
> they will never be executed.
>
> Don't propagate an error; instead clear the bit first and then fall back
> all requests that we can splice from the list. The callback functions
> have to be able to deal with PF_EXITING, so poll and apoll were modified
> via changing io_poll_rewait().
>
> Reported-by: Jens Axboe <[email protected]>
> Signed-off-by: Pavel Begunkov <[email protected]>
> ---
>
> Jens, can you try whether it helps with the leak you mentioned? I can't
> see it myself. As with the previous one, you would need to remove the
> PF_EXITING check, and it should in theory be safe to do.
Probably misunderstanding you here, but you already killed the one that
patch 3 removes. In any case, I tested this on top of 1+2, and I don't
see any leaks at that point.
--
Jens Axboe
* Re: [PATCH 1/1] io_uring: fix exiting io_req_task_work_add leaks
From: Pavel Begunkov @ 2021-07-01 14:19 UTC
To: Jens Axboe, io-uring
On 7/1/21 2:45 PM, Jens Axboe wrote:
> On 7/1/21 6:26 AM, Pavel Begunkov wrote:
>> If a caller enters io_req_task_work_add() without seeing PF_EXITING, it
>> will set the ->task_state bit and try task_work_add(), which may fail by
>> that point because the task is exiting. If that happens, the function
>> tries to cancel the request.
>>
>> However, in the meanwhile other io_req_task_work_add() callers may come
>> in, see the bit already set and leave their requests in the list, where
>> they will never be executed.
>>
>> Don't propagate an error; instead clear the bit first and then fall back
>> all requests that we can splice from the list. The callback functions
>> have to be able to deal with PF_EXITING, so poll and apoll were modified
>> via changing io_poll_rewait().
>>
>> Reported-by: Jens Axboe <[email protected]>
>> Signed-off-by: Pavel Begunkov <[email protected]>
>> ---
>>
>> Jens, can you try whether it helps with the leak you mentioned? I can't
>> see it myself. As with the previous one, you would need to remove the
>> PF_EXITING check, and it should in theory be safe to do.
>
> Probably misunderstanding you here, but you already killed the one that
> patch 3 removes. In any case, I tested this on top of 1+2, and I don't
> see any leaks at that point.
I believe removal of the PF_EXITING check yesterday didn't create
a new bug, but made the one addressed here much more likely to
happen. So this patch fixes the leak regardless of PF_EXITING.

As for the PF_EXITING removal itself, let's postpone it to for-next.
--
Pavel Begunkov
* Re: [PATCH 1/1] io_uring: fix exiting io_req_task_work_add leaks
From: Pavel Begunkov @ 2021-07-01 14:34 UTC
To: Jens Axboe, io-uring
On 7/1/21 2:45 PM, Jens Axboe wrote:
> On 7/1/21 6:26 AM, Pavel Begunkov wrote:
>> If a caller enters io_req_task_work_add() without seeing PF_EXITING, it
>> will set the ->task_state bit and try task_work_add(), which may fail by
>> that point because the task is exiting. If that happens, the function
>> tries to cancel the request.
>>
>> However, in the meanwhile other io_req_task_work_add() callers may come
>> in, see the bit already set and leave their requests in the list, where
>> they will never be executed.
>>
>> Don't propagate an error; instead clear the bit first and then fall back
>> all requests that we can splice from the list. The callback functions
>> have to be able to deal with PF_EXITING, so poll and apoll were modified
>> via changing io_poll_rewait().
>>
>> Reported-by: Jens Axboe <[email protected]>
>> Signed-off-by: Pavel Begunkov <[email protected]>
>> ---
>>
>> Jens, can you try whether it helps with the leak you mentioned? I can't
>> see it myself. As with the previous one, you would need to remove the
>> PF_EXITING check, and it should in theory be safe to do.
>
> Probably misunderstanding you here, but you already killed the one that
> patch 3 removes. In any case, I tested this on top of 1+2, and I don't
fwiw, it doesn't remove that check, it just moves it a bit.
> see any leaks at that point.
--
Pavel Begunkov
* Re: [PATCH 1/1] io_uring: fix exiting io_req_task_work_add leaks
From: Jens Axboe @ 2021-07-01 14:56 UTC
To: Pavel Begunkov, io-uring
On 7/1/21 8:19 AM, Pavel Begunkov wrote:
> On 7/1/21 2:45 PM, Jens Axboe wrote:
>> On 7/1/21 6:26 AM, Pavel Begunkov wrote:
>>> If a caller enters io_req_task_work_add() without seeing PF_EXITING, it
>>> will set the ->task_state bit and try task_work_add(), which may fail by
>>> that point because the task is exiting. If that happens, the function
>>> tries to cancel the request.
>>>
>>> However, in the meanwhile other io_req_task_work_add() callers may come
>>> in, see the bit already set and leave their requests in the list, where
>>> they will never be executed.
>>>
>>> Don't propagate an error; instead clear the bit first and then fall back
>>> all requests that we can splice from the list. The callback functions
>>> have to be able to deal with PF_EXITING, so poll and apoll were modified
>>> via changing io_poll_rewait().
>>>
>>> Reported-by: Jens Axboe <[email protected]>
>>> Signed-off-by: Pavel Begunkov <[email protected]>
>>> ---
>>>
>>> Jens, can you try whether it helps with the leak you mentioned? I can't
>>> see it myself. As with the previous one, you would need to remove the
>>> PF_EXITING check, and it should in theory be safe to do.
>>
>> Probably misunderstanding you here, but you already killed the one that
>> patch 3 removes. In any case, I tested this on top of 1+2, and I don't
>> see any leaks at that point.
>
> I believe removal of the PF_EXITING check yesterday didn't create
> a new bug, but made the one addressed here much more likely to
> happen. So this patch fixes the leak regardless of PF_EXITING.
That's what it looks like, yes.
> As for the PF_EXITING removal itself, let's postpone it to for-next.
Agree, no rush on that one.
--
Jens Axboe
* Re: [PATCH 1/1] io_uring: fix exiting io_req_task_work_add leaks
From: Jens Axboe @ 2021-07-01 19:41 UTC
To: Pavel Begunkov, io-uring
On 7/1/21 6:26 AM, Pavel Begunkov wrote:
> If a caller enters io_req_task_work_add() without seeing PF_EXITING, it
> will set the ->task_state bit and try task_work_add(), which may fail by
> that point because the task is exiting. If that happens, the function
> tries to cancel the request.
>
> However, in the meanwhile other io_req_task_work_add() callers may come
> in, see the bit already set and leave their requests in the list, where
> they will never be executed.
>
> Don't propagate an error; instead clear the bit first and then fall back
> all requests that we can splice from the list. The callback functions
> have to be able to deal with PF_EXITING, so poll and apoll were modified
> via changing io_poll_rewait().
I applied this one, adding a Fixes tag too. Thanks!
--
Jens Axboe