* [PATCH 1/9] io_uring: fix leaking reg files on exit
2021-04-13 1:58 [PATCH 5.13 0/9] another 5.13 pack Pavel Begunkov
@ 2021-04-13 1:58 ` Pavel Begunkov
2021-04-13 1:58 ` [PATCH 2/9] io_uring: fix uninit old data for poll event upd Pavel Begunkov
` (8 subsequent siblings)
9 siblings, 0 replies; 12+ messages in thread
From: Pavel Begunkov @ 2021-04-13 1:58 UTC (permalink / raw)
To: Jens Axboe, io-uring; +Cc: stable
If io_sqe_files_unregister() fails in io_rsrc_ref_quiesce(), it will
bail out without unregistering, leaving the files referenced. That may
well happen because of a stray signal, or simply because quiesce does
allocations internally.
In io_ring_ctx_free(), do an unsafe version of unregister instead: no
requests can be left by that point, so quiescing is unnecessary.
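A condensed sketch of the two paths after this patch (lifted from the
hunks below). The syscall path still quiesces and can fail; the exit
path skips quiesce entirely:

    /* io_sqe_files_unregister(): quiesce may fail, e.g. -ENOMEM/signal */
    ret = io_rsrc_ref_quiesce(ctx->file_data, ctx);
    if (!ret)
        __io_sqe_files_unregister(ctx);

    /* io_ring_ctx_free(): no requests left, just wait out the refs */
    if (ctx->file_data) {
        if (!atomic_dec_and_test(&ctx->file_data->refs))
            wait_for_completion(&ctx->file_data->done);
        __io_sqe_files_unregister(ctx);
    }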
Cc: [email protected]
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 29 +++++++++++++++--------------
1 file changed, 15 insertions(+), 14 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 257eddd4cd82..44342ff5c4e1 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7094,6 +7094,10 @@ static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
fput(file);
}
#endif
+ io_free_file_tables(&ctx->file_table, ctx->nr_user_files);
+ kfree(ctx->file_data);
+ ctx->file_data = NULL;
+ ctx->nr_user_files = 0;
}
static inline void io_rsrc_ref_lock(struct io_ring_ctx *ctx)
@@ -7200,21 +7204,14 @@ static struct io_rsrc_data *io_rsrc_data_alloc(struct io_ring_ctx *ctx,
static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
{
- struct io_rsrc_data *data = ctx->file_data;
int ret;
- if (!data)
+ if (!ctx->file_data)
return -ENXIO;
- ret = io_rsrc_ref_quiesce(data, ctx);
- if (ret)
- return ret;
-
- __io_sqe_files_unregister(ctx);
- io_free_file_tables(&ctx->file_table, ctx->nr_user_files);
- kfree(data);
- ctx->file_data = NULL;
- ctx->nr_user_files = 0;
- return 0;
+ ret = io_rsrc_ref_quiesce(ctx->file_data, ctx);
+ if (!ret)
+ __io_sqe_files_unregister(ctx);
+ return ret;
}
static void io_sq_thread_unpark(struct io_sq_data *sqd)
@@ -7664,7 +7661,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
ret = io_sqe_files_scm(ctx);
if (ret) {
- io_sqe_files_unregister(ctx);
+ __io_sqe_files_unregister(ctx);
return ret;
}
@@ -8465,7 +8462,11 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
}
mutex_lock(&ctx->uring_lock);
- io_sqe_files_unregister(ctx);
+ if (ctx->file_data) {
+ if (!atomic_dec_and_test(&ctx->file_data->refs))
+ wait_for_completion(&ctx->file_data->done);
+ __io_sqe_files_unregister(ctx);
+ }
if (ctx->rings)
__io_cqring_overflow_flush(ctx, true);
mutex_unlock(&ctx->uring_lock);
--
2.24.0
* [PATCH 2/9] io_uring: fix uninit old data for poll event upd
2021-04-13 1:58 [PATCH 5.13 0/9] another 5.13 pack Pavel Begunkov
2021-04-13 1:58 ` [PATCH 1/9] io_uring: fix leaking reg files on exit Pavel Begunkov
@ 2021-04-13 1:58 ` Pavel Begunkov
2021-04-13 1:58 ` [PATCH 3/9] io_uring: split poll and poll update structures Pavel Begunkov
` (7 subsequent siblings)
9 siblings, 0 replies; 12+ messages in thread
From: Pavel Begunkov @ 2021-04-13 1:58 UTC (permalink / raw)
To: Jens Axboe, io-uring
Both IORING_POLL_UPDATE_EVENTS and IORING_POLL_UPDATE_USER_DATA need
old_user_data to find/cancel a poll request, but it is currently set
only for the former.
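For illustration, the old prep code (reconstructed from the removed
lines in the hunk below) read old_user_data only under the first flag,
so a USER_DATA-only update searched with an uninitialised value:

    if (flags & IORING_POLL_UPDATE_EVENTS) {
        poll->update_events = true;
        poll->old_user_data = READ_ONCE(sqe->addr);
    }
    if (flags & IORING_POLL_UPDATE_USER_DATA) {
        poll->update_user_data = true;
        poll->new_user_data = READ_ONCE(sqe->off);
        /* old_user_data never set on this path */
    }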
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 44342ff5c4e1..429ee5fd9044 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5384,17 +5384,17 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
if (!(flags & IORING_POLL_ADD_MULTI))
events |= EPOLLONESHOT;
poll->update_events = poll->update_user_data = false;
- if (flags & IORING_POLL_UPDATE_EVENTS) {
- poll->update_events = true;
+
+ if (flags & (IORING_POLL_UPDATE_EVENTS|IORING_POLL_UPDATE_USER_DATA)) {
poll->old_user_data = READ_ONCE(sqe->addr);
+ poll->update_events = flags & IORING_POLL_UPDATE_EVENTS;
+ poll->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA;
+ if (poll->update_user_data)
+ poll->new_user_data = READ_ONCE(sqe->off);
+ } else {
+ if (sqe->off || sqe->addr)
+ return -EINVAL;
}
- if (flags & IORING_POLL_UPDATE_USER_DATA) {
- poll->update_user_data = true;
- poll->new_user_data = READ_ONCE(sqe->off);
- }
- if (!(poll->update_events || poll->update_user_data) &&
- (sqe->off || sqe->addr))
- return -EINVAL;
poll->events = demangle_poll(events) |
(events & (EPOLLEXCLUSIVE|EPOLLONESHOT));
return 0;
--
2.24.0
* [PATCH 3/9] io_uring: split poll and poll update structures
2021-04-13 1:58 [PATCH 5.13 0/9] another 5.13 pack Pavel Begunkov
2021-04-13 1:58 ` [PATCH 1/9] io_uring: fix leaking reg files on exit Pavel Begunkov
2021-04-13 1:58 ` [PATCH 2/9] io_uring: fix uninit old data for poll event upd Pavel Begunkov
@ 2021-04-13 1:58 ` Pavel Begunkov
2021-04-13 17:14 ` Pavel Begunkov
2021-04-13 1:58 ` [PATCH 4/9] io_uring: add timeout completion_lock annotation Pavel Begunkov
` (6 subsequent siblings)
9 siblings, 1 reply; 12+ messages in thread
From: Pavel Begunkov @ 2021-04-13 1:58 UTC (permalink / raw)
To: Jens Axboe, io-uring
struct io_poll_iocb became pretty nasty by also combining the update
fields. Split those out into a separate struct io_poll_update for
clarity.
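Note that after the split both structs live in the same per-request
union of struct io_kiocb, which is why the new REQ_F_POLL_UPDATE flag
is needed to tell them apart at issue time. Rough sketch of the layout,
trimmed to the relevant members:

    struct io_kiocb {
        union {
            struct file             *file;
            struct io_rw            rw;
            struct io_poll_iocb     poll;
            struct io_poll_update   poll_update;
            /* ... other per-opcode data ... */
        };
        /* ... */
    };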
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 55 ++++++++++++++++++++++++++++++---------------------
1 file changed, 32 insertions(+), 23 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 429ee5fd9044..a0f207e62e32 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -490,15 +490,16 @@ struct io_poll_iocb {
__poll_t events;
bool done;
bool canceled;
+ struct wait_queue_entry wait;
+};
+
+struct io_poll_update {
+ struct file *file;
+ u64 old_user_data;
+ u64 new_user_data;
+ __poll_t events;
bool update_events;
bool update_user_data;
- union {
- struct wait_queue_entry wait;
- struct {
- u64 old_user_data;
- u64 new_user_data;
- };
- };
};
struct io_poll_remove {
@@ -715,6 +716,7 @@ enum {
REQ_F_COMPLETE_INLINE_BIT,
REQ_F_REISSUE_BIT,
REQ_F_DONT_REISSUE_BIT,
+ REQ_F_POLL_UPDATE_BIT,
/* keep async read/write and isreg together and in order */
REQ_F_ASYNC_READ_BIT,
REQ_F_ASYNC_WRITE_BIT,
@@ -762,6 +764,8 @@ enum {
REQ_F_REISSUE = BIT(REQ_F_REISSUE_BIT),
/* don't attempt request reissue, see io_rw_reissue() */
REQ_F_DONT_REISSUE = BIT(REQ_F_DONT_REISSUE_BIT),
+ /* switches between poll and poll update */
+ REQ_F_POLL_UPDATE = BIT(REQ_F_POLL_UPDATE_BIT),
/* supports async reads */
REQ_F_ASYNC_READ = BIT(REQ_F_ASYNC_READ_BIT),
/* supports async writes */
@@ -791,6 +795,7 @@ struct io_kiocb {
struct file *file;
struct io_rw rw;
struct io_poll_iocb poll;
+ struct io_poll_update poll_update;
struct io_poll_remove poll_remove;
struct io_accept accept;
struct io_sync sync;
@@ -4989,7 +4994,6 @@ static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events,
poll->head = NULL;
poll->done = false;
poll->canceled = false;
- poll->update_events = poll->update_user_data = false;
#define IO_POLL_UNMASK (EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP)
/* mask in events that we always want/need */
poll->events = events | IO_POLL_UNMASK;
@@ -5366,7 +5370,6 @@ static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- struct io_poll_iocb *poll = &req->poll;
u32 events, flags;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
@@ -5383,20 +5386,26 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
#endif
if (!(flags & IORING_POLL_ADD_MULTI))
events |= EPOLLONESHOT;
- poll->update_events = poll->update_user_data = false;
+ events = demangle_poll(events) |
+ (events & (EPOLLEXCLUSIVE|EPOLLONESHOT));
if (flags & (IORING_POLL_UPDATE_EVENTS|IORING_POLL_UPDATE_USER_DATA)) {
- poll->old_user_data = READ_ONCE(sqe->addr);
- poll->update_events = flags & IORING_POLL_UPDATE_EVENTS;
- poll->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA;
- if (poll->update_user_data)
- poll->new_user_data = READ_ONCE(sqe->off);
+ struct io_poll_update *poll_upd = &req->poll_update;
+
+ req->flags |= REQ_F_POLL_UPDATE;
+ poll_upd->events = events;
+ poll_upd->old_user_data = READ_ONCE(sqe->addr);
+ poll_upd->update_events = flags & IORING_POLL_UPDATE_EVENTS;
+ poll_upd->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA;
+ if (poll_upd->update_user_data)
+ poll_upd->new_user_data = READ_ONCE(sqe->off);
} else {
+ struct io_poll_iocb *poll = &req->poll;
+
+ poll->events = events;
if (sqe->off || sqe->addr)
return -EINVAL;
}
- poll->events = demangle_poll(events) |
- (events & (EPOLLEXCLUSIVE|EPOLLONESHOT));
return 0;
}
@@ -5434,7 +5443,7 @@ static int io_poll_update(struct io_kiocb *req)
int ret;
spin_lock_irq(&ctx->completion_lock);
- preq = io_poll_find(ctx, req->poll.old_user_data);
+ preq = io_poll_find(ctx, req->poll_update.old_user_data);
if (!preq) {
ret = -ENOENT;
goto err;
@@ -5464,13 +5473,13 @@ static int io_poll_update(struct io_kiocb *req)
return 0;
}
/* only mask one event flags, keep behavior flags */
- if (req->poll.update_events) {
+ if (req->poll_update.update_events) {
preq->poll.events &= ~0xffff;
- preq->poll.events |= req->poll.events & 0xffff;
+ preq->poll.events |= req->poll_update.events & 0xffff;
preq->poll.events |= IO_POLL_UNMASK;
}
- if (req->poll.update_user_data)
- preq->user_data = req->poll.new_user_data;
+ if (req->poll_update.update_user_data)
+ preq->user_data = req->poll_update.new_user_data;
spin_unlock_irq(&ctx->completion_lock);
@@ -5489,7 +5498,7 @@ static int io_poll_update(struct io_kiocb *req)
static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
{
- if (!req->poll.update_events && !req->poll.update_user_data)
+ if (!(req->flags & REQ_F_POLL_UPDATE))
return __io_poll_add(req);
return io_poll_update(req);
}
--
2.24.0
* Re: [PATCH 3/9] io_uring: split poll and poll update structures
2021-04-13 1:58 ` [PATCH 3/9] io_uring: split poll and poll update structures Pavel Begunkov
@ 2021-04-13 17:14 ` Pavel Begunkov
0 siblings, 0 replies; 12+ messages in thread
From: Pavel Begunkov @ 2021-04-13 17:14 UTC (permalink / raw)
To: Jens Axboe, io-uring
On 13/04/2021 02:58, Pavel Begunkov wrote:
> struct io_poll_iocb became pretty nasty by also combining the update
> fields. Split those out into a separate struct io_poll_update for
> clarity.
I think we should move update into IORING_OP_POLL_REMOVE before it's
too late. That would give better struct layouts and wouldn't get in the
way of POLL_ADD, which should be the more popular operation.
Another thing: it looks like IORING_OP_POLL_REMOVE may cancel an apoll,
which doesn't sound great. IMHO we need to limit it to poll requests
only; it's rather confusing otherwise. Note: it can't be used reliably
from userspace anyway, so we probably don't need to care about the
change in behaviour.
>
> Signed-off-by: Pavel Begunkov <[email protected]>
> ---
> fs/io_uring.c | 55 ++++++++++++++++++++++++++++++---------------------
> 1 file changed, 32 insertions(+), 23 deletions(-)
>
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 429ee5fd9044..a0f207e62e32 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -490,15 +490,16 @@ struct io_poll_iocb {
> __poll_t events;
> bool done;
> bool canceled;
> + struct wait_queue_entry wait;
> +};
> +
> +struct io_poll_update {
> + struct file *file;
> + u64 old_user_data;
> + u64 new_user_data;
> + __poll_t events;
> bool update_events;
> bool update_user_data;
> - union {
> - struct wait_queue_entry wait;
> - struct {
> - u64 old_user_data;
> - u64 new_user_data;
> - };
> - };
> };
>
> struct io_poll_remove {
> @@ -715,6 +716,7 @@ enum {
> REQ_F_COMPLETE_INLINE_BIT,
> REQ_F_REISSUE_BIT,
> REQ_F_DONT_REISSUE_BIT,
> + REQ_F_POLL_UPDATE_BIT,
> /* keep async read/write and isreg together and in order */
> REQ_F_ASYNC_READ_BIT,
> REQ_F_ASYNC_WRITE_BIT,
> @@ -762,6 +764,8 @@ enum {
> REQ_F_REISSUE = BIT(REQ_F_REISSUE_BIT),
> /* don't attempt request reissue, see io_rw_reissue() */
> REQ_F_DONT_REISSUE = BIT(REQ_F_DONT_REISSUE_BIT),
> + /* switches between poll and poll update */
> + REQ_F_POLL_UPDATE = BIT(REQ_F_POLL_UPDATE_BIT),
> /* supports async reads */
> REQ_F_ASYNC_READ = BIT(REQ_F_ASYNC_READ_BIT),
> /* supports async writes */
> @@ -791,6 +795,7 @@ struct io_kiocb {
> struct file *file;
> struct io_rw rw;
> struct io_poll_iocb poll;
> + struct io_poll_update poll_update;
> struct io_poll_remove poll_remove;
> struct io_accept accept;
> struct io_sync sync;
> @@ -4989,7 +4994,6 @@ static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events,
> poll->head = NULL;
> poll->done = false;
> poll->canceled = false;
> - poll->update_events = poll->update_user_data = false;
> #define IO_POLL_UNMASK (EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP)
> /* mask in events that we always want/need */
> poll->events = events | IO_POLL_UNMASK;
> @@ -5366,7 +5370,6 @@ static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
>
> static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> {
> - struct io_poll_iocb *poll = &req->poll;
> u32 events, flags;
>
> if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
> @@ -5383,20 +5386,26 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
> #endif
> if (!(flags & IORING_POLL_ADD_MULTI))
> events |= EPOLLONESHOT;
> - poll->update_events = poll->update_user_data = false;
> + events = demangle_poll(events) |
> + (events & (EPOLLEXCLUSIVE|EPOLLONESHOT));
>
> if (flags & (IORING_POLL_UPDATE_EVENTS|IORING_POLL_UPDATE_USER_DATA)) {
> - poll->old_user_data = READ_ONCE(sqe->addr);
> - poll->update_events = flags & IORING_POLL_UPDATE_EVENTS;
> - poll->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA;
> - if (poll->update_user_data)
> - poll->new_user_data = READ_ONCE(sqe->off);
> + struct io_poll_update *poll_upd = &req->poll_update;
> +
> + req->flags |= REQ_F_POLL_UPDATE;
> + poll_upd->events = events;
> + poll_upd->old_user_data = READ_ONCE(sqe->addr);
> + poll_upd->update_events = flags & IORING_POLL_UPDATE_EVENTS;
> + poll_upd->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA;
> + if (poll_upd->update_user_data)
> + poll_upd->new_user_data = READ_ONCE(sqe->off);
> } else {
> + struct io_poll_iocb *poll = &req->poll;
> +
> + poll->events = events;
> if (sqe->off || sqe->addr)
> return -EINVAL;
> }
> - poll->events = demangle_poll(events) |
> - (events & (EPOLLEXCLUSIVE|EPOLLONESHOT));
> return 0;
> }
>
> @@ -5434,7 +5443,7 @@ static int io_poll_update(struct io_kiocb *req)
> int ret;
>
> spin_lock_irq(&ctx->completion_lock);
> - preq = io_poll_find(ctx, req->poll.old_user_data);
> + preq = io_poll_find(ctx, req->poll_update.old_user_data);
> if (!preq) {
> ret = -ENOENT;
> goto err;
> @@ -5464,13 +5473,13 @@ static int io_poll_update(struct io_kiocb *req)
> return 0;
> }
> /* only mask one event flags, keep behavior flags */
> - if (req->poll.update_events) {
> + if (req->poll_update.update_events) {
> preq->poll.events &= ~0xffff;
> - preq->poll.events |= req->poll.events & 0xffff;
> + preq->poll.events |= req->poll_update.events & 0xffff;
> preq->poll.events |= IO_POLL_UNMASK;
> }
> - if (req->poll.update_user_data)
> - preq->user_data = req->poll.new_user_data;
> + if (req->poll_update.update_user_data)
> + preq->user_data = req->poll_update.new_user_data;
>
> spin_unlock_irq(&ctx->completion_lock);
>
> @@ -5489,7 +5498,7 @@ static int io_poll_update(struct io_kiocb *req)
>
> static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
> {
> - if (!req->poll.update_events && !req->poll.update_user_data)
> + if (!(req->flags & REQ_F_POLL_UPDATE))
> return __io_poll_add(req);
> return io_poll_update(req);
> }
>
--
Pavel Begunkov
* [PATCH 4/9] io_uring: add timeout completion_lock annotation
2021-04-13 1:58 [PATCH 5.13 0/9] another 5.13 pack Pavel Begunkov
` (2 preceding siblings ...)
2021-04-13 1:58 ` [PATCH 3/9] io_uring: split poll and poll update structures Pavel Begunkov
@ 2021-04-13 1:58 ` Pavel Begunkov
2021-04-13 1:58 ` [PATCH 5/9] io_uring: refactor hrtimer_try_to_cancel uses Pavel Begunkov
` (5 subsequent siblings)
9 siblings, 0 replies; 12+ messages in thread
From: Pavel Begunkov @ 2021-04-13 1:58 UTC (permalink / raw)
To: Jens Axboe, io-uring
Add one more sparse locking annotation for readability in
io_kill_timeout().
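__must_hold() compiles away for gcc; under sparse (__CHECKER__) it
expands to a context attribute, letting sparse warn when the function
is called without ctx->completion_lock held. Roughly, from the kernel's
compiler headers:

    #ifdef __CHECKER__
    # define __must_hold(x) __attribute__((context(x,1,1)))
    #else
    # define __must_hold(x)
    #endif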
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index a0f207e62e32..eaa8f1af29cc 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1269,6 +1269,7 @@ static void io_queue_async_work(struct io_kiocb *req)
}
static void io_kill_timeout(struct io_kiocb *req, int status)
+ __must_hold(&req->ctx->completion_lock)
{
struct io_timeout_data *io = req->async_data;
int ret;
--
2.24.0
* [PATCH 5/9] io_uring: refactor hrtimer_try_to_cancel uses
2021-04-13 1:58 [PATCH 5.13 0/9] another 5.13 pack Pavel Begunkov
` (3 preceding siblings ...)
2021-04-13 1:58 ` [PATCH 4/9] io_uring: add timeout completion_lock annotation Pavel Begunkov
@ 2021-04-13 1:58 ` Pavel Begunkov
2021-04-13 1:58 ` [PATCH 6/9] io_uring: clean up io_poll_remove_waitqs() Pavel Begunkov
` (4 subsequent siblings)
9 siblings, 0 replies; 12+ messages in thread
From: Pavel Begunkov @ 2021-04-13 1:58 UTC (permalink / raw)
To: Jens Axboe, io-uring
Don't save the return value of hrtimer_try_to_cancel() in a variable;
use it right away. It's generally safer not to have an intermediate
variable, which may be reused and passed out wrongly, and here it can
simply be elided. Also clean up io_timeout_extract().
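For reference, hrtimer_try_to_cancel() returns 1 if the timer was
active and got cancelled, 0 if it wasn't active at all, and -1 if the
callback is currently running and can't be cancelled. Hence the pattern
in the hunks below:

    /* != -1 means the timer won't fire: we own the completion */
    if (hrtimer_try_to_cancel(&io->timer) != -1) {
        io_cqring_fill_event(link, -ECANCELED, 0);
        io_put_req_deferred(link, 1);
    }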
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 23 ++++++++---------------
1 file changed, 8 insertions(+), 15 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index eaa8f1af29cc..4f4b4f4bff2d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1272,10 +1272,8 @@ static void io_kill_timeout(struct io_kiocb *req, int status)
__must_hold(&req->ctx->completion_lock)
{
struct io_timeout_data *io = req->async_data;
- int ret;
- ret = hrtimer_try_to_cancel(&io->timer);
- if (ret != -1) {
+ if (hrtimer_try_to_cancel(&io->timer) != -1) {
atomic_set(&req->ctx->cq_timeouts,
atomic_read(&req->ctx->cq_timeouts) + 1);
list_del_init(&req->timeout.list);
@@ -1792,12 +1790,10 @@ static bool io_kill_linked_timeout(struct io_kiocb *req)
*/
if (link && (link->flags & REQ_F_LTIMEOUT_ACTIVE)) {
struct io_timeout_data *io = link->async_data;
- int ret;
io_remove_next_linked(req);
link->timeout.head = NULL;
- ret = hrtimer_try_to_cancel(&io->timer);
- if (ret != -1) {
+ if (hrtimer_try_to_cancel(&io->timer) != -1) {
io_cqring_fill_event(link, -ECANCELED, 0);
io_put_req_deferred(link, 1);
return true;
@@ -5533,21 +5529,18 @@ static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx,
{
struct io_timeout_data *io;
struct io_kiocb *req;
- int ret = -ENOENT;
+ bool found = false;
list_for_each_entry(req, &ctx->timeout_list, timeout.list) {
- if (user_data == req->user_data) {
- ret = 0;
+ found = user_data == req->user_data;
+ if (found)
break;
- }
}
-
- if (ret == -ENOENT)
- return ERR_PTR(ret);
+ if (!found)
+ return ERR_PTR(-ENOENT);
io = req->async_data;
- ret = hrtimer_try_to_cancel(&io->timer);
- if (ret == -1)
+ if (hrtimer_try_to_cancel(&io->timer) == -1)
return ERR_PTR(-EALREADY);
list_del_init(&req->timeout.list);
return req;
--
2.24.0
* [PATCH 6/9] io_uring: clean up io_poll_remove_waitqs()
2021-04-13 1:58 [PATCH 5.13 0/9] another 5.13 pack Pavel Begunkov
` (4 preceding siblings ...)
2021-04-13 1:58 ` [PATCH 5/9] io_uring: refactor hrtimer_try_to_cancel uses Pavel Begunkov
@ 2021-04-13 1:58 ` Pavel Begunkov
2021-04-13 1:58 ` [PATCH 7/9] io_uring: don't fail overflow on in_idle Pavel Begunkov
` (3 subsequent siblings)
9 siblings, 0 replies; 12+ messages in thread
From: Pavel Begunkov @ 2021-04-13 1:58 UTC (permalink / raw)
To: Jens Axboe, io-uring
Move out the parts of io_poll_remove_waitqs() that are opcode
independent. It reads better and stresses that both paths go through
__io_poll_remove_one().
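Condensed, the function after this patch reads:

    io_poll_remove_double(req);
    do_complete = __io_poll_remove_one(req, io_poll_get_single(req), true);

    if (req->opcode != IORING_OP_POLL_ADD && do_complete) {
        struct async_poll *apoll = req->apoll;

        /* non-poll requests have submit ref still */
        req_ref_put(req);
        kfree(apoll->double_poll);
        kfree(apoll);
    }
    return do_complete;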
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 15 +++++----------
1 file changed, 5 insertions(+), 10 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 4f4b4f4bff2d..4407ebc8f8d3 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5224,21 +5224,16 @@ static bool io_poll_remove_waitqs(struct io_kiocb *req)
bool do_complete;
io_poll_remove_double(req);
+ do_complete = __io_poll_remove_one(req, io_poll_get_single(req), true);
- if (req->opcode == IORING_OP_POLL_ADD) {
- do_complete = __io_poll_remove_one(req, &req->poll, true);
- } else {
+ if (req->opcode != IORING_OP_POLL_ADD && do_complete) {
struct async_poll *apoll = req->apoll;
/* non-poll requests have submit ref still */
- do_complete = __io_poll_remove_one(req, &apoll->poll, true);
- if (do_complete) {
- req_ref_put(req);
- kfree(apoll->double_poll);
- kfree(apoll);
- }
+ req_ref_put(req);
+ kfree(apoll->double_poll);
+ kfree(apoll);
}
-
return do_complete;
}
--
2.24.0
* [PATCH 7/9] io_uring: don't fail overflow on in_idle
2021-04-13 1:58 [PATCH 5.13 0/9] another 5.13 pack Pavel Begunkov
` (5 preceding siblings ...)
2021-04-13 1:58 ` [PATCH 6/9] io_uring: clean up io_poll_remove_waitqs() Pavel Begunkov
@ 2021-04-13 1:58 ` Pavel Begunkov
2021-04-13 1:58 ` [PATCH 8/9] io_uring: skip futile iopoll iterations Pavel Begunkov
` (2 subsequent siblings)
9 siblings, 0 replies; 12+ messages in thread
From: Pavel Begunkov @ 2021-04-13 1:58 UTC (permalink / raw)
To: Jens Axboe, io-uring
As CQE overflows are now untied from requests and so don't hold any
ref, we don't need to handle exiting/exec'ing cases there anymore.
Moreover, it's much nicer with regard to userspace to save overflowed
CQEs whenever possible, so stop failing the overflow path on in_idle.
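An overflowed CQE is stashed in a small GFP_ATOMIC node and linked into
ctx->cq_overflow_list; in this kernel the entry is roughly:

    struct io_overflow_cqe {
        struct io_uring_cqe cqe;
        struct list_head    list;
    };

Only when that allocation itself fails is the event dropped and the
ring's cq_overflow counter bumped, regardless of in_idle.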
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 44 ++++++++++++++++++++------------------------
1 file changed, 20 insertions(+), 24 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 4407ebc8f8d3..cc6a44533802 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1515,32 +1515,28 @@ static bool io_cqring_event_overflow(struct io_kiocb *req, long res,
unsigned int cflags)
{
struct io_ring_ctx *ctx = req->ctx;
+ struct io_overflow_cqe *ocqe;
- if (!atomic_read(&req->task->io_uring->in_idle)) {
- struct io_overflow_cqe *ocqe;
-
- ocqe = kmalloc(sizeof(*ocqe), GFP_ATOMIC | __GFP_ACCOUNT);
- if (!ocqe)
- goto overflow;
- if (list_empty(&ctx->cq_overflow_list)) {
- set_bit(0, &ctx->sq_check_overflow);
- set_bit(0, &ctx->cq_check_overflow);
- ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW;
- }
- ocqe->cqe.user_data = req->user_data;
- ocqe->cqe.res = res;
- ocqe->cqe.flags = cflags;
- list_add_tail(&ocqe->list, &ctx->cq_overflow_list);
- return true;
+ ocqe = kmalloc(sizeof(*ocqe), GFP_ATOMIC | __GFP_ACCOUNT);
+ if (!ocqe) {
+ /*
+ * If we're in ring overflow flush mode, or in task cancel mode,
+ * or cannot allocate an overflow entry, then we need to drop it
+ * on the floor.
+ */
+ WRITE_ONCE(ctx->rings->cq_overflow, ++ctx->cached_cq_overflow);
+ return false;
}
-overflow:
- /*
- * If we're in ring overflow flush mode, or in task cancel mode,
- * or cannot allocate an overflow entry, then we need to drop it
- * on the floor.
- */
- WRITE_ONCE(ctx->rings->cq_overflow, ++ctx->cached_cq_overflow);
- return false;
+ if (list_empty(&ctx->cq_overflow_list)) {
+ set_bit(0, &ctx->sq_check_overflow);
+ set_bit(0, &ctx->cq_check_overflow);
+ ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW;
+ }
+ ocqe->cqe.user_data = req->user_data;
+ ocqe->cqe.res = res;
+ ocqe->cqe.flags = cflags;
+ list_add_tail(&ocqe->list, &ctx->cq_overflow_list);
+ return true;
}
static inline bool __io_cqring_fill_event(struct io_kiocb *req, long res,
--
2.24.0
* [PATCH 8/9] io_uring: skip futile iopoll iterations
2021-04-13 1:58 [PATCH 5.13 0/9] another 5.13 pack Pavel Begunkov
` (6 preceding siblings ...)
2021-04-13 1:58 ` [PATCH 7/9] io_uring: don't fail overflow on in_idle Pavel Begunkov
@ 2021-04-13 1:58 ` Pavel Begunkov
2021-04-13 1:58 ` [PATCH 9/9] io_uring: inline io_iopoll_getevents() Pavel Begunkov
2021-04-13 15:38 ` [PATCH 5.13 0/9] another 5.13 pack Jens Axboe
9 siblings, 0 replies; 12+ messages in thread
From: Pavel Begunkov @ 2021-04-13 1:58 UTC (permalink / raw)
To: Jens Axboe, io-uring
The only way to get out of io_iopoll_getevents() and continue
iterating is with an empty iopoll_list; otherwise the main loop would
just exit. So, instead of the unlock-every-8th-iteration heuristic,
drop the lock whenever iopoll_list drains.
Also, as nobody can add new requests to iopoll_list while
io_iopoll_check() holds uring_lock, it's useless to spin with the list
empty, so return in that case.
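Condensed from the hunk below, the loop now drops the lock exactly
when the list drains, and bails if nothing was re-added while unlocked:

    if (list_empty(&ctx->iopoll_list)) {
        mutex_unlock(&ctx->uring_lock);
        io_run_task_work();
        mutex_lock(&ctx->uring_lock);

        if (list_empty(&ctx->iopoll_list))
            break;
    }
    ret = io_do_iopoll(ctx, &nr_events, min);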
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index cc6a44533802..1111968bbe7f 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2387,7 +2387,7 @@ static void io_iopoll_try_reap_events(struct io_ring_ctx *ctx)
static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
{
unsigned int nr_events = 0;
- int iters = 0, ret = 0;
+ int ret = 0;
/*
* We disallow the app entering submit/complete with polling, but we
@@ -2416,10 +2416,13 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
* forever, while the workqueue is stuck trying to acquire the
* very same mutex.
*/
- if (!(++iters & 7)) {
+ if (list_empty(&ctx->iopoll_list)) {
mutex_unlock(&ctx->uring_lock);
io_run_task_work();
mutex_lock(&ctx->uring_lock);
+
+ if (list_empty(&ctx->iopoll_list))
+ break;
}
ret = io_iopoll_getevents(ctx, &nr_events, min);
--
2.24.0
* [PATCH 9/9] io_uring: inline io_iopoll_getevents()
2021-04-13 1:58 [PATCH 5.13 0/9] another 5.13 pack Pavel Begunkov
` (7 preceding siblings ...)
2021-04-13 1:58 ` [PATCH 8/9] io_uring: skip futile iopoll iterations Pavel Begunkov
@ 2021-04-13 1:58 ` Pavel Begunkov
2021-04-13 15:38 ` [PATCH 5.13 0/9] another 5.13 pack Jens Axboe
9 siblings, 0 replies; 12+ messages in thread
From: Pavel Begunkov @ 2021-04-13 1:58 UTC (permalink / raw)
To: Jens Axboe, io-uring
io_iopoll_getevents() is of no use to us anymore; io_iopoll_check()
now handles all the cases itself.
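What remains is a single flat loop around io_do_iopoll(), with the
overflow/pending checks hoisted out of it (condensed from the hunks
below):

    if (test_bit(0, &ctx->cq_check_overflow))
        __io_cqring_overflow_flush(ctx, false);
    if (io_cqring_events(ctx))
        goto out;
    do {
        ret = io_do_iopoll(ctx, &nr_events, min);
    } while (!ret && nr_events < min && !need_resched());
out:
    mutex_unlock(&ctx->uring_lock);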
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 52 +++++++++++++--------------------------------------
1 file changed, 13 insertions(+), 39 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 1111968bbe7f..05c67ebeabd5 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2331,27 +2331,6 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
return ret;
}
-/*
- * Poll for a minimum of 'min' events. Note that if min == 0 we consider that a
- * non-spinning poll check - we'll still enter the driver poll loop, but only
- * as a non-spinning completion check.
- */
-static int io_iopoll_getevents(struct io_ring_ctx *ctx, unsigned int *nr_events,
- long min)
-{
- while (!list_empty(&ctx->iopoll_list) && !need_resched()) {
- int ret;
-
- ret = io_do_iopoll(ctx, nr_events, min);
- if (ret < 0)
- return ret;
- if (*nr_events >= min)
- return 0;
- }
-
- return 1;
-}
-
/*
* We can't just wait for polled events to come to us, we have to actively
* find and complete them.
@@ -2395,17 +2374,16 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
* that got punted to a workqueue.
*/
mutex_lock(&ctx->uring_lock);
+ /*
+ * Don't enter poll loop if we already have events pending.
+ * If we do, we can potentially be spinning for commands that
+ * already triggered a CQE (eg in error).
+ */
+ if (test_bit(0, &ctx->cq_check_overflow))
+ __io_cqring_overflow_flush(ctx, false);
+ if (io_cqring_events(ctx))
+ goto out;
do {
- /*
- * Don't enter poll loop if we already have events pending.
- * If we do, we can potentially be spinning for commands that
- * already triggered a CQE (eg in error).
- */
- if (test_bit(0, &ctx->cq_check_overflow))
- __io_cqring_overflow_flush(ctx, false);
- if (io_cqring_events(ctx))
- break;
-
/*
* If a submit got punted to a workqueue, we can have the
* application entering polling for a command before it gets
@@ -2424,13 +2402,9 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
if (list_empty(&ctx->iopoll_list))
break;
}
-
- ret = io_iopoll_getevents(ctx, &nr_events, min);
- if (ret <= 0)
- break;
- ret = 0;
- } while (min && !nr_events && !need_resched());
-
+ ret = io_do_iopoll(ctx, &nr_events, min);
+ } while (!ret && nr_events < min && !need_resched());
+out:
mutex_unlock(&ctx->uring_lock);
return ret;
}
@@ -2543,7 +2517,7 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
/*
* After the iocb has been issued, it's safe to be found on the poll list.
* Adding the kiocb to the list AFTER submission ensures that we don't
- * find it from a io_iopoll_getevents() thread before the issuer is done
+ * find it from a io_do_iopoll() thread before the issuer is done
* accessing the kiocb cookie.
*/
static void io_iopoll_req_issued(struct io_kiocb *req, bool in_async)
--
2.24.0
* Re: [PATCH 5.13 0/9] another 5.13 pack
2021-04-13 1:58 [PATCH 5.13 0/9] another 5.13 pack Pavel Begunkov
` (8 preceding siblings ...)
2021-04-13 1:58 ` [PATCH 9/9] io_uring: inline io_iopoll_getevents() Pavel Begunkov
@ 2021-04-13 15:38 ` Jens Axboe
9 siblings, 0 replies; 12+ messages in thread
From: Jens Axboe @ 2021-04-13 15:38 UTC (permalink / raw)
To: Pavel Begunkov, io-uring
On 4/12/21 7:58 PM, Pavel Begunkov wrote:
> 1-2 are fixes
>
> 7/9 is about nicer overflow handling while someone exits
>
> 8-9 change how we do iopoll with an empty iopoll_list, saving us from
> burning CPU for nothing.
>
> Pavel Begunkov (9):
> io_uring: fix leaking reg files on exit
> io_uring: fix uninit old data for poll event upd
> io_uring: split poll and poll update structures
> io_uring: add timeout completion_lock annotation
> io_uring: refactor hrtimer_try_to_cancel uses
> io_uring: clean up io_poll_remove_waitqs()
> io_uring: don't fail overflow on in_idle
> io_uring: skip futile iopoll iterations
> io_uring: inline io_iopoll_getevents()
>
> fs/io_uring.c | 236 ++++++++++++++++++++++----------------------------
> 1 file changed, 104 insertions(+), 132 deletions(-)
Applied, thanks.
--
Jens Axboe