* [PATCH 5.13 0/9] another 5.13 pack
@ 2021-04-13  1:58 Pavel Begunkov
  2021-04-13  1:58 ` [PATCH 1/9] io_uring: fix leaking reg files on exit Pavel Begunkov
                   ` (9 more replies)
  0 siblings, 10 replies; 12+ messages in thread
From: Pavel Begunkov @ 2021-04-13  1:58 UTC (permalink / raw)
  To: Jens Axboe, io-uring

1-2 are fixes

7/9 is about nicer overflow handling while someone exits

8-9 change how we do iopoll with an empty iopoll_list, saving us from
burning CPU for nothing.

Pavel Begunkov (9):
  io_uring: fix leaking reg files on exit
  io_uring: fix uninit old data for poll event upd
  io_uring: split poll and poll update structures
  io_uring: add timeout completion_lock annotation
  io_uring: refactor hrtimer_try_to_cancel uses
  io_uring: clean up io_poll_remove_waitqs()
  io_uring: don't fail overflow on in_idle
  io_uring: skip futile iopoll iterations
  io_uring: inline io_iopoll_getevents()

 fs/io_uring.c | 236 ++++++++++++++++++++++----------------------------
 1 file changed, 104 insertions(+), 132 deletions(-)

-- 
2.24.0


* [PATCH 1/9] io_uring: fix leaking reg files on exit
  2021-04-13  1:58 [PATCH 5.13 0/9] another 5.13 pack Pavel Begunkov
@ 2021-04-13  1:58 ` Pavel Begunkov
  2021-04-13  1:58 ` [PATCH 2/9] io_uring: fix uninit old data for poll event upd Pavel Begunkov
                   ` (8 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Pavel Begunkov @ 2021-04-13  1:58 UTC (permalink / raw)
  To: Jens Axboe, io-uring; +Cc: stable

If io_rsrc_ref_quiesce() fails in io_sqe_files_unregister(), we skip the
actual unregistration and leave the files referenced. That may well
happen because of a stray signal, or just because quiesce does
allocations internally.

In io_ring_ctx_free() do an unsafe (no-quiesce) version of unregister
instead: it's guaranteed there are no requests left by that point, so
quiescing is pointless.

Cc: [email protected]
Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 257eddd4cd82..44342ff5c4e1 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -7094,6 +7094,10 @@ static void __io_sqe_files_unregister(struct io_ring_ctx *ctx)
 			fput(file);
 	}
 #endif
+	io_free_file_tables(&ctx->file_table, ctx->nr_user_files);
+	kfree(ctx->file_data);
+	ctx->file_data = NULL;
+	ctx->nr_user_files = 0;
 }
 
 static inline void io_rsrc_ref_lock(struct io_ring_ctx *ctx)
@@ -7200,21 +7204,14 @@ static struct io_rsrc_data *io_rsrc_data_alloc(struct io_ring_ctx *ctx,
 
 static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 {
-	struct io_rsrc_data *data = ctx->file_data;
 	int ret;
 
-	if (!data)
+	if (!ctx->file_data)
 		return -ENXIO;
-	ret = io_rsrc_ref_quiesce(data, ctx);
-	if (ret)
-		return ret;
-
-	__io_sqe_files_unregister(ctx);
-	io_free_file_tables(&ctx->file_table, ctx->nr_user_files);
-	kfree(data);
-	ctx->file_data = NULL;
-	ctx->nr_user_files = 0;
-	return 0;
+	ret = io_rsrc_ref_quiesce(ctx->file_data, ctx);
+	if (!ret)
+		__io_sqe_files_unregister(ctx);
+	return ret;
 }
 
 static void io_sq_thread_unpark(struct io_sq_data *sqd)
@@ -7664,7 +7661,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
 
 	ret = io_sqe_files_scm(ctx);
 	if (ret) {
-		io_sqe_files_unregister(ctx);
+		__io_sqe_files_unregister(ctx);
 		return ret;
 	}
 
@@ -8465,7 +8462,11 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 	}
 
 	mutex_lock(&ctx->uring_lock);
-	io_sqe_files_unregister(ctx);
+	if (ctx->file_data) {
+		if (!atomic_dec_and_test(&ctx->file_data->refs))
+			wait_for_completion(&ctx->file_data->done);
+		__io_sqe_files_unregister(ctx);
+	}
 	if (ctx->rings)
 		__io_cqring_overflow_flush(ctx, true);
 	mutex_unlock(&ctx->uring_lock);
-- 
2.24.0


* [PATCH 2/9] io_uring: fix uninit old data for poll event upd
  2021-04-13  1:58 [PATCH 5.13 0/9] another 5.13 pack Pavel Begunkov
  2021-04-13  1:58 ` [PATCH 1/9] io_uring: fix leaking reg files on exit Pavel Begunkov
@ 2021-04-13  1:58 ` Pavel Begunkov
  2021-04-13  1:58 ` [PATCH 3/9] io_uring: split poll and poll update structures Pavel Begunkov
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Pavel Begunkov @ 2021-04-13  1:58 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Both IORING_POLL_UPDATE_EVENTS and IORING_POLL_UPDATE_USER_DATA need
old_user_data to find/cancel the poll request they target, but it is
currently set only for the first of the two flags.
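
For illustration, a rough userspace sketch of building such an update
SQE (hypothetical helper, not part of this series: sqe->addr and
sqe->off usage follows the prep code in the diff below, while carrying
the flags in sqe->len and the events in sqe->poll32_events is an
assumption about the rest of io_poll_add_prep()):

  #include <string.h>
  #include <linux/io_uring.h>

  static void prep_poll_update(struct io_uring_sqe *sqe, __u64 old_udata,
                               __u64 new_udata, __u32 events)
  {
          memset(sqe, 0, sizeof(*sqe));
          sqe->opcode = IORING_OP_POLL_ADD;
          /* which existing poll request to update */
          sqe->addr = old_udata;
          /* only consumed when IORING_POLL_UPDATE_USER_DATA is set */
          sqe->off = new_udata;
          sqe->len = IORING_POLL_UPDATE_EVENTS | IORING_POLL_UPDATE_USER_DATA;
          sqe->poll32_events = events;
  }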

Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 44342ff5c4e1..429ee5fd9044 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5384,17 +5384,17 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
 	if (!(flags & IORING_POLL_ADD_MULTI))
 		events |= EPOLLONESHOT;
 	poll->update_events = poll->update_user_data = false;
-	if (flags & IORING_POLL_UPDATE_EVENTS) {
-		poll->update_events = true;
+
+	if (flags & (IORING_POLL_UPDATE_EVENTS|IORING_POLL_UPDATE_USER_DATA)) {
 		poll->old_user_data = READ_ONCE(sqe->addr);
+		poll->update_events = flags & IORING_POLL_UPDATE_EVENTS;
+		poll->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA;
+		if (poll->update_user_data)
+			poll->new_user_data = READ_ONCE(sqe->off);
+	} else {
+		if (sqe->off || sqe->addr)
+			return -EINVAL;
 	}
-	if (flags & IORING_POLL_UPDATE_USER_DATA) {
-		poll->update_user_data = true;
-		poll->new_user_data = READ_ONCE(sqe->off);
-	}
-	if (!(poll->update_events || poll->update_user_data) &&
-	     (sqe->off || sqe->addr))
-		return -EINVAL;
 	poll->events = demangle_poll(events) |
 				(events & (EPOLLEXCLUSIVE|EPOLLONESHOT));
 	return 0;
-- 
2.24.0


* [PATCH 3/9] io_uring: split poll and poll update structures
  2021-04-13  1:58 [PATCH 5.13 0/9] another 5.13 pack Pavel Begunkov
  2021-04-13  1:58 ` [PATCH 1/9] io_uring: fix leaking reg files on exit Pavel Begunkov
  2021-04-13  1:58 ` [PATCH 2/9] io_uring: fix uninit old data for poll event upd Pavel Begunkov
@ 2021-04-13  1:58 ` Pavel Begunkov
  2021-04-13 17:14   ` Pavel Begunkov
  2021-04-13  1:58 ` [PATCH 4/9] io_uring: add timeout completion_lock annotation Pavel Begunkov
                   ` (6 subsequent siblings)
  9 siblings, 1 reply; 12+ messages in thread
From: Pavel Begunkov @ 2021-04-13  1:58 UTC (permalink / raw)
  To: Jens Axboe, io-uring

struct io_poll_iocb has become pretty nasty, since it also combines the
poll update fields. Split them out into a separate structure for
clarity.

Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 55 ++++++++++++++++++++++++++++++---------------------
 1 file changed, 32 insertions(+), 23 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 429ee5fd9044..a0f207e62e32 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -490,15 +490,16 @@ struct io_poll_iocb {
 	__poll_t			events;
 	bool				done;
 	bool				canceled;
+	struct wait_queue_entry		wait;
+};
+
+struct io_poll_update {
+	struct file			*file;
+	u64				old_user_data;
+	u64				new_user_data;
+	__poll_t			events;
 	bool				update_events;
 	bool				update_user_data;
-	union {
-		struct wait_queue_entry	wait;
-		struct {
-			u64		old_user_data;
-			u64		new_user_data;
-		};
-	};
 };
 
 struct io_poll_remove {
@@ -715,6 +716,7 @@ enum {
 	REQ_F_COMPLETE_INLINE_BIT,
 	REQ_F_REISSUE_BIT,
 	REQ_F_DONT_REISSUE_BIT,
+	REQ_F_POLL_UPDATE_BIT,
 	/* keep async read/write and isreg together and in order */
 	REQ_F_ASYNC_READ_BIT,
 	REQ_F_ASYNC_WRITE_BIT,
@@ -762,6 +764,8 @@ enum {
 	REQ_F_REISSUE		= BIT(REQ_F_REISSUE_BIT),
 	/* don't attempt request reissue, see io_rw_reissue() */
 	REQ_F_DONT_REISSUE	= BIT(REQ_F_DONT_REISSUE_BIT),
+	/* switches between poll and poll update */
+	REQ_F_POLL_UPDATE	= BIT(REQ_F_POLL_UPDATE_BIT),
 	/* supports async reads */
 	REQ_F_ASYNC_READ	= BIT(REQ_F_ASYNC_READ_BIT),
 	/* supports async writes */
@@ -791,6 +795,7 @@ struct io_kiocb {
 		struct file		*file;
 		struct io_rw		rw;
 		struct io_poll_iocb	poll;
+		struct io_poll_update	poll_update;
 		struct io_poll_remove	poll_remove;
 		struct io_accept	accept;
 		struct io_sync		sync;
@@ -4989,7 +4994,6 @@ static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events,
 	poll->head = NULL;
 	poll->done = false;
 	poll->canceled = false;
-	poll->update_events = poll->update_user_data = false;
 #define IO_POLL_UNMASK	(EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP)
 	/* mask in events that we always want/need */
 	poll->events = events | IO_POLL_UNMASK;
@@ -5366,7 +5370,6 @@ static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
 
 static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
-	struct io_poll_iocb *poll = &req->poll;
 	u32 events, flags;
 
 	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
@@ -5383,20 +5386,26 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
 #endif
 	if (!(flags & IORING_POLL_ADD_MULTI))
 		events |= EPOLLONESHOT;
-	poll->update_events = poll->update_user_data = false;
+	events = demangle_poll(events) |
+				(events & (EPOLLEXCLUSIVE|EPOLLONESHOT));
 
 	if (flags & (IORING_POLL_UPDATE_EVENTS|IORING_POLL_UPDATE_USER_DATA)) {
-		poll->old_user_data = READ_ONCE(sqe->addr);
-		poll->update_events = flags & IORING_POLL_UPDATE_EVENTS;
-		poll->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA;
-		if (poll->update_user_data)
-			poll->new_user_data = READ_ONCE(sqe->off);
+		struct io_poll_update *poll_upd = &req->poll_update;
+
+		req->flags |= REQ_F_POLL_UPDATE;
+		poll_upd->events = events;
+		poll_upd->old_user_data = READ_ONCE(sqe->addr);
+		poll_upd->update_events = flags & IORING_POLL_UPDATE_EVENTS;
+		poll_upd->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA;
+		if (poll_upd->update_user_data)
+			poll_upd->new_user_data = READ_ONCE(sqe->off);
 	} else {
+		struct io_poll_iocb *poll = &req->poll;
+
+		poll->events = events;
 		if (sqe->off || sqe->addr)
 			return -EINVAL;
 	}
-	poll->events = demangle_poll(events) |
-				(events & (EPOLLEXCLUSIVE|EPOLLONESHOT));
 	return 0;
 }
 
@@ -5434,7 +5443,7 @@ static int io_poll_update(struct io_kiocb *req)
 	int ret;
 
 	spin_lock_irq(&ctx->completion_lock);
-	preq = io_poll_find(ctx, req->poll.old_user_data);
+	preq = io_poll_find(ctx, req->poll_update.old_user_data);
 	if (!preq) {
 		ret = -ENOENT;
 		goto err;
@@ -5464,13 +5473,13 @@ static int io_poll_update(struct io_kiocb *req)
 		return 0;
 	}
 	/* only mask one event flags, keep behavior flags */
-	if (req->poll.update_events) {
+	if (req->poll_update.update_events) {
 		preq->poll.events &= ~0xffff;
-		preq->poll.events |= req->poll.events & 0xffff;
+		preq->poll.events |= req->poll_update.events & 0xffff;
 		preq->poll.events |= IO_POLL_UNMASK;
 	}
-	if (req->poll.update_user_data)
-		preq->user_data = req->poll.new_user_data;
+	if (req->poll_update.update_user_data)
+		preq->user_data = req->poll_update.new_user_data;
 
 	spin_unlock_irq(&ctx->completion_lock);
 
@@ -5489,7 +5498,7 @@ static int io_poll_update(struct io_kiocb *req)
 
 static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
 {
-	if (!req->poll.update_events && !req->poll.update_user_data)
+	if (!(req->flags & REQ_F_POLL_UPDATE))
 		return __io_poll_add(req);
 	return io_poll_update(req);
 }
-- 
2.24.0


* [PATCH 4/9] io_uring: add timeout completion_lock annotation
  2021-04-13  1:58 [PATCH 5.13 0/9] another 5.13 pack Pavel Begunkov
                   ` (2 preceding siblings ...)
  2021-04-13  1:58 ` [PATCH 3/9] io_uring: split poll and poll update structures Pavel Begunkov
@ 2021-04-13  1:58 ` Pavel Begunkov
  2021-04-13  1:58 ` [PATCH 5/9] io_uring: refactor hrtimer_try_to_cancel uses Pavel Begunkov
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Pavel Begunkov @ 2021-04-13  1:58 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Add one more sparse locking annotation for readability in
io_kill_timeout().

Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index a0f207e62e32..eaa8f1af29cc 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1269,6 +1269,7 @@ static void io_queue_async_work(struct io_kiocb *req)
 }
 
 static void io_kill_timeout(struct io_kiocb *req, int status)
+	__must_hold(&req->ctx->completion_lock)
 {
 	struct io_timeout_data *io = req->async_data;
 	int ret;
-- 
2.24.0


* [PATCH 5/9] io_uring: refactor hrtimer_try_to_cancel uses
  2021-04-13  1:58 [PATCH 5.13 0/9] another 5.13 pack Pavel Begunkov
                   ` (3 preceding siblings ...)
  2021-04-13  1:58 ` [PATCH 4/9] io_uring: add timeout completion_lock annotation Pavel Begunkov
@ 2021-04-13  1:58 ` Pavel Begunkov
  2021-04-13  1:58 ` [PATCH 6/9] io_uring: clean up io_poll_remove_waitqs() Pavel Begunkov
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Pavel Begunkov @ 2021-04-13  1:58 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Don't save the return value of hrtimer_try_to_cancel() in a variable,
use it right away. In general it's safer not to keep an intermediate
variable around that may be reused and passed on incorrectly, and here
it can simply be folded away. Also clean up io_timeout_extract().

Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 23 ++++++++---------------
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index eaa8f1af29cc..4f4b4f4bff2d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1272,10 +1272,8 @@ static void io_kill_timeout(struct io_kiocb *req, int status)
 	__must_hold(&req->ctx->completion_lock)
 {
 	struct io_timeout_data *io = req->async_data;
-	int ret;
 
-	ret = hrtimer_try_to_cancel(&io->timer);
-	if (ret != -1) {
+	if (hrtimer_try_to_cancel(&io->timer) != -1) {
 		atomic_set(&req->ctx->cq_timeouts,
 			atomic_read(&req->ctx->cq_timeouts) + 1);
 		list_del_init(&req->timeout.list);
@@ -1792,12 +1790,10 @@ static bool io_kill_linked_timeout(struct io_kiocb *req)
 	 */
 	if (link && (link->flags & REQ_F_LTIMEOUT_ACTIVE)) {
 		struct io_timeout_data *io = link->async_data;
-		int ret;
 
 		io_remove_next_linked(req);
 		link->timeout.head = NULL;
-		ret = hrtimer_try_to_cancel(&io->timer);
-		if (ret != -1) {
+		if (hrtimer_try_to_cancel(&io->timer) != -1) {
 			io_cqring_fill_event(link, -ECANCELED, 0);
 			io_put_req_deferred(link, 1);
 			return true;
@@ -5533,21 +5529,18 @@ static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx,
 {
 	struct io_timeout_data *io;
 	struct io_kiocb *req;
-	int ret = -ENOENT;
+	bool found = false;
 
 	list_for_each_entry(req, &ctx->timeout_list, timeout.list) {
-		if (user_data == req->user_data) {
-			ret = 0;
+		found = user_data == req->user_data;
+		if (found)
 			break;
-		}
 	}
-
-	if (ret == -ENOENT)
-		return ERR_PTR(ret);
+	if (!found)
+		return ERR_PTR(-ENOENT);
 
 	io = req->async_data;
-	ret = hrtimer_try_to_cancel(&io->timer);
-	if (ret == -1)
+	if (hrtimer_try_to_cancel(&io->timer) == -1)
 		return ERR_PTR(-EALREADY);
 	list_del_init(&req->timeout.list);
 	return req;
-- 
2.24.0


* [PATCH 6/9] io_uring: clean up io_poll_remove_waitqs()
  2021-04-13  1:58 [PATCH 5.13 0/9] another 5.13 pack Pavel Begunkov
                   ` (4 preceding siblings ...)
  2021-04-13  1:58 ` [PATCH 5/9] io_uring: refactor hrtimer_try_to_cancel uses Pavel Begunkov
@ 2021-04-13  1:58 ` Pavel Begunkov
  2021-04-13  1:58 ` [PATCH 7/9] io_uring: don't fail overflow on in_idle Pavel Begunkov
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Pavel Begunkov @ 2021-04-13  1:58 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Hoist the opcode-independent parts of io_poll_remove_waitqs() out of
the branches. It reads better and stresses that both paths go through
__io_poll_remove_one().

Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 4f4b4f4bff2d..4407ebc8f8d3 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5224,21 +5224,16 @@ static bool io_poll_remove_waitqs(struct io_kiocb *req)
 	bool do_complete;
 
 	io_poll_remove_double(req);
+	do_complete = __io_poll_remove_one(req, io_poll_get_single(req), true);
 
-	if (req->opcode == IORING_OP_POLL_ADD) {
-		do_complete = __io_poll_remove_one(req, &req->poll, true);
-	} else {
+	if (req->opcode != IORING_OP_POLL_ADD && do_complete) {
 		struct async_poll *apoll = req->apoll;
 
 		/* non-poll requests have submit ref still */
-		do_complete = __io_poll_remove_one(req, &apoll->poll, true);
-		if (do_complete) {
-			req_ref_put(req);
-			kfree(apoll->double_poll);
-			kfree(apoll);
-		}
+		req_ref_put(req);
+		kfree(apoll->double_poll);
+		kfree(apoll);
 	}
-
 	return do_complete;
 }
 
-- 
2.24.0


* [PATCH 7/9] io_uring: don't fail overflow on in_idle
  2021-04-13  1:58 [PATCH 5.13 0/9] another 5.13 pack Pavel Begunkov
                   ` (5 preceding siblings ...)
  2021-04-13  1:58 ` [PATCH 6/9] io_uring: clean up io_poll_remove_waitqs() Pavel Begunkov
@ 2021-04-13  1:58 ` Pavel Begunkov
  2021-04-13  1:58 ` [PATCH 8/9] io_uring: skip futile iopoll iterations Pavel Begunkov
                   ` (2 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Pavel Begunkov @ 2021-04-13  1:58 UTC (permalink / raw)
  To: Jens Axboe, io-uring

As CQE overflows are now untied from requests and so don't hold any
refs, we don't need to handle the exiting/exec'ing cases there anymore.
Moreover, it's much nicer towards userspace to save overflowed CQEs
whenever possible, so remove the failure on in_idle.
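
As a purely hypothetical userspace-side illustration (not part of this
patch): with overflowed CQEs preserved, an application can notice
IORING_SQ_CQ_OVERFLOW in the mmap'ed SQ ring flags and enter the kernel
to get them flushed back into the CQ ring. The helper name and the raw
syscall usage below are assumptions made for the sketch.

  #include <unistd.h>
  #include <sys/syscall.h>
  #include <linux/io_uring.h>

  /* sq_flags points at the SQ ring flags word in the ring mmap */
  static void drain_cq_overflow(int ring_fd, const unsigned int *sq_flags)
  {
          if (__atomic_load_n(sq_flags, __ATOMIC_RELAXED) & IORING_SQ_CQ_OVERFLOW)
                  /* GETEVENTS with nothing to wait for flushes the overflow */
                  syscall(__NR_io_uring_enter, ring_fd, 0, 0,
                          IORING_ENTER_GETEVENTS, NULL, 0);
  }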

Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 44 ++++++++++++++++++++------------------------
 1 file changed, 20 insertions(+), 24 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 4407ebc8f8d3..cc6a44533802 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1515,32 +1515,28 @@ static bool io_cqring_event_overflow(struct io_kiocb *req, long res,
 				     unsigned int cflags)
 {
 	struct io_ring_ctx *ctx = req->ctx;
+	struct io_overflow_cqe *ocqe;
 
-	if (!atomic_read(&req->task->io_uring->in_idle)) {
-		struct io_overflow_cqe *ocqe;
-
-		ocqe = kmalloc(sizeof(*ocqe), GFP_ATOMIC | __GFP_ACCOUNT);
-		if (!ocqe)
-			goto overflow;
-		if (list_empty(&ctx->cq_overflow_list)) {
-			set_bit(0, &ctx->sq_check_overflow);
-			set_bit(0, &ctx->cq_check_overflow);
-			ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW;
-		}
-		ocqe->cqe.user_data = req->user_data;
-		ocqe->cqe.res = res;
-		ocqe->cqe.flags = cflags;
-		list_add_tail(&ocqe->list, &ctx->cq_overflow_list);
-		return true;
+	ocqe = kmalloc(sizeof(*ocqe), GFP_ATOMIC | __GFP_ACCOUNT);
+	if (!ocqe) {
+		/*
+		 * If we're in ring overflow flush mode, or in task cancel mode,
+		 * or cannot allocate an overflow entry, then we need to drop it
+		 * on the floor.
+		 */
+		WRITE_ONCE(ctx->rings->cq_overflow, ++ctx->cached_cq_overflow);
+		return false;
 	}
-overflow:
-	/*
-	 * If we're in ring overflow flush mode, or in task cancel mode,
-	 * or cannot allocate an overflow entry, then we need to drop it
-	 * on the floor.
-	 */
-	WRITE_ONCE(ctx->rings->cq_overflow, ++ctx->cached_cq_overflow);
-	return false;
+	if (list_empty(&ctx->cq_overflow_list)) {
+		set_bit(0, &ctx->sq_check_overflow);
+		set_bit(0, &ctx->cq_check_overflow);
+		ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW;
+	}
+	ocqe->cqe.user_data = req->user_data;
+	ocqe->cqe.res = res;
+	ocqe->cqe.flags = cflags;
+	list_add_tail(&ocqe->list, &ctx->cq_overflow_list);
+	return true;
 }
 
 static inline bool __io_cqring_fill_event(struct io_kiocb *req, long res,
-- 
2.24.0


* [PATCH 8/9] io_uring: skip futile iopoll iterations
  2021-04-13  1:58 [PATCH 5.13 0/9] another 5.13 pack Pavel Begunkov
                   ` (6 preceding siblings ...)
  2021-04-13  1:58 ` [PATCH 7/9] io_uring: don't fail overflow on in_idle Pavel Begunkov
@ 2021-04-13  1:58 ` Pavel Begunkov
  2021-04-13  1:58 ` [PATCH 9/9] io_uring: inline io_iopoll_getevents() Pavel Begunkov
  2021-04-13 15:38 ` [PATCH 5.13 0/9] another 5.13 pack Jens Axboe
  9 siblings, 0 replies; 12+ messages in thread
From: Pavel Begunkov @ 2021-04-13  1:58 UTC (permalink / raw)
  To: Jens Axboe, io-uring

The only way to get out of io_iopoll_getevents() and keep iterating is
to have an empty iopoll_list, otherwise the main loop would just exit.
So, instead of the unlock-on-every-8th-iteration heuristic, unlock based
on iopoll_list being empty.

Also, as no one can add new requests to iopoll_list while
io_iopoll_check() holds uring_lock, it's useless to spin with the list
empty; return in that case.
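
Condensed from the resulting code (together with the follow-up 9/9
inlining io_iopoll_getevents()), the polling loop ends up roughly as:

  mutex_lock(&ctx->uring_lock);
  if (test_bit(0, &ctx->cq_check_overflow))
          __io_cqring_overflow_flush(ctx, false);
  if (io_cqring_events(ctx))
          goto out;
  do {
          if (list_empty(&ctx->iopoll_list)) {
                  /* let task_work queue more requests, or bail if it doesn't */
                  mutex_unlock(&ctx->uring_lock);
                  io_run_task_work();
                  mutex_lock(&ctx->uring_lock);
                  if (list_empty(&ctx->iopoll_list))
                          break;
          }
          ret = io_do_iopoll(ctx, &nr_events, min);
  } while (!ret && nr_events < min && !need_resched());
  out:
  mutex_unlock(&ctx->uring_lock);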

Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index cc6a44533802..1111968bbe7f 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2387,7 +2387,7 @@ static void io_iopoll_try_reap_events(struct io_ring_ctx *ctx)
 static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
 {
 	unsigned int nr_events = 0;
-	int iters = 0, ret = 0;
+	int ret = 0;
 
 	/*
 	 * We disallow the app entering submit/complete with polling, but we
@@ -2416,10 +2416,13 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
 		 * forever, while the workqueue is stuck trying to acquire the
 		 * very same mutex.
 		 */
-		if (!(++iters & 7)) {
+		if (list_empty(&ctx->iopoll_list)) {
 			mutex_unlock(&ctx->uring_lock);
 			io_run_task_work();
 			mutex_lock(&ctx->uring_lock);
+
+			if (list_empty(&ctx->iopoll_list))
+				break;
 		}
 
 		ret = io_iopoll_getevents(ctx, &nr_events, min);
-- 
2.24.0


* [PATCH 9/9] io_uring: inline io_iopoll_getevents()
  2021-04-13  1:58 [PATCH 5.13 0/9] another 5.13 pack Pavel Begunkov
                   ` (7 preceding siblings ...)
  2021-04-13  1:58 ` [PATCH 8/9] io_uring: skip futile iopoll iterations Pavel Begunkov
@ 2021-04-13  1:58 ` Pavel Begunkov
  2021-04-13 15:38 ` [PATCH 5.13 0/9] another 5.13 pack Jens Axboe
  9 siblings, 0 replies; 12+ messages in thread
From: Pavel Begunkov @ 2021-04-13  1:58 UTC (permalink / raw)
  To: Jens Axboe, io-uring

io_iopoll_getevents() is of no use to us anymore; io_iopoll_check()
handles all the cases itself.

Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 52 +++++++++++++--------------------------------------
 1 file changed, 13 insertions(+), 39 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 1111968bbe7f..05c67ebeabd5 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2331,27 +2331,6 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
 	return ret;
 }
 
-/*
- * Poll for a minimum of 'min' events. Note that if min == 0 we consider that a
- * non-spinning poll check - we'll still enter the driver poll loop, but only
- * as a non-spinning completion check.
- */
-static int io_iopoll_getevents(struct io_ring_ctx *ctx, unsigned int *nr_events,
-				long min)
-{
-	while (!list_empty(&ctx->iopoll_list) && !need_resched()) {
-		int ret;
-
-		ret = io_do_iopoll(ctx, nr_events, min);
-		if (ret < 0)
-			return ret;
-		if (*nr_events >= min)
-			return 0;
-	}
-
-	return 1;
-}
-
 /*
  * We can't just wait for polled events to come to us, we have to actively
  * find and complete them.
@@ -2395,17 +2374,16 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
 	 * that got punted to a workqueue.
 	 */
 	mutex_lock(&ctx->uring_lock);
+	/*
+	 * Don't enter poll loop if we already have events pending.
+	 * If we do, we can potentially be spinning for commands that
+	 * already triggered a CQE (eg in error).
+	 */
+	if (test_bit(0, &ctx->cq_check_overflow))
+		__io_cqring_overflow_flush(ctx, false);
+	if (io_cqring_events(ctx))
+		goto out;
 	do {
-		/*
-		 * Don't enter poll loop if we already have events pending.
-		 * If we do, we can potentially be spinning for commands that
-		 * already triggered a CQE (eg in error).
-		 */
-		if (test_bit(0, &ctx->cq_check_overflow))
-			__io_cqring_overflow_flush(ctx, false);
-		if (io_cqring_events(ctx))
-			break;
-
 		/*
 		 * If a submit got punted to a workqueue, we can have the
 		 * application entering polling for a command before it gets
@@ -2424,13 +2402,9 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
 			if (list_empty(&ctx->iopoll_list))
 				break;
 		}
-
-		ret = io_iopoll_getevents(ctx, &nr_events, min);
-		if (ret <= 0)
-			break;
-		ret = 0;
-	} while (min && !nr_events && !need_resched());
-
+		ret = io_do_iopoll(ctx, &nr_events, min);
+	} while (!ret && nr_events < min && !need_resched());
+out:
 	mutex_unlock(&ctx->uring_lock);
 	return ret;
 }
@@ -2543,7 +2517,7 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
 /*
  * After the iocb has been issued, it's safe to be found on the poll list.
  * Adding the kiocb to the list AFTER submission ensures that we don't
- * find it from a io_iopoll_getevents() thread before the issuer is done
+ * find it from a io_do_iopoll() thread before the issuer is done
  * accessing the kiocb cookie.
  */
 static void io_iopoll_req_issued(struct io_kiocb *req, bool in_async)
-- 
2.24.0


* Re: [PATCH 5.13 0/9] another 5.13 pack
  2021-04-13  1:58 [PATCH 5.13 0/9] another 5.13 pack Pavel Begunkov
                   ` (8 preceding siblings ...)
  2021-04-13  1:58 ` [PATCH 9/9] io_uring: inline io_iopoll_getevents() Pavel Begunkov
@ 2021-04-13 15:38 ` Jens Axboe
  9 siblings, 0 replies; 12+ messages in thread
From: Jens Axboe @ 2021-04-13 15:38 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 4/12/21 7:58 PM, Pavel Begunkov wrote:
> 1-2 are fixes
> 
> 7/9 is about nicer overflow handling while someone exits
> 
> 8-9 change how we do iopoll with an empty iopoll_list, saving us from
> burning CPU for nothing.
> 
> Pavel Begunkov (9):
>   io_uring: fix leaking reg files on exit
>   io_uring: fix uninit old data for poll event upd
>   io_uring: split poll and poll update structures
>   io_uring: add timeout completion_lock annotation
>   io_uring: refactor hrtimer_try_to_cancel uses
>   io_uring: clean up io_poll_remove_waitqs()
>   io_uring: don't fail overflow on in_idle
>   io_uring: skip futile iopoll iterations
>   io_uring: inline io_iopoll_getevents()
> 
>  fs/io_uring.c | 236 ++++++++++++++++++++++----------------------------
>  1 file changed, 104 insertions(+), 132 deletions(-)

Applied, thanks.

-- 
Jens Axboe


* Re: [PATCH 3/9] io_uring: split poll and poll update structures
  2021-04-13  1:58 ` [PATCH 3/9] io_uring: split poll and poll update structures Pavel Begunkov
@ 2021-04-13 17:14   ` Pavel Begunkov
  0 siblings, 0 replies; 12+ messages in thread
From: Pavel Begunkov @ 2021-04-13 17:14 UTC (permalink / raw)
  To: Jens Axboe, io-uring

On 13/04/2021 02:58, Pavel Begunkov wrote:
> struct io_poll_iocb has become pretty nasty, since it also combines the
> poll update fields. Split them out into a separate structure for
> clarity.

I think we should move update into IORING_OP_POLL_REMOVE before it's
too late. It would give better struct layouts and wouldn't get in the
way of POLL_ADD, which should be the more popular of the two.

Another thing: it looks like IORING_OP_POLL_REMOVE may cancel an apoll,
which doesn't sound great. IMHO we need to limit it to poll requests
only, it's rather confusing otherwise. Note: it can't be used reliably
from userspace anyway, so we probably don't care about the change in
behaviour.

> 
> Signed-off-by: Pavel Begunkov <[email protected]>
> ---
>  fs/io_uring.c | 55 ++++++++++++++++++++++++++++++---------------------
>  1 file changed, 32 insertions(+), 23 deletions(-)
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 429ee5fd9044..a0f207e62e32 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -490,15 +490,16 @@ struct io_poll_iocb {
>  	__poll_t			events;
>  	bool				done;
>  	bool				canceled;
> +	struct wait_queue_entry		wait;
> +};
> +
> +struct io_poll_update {
> +	struct file			*file;
> +	u64				old_user_data;
> +	u64				new_user_data;
> +	__poll_t			events;
>  	bool				update_events;
>  	bool				update_user_data;
> -	union {
> -		struct wait_queue_entry	wait;
> -		struct {
> -			u64		old_user_data;
> -			u64		new_user_data;
> -		};
> -	};
>  };
>  
>  struct io_poll_remove {
> @@ -715,6 +716,7 @@ enum {
>  	REQ_F_COMPLETE_INLINE_BIT,
>  	REQ_F_REISSUE_BIT,
>  	REQ_F_DONT_REISSUE_BIT,
> +	REQ_F_POLL_UPDATE_BIT,
>  	/* keep async read/write and isreg together and in order */
>  	REQ_F_ASYNC_READ_BIT,
>  	REQ_F_ASYNC_WRITE_BIT,
> @@ -762,6 +764,8 @@ enum {
>  	REQ_F_REISSUE		= BIT(REQ_F_REISSUE_BIT),
>  	/* don't attempt request reissue, see io_rw_reissue() */
>  	REQ_F_DONT_REISSUE	= BIT(REQ_F_DONT_REISSUE_BIT),
> +	/* switches between poll and poll update */
> +	REQ_F_POLL_UPDATE	= BIT(REQ_F_POLL_UPDATE_BIT),
>  	/* supports async reads */
>  	REQ_F_ASYNC_READ	= BIT(REQ_F_ASYNC_READ_BIT),
>  	/* supports async writes */
> @@ -791,6 +795,7 @@ struct io_kiocb {
>  		struct file		*file;
>  		struct io_rw		rw;
>  		struct io_poll_iocb	poll;
> +		struct io_poll_update	poll_update;
>  		struct io_poll_remove	poll_remove;
>  		struct io_accept	accept;
>  		struct io_sync		sync;
> @@ -4989,7 +4994,6 @@ static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events,
>  	poll->head = NULL;
>  	poll->done = false;
>  	poll->canceled = false;
> -	poll->update_events = poll->update_user_data = false;
>  #define IO_POLL_UNMASK	(EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP)
>  	/* mask in events that we always want/need */
>  	poll->events = events | IO_POLL_UNMASK;
> @@ -5366,7 +5370,6 @@ static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
>  
>  static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>  {
> -	struct io_poll_iocb *poll = &req->poll;
>  	u32 events, flags;
>  
>  	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
> @@ -5383,20 +5386,26 @@ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
>  #endif
>  	if (!(flags & IORING_POLL_ADD_MULTI))
>  		events |= EPOLLONESHOT;
> -	poll->update_events = poll->update_user_data = false;
> +	events = demangle_poll(events) |
> +				(events & (EPOLLEXCLUSIVE|EPOLLONESHOT));
>  
>  	if (flags & (IORING_POLL_UPDATE_EVENTS|IORING_POLL_UPDATE_USER_DATA)) {
> -		poll->old_user_data = READ_ONCE(sqe->addr);
> -		poll->update_events = flags & IORING_POLL_UPDATE_EVENTS;
> -		poll->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA;
> -		if (poll->update_user_data)
> -			poll->new_user_data = READ_ONCE(sqe->off);
> +		struct io_poll_update *poll_upd = &req->poll_update;
> +
> +		req->flags |= REQ_F_POLL_UPDATE;
> +		poll_upd->events = events;
> +		poll_upd->old_user_data = READ_ONCE(sqe->addr);
> +		poll_upd->update_events = flags & IORING_POLL_UPDATE_EVENTS;
> +		poll_upd->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA;
> +		if (poll_upd->update_user_data)
> +			poll_upd->new_user_data = READ_ONCE(sqe->off);
>  	} else {
> +		struct io_poll_iocb *poll = &req->poll;
> +
> +		poll->events = events;
>  		if (sqe->off || sqe->addr)
>  			return -EINVAL;
>  	}
> -	poll->events = demangle_poll(events) |
> -				(events & (EPOLLEXCLUSIVE|EPOLLONESHOT));
>  	return 0;
>  }
>  
> @@ -5434,7 +5443,7 @@ static int io_poll_update(struct io_kiocb *req)
>  	int ret;
>  
>  	spin_lock_irq(&ctx->completion_lock);
> -	preq = io_poll_find(ctx, req->poll.old_user_data);
> +	preq = io_poll_find(ctx, req->poll_update.old_user_data);
>  	if (!preq) {
>  		ret = -ENOENT;
>  		goto err;
> @@ -5464,13 +5473,13 @@ static int io_poll_update(struct io_kiocb *req)
>  		return 0;
>  	}
>  	/* only mask one event flags, keep behavior flags */
> -	if (req->poll.update_events) {
> +	if (req->poll_update.update_events) {
>  		preq->poll.events &= ~0xffff;
> -		preq->poll.events |= req->poll.events & 0xffff;
> +		preq->poll.events |= req->poll_update.events & 0xffff;
>  		preq->poll.events |= IO_POLL_UNMASK;
>  	}
> -	if (req->poll.update_user_data)
> -		preq->user_data = req->poll.new_user_data;
> +	if (req->poll_update.update_user_data)
> +		preq->user_data = req->poll_update.new_user_data;
>  
>  	spin_unlock_irq(&ctx->completion_lock);
>  
> @@ -5489,7 +5498,7 @@ static int io_poll_update(struct io_kiocb *req)
>  
>  static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
>  {
> -	if (!req->poll.update_events && !req->poll.update_user_data)
> +	if (!(req->flags & REQ_F_POLL_UPDATE))
>  		return __io_poll_add(req);
>  	return io_poll_update(req);
>  }
> 

-- 
Pavel Begunkov
