public inbox for [email protected]
* [PATCH for-5.15 0/4] not connected 5.15 patches
@ 2021-08-17 19:28 Pavel Begunkov
  2021-08-17 19:28 ` [PATCH 1/4] io_uring: better encapsulate buffer select for rw Pavel Begunkov
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: Pavel Begunkov @ 2021-08-17 19:28 UTC (permalink / raw)
  To: Jens Axboe, io-uring

A set of unconnected patches for 5.15.

Pavel Begunkov (4):
  io_uring: better encapsulate buffer select for rw
  io_uring: reuse io_req_complete_post()
  io_uring: improve tctx_task_work() ctx referencing
  io_uring: improve same wq polling

 fs/io_uring.c | 87 +++++++++++++++++----------------------------------
 1 file changed, 29 insertions(+), 58 deletions(-)

-- 
2.32.0



* [PATCH 1/4] io_uring: better encapsulate buffer select for rw
  2021-08-17 19:28 [PATCH for-5.15 0/4] not connected 5.15 patches Pavel Begunkov
@ 2021-08-17 19:28 ` Pavel Begunkov
  2021-08-17 19:28 ` [PATCH 2/4] io_uring: reuse io_req_complete_post() Pavel Begunkov
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Pavel Begunkov @ 2021-08-17 19:28 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Make io_put_rw_kbuf() do the REQ_F_BUFFER_SELECTED check itself, so
callers don't need to hand-code it. The number of places calling
io_put_rw_kbuf() is growing, so this saves some pain.
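
For context, the wrapped helper looks roughly like this in kernels of
this era (a sketch for illustration, not part of this patch; details may
differ from the exact tree): it encodes the selected buffer into CQE
flags, so returning 0 from io_put_rw_kbuf() in the no-buffer case keeps
the callers' cflags handling unchanged.

  /* approximate sketch of the helper being wrapped */
  static unsigned int io_put_kbuf(struct io_kiocb *req, struct io_buffer *kbuf)
  {
          unsigned int cflags;

          /* report the buffer ID back to userspace via cqe->flags */
          cflags = kbuf->bid << IORING_CQE_BUFFER_SHIFT;
          cflags |= IORING_CQE_F_BUFFER;
          req->flags &= ~REQ_F_BUFFER_SELECTED;
          kfree(kbuf);
          return cflags;
  }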

Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 23 +++++++----------------
 1 file changed, 7 insertions(+), 16 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 2d76ca0c7218..29e3ec6e9dbf 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2284,6 +2284,8 @@ static inline unsigned int io_put_rw_kbuf(struct io_kiocb *req)
 {
 	struct io_buffer *kbuf;
 
+	if (likely(!(req->flags & REQ_F_BUFFER_SELECTED)))
+		return 0;
 	kbuf = (struct io_buffer *) (unsigned long) req->rw.addr;
 	return io_put_kbuf(req, kbuf);
 }
@@ -2313,8 +2315,6 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 
 	io_init_req_batch(&rb);
 	while (!list_empty(done)) {
-		int cflags = 0;
-
 		req = list_first_entry(done, struct io_kiocb, inflight_entry);
 		list_del(&req->inflight_entry);
 
@@ -2325,10 +2325,8 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 			continue;
 		}
 
-		if (req->flags & REQ_F_BUFFER_SELECTED)
-			cflags = io_put_rw_kbuf(req);
-
-		__io_cqring_fill_event(ctx, req->user_data, req->result, cflags);
+		__io_cqring_fill_event(ctx, req->user_data, req->result,
+					io_put_rw_kbuf(req));
 		(*nr_events)++;
 
 		if (req_ref_put_and_test(req))
@@ -2548,11 +2546,7 @@ static bool __io_complete_rw_common(struct io_kiocb *req, long res)
 
 static void io_req_task_complete(struct io_kiocb *req)
 {
-	int cflags = 0;
-
-	if (req->flags & REQ_F_BUFFER_SELECTED)
-		cflags = io_put_rw_kbuf(req);
-	__io_req_complete(req, 0, req->result, cflags);
+	__io_req_complete(req, 0, req->result, io_put_rw_kbuf(req));
 }
 
 static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
@@ -2820,12 +2814,9 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
 		if (io_resubmit_prep(req)) {
 			io_req_task_queue_reissue(req);
 		} else {
-			int cflags = 0;
-
 			req_set_fail(req);
-			if (req->flags & REQ_F_BUFFER_SELECTED)
-				cflags = io_put_rw_kbuf(req);
-			__io_req_complete(req, issue_flags, ret, cflags);
+			__io_req_complete(req, issue_flags, ret,
+					  io_put_rw_kbuf(req));
 		}
 	}
 }
-- 
2.32.0



* [PATCH 2/4] io_uring: reuse io_req_complete_post()
  2021-08-17 19:28 [PATCH for-5.15 0/4] not connected 5.15 patches Pavel Begunkov
  2021-08-17 19:28 ` [PATCH 1/4] io_uring: better encapsulate buffer select for rw Pavel Begunkov
@ 2021-08-17 19:28 ` Pavel Begunkov
  2021-08-17 19:28 ` [PATCH 3/4] io_uring: improve tctx_task_work() ctx referencing Pavel Begunkov
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Pavel Begunkov @ 2021-08-17 19:28 UTC (permalink / raw)
  To: Jens Axboe, io-uring

We have io_req_complete_post() to post a CQE and put the request. It
takes care of all the synchronisation and is more concise and efficient,
so replace all hand-coded occurrences of
"lock; post CQE; unlock; + put_req()" with io_req_complete_post().

Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 48 +++++++++++-------------------------------------
 1 file changed, 11 insertions(+), 37 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 29e3ec6e9dbf..719d62b6e3d5 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5521,16 +5521,8 @@ static int io_poll_update(struct io_kiocb *req, unsigned int issue_flags)
 
 static void io_req_task_timeout(struct io_kiocb *req)
 {
-	struct io_ring_ctx *ctx = req->ctx;
-
-	spin_lock(&ctx->completion_lock);
-	io_cqring_fill_event(ctx, req->user_data, -ETIME, 0);
-	io_commit_cqring(ctx);
-	spin_unlock(&ctx->completion_lock);
-
-	io_cqring_ev_posted(ctx);
 	req_set_fail(req);
-	io_put_req(req);
+	io_req_complete_post(req, -ETIME, 0);
 }
 
 static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
@@ -5658,14 +5650,9 @@ static int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags)
 					io_translate_timeout_mode(tr->flags));
 	spin_unlock_irq(&ctx->timeout_lock);
 
-	spin_lock(&ctx->completion_lock);
-	io_cqring_fill_event(ctx, req->user_data, ret, 0);
-	io_commit_cqring(ctx);
-	spin_unlock(&ctx->completion_lock);
-	io_cqring_ev_posted(ctx);
 	if (ret < 0)
 		req_set_fail(req);
-	io_put_req(req);
+	io_req_complete_post(req, ret, 0);
 	return 0;
 }
 
@@ -5805,7 +5792,6 @@ static int io_async_cancel_one(struct io_uring_task *tctx, u64 user_data,
 }
 
 static int io_try_cancel_userdata(struct io_kiocb *req, u64 sqe_addr)
-	__acquires(&req->ctx->completion_lock)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 	int ret;
@@ -5813,15 +5799,19 @@ static int io_try_cancel_userdata(struct io_kiocb *req, u64 sqe_addr)
 	WARN_ON_ONCE(req->task != current);
 
 	ret = io_async_cancel_one(req->task->io_uring, sqe_addr, ctx);
-	spin_lock(&ctx->completion_lock);
 	if (ret != -ENOENT)
 		return ret;
+
+	spin_lock(&ctx->completion_lock);
 	spin_lock_irq(&ctx->timeout_lock);
 	ret = io_timeout_cancel(ctx, sqe_addr);
 	spin_unlock_irq(&ctx->timeout_lock);
 	if (ret != -ENOENT)
-		return ret;
-	return io_poll_cancel(ctx, sqe_addr, false);
+		goto out;
+	ret = io_poll_cancel(ctx, sqe_addr, false);
+out:
+	spin_unlock(&ctx->completion_lock);
+	return ret;
 }
 
 static int io_async_cancel_prep(struct io_kiocb *req,
@@ -5848,7 +5838,6 @@ static int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
 	ret = io_try_cancel_userdata(req, sqe_addr);
 	if (ret != -ENOENT)
 		goto done;
-	spin_unlock(&ctx->completion_lock);
 
 	/* slow path, try all io-wq's */
 	io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
@@ -5861,17 +5850,10 @@ static int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
 			break;
 	}
 	io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
-
-	spin_lock(&ctx->completion_lock);
 done:
-	io_cqring_fill_event(ctx, req->user_data, ret, 0);
-	io_commit_cqring(ctx);
-	spin_unlock(&ctx->completion_lock);
-	io_cqring_ev_posted(ctx);
-
 	if (ret < 0)
 		req_set_fail(req);
-	io_put_req(req);
+	io_req_complete_post(req, ret, 0);
 	return 0;
 }
 
@@ -6411,20 +6393,12 @@ static inline struct file *io_file_get(struct io_ring_ctx *ctx,
 static void io_req_task_link_timeout(struct io_kiocb *req)
 {
 	struct io_kiocb *prev = req->timeout.prev;
-	struct io_ring_ctx *ctx = req->ctx;
 	int ret;
 
 	if (prev) {
 		ret = io_try_cancel_userdata(req, prev->user_data);
-		if (!ret)
-			ret = -ETIME;
-		io_cqring_fill_event(ctx, req->user_data, ret, 0);
-		io_commit_cqring(ctx);
-		spin_unlock(&ctx->completion_lock);
-		io_cqring_ev_posted(ctx);
-
+		io_req_complete_post(req, ret ?: -ETIME, 0);
 		io_put_req(prev);
-		io_put_req(req);
 	} else {
 		io_req_complete_post(req, -ETIME, 0);
 	}
-- 
2.32.0



* [PATCH 3/4] io_uring: improve tctx_task_work() ctx referencing
  2021-08-17 19:28 [PATCH for-5.15 0/4] not connected 5.15 patches Pavel Begunkov
  2021-08-17 19:28 ` [PATCH 1/4] io_uring: better encapsulate buffer select for rw Pavel Begunkov
  2021-08-17 19:28 ` [PATCH 2/4] io_uring: reuse io_req_complete_post() Pavel Begunkov
@ 2021-08-17 19:28 ` Pavel Begunkov
  2021-08-17 19:28 ` [PATCH 4/4] io_uring: improve same wq polling Pavel Begunkov
  2021-08-17 22:07 ` [PATCH for-5.15 0/4] not connected 5.15 patches Jens Axboe
  4 siblings, 0 replies; 6+ messages in thread
From: Pavel Begunkov @ 2021-08-17 19:28 UTC (permalink / raw)
  To: Jens Axboe, io-uring

A ctx processed by tctx_task_work() can't get freed until the function
returns. The reason is that io_ring_exit_work() executes a task_work
only after all references are put, and task_work execution on a given
task is naturally serialised. Remove the extra ctx pinning.
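
A sketch of the inner loop after this patch (condensed from the hunk
below) shows why the extra pinning is redundant: each request already
holds a reference on its ctx until its callback runs, and
io_ring_exit_work() cannot free the ctx before this task_work completes.

  	do {
  		struct io_wq_work_node *next = node->next;
  		struct io_kiocb *req = container_of(node, struct io_kiocb,
  						    io_task_work.node);

  		if (req->ctx != ctx) {
  			ctx_flush_and_put(ctx);
  			/* the req's own reference keeps the new ctx alive;
  			 * no percpu_ref_get() needed */
  			ctx = req->ctx;
  		}
  		req->io_task_work.func(req);
  		node = next;
  	} while (node);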

Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 719d62b6e3d5..202517860c83 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2004,9 +2004,14 @@ static void ctx_flush_and_put(struct io_ring_ctx *ctx)
 		io_submit_flush_completions(ctx);
 		mutex_unlock(&ctx->uring_lock);
 	}
-	percpu_ref_put(&ctx->refs);
 }
 
+/*
+ * All the ctxs we operate on here will stay alive until the function returns.
+ * That's because initially they're refcounted by the requests, and after
+ * io_ring_exit_work() synchronises with the current task by injecting and
+ * waiting for a task_work, which can't be executed until it returns.
+ */
 static void tctx_task_work(struct callback_head *cb)
 {
 	struct io_ring_ctx *ctx = NULL;
@@ -2033,7 +2038,6 @@ static void tctx_task_work(struct callback_head *cb)
 			if (req->ctx != ctx) {
 				ctx_flush_and_put(ctx);
 				ctx = req->ctx;
-				percpu_ref_get(&ctx->refs);
 			}
 			req->io_task_work.func(req);
 			node = next;
-- 
2.32.0



* [PATCH 4/4] io_uring: improve same wq polling
  2021-08-17 19:28 [PATCH for-5.15 0/4] not connected 5.15 patches Pavel Begunkov
                   ` (2 preceding siblings ...)
  2021-08-17 19:28 ` [PATCH 3/4] io_uring: improve tctx_task_work() ctx referencing Pavel Begunkov
@ 2021-08-17 19:28 ` Pavel Begunkov
  2021-08-17 22:07 ` [PATCH for-5.15 0/4] not connected 5.15 patches Jens Axboe
  4 siblings, 0 replies; 6+ messages in thread
From: Pavel Begunkov @ 2021-08-17 19:28 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Move the check for whether __io_queue_proc() is trying to poll an
already polled waitqueue earlier, and apply the same check to the second
poll entry, if any. It shouldn't really matter in practice, but it gives
more predictable behaviour.
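
The resulting order of checks (a condensed sketch of the logic after
this patch; see the hunk below): the same-head test now runs before the
EPOLLONESHOT fixup, and the second entry gets the same treatment.

  	if (unlikely(pt->nr_entries)) {
  		/* double add on the same waitqueue head, ignore */
  		if (poll_one->head == head)
  			return;
  		if (*poll_ptr) {
  			/* second entry already uses this head, also ignore */
  			if ((*poll_ptr)->head == head)
  				return;
  			/* already have a 2nd entry, fail a third attempt */
  			pt->error = -EINVAL;
  			return;
  		}
  		/* ... set EPOLLONESHOT on poll_one, allocate the 2nd entry ... */
  	}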

Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 202517860c83..1be7af620395 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5064,8 +5064,13 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
 	if (unlikely(pt->nr_entries)) {
 		struct io_poll_iocb *poll_one = poll;
 
+		/* double add on the same waitqueue head, ignore */
+		if (poll_one->head == head)
+			return;
 		/* already have a 2nd entry, fail a third attempt */
 		if (*poll_ptr) {
+			if ((*poll_ptr)->head == head)
+				return;
 			pt->error = -EINVAL;
 			return;
 		}
@@ -5075,9 +5080,6 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
 		 */
 		if (!(poll_one->events & EPOLLONESHOT))
 			poll_one->events |= EPOLLONESHOT;
-		/* double add on the same waitqueue head, ignore */
-		if (poll_one->head == head)
-			return;
 		poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
 		if (!poll) {
 			pt->error = -ENOMEM;
-- 
2.32.0



* Re: [PATCH for-5.15 0/4] not connected 5.15 patches
  2021-08-17 19:28 [PATCH for-5.15 0/4] not connected 5.15 patches Pavel Begunkov
                   ` (3 preceding siblings ...)
  2021-08-17 19:28 ` [PATCH 4/4] io_uring: improve same wq polling Pavel Begunkov
@ 2021-08-17 22:07 ` Jens Axboe
  4 siblings, 0 replies; 6+ messages in thread
From: Jens Axboe @ 2021-08-17 22:07 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 8/17/21 1:28 PM, Pavel Begunkov wrote:
> A set of not connected patches for 5.15.
> 
> Pavel Begunkov (4):
>   io_uring: better encapsulate buffer select for rw
>   io_uring: reuse io_req_complete_post()
>   io_uring: improve tctx_task_work() ctx referencing
>   io_uring: improve same wq polling
> 
>  fs/io_uring.c | 87 +++++++++++++++++----------------------------------
>  1 file changed, 29 insertions(+), 58 deletions(-)

Applied, thanks.

-- 
Jens Axboe


