public inbox for [email protected]
* [RFC v2 0/2] non-atomic req->refs
@ 2021-04-29 17:20 Pavel Begunkov
  2021-04-29 17:20 ` [PATCH v2 1/2] io_uring: defer submission ref put Pavel Begunkov
  2021-04-29 17:20 ` [PATCH v2 2/2] io_uring: non-atomic request refs Pavel Begunkov
  0 siblings, 2 replies; 3+ messages in thread
From: Pavel Begunkov @ 2021-04-29 17:20 UTC
  To: Jens Axboe, io-uring

[knowingly buggy, don't use]

Just a glimpse of de-atomising request refcounting, for benchmarking;
it's dirty and has a bunch of known problems (in [a]poll and iopoll).
I haven't got any perf numbers myself yet.

v2: fix a wrong patch with an inverted req_ref_sub_and_test() condition

Pavel Begunkov (2):
  io_uring: defer submission ref put
  io_uring: non-atomic request refs

 fs/io_uring.c | 99 ++++++++++++++++++++++++++++++---------------------
 1 file changed, 58 insertions(+), 41 deletions(-)

-- 
2.31.1


* [PATCH v2 1/2] io_uring: defer submission ref put
  2021-04-29 17:20 [RFC v2 0/2] non-atomic req->refs Pavel Begunkov
@ 2021-04-29 17:20 ` Pavel Begunkov
  2021-04-29 17:20 ` [PATCH v2 2/2] io_uring: non-atomic request refs Pavel Begunkov
  1 sibling, 0 replies; 3+ messages in thread
From: Pavel Begunkov @ 2021-04-29 17:20 UTC
  To: Jens Axboe, io-uring

Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 43b00077dbd3..9c8e1e773a34 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2124,7 +2124,7 @@ static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req,
 static void io_submit_flush_completions(struct io_comp_state *cs,
 					struct io_ring_ctx *ctx)
 {
-	int i, nr = cs->nr;
+	int refs, i, nr = cs->nr;
 	struct io_kiocb *req;
 	struct req_batch rb;
 
@@ -2132,8 +2132,9 @@ static void io_submit_flush_completions(struct io_comp_state *cs,
 	spin_lock_irq(&ctx->completion_lock);
 	for (i = 0; i < nr; i++) {
 		req = cs->reqs[i];
-		__io_cqring_fill_event(ctx, req->user_data, req->result,
-					req->compl.cflags);
+		if (req->flags & REQ_F_COMPLETE_INLINE)
+			__io_cqring_fill_event(ctx, req->user_data, req->result,
+						req->compl.cflags);
 	}
 	io_commit_cqring(ctx);
 	spin_unlock_irq(&ctx->completion_lock);
@@ -2141,9 +2142,10 @@ static void io_submit_flush_completions(struct io_comp_state *cs,
 	io_cqring_ev_posted(ctx);
 	for (i = 0; i < nr; i++) {
 		req = cs->reqs[i];
+		refs = 1 + !!(req->flags & REQ_F_COMPLETE_INLINE);
 
 		/* submission and completion refs */
-		if (req_ref_sub_and_test(req, 2))
+		if (req_ref_sub_and_test(req, refs))
 			io_req_free_batch(&rb, req, &ctx->submit_state);
 	}
 
@@ -6417,17 +6419,12 @@ static void __io_queue_sqe(struct io_kiocb *req)
 	 * doesn't support non-blocking read/write attempts
 	 */
 	if (likely(!ret)) {
-		/* drop submission reference */
-		if (req->flags & REQ_F_COMPLETE_INLINE) {
-			struct io_ring_ctx *ctx = req->ctx;
-			struct io_comp_state *cs = &ctx->submit_state.comp;
-
-			cs->reqs[cs->nr++] = req;
-			if (cs->nr == ARRAY_SIZE(cs->reqs))
-				io_submit_flush_completions(cs, ctx);
-		} else {
-			io_put_req(req);
-		}
+		struct io_ring_ctx *ctx = req->ctx;
+		struct io_comp_state *cs = &ctx->submit_state.comp;
+
+		cs->reqs[cs->nr++] = req;
+		if (cs->nr == ARRAY_SIZE(cs->reqs))
+			io_submit_flush_completions(cs, ctx);
 	} else if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
 		if (!io_arm_poll_handler(req)) {
 			/*
-- 
2.31.1


* [PATCH v2 2/2] io_uring: non-atomic request refs
  2021-04-29 17:20 [RFC v2 0/2] non-atomic req->refs Pavel Begunkov
  2021-04-29 17:20 ` [PATCH v2 1/2] io_uring: defer submission ref put Pavel Begunkov
@ 2021-04-29 17:20 ` Pavel Begunkov
  1 sibling, 0 replies; 3+ messages in thread
From: Pavel Begunkov @ 2021-04-29 17:20 UTC
  To: Jens Axboe, io-uring

Replace atomic request reference counting with a plain (non-atomic)
counter synchronised by ->completion_lock.
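
The core idea, sketched below as a self-contained userspace program rather
than kernel code (the pthread mutex and all names are illustrative stand-ins;
in the patch the counter lives in struct io_kiocb and is guarded by
ctx->completion_lock):

/* Minimal userspace sketch of the pattern: a plain int refcount guarded
 * by a lock instead of an atomic_t.  The mutex stands in for the kernel's
 * ctx->completion_lock; all names here are illustrative only. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct request {
	pthread_mutex_t lock;	/* stand-in for ctx->completion_lock */
	int refs;		/* plain int, no atomics */
};

/* Caller must hold req->lock, like the lock-less helpers in the patch
 * rely on ->completion_lock being held. */
static bool req_ref_sub_and_test(struct request *req, int nr)
{
	req->refs -= nr;
	return req->refs == 0;
}

/* Locked variant, roughly what io_req_sub_and_test_safe() does. */
static bool req_ref_sub_and_test_safe(struct request *req, int nr)
{
	bool zero;

	pthread_mutex_lock(&req->lock);
	zero = req_ref_sub_and_test(req, nr);
	pthread_mutex_unlock(&req->lock);
	return zero;
}

int main(void)
{
	struct request *req = calloc(1, sizeof(*req));

	if (!req)
		return 1;
	pthread_mutex_init(&req->lock, NULL);
	req->refs = 2;			/* submission + completion refs */

	if (!req_ref_sub_and_test_safe(req, 1))
		printf("one ref left: %d\n", req->refs);
	if (req_ref_sub_and_test_safe(req, 1)) {
		printf("last ref dropped, freeing\n");
		free(req);
	}
	return 0;
}

The idea is that the plain helpers are called with ->completion_lock
already held, while io_req_sub_and_test_safe() takes the lock itself for
call sites that don't.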

Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 76 ++++++++++++++++++++++++++++++++-------------------
 1 file changed, 48 insertions(+), 28 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 9c8e1e773a34..dc4715576566 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -827,7 +827,7 @@ struct io_kiocb {
 
 	struct io_ring_ctx		*ctx;
 	unsigned int			flags;
-	atomic_t			refs;
+	int				refs;
 	struct task_struct		*task;
 	u64				user_data;
 
@@ -1487,23 +1487,26 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
  * see commit f958d7b528b1 for details.
  */
 #define req_ref_zero_or_close_to_overflow(req)	\
-	((unsigned int) atomic_read(&(req->refs)) + 127u <= 127u)
+	((req)->refs == 0)
 
 static inline bool req_ref_inc_not_zero(struct io_kiocb *req)
 {
-	return atomic_inc_not_zero(&req->refs);
+	if (!req->refs)
+		return false;
+	req->refs++;
+	return true;
 }
 
 static inline bool req_ref_sub_and_test(struct io_kiocb *req, int refs)
 {
 	WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req));
-	return atomic_sub_and_test(refs, &req->refs);
+	req->refs -= refs;
+	return !req->refs;
 }
 
 static inline bool req_ref_put_and_test(struct io_kiocb *req)
 {
-	WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req));
-	return atomic_dec_and_test(&req->refs);
+	return req_ref_sub_and_test(req, 1);
 }
 
 static inline void req_ref_put(struct io_kiocb *req)
@@ -1514,7 +1517,18 @@ static inline void req_ref_put(struct io_kiocb *req)
 static inline void req_ref_get(struct io_kiocb *req)
 {
 	WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req));
-	atomic_inc(&req->refs);
+	req->refs++;
+}
+
+static inline bool io_req_sub_and_test_safe(struct io_kiocb *req, int nr)
+{
+	unsigned long flags;
+	bool ret;
+
+	spin_lock_irqsave(&req->ctx->completion_lock, flags);
+	ret = req_ref_sub_and_test(req, nr);
+	spin_unlock_irqrestore(&req->ctx->completion_lock, flags);
+	return ret;
 }
 
 static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
@@ -1601,16 +1615,13 @@ static void io_req_complete_post(struct io_kiocb *req, long res,
 		list_add(&req->compl.list, &cs->locked_free_list);
 		cs->locked_free_nr++;
 	} else {
-		if (!percpu_ref_tryget(&ctx->refs))
-			req = NULL;
+		percpu_ref_get(&ctx->refs);
 	}
 	io_commit_cqring(ctx);
 	spin_unlock_irqrestore(&ctx->completion_lock, flags);
 
-	if (req) {
-		io_cqring_ev_posted(ctx);
-		percpu_ref_put(&ctx->refs);
-	}
+	io_cqring_ev_posted(ctx);
+	percpu_ref_put(&ctx->refs);
 }
 
 static inline bool io_req_needs_clean(struct io_kiocb *req)
@@ -2132,21 +2143,22 @@ static void io_submit_flush_completions(struct io_comp_state *cs,
 	spin_lock_irq(&ctx->completion_lock);
 	for (i = 0; i < nr; i++) {
 		req = cs->reqs[i];
+		refs = 1 + !!(req->flags & REQ_F_COMPLETE_INLINE);
+
 		if (req->flags & REQ_F_COMPLETE_INLINE)
 			__io_cqring_fill_event(ctx, req->user_data, req->result,
 						req->compl.cflags);
+		if (!req_ref_sub_and_test(req, refs))
+			cs->reqs[i] = NULL;
 	}
 	io_commit_cqring(ctx);
 	spin_unlock_irq(&ctx->completion_lock);
 
 	io_cqring_ev_posted(ctx);
 	for (i = 0; i < nr; i++) {
-		req = cs->reqs[i];
-		refs = 1 + !!(req->flags & REQ_F_COMPLETE_INLINE);
-
 		/* submission and completion refs */
-		if (req_ref_sub_and_test(req, refs))
-			io_req_free_batch(&rb, req, &ctx->submit_state);
+		if (cs->reqs[i])
+			io_req_free_batch(&rb, cs->reqs[i], &ctx->submit_state);
 	}
 
 	io_req_free_batch_finish(ctx, &rb);
@@ -2161,7 +2173,7 @@ static inline struct io_kiocb *io_put_req_find_next(struct io_kiocb *req)
 {
 	struct io_kiocb *nxt = NULL;
 
-	if (req_ref_put_and_test(req)) {
+	if (io_req_sub_and_test_safe(req, 1)) {
 		nxt = io_req_find_next(req);
 		__io_free_req(req);
 	}
@@ -2170,7 +2182,7 @@ static inline struct io_kiocb *io_put_req_find_next(struct io_kiocb *req)
 
 static inline void io_put_req(struct io_kiocb *req)
 {
-	if (req_ref_put_and_test(req))
+	if (io_req_sub_and_test_safe(req, 1))
 		io_free_req(req);
 }
 
@@ -2188,6 +2200,12 @@ static void io_free_req_deferred(struct io_kiocb *req)
 		io_req_task_work_add_fallback(req, io_put_req_deferred_cb);
 }
 
+static inline void __io_put_req_deferred(struct io_kiocb *req, int refs)
+{
+	if (io_req_sub_and_test_safe(req, refs))
+		io_free_req_deferred(req);
+}
+
 static inline void io_put_req_deferred(struct io_kiocb *req, int refs)
 {
 	if (req_ref_sub_and_test(req, refs))
@@ -2254,6 +2272,8 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 	struct req_batch rb;
 	struct io_kiocb *req;
 
+	// TODO: check async grabs mutex
+
 	/* order with ->result store in io_complete_rw_iopoll() */
 	smp_rmb();
 
@@ -2757,7 +2777,7 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
 	if (check_reissue && req->flags & REQ_F_REISSUE) {
 		req->flags &= ~REQ_F_REISSUE;
 		if (io_resubmit_prep(req)) {
-			req_ref_get(req);
+			io_req_sub_and_test_safe(req, -1);
 			io_queue_async_work(req);
 		} else {
 			int cflags = 0;
@@ -3185,7 +3205,7 @@ static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode,
 	list_del_init(&wait->entry);
 
 	/* submit ref gets dropped, acquire a new one */
-	req_ref_get(req);
+	io_req_sub_and_test_safe(req, -1);
 	io_req_task_queue(req);
 	return 1;
 }
@@ -4979,7 +4999,7 @@ static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode,
 			poll->wait.func(&poll->wait, mode, sync, key);
 		}
 	}
-	req_ref_put(req);
+	__io_put_req_deferred(req, 1);
 	return 1;
 }
 
@@ -5030,7 +5050,7 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
 			return;
 		}
 		io_init_poll_iocb(poll, poll_one->events, io_poll_double_wake);
-		req_ref_get(req);
+		io_req_sub_and_test_safe(req, -1);
 		poll->wait.private = req;
 		*poll_ptr = poll;
 	}
@@ -6266,7 +6286,7 @@ static void io_wq_submit_work(struct io_wq_work *work)
 	/* avoid locking problems by failing it from a clean context */
 	if (ret) {
 		/* io-wq is going to take one down */
-		req_ref_get(req);
+		io_req_sub_and_test_safe(req, -1);
 		io_req_task_queue_fail(req, ret);
 	}
 }
@@ -6364,11 +6384,11 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
 
 	if (prev) {
 		io_async_find_and_cancel(ctx, req, prev->user_data, -ETIME);
-		io_put_req_deferred(prev, 1);
+		__io_put_req_deferred(prev, 1);
 	} else {
 		io_req_complete_post(req, -ETIME, 0);
 	}
-	io_put_req_deferred(req, 1);
+	__io_put_req_deferred(req, 1);
 	return HRTIMER_NORESTART;
 }
 
@@ -6503,7 +6523,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	req->link = NULL;
 	req->fixed_rsrc_refs = NULL;
 	/* one is dropped after submission, the other at completion */
-	atomic_set(&req->refs, 2);
+	req->refs = 2;
 	req->task = current;
 	req->result = 0;
 	req->work.creds = NULL;
-- 
2.31.1

