public inbox for [email protected]
 help / color / mirror / Atom feed
* [PATCH 0/8] add persistent submission state
@ 2020-01-24 21:40 Pavel Begunkov
  2020-01-24 21:40 ` [PATCH 1/8] io_uring: add comment for drain_next Pavel Begunkov
                   ` (8 more replies)
  0 siblings, 9 replies; 18+ messages in thread
From: Pavel Begunkov @ 2020-01-24 21:40 UTC (permalink / raw)
  To: Jens Axboe, io-uring, linux-kernel

Apart from the unrelated first patch, this pursues two goals:

1. start preparing io_uring to move resources handling into
opcode specific functions, and thus for splice(2)

2. make the first step towards some long-standing optimisation ideas

Basically, it makes struct io_submit_state embedded into ctx, so
easily accessible and persistent, and then plays a bit around that.

Pavel Begunkov (8):
  io_uring: add comment for drain_next
  io_uring: always pass non-null io_submit_state
  io_uring: place io_submit_state into ctx
  io_uring: move ring_fd  into io_submit_state
  io_uring: move cur_mm into io_submit_state
  io_uring: move *link into io_submit_state
  io_uring: persistent req bulk allocation cache
  io_uring: optimise req bulk allocation cache

 fs/io_uring.c | 219 +++++++++++++++++++++++++++-----------------------
 1 file changed, 120 insertions(+), 99 deletions(-)

-- 
2.24.0


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH 1/8] io_uring: add comment for drain_next
  2020-01-24 21:40 [PATCH 0/8] add persistent submission state Pavel Begunkov
@ 2020-01-24 21:40 ` Pavel Begunkov
  2020-01-24 21:40 ` [PATCH 2/8] io_uring: always pass non-null io_submit_state Pavel Begunkov
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 18+ messages in thread
From: Pavel Begunkov @ 2020-01-24 21:40 UTC (permalink / raw)
  To: Jens Axboe, io-uring, linux-kernel

Draining the middle of a link is tricky, so leave a comment there

Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 25f29ef81698..c7b38e5f72a1 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4702,6 +4702,13 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	if (*link) {
 		struct io_kiocb *head = *link;
 
+		/*
+		 * Taking sequential execution of a link, draining both sides
+		 * of the link also fulfils IOSQE_IO_DRAIN semantics for all
+		 * requests in the link. So, it drains the head and the
+		 * next after the link request. The last one is done via
+		 * drain_next flag to persist the effect across calls.
+		 */
 		if (sqe_flags & IOSQE_IO_DRAIN) {
 			head->flags |= REQ_F_IO_DRAIN;
 			ctx->drain_next = 1;
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 2/8] io_uring: always pass non-null io_submit_state
  2020-01-24 21:40 [PATCH 0/8] add persistent submission state Pavel Begunkov
  2020-01-24 21:40 ` [PATCH 1/8] io_uring: add comment for drain_next Pavel Begunkov
@ 2020-01-24 21:40 ` Pavel Begunkov
  2020-01-24 21:40 ` [PATCH 3/8] io_uring: place io_submit_state into ctx Pavel Begunkov
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 18+ messages in thread
From: Pavel Begunkov @ 2020-01-24 21:40 UTC (permalink / raw)
  To: Jens Axboe, io-uring, linux-kernel

There is more harm than merit from conditionally passing
io_submit_state. Always pass a non-null pointer. It shouldn't affect
performance, but even if so the gap will be closed by the following
commits. Also, in preparation, move plugging out of it.

Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 33 ++++++++++++---------------------
 1 file changed, 12 insertions(+), 21 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index c7b38e5f72a1..63a14002e395 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -577,8 +577,6 @@ struct io_kiocb {
 #define IO_IOPOLL_BATCH			8
 
 struct io_submit_state {
-	struct blk_plug		plug;
-
 	/*
 	 * io_kiocb alloc cache
 	 */
@@ -1126,11 +1124,7 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
 	gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
 	struct io_kiocb *req;
 
-	if (!state) {
-		req = kmem_cache_alloc(req_cachep, gfp);
-		if (unlikely(!req))
-			goto fallback;
-	} else if (!state->free_reqs) {
+	if (!state->free_reqs) {
 		size_t sz;
 		int ret;
 
@@ -1771,9 +1765,6 @@ static void io_file_put(struct io_submit_state *state)
  */
 static struct file *io_file_get(struct io_submit_state *state, int fd)
 {
-	if (!state)
-		return fget(fd);
-
 	if (state->file) {
 		if (state->fd == fd) {
 			state->used_refs++;
@@ -4757,7 +4748,6 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
  */
 static void io_submit_state_end(struct io_submit_state *state)
 {
-	blk_finish_plug(&state->plug);
 	io_file_put(state);
 	if (state->free_reqs)
 		kmem_cache_free_bulk(req_cachep, state->free_reqs,
@@ -4770,7 +4760,6 @@ static void io_submit_state_end(struct io_submit_state *state)
 static void io_submit_state_start(struct io_submit_state *state,
 				  unsigned int max_ios)
 {
-	blk_start_plug(&state->plug);
 	state->free_reqs = 0;
 	state->file = NULL;
 	state->ios_left = max_ios;
@@ -4836,7 +4825,8 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 			  struct file *ring_file, int ring_fd,
 			  struct mm_struct **mm, bool async)
 {
-	struct io_submit_state state, *statep = NULL;
+	struct blk_plug plug;
+	struct io_submit_state state;
 	struct io_kiocb *link = NULL;
 	int i, submitted = 0;
 	bool mm_fault = false;
@@ -4854,10 +4844,9 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 	if (!percpu_ref_tryget_many(&ctx->refs, nr))
 		return -EAGAIN;
 
-	if (nr > IO_PLUG_THRESHOLD) {
-		io_submit_state_start(&state, nr);
-		statep = &state;
-	}
+	io_submit_state_start(&state, nr);
+	if (nr > IO_PLUG_THRESHOLD)
+		blk_start_plug(&plug);
 
 	ctx->ring_fd = ring_fd;
 	ctx->ring_file = ring_file;
@@ -4866,7 +4855,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 		const struct io_uring_sqe *sqe;
 		struct io_kiocb *req;
 
-		req = io_get_req(ctx, statep);
+		req = io_get_req(ctx, &state);
 		if (unlikely(!req)) {
 			if (!submitted)
 				submitted = -EAGAIN;
@@ -4899,7 +4888,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 		req->needs_fixed_file = async;
 		trace_io_uring_submit_sqe(ctx, req->opcode, req->user_data,
 						true, async);
-		if (!io_submit_sqe(req, sqe, statep, &link))
+		if (!io_submit_sqe(req, sqe, &state, &link))
 			break;
 	}
 
@@ -4907,8 +4896,10 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 		percpu_ref_put_many(&ctx->refs, nr - submitted);
 	if (link)
 		io_queue_link_head(link);
-	if (statep)
-		io_submit_state_end(&state);
+
+	io_submit_state_end(&state);
+	if (nr > IO_PLUG_THRESHOLD)
+		blk_finish_plug(&plug);
 
 	 /* Commit SQ ring head once we've consumed and submitted all SQEs */
 	io_commit_sqring(ctx);
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 3/8] io_uring: place io_submit_state into ctx
  2020-01-24 21:40 [PATCH 0/8] add persistent submission state Pavel Begunkov
  2020-01-24 21:40 ` [PATCH 1/8] io_uring: add comment for drain_next Pavel Begunkov
  2020-01-24 21:40 ` [PATCH 2/8] io_uring: always pass non-null io_submit_state Pavel Begunkov
@ 2020-01-24 21:40 ` Pavel Begunkov
  2020-01-24 21:40 ` [PATCH 4/8] io_uring: move ring_fd into io_submit_state Pavel Begunkov
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 18+ messages in thread
From: Pavel Begunkov @ 2020-01-24 21:40 UTC (permalink / raw)
  To: Jens Axboe, io-uring, linux-kernel

io_submit_state is used only during submission and while holding
ctx->uring_lock, so only one instance is used at a time. Move it into
struct io_ring_ctx, so it:
- doesn't consume on-stack memory
- persists across io_uring_enter
- is available without passing it through the call-stack

The last point is very useful to make opcode handlers manage their
resources themselves, like splice would. Also, it's a base for other
hackish optimisations in the future.

Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 75 +++++++++++++++++++++++++++------------------------
 1 file changed, 40 insertions(+), 35 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 63a14002e395..c1d905b33b29 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -197,6 +197,27 @@ struct fixed_file_data {
 	struct completion		done;
 };
 
+#define IO_PLUG_THRESHOLD		2
+#define IO_IOPOLL_BATCH			8
+
+struct io_submit_state {
+	/*
+	 * io_kiocb alloc cache
+	 */
+	void			*reqs[IO_IOPOLL_BATCH];
+	unsigned int		free_reqs;
+	unsigned int		cur_req;
+
+	/*
+	 * File reference cache
+	 */
+	struct file		*file;
+	unsigned int		fd;
+	unsigned int		has_refs;
+	unsigned int		used_refs;
+	unsigned int		ios_left;
+};
+
 struct io_ring_ctx {
 	struct {
 		struct percpu_ref	refs;
@@ -308,6 +329,9 @@ struct io_ring_ctx {
 		spinlock_t		inflight_lock;
 		struct list_head	inflight_list;
 	} ____cacheline_aligned_in_smp;
+
+	/* protected by uring_lock */
+	struct io_submit_state		submit_state;
 };
 
 /*
@@ -573,27 +597,6 @@ struct io_kiocb {
 	struct io_wq_work	work;
 };
 
-#define IO_PLUG_THRESHOLD		2
-#define IO_IOPOLL_BATCH			8
-
-struct io_submit_state {
-	/*
-	 * io_kiocb alloc cache
-	 */
-	void			*reqs[IO_IOPOLL_BATCH];
-	unsigned		int free_reqs;
-	unsigned		int cur_req;
-
-	/*
-	 * File reference cache
-	 */
-	struct file		*file;
-	unsigned int		fd;
-	unsigned int		has_refs;
-	unsigned int		used_refs;
-	unsigned int		ios_left;
-};
-
 struct io_op_def {
 	/* needs req->io allocated for deferral/async */
 	unsigned		async_ctx : 1;
@@ -1118,11 +1121,11 @@ static struct io_kiocb *io_get_fallback_req(struct io_ring_ctx *ctx)
 	return NULL;
 }
 
-static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
-				   struct io_submit_state *state)
+static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx)
 {
 	gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
 	struct io_kiocb *req;
+	struct io_submit_state *state = &ctx->submit_state;
 
 	if (!state->free_reqs) {
 		size_t sz;
@@ -4418,10 +4421,10 @@ static inline struct file *io_file_from_index(struct io_ring_ctx *ctx,
 	return table->files[index & IORING_FILE_TABLE_MASK];;
 }
 
-static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req,
-			   const struct io_uring_sqe *sqe)
+static int io_req_set_file(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_ring_ctx *ctx = req->ctx;
+	struct io_submit_state *state = &ctx->submit_state;
 	unsigned flags;
 	int fd;
 
@@ -4658,7 +4661,7 @@ static inline void io_queue_link_head(struct io_kiocb *req)
 				IOSQE_IO_HARDLINK | IOSQE_ASYNC)
 
 static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
-			  struct io_submit_state *state, struct io_kiocb **link)
+			  struct io_kiocb **link)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 	unsigned int sqe_flags;
@@ -4675,7 +4678,7 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	req->flags |= sqe_flags & (IOSQE_IO_DRAIN|IOSQE_IO_HARDLINK|
 					IOSQE_ASYNC);
 
-	ret = io_req_set_file(state, req, sqe);
+	ret = io_req_set_file(req, sqe);
 	if (unlikely(ret)) {
 err_req:
 		io_cqring_add_event(req, ret);
@@ -4746,8 +4749,10 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 /*
  * Batched submission is done, ensure local IO is flushed out.
  */
-static void io_submit_state_end(struct io_submit_state *state)
+static void io_submit_end(struct io_ring_ctx *ctx)
 {
+	struct io_submit_state *state = &ctx->submit_state;
+
 	io_file_put(state);
 	if (state->free_reqs)
 		kmem_cache_free_bulk(req_cachep, state->free_reqs,
@@ -4757,9 +4762,10 @@ static void io_submit_state_end(struct io_submit_state *state)
 /*
  * Start submission side cache.
  */
-static void io_submit_state_start(struct io_submit_state *state,
-				  unsigned int max_ios)
+static void io_submit_start(struct io_ring_ctx *ctx, unsigned int max_ios)
 {
+	struct io_submit_state *state = &ctx->submit_state;
+
 	state->free_reqs = 0;
 	state->file = NULL;
 	state->ios_left = max_ios;
@@ -4826,7 +4832,6 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 			  struct mm_struct **mm, bool async)
 {
 	struct blk_plug plug;
-	struct io_submit_state state;
 	struct io_kiocb *link = NULL;
 	int i, submitted = 0;
 	bool mm_fault = false;
@@ -4844,7 +4849,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 	if (!percpu_ref_tryget_many(&ctx->refs, nr))
 		return -EAGAIN;
 
-	io_submit_state_start(&state, nr);
+	io_submit_start(ctx, nr);
 	if (nr > IO_PLUG_THRESHOLD)
 		blk_start_plug(&plug);
 
@@ -4855,7 +4860,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 		const struct io_uring_sqe *sqe;
 		struct io_kiocb *req;
 
-		req = io_get_req(ctx, &state);
+		req = io_get_req(ctx);
 		if (unlikely(!req)) {
 			if (!submitted)
 				submitted = -EAGAIN;
@@ -4888,7 +4893,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 		req->needs_fixed_file = async;
 		trace_io_uring_submit_sqe(ctx, req->opcode, req->user_data,
 						true, async);
-		if (!io_submit_sqe(req, sqe, &state, &link))
+		if (!io_submit_sqe(req, sqe, &link))
 			break;
 	}
 
@@ -4897,7 +4902,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 	if (link)
 		io_queue_link_head(link);
 
-	io_submit_state_end(&state);
+	io_submit_end(ctx);
 	if (nr > IO_PLUG_THRESHOLD)
 		blk_finish_plug(&plug);
 
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 4/8] io_uring: move ring_fd  into io_submit_state
  2020-01-24 21:40 [PATCH 0/8] add persistent submission state Pavel Begunkov
                   ` (2 preceding siblings ...)
  2020-01-24 21:40 ` [PATCH 3/8] io_uring: place io_submit_state into ctx Pavel Begunkov
@ 2020-01-24 21:40 ` Pavel Begunkov
  2020-01-24 21:40 ` [PATCH 5/8] io_uring: move cur_mm " Pavel Begunkov
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 18+ messages in thread
From: Pavel Begunkov @ 2020-01-24 21:40 UTC (permalink / raw)
  To: Jens Axboe, io-uring, linux-kernel

ring_fd and ring_file are set per submission, so move them into
the submission state.

Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index c1d905b33b29..951c2fc7b5b7 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -216,6 +216,9 @@ struct io_submit_state {
 	unsigned int		has_refs;
 	unsigned int		used_refs;
 	unsigned int		ios_left;
+
+	struct file		*ring_file;
+	int			ring_fd;
 };
 
 struct io_ring_ctx {
@@ -274,8 +277,6 @@ struct io_ring_ctx {
 	 */
 	struct fixed_file_data	*file_data;
 	unsigned		nr_user_files;
-	int 			ring_fd;
-	struct file 		*ring_file;
 
 	/* if used, fixed mapped user buffers */
 	unsigned		nr_user_bufs;
@@ -2783,7 +2784,7 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 
 	req->close.fd = READ_ONCE(sqe->fd);
 	if (req->file->f_op == &io_uring_fops ||
-	    req->close.fd == req->ctx->ring_fd)
+	    req->close.fd == req->ctx->submit_state.ring_fd)
 		return -EBADF;
 
 	return 0;
@@ -4460,8 +4461,9 @@ static int io_grab_files(struct io_kiocb *req)
 {
 	int ret = -EBADF;
 	struct io_ring_ctx *ctx = req->ctx;
+	struct io_submit_state *state = &ctx->submit_state;
 
-	if (!ctx->ring_file)
+	if (!state->ring_file)
 		return -EBADF;
 
 	rcu_read_lock();
@@ -4472,7 +4474,7 @@ static int io_grab_files(struct io_kiocb *req)
 	 * the fd has changed since we started down this path, and disallow
 	 * this operation if it has.
 	 */
-	if (fcheck(ctx->ring_fd) == ctx->ring_file) {
+	if (fcheck(state->ring_fd) == state->ring_file) {
 		list_add(&req->inflight_entry, &ctx->inflight_list);
 		req->flags |= REQ_F_INFLIGHT;
 		req->work.files = current->files;
@@ -4762,13 +4764,17 @@ static void io_submit_end(struct io_ring_ctx *ctx)
 /*
  * Start submission side cache.
  */
-static void io_submit_start(struct io_ring_ctx *ctx, unsigned int max_ios)
+static void io_submit_start(struct io_ring_ctx *ctx, unsigned int max_ios,
+			    struct file *ring_file, int ring_fd)
 {
 	struct io_submit_state *state = &ctx->submit_state;
 
 	state->free_reqs = 0;
 	state->file = NULL;
 	state->ios_left = max_ios;
+
+	state->ring_file = ring_file;
+	state->ring_fd = ring_fd;
 }
 
 static void io_commit_sqring(struct io_ring_ctx *ctx)
@@ -4849,13 +4855,10 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 	if (!percpu_ref_tryget_many(&ctx->refs, nr))
 		return -EAGAIN;
 
-	io_submit_start(ctx, nr);
+	io_submit_start(ctx, nr, ring_file, ring_fd);
 	if (nr > IO_PLUG_THRESHOLD)
 		blk_start_plug(&plug);
 
-	ctx->ring_fd = ring_fd;
-	ctx->ring_file = ring_file;
-
 	for (i = 0; i < nr; i++) {
 		const struct io_uring_sqe *sqe;
 		struct io_kiocb *req;
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 5/8] io_uring: move cur_mm into io_submit_state
  2020-01-24 21:40 [PATCH 0/8] add persistent submission state Pavel Begunkov
                   ` (3 preceding siblings ...)
  2020-01-24 21:40 ` [PATCH 4/8] io_uring: move ring_fd into io_submit_state Pavel Begunkov
@ 2020-01-24 21:40 ` Pavel Begunkov
  2020-01-24 21:40 ` [PATCH 6/8] io_uring: move *link " Pavel Begunkov
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 18+ messages in thread
From: Pavel Begunkov @ 2020-01-24 21:40 UTC (permalink / raw)
  To: Jens Axboe, io-uring, linux-kernel

cur_mm is only used per submission, so it could be placed into
io_submit_state. Here is the reasoning behind it:
- it's more convenient, don't need to pass it down the call stack
- it's passed as a pointer, so in either case needs memory read/write
- now uses heap (ctx->submit_state) instead of stack
- set only once for non-IORING_SETUP_SQPOLL case.
- generates pretty similar code as @ctx is hot and always somewhere in a
register

Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 46 ++++++++++++++++++++++++++--------------------
 1 file changed, 26 insertions(+), 20 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 951c2fc7b5b7..c0e72390d272 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -219,6 +219,8 @@ struct io_submit_state {
 
 	struct file		*ring_file;
 	int			ring_fd;
+
+	struct mm_struct	*mm;
 };
 
 struct io_ring_ctx {
@@ -4834,8 +4836,7 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct io_kiocb *req,
 }
 
 static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
-			  struct file *ring_file, int ring_fd,
-			  struct mm_struct **mm, bool async)
+			  struct file *ring_file, int ring_fd, bool async)
 {
 	struct blk_plug plug;
 	struct io_kiocb *link = NULL;
@@ -4883,15 +4884,15 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 			break;
 		}
 
-		if (io_op_defs[req->opcode].needs_mm && !*mm) {
+		if (io_op_defs[req->opcode].needs_mm && !ctx->submit_state.mm) {
 			mm_fault = mm_fault || !mmget_not_zero(ctx->sqo_mm);
 			if (!mm_fault) {
 				use_mm(ctx->sqo_mm);
-				*mm = ctx->sqo_mm;
+				ctx->submit_state.mm = ctx->sqo_mm;
 			}
 		}
 
-		req->has_user = *mm != NULL;
+		req->has_user = (ctx->submit_state.mm != NULL);
 		req->in_async = async;
 		req->needs_fixed_file = async;
 		trace_io_uring_submit_sqe(ctx, req->opcode, req->user_data,
@@ -4918,7 +4919,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 static int io_sq_thread(void *data)
 {
 	struct io_ring_ctx *ctx = data;
-	struct mm_struct *cur_mm = NULL;
+	struct io_submit_state *submit = &ctx->submit_state;
 	const struct cred *old_cred;
 	mm_segment_t old_fs;
 	DEFINE_WAIT(wait);
@@ -4993,10 +4994,15 @@ static int io_sq_thread(void *data)
 			 * adding ourselves to the waitqueue, as the unuse/drop
 			 * may sleep.
 			 */
-			if (cur_mm) {
-				unuse_mm(cur_mm);
-				mmput(cur_mm);
-				cur_mm = NULL;
+			if (submit->mm) {
+				/*
+				 * this thread is the only submitter, thus
+				 * it's safe to change submit->mm without
+				 * taking ctx->uring_lock
+				 */
+				unuse_mm(submit->mm);
+				mmput(submit->mm);
+				submit->mm = NULL;
 			}
 
 			prepare_to_wait(&ctx->sqo_wait, &wait,
@@ -5027,16 +5033,17 @@ static int io_sq_thread(void *data)
 		}
 
 		mutex_lock(&ctx->uring_lock);
-		ret = io_submit_sqes(ctx, to_submit, NULL, -1, &cur_mm, true);
+		ret = io_submit_sqes(ctx, to_submit, NULL, -1, true);
 		mutex_unlock(&ctx->uring_lock);
 		if (ret > 0)
 			inflight += ret;
 	}
 
 	set_fs(old_fs);
-	if (cur_mm) {
-		unuse_mm(cur_mm);
-		mmput(cur_mm);
+	if (submit->mm) {
+		unuse_mm(submit->mm);
+		mmput(submit->mm);
+		submit->mm = NULL;
 	}
 	revert_creds(old_cred);
 
@@ -5757,6 +5764,10 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
 	mmgrab(current->mm);
 	ctx->sqo_mm = current->mm;
 
+	ctx->submit_state.mm = NULL;
+	if (!(ctx->flags & IORING_SETUP_SQPOLL))
+		ctx->submit_state.mm = ctx->sqo_mm;
+
 	if (ctx->flags & IORING_SETUP_SQPOLL) {
 		ret = -EPERM;
 		if (!capable(CAP_SYS_ADMIN))
@@ -6369,8 +6380,6 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 			wake_up(&ctx->sqo_wait);
 		submitted = to_submit;
 	} else if (to_submit) {
-		struct mm_struct *cur_mm;
-
 		if (current->mm != ctx->sqo_mm ||
 		    current_cred() != ctx->creds) {
 			ret = -EPERM;
@@ -6378,10 +6387,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 		}
 
 		mutex_lock(&ctx->uring_lock);
-		/* already have mm, so io_submit_sqes() won't try to grab it */
-		cur_mm = ctx->sqo_mm;
-		submitted = io_submit_sqes(ctx, to_submit, f.file, fd,
-					   &cur_mm, false);
+		submitted = io_submit_sqes(ctx, to_submit, f.file, fd, false);
 		mutex_unlock(&ctx->uring_lock);
 
 		if (submitted != to_submit)
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 6/8] io_uring: move *link into io_submit_state
  2020-01-24 21:40 [PATCH 0/8] add persistent submission state Pavel Begunkov
                   ` (4 preceding siblings ...)
  2020-01-24 21:40 ` [PATCH 5/8] io_uring: move cur_mm " Pavel Begunkov
@ 2020-01-24 21:40 ` Pavel Begunkov
  2020-01-24 21:40 ` [PATCH 7/8] io_uring: persistent req bulk allocation cache Pavel Begunkov
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 18+ messages in thread
From: Pavel Begunkov @ 2020-01-24 21:40 UTC (permalink / raw)
  To: Jens Axboe, io-uring, linux-kernel

It's more convenient to have it in the submission state, than passing as
a pointer, so move it.

Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index c0e72390d272..f022453e3839 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -221,6 +221,7 @@ struct io_submit_state {
 	int			ring_fd;
 
 	struct mm_struct	*mm;
+	struct io_kiocb		*link;
 };
 
 struct io_ring_ctx {
@@ -4664,10 +4665,10 @@ static inline void io_queue_link_head(struct io_kiocb *req)
 #define SQE_VALID_FLAGS	(IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK|	\
 				IOSQE_IO_HARDLINK | IOSQE_ASYNC)
 
-static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
-			  struct io_kiocb **link)
+static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_ring_ctx *ctx = req->ctx;
+	struct io_submit_state *state = &ctx->submit_state;
 	unsigned int sqe_flags;
 	int ret;
 
@@ -4697,8 +4698,8 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	 * submitted sync once the chain is complete. If none of those
 	 * conditions are true (normal request), then just queue it.
 	 */
-	if (*link) {
-		struct io_kiocb *head = *link;
+	if (state->link) {
+		struct io_kiocb *head = state->link;
 
 		/*
 		 * Taking sequential execution of a link, draining both sides
@@ -4728,7 +4729,7 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 		/* last request of a link, enqueue the link */
 		if (!(sqe_flags & (IOSQE_IO_LINK|IOSQE_IO_HARDLINK))) {
 			io_queue_link_head(head);
-			*link = NULL;
+			state->link = NULL;
 		}
 	} else {
 		if (unlikely(ctx->drain_next)) {
@@ -4741,7 +4742,7 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 			ret = io_req_defer_prep(req, sqe);
 			if (ret)
 				req->flags |= REQ_F_FAIL_LINK;
-			*link = req;
+			state->link = req;
 		} else {
 			io_queue_sqe(req, sqe);
 		}
@@ -4761,6 +4762,8 @@ static void io_submit_end(struct io_ring_ctx *ctx)
 	if (state->free_reqs)
 		kmem_cache_free_bulk(req_cachep, state->free_reqs,
 					&state->reqs[state->cur_req]);
+	if (state->link)
+		io_queue_link_head(state->link);
 }
 
 /*
@@ -4777,6 +4780,7 @@ static void io_submit_start(struct io_ring_ctx *ctx, unsigned int max_ios,
 
 	state->ring_file = ring_file;
 	state->ring_fd = ring_fd;
+	state->link = NULL;
 }
 
 static void io_commit_sqring(struct io_ring_ctx *ctx)
@@ -4839,7 +4843,6 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 			  struct file *ring_file, int ring_fd, bool async)
 {
 	struct blk_plug plug;
-	struct io_kiocb *link = NULL;
 	int i, submitted = 0;
 	bool mm_fault = false;
 
@@ -4897,14 +4900,12 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 		req->needs_fixed_file = async;
 		trace_io_uring_submit_sqe(ctx, req->opcode, req->user_data,
 						true, async);
-		if (!io_submit_sqe(req, sqe, &link))
+		if (!io_submit_sqe(req, sqe))
 			break;
 	}
 
 	if (submitted != nr)
 		percpu_ref_put_many(&ctx->refs, nr - submitted);
-	if (link)
-		io_queue_link_head(link);
 
 	io_submit_end(ctx);
 	if (nr > IO_PLUG_THRESHOLD)
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 7/8] io_uring: persistent req bulk allocation cache
  2020-01-24 21:40 [PATCH 0/8] add persistent submission state Pavel Begunkov
                   ` (5 preceding siblings ...)
  2020-01-24 21:40 ` [PATCH 6/8] io_uring: move *link " Pavel Begunkov
@ 2020-01-24 21:40 ` Pavel Begunkov
  2020-01-24 21:40 ` [PATCH 8/8] io_uring: optimise " Pavel Begunkov
  2020-01-25 19:53 ` [PATCH v2 0/8] add persistent submission state Pavel Begunkov
  8 siblings, 0 replies; 18+ messages in thread
From: Pavel Begunkov @ 2020-01-24 21:40 UTC (permalink / raw)
  To: Jens Axboe, io-uring, linux-kernel

Save bulk allocated requests across io_uring_enter(), so lower QD also
could benefit from that. This is not much of an optimisation, and for
current cache sizes would probably affect only offloaded ~QD=1.

Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 38 ++++++++++++++++++++++++++------------
 1 file changed, 26 insertions(+), 12 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index f022453e3839..aed19cbe9893 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -834,6 +834,25 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	return NULL;
 }
 
+static void io_init_submit_state(struct io_ring_ctx *ctx)
+{
+	struct io_submit_state *state = &ctx->submit_state;
+
+	state->mm = (ctx->flags & IORING_SETUP_SQPOLL) ? NULL : ctx->sqo_mm;
+
+	state->free_reqs = 0;
+	state->cur_req = 0;
+}
+
+static void io_clear_submit_state(struct io_ring_ctx *ctx)
+{
+	struct io_submit_state *state = &ctx->submit_state;
+
+	if (state->free_reqs)
+		kmem_cache_free_bulk(req_cachep, state->free_reqs,
+					&state->reqs[state->cur_req]);
+}
+
 static inline bool __req_need_defer(struct io_kiocb *req)
 {
 	struct io_ring_ctx *ctx = req->ctx;
@@ -1132,10 +1151,9 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx)
 	struct io_submit_state *state = &ctx->submit_state;
 
 	if (!state->free_reqs) {
-		size_t sz;
+		size_t sz = ARRAY_SIZE(state->reqs);
 		int ret;
 
-		sz = min_t(size_t, state->ios_left, ARRAY_SIZE(state->reqs));
 		ret = kmem_cache_alloc_bulk(req_cachep, gfp, sz, state->reqs);
 
 		/*
@@ -4759,9 +4777,6 @@ static void io_submit_end(struct io_ring_ctx *ctx)
 	struct io_submit_state *state = &ctx->submit_state;
 
 	io_file_put(state);
-	if (state->free_reqs)
-		kmem_cache_free_bulk(req_cachep, state->free_reqs,
-					&state->reqs[state->cur_req]);
 	if (state->link)
 		io_queue_link_head(state->link);
 }
@@ -4774,7 +4789,6 @@ static void io_submit_start(struct io_ring_ctx *ctx, unsigned int max_ios,
 {
 	struct io_submit_state *state = &ctx->submit_state;
 
-	state->free_reqs = 0;
 	state->file = NULL;
 	state->ios_left = max_ios;
 
@@ -5762,12 +5776,6 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
 	int ret;
 
 	init_waitqueue_head(&ctx->sqo_wait);
-	mmgrab(current->mm);
-	ctx->sqo_mm = current->mm;
-
-	ctx->submit_state.mm = NULL;
-	if (!(ctx->flags & IORING_SETUP_SQPOLL))
-		ctx->submit_state.mm = ctx->sqo_mm;
 
 	if (ctx->flags & IORING_SETUP_SQPOLL) {
 		ret = -EPERM;
@@ -6143,6 +6151,8 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 	if (ctx->sqo_mm)
 		mmdrop(ctx->sqo_mm);
 
+	io_clear_submit_state(ctx);
+
 	io_iopoll_reap_events(ctx);
 	io_sqe_buffer_unregister(ctx);
 	io_sqe_files_unregister(ctx);
@@ -6581,6 +6591,10 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p)
 	if (ret)
 		goto err;
 
+	mmgrab(current->mm);
+	ctx->sqo_mm = current->mm;
+	io_init_submit_state(ctx);
+
 	ret = io_sq_offload_start(ctx, p);
 	if (ret)
 		goto err;
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 8/8] io_uring: optimise req bulk allocation cache
  2020-01-24 21:40 [PATCH 0/8] add persistent submission state Pavel Begunkov
                   ` (6 preceding siblings ...)
  2020-01-24 21:40 ` [PATCH 7/8] io_uring: persistent req bulk allocation cache Pavel Begunkov
@ 2020-01-24 21:40 ` Pavel Begunkov
  2020-01-25 19:53 ` [PATCH v2 0/8] add persistent submission state Pavel Begunkov
  8 siblings, 0 replies; 18+ messages in thread
From: Pavel Begunkov @ 2020-01-24 21:40 UTC (permalink / raw)
  To: Jens Axboe, io-uring, linux-kernel

Traverse backward through @reqs in struct io_submit_state, so it's
possible to remove cur_req from it and easier to handle in general.

Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index aed19cbe9893..a4b496815783 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -206,7 +206,6 @@ struct io_submit_state {
 	 */
 	void			*reqs[IO_IOPOLL_BATCH];
 	unsigned int		free_reqs;
-	unsigned int		cur_req;
 
 	/*
 	 * File reference cache
@@ -839,9 +838,7 @@ static void io_init_submit_state(struct io_ring_ctx *ctx)
 	struct io_submit_state *state = &ctx->submit_state;
 
 	state->mm = (ctx->flags & IORING_SETUP_SQPOLL) ? NULL : ctx->sqo_mm;
-
 	state->free_reqs = 0;
-	state->cur_req = 0;
 }
 
 static void io_clear_submit_state(struct io_ring_ctx *ctx)
@@ -849,8 +846,7 @@ static void io_clear_submit_state(struct io_ring_ctx *ctx)
 	struct io_submit_state *state = &ctx->submit_state;
 
 	if (state->free_reqs)
-		kmem_cache_free_bulk(req_cachep, state->free_reqs,
-					&state->reqs[state->cur_req]);
+		kmem_cache_free_bulk(req_cachep, state->free_reqs, state->reqs);
 }
 
 static inline bool __req_need_defer(struct io_kiocb *req)
@@ -1167,12 +1163,10 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx)
 			ret = 1;
 		}
 		state->free_reqs = ret - 1;
-		state->cur_req = 1;
-		req = state->reqs[0];
+		req = state->reqs[ret - 1];
 	} else {
-		req = state->reqs[state->cur_req];
 		state->free_reqs--;
-		state->cur_req++;
+		req = state->reqs[state->free_reqs];
 	}
 
 got_it:
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v2 0/8] add persistent submission state
  2020-01-24 21:40 [PATCH 0/8] add persistent submission state Pavel Begunkov
                   ` (7 preceding siblings ...)
  2020-01-24 21:40 ` [PATCH 8/8] io_uring: optimise " Pavel Begunkov
@ 2020-01-25 19:53 ` Pavel Begunkov
  2020-01-25 19:53   ` [PATCH v2 1/8] io_uring: leave a comment for drain_next Pavel Begunkov
                     ` (7 more replies)
  8 siblings, 8 replies; 18+ messages in thread
From: Pavel Begunkov @ 2020-01-25 19:53 UTC (permalink / raw)
  To: Jens Axboe, io-uring, linux-kernel

Apart from the unrelated first patch, this pursues two goals:

1. start preparing io_uring to move resources handling into
opcode specific functions, and thus for splice(2)

2. make the first step towards some long-standing optimisation ideas

Basically, it makes struct io_submit_state embedded into ctx, so
easily accessible and persistent, and then plays a bit around that.

v2: rebase

Pavel Begunkov (8):
  io_uring: leave a comment for drain_next
  io_uring: always pass non-null io_submit_state
  io_uring: place io_submit_state into ctx
  io_uring: move ring_fd into io_submit_state
  io_uring: move cur_mm into io_submit_state
  io_uring: move *link into io_submit_state
  io_uring: persistent req bulk allocation cache
  io_uring: optimise req bulk allocation cache

 fs/io_uring.c | 219 +++++++++++++++++++++++++++-----------------------
 1 file changed, 120 insertions(+), 99 deletions(-)

-- 
2.24.0


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v2 1/8] io_uring: leave a comment for drain_next
  2020-01-25 19:53 ` [PATCH v2 0/8] add persistent submission state Pavel Begunkov
@ 2020-01-25 19:53   ` Pavel Begunkov
  2020-01-25 19:53   ` [PATCH v2 2/8] io_uring: always pass non-null io_submit_state Pavel Begunkov
                     ` (6 subsequent siblings)
  7 siblings, 0 replies; 18+ messages in thread
From: Pavel Begunkov @ 2020-01-25 19:53 UTC (permalink / raw)
  To: Jens Axboe, io-uring, linux-kernel

Draining the middle of a link is tricky, so leave a comment there

Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index e79d6e47dc7b..bc1cab6a256a 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4702,6 +4702,13 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	if (*link) {
 		struct io_kiocb *head = *link;
 
+		/*
+		 * Taking sequential execution of a link, draining both sides
+		 * of the link also fulfils IOSQE_IO_DRAIN semantics for all
+		 * requests in the link. So, it drains the head and the
+		 * next after the link request. The last one is done via
+		 * drain_next flag to persist the effect across calls.
+		 */
 		if (sqe_flags & IOSQE_IO_DRAIN) {
 			head->flags |= REQ_F_IO_DRAIN;
 			ctx->drain_next = 1;
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v2 2/8] io_uring: always pass non-null io_submit_state
  2020-01-25 19:53 ` [PATCH v2 0/8] add persistent submission state Pavel Begunkov
  2020-01-25 19:53   ` [PATCH v2 1/8] io_uring: leave a comment for drain_next Pavel Begunkov
@ 2020-01-25 19:53   ` Pavel Begunkov
  2020-01-25 19:53   ` [PATCH v2 3/8] io_uring: place io_submit_state into ctx Pavel Begunkov
                     ` (5 subsequent siblings)
  7 siblings, 0 replies; 18+ messages in thread
From: Pavel Begunkov @ 2020-01-25 19:53 UTC (permalink / raw)
  To: Jens Axboe, io-uring, linux-kernel

There is more harm than merit in conditionally passing
io_submit_state. Always pass a non-null pointer. It shouldn't affect
performance, but even if it does, the gap will be closed by the following
commits. Also, in preparation, move plugging out of it.

Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 33 ++++++++++++---------------------
 1 file changed, 12 insertions(+), 21 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index bc1cab6a256a..f4e7575b511d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -577,8 +577,6 @@ struct io_kiocb {
 #define IO_IOPOLL_BATCH			8
 
 struct io_submit_state {
-	struct blk_plug		plug;
-
 	/*
 	 * io_kiocb alloc cache
 	 */
@@ -1126,11 +1124,7 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
 	gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
 	struct io_kiocb *req;
 
-	if (!state) {
-		req = kmem_cache_alloc(req_cachep, gfp);
-		if (unlikely(!req))
-			goto fallback;
-	} else if (!state->free_reqs) {
+	if (!state->free_reqs) {
 		size_t sz;
 		int ret;
 
@@ -1771,9 +1765,6 @@ static void io_file_put(struct io_submit_state *state)
  */
 static struct file *io_file_get(struct io_submit_state *state, int fd)
 {
-	if (!state)
-		return fget(fd);
-
 	if (state->file) {
 		if (state->fd == fd) {
 			state->used_refs++;
@@ -4757,7 +4748,6 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
  */
 static void io_submit_state_end(struct io_submit_state *state)
 {
-	blk_finish_plug(&state->plug);
 	io_file_put(state);
 	if (state->free_reqs)
 		kmem_cache_free_bulk(req_cachep, state->free_reqs,
@@ -4770,7 +4760,6 @@ static void io_submit_state_end(struct io_submit_state *state)
 static void io_submit_state_start(struct io_submit_state *state,
 				  unsigned int max_ios)
 {
-	blk_start_plug(&state->plug);
 	state->free_reqs = 0;
 	state->file = NULL;
 	state->ios_left = max_ios;
@@ -4836,7 +4825,8 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 			  struct file *ring_file, int ring_fd,
 			  struct mm_struct **mm, bool async)
 {
-	struct io_submit_state state, *statep = NULL;
+	struct blk_plug plug;
+	struct io_submit_state state;
 	struct io_kiocb *link = NULL;
 	int i, submitted = 0;
 	bool mm_fault = false;
@@ -4854,10 +4844,9 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 	if (!percpu_ref_tryget_many(&ctx->refs, nr))
 		return -EAGAIN;
 
-	if (nr > IO_PLUG_THRESHOLD) {
-		io_submit_state_start(&state, nr);
-		statep = &state;
-	}
+	io_submit_state_start(&state, nr);
+	if (nr > IO_PLUG_THRESHOLD)
+		blk_start_plug(&plug);
 
 	ctx->ring_fd = ring_fd;
 	ctx->ring_file = ring_file;
@@ -4866,7 +4855,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 		const struct io_uring_sqe *sqe;
 		struct io_kiocb *req;
 
-		req = io_get_req(ctx, statep);
+		req = io_get_req(ctx, &state);
 		if (unlikely(!req)) {
 			if (!submitted)
 				submitted = -EAGAIN;
@@ -4899,7 +4888,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 		req->needs_fixed_file = async;
 		trace_io_uring_submit_sqe(ctx, req->opcode, req->user_data,
 						true, async);
-		if (!io_submit_sqe(req, sqe, statep, &link))
+		if (!io_submit_sqe(req, sqe, &state, &link))
 			break;
 	}
 
@@ -4910,8 +4899,10 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 	}
 	if (link)
 		io_queue_link_head(link);
-	if (statep)
-		io_submit_state_end(&state);
+
+	io_submit_state_end(&state);
+	if (nr > IO_PLUG_THRESHOLD)
+		blk_finish_plug(&plug);
 
 	 /* Commit SQ ring head once we've consumed and submitted all SQEs */
 	io_commit_sqring(ctx);
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v2 3/8] io_uring: place io_submit_state into ctx
  2020-01-25 19:53 ` [PATCH v2 0/8] add persistent submission state Pavel Begunkov
  2020-01-25 19:53   ` [PATCH v2 1/8] io_uring: leave a comment for drain_next Pavel Begunkov
  2020-01-25 19:53   ` [PATCH v2 2/8] io_uring: always pass non-null io_submit_state Pavel Begunkov
@ 2020-01-25 19:53   ` Pavel Begunkov
  2020-01-25 19:53   ` [PATCH v2 4/8] io_uring: move ring_fd into io_submit_state Pavel Begunkov
                     ` (4 subsequent siblings)
  7 siblings, 0 replies; 18+ messages in thread
From: Pavel Begunkov @ 2020-01-25 19:53 UTC (permalink / raw)
  To: Jens Axboe, io-uring, linux-kernel

io_submit_state is used only during submission and while holding
ctx->uring_lock, so only one instance is used at a time. Move it into
struct io_ring_ctx, so it:
- doesn't consume on-stack memory
- persists across io_uring_enter
- is available without passing it through the call stack

The last point is very useful for letting opcode handlers manage their
resources themselves, like splice would. Also, it's a base for other
hackish optimisations in the future.

Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 75 +++++++++++++++++++++++++++------------------------
 1 file changed, 40 insertions(+), 35 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index f4e7575b511d..8b159e21a35f 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -197,6 +197,27 @@ struct fixed_file_data {
 	struct completion		done;
 };
 
+#define IO_PLUG_THRESHOLD		2
+#define IO_IOPOLL_BATCH			8
+
+struct io_submit_state {
+	/*
+	 * io_kiocb alloc cache
+	 */
+	void			*reqs[IO_IOPOLL_BATCH];
+	unsigned int		free_reqs;
+	unsigned int		cur_req;
+
+	/*
+	 * File reference cache
+	 */
+	struct file		*file;
+	unsigned int		fd;
+	unsigned int		has_refs;
+	unsigned int		used_refs;
+	unsigned int		ios_left;
+};
+
 struct io_ring_ctx {
 	struct {
 		struct percpu_ref	refs;
@@ -308,6 +329,9 @@ struct io_ring_ctx {
 		spinlock_t		inflight_lock;
 		struct list_head	inflight_list;
 	} ____cacheline_aligned_in_smp;
+
+	/* protected by uring_lock */
+	struct io_submit_state		submit_state;
 };
 
 /*
@@ -573,27 +597,6 @@ struct io_kiocb {
 	struct io_wq_work	work;
 };
 
-#define IO_PLUG_THRESHOLD		2
-#define IO_IOPOLL_BATCH			8
-
-struct io_submit_state {
-	/*
-	 * io_kiocb alloc cache
-	 */
-	void			*reqs[IO_IOPOLL_BATCH];
-	unsigned		int free_reqs;
-	unsigned		int cur_req;
-
-	/*
-	 * File reference cache
-	 */
-	struct file		*file;
-	unsigned int		fd;
-	unsigned int		has_refs;
-	unsigned int		used_refs;
-	unsigned int		ios_left;
-};
-
 struct io_op_def {
 	/* needs req->io allocated for deferral/async */
 	unsigned		async_ctx : 1;
@@ -1118,11 +1121,11 @@ static struct io_kiocb *io_get_fallback_req(struct io_ring_ctx *ctx)
 	return NULL;
 }
 
-static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx,
-				   struct io_submit_state *state)
+static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx)
 {
 	gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
 	struct io_kiocb *req;
+	struct io_submit_state *state = &ctx->submit_state;
 
 	if (!state->free_reqs) {
 		size_t sz;
@@ -4418,10 +4421,10 @@ static inline struct file *io_file_from_index(struct io_ring_ctx *ctx,
 	return table->files[index & IORING_FILE_TABLE_MASK];;
 }
 
-static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req,
-			   const struct io_uring_sqe *sqe)
+static int io_req_set_file(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_ring_ctx *ctx = req->ctx;
+	struct io_submit_state *state = &ctx->submit_state;
 	unsigned flags;
 	int fd;
 
@@ -4658,7 +4661,7 @@ static inline void io_queue_link_head(struct io_kiocb *req)
 				IOSQE_IO_HARDLINK | IOSQE_ASYNC)
 
 static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
-			  struct io_submit_state *state, struct io_kiocb **link)
+			  struct io_kiocb **link)
 {
 	struct io_ring_ctx *ctx = req->ctx;
 	unsigned int sqe_flags;
@@ -4675,7 +4678,7 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	req->flags |= sqe_flags & (IOSQE_IO_DRAIN|IOSQE_IO_HARDLINK|
 					IOSQE_ASYNC);
 
-	ret = io_req_set_file(state, req, sqe);
+	ret = io_req_set_file(req, sqe);
 	if (unlikely(ret)) {
 err_req:
 		io_cqring_add_event(req, ret);
@@ -4746,8 +4749,10 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 /*
  * Batched submission is done, ensure local IO is flushed out.
  */
-static void io_submit_state_end(struct io_submit_state *state)
+static void io_submit_end(struct io_ring_ctx *ctx)
 {
+	struct io_submit_state *state = &ctx->submit_state;
+
 	io_file_put(state);
 	if (state->free_reqs)
 		kmem_cache_free_bulk(req_cachep, state->free_reqs,
@@ -4757,9 +4762,10 @@ static void io_submit_state_end(struct io_submit_state *state)
 /*
  * Start submission side cache.
  */
-static void io_submit_state_start(struct io_submit_state *state,
-				  unsigned int max_ios)
+static void io_submit_start(struct io_ring_ctx *ctx, unsigned int max_ios)
 {
+	struct io_submit_state *state = &ctx->submit_state;
+
 	state->free_reqs = 0;
 	state->file = NULL;
 	state->ios_left = max_ios;
@@ -4826,7 +4832,6 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 			  struct mm_struct **mm, bool async)
 {
 	struct blk_plug plug;
-	struct io_submit_state state;
 	struct io_kiocb *link = NULL;
 	int i, submitted = 0;
 	bool mm_fault = false;
@@ -4844,7 +4849,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 	if (!percpu_ref_tryget_many(&ctx->refs, nr))
 		return -EAGAIN;
 
-	io_submit_state_start(&state, nr);
+	io_submit_start(ctx, nr);
 	if (nr > IO_PLUG_THRESHOLD)
 		blk_start_plug(&plug);
 
@@ -4855,7 +4860,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 		const struct io_uring_sqe *sqe;
 		struct io_kiocb *req;
 
-		req = io_get_req(ctx, &state);
+		req = io_get_req(ctx);
 		if (unlikely(!req)) {
 			if (!submitted)
 				submitted = -EAGAIN;
@@ -4888,7 +4893,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 		req->needs_fixed_file = async;
 		trace_io_uring_submit_sqe(ctx, req->opcode, req->user_data,
 						true, async);
-		if (!io_submit_sqe(req, sqe, &state, &link))
+		if (!io_submit_sqe(req, sqe, &link))
 			break;
 	}
 
@@ -4900,7 +4905,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 	if (link)
 		io_queue_link_head(link);
 
-	io_submit_state_end(&state);
+	io_submit_end(ctx);
 	if (nr > IO_PLUG_THRESHOLD)
 		blk_finish_plug(&plug);
 
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v2 4/8] io_uring: move ring_fd into io_submit_state
  2020-01-25 19:53 ` [PATCH v2 0/8] add persistent submission state Pavel Begunkov
                     ` (2 preceding siblings ...)
  2020-01-25 19:53   ` [PATCH v2 3/8] io_uring: place io_submit_state into ctx Pavel Begunkov
@ 2020-01-25 19:53   ` Pavel Begunkov
  2020-01-25 19:53   ` [PATCH v2 5/8] io_uring: move cur_mm " Pavel Begunkov
                     ` (3 subsequent siblings)
  7 siblings, 0 replies; 18+ messages in thread
From: Pavel Begunkov @ 2020-01-25 19:53 UTC (permalink / raw)
  To: Jens Axboe, io-uring, linux-kernel

ring_fd and ring_file are set per submission, so move them into
the submission state.

Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 8b159e21a35f..4597f556d277 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -216,6 +216,9 @@ struct io_submit_state {
 	unsigned int		has_refs;
 	unsigned int		used_refs;
 	unsigned int		ios_left;
+
+	struct file		*ring_file;
+	int			ring_fd;
 };
 
 struct io_ring_ctx {
@@ -274,8 +277,6 @@ struct io_ring_ctx {
 	 */
 	struct fixed_file_data	*file_data;
 	unsigned		nr_user_files;
-	int 			ring_fd;
-	struct file 		*ring_file;
 
 	/* if used, fixed mapped user buffers */
 	unsigned		nr_user_bufs;
@@ -2783,7 +2784,7 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 
 	req->close.fd = READ_ONCE(sqe->fd);
 	if (req->file->f_op == &io_uring_fops ||
-	    req->close.fd == req->ctx->ring_fd)
+	    req->close.fd == req->ctx->submit_state.ring_fd)
 		return -EBADF;
 
 	return 0;
@@ -4460,8 +4461,9 @@ static int io_grab_files(struct io_kiocb *req)
 {
 	int ret = -EBADF;
 	struct io_ring_ctx *ctx = req->ctx;
+	struct io_submit_state *state = &ctx->submit_state;
 
-	if (!ctx->ring_file)
+	if (!state->ring_file)
 		return -EBADF;
 
 	rcu_read_lock();
@@ -4472,7 +4474,7 @@ static int io_grab_files(struct io_kiocb *req)
 	 * the fd has changed since we started down this path, and disallow
 	 * this operation if it has.
 	 */
-	if (fcheck(ctx->ring_fd) == ctx->ring_file) {
+	if (fcheck(state->ring_fd) == state->ring_file) {
 		list_add(&req->inflight_entry, &ctx->inflight_list);
 		req->flags |= REQ_F_INFLIGHT;
 		req->work.files = current->files;
@@ -4762,13 +4764,17 @@ static void io_submit_end(struct io_ring_ctx *ctx)
 /*
  * Start submission side cache.
  */
-static void io_submit_start(struct io_ring_ctx *ctx, unsigned int max_ios)
+static void io_submit_start(struct io_ring_ctx *ctx, unsigned int max_ios,
+			    struct file *ring_file, int ring_fd)
 {
 	struct io_submit_state *state = &ctx->submit_state;
 
 	state->free_reqs = 0;
 	state->file = NULL;
 	state->ios_left = max_ios;
+
+	state->ring_file = ring_file;
+	state->ring_fd = ring_fd;
 }
 
 static void io_commit_sqring(struct io_ring_ctx *ctx)
@@ -4849,13 +4855,10 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 	if (!percpu_ref_tryget_many(&ctx->refs, nr))
 		return -EAGAIN;
 
-	io_submit_start(ctx, nr);
+	io_submit_start(ctx, nr, ring_file, ring_fd);
 	if (nr > IO_PLUG_THRESHOLD)
 		blk_start_plug(&plug);
 
-	ctx->ring_fd = ring_fd;
-	ctx->ring_file = ring_file;
-
 	for (i = 0; i < nr; i++) {
 		const struct io_uring_sqe *sqe;
 		struct io_kiocb *req;
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v2 5/8] io_uring: move cur_mm into io_submit_state
  2020-01-25 19:53 ` [PATCH v2 0/8] add persistent submission state Pavel Begunkov
                     ` (3 preceding siblings ...)
  2020-01-25 19:53   ` [PATCH v2 4/8] io_uring: move ring_fd into io_submit_state Pavel Begunkov
@ 2020-01-25 19:53   ` Pavel Begunkov
  2020-01-25 19:53   ` [PATCH v2 6/8] io_uring: move *link " Pavel Begunkov
                     ` (2 subsequent siblings)
  7 siblings, 0 replies; 18+ messages in thread
From: Pavel Begunkov @ 2020-01-25 19:53 UTC (permalink / raw)
  To: Jens Axboe, io-uring, linux-kernel

cur_mm is only used per submission, so it can be placed into
io_submit_state. The reasoning behind this:
- it's more convenient, as there is no need to pass it down the call stack
- it's passed as a pointer, so in either case it needs a memory read/write
- it now uses the heap (ctx->submit_state) instead of the stack
- it's set only once for the non-IORING_SETUP_SQPOLL case
- it generates pretty similar code, as @ctx is hot and always somewhere in a
register

Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 46 ++++++++++++++++++++++++++--------------------
 1 file changed, 26 insertions(+), 20 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 4597f556d277..880c0e9bbe9e 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -219,6 +219,8 @@ struct io_submit_state {
 
 	struct file		*ring_file;
 	int			ring_fd;
+
+	struct mm_struct	*mm;
 };
 
 struct io_ring_ctx {
@@ -4834,8 +4836,7 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct io_kiocb *req,
 }
 
 static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
-			  struct file *ring_file, int ring_fd,
-			  struct mm_struct **mm, bool async)
+			  struct file *ring_file, int ring_fd, bool async)
 {
 	struct blk_plug plug;
 	struct io_kiocb *link = NULL;
@@ -4883,15 +4884,15 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 			break;
 		}
 
-		if (io_op_defs[req->opcode].needs_mm && !*mm) {
+		if (io_op_defs[req->opcode].needs_mm && !ctx->submit_state.mm) {
 			mm_fault = mm_fault || !mmget_not_zero(ctx->sqo_mm);
 			if (!mm_fault) {
 				use_mm(ctx->sqo_mm);
-				*mm = ctx->sqo_mm;
+				ctx->submit_state.mm = ctx->sqo_mm;
 			}
 		}
 
-		req->has_user = *mm != NULL;
+		req->has_user = (ctx->submit_state.mm != NULL);
 		req->in_async = async;
 		req->needs_fixed_file = async;
 		trace_io_uring_submit_sqe(ctx, req->opcode, req->user_data,
@@ -4921,7 +4922,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 static int io_sq_thread(void *data)
 {
 	struct io_ring_ctx *ctx = data;
-	struct mm_struct *cur_mm = NULL;
+	struct io_submit_state *submit = &ctx->submit_state;
 	const struct cred *old_cred;
 	mm_segment_t old_fs;
 	DEFINE_WAIT(wait);
@@ -4996,10 +4997,15 @@ static int io_sq_thread(void *data)
 			 * adding ourselves to the waitqueue, as the unuse/drop
 			 * may sleep.
 			 */
-			if (cur_mm) {
-				unuse_mm(cur_mm);
-				mmput(cur_mm);
-				cur_mm = NULL;
+			if (submit->mm) {
+				/*
+				 * this thread is the only submitter, thus
+				 * it's safe to change submit->mm without
+				 * taking ctx->uring_lock
+				 */
+				unuse_mm(submit->mm);
+				mmput(submit->mm);
+				submit->mm = NULL;
 			}
 
 			prepare_to_wait(&ctx->sqo_wait, &wait,
@@ -5030,16 +5036,17 @@ static int io_sq_thread(void *data)
 		}
 
 		mutex_lock(&ctx->uring_lock);
-		ret = io_submit_sqes(ctx, to_submit, NULL, -1, &cur_mm, true);
+		ret = io_submit_sqes(ctx, to_submit, NULL, -1, true);
 		mutex_unlock(&ctx->uring_lock);
 		if (ret > 0)
 			inflight += ret;
 	}
 
 	set_fs(old_fs);
-	if (cur_mm) {
-		unuse_mm(cur_mm);
-		mmput(cur_mm);
+	if (submit->mm) {
+		unuse_mm(submit->mm);
+		mmput(submit->mm);
+		submit->mm = NULL;
 	}
 	revert_creds(old_cred);
 
@@ -5760,6 +5767,10 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
 	mmgrab(current->mm);
 	ctx->sqo_mm = current->mm;
 
+	ctx->submit_state.mm = NULL;
+	if (!(ctx->flags & IORING_SETUP_SQPOLL))
+		ctx->submit_state.mm = ctx->sqo_mm;
+
 	if (ctx->flags & IORING_SETUP_SQPOLL) {
 		ret = -EPERM;
 		if (!capable(CAP_SYS_ADMIN))
@@ -6372,8 +6383,6 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 			wake_up(&ctx->sqo_wait);
 		submitted = to_submit;
 	} else if (to_submit) {
-		struct mm_struct *cur_mm;
-
 		if (current->mm != ctx->sqo_mm ||
 		    current_cred() != ctx->creds) {
 			ret = -EPERM;
@@ -6381,10 +6390,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
 		}
 
 		mutex_lock(&ctx->uring_lock);
-		/* already have mm, so io_submit_sqes() won't try to grab it */
-		cur_mm = ctx->sqo_mm;
-		submitted = io_submit_sqes(ctx, to_submit, f.file, fd,
-					   &cur_mm, false);
+		submitted = io_submit_sqes(ctx, to_submit, f.file, fd, false);
 		mutex_unlock(&ctx->uring_lock);
 
 		if (submitted != to_submit)
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v2 6/8] io_uring: move *link into io_submit_state
  2020-01-25 19:53 ` [PATCH v2 0/8] add persistent submission state Pavel Begunkov
                     ` (4 preceding siblings ...)
  2020-01-25 19:53   ` [PATCH v2 5/8] io_uring: move cur_mm " Pavel Begunkov
@ 2020-01-25 19:53   ` Pavel Begunkov
  2020-01-25 19:53   ` [PATCH v2 7/8] io_uring: persistent req bulk allocation cache Pavel Begunkov
  2020-01-25 19:53   ` [PATCH v2 8/8] io_uring: optimise " Pavel Begunkov
  7 siblings, 0 replies; 18+ messages in thread
From: Pavel Begunkov @ 2020-01-25 19:53 UTC (permalink / raw)
  To: Jens Axboe, io-uring, linux-kernel

It's more convenient to have it in the submission state than to pass it as
a pointer, so move it.

Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 880c0e9bbe9e..5022eb4cb9a4 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -221,6 +221,7 @@ struct io_submit_state {
 	int			ring_fd;
 
 	struct mm_struct	*mm;
+	struct io_kiocb		*link;
 };
 
 struct io_ring_ctx {
@@ -4664,10 +4665,10 @@ static inline void io_queue_link_head(struct io_kiocb *req)
 #define SQE_VALID_FLAGS	(IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK|	\
 				IOSQE_IO_HARDLINK | IOSQE_ASYNC)
 
-static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
-			  struct io_kiocb **link)
+static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_ring_ctx *ctx = req->ctx;
+	struct io_submit_state *state = &ctx->submit_state;
 	unsigned int sqe_flags;
 	int ret;
 
@@ -4697,8 +4698,8 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	 * submitted sync once the chain is complete. If none of those
 	 * conditions are true (normal request), then just queue it.
 	 */
-	if (*link) {
-		struct io_kiocb *head = *link;
+	if (state->link) {
+		struct io_kiocb *head = state->link;
 
 		/*
 		 * Taking sequential execution of a link, draining both sides
@@ -4728,7 +4729,7 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 		/* last request of a link, enqueue the link */
 		if (!(sqe_flags & (IOSQE_IO_LINK|IOSQE_IO_HARDLINK))) {
 			io_queue_link_head(head);
-			*link = NULL;
+			state->link = NULL;
 		}
 	} else {
 		if (unlikely(ctx->drain_next)) {
@@ -4741,7 +4742,7 @@ static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 			ret = io_req_defer_prep(req, sqe);
 			if (ret)
 				req->flags |= REQ_F_FAIL_LINK;
-			*link = req;
+			state->link = req;
 		} else {
 			io_queue_sqe(req, sqe);
 		}
@@ -4761,6 +4762,8 @@ static void io_submit_end(struct io_ring_ctx *ctx)
 	if (state->free_reqs)
 		kmem_cache_free_bulk(req_cachep, state->free_reqs,
 					&state->reqs[state->cur_req]);
+	if (state->link)
+		io_queue_link_head(state->link);
 }
 
 /*
@@ -4777,6 +4780,7 @@ static void io_submit_start(struct io_ring_ctx *ctx, unsigned int max_ios,
 
 	state->ring_file = ring_file;
 	state->ring_fd = ring_fd;
+	state->link = NULL;
 }
 
 static void io_commit_sqring(struct io_ring_ctx *ctx)
@@ -4839,7 +4843,6 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 			  struct file *ring_file, int ring_fd, bool async)
 {
 	struct blk_plug plug;
-	struct io_kiocb *link = NULL;
 	int i, submitted = 0;
 	bool mm_fault = false;
 
@@ -4897,7 +4900,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 		req->needs_fixed_file = async;
 		trace_io_uring_submit_sqe(ctx, req->opcode, req->user_data,
 						true, async);
-		if (!io_submit_sqe(req, sqe, &link))
+		if (!io_submit_sqe(req, sqe))
 			break;
 	}
 
@@ -4906,8 +4909,6 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 
 		percpu_ref_put_many(&ctx->refs, nr - ref_used);
 	}
-	if (link)
-		io_queue_link_head(link);
 
 	io_submit_end(ctx);
 	if (nr > IO_PLUG_THRESHOLD)
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v2 7/8] io_uring: persistent req bulk allocation cache
  2020-01-25 19:53 ` [PATCH v2 0/8] add persistent submission state Pavel Begunkov
                     ` (5 preceding siblings ...)
  2020-01-25 19:53   ` [PATCH v2 6/8] io_uring: move *link " Pavel Begunkov
@ 2020-01-25 19:53   ` Pavel Begunkov
  2020-01-25 19:53   ` [PATCH v2 8/8] io_uring: optimise " Pavel Begunkov
  7 siblings, 0 replies; 18+ messages in thread
From: Pavel Begunkov @ 2020-01-25 19:53 UTC (permalink / raw)
  To: Jens Axboe, io-uring, linux-kernel

Save bulk-allocated requests across io_uring_enter(), so that lower QD could
also benefit from it. This is not much of an optimisation, and for
current cache sizes it would probably affect only offloaded ~QD=1.

Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 38 ++++++++++++++++++++++++++------------
 1 file changed, 26 insertions(+), 12 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5022eb4cb9a4..82df6171baae 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -834,6 +834,25 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	return NULL;
 }
 
+static void io_init_submit_state(struct io_ring_ctx *ctx)
+{
+	struct io_submit_state *state = &ctx->submit_state;
+
+	state->mm = (ctx->flags & IORING_SETUP_SQPOLL) ? NULL : ctx->sqo_mm;
+
+	state->free_reqs = 0;
+	state->cur_req = 0;
+}
+
+static void io_clear_submit_state(struct io_ring_ctx *ctx)
+{
+	struct io_submit_state *state = &ctx->submit_state;
+
+	if (state->free_reqs)
+		kmem_cache_free_bulk(req_cachep, state->free_reqs,
+					&state->reqs[state->cur_req]);
+}
+
 static inline bool __req_need_defer(struct io_kiocb *req)
 {
 	struct io_ring_ctx *ctx = req->ctx;
@@ -1132,10 +1151,9 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx)
 	struct io_submit_state *state = &ctx->submit_state;
 
 	if (!state->free_reqs) {
-		size_t sz;
+		size_t sz = ARRAY_SIZE(state->reqs);
 		int ret;
 
-		sz = min_t(size_t, state->ios_left, ARRAY_SIZE(state->reqs));
 		ret = kmem_cache_alloc_bulk(req_cachep, gfp, sz, state->reqs);
 
 		/*
@@ -4759,9 +4777,6 @@ static void io_submit_end(struct io_ring_ctx *ctx)
 	struct io_submit_state *state = &ctx->submit_state;
 
 	io_file_put(state);
-	if (state->free_reqs)
-		kmem_cache_free_bulk(req_cachep, state->free_reqs,
-					&state->reqs[state->cur_req]);
 	if (state->link)
 		io_queue_link_head(state->link);
 }
@@ -4774,7 +4789,6 @@ static void io_submit_start(struct io_ring_ctx *ctx, unsigned int max_ios,
 {
 	struct io_submit_state *state = &ctx->submit_state;
 
-	state->free_reqs = 0;
 	state->file = NULL;
 	state->ios_left = max_ios;
 
@@ -5765,12 +5779,6 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
 	int ret;
 
 	init_waitqueue_head(&ctx->sqo_wait);
-	mmgrab(current->mm);
-	ctx->sqo_mm = current->mm;
-
-	ctx->submit_state.mm = NULL;
-	if (!(ctx->flags & IORING_SETUP_SQPOLL))
-		ctx->submit_state.mm = ctx->sqo_mm;
 
 	if (ctx->flags & IORING_SETUP_SQPOLL) {
 		ret = -EPERM;
@@ -6146,6 +6154,8 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 	if (ctx->sqo_mm)
 		mmdrop(ctx->sqo_mm);
 
+	io_clear_submit_state(ctx);
+
 	io_iopoll_reap_events(ctx);
 	io_sqe_buffer_unregister(ctx);
 	io_sqe_files_unregister(ctx);
@@ -6584,6 +6594,10 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p)
 	if (ret)
 		goto err;
 
+	mmgrab(current->mm);
+	ctx->sqo_mm = current->mm;
+	io_init_submit_state(ctx);
+
 	ret = io_sq_offload_start(ctx, p);
 	if (ret)
 		goto err;
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v2 8/8] io_uring: optimise req bulk allocation cache
  2020-01-25 19:53 ` [PATCH v2 0/8] add persistent submission state Pavel Begunkov
                     ` (6 preceding siblings ...)
  2020-01-25 19:53   ` [PATCH v2 7/8] io_uring: persistent req bulk allocation cache Pavel Begunkov
@ 2020-01-25 19:53   ` Pavel Begunkov
  7 siblings, 0 replies; 18+ messages in thread
From: Pavel Begunkov @ 2020-01-25 19:53 UTC (permalink / raw)
  To: Jens Axboe, io-uring, linux-kernel

Traverse backward through @reqs in struct io_submit_state, so it's
possible to remove cur_req from it and easier to handle in general.

Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 82df6171baae..744e8a90b543 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -206,7 +206,6 @@ struct io_submit_state {
 	 */
 	void			*reqs[IO_IOPOLL_BATCH];
 	unsigned int		free_reqs;
-	unsigned int		cur_req;
 
 	/*
 	 * File reference cache
@@ -839,9 +838,7 @@ static void io_init_submit_state(struct io_ring_ctx *ctx)
 	struct io_submit_state *state = &ctx->submit_state;
 
 	state->mm = (ctx->flags & IORING_SETUP_SQPOLL) ? NULL : ctx->sqo_mm;
-
 	state->free_reqs = 0;
-	state->cur_req = 0;
 }
 
 static void io_clear_submit_state(struct io_ring_ctx *ctx)
@@ -849,8 +846,7 @@ static void io_clear_submit_state(struct io_ring_ctx *ctx)
 	struct io_submit_state *state = &ctx->submit_state;
 
 	if (state->free_reqs)
-		kmem_cache_free_bulk(req_cachep, state->free_reqs,
-					&state->reqs[state->cur_req]);
+		kmem_cache_free_bulk(req_cachep, state->free_reqs, state->reqs);
 }
 
 static inline bool __req_need_defer(struct io_kiocb *req)
@@ -1167,12 +1163,10 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx)
 			ret = 1;
 		}
 		state->free_reqs = ret - 1;
-		state->cur_req = 1;
-		req = state->reqs[0];
+		req = state->reqs[ret - 1];
 	} else {
-		req = state->reqs[state->cur_req];
 		state->free_reqs--;
-		state->cur_req++;
+		req = state->reqs[state->free_reqs];
 	}
 
 got_it:
-- 
2.24.0


^ permalink raw reply related	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2020-01-25 19:55 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-24 21:40 [PATCH 0/8] add persistent submission state Pavel Begunkov
2020-01-24 21:40 ` [PATCH 1/8] io_uring: add comment for drain_next Pavel Begunkov
2020-01-24 21:40 ` [PATCH 2/8] io_uring: always pass non-null io_submit_state Pavel Begunkov
2020-01-24 21:40 ` [PATCH 3/8] io_uring: place io_submit_state into ctx Pavel Begunkov
2020-01-24 21:40 ` [PATCH 4/8] io_uring: move ring_fd into io_submit_state Pavel Begunkov
2020-01-24 21:40 ` [PATCH 5/8] io_uring: move cur_mm " Pavel Begunkov
2020-01-24 21:40 ` [PATCH 6/8] io_uring: move *link " Pavel Begunkov
2020-01-24 21:40 ` [PATCH 7/8] io_uring: persistent req bulk allocation cache Pavel Begunkov
2020-01-24 21:40 ` [PATCH 8/8] io_uring: optimise " Pavel Begunkov
2020-01-25 19:53 ` [PATCH v2 0/8] add persistent submission state Pavel Begunkov
2020-01-25 19:53   ` [PATCH v2 1/8] io_uring: leave a comment for drain_next Pavel Begunkov
2020-01-25 19:53   ` [PATCH v2 2/8] io_uring: always pass non-null io_submit_state Pavel Begunkov
2020-01-25 19:53   ` [PATCH v2 3/8] io_uring: place io_submit_state into ctx Pavel Begunkov
2020-01-25 19:53   ` [PATCH v2 4/8] io_uring: move ring_fd into io_submit_state Pavel Begunkov
2020-01-25 19:53   ` [PATCH v2 5/8] io_uring: move cur_mm " Pavel Begunkov
2020-01-25 19:53   ` [PATCH v2 6/8] io_uring: move *link " Pavel Begunkov
2020-01-25 19:53   ` [PATCH v2 7/8] io_uring: persistent req bulk allocation cache Pavel Begunkov
2020-01-25 19:53   ` [PATCH v2 8/8] io_uring: optimise " Pavel Begunkov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox