public inbox for [email protected]
 help / color / mirror / Atom feed
* [RFC] io_uring: remove file batch-get optimisation
@ 2021-08-10 13:52 Pavel Begunkov
  2021-08-10 17:04 ` Jens Axboe
  0 siblings, 1 reply; 4+ messages in thread
From: Pavel Begunkov @ 2021-08-10 13:52 UTC (permalink / raw)
  To: Jens Axboe, io-uring

For requests with non-fixed files, instead of grabbing just one
reference, we get by the number of left requests, so the following
requests using the same file can take it without atomics.

However, it's not all win. If there is one request in the middle
not using files or having a fixed file, we'll need to put back the left
references. Even worse if an application submits requests dealing with
different files, it will do a put for each new request, so doubling the
number of atomics needed. Also, even if not used, it's still takes some
cycles in the submission path.

If a file used many times, it rather makes sense to pre-register it, if
not, we may fall in the described pitfall. So, this optimisation is a
matter of use case. Go with the simpliest code-wise way, remove it.

Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 53 ++++-----------------------------------------------
 1 file changed, 4 insertions(+), 49 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index d65621247709..f14ebb2dda14 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -324,12 +324,6 @@ struct io_submit_state {
 	/* inline/task_work completion list, under ->uring_lock */
 	struct list_head	free_list;
 
-	/*
-	 * File reference cache
-	 */
-	struct file		*file;
-	unsigned int		fd;
-	unsigned int		file_refs;
 	unsigned int		ios_left;
 };
 
@@ -1055,7 +1049,6 @@ static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type,
 				     unsigned nr_args);
 static void io_clean_op(struct io_kiocb *req);
 static struct file *io_file_get(struct io_ring_ctx *ctx,
-				struct io_submit_state *state,
 				struct io_kiocb *req, int fd, bool fixed);
 static void __io_queue_sqe(struct io_kiocb *req);
 static void io_rsrc_put_work(struct work_struct *work);
@@ -2610,40 +2603,6 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
 	}
 }
 
-static inline void io_state_file_put(struct io_submit_state *state)
-{
-	if (state->file_refs) {
-		fput_many(state->file, state->file_refs);
-		state->file_refs = 0;
-	}
-}
-
-/*
- * Get as many references to a file as we have IOs left in this submission,
- * assuming most submissions are for one file, or at least that each file
- * has more than one submission.
- */
-static struct file *__io_file_get(struct io_submit_state *state, int fd)
-{
-	if (!state)
-		return fget(fd);
-
-	if (state->file_refs) {
-		if (state->fd == fd) {
-			state->file_refs--;
-			return state->file;
-		}
-		io_state_file_put(state);
-	}
-	state->file = fget_many(fd, state->ios_left);
-	if (unlikely(!state->file))
-		return NULL;
-
-	state->fd = fd;
-	state->file_refs = state->ios_left - 1;
-	return state->file;
-}
-
 static bool io_bdev_nowait(struct block_device *bdev)
 {
 	return !bdev || blk_queue_nowait(bdev_get_queue(bdev));
@@ -3641,8 +3600,7 @@ static int __io_splice_prep(struct io_kiocb *req,
 	if (unlikely(sp->flags & ~valid_flags))
 		return -EINVAL;
 
-	sp->file_in = io_file_get(req->ctx, NULL, req,
-				  READ_ONCE(sqe->splice_fd_in),
+	sp->file_in = io_file_get(req->ctx, req, READ_ONCE(sqe->splice_fd_in),
 				  (sp->flags & SPLICE_F_FD_IN_FIXED));
 	if (!sp->file_in)
 		return -EBADF;
@@ -6378,10 +6336,9 @@ static inline struct file *io_file_get_fixed(struct io_ring_ctx *ctx,
 }
 
 static struct file *io_file_get_normal(struct io_ring_ctx *ctx,
-				       struct io_submit_state *state,
 				       struct io_kiocb *req, int fd)
 {
-	struct file *file = __io_file_get(state, fd);
+	struct file *file = fget(fd);
 
 	trace_io_uring_file_get(ctx, fd);
 
@@ -6392,13 +6349,12 @@ static struct file *io_file_get_normal(struct io_ring_ctx *ctx,
 }
 
 static inline struct file *io_file_get(struct io_ring_ctx *ctx,
-				       struct io_submit_state *state,
 				       struct io_kiocb *req, int fd, bool fixed)
 {
 	if (fixed)
 		return io_file_get_fixed(ctx, req, fd);
 	else
-		return io_file_get_normal(ctx, state, req, fd);
+		return io_file_get_normal(ctx, req, fd);
 }
 
 static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
@@ -6612,7 +6568,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 	}
 
 	if (io_op_defs[req->opcode].needs_file) {
-		req->file = io_file_get(ctx, state, req, READ_ONCE(sqe->fd),
+		req->file = io_file_get(ctx, req, READ_ONCE(sqe->fd),
 					(sqe_flags & IOSQE_FIXED_FILE));
 		if (unlikely(!req->file))
 			ret = -EBADF;
@@ -6697,7 +6653,6 @@ static void io_submit_state_end(struct io_submit_state *state,
 		io_submit_flush_completions(ctx);
 	if (state->plug_started)
 		blk_finish_plug(&state->plug);
-	io_state_file_put(state);
 }
 
 /*
-- 
2.32.0


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [RFC] io_uring: remove file batch-get optimisation
  2021-08-10 13:52 [RFC] io_uring: remove file batch-get optimisation Pavel Begunkov
@ 2021-08-10 17:04 ` Jens Axboe
  2021-08-10 17:11   ` Pavel Begunkov
  0 siblings, 1 reply; 4+ messages in thread
From: Jens Axboe @ 2021-08-10 17:04 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 8/10/21 7:52 AM, Pavel Begunkov wrote:
> For requests with non-fixed files, instead of grabbing just one
> reference, we get by the number of left requests, so the following
> requests using the same file can take it without atomics.
> 
> However, it's not all win. If there is one request in the middle
> not using files or having a fixed file, we'll need to put back the left
> references. Even worse if an application submits requests dealing with
> different files, it will do a put for each new request, so doubling the
> number of atomics needed. Also, even if not used, it's still takes some
> cycles in the submission path.
> 
> If a file used many times, it rather makes sense to pre-register it, if
> not, we may fall in the described pitfall. So, this optimisation is a
> matter of use case. Go with the simpliest code-wise way, remove it.

I ran this through the peak testing, not using registered files. Doesn't
seem to make a real difference here, at least in the quick testing.
Which would seem to indicate we could safely kill it. But that's also
the best case for non-registered files, would be curious to see if it
makes a real difference now for workloads where the file is being
shared.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RFC] io_uring: remove file batch-get optimisation
  2021-08-10 17:04 ` Jens Axboe
@ 2021-08-10 17:11   ` Pavel Begunkov
  2021-08-10 17:14     ` Jens Axboe
  0 siblings, 1 reply; 4+ messages in thread
From: Pavel Begunkov @ 2021-08-10 17:11 UTC (permalink / raw)
  To: Jens Axboe, io-uring

On 8/10/21 6:04 PM, Jens Axboe wrote:
> On 8/10/21 7:52 AM, Pavel Begunkov wrote:
>> For requests with non-fixed files, instead of grabbing just one
>> reference, we get by the number of left requests, so the following
>> requests using the same file can take it without atomics.
>>
>> However, it's not all win. If there is one request in the middle
>> not using files or having a fixed file, we'll need to put back the left
>> references. Even worse if an application submits requests dealing with
>> different files, it will do a put for each new request, so doubling the
>> number of atomics needed. Also, even if not used, it's still takes some
>> cycles in the submission path.
>>
>> If a file used many times, it rather makes sense to pre-register it, if
>> not, we may fall in the described pitfall. So, this optimisation is a
>> matter of use case. Go with the simpliest code-wise way, remove it.
> 
> I ran this through the peak testing, not using registered files. Doesn't
> seem to make a real difference here, at least in the quick testing.
> Which would seem to indicate we could safely kill it. But that's also
> the best case for non-registered files, would be curious to see if it
> makes a real difference now for workloads where the file is being
> shared.

Do you mean shared between cores so there is contention? Or the worst
case for non-reg with multiple files as described in the patch? 

-- 
Pavel Begunkov

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [RFC] io_uring: remove file batch-get optimisation
  2021-08-10 17:11   ` Pavel Begunkov
@ 2021-08-10 17:14     ` Jens Axboe
  0 siblings, 0 replies; 4+ messages in thread
From: Jens Axboe @ 2021-08-10 17:14 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 8/10/21 11:11 AM, Pavel Begunkov wrote:
> On 8/10/21 6:04 PM, Jens Axboe wrote:
>> On 8/10/21 7:52 AM, Pavel Begunkov wrote:
>>> For requests with non-fixed files, instead of grabbing just one
>>> reference, we get by the number of left requests, so the following
>>> requests using the same file can take it without atomics.
>>>
>>> However, it's not all win. If there is one request in the middle
>>> not using files or having a fixed file, we'll need to put back the left
>>> references. Even worse if an application submits requests dealing with
>>> different files, it will do a put for each new request, so doubling the
>>> number of atomics needed. Also, even if not used, it's still takes some
>>> cycles in the submission path.
>>>
>>> If a file used many times, it rather makes sense to pre-register it, if
>>> not, we may fall in the described pitfall. So, this optimisation is a
>>> matter of use case. Go with the simpliest code-wise way, remove it.
>>
>> I ran this through the peak testing, not using registered files. Doesn't
>> seem to make a real difference here, at least in the quick testing.
>> Which would seem to indicate we could safely kill it. But that's also
>> the best case for non-registered files, would be curious to see if it
>> makes a real difference now for workloads where the file is being
>> shared.
> 
> Do you mean shared between cores so there is contention? Or the worst
> case for non-reg with multiple files as described in the patch? 

The former. The latter is also a concern, but less so to me. That
said, I do think we should just do this. It's less code, and that's
always good. To justify it, numbers would have to back it up.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-08-10 17:14 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2021-08-10 13:52 [RFC] io_uring: remove file batch-get optimisation Pavel Begunkov
2021-08-10 17:04 ` Jens Axboe
2021-08-10 17:11   ` Pavel Begunkov
2021-08-10 17:14     ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox