* [PATCH for-5.9 0/3] rw iovec copy cleanup
@ 2020-07-13 19:59 Pavel Begunkov
  2020-07-13 19:59 ` [PATCH 1/3] io_uring: simplify io_req_map_rw() Pavel Begunkov
                   ` (3 more replies)
  0 siblings, 4 replies; 10+ messages in thread
From: Pavel Begunkov @ 2020-07-13 19:59 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Small cleanups related to preparing ->iter and ->iovec in ->io for
async execution. The last patch is needed for the "completion data"
series.

Pavel Begunkov (3):
  io_uring: simplify io_req_map_rw()
  io_uring: add a helper for async rw iovec prep
  io_uring: follow **iovec idiom in io_import_iovec

 fs/io_uring.c | 78 ++++++++++++++++++++++++---------------------------
 1 file changed, 37 insertions(+), 41 deletions(-)

-- 
2.24.0



* [PATCH 1/3] io_uring: simplify io_req_map_rw()
  2020-07-13 19:59 [PATCH for-5.9 0/3] rw iovec copy cleanup Pavel Begunkov
@ 2020-07-13 19:59 ` Pavel Begunkov
  2020-07-13 19:59 ` [PATCH 2/3] io_uring: add a helper for async rw iovec prep Pavel Begunkov
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 10+ messages in thread
From: Pavel Begunkov @ 2020-07-13 19:59 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Don't deref req->io->rw every time, but put it in a local variable. This
looks prettier, generates fewer instructions, and doesn't break alias
analysis.

Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6eae2fb469f9..d9c10070dcba 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2827,15 +2827,17 @@ static void io_req_map_rw(struct io_kiocb *req, ssize_t io_size,
 			  struct iovec *iovec, struct iovec *fast_iov,
 			  struct iov_iter *iter)
 {
-	req->io->rw.nr_segs = iter->nr_segs;
-	req->io->rw.size = io_size;
-	req->io->rw.iov = iovec;
-	if (!req->io->rw.iov) {
-		req->io->rw.iov = req->io->rw.fast_iov;
-		if (req->io->rw.iov != fast_iov)
-			memcpy(req->io->rw.iov, fast_iov,
+	struct io_async_rw *rw = &req->io->rw;
+
+	rw->nr_segs = iter->nr_segs;
+	rw->size = io_size;
+	if (!iovec) {
+		rw->iov = rw->fast_iov;
+		if (rw->iov != fast_iov)
+			memcpy(rw->iov, fast_iov,
 			       sizeof(struct iovec) * iter->nr_segs);
 	} else {
+		rw->iov = iovec;
 		req->flags |= REQ_F_NEED_CLEANUP;
 	}
 }
-- 
2.24.0


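To illustrate the commit message's point outside the kernel tree:
caching the doubly-indirect pointer lets the compiler compute
req->io->rw once instead of re-chasing both pointers for every field
store. A minimal standalone sketch, with hypothetical struct names
rather than the real io_uring definitions:

struct rw_state {
	int nr_segs;
	long size;
};

struct async_ctx {
	struct rw_state rw;
};

struct request {
	struct async_ctx *io;
};

/* every store re-derives req->io->rw: two pointer loads per access */
static void map_rw_verbose(struct request *req, long size, int segs)
{
	req->io->rw.nr_segs = segs;
	req->io->rw.size = size;
}

/* one pointer computation up front, plain offset stores afterwards */
static void map_rw_cached(struct request *req, long size, int segs)
{
	struct rw_state *rw = &req->io->rw;

	rw->nr_segs = segs;
	rw->size = size;
}

int main(void)
{
	struct async_ctx io = { .rw = { 0, 0 } };
	struct request req = { .io = &io };

	map_rw_verbose(&req, 4096, 2);
	map_rw_cached(&req, 4096, 2);
	return 0;
}

Depending on what the compiler can prove about aliasing, the verbose
version may reload the pointers on each store; the local variable
guarantees a single computation either way.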

* [PATCH 2/3] io_uring: add a helper for async rw iovec prep
  2020-07-13 19:59 [PATCH for-5.9 0/3] rw iovec copy cleanup Pavel Begunkov
  2020-07-13 19:59 ` [PATCH 1/3] io_uring: simplify io_req_map_rw() Pavel Begunkov
@ 2020-07-13 19:59 ` Pavel Begunkov
  2020-07-13 19:59 ` [PATCH 3/3] io_uring: follow **iovec idiom in io_import_iovec Pavel Begunkov
  2020-07-13 21:09 ` [PATCH for-5.9 0/3] rw iovec copy cleanup Jens Axboe
  3 siblings, 0 replies; 10+ messages in thread
From: Pavel Begunkov @ 2020-07-13 19:59 UTC (permalink / raw)
  To: Jens Axboe, io-uring

Preparing reads/writes for async execution is a bit tricky. Extract a
helper so the logic isn't duplicated between the read and write prep
paths.

Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 46 ++++++++++++++++++++--------------------------
 1 file changed, 20 insertions(+), 26 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index d9c10070dcba..217dbb6563e7 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2871,11 +2871,27 @@ static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size,
 	return 0;
 }
 
+static inline int io_rw_prep_async(struct io_kiocb *req, int rw,
+				   bool force_nonblock)
+{
+	struct io_async_ctx *io = req->io;
+	struct iov_iter iter;
+	ssize_t ret;
+
+	io->rw.iov = io->rw.fast_iov;
+	req->io = NULL;
+	ret = io_import_iovec(rw, req, &io->rw.iov, &iter, !force_nonblock);
+	req->io = io;
+	if (unlikely(ret < 0))
+		return ret;
+
+	io_req_map_rw(req, ret, io->rw.iov, io->rw.fast_iov, &iter);
+	return 0;
+}
+
 static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 			bool force_nonblock)
 {
-	struct io_async_ctx *io;
-	struct iov_iter iter;
 	ssize_t ret;
 
 	ret = io_prep_rw(req, sqe, force_nonblock);
@@ -2888,17 +2904,7 @@ static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	/* either don't need iovec imported or already have it */
 	if (!req->io || req->flags & REQ_F_NEED_CLEANUP)
 		return 0;
-
-	io = req->io;
-	io->rw.iov = io->rw.fast_iov;
-	req->io = NULL;
-	ret = io_import_iovec(READ, req, &io->rw.iov, &iter, !force_nonblock);
-	req->io = io;
-	if (ret < 0)
-		return ret;
-
-	io_req_map_rw(req, ret, io->rw.iov, io->rw.fast_iov, &iter);
-	return 0;
+	return io_rw_prep_async(req, READ, force_nonblock);
 }
 
 static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode,
@@ -3042,8 +3048,6 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
 static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 			 bool force_nonblock)
 {
-	struct io_async_ctx *io;
-	struct iov_iter iter;
 	ssize_t ret;
 
 	ret = io_prep_rw(req, sqe, force_nonblock);
@@ -3058,17 +3062,7 @@ static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	/* either don't need iovec imported or already have it */
 	if (!req->io || req->flags & REQ_F_NEED_CLEANUP)
 		return 0;
-
-	io = req->io;
-	io->rw.iov = io->rw.fast_iov;
-	req->io = NULL;
-	ret = io_import_iovec(WRITE, req, &io->rw.iov, &iter, !force_nonblock);
-	req->io = io;
-	if (ret < 0)
-		return ret;
-
-	io_req_map_rw(req, ret, io->rw.iov, io->rw.fast_iov, &iter);
-	return 0;
+	return io_rw_prep_async(req, WRITE, force_nonblock);
 }
 
 static int io_write(struct io_kiocb *req, bool force_nonblock,
-- 
2.24.0



* [PATCH 3/3] io_uring: follow **iovec idiom in io_import_iovec
  2020-07-13 19:59 [PATCH for-5.9 0/3] rw iovec copy cleanup Pavel Begunkov
  2020-07-13 19:59 ` [PATCH 1/3] io_uring: simplify io_req_map_rw() Pavel Begunkov
  2020-07-13 19:59 ` [PATCH 2/3] io_uring: add a helper for async rw iovec prep Pavel Begunkov
@ 2020-07-13 19:59 ` Pavel Begunkov
  2020-07-13 21:09   ` Jens Axboe
  2020-07-13 21:09 ` [PATCH for-5.9 0/3] rw iovec copy cleanup Jens Axboe
  3 siblings, 1 reply; 10+ messages in thread
From: Pavel Begunkov @ 2020-07-13 19:59 UTC (permalink / raw)
  To: Jens Axboe, io-uring

As with import_iovec(), return a non-NULL iovec from io_import_iovec()
only when it should be freed. That includes returning NULL when the
iovec is already in req->io, because it should then be deallocated by
other means, e.g. inside an op handler. After io_setup_async_rw()
copies the local iovec into ->io, just mark it NULL, following the idea
in io_{read,write} as well.

That's easier to follow, and especially useful if we want to reuse
per-op space for completion data.

Signed-off-by: Pavel Begunkov <[email protected]>
---
 fs/io_uring.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 217dbb6563e7..0b9c0333d8c0 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2739,10 +2739,8 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
 	if (req->io) {
 		struct io_async_rw *iorw = &req->io->rw;
 
-		*iovec = iorw->iov;
-		iov_iter_init(iter, rw, *iovec, iorw->nr_segs, iorw->size);
-		if (iorw->iov == iorw->fast_iov)
-			*iovec = NULL;
+		iov_iter_init(iter, rw, iorw->iov, iorw->nr_segs, iorw->size);
+		*iovec = NULL;
 		return iorw->size;
 	}
 
@@ -3025,6 +3023,8 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
 						inline_vecs, &iter);
 			if (ret)
 				goto out_free;
+			/* it's copied and will be cleaned with ->io */
+			iovec = NULL;
 			/* if we can retry, do so with the callbacks armed */
 			if (io_rw_should_retry(req)) {
 				ret2 = io_iter_do_read(req, &iter);
@@ -3040,8 +3040,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
 		}
 	}
 out_free:
-	if (!(req->flags & REQ_F_NEED_CLEANUP))
-		kfree(iovec);
+	kfree(iovec);
 	return ret;
 }
 
@@ -3142,12 +3141,13 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
 						inline_vecs, &iter);
 			if (ret)
 				goto out_free;
+			/* it's copied and will be cleaned with ->io */
+			iovec = NULL;
 			return -EAGAIN;
 		}
 	}
 out_free:
-	if (!(req->flags & REQ_F_NEED_CLEANUP))
-		kfree(iovec);
+	kfree(iovec);
 	return ret;
 }
 
-- 
2.24.0


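The ownership rule the commit message describes can be shown with a
small userspace sketch; the names here (iovec_stub, import_iovec_stub,
FAST_SEGS) are hypothetical stand-ins, not the kernel API. The callee
hands back a non-NULL *iovec only when the caller owns the allocation,
so the cleanup path can free unconditionally:

#include <stdlib.h>
#include <string.h>

#define FAST_SEGS 8

struct iovec_stub {
	void *base;
	size_t len;
};

/*
 * Fill either the caller's inline fast_iov or a fresh allocation.
 * *iovec is set non-NULL only when the caller must free() it.
 */
static int import_iovec_stub(struct iovec_stub **iovec,
			     struct iovec_stub *fast_iov, int nr_segs)
{
	if (nr_segs <= FAST_SEGS) {
		memset(fast_iov, 0, nr_segs * sizeof(*fast_iov));
		*iovec = NULL;	/* inline storage, nothing to free */
		return nr_segs;
	}
	*iovec = calloc(nr_segs, sizeof(**iovec));
	return *iovec ? nr_segs : -1;
}

int main(void)
{
	struct iovec_stub fast[FAST_SEGS], *iov;

	if (import_iovec_stub(&iov, fast, 4) < 0)
		return 1;
	/* ... submit I/O from whichever storage holds the segments ... */
	free(iov);	/* free(NULL) is a no-op, like kfree(NULL) */
	return 0;
}

This is the same contract that lets io_read()/io_write() above drop the
REQ_F_NEED_CLEANUP check and call kfree(iovec) unconditionally.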

* Re: [PATCH 3/3] io_uring: follow **iovec idiom in io_import_iovec
  2020-07-13 19:59 ` [PATCH 3/3] io_uring: follow **iovec idiom in io_import_iovec Pavel Begunkov
@ 2020-07-13 21:09   ` Jens Axboe
  2020-07-13 21:12     ` Pavel Begunkov
  0 siblings, 1 reply; 10+ messages in thread
From: Jens Axboe @ 2020-07-13 21:09 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 7/13/20 1:59 PM, Pavel Begunkov wrote:
> @@ -3040,8 +3040,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
>  		}
>  	}
>  out_free:
> -	if (!(req->flags & REQ_F_NEED_CLEANUP))
> -		kfree(iovec);
> +	kfree(iovec);
>  	return ret;
>  }

Faster to do:

if (iovec)
	kfree(iovec)

to avoid a stupid call. Kind of crazy, but I just verified with this one
as well that it's worth about 1.3% CPU in my stress test.

Apart from that, looks good, I just folded in that change.

-- 
Jens Axboe


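Folded into the patch, the cleanup path Jens describes would read
roughly like this (an illustrative fragment, not the committed hunk):

 out_free:
-	kfree(iovec);
+	if (iovec)
+		kfree(iovec);
 	return ret;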

* Re: [PATCH for-5.9 0/3] rw iovec copy cleanup
  2020-07-13 19:59 [PATCH for-5.9 0/3] rw iovec copy cleanup Pavel Begunkov
                   ` (2 preceding siblings ...)
  2020-07-13 19:59 ` [PATCH 3/3] io_uring: follow **iovec idiom in io_import_iovec Pavel Begunkov
@ 2020-07-13 21:09 ` Jens Axboe
  3 siblings, 0 replies; 10+ messages in thread
From: Jens Axboe @ 2020-07-13 21:09 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 7/13/20 1:59 PM, Pavel Begunkov wrote:
> Small cleanups related to preparing ->iter and ->iovec
> of ->io to async. The last patch is needed for the "completion data"
> series.

Looks good to me, applied.

-- 
Jens Axboe



* Re: [PATCH 3/3] io_uring: follow **iovec idiom in io_import_iovec
  2020-07-13 21:09   ` Jens Axboe
@ 2020-07-13 21:12     ` Pavel Begunkov
  2020-07-13 21:16       ` Jens Axboe
  0 siblings, 1 reply; 10+ messages in thread
From: Pavel Begunkov @ 2020-07-13 21:12 UTC (permalink / raw)
  To: Jens Axboe, io-uring

On 14/07/2020 00:09, Jens Axboe wrote:
> On 7/13/20 1:59 PM, Pavel Begunkov wrote:
>> @@ -3040,8 +3040,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
>>  		}
>>  	}
>>  out_free:
>> -	if (!(req->flags & REQ_F_NEED_CLEANUP))
>> -		kfree(iovec);
>> +	kfree(iovec);
>>  	return ret;
>>  }
> 
> Faster to do:
> 
> if (iovec)
> 	kfree(iovec)
> 
> to avoid a stupid call. Kind of crazy, but I just verified with this one
> as well that it's worth about 1.3% CPU in my stress test.

That looks crazy indeed

> Apart from that, looks good, I just folded in that change.

Great, thanks

-- 
Pavel Begunkov


* Re: [PATCH 3/3] io_uring: follow **iovec idiom in io_import_iovec
  2020-07-13 21:12     ` Pavel Begunkov
@ 2020-07-13 21:16       ` Jens Axboe
  2020-07-13 21:18         ` Pavel Begunkov
  0 siblings, 1 reply; 10+ messages in thread
From: Jens Axboe @ 2020-07-13 21:16 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 7/13/20 3:12 PM, Pavel Begunkov wrote:
> On 14/07/2020 00:09, Jens Axboe wrote:
>> On 7/13/20 1:59 PM, Pavel Begunkov wrote:
>>> @@ -3040,8 +3040,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
>>>  		}
>>>  	}
>>>  out_free:
>>> -	if (!(req->flags & REQ_F_NEED_CLEANUP))
>>> -		kfree(iovec);
>>> +	kfree(iovec);
>>>  	return ret;
>>>  }
>>
>> Faster to do:
>>
>> if (iovec)
>> 	kfree(iovec)
>>
>> to avoid a stupid call. Kind of crazy, but I just verified with this one
>> as well that it's worth about 1.3% CPU in my stress test.
> 
> That looks crazy indeed

I suspect what needs to happen is that kfree should be something ala:

static inline void kfree(void *ptr)
{
	if (ptr)
		__kfree(ptr);
}

to avoid silly games like this. Needs to touch all three slab
allocators, though definitely in the trivial category.

-- 
Jens Axboe



* Re: [PATCH 3/3] io_uring: follow **iovec idiom in io_import_iovec
  2020-07-13 21:16       ` Jens Axboe
@ 2020-07-13 21:18         ` Pavel Begunkov
  2020-07-13 21:26           ` Jens Axboe
  0 siblings, 1 reply; 10+ messages in thread
From: Pavel Begunkov @ 2020-07-13 21:18 UTC (permalink / raw)
  To: Jens Axboe, io-uring

On 14/07/2020 00:16, Jens Axboe wrote:
> On 7/13/20 3:12 PM, Pavel Begunkov wrote:
>> On 14/07/2020 00:09, Jens Axboe wrote:
>>> On 7/13/20 1:59 PM, Pavel Begunkov wrote:
>>>> @@ -3040,8 +3040,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
>>>>  		}
>>>>  	}
>>>>  out_free:
>>>> -	if (!(req->flags & REQ_F_NEED_CLEANUP))
>>>> -		kfree(iovec);
>>>> +	kfree(iovec);
>>>>  	return ret;
>>>>  }
>>>
>>> Faster to do:
>>>
>>> if (iovec)
>>> 	kfree(iovec)
>>>
>>> to avoid a stupid call. Kind of crazy, but I just verified with this one
>>> as well that it's worth about 1.3% CPU in my stress test.
>>
>> That looks crazy indeed
> 
> I suspect what needs to happen is that kfree should be something ala:
> 
> static inline void kfree(void *ptr)
> {
> 	if (ptr)
> 		__kfree(ptr);
> }
> 
> to avoid silly games like this. Needs to touch all three slab
> allocators, though definitely in the trivial category.

Just thought the same, but I'm not sure it's that common to have kfree(NULL).

The drop is probably because of extra call + cold jumps with unlikely().

void kfree(const void *objp) {
	trace_kfree(_RET_IP_, objp);

	if (unlikely(ZERO_OR_NULL_PTR(objp)))
		return;
	/* ... actual freeing follows ... */
}

-- 
Pavel Begunkov


* Re: [PATCH 3/3] io_uring: follow **iovec idiom in io_import_iovec
  2020-07-13 21:18         ` Pavel Begunkov
@ 2020-07-13 21:26           ` Jens Axboe
  0 siblings, 0 replies; 10+ messages in thread
From: Jens Axboe @ 2020-07-13 21:26 UTC (permalink / raw)
  To: Pavel Begunkov, io-uring

On 7/13/20 3:18 PM, Pavel Begunkov wrote:
> On 14/07/2020 00:16, Jens Axboe wrote:
>> On 7/13/20 3:12 PM, Pavel Begunkov wrote:
>>> On 14/07/2020 00:09, Jens Axboe wrote:
>>>> On 7/13/20 1:59 PM, Pavel Begunkov wrote:
>>>>> @@ -3040,8 +3040,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
>>>>>  		}
>>>>>  	}
>>>>>  out_free:
>>>>> -	if (!(req->flags & REQ_F_NEED_CLEANUP))
>>>>> -		kfree(iovec);
>>>>> +	kfree(iovec);
>>>>>  	return ret;
>>>>>  }
>>>>
>>>> Faster to do:
>>>>
>>>> if (iovec)
>>>> 	kfree(iovec)
>>>>
>>>> to avoid a stupid call. Kind of crazy, but I just verified with this one
>>>> as well that it's worth about 1.3% CPU in my stress test.
>>>
>>> That looks crazy indeed
>>
>> I suspect what needs to happen is that kfree should be something ala:
>>
>> static inline void kfree(void *ptr)
>> {
>> 	if (ptr)
>> 		__kfree(ptr);
>> }
>>
>> to avoid silly games like this. Needs to touch all three slab
>> allocators, though definitely in the trivial category.
> 
> Just thought the same, but I'm not sure it's that common to have kfree(NULL).

Right, except in the io_read/io_write path, where it'll be 100% common
unless you have more than the inline number of segments.

I see the same thing for e.g. the slab should_failslab() call, which
isn't inlined even when the kconfig option isn't enabled. And for
should_fail_bio() as well. Those two add up to another ~1% or so of
pointless overhead.

> The drop is probably because of extra call + cold jumps with unlikely().
> 
> void kfree(const void *objp) {
> 	trace_kfree(_RET_IP_, objp);
> 
> 	if (unlikely(ZERO_OR_NULL_PTR(objp)))
> 		return;
> 	/* ... actual freeing follows ... */
> }

Must be, since the kfree() one adds up to more than the two above,
which cost just the call itself.

-- 
Jens Axboe



