* [PATCH for-5.9 0/3] rw iovec copy cleanup
@ 2020-07-13 19:59 Pavel Begunkov
2020-07-13 19:59 ` [PATCH 1/3] io_uring: simplify io_req_map_rw() Pavel Begunkov
` (3 more replies)
0 siblings, 4 replies; 10+ messages in thread
From: Pavel Begunkov @ 2020-07-13 19:59 UTC (permalink / raw)
To: Jens Axboe, io-uring
Small cleanups related to preparing the ->iter and ->iovec
of ->io for async execution. The last patch is needed for the
"completion data" series.
Pavel Begunkov (3):
io_uring: simplify io_req_map_rw()
io_uring: add a helper for async rw iovec prep
io_uring: follow **iovec idiom in io_import_iovec
fs/io_uring.c | 78 ++++++++++++++++++++++++---------------------------
1 file changed, 37 insertions(+), 41 deletions(-)
--
2.24.0
* [PATCH 1/3] io_uring: simplify io_req_map_rw()
2020-07-13 19:59 [PATCH for-5.9 0/3] rw iovec copy cleanup Pavel Begunkov
@ 2020-07-13 19:59 ` Pavel Begunkov
2020-07-13 19:59 ` [PATCH 2/3] io_uring: add a helper for async rw iovec prep Pavel Begunkov
` (2 subsequent siblings)
3 siblings, 0 replies; 10+ messages in thread
From: Pavel Begunkov @ 2020-07-13 19:59 UTC (permalink / raw)
To: Jens Axboe, io-uring
Don't dereference req->io->rw every time, but put it in a local variable.
This looks prettier, generates fewer instructions, and doesn't break
alias analysis.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 16 +++++++++-------
1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 6eae2fb469f9..d9c10070dcba 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2827,15 +2827,17 @@ static void io_req_map_rw(struct io_kiocb *req, ssize_t io_size,
struct iovec *iovec, struct iovec *fast_iov,
struct iov_iter *iter)
{
- req->io->rw.nr_segs = iter->nr_segs;
- req->io->rw.size = io_size;
- req->io->rw.iov = iovec;
- if (!req->io->rw.iov) {
- req->io->rw.iov = req->io->rw.fast_iov;
- if (req->io->rw.iov != fast_iov)
- memcpy(req->io->rw.iov, fast_iov,
+ struct io_async_rw *rw = &req->io->rw;
+
+ rw->nr_segs = iter->nr_segs;
+ rw->size = io_size;
+ if (!iovec) {
+ rw->iov = rw->fast_iov;
+ if (rw->iov != fast_iov)
+ memcpy(rw->iov, fast_iov,
sizeof(struct iovec) * iter->nr_segs);
} else {
+ rw->iov = iovec;
req->flags |= REQ_F_NEED_CLEANUP;
}
}
--
2.24.0
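For context, a standalone sketch of the alias-analysis point, using
hypothetical types rather than the real io_uring structs. The kernel is
built with -fno-strict-aliasing, so the compiler has to assume a store
through req->io->rw may change what req->io itself points at, and reloads
the pointer before each access; caching &req->io->rw in a local makes the
later stores plain offset writes from one load:

	struct rw_state { void *iov; unsigned nr_segs; unsigned long size; };
	struct async_ctx { struct rw_state rw; };
	struct request { struct async_ctx *io; };

	static void map_repeated(struct request *req, unsigned n, unsigned long sz)
	{
		req->io->rw.nr_segs = n;	/* load req->io, then store */
		req->io->rw.size = sz;		/* must reload req->io first */
	}

	static void map_cached(struct request *req, unsigned n, unsigned long sz)
	{
		struct rw_state *rw = &req->io->rw;	/* single load */

		rw->nr_segs = n;	/* offset stores from the cached pointer */
		rw->size = sz;
	}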
* [PATCH 2/3] io_uring: add a helper for async rw iovec prep
2020-07-13 19:59 [PATCH for-5.9 0/3] rw iovec copy cleanup Pavel Begunkov
2020-07-13 19:59 ` [PATCH 1/3] io_uring: simplify io_req_map_rw() Pavel Begunkov
@ 2020-07-13 19:59 ` Pavel Begunkov
2020-07-13 19:59 ` [PATCH 3/3] io_uring: follow **iovec idiom in io_import_iovec Pavel Begunkov
2020-07-13 21:09 ` [PATCH for-5.9 0/3] rw iovec copy cleanup Jens Axboe
3 siblings, 0 replies; 10+ messages in thread
From: Pavel Begunkov @ 2020-07-13 19:59 UTC (permalink / raw)
To: Jens Axboe, io-uring
Preparing reads/writes for async execution is a bit tricky. Extract a
helper to avoid repeating the same code for reads and writes.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 46 ++++++++++++++++++++--------------------------
1 file changed, 20 insertions(+), 26 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index d9c10070dcba..217dbb6563e7 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2871,11 +2871,27 @@ static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size,
return 0;
}
+static inline int io_rw_prep_async(struct io_kiocb *req, int rw,
+ bool force_nonblock)
+{
+ struct io_async_ctx *io = req->io;
+ struct iov_iter iter;
+ ssize_t ret;
+
+ io->rw.iov = io->rw.fast_iov;
+ req->io = NULL;
+ ret = io_import_iovec(rw, req, &io->rw.iov, &iter, !force_nonblock);
+ req->io = io;
+ if (unlikely(ret < 0))
+ return ret;
+
+ io_req_map_rw(req, ret, io->rw.iov, io->rw.fast_iov, &iter);
+ return 0;
+}
+
static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
bool force_nonblock)
{
- struct io_async_ctx *io;
- struct iov_iter iter;
ssize_t ret;
ret = io_prep_rw(req, sqe, force_nonblock);
@@ -2888,17 +2904,7 @@ static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
/* either don't need iovec imported or already have it */
if (!req->io || req->flags & REQ_F_NEED_CLEANUP)
return 0;
-
- io = req->io;
- io->rw.iov = io->rw.fast_iov;
- req->io = NULL;
- ret = io_import_iovec(READ, req, &io->rw.iov, &iter, !force_nonblock);
- req->io = io;
- if (ret < 0)
- return ret;
-
- io_req_map_rw(req, ret, io->rw.iov, io->rw.fast_iov, &iter);
- return 0;
+ return io_rw_prep_async(req, READ, force_nonblock);
}
static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode,
@@ -3042,8 +3048,6 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
bool force_nonblock)
{
- struct io_async_ctx *io;
- struct iov_iter iter;
ssize_t ret;
ret = io_prep_rw(req, sqe, force_nonblock);
@@ -3058,17 +3062,7 @@ static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
/* either don't need iovec imported or already have it */
if (!req->io || req->flags & REQ_F_NEED_CLEANUP)
return 0;
-
- io = req->io;
- io->rw.iov = io->rw.fast_iov;
- req->io = NULL;
- ret = io_import_iovec(WRITE, req, &io->rw.iov, &iter, !force_nonblock);
- req->io = io;
- if (ret < 0)
- return ret;
-
- io_req_map_rw(req, ret, io->rw.iov, io->rw.fast_iov, &iter);
- return 0;
+ return io_rw_prep_async(req, WRITE, force_nonblock);
}
static int io_write(struct io_kiocb *req, bool force_nonblock,
--
2.24.0
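One subtlety in the helper, restated with explanatory comments (the code
mirrors the hunk above; only the comments are added):

	io->rw.iov = io->rw.fast_iov;	/* default to the inline vector */
	req->io = NULL;			/* io_import_iovec() takes an early-return
					 * path when req->io is set, reusing the
					 * saved copy; clear it to force a real
					 * import */
	ret = io_import_iovec(rw, req, &io->rw.iov, &iter, !force_nonblock);
	req->io = io;			/* restore before mapping the result back */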
* [PATCH 3/3] io_uring: follow **iovec idiom in io_import_iovec
2020-07-13 19:59 [PATCH for-5.9 0/3] rw iovec copy cleanup Pavel Begunkov
2020-07-13 19:59 ` [PATCH 1/3] io_uring: simplify io_req_map_rw() Pavel Begunkov
2020-07-13 19:59 ` [PATCH 2/3] io_uring: add a helper for async rw iovec prep Pavel Begunkov
@ 2020-07-13 19:59 ` Pavel Begunkov
2020-07-13 21:09 ` Jens Axboe
2020-07-13 21:09 ` [PATCH for-5.9 0/3] rw iovec copy cleanup Jens Axboe
3 siblings, 1 reply; 10+ messages in thread
From: Pavel Begunkov @ 2020-07-13 19:59 UTC (permalink / raw)
To: Jens Axboe, io-uring
As for import_iovec(), return a !=NULL iovec from io_import_iovec() only
when it should be freed. That includes returning NULL when the iovec is
already in req->io, because it should be deallocated by other means,
e.g. inside an op handler. After io_setup_async_rw() copies the local
iovec to ->io, just mark it NULL, to follow the idea in io_{read,write}
as well.
That's easier to follow, and especially useful if we want to reuse
per-op space for completion data.
Signed-off-by: Pavel Begunkov <[email protected]>
---
fs/io_uring.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 217dbb6563e7..0b9c0333d8c0 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2739,10 +2739,8 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
if (req->io) {
struct io_async_rw *iorw = &req->io->rw;
- *iovec = iorw->iov;
- iov_iter_init(iter, rw, *iovec, iorw->nr_segs, iorw->size);
- if (iorw->iov == iorw->fast_iov)
- *iovec = NULL;
+ iov_iter_init(iter, rw, iorw->iov, iorw->nr_segs, iorw->size);
+ *iovec = NULL;
return iorw->size;
}
@@ -3025,6 +3023,8 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
inline_vecs, &iter);
if (ret)
goto out_free;
+ /* it's copied and will be cleaned with ->io */
+ iovec = NULL;
/* if we can retry, do so with the callbacks armed */
if (io_rw_should_retry(req)) {
ret2 = io_iter_do_read(req, &iter);
@@ -3040,8 +3040,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
}
}
out_free:
- if (!(req->flags & REQ_F_NEED_CLEANUP))
- kfree(iovec);
+ kfree(iovec);
return ret;
}
@@ -3142,12 +3141,13 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
inline_vecs, &iter);
if (ret)
goto out_free;
+ /* it's copied and will be cleaned with ->io */
+ iovec = NULL;
return -EAGAIN;
}
}
out_free:
- if (!(req->flags & REQ_F_NEED_CLEANUP))
- kfree(iovec);
+ kfree(iovec);
return ret;
}
--
2.24.0
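The resulting caller-side contract, as a short sketch (do_io() is a
hypothetical stand-in for the actual read/write path):

	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
	struct iov_iter iter;
	ssize_t ret;

	ret = io_import_iovec(READ, req, &iovec, &iter, !force_nonblock);
	if (ret < 0)
		return ret;
	/* iovec is non-NULL only if it was allocated for this call; NULL
	 * means the inline array or req->io's copy is in use and is freed
	 * elsewhere, so the unconditional kfree() below is always safe. */
	ret = do_io(req, &iter);
	kfree(iovec);		/* no-op for NULL */
	return ret;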
* Re: [PATCH 3/3] io_uring: follow **iovec idiom in io_import_iovec
2020-07-13 19:59 ` [PATCH 3/3] io_uring: follow **iovec idiom in io_import_iovec Pavel Begunkov
@ 2020-07-13 21:09 ` Jens Axboe
2020-07-13 21:12 ` Pavel Begunkov
0 siblings, 1 reply; 10+ messages in thread
From: Jens Axboe @ 2020-07-13 21:09 UTC (permalink / raw)
To: Pavel Begunkov, io-uring
On 7/13/20 1:59 PM, Pavel Begunkov wrote:
> @@ -3040,8 +3040,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
> }
> }
> out_free:
> - if (!(req->flags & REQ_F_NEED_CLEANUP))
> - kfree(iovec);
> + kfree(iovec);
> return ret;
> }
Faster to do:
if (iovec)
kfree(iovec);
to avoid a stupid call. Kind of crazy, but I just verified with this one
as well that it's worth about 1.3% CPU in my stress test.
Apart from that, looks good, I just folded in that change.
--
Jens Axboe
* Re: [PATCH for-5.9 0/3] rw iovec copy cleanup
2020-07-13 19:59 [PATCH for-5.9 0/3] rw iovec copy cleanup Pavel Begunkov
` (2 preceding siblings ...)
2020-07-13 19:59 ` [PATCH 3/3] io_uring: follow **iovec idiom in io_import_iovec Pavel Begunkov
@ 2020-07-13 21:09 ` Jens Axboe
3 siblings, 0 replies; 10+ messages in thread
From: Jens Axboe @ 2020-07-13 21:09 UTC (permalink / raw)
To: Pavel Begunkov, io-uring
On 7/13/20 1:59 PM, Pavel Begunkov wrote:
> Small cleanups related to preparing the ->iter and ->iovec
> of ->io for async execution. The last patch is needed for the
> "completion data" series.
Looks good to me, applied.
--
Jens Axboe
* Re: [PATCH 3/3] io_uring: follow **iovec idiom in io_import_iovec
2020-07-13 21:09 ` Jens Axboe
@ 2020-07-13 21:12 ` Pavel Begunkov
2020-07-13 21:16 ` Jens Axboe
0 siblings, 1 reply; 10+ messages in thread
From: Pavel Begunkov @ 2020-07-13 21:12 UTC (permalink / raw)
To: Jens Axboe, io-uring
On 14/07/2020 00:09, Jens Axboe wrote:
> On 7/13/20 1:59 PM, Pavel Begunkov wrote:
>> @@ -3040,8 +3040,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
>> }
>> }
>> out_free:
>> - if (!(req->flags & REQ_F_NEED_CLEANUP))
>> - kfree(iovec);
>> + kfree(iovec);
>> return ret;
>> }
>
> Faster to do:
>
> if (iovec)
> kfree(iovec);
>
> to avoid a stupid call. Kind of crazy, but I just verified with this one
> as well that it's worth about 1.3% CPU in my stress test.
That looks crazy indeed
> Apart from that, looks good, I just folded in that change.
Great, thanks
--
Pavel Begunkov
* Re: [PATCH 3/3] io_uring: follow **iovec idiom in io_import_iovec
2020-07-13 21:12 ` Pavel Begunkov
@ 2020-07-13 21:16 ` Jens Axboe
2020-07-13 21:18 ` Pavel Begunkov
0 siblings, 1 reply; 10+ messages in thread
From: Jens Axboe @ 2020-07-13 21:16 UTC (permalink / raw)
To: Pavel Begunkov, io-uring
On 7/13/20 3:12 PM, Pavel Begunkov wrote:
> On 14/07/2020 00:09, Jens Axboe wrote:
>> On 7/13/20 1:59 PM, Pavel Begunkov wrote:
>>> @@ -3040,8 +3040,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
>>> }
>>> }
>>> out_free:
>>> - if (!(req->flags & REQ_F_NEED_CLEANUP))
>>> - kfree(iovec);
>>> + kfree(iovec);
>>> return ret;
>>> }
>>
>> Faster to do:
>>
>> if (iovec)
>> kfree(iovec);
>>
>> to avoid a stupid call. Kind of crazy, but I just verified with this one
>> as well that it's worth about 1.3% CPU in my stress test.
>
> That looks crazy indeed
I suspect what needs to happen is that kfree should be something a la:
static inline void kfree(void *ptr)
{
if (ptr)
__kfree(ptr);
}
to avoid silly games like this. Needs to touch all three slab
allocators, though definitely in the trivial category.
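With such a wrapper, the call site this patch touches would get the check
for free — a sketch assuming the wrapper above, with __kfree as the
hypothetical out-of-line part:

	out_free:
		kfree(iovec);	/* inlines to: if (iovec) __kfree(iovec); */
		return ret;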
--
Jens Axboe
* Re: [PATCH 3/3] io_uring: follow **iovec idiom in io_import_iovec
2020-07-13 21:16 ` Jens Axboe
@ 2020-07-13 21:18 ` Pavel Begunkov
2020-07-13 21:26 ` Jens Axboe
0 siblings, 1 reply; 10+ messages in thread
From: Pavel Begunkov @ 2020-07-13 21:18 UTC (permalink / raw)
To: Jens Axboe, io-uring
On 14/07/2020 00:16, Jens Axboe wrote:
> On 7/13/20 3:12 PM, Pavel Begunkov wrote:
>> On 14/07/2020 00:09, Jens Axboe wrote:
>>> On 7/13/20 1:59 PM, Pavel Begunkov wrote:
>>>> @@ -3040,8 +3040,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
>>>> }
>>>> }
>>>> out_free:
>>>> - if (!(req->flags & REQ_F_NEED_CLEANUP))
>>>> - kfree(iovec);
>>>> + kfree(iovec);
>>>> return ret;
>>>> }
>>>
>>> Faster to do:
>>>
>>> if (iovec)
>>> kfree(iovec);
>>>
>>> to avoid a stupid call. Kind of crazy, but I just verified with this one
>>> as well that it's worth about 1.3% CPU in my stress test.
>>
>> That looks crazy indeed
>
> I suspect what needs to happen is that kfree should be something a la:
>
> static inline void kfree(void *ptr)
> {
> if (ptr)
> __kfree(ptr);
> }
>
> to avoid silly games like this. Needs to touch all three slab
> allocators, though definitely in the trivial category.
Just thought the same, but not sure it's too common to have kfree(NULL).
The drop is probably because of the extra call + cold jumps with unlikely().
void kfree(const void *objp) {
trace_kfree(_RET_IP_, objp);
if (unlikely(ZERO_OR_NULL_PTR(objp)))
return;
}
--
Pavel Begunkov
* Re: [PATCH 3/3] io_uring: follow **iovec idiom in io_import_iovec
2020-07-13 21:18 ` Pavel Begunkov
@ 2020-07-13 21:26 ` Jens Axboe
0 siblings, 0 replies; 10+ messages in thread
From: Jens Axboe @ 2020-07-13 21:26 UTC (permalink / raw)
To: Pavel Begunkov, io-uring
On 7/13/20 3:18 PM, Pavel Begunkov wrote:
> On 14/07/2020 00:16, Jens Axboe wrote:
>> On 7/13/20 3:12 PM, Pavel Begunkov wrote:
>>> On 14/07/2020 00:09, Jens Axboe wrote:
>>>> On 7/13/20 1:59 PM, Pavel Begunkov wrote:
>>>>> @@ -3040,8 +3040,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
>>>>> }
>>>>> }
>>>>> out_free:
>>>>> - if (!(req->flags & REQ_F_NEED_CLEANUP))
>>>>> - kfree(iovec);
>>>>> + kfree(iovec);
>>>>> return ret;
>>>>> }
>>>>
>>>> Faster to do:
>>>>
>>>> if (iovec)
>>>> kfree(iovec);
>>>>
>>>> to avoid a stupid call. Kind of crazy, but I just verified with this one
>>>> as well that it's worth about 1.3% CPU in my stress test.
>>>
>>> That looks crazy indeed
>>
>> I suspect what needs to happen is that kfree should be something a la:
>>
>> static inline void kfree(void *ptr)
>> {
>> if (ptr)
>> __kfree(ptr);
>> }
>>
>> to avoid silly games like this. Needs to touch all three slab
>> allocators, though definitely in the trivial category.
>
> Just thought the same, but not sure it's too common to have kfree(NULL).
Right, except in the io_read/io_write path, where it'll be 100% common
unless you have more than the inline number of segments.
I see the same thing for e.g. the slab should_failslab() call, which isn't
inlined even if the kconfig option isn't enabled. And for should_fail_bio()
as well. Those two add up to another ~1% or so of pointless overhead.
> The drop is probably because of the extra call + cold jumps with unlikely().
>
> void kfree(const void *objp) {
> trace_kfree(_RET_IP_, objp);
>
> if (unlikely(ZERO_OR_NULL_PTR(objp)))
> return;
> }
Must be, since the kfree() one adds up to more than the two above, which
are just the call itself.
--
Jens Axboe