* [PATCH 2/3] pipe: enable handling of IOCB_NOWAIT
2023-03-08 3:10 [PATCHSET for-next 0/3] Add FMODE_NOWAIT support to pipes Jens Axboe
@ 2023-03-08 3:10 ` Jens Axboe
2023-03-14 9:26 ` Christian Brauner
2023-03-14 9:38 ` Matthew Wilcox
0 siblings, 2 replies; 14+ messages in thread
From: Jens Axboe @ 2023-03-08 3:10 UTC (permalink / raw)
To: io-uring, linux-fsdevel; +Cc: Jens Axboe
In preparation for enabling FMODE_NOWAIT for pipes, ensure that the read
and write path handle it correctly. This includes the pipe locking,
page allocation for writes, and confirming pipe buffers.
Signed-off-by: Jens Axboe <[email protected]>
---
fs/pipe.c | 34 ++++++++++++++++++++++++++--------
1 file changed, 26 insertions(+), 8 deletions(-)
diff --git a/fs/pipe.c b/fs/pipe.c
index 340f253913a2..10366a6cb5b6 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -108,6 +108,16 @@ static inline void __pipe_unlock(struct pipe_inode_info *pipe)
mutex_unlock(&pipe->mutex);
}
+static inline bool __pipe_trylock(struct pipe_inode_info *pipe, bool nonblock)
+{
+ if (!nonblock) {
+ __pipe_lock(pipe);
+ return true;
+ }
+
+ return mutex_trylock(&pipe->mutex);
+}
+
void pipe_double_lock(struct pipe_inode_info *pipe1,
struct pipe_inode_info *pipe2)
{
@@ -234,6 +244,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
struct file *filp = iocb->ki_filp;
struct pipe_inode_info *pipe = filp->private_data;
bool was_full, wake_next_reader = false;
+ const bool nonblock = iocb->ki_flags & IOCB_NOWAIT;
ssize_t ret;
/* Null read succeeds. */
@@ -241,7 +252,8 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
return 0;
ret = 0;
- __pipe_lock(pipe);
+ if (!__pipe_trylock(pipe, nonblock))
+ return -EAGAIN;
/*
* We only wake up writers if the pipe was full when we started
@@ -297,7 +309,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
chars = total_len;
}
- error = pipe_buf_confirm(pipe, buf, false);
+ error = pipe_buf_confirm(pipe, buf, nonblock);
if (error) {
if (!ret)
ret = error;
@@ -342,7 +354,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
break;
if (ret)
break;
- if (filp->f_flags & O_NONBLOCK) {
+ if (filp->f_flags & O_NONBLOCK || nonblock) {
ret = -EAGAIN;
break;
}
@@ -423,12 +435,14 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
ssize_t chars;
bool was_empty = false;
bool wake_next_writer = false;
+ const bool nonblock = iocb->ki_flags & IOCB_NOWAIT;
/* Null write succeeds. */
if (unlikely(total_len == 0))
return 0;
- __pipe_lock(pipe);
+ if (!__pipe_trylock(pipe, nonblock))
+ return -EAGAIN;
if (!pipe->readers) {
send_sig(SIGPIPE, current, 0);
@@ -461,7 +475,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
if ((buf->flags & PIPE_BUF_FLAG_CAN_MERGE) &&
offset + chars <= PAGE_SIZE) {
- ret = pipe_buf_confirm(pipe, buf, false);
+ ret = pipe_buf_confirm(pipe, buf, nonblock);
if (ret)
goto out;
@@ -493,9 +507,13 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
int copied;
if (!page) {
- page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
+ gfp_t gfp = __GFP_HIGHMEM | __GFP_ACCOUNT;
+
+ if (!nonblock)
+ gfp |= GFP_USER;
+ page = alloc_page(gfp);
if (unlikely(!page)) {
- ret = ret ? : -ENOMEM;
+ ret = ret ? : nonblock ? -EAGAIN : -ENOMEM;
break;
}
pipe->tmp_page = page;
@@ -547,7 +565,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
continue;
/* Wait for buffer space to become available. */
- if (filp->f_flags & O_NONBLOCK) {
+ if (filp->f_flags & O_NONBLOCK || nonblock) {
if (!ret)
ret = -EAGAIN;
break;
--
2.39.2
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH 2/3] pipe: enable handling of IOCB_NOWAIT
2023-03-08 3:10 ` [PATCH 2/3] pipe: enable handling of IOCB_NOWAIT Jens Axboe
@ 2023-03-14 9:26 ` Christian Brauner
2023-03-14 12:03 ` Jens Axboe
2023-03-14 9:38 ` Matthew Wilcox
1 sibling, 1 reply; 14+ messages in thread
From: Christian Brauner @ 2023-03-14 9:26 UTC (permalink / raw)
To: Jens Axboe; +Cc: io-uring, linux-fsdevel
On Tue, Mar 07, 2023 at 08:10:32PM -0700, Jens Axboe wrote:
> In preparation for enabling FMODE_NOWAIT for pipes, ensure that the read
> and write path handle it correctly. This includes the pipe locking,
> page allocation for writes, and confirming pipe buffers.
>
> Signed-off-by: Jens Axboe <[email protected]>
> ---
> fs/pipe.c | 34 ++++++++++++++++++++++++++--------
> 1 file changed, 26 insertions(+), 8 deletions(-)
>
> diff --git a/fs/pipe.c b/fs/pipe.c
> index 340f253913a2..10366a6cb5b6 100644
> --- a/fs/pipe.c
> +++ b/fs/pipe.c
> @@ -108,6 +108,16 @@ static inline void __pipe_unlock(struct pipe_inode_info *pipe)
> mutex_unlock(&pipe->mutex);
> }
>
> +static inline bool __pipe_trylock(struct pipe_inode_info *pipe, bool nonblock)
> +{
> + if (!nonblock) {
> + __pipe_lock(pipe);
> + return true;
> + }
> +
> + return mutex_trylock(&pipe->mutex);
> +}
> +
> void pipe_double_lock(struct pipe_inode_info *pipe1,
> struct pipe_inode_info *pipe2)
> {
> @@ -234,6 +244,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
> struct file *filp = iocb->ki_filp;
> struct pipe_inode_info *pipe = filp->private_data;
> bool was_full, wake_next_reader = false;
> + const bool nonblock = iocb->ki_flags & IOCB_NOWAIT;
> ssize_t ret;
>
> /* Null read succeeds. */
> @@ -241,7 +252,8 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
> return 0;
>
> ret = 0;
> - __pipe_lock(pipe);
> + if (!__pipe_trylock(pipe, nonblock))
> + return -EAGAIN;
>
> /*
> * We only wake up writers if the pipe was full when we started
> @@ -297,7 +309,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
> chars = total_len;
> }
>
> - error = pipe_buf_confirm(pipe, buf, false);
> + error = pipe_buf_confirm(pipe, buf, nonblock);
> if (error) {
> if (!ret)
> ret = error;
> @@ -342,7 +354,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
> break;
> if (ret)
> break;
> - if (filp->f_flags & O_NONBLOCK) {
> + if (filp->f_flags & O_NONBLOCK || nonblock) {
> ret = -EAGAIN;
> break;
> }
> @@ -423,12 +435,14 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
> ssize_t chars;
> bool was_empty = false;
> bool wake_next_writer = false;
> + const bool nonblock = iocb->ki_flags & IOCB_NOWAIT;
>
> /* Null write succeeds. */
> if (unlikely(total_len == 0))
> return 0;
>
> - __pipe_lock(pipe);
> + if (!__pipe_trylock(pipe, nonblock))
> + return -EAGAIN;
>
> if (!pipe->readers) {
> send_sig(SIGPIPE, current, 0);
> @@ -461,7 +475,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
>
> if ((buf->flags & PIPE_BUF_FLAG_CAN_MERGE) &&
> offset + chars <= PAGE_SIZE) {
> - ret = pipe_buf_confirm(pipe, buf, false);
> + ret = pipe_buf_confirm(pipe, buf, nonblock);
> if (ret)
> goto out;
>
> @@ -493,9 +507,13 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
> int copied;
>
> if (!page) {
> - page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
> + gfp_t gfp = __GFP_HIGHMEM | __GFP_ACCOUNT;
> +
> + if (!nonblock)
> + gfp |= GFP_USER;
Just for my education: Does this encode the assumpation that the
non-blocking code can only be reached from io_uring and thus GFP_USER
can be dropped for that case? IOW, if there's other code that could in
the future reach the non blocking condition would this still be correct?
> + page = alloc_page(gfp);
> if (unlikely(!page)) {
> - ret = ret ? : -ENOMEM;
> + ret = ret ? : nonblock ? -EAGAIN : -ENOMEM;
Hm, could we try and avoid the nested "?:?:" please. Imho, that's easy
to misparse. Idk, doesn't need to be exactly that code but sm like:
if (!nonblock) {
gfp |= GFP_USER;
ret = -EAGAIN;
} else {
ret = -ENOMEM;
}
page = alloc_page(gfp);
if (unlikely(!page))
break;
else
ret = 0;
pipe->tmp_page = page;
or sm else.
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 2/3] pipe: enable handling of IOCB_NOWAIT
2023-03-08 3:10 ` [PATCH 2/3] pipe: enable handling of IOCB_NOWAIT Jens Axboe
2023-03-14 9:26 ` Christian Brauner
@ 2023-03-14 9:38 ` Matthew Wilcox
2023-03-14 12:04 ` Jens Axboe
1 sibling, 1 reply; 14+ messages in thread
From: Matthew Wilcox @ 2023-03-14 9:38 UTC (permalink / raw)
To: Jens Axboe; +Cc: io-uring, linux-fsdevel
On Tue, Mar 07, 2023 at 08:10:32PM -0700, Jens Axboe wrote:
> @@ -493,9 +507,13 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
> int copied;
>
> if (!page) {
> - page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
> + gfp_t gfp = __GFP_HIGHMEM | __GFP_ACCOUNT;
> +
> + if (!nonblock)
> + gfp |= GFP_USER;
> + page = alloc_page(gfp);
Hmm, looks like you drop __GFP_HARDWALL in the nonblock case. People who
use cpusets might be annoyed by that.
> if (unlikely(!page)) {
> - ret = ret ? : -ENOMEM;
> + ret = ret ? : nonblock ? -EAGAIN : -ENOMEM;
double ternary operator? really?
if (!ret)
ret = nonblock ? -EAGAIN : -ENOMEM;
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 2/3] pipe: enable handling of IOCB_NOWAIT
2023-03-14 9:26 ` Christian Brauner
@ 2023-03-14 12:03 ` Jens Axboe
0 siblings, 0 replies; 14+ messages in thread
From: Jens Axboe @ 2023-03-14 12:03 UTC (permalink / raw)
To: Christian Brauner; +Cc: io-uring, linux-fsdevel
On 3/14/23 3:26?AM, Christian Brauner wrote:
> On Tue, Mar 07, 2023 at 08:10:32PM -0700, Jens Axboe wrote:
>> @@ -493,9 +507,13 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
>> int copied;
>>
>> if (!page) {
>> - page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
>> + gfp_t gfp = __GFP_HIGHMEM | __GFP_ACCOUNT;
>> +
>> + if (!nonblock)
>> + gfp |= GFP_USER;
>
> Just for my education: Does this encode the assumpation that the
> non-blocking code can only be reached from io_uring and thus GFP_USER
> can be dropped for that case? IOW, if there's other code that could in
> the future reach the non blocking condition would this still be correct?
You can already reach that if you do preadv2(..., RWF_NOWAIT). There
should be no assumptions here on the user of it, semantics should be the
same. The gfp mask is just split so we avoid __GFP_WAIT for the
nonblocking case.
>
>> + page = alloc_page(gfp);
>> if (unlikely(!page)) {
>> - ret = ret ? : -ENOMEM;
>> + ret = ret ? : nonblock ? -EAGAIN : -ENOMEM;
>
> Hm, could we try and avoid the nested "?:?:" please. Imho, that's easy
> to misparse. Idk, doesn't need to be exactly that code but sm like:
>
> if (!nonblock) {
> gfp |= GFP_USER;
> ret = -EAGAIN;
> } else {
> ret = -ENOMEM;
> }
>
> page = alloc_page(gfp);
> if (unlikely(!page))
> break;
> else
> ret = 0;
> pipe->tmp_page = page;
>
> or sm else.
Yeah this is much better, I think I was a bit too lazy here, not really
a fan of ternaries myself... I'll fix that up. Thanks!
--
Jens Axboe
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 2/3] pipe: enable handling of IOCB_NOWAIT
2023-03-14 9:38 ` Matthew Wilcox
@ 2023-03-14 12:04 ` Jens Axboe
0 siblings, 0 replies; 14+ messages in thread
From: Jens Axboe @ 2023-03-14 12:04 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: io-uring, linux-fsdevel
On 3/14/23 3:38?AM, Matthew Wilcox wrote:
> On Tue, Mar 07, 2023 at 08:10:32PM -0700, Jens Axboe wrote:
>> @@ -493,9 +507,13 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
>> int copied;
>>
>> if (!page) {
>> - page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
>> + gfp_t gfp = __GFP_HIGHMEM | __GFP_ACCOUNT;
>> +
>> + if (!nonblock)
>> + gfp |= GFP_USER;
>> + page = alloc_page(gfp);
>
> Hmm, looks like you drop __GFP_HARDWALL in the nonblock case. People who
> use cpusets might be annoyed by that.
Ah good catch! Yes, that's an oversight, I'll rectify that in v2.
>> if (unlikely(!page)) {
>> - ret = ret ? : -ENOMEM;
>> + ret = ret ? : nonblock ? -EAGAIN : -ENOMEM;
>
> double ternary operator? really?
yeah sorry... See reply to Christian, I'll make this cleaner.
--
Jens Axboe
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCHSET v2 for-next 0/3] Add FMODE_NOWAIT support to pipes
@ 2023-03-14 15:42 Jens Axboe
2023-03-14 15:42 ` [PATCH 1/3] fs: add 'nonblock' parameter to pipe_buf_confirm() and fops method Jens Axboe
` (2 more replies)
0 siblings, 3 replies; 14+ messages in thread
From: Jens Axboe @ 2023-03-14 15:42 UTC (permalink / raw)
To: io-uring, linux-fsdevel; +Cc: brauner
One thing that's always been a bit slower than I'd like with io_uring is
dealing with pipes. They don't support IOCB_NOWAIT, and hence we need to
punt them to io-wq for handling.
This series adds support for FMODE_NOWAIT to pipes.
Patch 1 extends pipe_buf_operations->confirm() to accept a nonblock
parameter, and wires up the caller, pipe_buf_confirm(), to have that
argument too.
Patch 2 makes pipes deal with IOCB_NOWAIT for locking the pipe, calling
pipe_buf_confirm(), and for allocating new pages on writes.
Patch 3 flicks the switch and enables FMODE_NOWAIT for pipes.
Curious on how big of a difference this makes, I wrote a small benchmark
that simply opens 128 pipes and then does 256 rounds of reading and
writing to them. This was run 10 times, discarding the first run as it's
always a bit slower. Before the patch:
Avg: 262.52 msec
Stdev: 2.12 msec
Min: 261.07 msec
Max 267.91 msec
and after the patch:
Avg: 24.14 msec
Stdev: 9.61 msec
Min: 17.84 msec
Max: 43.75 msec
or about a 10x improvement in performance (and efficiency) for pipes
being empty on read attempt. If we run the same test but with pipes
already having data, the improvement is even better (as expected):
Before:
Avg: 249.24 msec
Stdev: 0.20 msec
Min: 248.96 msec
Max: 249.53 msec
After:
Avg: 10.86 msec
Stdev: 0.91 msec
Min: 10.02 msec
Max: 12.67 msec
or about a 23x improvement.
I ran the patches through the ltp pipe and splice tests, no regressions
observed. Looking at io_uring traces, we can see that we no longer have
any io_uring_queue_async_work() traces after the patch, where previously
everything was done via io-wq.
Changes since v1:
- Add acks/reviewed-bys
- Fix missing __GFP_HARDWALL (willy)
- Get rid of nasty double ternary (willy,christian)
--
Jens Axboe
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH 1/3] fs: add 'nonblock' parameter to pipe_buf_confirm() and fops method
2023-03-14 15:42 [PATCHSET v2 for-next 0/3] Add FMODE_NOWAIT support to pipes Jens Axboe
@ 2023-03-14 15:42 ` Jens Axboe
2023-03-14 15:42 ` [PATCH 2/3] pipe: enable handling of IOCB_NOWAIT Jens Axboe
2023-03-14 15:42 ` [PATCH 3/3] pipe: set FMODE_NOWAIT on pipes Jens Axboe
2 siblings, 0 replies; 14+ messages in thread
From: Jens Axboe @ 2023-03-14 15:42 UTC (permalink / raw)
To: io-uring, linux-fsdevel; +Cc: brauner, Jens Axboe, Dave Chinner
In preparation for being able to do a nonblocking confirm attempt of a
pipe buffer, plumb a parameter through the stack to indicate if this is
a nonblocking attempt or not.
Each caller is passing down 'false' right now, but the only confirm
method in the tree, page_cache_pipe_buf_confirm(), is converted to do a
trylock_page() if nonblock == true.
Acked-by: Dave Chinner <[email protected]>
Reviewed-by: Christian Brauner <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
---
fs/fuse/dev.c | 4 ++--
fs/pipe.c | 4 ++--
fs/splice.c | 11 +++++++----
include/linux/pipe_fs_i.h | 7 ++++---
4 files changed, 15 insertions(+), 11 deletions(-)
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index eb4f88e3dc97..0bd1b0870f2d 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -700,7 +700,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
struct pipe_buffer *buf = cs->pipebufs;
if (!cs->write) {
- err = pipe_buf_confirm(cs->pipe, buf);
+ err = pipe_buf_confirm(cs->pipe, buf, false);
if (err)
return err;
@@ -800,7 +800,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
fuse_copy_finish(cs);
- err = pipe_buf_confirm(cs->pipe, buf);
+ err = pipe_buf_confirm(cs->pipe, buf, false);
if (err)
goto out_put_old;
diff --git a/fs/pipe.c b/fs/pipe.c
index 42c7ff41c2db..340f253913a2 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -297,7 +297,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
chars = total_len;
}
- error = pipe_buf_confirm(pipe, buf);
+ error = pipe_buf_confirm(pipe, buf, false);
if (error) {
if (!ret)
ret = error;
@@ -461,7 +461,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
if ((buf->flags & PIPE_BUF_FLAG_CAN_MERGE) &&
offset + chars <= PAGE_SIZE) {
- ret = pipe_buf_confirm(pipe, buf);
+ ret = pipe_buf_confirm(pipe, buf, false);
if (ret)
goto out;
diff --git a/fs/splice.c b/fs/splice.c
index 2c3dec2b6dfa..130ee1052588 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -100,13 +100,16 @@ static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe,
* is a page cache page, IO may be in flight.
*/
static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe,
- struct pipe_buffer *buf)
+ struct pipe_buffer *buf, bool nonblock)
{
struct page *page = buf->page;
int err;
if (!PageUptodate(page)) {
- lock_page(page);
+ if (nonblock && !trylock_page(page))
+ return -EAGAIN;
+ else
+ lock_page(page);
/*
* Page got truncated/unhashed. This will cause a 0-byte
@@ -498,7 +501,7 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des
if (sd->len > sd->total_len)
sd->len = sd->total_len;
- ret = pipe_buf_confirm(pipe, buf);
+ ret = pipe_buf_confirm(pipe, buf, false);
if (unlikely(ret)) {
if (ret == -ENODATA)
ret = 0;
@@ -761,7 +764,7 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
continue;
this_len = min(this_len, left);
- ret = pipe_buf_confirm(pipe, buf);
+ ret = pipe_buf_confirm(pipe, buf, false);
if (unlikely(ret)) {
if (ret == -ENODATA)
ret = 0;
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index d2c3f16cf6b1..d63278bb0797 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -100,7 +100,8 @@ struct pipe_buf_operations {
* hook. Returns 0 for good, or a negative error value in case of
* error. If not present all pages are considered good.
*/
- int (*confirm)(struct pipe_inode_info *, struct pipe_buffer *);
+ int (*confirm)(struct pipe_inode_info *, struct pipe_buffer *,
+ bool nonblock);
/*
* When the contents of this pipe buffer has been completely
@@ -209,11 +210,11 @@ static inline void pipe_buf_release(struct pipe_inode_info *pipe,
* @buf: the buffer to confirm
*/
static inline int pipe_buf_confirm(struct pipe_inode_info *pipe,
- struct pipe_buffer *buf)
+ struct pipe_buffer *buf, bool nonblock)
{
if (!buf->ops->confirm)
return 0;
- return buf->ops->confirm(pipe, buf);
+ return buf->ops->confirm(pipe, buf, nonblock);
}
/**
--
2.39.2
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 2/3] pipe: enable handling of IOCB_NOWAIT
2023-03-14 15:42 [PATCHSET v2 for-next 0/3] Add FMODE_NOWAIT support to pipes Jens Axboe
2023-03-14 15:42 ` [PATCH 1/3] fs: add 'nonblock' parameter to pipe_buf_confirm() and fops method Jens Axboe
@ 2023-03-14 15:42 ` Jens Axboe
2023-03-15 8:23 ` Christian Brauner
2023-03-14 15:42 ` [PATCH 3/3] pipe: set FMODE_NOWAIT on pipes Jens Axboe
2 siblings, 1 reply; 14+ messages in thread
From: Jens Axboe @ 2023-03-14 15:42 UTC (permalink / raw)
To: io-uring, linux-fsdevel; +Cc: brauner, Jens Axboe, Dave Chinner
In preparation for enabling FMODE_NOWAIT for pipes, ensure that the read
and write path handle it correctly. This includes the pipe locking,
page allocation for writes, and confirming pipe buffers.
Acked-by: Dave Chinner <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
---
fs/pipe.c | 39 +++++++++++++++++++++++++++++++--------
1 file changed, 31 insertions(+), 8 deletions(-)
diff --git a/fs/pipe.c b/fs/pipe.c
index 340f253913a2..dc00b20e56c8 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -108,6 +108,16 @@ static inline void __pipe_unlock(struct pipe_inode_info *pipe)
mutex_unlock(&pipe->mutex);
}
+static inline bool __pipe_trylock(struct pipe_inode_info *pipe, bool nonblock)
+{
+ if (!nonblock) {
+ __pipe_lock(pipe);
+ return true;
+ }
+
+ return mutex_trylock(&pipe->mutex);
+}
+
void pipe_double_lock(struct pipe_inode_info *pipe1,
struct pipe_inode_info *pipe2)
{
@@ -234,6 +244,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
struct file *filp = iocb->ki_filp;
struct pipe_inode_info *pipe = filp->private_data;
bool was_full, wake_next_reader = false;
+ const bool nonblock = iocb->ki_flags & IOCB_NOWAIT;
ssize_t ret;
/* Null read succeeds. */
@@ -241,7 +252,8 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
return 0;
ret = 0;
- __pipe_lock(pipe);
+ if (!__pipe_trylock(pipe, nonblock))
+ return -EAGAIN;
/*
* We only wake up writers if the pipe was full when we started
@@ -297,7 +309,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
chars = total_len;
}
- error = pipe_buf_confirm(pipe, buf, false);
+ error = pipe_buf_confirm(pipe, buf, nonblock);
if (error) {
if (!ret)
ret = error;
@@ -342,7 +354,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
break;
if (ret)
break;
- if (filp->f_flags & O_NONBLOCK) {
+ if (filp->f_flags & O_NONBLOCK || nonblock) {
ret = -EAGAIN;
break;
}
@@ -423,12 +435,14 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
ssize_t chars;
bool was_empty = false;
bool wake_next_writer = false;
+ const bool nonblock = iocb->ki_flags & IOCB_NOWAIT;
/* Null write succeeds. */
if (unlikely(total_len == 0))
return 0;
- __pipe_lock(pipe);
+ if (!__pipe_trylock(pipe, nonblock))
+ return -EAGAIN;
if (!pipe->readers) {
send_sig(SIGPIPE, current, 0);
@@ -461,7 +475,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
if ((buf->flags & PIPE_BUF_FLAG_CAN_MERGE) &&
offset + chars <= PAGE_SIZE) {
- ret = pipe_buf_confirm(pipe, buf, false);
+ ret = pipe_buf_confirm(pipe, buf, nonblock);
if (ret)
goto out;
@@ -493,9 +507,18 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
int copied;
if (!page) {
- page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
+ gfp_t gfp = __GFP_HIGHMEM | __GFP_ACCOUNT |
+ __GFP_HARDWALL;
+ int this_ret = -EAGAIN;
+
+ if (!nonblock) {
+ this_ret = -ENOMEM;
+ gfp |= GFP_USER;
+ }
+ page = alloc_page(gfp);
if (unlikely(!page)) {
- ret = ret ? : -ENOMEM;
+ if (!ret)
+ ret = this_ret;
break;
}
pipe->tmp_page = page;
@@ -547,7 +570,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
continue;
/* Wait for buffer space to become available. */
- if (filp->f_flags & O_NONBLOCK) {
+ if (filp->f_flags & O_NONBLOCK || nonblock) {
if (!ret)
ret = -EAGAIN;
break;
--
2.39.2
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH 3/3] pipe: set FMODE_NOWAIT on pipes
2023-03-14 15:42 [PATCHSET v2 for-next 0/3] Add FMODE_NOWAIT support to pipes Jens Axboe
2023-03-14 15:42 ` [PATCH 1/3] fs: add 'nonblock' parameter to pipe_buf_confirm() and fops method Jens Axboe
2023-03-14 15:42 ` [PATCH 2/3] pipe: enable handling of IOCB_NOWAIT Jens Axboe
@ 2023-03-14 15:42 ` Jens Axboe
2 siblings, 0 replies; 14+ messages in thread
From: Jens Axboe @ 2023-03-14 15:42 UTC (permalink / raw)
To: io-uring, linux-fsdevel; +Cc: brauner, Jens Axboe, Dave Chinner
The read/write path is now prepared to deal with IOCB_NOWAIT, hence
enable support for that via setting FMODE_NOWAIT on new pipes.
Acked-by: Dave Chinner <[email protected]>
Reviewed-by: Christian Brauner <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
---
fs/pipe.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/fs/pipe.c b/fs/pipe.c
index dc00b20e56c8..b7e380952fca 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -999,6 +999,9 @@ static int __do_pipe_flags(int *fd, struct file **files, int flags)
audit_fd_pair(fdr, fdw);
fd[0] = fdr;
fd[1] = fdw;
+ /* pipe groks IOCB_NOWAIT */
+ files[0]->f_mode |= FMODE_NOWAIT;
+ files[1]->f_mode |= FMODE_NOWAIT;
return 0;
err_fdr:
--
2.39.2
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [PATCH 2/3] pipe: enable handling of IOCB_NOWAIT
2023-03-14 15:42 ` [PATCH 2/3] pipe: enable handling of IOCB_NOWAIT Jens Axboe
@ 2023-03-15 8:23 ` Christian Brauner
2023-03-15 14:16 ` Jens Axboe
0 siblings, 1 reply; 14+ messages in thread
From: Christian Brauner @ 2023-03-15 8:23 UTC (permalink / raw)
To: Jens Axboe; +Cc: io-uring, linux-fsdevel, Dave Chinner
On Tue, Mar 14, 2023 at 09:42:02AM -0600, Jens Axboe wrote:
> In preparation for enabling FMODE_NOWAIT for pipes, ensure that the read
> and write path handle it correctly. This includes the pipe locking,
> page allocation for writes, and confirming pipe buffers.
>
> Acked-by: Dave Chinner <[email protected]>
> Signed-off-by: Jens Axboe <[email protected]>
> ---
Looks good,
Reviewed-by: Christian Brauner <[email protected]>
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 2/3] pipe: enable handling of IOCB_NOWAIT
2023-03-15 8:23 ` Christian Brauner
@ 2023-03-15 14:16 ` Jens Axboe
2023-03-15 15:02 ` Christian Brauner
0 siblings, 1 reply; 14+ messages in thread
From: Jens Axboe @ 2023-03-15 14:16 UTC (permalink / raw)
To: Christian Brauner; +Cc: io-uring, linux-fsdevel, Dave Chinner
On 3/15/23 2:23 AM, Christian Brauner wrote:
> On Tue, Mar 14, 2023 at 09:42:02AM -0600, Jens Axboe wrote:
>> In preparation for enabling FMODE_NOWAIT for pipes, ensure that the read
>> and write path handle it correctly. This includes the pipe locking,
>> page allocation for writes, and confirming pipe buffers.
>>
>> Acked-by: Dave Chinner <[email protected]>
>> Signed-off-by: Jens Axboe <[email protected]>
>> ---
>
> Looks good,
> Reviewed-by: Christian Brauner <[email protected]>
Thanks for the review, I'll add that. Are you OK with me carrying
these patches, or do you want to stage them for 6.4?
--
Jens Axboe
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 2/3] pipe: enable handling of IOCB_NOWAIT
2023-03-15 14:16 ` Jens Axboe
@ 2023-03-15 15:02 ` Christian Brauner
2023-03-15 15:12 ` Jens Axboe
0 siblings, 1 reply; 14+ messages in thread
From: Christian Brauner @ 2023-03-15 15:02 UTC (permalink / raw)
To: Jens Axboe; +Cc: io-uring, linux-fsdevel, Dave Chinner
On Wed, Mar 15, 2023 at 08:16:19AM -0600, Jens Axboe wrote:
> On 3/15/23 2:23 AM, Christian Brauner wrote:
> > On Tue, Mar 14, 2023 at 09:42:02AM -0600, Jens Axboe wrote:
> >> In preparation for enabling FMODE_NOWAIT for pipes, ensure that the read
> >> and write path handle it correctly. This includes the pipe locking,
> >> page allocation for writes, and confirming pipe buffers.
> >>
> >> Acked-by: Dave Chinner <[email protected]>
> >> Signed-off-by: Jens Axboe <[email protected]>
> >> ---
> >
> > Looks good,
> > Reviewed-by: Christian Brauner <[email protected]>
>
> Thanks for the review, I'll add that. Are you OK with me carrying
> these patches, or do you want to stage them for 6.4?
I'n not fuzzed. Since it's fs only I would lean towards carrying it. I
can pick it up now and see if Al has any strong opinions on this one.
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 2/3] pipe: enable handling of IOCB_NOWAIT
2023-03-15 15:02 ` Christian Brauner
@ 2023-03-15 15:12 ` Jens Axboe
2023-03-15 15:16 ` Jens Axboe
0 siblings, 1 reply; 14+ messages in thread
From: Jens Axboe @ 2023-03-15 15:12 UTC (permalink / raw)
To: Christian Brauner; +Cc: io-uring, linux-fsdevel, Dave Chinner
On 3/15/23 9:02 AM, Christian Brauner wrote:
> On Wed, Mar 15, 2023 at 08:16:19AM -0600, Jens Axboe wrote:
>> On 3/15/23 2:23 AM, Christian Brauner wrote:
>>> On Tue, Mar 14, 2023 at 09:42:02AM -0600, Jens Axboe wrote:
>>>> In preparation for enabling FMODE_NOWAIT for pipes, ensure that the read
>>>> and write path handle it correctly. This includes the pipe locking,
>>>> page allocation for writes, and confirming pipe buffers.
>>>>
>>>> Acked-by: Dave Chinner <[email protected]>
>>>> Signed-off-by: Jens Axboe <[email protected]>
>>>> ---
>>>
>>> Looks good,
>>> Reviewed-by: Christian Brauner <[email protected]>
>>
>> Thanks for the review, I'll add that. Are you OK with me carrying
>> these patches, or do you want to stage them for 6.4?
>
> I'n not fuzzed. Since it's fs only I would lean towards carrying it. I
> can pick it up now and see if Al has any strong opinions on this one.
Either way is fine with me - let me know if you pick it up, and
I can drop it from my tree.
--
Jens Axboe
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH 2/3] pipe: enable handling of IOCB_NOWAIT
2023-03-15 15:12 ` Jens Axboe
@ 2023-03-15 15:16 ` Jens Axboe
0 siblings, 0 replies; 14+ messages in thread
From: Jens Axboe @ 2023-03-15 15:16 UTC (permalink / raw)
To: Christian Brauner; +Cc: io-uring, linux-fsdevel, Dave Chinner
On 3/15/23 9:12 AM, Jens Axboe wrote:
> On 3/15/23 9:02 AM, Christian Brauner wrote:
>> On Wed, Mar 15, 2023 at 08:16:19AM -0600, Jens Axboe wrote:
>>> On 3/15/23 2:23 AM, Christian Brauner wrote:
>>>> On Tue, Mar 14, 2023 at 09:42:02AM -0600, Jens Axboe wrote:
>>>>> In preparation for enabling FMODE_NOWAIT for pipes, ensure that the read
>>>>> and write path handle it correctly. This includes the pipe locking,
>>>>> page allocation for writes, and confirming pipe buffers.
>>>>>
>>>>> Acked-by: Dave Chinner <[email protected]>
>>>>> Signed-off-by: Jens Axboe <[email protected]>
>>>>> ---
>>>>
>>>> Looks good,
>>>> Reviewed-by: Christian Brauner <[email protected]>
>>>
>>> Thanks for the review, I'll add that. Are you OK with me carrying
>>> these patches, or do you want to stage them for 6.4?
>>
>> I'n not fuzzed. Since it's fs only I would lean towards carrying it. I
>> can pick it up now and see if Al has any strong opinions on this one.
>
> Either way is fine with me - let me know if you pick it up, and
> I can drop it from my tree.
Oh and if you do, I should probably send out a v3. Was missing a
kerneldoc in patch 1, corrected that in my tree but it's not in v2.
Outside of that one-liner doc change, same patches in my tree.
--
Jens Axboe
^ permalink raw reply [flat|nested] 14+ messages in thread
end of thread, other threads:[~2023-03-15 15:16 UTC | newest]
Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-03-14 15:42 [PATCHSET v2 for-next 0/3] Add FMODE_NOWAIT support to pipes Jens Axboe
2023-03-14 15:42 ` [PATCH 1/3] fs: add 'nonblock' parameter to pipe_buf_confirm() and fops method Jens Axboe
2023-03-14 15:42 ` [PATCH 2/3] pipe: enable handling of IOCB_NOWAIT Jens Axboe
2023-03-15 8:23 ` Christian Brauner
2023-03-15 14:16 ` Jens Axboe
2023-03-15 15:02 ` Christian Brauner
2023-03-15 15:12 ` Jens Axboe
2023-03-15 15:16 ` Jens Axboe
2023-03-14 15:42 ` [PATCH 3/3] pipe: set FMODE_NOWAIT on pipes Jens Axboe
-- strict thread matches above, loose matches on Subject: below --
2023-03-08 3:10 [PATCHSET for-next 0/3] Add FMODE_NOWAIT support to pipes Jens Axboe
2023-03-08 3:10 ` [PATCH 2/3] pipe: enable handling of IOCB_NOWAIT Jens Axboe
2023-03-14 9:26 ` Christian Brauner
2023-03-14 12:03 ` Jens Axboe
2023-03-14 9:38 ` Matthew Wilcox
2023-03-14 12:04 ` Jens Axboe
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox