* [PATCH] io_uring: Add vmsplice support
@ 2021-01-05 23:00 arni
From: arni @ 2021-01-05 23:00 UTC
To: io-uring; +Cc: axboe
This patchset is a followup from my last email, which may be found at
https://lore.kernel.org/io-uring/[email protected]/
Thanks for your feedback, Jens. I have modified the test app on my end
as well; it now looks like the following:
https://gist.githubusercontent.com/ArniDagur/3392a787e89e78ba8ff739ff0f8476d5/raw/d01d19bb6fdc3defea59ae7c2a2c3d29682d8520/main.c

As you suggested, I now always return -EAGAIN when force_nonblock is
set. In addition, req_set_fail_links() is now called when less than the
full buffer is spliced, and io_req_complete() is called at the end.
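
For reference, a minimal sketch of what userspace submission could look
like with this patchset applied (untested; it assumes liburing plus a
kernel header that defines the new IORING_OP_VMSPLICE opcode, and the
helper name queue_vmsplice is made up for illustration). The field
mapping follows patch 2/2: sqe->addr points at the iovec array,
sqe->len is the number of iovecs, and sqe->splice_flags carries the
SPLICE_F_* flags:

#include <liburing.h>
#include <sys/uio.h>

static int queue_vmsplice(struct io_uring *ring, int pipe_wr_fd)
{
	static char payload[] = "hello, vmsplice";
	/*
	 * With this version of the patch the iovec array is read from
	 * user memory at execution time, so it must stay alive until
	 * the request completes (see Pavel's review below).
	 */
	static struct iovec iov = { payload, sizeof(payload) - 1 };
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);

	if (!sqe)
		return -1;
	/* addr = iovec array, len = iovec count; off must be 0 per prep. */
	io_uring_prep_rw(IORING_OP_VMSPLICE, sqe, pipe_wr_fd, &iov, 1, 0);
	sqe->splice_flags = 0;	/* e.g. SPLICE_F_GIFT */
	return io_uring_submit(ring);
}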
* [PATCH 1/2] splice: Make vmsplice public
@ 2021-01-05 23:00 arni
From: arni @ 2021-01-05 23:00 UTC
To: io-uring; +Cc: axboe, Árni Dagur

From: Árni Dagur <[email protected]>

Create a public function do_vmsplice(), so that other parts of the
kernel can use it.

Signed-off-by: Árni Dagur <[email protected]>
---
 fs/splice.c            | 21 +++++++++++++++------
 include/linux/splice.h |  2 +-
 2 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/fs/splice.c b/fs/splice.c
index 866d5c2367b2..2d653a20cced 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1270,6 +1270,20 @@ static int vmsplice_type(struct fd f, int *type)
 	return 0;
 }
 
+long do_vmsplice(struct file *file, struct iov_iter *iter, unsigned int flags)
+{
+	long error;
+
+	if (!iov_iter_count(iter))
+		error = 0;
+	else if (iov_iter_rw(iter) == WRITE)
+		error = vmsplice_to_pipe(file, iter, flags);
+	else
+		error = vmsplice_to_user(file, iter, flags);
+
+	return error;
+}
+
 /*
  * Note that vmsplice only really supports true splicing _from_ user memory
  * to a pipe, not the other way around. Splicing from user memory is a simple
@@ -1309,12 +1323,7 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov,
 	if (error < 0)
 		goto out_fdput;
 
-	if (!iov_iter_count(&iter))
-		error = 0;
-	else if (iov_iter_rw(&iter) == WRITE)
-		error = vmsplice_to_pipe(f.file, &iter, flags);
-	else
-		error = vmsplice_to_user(f.file, &iter, flags);
+	error = do_vmsplice(f.file, &iter, flags);
 
 	kfree(iov);
 out_fdput:
diff --git a/include/linux/splice.h b/include/linux/splice.h
index a55179fd60fc..44c0e612f652 100644
--- a/include/linux/splice.h
+++ b/include/linux/splice.h
@@ -81,9 +81,9 @@ extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
 extern long do_splice(struct file *in, loff_t *off_in,
 		      struct file *out, loff_t *off_out,
 		      size_t len, unsigned int flags);
-
 extern long do_tee(struct file *in, struct file *out, size_t len,
 		   unsigned int flags);
+extern long do_vmsplice(struct file *file, struct iov_iter *iter, unsigned int flags);
 
 /*
  * for dynamic pipe sizing
-- 
2.30.0
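
To illustrate the new helper's contract, here is a hypothetical
in-kernel caller (not part of the patch; the function name is made up):
the iov_iter carries both the buffers and the direction, so a caller
imports the user iovecs first and frees them afterwards. This assumes
the v5.10+ import_iovec() semantics, where the returned pointer is
NULL-ed when the on-stack array was used, making an unconditional
kfree() safe:

#include <linux/slab.h>
#include <linux/splice.h>
#include <linux/uio.h>

static long example_vmsplice_to_pipe(struct file *pipe_file,
				     const struct iovec __user *uiov,
				     unsigned long nr_segs,
				     unsigned int flags)
{
	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
	struct iov_iter iter;
	long ret;

	/* WRITE: splice from user memory into the pipe. */
	ret = import_iovec(WRITE, uiov, nr_segs, ARRAY_SIZE(iovstack),
			   &iov, &iter);
	if (ret < 0)
		return ret;

	ret = do_vmsplice(pipe_file, &iter, flags);
	kfree(iov);	/* NULL if iovstack was used */
	return ret;
}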
* [PATCH 2/2] io_uring: Add vmsplice support
@ 2021-01-05 23:00 arni
From: arni @ 2021-01-05 23:00 UTC
To: io-uring; +Cc: axboe, Árni Dagur

From: Árni Dagur <[email protected]>

* The `sqe->splice_flags` field is used to hold flags.
* We return -EAGAIN if force_nonblock is set.

Signed-off-by: Árni Dagur <[email protected]>
---
 fs/io_uring.c                 | 76 +++++++++++++++++++++++++++++++++++
 include/uapi/linux/io_uring.h |  1 +
 2 files changed, 77 insertions(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index ca46f314640b..a99a89798386 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -531,6 +531,13 @@ struct io_splice {
 	unsigned int			flags;
 };
 
+struct io_vmsplice {
+	struct file			*file;
+	u64				addr;
+	u64				len;
+	unsigned int			flags;
+};
+
 struct io_provide_buf {
 	struct file			*file;
 	__u64				addr;
@@ -692,6 +699,7 @@ struct io_kiocb {
 		struct io_madvise	madvise;
 		struct io_epoll		epoll;
 		struct io_splice	splice;
+		struct io_vmsplice	vmsplice;
 		struct io_provide_buf	pbuf;
 		struct io_statx		statx;
 		struct io_shutdown	shutdown;
@@ -967,6 +975,12 @@ static const struct io_op_def io_op_defs[] = {
 		.unbound_nonreg_file	= 1,
 		.work_flags		= IO_WQ_WORK_BLKCG,
 	},
+	[IORING_OP_VMSPLICE] = {
+		.needs_file		= 1,
+		.hash_reg_file		= 1,
+		.unbound_nonreg_file	= 1,
+		.work_flags		= IO_WQ_WORK_MM,
+	},
 	[IORING_OP_PROVIDE_BUFFERS] = {},
 	[IORING_OP_REMOVE_BUFFERS] = {},
 	[IORING_OP_TEE] = {
@@ -3884,6 +3898,63 @@ static int io_splice(struct io_kiocb *req, bool force_nonblock)
 	return 0;
 }
 
+static int io_vmsplice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+	struct io_vmsplice *sp = &req->vmsplice;
+
+	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+		return -EINVAL;
+	if (unlikely(READ_ONCE(sqe->off)))
+		return -EINVAL;
+
+	sp->addr = READ_ONCE(sqe->addr);
+	sp->len = READ_ONCE(sqe->len);
+	sp->flags = READ_ONCE(sqe->splice_flags);
+
+	if (sp->flags & ~SPLICE_F_ALL)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int io_vmsplice(struct io_kiocb *req, bool force_nonblock)
+{
+	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
+	struct io_vmsplice *sp = &req->vmsplice;
+	void __user *buf = u64_to_user_ptr(sp->addr);
+	struct iov_iter __iter, *iter = &__iter;
+	struct file *file = sp->file;
+	ssize_t io_size;
+	int type, ret;
+
+	if (force_nonblock)
+		return -EAGAIN;
+
+	if (file->f_mode & FMODE_WRITE)
+		type = WRITE;
+	else if (file->f_mode & FMODE_READ)
+		type = READ;
+	else {
+		ret = -EBADF;
+		goto err;
+	}
+
+	ret = __import_iovec(type, buf, sp->len, UIO_FASTIOV, &iovec, iter,
+			     req->ctx->compat);
+	if (ret < 0)
+		goto err;
+	io_size = iov_iter_count(iter);
+
+	ret = do_vmsplice(file, iter, sp->flags);
+	if (ret != io_size) {
+err:
+		req_set_fail_links(req);
+	}
+	io_req_complete(req, ret);
+	kfree(iovec);
+	return 0;
+}
+
 /*
  * IORING_OP_NOP just posts a completion event, nothing else.
  */
@@ -6009,6 +6080,8 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		return io_epoll_ctl_prep(req, sqe);
 	case IORING_OP_SPLICE:
 		return io_splice_prep(req, sqe);
+	case IORING_OP_VMSPLICE:
+		return io_vmsplice_prep(req, sqe);
 	case IORING_OP_PROVIDE_BUFFERS:
 		return io_provide_buffers_prep(req, sqe);
 	case IORING_OP_REMOVE_BUFFERS:
@@ -6262,6 +6335,9 @@ static int io_issue_sqe(struct io_kiocb *req, bool force_nonblock,
 	case IORING_OP_SPLICE:
 		ret = io_splice(req, force_nonblock);
 		break;
+	case IORING_OP_VMSPLICE:
+		ret = io_vmsplice(req, force_nonblock);
+		break;
 	case IORING_OP_PROVIDE_BUFFERS:
 		ret = io_provide_buffers(req, force_nonblock, cs);
 		break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index d31a2a1e8ef9..6bc79f9bb123 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -137,6 +137,7 @@ enum {
 	IORING_OP_SHUTDOWN,
 	IORING_OP_RENAMEAT,
 	IORING_OP_UNLINKAT,
+	IORING_OP_VMSPLICE,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
-- 
2.30.0
* Re: [PATCH 2/2] io_uring: Add vmsplice support
@ 2021-01-05 23:43 Pavel Begunkov
From: Pavel Begunkov @ 2021-01-05 23:43 UTC
To: arni, io-uring; +Cc: axboe

On 05/01/2021 23:00, [email protected] wrote:
> From: Árni Dagur <[email protected]>
>
> * The `sqe->splice_flags` field is used to hold flags.
> * We return -EAGAIN if force_nonblock is set.
>
> Signed-off-by: Árni Dagur <[email protected]>

[...]

> +static int io_vmsplice(struct io_kiocb *req, bool force_nonblock)
> +{
> +	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
> +	struct io_vmsplice *sp = &req->vmsplice;
> +	void __user *buf = u64_to_user_ptr(sp->addr);

const struct iovec __user *uiov

> +	struct iov_iter __iter, *iter = &__iter;

read/write either use ((struct io_async_rw *)req->async_data)->iter or
to avoid allocation use an on-stack iter. This only has that on-stack
__iter, so why do you need *iter?

> +	struct file *file = sp->file;
> +	ssize_t io_size;
> +	int type, ret;
> +
> +	if (force_nonblock)
> +		return -EAGAIN;
> +
> +	if (file->f_mode & FMODE_WRITE)
> +		type = WRITE;
> +	else if (file->f_mode & FMODE_READ)
> +		type = READ;
> +	else {
> +		ret = -EBADF;
> +		goto err;

it jumps to kfree(iovec), where iovec=inline_vecs

> +	}
> +
> +	ret = __import_iovec(type, buf, sp->len, UIO_FASTIOV, &iovec, iter,
> +			     req->ctx->compat);

This may happen asynchronously long after io_uring_enter(submit)
returned, e.g. if a user keeps uiov on-stack it will fail or read
garbage. So, it's either to make it a part of ABI -- users must not
delete uiov until the request completion, or copy it while
not-yet-async. For consistency with read/write I'd prefer the second.

> +	if (ret < 0)
> +		goto err;
> +	io_size = iov_iter_count(iter);
> +
> +	ret = do_vmsplice(file, iter, sp->flags);
> +	if (ret != io_size) {
> +err:
> +		req_set_fail_links(req);
> +	}
> +	io_req_complete(req, ret);
> +	kfree(iovec);
> +	return 0;
> +}

[...]

-- 
Pavel Begunkov
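
For context, a sketch (not from the thread) of how the error path could
be restructured to address the kfree(inline_vecs) bug and drop the
unused *iter indirection. It assumes the v5.10+ __import_iovec()
semantics, where iovec is NULL-ed whenever the inline array was used or
the import failed, and it does not yet copy the iovec at prep time as
suggested above:

static int io_vmsplice(struct io_kiocb *req, bool force_nonblock)
{
	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
	struct io_vmsplice *sp = &req->vmsplice;
	void __user *buf = u64_to_user_ptr(sp->addr);
	struct iov_iter iter;
	struct file *file = sp->file;
	int type, ret;

	if (force_nonblock)
		return -EAGAIN;

	if (file->f_mode & FMODE_WRITE)
		type = WRITE;
	else if (file->f_mode & FMODE_READ)
		type = READ;
	else {
		/*
		 * Fail before the import: iovec still points at the
		 * stack array, so it must never reach kfree().
		 */
		req_set_fail_links(req);
		io_req_complete(req, -EBADF);
		return 0;
	}

	ret = __import_iovec(type, buf, sp->len, UIO_FASTIOV, &iovec,
			     &iter, req->ctx->compat);
	if (ret >= 0) {
		ssize_t io_size = iov_iter_count(&iter);

		ret = do_vmsplice(file, &iter, sp->flags);
		if (ret != io_size)
			req_set_fail_links(req);
	} else {
		req_set_fail_links(req);
	}
	io_req_complete(req, ret);
	/* NULL after __import_iovec() unless it allocated an array. */
	kfree(iovec);
	return 0;
}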