* [PATCH] io_uring: Add vmsplice support
@ 2021-01-05 23:00 arni
2021-01-05 23:00 ` [PATCH 1/2] splice: Make vmsplice public arni
2021-01-05 23:00 ` [PATCH 2/2] io_uring: Add vmsplice support arni
0 siblings, 2 replies; 4+ messages in thread
From: arni @ 2021-01-05 23:00 UTC (permalink / raw)
To: io-uring; +Cc: axboe
This patchset is a followup from my last email, which may be found at
https://lore.kernel.org/io-uring/[email protected]/
Thanks for your feedback, Jens. I have modified the test app on my end as
well, and it now looks like the following
https://gist.githubusercontent.com/ArniDagur/3392a787e89e78ba8ff739ff0f8476d5/raw/d01d19bb6fdc3defea59ae7c2a2c3d29682d8520/main.c
As you suggested, I now always return -EAGAIN when force_nonblock is set.
In addition, req_set_fail_links() is now called when less than the
entirety of the buffer is spliced, and io_req_complete() is called at
the end.
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH 1/2] splice: Make vmsplice public
2021-01-05 23:00 [PATCH] io_uring: Add vmsplice support arni
@ 2021-01-05 23:00 ` arni
2021-01-05 23:00 ` [PATCH 2/2] io_uring: Add vmsplice support arni
1 sibling, 0 replies; 4+ messages in thread
From: arni @ 2021-01-05 23:00 UTC (permalink / raw)
To: io-uring; +Cc: axboe, Árni Dagur
From: Árni Dagur <[email protected]>
Create a public function do_vmsplice(), so that other parts of the
kernel can use it.
Signed-off-by: Árni Dagur <[email protected]>
---
fs/splice.c | 21 +++++++++++++++------
include/linux/splice.h | 2 +-
2 files changed, 16 insertions(+), 7 deletions(-)
diff --git a/fs/splice.c b/fs/splice.c
index 866d5c2367b2..2d653a20cced 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1270,6 +1270,20 @@ static int vmsplice_type(struct fd f, int *type)
return 0;
}
+long do_vmsplice(struct file *file, struct iov_iter *iter, unsigned int flags)
+{
+ long error;
+
+ if (!iov_iter_count(iter))
+ error = 0;
+ else if (iov_iter_rw(iter) == WRITE)
+ error = vmsplice_to_pipe(file, iter, flags);
+ else
+ error = vmsplice_to_user(file, iter, flags);
+
+ return error;
+}
+
/*
* Note that vmsplice only really supports true splicing _from_ user memory
* to a pipe, not the other way around. Splicing from user memory is a simple
@@ -1309,12 +1323,7 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov,
if (error < 0)
goto out_fdput;
- if (!iov_iter_count(&iter))
- error = 0;
- else if (iov_iter_rw(&iter) == WRITE)
- error = vmsplice_to_pipe(f.file, &iter, flags);
- else
- error = vmsplice_to_user(f.file, &iter, flags);
+ error = do_vmsplice(f.file, &iter, flags);
kfree(iov);
out_fdput:
diff --git a/include/linux/splice.h b/include/linux/splice.h
index a55179fd60fc..44c0e612f652 100644
--- a/include/linux/splice.h
+++ b/include/linux/splice.h
@@ -81,9 +81,9 @@ extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
extern long do_splice(struct file *in, loff_t *off_in,
struct file *out, loff_t *off_out,
size_t len, unsigned int flags);
-
extern long do_tee(struct file *in, struct file *out, size_t len,
unsigned int flags);
+extern long do_vmsplice(struct file *file, struct iov_iter *iter, unsigned int flags);
/*
* for dynamic pipe sizing
--
2.30.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/2] io_uring: Add vmsplice support
2021-01-05 23:00 [PATCH] io_uring: Add vmsplice support arni
2021-01-05 23:00 ` [PATCH 1/2] splice: Make vmsplice public arni
@ 2021-01-05 23:00 ` arni
2021-01-05 23:43 ` Pavel Begunkov
1 sibling, 1 reply; 4+ messages in thread
From: arni @ 2021-01-05 23:00 UTC (permalink / raw)
To: io-uring; +Cc: axboe, Árni Dagur
From: Árni Dagur <[email protected]>
* The `sqe->splice_flags` field is used to hold flags.
* We return -EAGAIN if force_nonblock is set.
Signed-off-by: Árni Dagur <[email protected]>
---
fs/io_uring.c | 76 +++++++++++++++++++++++++++++++++++
include/uapi/linux/io_uring.h | 1 +
2 files changed, 77 insertions(+)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index ca46f314640b..a99a89798386 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -531,6 +531,13 @@ struct io_splice {
unsigned int flags;
};
+struct io_vmsplice {
+ struct file *file;
+ u64 addr;
+ u64 len;
+ unsigned int flags;
+};
+
struct io_provide_buf {
struct file *file;
__u64 addr;
@@ -692,6 +699,7 @@ struct io_kiocb {
struct io_madvise madvise;
struct io_epoll epoll;
struct io_splice splice;
+ struct io_vmsplice vmsplice;
struct io_provide_buf pbuf;
struct io_statx statx;
struct io_shutdown shutdown;
@@ -967,6 +975,12 @@ static const struct io_op_def io_op_defs[] = {
.unbound_nonreg_file = 1,
.work_flags = IO_WQ_WORK_BLKCG,
},
+ [IORING_OP_VMSPLICE] = {
+ .needs_file = 1,
+ .hash_reg_file = 1,
+ .unbound_nonreg_file = 1,
+ .work_flags = IO_WQ_WORK_MM,
+ },
[IORING_OP_PROVIDE_BUFFERS] = {},
[IORING_OP_REMOVE_BUFFERS] = {},
[IORING_OP_TEE] = {
@@ -3884,6 +3898,63 @@ static int io_splice(struct io_kiocb *req, bool force_nonblock)
return 0;
}
+static int io_vmsplice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ struct io_vmsplice *sp = &req->vmsplice;
+
+ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ return -EINVAL;
+ if (unlikely(READ_ONCE(sqe->off)))
+ return -EINVAL;
+
+ sp->addr = READ_ONCE(sqe->addr);
+ sp->len = READ_ONCE(sqe->len);
+ sp->flags = READ_ONCE(sqe->splice_flags);
+
+ if (sp->flags & ~SPLICE_F_ALL)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int io_vmsplice(struct io_kiocb *req, bool force_nonblock)
+{
+ struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
+ struct io_vmsplice *sp = &req->vmsplice;
+ void __user *buf = u64_to_user_ptr(sp->addr);
+ struct iov_iter __iter, *iter = &__iter;
+ struct file *file = sp->file;
+ ssize_t io_size;
+ int type, ret;
+
+ if (force_nonblock)
+ return -EAGAIN;
+
+ if (file->f_mode & FMODE_WRITE)
+ type = WRITE;
+ else if (file->f_mode & FMODE_READ)
+ type = READ;
+ else {
+ ret = -EBADF;
+ goto err;
+ }
+
+ ret = __import_iovec(type, buf, sp->len, UIO_FASTIOV, &iovec, iter,
+ req->ctx->compat);
+ if (ret < 0)
+ goto err;
+ io_size = iov_iter_count(iter);
+
+ ret = do_vmsplice(file, iter, sp->flags);
+ if (ret != io_size) {
+err:
+ req_set_fail_links(req);
+ }
+ io_req_complete(req, ret);
+ kfree(iovec);
+ return 0;
+}
+
/*
* IORING_OP_NOP just posts a completion event, nothing else.
*/
@@ -6009,6 +6080,8 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return io_epoll_ctl_prep(req, sqe);
case IORING_OP_SPLICE:
return io_splice_prep(req, sqe);
+ case IORING_OP_VMSPLICE:
+ return io_vmsplice_prep(req, sqe);
case IORING_OP_PROVIDE_BUFFERS:
return io_provide_buffers_prep(req, sqe);
case IORING_OP_REMOVE_BUFFERS:
@@ -6262,6 +6335,9 @@ static int io_issue_sqe(struct io_kiocb *req, bool force_nonblock,
case IORING_OP_SPLICE:
ret = io_splice(req, force_nonblock);
break;
+ case IORING_OP_VMSPLICE:
+ ret = io_vmsplice(req, force_nonblock);
+ break;
case IORING_OP_PROVIDE_BUFFERS:
ret = io_provide_buffers(req, force_nonblock, cs);
break;
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index d31a2a1e8ef9..6bc79f9bb123 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -137,6 +137,7 @@ enum {
IORING_OP_SHUTDOWN,
IORING_OP_RENAMEAT,
IORING_OP_UNLINKAT,
+ IORING_OP_VMSPLICE,
/* this goes last, obviously */
IORING_OP_LAST,
--
2.30.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH 2/2] io_uring: Add vmsplice support
2021-01-05 23:00 ` [PATCH 2/2] io_uring: Add vmsplice support arni
@ 2021-01-05 23:43 ` Pavel Begunkov
0 siblings, 0 replies; 4+ messages in thread
From: Pavel Begunkov @ 2021-01-05 23:43 UTC (permalink / raw)
To: arni, io-uring; +Cc: axboe
On 05/01/2021 23:00, [email protected] wrote:
> From: Árni Dagur <[email protected]>
>
> * The `sqe->splice_flags` field is used to hold flags.
> * We return -EAGAIN if force_nonblock is set.
>
> Signed-off-by: Árni Dagur <[email protected]>
> ---
> fs/io_uring.c | 76 +++++++++++++++++++++++++++++++++++
> include/uapi/linux/io_uring.h | 1 +
> 2 files changed, 77 insertions(+)
>
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index ca46f314640b..a99a89798386 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -531,6 +531,13 @@ struct io_splice {
> unsigned int flags;
> };
>
> +struct io_vmsplice {
> + struct file *file;
> + u64 addr;
> + u64 len;
> + unsigned int flags;
> +};
> +
> struct io_provide_buf {
> struct file *file;
> __u64 addr;
> @@ -692,6 +699,7 @@ struct io_kiocb {
> struct io_madvise madvise;
> struct io_epoll epoll;
> struct io_splice splice;
> + struct io_vmsplice vmsplice;
> struct io_provide_buf pbuf;
> struct io_statx statx;
> struct io_shutdown shutdown;
> @@ -967,6 +975,12 @@ static const struct io_op_def io_op_defs[] = {
> .unbound_nonreg_file = 1,
> .work_flags = IO_WQ_WORK_BLKCG,
> },
> + [IORING_OP_VMSPLICE] = {
> + .needs_file = 1,
> + .hash_reg_file = 1,
> + .unbound_nonreg_file = 1,
> + .work_flags = IO_WQ_WORK_MM,
> + },
> [IORING_OP_PROVIDE_BUFFERS] = {},
> [IORING_OP_REMOVE_BUFFERS] = {},
> [IORING_OP_TEE] = {
> @@ -3884,6 +3898,63 @@ static int io_splice(struct io_kiocb *req, bool force_nonblock)
> return 0;
> }
>
> +static int io_vmsplice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> +{
> + struct io_vmsplice *sp = &req->vmsplice;
> +
> + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
> + return -EINVAL;
> + if (unlikely(READ_ONCE(sqe->off)))
> + return -EINVAL;
> +
> + sp->addr = READ_ONCE(sqe->addr);
> + sp->len = READ_ONCE(sqe->len);
> + sp->flags = READ_ONCE(sqe->splice_flags);
> +
> + if (sp->flags & ~SPLICE_F_ALL)
> + return -EINVAL;
> +
> + return 0;
> +}
> +
> +static int io_vmsplice(struct io_kiocb *req, bool force_nonblock)
> +{
> + struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
> + struct io_vmsplice *sp = &req->vmsplice;
> + void __user *buf = u64_to_user_ptr(sp->addr);
const struct iovec __user *uiov
> + struct iov_iter __iter, *iter = &__iter;
read/write either use ((struct io_async_rw *)req->async_data)->iter
or to avoid allocation use an on-stack iter. This only has that
on-stack __iter, so why do you need *iter?
> + struct file *file = sp->file;
> + ssize_t io_size;
> + int type, ret;
> +
> + if (force_nonblock)
> + return -EAGAIN;
> +
> + if (file->f_mode & FMODE_WRITE)
> + type = WRITE;
> + else if (file->f_mode & FMODE_READ)
> + type = READ;
> + else {
> + ret = -EBADF;
> + goto err;
it jumps to kfree(iovec), where iovec=inline_vecs
> + }
> +
> + ret = __import_iovec(type, buf, sp->len, UIO_FASTIOV, &iovec, iter,
> + req->ctx->compat);
This may happen asynchronously long after io_uring_enter(submit)
returned, e.g. if a user keeps uiov on-stack it will fail or read
garbage.
So, it's either to make it a part of ABI -- users must not delete
uiov until the request completion, or copy it while not-yet-async.
For consistency with read/write I'd prefer the second.
> + if (ret < 0)
> + goto err;
> + io_size = iov_iter_count(iter);
> +
> + ret = do_vmsplice(file, iter, sp->flags);
> + if (ret != io_size) {
> +err:
> + req_set_fail_links(req);
> + }
> + io_req_complete(req, ret);
> + kfree(iovec);
> + return 0;
> +}
> +
> /*
> * IORING_OP_NOP just posts a completion event, nothing else.
> */
> @@ -6009,6 +6080,8 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> return io_epoll_ctl_prep(req, sqe);
> case IORING_OP_SPLICE:
> return io_splice_prep(req, sqe);
> + case IORING_OP_VMSPLICE:
> + return io_vmsplice_prep(req, sqe);
> case IORING_OP_PROVIDE_BUFFERS:
> return io_provide_buffers_prep(req, sqe);
> case IORING_OP_REMOVE_BUFFERS:
> @@ -6262,6 +6335,9 @@ static int io_issue_sqe(struct io_kiocb *req, bool force_nonblock,
> case IORING_OP_SPLICE:
> ret = io_splice(req, force_nonblock);
> break;
> + case IORING_OP_VMSPLICE:
> + ret = io_vmsplice(req, force_nonblock);
> + break;
> case IORING_OP_PROVIDE_BUFFERS:
> ret = io_provide_buffers(req, force_nonblock, cs);
> break;
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index d31a2a1e8ef9..6bc79f9bb123 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -137,6 +137,7 @@ enum {
> IORING_OP_SHUTDOWN,
> IORING_OP_RENAMEAT,
> IORING_OP_UNLINKAT,
> + IORING_OP_VMSPLICE,
>
> /* this goes last, obviously */
> IORING_OP_LAST,
>
--
Pavel Begunkov
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2021-01-05 23:48 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-01-05 23:00 [PATCH] io_uring: Add vmsplice support arni
2021-01-05 23:00 ` [PATCH 1/2] splice: Make vmsplice public arni
2021-01-05 23:00 ` [PATCH 2/2] io_uring: Add vmsplice support arni
2021-01-05 23:43 ` Pavel Begunkov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox