From: Pavel Begunkov <[email protected]>
To: [email protected], [email protected]
Cc: [email protected]
Subject: Re: [PATCH 2/2] io_uring: Add vmsplice support
Date: Tue, 5 Jan 2021 23:43:58 +0000 [thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>
On 05/01/2021 23:00, [email protected] wrote:
> From: Árni Dagur <[email protected]>
>
> * The `sqe->splice_flags` field is used to hold flags.
> * We return -EAGAIN if force_nonblock is set.
>
> Signed-off-by: Árni Dagur <[email protected]>
> ---
> fs/io_uring.c | 76 +++++++++++++++++++++++++++++++++++
> include/uapi/linux/io_uring.h | 1 +
> 2 files changed, 77 insertions(+)
>
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index ca46f314640b..a99a89798386 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -531,6 +531,13 @@ struct io_splice {
> unsigned int flags;
> };
>
> +struct io_vmsplice {
> + struct file *file;
> + u64 addr;
> + u64 len;
> + unsigned int flags;
> +};
> +
> struct io_provide_buf {
> struct file *file;
> __u64 addr;
> @@ -692,6 +699,7 @@ struct io_kiocb {
> struct io_madvise madvise;
> struct io_epoll epoll;
> struct io_splice splice;
> + struct io_vmsplice vmsplice;
> struct io_provide_buf pbuf;
> struct io_statx statx;
> struct io_shutdown shutdown;
> @@ -967,6 +975,12 @@ static const struct io_op_def io_op_defs[] = {
> .unbound_nonreg_file = 1,
> .work_flags = IO_WQ_WORK_BLKCG,
> },
> + [IORING_OP_VMSPLICE] = {
> + .needs_file = 1,
> + .hash_reg_file = 1,
> + .unbound_nonreg_file = 1,
> + .work_flags = IO_WQ_WORK_MM,
> + },
> [IORING_OP_PROVIDE_BUFFERS] = {},
> [IORING_OP_REMOVE_BUFFERS] = {},
> [IORING_OP_TEE] = {
> @@ -3884,6 +3898,63 @@ static int io_splice(struct io_kiocb *req, bool force_nonblock)
> return 0;
> }
>
> +static int io_vmsplice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> +{
> + struct io_vmsplice *sp = &req->vmsplice;
> +
> + if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
> + return -EINVAL;
> + if (unlikely(READ_ONCE(sqe->off)))
> + return -EINVAL;
> +
> + sp->addr = READ_ONCE(sqe->addr);
> + sp->len = READ_ONCE(sqe->len);
> + sp->flags = READ_ONCE(sqe->splice_flags);
> +
> + if (sp->flags & ~SPLICE_F_ALL)
> + return -EINVAL;
> +
> + return 0;
> +}
> +
> +static int io_vmsplice(struct io_kiocb *req, bool force_nonblock)
> +{
> + struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
> + struct io_vmsplice *sp = &req->vmsplice;
> + void __user *buf = u64_to_user_ptr(sp->addr);
const struct iovec __user *uiov
> + struct iov_iter __iter, *iter = &__iter;
read/write either use ((struct io_async_rw *)req->async_data)->iter
or, to avoid an allocation, an on-stack iter. This function only ever
has the on-stack __iter, so why do you need *iter?
> + struct file *file = sp->file;
> + ssize_t io_size;
> + int type, ret;
> +
> + if (force_nonblock)
> + return -EAGAIN;
> +
> + if (file->f_mode & FMODE_WRITE)
> + type = WRITE;
> + else if (file->f_mode & FMODE_READ)
> + type = READ;
> + else {
> + ret = -EBADF;
> + goto err;
This jumps to kfree(iovec) while iovec still points to inline_vecs.
> + }
> +
> + ret = __import_iovec(type, buf, sp->len, UIO_FASTIOV, &iovec, iter,
> + req->ctx->compat);
This may happen asynchronously, long after io_uring_enter(submit) has
returned; e.g. if a user keeps uiov on the stack, it will fail or read
garbage.
So the options are either to make it part of the ABI -- users must keep
uiov valid until the request completes -- or to copy it while the request
is not yet async. For consistency with read/write I'd prefer the second.
> + if (ret < 0)
> + goto err;
> + io_size = iov_iter_count(iter);
> +
> + ret = do_vmsplice(file, iter, sp->flags);
> + if (ret != io_size) {
> +err:
> + req_set_fail_links(req);
> + }
> + io_req_complete(req, ret);
> + kfree(iovec);
> + return 0;
> +}
> +
> /*
> * IORING_OP_NOP just posts a completion event, nothing else.
> */
> @@ -6009,6 +6080,8 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
> return io_epoll_ctl_prep(req, sqe);
> case IORING_OP_SPLICE:
> return io_splice_prep(req, sqe);
> + case IORING_OP_VMSPLICE:
> + return io_vmsplice_prep(req, sqe);
> case IORING_OP_PROVIDE_BUFFERS:
> return io_provide_buffers_prep(req, sqe);
> case IORING_OP_REMOVE_BUFFERS:
> @@ -6262,6 +6335,9 @@ static int io_issue_sqe(struct io_kiocb *req, bool force_nonblock,
> case IORING_OP_SPLICE:
> ret = io_splice(req, force_nonblock);
> break;
> + case IORING_OP_VMSPLICE:
> + ret = io_vmsplice(req, force_nonblock);
> + break;
> case IORING_OP_PROVIDE_BUFFERS:
> ret = io_provide_buffers(req, force_nonblock, cs);
> break;
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index d31a2a1e8ef9..6bc79f9bb123 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -137,6 +137,7 @@ enum {
> IORING_OP_SHUTDOWN,
> IORING_OP_RENAMEAT,
> IORING_OP_UNLINKAT,
> + IORING_OP_VMSPLICE,
>
> /* this goes last, obviously */
> IORING_OP_LAST,
>
--
Pavel Begunkov
prev parent reply other threads:[~2021-01-05 23:48 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-01-05 23:00 [PATCH] io_uring: Add vmsplice support arni
2021-01-05 23:00 ` [PATCH 1/2] splice: Make vmsplice public arni
2021-01-05 23:00 ` [PATCH 2/2] io_uring: Add vmsplice support arni
2021-01-05 23:43 ` Pavel Begunkov [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox