public inbox for [email protected]
 help / color / mirror / Atom feed
From: Pavel Begunkov <[email protected]>
To: Bijan Mottahedeh <[email protected]>, [email protected]
Cc: [email protected]
Subject: Re: [PATCH 7/8] io_uring: support readv/writev with fixed buffers
Date: Tue, 17 Nov 2020 11:04:17 +0000	[thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>

On 12/11/2020 23:00, Bijan Mottahedeh wrote:
> Support readv/writev with fixed buffers, and introduce IOSQE_FIXED_BUFFER,
> consistent with fixed files.

I don't like it at all, see issues below. The actual implementation would
be much uglier.

I propose you split the series and push separately. Your first 6 patches
first, I don't have conceptual objections to them. Then registration sharing
(I still need to look it up). And then we can return to this, if you're not
yet convinced.

> 
> Signed-off-by: Bijan Mottahedeh <[email protected]>
> ---
>  fs/io_uring.c                 | 59 ++++++++++++++++++++++++++++++++++++++++---
>  include/uapi/linux/io_uring.h |  3 +++
>  2 files changed, 58 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 6020fd2..12c4144 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -625,6 +625,7 @@ enum {
>  	REQ_F_HARDLINK_BIT	= IOSQE_IO_HARDLINK_BIT,
>  	REQ_F_FORCE_ASYNC_BIT	= IOSQE_ASYNC_BIT,
>  	REQ_F_BUFFER_SELECT_BIT	= IOSQE_BUFFER_SELECT_BIT,
> +	REQ_F_FIXED_BUFFER_BIT	= IOSQE_FIXED_BUFFER_BIT,
>  
>  	REQ_F_FAIL_LINK_BIT,
>  	REQ_F_INFLIGHT_BIT,
> @@ -681,8 +682,12 @@ enum {
>  	REQ_F_WORK_INITIALIZED	= BIT(REQ_F_WORK_INITIALIZED_BIT),
>  	/* linked timeout is active, i.e. prepared by link's head */
>  	REQ_F_LTIMEOUT_ACTIVE	= BIT(REQ_F_LTIMEOUT_ACTIVE_BIT),
> +	/* ctx owns buffer */
> +	REQ_F_FIXED_BUFFER	= BIT(REQ_F_FIXED_BUFFER_BIT),
>  };
>  
> +#define REQ_F_FIXED_RSRC	(REQ_F_FIXED_FILE | REQ_F_FIXED_BUFFER)
> +
>  struct async_poll {
>  	struct io_poll_iocb	poll;
>  	struct io_poll_iocb	*double_poll;
> @@ -3191,6 +3196,46 @@ static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
>  	return __io_iov_buffer_select(req, iov, needs_lock);
>  }
>  
> +static ssize_t io_import_iovec_fixed(int rw, struct io_kiocb *req, void *buf,
> +				     unsigned segs, unsigned fast_segs,
> +				     struct iovec **iovec,
> +				     struct iov_iter *iter)
> +{
> +	struct io_ring_ctx *ctx = req->ctx;
> +	struct io_mapped_ubuf *imu;
> +	struct iovec *iov;
> +	u16 index, buf_index;
> +	ssize_t ret;
> +	unsigned long seg;
> +
> +	if (unlikely(!ctx->buf_data))
> +		return -EFAULT;
> +
> +	ret = import_iovec(rw, buf, segs, fast_segs, iovec, iter);

Did you test it? import_iovec() does access_ok() against each iov_base,
which in your case are an index.

> +	if (ret < 0)
> +		return ret;
> +
> +	iov = (struct iovec *)iter->iov;
> +
> +	for (seg = 0; seg < iter->nr_segs; seg++) {
> +		buf_index = *(u16 *)(&iov[seg].iov_base);

That's ugly, and also not consistent with rw_fixed, because iov_base is
used to calculate offset.

> +		if (unlikely(buf_index < 0 || buf_index >= ctx->nr_user_bufs))
> +			return -EFAULT;
> +
> +		index = array_index_nospec(buf_index, ctx->nr_user_bufs);
> +		imu = io_buf_from_index(ctx, index);
> +		if (!imu->ubuf || !imu->len)
> +			return -EFAULT;
> +		if (iov[seg].iov_len > imu->len)
> +			return -EFAULT;
> +
> +		iov[seg].iov_base = (void *)imu->ubuf;

Nope, that's not different from non registered version.
What import_fixed actually does is set up the iter argument to point
to a bvec (a vector of struct page *).

So it would either need to keep a vector of bvecs, that's a vector of vectors,
which is not supported by iter, etc., so you'll also need to iterate over them
in io_read/write and so on. Or flatten the 2D structure into 1D, but that's still ugly.



> +		ret += iov[seg].iov_len;
> +	}
> +
> +	return ret;
> +}
> +
>  static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
>  				 struct iovec **iovec, struct iov_iter *iter,
>  				 bool needs_lock)
> @@ -3201,6 +3246,12 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
>  	u8 opcode;
>  
>  	opcode = req->opcode;
> +
> +	if ((opcode == IORING_OP_READV || opcode == IORING_OP_WRITEV) &&
> +	    req->flags & REQ_F_FIXED_BUFFER)
> +		return (io_import_iovec_fixed(rw, req, buf, sqe_len,
> +					      UIO_FASTIOV, iovec, iter));
> +
>  	if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) {
>  		*iovec = NULL;
>  		return io_import_fixed(req, rw, iter);
> @@ -5692,7 +5743,7 @@ static int io_timeout_remove_prep(struct io_kiocb *req,
>  {
>  	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
>  		return -EINVAL;
> -	if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
> +	if (unlikely(req->flags & (REQ_F_FIXED_RSRC | REQ_F_BUFFER_SELECT)))

Why is it here?

#define REQ_F_FIXED_RSRC	(REQ_F_FIXED_FILE | REQ_F_FIXED_BUFFER)
So, why do you | with REQ_F_BUFFER_SELECT again here?


>  		return -EINVAL;
>  	if (sqe->ioprio || sqe->buf_index || sqe->len || sqe->timeout_flags)
>  		return -EINVAL;
> @@ -5867,7 +5918,7 @@ static int io_async_cancel_prep(struct io_kiocb *req,
>  {
>  	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
>  		return -EINVAL;
> -	if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
> +	if (unlikely(req->flags & (REQ_F_FIXED_RSRC | REQ_F_BUFFER_SELECT)))
>  		return -EINVAL;
>  	if (sqe->ioprio || sqe->off || sqe->len || sqe->cancel_flags)
>  		return -EINVAL;
> @@ -5889,7 +5940,7 @@ static int io_rsrc_update_prep(struct io_kiocb *req,
>  {
>  	if (unlikely(req->ctx->flags & IORING_SETUP_SQPOLL))
>  		return -EINVAL;
> -	if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
> +	if (unlikely(req->flags & (REQ_F_FIXED_RSRC | REQ_F_BUFFER_SELECT)))
>  		return -EINVAL;
>  	if (sqe->ioprio || sqe->rw_flags)
>  		return -EINVAL;
> @@ -6740,7 +6791,7 @@ static inline bool io_check_restriction(struct io_ring_ctx *ctx,
>  
>  #define SQE_VALID_FLAGS	(IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK|	\
>  				IOSQE_IO_HARDLINK | IOSQE_ASYNC | \
> -				IOSQE_BUFFER_SELECT)
> +				IOSQE_BUFFER_SELECT | IOSQE_FIXED_BUFFER)
>  
>  static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
>  		       const struct io_uring_sqe *sqe,
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index 17682b5..41da59c 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -70,6 +70,7 @@ enum {
>  	IOSQE_IO_HARDLINK_BIT,
>  	IOSQE_ASYNC_BIT,
>  	IOSQE_BUFFER_SELECT_BIT,
> +	IOSQE_FIXED_BUFFER_BIT,
>  };
>  
>  /*
> @@ -87,6 +88,8 @@ enum {
>  #define IOSQE_ASYNC		(1U << IOSQE_ASYNC_BIT)
>  /* select buffer from sqe->buf_group */
>  #define IOSQE_BUFFER_SELECT	(1U << IOSQE_BUFFER_SELECT_BIT)
> +/* use fixed buffer set */
> +#define IOSQE_FIXED_BUFFER	(1U << IOSQE_FIXED_BUFFER_BIT)

Unfortunately, we're almost out of flags bits -- it's a 1 byte
field and 6 bits are already taken. Let's not use it.

-- 
Pavel Begunkov

  reply	other threads:[~2020-11-17 11:07 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-11-12 23:00 [PATCH 0/8] io_uring: buffer registration enhancements Bijan Mottahedeh
2020-11-12 23:00 ` [PATCH 1/8] io_uring: modularize io_sqe_buffer_register Bijan Mottahedeh
2020-11-12 23:00 ` [PATCH 2/8] io_uring: modularize io_sqe_buffers_register Bijan Mottahedeh
2020-11-12 23:00 ` [PATCH 3/8] io_uring: generalize fixed file functionality Bijan Mottahedeh
2020-11-12 23:00 ` [PATCH 4/8] io_uring: implement fixed buffers registration similar to fixed files Bijan Mottahedeh
2020-11-15 13:33   ` Pavel Begunkov
2020-11-16 21:24     ` Bijan Mottahedeh
2020-11-16 23:09       ` Pavel Begunkov
2020-11-17  0:41         ` Bijan Mottahedeh
2020-11-12 23:00 ` [PATCH 5/8] io_uring: generalize files_update functionlity to rsrc_update Bijan Mottahedeh
2020-11-12 23:00 ` [PATCH 6/8] io_uring: support buffer registration updates Bijan Mottahedeh
2020-11-18 20:17   ` Pavel Begunkov
2020-12-09  0:42     ` Bijan Mottahedeh
2020-11-12 23:00 ` [PATCH 7/8] io_uring: support readv/writev with fixed buffers Bijan Mottahedeh
2020-11-17 11:04   ` Pavel Begunkov [this message]
2020-11-17 22:59     ` Bijan Mottahedeh
2020-11-18  9:14       ` Pavel Begunkov
2020-11-18 20:12       ` Pavel Begunkov
     [not found]         ` <[email protected]>
     [not found]           ` <[email protected]>
2020-11-19 19:27             ` Bijan Mottahedeh
2020-11-12 23:00 ` [PATCH 8/8] io_uring: support buffer registration sharing Bijan Mottahedeh
2020-11-16 23:28 ` [PATCH 0/8] io_uring: buffer registration enhancements Pavel Begunkov
2020-11-17  0:21   ` Bijan Mottahedeh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox