public inbox for [email protected]
 help / color / mirror / Atom feed
From: Gabriel Krisman Bertazi <[email protected]>
To: Anuj Gupta <[email protected]>
Cc: [email protected],  [email protected],  [email protected],
	[email protected],  [email protected],  [email protected],
	[email protected],  [email protected],
	[email protected],  Kanchan Joshi <[email protected]>
Subject: Re: [PATCH v2 08/10] io_uring/rw: add support to send meta along with read/write
Date: Wed, 26 Jun 2024 13:17:28 -0400	[thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]> (Anuj Gupta's message of "Wed, 26 Jun 2024 15:36:58 +0530")

Anuj Gupta <[email protected]> writes:

> This patch adds the capability of sending meta along with read/write.
> This meta is represented by a newly introduced 'struct io_uring_meta'
> which specifies information such as meta type/flags/buffer/length and
> apptag.
> Application sets up a SQE128 ring, prepares io_uring_meta within the SQE
> at offset pointed by sqe->cmd.
> The patch processes the user-passed information to prepare uio_meta
> descriptor and passes it down using kiocb->private.
>
> Meta exchange is supported only for direct IO.
> Also vectored read/write operations with meta are not supported
> currently.
>
> Signed-off-by: Anuj Gupta <[email protected]>
> Signed-off-by: Kanchan Joshi <[email protected]>
> ---
>  include/linux/fs.h            |  1 +
>  include/uapi/linux/io_uring.h | 30 +++++++++++++++-
>  io_uring/io_uring.c           |  7 ++++
>  io_uring/rw.c                 | 68 +++++++++++++++++++++++++++++++++--
>  io_uring/rw.h                 |  9 ++++-
>  5 files changed, 110 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index db26b4a70c62..0132565288c2 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -330,6 +330,7 @@ struct readahead_control;
>  #define IOCB_NOIO		(1 << 20)
>  /* can use bio alloc cache */
>  #define IOCB_ALLOC_CACHE	(1 << 21)
> +#define IOCB_HAS_META		(1 << 22)
>  /*
>   * IOCB_DIO_CALLER_COMP can be set by the iocb owner, to indicate that the
>   * iocb completion can be passed back to the owner for execution from a safe
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index 2aaf7ee256ac..9140c66b315b 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -101,12 +101,40 @@ struct io_uring_sqe {
>  		__u64	optval;
>  		/*
>  		 * If the ring is initialized with IORING_SETUP_SQE128, then
> -		 * this field is used for 80 bytes of arbitrary command data
> +		 * this field is starting offset for 80 bytes of data.
> +		 * This data is opaque for uring command op. And for meta io,
> +		 * this contains 'struct io_uring_meta'.
>  		 */
>  		__u8	cmd[0];
>  	};
>  };
>  
> +enum io_uring_sqe_meta_type_bits {
> +	META_TYPE_INTEGRITY_BIT,
> +	/* not a real meta type; just to make sure that we don't overflow */
> +	META_TYPE_LAST_BIT,
> +};
> +
> +/* meta type flags */
> +#define META_TYPE_INTEGRITY	(1U << META_TYPE_INTEGRITY_BIT)
> +
> +struct io_uring_meta {
> +	__u16	meta_type;
> +	__u16	meta_flags;
> +	__u32	meta_len;
> +	__u64	meta_addr;
> +	/* the next 64 bytes goes to SQE128 */
> +	__u16	apptag;
> +	__u8	pad[62];
> +};
> +
> +/*
> + * flags for integrity meta
> + */
> +#define INTEGRITY_CHK_GUARD	(1U << 0)	/* enforce guard check */
> +#define INTEGRITY_CHK_APPTAG	(1U << 1)	/* enforce app tag check */
> +#define INTEGRITY_CHK_REFTAG	(1U << 2)	/* enforce ref tag check */
> +
>  /*
>   * If sqe->file_index is set to this for opcodes that instantiate a new
>   * direct descriptor (like openat/openat2/accept), then io_uring will allocate
> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
> index 7ed1e009aaec..0d26ee1193ca 100644
> --- a/io_uring/io_uring.c
> +++ b/io_uring/io_uring.c
> @@ -3704,6 +3704,13 @@ static int __init io_uring_init(void)
>  	/* top 8bits are for internal use */
>  	BUILD_BUG_ON((IORING_URING_CMD_MASK & 0xff000000) != 0);
>  
> +	BUILD_BUG_ON(sizeof(struct io_uring_meta) >
> +		     2 * sizeof(struct io_uring_sqe) -
> +		     offsetof(struct io_uring_sqe, cmd));
> +
> +	BUILD_BUG_ON(META_TYPE_LAST_BIT >
> +		     8 * sizeof_field(struct io_uring_meta, meta_type));
> +
>  	io_uring_optable_init();
>  
>  	/*
> diff --git a/io_uring/rw.c b/io_uring/rw.c
> index c004d21e2f12..e8f5b5af4d2f 100644
> --- a/io_uring/rw.c
> +++ b/io_uring/rw.c
> @@ -23,6 +23,8 @@
>  #include "poll.h"
>  #include "rw.h"
>  
> +#define	INTEGRITY_VALID_FLAGS (INTEGRITY_CHK_GUARD | INTEGRITY_CHK_APPTAG | \
> +			       INTEGRITY_CHK_REFTAG)
>  struct io_rw {
>  	/* NOTE: kiocb has the file as the first member, so don't do it here */
>  	struct kiocb			kiocb;
> @@ -247,6 +249,42 @@ static int io_prep_rw_setup(struct io_kiocb *req, int ddir, bool do_import)
>  	return 0;
>  }
>  
> +static int io_prep_rw_meta(struct io_kiocb *req, const struct io_uring_sqe *sqe,
> +			   struct io_rw *rw, int ddir)
> +{
> +	const struct io_uring_meta *md = (struct io_uring_meta *)sqe->cmd;
> +	u16 meta_type = READ_ONCE(md->meta_type);
> +	const struct io_issue_def *def;
> +	struct io_async_rw *io;
> +	int ret;
> +
> +	if (!meta_type)
> +		return 0;
> +	if (!(meta_type & META_TYPE_INTEGRITY))
> +		return -EINVAL;
> +
> +	/* should fit into two bytes */
> +	BUILD_BUG_ON(INTEGRITY_VALID_FLAGS >= (1 << 16));
> +
> +	def = &io_issue_defs[req->opcode];
> +	if (def->vectored)
> +		return -EOPNOTSUPP;
> +
> +	io = req->async_data;
> +	io->meta.flags = READ_ONCE(md->meta_flags);
> +	if (io->meta.flags & ~INTEGRITY_VALID_FLAGS)
> +		return -EINVAL;
> +
> +	io->meta.apptag = READ_ONCE(md->apptag);
> +	ret = import_ubuf(ddir, u64_to_user_ptr(READ_ONCE(md->meta_addr)),
> +			  READ_ONCE(md->meta_len), &io->meta.iter);
> +	if (unlikely(ret < 0))
> +		return ret;
> +	rw->kiocb.ki_flags |= IOCB_HAS_META;
> +	iov_iter_save_state(&io->meta.iter, &io->iter_meta_state);
> +	return ret;
> +}
> +
>  static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
>  		      int ddir, bool do_import)
>  {
> @@ -269,11 +307,16 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
>  		rw->kiocb.ki_ioprio = get_current_ioprio();
>  	}
>  	rw->kiocb.dio_complete = NULL;
> +	rw->kiocb.ki_flags = 0;
>  
>  	rw->addr = READ_ONCE(sqe->addr);
>  	rw->len = READ_ONCE(sqe->len);
>  	rw->flags = READ_ONCE(sqe->rw_flags);
> -	return io_prep_rw_setup(req, ddir, do_import);
> +	ret = io_prep_rw_setup(req, ddir, do_import);
> +
> +	if (unlikely(req->ctx->flags & IORING_SETUP_SQE128 && !ret))
> +		ret = io_prep_rw_meta(req, sqe, rw, ddir);
> +	return ret;

Would it be useful to have a flag to differentiate a malformed SQE from
a SQE with io_uring_meta, instead of assuming sqe->cmd has it? We don't
check for addr3 at the moment and differently from uring_cmd, userspace
will be mixing writes commands with and without metadata to different
files, so it would be useful to catch that.

Also, just styling, but can you turn that !ret into a separate if leg?
We are bound to add more code here eventually, and the next patch to
touch it will end up doing it anyway. I mean:

ret = io_prep_rw_setup(req, ddir, do_import);
if (unlikely(ret))
	return ret;
if (unlikely(req->ctx->flags & IORING_SETUP_SQE128))
	ret = io_prep_rw_meta(req, sqe, rw, ddir);
return ret;

-- 
Gabriel Krisman Bertazi

  reply	other threads:[~2024-06-26 17:17 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <CGME20240626101415epcas5p3b06a963aa0b0196d6599fb86c90bc38c@epcas5p3.samsung.com>
2024-06-26 10:06 ` [PATCH v2 00/10] Read/Write with meta/integrity Anuj Gupta
     [not found]   ` <CGME20240626101511epcas5p31c95b67da58d408c371c49f8719140fc@epcas5p3.samsung.com>
2024-06-26 10:06     ` [PATCH v2 01/10] block: change rq_integrity_vec to respect the iterator Anuj Gupta
     [not found]   ` <CGME20240626101513epcas5p10b3f8470148abb10ce3edfb90814cd94@epcas5p1.samsung.com>
2024-06-26 10:06     ` [PATCH v2 02/10] block: set bip_vcnt correctly Anuj Gupta
2024-06-28  6:04       ` Christoph Hellwig
2024-06-28 20:35         ` Jens Axboe
     [not found]   ` <CGME20240626101516epcas5p19fb40e8231d1832cab3d031672f0109e@epcas5p1.samsung.com>
2024-06-26 10:06     ` [PATCH v2 03/10] block: copy bip_max_vcnt vecs instead of bip_vcnt during clone Anuj Gupta
2024-06-27  6:14       ` Christoph Hellwig
     [not found]   ` <CGME20240626101518epcas5p17e046bca77b218fc6914ddeb182eb42e@epcas5p1.samsung.com>
2024-06-26 10:06     ` [PATCH v2 04/10] block: Handle meta bounce buffer correctly in case of split Anuj Gupta
2024-06-27  6:16       ` Christoph Hellwig
     [not found]   ` <CGME20240626101519epcas5p163b0735c1604a228196f0e8c14773005@epcas5p1.samsung.com>
2024-06-26 10:06     ` [PATCH v2 05/10] block: introduce BIP_CLONED flag Anuj Gupta
2024-06-27  6:21       ` Christoph Hellwig
2024-06-27 12:09         ` Christoph Hellwig
     [not found]   ` <CGME20240626101521epcas5p42b0c1c0e123996b199e058bae9a69123@epcas5p4.samsung.com>
2024-06-26 10:06     ` [PATCH v2 06/10] block: modify bio_integrity_map_user to accept iov_iter as argument Anuj Gupta
2024-06-27  6:23       ` Christoph Hellwig
     [not found]   ` <CGME20240626101523epcas5p2616cf568575685bd251d28fc1398d4cd@epcas5p2.samsung.com>
2024-06-26 10:06     ` [PATCH v2 07/10] block: define meta io descriptor Anuj Gupta
2024-06-27  6:22       ` Christoph Hellwig
     [not found]   ` <CGME20240626101525epcas5p4dbcef84714e4e9214b951fe2ff649521@epcas5p4.samsung.com>
2024-06-26 10:06     ` [PATCH v2 08/10] io_uring/rw: add support to send meta along with read/write Anuj Gupta
2024-06-26 17:17       ` Gabriel Krisman Bertazi [this message]
2024-07-01 14:09         ` Anuj gupta
     [not found]   ` <CGME20240626101527epcas5p23e10a6701f552d16bd6a999418009ba0@epcas5p2.samsung.com>
2024-06-26 10:06     ` [PATCH v2 09/10] block: add support to pass user meta buffer Anuj Gupta
     [not found]   ` <CGME20240626101529epcas5p49976c46701337830c400cefd8f074b40@epcas5p4.samsung.com>
2024-06-26 10:07     ` [PATCH v2 10/10] nvme: add handling for user integrity buffer Anuj Gupta
2024-06-27  6:29       ` Christoph Hellwig
2024-06-27  6:05   ` [PATCH v2 00/10] Read/Write with meta/integrity Christoph Hellwig
2024-06-27 19:12     ` Kanchan Joshi
2024-06-28 20:36   ` (subset) " Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox