public inbox for [email protected]
 help / color / mirror / Atom feed
From: Anuj Gupta <[email protected]>
To: [email protected], [email protected], [email protected],
	[email protected], [email protected], [email protected]
Cc: [email protected], [email protected],
	[email protected], Anuj Gupta <[email protected]>,
	Kanchan Joshi <[email protected]>
Subject: [PATCH v2 08/10] io_uring/rw: add support to send meta along with read/write
Date: Wed, 26 Jun 2024 15:36:58 +0530	[thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>

This patch adds the capability of sending meta along with read/write.
This meta is represented by a newly introduced 'struct io_uring_meta'
which specifies information such as meta type/flags/buffer/length and
apptag.
Application sets up a SQE128 ring, prepares io_uring_meta within the SQE
at offset pointed by sqe->cmd.
The patch processes the user-passed information to prepare uio_meta
descriptor and passes it down using kiocb->private.

Meta exchange is supported only for direct IO.
Also vectored read/write operations with meta are not supported
currently.

Signed-off-by: Anuj Gupta <[email protected]>
Signed-off-by: Kanchan Joshi <[email protected]>
---
 include/linux/fs.h            |  1 +
 include/uapi/linux/io_uring.h | 30 +++++++++++++++-
 io_uring/io_uring.c           |  7 ++++
 io_uring/rw.c                 | 68 +++++++++++++++++++++++++++++++++--
 io_uring/rw.h                 |  9 ++++-
 5 files changed, 110 insertions(+), 5 deletions(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index db26b4a70c62..0132565288c2 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -330,6 +330,7 @@ struct readahead_control;
 #define IOCB_NOIO		(1 << 20)
 /* can use bio alloc cache */
 #define IOCB_ALLOC_CACHE	(1 << 21)
+#define IOCB_HAS_META		(1 << 22)
 /*
  * IOCB_DIO_CALLER_COMP can be set by the iocb owner, to indicate that the
  * iocb completion can be passed back to the owner for execution from a safe
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 2aaf7ee256ac..9140c66b315b 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -101,12 +101,40 @@ struct io_uring_sqe {
 		__u64	optval;
 		/*
 		 * If the ring is initialized with IORING_SETUP_SQE128, then
-		 * this field is used for 80 bytes of arbitrary command data
+		 * this field is starting offset for 80 bytes of data.
+		 * This data is opaque for uring command op. And for meta io,
+		 * this contains 'struct io_uring_meta'.
 		 */
 		__u8	cmd[0];
 	};
 };
 
+enum io_uring_sqe_meta_type_bits {
+	META_TYPE_INTEGRITY_BIT,
+	/* not a real meta type; just to make sure that we don't overflow */
+	META_TYPE_LAST_BIT,
+};
+
+/* meta type flags */
+#define META_TYPE_INTEGRITY	(1U << META_TYPE_INTEGRITY_BIT)
+
+struct io_uring_meta {
+	__u16	meta_type;
+	__u16	meta_flags;
+	__u32	meta_len;
+	__u64	meta_addr;
+	/* the next 64 bytes goes to SQE128 */
+	__u16	apptag;
+	__u8	pad[62];
+};
+
+/*
+ * flags for integrity meta
+ */
+#define INTEGRITY_CHK_GUARD	(1U << 0)	/* enforce guard check */
+#define INTEGRITY_CHK_APPTAG	(1U << 1)	/* enforce app tag check */
+#define INTEGRITY_CHK_REFTAG	(1U << 2)	/* enforce ref tag check */
+
 /*
  * If sqe->file_index is set to this for opcodes that instantiate a new
  * direct descriptor (like openat/openat2/accept), then io_uring will allocate
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 7ed1e009aaec..0d26ee1193ca 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -3704,6 +3704,13 @@ static int __init io_uring_init(void)
 	/* top 8bits are for internal use */
 	BUILD_BUG_ON((IORING_URING_CMD_MASK & 0xff000000) != 0);
 
+	BUILD_BUG_ON(sizeof(struct io_uring_meta) >
+		     2 * sizeof(struct io_uring_sqe) -
+		     offsetof(struct io_uring_sqe, cmd));
+
+	BUILD_BUG_ON(META_TYPE_LAST_BIT >
+		     8 * sizeof_field(struct io_uring_meta, meta_type));
+
 	io_uring_optable_init();
 
 	/*
diff --git a/io_uring/rw.c b/io_uring/rw.c
index c004d21e2f12..e8f5b5af4d2f 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -23,6 +23,8 @@
 #include "poll.h"
 #include "rw.h"
 
+#define	INTEGRITY_VALID_FLAGS (INTEGRITY_CHK_GUARD | INTEGRITY_CHK_APPTAG | \
+			       INTEGRITY_CHK_REFTAG)
 struct io_rw {
 	/* NOTE: kiocb has the file as the first member, so don't do it here */
 	struct kiocb			kiocb;
@@ -247,6 +249,42 @@ static int io_prep_rw_setup(struct io_kiocb *req, int ddir, bool do_import)
 	return 0;
 }
 
+static int io_prep_rw_meta(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+			   struct io_rw *rw, int ddir)
+{
+	const struct io_uring_meta *md = (struct io_uring_meta *)sqe->cmd;
+	u16 meta_type = READ_ONCE(md->meta_type);
+	const struct io_issue_def *def;
+	struct io_async_rw *io;
+	int ret;
+
+	if (!meta_type)
+		return 0;
+	if (!(meta_type & META_TYPE_INTEGRITY))
+		return -EINVAL;
+
+	/* should fit into two bytes */
+	BUILD_BUG_ON(INTEGRITY_VALID_FLAGS >= (1 << 16));
+
+	def = &io_issue_defs[req->opcode];
+	if (def->vectored)
+		return -EOPNOTSUPP;
+
+	io = req->async_data;
+	io->meta.flags = READ_ONCE(md->meta_flags);
+	if (io->meta.flags & ~INTEGRITY_VALID_FLAGS)
+		return -EINVAL;
+
+	io->meta.apptag = READ_ONCE(md->apptag);
+	ret = import_ubuf(ddir, u64_to_user_ptr(READ_ONCE(md->meta_addr)),
+			  READ_ONCE(md->meta_len), &io->meta.iter);
+	if (unlikely(ret < 0))
+		return ret;
+	rw->kiocb.ki_flags |= IOCB_HAS_META;
+	iov_iter_save_state(&io->meta.iter, &io->iter_meta_state);
+	return ret;
+}
+
 static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 		      int ddir, bool do_import)
 {
@@ -269,11 +307,16 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 		rw->kiocb.ki_ioprio = get_current_ioprio();
 	}
 	rw->kiocb.dio_complete = NULL;
+	rw->kiocb.ki_flags = 0;
 
 	rw->addr = READ_ONCE(sqe->addr);
 	rw->len = READ_ONCE(sqe->len);
 	rw->flags = READ_ONCE(sqe->rw_flags);
-	return io_prep_rw_setup(req, ddir, do_import);
+	ret = io_prep_rw_setup(req, ddir, do_import);
+
+	if (unlikely(req->ctx->flags & IORING_SETUP_SQE128 && !ret))
+		ret = io_prep_rw_meta(req, sqe, rw, ddir);
+	return ret;
 }
 
 int io_prep_read(struct io_kiocb *req, const struct io_uring_sqe *sqe)
@@ -400,7 +443,10 @@ static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req)
 static void io_resubmit_prep(struct io_kiocb *req)
 {
 	struct io_async_rw *io = req->async_data;
+	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
 
+	if (unlikely(rw->kiocb.ki_flags & IOCB_HAS_META))
+		iov_iter_restore(&io->meta.iter, &io->iter_meta_state);
 	iov_iter_restore(&io->iter, &io->iter_state);
 }
 
@@ -768,8 +814,12 @@ static inline int io_iter_do_read(struct io_rw *rw, struct iov_iter *iter)
 
 static bool need_complete_io(struct io_kiocb *req)
 {
+	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
+
+	/* Exclude meta IO as we don't support partial completion for that */
 	return req->flags & REQ_F_ISREG ||
-		S_ISBLK(file_inode(req->file)->i_mode);
+		S_ISBLK(file_inode(req->file)->i_mode) ||
+		!(rw->kiocb.ki_flags & IOCB_HAS_META);
 }
 
 static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
@@ -786,7 +836,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
 	if (!(req->flags & REQ_F_FIXED_FILE))
 		req->flags |= io_file_get_flags(file);
 
-	kiocb->ki_flags = file->f_iocb_flags;
+	kiocb->ki_flags |= file->f_iocb_flags;
 	ret = kiocb_set_rw_flags(kiocb, rw->flags, rw_type);
 	if (unlikely(ret))
 		return ret;
@@ -815,6 +865,14 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
 		kiocb->ki_complete = io_complete_rw;
 	}
 
+	if (unlikely(kiocb->ki_flags & IOCB_HAS_META)) {
+		struct io_async_rw *io = req->async_data;
+
+		if (!(req->file->f_flags & O_DIRECT))
+			return -EOPNOTSUPP;
+		kiocb->private = &io->meta;
+	}
+
 	return 0;
 }
 
@@ -881,6 +939,8 @@ static int __io_read(struct io_kiocb *req, unsigned int issue_flags)
 	 * manually if we need to.
 	 */
 	iov_iter_restore(&io->iter, &io->iter_state);
+	if (unlikely(kiocb->ki_flags & IOCB_HAS_META))
+		iov_iter_restore(&io->meta.iter, &io->iter_meta_state);
 
 	do {
 		/*
@@ -1091,6 +1151,8 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
 	} else {
 ret_eagain:
 		iov_iter_restore(&io->iter, &io->iter_state);
+		if (unlikely(kiocb->ki_flags & IOCB_HAS_META))
+			iov_iter_restore(&io->meta.iter, &io->iter_meta_state);
 		if (kiocb->ki_flags & IOCB_WRITE)
 			io_req_end_write(req);
 		return -EAGAIN;
diff --git a/io_uring/rw.h b/io_uring/rw.h
index 3f432dc75441..49944b539c51 100644
--- a/io_uring/rw.h
+++ b/io_uring/rw.h
@@ -9,7 +9,14 @@ struct io_async_rw {
 	struct iovec			fast_iov;
 	struct iovec			*free_iovec;
 	int				free_iov_nr;
-	struct wait_page_queue		wpq;
+	/* wpq is for buffered io, while meta fields are used with direct io*/
+	union {
+		struct wait_page_queue		wpq;
+		struct {
+			struct uio_meta			meta;
+			struct iov_iter_state		iter_meta_state;
+		};
+	};
 };
 
 int io_prep_read_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe);
-- 
2.25.1


  parent reply	other threads:[~2024-06-26 11:43 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <CGME20240626101415epcas5p3b06a963aa0b0196d6599fb86c90bc38c@epcas5p3.samsung.com>
2024-06-26 10:06 ` [PATCH v2 00/10] Read/Write with meta/integrity Anuj Gupta
     [not found]   ` <CGME20240626101511epcas5p31c95b67da58d408c371c49f8719140fc@epcas5p3.samsung.com>
2024-06-26 10:06     ` [PATCH v2 01/10] block: change rq_integrity_vec to respect the iterator Anuj Gupta
     [not found]   ` <CGME20240626101513epcas5p10b3f8470148abb10ce3edfb90814cd94@epcas5p1.samsung.com>
2024-06-26 10:06     ` [PATCH v2 02/10] block: set bip_vcnt correctly Anuj Gupta
2024-06-28  6:04       ` Christoph Hellwig
2024-06-28 20:35         ` Jens Axboe
     [not found]   ` <CGME20240626101516epcas5p19fb40e8231d1832cab3d031672f0109e@epcas5p1.samsung.com>
2024-06-26 10:06     ` [PATCH v2 03/10] block: copy bip_max_vcnt vecs instead of bip_vcnt during clone Anuj Gupta
2024-06-27  6:14       ` Christoph Hellwig
     [not found]   ` <CGME20240626101518epcas5p17e046bca77b218fc6914ddeb182eb42e@epcas5p1.samsung.com>
2024-06-26 10:06     ` [PATCH v2 04/10] block: Handle meta bounce buffer correctly in case of split Anuj Gupta
2024-06-27  6:16       ` Christoph Hellwig
     [not found]   ` <CGME20240626101519epcas5p163b0735c1604a228196f0e8c14773005@epcas5p1.samsung.com>
2024-06-26 10:06     ` [PATCH v2 05/10] block: introduce BIP_CLONED flag Anuj Gupta
2024-06-27  6:21       ` Christoph Hellwig
2024-06-27 12:09         ` Christoph Hellwig
     [not found]   ` <CGME20240626101521epcas5p42b0c1c0e123996b199e058bae9a69123@epcas5p4.samsung.com>
2024-06-26 10:06     ` [PATCH v2 06/10] block: modify bio_integrity_map_user to accept iov_iter as argument Anuj Gupta
2024-06-27  6:23       ` Christoph Hellwig
     [not found]   ` <CGME20240626101523epcas5p2616cf568575685bd251d28fc1398d4cd@epcas5p2.samsung.com>
2024-06-26 10:06     ` [PATCH v2 07/10] block: define meta io descriptor Anuj Gupta
2024-06-27  6:22       ` Christoph Hellwig
     [not found]   ` <CGME20240626101525epcas5p4dbcef84714e4e9214b951fe2ff649521@epcas5p4.samsung.com>
2024-06-26 10:06     ` Anuj Gupta [this message]
2024-06-26 17:17       ` [PATCH v2 08/10] io_uring/rw: add support to send meta along with read/write Gabriel Krisman Bertazi
2024-07-01 14:09         ` Anuj gupta
     [not found]   ` <CGME20240626101527epcas5p23e10a6701f552d16bd6a999418009ba0@epcas5p2.samsung.com>
2024-06-26 10:06     ` [PATCH v2 09/10] block: add support to pass user meta buffer Anuj Gupta
     [not found]   ` <CGME20240626101529epcas5p49976c46701337830c400cefd8f074b40@epcas5p4.samsung.com>
2024-06-26 10:07     ` [PATCH v2 10/10] nvme: add handling for user integrity buffer Anuj Gupta
2024-06-27  6:29       ` Christoph Hellwig
2024-06-27  6:05   ` [PATCH v2 00/10] Read/Write with meta/integrity Christoph Hellwig
2024-06-27 19:12     ` Kanchan Joshi
2024-06-28 20:36   ` (subset) " Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox