From: Anuj Gupta <[email protected]>
To: [email protected], [email protected], [email protected],
[email protected], [email protected],
[email protected]
Cc: [email protected], [email protected],
[email protected], [email protected],
[email protected], Anuj Gupta <[email protected]>,
Kanchan Joshi <[email protected]>
Subject: [PATCH v3 06/10] io_uring/rw: add support to send meta along with read/write
Date: Fri, 23 Aug 2024 16:08:06 +0530 [thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>
This patch adds the capability of sending meta along with read/write.
This meta is represented by a newly introduced 'struct io_uring_meta'
which specifies information such as meta type/flags/buffer/length and
apptag.
The application sets up an SQE128 ring and prepares io_uring_meta within
the second SQE.
The patch processes the user-passed information to prepare uio_meta
descriptor and passes it down using kiocb->private.
Meta exchange is supported only for direct IO.
Also, vectored read/write operations with meta are currently not
supported.
Signed-off-by: Anuj Gupta <[email protected]>
Signed-off-by: Kanchan Joshi <[email protected]>
---
include/linux/fs.h | 1 +
include/uapi/linux/io_uring.h | 32 ++++++++++++++++
io_uring/io_uring.c | 6 +++
io_uring/rw.c | 70 +++++++++++++++++++++++++++++++++--
io_uring/rw.h | 10 ++++-
5 files changed, 115 insertions(+), 4 deletions(-)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index fb0426f349fc..aec78bf3040c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -330,6 +330,7 @@ struct readahead_control;
#define IOCB_NOIO (1 << 20)
/* can use bio alloc cache */
#define IOCB_ALLOC_CACHE (1 << 21)
+#define IOCB_HAS_META (1 << 22)
/*
* IOCB_DIO_CALLER_COMP can be set by the iocb owner, to indicate that the
* iocb completion can be passed back to the owner for execution from a safe
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 042eab793e26..09e6cc022669 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -105,8 +105,40 @@ struct io_uring_sqe {
*/
__u8 cmd[0];
};
+ /*
+ * If the ring is initialized with IORING_SETUP_SQE128, then
+ * this field is the starting offset of the extra 64 bytes of data.
+ * For meta io, this contains 'struct io_uring_meta'.
+ */
+ __u8 big_sqe_cmd[0];
};
+enum io_uring_sqe_meta_type_bits {
+ META_TYPE_INTEGRITY_BIT, /* bit position of META_TYPE_INTEGRITY */
+ /* not a real meta type; bounds the bit count so meta_type can't overflow */
+ META_TYPE_LAST_BIT,
+};
+
+/* meta type flags */
+#define META_TYPE_INTEGRITY (1U << META_TYPE_INTEGRITY_BIT)
+
+/* meta descriptor; placed in the second SQE of an SQE128 ring (big_sqe_cmd) */
+struct io_uring_meta {
+ __u16 meta_type; /* META_TYPE_* bits; 0 means no meta is attached */
+ __u16 meta_flags; /* INTEGRITY_CHK_* flags for integrity meta */
+ __u32 meta_len; /* length of the user meta buffer */
+ __u64 meta_addr; /* user address of the meta buffer */
+ __u16 app_tag; /* application tag passed down with the meta */
+ __u8 pad[46]; /* pads the structure to 64 bytes */
+};
+
+/*
+ * flags for integrity meta
+ */
+#define INTEGRITY_CHK_GUARD (1U << 0) /* enforce guard check */
+#define INTEGRITY_CHK_APPTAG (1U << 1) /* enforce app tag check */
+#define INTEGRITY_CHK_REFTAG (1U << 2) /* enforce ref tag check */
+
/*
* If sqe->file_index is set to this for opcodes that instantiate a new
* direct descriptor (like openat/openat2/accept), then io_uring will allocate
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index a53f2f25a80b..743201d37611 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -3814,6 +3814,12 @@ static int __init io_uring_init(void)
/* top 8bits are for internal use */
BUILD_BUG_ON((IORING_URING_CMD_MASK & 0xff000000) != 0);
+ BUILD_BUG_ON(sizeof(struct io_uring_meta) >
+ sizeof(struct io_uring_sqe));
+
+ BUILD_BUG_ON(META_TYPE_LAST_BIT >
+ 8 * sizeof_field(struct io_uring_meta, meta_type));
+
io_uring_optable_init();
/*
diff --git a/io_uring/rw.c b/io_uring/rw.c
index c004d21e2f12..fadc17813f76 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -23,6 +23,8 @@
#include "poll.h"
#include "rw.h"
+#define INTEGRITY_VALID_FLAGS (INTEGRITY_CHK_GUARD | INTEGRITY_CHK_APPTAG | \
+ INTEGRITY_CHK_REFTAG)
struct io_rw {
/* NOTE: kiocb has the file as the first member, so don't do it here */
struct kiocb kiocb;
@@ -247,6 +249,42 @@ static int io_prep_rw_setup(struct io_kiocb *req, int ddir, bool do_import)
return 0;
}
+/* Set up io->meta from the io_uring_meta in the big SQE; returns 0 if unused */
+static int io_prep_rw_meta(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+ struct io_rw *rw, int ddir)
+{
+ const struct io_uring_meta *md = (const struct io_uring_meta *)sqe->big_sqe_cmd;
+ u16 meta_type = READ_ONCE(md->meta_type);
+ struct io_async_rw *io;
+ int ret;
+
+ if (!meta_type)
+ return 0;
+ if (!(meta_type & META_TYPE_INTEGRITY))
+ return -EINVAL;
+
+ /* should fit into two bytes */
+ BUILD_BUG_ON(INTEGRITY_VALID_FLAGS >= (1 << 16));
+
+ /* vectored read/write with meta is not supported */
+ if (io_issue_defs[req->opcode].vectored)
+ return -EOPNOTSUPP;
+
+ io = req->async_data;
+ io->meta.flags = READ_ONCE(md->meta_flags);
+ if (io->meta.flags & ~INTEGRITY_VALID_FLAGS)
+ return -EINVAL;
+
+ io->meta.app_tag = READ_ONCE(md->app_tag);
+ ret = import_ubuf(ddir, u64_to_user_ptr(READ_ONCE(md->meta_addr)),
+ READ_ONCE(md->meta_len), &io->meta.iter);
+ if (unlikely(ret < 0))
+ return ret;
+ rw->kiocb.ki_flags |= IOCB_HAS_META;
+ iov_iter_save_state(&io->meta.iter, &io->iter_meta_state);
+ return ret;
+}
+
static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
int ddir, bool do_import)
{
@@ -269,11 +307,18 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
rw->kiocb.ki_ioprio = get_current_ioprio();
}
rw->kiocb.dio_complete = NULL;
+ rw->kiocb.ki_flags = 0;
rw->addr = READ_ONCE(sqe->addr);
rw->len = READ_ONCE(sqe->len);
rw->flags = READ_ONCE(sqe->rw_flags);
- return io_prep_rw_setup(req, ddir, do_import);
+ ret = io_prep_rw_setup(req, ddir, do_import);
+
+ if (unlikely(ret))
+ return ret;
+ if (unlikely(req->ctx->flags & IORING_SETUP_SQE128))
+ ret = io_prep_rw_meta(req, sqe, rw, ddir);
+ return ret;
}
int io_prep_read(struct io_kiocb *req, const struct io_uring_sqe *sqe)
@@ -400,7 +445,10 @@ static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req)
static void io_resubmit_prep(struct io_kiocb *req)
{
struct io_async_rw *io = req->async_data;
+ struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
+ if (unlikely(rw->kiocb.ki_flags & IOCB_HAS_META))
+ iov_iter_restore(&io->meta.iter, &io->iter_meta_state); /* back to the state saved at prep */
iov_iter_restore(&io->iter, &io->iter_state);
}
@@ -768,8 +816,12 @@ static inline int io_iter_do_read(struct io_rw *rw, struct iov_iter *iter)
static bool need_complete_io(struct io_kiocb *req)
{
+ struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
+
+ /* Exclude meta IO as we don't support partial completion for that */
- return req->flags & REQ_F_ISREG ||
- S_ISBLK(file_inode(req->file)->i_mode);
+ return (req->flags & REQ_F_ISREG ||
+ S_ISBLK(file_inode(req->file)->i_mode)) &&
+ !(rw->kiocb.ki_flags & IOCB_HAS_META);
}
static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
@@ -786,7 +838,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
if (!(req->flags & REQ_F_FIXED_FILE))
req->flags |= io_file_get_flags(file);
- kiocb->ki_flags = file->f_iocb_flags;
+ kiocb->ki_flags |= file->f_iocb_flags;
ret = kiocb_set_rw_flags(kiocb, rw->flags, rw_type);
if (unlikely(ret))
return ret;
@@ -815,6 +867,14 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
kiocb->ki_complete = io_complete_rw;
}
+ if (unlikely(kiocb->ki_flags & IOCB_HAS_META)) {
+ struct io_async_rw *io = req->async_data;
+
+ if (!(req->file->f_flags & O_DIRECT))
+ return -EOPNOTSUPP;
+ kiocb->private = &io->meta;
+ }
+
return 0;
}
@@ -881,6 +941,8 @@ static int __io_read(struct io_kiocb *req, unsigned int issue_flags)
* manually if we need to.
*/
iov_iter_restore(&io->iter, &io->iter_state);
+ if (unlikely(kiocb->ki_flags & IOCB_HAS_META))
+ iov_iter_restore(&io->meta.iter, &io->iter_meta_state);
do {
/*
@@ -1091,6 +1153,8 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
} else {
ret_eagain:
iov_iter_restore(&io->iter, &io->iter_state);
+ if (unlikely(kiocb->ki_flags & IOCB_HAS_META))
+ iov_iter_restore(&io->meta.iter, &io->iter_meta_state);
if (kiocb->ki_flags & IOCB_WRITE)
io_req_end_write(req);
return -EAGAIN;
diff --git a/io_uring/rw.h b/io_uring/rw.h
index 3f432dc75441..ce7a865fac95 100644
--- a/io_uring/rw.h
+++ b/io_uring/rw.h
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/pagemap.h>
+#include <linux/bio-integrity.h>
struct io_async_rw {
size_t bytes_done;
@@ -9,7 +10,14 @@ struct io_async_rw {
struct iovec fast_iov;
struct iovec *free_iovec;
int free_iov_nr;
- struct wait_page_queue wpq;
+ /* wpq is for buffered io, while meta fields are used with direct io */
+ union {
+ struct wait_page_queue wpq;
+ struct {
+ struct uio_meta meta;
+ struct iov_iter_state iter_meta_state;
+ };
+ };
};
int io_prep_read_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe);
--
2.25.1
next prev parent reply other threads:[~2024-08-23 10:48 UTC|newest]
Thread overview: 36+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <CGME20240823104552epcas5p226dbbbd448cd0ee0955ffdd3ad1b112d@epcas5p2.samsung.com>
2024-08-23 10:38 ` [PATCH v3 00/10] Read/Write with meta/integrity Anuj Gupta
[not found] ` <CGME20240823104616epcas5p4bd315bd116ea7e32b1abf7e174af64a1@epcas5p4.samsung.com>
2024-08-23 10:38 ` [PATCH v3 01/10] block: define set of integrity flags to be inherited by cloned bip Anuj Gupta
2024-08-24 8:24 ` Christoph Hellwig
2024-08-29 3:05 ` Martin K. Petersen
[not found] ` <CGME20240823104618epcas5p4b9983678886dceed75edd9cbec9341b2@epcas5p4.samsung.com>
2024-08-23 10:38 ` [PATCH v3 02/10] block: introduce a helper to determine metadata bytes from data iter Anuj Gupta
2024-08-24 8:24 ` Christoph Hellwig
2024-08-29 3:06 ` Martin K. Petersen
[not found] ` <CGME20240823104620epcas5p2118c152963d6cadfbc9968790ac0e536@epcas5p2.samsung.com>
2024-08-23 10:38 ` [PATCH v3 03/10] block: handle split correctly for user meta bounce buffer Anuj Gupta
2024-08-24 8:31 ` Christoph Hellwig
2024-08-28 11:18 ` Anuj Gupta
2024-08-29 4:04 ` Christoph Hellwig
[not found] ` <CGME20240823104622epcas5p2e3b29f793eff9857c5712b3d6d327ed5@epcas5p2.samsung.com>
2024-08-23 10:38 ` [PATCH v3 04/10] block: modify bio_integrity_map_user to accept iov_iter as argument Anuj Gupta
[not found] ` <CGME20240823104624epcas5p40c1b0f3516100f69cbd31d45867cd289@epcas5p4.samsung.com>
2024-08-23 10:38 ` [PATCH v3 05/10] block: define meta io descriptor Anuj Gupta
2024-08-24 8:31 ` Christoph Hellwig
2024-08-29 3:05 ` Martin K. Petersen
[not found] ` <CGME20240823104627epcas5p2abcd2283f6fb3301e1a8e828e3c270ae@epcas5p2.samsung.com>
2024-08-23 10:38 ` Anuj Gupta [this message]
2024-08-24 8:33 ` [PATCH v3 06/10] io_uring/rw: add support to send meta along with read/write Christoph Hellwig
[not found] ` <CGME20240823104629epcas5p3fea0cb7e66b0446ddacf7648c08c3ba8@epcas5p3.samsung.com>
2024-08-23 10:38 ` [PATCH v3 07/10] block: introduce BIP_CHECK_GUARD/REFTAG/APPTAG bip_flags Anuj Gupta
2024-08-24 8:35 ` Christoph Hellwig
2024-08-28 13:42 ` Kanchan Joshi
2024-08-29 3:16 ` Martin K. Petersen
2024-08-29 4:06 ` Christoph Hellwig
2024-08-29 13:29 ` Anuj gupta
2024-09-12 12:40 ` Anuj Gupta
2024-09-13 2:06 ` Martin K. Petersen
2024-08-29 4:06 ` Christoph Hellwig
[not found] ` <CGME20240823104631epcas5p4f83b92081107fbefca78008ee319ff7e@epcas5p4.samsung.com>
2024-08-23 10:38 ` [PATCH v3 07/10] block,nvme: " Anuj Gupta
[not found] ` <CGME20240823104634epcas5p4ef1af26cc7146b4e8b7a4a1844ffe476@epcas5p4.samsung.com>
2024-08-23 10:38 ` [PATCH v3 08/10] block: add support to pass user meta buffer Anuj Gupta
2024-08-24 8:44 ` Christoph Hellwig
[not found] ` <CGME20240823104636epcas5p4825a6d2dd9e45cfbcc97895264662d30@epcas5p4.samsung.com>
2024-08-23 10:38 ` [PATCH v3 09/10] nvme: add handling for app_tag Anuj Gupta
2024-08-24 8:49 ` Christoph Hellwig
2024-08-29 3:00 ` Martin K. Petersen
2024-08-29 10:18 ` Kanchan Joshi
2024-09-13 2:05 ` Martin K. Petersen
[not found] ` <CGME20240823104639epcas5p11dbab393122841419368a86b4bd5c04b@epcas5p1.samsung.com>
2024-08-23 10:38 ` [PATCH v3 10/10] scsi: add support for user-meta interface Anuj Gupta
2024-08-24 8:52 ` Christoph Hellwig
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox