From: Anuj Gupta <[email protected]>
To: [email protected], [email protected], [email protected],
[email protected], [email protected],
[email protected], [email protected], [email protected],
[email protected]
Cc: [email protected], [email protected],
[email protected], [email protected],
[email protected], [email protected],
[email protected], Anuj Gupta <[email protected]>,
Kanchan Joshi <[email protected]>
Subject: [PATCH v11 06/10] io_uring: introduce attributes for read/write and PI support
Date: Thu, 28 Nov 2024 16:52:36 +0530 [thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>
Add the ability to pass additional attributes along with read/write.
Application can prepare attribute-specific information and pass its
address using the SQE field:
__u64 attr_ptr;
Along with setting a mask indicating attributes being passed:
__u64 attr_type_mask;
Up to 64 attributes are allowed overall; currently only one attribute,
'IORING_RW_ATTR_FLAG_PI', is supported.
With the PI attribute, userspace can pass the following information:
- flags: integrity check flags IO_INTEGRITY_CHK_{GUARD/APPTAG/REFTAG}
- len: length of PI/metadata buffer
- addr: address of metadata buffer
- seed: seed value for reftag remapping
- app_tag: application defined 16b value
Process this information to prepare uio_meta_descriptor and pass it down
using kiocb->private.
PI attribute is supported only for direct IO.
Signed-off-by: Anuj Gupta <[email protected]>
Signed-off-by: Kanchan Joshi <[email protected]>
---
include/uapi/linux/io_uring.h | 16 +++++++
io_uring/io_uring.c | 2 +
io_uring/rw.c | 83 ++++++++++++++++++++++++++++++++++-
io_uring/rw.h | 14 +++++-
4 files changed, 112 insertions(+), 3 deletions(-)
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index aac9a4f8fa9a..38f0d6b10eaf 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -98,6 +98,10 @@ struct io_uring_sqe {
__u64 addr3;
__u64 __pad2[1];
};
+ struct {
+ __u64 attr_ptr; /* pointer to attribute information */
+ __u64 attr_type_mask; /* bit mask of attributes */
+ };
__u64 optval;
/*
* If the ring is initialized with IORING_SETUP_SQE128, then
@@ -107,6 +111,18 @@ struct io_uring_sqe {
};
};
+/* sqe->attr_type_mask flags */
+#define IORING_RW_ATTR_FLAG_PI (1U << 0)
+/* PI attribute information */
+struct io_uring_attr_pi {
+ __u16 flags; /* integrity check flags IO_INTEGRITY_CHK_{GUARD/APPTAG/REFTAG} */
+ __u16 app_tag; /* application defined 16b value */
+ __u32 len; /* length of PI/metadata buffer */
+ __u64 addr; /* address of metadata buffer */
+ __u64 seed; /* seed value for reftag remapping */
+ __u64 rsvd; /* must be zero; io_prep_rw_pi() returns -EINVAL otherwise */
+};
+
/*
* If sqe->file_index is set to this for opcodes that instantiate a new
* direct descriptor (like openat/openat2/accept), then io_uring will allocate
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 06ff41484e29..e4891f1ce52d 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -3893,6 +3893,8 @@ static int __init io_uring_init(void)
BUILD_BUG_SQE_ELEM(46, __u16, __pad3[0]);
BUILD_BUG_SQE_ELEM(48, __u64, addr3);
BUILD_BUG_SQE_ELEM_SIZE(48, 0, cmd);
+ BUILD_BUG_SQE_ELEM(48, __u64, attr_ptr);
+ BUILD_BUG_SQE_ELEM(56, __u64, attr_type_mask);
BUILD_BUG_SQE_ELEM(56, __u64, __pad2);
BUILD_BUG_ON(sizeof(struct io_uring_files_update) !=
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 0bcb83e4ce3c..04e4467ab0ee 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -257,11 +257,53 @@ static int io_prep_rw_setup(struct io_kiocb *req, int ddir, bool do_import)
return 0;
}
+static inline void io_meta_save_state(struct io_async_rw *io)
+{ /* Snapshot io->meta.seed and the io->meta.iter position into io->meta_state. */
+ io->meta_state.seed = io->meta.seed; /* NOTE(review): meta_state.seed is u32 but the user-supplied seed is __u64 - confirm no truncation */
+ iov_iter_save_state(&io->meta.iter, &io->meta_state.iter_meta);
+}
+
+static inline void io_meta_restore(struct io_async_rw *io, struct kiocb *kiocb)
+{ /* Put back the seed and meta iterator saved in io->meta_state. */
+ if (kiocb->ki_flags & IOCB_HAS_METADATA) { /* does nothing when the flag is not set */
+ io->meta.seed = io->meta_state.seed;
+ iov_iter_restore(&io->meta.iter, &io->meta_state.iter_meta);
+ }
+}
+
+static int io_prep_rw_pi(struct io_kiocb *req, struct io_rw *rw, int ddir,
+ u64 attr_ptr, u64 attr_type_mask) /* NOTE(review): attr_type_mask is not read in this body */
+{ /* Copy the PI attribute from attr_ptr and fill io->meta from it. */
+ struct io_uring_attr_pi pi_attr;
+ struct io_async_rw *io;
+ int ret;
+
+ if (copy_from_user(&pi_attr, u64_to_user_ptr(attr_ptr),
+ sizeof(pi_attr)))
+ return -EFAULT; /* attribute struct could not be copied from attr_ptr */
+
+ if (pi_attr.rsvd)
+ return -EINVAL; /* reserved field must be zero */
+
+ io = req->async_data;
+ io->meta.flags = pi_attr.flags;
+ io->meta.app_tag = pi_attr.app_tag;
+ io->meta.seed = pi_attr.seed;
+ ret = import_ubuf(ddir, u64_to_user_ptr(pi_attr.addr),
+ pi_attr.len, &io->meta.iter); /* import the user metadata buffer (addr, len) into meta.iter */
+ if (unlikely(ret < 0))
+ return ret;
+ rw->kiocb.ki_flags |= IOCB_HAS_METADATA; /* flag is tested by io_meta_restore() and io_rw_init_file() */
+ io_meta_save_state(io); /* record the initial seed/iterator for a later restore */
+ return ret;
+}
+
static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
int ddir, bool do_import)
{
struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
unsigned ioprio;
+ u64 attr_type_mask;
int ret;
rw->kiocb.ki_pos = READ_ONCE(sqe->off);
@@ -279,11 +321,28 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
rw->kiocb.ki_ioprio = get_current_ioprio();
}
rw->kiocb.dio_complete = NULL;
+ rw->kiocb.ki_flags = 0;
rw->addr = READ_ONCE(sqe->addr);
rw->len = READ_ONCE(sqe->len);
rw->flags = READ_ONCE(sqe->rw_flags);
- return io_prep_rw_setup(req, ddir, do_import);
+ ret = io_prep_rw_setup(req, ddir, do_import);
+
+ if (unlikely(ret))
+ return ret;
+
+ attr_type_mask = READ_ONCE(sqe->attr_type_mask);
+ if (attr_type_mask) {
+ u64 attr_ptr;
+
+ /* only PI attribute is supported currently */
+ if (attr_type_mask != IORING_RW_ATTR_FLAG_PI)
+ return -EINVAL;
+
+ attr_ptr = READ_ONCE(sqe->attr_ptr);
+ ret = io_prep_rw_pi(req, rw, ddir, attr_ptr, attr_type_mask);
+ }
+ return ret;
}
int io_prep_read(struct io_kiocb *req, const struct io_uring_sqe *sqe)
@@ -409,7 +468,9 @@ static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req)
static void io_resubmit_prep(struct io_kiocb *req)
{
struct io_async_rw *io = req->async_data;
+ struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
+ io_meta_restore(io, &rw->kiocb);
iov_iter_restore(&io->iter, &io->iter_state);
}
@@ -744,6 +805,10 @@ static bool io_rw_should_retry(struct io_kiocb *req)
if (kiocb->ki_flags & (IOCB_DIRECT | IOCB_HIPRI))
return false;
+ /* never retry for meta io */
+ if (kiocb->ki_flags & IOCB_HAS_METADATA)
+ return false;
+
/*
* just use poll if we can, and don't attempt if the fs doesn't
* support callback based unlocks
@@ -794,7 +859,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
if (!(req->flags & REQ_F_FIXED_FILE))
req->flags |= io_file_get_flags(file);
- kiocb->ki_flags = file->f_iocb_flags;
+ kiocb->ki_flags |= file->f_iocb_flags;
ret = kiocb_set_rw_flags(kiocb, rw->flags, rw_type);
if (unlikely(ret))
return ret;
@@ -828,6 +893,18 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
kiocb->ki_complete = io_complete_rw;
}
+ if (kiocb->ki_flags & IOCB_HAS_METADATA) {
+ struct io_async_rw *io = req->async_data;
+
+ /*
+ * We have a union of meta fields with wpq used for buffered-io
+ * in io_async_rw, so fail it here.
+ */
+ if (!(req->file->f_flags & O_DIRECT))
+ return -EOPNOTSUPP;
+ kiocb->private = &io->meta;
+ }
+
return 0;
}
@@ -902,6 +979,7 @@ static int __io_read(struct io_kiocb *req, unsigned int issue_flags)
* manually if we need to.
*/
iov_iter_restore(&io->iter, &io->iter_state);
+ io_meta_restore(io, kiocb);
do {
/*
@@ -1125,6 +1203,7 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
} else {
ret_eagain:
iov_iter_restore(&io->iter, &io->iter_state);
+ io_meta_restore(io, kiocb);
if (kiocb->ki_flags & IOCB_WRITE)
io_req_end_write(req);
return -EAGAIN;
diff --git a/io_uring/rw.h b/io_uring/rw.h
index 3f432dc75441..2d7656bd268d 100644
--- a/io_uring/rw.h
+++ b/io_uring/rw.h
@@ -2,6 +2,11 @@
#include <linux/pagemap.h>
+struct io_meta_state {
+ u32 seed; /* copy of io->meta.seed taken by io_meta_save_state() */
+ struct iov_iter_state iter_meta; /* saved state of io->meta.iter */
+};
+
struct io_async_rw {
size_t bytes_done;
struct iov_iter iter;
@@ -9,7 +14,14 @@ struct io_async_rw {
struct iovec fast_iov;
struct iovec *free_iovec;
int free_iov_nr;
- struct wait_page_queue wpq;
+ /* wpq is for buffered io, while meta fields are used with direct io */
+ union {
+ struct wait_page_queue wpq;
+ struct {
+ struct uio_meta meta;
+ struct io_meta_state meta_state;
+ };
+ };
};
int io_prep_read_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe);
--
2.25.1
next prev parent reply other threads:[~2024-11-28 11:46 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <CGME20241128113036epcas5p397ba228852b72fff671fe695c322a3ef@epcas5p3.samsung.com>
2024-11-28 11:22 ` [PATCH v11 00/10] Read/Write with meta/integrity Anuj Gupta
[not found] ` <CGME20241128113056epcas5p2c9278736c88c646e6f3c7480ffb2211f@epcas5p2.samsung.com>
2024-11-28 11:22 ` [PATCH v11 01/10] block: define set of integrity flags to be inherited by cloned bip Anuj Gupta
[not found] ` <CGME20241128113058epcas5p1f544aa328a27b59f96b48b94dc0bdf94@epcas5p1.samsung.com>
2024-11-28 11:22 ` [PATCH v11 02/10] block: copy back bounce buffer to user-space correctly in case of split Anuj Gupta
[not found] ` <CGME20241128113101epcas5p3fefab67892c16c7bbaba8063c5c4a2c1@epcas5p3.samsung.com>
2024-11-28 11:22 ` [PATCH v11 03/10] block: modify bio_integrity_map_user to accept iov_iter as argument Anuj Gupta
[not found] ` <CGME20241128113104epcas5p4c4bd9f936403295e4cbac7c1f52d9b30@epcas5p4.samsung.com>
2024-11-28 11:22 ` [PATCH v11 04/10] fs, iov_iter: define meta io descriptor Anuj Gupta
[not found] ` <CGME20241128113106epcas5p1b5b06683bfa72225f3c1ab102b9f361c@epcas5p1.samsung.com>
2024-11-28 11:22 ` [PATCH v11 05/10] fs: introduce IOCB_HAS_METADATA for metadata Anuj Gupta
[not found] ` <CGME20241128113109epcas5p46022c85174da65853c85a8848b32f164@epcas5p4.samsung.com>
2024-11-28 11:22 ` Anuj Gupta [this message]
2024-12-03 2:13 ` [PATCH v11 06/10] io_uring: introduce attributes for read/write and PI support Martin K. Petersen
2024-12-03 6:56 ` Anuj Gupta
2024-12-03 12:00 ` Pavel Begunkov
2024-12-04 8:09 ` Anuj Gupta
[not found] ` <CGME20241128113112epcas5p186ef86baaa3054effb7244c54ee2f991@epcas5p1.samsung.com>
2024-11-28 11:22 ` [PATCH v11 07/10] block: introduce BIP_CHECK_GUARD/REFTAG/APPTAG bip_flags Anuj Gupta
[not found] ` <CGME20241128113114epcas5p29c7e2a71a136cb50c636a9fe5d87bb0b@epcas5p2.samsung.com>
2024-11-28 11:22 ` [PATCH v11 08/10] nvme: add support for passing on the application tag Anuj Gupta
[not found] ` <CGME20241128113117epcas5p3b0387c302753c5424ba410f5b38ddeb9@epcas5p3.samsung.com>
2024-11-28 11:22 ` [PATCH v11 09/10] scsi: add support for user-meta interface Anuj Gupta
[not found] ` <CGME20241128113120epcas5p3bd415b5a09b3d5b793cbdda0b4102a62@epcas5p3.samsung.com>
2024-11-28 11:22 ` [PATCH v11 10/10] block: add support to pass user meta buffer Anuj Gupta
2024-11-29 16:04 ` [PATCH v11 00/10] Read/Write with meta/integrity Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
[email protected] \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox