public inbox for [email protected]
 help / color / mirror / Atom feed
From: Anuj Gupta <[email protected]>
To: Pavel Begunkov <[email protected]>
Cc: [email protected], [email protected], [email protected],
	[email protected], [email protected],
	[email protected], [email protected], [email protected],
	[email protected], [email protected],
	[email protected], [email protected],
	[email protected], [email protected],
	[email protected],
	Kanchan Joshi <[email protected]>
Subject: Re: [PATCH v10 06/10] io_uring: introduce attributes for read/write and PI support
Date: Wed, 27 Nov 2024 15:16:44 +0530	[thread overview]
Message-ID: <20241127094644.GC22537@green245> (raw)
In-Reply-To: <[email protected]>

[-- Attachment #1: Type: text/plain, Size: 7332 bytes --]

On Tue, Nov 26, 2024 at 03:45:09PM +0000, Pavel Begunkov wrote:
> On 11/26/24 13:54, Anuj Gupta wrote:
> > On Tue, Nov 26, 2024 at 01:01:03PM +0000, Pavel Begunkov wrote:
> > > On 11/25/24 07:06, Anuj Gupta wrote:
> 
> Hmm, I have doubts it's going to work well because the union
> members have different sizes. Adding a new type could grow
> struct io_uring_attr, which is already bad for uapi. And it
> can't be stacked:
> 

How about something like this [1]? I have removed the io_uring_attr
structure, and with the mask scheme the user would pass attributes in
order of their types. Do you still see any cracks?

[1]

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index aac9a4f8fa9a..38f0d6b10eaf 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -98,6 +98,10 @@ struct io_uring_sqe {
 			__u64	addr3;
 			__u64	__pad2[1];
 		};
+		struct {
+			__u64	attr_ptr; /* pointer to attribute information */
+			__u64	attr_type_mask; /* bit mask of attributes */
+		};
 		__u64	optval;
 		/*
 		 * If the ring is initialized with IORING_SETUP_SQE128, then
@@ -107,6 +111,18 @@ struct io_uring_sqe {
 	};
 };
 
+/* sqe->attr_type_mask flags */
+#define IORING_RW_ATTR_FLAG_PI	(1U << 0)
+/* PI attribute information */
+struct io_uring_attr_pi {
+		__u16	flags;
+		__u16	app_tag;
+		__u32	len;
+		__u64	addr;
+		__u64	seed;
+		__u64	rsvd;
+};
+
 /*
  * If sqe->file_index is set to this for opcodes that instantiate a new
  * direct descriptor (like openat/openat2/accept), then io_uring will allocate
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index c3a7d0197636..02291ea679fb 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -3889,6 +3889,8 @@ static int __init io_uring_init(void)
 	BUILD_BUG_SQE_ELEM(46, __u16,  __pad3[0]);
 	BUILD_BUG_SQE_ELEM(48, __u64,  addr3);
 	BUILD_BUG_SQE_ELEM_SIZE(48, 0, cmd);
+	BUILD_BUG_SQE_ELEM(48, __u64, attr_ptr);
+	BUILD_BUG_SQE_ELEM(56, __u64, attr_type_mask);
 	BUILD_BUG_SQE_ELEM(56, __u64,  __pad2);
 
 	BUILD_BUG_ON(sizeof(struct io_uring_files_update) !=
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 0bcb83e4ce3c..8d2ec89fd76b 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -257,11 +257,53 @@ static int io_prep_rw_setup(struct io_kiocb *req, int ddir, bool do_import)
 	return 0;
 }
 
+static inline void io_meta_save_state(struct io_async_rw *io)
+{
+	io->meta_state.seed = io->meta.seed;
+	iov_iter_save_state(&io->meta.iter, &io->meta_state.iter_meta);
+}
+
+static inline void io_meta_restore(struct io_async_rw *io, struct kiocb *kiocb)
+{
+	if (kiocb->ki_flags & IOCB_HAS_METADATA) {
+		io->meta.seed = io->meta_state.seed;
+		iov_iter_restore(&io->meta.iter, &io->meta_state.iter_meta);
+	}
+}
+
+static int io_prep_rw_pi(struct io_kiocb *req, struct io_rw *rw, int ddir,
+			 u64 attr_ptr, u64 attr_type_mask)
+{
+	struct io_uring_attr_pi pi_attr;
+	struct io_async_rw *io;
+	int ret;
+
+	if (copy_from_user(&pi_attr, u64_to_user_ptr(attr_ptr),
+	    sizeof(pi_attr)))
+		return -EFAULT;
+
+	if (pi_attr.rsvd)
+		return -EINVAL;
+
+	io = req->async_data;
+	io->meta.flags = pi_attr.flags;
+	io->meta.app_tag = pi_attr.app_tag;
+	io->meta.seed = READ_ONCE(pi_attr.seed);
+	ret = import_ubuf(ddir, u64_to_user_ptr(pi_attr.addr),
+			  pi_attr.len, &io->meta.iter);
+	if (unlikely(ret < 0))
+		return ret;
+	rw->kiocb.ki_flags |= IOCB_HAS_METADATA;
+	io_meta_save_state(io);
+	return ret;
+}
+
 static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 		      int ddir, bool do_import)
 {
 	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
 	unsigned ioprio;
+	u64 attr_type_mask;
 	int ret;
 
 	rw->kiocb.ki_pos = READ_ONCE(sqe->off);
@@ -279,11 +321,28 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 		rw->kiocb.ki_ioprio = get_current_ioprio();
 	}
 	rw->kiocb.dio_complete = NULL;
+	rw->kiocb.ki_flags = 0;
 
 	rw->addr = READ_ONCE(sqe->addr);
 	rw->len = READ_ONCE(sqe->len);
 	rw->flags = READ_ONCE(sqe->rw_flags);
-	return io_prep_rw_setup(req, ddir, do_import);
+	ret = io_prep_rw_setup(req, ddir, do_import);
+
+	if (unlikely(ret))
+		return ret;
+
+	attr_type_mask = READ_ONCE(sqe->attr_type_mask);
+	if (attr_type_mask) {
+		u64 attr_ptr;
+
+		/* only PI attribute is supported currently */
+		if (attr_type_mask != IORING_RW_ATTR_FLAG_PI)
+			return -EINVAL;
+
+		attr_ptr = READ_ONCE(sqe->attr_ptr);
+		ret = io_prep_rw_pi(req, rw, ddir, attr_ptr, attr_type_mask);
+	}
+	return ret;
 }
 
 int io_prep_read(struct io_kiocb *req, const struct io_uring_sqe *sqe)
@@ -409,7 +468,9 @@ static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req)
 static void io_resubmit_prep(struct io_kiocb *req)
 {
 	struct io_async_rw *io = req->async_data;
+	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
 
+	io_meta_restore(io, &rw->kiocb);
 	iov_iter_restore(&io->iter, &io->iter_state);
 }
 
@@ -744,6 +805,10 @@ static bool io_rw_should_retry(struct io_kiocb *req)
 	if (kiocb->ki_flags & (IOCB_DIRECT | IOCB_HIPRI))
 		return false;
 
+	/* never retry for meta io */
+	if (kiocb->ki_flags & IOCB_HAS_METADATA)
+		return false;
+
 	/*
 	 * just use poll if we can, and don't attempt if the fs doesn't
 	 * support callback based unlocks
@@ -794,7 +859,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
 	if (!(req->flags & REQ_F_FIXED_FILE))
 		req->flags |= io_file_get_flags(file);
 
-	kiocb->ki_flags = file->f_iocb_flags;
+	kiocb->ki_flags |= file->f_iocb_flags;
 	ret = kiocb_set_rw_flags(kiocb, rw->flags, rw_type);
 	if (unlikely(ret))
 		return ret;
@@ -828,6 +893,18 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
 		kiocb->ki_complete = io_complete_rw;
 	}
 
+	if (kiocb->ki_flags & IOCB_HAS_METADATA) {
+		struct io_async_rw *io = req->async_data;
+
+		/*
+		 * We have a union of meta fields with wpq used for buffered-io
+		 * in io_async_rw, so fail it here.
+		 */
+		if (!(req->file->f_flags & O_DIRECT))
+			return -EOPNOTSUPP;
+		kiocb->private = &io->meta;
+	}
+
 	return 0;
 }
 
@@ -902,6 +979,7 @@ static int __io_read(struct io_kiocb *req, unsigned int issue_flags)
 	 * manually if we need to.
 	 */
 	iov_iter_restore(&io->iter, &io->iter_state);
+	io_meta_restore(io, kiocb);
 
 	do {
 		/*
@@ -1125,6 +1203,7 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
 	} else {
 ret_eagain:
 		iov_iter_restore(&io->iter, &io->iter_state);
+		io_meta_restore(io, kiocb);
 		if (kiocb->ki_flags & IOCB_WRITE)
 			io_req_end_write(req);
 		return -EAGAIN;
diff --git a/io_uring/rw.h b/io_uring/rw.h
index 3f432dc75441..2d7656bd268d 100644
--- a/io_uring/rw.h
+++ b/io_uring/rw.h
@@ -2,6 +2,11 @@
 
 #include <linux/pagemap.h>
 
+struct io_meta_state {
+	u32			seed;
+	struct iov_iter_state	iter_meta;
+};
+
 struct io_async_rw {
 	size_t				bytes_done;
 	struct iov_iter			iter;
@@ -9,7 +14,14 @@ struct io_async_rw {
 	struct iovec			fast_iov;
 	struct iovec			*free_iovec;
 	int				free_iov_nr;
-	struct wait_page_queue		wpq;
+	/* wpq is for buffered io, while meta fields are used with direct io */
+	union {
+		struct wait_page_queue		wpq;
+		struct {
+			struct uio_meta			meta;
+			struct io_meta_state		meta_state;
+		};
+	};
 };
 
 int io_prep_read_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe);
-- 
2.25.1

[-- Attachment #2: Type: text/plain, Size: 0 bytes --]



  parent reply	other threads:[~2024-11-27  9:56 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <CGME20241125071431epcas5p3a3d9633606d2f0b46de2c144bb7f3711@epcas5p3.samsung.com>
2024-11-25  7:06 ` [PATCH v10 00/10] Read/Write with meta/integrity Anuj Gupta
     [not found]   ` <CGME20241125071449epcas5p1f1d44ee61d1af7c847920680767637e7@epcas5p1.samsung.com>
2024-11-25  7:06     ` [PATCH v10 01/10] block: define set of integrity flags to be inherited by cloned bip Anuj Gupta
     [not found]   ` <CGME20241125071451epcas5p2e50329d88842569e5a2a07b918406d28@epcas5p2.samsung.com>
2024-11-25  7:06     ` [PATCH v10 02/10] block: copy back bounce buffer to user-space correctly in case of split Anuj Gupta
     [not found]   ` <CGME20241125071454epcas5p449a4b9a80f6bfe2ffa1181e3af6c2ac6@epcas5p4.samsung.com>
2024-11-25  7:06     ` [PATCH v10 03/10] block: modify bio_integrity_map_user to accept iov_iter as argument Anuj Gupta
     [not found]   ` <CGME20241125071457epcas5p498c0641542bed9057e23cfff9cfc5ff0@epcas5p4.samsung.com>
2024-11-25  7:06     ` [PATCH v10 04/10] fs, iov_iter: define meta io descriptor Anuj Gupta
     [not found]   ` <CGME20241125071459epcas5p3f603d511a03c790476cce37505e61a0b@epcas5p3.samsung.com>
2024-11-25  7:06     ` [PATCH v10 05/10] fs: introduce IOCB_HAS_METADATA for metadata Anuj Gupta
     [not found]   ` <CGME20241125071502epcas5p46c373574219a958b565f20732797893f@epcas5p4.samsung.com>
2024-11-25  7:06     ` [PATCH v10 06/10] io_uring: introduce attributes for read/write and PI support Anuj Gupta
2024-11-25 14:58       ` Pavel Begunkov
2024-11-26 10:40         ` Anuj Gupta
2024-11-26 12:53           ` Pavel Begunkov
2024-11-26 13:01       ` Pavel Begunkov
2024-11-26 13:04         ` Pavel Begunkov
2024-11-26 13:54         ` Anuj Gupta
2024-11-26 15:45           ` Pavel Begunkov
2024-11-26 16:23             ` Anuj gupta
2024-11-27 10:35               ` Pavel Begunkov
2024-11-27  9:46             ` Anuj Gupta [this message]
2024-11-27 11:24               ` Pavel Begunkov
     [not found]   ` <CGME20241125071505epcas5p34469830c74b82603c57cb4122d0850f7@epcas5p3.samsung.com>
2024-11-25  7:06     ` [PATCH v10 07/10] block: introduce BIP_CHECK_GUARD/REFTAG/APPTAG bip_flags Anuj Gupta
     [not found]   ` <CGME20241125071507epcas5p3b898d0960fb411cd176aea29029d820a@epcas5p3.samsung.com>
2024-11-25  7:06     ` [PATCH v10 08/10] nvme: add support for passing on the application tag Anuj Gupta
     [not found]   ` <CGME20241125071510epcas5p47a424c419577f1e5c09375ce39a880c3@epcas5p4.samsung.com>
2024-11-25  7:06     ` [PATCH v10 09/10] scsi: add support for user-meta interface Anuj Gupta
     [not found]   ` <CGME20241125071513epcas5p28b1c27bc43262eb575d576e32f8e3d7b@epcas5p2.samsung.com>
2024-11-25  7:06     ` [PATCH v10 10/10] block: add support to pass user meta buffer Anuj Gupta

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241127094644.GC22537@green245 \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.