public inbox for io-uring@vger.kernel.org
 help / color / mirror / Atom feed
From: Pavel Begunkov <asml.silence@gmail.com>
To: io-uring@vger.kernel.org, linux-block@vger.kernel.org,
	linux-nvme@lists.infradead.org
Cc: linux-fsdevel@vger.kernel.org, Keith Busch <kbusch@kernel.org>,
	David Wei <dw@davidwei.uk>,
	Vishal Verma <vishal1.verma@intel.com>,
	asml.silence@gmail.com
Subject: [RFC 02/12] iov_iter: introduce iter type for pre-registered dma
Date: Fri, 27 Jun 2025 16:10:29 +0100	[thread overview]
Message-ID: <66ec20266c87e323f365fcc82b60f00aef6e2334.1751035820.git.asml.silence@gmail.com> (raw)
In-Reply-To: <cover.1751035820.git.asml.silence@gmail.com>

Introduce a new iterator type representing vectors with pre-registered
DMA addresses. It carries an array of struct dmavec, which is just a
{dma addr, dma len} pair. It'll be used to pass dmabuf buffers from
io_uring and other interfaces operating with iterators.

The vector is mapped for the device returned by the ->get_dma_device()
callback of the file, and the caller should only pass the iterator to
that file's methods. That should also prevent ITER_DMAVEC iterators
from reaching files that are unaware of them.

Note that drivers are responsible for CPU-device memory synchronisation
and should use dma_sync_single_for_{device,cpu} when appropriate.

Suggested-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 include/linux/uio.h | 14 +++++++++
 lib/iov_iter.c      | 70 ++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 83 insertions(+), 1 deletion(-)

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 2e86c653186c..d68148508ef7 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -29,11 +29,17 @@ enum iter_type {
 	ITER_FOLIOQ,
 	ITER_XARRAY,
 	ITER_DISCARD,
+	ITER_DMAVEC,
 };
 
 #define ITER_SOURCE	1	// == WRITE
 #define ITER_DEST	0	// == READ
 
+struct dmavec {
+	dma_addr_t		addr;
+	int			len;
+};
+
 struct iov_iter_state {
 	size_t iov_offset;
 	size_t count;
@@ -71,6 +77,7 @@ struct iov_iter {
 				const struct folio_queue *folioq;
 				struct xarray *xarray;
 				void __user *ubuf;
+				const struct dmavec *dmavec;
 			};
 			size_t count;
 		};
@@ -155,6 +162,11 @@ static inline bool iov_iter_is_xarray(const struct iov_iter *i)
 	return iov_iter_type(i) == ITER_XARRAY;
 }
 
+static inline bool iov_iter_is_dma(const struct iov_iter *i)
+{
+	return iov_iter_type(i) == ITER_DMAVEC;
+}
+
 static inline unsigned char iov_iter_rw(const struct iov_iter *i)
 {
 	return i->data_source ? WRITE : READ;
@@ -302,6 +314,8 @@ void iov_iter_folio_queue(struct iov_iter *i, unsigned int direction,
 			  unsigned int first_slot, unsigned int offset, size_t count);
 void iov_iter_xarray(struct iov_iter *i, unsigned int direction, struct xarray *xarray,
 		     loff_t start, size_t count);
+void iov_iter_dma(struct iov_iter *i, unsigned int direction,
+		  struct dmavec *dmavec, unsigned nr_segs, size_t count);
 ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages,
 			size_t maxsize, unsigned maxpages, size_t *start);
 ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, struct page ***pages,
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index f9193f952f49..b7740f9aa279 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -559,6 +559,26 @@ static void iov_iter_folioq_advance(struct iov_iter *i, size_t size)
 	i->folioq = folioq;
 }
 
+static void iov_iter_dma_advance(struct iov_iter *i, size_t size)
+{
+	const struct dmavec *dmav, *end;
+
+	if (!i->count)
+		return;
+	i->count -= size;
+
+	size += i->iov_offset;
+
+	for (dmav = i->dmavec, end = dmav + i->nr_segs; dmav < end; dmav++) {
+		if (likely(size < dmav->len))
+			break;
+		size -= dmav->len;
+	}
+	i->iov_offset = size;
+	i->nr_segs -= dmav - i->dmavec;
+	i->dmavec = dmav;
+}
+
 void iov_iter_advance(struct iov_iter *i, size_t size)
 {
 	if (unlikely(i->count < size))
@@ -575,6 +595,8 @@ void iov_iter_advance(struct iov_iter *i, size_t size)
 		iov_iter_folioq_advance(i, size);
 	} else if (iov_iter_is_discard(i)) {
 		i->count -= size;
+	} else if (iov_iter_is_dma(i)) {
+		iov_iter_dma_advance(i, size);
 	}
 }
 EXPORT_SYMBOL(iov_iter_advance);
@@ -763,6 +785,20 @@ void iov_iter_xarray(struct iov_iter *i, unsigned int direction,
 }
 EXPORT_SYMBOL(iov_iter_xarray);
 
+void iov_iter_dma(struct iov_iter *i, unsigned int direction,
+		  struct dmavec *dmavec, unsigned nr_segs, size_t count)
+{
+	WARN_ON(direction & ~(READ | WRITE));
+	*i = (struct iov_iter){
+		.iter_type = ITER_DMAVEC,
+		.data_source = direction,
+		.dmavec = dmavec,
+		.nr_segs = nr_segs,
+		.iov_offset = 0,
+		.count = count
+	};
+}
+
 /**
  * iov_iter_discard - Initialise an I/O iterator that discards data
  * @i: The iterator to initialise.
@@ -834,6 +870,32 @@ static bool iov_iter_aligned_bvec(const struct iov_iter *i, unsigned addr_mask,
 	return true;
 }
 
+static bool iov_iter_aligned_dma(const struct iov_iter *i, unsigned addr_mask,
+				  unsigned len_mask)
+{
+	const struct dmavec *dmav = i->dmavec;
+	unsigned skip = i->iov_offset;
+	size_t size = i->count;
+
+	do {
+		size_t len = dmav->len - skip;
+
+		if (len > size)
+			len = size;
+		if (len & len_mask)
+			return false;
+		if ((unsigned long)(dmav->addr + skip) & addr_mask)
+			return false;
+
+		dmav++;
+		size -= len;
+		skip = 0;
+	} while (size);
+
+	return true;
+}
+
+
 /**
  * iov_iter_is_aligned() - Check if the addresses and lengths of each segments
  * 	are aligned to the parameters.
@@ -875,6 +937,9 @@ bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask,
 			return false;
 	}
 
+	if (iov_iter_is_dma(i))
+		return iov_iter_aligned_dma(i, addr_mask, len_mask);
+
 	return true;
 }
 EXPORT_SYMBOL_GPL(iov_iter_is_aligned);
@@ -1552,7 +1617,8 @@ EXPORT_SYMBOL_GPL(import_ubuf);
 void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state)
 {
 	if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i) &&
-			 !iter_is_ubuf(i)) && !iov_iter_is_kvec(i))
+			 !iter_is_ubuf(i)) && !iov_iter_is_kvec(i) &&
+			 !iov_iter_is_dma(i))
 		return;
 	i->iov_offset = state->iov_offset;
 	i->count = state->count;
@@ -1570,6 +1636,8 @@ void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state)
 	BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec));
 	if (iov_iter_is_bvec(i))
 		i->bvec -= state->nr_segs - i->nr_segs;
+	else if (iov_iter_is_dma(i))
+		i->dmavec -= state->nr_segs - i->nr_segs;
 	else
 		i->__iov -= state->nr_segs - i->nr_segs;
 	i->nr_segs = state->nr_segs;
-- 
2.49.0


  parent reply	other threads:[~2025-06-27 15:09 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-06-27 15:10 [RFC 00/12] io_uring dmabuf read/write support Pavel Begunkov
2025-06-27 15:10 ` [RFC 01/12] file: add callback returning dev for dma operations Pavel Begunkov
2025-06-27 15:10 ` Pavel Begunkov [this message]
2025-06-27 15:10 ` [RFC 03/12] block: move around bio flagging helpers Pavel Begunkov
2025-06-27 15:10 ` [RFC 04/12] block: introduce dmavec bio type Pavel Begunkov
2025-06-27 15:10 ` [RFC 05/12] block: implement ->get_dma_device callback Pavel Begunkov
2025-06-27 15:10 ` [RFC 06/12] nvme-pci: add support for user passed dma vectors Pavel Begunkov
2025-06-27 15:10 ` [RFC 07/12] io_uring/rsrc: extended reg buffer registration Pavel Begunkov
2025-06-27 15:10 ` [RFC 08/12] io_uring: add basic dmabuf helpers Pavel Begunkov
2025-06-27 15:10 ` [RFC 09/12] io_uring/rsrc: add imu flags Pavel Begunkov
2025-06-27 15:10 ` [RFC 10/12] io_uring/rsrc: add dmabuf-backed buffer registeration Pavel Begunkov
2025-06-27 15:10 ` [RFC 11/12] io_uring/rsrc: implement dmabuf regbuf import Pavel Begunkov
2025-06-27 15:10 ` [RFC 12/12] io_uring/rw: enable dma registered buffers Pavel Begunkov
2025-07-03 14:23 ` [RFC 00/12] io_uring dmabuf read/write support Christoph Hellwig
2025-07-03 14:37   ` Christian König
2025-07-07 11:15   ` Pavel Begunkov
2025-07-07 14:48     ` Christoph Hellwig
2025-07-07 15:41       ` Pavel Begunkov
2025-07-08  9:45         ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=66ec20266c87e323f365fcc82b60f00aef6e2334.1751035820.git.asml.silence@gmail.com \
    --to=asml.silence@gmail.com \
    --cc=dw@davidwei.uk \
    --cc=io-uring@vger.kernel.org \
    --cc=kbusch@kernel.org \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=vishal1.verma@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox