From: Jonathan Lemon <[email protected]>
To: <[email protected]>
Cc: <[email protected]>
Subject: [RFC PATCH v3 08/15] io_uring: Add zctap buffer get/put functions and refcounting.
Date: Wed, 2 Nov 2022 16:32:37 -0700	[thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>

Flesh out the driver API functions introduced earlier.

The driver obtains a buffer and is responsible for setting its own
bias count.

The bias is decremented as skb fragments go up the stack, and the
driver releases its remaining references when it is finished with
the buffer.

When ownership of a fragment is transferred to the user, a user
refcount is incremented, and it is correspondingly decremented when
the fragment is returned.  When all refcounts have been released,
the buffer is safe to reuse.  The user/kernel split is needed to
differentiate between "safe to reuse the buffer" and "still in use
by the kernel".

The locking here can likely be improved.
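
As a usage sketch (hypothetical driver code, not part of this patch;
nr_frags and unused_frags are made-up names, and the bias passed to
io_zctap_get_buf() must stay below IO_ZCTAP_UREF):

	struct io_zctap_buf *buf;

	/* take a buffer with a bias covering the expected fragments */
	buf = io_zctap_get_buf(ifq, nr_frags);
	if (!buf)
		return NULL;

	/* ... attach the buffer's page to skb frags and pass them
	 * up the stack ...
	 */

	/* drop one reference per completed fragment */
	io_zctap_put_buf(ifq, buf);

	/* or drop any leftover bias in a single call */
	io_zctap_put_buf_refs(ifq, buf, unused_frags);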

Signed-off-by: Jonathan Lemon <[email protected]>
---
 io_uring/kbuf.c  |  13 +++++
 io_uring/kbuf.h  |   2 +
 io_uring/zctap.c | 127 ++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 141 insertions(+), 1 deletion(-)

diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index 25cd724ade18..caae2755e3d5 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -188,6 +188,19 @@ void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
 	return ret;
 }
 
+/* XXX May be called from the driver, in napi context. */
+u64 io_zctap_buffer(struct io_kiocb *req, size_t *len)
+{
+	struct io_ring_ctx *ctx = req->ctx;
+	struct io_buffer_list *bl;
+	void __user *ret = NULL;
+
+	bl = io_buffer_get_list(ctx, req->buf_index);
+	if (likely(bl))
+		ret = io_ring_buffer_select(req, len, bl, IO_URING_F_UNLOCKED);
+	return (u64)(unsigned long)ret;
+}
+
 static __cold int io_init_bl_list(struct io_ring_ctx *ctx)
 {
 	int i;
diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h
index c23e15d7d3ca..b530e987b438 100644
--- a/io_uring/kbuf.h
+++ b/io_uring/kbuf.h
@@ -50,6 +50,8 @@ unsigned int __io_put_kbuf(struct io_kiocb *req, unsigned issue_flags);
 
 void io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags);
 
+u64 io_zctap_buffer(struct io_kiocb *req, size_t *len);
+
 static inline void io_kbuf_recycle_ring(struct io_kiocb *req)
 {
 	/*
diff --git a/io_uring/zctap.c b/io_uring/zctap.c
index 9f892e9ed8f2..766da3bb2e41 100644
--- a/io_uring/zctap.c
+++ b/io_uring/zctap.c
@@ -24,6 +24,8 @@ struct ifq_region {
 	int			nr_pages;
 	u16			id;
 
+	spinlock_t		freelist_lock;
+
 	struct io_zctap_buf	*buf;
 	u16			freelist[];
 };
@@ -40,20 +42,142 @@ static u64 zctap_mk_page_info(u16 region_id, u16 pgid)
 	return (u64)0xface << 48 | (u64)region_id << 16 | (u64)pgid;
 }
 
+static u64 zctap_page_info(const struct page *page)
+{
+	return page_private(page);
+}
+
+static u16 zctap_page_id(const struct page *page)
+{
+	return zctap_page_info(page) & 0xffff;
+}
+
+/* driver bias cannot be larger than this */
+#define IO_ZCTAP_UREF		0x10000
+#define IO_ZCTAP_KREF_MASK	(IO_ZCTAP_UREF - 1)
+
+/* return user refs back, indicate whether buffer is reusable */
+static bool io_zctap_put_buf_uref(struct io_zctap_buf *buf)
+{
+	if (atomic_read(&buf->refcount) < IO_ZCTAP_UREF) {
+		WARN_ONCE(1, "uref botch: %x < %x, id:%d page:%px\n",
+			atomic_read(&buf->refcount), IO_ZCTAP_UREF,
+			zctap_page_id(buf->page),
+			buf->page);
+		return false;
+	}
+
+	return atomic_sub_and_test(IO_ZCTAP_UREF, &buf->refcount);
+}
+
+/* gets a user-supplied buffer from the fill queue */
+static struct io_zctap_buf *io_zctap_get_buffer(struct io_zctap_ifq *ifq,
+						u16 *buf_pgid)
+{
+	struct io_zctap_buf *buf;
+	struct ifq_region *ifr;
+	struct io_kiocb req;
+	size_t len = 0;
+	u64 addr;
+	int pgid;
+
+	ifr = ifq->region;
+retry:
+	req = (struct io_kiocb) {
+		.ctx = ifq->ctx,
+		.buf_index = ifq->fill_bgid,
+	};
+	/*  IN: uses buf_index as buffer group.
+	 * OUT: buf_index of actual buffer. (and req->buf_list set)
+	 *	(this comes from the user-supplied bufid)
+	 */
+	addr = io_zctap_buffer(&req, &len);
+	if (!addr)
+		return NULL;
+
+	pgid = addr & 0xffff;
+//	region_id = (addr >> 16) & 0xffff;
+
+	if (pgid >= ifr->nr_pages) {
+		WARN_RATELIMIT(1, "bufid %d >= max %d", pgid, ifr->nr_pages);
+		return NULL;
+	}
+
+	buf = &ifr->buf[pgid];
+	if (!io_zctap_put_buf_uref(buf))
+		goto retry;
+
+	*buf_pgid = pgid;
+	return buf;
+}
+
+/* if on exit/teardown path, can skip this work */
+static void io_zctap_recycle_buf(struct ifq_region *ifr,
+				 struct io_zctap_buf *buf)
+{
+	spin_lock(&ifr->freelist_lock);
+
+	ifr->freelist[ifr->free_count++] = buf - ifr->buf;
+
+	spin_unlock(&ifr->freelist_lock);
+}
+
 struct io_zctap_buf *io_zctap_get_buf(struct io_zctap_ifq *ifq, int refc)
 {
-	return NULL;
+	struct ifq_region *ifr = ifq->region;
+	struct io_zctap_buf *buf;
+	u16 pgid;
+
+	spin_lock(&ifr->freelist_lock);
+
+	buf = NULL;
+	if (ifr->free_count) {
+		pgid = ifr->freelist[--ifr->free_count];
+		buf = &ifr->buf[pgid];
+	}
+
+	spin_unlock(&ifr->freelist_lock);
+
+	if (!buf) {
+		buf = io_zctap_get_buffer(ifq, &pgid);
+		if (!buf)
+			return NULL;
+	}
+
+	WARN_ON(atomic_read(&buf->refcount));
+	atomic_set(&buf->refcount, refc & IO_ZCTAP_KREF_MASK);
+
+	return buf;
 }
 EXPORT_SYMBOL(io_zctap_get_buf);
 
+/* called from driver and networking stack. */
 void io_zctap_put_buf(struct io_zctap_ifq *ifq, struct io_zctap_buf *buf)
 {
+	struct ifq_region *ifr = ifq->region;
+
+	/* XXX move to inline function later. */
+	if (!atomic_dec_and_test(&buf->refcount))
+		return;
+
+	io_zctap_recycle_buf(ifr, buf);
 }
 EXPORT_SYMBOL(io_zctap_put_buf);
 
+/* called from driver and networking stack. */
 void io_zctap_put_buf_refs(struct io_zctap_ifq *ifq, struct io_zctap_buf *buf,
 			   unsigned count)
 {
+	struct ifq_region *ifr = ifq->region;
+	unsigned refs;
+
+	refs = atomic_read(&buf->refcount) & IO_ZCTAP_KREF_MASK;
+	WARN(refs < count, "driver refcount botch: %u < %u\n", refs, count);
+
+	if (!atomic_sub_and_test(count, &buf->refcount))
+		return;
+
+	io_zctap_recycle_buf(ifr, buf);
 }
 EXPORT_SYMBOL(io_zctap_put_buf_refs);
 
@@ -176,6 +300,7 @@ int io_provide_ifq_region(struct io_zctap_ifq *ifq, u16 id)
 		return -ENOMEM;
 	}
 
+	spin_lock_init(&ifr->freelist_lock);
 	ifr->nr_pages = nr_pages;
 	ifr->imu = imu;
 	ifr->free_count = nr_pages;
-- 
2.30.2


