public inbox for [email protected]
 help / color / mirror / Atom feed
From: Jonathan Lemon <[email protected]>
To: <[email protected]>
Cc: <[email protected]>
Subject: [RFC PATCH v3 14/15] io_uring: Add a buffer caching mechanism for zctap.
Date: Wed, 2 Nov 2022 16:32:43 -0700	[thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>

This is based on the same concept as the page pool.

Here, there are 4 separate buffer sources:
  cache - small (128-entry) cache the driver can use locklessly.
  ptr_ring - buffers freed through skb_release_data()
  fillq - entries returned from the application
  freelist - spinlock protected pool of free entries.

The driver first tries the lockless cache, then attempts to refill
it from the ptr_ring.  If there are still no buffers, the fillq is
examined, and finally the freelist.

When buffers are released, either because the skb is dropped or
because the driver returns them, they are pushed onto the ptr_ring.
If the ptr_ring is full, they are placed back on the freelist instead.

Signed-off-by: Jonathan Lemon <[email protected]>
---
 io_uring/zctap.c | 128 ++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 99 insertions(+), 29 deletions(-)

diff --git a/io_uring/zctap.c b/io_uring/zctap.c
index 9eba916376c6..8515fff20ad5 100644
--- a/io_uring/zctap.c
+++ b/io_uring/zctap.c
@@ -18,8 +18,12 @@
 
 #define NR_ZCTAP_IFQS	1
 
+#define REGION_CACHE_COUNT	128
+#define REGION_REFILL_COUNT	64
+
 struct ifq_region {
 	struct io_zctap_ifq	*ifq;
+	int			cache_count;
 	int			free_count;
 	int			nr_pages;
 	u16			id;
@@ -28,6 +32,10 @@ struct ifq_region {
 	struct delayed_work	release_work;
 	unsigned long		delay_end;
 
+	u16			cache[REGION_CACHE_COUNT];
+
+	struct ptr_ring		ring;
+
 	struct io_zctap_buf	*buf;
 	u16			freelist[];
 };
@@ -103,8 +111,29 @@ static bool io_zctap_put_buf_uref(struct io_zctap_buf *buf)
 	return atomic_sub_and_test(IO_ZCTAP_UREF, &buf->refcount);
 }
 
+/* if on exit/teardown path, can skip this work */
+static void io_zctap_recycle_buf(struct ifq_region *ifr,
+				 struct io_zctap_buf *buf)
+{
+	int rc;
+
+	if (in_serving_softirq())
+		rc = ptr_ring_produce(&ifr->ring, buf);
+	else
+		rc = ptr_ring_produce_bh(&ifr->ring, buf);
+
+	if (rc) {
+		spin_lock(&ifr->freelist_lock);
+
+		ifr->freelist[ifr->free_count++] = buf - ifr->buf;
+
+		spin_unlock(&ifr->freelist_lock);
+	}
+}
+
 /* gets a user-supplied buffer from the fill queue
  *   note: may drain N entries, but still have no usable buffers
+ *   XXX add retry limit?
  */
 static struct io_zctap_buf *io_zctap_get_buffer(struct io_zctap_ifq *ifq,
 						u16 *buf_pgid)
@@ -146,40 +175,71 @@ static struct io_zctap_buf *io_zctap_get_buffer(struct io_zctap_ifq *ifq,
 	return buf;
 }
 
-/* if on exit/teardown path, can skip this work */
-static void io_zctap_recycle_buf(struct ifq_region *ifr,
-				 struct io_zctap_buf *buf)
+static int io_zctap_get_buffers(struct io_zctap_ifq *ifq, u16 *cache, int n)
 {
-	spin_lock(&ifr->freelist_lock);
+	struct io_zctap_buf *buf;
+	int i;
 
-	ifr->freelist[ifr->free_count++] = buf - ifr->buf;
-
-	spin_unlock(&ifr->freelist_lock);
+	for (i = 0; i < n; i++) {
+		buf = io_zctap_get_buffer(ifq, &cache[i]);
+		if (!buf)
+			break;
+	}
+	return i;
 }
 
 struct io_zctap_buf *io_zctap_get_buf(struct io_zctap_ifq *ifq, int refc)
 {
-	struct ifq_region *ifr = ifq->region;
 	struct io_zctap_buf *buf;
+	struct ifq_region *ifr;
+	int count;
 	u16 pgid;
 
+	ifr = ifq->region;
+	if (ifr->cache_count)
+		goto out;
+
+	if (!__ptr_ring_empty(&ifr->ring)) {
+		do {
+			buf = __ptr_ring_consume(&ifr->ring);
+			if (!buf)
+				break;
+			ifr->cache[ifr->cache_count++] = buf - ifr->buf;
+		} while (ifr->cache_count < REGION_REFILL_COUNT);
+
+		if (ifr->cache_count)
+			goto out;
+	}
+
+	count = io_zctap_get_buffers(ifq, ifr->cache, REGION_REFILL_COUNT);
+	ifr->cache_count += count;
+
+	if (ifr->cache_count)
+		goto out;
+
 	spin_lock(&ifr->freelist_lock);
 
-	buf = NULL;
-	if (ifr->free_count) {
-		pgid = ifr->freelist[--ifr->free_count];
-		buf = &ifr->buf[pgid];
-	}
+	count = min_t(int, ifr->free_count, REGION_CACHE_COUNT);
+	ifr->free_count -= count;
+	ifr->cache_count += count;
+	memcpy(ifr->cache, &ifr->freelist[ifr->free_count],
+	       count * sizeof(u16));
 
 	spin_unlock(&ifr->freelist_lock);
 
-	if (!buf) {
-		buf = io_zctap_get_buffer(ifq, &pgid);
-		if (!buf)
-			return NULL;
-	}
+	if (ifr->cache_count)
+		goto out;
 
-	WARN_ON(atomic_read(&buf->refcount));
+	return NULL;
+
+out:
+	pgid = ifr->cache[--ifr->cache_count];
+	buf = &ifr->buf[pgid];
+
+	WARN_RATELIMIT(atomic_read(&buf->refcount),
+		       "pgid:%d refc:%d cache_count:%d\n",
+		       pgid, atomic_read(&buf->refcount),
+		       ifr->cache_count);
 	atomic_set(&buf->refcount, refc & IO_ZCTAP_KREF_MASK);
 
 	return buf;
@@ -274,6 +334,7 @@ static void io_remove_ifq_region_work(struct work_struct *work)
 	}
 
 	io_zctap_ifq_put(ifr->ifq);
+	ptr_ring_cleanup(&ifr->ring, NULL);
 	kvfree(ifr->buf);
 	kvfree(ifr);
 }
@@ -361,16 +422,18 @@ int io_provide_ifq_region(struct io_zctap_ifq *ifq, u16 id)
 	if (imu->ubuf & ~PAGE_MASK || imu->ubuf_end & ~PAGE_MASK)
 		return -EFAULT;
 
+	err = -ENOMEM;
 	nr_pages = imu->nr_bvecs;
 	ifr = kvmalloc(struct_size(ifr, freelist, nr_pages), GFP_KERNEL);
 	if (!ifr)
-		return -ENOMEM;
+		goto fail;
 
 	ifr->buf = kvmalloc_array(nr_pages, sizeof(*ifr->buf), GFP_KERNEL);
-	if (!ifr->buf) {
-		kvfree(ifr);
-		return -ENOMEM;
-	}
+	if (!ifr->buf)
+		goto fail_buf;
+
+	if (ptr_ring_init(&ifr->ring, 1024, GFP_KERNEL))
+		goto fail_ring;
 
 	spin_lock_init(&ifr->freelist_lock);
 	ifr->nr_pages = nr_pages;
@@ -378,18 +441,25 @@ int io_provide_ifq_region(struct io_zctap_ifq *ifq, u16 id)
 	ifr->id = id;
 	ifr->ifq = ifq;
 	ifr->delay_end = 0;
+	ifr->cache_count = 0;
 
 	err = io_zctap_map_region(ifr, imu);
-	if (err) {
-		kvfree(ifr->buf);
-		kvfree(ifr);
-		return err;
-	}
+	if (err)
+		goto fail_map;
 
 	ifq->region = ifr;
 	refcount_inc(&ifq->refcount);
 
 	return 0;
+
+fail_map:
+	ptr_ring_cleanup(&ifr->ring, NULL);
+fail_ring:
+	kvfree(ifr->buf);
+fail_buf:
+	kvfree(ifr);
+fail:
+	return err;
 }
 
 static int __io_queue_mgmt(struct net_device *dev, struct io_zctap_ifq *ifq,
-- 
2.30.2


  parent reply	other threads:[~2022-11-02 23:40 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-11-02 23:32 [RFC PATCH v3 00/15] zero-copy RX for io_uring Jonathan Lemon
2022-11-02 23:32 ` [RFC PATCH v3 01/15] io_uring: add zctap ifq definition Jonathan Lemon
2022-11-02 23:32 ` [RFC PATCH v3 02/15] netdevice: add SETUP_ZCTAP to the netdev_bpf structure Jonathan Lemon
2022-11-02 23:32 ` [RFC PATCH v3 03/15] io_uring: add register ifq opcode Jonathan Lemon
2022-11-02 23:32 ` [RFC PATCH v3 04/15] io_uring: create a zctap region for a mapped buffer Jonathan Lemon
2022-11-02 23:32 ` [RFC PATCH v3 05/15] io_uring: mark pages in ifq region with zctap information Jonathan Lemon
2022-11-02 23:32 ` [RFC PATCH v3 06/15] io_uring: Provide driver API for zctap packet buffers Jonathan Lemon
2022-11-02 23:32 ` [RFC PATCH v3 07/15] io_uring: Allocate zctap device buffers and dma map them Jonathan Lemon
2022-11-02 23:32 ` [RFC PATCH v3 08/15] io_uring: Add zctap buffer get/put functions and refcounting Jonathan Lemon
2022-11-02 23:32 ` [RFC PATCH v3 09/15] skbuff: Introduce SKBFL_FIXED_FRAG and skb_fixed() Jonathan Lemon
2022-11-02 23:32 ` [RFC PATCH v3 10/15] io_uring: Allocate a uarg for use by the ifq RX Jonathan Lemon
2022-11-02 23:32 ` [RFC PATCH v3 11/15] io_uring: Define the zctap iov[] returned to the user Jonathan Lemon
2022-11-02 23:32 ` [RFC PATCH v3 12/15] io_uring: add OP_RECV_ZC command Jonathan Lemon
2022-11-02 23:32 ` [RFC PATCH v3 13/15] io_uring: Make remove_ifq_region a delayed work call Jonathan Lemon
2022-11-02 23:32 ` Jonathan Lemon [this message]
2022-11-02 23:32 ` [RFC PATCH v3 15/15] io_uring: Notify the application as the fillq is drained Jonathan Lemon

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox