public inbox for [email protected]
 help / color / mirror / Atom feed
From: Jonathan Lemon <[email protected]>
To: <[email protected]>
Cc: <[email protected]>
Subject: [PATCH v1 13/15] io_uring: Make remove_ifq_region a delayed work call
Date: Mon, 7 Nov 2022 21:05:19 -0800	[thread overview]
Message-ID: <[email protected]> (raw)
In-Reply-To: <[email protected]>

The page backing store should not be removed until all outstanding
packets are returned.  The packets may be inflight, owned by the
driver or sitting in a socket buffer.

The region holds a reference to the ifq, and when the ifq is
closed, a delayed work item is scheduled which checks that all
pages have been returned.  When complete, the region releases
the ifq reference so the ifq can be freed.

Currently, the work item will exit and leak pages after a timeout
expires.  This should not happen in normal operation.

Signed-off-by: Jonathan Lemon <[email protected]>
---
 include/linux/io_uring_types.h |  1 +
 io_uring/zctap.c               | 77 +++++++++++++++++++++++++---------
 2 files changed, 59 insertions(+), 19 deletions(-)

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 39f20344d578..7d9895370875 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -583,6 +583,7 @@ struct io_zctap_ifq {
 	struct io_ring_ctx	*ctx;
 	void			*region;
 	struct ubuf_info	*uarg;
+	refcount_t		refcount;
 	u16			queue_id;
 	u16			id;
 	u16			fill_bgid;
diff --git a/io_uring/zctap.c b/io_uring/zctap.c
index 096b3dd5a8a3..262aa50de8c4 100644
--- a/io_uring/zctap.c
+++ b/io_uring/zctap.c
@@ -19,13 +19,14 @@
 #define NR_ZCTAP_IFQS	1
 
 struct ifq_region {
-	struct io_zctap_ifq	*ifq;		/* only for delayed_work */
-	struct io_mapped_ubuf	*imu;
+	struct io_zctap_ifq	*ifq;
 	int			free_count;
 	int			nr_pages;
 	u16			id;
 
 	spinlock_t		freelist_lock;
+	struct delayed_work	release_work;
+	unsigned long		delay_end;
 
 	struct io_zctap_buf	*buf;
 	u16			freelist[];
@@ -37,6 +38,8 @@ struct io_zctap_ifq_priv {
 	struct ubuf_info	uarg;
 };
 
+static void io_zctap_ifq_put(struct io_zctap_ifq *ifq);
+
 typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf);
 
 static void zctap_set_page_info(struct page *page, u64 info)
@@ -239,11 +242,30 @@ netdev2device(struct net_device *dev)
 	return dev->dev.parent;			/* from SET_NETDEV_DEV() */
 }
 
-static void io_remove_ifq_region(struct ifq_region *ifr)
+static void io_remove_ifq_region_work(struct work_struct *work)
 {
+	struct ifq_region *ifr = container_of(
+		to_delayed_work(work), struct ifq_region, release_work);
 	struct device *device = netdev2device(ifr->ifq->dev);
 	struct io_zctap_buf *buf;
-	int i;
+	int i, refs, count;
+
+	count = 0;
+	for (i = 0; i < ifr->nr_pages; i++) {
+		buf = &ifr->buf[i];
+		refs = atomic_read(&buf->refcount) & IO_ZCTAP_KREF_MASK;
+		if (refs) {
+			if (time_before(jiffies, ifr->delay_end)) {
+				schedule_delayed_work(&ifr->release_work, HZ);
+				return;
+			}
+			count++;
+		}
+	}
+
+	if (count)
+		pr_debug("freeing ifr with %d/%d outstanding pages\n",
+			 count, ifr->nr_pages);
 
 	for (i = 0; i < ifr->nr_pages; i++) {
 		buf = &ifr->buf[i];
@@ -255,20 +277,28 @@ static void io_remove_ifq_region(struct ifq_region *ifr)
 		put_page(buf->page);
 	}
 
+	io_zctap_ifq_put(ifr->ifq);
 	kvfree(ifr->buf);
 	kvfree(ifr);
 }
 
-static int io_zctap_map_region(struct ifq_region *ifr, struct device *device)
+static void io_remove_ifq_region(struct ifq_region *ifr)
 {
-	struct io_mapped_ubuf *imu;
+	ifr->delay_end = jiffies + HZ * 10;
+	INIT_DELAYED_WORK(&ifr->release_work, io_remove_ifq_region_work);
+	schedule_delayed_work(&ifr->release_work, 0);
+}
+
+static int io_zctap_map_region(struct ifq_region *ifr,
+			       struct io_mapped_ubuf *imu)
+{
+	struct device *device = netdev2device(ifr->ifq->dev);
 	struct io_zctap_buf *buf;
 	struct page *page;
 	dma_addr_t addr;
 	int i, err;
 	u64 info;
 
-	imu = ifr->imu;
 	for (i = 0; i < ifr->nr_pages; i++) {
 		page = imu->bvec[i].bv_page;
 
@@ -302,10 +332,10 @@ static int io_zctap_map_region(struct ifq_region *ifr, struct device *device)
 
 out:
 	while (i--) {
-		page = imu->bvec[i].bv_page;
+		buf = &ifr->buf[i];
+		page = buf->page;
 		set_page_private(page, 0);
 		ClearPagePrivate(page);
-		buf = &ifr->buf[i];
 		dma_unmap_page_attrs(device, buf->dma, PAGE_SIZE,
 				     DMA_BIDIRECTIONAL,
 				     DMA_ATTR_SKIP_CPU_SYNC);
@@ -348,13 +378,12 @@ int io_provide_ifq_region(struct io_zctap_ifq *ifq, u16 id)
 
 	spin_lock_init(&ifr->freelist_lock);
 	ifr->nr_pages = nr_pages;
-	ifr->imu = imu;
 	ifr->free_count = nr_pages;
 	ifr->id = id;
+	ifr->ifq = ifq;
+	ifr->delay_end = 0;
 
-	ifr->ifq = ifq;		/* XXX */
-
-	err = io_zctap_map_region(ifr, netdev2device(ifq->dev));
+	err = io_zctap_map_region(ifr, imu);
 	if (err) {
 		kvfree(ifr->buf);
 		kvfree(ifr);
@@ -362,6 +391,7 @@ int io_provide_ifq_region(struct io_zctap_ifq *ifq, u16 id)
 	}
 
 	ifq->region = ifr;
+	refcount_inc(&ifq->refcount);
 
 	return 0;
 }
@@ -436,15 +466,23 @@ static struct io_zctap_ifq *io_zctap_ifq_alloc(struct io_ring_ctx *ctx)
 	return &priv->ifq;
 }
 
-static void io_zctap_ifq_free(struct io_zctap_ifq *ifq)
+static void io_zctap_ifq_put(struct io_zctap_ifq *ifq)
+{
+	if (!refcount_dec_and_test(&ifq->refcount))
+		return;
+
+	if (ifq->dev)
+		dev_put(ifq->dev);
+	kfree(ifq);
+}
+
+static void io_zctap_ifq_close(struct io_zctap_ifq *ifq)
 {
 	if (ifq->queue_id != -1)
 		io_close_zctap_ifq(ifq, ifq->queue_id);
 	if (ifq->region)
 		io_remove_ifq_region(ifq->region);
-	if (ifq->dev)
-		dev_put(ifq->dev);
-	kfree(ifq);
+	io_zctap_ifq_put(ifq);
 }
 
 int io_register_ifq(struct io_ring_ctx *ctx,
@@ -473,6 +511,7 @@ int io_register_ifq(struct io_ring_ctx *ctx,
 	ifq->fill_bgid = req.fill_bgid;
 	ifq->uarg->callback = io_zctap_ifq_callback;
 	ifq->uarg->flags = SKBFL_ALL_ZEROCOPY | SKBFL_FIXED_FRAG;
+	refcount_set(&ifq->refcount, 1);
 
 	err = -ENODEV;
 	ifq->dev = dev_get_by_index(&init_net, req.ifindex);
@@ -493,7 +532,7 @@ int io_register_ifq(struct io_ring_ctx *ctx,
 	return 0;
 
 out:
-	io_zctap_ifq_free(ifq);
+	io_zctap_ifq_close(ifq);
 	return err;
 }
 
@@ -506,7 +545,7 @@ int io_unregister_zctap_ifq(struct io_ring_ctx *ctx, unsigned long index)
 		return -EINVAL;
 
 	ctx->zctap_ifq = NULL;
-	io_zctap_ifq_free(ifq);
+	io_zctap_ifq_close(ifq);
 
 	return 0;
 }
-- 
2.30.2


  parent reply	other threads:[~2022-11-08  5:05 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-11-08  5:05 [PATCH v1 00/15] zero-copy RX for io_uring Jonathan Lemon
2022-11-08  5:05 ` [PATCH v1 01/15] io_uring: add zctap ifq definition Jonathan Lemon
2022-11-08  5:05 ` [PATCH v1 02/15] netdevice: add SETUP_ZCTAP to the netdev_bpf structure Jonathan Lemon
2022-11-08  5:05 ` [PATCH v1 03/15] io_uring: add register ifq opcode Jonathan Lemon
2022-11-08  5:05 ` [PATCH v1 04/15] io_uring: create a zctap region for a mapped buffer Jonathan Lemon
2022-11-08  5:05 ` [PATCH v1 05/15] io_uring: mark pages in ifq region with zctap information Jonathan Lemon
2022-11-16  8:12   ` Christoph Hellwig
2022-11-17 20:48     ` Jonathan Lemon
2022-11-08  5:05 ` [PATCH v1 06/15] io_uring: Provide driver API for zctap packet buffers Jonathan Lemon
2022-11-16  8:17   ` Christoph Hellwig
2022-11-17 21:01     ` Jonathan Lemon
2022-11-08  5:05 ` [PATCH v1 07/15] io_uring: Allocate zctap device buffers and dma map them Jonathan Lemon
2022-11-16  8:15   ` Christoph Hellwig
2022-11-17 20:51     ` Jonathan Lemon
2022-11-08  5:05 ` [PATCH v1 08/15] io_uring: Add zctap buffer get/put functions and refcounting Jonathan Lemon
2022-11-08  5:05 ` [PATCH v1 09/15] skbuff: Introduce SKBFL_FIXED_FRAG and skb_fixed() Jonathan Lemon
2022-11-08  5:05 ` [PATCH v1 10/15] io_uring: Allocate a uarg for use by the ifq RX Jonathan Lemon
2022-11-08  5:05 ` [PATCH v1 11/15] io_uring: Define the zctap iov[] returned to the user Jonathan Lemon
2022-11-08  5:05 ` [PATCH v1 12/15] io_uring: add OP_RECV_ZC command Jonathan Lemon
2022-11-08  5:05 ` Jonathan Lemon [this message]
2022-11-08  5:05 ` [PATCH v1 14/15] io_uring: Add a buffer caching mechanism for zctap Jonathan Lemon
2022-11-08  5:05 ` [PATCH v1 15/15] io_uring: Notify the application as the fillq is drained Jonathan Lemon
2022-11-09  6:37 ` [PATCH v1 00/15] zero-copy RX for io_uring Dust Li
2022-11-09 15:27   ` Jonathan Lemon

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox