public inbox for [email protected]
 help / color / mirror / Atom feed
From: Pavel Begunkov <[email protected]>
To: [email protected], [email protected],
	[email protected]
Cc: "David S . Miller" <[email protected]>,
	Jakub Kicinski <[email protected]>,
	Jonathan Lemon <[email protected]>,
	Willem de Bruijn <[email protected]>,
	Jens Axboe <[email protected]>, David Ahern <[email protected]>,
	[email protected], Pavel Begunkov <[email protected]>
Subject: [PATCH net-next v4 07/27] net: introduce managed frags infrastructure
Date: Thu,  7 Jul 2022 12:49:38 +0100	[thread overview]
Message-ID: <088d3480b1ebc687fe7cbfc00aec2ff1c33a72c7.1657194434.git.asml.silence@gmail.com> (raw)
In-Reply-To: <[email protected]>

Some users like io_uring can do page pinning more efficiently, so we
want a way to delegate referencing to other subsystems. For that add
a new flag called SKBFL_MANAGED_FRAG_REFS. When set, skb doesn't hold
page references and upper layers are responsivle to managing page
lifetime.

It's allowed to convert skbs from managed to normal by calling
skb_zcopy_downgrade_managed(). The function will take all needed
page references and clear the flag. It's needed, for instance,
to avoid mixing managed modes.

Signed-off-by: Pavel Begunkov <[email protected]>
---
 include/linux/skbuff.h | 25 +++++++++++++++++++++++--
 net/core/skbuff.c      | 29 +++++++++++++++++++++++++++--
 2 files changed, 50 insertions(+), 4 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a8a2dd4cfdfd..07004593d7ca 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -688,11 +688,16 @@ enum {
 	SKBFL_PURE_ZEROCOPY = BIT(2),
 
 	SKBFL_DONT_ORPHAN = BIT(3),
+
+	/* page references are managed by the ubuf_info, so it's safe to
+	 * use frags only up until ubuf_info is released
+	 */
+	SKBFL_MANAGED_FRAG_REFS = BIT(4),
 };
 
 #define SKBFL_ZEROCOPY_FRAG	(SKBFL_ZEROCOPY_ENABLE | SKBFL_SHARED_FRAG)
 #define SKBFL_ALL_ZEROCOPY	(SKBFL_ZEROCOPY_FRAG | SKBFL_PURE_ZEROCOPY | \
-				 SKBFL_DONT_ORPHAN)
+				 SKBFL_DONT_ORPHAN | SKBFL_MANAGED_FRAG_REFS)
 
 /*
  * The callback notifies userspace to release buffers when skb DMA is done in
@@ -1810,6 +1815,11 @@ static inline bool skb_zcopy_pure(const struct sk_buff *skb)
 	return skb_shinfo(skb)->flags & SKBFL_PURE_ZEROCOPY;
 }
 
+static inline bool skb_zcopy_managed(const struct sk_buff *skb)
+{
+	return skb_shinfo(skb)->flags & SKBFL_MANAGED_FRAG_REFS;
+}
+
 static inline bool skb_pure_zcopy_same(const struct sk_buff *skb1,
 				       const struct sk_buff *skb2)
 {
@@ -1884,6 +1894,14 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy_success)
 	}
 }
 
+void __skb_zcopy_downgrade_managed(struct sk_buff *skb);
+
+static inline void skb_zcopy_downgrade_managed(struct sk_buff *skb)
+{
+	if (unlikely(skb_zcopy_managed(skb)))
+		__skb_zcopy_downgrade_managed(skb);
+}
+
 static inline void skb_mark_not_on_list(struct sk_buff *skb)
 {
 	skb->next = NULL;
@@ -3499,7 +3517,10 @@ static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle)
  */
 static inline void skb_frag_unref(struct sk_buff *skb, int f)
 {
-	__skb_frag_unref(&skb_shinfo(skb)->frags[f], skb->pp_recycle);
+	struct skb_shared_info *shinfo = skb_shinfo(skb);
+
+	if (!skb_zcopy_managed(skb))
+		__skb_frag_unref(&shinfo->frags[f], skb->pp_recycle);
 }
 
 /**
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f5a3ebbc1f7e..cf4107d80bc4 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -666,11 +666,18 @@ static void skb_release_data(struct sk_buff *skb)
 			      &shinfo->dataref))
 		goto exit;
 
-	skb_zcopy_clear(skb, true);
+	if (skb_zcopy(skb)) {
+		bool skip_unref = shinfo->flags & SKBFL_MANAGED_FRAG_REFS;
+
+		skb_zcopy_clear(skb, true);
+		if (skip_unref)
+			goto free_head;
+	}
 
 	for (i = 0; i < shinfo->nr_frags; i++)
 		__skb_frag_unref(&shinfo->frags[i], skb->pp_recycle);
 
+free_head:
 	if (shinfo->frag_list)
 		kfree_skb_list(shinfo->frag_list);
 
@@ -895,7 +902,10 @@ EXPORT_SYMBOL(skb_dump);
  */
 void skb_tx_error(struct sk_buff *skb)
 {
-	skb_zcopy_clear(skb, true);
+	if (skb) {
+		skb_zcopy_downgrade_managed(skb);
+		skb_zcopy_clear(skb, true);
+	}
 }
 EXPORT_SYMBOL(skb_tx_error);
 
@@ -1375,6 +1385,16 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
 }
 EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream);
 
+void __skb_zcopy_downgrade_managed(struct sk_buff *skb)
+{
+	int i;
+
+	skb_shinfo(skb)->flags &= ~SKBFL_MANAGED_FRAG_REFS;
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+		skb_frag_ref(skb, i);
+}
+EXPORT_SYMBOL_GPL(__skb_zcopy_downgrade_managed);
+
 static int skb_zerocopy_clone(struct sk_buff *nskb, struct sk_buff *orig,
 			      gfp_t gfp_mask)
 {
@@ -1692,6 +1712,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
 
 	BUG_ON(skb_shared(skb));
 
+	skb_zcopy_downgrade_managed(skb);
+
 	size = SKB_DATA_ALIGN(size);
 
 	if (skb_pfmemalloc(skb))
@@ -3488,6 +3510,8 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
 	int pos = skb_headlen(skb);
 	const int zc_flags = SKBFL_SHARED_FRAG | SKBFL_PURE_ZEROCOPY;
 
+	skb_zcopy_downgrade_managed(skb);
+
 	skb_shinfo(skb1)->flags |= skb_shinfo(skb)->flags & zc_flags;
 	skb_zerocopy_clone(skb1, skb, 0);
 	if (len < pos)	/* Split line is inside header. */
@@ -3841,6 +3865,7 @@ int skb_append_pagefrags(struct sk_buff *skb, struct page *page,
 	if (skb_can_coalesce(skb, i, page, offset)) {
 		skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], size);
 	} else if (i < MAX_SKB_FRAGS) {
+		skb_zcopy_downgrade_managed(skb);
 		get_page(page);
 		skb_fill_page_desc(skb, i, page, offset, size);
 	} else {
-- 
2.36.1


  parent reply	other threads:[~2022-07-07 11:52 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-07 11:49 [PATCH net-next v4 00/27] io_uring zerocopy send Pavel Begunkov
2022-07-07 11:49 ` [PATCH net-next v4 01/27] ipv4: avoid partial copy for zc Pavel Begunkov
2022-07-07 11:49 ` [PATCH net-next v4 02/27] ipv6: " Pavel Begunkov
2022-07-07 11:49 ` [PATCH net-next v4 03/27] skbuff: don't mix ubuf_info from different sources Pavel Begunkov
2022-07-07 11:49 ` [PATCH net-next v4 04/27] skbuff: add SKBFL_DONT_ORPHAN flag Pavel Begunkov
2022-07-07 11:49 ` [PATCH net-next v4 05/27] skbuff: carry external ubuf_info in msghdr Pavel Begunkov
2022-07-07 11:49 ` [PATCH net-next v4 06/27] net: Allow custom iter handler " Pavel Begunkov
2022-07-11 12:20   ` Pavel Begunkov
2022-07-07 11:49 ` Pavel Begunkov [this message]
2022-07-07 11:49 ` [PATCH net-next v4 08/27] net: introduce __skb_fill_page_desc_noacc Pavel Begunkov
2022-07-07 11:49 ` [PATCH net-next v4 09/27] ipv4/udp: support externally provided ubufs Pavel Begunkov
2022-07-07 11:49 ` [PATCH net-next v4 10/27] ipv6/udp: " Pavel Begunkov
2022-07-07 11:49 ` [PATCH net-next v4 11/27] tcp: " Pavel Begunkov
2022-07-08  4:06   ` David Ahern
2022-07-08 14:03     ` Pavel Begunkov
2022-07-13 23:38       ` David Ahern
2022-07-07 11:49 ` [PATCH net-next v4 12/27] io_uring: initialise msghdr::msg_ubuf Pavel Begunkov
2022-07-07 11:49 ` [PATCH net-next v4 13/27] io_uring: export io_put_task() Pavel Begunkov
2022-07-07 11:49 ` [PATCH net-next v4 14/27] io_uring: add zc notification infrastructure Pavel Begunkov
2022-07-07 11:49 ` [PATCH net-next v4 15/27] io_uring: cache struct io_notif Pavel Begunkov
2022-07-07 11:49 ` [PATCH net-next v4 16/27] io_uring: complete notifiers in tw Pavel Begunkov
2022-07-07 11:49 ` [PATCH net-next v4 17/27] io_uring: add rsrc referencing for notifiers Pavel Begunkov
2022-07-07 11:49 ` [PATCH net-next v4 18/27] io_uring: add notification slot registration Pavel Begunkov
2022-07-07 11:49 ` [PATCH net-next v4 19/27] io_uring: wire send zc request type Pavel Begunkov
2022-07-07 11:49 ` [PATCH net-next v4 20/27] io_uring: account locked pages for non-fixed zc Pavel Begunkov
2022-07-07 11:49 ` [PATCH net-next v4 21/27] io_uring: allow to pass addr into sendzc Pavel Begunkov
2022-07-07 11:49 ` [PATCH net-next v4 22/27] io_uring: sendzc with fixed buffers Pavel Begunkov
2022-07-07 11:49 ` [PATCH net-next v4 23/27] io_uring: flush notifiers after sendzc Pavel Begunkov
2022-07-07 11:49 ` [PATCH net-next v4 24/27] io_uring: rename IORING_OP_FILES_UPDATE Pavel Begunkov
2022-07-07 11:49 ` [PATCH net-next v4 25/27] io_uring: add zc notification flush requests Pavel Begunkov
2022-07-07 11:49 ` [PATCH net-next v4 26/27] io_uring: enable managed frags with register buffers Pavel Begunkov
2022-07-07 11:49 ` [PATCH net-next v4 27/27] selftests/io_uring: test zerocopy send Pavel Begunkov
2022-07-08  4:10 ` [PATCH net-next v4 00/27] io_uring " David Ahern
2022-07-08 14:26   ` Pavel Begunkov
2022-07-11 12:56     ` Pavel Begunkov
2022-07-13 23:45       ` David Ahern
2022-07-14 18:55         ` Pavel Begunkov
2022-07-18  2:19           ` David Ahern
2022-07-20 13:32             ` Pavel Begunkov
2022-07-24 18:28             ` David Ahern
2022-07-27 10:51               ` Pavel Begunkov
2022-07-29 22:30                 ` David Ahern
2022-09-26 20:08               ` Pavel Begunkov
2022-09-28 19:31                 ` David Ahern
2022-09-28 20:11                   ` Pavel Begunkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=088d3480b1ebc687fe7cbfc00aec2ff1c33a72c7.1657194434.git.asml.silence@gmail.com \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox