public inbox for [email protected]
 help / color / mirror / Atom feed
From: Mina Almasry <[email protected]>
To: David Wei <[email protected]>
Cc: [email protected], [email protected],
	 Jens Axboe <[email protected]>,
	Pavel Begunkov <[email protected]>,
	 Jakub Kicinski <[email protected]>,
	Paolo Abeni <[email protected]>,
	 "David S. Miller" <[email protected]>,
	Eric Dumazet <[email protected]>,
	 Jesper Dangaard Brouer <[email protected]>,
	David Ahern <[email protected]>,
	 Stanislav Fomichev <[email protected]>,
	Joe Damato <[email protected]>,
	 Pedro Tammela <[email protected]>
Subject: Re: [PATCH v7 04/15] net: prepare for non devmem TCP memory providers
Date: Fri, 1 Nov 2024 10:09:25 -0700	[thread overview]
Message-ID: <CAHS8izPZ3bzmPx=geE0Nb0q8kG8fvzsGT2YgohoFJbSz2r21Zw@mail.gmail.com> (raw)
In-Reply-To: <[email protected]>

On Tue, Oct 29, 2024 at 4:06 PM David Wei <[email protected]> wrote:
>
> From: Pavel Begunkov <[email protected]>
>
> There is a good bunch of places in generic paths assuming that the only
> page pool memory provider is devmem TCP. As we want to reuse the net_iov
> and provider infrastructure, we need to patch it up and explicitly check
> the provider type when we branch into devmem TCP code.
>
> Signed-off-by: Pavel Begunkov <[email protected]>
> Signed-off-by: David Wei <[email protected]>
> ---
>  net/core/devmem.c         | 10 ++++++++--
>  net/core/devmem.h         |  8 ++++++++
>  net/core/page_pool_user.c | 15 +++++++++------
>  net/ipv4/tcp.c            |  6 ++++++
>  4 files changed, 31 insertions(+), 8 deletions(-)
>
> diff --git a/net/core/devmem.c b/net/core/devmem.c
> index 01738029e35c..78983a98e5dc 100644
> --- a/net/core/devmem.c
> +++ b/net/core/devmem.c
> @@ -28,6 +28,12 @@ static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1);
>
>  static const struct memory_provider_ops dmabuf_devmem_ops;
>
> +bool net_is_devmem_page_pool_ops(const struct memory_provider_ops *ops)
> +{
> +       return ops == &dmabuf_devmem_ops;
> +}
> +EXPORT_SYMBOL_GPL(net_is_devmem_page_pool_ops);
> +
>  static void net_devmem_dmabuf_free_chunk_owner(struct gen_pool *genpool,
>                                                struct gen_pool_chunk *chunk,
>                                                void *not_used)
> @@ -316,10 +322,10 @@ void dev_dmabuf_uninstall(struct net_device *dev)
>         unsigned int i;
>
>         for (i = 0; i < dev->real_num_rx_queues; i++) {
> -               binding = dev->_rx[i].mp_params.mp_priv;
> -               if (!binding)
> +               if (dev->_rx[i].mp_params.mp_ops != &dmabuf_devmem_ops)
>                         continue;
>

Use the net_is_devmem_page_pool_ops helper here?

> +               binding = dev->_rx[i].mp_params.mp_priv;
>                 xa_for_each(&binding->bound_rxqs, xa_idx, rxq)
>                         if (rxq == &dev->_rx[i]) {
>                                 xa_erase(&binding->bound_rxqs, xa_idx);
> diff --git a/net/core/devmem.h b/net/core/devmem.h
> index a2b9913e9a17..a3fdd66bb05b 100644
> --- a/net/core/devmem.h
> +++ b/net/core/devmem.h
> @@ -116,6 +116,8 @@ struct net_iov *
>  net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding);
>  void net_devmem_free_dmabuf(struct net_iov *ppiov);
>
> +bool net_is_devmem_page_pool_ops(const struct memory_provider_ops *ops);
> +
>  #else
>  struct net_devmem_dmabuf_binding;
>
> @@ -168,6 +170,12 @@ static inline u32 net_devmem_iov_binding_id(const struct net_iov *niov)
>  {
>         return 0;
>  }
> +
> +static inline bool
> +net_is_devmem_page_pool_ops(const struct memory_provider_ops *ops)
> +{
> +       return false;
> +}
>  #endif
>
>  #endif /* _NET_DEVMEM_H */
> diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c
> index 48335766c1bf..604862a73535 100644
> --- a/net/core/page_pool_user.c
> +++ b/net/core/page_pool_user.c
> @@ -214,7 +214,7 @@ static int
>  page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool,
>                   const struct genl_info *info)
>  {
> -       struct net_devmem_dmabuf_binding *binding = pool->mp_priv;
> +       struct net_devmem_dmabuf_binding *binding;
>         size_t inflight, refsz;
>         void *hdr;
>
> @@ -244,8 +244,11 @@ page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool,
>                          pool->user.detach_time))
>                 goto err_cancel;
>
> -       if (binding && nla_put_u32(rsp, NETDEV_A_PAGE_POOL_DMABUF, binding->id))
> -               goto err_cancel;
> +       if (net_is_devmem_page_pool_ops(pool->mp_ops)) {
> +               binding = pool->mp_priv;
> +               if (nla_put_u32(rsp, NETDEV_A_PAGE_POOL_DMABUF, binding->id))
> +                       goto err_cancel;
> +       }

Worthy of note is that I think Jakub asked for this introspection, and
likely you should also add similar introspection. I.e. page_pool
dumping should likely be improved to dump that it's bound to io_uring
memory. Not sure what io_uring memory 'id' equivalent would be, if
any.

>
>         genlmsg_end(rsp, hdr);
>
> @@ -353,16 +356,16 @@ void page_pool_unlist(struct page_pool *pool)
>  int page_pool_check_memory_provider(struct net_device *dev,
>                                     struct netdev_rx_queue *rxq)
>  {
> -       struct net_devmem_dmabuf_binding *binding = rxq->mp_params.mp_priv;
> +       void *mp_priv = rxq->mp_params.mp_priv;
>         struct page_pool *pool;
>         struct hlist_node *n;
>
> -       if (!binding)
> +       if (!mp_priv)
>                 return 0;
>
>         mutex_lock(&page_pools_lock);
>         hlist_for_each_entry_safe(pool, n, &dev->page_pools, user.list) {
> -               if (pool->mp_priv != binding)
> +               if (pool->mp_priv != mp_priv)
>                         continue;
>
>                 if (pool->slow.queue_idx == get_netdev_rx_queue_index(rxq)) {
> diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
> index e928efc22f80..31e01da61c12 100644
> --- a/net/ipv4/tcp.c
> +++ b/net/ipv4/tcp.c
> @@ -277,6 +277,7 @@
>  #include <net/ip.h>
>  #include <net/sock.h>
>  #include <net/rstreason.h>
> +#include <net/page_pool/types.h>
>
>  #include <linux/uaccess.h>
>  #include <asm/ioctls.h>
> @@ -2476,6 +2477,11 @@ static int tcp_recvmsg_dmabuf(struct sock *sk, const struct sk_buff *skb,
>                         }
>
>                         niov = skb_frag_net_iov(frag);
> +                       if (net_is_devmem_page_pool_ops(niov->pp->mp_ops)) {
> +                               err = -ENODEV;
> +                               goto out;
> +                       }
> +

I think this check needs to go in the caller. Currently the caller
assumes that if !skb_frags_readable(), then the frag is dma-buf, and
calls tcp_recvmsg_dmabuf on it. The caller needs to check that the
frag is specifically a dma-buf frag now.

Can io_uring frags somehow end up in tcp_recvmsg_locked? You're still
using the tcp stack with io_uring ZC right? So I suspect they might?

--
Thanks,
Mina

  reply	other threads:[~2024-11-01 17:09 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-10-29 23:05 [PATCH v7 00/15] io_uring zero copy rx David Wei
2024-10-29 23:05 ` [PATCH v7 01/15] net: prefix devmem specific helpers David Wei
2024-11-01 14:57   ` Mina Almasry
2024-10-29 23:05 ` [PATCH v7 02/15] net: generalise net_iov chunk owners David Wei
2024-10-29 23:05 ` [PATCH v7 03/15] net: page_pool: create hooks for custom page providers David Wei
2024-10-29 23:05 ` [PATCH v7 04/15] net: prepare for non devmem TCP memory providers David Wei
2024-11-01 17:09   ` Mina Almasry [this message]
2024-11-01 17:41     ` Pavel Begunkov
2024-10-29 23:05 ` [PATCH v7 05/15] net: page_pool: add ->scrub mem provider callback David Wei
2024-10-29 23:05 ` [PATCH v7 06/15] net: page pool: add helper creating area from pages David Wei
2024-11-01 17:33   ` Mina Almasry
2024-11-01 18:16     ` Pavel Begunkov
2024-10-29 23:05 ` [PATCH v7 07/15] net: page_pool: introduce page_pool_mp_return_in_cache David Wei
2024-11-01 20:05   ` Mina Almasry
2024-10-29 23:05 ` [PATCH v7 08/15] net: add helper executing custom callback from napi David Wei
2024-10-29 23:05 ` [PATCH v7 09/15] io_uring/zcrx: add interface queue and refill queue David Wei
2024-10-29 23:05 ` [PATCH v7 10/15] io_uring/zcrx: add io_zcrx_area David Wei
2024-10-29 23:05 ` [PATCH v7 11/15] io_uring/zcrx: implement zerocopy receive pp memory provider David Wei
2024-11-01 20:06   ` Mina Almasry
2024-10-29 23:05 ` [PATCH v7 12/15] io_uring/zcrx: add io_recvzc request David Wei
2024-11-01 20:11   ` Mina Almasry
2024-10-29 23:05 ` [PATCH v7 13/15] io_uring/zcrx: set pp memory provider for an rx queue David Wei
2024-11-01 20:16   ` Mina Almasry
2024-10-29 23:05 ` [PATCH v7 14/15] io_uring/zcrx: add copy fallback David Wei
2024-10-29 23:05 ` [PATCH v7 15/15] io_uring/zcrx: throttle receive requests David Wei

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CAHS8izPZ3bzmPx=geE0Nb0q8kG8fvzsGT2YgohoFJbSz2r21Zw@mail.gmail.com' \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    [email protected] \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox