Linux-mm Archive mirror
 help / color / mirror / Atom feed
* Re: [RFC PATCH net-next v7 06/14] page_pool: convert to use netmem
       [not found] ` <20240326225048.785801-7-almasrymina@google.com>
@ 2024-03-27  8:25   ` Mina Almasry
  0 siblings, 0 replies; 2+ messages in thread
From: Mina Almasry @ 2024-03-27  8:25 UTC (permalink / raw)
  To: netdev, linux-kernel, linux-doc, linux-alpha, linux-mips,
	linux-parisc, sparclinux, linux-trace-kernel, linux-arch, bpf,
	linux-kselftest, linux-media, dri-devel, Linux-MM
  Cc: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	Jonathan Corbet, Richard Henderson, Ivan Kokshaysky, Matt Turner,
	Thomas Bogendoerfer, James E.J. Bottomley, Helge Deller,
	Andreas Larsson, Jesper Dangaard Brouer, Ilias Apalodimas,
	Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
	Arnd Bergmann, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Martin KaFai Lau, Eduard Zingerman, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo, Jiri Olsa, Steffen Klassert, Herbert Xu, David Ahern,
	Willem de Bruijn, Shuah Khan, Sumit Semwal, Christian König,
	Pavel Begunkov, David Wei, Jason Gunthorpe, Yunsheng Lin,
	Shailend Chand, Harshitha Ramamurthy, Shakeel Butt,
	Jeroen de Borst, Praveen Kaligineedi, Matthew Wilcox

On Tue, Mar 26, 2024 at 3:51 PM Mina Almasry <almasrymina@google.com> wrote:
>
> Abstract the memory type from the page_pool so we can later add support
> for new memory types. Convert the page_pool to use the new netmem type
> abstraction, rather than use struct page directly.
>
> As of this patch the netmem type is a no-op abstraction: it's always a
> struct page underneath. All the page pool internals are converted to
> use struct netmem instead of struct page, and the page pool now exports
> 2 APIs:
>
> 1. The existing struct page API.
> 2. The new struct netmem API.
>
> Keeping the existing API is transitional; we do not want to refactor all
> the current drivers using the page pool at once.
>
> The netmem abstraction is currently a no-op. The page_pool uses
> page_to_netmem() to convert allocated pages to netmem, and uses
> netmem_to_page() to convert the netmem back to pages to pass to mm APIs.
>
> Follow up patches to this series add non-paged netmem support to the
> page_pool. This change is factored out on its own to limit the code
> churn to this 1 patch, for ease of code review.
>
> Signed-off-by: Mina Almasry <almasrymina@google.com>
>
> ---
>
> v6:
>
> - Rebased on top of the merged netmem_ref type.
>
> To: linux-mm@kvack.org

It looks like this tag to add linux-mm did not work as intended. CCing
linux-mm manually.

> Cc: Matthew Wilcox <willy@infradead.org>
>
> ---
>  include/linux/skbuff.h           |   4 +-
>  include/net/netmem.h             |  15 ++
>  include/net/page_pool/helpers.h  | 122 +++++++++----
>  include/net/page_pool/types.h    |  17 +-
>  include/trace/events/page_pool.h |  29 +--
>  net/bpf/test_run.c               |   5 +-
>  net/core/page_pool.c             | 303 +++++++++++++++++--------------
>  net/core/skbuff.c                |   7 +-
>  8 files changed, 302 insertions(+), 200 deletions(-)
>
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index b945af8a6208..78659c8efa4e 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -3521,7 +3521,7 @@ int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb,
>                     unsigned int headroom);
>  int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb,
>                          struct bpf_prog *prog);
> -bool napi_pp_put_page(struct page *page, bool napi_safe);
> +bool napi_pp_put_page(netmem_ref netmem, bool napi_safe);
>
>  static inline void
>  skb_page_unref(const struct sk_buff *skb, struct page *page, bool napi_safe)
> @@ -3539,7 +3539,7 @@ napi_frag_unref(skb_frag_t *frag, bool recycle, bool napi_safe)
>         struct page *page = skb_frag_page(frag);
>
>  #ifdef CONFIG_PAGE_POOL
> -       if (recycle && napi_pp_put_page(page, napi_safe))
> +       if (recycle && napi_pp_put_page(page_to_netmem(page), napi_safe))
>                 return;
>  #endif
>         put_page(page);
> diff --git a/include/net/netmem.h b/include/net/netmem.h
> index ca17ea1d33f8..21f53b29e5fe 100644
> --- a/include/net/netmem.h
> +++ b/include/net/netmem.h
> @@ -88,4 +88,19 @@ static inline netmem_ref page_to_netmem(struct page *page)
>         return (__force netmem_ref)page;
>  }
>
> +static inline int netmem_ref_count(netmem_ref netmem)
> +{
> +       return page_ref_count(netmem_to_page(netmem));
> +}
> +
> +static inline unsigned long netmem_to_pfn(netmem_ref netmem)
> +{
> +       return page_to_pfn(netmem_to_page(netmem));
> +}
> +
> +static inline netmem_ref netmem_compound_head(netmem_ref netmem)
> +{
> +       return page_to_netmem(compound_head(netmem_to_page(netmem)));
> +}
> +
>  #endif /* _NET_NETMEM_H */
> diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h
> index 1d397c1a0043..61814f91a458 100644
> --- a/include/net/page_pool/helpers.h
> +++ b/include/net/page_pool/helpers.h
> @@ -53,6 +53,8 @@
>  #define _NET_PAGE_POOL_HELPERS_H
>
>  #include <net/page_pool/types.h>
> +#include <net/net_debug.h>
> +#include <net/netmem.h>
>
>  #ifdef CONFIG_PAGE_POOL_STATS
>  /* Deprecated driver-facing API, use netlink instead */
> @@ -101,7 +103,7 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool)
>   * Get a page fragment from the page allocator or page_pool caches.
>   *
>   * Return:
> - * Return allocated page fragment, otherwise return NULL.
> + * Return allocated page fragment, otherwise return 0.
>   */
>  static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool,
>                                                     unsigned int *offset,
> @@ -112,22 +114,22 @@ static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool,
>         return page_pool_alloc_frag(pool, offset, size, gfp);
>  }
>
> -static inline struct page *page_pool_alloc(struct page_pool *pool,
> -                                          unsigned int *offset,
> -                                          unsigned int *size, gfp_t gfp)
> +static inline netmem_ref page_pool_alloc(struct page_pool *pool,
> +                                        unsigned int *offset,
> +                                        unsigned int *size, gfp_t gfp)
>  {
>         unsigned int max_size = PAGE_SIZE << pool->p.order;
> -       struct page *page;
> +       netmem_ref netmem;
>
>         if ((*size << 1) > max_size) {
>                 *size = max_size;
>                 *offset = 0;
> -               return page_pool_alloc_pages(pool, gfp);
> +               return page_pool_alloc_netmem(pool, gfp);
>         }
>
> -       page = page_pool_alloc_frag(pool, offset, *size, gfp);
> -       if (unlikely(!page))
> -               return NULL;
> +       netmem = page_pool_alloc_frag_netmem(pool, offset, *size, gfp);
> +       if (unlikely(!netmem))
> +               return 0;
>
>         /* There is very likely not enough space for another fragment, so append
>          * the remaining size to the current fragment to avoid truesize
> @@ -138,7 +140,7 @@ static inline struct page *page_pool_alloc(struct page_pool *pool,
>                 pool->frag_offset = max_size;
>         }
>
> -       return page;
> +       return netmem;
>  }
>
>  /**
> @@ -152,7 +154,7 @@ static inline struct page *page_pool_alloc(struct page_pool *pool,
>   * utilization and performance penalty.
>   *
>   * Return:
> - * Return allocated page or page fragment, otherwise return NULL.
> + * Return allocated page or page fragment, otherwise return 0.
>   */
>  static inline struct page *page_pool_dev_alloc(struct page_pool *pool,
>                                                unsigned int *offset,
> @@ -160,7 +162,7 @@ static inline struct page *page_pool_dev_alloc(struct page_pool *pool,
>  {
>         gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
>
> -       return page_pool_alloc(pool, offset, size, gfp);
> +       return netmem_to_page(page_pool_alloc(pool, offset, size, gfp));
>  }
>
>  static inline void *page_pool_alloc_va(struct page_pool *pool,
> @@ -170,9 +172,10 @@ static inline void *page_pool_alloc_va(struct page_pool *pool,
>         struct page *page;
>
>         /* Mask off __GFP_HIGHMEM to ensure we can use page_address() */
> -       page = page_pool_alloc(pool, &offset, size, gfp & ~__GFP_HIGHMEM);
> +       page = netmem_to_page(
> +               page_pool_alloc(pool, &offset, size, gfp & ~__GFP_HIGHMEM));
>         if (unlikely(!page))
> -               return NULL;
> +               return 0;
>
>         return page_address(page) + offset;
>  }
> @@ -187,7 +190,7 @@ static inline void *page_pool_alloc_va(struct page_pool *pool,
>   * it returns va of the allocated page or page fragment.
>   *
>   * Return:
> - * Return the va for the allocated page or page fragment, otherwise return NULL.
> + * Return the va for the allocated page or page fragment, otherwise return 0.
>   */
>  static inline void *page_pool_dev_alloc_va(struct page_pool *pool,
>                                            unsigned int *size)
> @@ -210,6 +213,11 @@ inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool)
>         return pool->p.dma_dir;
>  }
>
> +static inline void page_pool_fragment_netmem(netmem_ref netmem, long nr)
> +{
> +       atomic_long_set(&netmem_to_page(netmem)->pp_ref_count, nr);
> +}
> +
>  /**
>   * page_pool_fragment_page() - split a fresh page into fragments
>   * @page:      page to split
> @@ -230,11 +238,12 @@ inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool)
>   */
>  static inline void page_pool_fragment_page(struct page *page, long nr)
>  {
> -       atomic_long_set(&page->pp_ref_count, nr);
> +       page_pool_fragment_netmem(page_to_netmem(page), nr);
>  }
>
> -static inline long page_pool_unref_page(struct page *page, long nr)
> +static inline long page_pool_unref_netmem(netmem_ref netmem, long nr)
>  {
> +       struct page *page = netmem_to_page(netmem);
>         long ret;
>
>         /* If nr == pp_ref_count then we have cleared all remaining
> @@ -277,15 +286,41 @@ static inline long page_pool_unref_page(struct page *page, long nr)
>         return ret;
>  }
>
> +static inline long page_pool_unref_page(struct page *page, long nr)
> +{
> +       return page_pool_unref_netmem(page_to_netmem(page), nr);
> +}
> +
> +static inline void page_pool_ref_netmem(netmem_ref netmem)
> +{
> +       atomic_long_inc(&netmem_to_page(netmem)->pp_ref_count);
> +}
> +
>  static inline void page_pool_ref_page(struct page *page)
>  {
> -       atomic_long_inc(&page->pp_ref_count);
> +       page_pool_ref_netmem(page_to_netmem(page));
>  }
>
> -static inline bool page_pool_is_last_ref(struct page *page)
> +static inline bool page_pool_is_last_ref(netmem_ref netmem)
>  {
>         /* If page_pool_unref_page() returns 0, we were the last user */
> -       return page_pool_unref_page(page, 1) == 0;
> +       return page_pool_unref_netmem(netmem, 1) == 0;
> +}
> +
> +static inline void page_pool_put_netmem(struct page_pool *pool,
> +                                       netmem_ref netmem,
> +                                       unsigned int dma_sync_size,
> +                                       bool allow_direct)
> +{
> +       /* When page_pool isn't compiled-in, net/core/xdp.c doesn't
> +        * allow registering MEM_TYPE_PAGE_POOL, but shield linker.
> +        */
> +#ifdef CONFIG_PAGE_POOL
> +       if (!page_pool_is_last_ref(netmem))
> +               return;
> +
> +       page_pool_put_unrefed_netmem(pool, netmem, dma_sync_size, allow_direct);
> +#endif
>  }
>
>  /**
> @@ -306,15 +341,15 @@ static inline void page_pool_put_page(struct page_pool *pool,
>                                       unsigned int dma_sync_size,
>                                       bool allow_direct)
>  {
> -       /* When page_pool isn't compiled-in, net/core/xdp.c doesn't
> -        * allow registering MEM_TYPE_PAGE_POOL, but shield linker.
> -        */
> -#ifdef CONFIG_PAGE_POOL
> -       if (!page_pool_is_last_ref(page))
> -               return;
> +       page_pool_put_netmem(pool, page_to_netmem(page), dma_sync_size,
> +                            allow_direct);
> +}
>
> -       page_pool_put_unrefed_page(pool, page, dma_sync_size, allow_direct);
> -#endif
> +static inline void page_pool_put_full_netmem(struct page_pool *pool,
> +                                            netmem_ref netmem,
> +                                            bool allow_direct)
> +{
> +       page_pool_put_netmem(pool, netmem, -1, allow_direct);
>  }
>
>  /**
> @@ -329,7 +364,7 @@ static inline void page_pool_put_page(struct page_pool *pool,
>  static inline void page_pool_put_full_page(struct page_pool *pool,
>                                            struct page *page, bool allow_direct)
>  {
> -       page_pool_put_page(pool, page, -1, allow_direct);
> +       page_pool_put_netmem(pool, page_to_netmem(page), -1, allow_direct);
>  }
>
>  /**
> @@ -363,6 +398,18 @@ static inline void page_pool_free_va(struct page_pool *pool, void *va,
>         page_pool_put_page(pool, virt_to_head_page(va), -1, allow_direct);
>  }
>
> +static inline dma_addr_t page_pool_get_dma_addr_netmem(netmem_ref netmem)
> +{
> +       struct page *page = netmem_to_page(netmem);
> +
> +       dma_addr_t ret = page->dma_addr;
> +
> +       if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA)
> +               ret <<= PAGE_SHIFT;
> +
> +       return ret;
> +}
> +
>  /**
>   * page_pool_get_dma_addr() - Retrieve the stored DMA address.
>   * @page:      page allocated from a page pool
> @@ -372,16 +419,14 @@ static inline void page_pool_free_va(struct page_pool *pool, void *va,
>   */
>  static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
>  {
> -       dma_addr_t ret = page->dma_addr;
> -
> -       if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA)
> -               ret <<= PAGE_SHIFT;
> -
> -       return ret;
> +       return page_pool_get_dma_addr_netmem(page_to_netmem(page));
>  }
>
> -static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
> +static inline bool page_pool_set_dma_addr_netmem(netmem_ref netmem,
> +                                                dma_addr_t addr)
>  {
> +       struct page *page = netmem_to_page(netmem);
> +
>         if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) {
>                 page->dma_addr = addr >> PAGE_SHIFT;
>
> @@ -395,6 +440,11 @@ static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
>         return false;
>  }
>
> +static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
> +{
> +       return page_pool_set_dma_addr_netmem(page_to_netmem(page), addr);
> +}
> +
>  static inline bool page_pool_put(struct page_pool *pool)
>  {
>         return refcount_dec_and_test(&pool->user_cnt);
> diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
> index 07e6afafedbe..0d164624f16d 100644
> --- a/include/net/page_pool/types.h
> +++ b/include/net/page_pool/types.h
> @@ -40,7 +40,7 @@
>  #define PP_ALLOC_CACHE_REFILL  64
>  struct pp_alloc_cache {
>         u32 count;
> -       struct page *cache[PP_ALLOC_CACHE_SIZE];
> +       netmem_ref cache[PP_ALLOC_CACHE_SIZE];
>  };
>
>  /**
> @@ -73,7 +73,7 @@ struct page_pool_params {
>         struct_group_tagged(page_pool_params_slow, slow,
>                 struct net_device *netdev;
>  /* private: used by test code only */
> -               void (*init_callback)(struct page *page, void *arg);
> +               void (*init_callback)(netmem_ref netmem, void *arg);
>                 void *init_arg;
>         );
>  };
> @@ -131,8 +131,8 @@ struct page_pool_stats {
>  struct memory_provider_ops {
>         int (*init)(struct page_pool *pool);
>         void (*destroy)(struct page_pool *pool);
> -       struct page *(*alloc_pages)(struct page_pool *pool, gfp_t gfp);
> -       bool (*release_page)(struct page_pool *pool, struct page *page);
> +       netmem_ref (*alloc_pages)(struct page_pool *pool, gfp_t gfp);
> +       bool (*release_page)(struct page_pool *pool, netmem_ref netmem);
>  };
>
>  struct pp_memory_provider_params {
> @@ -147,7 +147,7 @@ struct page_pool {
>         bool has_init_callback;
>
>         long frag_users;
> -       struct page *frag_page;
> +       netmem_ref frag_page;
>         unsigned int frag_offset;
>         u32 pages_state_hold_cnt;
>
> @@ -219,8 +219,12 @@ struct page_pool {
>  };
>
>  struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp);
> +netmem_ref page_pool_alloc_netmem(struct page_pool *pool, gfp_t gfp);
>  struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
>                                   unsigned int size, gfp_t gfp);
> +netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool,
> +                                      unsigned int *offset, unsigned int size,
> +                                      gfp_t gfp);
>  struct page_pool *page_pool_create(const struct page_pool_params *params);
>  struct page_pool *page_pool_create_percpu(const struct page_pool_params *params,
>                                           int cpuid);
> @@ -250,6 +254,9 @@ static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data,
>  }
>  #endif
>
> +void page_pool_put_unrefed_netmem(struct page_pool *pool, netmem_ref netmem,
> +                                 unsigned int dma_sync_size,
> +                                 bool allow_direct);
>  void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page,
>                                 unsigned int dma_sync_size,
>                                 bool allow_direct);
> diff --git a/include/trace/events/page_pool.h b/include/trace/events/page_pool.h
> index 6834356b2d2a..c5b6383ff276 100644
> --- a/include/trace/events/page_pool.h
> +++ b/include/trace/events/page_pool.h
> @@ -42,51 +42,52 @@ TRACE_EVENT(page_pool_release,
>  TRACE_EVENT(page_pool_state_release,
>
>         TP_PROTO(const struct page_pool *pool,
> -                const struct page *page, u32 release),
> +                netmem_ref netmem, u32 release),
>
> -       TP_ARGS(pool, page, release),
> +       TP_ARGS(pool, netmem, release),
>
>         TP_STRUCT__entry(
>                 __field(const struct page_pool *,       pool)
> -               __field(const struct page *,            page)
> +               __field(netmem_ref,                     netmem)
>                 __field(u32,                            release)
>                 __field(unsigned long,                  pfn)
>         ),
>
>         TP_fast_assign(
>                 __entry->pool           = pool;
> -               __entry->page           = page;
> +               __entry->netmem         = netmem;
>                 __entry->release        = release;
> -               __entry->pfn            = page_to_pfn(page);
> +               __entry->pfn            = netmem_to_pfn(netmem);
>         ),
>
> -       TP_printk("page_pool=%p page=%p pfn=0x%lx release=%u",
> -                 __entry->pool, __entry->page, __entry->pfn, __entry->release)
> +       TP_printk("page_pool=%p netmem=%lu pfn=0x%lx release=%u",
> +                 __entry->pool, (__force unsigned long)__entry->netmem,
> +                 __entry->pfn, __entry->release)
>  );
>
>  TRACE_EVENT(page_pool_state_hold,
>
>         TP_PROTO(const struct page_pool *pool,
> -                const struct page *page, u32 hold),
> +                netmem_ref netmem, u32 hold),
>
> -       TP_ARGS(pool, page, hold),
> +       TP_ARGS(pool, netmem, hold),
>
>         TP_STRUCT__entry(
>                 __field(const struct page_pool *,       pool)
> -               __field(const struct page *,            page)
> +               __field(netmem_ref,                     netmem)
>                 __field(u32,                            hold)
>                 __field(unsigned long,                  pfn)
>         ),
>
>         TP_fast_assign(
>                 __entry->pool   = pool;
> -               __entry->page   = page;
> +               __entry->netmem = netmem;
>                 __entry->hold   = hold;
> -               __entry->pfn    = page_to_pfn(page);
> +               __entry->pfn    = netmem_to_pfn(netmem);
>         ),
>
> -       TP_printk("page_pool=%p page=%p pfn=0x%lx hold=%u",
> -                 __entry->pool, __entry->page, __entry->pfn, __entry->hold)
> +       TP_printk("page_pool=%p netmem=%lu pfn=0x%lx hold=%u",
> +                 __entry->pool, __entry->netmem, __entry->pfn, __entry->hold)
>  );
>
>  TRACE_EVENT(page_pool_update_nid,
> diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
> index 61efeadaff8d..fc300e807e1d 100644
> --- a/net/bpf/test_run.c
> +++ b/net/bpf/test_run.c
> @@ -127,9 +127,10 @@ struct xdp_test_data {
>  #define TEST_XDP_FRAME_SIZE (PAGE_SIZE - sizeof(struct xdp_page_head))
>  #define TEST_XDP_MAX_BATCH 256
>
> -static void xdp_test_run_init_page(struct page *page, void *arg)
> +static void xdp_test_run_init_page(netmem_ref netmem, void *arg)
>  {
> -       struct xdp_page_head *head = phys_to_virt(page_to_phys(page));
> +       struct xdp_page_head *head =
> +               phys_to_virt(page_to_phys(netmem_to_page(netmem)));
>         struct xdp_buff *new_ctx, *orig_ctx;
>         u32 headroom = XDP_PACKET_HEADROOM;
>         struct xdp_test_data *xdp = arg;
> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
> index 795b7ff1c01f..c8125be3a6e2 100644
> --- a/net/core/page_pool.c
> +++ b/net/core/page_pool.c
> @@ -329,19 +329,18 @@ struct page_pool *page_pool_create(const struct page_pool_params *params)
>  }
>  EXPORT_SYMBOL(page_pool_create);
>
> -static void page_pool_return_page(struct page_pool *pool, struct page *page);
> +static void page_pool_return_page(struct page_pool *pool, netmem_ref netmem);
>
> -noinline
> -static struct page *page_pool_refill_alloc_cache(struct page_pool *pool)
> +static noinline netmem_ref page_pool_refill_alloc_cache(struct page_pool *pool)
>  {
>         struct ptr_ring *r = &pool->ring;
> -       struct page *page;
> +       netmem_ref netmem;
>         int pref_nid; /* preferred NUMA node */
>
>         /* Quicker fallback, avoid locks when ring is empty */
>         if (__ptr_ring_empty(r)) {
>                 alloc_stat_inc(pool, empty);
> -               return NULL;
> +               return 0;
>         }
>
>         /* Softirq guarantee CPU and thus NUMA node is stable. This,
> @@ -356,56 +355,56 @@ static struct page *page_pool_refill_alloc_cache(struct page_pool *pool)
>
>         /* Refill alloc array, but only if NUMA match */
>         do {
> -               page = __ptr_ring_consume(r);
> -               if (unlikely(!page))
> +               netmem = (__force netmem_ref)__ptr_ring_consume(r);
> +               if (unlikely(!netmem))
>                         break;
>
> -               if (likely(page_to_nid(page) == pref_nid)) {
> -                       pool->alloc.cache[pool->alloc.count++] = page;
> +               if (likely(page_to_nid(netmem_to_page(netmem)) == pref_nid)) {
> +                       pool->alloc.cache[pool->alloc.count++] = netmem;
>                 } else {
>                         /* NUMA mismatch;
>                          * (1) release 1 page to page-allocator and
>                          * (2) break out to fallthrough to alloc_pages_node.
>                          * This limit stress on page buddy alloactor.
>                          */
> -                       page_pool_return_page(pool, page);
> +                       page_pool_return_page(pool, netmem);
>                         alloc_stat_inc(pool, waive);
> -                       page = NULL;
> +                       netmem = 0;
>                         break;
>                 }
>         } while (pool->alloc.count < PP_ALLOC_CACHE_REFILL);
>
>         /* Return last page */
>         if (likely(pool->alloc.count > 0)) {
> -               page = pool->alloc.cache[--pool->alloc.count];
> +               netmem = pool->alloc.cache[--pool->alloc.count];
>                 alloc_stat_inc(pool, refill);
>         }
>
> -       return page;
> +       return netmem;
>  }
>
>  /* fast path */
> -static struct page *__page_pool_get_cached(struct page_pool *pool)
> +static netmem_ref __page_pool_get_cached(struct page_pool *pool)
>  {
> -       struct page *page;
> +       netmem_ref netmem;
>
>         /* Caller MUST guarantee safe non-concurrent access, e.g. softirq */
>         if (likely(pool->alloc.count)) {
>                 /* Fast-path */
> -               page = pool->alloc.cache[--pool->alloc.count];
> +               netmem = pool->alloc.cache[--pool->alloc.count];
>                 alloc_stat_inc(pool, fast);
>         } else {
> -               page = page_pool_refill_alloc_cache(pool);
> +               netmem = page_pool_refill_alloc_cache(pool);
>         }
>
> -       return page;
> +       return netmem;
>  }
>
>  static void page_pool_dma_sync_for_device(struct page_pool *pool,
> -                                         struct page *page,
> +                                         netmem_ref netmem,
>                                           unsigned int dma_sync_size)
>  {
> -       dma_addr_t dma_addr = page_pool_get_dma_addr(page);
> +       dma_addr_t dma_addr = page_pool_get_dma_addr_netmem(netmem);
>
>         dma_sync_size = min(dma_sync_size, pool->p.max_len);
>         dma_sync_single_range_for_device(pool->p.dev, dma_addr,
> @@ -413,7 +412,7 @@ static void page_pool_dma_sync_for_device(struct page_pool *pool,
>                                          pool->p.dma_dir);
>  }
>
> -static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
> +static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem)
>  {
>         dma_addr_t dma;
>
> @@ -422,18 +421,18 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
>          * into page private data (i.e 32bit cpu with 64bit DMA caps)
>          * This mapping is kept for lifetime of page, until leaving pool.
>          */
> -       dma = dma_map_page_attrs(pool->p.dev, page, 0,
> -                                (PAGE_SIZE << pool->p.order),
> -                                pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC |
> -                                                 DMA_ATTR_WEAK_ORDERING);
> +       dma = dma_map_page_attrs(pool->p.dev, netmem_to_page(netmem), 0,
> +                                (PAGE_SIZE << pool->p.order), pool->p.dma_dir,
> +                                DMA_ATTR_SKIP_CPU_SYNC |
> +                                        DMA_ATTR_WEAK_ORDERING);
>         if (dma_mapping_error(pool->p.dev, dma))
>                 return false;
>
> -       if (page_pool_set_dma_addr(page, dma))
> +       if (page_pool_set_dma_addr_netmem(netmem, dma))
>                 goto unmap_failed;
>
>         if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
> -               page_pool_dma_sync_for_device(pool, page, pool->p.max_len);
> +               page_pool_dma_sync_for_device(pool, netmem, pool->p.max_len);
>
>         return true;
>
> @@ -445,9 +444,10 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
>         return false;
>  }
>
> -static void page_pool_set_pp_info(struct page_pool *pool,
> -                                 struct page *page)
> +static void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem)
>  {
> +       struct page *page = netmem_to_page(netmem);
> +
>         page->pp = pool;
>         page->pp_magic |= PP_SIGNATURE;
>
> @@ -457,13 +457,15 @@ static void page_pool_set_pp_info(struct page_pool *pool,
>          * is dirtying the same cache line as the page->pp_magic above, so
>          * the overhead is negligible.
>          */
> -       page_pool_fragment_page(page, 1);
> +       page_pool_fragment_netmem(netmem, 1);
>         if (pool->has_init_callback)
> -               pool->slow.init_callback(page, pool->slow.init_arg);
> +               pool->slow.init_callback(netmem, pool->slow.init_arg);
>  }
>
> -static void page_pool_clear_pp_info(struct page *page)
> +static void page_pool_clear_pp_info(netmem_ref netmem)
>  {
> +       struct page *page = netmem_to_page(netmem);
> +
>         page->pp_magic = 0;
>         page->pp = NULL;
>  }
> @@ -479,34 +481,34 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
>                 return NULL;
>
>         if ((pool->p.flags & PP_FLAG_DMA_MAP) &&
> -           unlikely(!page_pool_dma_map(pool, page))) {
> +           unlikely(!page_pool_dma_map(pool, page_to_netmem(page)))) {
>                 put_page(page);
>                 return NULL;
>         }
>
>         alloc_stat_inc(pool, slow_high_order);
> -       page_pool_set_pp_info(pool, page);
> +       page_pool_set_pp_info(pool, page_to_netmem(page));
>
>         /* Track how many pages are held 'in-flight' */
>         pool->pages_state_hold_cnt++;
> -       trace_page_pool_state_hold(pool, page, pool->pages_state_hold_cnt);
> +       trace_page_pool_state_hold(pool, page_to_netmem(page),
> +                                  pool->pages_state_hold_cnt);
>         return page;
>  }
>
>  /* slow path */
> -noinline
> -static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
> -                                                gfp_t gfp)
> +static noinline netmem_ref __page_pool_alloc_pages_slow(struct page_pool *pool,
> +                                                       gfp_t gfp)
>  {
>         const int bulk = PP_ALLOC_CACHE_REFILL;
>         unsigned int pp_flags = pool->p.flags;
>         unsigned int pp_order = pool->p.order;
> -       struct page *page;
> +       netmem_ref netmem;
>         int i, nr_pages;
>
>         /* Don't support bulk alloc for high-order pages */
>         if (unlikely(pp_order))
> -               return __page_pool_alloc_page_order(pool, gfp);
> +               return page_to_netmem(__page_pool_alloc_page_order(pool, gfp));
>
>         /* Unnecessary as alloc cache is empty, but guarantees zero count */
>         if (unlikely(pool->alloc.count > 0))
> @@ -515,60 +517,67 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
>         /* Mark empty alloc.cache slots "empty" for alloc_pages_bulk_array */
>         memset(&pool->alloc.cache, 0, sizeof(void *) * bulk);
>
> -       nr_pages = alloc_pages_bulk_array_node(gfp, pool->p.nid, bulk,
> -                                              pool->alloc.cache);
> +       nr_pages = alloc_pages_bulk_array_node(gfp,
> +                                              pool->p.nid, bulk,
> +                                              (struct page **)pool->alloc.cache);
>         if (unlikely(!nr_pages))
> -               return NULL;
> +               return 0;
>
>         /* Pages have been filled into alloc.cache array, but count is zero and
>          * page element have not been (possibly) DMA mapped.
>          */
>         for (i = 0; i < nr_pages; i++) {
> -               page = pool->alloc.cache[i];
> +               netmem = pool->alloc.cache[i];
>                 if ((pp_flags & PP_FLAG_DMA_MAP) &&
> -                   unlikely(!page_pool_dma_map(pool, page))) {
> -                       put_page(page);
> +                   unlikely(!page_pool_dma_map(pool, netmem))) {
> +                       put_page(netmem_to_page(netmem));
>                         continue;
>                 }
>
> -               page_pool_set_pp_info(pool, page);
> -               pool->alloc.cache[pool->alloc.count++] = page;
> +               page_pool_set_pp_info(pool, netmem);
> +               pool->alloc.cache[pool->alloc.count++] = netmem;
>                 /* Track how many pages are held 'in-flight' */
>                 pool->pages_state_hold_cnt++;
> -               trace_page_pool_state_hold(pool, page,
> +               trace_page_pool_state_hold(pool, netmem,
>                                            pool->pages_state_hold_cnt);
>         }
>
>         /* Return last page */
>         if (likely(pool->alloc.count > 0)) {
> -               page = pool->alloc.cache[--pool->alloc.count];
> +               netmem = pool->alloc.cache[--pool->alloc.count];
>                 alloc_stat_inc(pool, slow);
>         } else {
> -               page = NULL;
> +               netmem = 0;
>         }
>
>         /* When page just alloc'ed is should/must have refcnt 1. */
> -       return page;
> +       return netmem;
>  }
>
>  /* For using page_pool replace: alloc_pages() API calls, but provide
>   * synchronization guarantee for allocation side.
>   */
> -struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp)
> +netmem_ref page_pool_alloc_netmem(struct page_pool *pool, gfp_t gfp)
>  {
> -       struct page *page;
> +       netmem_ref netmem;
>
>         /* Fast-path: Get a page from cache */
> -       page = __page_pool_get_cached(pool);
> -       if (page)
> -               return page;
> +       netmem = __page_pool_get_cached(pool);
> +       if (netmem)
> +               return netmem;
>
>         /* Slow-path: cache empty, do real allocation */
>         if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops)
> -               page = pool->mp_ops->alloc_pages(pool, gfp);
> +               netmem = pool->mp_ops->alloc_pages(pool, gfp);
>         else
> -               page = __page_pool_alloc_pages_slow(pool, gfp);
> -       return page;
> +               netmem = __page_pool_alloc_pages_slow(pool, gfp);
> +       return netmem;
> +}
> +EXPORT_SYMBOL(page_pool_alloc_netmem);
> +
> +struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp)
> +{
> +       return netmem_to_page(page_pool_alloc_netmem(pool, gfp));
>  }
>  EXPORT_SYMBOL(page_pool_alloc_pages);
>
> @@ -596,8 +605,8 @@ s32 page_pool_inflight(const struct page_pool *pool, bool strict)
>         return inflight;
>  }
>
> -static __always_inline
> -void __page_pool_release_page_dma(struct page_pool *pool, struct page *page)
> +static __always_inline void __page_pool_release_page_dma(struct page_pool *pool,
> +                                                        netmem_ref netmem)
>  {
>         dma_addr_t dma;
>
> @@ -607,13 +616,13 @@ void __page_pool_release_page_dma(struct page_pool *pool, struct page *page)
>                  */
>                 return;
>
> -       dma = page_pool_get_dma_addr(page);
> +       dma = page_pool_get_dma_addr_netmem(netmem);
>
>         /* When page is unmapped, it cannot be returned to our pool */
>         dma_unmap_page_attrs(pool->p.dev, dma,
>                              PAGE_SIZE << pool->p.order, pool->p.dma_dir,
>                              DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
> -       page_pool_set_dma_addr(page, 0);
> +       page_pool_set_dma_addr_netmem(netmem, 0);
>  }
>
>  /* Disconnects a page (from a page_pool).  API users can have a need
> @@ -621,26 +630,26 @@ void __page_pool_release_page_dma(struct page_pool *pool, struct page *page)
>   * a regular page (that will eventually be returned to the normal
>   * page-allocator via put_page).
>   */
> -void page_pool_return_page(struct page_pool *pool, struct page *page)
> +void page_pool_return_page(struct page_pool *pool, netmem_ref netmem)
>  {
>         int count;
>         bool put;
>
>         put = true;
>         if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops)
> -               put = pool->mp_ops->release_page(pool, page);
> +               put = pool->mp_ops->release_page(pool, netmem);
>         else
> -               __page_pool_release_page_dma(pool, page);
> +               __page_pool_release_page_dma(pool, netmem);
>
>         /* This may be the last page returned, releasing the pool, so
>          * it is not safe to reference pool afterwards.
>          */
>         count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt);
> -       trace_page_pool_state_release(pool, page, count);
> +       trace_page_pool_state_release(pool, netmem, count);
>
>         if (put) {
> -               page_pool_clear_pp_info(page);
> -               put_page(page);
> +               page_pool_clear_pp_info(netmem);
> +               put_page(netmem_to_page(netmem));
>         }
>         /* An optimization would be to call __free_pages(page, pool->p.order)
>          * knowing page is not part of page-cache (thus avoiding a
> @@ -648,14 +657,14 @@ void page_pool_return_page(struct page_pool *pool, struct page *page)
>          */
>  }
>
> -static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page)
> +static bool page_pool_recycle_in_ring(struct page_pool *pool, netmem_ref netmem)
>  {
>         int ret;
>         /* BH protection not needed if current is softirq */
>         if (in_softirq())
> -               ret = ptr_ring_produce(&pool->ring, page);
> +               ret = ptr_ring_produce(&pool->ring, (__force void *)netmem);
>         else
> -               ret = ptr_ring_produce_bh(&pool->ring, page);
> +               ret = ptr_ring_produce_bh(&pool->ring, (__force void *)netmem);
>
>         if (!ret) {
>                 recycle_stat_inc(pool, ring);
> @@ -670,7 +679,7 @@ static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page)
>   *
>   * Caller must provide appropriate safe context.
>   */
> -static bool page_pool_recycle_in_cache(struct page *page,
> +static bool page_pool_recycle_in_cache(netmem_ref netmem,
>                                        struct page_pool *pool)
>  {
>         if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE)) {
> @@ -679,14 +688,15 @@ static bool page_pool_recycle_in_cache(struct page *page,
>         }
>
>         /* Caller MUST have verified/know (page_ref_count(page) == 1) */
> -       pool->alloc.cache[pool->alloc.count++] = page;
> +       pool->alloc.cache[pool->alloc.count++] = netmem;
>         recycle_stat_inc(pool, cached);
>         return true;
>  }
>
> -static bool __page_pool_page_can_be_recycled(const struct page *page)
> +static bool __page_pool_page_can_be_recycled(netmem_ref netmem)
>  {
> -       return page_ref_count(page) == 1 && !page_is_pfmemalloc(page);
> +       return page_ref_count(netmem_to_page(netmem)) == 1 &&
> +              !page_is_pfmemalloc(netmem_to_page(netmem));
>  }
>
>  /* If the page refcnt == 1, this will try to recycle the page.
> @@ -695,8 +705,8 @@ static bool __page_pool_page_can_be_recycled(const struct page *page)
>   * If the page refcnt != 1, then the page will be returned to memory
>   * subsystem.
>   */
> -static __always_inline struct page *
> -__page_pool_put_page(struct page_pool *pool, struct page *page,
> +static __always_inline netmem_ref
> +__page_pool_put_page(struct page_pool *pool, netmem_ref netmem,
>                      unsigned int dma_sync_size, bool allow_direct)
>  {
>         lockdep_assert_no_hardirq();
> @@ -710,19 +720,19 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
>          * page is NOT reusable when allocated when system is under
>          * some pressure. (page_is_pfmemalloc)
>          */
> -       if (likely(__page_pool_page_can_be_recycled(page))) {
> +       if (likely(__page_pool_page_can_be_recycled(netmem))) {
>                 /* Read barrier done in page_ref_count / READ_ONCE */
>
>                 if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
> -                       page_pool_dma_sync_for_device(pool, page,
> +                       page_pool_dma_sync_for_device(pool, netmem,
>                                                       dma_sync_size);
>
>                 if (allow_direct && in_softirq() &&
> -                   page_pool_recycle_in_cache(page, pool))
> -                       return NULL;
> +                   page_pool_recycle_in_cache(netmem, pool))
> +                       return 0;
>
>                 /* Page found as candidate for recycling */
> -               return page;
> +               return netmem;
>         }
>         /* Fallback/non-XDP mode: API user have elevated refcnt.
>          *
> @@ -738,21 +748,30 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
>          * will be invoking put_page.
>          */
>         recycle_stat_inc(pool, released_refcnt);
> -       page_pool_return_page(pool, page);
> +       page_pool_return_page(pool, netmem);
>
> -       return NULL;
> +       return 0;
>  }
>
> -void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page,
> -                               unsigned int dma_sync_size, bool allow_direct)
> +void page_pool_put_unrefed_netmem(struct page_pool *pool, netmem_ref netmem,
> +                                 unsigned int dma_sync_size, bool allow_direct)
>  {
> -       page = __page_pool_put_page(pool, page, dma_sync_size, allow_direct);
> -       if (page && !page_pool_recycle_in_ring(pool, page)) {
> +       netmem =
> +               __page_pool_put_page(pool, netmem, dma_sync_size, allow_direct);
> +       if (netmem && !page_pool_recycle_in_ring(pool, netmem)) {
>                 /* Cache full, fallback to free pages */
>                 recycle_stat_inc(pool, ring_full);
> -               page_pool_return_page(pool, page);
> +               page_pool_return_page(pool, netmem);
>         }
>  }
> +EXPORT_SYMBOL(page_pool_put_unrefed_netmem);
> +
> +void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page,
> +                               unsigned int dma_sync_size, bool allow_direct)
> +{
> +       page_pool_put_unrefed_netmem(pool, page_to_netmem(page), dma_sync_size,
> +                                    allow_direct);
> +}
>  EXPORT_SYMBOL(page_pool_put_unrefed_page);
>
>  /**
> @@ -777,16 +796,16 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
>         bool in_softirq;
>
>         for (i = 0; i < count; i++) {
> -               struct page *page = virt_to_head_page(data[i]);
> +               netmem_ref netmem = page_to_netmem(virt_to_head_page(data[i]));
>
>                 /* It is not the last user for the page frag case */
> -               if (!page_pool_is_last_ref(page))
> +               if (!page_pool_is_last_ref(netmem))
>                         continue;
>
> -               page = __page_pool_put_page(pool, page, -1, false);
> +               netmem = __page_pool_put_page(pool, netmem, -1, false);
>                 /* Approved for bulk recycling in ptr_ring cache */
> -               if (page)
> -                       data[bulk_len++] = page;
> +               if (netmem)
> +                       data[bulk_len++] = (__force void *)netmem;
>         }
>
>         if (unlikely(!bulk_len))
> @@ -812,100 +831,108 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
>          * since put_page() with refcnt == 1 can be an expensive operation
>          */
>         for (; i < bulk_len; i++)
> -               page_pool_return_page(pool, data[i]);
> +               page_pool_return_page(pool, (__force netmem_ref)data[i]);
>  }
>  EXPORT_SYMBOL(page_pool_put_page_bulk);
>
> -static struct page *page_pool_drain_frag(struct page_pool *pool,
> -                                        struct page *page)
> +static netmem_ref page_pool_drain_frag(struct page_pool *pool,
> +                                      netmem_ref netmem)
>  {
>         long drain_count = BIAS_MAX - pool->frag_users;
>
>         /* Some user is still using the page frag */
> -       if (likely(page_pool_unref_page(page, drain_count)))
> -               return NULL;
> +       if (likely(page_pool_unref_netmem(netmem, drain_count)))
> +               return 0;
>
> -       if (__page_pool_page_can_be_recycled(page)) {
> +       if (__page_pool_page_can_be_recycled(netmem)) {
>                 if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
> -                       page_pool_dma_sync_for_device(pool, page, -1);
> +                       page_pool_dma_sync_for_device(pool, netmem, -1);
>
> -               return page;
> +               return netmem;
>         }
>
> -       page_pool_return_page(pool, page);
> -       return NULL;
> +       page_pool_return_page(pool, netmem);
> +       return 0;
>  }
>
>  static void page_pool_free_frag(struct page_pool *pool)
>  {
>         long drain_count = BIAS_MAX - pool->frag_users;
> -       struct page *page = pool->frag_page;
> +       netmem_ref netmem = pool->frag_page;
>
> -       pool->frag_page = NULL;
> +       pool->frag_page = 0;
>
> -       if (!page || page_pool_unref_page(page, drain_count))
> +       if (!netmem || page_pool_unref_netmem(netmem, drain_count))
>                 return;
>
> -       page_pool_return_page(pool, page);
> +       page_pool_return_page(pool, netmem);
>  }
>
> -struct page *page_pool_alloc_frag(struct page_pool *pool,
> -                                 unsigned int *offset,
> -                                 unsigned int size, gfp_t gfp)
> +netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool,
> +                                      unsigned int *offset, unsigned int size,
> +                                      gfp_t gfp)
>  {
>         unsigned int max_size = PAGE_SIZE << pool->p.order;
> -       struct page *page = pool->frag_page;
> +       netmem_ref netmem = pool->frag_page;
>
>         if (WARN_ON(size > max_size))
> -               return NULL;
> +               return 0;
>
>         size = ALIGN(size, dma_get_cache_alignment());
>         *offset = pool->frag_offset;
>
> -       if (page && *offset + size > max_size) {
> -               page = page_pool_drain_frag(pool, page);
> -               if (page) {
> +       if (netmem && *offset + size > max_size) {
> +               netmem = page_pool_drain_frag(pool, netmem);
> +               if (netmem) {
>                         alloc_stat_inc(pool, fast);
>                         goto frag_reset;
>                 }
>         }
>
> -       if (!page) {
> -               page = page_pool_alloc_pages(pool, gfp);
> -               if (unlikely(!page)) {
> -                       pool->frag_page = NULL;
> -                       return NULL;
> +       if (!netmem) {
> +               netmem = page_pool_alloc_netmem(pool, gfp);
> +               if (unlikely(!netmem)) {
> +                       pool->frag_page = 0;
> +                       return 0;
>                 }
>
> -               pool->frag_page = page;
> +               pool->frag_page = netmem;
>
>  frag_reset:
>                 pool->frag_users = 1;
>                 *offset = 0;
>                 pool->frag_offset = size;
> -               page_pool_fragment_page(page, BIAS_MAX);
> -               return page;
> +               page_pool_fragment_netmem(netmem, BIAS_MAX);
> +               return netmem;
>         }
>
>         pool->frag_users++;
>         pool->frag_offset = *offset + size;
>         alloc_stat_inc(pool, fast);
> -       return page;
> +       return netmem;
> +}
> +EXPORT_SYMBOL(page_pool_alloc_frag_netmem);
> +
> +struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset,
> +                                 unsigned int size, gfp_t gfp)
> +{
> +       return netmem_to_page(page_pool_alloc_frag_netmem(pool, offset, size,
> +                                                         gfp));
>  }
>  EXPORT_SYMBOL(page_pool_alloc_frag);
>
>  static void page_pool_empty_ring(struct page_pool *pool)
>  {
> -       struct page *page;
> +       netmem_ref netmem;
>
>         /* Empty recycle ring */
> -       while ((page = ptr_ring_consume_bh(&pool->ring))) {
> +       while ((netmem = (__force netmem_ref)ptr_ring_consume_bh(&pool->ring))) {
>                 /* Verify the refcnt invariant of cached pages */
> -               if (!(page_ref_count(page) == 1))
> +               if (!(page_ref_count(netmem_to_page(netmem)) == 1))
>                         pr_crit("%s() page_pool refcnt %d violation\n",
> -                               __func__, page_ref_count(page));
> +                               __func__, netmem_ref_count(netmem));
>
> -               page_pool_return_page(pool, page);
> +               page_pool_return_page(pool, netmem);
>         }
>  }
>
> @@ -927,7 +954,7 @@ static void __page_pool_destroy(struct page_pool *pool)
>
>  static void page_pool_empty_alloc_cache_once(struct page_pool *pool)
>  {
> -       struct page *page;
> +       netmem_ref netmem;
>
>         if (pool->destroy_cnt)
>                 return;
> @@ -937,8 +964,8 @@ static void page_pool_empty_alloc_cache_once(struct page_pool *pool)
>          * call concurrently.
>          */
>         while (pool->alloc.count) {
> -               page = pool->alloc.cache[--pool->alloc.count];
> -               page_pool_return_page(pool, page);
> +               netmem = pool->alloc.cache[--pool->alloc.count];
> +               page_pool_return_page(pool, netmem);
>         }
>  }
>
> @@ -1044,15 +1071,15 @@ EXPORT_SYMBOL(page_pool_destroy);
>  /* Caller must provide appropriate safe context, e.g. NAPI. */
>  void page_pool_update_nid(struct page_pool *pool, int new_nid)
>  {
> -       struct page *page;
> +       netmem_ref netmem;
>
>         trace_page_pool_update_nid(pool, new_nid);
>         pool->p.nid = new_nid;
>
>         /* Flush pool alloc cache, as refill will check NUMA node */
>         while (pool->alloc.count) {
> -               page = pool->alloc.cache[--pool->alloc.count];
> -               page_pool_return_page(pool, page);
> +               netmem = pool->alloc.cache[--pool->alloc.count];
> +               page_pool_return_page(pool, netmem);
>         }
>  }
>  EXPORT_SYMBOL(page_pool_update_nid);
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 17617c29be2d..7193ee9737a0 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -1005,8 +1005,9 @@ int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb,
>  EXPORT_SYMBOL(skb_cow_data_for_xdp);
>
>  #if IS_ENABLED(CONFIG_PAGE_POOL)
> -bool napi_pp_put_page(struct page *page, bool napi_safe)
> +bool napi_pp_put_page(netmem_ref netmem, bool napi_safe)
>  {
> +       struct page *page = netmem_to_page(netmem);
>         bool allow_direct = false;
>         struct page_pool *pp;
>
> @@ -1043,7 +1044,7 @@ bool napi_pp_put_page(struct page *page, bool napi_safe)
>          * The page will be returned to the pool here regardless of the
>          * 'flipped' fragment being in use or not.
>          */
> -       page_pool_put_full_page(pp, page, allow_direct);
> +       page_pool_put_full_netmem(pp, page_to_netmem(page), allow_direct);
>
>         return true;
>  }
> @@ -1054,7 +1055,7 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data, bool napi_safe)
>  {
>         if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle)
>                 return false;
> -       return napi_pp_put_page(virt_to_page(data), napi_safe);
> +       return napi_pp_put_page(page_to_netmem(virt_to_page(data)), napi_safe);
>  }
>
>  /**
> --
> 2.44.0.396.g6e790dbe36-goog
>


-- 
Thanks,
Mina


^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [RFC PATCH net-next v7 07/14] page_pool: devmem support
       [not found] ` <20240326225048.785801-8-almasrymina@google.com>
@ 2024-03-27  8:26   ` Mina Almasry
  0 siblings, 0 replies; 2+ messages in thread
From: Mina Almasry @ 2024-03-27  8:26 UTC (permalink / raw
  To: netdev, linux-kernel, linux-doc, linux-alpha, linux-mips,
	linux-parisc, sparclinux, linux-trace-kernel, linux-arch, bpf,
	linux-kselftest, linux-media, dri-devel, Linux-MM
  Cc: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	Jonathan Corbet, Richard Henderson, Ivan Kokshaysky, Matt Turner,
	Thomas Bogendoerfer, James E.J. Bottomley, Helge Deller,
	Andreas Larsson, Jesper Dangaard Brouer, Ilias Apalodimas,
	Steven Rostedt, Masami Hiramatsu, Mathieu Desnoyers,
	Arnd Bergmann, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, Martin KaFai Lau, Eduard Zingerman, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo, Jiri Olsa, Steffen Klassert, Herbert Xu, David Ahern,
	Willem de Bruijn, Shuah Khan, Sumit Semwal, Christian König,
	Pavel Begunkov, David Wei, Jason Gunthorpe, Yunsheng Lin,
	Shailend Chand, Harshitha Ramamurthy, Shakeel Butt,
	Jeroen de Borst, Praveen Kaligineedi, Matthew Wilcox

On Tue, Mar 26, 2024 at 3:51 PM Mina Almasry <almasrymina@google.com> wrote:
>
> Convert netmem to be a union of struct page and struct net_iov. Overload
> the LSB of struct netmem* to indicate that it's a net_iov, otherwise
> it's a page.
>
> Currently these entries in struct page are rented by the page_pool and
> used exclusively by the net stack:
>
> struct {
>         unsigned long pp_magic;
>         struct page_pool *pp;
>         unsigned long _pp_mapping_pad;
>         unsigned long dma_addr;
>         atomic_long_t pp_ref_count;
> };
>
> Mirror these (and only these) entries into struct net_iov and implement
> netmem helpers that can access these common fields regardless of
> whether the underlying type is page or net_iov.
>
> Implement checks for net_iov in netmem helpers which delegate to mm
> APIs, to ensure net_iov are never passed to the mm stack.
>
> Signed-off-by: Mina Almasry <almasrymina@google.com>
>
> ---
>
> v7:
> - Remove static_branch_unlikely from netmem_to_net_iov(). We're getting
>   better results from the fast path in bench_page_pool_simple tests
>   without the static_branch_unlikely, and the addition of
>   static_branch_unlikely doesn't improve performance of devmem TCP.
>
>   Additionally only check netmem_to_net_iov() if
>   CONFIG_DMA_SHARED_BUFFER is enabled, otherwise dmabuf net_iovs cannot
>   exist anyway.
>
>   net-next base: 8 cycle fast path.
>   with static_branch_unlikely: 10 cycle fast path.
>   without static_branch_unlikely: 9 cycle fast path.
>   CONFIG_DMA_SHARED_BUFFER disabled: 8 cycle fast path as baseline.
>
>   Performance of devmem TCP is at 95% line rate is regardless of
>   static_branch_unlikely or not.
>
> v6:
> - Rebased on top of the merged netmem_ref type.
> - Rebased on top of the merged skb_pp_frag_ref() changes.
>
> v5:
> - Use netmem instead of page* with LSB set.
> - Use pp_ref_count for refcounting net_iov.
> - Removed many of the custom checks for netmem.
>
> v1:
> - Disable fragmentation support for iov properly.
> - fix napi_pp_put_page() path (Yunsheng).
> - Use pp_frag_count for devmem refcounting.
>
> To: linux-mm@kvack.org

It looks like this tag to add linux-mm did not work as intended. CCing
linux-mm manually.

> Cc: Matthew Wilcox <willy@infradead.org>
>
> ---
>  include/net/netmem.h            | 143 ++++++++++++++++++++++++++++++--
>  include/net/page_pool/helpers.h |  25 +++---
>  include/net/page_pool/types.h   |   1 +
>  net/core/page_pool.c            |  26 +++---
>  net/core/skbuff.c               |  23 +++--
>  5 files changed, 171 insertions(+), 47 deletions(-)
>
> diff --git a/include/net/netmem.h b/include/net/netmem.h
> index 21f53b29e5fe..74eeaa34883e 100644
> --- a/include/net/netmem.h
> +++ b/include/net/netmem.h
> @@ -9,14 +9,51 @@
>  #define _NET_NETMEM_H
>
>  #include <net/devmem.h>
> +#include <net/net_debug.h>
>
>  /* net_iov */
>
> +DECLARE_STATIC_KEY_FALSE(page_pool_mem_providers);
> +
> +/*  We overload the LSB of the struct page pointer to indicate whether it's
> + *  a page or net_iov.
> + */
> +#define NET_IOV 0x01UL
> +
>  struct net_iov {
> +       unsigned long __unused_padding;
> +       unsigned long pp_magic;
> +       struct page_pool *pp;
>         struct dmabuf_genpool_chunk_owner *owner;
>         unsigned long dma_addr;
> +       atomic_long_t pp_ref_count;
>  };
>
> +/* These fields in struct page are used by the page_pool and net stack:
> + *
> + *     struct {
> + *             unsigned long pp_magic;
> + *             struct page_pool *pp;
> + *             unsigned long _pp_mapping_pad;
> + *             unsigned long dma_addr;
> + *             atomic_long_t pp_ref_count;
> + *     };
> + *
> + * We mirror the page_pool fields here so the page_pool can access these fields
> + * without worrying whether the underlying fields belong to a page or net_iov.
> + *
> + * The non-net stack fields of struct page are private to the mm stack and must
> + * never be mirrored to net_iov.
> + */
> +#define NET_IOV_ASSERT_OFFSET(pg, iov)             \
> +       static_assert(offsetof(struct page, pg) == \
> +                     offsetof(struct net_iov, iov))
> +NET_IOV_ASSERT_OFFSET(pp_magic, pp_magic);
> +NET_IOV_ASSERT_OFFSET(pp, pp);
> +NET_IOV_ASSERT_OFFSET(dma_addr, dma_addr);
> +NET_IOV_ASSERT_OFFSET(pp_ref_count, pp_ref_count);
> +#undef NET_IOV_ASSERT_OFFSET
> +
>  static inline struct dmabuf_genpool_chunk_owner *
>  net_iov_owner(const struct net_iov *niov)
>  {
> @@ -50,7 +87,7 @@ static inline dma_addr_t net_iov_dma_addr(const struct net_iov *niov)
>                ((dma_addr_t)net_iov_idx(niov) << PAGE_SHIFT);
>  }
>
> -static inline struct netdev_dmabuf_binding *
> +static inline struct net_devmem_dmabuf_binding *
>  net_iov_binding(const struct net_iov *niov)
>  {
>         return net_iov_owner(niov)->binding;
> @@ -69,20 +106,26 @@ net_iov_binding(const struct net_iov *niov)
>   */
>  typedef unsigned long __bitwise netmem_ref;
>
> +static inline bool netmem_is_net_iov(const netmem_ref netmem)
> +{
> +#if defined(CONFIG_PAGE_POOL) && defined(CONFIG_DMA_SHARED_BUFFER)
> +       return (__force unsigned long)netmem & NET_IOV;
> +#else
> +       return false;
> +#endif
> +}
> +
>  /* This conversion fails (returns NULL) if the netmem_ref is not struct page
>   * backed.
> - *
> - * Currently struct page is the only possible netmem, and this helper never
> - * fails.
>   */
>  static inline struct page *netmem_to_page(netmem_ref netmem)
>  {
> +       if (WARN_ON_ONCE(netmem_is_net_iov(netmem)))
> +               return NULL;
> +
>         return (__force struct page *)netmem;
>  }
>
> -/* Converting from page to netmem is always safe, because a page can always be
> - * a netmem.
> - */
>  static inline netmem_ref page_to_netmem(struct page *page)
>  {
>         return (__force netmem_ref)page;
> @@ -90,17 +133,103 @@ static inline netmem_ref page_to_netmem(struct page *page)
>
>  static inline int netmem_ref_count(netmem_ref netmem)
>  {
> +       /* The non-pp refcount of net_iov is always 1. On net_iov, we only
> +        * support pp refcounting which uses the pp_ref_count field.
> +        */
> +       if (netmem_is_net_iov(netmem))
> +               return 1;
> +
>         return page_ref_count(netmem_to_page(netmem));
>  }
>
>  static inline unsigned long netmem_to_pfn(netmem_ref netmem)
>  {
> +       if (netmem_is_net_iov(netmem))
> +               return 0;
> +
>         return page_to_pfn(netmem_to_page(netmem));
>  }
>
> +static inline struct net_iov *__netmem_clear_lsb(netmem_ref netmem)
> +{
> +       return (struct net_iov *)((__force unsigned long)netmem & ~NET_IOV);
> +}
> +
> +static inline unsigned long netmem_get_pp_magic(netmem_ref netmem)
> +{
> +       return __netmem_clear_lsb(netmem)->pp_magic;
> +}
> +
> +static inline void netmem_or_pp_magic(netmem_ref netmem, unsigned long pp_magic)
> +{
> +       __netmem_clear_lsb(netmem)->pp_magic |= pp_magic;
> +}
> +
> +static inline void netmem_clear_pp_magic(netmem_ref netmem)
> +{
> +       __netmem_clear_lsb(netmem)->pp_magic = 0;
> +}
> +
> +static inline struct page_pool *netmem_get_pp(netmem_ref netmem)
> +{
> +       return __netmem_clear_lsb(netmem)->pp;
> +}
> +
> +static inline void netmem_set_pp(netmem_ref netmem, struct page_pool *pool)
> +{
> +       __netmem_clear_lsb(netmem)->pp = pool;
> +}
> +
> +static inline unsigned long netmem_get_dma_addr(netmem_ref netmem)
> +{
> +       return __netmem_clear_lsb(netmem)->dma_addr;
> +}
> +
> +static inline void netmem_set_dma_addr(netmem_ref netmem,
> +                                      unsigned long dma_addr)
> +{
> +       __netmem_clear_lsb(netmem)->dma_addr = dma_addr;
> +}
> +
> +static inline atomic_long_t *netmem_get_pp_ref_count_ref(netmem_ref netmem)
> +{
> +       return &__netmem_clear_lsb(netmem)->pp_ref_count;
> +}
> +
> +static inline bool netmem_is_pref_nid(netmem_ref netmem, int pref_nid)
> +{
> +       /* Assume net_iov are on the preferred node without actually
> +        * checking...
> +        *
> +        * This check is only used to check for recycling memory in the page
> +        * pool's fast paths. Currently the only implementation of net_iov
> +        * is dmabuf device memory. It's a deliberate decision by the user to
> +        * bind a certain dmabuf to a certain netdev, and the netdev rx queue
> +        * would not be able to reallocate memory from another dmabuf that
> +        * exists on the preferred node, so, this check doesn't make much sense
> +        * in this case. Assume all net_iovs can be recycled for now.
> +        */
> +       if (netmem_is_net_iov(netmem))
> +               return true;
> +
> +       return page_to_nid(netmem_to_page(netmem)) == pref_nid;
> +}
> +
>  static inline netmem_ref netmem_compound_head(netmem_ref netmem)
>  {
> +       /* niov are never compounded */
> +       if (netmem_is_net_iov(netmem))
> +               return netmem;
> +
>         return page_to_netmem(compound_head(netmem_to_page(netmem)));
>  }
>
> +static inline void *netmem_address(netmem_ref netmem)
> +{
> +       if (netmem_is_net_iov(netmem))
> +               return NULL;
> +
> +       return page_address(netmem_to_page(netmem));
> +}
> +
>  #endif /* _NET_NETMEM_H */
> diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h
> index 61814f91a458..c6a55eddefae 100644
> --- a/include/net/page_pool/helpers.h
> +++ b/include/net/page_pool/helpers.h
> @@ -215,7 +215,7 @@ inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool)
>
>  static inline void page_pool_fragment_netmem(netmem_ref netmem, long nr)
>  {
> -       atomic_long_set(&netmem_to_page(netmem)->pp_ref_count, nr);
> +       atomic_long_set(netmem_get_pp_ref_count_ref(netmem), nr);
>  }
>
>  /**
> @@ -243,7 +243,7 @@ static inline void page_pool_fragment_page(struct page *page, long nr)
>
>  static inline long page_pool_unref_netmem(netmem_ref netmem, long nr)
>  {
> -       struct page *page = netmem_to_page(netmem);
> +       atomic_long_t *pp_ref_count = netmem_get_pp_ref_count_ref(netmem);
>         long ret;
>
>         /* If nr == pp_ref_count then we have cleared all remaining
> @@ -260,19 +260,19 @@ static inline long page_pool_unref_netmem(netmem_ref netmem, long nr)
>          * initially, and only overwrite it when the page is partitioned into
>          * more than one piece.
>          */
> -       if (atomic_long_read(&page->pp_ref_count) == nr) {
> +       if (atomic_long_read(pp_ref_count) == nr) {
>                 /* As we have ensured nr is always one for constant case using
>                  * the BUILD_BUG_ON(), only need to handle the non-constant case
>                  * here for pp_ref_count draining, which is a rare case.
>                  */
>                 BUILD_BUG_ON(__builtin_constant_p(nr) && nr != 1);
>                 if (!__builtin_constant_p(nr))
> -                       atomic_long_set(&page->pp_ref_count, 1);
> +                       atomic_long_set(pp_ref_count, 1);
>
>                 return 0;
>         }
>
> -       ret = atomic_long_sub_return(nr, &page->pp_ref_count);
> +       ret = atomic_long_sub_return(nr, pp_ref_count);
>         WARN_ON(ret < 0);
>
>         /* We are the last user here too, reset pp_ref_count back to 1 to
> @@ -281,7 +281,7 @@ static inline long page_pool_unref_netmem(netmem_ref netmem, long nr)
>          * page_pool_unref_page() currently.
>          */
>         if (unlikely(!ret))
> -               atomic_long_set(&page->pp_ref_count, 1);
> +               atomic_long_set(pp_ref_count, 1);
>
>         return ret;
>  }
> @@ -400,9 +400,7 @@ static inline void page_pool_free_va(struct page_pool *pool, void *va,
>
>  static inline dma_addr_t page_pool_get_dma_addr_netmem(netmem_ref netmem)
>  {
> -       struct page *page = netmem_to_page(netmem);
> -
> -       dma_addr_t ret = page->dma_addr;
> +       dma_addr_t ret = netmem_get_dma_addr(netmem);
>
>         if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA)
>                 ret <<= PAGE_SHIFT;
> @@ -425,18 +423,17 @@ static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
>  static inline bool page_pool_set_dma_addr_netmem(netmem_ref netmem,
>                                                  dma_addr_t addr)
>  {
> -       struct page *page = netmem_to_page(netmem);
> -
>         if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) {
> -               page->dma_addr = addr >> PAGE_SHIFT;
> +               netmem_set_dma_addr(netmem, addr >> PAGE_SHIFT);
>
>                 /* We assume page alignment to shave off bottom bits,
>                  * if this "compression" doesn't work we need to drop.
>                  */
> -               return addr != (dma_addr_t)page->dma_addr << PAGE_SHIFT;
> +               return addr != (dma_addr_t)netmem_get_dma_addr(netmem)
> +                                      << PAGE_SHIFT;
>         }
>
> -       page->dma_addr = addr;
> +       netmem_set_dma_addr(netmem, addr);
>         return false;
>  }
>
> diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
> index 0d164624f16d..f04af1613f59 100644
> --- a/include/net/page_pool/types.h
> +++ b/include/net/page_pool/types.h
> @@ -6,6 +6,7 @@
>  #include <linux/dma-direction.h>
>  #include <linux/ptr_ring.h>
>  #include <linux/types.h>
> +#include <net/netmem.h>
>
>  #define PP_FLAG_DMA_MAP                BIT(0) /* Should page_pool do the DMA
>                                         * map/unmap
> diff --git a/net/core/page_pool.c b/net/core/page_pool.c
> index c8125be3a6e2..c7bffd08218b 100644
> --- a/net/core/page_pool.c
> +++ b/net/core/page_pool.c
> @@ -25,7 +25,7 @@
>
>  #include "page_pool_priv.h"
>
> -static DEFINE_STATIC_KEY_FALSE(page_pool_mem_providers);
> +DEFINE_STATIC_KEY_FALSE(page_pool_mem_providers);
>
>  #define DEFER_TIME (msecs_to_jiffies(1000))
>  #define DEFER_WARN_INTERVAL (60 * HZ)
> @@ -359,7 +359,7 @@ static noinline netmem_ref page_pool_refill_alloc_cache(struct page_pool *pool)
>                 if (unlikely(!netmem))
>                         break;
>
> -               if (likely(page_to_nid(netmem_to_page(netmem)) == pref_nid)) {
> +               if (likely(netmem_is_pref_nid(netmem, pref_nid))) {
>                         pool->alloc.cache[pool->alloc.count++] = netmem;
>                 } else {
>                         /* NUMA mismatch;
> @@ -446,10 +446,8 @@ static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem)
>
>  static void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem)
>  {
> -       struct page *page = netmem_to_page(netmem);
> -
> -       page->pp = pool;
> -       page->pp_magic |= PP_SIGNATURE;
> +       netmem_set_pp(netmem, pool);
> +       netmem_or_pp_magic(netmem, PP_SIGNATURE);
>
>         /* Ensuring all pages have been split into one fragment initially:
>          * page_pool_set_pp_info() is only called once for every page when it
> @@ -464,10 +462,8 @@ static void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem)
>
>  static void page_pool_clear_pp_info(netmem_ref netmem)
>  {
> -       struct page *page = netmem_to_page(netmem);
> -
> -       page->pp_magic = 0;
> -       page->pp = NULL;
> +       netmem_clear_pp_magic(netmem);
> +       netmem_set_pp(netmem, NULL);
>  }
>
>  static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
> @@ -695,8 +691,9 @@ static bool page_pool_recycle_in_cache(netmem_ref netmem,
>
>  static bool __page_pool_page_can_be_recycled(netmem_ref netmem)
>  {
> -       return page_ref_count(netmem_to_page(netmem)) == 1 &&
> -              !page_is_pfmemalloc(netmem_to_page(netmem));
> +       return netmem_is_net_iov(netmem) ||
> +              (page_ref_count(netmem_to_page(netmem)) == 1 &&
> +               !page_is_pfmemalloc(netmem_to_page(netmem)));
>  }
>
>  /* If the page refcnt == 1, this will try to recycle the page.
> @@ -718,7 +715,7 @@ __page_pool_put_page(struct page_pool *pool, netmem_ref netmem,
>          * refcnt == 1 means page_pool owns page, and can recycle it.
>          *
>          * page is NOT reusable when allocated when system is under
> -        * some pressure. (page_is_pfmemalloc)
> +        * some pressure. (page_pool_page_is_pfmemalloc)
>          */
>         if (likely(__page_pool_page_can_be_recycled(netmem))) {
>                 /* Read barrier done in page_ref_count / READ_ONCE */
> @@ -734,6 +731,7 @@ __page_pool_put_page(struct page_pool *pool, netmem_ref netmem,
>                 /* Page found as candidate for recycling */
>                 return netmem;
>         }
> +
>         /* Fallback/non-XDP mode: API user have elevated refcnt.
>          *
>          * Many drivers split up the page into fragments, and some
> @@ -928,7 +926,7 @@ static void page_pool_empty_ring(struct page_pool *pool)
>         /* Empty recycle ring */
>         while ((netmem = (__force netmem_ref)ptr_ring_consume_bh(&pool->ring))) {
>                 /* Verify the refcnt invariant of cached pages */
> -               if (!(page_ref_count(netmem_to_page(netmem)) == 1))
> +               if (!(netmem_ref_count(netmem) == 1))
>                         pr_crit("%s() page_pool refcnt %d violation\n",
>                                 __func__, netmem_ref_count(netmem));
>
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 7193ee9737a0..b7e974f0ae51 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -907,9 +907,9 @@ static void skb_clone_fraglist(struct sk_buff *skb)
>                 skb_get(list);
>  }
>
> -static bool is_pp_page(struct page *page)
> +static bool is_pp_netmem(netmem_ref netmem)
>  {
> -       return (page->pp_magic & ~0x3UL) == PP_SIGNATURE;
> +       return (netmem_get_pp_magic(netmem) & ~0x3UL) == PP_SIGNATURE;
>  }
>
>  int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb,
> @@ -1007,11 +1007,10 @@ EXPORT_SYMBOL(skb_cow_data_for_xdp);
>  #if IS_ENABLED(CONFIG_PAGE_POOL)
>  bool napi_pp_put_page(netmem_ref netmem, bool napi_safe)
>  {
> -       struct page *page = netmem_to_page(netmem);
>         bool allow_direct = false;
>         struct page_pool *pp;
>
> -       page = compound_head(page);
> +       netmem = netmem_compound_head(netmem);
>
>         /* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation
>          * in order to preserve any existing bits, such as bit 0 for the
> @@ -1020,10 +1019,10 @@ bool napi_pp_put_page(netmem_ref netmem, bool napi_safe)
>          * and page_is_pfmemalloc() is checked in __page_pool_put_page()
>          * to avoid recycling the pfmemalloc page.
>          */
> -       if (unlikely(!is_pp_page(page)))
> +       if (unlikely(!is_pp_netmem(netmem)))
>                 return false;
>
> -       pp = page->pp;
> +       pp = netmem_get_pp(netmem);
>
>         /* Allow direct recycle if we have reasons to believe that we are
>          * in the same context as the consumer would run, so there's
> @@ -1044,7 +1043,7 @@ bool napi_pp_put_page(netmem_ref netmem, bool napi_safe)
>          * The page will be returned to the pool here regardless of the
>          * 'flipped' fragment being in use or not.
>          */
> -       page_pool_put_full_netmem(pp, page_to_netmem(page), allow_direct);
> +       page_pool_put_full_netmem(pp, netmem, allow_direct);
>
>         return true;
>  }
> @@ -1071,7 +1070,7 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data, bool napi_safe)
>  static int skb_pp_frag_ref(struct sk_buff *skb)
>  {
>         struct skb_shared_info *shinfo;
> -       struct page *head_page;
> +       netmem_ref head_netmem;
>         int i;
>
>         if (!skb->pp_recycle)
> @@ -1080,11 +1079,11 @@ static int skb_pp_frag_ref(struct sk_buff *skb)
>         shinfo = skb_shinfo(skb);
>
>         for (i = 0; i < shinfo->nr_frags; i++) {
> -               head_page = compound_head(skb_frag_page(&shinfo->frags[i]));
> -               if (likely(is_pp_page(head_page)))
> -                       page_pool_ref_page(head_page);
> +               head_netmem = netmem_compound_head(shinfo->frags[i].netmem);
> +               if (likely(is_pp_netmem(head_netmem)))
> +                       page_pool_ref_netmem(head_netmem);
>                 else
> -                       page_ref_inc(head_page);
> +                       page_ref_inc(netmem_to_page(head_netmem));
>         }
>         return 0;
>  }
> --
> 2.44.0.396.g6e790dbe36-goog
>


-- 
Thanks,
Mina


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2024-03-27  8:26 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <20240326225048.785801-1-almasrymina@google.com>
     [not found] ` <20240326225048.785801-7-almasrymina@google.com>
2024-03-27  8:25   ` [RFC PATCH net-next v7 06/14] page_pool: convert to use netmem Mina Almasry
     [not found] ` <20240326225048.785801-8-almasrymina@google.com>
2024-03-27  8:26   ` [RFC PATCH net-next v7 07/14] page_pool: devmem support Mina Almasry

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).