forked from torvalds/linux
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
page_pool: refurbish version of page_pool code
Need a fast page recycle mechanism for ndo_xdp_xmit API for returning pages on DMA-TX completion time, which has good cross CPU performance, given DMA-TX completion time can happen on a remote CPU. Refurbish my page_pool code, that was presented[1] at MM-summit 2016. Adapted page_pool code to not depend on the page allocator and integration into struct page. The DMA mapping feature is kept, even though it will not be activated/used in this patchset. [1] http://people.netfilter.org/hawk/presentations/MM-summit2016/generic_page_pool_mm_summit2016.pdf V2: Adjustments requested by Tariq - Changed page_pool_create return codes, don't return NULL, only ERR_PTR, as this simplifies err handling in drivers. V4: many small improvements and cleanups - Add DOC comment section, that can be used by kernel-doc - Improve fallback mode, to work better with refcnt based recycling e.g. remove a WARN as pointed out by Tariq e.g. quicker fallback if ptr_ring is empty. V5: Fixed SPDX license as pointed out by Alexei V6: Adjustments requested by Eric Dumazet - Adjust ____cacheline_aligned_in_smp usage/placement - Move rcu_head in struct page_pool - Free pages quicker on destroy, minimize resources delayed an RCU period - Remove code for forward/backward compat ABI interface V8: Issues found by kbuild test robot - Address sparse should be static warnings - Only compile+link when a driver uses/selects page_pool, mlx5 selects CONFIG_PAGE_POOL, although it is first used in two patches Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
- Loading branch information
1 parent
8d5d885
commit ff7d6b2
Showing
5 changed files
with
451 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
/* SPDX-License-Identifier: GPL-2.0 | ||
* | ||
* page_pool.h | ||
* Author: Jesper Dangaard Brouer <netoptimizer@brouer.com> | ||
* Copyright (C) 2016 Red Hat, Inc. | ||
*/ | ||
|
||
/** | ||
* DOC: page_pool allocator | ||
* | ||
* This page_pool allocator is optimized for the XDP mode that | ||
* uses one-frame-per-page, but has fallbacks that act like the | ||
* regular page allocator APIs. | ||
* | ||
* Basic use involves replacing alloc_pages() calls with the | ||
* page_pool_alloc_pages() call. Drivers should likely use | ||
* page_pool_dev_alloc_pages() replacing dev_alloc_pages(). | ||
* | ||
* If page_pool handles DMA mapping (using page->private), then the API | ||
* user is responsible for invoking page_pool_put_page() once. In case of | ||
* an elevated refcnt, the DMA state is released, assuming other users of | ||
* the page will eventually call put_page(). | ||
* | ||
* If no DMA mapping is done, then it can act as a shim layer that | ||
* falls through to alloc_page. As no state is kept on the page, the | ||
* regular put_page() call is sufficient. | ||
*/ | ||
#ifndef _NET_PAGE_POOL_H | ||
#define _NET_PAGE_POOL_H | ||
|
||
#include <linux/mm.h> /* Needed by ptr_ring */ | ||
#include <linux/ptr_ring.h> | ||
#include <linux/dma-direction.h> | ||
|
||
#define PP_FLAG_DMA_MAP 1 /* Should page_pool do the DMA map/unmap */ | ||
#define PP_FLAG_ALL PP_FLAG_DMA_MAP | ||
|
||
/* | ||
* Fast allocation side cache array/stack | ||
* | ||
* The cache size and refill watermark are related to the network | ||
* use-case. The NAPI budget is 64 packets. After a NAPI poll the RX | ||
* ring is usually refilled and the max consumed elements will be 64, | ||
* thus a natural max size of objects needed in the cache. | ||
* | ||
* Keeping room for more objects is due to the XDP_DROP use-case, as | ||
* XDP_DROP allows the opportunity to recycle objects directly into | ||
* this array, since it shares the same softirq/NAPI protection. If the | ||
* cache is already full (or partly full) then the XDP_DROP recycles | ||
* would have to take a slower code path. | ||
*/ | ||
/* Capacity of the cache array: twice the NAPI budget of 64 */ | ||
#define PP_ALLOC_CACHE_SIZE 128 | ||
/* Refill watermark, equal to the NAPI budget of 64 */ | ||
#define PP_ALLOC_CACHE_REFILL 64 | ||
struct pp_alloc_cache { | ||
/* NOTE(review): presumably the number of valid entries in cache[] - confirm in page_pool.c */ | ||
u32 count; | ||
/* Fixed-size array of PP_ALLOC_CACHE_SIZE opaque pointers */ | ||
void *cache[PP_ALLOC_CACHE_SIZE]; | ||
}; | ||
|
||
/* Parameters describing a pool; passed by const pointer to page_pool_create() */ | ||
struct page_pool_params { | ||
unsigned int flags; /* presumably a mask of PP_FLAG_* values - confirm */ | ||
unsigned int order; /* presumably the page allocation order - confirm */ | ||
unsigned int pool_size; /* NOTE(review): likely sizes the recycle ptr_ring - confirm */ | ||
int nid; /* NUMA node id to allocate pages from */ | ||
struct device *dev; /* device, for DMA pre-mapping purposes */ | ||
enum dma_data_direction dma_dir; /* DMA mapping direction */ | ||
}; | ||
|
||
/* | ||
* Per-pool state: an allocation-side cache plus a ptr_ring that holds | ||
* recycled pages. | ||
*/ | ||
struct page_pool { | ||
/* NOTE(review): presumably used to defer part of the teardown by an RCU period - confirm */ | ||
struct rcu_head rcu; | ||
/* Embedded parameters; presumably copied from the page_pool_create() argument - confirm */ | ||
struct page_pool_params p; | ||
|
||
/* | ||
* Data structure for allocation side | ||
* | ||
* The driver's allocation side usually already performs some kind | ||
* of resource protection. Piggyback on this protection, and | ||
* require the driver to protect the allocation side. | ||
* | ||
* For NIC drivers this means: allocate a page_pool per | ||
* RX-queue, as the RX-queue is already protected by | ||
* Softirq/BH scheduling and napi_schedule. NAPI schedule | ||
* guarantees that a single napi_struct will only be scheduled | ||
* on a single CPU (see napi_schedule). | ||
*/ | ||
struct pp_alloc_cache alloc ____cacheline_aligned_in_smp; | ||
|
||
/* Data structure for storing recycled pages. | ||
* | ||
* Returning/freeing pages is more complicated synchronization | ||
* wise, because frees can happen on remote CPUs, with no | ||
* association with the allocation resource. | ||
* | ||
* Use ptr_ring, as it separates consumer and producer | ||
* efficiently, in a way that doesn't bounce cache-lines. | ||
* | ||
* TODO: Implement bulk return pages into this structure. | ||
*/ | ||
struct ptr_ring ring; | ||
}; | ||
|
||
/* | ||
* Allocate from @pool using the given @gfp flags; intended as the | ||
* page_pool replacement for alloc_pages() calls. | ||
* NOTE(review): failure presumably yields NULL like alloc_pages() - | ||
* confirm in page_pool.c. | ||
*/ | ||
struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp); | ||
|
||
static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool) | ||
{ | ||
gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); | ||
|
||
return page_pool_alloc_pages(pool, gfp); | ||
} | ||
|
||
/* | ||
* Create a pool described by @params. | ||
* Per the changelog of the introducing commit, failure is reported as an | ||
* ERR_PTR() value and never as NULL, so callers should check with IS_ERR(). | ||
*/ | ||
struct page_pool *page_pool_create(const struct page_pool_params *params); | ||
|
||
/* | ||
* Tear down @pool. | ||
* NOTE(review): the introducing commit's changelog says pages are freed | ||
* quickly on destroy while some resources are delayed an RCU period - | ||
* confirm the exact ordering in page_pool.c. | ||
*/ | ||
void page_pool_destroy(struct page_pool *pool); | ||
|
||
/* Never call this directly, use helpers below. | ||
* | ||
* @allow_direct is false when called via page_pool_put_page() and true | ||
* when called via page_pool_recycle_direct(). | ||
* NOTE(review): true presumably permits recycling straight into the | ||
* allocation-side cache - confirm in page_pool.c. | ||
*/ | ||
void __page_pool_put_page(struct page_pool *pool, | ||
struct page *page, bool allow_direct); | ||
|
||
static inline void page_pool_put_page(struct page_pool *pool, struct page *page) | ||
{ | ||
__page_pool_put_page(pool, page, false); | ||
} | ||
/* Only a very limited set of use-cases may recycle directly. | ||
* Hands @page back to @pool with direct recycling allowed. | ||
*/ | ||
static inline void page_pool_recycle_direct(struct page_pool *pool, | ||
					    struct page *page) | ||
{ | ||
	const bool allow_direct = true; | ||
|
||
	__page_pool_put_page(pool, page, allow_direct); | ||
} | ||
|
||
#endif /* _NET_PAGE_POOL_H */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.