mirror of
https://git.proxmox.com/git/mirror_ubuntu-kernels.git
synced 2026-02-03 08:56:42 +00:00
Right now, drivers like ARM SMMU are using dma_alloc_coherent() to get coherent DMA buffers to store their command queues and page tables. As there is only one default CMA in the whole system, SMMUs on nodes other than node0 will get remote memory. This leads to significant latency. This patch provides per-numa CMA so that drivers like SMMU can get local memory. Tests show localizing CMA can significantly decrease dma_unmap latency. For instance, before this patch, SMMU on node2 has to wait for more than 560ns for the completion of CMD_SYNC in an empty command queue; with this patch, it needs only 240ns. A positive side effect of this patch would be improving performance even further for those users who are worried about performance more than DMA security and use iommu.passthrough=1 to skip IOMMU. With local CMA, all drivers can get local coherent DMA buffers. Also, this patch changes the default CONFIG_CMA_AREAS to 19 in NUMA, as 1+CONFIG_CMA_AREAS should be quite enough for most servers on the market even if they enable both hugetlb_cma and pernuma_cma. 2 numa nodes: 2(hugetlb) + 2(pernuma) + 1(default global cma) = 5; 4 numa nodes: 4(hugetlb) + 4(pernuma) + 1(default global cma) = 9; 8 numa nodes: 8(hugetlb) + 8(pernuma) + 1(default global cma) = 17. Signed-off-by: Barry Song <song.bao.hua@hisilicon.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
183 lines
4.9 KiB
C
183 lines
4.9 KiB
C
/* SPDX-License-Identifier: GPL-2.0-or-later */
|
|
#ifndef __LINUX_CMA_H
|
|
#define __LINUX_CMA_H
|
|
|
|
/*
|
|
* Contiguous Memory Allocator for DMA mapping framework
|
|
* Copyright (c) 2010-2011 by Samsung Electronics.
|
|
* Written by:
|
|
* Marek Szyprowski <m.szyprowski@samsung.com>
|
|
* Michal Nazarewicz <mina86@mina86.com>
|
|
*/
|
|
|
|
/*
|
|
* Contiguous Memory Allocator
|
|
*
|
|
* The Contiguous Memory Allocator (CMA) makes it possible to
|
|
* allocate big contiguous chunks of memory after the system has
|
|
* booted.
|
|
*
|
|
* Why is it needed?
|
|
*
|
|
* Various devices on embedded systems have no scatter-gather and/or
|
|
* IO map support and require contiguous blocks of memory to
|
|
* operate. They include devices such as cameras, hardware video
|
|
* coders, etc.
|
|
*
|
|
* Such devices often require big memory buffers (a full HD frame
|
|
* is, for instance, more than 2 megapixels large, i.e. more than 6
|
|
* MB of memory), which makes mechanisms such as kmalloc() or
|
|
* alloc_page() ineffective.
|
|
*
|
|
* At the same time, a solution where a big memory region is
|
|
* reserved for a device is suboptimal since often more memory is
|
|
* reserved than strictly required and, moreover, the memory is
|
|
* inaccessible to page system even if device drivers don't use it.
|
|
*
|
|
* CMA tries to solve this issue by operating on memory regions
|
|
* where only movable pages can be allocated from. This way, kernel
|
|
* can use the memory for pagecache and when device driver requests
|
|
* it, allocated pages can be migrated.
|
|
*
|
|
* Driver usage
|
|
*
|
|
* CMA should not be used by the device drivers directly. It is
|
|
* only a helper framework for dma-mapping subsystem.
|
|
*
|
|
* For more information, see kernel-docs in kernel/dma/contiguous.c
|
|
*/
|
|
|
|
#ifdef __KERNEL__
|
|
|
|
#include <linux/device.h>
|
|
#include <linux/mm.h>
|
|
|
|
struct cma;
|
|
struct page;
|
|
|
|
#ifdef CONFIG_DMA_CMA
|
|
|
|
extern struct cma *dma_contiguous_default_area;
|
|
|
|
static inline struct cma *dev_get_cma_area(struct device *dev)
|
|
{
|
|
if (dev && dev->cma_area)
|
|
return dev->cma_area;
|
|
return dma_contiguous_default_area;
|
|
}
|
|
|
|
static inline void dev_set_cma_area(struct device *dev, struct cma *cma)
|
|
{
|
|
if (dev)
|
|
dev->cma_area = cma;
|
|
}
|
|
|
|
static inline void dma_contiguous_set_default(struct cma *cma)
|
|
{
|
|
dma_contiguous_default_area = cma;
|
|
}
|
|
|
|
void dma_contiguous_reserve(phys_addr_t addr_limit);
|
|
|
|
int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
|
|
phys_addr_t limit, struct cma **res_cma,
|
|
bool fixed);
|
|
|
|
/**
|
|
* dma_declare_contiguous() - reserve area for contiguous memory handling
|
|
* for particular device
|
|
* @dev: Pointer to device structure.
|
|
* @size: Size of the reserved memory.
|
|
* @base: Start address of the reserved memory (optional, 0 for any).
|
|
* @limit: End address of the reserved memory (optional, 0 for any).
|
|
*
|
|
* This function reserves memory for specified device. It should be
|
|
* called by board specific code when early allocator (memblock or bootmem)
|
|
* is still activate.
|
|
*/
|
|
|
|
static inline int dma_declare_contiguous(struct device *dev, phys_addr_t size,
|
|
phys_addr_t base, phys_addr_t limit)
|
|
{
|
|
struct cma *cma;
|
|
int ret;
|
|
ret = dma_contiguous_reserve_area(size, base, limit, &cma, true);
|
|
if (ret == 0)
|
|
dev_set_cma_area(dev, cma);
|
|
|
|
return ret;
|
|
}
|
|
|
|
struct page *dma_alloc_from_contiguous(struct device *dev, size_t count,
|
|
unsigned int order, bool no_warn);
|
|
bool dma_release_from_contiguous(struct device *dev, struct page *pages,
|
|
int count);
|
|
struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp);
|
|
void dma_free_contiguous(struct device *dev, struct page *page, size_t size);
|
|
|
|
#else
|
|
|
|
/* CONFIG_DMA_CMA=n stub: no CMA area exists, so every device gets NULL. */
static inline struct cma *dev_get_cma_area(struct device *dev)
{
	return NULL;
}
|
|
|
|
/* CONFIG_DMA_CMA=n stub: nothing to record, both arguments are ignored. */
static inline void dev_set_cma_area(struct device *dev, struct cma *cma)
{
}
|
|
|
|
/* CONFIG_DMA_CMA=n stub: there is no default area to update; @cma is ignored. */
static inline void dma_contiguous_set_default(struct cma *cma)
{
}
|
|
|
|
/* CONFIG_DMA_CMA=n stub: nothing is reserved; @limit is ignored. */
static inline void dma_contiguous_reserve(phys_addr_t limit)
{
}
|
|
|
|
/*
 * CONFIG_DMA_CMA=n stub: no area can be reserved.  All arguments are
 * ignored, *@res_cma is left untouched, and -ENOSYS is returned.
 */
static inline int dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
					      phys_addr_t limit, struct cma **res_cma,
					      bool fixed)
{
	return -ENOSYS;
}
|
|
|
|
static inline
|
|
int dma_declare_contiguous(struct device *dev, phys_addr_t size,
|
|
phys_addr_t base, phys_addr_t limit)
|
|
{
|
|
return -ENOSYS;
|
|
}
|
|
|
|
/*
 * CONFIG_DMA_CMA=n stub: there is no contiguous area to allocate from, so
 * the request always fails with NULL regardless of the arguments.
 */
static inline
struct page *dma_alloc_from_contiguous(struct device *dev, size_t count,
				       unsigned int order, bool no_warn)
{
	return NULL;
}
|
|
|
|
/*
 * CONFIG_DMA_CMA=n stub: no pages can have come from a contiguous area, so
 * nothing is released and false is returned.
 */
static inline
bool dma_release_from_contiguous(struct device *dev, struct page *pages,
				 int count)
{
	return false;
}
|
|
|
|
/* Use fallback alloc() and free() when CONFIG_DMA_CMA=n */
|
|
static inline struct page *dma_alloc_contiguous(struct device *dev, size_t size,
|
|
gfp_t gfp)
|
|
{
|
|
return NULL;
|
|
}
|
|
|
|
/*
 * CONFIG_DMA_CMA=n fallback: @dev is ignored and @page is released with
 * __free_pages() at the order that get_order() computes for @size.
 */
static inline void dma_free_contiguous(struct device *dev, struct page *page,
				       size_t size)
{
	__free_pages(page, get_order(size));
}
|
|
|
|
#endif
|
|
|
|
/*
 * dma_pernuma_cma_reserve() is a real function only when
 * CONFIG_DMA_PERNUMA_CMA is set; otherwise it collapses to an empty inline
 * so callers need no #ifdef of their own.
 */
#ifdef CONFIG_DMA_PERNUMA_CMA
void dma_pernuma_cma_reserve(void);
#else
static inline void dma_pernuma_cma_reserve(void) { }
#endif
|
|
|
|
#endif
|
|
|
|
#endif
|