linux-loongson/include/linux/vmalloc.h
Linus Torvalds 47cf96fbe3 arm64 updates for 6.16
ACPI, EFI and PSCI:
  - Decouple Arm's "Software Delegated Exception Interface" (SDEI)
    support from the ACPI GHES code so that it can be used by platforms
    booted with device-tree.
 
  - Remove unnecessary per-CPU tracking of the FPSIMD state across EFI
    runtime calls.
 
  - Fix a node refcount imbalance in the PSCI device-tree code.
 
 CPU Features:
  - Ensure register sanitisation is applied to fields in ID_AA64MMFR4.
 
  - Expose AIDR_EL1 to userspace via sysfs, primarily so that KVM guests
    can reliably query the underlying CPU types from the VMM.
 
  - Re-enabling of SME support (CONFIG_ARM64_SME) as a result of fixes
    to our context-switching, signal handling and ptrace code.
 
 Entry code:
  - Hook up TIF_NEED_RESCHED_LAZY so that CONFIG_PREEMPT_LAZY can be
    selected.
 
 Memory management:
  - Prevent BSS exports from being used by the early PI code.
 
  - Propagate level and stride information to the low-level TLB
    invalidation routines when operating on hugetlb entries.
 
  - Use the page-table contiguous hint for vmap() mappings with
    VM_ALLOW_HUGE_VMAP where possible.
 
  - Optimise vmalloc()/vmap() page-table updates to use "lazy MMU mode"
    and hook this up on arm64 so that the trailing DSB (used to publish
    the updates to the hardware walker) can be deferred until the end of
    the mapping operation.
 
  - Extend mmap() randomisation for 52-bit virtual addresses (on par with
    48-bit addressing) and remove limited support for randomisation of
    the linear map.
 
 Perf and PMUs:
  - Add support for probing the CMN-S3 driver using ACPI.
 
  - Minor driver fixes to the CMN, Arm-NI and amlogic PMU drivers.
 
 Selftests:
  - Fix FPSIMD and SME tests to align with the freshly re-enabled SME
    support.
 
  - Fix default setting of the OUTPUT variable so that tests are
    installed in the right location.
 
 vDSO:
  - Replace raw counter access from inline assembly code with a call to
    the the __arch_counter_get_cntvct() helper function.
 
 Miscellaneous:
  - Add some missing header inclusions to the CCA headers.
 
  - Rework rendering of /proc/cpuinfo to follow the x86-approach and
    avoid repeated buffer expansion (the user-visible format remains
    identical).
 
  - Remove redundant selection of CONFIG_CRC32
 
  - Extend early error message when failing to map the device-tree blob.
 -----BEGIN PGP SIGNATURE-----
 
 iQFEBAABCgAuFiEEPxTL6PPUbjXGY88ct6xw3ITBYzQFAmg1uTgQHHdpbGxAa2Vy
 bmVsLm9yZwAKCRC3rHDchMFjNFv2CAC9S5OW0btOAo7V/LFBpLhJM3hdIV6Sn6N1
 d/K5znuqPBG6VPfBrshaZltEl/C3U8KG4H8xrlX5cSo7CRuf3DgVBw3kiZ6ERZj6
 1gnKR54juA1oWhcroPl0s76ETWj3N4gO036u2qOhWNAYflDunh1+bCIGJkG4H/yP
 wqtWn974YUbad/zQJSbG3IMO1yvxZ/PsNpVF8HjyQ0/ZPWsYTscrhNQ0hWro17sR
 CTcUaGxH4GrXW24EGNgkLB9aq67X2rtGGtaIlp5oFl8FuLklc7TYbPwJp8cPCTNm
 0Sp0mpuR9M675pYIKoCI9m5twc46znRIKmbXi5LvPd77418y3jTf
 =03N4
 -----END PGP SIGNATURE-----

Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull arm64 updates from Will Deacon:
 "The headline feature is the re-enablement of support for Arm's
  Scalable Matrix Extension (SME) thanks to a bumper crop of fixes
  from Mark Rutland.

  If matrices aren't your thing, then Ryan's page-table optimisation
  work is much more interesting.

  Summary:

  ACPI, EFI and PSCI:

   - Decouple Arm's "Software Delegated Exception Interface" (SDEI)
     support from the ACPI GHES code so that it can be used by platforms
     booted with device-tree

   - Remove unnecessary per-CPU tracking of the FPSIMD state across EFI
     runtime calls

   - Fix a node refcount imbalance in the PSCI device-tree code

  CPU Features:

   - Ensure register sanitisation is applied to fields in ID_AA64MMFR4

   - Expose AIDR_EL1 to userspace via sysfs, primarily so that KVM
     guests can reliably query the underlying CPU types from the VMM

   - Re-enabling of SME support (CONFIG_ARM64_SME) as a result of fixes
     to our context-switching, signal handling and ptrace code

  Entry code:

   - Hook up TIF_NEED_RESCHED_LAZY so that CONFIG_PREEMPT_LAZY can be
     selected

  Memory management:

   - Prevent BSS exports from being used by the early PI code

   - Propagate level and stride information to the low-level TLB
     invalidation routines when operating on hugetlb entries

   - Use the page-table contiguous hint for vmap() mappings with
     VM_ALLOW_HUGE_VMAP where possible

   - Optimise vmalloc()/vmap() page-table updates to use "lazy MMU mode"
     and hook this up on arm64 so that the trailing DSB (used to publish
     the updates to the hardware walker) can be deferred until the end
     of the mapping operation

   - Extend mmap() randomisation for 52-bit virtual addresses (on par
     with 48-bit addressing) and remove limited support for
     randomisation of the linear map

  Perf and PMUs:

   - Add support for probing the CMN-S3 driver using ACPI

   - Minor driver fixes to the CMN, Arm-NI and amlogic PMU drivers

  Selftests:

   - Fix FPSIMD and SME tests to align with the freshly re-enabled SME
     support

   - Fix default setting of the OUTPUT variable so that tests are
     installed in the right location

  vDSO:

   - Replace raw counter access from inline assembly code with a call to
     the the __arch_counter_get_cntvct() helper function

  Miscellaneous:

   - Add some missing header inclusions to the CCA headers

   - Rework rendering of /proc/cpuinfo to follow the x86-approach and
     avoid repeated buffer expansion (the user-visible format remains
     identical)

   - Remove redundant selection of CONFIG_CRC32

   - Extend early error message when failing to map the device-tree
     blob"

* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (83 commits)
  arm64: cputype: Add cputype definition for HIP12
  arm64: el2_setup.h: Make __init_el2_fgt labels consistent, again
  perf/arm-cmn: Add CMN S3 ACPI binding
  arm64/boot: Disallow BSS exports to startup code
  arm64/boot: Move global CPU override variables out of BSS
  arm64/boot: Move init_pgdir[] and init_idmap_pgdir[] into __pi_ namespace
  perf/arm-cmn: Initialise cmn->cpu earlier
  kselftest/arm64: Set default OUTPUT path when undefined
  arm64: Update comment regarding values in __boot_cpu_mode
  arm64: mm: Drop redundant check in pmd_trans_huge()
  arm64/mm: Re-organise setting up FEAT_S1PIE registers PIRE0_EL1 and PIR_EL1
  arm64/mm: Permit lazy_mmu_mode to be nested
  arm64/mm: Disable barrier batching in interrupt contexts
  arm64/cpuinfo: only show one cpu's info in c_show()
  arm64/mm: Batch barriers when updating kernel mappings
  mm/vmalloc: Enter lazy mmu mode while manipulating vmalloc ptes
  arm64/mm: Support huge pte-mapped pages in vmap
  mm/vmalloc: Gracefully unmap huge ptes
  mm/vmalloc: Warn on improper use of vunmap_range()
  arm64/mm: Hoist barriers out of set_ptes_anysz() loop
  ...
2025-05-28 14:55:35 -07:00

342 lines
11 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_VMALLOC_H
#define _LINUX_VMALLOC_H
#include <linux/alloc_tag.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/llist.h>
#include <asm/page.h> /* pgprot_t */
#include <linux/rbtree.h>
#include <linux/overflow.h>
#include <asm/vmalloc.h>
struct vm_area_struct; /* vma defining user mapping in mm_types.h */
struct notifier_block; /* in notifier.h */
struct iov_iter; /* in uio.h */
/* bits in flags of vmalloc's vm_struct below */
#define VM_IOREMAP 0x00000001 /* ioremap() and friends */
#define VM_ALLOC 0x00000002 /* vmalloc() */
#define VM_MAP 0x00000004 /* vmap()ed pages */
#define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */
#define VM_DMA_COHERENT 0x00000010 /* dma_alloc_coherent */
#define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */
#define VM_NO_GUARD 0x00000040 /* ***DANGEROUS*** don't add guard page */
#define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */
#define VM_FLUSH_RESET_PERMS 0x00000100 /* reset direct map and flush TLB on unmap, can't be freed in atomic context */
#define VM_MAP_PUT_PAGES 0x00000200 /* put pages and free array in vfree */
#define VM_ALLOW_HUGE_VMAP 0x00000400 /* Allow for huge pages on archs with HAVE_ARCH_HUGE_VMALLOC */
#if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \
!defined(CONFIG_KASAN_VMALLOC)
#define VM_DEFER_KMEMLEAK 0x00000800 /* defer kmemleak object creation */
#else
#define VM_DEFER_KMEMLEAK 0
#endif
#define VM_SPARSE 0x00001000 /* sparse vm_area. not all pages are present. */
/* bits [20..32] reserved for arch specific ioremap internals */
/*
* Maximum alignment for ioremap() regions.
* Can be overridden by arch-specific value.
*/
#ifndef IOREMAP_MAX_ORDER
#define IOREMAP_MAX_ORDER (7 + PAGE_SHIFT) /* 128 pages */
#endif
struct vm_struct {
struct vm_struct *next;
void *addr;
unsigned long size;
unsigned long flags;
struct page **pages;
#ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
unsigned int page_order;
#endif
unsigned int nr_pages;
phys_addr_t phys_addr;
const void *caller;
unsigned long requested_size;
};
struct vmap_area {
unsigned long va_start;
unsigned long va_end;
struct rb_node rb_node; /* address sorted rbtree */
struct list_head list; /* address sorted list */
/*
* The following two variables can be packed, because
* a vmap_area object can be either:
* 1) in "free" tree (root is free_vmap_area_root)
* 2) or "busy" tree (root is vmap_area_root)
*/
union {
unsigned long subtree_max_size; /* in "free" tree */
struct vm_struct *vm; /* in "busy" tree */
};
unsigned long flags; /* mark type of vm_map_ram area */
};
/* archs that select HAVE_ARCH_HUGE_VMAP should override one or more of these */
#ifndef arch_vmap_p4d_supported
static inline bool arch_vmap_p4d_supported(pgprot_t prot)
{
return false;
}
#endif
#ifndef arch_vmap_pud_supported
static inline bool arch_vmap_pud_supported(pgprot_t prot)
{
return false;
}
#endif
#ifndef arch_vmap_pmd_supported
static inline bool arch_vmap_pmd_supported(pgprot_t prot)
{
return false;
}
#endif
#ifndef arch_vmap_pte_range_map_size
static inline unsigned long arch_vmap_pte_range_map_size(unsigned long addr, unsigned long end,
u64 pfn, unsigned int max_page_shift)
{
return PAGE_SIZE;
}
#endif
#ifndef arch_vmap_pte_range_unmap_size
static inline unsigned long arch_vmap_pte_range_unmap_size(unsigned long addr,
pte_t *ptep)
{
return PAGE_SIZE;
}
#endif
#ifndef arch_vmap_pte_supported_shift
static inline int arch_vmap_pte_supported_shift(unsigned long size)
{
return PAGE_SHIFT;
}
#endif
#ifndef arch_vmap_pgprot_tagged
static inline pgprot_t arch_vmap_pgprot_tagged(pgprot_t prot)
{
return prot;
}
#endif
/*
* Highlevel APIs for driver use
*/
extern void vm_unmap_ram(const void *mem, unsigned int count);
extern void *vm_map_ram(struct page **pages, unsigned int count, int node);
extern void vm_unmap_aliases(void);
extern void *vmalloc_noprof(unsigned long size) __alloc_size(1);
#define vmalloc(...) alloc_hooks(vmalloc_noprof(__VA_ARGS__))
extern void *vzalloc_noprof(unsigned long size) __alloc_size(1);
#define vzalloc(...) alloc_hooks(vzalloc_noprof(__VA_ARGS__))
extern void *vmalloc_user_noprof(unsigned long size) __alloc_size(1);
#define vmalloc_user(...) alloc_hooks(vmalloc_user_noprof(__VA_ARGS__))
extern void *vmalloc_node_noprof(unsigned long size, int node) __alloc_size(1);
#define vmalloc_node(...) alloc_hooks(vmalloc_node_noprof(__VA_ARGS__))
extern void *vzalloc_node_noprof(unsigned long size, int node) __alloc_size(1);
#define vzalloc_node(...) alloc_hooks(vzalloc_node_noprof(__VA_ARGS__))
extern void *vmalloc_32_noprof(unsigned long size) __alloc_size(1);
#define vmalloc_32(...) alloc_hooks(vmalloc_32_noprof(__VA_ARGS__))
extern void *vmalloc_32_user_noprof(unsigned long size) __alloc_size(1);
#define vmalloc_32_user(...) alloc_hooks(vmalloc_32_user_noprof(__VA_ARGS__))
extern void *__vmalloc_noprof(unsigned long size, gfp_t gfp_mask) __alloc_size(1);
#define __vmalloc(...) alloc_hooks(__vmalloc_noprof(__VA_ARGS__))
extern void *__vmalloc_node_range_noprof(unsigned long size, unsigned long align,
unsigned long start, unsigned long end, gfp_t gfp_mask,
pgprot_t prot, unsigned long vm_flags, int node,
const void *caller) __alloc_size(1);
#define __vmalloc_node_range(...) alloc_hooks(__vmalloc_node_range_noprof(__VA_ARGS__))
void *__vmalloc_node_noprof(unsigned long size, unsigned long align, gfp_t gfp_mask,
int node, const void *caller) __alloc_size(1);
#define __vmalloc_node(...) alloc_hooks(__vmalloc_node_noprof(__VA_ARGS__))
void *vmalloc_huge_node_noprof(unsigned long size, gfp_t gfp_mask, int node) __alloc_size(1);
#define vmalloc_huge_node(...) alloc_hooks(vmalloc_huge_node_noprof(__VA_ARGS__))
static inline void *vmalloc_huge(unsigned long size, gfp_t gfp_mask)
{
return vmalloc_huge_node(size, gfp_mask, NUMA_NO_NODE);
}
extern void *__vmalloc_array_noprof(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2);
#define __vmalloc_array(...) alloc_hooks(__vmalloc_array_noprof(__VA_ARGS__))
extern void *vmalloc_array_noprof(size_t n, size_t size) __alloc_size(1, 2);
#define vmalloc_array(...) alloc_hooks(vmalloc_array_noprof(__VA_ARGS__))
extern void *__vcalloc_noprof(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2);
#define __vcalloc(...) alloc_hooks(__vcalloc_noprof(__VA_ARGS__))
extern void *vcalloc_noprof(size_t n, size_t size) __alloc_size(1, 2);
#define vcalloc(...) alloc_hooks(vcalloc_noprof(__VA_ARGS__))
void * __must_check vrealloc_noprof(const void *p, size_t size, gfp_t flags)
__realloc_size(2);
#define vrealloc(...) alloc_hooks(vrealloc_noprof(__VA_ARGS__))
extern void vfree(const void *addr);
extern void vfree_atomic(const void *addr);
extern void *vmap(struct page **pages, unsigned int count,
unsigned long flags, pgprot_t prot);
void *vmap_pfn(unsigned long *pfns, unsigned int count, pgprot_t prot);
extern void vunmap(const void *addr);
extern int remap_vmalloc_range_partial(struct vm_area_struct *vma,
unsigned long uaddr, void *kaddr,
unsigned long pgoff, unsigned long size);
extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
unsigned long pgoff);
int vmap_pages_range(unsigned long addr, unsigned long end, pgprot_t prot,
struct page **pages, unsigned int page_shift);
/*
* Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values
* and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings()
* needs to be called.
*/
#ifndef ARCH_PAGE_TABLE_SYNC_MASK
#define ARCH_PAGE_TABLE_SYNC_MASK 0
#endif
/*
* There is no default implementation for arch_sync_kernel_mappings(). It is
* relied upon the compiler to optimize calls out if ARCH_PAGE_TABLE_SYNC_MASK
* is 0.
*/
void arch_sync_kernel_mappings(unsigned long start, unsigned long end);
/*
* Lowlevel-APIs (not for driver use!)
*/
static inline size_t get_vm_area_size(const struct vm_struct *area)
{
if (!(area->flags & VM_NO_GUARD))
/* return actual size without guard page */
return area->size - PAGE_SIZE;
else
return area->size;
}
extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags);
extern struct vm_struct *get_vm_area_caller(unsigned long size,
unsigned long flags, const void *caller);
extern struct vm_struct *__get_vm_area_caller(unsigned long size,
unsigned long flags,
unsigned long start, unsigned long end,
const void *caller);
void free_vm_area(struct vm_struct *area);
extern struct vm_struct *remove_vm_area(const void *addr);
extern struct vm_struct *find_vm_area(const void *addr);
struct vmap_area *find_vmap_area(unsigned long addr);
static inline bool is_vm_area_hugepages(const void *addr)
{
/*
* This may not 100% tell if the area is mapped with > PAGE_SIZE
* page table entries, if for some reason the architecture indicates
* larger sizes are available but decides not to use them, nothing
* prevents that. This only indicates the size of the physical page
* allocated in the vmalloc layer.
*/
#ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
return find_vm_area(addr)->page_order > 0;
#else
return false;
#endif
}
/* for /proc/kcore */
long vread_iter(struct iov_iter *iter, const char *addr, size_t count);
/*
* Internals. Don't use..
*/
__init void vm_area_add_early(struct vm_struct *vm);
__init void vm_area_register_early(struct vm_struct *vm, size_t align);
int register_vmap_purge_notifier(struct notifier_block *nb);
int unregister_vmap_purge_notifier(struct notifier_block *nb);
#ifdef CONFIG_MMU
#define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START)
unsigned long vmalloc_nr_pages(void);
int vm_area_map_pages(struct vm_struct *area, unsigned long start,
unsigned long end, struct page **pages);
void vm_area_unmap_pages(struct vm_struct *area, unsigned long start,
unsigned long end);
void vunmap_range(unsigned long addr, unsigned long end);
static inline void set_vm_flush_reset_perms(void *addr)
{
struct vm_struct *vm = find_vm_area(addr);
if (vm)
vm->flags |= VM_FLUSH_RESET_PERMS;
}
#else /* !CONFIG_MMU */
#define VMALLOC_TOTAL 0UL
static inline unsigned long vmalloc_nr_pages(void) { return 0; }
static inline void set_vm_flush_reset_perms(void *addr) {}
#endif /* CONFIG_MMU */
#if defined(CONFIG_MMU) && defined(CONFIG_SMP)
struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
const size_t *sizes, int nr_vms,
size_t align);
void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms);
# else
static inline struct vm_struct **
pcpu_get_vm_areas(const unsigned long *offsets,
const size_t *sizes, int nr_vms,
size_t align)
{
return NULL;
}
static inline void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) {}
#endif
#if defined(CONFIG_MMU) && defined(CONFIG_PRINTK)
bool vmalloc_dump_obj(void *object);
#else
static inline bool vmalloc_dump_obj(void *object) { return false; }
#endif
#endif /* _LINUX_VMALLOC_H */