mirror of
https://git.proxmox.com/git/mirror_ubuntu-kernels.git
synced 2025-12-07 13:11:09 +00:00
Add s390-specific pte_free_defer(), to free table page via call_rcu(). pte_free_defer() will be called inside khugepaged's retract_page_tables() loop, where allocating extra memory cannot be relied upon. This precedes the generic version to avoid build breakage from incompatible pgtable_t. This version is more complicated than others: because s390 fits two 2K page tables into one 4K page (so page->rcu_head must be shared between both halves), and already uses page->lru (which page->rcu_head overlays) to list any free halves; with clever management by page->_refcount bits. Build upon the existing management, adjusted to follow a new rule: that a page is never on the free list if pte_free_defer() was used on either half (marked by PageActive). And for simplicity, delay calling RCU until both halves are freed. Not adding back unallocated fragments to the list in pte_free_defer() can result in wasting some amount of memory for pagetables, depending on how long the allocated fragment will stay in use. In practice, this effect is expected to be insignificant, and not justify a far more complex approach, which might allow to add the fragments back later in __tlb_remove_table(), where we might not have a stable mm any more. 
[hughd@google.com: Claudio finds warning on mm_has_pgste() more useful than on mm_alloc_pgste()] Link: https://lkml.kernel.org/r/3bc095ba-a180-ce3b-82b1-2bfc64612f3@google.com Link: https://lkml.kernel.org/r/94eccf5f-264c-8abe-4567-e77f4b4e14a@google.com Signed-off-by: Hugh Dickins <hughd@google.com> Reviewed-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com> Tested-by: Alexander Gordeev <agordeev@linux.ibm.com> Acked-by: Alexander Gordeev <agordeev@linux.ibm.com> Cc: Alistair Popple <apopple@nvidia.com> Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> Cc: Anshuman Khandual <anshuman.khandual@arm.com> Cc: Axel Rasmussen <axelrasmussen@google.com> Cc: Christian Borntraeger <borntraeger@linux.ibm.com> Cc: Christophe Leroy <christophe.leroy@csgroup.eu> Cc: Christoph Hellwig <hch@infradead.org> Cc: Claudio Imbrenda <imbrenda@linux.ibm.com> Cc: David Hildenbrand <david@redhat.com> Cc: "David S. Miller" <davem@davemloft.net> Cc: Heiko Carstens <hca@linux.ibm.com> Cc: Huang, Ying <ying.huang@intel.com> Cc: Ira Weiny <ira.weiny@intel.com> Cc: Jann Horn <jannh@google.com> Cc: Jason Gunthorpe <jgg@ziepe.ca> Cc: Kirill A. 
Shutemov <kirill.shutemov@linux.intel.com> Cc: Lorenzo Stoakes <lstoakes@gmail.com> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Mel Gorman <mgorman@techsingularity.net> Cc: Miaohe Lin <linmiaohe@huawei.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Mike Kravetz <mike.kravetz@oracle.com> Cc: Mike Rapoport (IBM) <rppt@kernel.org> Cc: Minchan Kim <minchan@kernel.org> Cc: Naoya Horiguchi <naoya.horiguchi@nec.com> Cc: Pavel Tatashin <pasha.tatashin@soleen.com> Cc: Peter Xu <peterx@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Qi Zheng <zhengqi.arch@bytedance.com> Cc: Ralph Campbell <rcampbell@nvidia.com> Cc: Russell King <linux@armlinux.org.uk> Cc: SeongJae Park <sj@kernel.org> Cc: Song Liu <song@kernel.org> Cc: Steven Price <steven.price@arm.com> Cc: Suren Baghdasaryan <surenb@google.com> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com> Cc: Vasily Gorbik <gor@linux.ibm.com> Cc: Vishal Moola (Oracle) <vishal.moola@gmail.com> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: Will Deacon <will@kernel.org> Cc: Yang Shi <shy828301@gmail.com> Cc: Yu Zhao <yuzhao@google.com> Cc: Zack Rusin <zackr@vmware.com> Cc: Zi Yan <ziy@nvidia.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
158 lines
4.1 KiB
C
158 lines
4.1 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
/*
|
|
* S390 version
|
|
* Copyright IBM Corp. 1999, 2000
|
|
* Author(s): Hartmut Penner (hp@de.ibm.com)
|
|
* Martin Schwidefsky (schwidefsky@de.ibm.com)
|
|
*
|
|
* Derived from "include/asm-i386/pgalloc.h"
|
|
* Copyright (C) 1994 Linus Torvalds
|
|
*/
|
|
|
|
#ifndef _S390_PGALLOC_H
|
|
#define _S390_PGALLOC_H
|
|
|
|
#include <linux/threads.h>
|
|
#include <linux/string.h>
|
|
#include <linux/gfp.h>
|
|
#include <linux/mm.h>
|
|
|
|
#define CRST_ALLOC_ORDER 2
|
|
|
|
unsigned long *crst_table_alloc(struct mm_struct *);
|
|
void crst_table_free(struct mm_struct *, unsigned long *);
|
|
|
|
unsigned long *page_table_alloc(struct mm_struct *);
|
|
struct page *page_table_alloc_pgste(struct mm_struct *mm);
|
|
void page_table_free(struct mm_struct *, unsigned long *);
|
|
void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long);
|
|
void page_table_free_pgste(struct page *page);
|
|
extern int page_table_allocate_pgste;
|
|
|
|
static inline void crst_table_init(unsigned long *crst, unsigned long entry)
|
|
{
|
|
memset64((u64 *)crst, entry, _CRST_ENTRIES);
|
|
}
|
|
|
|
int crst_table_upgrade(struct mm_struct *mm, unsigned long limit);
|
|
|
|
static inline unsigned long check_asce_limit(struct mm_struct *mm, unsigned long addr,
|
|
unsigned long len)
|
|
{
|
|
int rc;
|
|
|
|
if (addr + len > mm->context.asce_limit &&
|
|
addr + len <= TASK_SIZE) {
|
|
rc = crst_table_upgrade(mm, addr + len);
|
|
if (rc)
|
|
return (unsigned long) rc;
|
|
}
|
|
return addr;
|
|
}
|
|
|
|
static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long address)
|
|
{
|
|
unsigned long *table = crst_table_alloc(mm);
|
|
|
|
if (table)
|
|
crst_table_init(table, _REGION2_ENTRY_EMPTY);
|
|
return (p4d_t *) table;
|
|
}
|
|
|
|
/* Free a p4d table; a folded p4d shares the pgd's table and must not be freed. */
static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
{
	if (mm_p4d_folded(mm))
		return;
	crst_table_free(mm, (unsigned long *) p4d);
}
|
|
|
|
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
|
|
{
|
|
unsigned long *table = crst_table_alloc(mm);
|
|
if (table)
|
|
crst_table_init(table, _REGION3_ENTRY_EMPTY);
|
|
return (pud_t *) table;
|
|
}
|
|
|
|
/* Free a pud table; a folded pud shares a higher-level table and is not freed. */
static inline void pud_free(struct mm_struct *mm, pud_t *pud)
{
	if (mm_pud_folded(mm))
		return;
	crst_table_free(mm, (unsigned long *) pud);
}
|
|
|
|
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
|
|
{
|
|
unsigned long *table = crst_table_alloc(mm);
|
|
|
|
if (!table)
|
|
return NULL;
|
|
crst_table_init(table, _SEGMENT_ENTRY_EMPTY);
|
|
if (!pgtable_pmd_page_ctor(virt_to_page(table))) {
|
|
crst_table_free(mm, table);
|
|
return NULL;
|
|
}
|
|
return (pmd_t *) table;
|
|
}
|
|
|
|
/*
 * Free a pmd table: run the pmd page destructor on the backing page,
 * then release the table. A folded pmd is not freed.
 */
static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
{
	if (!mm_pmd_folded(mm)) {
		pgtable_pmd_page_dtor(virt_to_page(pmd));
		crst_table_free(mm, (unsigned long *) pmd);
	}
}
|
|
|
|
/* Link a p4d table into a pgd entry (region-1 table entry). */
static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
{
	unsigned long entry = _REGION1_ENTRY | __pa(p4d);

	set_pgd(pgd, __pgd(entry));
}
|
|
|
|
/* Link a pud table into a p4d entry (region-2 table entry). */
static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
{
	unsigned long entry = _REGION2_ENTRY | __pa(pud);

	set_p4d(p4d, __p4d(entry));
}
|
|
|
|
/* Link a pmd table into a pud entry (region-3 table entry). */
static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
{
	unsigned long entry = _REGION3_ENTRY | __pa(pmd);

	set_pud(pud, __pud(entry));
}
|
|
|
|
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
|
|
{
|
|
return (pgd_t *) crst_table_alloc(mm);
|
|
}
|
|
|
|
/* Release the top-level (pgd) table of an mm. */
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
	unsigned long *table = (unsigned long *) pgd;

	crst_table_free(mm, table);
}
|
|
|
|
static inline void pmd_populate(struct mm_struct *mm,
|
|
pmd_t *pmd, pgtable_t pte)
|
|
{
|
|
set_pmd(pmd, __pmd(_SEGMENT_ENTRY | __pa(pte)));
|
|
}
|
|
|
|
#define pmd_populate_kernel(mm, pmd, pte) pmd_populate(mm, pmd, pte)
|
|
|
|
/*
|
|
* page table entry allocation/free routines.
|
|
*/
|
|
#define pte_alloc_one_kernel(mm) ((pte_t *)page_table_alloc(mm))
|
|
#define pte_alloc_one(mm) ((pte_t *)page_table_alloc(mm))
|
|
|
|
#define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte)
|
|
#define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte)
|
|
|
|
/* arch-specific pte_free_defer() implementation is in arch/s390/mm/pgalloc.c */
|
|
#define pte_free_defer pte_free_defer
|
|
void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable);
|
|
|
|
void vmem_map_init(void);
|
|
void *vmem_crst_alloc(unsigned long val);
|
|
pte_t *vmem_pte_alloc(void);
|
|
|
|
unsigned long base_asce_alloc(unsigned long addr, unsigned long num_pages);
|
|
void base_asce_free(unsigned long asce);
|
|
|
|
#endif /* _S390_PGALLOC_H */
|