diff --git a/mm/rmap.c b/mm/rmap.c index 765e541ac9be..7a93a7cd2c64 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1722,6 +1722,25 @@ void folio_remove_rmap_pmd(struct folio *folio, struct page *page, #endif } +/* We support batch unmapping of PTEs for lazyfree large folios */ +static inline bool can_batch_unmap_folio_ptes(unsigned long addr, + struct folio *folio, pte_t *ptep) +{ + const fpb_t fpb_flags = FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY; + int max_nr = folio_nr_pages(folio); + pte_t pte = ptep_get(ptep); + + if (!folio_test_anon(folio) || folio_test_swapbacked(folio)) + return false; + if (pte_unused(pte)) + return false; + if (pte_pfn(pte) != folio_pfn(folio)) + return false; + + return folio_pte_batch(folio, addr, ptep, pte, max_nr, fpb_flags, NULL, + NULL, NULL) == max_nr; +} + /* * @arg: enum ttu_flags will be passed to this argument */ @@ -1735,6 +1754,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, struct page *subpage; struct mmu_notifier_range range; enum ttu_flags flags = (enum ttu_flags)(long)arg; + unsigned long nr_pages = 1, end_addr; unsigned long pfn; unsigned long hsz = 0; @@ -1874,23 +1894,26 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, if (pte_dirty(pteval)) folio_mark_dirty(folio); } else if (likely(pte_present(pteval))) { - flush_cache_page(vma, address, pfn); - /* Nuke the page table entry. */ - if (should_defer_flush(mm, flags)) { - /* - * We clear the PTE but do not flush so potentially - * a remote CPU could still be writing to the folio. - * If the entry was previously clean then the - * architecture must guarantee that a clear->dirty - * transition on a cached TLB entry is written through - * and traps if the PTE is unmapped. - */ - pteval = ptep_get_and_clear(mm, address, pvmw.pte); + if (folio_test_large(folio) && !(flags & TTU_HWPOISON) && + can_batch_unmap_folio_ptes(address, folio, pvmw.pte)) + nr_pages = folio_nr_pages(folio); + end_addr = address + nr_pages * PAGE_SIZE; + flush_cache_range(vma, address, end_addr); - set_tlb_ubc_flush_pending(mm, pteval, address, address + PAGE_SIZE); - } else { - pteval = ptep_clear_flush(vma, address, pvmw.pte); - } + /* Nuke the page table entry. */ + pteval = get_and_clear_full_ptes(mm, address, pvmw.pte, nr_pages, 0); + /* + * We clear the PTE but do not flush so potentially + * a remote CPU could still be writing to the folio. + * If the entry was previously clean then the + * architecture must guarantee that a clear->dirty + * transition on a cached TLB entry is written through + * and traps if the PTE is unmapped. + */ + if (should_defer_flush(mm, flags)) + set_tlb_ubc_flush_pending(mm, pteval, address, end_addr); + else + flush_tlb_range(vma, address, end_addr); if (pte_dirty(pteval)) folio_mark_dirty(folio); } else { @@ -1968,7 +1991,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, * redirtied either using the page table or a previously * obtained GUP reference. */ - set_pte_at(mm, address, pvmw.pte, pteval); + set_ptes(mm, address, pvmw.pte, pteval, nr_pages); folio_set_swapbacked(folio); goto walk_abort; } else if (ref_count != 1 + map_count) { @@ -1981,10 +2004,10 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, * We'll come back here later and detect if the folio was * dirtied when the additional reference is gone. */ - set_pte_at(mm, address, pvmw.pte, pteval); + set_ptes(mm, address, pvmw.pte, pteval, nr_pages); goto walk_abort; } - dec_mm_counter(mm, MM_ANONPAGES); + add_mm_counter(mm, MM_ANONPAGES, -nr_pages); goto discard; } @@ -2049,13 +2072,18 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, dec_mm_counter(mm, mm_counter_file(folio)); } discard: - if (unlikely(folio_test_hugetlb(folio))) + if (unlikely(folio_test_hugetlb(folio))) { hugetlb_remove_rmap(folio); - else - folio_remove_rmap_pte(folio, subpage, vma); + } else { + folio_remove_rmap_ptes(folio, subpage, nr_pages, vma); + folio_ref_sub(folio, nr_pages - 1); + } if (vma->vm_flags & VM_LOCKED) mlock_drain_local(); folio_put(folio); + /* We have already batched the entire folio */ + if (nr_pages > 1) + goto walk_done; continue; walk_abort: ret = false;