mirror of
				https://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson
				synced 2025-10-31 18:53:24 +00:00 
			
		
		
		
	 b291f00039
			
		
	
	
		b291f00039
		
	
	
	
	
		
			
			Make sure that mlocked pages also live on the unevictable LRU, so kswapd will not scan them over and over again. This is achieved through various strategies: 1) add yet another page flag--PG_mlocked--to indicate that the page is locked for efficient testing in vmscan and, optionally, fault path. This allows early culling of unevictable pages, preventing them from getting to page_referenced()/try_to_unmap(). Also allows separate accounting of mlock'd pages, as Nick's original patch did. Note: Nick's original mlock patch used a PG_mlocked flag. I had removed this in favor of the PG_unevictable flag + an mlock_count [new page struct member]. I restored the PG_mlocked flag to eliminate the new count field. 2) add the mlock/unevictable infrastructure to mm/mlock.c, with internal APIs in mm/internal.h. This is a rework of Nick's original patch to these files, taking into account that mlocked pages are now kept on the unevictable LRU list. 3) update vmscan.c:page_evictable() to check PageMlocked() and, if vma passed in, the vm_flags. Note that the vma will only be passed in for new pages in the fault path; and then only if the "cull unevictable pages in fault path" patch is included. 4) add try_to_unlock() to rmap.c to walk a page's rmap and ClearPageMlocked() if no other vmas have it mlocked. Reuses as much of try_to_unmap() as possible. This effectively replaces the use of one of the lru list links as an mlock count. If this mechanism lets pages in mlocked vmas leak through w/o PG_mlocked set [I don't know that it does], we should catch them later in try_to_unmap(). One hopes this will be rare, as it will be relatively expensive. Original mm/internal.h, mm/rmap.c and mm/mlock.c changes: Signed-off-by: Nick Piggin <npiggin@suse.de> splitlru: introduce __get_user_pages(): New munlock processing needs GUP_FLAGS_IGNORE_VMA_PERMISSIONS, because the current get_user_pages() can't grab PROT_NONE pages and therefore PROT_NONE pages can't be munlocked.
[akpm@linux-foundation.org: fix this for pagemap-pass-mm-into-pagewalkers.patch] [akpm@linux-foundation.org: untangle patch interdependencies] [akpm@linux-foundation.org: fix things after out-of-order merging] [hugh@veritas.com: fix page-flags mess] [lee.schermerhorn@hp.com: fix munlock page table walk - now requires 'mm'] [kosaki.motohiro@jp.fujitsu.com: build fix] [kosaki.motohiro@jp.fujitsu.com: fix truncate race and sevaral comments] [kosaki.motohiro@jp.fujitsu.com: splitlru: introduce __get_user_pages()] Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Signed-off-by: Rik van Riel <riel@redhat.com> Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com> Cc: Nick Piggin <npiggin@suse.de> Cc: Dave Hansen <dave@linux.vnet.ibm.com> Cc: Matt Mackall <mpm@selenic.com> Signed-off-by: Hugh Dickins <hugh@veritas.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
		
			
				
	
	
		
			403 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			403 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * Macros for manipulating and testing page->flags
 | |
|  */
 | |
| 
 | |
| #ifndef PAGE_FLAGS_H
 | |
| #define PAGE_FLAGS_H
 | |
| 
 | |
| #include <linux/types.h>
 | |
| #ifndef __GENERATING_BOUNDS_H
 | |
| #include <linux/mm_types.h>
 | |
| #include <linux/bounds.h>
 | |
| #endif /* !__GENERATING_BOUNDS_H */
 | |
| 
 | |
| /*
 | |
|  * Various page->flags bits:
 | |
|  *
 | |
|  * PG_reserved is set for special pages, which can never be swapped out. Some
 | |
|  * of them might not even exist (eg empty_bad_page)...
 | |
|  *
 | |
|  * The PG_private bitflag is set on pagecache pages if they contain filesystem
 | |
|  * specific data (which is normally at page->private). It can be used by
 | |
|  * private allocations for its own usage.
 | |
|  *
 | |
|  * During initiation of disk I/O, PG_locked is set. This bit is set before I/O
 | |
|  * and cleared when writeback _starts_ or when read _completes_. PG_writeback
 | |
|  * is set before writeback starts and cleared when it finishes.
 | |
|  *
 | |
|  * PG_locked also pins a page in pagecache, and blocks truncation of the file
 | |
|  * while it is held.
 | |
|  *
 | |
|  * page_waitqueue(page) is a wait queue of all tasks waiting for the page
 | |
|  * to become unlocked.
 | |
|  *
 | |
|  * PG_uptodate tells whether the page's contents are valid.  When a read
 | |
|  * completes, the page becomes uptodate, unless a disk I/O error happened.
 | |
|  *
 | |
|  * PG_referenced, PG_reclaim are used for page reclaim for anonymous and
 | |
|  * file-backed pagecache (see mm/vmscan.c).
 | |
|  *
 | |
|  * PG_error is set to indicate that an I/O error occurred on this page.
 | |
|  *
 | |
|  * PG_arch_1 is an architecture specific page state bit.  The generic code
 | |
|  * guarantees that this bit is cleared for a page when it first is entered into
 | |
|  * the page cache.
 | |
|  *
 | |
|  * PG_highmem pages are not permanently mapped into the kernel virtual address
 | |
|  * space, they need to be kmapped separately for doing IO on the pages.  The
 | |
|  * struct page (these bits with information) are always mapped into kernel
 | |
|  * address space...
 | |
|  *
 | |
|  * PG_buddy is set to indicate that the page is free and in the buddy system
 | |
|  * (see mm/page_alloc.c).
 | |
|  *
 | |
|  */
 | |
| 
 | |
| /*
 | |
|  * Don't use the *_dontuse flags.  Use the macros.  Otherwise you'll break
 | |
|  * locked- and dirty-page accounting.
 | |
|  *
 | |
|  * The page flags field is split into two parts, the main flags area
 | |
|  * which extends from the low bits upwards, and the fields area which
 | |
|  * extends from the high bits downwards.
 | |
|  *
 | |
|  *  | FIELD | ... | FLAGS |
 | |
|  *  N-1           ^       0
 | |
|  *               (NR_PAGEFLAGS)
 | |
|  *
 | |
|  * The fields area is reserved for fields mapping zone, node (for NUMA) and
 | |
|  * SPARSEMEM section (for variants of SPARSEMEM that require section ids like
 | |
|  * SPARSEMEM_EXTREME with !SPARSEMEM_VMEMMAP).
 | |
|  */
 | |
enum pageflags {
	PG_locked,		/* Page is locked; leave it alone */
	PG_error,		/* An I/O error occurred on this page */
	PG_referenced,
	PG_uptodate,		/* Page contents are valid */
	PG_dirty,
	PG_lru,
	PG_active,
	PG_slab,
	PG_owner_priv_1,	/* For the owner; a fs may use it on pagecache */
	PG_arch_1,		/* Architecture specific state bit */
	PG_reserved,		/* Special page, never swapped out */
	PG_private,		/* Pagecache page carrying fs-private data */
	PG_writeback,		/* Writeback in progress */
#ifdef CONFIG_PAGEFLAGS_EXTENDED
	PG_head,		/* Head of a compound page */
	PG_tail,		/* Tail of a compound page */
#else
	PG_compound,		/* Member of a compound page */
#endif
	PG_swapcache,		/* Swap page: private holds a swp_entry_t */
	PG_mappedtodisk,	/* Blocks are allocated on disk */
	PG_reclaim,		/* Reclaim as soon as possible */
	PG_buddy,		/* Free page sitting on the buddy lists */
	PG_swapbacked,		/* Backed by RAM/swap */
#ifdef CONFIG_UNEVICTABLE_LRU
	PG_unevictable,		/* Page is "unevictable" */
	PG_mlocked,		/* Page is mlocked through a vma */
#endif
#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR
	PG_uncached,		/* Page has been mapped uncached */
#endif
	__NR_PAGEFLAGS,		/* Count of the flag bits listed above */

	/*
	 * Everything below is an alias that reuses one of the bits above.
	 */

	/* Filesystems */
	PG_checked = PG_owner_priv_1,

	/* XEN */
	PG_pinned = PG_owner_priv_1,
	PG_savepinned = PG_dirty,

	/* SLOB */
	PG_slob_page = PG_active,
	PG_slob_free = PG_private,

	/* SLUB */
	PG_slub_frozen = PG_active,
	PG_slub_debug = PG_error,
};
 | |
| 
 | |
| #ifndef __GENERATING_BOUNDS_H
 | |
| 
 | |
| /*
 | |
|  * Macros to create function definitions for page flags
 | |
|  */
 | |
/*
 * Generators for the per-flag accessors.  In each one, 'uname' is pasted
 * into the function name and 'lname' selects the PG_<lname> bit.
 */

/* Page<uname>(): report whether the bit is set, via test_bit(). */
#define TESTPAGEFLAG(uname, lname)					\
static inline int Page##uname(struct page *page)			\
{									\
	return test_bit(PG_##lname, &page->flags);			\
}

/* SetPage<uname>(): set the bit with set_bit(). */
#define SETPAGEFLAG(uname, lname)					\
static inline void SetPage##uname(struct page *page)			\
{									\
	set_bit(PG_##lname, &page->flags);				\
}

/* ClearPage<uname>(): clear the bit with clear_bit(). */
#define CLEARPAGEFLAG(uname, lname)					\
static inline void ClearPage##uname(struct page *page)			\
{									\
	clear_bit(PG_##lname, &page->flags);				\
}

/* __SetPage<uname>(): set the bit with __set_bit(). */
#define __SETPAGEFLAG(uname, lname)					\
static inline void __SetPage##uname(struct page *page)			\
{									\
	__set_bit(PG_##lname, &page->flags);				\
}

/* __ClearPage<uname>(): clear the bit with __clear_bit(). */
#define __CLEARPAGEFLAG(uname, lname)					\
static inline void __ClearPage##uname(struct page *page)		\
{									\
	__clear_bit(PG_##lname, &page->flags);				\
}

/* TestSetPage<uname>(): result of test_and_set_bit() on the bit. */
#define TESTSETFLAG(uname, lname)					\
static inline int TestSetPage##uname(struct page *page)			\
{									\
	return test_and_set_bit(PG_##lname, &page->flags);		\
}

/* TestClearPage<uname>(): result of test_and_clear_bit() on the bit. */
#define TESTCLEARFLAG(uname, lname)					\
static inline int TestClearPage##uname(struct page *page)		\
{									\
	return test_and_clear_bit(PG_##lname, &page->flags);		\
}
 | |
| 
 | |
| 
 | |
/* Test + set + clear accessors built on set_bit()/clear_bit(). */
#define PAGEFLAG(uname, lname)						\
	TESTPAGEFLAG(uname, lname)					\
	SETPAGEFLAG(uname, lname)					\
	CLEARPAGEFLAG(uname, lname)

/* Test + set + clear accessors built on __set_bit()/__clear_bit(). */
#define __PAGEFLAG(uname, lname)					\
	TESTPAGEFLAG(uname, lname)					\
	__SETPAGEFLAG(uname, lname)					\
	__CLEARPAGEFLAG(uname, lname)

/* Page<uname>() stub that always reports the flag as clear. */
#define PAGEFLAG_FALSE(uname)						\
static inline int Page##uname(struct page *page)			\
{									\
	return 0;							\
}

/* Both test-and-set and test-and-clear accessors. */
#define TESTSCFLAG(uname, lname)					\
	TESTSETFLAG(uname, lname)					\
	TESTCLEARFLAG(uname, lname)
 | |
| 
 | |
/*
 * Stub generators: same function names as the real accessors, but the
 * bodies touch nothing.
 */

/* SetPage<uname>() that does nothing. */
#define SETPAGEFLAG_NOOP(uname)						\
static inline void SetPage##uname(struct page *page)			\
{									\
}

/* ClearPage<uname>() that does nothing. */
#define CLEARPAGEFLAG_NOOP(uname)					\
static inline void ClearPage##uname(struct page *page)			\
{									\
}

/* __ClearPage<uname>() that does nothing. */
#define __CLEARPAGEFLAG_NOOP(uname)					\
static inline void __ClearPage##uname(struct page *page)		\
{									\
}

/* TestClearPage<uname>() that always returns 0. */
#define TESTCLEARFLAG_FALSE(uname)					\
static inline int TestClearPage##uname(struct page *page)		\
{									\
	return 0;							\
}
 | |
| 
 | |
| struct page;	/* forward declaration */
 | |
| 
 | |
| TESTPAGEFLAG(Locked, locked)
 | |
| PAGEFLAG(Error, error)
 | |
| PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced)
 | |
| PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty)
 | |
| PAGEFLAG(LRU, lru) __CLEARPAGEFLAG(LRU, lru)
 | |
| PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active)
 | |
| 	TESTCLEARFLAG(Active, active)
 | |
| __PAGEFLAG(Slab, slab)
 | |
| PAGEFLAG(Checked, checked)		/* Used by some filesystems */
 | |
| PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned)	/* Xen */
 | |
| PAGEFLAG(SavePinned, savepinned);			/* Xen */
 | |
| PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved)
 | |
| PAGEFLAG(Private, private) __CLEARPAGEFLAG(Private, private)
 | |
| 	__SETPAGEFLAG(Private, private)
 | |
| PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked)
 | |
| 
 | |
| __PAGEFLAG(SlobPage, slob_page)
 | |
| __PAGEFLAG(SlobFree, slob_free)
 | |
| 
 | |
| __PAGEFLAG(SlubFrozen, slub_frozen)
 | |
| __PAGEFLAG(SlubDebug, slub_debug)
 | |
| 
 | |
| /*
 | |
|  * Only test-and-set exist for PG_writeback.  The unconditional operators are
 | |
|  * risky: they bypass page accounting.
 | |
|  */
 | |
| TESTPAGEFLAG(Writeback, writeback) TESTSCFLAG(Writeback, writeback)
 | |
| __PAGEFLAG(Buddy, buddy)
 | |
| PAGEFLAG(MappedToDisk, mappedtodisk)
 | |
| 
 | |
| /* PG_readahead is only used for file reads; PG_reclaim is only for writes */
 | |
| PAGEFLAG(Reclaim, reclaim) TESTCLEARFLAG(Reclaim, reclaim)
 | |
| PAGEFLAG(Readahead, reclaim)		/* Reminder to do async read-ahead */
 | |
| 
 | |
| #ifdef CONFIG_HIGHMEM
 | |
| /*
 | |
|  * Must use a macro here due to header dependency issues. page_zone() is not
 | |
|  * available at this point.
 | |
|  */
 | |
| #define PageHighMem(__p) is_highmem(page_zone(__p))
 | |
| #else
 | |
| PAGEFLAG_FALSE(HighMem)
 | |
| #endif
 | |
| 
 | |
| #ifdef CONFIG_SWAP
 | |
| PAGEFLAG(SwapCache, swapcache)
 | |
| #else
 | |
| PAGEFLAG_FALSE(SwapCache)
 | |
| #endif
 | |
| 
 | |
| #ifdef CONFIG_UNEVICTABLE_LRU
 | |
| PAGEFLAG(Unevictable, unevictable) __CLEARPAGEFLAG(Unevictable, unevictable)
 | |
| 	TESTCLEARFLAG(Unevictable, unevictable)
 | |
| 
 | |
| #define MLOCK_PAGES 1
 | |
| PAGEFLAG(Mlocked, mlocked) __CLEARPAGEFLAG(Mlocked, mlocked)
 | |
| 	TESTSCFLAG(Mlocked, mlocked)
 | |
| 
 | |
| #else
 | |
| 
 | |
| #define MLOCK_PAGES 0
 | |
| PAGEFLAG_FALSE(Mlocked)
 | |
| 	SETPAGEFLAG_NOOP(Mlocked) TESTCLEARFLAG_FALSE(Mlocked)
 | |
| 
 | |
| PAGEFLAG_FALSE(Unevictable) TESTCLEARFLAG_FALSE(Unevictable)
 | |
| 	SETPAGEFLAG_NOOP(Unevictable) CLEARPAGEFLAG_NOOP(Unevictable)
 | |
| 	__CLEARPAGEFLAG_NOOP(Unevictable)
 | |
| #endif
 | |
| 
 | |
| #ifdef CONFIG_IA64_UNCACHED_ALLOCATOR
 | |
| PAGEFLAG(Uncached, uncached)
 | |
| #else
 | |
| PAGEFLAG_FALSE(Uncached)
 | |
| #endif
 | |
| 
 | |
| static inline int PageUptodate(struct page *page)
 | |
| {
 | |
| 	int ret = test_bit(PG_uptodate, &(page)->flags);
 | |
| 
 | |
| 	/*
 | |
| 	 * Must ensure that the data we read out of the page is loaded
 | |
| 	 * _after_ we've loaded page->flags to check for PageUptodate.
 | |
| 	 * We can skip the barrier if the page is not uptodate, because
 | |
| 	 * we wouldn't be reading anything from it.
 | |
| 	 *
 | |
| 	 * See SetPageUptodate() for the other side of the story.
 | |
| 	 */
 | |
| 	if (ret)
 | |
| 		smp_rmb();
 | |
| 
 | |
| 	return ret;
 | |
| }
 | |
| 
 | |
| static inline void __SetPageUptodate(struct page *page)
 | |
| {
 | |
| 	smp_wmb();
 | |
| 	__set_bit(PG_uptodate, &(page)->flags);
 | |
| }
 | |
| 
 | |
| static inline void SetPageUptodate(struct page *page)
 | |
| {
 | |
| #ifdef CONFIG_S390
 | |
| 	if (!test_and_set_bit(PG_uptodate, &page->flags))
 | |
| 		page_clear_dirty(page);
 | |
| #else
 | |
| 	/*
 | |
| 	 * Memory barrier must be issued before setting the PG_uptodate bit,
 | |
| 	 * so that all previous stores issued in order to bring the page
 | |
| 	 * uptodate are actually visible before PageUptodate becomes true.
 | |
| 	 *
 | |
| 	 * s390 doesn't need an explicit smp_wmb here because the test and
 | |
| 	 * set bit already provides full barriers.
 | |
| 	 */
 | |
| 	smp_wmb();
 | |
| 	set_bit(PG_uptodate, &(page)->flags);
 | |
| #endif
 | |
| }
 | |
| 
 | |
| CLEARPAGEFLAG(Uptodate, uptodate)
 | |
| 
 | |
| extern void cancel_dirty_page(struct page *page, unsigned int account_size);
 | |
| 
 | |
| int test_clear_page_writeback(struct page *page);
 | |
| int test_set_page_writeback(struct page *page);
 | |
| 
 | |
/*
 * Wrapper around test_set_page_writeback() for callers that do not need
 * its return value.
 */
static inline void set_page_writeback(struct page *page)
{
	(void)test_set_page_writeback(page);
}
 | |
| 
 | |
| #ifdef CONFIG_PAGEFLAGS_EXTENDED
 | |
| /*
 | |
|  * System with lots of page flags available. This allows separate
 | |
|  * flags for PageHead() and PageTail() checks of compound pages so that bit
 | |
|  * tests can be used in performance sensitive paths. PageCompound is
 | |
|  * generally not used in hot code paths.
 | |
|  */
 | |
| __PAGEFLAG(Head, head)
 | |
| __PAGEFLAG(Tail, tail)
 | |
| 
 | |
| static inline int PageCompound(struct page *page)
 | |
| {
 | |
| 	return page->flags & ((1L << PG_head) | (1L << PG_tail));
 | |
| 
 | |
| }
 | |
| #else
 | |
| /*
 | |
|  * Reduce page flag use as much as possible by overlapping
 | |
|  * compound page flags with the flags used for page cache pages. Possible
 | |
|  * because PageCompound is always set for compound pages and not for
 | |
|  * pages on the LRU and/or pagecache.
 | |
|  */
 | |
| TESTPAGEFLAG(Compound, compound)
 | |
| __PAGEFLAG(Head, compound)
 | |
| 
 | |
| /*
 | |
|  * PG_reclaim is used in combination with PG_compound to mark the
 | |
|  * head and tail of a compound page. This saves one page flag
 | |
|  * but makes it impossible to use compound pages for the page cache.
 | |
|  * The PG_reclaim bit would have to be used for reclaim or readahead
 | |
|  * if compound pages enter the page cache.
 | |
|  *
 | |
|  * PG_compound & PG_reclaim	=> Tail page
 | |
|  * PG_compound & ~PG_reclaim	=> Head page
 | |
|  */
 | |
| #define PG_head_tail_mask ((1L << PG_compound) | (1L << PG_reclaim))
 | |
| 
 | |
| static inline int PageTail(struct page *page)
 | |
| {
 | |
| 	return ((page->flags & PG_head_tail_mask) == PG_head_tail_mask);
 | |
| }
 | |
| 
 | |
| static inline void __SetPageTail(struct page *page)
 | |
| {
 | |
| 	page->flags |= PG_head_tail_mask;
 | |
| }
 | |
| 
 | |
| static inline void __ClearPageTail(struct page *page)
 | |
| {
 | |
| 	page->flags &= ~PG_head_tail_mask;
 | |
| }
 | |
| 
 | |
| #endif /* !PAGEFLAGS_EXTENDED */
 | |
| 
 | |
/*
 * Bit masks for the unevictable/mlocked flags.  They are 0 when the flags
 * are not configured, so they can be OR-ed into larger masks
 * unconditionally.
 */
#ifndef CONFIG_UNEVICTABLE_LRU
#define __PG_UNEVICTABLE	0
#define __PG_MLOCKED		0
#else
#define __PG_UNEVICTABLE	(1 << PG_unevictable)
#define __PG_MLOCKED		(1 << PG_mlocked)
#endif
 | |
| 
 | |
/* Base mask shared by the PAGE_FLAGS_* checks. */
#define PAGE_FLAGS	((1 << PG_lru) | (1 << PG_private) | \
			 (1 << PG_locked) | (1 << PG_buddy) | \
			 (1 << PG_writeback) | (1 << PG_slab) | \
			 (1 << PG_swapcache) | (1 << PG_active) | \
			 __PG_UNEVICTABLE | __PG_MLOCKED)
 | |
| 
 | |
/*
 * Flags checked in bad_page().  Pages on the free list should not have
 * these flags set.  If they are, there is a problem.
 */
#define PAGE_FLAGS_CLEAR_WHEN_BAD (PAGE_FLAGS | (1 << PG_reclaim) | \
		(1 << PG_dirty) | (1 << PG_swapbacked))
 | |
| 
 | |
/*
 * Flags checked when a page is freed.  Pages being freed should not have
 * these flags set.  If they are, there is a problem.
 */
#define PAGE_FLAGS_CHECK_AT_FREE (PAGE_FLAGS | (1 << PG_reserved))
 | |
| 
 | |
/*
 * Flags checked when a page is prepped for return by the page allocator.
 * Pages being prepped should not have these flags set.  If they are,
 * there is a problem.
 */
#define PAGE_FLAGS_CHECK_AT_PREP (PAGE_FLAGS | (1 << PG_reserved) | \
		(1 << PG_dirty) | (1 << PG_swapbacked))
 | |
| 
 | |
| #endif /* !__GENERATING_BOUNDS_H */
 | |
| #endif	/* PAGE_FLAGS_H */
 |