mirror of
https://git.proxmox.com/git/mirror_ubuntu-kernels.git
synced 2025-11-25 10:28:29 +00:00
A big patch for changing memcg's LRU semantics.
Now,
- Each page_cgroup is linked to its mem_cgroup's own LRU (per zone).
- LRU of page_cgroup is not synchronous with global LRU.
- page and page_cgroup are one-to-one and statically allocated.
- To find which LRU a page_cgroup is on, you have to check pc->mem_cgroup, as in:
    lru = page_cgroup_zoneinfo(pc, nid_of_pc, zid_of_pc);
- SwapCache is handled.
And when we handle the LRU list of a page_cgroup, we do the following.
pc = lookup_page_cgroup(page);
lock_page_cgroup(pc); .....................(1)
mz = page_cgroup_zoneinfo(pc);
spin_lock(&mz->lru_lock);
.....add to LRU
spin_unlock(&mz->lru_lock);
unlock_page_cgroup(pc);
But (1) is a spin_lock, and we have to worry about deadlock with zone->lru_lock.
So trylock() is currently used at (1). Without (1), we can't trust that "mz" is correct.
This is a trial to remove this dirty nesting of locks.
This patch changes mz->lru_lock to be zone->lru_lock.
Then, above sequence will be written as
spin_lock(&zone->lru_lock); # in vmscan.c or swap.c via global LRU
mem_cgroup_add/remove/etc_lru() {
pc = lookup_page_cgroup(page);
mz = page_cgroup_zoneinfo(pc);
if (PageCgroupUsed(pc)) {
....add to LRU
}
}
spin_unlock(&zone->lru_lock); # in vmscan.c or swap.c via global LRU
This is much simpler.
(*) We're safe even if we don't take lock_page_cgroup(pc). Because..
1. When pc->mem_cgroup can be modified.
- at charge.
- at account_move().
2. at charge
the PCG_USED bit is not set before pc->mem_cgroup is fixed.
3. at account_move()
the page is isolated and not on LRU.
Pros.
- easy for maintenance.
- memcg can make use of laziness of pagevec.
- we don't have to duplicate the LRU/Active/Unevictable bits in page_cgroup.
- LRU status of memcg will be synchronized with global LRU's one.
- the number of locks is reduced.
- account_move() is simplified very much.
Cons.
- may increase cost of LRU rotation.
(no impact if memcg is not configured.)
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
104 lines
2.5 KiB
C
104 lines
2.5 KiB
C
#ifndef LINUX_MM_INLINE_H
|
|
#define LINUX_MM_INLINE_H
|
|
|
|
/**
|
|
* page_is_file_cache - should the page be on a file LRU or anon LRU?
|
|
* @page: the page to test
|
|
*
|
|
* Returns LRU_FILE if @page is page cache page backed by a regular filesystem,
|
|
* or 0 if @page is anonymous, tmpfs or otherwise ram or swap backed.
|
|
* Used by functions that manipulate the LRU lists, to sort a page
|
|
* onto the right LRU list.
|
|
*
|
|
* We would like to get this info without a page flag, but the state
|
|
* needs to survive until the page is last deleted from the LRU, which
|
|
* could be as far down as __page_cache_release.
|
|
*/
|
|
static inline int page_is_file_cache(struct page *page)
|
|
{
|
|
if (PageSwapBacked(page))
|
|
return 0;
|
|
|
|
/* The page is page cache backed by a normal filesystem. */
|
|
return LRU_FILE;
|
|
}
|
|
|
|
static inline void
|
|
add_page_to_lru_list(struct zone *zone, struct page *page, enum lru_list l)
|
|
{
|
|
list_add(&page->lru, &zone->lru[l].list);
|
|
__inc_zone_state(zone, NR_LRU_BASE + l);
|
|
mem_cgroup_add_lru_list(page, l);
|
|
}
|
|
|
|
static inline void
|
|
del_page_from_lru_list(struct zone *zone, struct page *page, enum lru_list l)
|
|
{
|
|
list_del(&page->lru);
|
|
__dec_zone_state(zone, NR_LRU_BASE + l);
|
|
mem_cgroup_del_lru_list(page, l);
|
|
}
|
|
|
|
static inline void
|
|
del_page_from_lru(struct zone *zone, struct page *page)
|
|
{
|
|
enum lru_list l = LRU_BASE;
|
|
|
|
list_del(&page->lru);
|
|
if (PageUnevictable(page)) {
|
|
__ClearPageUnevictable(page);
|
|
l = LRU_UNEVICTABLE;
|
|
} else {
|
|
if (PageActive(page)) {
|
|
__ClearPageActive(page);
|
|
l += LRU_ACTIVE;
|
|
}
|
|
l += page_is_file_cache(page);
|
|
}
|
|
__dec_zone_state(zone, NR_LRU_BASE + l);
|
|
mem_cgroup_del_lru_list(page, l);
|
|
}
|
|
|
|
/**
|
|
* page_lru - which LRU list should a page be on?
|
|
* @page: the page to test
|
|
*
|
|
* Returns the LRU list a page should be on, as an index
|
|
* into the array of LRU lists.
|
|
*/
|
|
static inline enum lru_list page_lru(struct page *page)
|
|
{
|
|
enum lru_list lru = LRU_BASE;
|
|
|
|
if (PageUnevictable(page))
|
|
lru = LRU_UNEVICTABLE;
|
|
else {
|
|
if (PageActive(page))
|
|
lru += LRU_ACTIVE;
|
|
lru += page_is_file_cache(page);
|
|
}
|
|
|
|
return lru;
|
|
}
|
|
|
|
/**
|
|
* inactive_anon_is_low - check if anonymous pages need to be deactivated
|
|
* @zone: zone to check
|
|
*
|
|
* Returns true if the zone does not have enough inactive anon pages,
|
|
* meaning some active anon pages need to be deactivated.
|
|
*/
|
|
static inline int inactive_anon_is_low(struct zone *zone)
|
|
{
|
|
unsigned long active, inactive;
|
|
|
|
active = zone_page_state(zone, NR_ACTIVE_ANON);
|
|
inactive = zone_page_state(zone, NR_INACTIVE_ANON);
|
|
|
|
if (inactive * zone->inactive_ratio < active)
|
|
return 1;
|
|
|
|
return 0;
|
|
}
|
|
#endif
|