mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson
synced 2025-10-25 00:52:06 +00:00
A big patch for changing memcg's LRU semantics.
Now,
- page_cgroup is linked to its mem_cgroup's own LRU (per zone).
- LRU of page_cgroup is not synchronous with global LRU.
- page and page_cgroup are one-to-one and statically allocated.
- To find which LRU a page_cgroup is on, you have to check pc->mem_cgroup, as in
  lru = page_cgroup_zoneinfo(pc, nid_of_pc, zid_of_pc);
- SwapCache is handled.
And, when we handle the LRU list of page_cgroup, we do the following.
pc = lookup_page_cgroup(page);
lock_page_cgroup(pc); .....................(1)
mz = page_cgroup_zoneinfo(pc);
spin_lock(&mz->lru_lock);
.....add to LRU
spin_unlock(&mz->lru_lock);
unlock_page_cgroup(pc);
But (1) is spin_lock and we have to be afraid of dead-lock with zone->lru_lock.
So, trylock() is used at (1), now. Without (1), we can't trust "mz" is correct.
This is a trial to remove this dirty nesting of locks.
This patch changes mz->lru_lock to be zone->lru_lock.
Then, above sequence will be written as
spin_lock(&zone->lru_lock); # in vmscan.c or swap.c via global LRU
mem_cgroup_add/remove/etc_lru() {
pc = lookup_page_cgroup(page);
mz = page_cgroup_zoneinfo(pc);
if (PageCgroupUsed(pc)) {
....add to LRU
}
}
spin_unlock(&zone->lru_lock); # in vmscan.c or swap.c via global LRU
This is much simpler.
(*) We're safe even if we don't take lock_page_cgroup(pc). Because..
1. When pc->mem_cgroup can be modified.
- at charge.
- at account_move().
2. at charge
the PCG_USED bit is not set before pc->mem_cgroup is fixed.
3. at account_move()
the page is isolated and not on LRU.
Pros.
- easy for maintenance.
- memcg can make use of laziness of pagevec.
- we don't have to duplicate the LRU/Active/Unevictable bits in page_cgroup.
- LRU status of memcg will be synchronized with global LRU's one.
- the number of locks is reduced.
- account_move() is simplified very much.
Cons.
- may increase cost of LRU rotation.
(no impact if memcg is not configured.)
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
127 lines
2.9 KiB
C
127 lines
2.9 KiB
C
#ifndef __LINUX_PAGE_CGROUP_H
|
|
#define __LINUX_PAGE_CGROUP_H
|
|
|
|
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
|
|
#include <linux/bit_spinlock.h>
|
|
/*
 * Page Cgroup can be considered as an extended mem_map.
 * A page_cgroup page is associated with every page descriptor. The
 * page_cgroup helps us identify information about the cgroup.
 * All page cgroups are allocated at boot or memory hotplug event,
 * then the page cgroup for pfn always exists.
 */
|
|
struct page_cgroup {
|
|
unsigned long flags;
|
|
struct mem_cgroup *mem_cgroup;
|
|
struct page *page;
|
|
struct list_head lru; /* per cgroup LRU list */
|
|
};
|
|
|
|
void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat);
|
|
void __init page_cgroup_init(void);
|
|
struct page_cgroup *lookup_page_cgroup(struct page *page);
|
|
|
|
/*
 * Bit numbers within page_cgroup->flags. They are passed as bit indices to
 * the bit operations in this header, so the values are positions, not masks.
 */
enum {
	/* flags for mem_cgroup */
	PCG_LOCK,  /* page cgroup is locked */
	PCG_CACHE, /* charged as cache */
	PCG_USED, /* this object is in use. */
};
|
|
|
|
/*
 * TESTPCGFLAG(uname, lname) - generate the predicate PageCgroup<uname>(pc),
 * which returns the result of test_bit() on bit PCG_<lname> of pc->flags.
 */
#define TESTPCGFLAG(uname, lname)			\
static inline int PageCgroup##uname(struct page_cgroup *pc)	\
	{ return test_bit(PCG_##lname, &pc->flags); }
|
|
|
|
/*
 * SETPCGFLAG(uname, lname) - generate SetPageCgroup<uname>(pc), which sets
 * bit PCG_<lname> in pc->flags with set_bit().
 */
#define SETPCGFLAG(uname, lname)			\
static inline void SetPageCgroup##uname(struct page_cgroup *pc)\
	{ set_bit(PCG_##lname, &pc->flags);  }
|
|
|
|
/*
 * CLEARPCGFLAG(uname, lname) - generate ClearPageCgroup<uname>(pc), which
 * clears bit PCG_<lname> in pc->flags with clear_bit().
 */
#define CLEARPCGFLAG(uname, lname)			\
static inline void ClearPageCgroup##uname(struct page_cgroup *pc)	\
	{ clear_bit(PCG_##lname, &pc->flags);  }
|
|
|
|
/* Cache flag is set only once (at allocation) */
|
|
TESTPCGFLAG(Cache, CACHE)
|
|
|
|
TESTPCGFLAG(Used, USED)
|
|
CLEARPCGFLAG(Used, USED)
|
|
|
|
static inline int page_cgroup_nid(struct page_cgroup *pc)
|
|
{
|
|
return page_to_nid(pc->page);
|
|
}
|
|
|
|
static inline enum zone_type page_cgroup_zid(struct page_cgroup *pc)
|
|
{
|
|
return page_zonenum(pc->page);
|
|
}
|
|
|
|
static inline void lock_page_cgroup(struct page_cgroup *pc)
|
|
{
|
|
bit_spin_lock(PCG_LOCK, &pc->flags);
|
|
}
|
|
|
|
static inline int trylock_page_cgroup(struct page_cgroup *pc)
|
|
{
|
|
return bit_spin_trylock(PCG_LOCK, &pc->flags);
|
|
}
|
|
|
|
static inline void unlock_page_cgroup(struct page_cgroup *pc)
|
|
{
|
|
bit_spin_unlock(PCG_LOCK, &pc->flags);
|
|
}
|
|
|
|
#else /* CONFIG_CGROUP_MEM_RES_CTLR */
|
|
/* Opaque type: in this branch the stub lookup_page_cgroup() only returns a pointer to it. */
struct page_cgroup;
|
|
|
|
/* No-op stub used when CONFIG_CGROUP_MEM_RES_CTLR is not set. */
static inline void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
{
}
|
|
|
|
/*
 * Stub used when CONFIG_CGROUP_MEM_RES_CTLR is not set: there are no
 * page_cgroup records, so the lookup always yields NULL.
 */
static inline struct page_cgroup *lookup_page_cgroup(struct page *page)
{
	return NULL;
}
|
|
|
|
/* No-op stub used when CONFIG_CGROUP_MEM_RES_CTLR is not set. */
static inline void page_cgroup_init(void)
{
}
|
|
|
|
#endif
|
|
|
|
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
|
|
#include <linux/swap.h>
|
|
extern struct mem_cgroup *
|
|
swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem);
|
|
extern struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent);
|
|
extern int swap_cgroup_swapon(int type, unsigned long max_pages);
|
|
extern void swap_cgroup_swapoff(int type);
|
|
#else
|
|
#include <linux/swap.h>
|
|
|
|
static inline
|
|
struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem)
|
|
{
|
|
return NULL;
|
|
}
|
|
|
|
static inline
|
|
struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent)
|
|
{
|
|
return NULL;
|
|
}
|
|
|
|
/*
 * Stub used when CONFIG_CGROUP_MEM_RES_CTLR_SWAP is not set: does nothing
 * and always returns 0, regardless of @type and @max_pages.
 */
static inline int
swap_cgroup_swapon(int type, unsigned long max_pages)
{
	return 0;
}
|
|
|
|
/* No-op stub used when CONFIG_CGROUP_MEM_RES_CTLR_SWAP is not set. */
static inline void swap_cgroup_swapoff(int type)
{
}
|
|
|
|
#endif
|
|
#endif
|