mirror of
				https://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson
				synced 2025-10-31 07:02:06 +00:00 
			
		
		
		
	 c04fc586c1
			
		
	
	
		c04fc586c1
		
	
	
	
	
		
			
			Show node to memory section relationship with symlinks in sysfs
Add /sys/devices/system/node/nodeX/memoryY symlinks for all
the memory sections located on nodeX.  For example:
/sys/devices/system/node/node1/memory135 -> ../../memory/memory135
indicates that memory section 135 resides on node1.
Also revises documentation to cover this change as well as updating
Documentation/ABI/testing/sysfs-devices-memory to include descriptions
of memory hotremove files 'phys_device', 'phys_index', and 'state'
that were previously not described there.
In addition to it always being a good policy to provide users with
the maximum possible amount of physical location information for
resources that can be hot-added and/or hot-removed, the following
are some (but likely not all) of the user benefits provided by
this change.
Immediate:
  - Provides information needed to determine the specific node
    on which a defective DIMM is located.  This will reduce system
    downtime when the node or defective DIMM is swapped out.
  - Prevents unintended onlining of a memory section that was
    previously offlined due to a defective DIMM.  This could happen
    during node hot-add when the user or node hot-add assist script
    onlines _all_ offlined sections due to user or script inability
    to identify the specific memory sections located on the hot-added
    node.  The consequences of reintroducing the defective memory
    could be ugly.
  - Provides information needed to vary the amount and distribution
    of memory on specific nodes for testing or debugging purposes.
Future:
  - Will provide information needed to identify the memory
    sections that need to be offlined prior to physical removal
    of a specific node.
Symlink creation during boot was tested on 2-node x86_64, 2-node
ppc64, and 2-node ia64 systems.  Symlink creation during physical
memory hot-add tested on a 2-node x86_64 system.
Signed-off-by: Gary Hade <garyhade@us.ibm.com>
Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
		
	
			
		
			
				
	
	
		
			224 lines
		
	
	
		
			6.3 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			224 lines
		
	
	
		
			6.3 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| #ifndef __LINUX_MEMORY_HOTPLUG_H
 | |
| #define __LINUX_MEMORY_HOTPLUG_H
 | |
| 
 | |
| #include <linux/mmzone.h>
 | |
| #include <linux/spinlock.h>
 | |
| #include <linux/notifier.h>
 | |
| 
 | |
| struct page;
 | |
| struct zone;
 | |
| struct pglist_data;
 | |
| struct mem_section;
 | |
| 
 | |
| #ifdef CONFIG_MEMORY_HOTPLUG
 | |
| 
 | |
/*
 * Types for free bootmem.
 * The smallest mapcount a page normally has is -1, so each type marker
 * below is a value smaller than that.
 */
#define SECTION_INFO		(-1 - 1)
#define MIX_SECTION_INFO	(-1 - 2)
#define NODE_INFO		(-1 - 3)
| 
 | |
| /*
 | |
|  * pgdat resizing functions
 | |
|  */
 | |
| static inline
 | |
| void pgdat_resize_lock(struct pglist_data *pgdat, unsigned long *flags)
 | |
| {
 | |
| 	spin_lock_irqsave(&pgdat->node_size_lock, *flags);
 | |
| }
 | |
| static inline
 | |
| void pgdat_resize_unlock(struct pglist_data *pgdat, unsigned long *flags)
 | |
| {
 | |
| 	spin_unlock_irqrestore(&pgdat->node_size_lock, *flags);
 | |
| }
 | |
| static inline
 | |
| void pgdat_resize_init(struct pglist_data *pgdat)
 | |
| {
 | |
| 	spin_lock_init(&pgdat->node_size_lock);
 | |
| }
 | |
| /*
 | |
|  * Zone resizing functions
 | |
|  */
 | |
| static inline unsigned zone_span_seqbegin(struct zone *zone)
 | |
| {
 | |
| 	return read_seqbegin(&zone->span_seqlock);
 | |
| }
 | |
| static inline int zone_span_seqretry(struct zone *zone, unsigned iv)
 | |
| {
 | |
| 	return read_seqretry(&zone->span_seqlock, iv);
 | |
| }
 | |
| static inline void zone_span_writelock(struct zone *zone)
 | |
| {
 | |
| 	write_seqlock(&zone->span_seqlock);
 | |
| }
 | |
| static inline void zone_span_writeunlock(struct zone *zone)
 | |
| {
 | |
| 	write_sequnlock(&zone->span_seqlock);
 | |
| }
 | |
| static inline void zone_seqlock_init(struct zone *zone)
 | |
| {
 | |
| 	seqlock_init(&zone->span_seqlock);
 | |
| }
 | |
/* Hotplug entry points that are only declared here. */
int zone_grow_free_lists(struct zone *zone, unsigned long new_nr_pages);
int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages);
int add_one_highpage(struct page *page, int pfn, int bad_ppro);

/* Some defines are still needed for these on archs that don't support it. */
void online_page(struct page *page);

/* VM interface that may be used by the firmware interface. */
int online_pages(unsigned long, unsigned long);
void __offline_isolated_pages(unsigned long, unsigned long);
int offline_pages(unsigned long, unsigned long, unsigned long);

/* Reasonably generic interface to expand the physical pages in a zone. */
int __add_pages(int nid, struct zone *zone, unsigned long start_pfn,
		unsigned long nr_pages);
int __remove_pages(struct zone *zone, unsigned long start_pfn,
		   unsigned long nr_pages);
| 
 | |
| #ifdef CONFIG_NUMA
 | |
| extern int memory_add_physaddr_to_nid(u64 start);
 | |
| #else
 | |
| static inline int memory_add_physaddr_to_nid(u64 start)
 | |
| {
 | |
| 	return 0;
 | |
| }
 | |
| #endif
 | |
| 
 | |
| #ifdef CONFIG_HAVE_ARCH_NODEDATA_EXTENSION
 | |
| /*
 | |
|  * For supporting node-hotadd, we have to allocate a new pgdat.
 | |
|  *
 | |
|  * If an arch has generic style NODE_DATA(),
 | |
|  * node_data[nid] = kzalloc() works well. But it depends on the architecture.
 | |
|  *
 | |
|  * In general, generic_alloc_nodedata() is used.
 | |
|  * Now, arch_free_nodedata() is just defined for error path of node_hot_add.
 | |
|  *
 | |
|  */
 | |
| extern pg_data_t *arch_alloc_nodedata(int nid);
 | |
| extern void arch_free_nodedata(pg_data_t *pgdat);
 | |
| extern void arch_refresh_nodedata(int nid, pg_data_t *pgdat);
 | |
| 
 | |
| #else /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */
 | |
| 
 | |
/* No arch-specific extension: the arch_* names map onto the generic ones. */
#define arch_alloc_nodedata(nid)	generic_alloc_nodedata(nid)
#define arch_free_nodedata(pgdat)	generic_free_nodedata(pgdat)
| 
 | |
| #ifdef CONFIG_NUMA
 | |
/*
 * Allocates the pgdat when CONFIG_HAVE_ARCH_NODEDATA_EXTENSION is not set.
 * The expansion does not use @nid.
 * XXX: kmalloc_node() can't work well to get the new node's memory at this
 *	time, because the pgdat for the new node is itself not yet
 *	allocated/initialized. Using the new node's memory will need more
 *	consideration.
 */
#define generic_alloc_nodedata(nid)	\
	(kzalloc(sizeof(pg_data_t), GFP_KERNEL))
/*
 * Exists only for the error path of node hot-add.
 * Node hot-remove will need a replacement for this.
 */
#define generic_free_nodedata(pgdat)	kfree(pgdat)
| 
 | |
| extern pg_data_t *node_data[];
 | |
| static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat)
 | |
| {
 | |
| 	node_data[nid] = pgdat;
 | |
| }
 | |
| 
 | |
| #else /* !CONFIG_NUMA */
 | |
| 
 | |
| /* never called */
 | |
| static inline pg_data_t *generic_alloc_nodedata(int nid)
 | |
| {
 | |
| 	BUG();
 | |
| 	return NULL;
 | |
| }
 | |
| static inline void generic_free_nodedata(pg_data_t *pgdat)
 | |
| {
 | |
| }
 | |
| static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat)
 | |
| {
 | |
| }
 | |
| #endif /* CONFIG_NUMA */
 | |
| #endif /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */
 | |
| 
 | |
/*
 * With CONFIG_SPARSEMEM_VMEMMAP both bootmem-info helpers are empty inline
 * stubs; otherwise they are only declared here.
 */
#ifdef CONFIG_SPARSEMEM_VMEMMAP
static inline void
register_page_bootmem_info_node(struct pglist_data *pgdat) { }

static inline void
put_page_bootmem(struct page *page) { }
#else
void register_page_bootmem_info_node(struct pglist_data *pgdat);
void put_page_bootmem(struct page *page);
#endif
| 
 | |
| #else /* ! CONFIG_MEMORY_HOTPLUG */
 | |
| /*
 | |
|  * Stub functions for when hotplug is off
 | |
|  */
 | |
/* pgdat resize helpers: empty when CONFIG_MEMORY_HOTPLUG is off. */
static inline void
pgdat_resize_lock(struct pglist_data *p, unsigned long *f) { }

static inline void
pgdat_resize_unlock(struct pglist_data *p, unsigned long *f) { }

static inline void
pgdat_resize_init(struct pglist_data *pgdat) { }
| 
 | |
/* Hotplug-off stub: @zone is ignored and 0 is always returned. */
static inline unsigned
zone_span_seqbegin(struct zone *zone)
{
	return 0;
}
/* Hotplug-off stub: @zone and @iv are ignored and 0 is always returned. */
static inline int
zone_span_seqretry(struct zone *zone, unsigned iv)
{
	return 0;
}
/* Zone span write-side helpers: empty when CONFIG_MEMORY_HOTPLUG is off. */
static inline void
zone_span_writelock(struct zone *zone) { }

static inline void
zone_span_writeunlock(struct zone *zone) { }

static inline void
zone_seqlock_init(struct zone *zone) { }
| 
 | |
| static inline int mhp_notimplemented(const char *func)
 | |
| {
 | |
| 	printk(KERN_WARNING "%s() called, with CONFIG_MEMORY_HOTPLUG disabled\n", func);
 | |
| 	dump_stack();
 | |
| 	return -ENOSYS;
 | |
| }
 | |
| 
 | |
/* Hotplug-off stub: does nothing with @pgdat. */
static inline void
register_page_bootmem_info_node(struct pglist_data *pgdat) { }
| 
 | |
| #endif /* ! CONFIG_MEMORY_HOTPLUG */
 | |
| 
 | |
/*
 * Walk through all memory which is registered as a resource.
 * The callback @func takes (start_pfn, nr_pages, private_arg_pointer).
 */
int walk_memory_resource(unsigned long start_pfn, unsigned long nr_pages,
			 void *arg,
			 int (*func)(unsigned long, unsigned long, void *));
| 
 | |
/*
 * With CONFIG_MEMORY_HOTREMOVE the function is only declared here.
 * Without it, the inline stub ignores its arguments and always returns 0.
 */
#ifdef CONFIG_MEMORY_HOTREMOVE
int is_mem_section_removable(unsigned long pfn, unsigned long nr_pages);
#else
static inline int
is_mem_section_removable(unsigned long pfn, unsigned long nr_pages)
{
	return 0;
}
#endif /* CONFIG_MEMORY_HOTREMOVE */
| 
 | |
| extern int add_memory(int nid, u64 start, u64 size);
 | |
| extern int arch_add_memory(int nid, u64 start, u64 size);
 | |
| extern int remove_memory(u64 start, u64 size);
 | |
| extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
 | |
| 								int nr_pages);
 | |
| extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms);
 | |
| extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
 | |
| 					  unsigned long pnum);
 | |
| 
 | |
| #endif /* __LINUX_MEMORY_HOTPLUG_H */
 |