mirror of
				https://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson
				synced 2025-10-25 19:27:10 +00:00 
			
		
		
		
	 5a622f2d0f
			
		
	
	
		5a622f2d0f
		
	
	
	
	
		
			
Creating PDEs with refcount 0 and "deleted" flag has problems (see below).

Switch to usual scheme:
* PDE is created with refcount 1
* every de_get does +1
* every de_put() and remove_proc_entry() do -1
* once refcount reaches 0, PDE is freed.

This elegantly fixes at least the following two races (both observed) without
introducing new locks, without abusing old locks, without spreading
lock_kernel():

1) PDE leak

remove_proc_entry			de_put
-----------------			------
			[refcnt = 1]
if (atomic_read(&de->count) == 0)
					if (atomic_dec_and_test(&de->count))
						if (de->deleted)
							/* also not taken! */
							free_proc_entry(de);
else
	de->deleted = 1;
		[refcount=0, deleted=1]

2) use after free

remove_proc_entry			de_put
-----------------			------
			[refcnt = 1]

					if (atomic_dec_and_test(&de->count))
if (atomic_read(&de->count) == 0)
	free_proc_entry(de);
						/* boom! */
						if (de->deleted)
							free_proc_entry(de);

BUG: unable to handle kernel paging request at virtual address 6b6b6b6b
printing eip: c10acdda *pdpt = 00000000338f8001 *pde = 0000000000000000
Oops: 0000 [#1] PREEMPT SMP
Modules linked in: af_packet ipv6 cpufreq_ondemand loop serio_raw psmouse k8temp hwmon sr_mod cdrom
Pid: 23161, comm: cat Not tainted (2.6.24-rc2-8c0863403f109a43d7000b4646da4818220d501f #4)
EIP: 0060:[<c10acdda>] EFLAGS: 00210097 CPU: 1
EIP is at strnlen+0x6/0x18
EAX: 6b6b6b6b EBX: 6b6b6b6b ECX: 6b6b6b6b EDX: fffffffe
ESI: c128fa3b EDI: f380bf34 EBP: ffffffff ESP: f380be44
 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068
Process cat (pid: 23161, ti=f380b000 task=f38f2570 task.ti=f380b000)
Stack: c10ac4f0 00000278 c12ce000 f43cd2a8 00000163 00000000 7da86067 00000400
       c128fa20 00896b18 f38325a8 c128fe20 ffffffff 00000000 c11f291e 00000400
       f75be300 c128fa20 f769c9a0 c10ac779 f380bf34 f7bfee70 c1018e6b f380bf34
Call Trace:
 [<c10ac4f0>] vsnprintf+0x2ad/0x49b
 [<c10ac779>] vscnprintf+0x14/0x1f
 [<c1018e6b>] vprintk+0xc5/0x2f9
 [<c10379f1>] handle_fasteoi_irq+0x0/0xab
 [<c1004f44>] do_IRQ+0x9f/0xb7
 [<c117db3b>] preempt_schedule_irq+0x3f/0x5b
 [<c100264e>] need_resched+0x1f/0x21
 [<c10190ba>] printk+0x1b/0x1f
 [<c107c8ad>] de_put+0x3d/0x50
 [<c107c8f8>] proc_delete_inode+0x38/0x41
 [<c107c8c0>] proc_delete_inode+0x0/0x41
 [<c1066298>] generic_delete_inode+0x5e/0xc6
 [<c1065aa9>] iput+0x60/0x62
 [<c1063c8e>] d_kill+0x2d/0x46
 [<c1063fa9>] dput+0xdc/0xe4
 [<c10571a1>] __fput+0xb0/0xcd
 [<c1054e49>] filp_close+0x48/0x4f
 [<c1055ee9>] sys_close+0x67/0xa5
 [<c10026b6>] sysenter_past_esp+0x5f/0x85
 =======================
Code: c9 74 0c f2 ae 74 05 bf 01 00 00 00 4f 89 fa 5f 89 d0 c3 85 c9 57 89 c7 89 d0 74 05 f2 ae 75 01 4f 89 f8 5f c3 89 c1 89 c8 eb 06 <80> 38 00 74 07 40 4a 83 fa ff 75 f4 29 c8 c3 90 90 90 57 83 c9
EIP: [<c10acdda>] strnlen+0x6/0x18 SS:ESP 0068:f380be44

Also, remove broken usage of ->deleted from reiserfs: if sget() succeeds,
module is already pinned and remove_proc_entry() can't happen => nobody
can mark PDE deleted.

Dummy proc root in netns code is not marked with refcount 1. AFAICS, we
never get it, it's just for proper /proc/net removal. I double checked
CLONE_NETNS continues to work.

Patch survives many hours of modprobe/rmmod/cat loops without new bugs
which can be attributed to refcounting.

Signed-off-by: Alexey Dobriyan <adobriyan@sw.ru>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
		
			
				
	
	
		
			298 lines
		
	
	
		
			8.6 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			298 lines
		
	
	
		
			8.6 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| #ifndef _LINUX_PROC_FS_H
 | |
| #define _LINUX_PROC_FS_H
 | |
| 
 | |
| #include <linux/slab.h>
 | |
| #include <linux/fs.h>
 | |
| #include <linux/spinlock.h>
 | |
| #include <linux/magic.h>
 | |
| #include <asm/atomic.h>
 | |
| 
 | |
| struct net;
 | |
| struct completion;
 | |
| 
 | |
/*
 * The proc filesystem constants/structures
 */
 | |
| 
 | |
/*
 * Offset of the first process in the /proc root directory.
 */
#define FIRST_PROCESS_ENTRY 256
 | |
| 
 | |
| 
 | |
/*
 * We always define these enumerators
 *
 * NOTE(review): PROC_ROOT_INO is presumably the inode number of the
 * /proc root directory - confirm against its users.
 */

enum {
	PROC_ROOT_INO = 1,
};
 | |
| 
 | |
/*
 * This is not completely implemented yet. The idea is to
 * create an in-memory tree (like the actual /proc filesystem
 * tree) of these proc_dir_entries, so that we can dynamically
 * add new files to /proc.
 *
 * The "next" pointer links the entries of one /proc directory into a
 * list, while parent/subdir create the directory structure (every
 * /proc file has a parent, but "subdir" is NULL for all
 * non-directory entries).
 *
 * "get_info" is called at "read", while "owner" is used to protect the
 * module from unloading while the proc_dir_entry is in use.
 */
 | |
| 
 | |
| typedef	int (read_proc_t)(char *page, char **start, off_t off,
 | |
| 			  int count, int *eof, void *data);
 | |
| typedef	int (write_proc_t)(struct file *file, const char __user *buffer,
 | |
| 			   unsigned long count, void *data);
 | |
| typedef int (get_info_t)(char *, char **, off_t, int);
 | |
| typedef struct proc_dir_entry *(shadow_proc_t)(struct task_struct *task,
 | |
| 						struct proc_dir_entry *pde);
 | |
| 
 | |
| struct proc_dir_entry {
 | |
| 	unsigned int low_ino;
 | |
| 	unsigned short namelen;
 | |
| 	const char *name;
 | |
| 	mode_t mode;
 | |
| 	nlink_t nlink;
 | |
| 	uid_t uid;
 | |
| 	gid_t gid;
 | |
| 	loff_t size;
 | |
| 	const struct inode_operations *proc_iops;
 | |
| 	/*
 | |
| 	 * NULL ->proc_fops means "PDE is going away RSN" or
 | |
| 	 * "PDE is just created". In either case, e.g. ->read_proc won't be
 | |
| 	 * called because it's too late or too early, respectively.
 | |
| 	 *
 | |
| 	 * If you're allocating ->proc_fops dynamically, save a pointer
 | |
| 	 * somewhere.
 | |
| 	 */
 | |
| 	const struct file_operations *proc_fops;
 | |
| 	get_info_t *get_info;
 | |
| 	struct module *owner;
 | |
| 	struct proc_dir_entry *next, *parent, *subdir;
 | |
| 	void *data;
 | |
| 	read_proc_t *read_proc;
 | |
| 	write_proc_t *write_proc;
 | |
| 	atomic_t count;		/* use count */
 | |
| 	int pde_users;	/* number of callers into module in progress */
 | |
| 	spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */
 | |
| 	struct completion *pde_unload_completion;
 | |
| 	shadow_proc_t *shadow_proc;
 | |
| };
 | |
| 
 | |
/*
 * Node of a singly linked list (chained through ->next) that records an
 * address and a size; nodes are handed to kclist_add().
 * NOTE(review): presumably each node describes a memory range exported
 * through /proc/kcore - confirm against the kcore implementation.
 */
struct kcore_list {
	struct kcore_list *next;
	unsigned long addr;
	size_t size;
};
 | |
| 
 | |
| struct vmcore {
 | |
| 	struct list_head list;
 | |
| 	unsigned long long paddr;
 | |
| 	unsigned long long size;
 | |
| 	loff_t offset;
 | |
| };
 | |
| 
 | |
| #ifdef CONFIG_PROC_FS
 | |
| 
 | |
| extern struct proc_dir_entry proc_root;
 | |
| extern struct proc_dir_entry *proc_root_fs;
 | |
| extern struct proc_dir_entry *proc_bus;
 | |
| extern struct proc_dir_entry *proc_root_driver;
 | |
| extern struct proc_dir_entry *proc_root_kcore;
 | |
| 
 | |
| extern spinlock_t proc_subdir_lock;
 | |
| 
 | |
| extern void proc_root_init(void);
 | |
| extern void proc_misc_init(void);
 | |
| 
 | |
| struct mm_struct;
 | |
| 
 | |
| void proc_flush_task(struct task_struct *task);
 | |
| struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *);
 | |
| int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
 | |
| unsigned long task_vsize(struct mm_struct *);
 | |
| int task_statm(struct mm_struct *, int *, int *, int *, int *);
 | |
| char *task_mem(struct mm_struct *, char *);
 | |
| void clear_refs_smap(struct mm_struct *mm);
 | |
| 
 | |
/*
 * PDE reference counting: an entry is created with a count of 1, every
 * de_get() adds one reference, every de_put() and remove_proc_entry()
 * drop one, and the entry is freed when the count reaches 0.
 */
struct proc_dir_entry *de_get(struct proc_dir_entry *de);
void de_put(struct proc_dir_entry *de);

extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
						struct proc_dir_entry *parent);
extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent);

extern struct vfsmount *proc_mnt;
struct pid_namespace;
extern int proc_fill_super(struct super_block *);
extern struct inode *proc_get_inode(struct super_block *, unsigned int,
				    struct proc_dir_entry *);
 | |
| 
 | |
| /*
 | |
|  * These are generic /proc routines that use the internal
 | |
|  * "struct proc_dir_entry" tree to traverse the filesystem.
 | |
|  *
 | |
|  * The /proc root directory has extended versions to take care
 | |
|  * of the /proc/<pid> subdirectories.
 | |
|  */
 | |
| extern int proc_readdir(struct file *, void *, filldir_t);
 | |
| extern struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *);
 | |
| 
 | |
| extern const struct file_operations proc_kcore_operations;
 | |
| extern const struct file_operations proc_kmsg_operations;
 | |
| extern const struct file_operations ppc_htab_operations;
 | |
| 
 | |
| extern int pid_ns_prepare_proc(struct pid_namespace *ns);
 | |
| extern void pid_ns_release_proc(struct pid_namespace *ns);
 | |
| 
 | |
/*
 * proc_tty.c
 */
struct tty_driver;
extern void proc_tty_init(void);
extern void proc_tty_register_driver(struct tty_driver *driver);
extern void proc_tty_unregister_driver(struct tty_driver *driver);
 | |
| 
 | |
/*
 * proc_devtree.c
 *
 * These declarations exist only when CONFIG_PROC_DEVICETREE is set.
 */
#ifdef CONFIG_PROC_DEVICETREE
struct device_node;
struct property;
extern void proc_device_tree_init(void);
extern void proc_device_tree_add_node(struct device_node *,
				      struct proc_dir_entry *);
extern void proc_device_tree_add_prop(struct proc_dir_entry *pde,
				      struct property *prop);
extern void proc_device_tree_remove_prop(struct proc_dir_entry *pde,
					 struct property *prop);
extern void proc_device_tree_update_prop(struct proc_dir_entry *pde,
					 struct property *newprop,
					 struct property *oldprop);
#endif /* CONFIG_PROC_DEVICETREE */
 | |
| 
 | |
/*
 * Symlink and directory creation; implemented outside this header.
 * The !CONFIG_PROC_FS stubs for proc_symlink() and proc_mkdir() return
 * NULL, so callers need to cope with a NULL result.
 */
extern struct proc_dir_entry *proc_symlink(const char *,
		struct proc_dir_entry *, const char *);
extern struct proc_dir_entry *proc_mkdir(const char *, struct proc_dir_entry *);
extern struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode,
			struct proc_dir_entry *parent);
 | |
| 
 | |
| static inline struct proc_dir_entry *create_proc_read_entry(const char *name,
 | |
| 	mode_t mode, struct proc_dir_entry *base, 
 | |
| 	read_proc_t *read_proc, void * data)
 | |
| {
 | |
| 	struct proc_dir_entry *res=create_proc_entry(name,mode,base);
 | |
| 	if (res) {
 | |
| 		res->read_proc=read_proc;
 | |
| 		res->data=data;
 | |
| 	}
 | |
| 	return res;
 | |
| }
 | |
|  
 | |
| static inline struct proc_dir_entry *create_proc_info_entry(const char *name,
 | |
| 	mode_t mode, struct proc_dir_entry *base, get_info_t *get_info)
 | |
| {
 | |
| 	struct proc_dir_entry *res=create_proc_entry(name,mode,base);
 | |
| 	if (res) res->get_info=get_info;
 | |
| 	return res;
 | |
| }
 | |
| 
 | |
/*
 * Per-network-namespace entry creation/removal; implemented outside
 * this header.
 */
extern struct proc_dir_entry *proc_net_fops_create(struct net *net,
	const char *name, mode_t mode, const struct file_operations *fops);
extern void proc_net_remove(struct net *net, const char *name);
 | |
| 
 | |
| #else
 | |
| 
 | |
/* Without CONFIG_PROC_FS the well-known directories collapse to NULL. */
#define proc_root_driver NULL
#define proc_bus NULL

/*
 * Stub: evaluates only "mode" (cast to void so the argument still counts
 * as used) and yields NULL.  Relies on GNU statement expressions.
 */
#define proc_net_fops_create(net, name, mode, fops)  ({ (void)(mode), NULL; })

/* Stub: nothing to remove when procfs is compiled out. */
static inline void proc_net_remove(struct net *net, const char *name)
{
}
 | |
| 
 | |
/* Stub: no per-task /proc state exists when procfs is compiled out. */
static inline void proc_flush_task(struct task_struct *task)
{
}

/* Stub: always reports failure by returning NULL. */
static inline struct proc_dir_entry *create_proc_entry(const char *name,
	mode_t mode, struct proc_dir_entry *parent)
{
	return NULL;
}

/* Stub: expands to an empty statement; the arguments are not evaluated. */
#define remove_proc_entry(name, parent) do {} while (0)
 | |
| 
 | |
/* Stub: always returns NULL when procfs is compiled out. */
static inline struct proc_dir_entry *proc_symlink(const char *name,
		struct proc_dir_entry *parent, const char *dest)
{
	return NULL;
}

/* Stub: always returns NULL when procfs is compiled out. */
static inline struct proc_dir_entry *proc_mkdir(const char *name,
	struct proc_dir_entry *parent)
{
	return NULL;
}
 | |
| 
 | |
| static inline struct proc_dir_entry *create_proc_read_entry(const char *name,
 | |
| 	mode_t mode, struct proc_dir_entry *base, 
 | |
| 	read_proc_t *read_proc, void * data) { return NULL; }
 | |
| static inline struct proc_dir_entry *create_proc_info_entry(const char *name,
 | |
| 	mode_t mode, struct proc_dir_entry *base, get_info_t *get_info)
 | |
| 	{ return NULL; }
 | |
| 
 | |
struct tty_driver;

/*
 * Stubs: tty drivers have nothing to register when procfs is compiled
 * out.  (No ';' after the bodies: an empty declaration at file scope is
 * not valid ISO C.)
 */
static inline void proc_tty_register_driver(struct tty_driver *driver)
{
}

static inline void proc_tty_unregister_driver(struct tty_driver *driver)
{
}
 | |
| 
 | |
extern struct proc_dir_entry proc_root;

/*
 * Forward declaration so the stubs below do not introduce a
 * prototype-scoped "struct pid_namespace" when no earlier header has
 * declared the type.
 */
struct pid_namespace;

/* Stub: nothing to set up without procfs; always reports success (0). */
static inline int pid_ns_prepare_proc(struct pid_namespace *ns)
{
	return 0;
}

/* Stub: nothing to tear down without procfs. */
static inline void pid_ns_release_proc(struct pid_namespace *ns)
{
}
 | |
| 
 | |
| #endif /* CONFIG_PROC_FS */
 | |
| 
 | |
/*
 * kclist_add() takes a list node plus an address and a size.  With
 * CONFIG_PROC_KCORE the real implementation lives elsewhere; without it
 * the call is an empty inline that touches none of its arguments.
 */
#ifdef CONFIG_PROC_KCORE
extern void kclist_add(struct kcore_list *, void *, size_t);
#else
static inline void kclist_add(struct kcore_list *new, void *addr, size_t size)
{
}
#endif
 | |
| 
 | |
/*
 * Per-inode operation slot: holds either a link-resolving callback or a
 * read callback.  The two members share storage, so only the one that
 * was last stored is meaningful.
 */
union proc_op {
	int (*proc_get_link)(struct inode *, struct dentry **, struct vfsmount **);
	int (*proc_read)(struct task_struct *task, char *page);
};
 | |
| 
 | |
| struct proc_inode {
 | |
| 	struct pid *pid;
 | |
| 	int fd;
 | |
| 	union proc_op op;
 | |
| 	struct proc_dir_entry *pde;
 | |
| 	struct inode vfs_inode;
 | |
| };
 | |
| 
 | |
| static inline struct proc_inode *PROC_I(const struct inode *inode)
 | |
| {
 | |
| 	return container_of(inode, struct proc_inode, vfs_inode);
 | |
| }
 | |
| 
 | |
| static inline struct proc_dir_entry *PDE(const struct inode *inode)
 | |
| {
 | |
| 	return PROC_I(inode)->pde;
 | |
| }
 | |
| 
 | |
| static inline struct net *PDE_NET(struct proc_dir_entry *pde)
 | |
| {
 | |
| 	return pde->parent->data;
 | |
| }
 | |
| 
 | |
/*
 * Implemented outside this header.
 * NOTE(review): presumably looks up the struct net associated with a
 * procfs inode - confirm against the definition.
 */
struct net *get_proc_net(const struct inode *inode);
 | |
| 
 | |
/*
 * Private state holding a pid and a task pointer; ->tail_vma exists
 * only on CONFIG_MMU builds.
 * NOTE(review): presumably the per-open state of the /proc/<pid>/maps
 * family of files - confirm against the users.
 */
struct proc_maps_private {
	struct pid *pid;
	struct task_struct *task;
#ifdef CONFIG_MMU
	struct vm_area_struct *tail_vma;
#endif
};
 | |
| 
 | |
| #endif /* _LINUX_PROC_FS_H */
 |