mirror of
				https://git.proxmox.com/git/mirror_zfs
				synced 2025-11-04 10:34:34 +00:00 
			
		
		
		
	Unless __GFP_IO and __GFP_FS are removed from the file mapping gfp mask, we may enter memory reclaim during IO. In the case shown below, shrink_slab() entered another file system which is notoriously hungry for stack. This additional stack usage may cause a stack overflow. This patch removes __GFP_IO and __GFP_FS from the mapping gfp mask of each file during vn_open() to avoid any reclaim in the vn_rdwr() IO path. The original mask is then restored at vn_close() time. Hats off to the loop driver which does something similar for the same reason. [...] shrink_slab+0xdc/0x153 try_to_free_pages+0x1da/0x2d7 __alloc_pages+0x1d7/0x2da do_generic_mapping_read+0x2c9/0x36f file_read_actor+0x0/0x145 __generic_file_aio_read+0x14f/0x19b generic_file_aio_read+0x34/0x39 do_sync_read+0xc7/0x104 vfs_read+0xcb/0x171 :spl:vn_rdwr+0x2b8/0x402 :zfs:vdev_file_io_start+0xad/0xe1 [...] Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
		
			
				
	
	
		
			251 lines
		
	
	
		
			7.4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			251 lines
		
	
	
		
			7.4 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/*****************************************************************************\
 | 
						|
 *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
 | 
						|
 *  Copyright (C) 2007 The Regents of the University of California.
 | 
						|
 *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 | 
						|
 *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
 | 
						|
 *  UCRL-CODE-235197
 | 
						|
 *
 | 
						|
 *  This file is part of the SPL, Solaris Porting Layer.
 | 
						|
 *  For details, see <http://github.com/behlendorf/spl/>.
 | 
						|
 *
 | 
						|
 *  The SPL is free software; you can redistribute it and/or modify it
 | 
						|
 *  under the terms of the GNU General Public License as published by the
 | 
						|
 *  Free Software Foundation; either version 2 of the License, or (at your
 | 
						|
 *  option) any later version.
 | 
						|
 *
 | 
						|
 *  The SPL is distributed in the hope that it will be useful, but WITHOUT
 | 
						|
 *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 | 
						|
 *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 | 
						|
 *  for more details.
 | 
						|
 *
 | 
						|
 *  You should have received a copy of the GNU General Public License along
 | 
						|
 *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
 | 
						|
\*****************************************************************************/
 | 
						|
 | 
						|
#ifndef _SPL_VNODE_H
 | 
						|
#define _SPL_VNODE_H
 | 
						|
 | 
						|
#include <linux/module.h>
 | 
						|
#include <linux/syscalls.h>
 | 
						|
#include <linux/fcntl.h>
 | 
						|
#include <linux/buffer_head.h>
 | 
						|
#include <linux/dcache.h>
 | 
						|
#include <linux/namei.h>
 | 
						|
#include <linux/file.h>
 | 
						|
#include <linux/fs.h>
 | 
						|
#include <linux/fs_struct.h>
 | 
						|
#include <linux/mount.h>
 | 
						|
#include <sys/kmem.h>
 | 
						|
#include <sys/mutex.h>
 | 
						|
#include <sys/types.h>
 | 
						|
#include <sys/time.h>
 | 
						|
#include <sys/uio.h>
 | 
						|
#include <sys/sunldi.h>
 | 
						|
 | 
						|
/*
 * Extended attribute (xvattr) constants.
 *
 * XVA_MAPSIZE is the number of 32-bit words in each attribute bitmap.
 * XVA_MAGIC spells "xvat" in ASCII (0x78 0x76 0x61 0x74); presumably it is
 * the value expected in an xvattr's magic field -- confirm against callers.
 */
#define XVA_MAPSIZE     3
#define XVA_MAGIC       0x78766174
 | 
						|
 | 
						|
/*
 * Prior to linux-2.6.33 only O_DSYNC semantics were implemented and
 * they used the O_SYNC flag.  As of linux-2.6.33 this behavior
 * was properly split into O_SYNC and O_DSYNC respectively.
 *
 * When the included headers do not provide O_DSYNC, alias it to O_SYNC
 * so the FDSYNC flag can always be defined.
 */
#ifndef O_DSYNC
#define O_DSYNC		O_SYNC
#endif
 | 
						|
 | 
						|
/*
 * File open/IO flags under their F* names.  FREAD and FWRITE are plain
 * bit values; the remaining names map one-to-one onto the Linux O_* flags.
 */
#define FREAD		1
#define FWRITE		2
#define FCREAT		O_CREAT
#define FTRUNC		O_TRUNC
#define FOFFMAX		O_LARGEFILE
#define FSYNC		O_SYNC
#define FDSYNC		O_DSYNC
#define FRSYNC		O_RSYNC
#define FEXCL		O_EXCL
#define FDIRECT		O_DIRECT
#define FAPPEND		O_APPEND

/* Pseudo flags with no O_* counterpart in this header. */
#define FNODSYNC	0x10000 /* fsync pseudo flag */
#define FNOFOLLOW	0x20000 /* don't follow symlinks */
 | 
						|
 | 
						|
/*
 * Attribute selection bits, one bit per attribute.  Presumably these are
 * the values carried in vattr_t.va_mask -- confirm against callers.
 *
 * AT_UID and AT_GID are first undefined because linux/auxvec.h defines
 * macros with the same names.  Bit 0x04000 is not assigned here.
 */
#define AT_TYPE		0x00001
#define AT_MODE		0x00002
#undef  AT_UID		/* Conflicts with linux/auxvec.h */
#define AT_UID          0x00004
#undef  AT_GID		/* Conflicts with linux/auxvec.h */
#define AT_GID          0x00008
#define AT_FSID		0x00010
#define AT_NODEID	0x00020
#define AT_NLINK	0x00040
#define AT_SIZE		0x00080
#define AT_ATIME	0x00100
#define AT_MTIME	0x00200
#define AT_CTIME	0x00400
#define AT_RDEV		0x00800
#define AT_BLKSIZE	0x01000
#define AT_NBLOCKS	0x02000
#define AT_SEQ		0x08000
#define AT_XVATTR	0x10000
 | 
						|
 | 
						|
/*
 * Miscellaneous flag values.  Nothing in this header consumes them.
 * NOTE(review): by name CRCREAT looks like a create flag, RMFILE a remove
 * flag, and B_INVAL/B_TRUNC page-invalidation flags -- confirm with callers.
 */
#define CRCREAT		0x01
#define RMFILE		0x02

#define B_INVAL		0x01
#define B_TRUNC		0x02
 | 
						|
 | 
						|
/*
 * Member-name compatibility shim.  When HAVE_PATH_IN_NAMEIDATA is defined
 * the dentry and mount are reached through an embedded 'path' member
 * (x.path.dentry, x.path.mnt); otherwise they are direct members
 * (x.dentry, x.mnt).  Code can use x.nd_dentry and x.nd_mnt in both cases.
 * Judging by the config symbol, the structure concerned is struct nameidata.
 */
#ifdef HAVE_PATH_IN_NAMEIDATA
# define nd_dentry	path.dentry
# define nd_mnt		path.mnt
#else
# define nd_dentry	dentry
# define nd_mnt		mnt
#endif
 | 
						|
 | 
						|
/*
 * vnode type.  Values are explicit and contiguous from VNON (0) to
 * VBAD (11); used for vattr_t.va_type and vnode_t.v_type.
 */
typedef enum vtype {
	VNON		= 0,
	VREG		= 1,
	VDIR		= 2,
	VBLK		= 3,
	VCHR		= 4,
	VLNK		= 5,
	VFIFO		= 6,
	VDOOR		= 7,
	VPROC		= 8,
	VSOCK		= 9,
	VPORT		= 10,
	VBAD		= 11
} vtype_t;
 | 
						|
 | 
						|
typedef struct vattr {
 | 
						|
	enum vtype	va_type;	/* vnode type */
 | 
						|
	u_int		va_mask;	/* attribute bit-mask */
 | 
						|
	u_short		va_mode;	/* acc mode */
 | 
						|
	short		va_uid;		/* owner uid */
 | 
						|
	short		va_gid;		/* owner gid */
 | 
						|
	long		va_fsid;	/* fs id */
 | 
						|
	long		va_nodeid;	/* node # */
 | 
						|
	short		va_nlink;	/* # links */
 | 
						|
	u_long		va_size;	/* file size */
 | 
						|
	long		va_blocksize;	/* block size */
 | 
						|
	struct timeval va_atime;	/* last acc */
 | 
						|
	struct timeval va_mtime;	/* last mod */
 | 
						|
	struct timeval va_ctime;	/* last chg */
 | 
						|
	dev_t		va_rdev;	/* dev */
 | 
						|
	long		va_blocks;	/* space used */
 | 
						|
} vattr_t;
 | 
						|
 | 
						|
typedef struct xoptattr {
 | 
						|
	timestruc_t	xoa_createtime;	/* Create time of file */
 | 
						|
	uint8_t		xoa_archive;
 | 
						|
	uint8_t		xoa_system;
 | 
						|
	uint8_t		xoa_readonly;
 | 
						|
	uint8_t		xoa_hidden;
 | 
						|
	uint8_t		xoa_nounlink;
 | 
						|
	uint8_t		xoa_immutable;
 | 
						|
	uint8_t		xoa_appendonly;
 | 
						|
	uint8_t		xoa_nodump;
 | 
						|
	uint8_t		xoa_settable;
 | 
						|
	uint8_t		xoa_opaque;
 | 
						|
	uint8_t		xoa_av_quarantined;
 | 
						|
	uint8_t		xoa_av_modified;
 | 
						|
} xoptattr_t;
 | 
						|
 | 
						|
typedef struct xvattr {
 | 
						|
	vattr_t		xva_vattr;	/* Embedded vattr structure */
 | 
						|
	uint32_t	xva_magic;	/* Magic Number */
 | 
						|
	uint32_t	xva_mapsize;	/* Size of attr bitmap (32-bit words) */
 | 
						|
	uint32_t	*xva_rtnattrmapp;	/* Ptr to xva_rtnattrmap[] */
 | 
						|
	uint32_t	xva_reqattrmap[XVA_MAPSIZE];	/* Requested attrs */
 | 
						|
	uint32_t	xva_rtnattrmap[XVA_MAPSIZE];	/* Returned attrs */
 | 
						|
	xoptattr_t	xva_xoptattrs;	/* Optional attributes */
 | 
						|
} xvattr_t;
 | 
						|
 | 
						|
typedef struct vsecattr {
 | 
						|
	uint_t		vsa_mask;	/* See below */
 | 
						|
	int		vsa_aclcnt;	/* ACL entry count */
 | 
						|
	void		*vsa_aclentp;	/* pointer to ACL entries */
 | 
						|
	int		vsa_dfaclcnt;	/* default ACL entry count */
 | 
						|
	void		*vsa_dfaclentp;	/* pointer to default ACL entries */
 | 
						|
	size_t		vsa_aclentsz;	/* ACE size in bytes of vsa_aclentp */
 | 
						|
} vsecattr_t;
 | 
						|
 | 
						|
typedef struct vnode {
 | 
						|
	struct file	*v_file;
 | 
						|
	kmutex_t	v_lock;		/* protects vnode fields */
 | 
						|
	uint_t		v_flag;		/* vnode flags (see below) */
 | 
						|
	uint_t		v_count;	/* reference count */
 | 
						|
	void		*v_data;	/* private data for fs */
 | 
						|
	struct vfs	*v_vfsp;	/* ptr to containing VFS */
 | 
						|
	struct stdata	*v_stream;	/* associated stream */
 | 
						|
	enum vtype	v_type;		/* vnode type */
 | 
						|
	dev_t		v_rdev;		/* device (VCHR, VBLK) */
 | 
						|
	gfp_t		v_gfp_mask;	/* original mapping gfp mask */
 | 
						|
} vnode_t;
 | 
						|
 | 
						|
typedef struct vn_file {
 | 
						|
	int		f_fd;		/* linux fd for lookup */
 | 
						|
	struct file	*f_file;	/* linux file struct */
 | 
						|
	atomic_t	f_ref;		/* ref count */
 | 
						|
	kmutex_t	f_lock;		/* struct lock */
 | 
						|
	loff_t		f_offset;	/* offset */
 | 
						|
	vnode_t		*f_vnode;	/* vnode */
 | 
						|
	struct list_head f_list;	/* list referenced file_t's */
 | 
						|
} file_t;
 | 
						|
 | 
						|
typedef struct caller_context {
 | 
						|
	pid_t		cc_pid;		/* Process ID of the caller */
 | 
						|
	int		cc_sysid;	/* System ID, used for remote calls */
 | 
						|
	u_longlong_t	cc_caller_id;	/* Identifier for (set of) caller(s) */
 | 
						|
	ulong_t		cc_flags;
 | 
						|
} caller_context_t;
 | 
						|
 | 
						|
extern vnode_t *vn_alloc(int flag);
 | 
						|
void vn_free(vnode_t *vp);
 | 
						|
extern int vn_open(const char *path, uio_seg_t seg, int flags, int mode,
 | 
						|
		   vnode_t **vpp, int x1, void *x2);
 | 
						|
extern int vn_openat(const char *path, uio_seg_t seg, int flags, int mode,
 | 
						|
		     vnode_t **vpp, int x1, void *x2, vnode_t *vp, int fd);
 | 
						|
extern int vn_rdwr(uio_rw_t uio, vnode_t *vp, void *addr, ssize_t len,
 | 
						|
		   offset_t off, uio_seg_t seg, int x1, rlim64_t x2,
 | 
						|
		   void *x3, ssize_t *residp);
 | 
						|
extern int vn_close(vnode_t *vp, int flags, int x1, int x2, void *x3, void *x4);
 | 
						|
extern int vn_seek(vnode_t *vp, offset_t o, offset_t *op, caller_context_t *ct);
 | 
						|
 | 
						|
extern int vn_remove(const char *path, uio_seg_t seg, int flags);
 | 
						|
extern int vn_rename(const char *path1, const char *path2, int x1);
 | 
						|
extern int vn_getattr(vnode_t *vp, vattr_t *vap, int flags, void *x3, void *x4);
 | 
						|
extern int vn_fsync(vnode_t *vp, int flags, void *x3, void *x4);
 | 
						|
extern file_t *vn_getf(int fd);
 | 
						|
extern void vn_releasef(int fd);
 | 
						|
extern int vn_set_pwd(const char *filename);
 | 
						|
 | 
						|
int vn_init(void);
 | 
						|
void vn_fini(void);
 | 
						|
 | 
						|
static __inline__ int
 | 
						|
vn_rele(vnode_t *vp)
 | 
						|
{
 | 
						|
	return 0;
 | 
						|
} /* vn_rele() */
 | 
						|
 | 
						|
static __inline__ int
 | 
						|
vn_putpage(vnode_t *vp, offset_t off, ssize_t size,
 | 
						|
	   int flags, void *x1, void *x2) {
 | 
						|
	return 0;
 | 
						|
} /* vn_putpage() */
 | 
						|
 | 
						|
/*
 * Alias the VOP_xxx / VN_xxx / getf names onto the vn_xxx functions
 * declared in this header.  vn_is_readonly() ignores its argument and
 * always evaluates to 0.
 */
#define VOP_CLOSE				vn_close
#define VOP_SEEK				vn_seek
#define VN_RELE					vn_rele
#define VOP_GETATTR				vn_getattr
#define VOP_FSYNC				vn_fsync
#define VOP_PUTPAGE				vn_putpage
#define vn_is_readonly(vp)			0
#define getf					vn_getf
#define releasef				vn_releasef
 | 
						|
 | 
						|
extern vnode_t *rootdir;
 | 
						|
 | 
						|
#endif /* _SPL_VNODE_H */
 |