mirror of
				https://git.proxmox.com/git/mirror_zfs
				synced 2025-10-26 17:42:09 +00:00 
			
		
		
		
	Fix statfs(2) for 32-bit user space
When handling a 32-bit statfs() system call the returned fields, although 64-bit in the kernel, must be limited to 32 bits or an EOVERFLOW error will be returned. This is less of an issue for block counts since the default reported block size is 128KiB. But since it is possible to set a smaller block size, these values will be scaled as needed to fit in a 32-bit unsigned long. Unlike most other filesystems the total possible file counts are more likely to overflow because they are calculated based on the available free space in the pool. In order to prevent this the reported value must be capped at 2^32-1. This is only for statfs(2) reporting; there are no changes to the internal ZFS limits. Reviewed-by: Andreas Dilger <andreas.dilger@whamcloud.com> Reviewed-by: Richard Yao <ryao@gentoo.org> Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Issue #7927 Closes #7122 Closes #7937
This commit is contained in:
		
							parent
							
								
									36e369ecb8
								
							
						
					
					
						commit
						e897a23eb1
					
				
							
								
								
									
										20
									
								
								config/kernel-in-compat-syscall.m4
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										20
									
								
								config/kernel-in-compat-syscall.m4
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,20 @@ | |||||||
|  | dnl # | ||||||
|  | dnl # 4.5 API change | ||||||
|  | dnl # Added in_compat_syscall() which can be overridden on a per- | ||||||
|  | dnl # architecture basis.  Prior to this is_compat_task() was the | ||||||
|  | dnl # provided interface. | ||||||
|  | dnl # | ||||||
|  | dnl # HAVE_IN_COMPAT_SYSCALL is defined when the ZFS_LINUX_TRY_COMPILE | ||||||
|  | dnl # check below (include <linux/compat.h>, call in_compat_syscall()) | ||||||
|  | dnl # succeeds; otherwise nothing is defined. | ||||||
|  | dnl # | ||||||
|  | AC_DEFUN([ZFS_AC_KERNEL_IN_COMPAT_SYSCALL], [ | ||||||
|  | 	AC_MSG_CHECKING([whether in_compat_syscall() is available]) | ||||||
|  | 	ZFS_LINUX_TRY_COMPILE([ | ||||||
|  | 		#include <linux/compat.h> | ||||||
|  | 	],[ | ||||||
|  | 		in_compat_syscall(); | ||||||
|  | 	],[ | ||||||
|  | 		AC_MSG_RESULT(yes) | ||||||
|  | 		AC_DEFINE(HAVE_IN_COMPAT_SYSCALL, 1, | ||||||
|  | 		    [in_compat_syscall() is available]) | ||||||
|  | 	],[ | ||||||
|  | 		AC_MSG_RESULT(no) | ||||||
|  | 	]) | ||||||
|  | ]) | ||||||
| @ -160,6 +160,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [ | |||||||
| 	ZFS_AC_KERNEL_GLOBAL_PAGE_STATE | 	ZFS_AC_KERNEL_GLOBAL_PAGE_STATE | ||||||
| 	ZFS_AC_KERNEL_ACL_HAS_REFCOUNT | 	ZFS_AC_KERNEL_ACL_HAS_REFCOUNT | ||||||
| 	ZFS_AC_KERNEL_USERNS_CAPABILITIES | 	ZFS_AC_KERNEL_USERNS_CAPABILITIES | ||||||
|  | 	ZFS_AC_KERNEL_IN_COMPAT_SYSCALL | ||||||
| 
 | 
 | ||||||
| 	AS_IF([test "$LINUX_OBJ" != "$LINUX"], [ | 	AS_IF([test "$LINUX_OBJ" != "$LINUX"], [ | ||||||
| 		KERNEL_MAKE="$KERNEL_MAKE O=$LINUX_OBJ" | 		KERNEL_MAKE="$KERNEL_MAKE O=$LINUX_OBJ" | ||||||
|  | |||||||
| @ -30,6 +30,7 @@ | |||||||
| #include <sys/taskq.h> | #include <sys/taskq.h> | ||||||
| #include <sys/cred.h> | #include <sys/cred.h> | ||||||
| #include <linux/backing-dev.h> | #include <linux/backing-dev.h> | ||||||
|  | #include <linux/compat.h> | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * 2.6.28 API change, |  * 2.6.28 API change, | ||||||
| @ -626,4 +627,21 @@ inode_set_iversion(struct inode *ip, u64 val) | |||||||
| } | } | ||||||
| #endif | #endif | ||||||
| 
 | 
 | ||||||
|  | /*
 | ||||||
|  |  * Returns true when called in the context of a 32-bit system call. | ||||||
|  |  * | ||||||
|  |  * With CONFIG_COMPAT the result comes from in_compat_syscall() when | ||||||
|  |  * HAVE_IN_COMPAT_SYSCALL is defined, and from is_compat_task() | ||||||
|  |  * otherwise.  Without CONFIG_COMPAT the result is the compile-time | ||||||
|  |  * test BITS_PER_LONG == 32. | ||||||
|  |  */ | ||||||
|  | static inline int | ||||||
|  | zpl_is_32bit_api(void) | ||||||
|  | { | ||||||
|  | #ifdef CONFIG_COMPAT | ||||||
|  | #ifdef HAVE_IN_COMPAT_SYSCALL | ||||||
|  | 	return (in_compat_syscall()); | ||||||
|  | #else /* !HAVE_IN_COMPAT_SYSCALL */ | ||||||
|  | 	return (is_compat_task()); | ||||||
|  | #endif /* HAVE_IN_COMPAT_SYSCALL */ | ||||||
|  | #else /* !CONFIG_COMPAT */ | ||||||
|  | 	return (BITS_PER_LONG == 32); | ||||||
|  | #endif /* CONFIG_COMPAT */ | ||||||
|  | } | ||||||
|  | 
 | ||||||
| #endif /* _ZFS_VFS_H */ | #endif /* _ZFS_VFS_H */ | ||||||
|  | |||||||
| @ -1422,8 +1422,6 @@ zfs_statvfs(struct dentry *dentry, struct kstatfs *statp) | |||||||
| { | { | ||||||
| 	zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info; | 	zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info; | ||||||
| 	uint64_t refdbytes, availbytes, usedobjs, availobjs; | 	uint64_t refdbytes, availbytes, usedobjs, availobjs; | ||||||
| 	uint64_t fsid; |  | ||||||
| 	uint32_t bshift; |  | ||||||
| 	int err = 0; | 	int err = 0; | ||||||
| 
 | 
 | ||||||
| 	ZFS_ENTER(zfsvfs); | 	ZFS_ENTER(zfsvfs); | ||||||
| @ -1431,7 +1429,7 @@ zfs_statvfs(struct dentry *dentry, struct kstatfs *statp) | |||||||
| 	dmu_objset_space(zfsvfs->z_os, | 	dmu_objset_space(zfsvfs->z_os, | ||||||
| 	    &refdbytes, &availbytes, &usedobjs, &availobjs); | 	    &refdbytes, &availbytes, &usedobjs, &availobjs); | ||||||
| 
 | 
 | ||||||
| 	fsid = dmu_objset_fsid_guid(zfsvfs->z_os); | 	uint64_t fsid = dmu_objset_fsid_guid(zfsvfs->z_os); | ||||||
| 	/*
 | 	/*
 | ||||||
| 	 * The underlying storage pool actually uses multiple block | 	 * The underlying storage pool actually uses multiple block | ||||||
| 	 * size.  Under Solaris frsize (fragment size) is reported as | 	 * size.  Under Solaris frsize (fragment size) is reported as | ||||||
| @ -1443,7 +1441,7 @@ zfs_statvfs(struct dentry *dentry, struct kstatfs *statp) | |||||||
| 	 */ | 	 */ | ||||||
| 	statp->f_frsize = zfsvfs->z_max_blksz; | 	statp->f_frsize = zfsvfs->z_max_blksz; | ||||||
| 	statp->f_bsize = zfsvfs->z_max_blksz; | 	statp->f_bsize = zfsvfs->z_max_blksz; | ||||||
| 	bshift = fls(statp->f_bsize) - 1; | 	uint32_t bshift = fls(statp->f_bsize) - 1; | ||||||
| 
 | 
 | ||||||
| 	/*
 | 	/*
 | ||||||
| 	 * The following report "total" blocks of various kinds in | 	 * The following report "total" blocks of various kinds in | ||||||
| @ -1460,7 +1458,7 @@ zfs_statvfs(struct dentry *dentry, struct kstatfs *statp) | |||||||
| 	 * static metadata.  ZFS doesn't preallocate files, so the best | 	 * static metadata.  ZFS doesn't preallocate files, so the best | ||||||
| 	 * we can do is report the max that could possibly fit in f_files, | 	 * we can do is report the max that could possibly fit in f_files, | ||||||
| 	 * and that minus the number actually used in f_ffree. | 	 * and that minus the number actually used in f_ffree. | ||||||
| 	 * For f_ffree, report the smaller of the number of object available | 	 * For f_ffree, report the smaller of the number of objects available | ||||||
| 	 * and the number of blocks (each object will take at least a block). | 	 * and the number of blocks (each object will take at least a block). | ||||||
| 	 */ | 	 */ | ||||||
| 	statp->f_ffree = MIN(availobjs, availbytes >> DNODE_SHIFT); | 	statp->f_ffree = MIN(availobjs, availbytes >> DNODE_SHIFT); | ||||||
|  | |||||||
| @ -181,6 +181,28 @@ zpl_statfs(struct dentry *dentry, struct kstatfs *statp) | |||||||
| 	spl_fstrans_unmark(cookie); | 	spl_fstrans_unmark(cookie); | ||||||
| 	ASSERT3S(error, <=, 0); | 	ASSERT3S(error, <=, 0); | ||||||
| 
 | 
 | ||||||
|  | 	/*
 | ||||||
|  | 	 * If required by a 32-bit system call, dynamically scale the | ||||||
|  | 	 * block size up to 16MiB and decrease the block counts.  This | ||||||
|  | 	 * allows for a maximum size of 64EiB to be reported.  The file | ||||||
|  | 	 * counts must be artificially capped at 2^32-1. | ||||||
|  | 	 */ | ||||||
|  | 	if (unlikely(zpl_is_32bit_api())) { | ||||||
|  | 		while (statp->f_blocks > UINT32_MAX && | ||||||
|  | 		    statp->f_bsize < SPA_MAXBLOCKSIZE) { | ||||||
|  | 			statp->f_frsize <<= 1; | ||||||
|  | 			statp->f_bsize <<= 1; | ||||||
|  | 
 | ||||||
|  | 			statp->f_blocks >>= 1; | ||||||
|  | 			statp->f_bfree >>= 1; | ||||||
|  | 			statp->f_bavail >>= 1; | ||||||
|  | 		} | ||||||
|  | 
 | ||||||
|  | 		uint64_t usedobjs = statp->f_files - statp->f_ffree; | ||||||
|  | 		statp->f_ffree = MIN(statp->f_ffree, UINT32_MAX - usedobjs); | ||||||
|  | 		statp->f_files = statp->f_ffree + usedobjs; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
| 	return (error); | 	return (error); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Brian Behlendorf
						Brian Behlendorf