mirror of
				https://git.proxmox.com/git/mirror_zfs
				synced 2025-10-26 08:33:52 +00:00 
			
		
		
		
	MMAP Optimization
Enable the zfs_getpage, zfs_fillpage, zfs_putpage, and zfs_putapage functions. The functions have been modified to make them Linux friendly. ZFS uses these functions to read/write the mmapped pages. Using them from readpage/writepage results in clearer code. The patch also adds readpages and writepages interface functions to read/write a list of pages in one function call. The code change handles the first mmap optimization mentioned on https://github.com/behlendorf/zfs/issues/225 Signed-off-by: Prasad Joshi <pjoshi@stec-inc.com> Signed-off-by: Brian Behlendorf <behlendorf@llnl.gov> Issue #255
This commit is contained in:
		
							parent
							
								
									2a005961a4
								
							
						
					
					
						commit
						dde471ef5a
					
				| @ -71,6 +71,9 @@ extern int zfs_getsecattr(struct inode *ip, vsecattr_t *vsecp, int flag, | |||||||
|     cred_t *cr); |     cred_t *cr); | ||||||
| extern int zfs_setsecattr(struct inode *ip, vsecattr_t *vsecp, int flag, | extern int zfs_setsecattr(struct inode *ip, vsecattr_t *vsecp, int flag, | ||||||
|     cred_t *cr); |     cred_t *cr); | ||||||
|  | extern int zfs_getpage(struct inode *ip, struct page *pl[], int nr_pages); | ||||||
|  | extern int zfs_putpage(struct page *page, struct writeback_control *wbc, | ||||||
|  |     void *data); | ||||||
| 
 | 
 | ||||||
| #ifdef	__cplusplus | #ifdef	__cplusplus | ||||||
| } | } | ||||||
|  | |||||||
| @ -29,6 +29,7 @@ | |||||||
| #include <linux/vfs_compat.h> | #include <linux/vfs_compat.h> | ||||||
| #include <linux/xattr_compat.h> | #include <linux/xattr_compat.h> | ||||||
| #include <linux/exportfs.h> | #include <linux/exportfs.h> | ||||||
|  | #include <linux/writeback.h> | ||||||
| 
 | 
 | ||||||
| /* zpl_inode.c */ | /* zpl_inode.c */ | ||||||
| extern const struct inode_operations zpl_inode_operations; | extern const struct inode_operations zpl_inode_operations; | ||||||
|  | |||||||
| @ -3725,54 +3725,32 @@ zfs_null_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, | |||||||
| 	pvn_write_done(pp, B_INVAL|B_FORCE|B_ERROR); | 	pvn_write_done(pp, B_INVAL|B_FORCE|B_ERROR); | ||||||
| 	return (0); | 	return (0); | ||||||
| } | } | ||||||
|  | #endif /* HAVE_MMAP */ | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Push a page out to disk, klustering if possible. |  * Push a page out to disk | ||||||
|  * |  * | ||||||
|  *	IN:	vp	- file to push page to. |  *	IN:	vp	- file to push page to. | ||||||
|  *		pp	- page to push. |  *		pp	- page to push. | ||||||
|  *		flags	- additional flags. |  *		off	- start of range pushed. | ||||||
|  *		cr	- credentials of caller. |  *		len	- len of range pushed. | ||||||
|  * |  * | ||||||
|  *	OUT:	offp	- start of range pushed. |  | ||||||
|  *		lenp	- len of range pushed. |  | ||||||
|  * |  * | ||||||
|  *	RETURN:	0 if success |  *	RETURN:	0 if success | ||||||
|  *		error code if failure |  *		error code if failure | ||||||
|  * |  * | ||||||
|  * NOTE: callers must have locked the page to be pushed.  On |  * NOTE: callers must have locked the page to be pushed. | ||||||
|  * exit, the page (and all other pages in the kluster) must be |  | ||||||
|  * unlocked. |  | ||||||
|  */ |  */ | ||||||
| /* ARGSUSED */ | /* ARGSUSED */ | ||||||
| static int | static int | ||||||
| zfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, | zfs_putapage(struct inode *ip, struct page *pp, u_offset_t off, size_t len) | ||||||
| 		size_t *lenp, int flags, cred_t *cr) |  | ||||||
| { | { | ||||||
| 	znode_t		*zp = VTOZ(vp); | 	znode_t    *zp  = ITOZ(ip); | ||||||
| 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs; | 	zfs_sb_t   *zsb = ITOZSB(ip); | ||||||
| 	dmu_tx_t   *tx; | 	dmu_tx_t   *tx; | ||||||
| 	u_offset_t	off, koff; | 	caddr_t	   va; | ||||||
| 	size_t		len, klen; |  | ||||||
| 	int        err; | 	int        err; | ||||||
| 
 | 
 | ||||||
| 	off = pp->p_offset; |  | ||||||
| 	len = PAGESIZE; |  | ||||||
| 	/*
 |  | ||||||
| 	 * If our blocksize is bigger than the page size, try to kluster |  | ||||||
| 	 * multiple pages so that we write a full block (thus avoiding |  | ||||||
| 	 * a read-modify-write). |  | ||||||
| 	 */ |  | ||||||
| 	if (off < zp->z_size && zp->z_blksz > PAGESIZE) { |  | ||||||
| 		klen = P2ROUNDUP((ulong_t)zp->z_blksz, PAGESIZE); |  | ||||||
| 		koff = ISP2(klen) ? P2ALIGN(off, (u_offset_t)klen) : 0; |  | ||||||
| 		ASSERT(koff <= zp->z_size); |  | ||||||
| 		if (koff + klen > zp->z_size) |  | ||||||
| 			klen = P2ROUNDUP(zp->z_size - koff, (uint64_t)PAGESIZE); |  | ||||||
| 		pp = pvn_write_kluster(vp, pp, &off, &len, koff, klen, flags); |  | ||||||
| 	} |  | ||||||
| 	ASSERT3U(btop(len), ==, btopr(len)); |  | ||||||
| 
 |  | ||||||
| 	/*
 | 	/*
 | ||||||
| 	 * Can't push pages past end-of-file. | 	 * Can't push pages past end-of-file. | ||||||
| 	 */ | 	 */ | ||||||
| @ -3780,24 +3758,16 @@ zfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp, | |||||||
| 		/* ignore all pages */ | 		/* ignore all pages */ | ||||||
| 		err = 0; | 		err = 0; | ||||||
| 		goto out; | 		goto out; | ||||||
| 	} else if (off + len > zp->z_size) { | 	} else if (off + len > zp->z_size) | ||||||
| 		int npages = btopr(zp->z_size - off); |  | ||||||
| 		page_t *trunc; |  | ||||||
| 
 |  | ||||||
| 		page_list_break(&pp, &trunc, npages); |  | ||||||
| 		/* ignore pages past end of file */ |  | ||||||
| 		if (trunc) |  | ||||||
| 			pvn_write_done(trunc, flags); |  | ||||||
| 		len = zp->z_size - off; | 		len = zp->z_size - off; | ||||||
| 	} |  | ||||||
| 
 | 
 | ||||||
| 	if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) || | 	if (zfs_owner_overquota(zsb, zp, B_FALSE) || | ||||||
| 	    zfs_owner_overquota(zfsvfs, zp, B_TRUE)) { | 	    zfs_owner_overquota(zsb, zp, B_TRUE)) { | ||||||
| 		err = EDQUOT; | 		err = EDQUOT; | ||||||
| 		goto out; | 		goto out; | ||||||
| 	} | 	} | ||||||
| top: | top: | ||||||
| 	tx = dmu_tx_create(zfsvfs->z_os); | 	tx = dmu_tx_create(zsb->z_os); | ||||||
| 	dmu_tx_hold_write(tx, zp->z_id, off, len); | 	dmu_tx_hold_write(tx, zp->z_id, off, len); | ||||||
| 
 | 
 | ||||||
| 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); | 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); | ||||||
| @ -3813,52 +3783,38 @@ top: | |||||||
| 		goto out; | 		goto out; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	if (zp->z_blksz <= PAGESIZE) { | 	va = kmap(pp); | ||||||
| 		caddr_t va = zfs_map_page(pp, S_READ); |  | ||||||
| 	ASSERT3U(len, <=, PAGESIZE); | 	ASSERT3U(len, <=, PAGESIZE); | ||||||
| 		dmu_write(zfsvfs->z_os, zp->z_id, off, len, va, tx); | 	dmu_write(zsb->z_os, zp->z_id, off, len, va, tx); | ||||||
| 		zfs_unmap_page(pp, va); | 	kunmap(pp); | ||||||
| 	} else { |  | ||||||
| 		err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, pp, tx); |  | ||||||
| 	} |  | ||||||
| 
 | 
 | ||||||
| 	if (err == 0) { | 	if (err == 0) { | ||||||
| 		uint64_t mtime[2], ctime[2]; | 		uint64_t mtime[2], ctime[2]; | ||||||
| 		sa_bulk_attr_t bulk[3]; | 		sa_bulk_attr_t bulk[3]; | ||||||
| 		int count = 0; | 		int count = 0; | ||||||
| 
 | 
 | ||||||
| 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, | 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, | ||||||
| 		    &mtime, 16); | 		    &mtime, 16); | ||||||
| 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, | 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, | ||||||
| 		    &ctime, 16); | 		    &ctime, 16); | ||||||
| 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, | 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL, | ||||||
| 		    &zp->z_pflags, 8); | 		    &zp->z_pflags, 8); | ||||||
| 		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, | 		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, | ||||||
| 		    B_TRUE); | 		    B_TRUE); | ||||||
| 		zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0); | 		zfs_log_write(zsb->z_log, tx, TX_WRITE, zp, off, len, 0); | ||||||
| 	} | 	} | ||||||
| 	dmu_tx_commit(tx); | 	dmu_tx_commit(tx); | ||||||
| 
 | 
 | ||||||
| out: | out: | ||||||
| 	pvn_write_done(pp, (err ? B_ERROR : 0) | flags); |  | ||||||
| 	if (offp) |  | ||||||
| 		*offp = off; |  | ||||||
| 	if (lenp) |  | ||||||
| 		*lenp = len; |  | ||||||
| 
 |  | ||||||
| 	return (err); | 	return (err); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Copy the portion of the file indicated from pages into the file. |  * Copy the portion of the file indicated from page into the file. | ||||||
|  * The pages are stored in a page list attached to the files vnode. |  | ||||||
|  * |  * | ||||||
|  *	IN:	vp	- vnode of file to push page data to. |  *	IN:	ip	- inode of file to push page data to. | ||||||
|  *		off	- position in file to put data. |  *		wbc	- Unused parameter | ||||||
|  *		len	- amount of data to write. |  *		data	- pointer to address_space | ||||||
|  *		flags	- flags to control the operation. |  | ||||||
|  *		cr	- credentials of caller. |  | ||||||
|  *		ct	- caller context. |  | ||||||
|  * |  * | ||||||
|  *	RETURN:	0 if success |  *	RETURN:	0 if success | ||||||
|  *		error code if failure |  *		error code if failure | ||||||
| @ -3867,87 +3823,45 @@ out: | |||||||
|  *	vp - ctime|mtime updated |  *	vp - ctime|mtime updated | ||||||
|  */ |  */ | ||||||
| /*ARGSUSED*/ | /*ARGSUSED*/ | ||||||
| static int | int | ||||||
| zfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr) | zfs_putpage(struct page *page, struct writeback_control *wbc, void *data) | ||||||
| { | { | ||||||
| 	znode_t		*zp = VTOZ(vp); | 	struct address_space *mapping = data; | ||||||
| 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs; | 	struct inode         *ip      = mapping->host; | ||||||
| 	page_t		*pp; | 	znode_t              *zp      = ITOZ(ip); | ||||||
| 	size_t		io_len; | 	zfs_sb_t             *zsb     = ITOZSB(ip); | ||||||
| 	u_offset_t	io_off; |  | ||||||
| 	uint_t		blksz; |  | ||||||
| 	rl_t		     *rl; | 	rl_t		     *rl; | ||||||
| 	int		error = 0; | 	u_offset_t	     io_off; | ||||||
|  | 	size_t		     io_len; | ||||||
|  | 	size_t		     len; | ||||||
|  | 	int		     error; | ||||||
| 
 | 
 | ||||||
| 	ZFS_ENTER(zfsvfs); | 	io_off = page_offset(page); | ||||||
|  | 	io_len = PAGESIZE; | ||||||
|  | 
 | ||||||
|  | 	ZFS_ENTER(zsb); | ||||||
| 	ZFS_VERIFY_ZP(zp); | 	ZFS_VERIFY_ZP(zp); | ||||||
| 
 | 
 | ||||||
| 	/*
 |  | ||||||
| 	 * Align this request to the file block size in case we kluster. |  | ||||||
| 	 * XXX - this can result in pretty aggresive locking, which can |  | ||||||
| 	 * impact simultanious read/write access.  One option might be |  | ||||||
| 	 * to break up long requests (len == 0) into block-by-block |  | ||||||
| 	 * operations to get narrower locking. |  | ||||||
| 	 */ |  | ||||||
| 	blksz = zp->z_blksz; |  | ||||||
| 	if (ISP2(blksz)) |  | ||||||
| 		io_off = P2ALIGN_TYPED(off, blksz, u_offset_t); |  | ||||||
| 	else |  | ||||||
| 		io_off = 0; |  | ||||||
| 	if (len > 0 && ISP2(blksz)) |  | ||||||
| 		io_len = P2ROUNDUP_TYPED(len + (off - io_off), blksz, size_t); |  | ||||||
| 	else |  | ||||||
| 		io_len = 0; |  | ||||||
| 
 |  | ||||||
| 	if (io_len == 0) { |  | ||||||
| 		/*
 |  | ||||||
| 		 * Search the entire vp list for pages >= io_off. |  | ||||||
| 		 */ |  | ||||||
| 		rl = zfs_range_lock(zp, io_off, UINT64_MAX, RL_WRITER); |  | ||||||
| 		error = pvn_vplist_dirty(vp, io_off, zfs_putapage, flags, cr); |  | ||||||
| 		goto out; |  | ||||||
| 	} |  | ||||||
| 	rl = zfs_range_lock(zp, io_off, io_len, RL_WRITER); | 	rl = zfs_range_lock(zp, io_off, io_len, RL_WRITER); | ||||||
| 
 | 
 | ||||||
| 	if (off > zp->z_size) { | 	if (io_off > zp->z_size) { | ||||||
| 		/* past end of file */ | 		/* past end of file */ | ||||||
| 		zfs_range_unlock(rl); | 		zfs_range_unlock(rl); | ||||||
| 		ZFS_EXIT(zfsvfs); | 		ZFS_EXIT(zsb); | ||||||
| 		return (0); | 		return (0); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	len = MIN(io_len, P2ROUNDUP(zp->z_size, PAGESIZE) - io_off); | 	len = MIN(io_len, P2ROUNDUP(zp->z_size, PAGESIZE) - io_off); | ||||||
| 
 | 
 | ||||||
| 	for (off = io_off; io_off < off + len; io_off += io_len) { | 	error = zfs_putapage(ip, page, io_off, len); | ||||||
| 		if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) { |  | ||||||
| 			pp = page_lookup(vp, io_off, |  | ||||||
| 			    (flags & (B_INVAL | B_FREE)) ? SE_EXCL : SE_SHARED); |  | ||||||
| 		} else { |  | ||||||
| 			pp = page_lookup_nowait(vp, io_off, |  | ||||||
| 			    (flags & B_FREE) ? SE_EXCL : SE_SHARED); |  | ||||||
| 		} |  | ||||||
| 
 |  | ||||||
| 		if (pp != NULL && pvn_getdirty(pp, flags)) { |  | ||||||
| 			int err; |  | ||||||
| 
 |  | ||||||
| 			/*
 |  | ||||||
| 			 * Found a dirty page to push |  | ||||||
| 			 */ |  | ||||||
| 			err = zfs_putapage(vp, pp, &io_off, &io_len, flags, cr); |  | ||||||
| 			if (err) |  | ||||||
| 				error = err; |  | ||||||
| 		} else { |  | ||||||
| 			io_len = PAGESIZE; |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| out: |  | ||||||
| 	zfs_range_unlock(rl); | 	zfs_range_unlock(rl); | ||||||
| 	if ((flags & B_ASYNC) == 0 || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS) | 
 | ||||||
| 		zil_commit(zfsvfs->z_log, zp->z_id); | 	if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS) | ||||||
| 	ZFS_EXIT(zfsvfs); | 		zil_commit(zsb->z_log, zp->z_id); | ||||||
|  | 	ZFS_EXIT(zsb); | ||||||
| 	return (error); | 	return (error); | ||||||
| } | } | ||||||
| #endif /* HAVE_MMAP */ | EXPORT_SYMBOL(zfs_putpage); | ||||||
| 
 | 
 | ||||||
| /*ARGSUSED*/ | /*ARGSUSED*/ | ||||||
| void | void | ||||||
| @ -4039,102 +3953,62 @@ zfs_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset, | |||||||
| 	ZFS_EXIT(zfsvfs); | 	ZFS_EXIT(zfsvfs); | ||||||
| 	return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct)); | 	return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct)); | ||||||
| } | } | ||||||
|  | #endif /* HAVE_MMAP */ | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * If we can't find a page in the cache, we will create a new page |  * Fill pages with data from the disk. | ||||||
|  * and fill it with file data.  For efficiency, we may try to fill |  | ||||||
|  * multiple pages at once (klustering) to fill up the supplied page |  | ||||||
|  * list.  Note that the pages to be filled are held with an exclusive |  | ||||||
|  * lock to prevent access by other threads while they are being filled. |  | ||||||
|  */ |  */ | ||||||
| static int | static int | ||||||
| zfs_fillpage(vnode_t *vp, u_offset_t off, struct seg *seg, | zfs_fillpage(struct inode *ip, struct page *pl[], int nr_pages) | ||||||
|     caddr_t addr, page_t *pl[], size_t plsz, enum seg_rw rw) |  | ||||||
| { | { | ||||||
| 	znode_t *zp = VTOZ(vp); | 	znode_t	    *zp = ITOZ(ip); | ||||||
| 	page_t *pp, *cur_pp; | 	zfs_sb_t    *zsb = ITOZSB(ip); | ||||||
| 	objset_t *os = zp->z_zfsvfs->z_os; | 	objset_t    *os; | ||||||
|  | 	struct page *cur_pp; | ||||||
| 	u_offset_t  io_off, total; | 	u_offset_t  io_off, total; | ||||||
| 	size_t      io_len; | 	size_t      io_len; | ||||||
|  | 	loff_t      i_size; | ||||||
|  | 	unsigned    page_idx; | ||||||
| 	int         err; | 	int         err; | ||||||
| 
 | 
 | ||||||
| 	if (plsz == PAGESIZE || zp->z_blksz <= PAGESIZE) { | 	os     = zsb->z_os; | ||||||
| 		/*
 | 	io_len = nr_pages << PAGE_CACHE_SHIFT; | ||||||
| 		 * We only have a single page, don't bother klustering | 	i_size = i_size_read(ip); | ||||||
| 		 */ | 	io_off = page_offset(pl[0]); | ||||||
| 		io_off = off; | 
 | ||||||
| 		io_len = PAGESIZE; | 	if (io_off + io_len > i_size) | ||||||
| 		pp = page_create_va(vp, io_off, io_len, | 		io_len = i_size - io_off; | ||||||
| 		    PG_EXCL | PG_WAIT, seg, addr); |  | ||||||
| 	} else { |  | ||||||
| 		/*
 |  | ||||||
| 		 * Try to find enough pages to fill the page list |  | ||||||
| 		 */ |  | ||||||
| 		pp = pvn_read_kluster(vp, off, seg, addr, &io_off, |  | ||||||
| 		    &io_len, off, plsz, 0); |  | ||||||
| 	} |  | ||||||
| 	if (pp == NULL) { |  | ||||||
| 		/*
 |  | ||||||
| 		 * The page already exists, nothing to do here. |  | ||||||
| 		 */ |  | ||||||
| 		*pl = NULL; |  | ||||||
| 		return (0); |  | ||||||
| 	} |  | ||||||
| 
 | 
 | ||||||
| 	/*
 | 	/*
 | ||||||
| 	 * Fill the pages in the kluster. | 	 * Iterate over list of pages and read each page individually. | ||||||
| 	 */ | 	 */ | ||||||
| 	cur_pp = pp; | 	page_idx = 0; | ||||||
|  | 	cur_pp   = pl[0]; | ||||||
| 	for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) { | 	for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) { | ||||||
| 		caddr_t va; | 		caddr_t va; | ||||||
| 
 | 
 | ||||||
| 		ASSERT3U(io_off, ==, cur_pp->p_offset); | 		va = kmap(cur_pp); | ||||||
| 		va = zfs_map_page(cur_pp, S_WRITE); |  | ||||||
| 		err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va, | 		err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va, | ||||||
| 		    DMU_READ_PREFETCH); | 		    DMU_READ_PREFETCH); | ||||||
| 		zfs_unmap_page(cur_pp, va); | 		kunmap(cur_pp); | ||||||
| 		if (err) { | 		if (err) { | ||||||
| 			/* On error, toss the entire kluster */ |  | ||||||
| 			pvn_read_done(pp, B_ERROR); |  | ||||||
| 			/* convert checksum errors into IO errors */ | 			/* convert checksum errors into IO errors */ | ||||||
| 			if (err == ECKSUM) | 			if (err == ECKSUM) | ||||||
| 				err = EIO; | 				err = EIO; | ||||||
| 			return (err); | 			return (err); | ||||||
| 		} | 		} | ||||||
| 		cur_pp = cur_pp->p_next; | 		cur_pp = pl[++page_idx]; | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	/*
 |  | ||||||
| 	 * Fill in the page list array from the kluster starting |  | ||||||
| 	 * from the desired offset `off'. |  | ||||||
| 	 * NOTE: the page list will always be null terminated. |  | ||||||
| 	 */ |  | ||||||
| 	pvn_plist_init(pp, pl, plsz, off, io_len, rw); |  | ||||||
| 	ASSERT(pl == NULL || (*pl)->p_offset == off); |  | ||||||
| 
 |  | ||||||
| 	return (0); | 	return (0); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Return pointers to the pages for the file region [off, off + len] |  * Uses zfs_fillpage to read data from the file and fill the pages. | ||||||
|  * in the pl array.  If plsz is greater than len, this function may |  | ||||||
|  * also return page pointers from after the specified region |  | ||||||
|  * (i.e. the region [off, off + plsz]).  These additional pages are |  | ||||||
|  * only returned if they are already in the cache, or were created as |  | ||||||
|  * part of a klustered read. |  | ||||||
|  * |  * | ||||||
|  *	IN:	vp	- vnode of file to get data from. |  *	IN:	ip	 - inode of file to get data from. | ||||||
|  *		off	- position in file to get data from. |  *		pl	 - list of pages to read | ||||||
|  *		len	- amount of data to retrieve. |  *		nr_pages - number of pages to read | ||||||
|  *		plsz	- length of provided page list. |  | ||||||
|  *		seg	- segment to obtain pages for. |  | ||||||
|  *		addr	- virtual address of fault. |  | ||||||
|  *		rw	- mode of created pages. |  | ||||||
|  *		cr	- credentials of caller. |  | ||||||
|  *		ct	- caller context. |  | ||||||
|  * |  | ||||||
|  *	OUT:	protp	- protection mode of created pages. |  | ||||||
|  *		pl	- list of pages created. |  | ||||||
|  * |  * | ||||||
|  *	RETURN:	0 if success |  *	RETURN:	0 if success | ||||||
|  *		error code if failure |  *		error code if failure | ||||||
| @ -4143,80 +4017,30 @@ zfs_fillpage(vnode_t *vp, u_offset_t off, struct seg *seg, | |||||||
|  *	vp - atime updated |  *	vp - atime updated | ||||||
|  */ |  */ | ||||||
| /* ARGSUSED */ | /* ARGSUSED */ | ||||||
| static int | int | ||||||
| zfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp, | zfs_getpage(struct inode *ip, struct page *pl[], int nr_pages) | ||||||
| 	page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, |  | ||||||
| 	enum seg_rw rw, cred_t *cr) |  | ||||||
| { | { | ||||||
| 	znode_t		*zp = VTOZ(vp); | 	znode_t	 *zp  = ITOZ(ip); | ||||||
| 	zfsvfs_t	*zfsvfs = zp->z_zfsvfs; | 	zfs_sb_t *zsb = ITOZSB(ip); | ||||||
| 	page_t		**pl0 = pl; | 	int	 err; | ||||||
| 	int		err = 0; |  | ||||||
| 
 | 
 | ||||||
| 	/* we do our own caching, faultahead is unnecessary */ |  | ||||||
| 	if (pl == NULL) | 	if (pl == NULL) | ||||||
| 		return (0); | 		return (0); | ||||||
| 	else if (len > plsz) |  | ||||||
| 		len = plsz; |  | ||||||
| 	else |  | ||||||
| 		len = P2ROUNDUP(len, PAGESIZE); |  | ||||||
| 	ASSERT(plsz >= len); |  | ||||||
| 
 | 
 | ||||||
| 	ZFS_ENTER(zfsvfs); | 	ZFS_ENTER(zsb); | ||||||
| 	ZFS_VERIFY_ZP(zp); | 	ZFS_VERIFY_ZP(zp); | ||||||
| 
 | 
 | ||||||
| 	if (protp) | 	err = zfs_fillpage(ip, pl, nr_pages); | ||||||
| 		*protp = PROT_ALL; |  | ||||||
| 
 | 
 | ||||||
| 	/*
 | 	if (!err) | ||||||
| 	 * Loop through the requested range [off, off + len) looking | 		ZFS_ACCESSTIME_STAMP(zsb, zp); | ||||||
| 	 * for pages.  If we don't find a page, we will need to create |  | ||||||
| 	 * a new page and fill it with data from the file. |  | ||||||
| 	 */ |  | ||||||
| 	while (len > 0) { |  | ||||||
| 		if (*pl = page_lookup(vp, off, SE_SHARED)) |  | ||||||
| 			*(pl+1) = NULL; |  | ||||||
| 		else if (err = zfs_fillpage(vp, off, seg, addr, pl, plsz, rw)) |  | ||||||
| 			goto out; |  | ||||||
| 		while (*pl) { |  | ||||||
| 			ASSERT3U((*pl)->p_offset, ==, off); |  | ||||||
| 			off += PAGESIZE; |  | ||||||
| 			addr += PAGESIZE; |  | ||||||
| 			if (len > 0) { |  | ||||||
| 				ASSERT3U(len, >=, PAGESIZE); |  | ||||||
| 				len -= PAGESIZE; |  | ||||||
| 			} |  | ||||||
| 			ASSERT3U(plsz, >=, PAGESIZE); |  | ||||||
| 			plsz -= PAGESIZE; |  | ||||||
| 			pl++; |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 
 | 
 | ||||||
| 	/*
 | 	ZFS_EXIT(zsb); | ||||||
| 	 * Fill out the page array with any pages already in the cache. |  | ||||||
| 	 */ |  | ||||||
| 	while (plsz > 0 && |  | ||||||
| 	    (*pl++ = page_lookup_nowait(vp, off, SE_SHARED))) { |  | ||||||
| 			off += PAGESIZE; |  | ||||||
| 			plsz -= PAGESIZE; |  | ||||||
| 	} |  | ||||||
| out: |  | ||||||
| 	if (err) { |  | ||||||
| 		/*
 |  | ||||||
| 		 * Release any pages we have previously locked. |  | ||||||
| 		 */ |  | ||||||
| 		while (pl > pl0) |  | ||||||
| 			page_unlock(*--pl); |  | ||||||
| 	} else { |  | ||||||
| 		ZFS_ACCESSTIME_STAMP(zfsvfs, zp); |  | ||||||
| 	} |  | ||||||
| 
 |  | ||||||
| 	*pl = NULL; |  | ||||||
| 
 |  | ||||||
| 	ZFS_EXIT(zfsvfs); |  | ||||||
| 	return (err); | 	return (err); | ||||||
| } | } | ||||||
|  | EXPORT_SYMBOL(zfs_getpage); | ||||||
| 
 | 
 | ||||||
|  | #ifdef HAVE_MMAP | ||||||
| /*
 | /*
 | ||||||
|  * Request a memory map for a section of a file.  This code interacts |  * Request a memory map for a section of a file.  This code interacts | ||||||
|  * with common code and the VM system as follows: |  * with common code and the VM system as follows: | ||||||
|  | |||||||
| @ -254,6 +254,60 @@ zpl_mmap(struct file *filp, struct vm_area_struct *vma) | |||||||
| 	return (error); | 	return (error); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static struct page ** | ||||||
|  | pages_vector_from_list(struct list_head *pages, unsigned nr_pages) | ||||||
|  | { | ||||||
|  | 	struct page **pl; | ||||||
|  | 	struct page *t; | ||||||
|  | 	unsigned page_idx; | ||||||
|  | 
 | ||||||
|  | 	pl = kmalloc(sizeof(*pl) * nr_pages, GFP_NOFS); | ||||||
|  | 	if (!pl) | ||||||
|  | 		return ERR_PTR(-ENOMEM); | ||||||
|  | 
 | ||||||
|  | 	page_idx = 0; | ||||||
|  | 	list_for_each_entry_reverse(t, pages, lru) { | ||||||
|  | 		pl[page_idx] = t; | ||||||
|  | 		page_idx++; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	return pl; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static int | ||||||
|  | zpl_readpages(struct file *file, struct address_space *mapping, | ||||||
|  | 	struct list_head *pages, unsigned nr_pages) | ||||||
|  | { | ||||||
|  | 	struct inode *ip; | ||||||
|  | 	struct page  **pl; | ||||||
|  | 	struct page  *p, *n; | ||||||
|  | 	int          error; | ||||||
|  | 
 | ||||||
|  | 	ip = mapping->host; | ||||||
|  | 
 | ||||||
|  | 	pl = pages_vector_from_list(pages, nr_pages); | ||||||
|  | 	if (IS_ERR(pl)) | ||||||
|  | 		return PTR_ERR(pl); | ||||||
|  | 
 | ||||||
|  | 	error = -zfs_getpage(ip, pl, nr_pages); | ||||||
|  | 	if (error) | ||||||
|  | 		goto error; | ||||||
|  | 
 | ||||||
|  | 	list_for_each_entry_safe_reverse(p, n, pages, lru) { | ||||||
|  | 
 | ||||||
|  | 		list_del(&p->lru); | ||||||
|  | 
 | ||||||
|  | 		flush_dcache_page(p); | ||||||
|  | 		SetPageUptodate(p); | ||||||
|  | 		unlock_page(p); | ||||||
|  | 		page_cache_release(p); | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | error: | ||||||
|  | 	kfree(pl); | ||||||
|  | 	return error; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /*
 | /*
 | ||||||
|  * Populate a page with data for the Linux page cache.  This function is |  * Populate a page with data for the Linux page cache.  This function is | ||||||
|  * only used to support mmap(2).  There will be an identical copy of the |  * only used to support mmap(2).  There will be an identical copy of the | ||||||
| @ -267,33 +321,14 @@ static int | |||||||
| zpl_readpage(struct file *filp, struct page *pp) | zpl_readpage(struct file *filp, struct page *pp) | ||||||
| { | { | ||||||
| 	struct inode *ip; | 	struct inode *ip; | ||||||
| 	loff_t off, i_size; | 	struct page *pl[1]; | ||||||
| 	size_t len, wrote; |  | ||||||
| 	cred_t *cr = CRED(); |  | ||||||
| 	void *pb; |  | ||||||
| 	int error = 0; | 	int error = 0; | ||||||
| 
 | 
 | ||||||
| 	ASSERT(PageLocked(pp)); | 	ASSERT(PageLocked(pp)); | ||||||
| 	ip = pp->mapping->host; | 	ip = pp->mapping->host; | ||||||
| 	off = page_offset(pp); | 	pl[0] = pp; | ||||||
| 	i_size = i_size_read(ip); |  | ||||||
| 	ASSERT3S(off, <, i_size); |  | ||||||
| 
 | 
 | ||||||
| 	crhold(cr); | 	error = -zfs_getpage(ip, pl, 1); | ||||||
| 	len = MIN(PAGE_CACHE_SIZE, i_size - off); |  | ||||||
| 
 |  | ||||||
| 	pb = kmap(pp); |  | ||||||
| 
 |  | ||||||
| 	/* O_DIRECT is passed to bypass the page cache and avoid deadlock. */ |  | ||||||
| 	wrote = zpl_read_common(ip, pb, len, off, UIO_SYSSPACE, O_DIRECT, cr); |  | ||||||
| 	if (wrote != len) |  | ||||||
| 		error = -EIO; |  | ||||||
| 
 |  | ||||||
| 	if (!error && (len < PAGE_CACHE_SIZE)) |  | ||||||
| 		memset(pb + len, 0, PAGE_CACHE_SIZE - len); |  | ||||||
| 
 |  | ||||||
| 	kunmap(pp); |  | ||||||
| 	crfree(cr); |  | ||||||
| 
 | 
 | ||||||
| 	if (error) { | 	if (error) { | ||||||
| 		SetPageError(pp); | 		SetPageError(pp); | ||||||
| @ -305,47 +340,15 @@ zpl_readpage(struct file *filp, struct page *pp) | |||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	unlock_page(pp); | 	unlock_page(pp); | ||||||
| 
 | 	return error; | ||||||
| 	return (error); |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /*
 | int | ||||||
|  * Write out dirty pages to the ARC, this function is only required to | zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data) | ||||||
|  * support mmap(2).  Mapped pages may be dirtied by memory operations |  | ||||||
|  * which never call .write().  These dirty pages are kept in sync with |  | ||||||
|  * the ARC buffers via this hook. |  | ||||||
|  * |  | ||||||
|  * Currently this function relies on zpl_write_common() and the O_DIRECT |  | ||||||
|  * flag to push out the page.  This works but the more correct way is |  | ||||||
|  * to update zfs_putapage() to be Linux friendly and use that interface. |  | ||||||
|  */ |  | ||||||
| static int |  | ||||||
| zpl_writepage(struct page *pp, struct writeback_control *wbc) |  | ||||||
| { | { | ||||||
| 	struct inode *ip; | 	int error; | ||||||
| 	loff_t off, i_size; |  | ||||||
| 	size_t len, read; |  | ||||||
| 	cred_t *cr = CRED(); |  | ||||||
| 	void *pb; |  | ||||||
| 	int error = 0; |  | ||||||
| 
 | 
 | ||||||
| 	ASSERT(PageLocked(pp)); | 	error = -zfs_putpage(pp, wbc, data); | ||||||
| 	ip = pp->mapping->host; |  | ||||||
| 	off = page_offset(pp); |  | ||||||
| 	i_size = i_size_read(ip); |  | ||||||
| 
 |  | ||||||
| 	crhold(cr); |  | ||||||
| 	len = MIN(PAGE_CACHE_SIZE, i_size - off); |  | ||||||
| 
 |  | ||||||
| 	pb = kmap(pp); |  | ||||||
| 
 |  | ||||||
| 	/* O_DIRECT is passed to bypass the page cache and avoid deadlock. */ |  | ||||||
| 	read = zpl_write_common(ip, pb, len, off, UIO_SYSSPACE, O_DIRECT, cr); |  | ||||||
| 	if (read != len) |  | ||||||
| 		error = -EIO; |  | ||||||
| 
 |  | ||||||
| 	kunmap(pp); |  | ||||||
| 	crfree(cr); |  | ||||||
| 
 | 
 | ||||||
| 	if (error) { | 	if (error) { | ||||||
| 		SetPageError(pp); | 		SetPageError(pp); | ||||||
| @ -353,16 +356,36 @@ zpl_writepage(struct page *pp, struct writeback_control *wbc) | |||||||
| 	} else { | 	} else { | ||||||
| 		ClearPageError(pp); | 		ClearPageError(pp); | ||||||
| 		SetPageUptodate(pp); | 		SetPageUptodate(pp); | ||||||
|  | 		flush_dcache_page(pp); | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
| 	unlock_page(pp); | 	unlock_page(pp); | ||||||
|  | 	return error; | ||||||
|  | } | ||||||
| 
 | 
 | ||||||
| 	return (error); | static int | ||||||
|  | zpl_writepages(struct address_space *mapping, struct writeback_control *wbc) | ||||||
|  | { | ||||||
|  | 	return write_cache_pages(mapping, wbc, zpl_putpage, mapping); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Write out dirty pages to the ARC, this function is only required to | ||||||
|  |  * support mmap(2).  Mapped pages may be dirtied by memory operations | ||||||
|  |  * which never call .write().  These dirty pages are kept in sync with | ||||||
|  |  * the ARC buffers via this hook. | ||||||
|  |  */ | ||||||
|  | static int | ||||||
|  | zpl_writepage(struct page *pp, struct writeback_control *wbc) | ||||||
|  | { | ||||||
|  | 	return zpl_putpage(pp, wbc, pp->mapping); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| const struct address_space_operations zpl_address_space_operations = { | const struct address_space_operations zpl_address_space_operations = { | ||||||
|  | 	.readpages	= zpl_readpages, | ||||||
| 	.readpage	= zpl_readpage, | 	.readpage	= zpl_readpage, | ||||||
| 	.writepage	= zpl_writepage, | 	.writepage	= zpl_writepage, | ||||||
|  | 	.writepages     = zpl_writepages, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| const struct file_operations zpl_file_operations = { | const struct file_operations zpl_file_operations = { | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Prasad Joshi
						Prasad Joshi