mirror of
				https://git.proxmox.com/git/mirror_zfs
				synced 2025-10-31 20:33:04 +00:00 
			
		
		
		
	 1d3ba0bf01
			
		
	
	
		1d3ba0bf01
		
			
		
	
	
	
	
		
			
			The commit replaces all findings of the link: http://www.opensolaris.org/os/licensing with this one: https://opensource.org/licenses/CDDL-1.0 Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Tino Reichardt <milky-zfs@mcmilk.de> Closes #13619
		
			
				
	
	
		
			475 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			475 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * CDDL HEADER START
 | |
|  *
 | |
|  * The contents of this file are subject to the terms of the
 | |
|  * Common Development and Distribution License (the "License").
 | |
|  * You may not use this file except in compliance with the License.
 | |
|  *
 | |
|  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 | |
|  * or https://opensource.org/licenses/CDDL-1.0.
 | |
|  * See the License for the specific language governing permissions
 | |
|  * and limitations under the License.
 | |
|  *
 | |
|  * When distributing Covered Code, include this CDDL HEADER in each
 | |
|  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 | |
|  * If applicable, add the following below this CDDL HEADER, with the
 | |
|  * fields enclosed by brackets "[]" replaced with your own identifying
 | |
|  * information: Portions Copyright [yyyy] [name of copyright owner]
 | |
|  *
 | |
|  * CDDL HEADER END
 | |
|  */
 | |
| /*
 | |
|  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 | |
|  * Use is subject to license terms.
 | |
|  */
 | |
| 
 | |
| /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
 | |
| /*	  All Rights Reserved	*/
 | |
| 
 | |
| /*
 | |
|  * University Copyright- Copyright (c) 1982, 1986, 1988
 | |
|  * The Regents of the University of California
 | |
|  * All Rights Reserved
 | |
|  *
 | |
|  * University Acknowledgment- Portions of this document are derived from
 | |
|  * software developed by the University of California, Berkeley, and its
 | |
|  * contributors.
 | |
|  */
 | |
| /*
 | |
|  * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
 | |
|  */
 | |
| 
 | |
| #ifdef _KERNEL
 | |
| 
 | |
| #include <sys/types.h>
 | |
| #include <sys/uio_impl.h>
 | |
| #include <sys/sysmacros.h>
 | |
| #include <sys/string.h>
 | |
| #include <linux/kmap_compat.h>
 | |
| #include <linux/uaccess.h>
 | |
| 
 | |
| /*
 | |
|  * Move "n" bytes at byte address "p"; "rw" indicates the direction
 | |
|  * of the move, and the I/O parameters are provided in "uio", which is
 | |
|  * update to reflect the data which was moved.  Returns 0 on success or
 | |
|  * a non-zero errno on failure.
 | |
|  */
 | |
| static int
 | |
| zfs_uiomove_iov(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
 | |
| {
 | |
| 	const struct iovec *iov = uio->uio_iov;
 | |
| 	size_t skip = uio->uio_skip;
 | |
| 	ulong_t cnt;
 | |
| 
 | |
| 	while (n && uio->uio_resid) {
 | |
| 		cnt = MIN(iov->iov_len - skip, n);
 | |
| 		switch (uio->uio_segflg) {
 | |
| 		case UIO_USERSPACE:
 | |
| 			/*
 | |
| 			 * p = kernel data pointer
 | |
| 			 * iov->iov_base = user data pointer
 | |
| 			 */
 | |
| 			if (rw == UIO_READ) {
 | |
| 				if (copy_to_user(iov->iov_base+skip, p, cnt))
 | |
| 					return (EFAULT);
 | |
| 			} else {
 | |
| 				unsigned long b_left = 0;
 | |
| 				if (uio->uio_fault_disable) {
 | |
| 					if (!zfs_access_ok(VERIFY_READ,
 | |
| 					    (iov->iov_base + skip), cnt)) {
 | |
| 						return (EFAULT);
 | |
| 					}
 | |
| 					pagefault_disable();
 | |
| 					b_left =
 | |
| 					    __copy_from_user_inatomic(p,
 | |
| 					    (iov->iov_base + skip), cnt);
 | |
| 					pagefault_enable();
 | |
| 				} else {
 | |
| 					b_left =
 | |
| 					    copy_from_user(p,
 | |
| 					    (iov->iov_base + skip), cnt);
 | |
| 				}
 | |
| 				if (b_left > 0) {
 | |
| 					unsigned long c_bytes =
 | |
| 					    cnt - b_left;
 | |
| 					uio->uio_skip += c_bytes;
 | |
| 					ASSERT3U(uio->uio_skip, <,
 | |
| 					    iov->iov_len);
 | |
| 					uio->uio_resid -= c_bytes;
 | |
| 					uio->uio_loffset += c_bytes;
 | |
| 					return (EFAULT);
 | |
| 				}
 | |
| 			}
 | |
| 			break;
 | |
| 		case UIO_SYSSPACE:
 | |
| 			if (rw == UIO_READ)
 | |
| 				memcpy(iov->iov_base + skip, p, cnt);
 | |
| 			else
 | |
| 				memcpy(p, iov->iov_base + skip, cnt);
 | |
| 			break;
 | |
| 		default:
 | |
| 			ASSERT(0);
 | |
| 		}
 | |
| 		skip += cnt;
 | |
| 		if (skip == iov->iov_len) {
 | |
| 			skip = 0;
 | |
| 			uio->uio_iov = (++iov);
 | |
| 			uio->uio_iovcnt--;
 | |
| 		}
 | |
| 		uio->uio_skip = skip;
 | |
| 		uio->uio_resid -= cnt;
 | |
| 		uio->uio_loffset += cnt;
 | |
| 		p = (caddr_t)p + cnt;
 | |
| 		n -= cnt;
 | |
| 	}
 | |
| 	return (0);
 | |
| }
 | |
| 
 | |
| static int
 | |
| zfs_uiomove_bvec_impl(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
 | |
| {
 | |
| 	const struct bio_vec *bv = uio->uio_bvec;
 | |
| 	size_t skip = uio->uio_skip;
 | |
| 	ulong_t cnt;
 | |
| 
 | |
| 	while (n && uio->uio_resid) {
 | |
| 		void *paddr;
 | |
| 		cnt = MIN(bv->bv_len - skip, n);
 | |
| 
 | |
| 		paddr = zfs_kmap_atomic(bv->bv_page);
 | |
| 		if (rw == UIO_READ) {
 | |
| 			/* Copy from buffer 'p' to the bvec data */
 | |
| 			memcpy(paddr + bv->bv_offset + skip, p, cnt);
 | |
| 		} else {
 | |
| 			/* Copy from bvec data to buffer 'p' */
 | |
| 			memcpy(p, paddr + bv->bv_offset + skip, cnt);
 | |
| 		}
 | |
| 		zfs_kunmap_atomic(paddr);
 | |
| 
 | |
| 		skip += cnt;
 | |
| 		if (skip == bv->bv_len) {
 | |
| 			skip = 0;
 | |
| 			uio->uio_bvec = (++bv);
 | |
| 			uio->uio_iovcnt--;
 | |
| 		}
 | |
| 		uio->uio_skip = skip;
 | |
| 		uio->uio_resid -= cnt;
 | |
| 		uio->uio_loffset += cnt;
 | |
| 		p = (caddr_t)p + cnt;
 | |
| 		n -= cnt;
 | |
| 	}
 | |
| 	return (0);
 | |
| }
 | |
| 
 | |
| #ifdef HAVE_BLK_MQ
 | |
| static void
 | |
| zfs_copy_bvec(void *p, size_t skip, size_t cnt, zfs_uio_rw_t rw,
 | |
|     struct bio_vec *bv)
 | |
| {
 | |
| 	void *paddr;
 | |
| 
 | |
| 	paddr = zfs_kmap_atomic(bv->bv_page);
 | |
| 	if (rw == UIO_READ) {
 | |
| 		/* Copy from buffer 'p' to the bvec data */
 | |
| 		memcpy(paddr + bv->bv_offset + skip, p, cnt);
 | |
| 	} else {
 | |
| 		/* Copy from bvec data to buffer 'p' */
 | |
| 		memcpy(p, paddr + bv->bv_offset + skip, cnt);
 | |
| 	}
 | |
| 	zfs_kunmap_atomic(paddr);
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Copy 'n' bytes of data between the buffer p[] and the data represented
 | |
|  * by the request in the uio.
 | |
|  */
 | |
| static int
 | |
| zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
 | |
| {
 | |
| 	struct request *rq = uio->rq;
 | |
| 	struct bio_vec bv;
 | |
| 	struct req_iterator iter;
 | |
| 	size_t this_seg_start;	/* logical offset */
 | |
| 	size_t this_seg_end;		/* logical offset */
 | |
| 	size_t skip_in_seg;
 | |
| 	size_t copy_from_seg;
 | |
| 	size_t orig_loffset;
 | |
| 	int copied = 0;
 | |
| 
 | |
| 	/*
 | |
| 	 * Get the original logical offset of this entire request (because
 | |
| 	 * uio->uio_loffset will be modified over time).
 | |
| 	 */
 | |
| 	orig_loffset = io_offset(NULL, rq);
 | |
| 	this_seg_start = orig_loffset;
 | |
| 
 | |
| 	rq_for_each_segment(bv, rq, iter) {
 | |
| 		if (uio->iter.bio) {
 | |
| 			/*
 | |
| 			 * If uio->iter.bio is present, then we know we've saved
 | |
| 			 * uio->iter from a previous call to this function, and
 | |
| 			 * we can skip ahead in this rq_for_each_segment() loop
 | |
| 			 * to where we last left off.  That way, we don't need
 | |
| 			 * to iterate over tons of segments we've already
 | |
| 			 * processed - we can just restore the "saved state".
 | |
| 			 */
 | |
| 			iter = uio->iter;
 | |
| 			bv = uio->bv;
 | |
| 			this_seg_start = uio->uio_loffset;
 | |
| 			memset(&uio->iter, 0, sizeof (uio->iter));
 | |
| 			continue;
 | |
| 		}
 | |
| 
 | |
| 		/*
 | |
| 		 * Lookup what the logical offset of the last byte of this
 | |
| 		 * segment is.
 | |
| 		 */
 | |
| 		this_seg_end = this_seg_start + bv.bv_len - 1;
 | |
| 
 | |
| 		/*
 | |
| 		 * We only need to operate on segments that have data we're
 | |
| 		 * copying.
 | |
| 		 */
 | |
| 		if (uio->uio_loffset >= this_seg_start &&
 | |
| 		    uio->uio_loffset <= this_seg_end) {
 | |
| 			/*
 | |
| 			 * Some, or all, of the data in this segment needs to be
 | |
| 			 * copied.
 | |
| 			 */
 | |
| 
 | |
| 			/*
 | |
| 			 * We may be not be copying from the first byte in the
 | |
| 			 * segment.  Figure out how many bytes to skip copying
 | |
| 			 * from the beginning of this segment.
 | |
| 			 */
 | |
| 			skip_in_seg = uio->uio_loffset - this_seg_start;
 | |
| 
 | |
| 			/*
 | |
| 			 * Calculate the total number of bytes from this
 | |
| 			 * segment that we will be copying.
 | |
| 			 */
 | |
| 			copy_from_seg = MIN(bv.bv_len - skip_in_seg, n);
 | |
| 
 | |
| 			/* Copy the bytes */
 | |
| 			zfs_copy_bvec(p, skip_in_seg, copy_from_seg, rw, &bv);
 | |
| 			p = ((char *)p) + copy_from_seg;
 | |
| 
 | |
| 			n -= copy_from_seg;
 | |
| 			uio->uio_resid -= copy_from_seg;
 | |
| 			uio->uio_loffset += copy_from_seg;
 | |
| 			copied = 1;	/* We copied some data */
 | |
| 		}
 | |
| 
 | |
| 		if (n == 0) {
 | |
| 			/*
 | |
| 			 * All done copying.  Save our 'iter' value to the uio.
 | |
| 			 * This allows us to "save our state" and skip ahead in
 | |
| 			 * the rq_for_each_segment() loop the next time we call
 | |
| 			 * call zfs_uiomove_bvec_rq() on this uio (which we
 | |
| 			 * will be doing for any remaining data in the uio).
 | |
| 			 */
 | |
| 			uio->iter = iter; /* make a copy of the struct data */
 | |
| 			uio->bv = bv;
 | |
| 			return (0);
 | |
| 		}
 | |
| 
 | |
| 		this_seg_start = this_seg_end + 1;
 | |
| 	}
 | |
| 
 | |
| 	if (!copied) {
 | |
| 		/* Didn't copy anything */
 | |
| 		uio->uio_resid = 0;
 | |
| 	}
 | |
| 	return (0);
 | |
| }
 | |
| #endif
 | |
| 
 | |
| static int
 | |
| zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
 | |
| {
 | |
| #ifdef HAVE_BLK_MQ
 | |
| 	if (uio->rq != NULL)
 | |
| 		return (zfs_uiomove_bvec_rq(p, n, rw, uio));
 | |
| #else
 | |
| 	ASSERT3P(uio->rq, ==, NULL);
 | |
| #endif
 | |
| 	return (zfs_uiomove_bvec_impl(p, n, rw, uio));
 | |
| }
 | |
| 
 | |
| #if defined(HAVE_VFS_IOV_ITER)
 | |
| static int
 | |
| zfs_uiomove_iter(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio,
 | |
|     boolean_t revert)
 | |
| {
 | |
| 	size_t cnt = MIN(n, uio->uio_resid);
 | |
| 
 | |
| 	if (uio->uio_skip)
 | |
| 		iov_iter_advance(uio->uio_iter, uio->uio_skip);
 | |
| 
 | |
| 	if (rw == UIO_READ)
 | |
| 		cnt = copy_to_iter(p, cnt, uio->uio_iter);
 | |
| 	else
 | |
| 		cnt = copy_from_iter(p, cnt, uio->uio_iter);
 | |
| 
 | |
| 	/*
 | |
| 	 * When operating on a full pipe no bytes are processed.
 | |
| 	 * In which case return EFAULT which is converted to EAGAIN
 | |
| 	 * by the kernel's generic_file_splice_read() function.
 | |
| 	 */
 | |
| 	if (cnt == 0)
 | |
| 		return (EFAULT);
 | |
| 
 | |
| 	/*
 | |
| 	 * Revert advancing the uio_iter.  This is set by zfs_uiocopy()
 | |
| 	 * to avoid consuming the uio and its iov_iter structure.
 | |
| 	 */
 | |
| 	if (revert)
 | |
| 		iov_iter_revert(uio->uio_iter, cnt);
 | |
| 
 | |
| 	uio->uio_resid -= cnt;
 | |
| 	uio->uio_loffset += cnt;
 | |
| 
 | |
| 	return (0);
 | |
| }
 | |
| #endif
 | |
| 
 | |
| int
 | |
| zfs_uiomove(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
 | |
| {
 | |
| 	if (uio->uio_segflg == UIO_BVEC)
 | |
| 		return (zfs_uiomove_bvec(p, n, rw, uio));
 | |
| #if defined(HAVE_VFS_IOV_ITER)
 | |
| 	else if (uio->uio_segflg == UIO_ITER)
 | |
| 		return (zfs_uiomove_iter(p, n, rw, uio, B_FALSE));
 | |
| #endif
 | |
| 	else
 | |
| 		return (zfs_uiomove_iov(p, n, rw, uio));
 | |
| }
 | |
| EXPORT_SYMBOL(zfs_uiomove);
 | |
| 
 | |
| /*
 | |
|  * Fault in the pages of the first n bytes specified by the uio structure.
 | |
|  * 1 byte in each page is touched and the uio struct is unmodified. Any
 | |
|  * error will terminate the process as this is only a best attempt to get
 | |
|  * the pages resident.
 | |
|  */
 | |
| int
 | |
| zfs_uio_prefaultpages(ssize_t n, zfs_uio_t *uio)
 | |
| {
 | |
| 	if (uio->uio_segflg == UIO_SYSSPACE || uio->uio_segflg == UIO_BVEC) {
 | |
| 		/* There's never a need to fault in kernel pages */
 | |
| 		return (0);
 | |
| #if defined(HAVE_VFS_IOV_ITER)
 | |
| 	} else if (uio->uio_segflg == UIO_ITER) {
 | |
| 		/*
 | |
| 		 * At least a Linux 4.9 kernel, iov_iter_fault_in_readable()
 | |
| 		 * can be relied on to fault in user pages when referenced.
 | |
| 		 */
 | |
| 		if (iov_iter_fault_in_readable(uio->uio_iter, n))
 | |
| 			return (EFAULT);
 | |
| #endif
 | |
| 	} else {
 | |
| 		/* Fault in all user pages */
 | |
| 		ASSERT3S(uio->uio_segflg, ==, UIO_USERSPACE);
 | |
| 		const struct iovec *iov = uio->uio_iov;
 | |
| 		int iovcnt = uio->uio_iovcnt;
 | |
| 		size_t skip = uio->uio_skip;
 | |
| 		uint8_t tmp;
 | |
| 		caddr_t p;
 | |
| 
 | |
| 		for (; n > 0 && iovcnt > 0; iov++, iovcnt--, skip = 0) {
 | |
| 			ulong_t cnt = MIN(iov->iov_len - skip, n);
 | |
| 			/* empty iov */
 | |
| 			if (cnt == 0)
 | |
| 				continue;
 | |
| 			n -= cnt;
 | |
| 			/* touch each page in this segment. */
 | |
| 			p = iov->iov_base + skip;
 | |
| 			while (cnt) {
 | |
| 				if (copy_from_user(&tmp, p, 1))
 | |
| 					return (EFAULT);
 | |
| 				ulong_t incr = MIN(cnt, PAGESIZE);
 | |
| 				p += incr;
 | |
| 				cnt -= incr;
 | |
| 			}
 | |
| 			/* touch the last byte in case it straddles a page. */
 | |
| 			p--;
 | |
| 			if (copy_from_user(&tmp, p, 1))
 | |
| 				return (EFAULT);
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return (0);
 | |
| }
 | |
| EXPORT_SYMBOL(zfs_uio_prefaultpages);
 | |
| 
 | |
| /*
 | |
|  * The same as zfs_uiomove() but doesn't modify uio structure.
 | |
|  * return in cbytes how many bytes were copied.
 | |
|  */
 | |
| int
 | |
| zfs_uiocopy(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio, size_t *cbytes)
 | |
| {
 | |
| 	zfs_uio_t uio_copy;
 | |
| 	int ret;
 | |
| 
 | |
| 	memcpy(&uio_copy, uio, sizeof (zfs_uio_t));
 | |
| 
 | |
| 	if (uio->uio_segflg == UIO_BVEC)
 | |
| 		ret = zfs_uiomove_bvec(p, n, rw, &uio_copy);
 | |
| #if defined(HAVE_VFS_IOV_ITER)
 | |
| 	else if (uio->uio_segflg == UIO_ITER)
 | |
| 		ret = zfs_uiomove_iter(p, n, rw, &uio_copy, B_TRUE);
 | |
| #endif
 | |
| 	else
 | |
| 		ret = zfs_uiomove_iov(p, n, rw, &uio_copy);
 | |
| 
 | |
| 	*cbytes = uio->uio_resid - uio_copy.uio_resid;
 | |
| 
 | |
| 	return (ret);
 | |
| }
 | |
| EXPORT_SYMBOL(zfs_uiocopy);
 | |
| 
 | |
| /*
 | |
|  * Drop the next n chars out of *uio.
 | |
|  */
 | |
| void
 | |
| zfs_uioskip(zfs_uio_t *uio, size_t n)
 | |
| {
 | |
| 	if (n > uio->uio_resid)
 | |
| 		return;
 | |
| 	/*
 | |
| 	 * When using a uio with a struct request, we simply
 | |
| 	 * use uio_loffset as a pointer to the next logical byte to
 | |
| 	 * copy in the request.  We don't have to do any fancy
 | |
| 	 * accounting with uio_bvec/uio_iovcnt since we don't use
 | |
| 	 * them.
 | |
| 	 */
 | |
| 	if (uio->uio_segflg == UIO_BVEC && uio->rq == NULL) {
 | |
| 		uio->uio_skip += n;
 | |
| 		while (uio->uio_iovcnt &&
 | |
| 		    uio->uio_skip >= uio->uio_bvec->bv_len) {
 | |
| 			uio->uio_skip -= uio->uio_bvec->bv_len;
 | |
| 			uio->uio_bvec++;
 | |
| 			uio->uio_iovcnt--;
 | |
| 		}
 | |
| #if defined(HAVE_VFS_IOV_ITER)
 | |
| 	} else if (uio->uio_segflg == UIO_ITER) {
 | |
| 		iov_iter_advance(uio->uio_iter, n);
 | |
| #endif
 | |
| 	} else {
 | |
| 		uio->uio_skip += n;
 | |
| 		while (uio->uio_iovcnt &&
 | |
| 		    uio->uio_skip >= uio->uio_iov->iov_len) {
 | |
| 			uio->uio_skip -= uio->uio_iov->iov_len;
 | |
| 			uio->uio_iov++;
 | |
| 			uio->uio_iovcnt--;
 | |
| 		}
 | |
| 	}
 | |
| 	uio->uio_loffset += n;
 | |
| 	uio->uio_resid -= n;
 | |
| }
 | |
| EXPORT_SYMBOL(zfs_uioskip);
 | |
| 
 | |
| #endif /* _KERNEL */
 |