mirror of
https://git.proxmox.com/git/mirror_zfs
synced 2025-04-28 06:00:44 +00:00
Use pin_user_pages API for Direct I/O requests
As of kernel v5.8, the pin_user_pages* interfaces were introduced. These interfaces use the FOLL_PIN flag. This is now the preferred interface for Direct I/O requests in the kernel. The reasoning for using this new interface for Direct I/O requests is explained in the kernel documentation: Documentation/core-api/pin_user_pages.rst If pin_user_pages_unlocked is available, then all Direct I/O requests will use this new API to stay up to date with the kernel API requirements. Reviewed-by: Alexander Motin <mav@FreeBSD.org> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Brian Atkinson <batkinson@lanl.gov> Closes #16856
This commit is contained in:
parent
c6442bd3b6
commit
882a809983
33
config/kernel-pin-user-pages.m4
Normal file
33
config/kernel-pin-user-pages.m4
Normal file
@ -0,0 +1,33 @@
|
||||
dnl #
|
||||
dnl # Check for pin_user_pages_unlocked().
|
||||
dnl #
|
||||
AC_DEFUN([ZFS_AC_KERNEL_SRC_PIN_USER_PAGES], [
	dnl #
	dnl # Register a compile test that calls pin_user_pages_unlocked()
	dnl # with the (start, nr_pages, pages, gup_flags) argument list.
	dnl #
	ZFS_LINUX_TEST_SRC([pin_user_pages_unlocked], [
		#include <linux/mm.h>
	],[
		struct page **pages = NULL;
		unsigned long start = 0;
		unsigned long nr_pages = 1;
		unsigned int gup_flags = 0;
		long ret __attribute__ ((unused));

		ret = pin_user_pages_unlocked(start, nr_pages, pages,
		    gup_flags);
	])
])
|
||||
|
||||
AC_DEFUN([ZFS_AC_KERNEL_PIN_USER_PAGES], [
	dnl #
	dnl # Kernel 5.8 introduced the pin_user_pages* interfaces, which
	dnl # should be used for Direct I/O requests. Define
	dnl # HAVE_PIN_USER_PAGES_UNLOCKED when the compile test passes.
	dnl #
	AC_MSG_CHECKING([whether pin_user_pages_unlocked() is available])
	ZFS_LINUX_TEST_RESULT([pin_user_pages_unlocked], [
		AC_MSG_RESULT([yes])
		AC_DEFINE([HAVE_PIN_USER_PAGES_UNLOCKED], [1],
		    [pin_user_pages_unlocked() is available])
	],[
		AC_MSG_RESULT([no])
	])
])
|
@ -13,20 +13,6 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_IOV_ITER], [
|
||||
error = fault_in_iov_iter_readable(&iter, size);
|
||||
])
|
||||
|
||||
ZFS_LINUX_TEST_SRC([iov_iter_get_pages2], [
|
||||
#include <linux/uio.h>
|
||||
],[
|
||||
struct iov_iter iter = { 0 };
|
||||
struct page **pages = NULL;
|
||||
size_t maxsize = 4096;
|
||||
unsigned maxpages = 1;
|
||||
size_t start;
|
||||
size_t ret __attribute__ ((unused));
|
||||
|
||||
ret = iov_iter_get_pages2(&iter, pages, maxsize, maxpages,
|
||||
&start);
|
||||
])
|
||||
|
||||
ZFS_LINUX_TEST_SRC([iov_iter_type], [
|
||||
#include <linux/fs.h>
|
||||
#include <linux/uio.h>
|
||||
@ -35,6 +21,15 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_IOV_ITER], [
|
||||
__attribute__((unused)) enum iter_type i = iov_iter_type(&iter);
|
||||
])
|
||||
|
||||
ZFS_LINUX_TEST_SRC([iter_is_ubuf], [
|
||||
#include <linux/uio.h>
|
||||
],[
|
||||
struct iov_iter iter = { 0 };
|
||||
bool ret __attribute__((unused));
|
||||
|
||||
ret = iter_is_ubuf(&iter);
|
||||
])
|
||||
|
||||
ZFS_LINUX_TEST_SRC([iter_iov], [
|
||||
#include <linux/fs.h>
|
||||
#include <linux/uio.h>
|
||||
@ -55,18 +50,6 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_IOV_ITER], [
|
||||
AC_MSG_RESULT(no)
|
||||
])
|
||||
|
||||
dnl #
|
||||
dnl # Kernel 6.0 changed iov_iter_get_pages() to iov_iter_get_pages2().
|
||||
dnl #
|
||||
AC_MSG_CHECKING([whether iov_iter_get_pages2() is available])
|
||||
ZFS_LINUX_TEST_RESULT([iov_iter_get_pages2], [
|
||||
AC_MSG_RESULT(yes)
|
||||
AC_DEFINE(HAVE_IOV_ITER_GET_PAGES2, 1,
|
||||
[iov_iter_get_pages2() is available])
|
||||
],[
|
||||
AC_MSG_RESULT(no)
|
||||
])
|
||||
|
||||
dnl #
|
||||
dnl # This checks for iov_iter_type() in linux/uio.h. It is not
|
||||
dnl # required, however, and the module will compile without it
|
||||
@ -81,6 +64,18 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_IOV_ITER], [
|
||||
AC_MSG_RESULT(no)
|
||||
])
|
||||
|
||||
dnl #
|
||||
dnl # Kernel 6.0 introduced the ITER_UBUF iov_iter type. iter_is_ubuf()
|
||||
dnl # was also added to determine if the iov_iter is an ITER_UBUF.
|
||||
dnl #
|
||||
AC_MSG_CHECKING([whether iter_is_ubuf() is available])
|
||||
ZFS_LINUX_TEST_RESULT([iter_is_ubuf], [
|
||||
AC_MSG_RESULT(yes)
|
||||
AC_DEFINE(HAVE_ITER_IS_UBUF, 1, [iter_is_ubuf() is available])
|
||||
],[
|
||||
AC_MSG_RESULT(no)
|
||||
])
|
||||
|
||||
dnl #
|
||||
dnl # Kernel 6.5 introduces the iter_iov() function that returns the
|
||||
dnl # __iov member of an iov_iter*. The iov member was renamed to this
|
||||
|
@ -127,6 +127,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
|
||||
ZFS_AC_KERNEL_SRC_MM_PAGE_SIZE
|
||||
ZFS_AC_KERNEL_SRC_MM_PAGE_MAPPING
|
||||
ZFS_AC_KERNEL_SRC_FILE
|
||||
ZFS_AC_KERNEL_SRC_PIN_USER_PAGES
|
||||
case "$host_cpu" in
|
||||
powerpc*)
|
||||
ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE
|
||||
@ -238,6 +239,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
|
||||
ZFS_AC_KERNEL_MM_PAGE_MAPPING
|
||||
ZFS_AC_KERNEL_1ARG_ASSIGN_STR
|
||||
ZFS_AC_KERNEL_FILE
|
||||
ZFS_AC_KERNEL_PIN_USER_PAGES
|
||||
case "$host_cpu" in
|
||||
powerpc*)
|
||||
ZFS_AC_KERNEL_CPU_HAS_FEATURE
|
||||
|
@ -441,6 +441,7 @@ zfs_unmark_page(struct page *page)
|
||||
}
|
||||
#endif /* HAVE_ZERO_PAGE_GPL_ONLY || !_LP64 */
|
||||
|
||||
#if !defined(HAVE_PIN_USER_PAGES_UNLOCKED)
|
||||
static void
|
||||
zfs_uio_dio_check_for_zero_page(zfs_uio_t *uio)
|
||||
{
|
||||
@ -472,6 +473,7 @@ zfs_uio_dio_check_for_zero_page(zfs_uio_t *uio)
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void
|
||||
zfs_uio_free_dio_pages(zfs_uio_t *uio, zfs_uio_rw_t rw)
|
||||
@ -480,6 +482,9 @@ zfs_uio_free_dio_pages(zfs_uio_t *uio, zfs_uio_rw_t rw)
|
||||
ASSERT(uio->uio_extflg & UIO_DIRECT);
|
||||
ASSERT3P(uio->uio_dio.pages, !=, NULL);
|
||||
|
||||
#if defined(HAVE_PIN_USER_PAGES_UNLOCKED)
|
||||
unpin_user_pages(uio->uio_dio.pages, uio->uio_dio.npages);
|
||||
#else
|
||||
for (long i = 0; i < uio->uio_dio.npages; i++) {
|
||||
struct page *p = uio->uio_dio.pages[i];
|
||||
|
||||
@ -491,44 +496,106 @@ zfs_uio_free_dio_pages(zfs_uio_t *uio, zfs_uio_rw_t rw)
|
||||
|
||||
put_page(p);
|
||||
}
|
||||
|
||||
#endif
|
||||
vmem_free(uio->uio_dio.pages,
|
||||
uio->uio_dio.npages * sizeof (struct page *));
|
||||
}
|
||||
|
||||
#if defined(HAVE_PIN_USER_PAGES_UNLOCKED)
|
||||
static int
|
||||
zfs_uio_pin_user_pages(zfs_uio_t *uio, zfs_uio_rw_t rw)
|
||||
{
|
||||
long res;
|
||||
size_t skip = uio->uio_skip;
|
||||
size_t len = uio->uio_resid - skip;
|
||||
unsigned int gup_flags = 0;
|
||||
unsigned long addr;
|
||||
unsigned long nr_pages;
|
||||
|
||||
/*
|
||||
* Kernel 6.2 introduced the FOLL_PCI_P2PDMA flag. This flag could
|
||||
* possibly be used here in the future to allow for P2P operations with
|
||||
* user pages.
|
||||
*/
|
||||
if (rw == UIO_READ)
|
||||
gup_flags = FOLL_WRITE;
|
||||
|
||||
if (len == 0)
|
||||
return (0);
|
||||
|
||||
#if defined(HAVE_ITER_IS_UBUF)
|
||||
if (iter_is_ubuf(uio->uio_iter)) {
|
||||
nr_pages = DIV_ROUND_UP(len, PAGE_SIZE);
|
||||
addr = (unsigned long)uio->uio_iter->ubuf + skip;
|
||||
res = pin_user_pages_unlocked(addr, nr_pages,
|
||||
&uio->uio_dio.pages[uio->uio_dio.npages], gup_flags);
|
||||
if (res < 0) {
|
||||
return (SET_ERROR(-res));
|
||||
} else if (len != (res * PAGE_SIZE)) {
|
||||
uio->uio_dio.npages += res;
|
||||
return (SET_ERROR(EFAULT));
|
||||
}
|
||||
uio->uio_dio.npages += res;
|
||||
return (0);
|
||||
}
|
||||
#endif
|
||||
const struct iovec *iovp = zfs_uio_iter_iov(uio->uio_iter);
|
||||
for (int i = 0; i < uio->uio_iovcnt; i++) {
|
||||
size_t amt = iovp->iov_len - skip;
|
||||
if (amt == 0) {
|
||||
iovp++;
|
||||
skip = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
addr = (unsigned long)iovp->iov_base + skip;
|
||||
nr_pages = DIV_ROUND_UP(amt, PAGE_SIZE);
|
||||
res = pin_user_pages_unlocked(addr, nr_pages,
|
||||
&uio->uio_dio.pages[uio->uio_dio.npages], gup_flags);
|
||||
if (res < 0) {
|
||||
return (SET_ERROR(-res));
|
||||
} else if (amt != (res * PAGE_SIZE)) {
|
||||
uio->uio_dio.npages += res;
|
||||
return (SET_ERROR(EFAULT));
|
||||
}
|
||||
|
||||
len -= amt;
|
||||
uio->uio_dio.npages += res;
|
||||
skip = 0;
|
||||
iovp++;
|
||||
};
|
||||
|
||||
ASSERT0(len);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
#else
|
||||
static int
|
||||
zfs_uio_get_dio_pages_iov_iter(zfs_uio_t *uio, zfs_uio_rw_t rw)
|
||||
{
|
||||
size_t skip = uio->uio_skip;
|
||||
size_t start;
|
||||
size_t wanted = uio->uio_resid - uio->uio_skip;
|
||||
ssize_t rollback = 0;
|
||||
ssize_t cnt;
|
||||
unsigned maxpages = DIV_ROUND_UP(wanted, PAGE_SIZE);
|
||||
|
||||
while (wanted) {
|
||||
#if defined(HAVE_IOV_ITER_GET_PAGES2)
|
||||
cnt = iov_iter_get_pages2(uio->uio_iter,
|
||||
&uio->uio_dio.pages[uio->uio_dio.npages],
|
||||
wanted, maxpages, &skip);
|
||||
#else
|
||||
cnt = iov_iter_get_pages(uio->uio_iter,
|
||||
&uio->uio_dio.pages[uio->uio_dio.npages],
|
||||
wanted, maxpages, &skip);
|
||||
#endif
|
||||
wanted, maxpages, &start);
|
||||
if (cnt < 0) {
|
||||
iov_iter_revert(uio->uio_iter, rollback);
|
||||
return (SET_ERROR(-cnt));
|
||||
}
|
||||
/*
|
||||
* All Direct I/O operations must be page aligned.
|
||||
*/
|
||||
ASSERT(IS_P2ALIGNED(start, PAGE_SIZE));
|
||||
uio->uio_dio.npages += DIV_ROUND_UP(cnt, PAGE_SIZE);
|
||||
rollback += cnt;
|
||||
wanted -= cnt;
|
||||
skip = 0;
|
||||
#if !defined(HAVE_IOV_ITER_GET_PAGES2)
|
||||
/*
|
||||
* iov_iter_get_pages2() advances the iov_iter on success.
|
||||
*/
|
||||
iov_iter_advance(uio->uio_iter, cnt);
|
||||
#endif
|
||||
|
||||
}
|
||||
ASSERT3U(rollback, ==, uio->uio_resid - uio->uio_skip);
|
||||
@ -536,6 +603,7 @@ zfs_uio_get_dio_pages_iov_iter(zfs_uio_t *uio, zfs_uio_rw_t rw)
|
||||
|
||||
return (0);
|
||||
}
|
||||
#endif /* HAVE_PIN_USER_PAGES_UNLOCKED */
|
||||
|
||||
/*
|
||||
* This function pins user pages. In the event that the user pages were not
|
||||
@ -552,7 +620,11 @@ zfs_uio_get_dio_pages_alloc(zfs_uio_t *uio, zfs_uio_rw_t rw)
|
||||
|
||||
if (uio->uio_segflg == UIO_ITER) {
|
||||
uio->uio_dio.pages = vmem_alloc(size, KM_SLEEP);
|
||||
#if defined(HAVE_PIN_USER_PAGES_UNLOCKED)
|
||||
error = zfs_uio_pin_user_pages(uio, rw);
|
||||
#else
|
||||
error = zfs_uio_get_dio_pages_iov_iter(uio, rw);
|
||||
#endif
|
||||
} else {
|
||||
return (SET_ERROR(EOPNOTSUPP));
|
||||
}
|
||||
@ -560,17 +632,22 @@ zfs_uio_get_dio_pages_alloc(zfs_uio_t *uio, zfs_uio_rw_t rw)
|
||||
ASSERT3S(uio->uio_dio.npages, >=, 0);
|
||||
|
||||
if (error) {
|
||||
#if defined(HAVE_PIN_USER_PAGES_UNLOCKED)
|
||||
unpin_user_pages(uio->uio_dio.pages, uio->uio_dio.npages);
|
||||
#else
|
||||
for (long i = 0; i < uio->uio_dio.npages; i++)
|
||||
put_page(uio->uio_dio.pages[i]);
|
||||
#endif
|
||||
vmem_free(uio->uio_dio.pages, size);
|
||||
return (error);
|
||||
} else {
|
||||
ASSERT3S(uio->uio_dio.npages, ==, npages);
|
||||
}
|
||||
|
||||
if (rw == UIO_WRITE) {
|
||||
#if !defined(HAVE_PIN_USER_PAGES_UNLOCKED)
|
||||
if (rw == UIO_WRITE)
|
||||
zfs_uio_dio_check_for_zero_page(uio);
|
||||
}
|
||||
#endif
|
||||
|
||||
uio->uio_extflg |= UIO_DIRECT;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user