Use pin_user_pages API for Direct I/O requests

As of kernel v5.8, the pin_user_pages* interfaces were introduced. These
interfaces use the FOLL_PIN flag. This is now the preferred interface for
Direct I/O requests in the kernel. The reasoning for using this new
interface for Direct I/O requests is explained in the kernel
documentation:
Documentation/core-api/pin_user_pages.rst

If pin_user_pages_unlocked() is available, then all Direct I/O requests
will use this new API to stay up to date with the kernel API requirements.

Reviewed-by: Alexander Motin <mav@FreeBSD.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Brian Atkinson <batkinson@lanl.gov>
Closes #16856
This commit is contained in:
Brian Atkinson 2024-12-10 10:21:06 -07:00 committed by Brian Behlendorf
parent 1862c1c0a8
commit d67eb17e27
4 changed files with 150 additions and 43 deletions

View File

@ -0,0 +1,33 @@
dnl #
dnl # Check for pin_user_pages_unlocked().
dnl #
dnl # Hands ZFS_LINUX_TEST_SRC a test program named pin_user_pages_unlocked.
dnl # The program includes linux/mm.h and calls the function with four
dnl # arguments: start address, number of pages, page array and gup flags.
dnl # The return value is stored in a variable tagged as unused, presumably
dnl # so that an unused-variable warning cannot fail the test.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_PIN_USER_PAGES], [
ZFS_LINUX_TEST_SRC([pin_user_pages_unlocked], [
#include <linux/mm.h>
],[
unsigned long start = 0;
unsigned long nr_pages = 1;
struct page **pages = NULL;
unsigned int gup_flags = 0;
long ret __attribute__ ((unused));
ret = pin_user_pages_unlocked(start, nr_pages, pages,
gup_flags);
])
])
AC_DEFUN([ZFS_AC_KERNEL_PIN_USER_PAGES], [
dnl #
dnl # Kernel 5.8 introduced the pin_user_pages* interfaces which should
dnl # be used for Direct I/O requests.
dnl #
dnl # Evaluates the pin_user_pages_unlocked test result. On success
dnl # HAVE_PIN_USER_PAGES_UNLOCKED is defined to 1. On failure nothing
dnl # is defined and only the negative result is reported.
dnl #
AC_MSG_CHECKING([whether pin_user_pages_unlocked() is available])
ZFS_LINUX_TEST_RESULT([pin_user_pages_unlocked], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_PIN_USER_PAGES_UNLOCKED, 1,
[pin_user_pages_unlocked() is available])
],[
AC_MSG_RESULT(no)
])
])

View File

@ -13,20 +13,6 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_IOV_ITER], [
error = fault_in_iov_iter_readable(&iter, size);
])
ZFS_LINUX_TEST_SRC([iov_iter_get_pages2], [
#include <linux/uio.h>
],[
struct iov_iter iter = { 0 };
struct page **pages = NULL;
size_t maxsize = 4096;
unsigned maxpages = 1;
size_t start;
size_t ret __attribute__ ((unused));
ret = iov_iter_get_pages2(&iter, pages, maxsize, maxpages,
&start);
])
ZFS_LINUX_TEST_SRC([iov_iter_type], [
#include <linux/fs.h>
#include <linux/uio.h>
@ -35,6 +21,15 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_VFS_IOV_ITER], [
__attribute__((unused)) enum iter_type i = iov_iter_type(&iter);
])
ZFS_LINUX_TEST_SRC([iter_is_ubuf], [
#include <linux/uio.h>
],[
struct iov_iter iter = { 0 };
bool ret __attribute__((unused));
ret = iter_is_ubuf(&iter);
])
ZFS_LINUX_TEST_SRC([iter_iov], [
#include <linux/fs.h>
#include <linux/uio.h>
@ -55,18 +50,6 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_IOV_ITER], [
AC_MSG_RESULT(no)
])
dnl #
dnl # Kernel 6.0 changed iov_iter_get_pages() to iov_iter_get_pages2().
dnl #
AC_MSG_CHECKING([whether iov_iter_get_pages2() is available])
ZFS_LINUX_TEST_RESULT([iov_iter_get_pages2], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_IOV_ITER_GET_PAGES2, 1,
[iov_iter_get_pages2() is available])
],[
AC_MSG_RESULT(no)
])
dnl #
dnl # This checks for iov_iter_type() in linux/uio.h. It is not
dnl # required, however, and the module will compile without it
@ -81,6 +64,18 @@ AC_DEFUN([ZFS_AC_KERNEL_VFS_IOV_ITER], [
AC_MSG_RESULT(no)
])
dnl #
dnl # Kernel 6.0 introduced the ITER_UBUF iov_iter type. iter_is_ubuf()
dnl # was also added to determine if the iov_iter is an ITER_UBUF.
dnl #
AC_MSG_CHECKING([whether iter_is_ubuf() is available])
ZFS_LINUX_TEST_RESULT([iter_is_ubuf], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_ITER_IS_UBUF, 1, [iter_is_ubuf() is available])
],[
AC_MSG_RESULT(no)
])
dnl #
dnl # Kernel 6.5 introduces the iter_iov() function that returns the
dnl # __iov member of an iov_iter*. The iov member was renamed to this

View File

@ -127,6 +127,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
ZFS_AC_KERNEL_SRC_MM_PAGE_SIZE
ZFS_AC_KERNEL_SRC_MM_PAGE_MAPPING
ZFS_AC_KERNEL_SRC_FILE
ZFS_AC_KERNEL_SRC_PIN_USER_PAGES
case "$host_cpu" in
powerpc*)
ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE
@ -238,6 +239,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
ZFS_AC_KERNEL_MM_PAGE_MAPPING
ZFS_AC_KERNEL_1ARG_ASSIGN_STR
ZFS_AC_KERNEL_FILE
ZFS_AC_KERNEL_PIN_USER_PAGES
case "$host_cpu" in
powerpc*)
ZFS_AC_KERNEL_CPU_HAS_FEATURE

View File

@ -441,6 +441,7 @@ zfs_unmark_page(struct page *page)
}
#endif /* HAVE_ZERO_PAGE_GPL_ONLY || !_LP64 */
#if !defined(HAVE_PIN_USER_PAGES_UNLOCKED)
static void
zfs_uio_dio_check_for_zero_page(zfs_uio_t *uio)
{
@ -472,6 +473,7 @@ zfs_uio_dio_check_for_zero_page(zfs_uio_t *uio)
}
}
}
#endif
void
zfs_uio_free_dio_pages(zfs_uio_t *uio, zfs_uio_rw_t rw)
@ -480,6 +482,9 @@ zfs_uio_free_dio_pages(zfs_uio_t *uio, zfs_uio_rw_t rw)
ASSERT(uio->uio_extflg & UIO_DIRECT);
ASSERT3P(uio->uio_dio.pages, !=, NULL);
#if defined(HAVE_PIN_USER_PAGES_UNLOCKED)
unpin_user_pages(uio->uio_dio.pages, uio->uio_dio.npages);
#else
for (long i = 0; i < uio->uio_dio.npages; i++) {
struct page *p = uio->uio_dio.pages[i];
@ -491,44 +496,106 @@ zfs_uio_free_dio_pages(zfs_uio_t *uio, zfs_uio_rw_t rw)
put_page(p);
}
#endif
vmem_free(uio->uio_dio.pages,
uio->uio_dio.npages * sizeof (struct page *));
}
#if defined(HAVE_PIN_USER_PAGES_UNLOCKED)
/*
 * Pin the user pages that back a Direct I/O request using
 * pin_user_pages_unlocked().
 *
 * The range to pin starts uio_skip bytes into the request and covers
 * uio_resid - uio_skip bytes. Pinned pages are appended to
 * uio->uio_dio.pages starting at index uio->uio_dio.npages, and npages is
 * advanced by the number of pages pinned.
 *
 * Returns 0 on success (including a zero-length request), the negated
 * error reported by pin_user_pages_unlocked(), or EFAULT when fewer bytes
 * than requested could be pinned. In the EFAULT case npages already
 * accounts for the partially pinned pages, so they can still be released.
 */
static int
zfs_uio_pin_user_pages(zfs_uio_t *uio, zfs_uio_rw_t rw)
{
	size_t offset = uio->uio_skip;
	size_t remaining = uio->uio_resid - offset;
	/*
	 * A read from the file stores data into the user pages, so they must
	 * be pinned writable.
	 *
	 * Kernel 6.2 introduced the FOLL_PCI_P2PDMA flag. This flag could
	 * possibly be used here in the future to allow for P2P operations with
	 * user pages.
	 */
	unsigned int flags = (rw == UIO_READ) ? FOLL_WRITE : 0;
	unsigned long uaddr;
	unsigned long want;
	long pinned;

	if (remaining == 0)
		return (0);

#if defined(HAVE_ITER_IS_UBUF)
	/* An ITER_UBUF iterator is one contiguous user buffer. */
	if (iter_is_ubuf(uio->uio_iter)) {
		want = DIV_ROUND_UP(remaining, PAGE_SIZE);
		uaddr = (unsigned long)uio->uio_iter->ubuf + offset;
		pinned = pin_user_pages_unlocked(uaddr, want,
		    &uio->uio_dio.pages[uio->uio_dio.npages], flags);
		if (pinned < 0)
			return (SET_ERROR(-pinned));

		/* Count whatever was pinned, even if it was only a part. */
		uio->uio_dio.npages += pinned;
		if (remaining != (pinned * PAGE_SIZE))
			return (SET_ERROR(EFAULT));

		return (0);
	}
#endif

	/* Otherwise walk the iovec array and pin each segment in turn. */
	const struct iovec *iov = zfs_uio_iter_iov(uio->uio_iter);
	for (int i = 0; i < uio->uio_iovcnt; i++, iov++) {
		size_t seg = iov->iov_len - offset;

		if (seg != 0) {
			uaddr = (unsigned long)iov->iov_base + offset;
			want = DIV_ROUND_UP(seg, PAGE_SIZE);
			pinned = pin_user_pages_unlocked(uaddr, want,
			    &uio->uio_dio.pages[uio->uio_dio.npages], flags);
			if (pinned < 0)
				return (SET_ERROR(-pinned));

			uio->uio_dio.npages += pinned;
			if (seg != (pinned * PAGE_SIZE))
				return (SET_ERROR(EFAULT));

			remaining -= seg;
		}

		/* The initial skip only applies to the first segment. */
		offset = 0;
	}

	ASSERT0(remaining);

	return (0);
}
#else
static int
zfs_uio_get_dio_pages_iov_iter(zfs_uio_t *uio, zfs_uio_rw_t rw)
{
size_t skip = uio->uio_skip;
size_t start;
size_t wanted = uio->uio_resid - uio->uio_skip;
ssize_t rollback = 0;
ssize_t cnt;
unsigned maxpages = DIV_ROUND_UP(wanted, PAGE_SIZE);
while (wanted) {
#if defined(HAVE_IOV_ITER_GET_PAGES2)
cnt = iov_iter_get_pages2(uio->uio_iter,
&uio->uio_dio.pages[uio->uio_dio.npages],
wanted, maxpages, &skip);
#else
cnt = iov_iter_get_pages(uio->uio_iter,
&uio->uio_dio.pages[uio->uio_dio.npages],
wanted, maxpages, &skip);
#endif
wanted, maxpages, &start);
if (cnt < 0) {
iov_iter_revert(uio->uio_iter, rollback);
return (SET_ERROR(-cnt));
}
/*
* All Direct I/O operations must be page aligned.
*/
ASSERT(IS_P2ALIGNED(start, PAGE_SIZE));
uio->uio_dio.npages += DIV_ROUND_UP(cnt, PAGE_SIZE);
rollback += cnt;
wanted -= cnt;
skip = 0;
#if !defined(HAVE_IOV_ITER_GET_PAGES2)
/*
* iov_iter_get_pages2() advances the iov_iter on success.
*/
iov_iter_advance(uio->uio_iter, cnt);
#endif
}
ASSERT3U(rollback, ==, uio->uio_resid - uio->uio_skip);
@ -536,6 +603,7 @@ zfs_uio_get_dio_pages_iov_iter(zfs_uio_t *uio, zfs_uio_rw_t rw)
return (0);
}
#endif /* HAVE_PIN_USER_PAGES_UNLOCKED */
/*
* This function pins user pages. In the event that the user pages were not
@ -552,7 +620,11 @@ zfs_uio_get_dio_pages_alloc(zfs_uio_t *uio, zfs_uio_rw_t rw)
if (uio->uio_segflg == UIO_ITER) {
uio->uio_dio.pages = vmem_alloc(size, KM_SLEEP);
#if defined(HAVE_PIN_USER_PAGES_UNLOCKED)
error = zfs_uio_pin_user_pages(uio, rw);
#else
error = zfs_uio_get_dio_pages_iov_iter(uio, rw);
#endif
} else {
return (SET_ERROR(EOPNOTSUPP));
}
@ -560,17 +632,22 @@ zfs_uio_get_dio_pages_alloc(zfs_uio_t *uio, zfs_uio_rw_t rw)
ASSERT3S(uio->uio_dio.npages, >=, 0);
if (error) {
#if defined(HAVE_PIN_USER_PAGES_UNLOCKED)
unpin_user_pages(uio->uio_dio.pages, uio->uio_dio.npages);
#else
for (long i = 0; i < uio->uio_dio.npages; i++)
put_page(uio->uio_dio.pages[i]);
#endif
vmem_free(uio->uio_dio.pages, size);
return (error);
} else {
ASSERT3S(uio->uio_dio.npages, ==, npages);
}
if (rw == UIO_WRITE) {
#if !defined(HAVE_PIN_USER_PAGES_UNLOCKED)
if (rw == UIO_WRITE)
zfs_uio_dio_check_for_zero_page(uio);
}
#endif
uio->uio_extflg |= UIO_DIRECT;