linux/tools/testing/selftests/mm/process_madv.c
wang lian b50e37889f selftests/mm: add process_madvise() tests
Add tests for process_madvise(), focusing on verifying behavior under
various conditions including valid usage and error cases.

[lianux.mm@gmail.com: v7]
  Link: https://lkml.kernel.org/r/20250729113109.12272-1-lianux.mm@gmail.com
Link: https://lkml.kernel.org/r/20250729113109.12272-1-lianux.mm@gmail.com
Link: https://lkml.kernel.org/r/20250721114614.40996-1-lianux.mm@gmail.com
Signed-off-by: wang lian <lianux.mm@gmail.com>
Suggested-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Suggested-by: David Hildenbrand <david@redhat.com>
Suggested-by: Zi Yan <ziy@nvidia.com>
Suggested-by: Mark Brown <broonie@kernel.org>
Acked-by: SeongJae Park <sj@kernel.org>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Tested-by: Zi Yan <ziy@nvidia.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Jann Horn <jannh@google.com>
Cc: Kairui Song <ryncsn@gmail.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2025-08-02 12:06:08 -07:00

345 lines
8.5 KiB
C

// SPDX-License-Identifier: GPL-2.0-or-later
#define _GNU_SOURCE
#include "../kselftest_harness.h"
#include <errno.h>
#include <setjmp.h>
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <linux/mman.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <sched.h>
#include "vm_util.h"
#include "../pidfd/pidfd.h"
FIXTURE(process_madvise)
{
unsigned long page_size;
pid_t child_pid;
int remote_pidfd;
int pidfd;
};
FIXTURE_SETUP(process_madvise)
{
self->page_size = (unsigned long)sysconf(_SC_PAGESIZE);
self->pidfd = PIDFD_SELF;
self->remote_pidfd = -1;
self->child_pid = -1;
};
FIXTURE_TEARDOWN_PARENT(process_madvise)
{
/* This teardown is guaranteed to run, even if tests SKIP or ASSERT */
if (self->child_pid > 0) {
kill(self->child_pid, SIGKILL);
waitpid(self->child_pid, NULL, 0);
}
if (self->remote_pidfd >= 0)
close(self->remote_pidfd);
}
static ssize_t sys_process_madvise(int pidfd, const struct iovec *iovec,
size_t vlen, int advice, unsigned int flags)
{
return syscall(__NR_process_madvise, pidfd, iovec, vlen, advice, flags);
}
/*
* This test uses PIDFD_SELF to target the current process. The main
* goal is to verify the basic behavior of process_madvise() with
* a vector of non-contiguous memory ranges, not its cross-process
* capabilities.
*/
TEST_F(process_madvise, basic)
{
const unsigned long pagesize = self->page_size;
const int madvise_pages = 4;
struct iovec vec[madvise_pages];
int pidfd = self->pidfd;
ssize_t ret;
char *map;
/*
* Create a single large mapping. We will pick pages from this
* mapping to advise on. This ensures we test non-contiguous iovecs.
*/
map = mmap(NULL, pagesize * 10, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (map == MAP_FAILED)
SKIP(return, "mmap failed, not enough memory.\n");
/* Fill the entire region with a known pattern. */
memset(map, 'A', pagesize * 10);
/*
* Setup the iovec to point to 4 non-contiguous pages
* within the mapping.
*/
vec[0].iov_base = &map[0 * pagesize];
vec[0].iov_len = pagesize;
vec[1].iov_base = &map[3 * pagesize];
vec[1].iov_len = pagesize;
vec[2].iov_base = &map[5 * pagesize];
vec[2].iov_len = pagesize;
vec[3].iov_base = &map[8 * pagesize];
vec[3].iov_len = pagesize;
ret = sys_process_madvise(pidfd, vec, madvise_pages, MADV_DONTNEED, 0);
if (ret == -1 && errno == EPERM)
SKIP(return,
"process_madvise() unsupported or permission denied, try running as root.\n");
else if (errno == EINVAL)
SKIP(return,
"process_madvise() unsupported or parameter invalid, please check arguments.\n");
/* The call should succeed and report the total bytes processed. */
ASSERT_EQ(ret, madvise_pages * pagesize);
/* Check that advised pages are now zero. */
for (int i = 0; i < madvise_pages; i++) {
char *advised_page = (char *)vec[i].iov_base;
/* Content must be 0, not 'A'. */
ASSERT_EQ(*advised_page, '\0');
}
/* Check that an un-advised page in between is still 'A'. */
char *unadvised_page = &map[1 * pagesize];
for (int i = 0; i < pagesize; i++)
ASSERT_EQ(unadvised_page[i], 'A');
/* Cleanup. */
ASSERT_EQ(munmap(map, pagesize * 10), 0);
}
/*
* This test deterministically validates process_madvise() with MADV_COLLAPSE
* on a remote process, other advices are difficult to verify reliably.
*
* The test verifies that a memory region in a child process,
* focus on process_madv remote result, only check addresses and lengths.
* The correctness of the MADV_COLLAPSE can be found in the relevant test examples in khugepaged.
*/
TEST_F(process_madvise, remote_collapse)
{
const unsigned long pagesize = self->page_size;
long huge_page_size;
int pipe_info[2];
ssize_t ret;
struct iovec vec;
struct child_info {
pid_t pid;
void *map_addr;
} info;
huge_page_size = read_pmd_pagesize();
if (huge_page_size <= 0)
SKIP(return, "Could not determine a valid huge page size.\n");
ASSERT_EQ(pipe(pipe_info), 0);
self->child_pid = fork();
ASSERT_NE(self->child_pid, -1);
if (self->child_pid == 0) {
char *map;
size_t map_size = 2 * huge_page_size;
close(pipe_info[0]);
map = mmap(NULL, map_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
ASSERT_NE(map, MAP_FAILED);
/* Fault in as small pages */
for (size_t i = 0; i < map_size; i += pagesize)
map[i] = 'A';
/* Send info and pause */
info.pid = getpid();
info.map_addr = map;
ret = write(pipe_info[1], &info, sizeof(info));
ASSERT_EQ(ret, sizeof(info));
close(pipe_info[1]);
pause();
exit(0);
}
close(pipe_info[1]);
/* Receive child info */
ret = read(pipe_info[0], &info, sizeof(info));
if (ret <= 0) {
waitpid(self->child_pid, NULL, 0);
SKIP(return, "Failed to read child info from pipe.\n");
}
ASSERT_EQ(ret, sizeof(info));
close(pipe_info[0]);
self->child_pid = info.pid;
self->remote_pidfd = syscall(__NR_pidfd_open, self->child_pid, 0);
ASSERT_GE(self->remote_pidfd, 0);
vec.iov_base = info.map_addr;
vec.iov_len = huge_page_size;
ret = sys_process_madvise(self->remote_pidfd, &vec, 1, MADV_COLLAPSE,
0);
if (ret == -1) {
if (errno == EINVAL)
SKIP(return, "PROCESS_MADV_ADVISE is not supported.\n");
else if (errno == EPERM)
SKIP(return,
"No process_madvise() permissions, try running as root.\n");
return;
}
ASSERT_EQ(ret, huge_page_size);
}
/*
* Test process_madvise() with a pidfd for a process that has already
* exited to ensure correct error handling.
*/
TEST_F(process_madvise, exited_process_pidfd)
{
const unsigned long pagesize = self->page_size;
struct iovec vec;
char *map;
ssize_t ret;
map = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1,
0);
if (map == MAP_FAILED)
SKIP(return, "mmap failed, not enough memory.\n");
vec.iov_base = map;
vec.iov_len = pagesize;
/*
* Using a pidfd for a process that has already exited should fail
* with ESRCH.
*/
self->child_pid = fork();
ASSERT_NE(self->child_pid, -1);
if (self->child_pid == 0)
exit(0);
self->remote_pidfd = syscall(__NR_pidfd_open, self->child_pid, 0);
ASSERT_GE(self->remote_pidfd, 0);
/* Wait for the child to ensure it has terminated. */
waitpid(self->child_pid, NULL, 0);
ret = sys_process_madvise(self->remote_pidfd, &vec, 1, MADV_DONTNEED,
0);
ASSERT_EQ(ret, -1);
ASSERT_EQ(errno, ESRCH);
}
/*
* Test process_madvise() with bad pidfds to ensure correct error
* handling.
*/
TEST_F(process_madvise, bad_pidfd)
{
const unsigned long pagesize = self->page_size;
struct iovec vec;
char *map;
ssize_t ret;
map = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1,
0);
if (map == MAP_FAILED)
SKIP(return, "mmap failed, not enough memory.\n");
vec.iov_base = map;
vec.iov_len = pagesize;
/* Using an invalid fd number (-1) should fail with EBADF. */
ret = sys_process_madvise(-1, &vec, 1, MADV_DONTNEED, 0);
ASSERT_EQ(ret, -1);
ASSERT_EQ(errno, EBADF);
/*
* Using a valid fd that is not a pidfd (e.g. stdin) should fail
* with EBADF.
*/
ret = sys_process_madvise(STDIN_FILENO, &vec, 1, MADV_DONTNEED, 0);
ASSERT_EQ(ret, -1);
ASSERT_EQ(errno, EBADF);
}
/*
* Test that process_madvise() rejects vlen > UIO_MAXIOV.
* The kernel should return -EINVAL when the number of iovecs exceeds 1024.
*/
TEST_F(process_madvise, invalid_vlen)
{
const unsigned long pagesize = self->page_size;
int pidfd = self->pidfd;
struct iovec vec;
char *map;
ssize_t ret;
map = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1,
0);
if (map == MAP_FAILED)
SKIP(return, "mmap failed, not enough memory.\n");
vec.iov_base = map;
vec.iov_len = pagesize;
ret = sys_process_madvise(pidfd, &vec, 1025, MADV_DONTNEED, 0);
ASSERT_EQ(ret, -1);
ASSERT_EQ(errno, EINVAL);
/* Cleanup. */
ASSERT_EQ(munmap(map, pagesize), 0);
}
/*
* Test process_madvise() with an invalid flag value. Currently, only a flag
* value of 0 is supported. This test is reserved for the future, e.g., if
* synchronous flags are added.
*/
TEST_F(process_madvise, flag)
{
const unsigned long pagesize = self->page_size;
unsigned int invalid_flag;
int pidfd = self->pidfd;
struct iovec vec;
char *map;
ssize_t ret;
map = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1,
0);
if (map == MAP_FAILED)
SKIP(return, "mmap failed, not enough memory.\n");
vec.iov_base = map;
vec.iov_len = pagesize;
invalid_flag = 0x80000000;
ret = sys_process_madvise(pidfd, &vec, 1, MADV_DONTNEED, invalid_flag);
ASSERT_EQ(ret, -1);
ASSERT_EQ(errno, EINVAL);
/* Cleanup. */
ASSERT_EQ(munmap(map, pagesize), 0);
}
TEST_HARNESS_MAIN