mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-08-16 19:16:56 +00:00

Add tests for process_madvise(), focusing on verifying behavior under various conditions including valid usage and error cases. [lianux.mm@gmail.com: v7] Link: https://lkml.kernel.org/r/20250729113109.12272-1-lianux.mm@gmail.com Link: https://lkml.kernel.org/r/20250729113109.12272-1-lianux.mm@gmail.com Link: https://lkml.kernel.org/r/20250721114614.40996-1-lianux.mm@gmail.com Signed-off-by: wang lian <lianux.mm@gmail.com> Suggested-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> Suggested-by: David Hildenbrand <david@redhat.com> Suggested-by: Zi Yan <ziy@nvidia.com> Suggested-by: Mark Brown <broonie@kernel.org> Acked-by: SeongJae Park <sj@kernel.org> Reviewed-by: Zi Yan <ziy@nvidia.com> Tested-by: Zi Yan <ziy@nvidia.com> Cc: Christian Brauner <brauner@kernel.org> Cc: Jann Horn <jannh@google.com> Cc: Kairui Song <ryncsn@gmail.com> Cc: Liam Howlett <liam.howlett@oracle.com> Cc: Shuah Khan <shuah@kernel.org> Cc: Vlastimil Babka <vbabka@suse.cz> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
345 lines
8.5 KiB
C
345 lines
8.5 KiB
C
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
|
|
#define _GNU_SOURCE
|
|
#include "../kselftest_harness.h"
|
|
#include <errno.h>
|
|
#include <setjmp.h>
|
|
#include <signal.h>
|
|
#include <stdbool.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <linux/mman.h>
|
|
#include <sys/syscall.h>
|
|
#include <unistd.h>
|
|
#include <sched.h>
|
|
#include "vm_util.h"
|
|
|
|
#include "../pidfd/pidfd.h"
|
|
|
|
FIXTURE(process_madvise)
|
|
{
|
|
unsigned long page_size;
|
|
pid_t child_pid;
|
|
int remote_pidfd;
|
|
int pidfd;
|
|
};
|
|
|
|
FIXTURE_SETUP(process_madvise)
|
|
{
|
|
self->page_size = (unsigned long)sysconf(_SC_PAGESIZE);
|
|
self->pidfd = PIDFD_SELF;
|
|
self->remote_pidfd = -1;
|
|
self->child_pid = -1;
|
|
};
|
|
|
|
FIXTURE_TEARDOWN_PARENT(process_madvise)
|
|
{
|
|
/* This teardown is guaranteed to run, even if tests SKIP or ASSERT */
|
|
if (self->child_pid > 0) {
|
|
kill(self->child_pid, SIGKILL);
|
|
waitpid(self->child_pid, NULL, 0);
|
|
}
|
|
|
|
if (self->remote_pidfd >= 0)
|
|
close(self->remote_pidfd);
|
|
}
|
|
|
|
static ssize_t sys_process_madvise(int pidfd, const struct iovec *iovec,
|
|
size_t vlen, int advice, unsigned int flags)
|
|
{
|
|
return syscall(__NR_process_madvise, pidfd, iovec, vlen, advice, flags);
|
|
}
|
|
|
|
/*
|
|
* This test uses PIDFD_SELF to target the current process. The main
|
|
* goal is to verify the basic behavior of process_madvise() with
|
|
* a vector of non-contiguous memory ranges, not its cross-process
|
|
* capabilities.
|
|
*/
|
|
TEST_F(process_madvise, basic)
|
|
{
|
|
const unsigned long pagesize = self->page_size;
|
|
const int madvise_pages = 4;
|
|
struct iovec vec[madvise_pages];
|
|
int pidfd = self->pidfd;
|
|
ssize_t ret;
|
|
char *map;
|
|
|
|
/*
|
|
* Create a single large mapping. We will pick pages from this
|
|
* mapping to advise on. This ensures we test non-contiguous iovecs.
|
|
*/
|
|
map = mmap(NULL, pagesize * 10, PROT_READ | PROT_WRITE,
|
|
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
|
if (map == MAP_FAILED)
|
|
SKIP(return, "mmap failed, not enough memory.\n");
|
|
|
|
/* Fill the entire region with a known pattern. */
|
|
memset(map, 'A', pagesize * 10);
|
|
|
|
/*
|
|
* Setup the iovec to point to 4 non-contiguous pages
|
|
* within the mapping.
|
|
*/
|
|
vec[0].iov_base = &map[0 * pagesize];
|
|
vec[0].iov_len = pagesize;
|
|
vec[1].iov_base = &map[3 * pagesize];
|
|
vec[1].iov_len = pagesize;
|
|
vec[2].iov_base = &map[5 * pagesize];
|
|
vec[2].iov_len = pagesize;
|
|
vec[3].iov_base = &map[8 * pagesize];
|
|
vec[3].iov_len = pagesize;
|
|
|
|
ret = sys_process_madvise(pidfd, vec, madvise_pages, MADV_DONTNEED, 0);
|
|
if (ret == -1 && errno == EPERM)
|
|
SKIP(return,
|
|
"process_madvise() unsupported or permission denied, try running as root.\n");
|
|
else if (errno == EINVAL)
|
|
SKIP(return,
|
|
"process_madvise() unsupported or parameter invalid, please check arguments.\n");
|
|
|
|
/* The call should succeed and report the total bytes processed. */
|
|
ASSERT_EQ(ret, madvise_pages * pagesize);
|
|
|
|
/* Check that advised pages are now zero. */
|
|
for (int i = 0; i < madvise_pages; i++) {
|
|
char *advised_page = (char *)vec[i].iov_base;
|
|
|
|
/* Content must be 0, not 'A'. */
|
|
ASSERT_EQ(*advised_page, '\0');
|
|
}
|
|
|
|
/* Check that an un-advised page in between is still 'A'. */
|
|
char *unadvised_page = &map[1 * pagesize];
|
|
|
|
for (int i = 0; i < pagesize; i++)
|
|
ASSERT_EQ(unadvised_page[i], 'A');
|
|
|
|
/* Cleanup. */
|
|
ASSERT_EQ(munmap(map, pagesize * 10), 0);
|
|
}
|
|
|
|
/*
|
|
* This test deterministically validates process_madvise() with MADV_COLLAPSE
|
|
* on a remote process, other advices are difficult to verify reliably.
|
|
*
|
|
* The test verifies that a memory region in a child process,
|
|
* focus on process_madv remote result, only check addresses and lengths.
|
|
* The correctness of the MADV_COLLAPSE can be found in the relevant test examples in khugepaged.
|
|
*/
|
|
TEST_F(process_madvise, remote_collapse)
|
|
{
|
|
const unsigned long pagesize = self->page_size;
|
|
long huge_page_size;
|
|
int pipe_info[2];
|
|
ssize_t ret;
|
|
struct iovec vec;
|
|
|
|
struct child_info {
|
|
pid_t pid;
|
|
void *map_addr;
|
|
} info;
|
|
|
|
huge_page_size = read_pmd_pagesize();
|
|
if (huge_page_size <= 0)
|
|
SKIP(return, "Could not determine a valid huge page size.\n");
|
|
|
|
ASSERT_EQ(pipe(pipe_info), 0);
|
|
|
|
self->child_pid = fork();
|
|
ASSERT_NE(self->child_pid, -1);
|
|
|
|
if (self->child_pid == 0) {
|
|
char *map;
|
|
size_t map_size = 2 * huge_page_size;
|
|
|
|
close(pipe_info[0]);
|
|
|
|
map = mmap(NULL, map_size, PROT_READ | PROT_WRITE,
|
|
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
|
ASSERT_NE(map, MAP_FAILED);
|
|
|
|
/* Fault in as small pages */
|
|
for (size_t i = 0; i < map_size; i += pagesize)
|
|
map[i] = 'A';
|
|
|
|
/* Send info and pause */
|
|
info.pid = getpid();
|
|
info.map_addr = map;
|
|
ret = write(pipe_info[1], &info, sizeof(info));
|
|
ASSERT_EQ(ret, sizeof(info));
|
|
close(pipe_info[1]);
|
|
|
|
pause();
|
|
exit(0);
|
|
}
|
|
|
|
close(pipe_info[1]);
|
|
|
|
/* Receive child info */
|
|
ret = read(pipe_info[0], &info, sizeof(info));
|
|
if (ret <= 0) {
|
|
waitpid(self->child_pid, NULL, 0);
|
|
SKIP(return, "Failed to read child info from pipe.\n");
|
|
}
|
|
ASSERT_EQ(ret, sizeof(info));
|
|
close(pipe_info[0]);
|
|
self->child_pid = info.pid;
|
|
|
|
self->remote_pidfd = syscall(__NR_pidfd_open, self->child_pid, 0);
|
|
ASSERT_GE(self->remote_pidfd, 0);
|
|
|
|
vec.iov_base = info.map_addr;
|
|
vec.iov_len = huge_page_size;
|
|
|
|
ret = sys_process_madvise(self->remote_pidfd, &vec, 1, MADV_COLLAPSE,
|
|
0);
|
|
if (ret == -1) {
|
|
if (errno == EINVAL)
|
|
SKIP(return, "PROCESS_MADV_ADVISE is not supported.\n");
|
|
else if (errno == EPERM)
|
|
SKIP(return,
|
|
"No process_madvise() permissions, try running as root.\n");
|
|
return;
|
|
}
|
|
|
|
ASSERT_EQ(ret, huge_page_size);
|
|
}
|
|
|
|
/*
|
|
* Test process_madvise() with a pidfd for a process that has already
|
|
* exited to ensure correct error handling.
|
|
*/
|
|
TEST_F(process_madvise, exited_process_pidfd)
|
|
{
|
|
const unsigned long pagesize = self->page_size;
|
|
struct iovec vec;
|
|
char *map;
|
|
ssize_t ret;
|
|
|
|
map = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1,
|
|
0);
|
|
if (map == MAP_FAILED)
|
|
SKIP(return, "mmap failed, not enough memory.\n");
|
|
|
|
vec.iov_base = map;
|
|
vec.iov_len = pagesize;
|
|
|
|
/*
|
|
* Using a pidfd for a process that has already exited should fail
|
|
* with ESRCH.
|
|
*/
|
|
self->child_pid = fork();
|
|
ASSERT_NE(self->child_pid, -1);
|
|
|
|
if (self->child_pid == 0)
|
|
exit(0);
|
|
|
|
self->remote_pidfd = syscall(__NR_pidfd_open, self->child_pid, 0);
|
|
ASSERT_GE(self->remote_pidfd, 0);
|
|
|
|
/* Wait for the child to ensure it has terminated. */
|
|
waitpid(self->child_pid, NULL, 0);
|
|
|
|
ret = sys_process_madvise(self->remote_pidfd, &vec, 1, MADV_DONTNEED,
|
|
0);
|
|
ASSERT_EQ(ret, -1);
|
|
ASSERT_EQ(errno, ESRCH);
|
|
}
|
|
|
|
/*
|
|
* Test process_madvise() with bad pidfds to ensure correct error
|
|
* handling.
|
|
*/
|
|
TEST_F(process_madvise, bad_pidfd)
|
|
{
|
|
const unsigned long pagesize = self->page_size;
|
|
struct iovec vec;
|
|
char *map;
|
|
ssize_t ret;
|
|
|
|
map = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1,
|
|
0);
|
|
if (map == MAP_FAILED)
|
|
SKIP(return, "mmap failed, not enough memory.\n");
|
|
|
|
vec.iov_base = map;
|
|
vec.iov_len = pagesize;
|
|
|
|
/* Using an invalid fd number (-1) should fail with EBADF. */
|
|
ret = sys_process_madvise(-1, &vec, 1, MADV_DONTNEED, 0);
|
|
ASSERT_EQ(ret, -1);
|
|
ASSERT_EQ(errno, EBADF);
|
|
|
|
/*
|
|
* Using a valid fd that is not a pidfd (e.g. stdin) should fail
|
|
* with EBADF.
|
|
*/
|
|
ret = sys_process_madvise(STDIN_FILENO, &vec, 1, MADV_DONTNEED, 0);
|
|
ASSERT_EQ(ret, -1);
|
|
ASSERT_EQ(errno, EBADF);
|
|
}
|
|
|
|
/*
|
|
* Test that process_madvise() rejects vlen > UIO_MAXIOV.
|
|
* The kernel should return -EINVAL when the number of iovecs exceeds 1024.
|
|
*/
|
|
TEST_F(process_madvise, invalid_vlen)
|
|
{
|
|
const unsigned long pagesize = self->page_size;
|
|
int pidfd = self->pidfd;
|
|
struct iovec vec;
|
|
char *map;
|
|
ssize_t ret;
|
|
|
|
map = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1,
|
|
0);
|
|
if (map == MAP_FAILED)
|
|
SKIP(return, "mmap failed, not enough memory.\n");
|
|
|
|
vec.iov_base = map;
|
|
vec.iov_len = pagesize;
|
|
|
|
ret = sys_process_madvise(pidfd, &vec, 1025, MADV_DONTNEED, 0);
|
|
ASSERT_EQ(ret, -1);
|
|
ASSERT_EQ(errno, EINVAL);
|
|
|
|
/* Cleanup. */
|
|
ASSERT_EQ(munmap(map, pagesize), 0);
|
|
}
|
|
|
|
/*
|
|
* Test process_madvise() with an invalid flag value. Currently, only a flag
|
|
* value of 0 is supported. This test is reserved for the future, e.g., if
|
|
* synchronous flags are added.
|
|
*/
|
|
TEST_F(process_madvise, flag)
|
|
{
|
|
const unsigned long pagesize = self->page_size;
|
|
unsigned int invalid_flag;
|
|
int pidfd = self->pidfd;
|
|
struct iovec vec;
|
|
char *map;
|
|
ssize_t ret;
|
|
|
|
map = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1,
|
|
0);
|
|
if (map == MAP_FAILED)
|
|
SKIP(return, "mmap failed, not enough memory.\n");
|
|
|
|
vec.iov_base = map;
|
|
vec.iov_len = pagesize;
|
|
|
|
invalid_flag = 0x80000000;
|
|
|
|
ret = sys_process_madvise(pidfd, &vec, 1, MADV_DONTNEED, invalid_flag);
|
|
ASSERT_EQ(ret, -1);
|
|
ASSERT_EQ(errno, EINVAL);
|
|
|
|
/* Cleanup. */
|
|
ASSERT_EQ(munmap(map, pagesize), 0);
|
|
}
|
|
|
|
TEST_HARNESS_MAIN
|