mirror of
https://git.proxmox.com/git/qemu
synced 2025-10-24 18:18:09 +00:00

By default qemu will use MAP_PRIVATE for guest pages. This will write-protect pages and thus break on s390 systems that don't support this feature. Therefore qemu has a hack to always use MAP_SHARED for s390. But MAP_SHARED has other problems (no dirty page tracking, a lot more swap overhead, etc.). Newer systems allow the distinction via KVM_CAP_S390_COW. With this feature qemu can use the standard qemu alloc if available; otherwise it will use the old s390 hack. Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> Signed-off-by: Jens Freimann <jfrei@linux.vnet.ibm.com> Acked-by: Jan Kiszka <jan.kiszka@siemens.com> Signed-off-by: Alexander Graf <agraf@suse.de>
260 lines
6.5 KiB
C
260 lines
6.5 KiB
C
/*
 * os-posix-lib.c
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2010 Red Hat, Inc.
 *
 * QEMU library functions on POSIX which are shared between QEMU and
 * the QEMU tools.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
|
|
|
|
/* The following block of code temporarily renames the daemon() function so the
|
|
compiler does not see the warning associated with it in stdlib.h on OSX */
|
|
#ifdef __APPLE__
|
|
#define daemon qemu_fake_daemon_function
|
|
#include <stdlib.h>
|
|
#undef daemon
|
|
extern int daemon(int, int);
|
|
#endif
|
|
|
|
/*
 * QEMU_VMALLOC_ALIGN: preferred alignment for qemu_vmalloc() allocations.
 */
#if defined(__linux__) && defined(__x86_64__)
/* 2 MiB, so that KVM can make use of transparent hugepages.
   Valgrind cannot handle alignments above 1 MiB, hence CONFIG_VALGRIND
   enables the special-case code that deals with running under it. */
#  define QEMU_VMALLOC_ALIGN (2 * 1024 * 1024)
#  define CONFIG_VALGRIND
#elif defined(__linux__) && defined(__s390x__)
/* 1 MiB (the segment size), so that KVM can make use of gmap. */
#  define QEMU_VMALLOC_ALIGN (1024 * 1024)
#else
/* Everywhere else: plain page alignment. */
#  define QEMU_VMALLOC_ALIGN getpagesize()
#endif
|
|
|
|
#include "config-host.h"
|
|
#include "sysemu.h"
|
|
#include "trace.h"
|
|
#include "qemu_socket.h"
|
|
|
|
/*
 * Valgrind detection state. With CONFIG_VALGRIND it is a variable that
 * starts at -1 ("not probed yet") and is filled in by qemu_vmalloc();
 * without it the name is a constant 0 so the checks compile away.
 */
#ifndef CONFIG_VALGRIND
#  define running_on_valgrind 0
#else
static int running_on_valgrind = -1;
#endif
|
|
#ifdef CONFIG_LINUX
|
|
#include <sys/syscall.h>
|
|
#endif
|
|
#ifdef CONFIG_EVENTFD
|
|
#include <sys/eventfd.h>
|
|
#endif
|
|
|
|
/*
 * Return an identifier for the calling thread: the gettid system call
 * result on Linux, the process ID on every other host.
 */
int qemu_get_thread_id(void)
{
#if !defined(__linux__)
    return getpid();
#else
    return syscall(SYS_gettid);
#endif
}
|
|
|
|
/*
 * Thin wrapper around daemon(): both arguments are forwarded unchanged
 * and daemon()'s return value is handed back to the caller.
 */
int qemu_daemon(int nochdir, int noclose)
{
    int rc = daemon(nochdir, noclose);

    return rc;
}
|
|
|
|
/*
 * Pass-through check for allocation results.
 *
 * Returns @ptr unchanged when it is non-NULL. A NULL @ptr is treated as
 * an allocation failure: the current errno text is printed to stderr
 * and the process is aborted.
 */
void *qemu_oom_check(void *ptr)
{
    if (ptr != NULL) {
        return ptr;
    }

    fprintf(stderr, "Failed to allocate memory: %s\n", strerror(errno));
    abort();
}
|
|
|
|
/*
 * Allocate @size bytes aligned to @alignment and return the pointer.
 *
 * Allocation failure is fatal: a message is printed to stderr and the
 * process aborts, so the function never returns NULL.
 *
 * The backend is chosen at compile time: posix_memalign() when
 * _POSIX_C_SOURCE is defined (except on Solaris), valloc() on BSD hosts
 * (which does not take @alignment into account), memalign() otherwise.
 */
void *qemu_memalign(size_t alignment, size_t size)
{
    void *ptr;
#if defined(_POSIX_C_SOURCE) && !defined(__sun__)
    int ret;
#endif

    /* posix_memalign() fails with EINVAL for alignments smaller than
       sizeof(void *); round tiny requests up instead of aborting. */
    if (alignment < sizeof(void *)) {
        alignment = sizeof(void *);
    }

#if defined(_POSIX_C_SOURCE) && !defined(__sun__)
    ret = posix_memalign(&ptr, alignment, size);
    if (ret != 0) {
        /* posix_memalign() reports the error as its return value,
           not through errno. */
        fprintf(stderr, "Failed to allocate %zu B: %s\n",
                size, strerror(ret));
        abort();
    }
#elif defined(CONFIG_BSD)
    ptr = qemu_oom_check(valloc(size));
#else
    ptr = qemu_oom_check(memalign(alignment, size));
#endif
    trace_qemu_memalign(alignment, size, ptr);
    return ptr;
}
|
|
|
|
/* conflicts with qemu_vmalloc in bsd-user/mmap.c */
|
|
#if !defined(CONFIG_BSD_USER)
|
|
/* alloc shared memory pages */
|
|
void *qemu_vmalloc(size_t size)
|
|
{
|
|
void *ptr;
|
|
size_t align = QEMU_VMALLOC_ALIGN;
|
|
|
|
#if defined(CONFIG_VALGRIND)
|
|
if (running_on_valgrind < 0) {
|
|
/* First call, test whether we are running on Valgrind.
|
|
This is a substitute for RUNNING_ON_VALGRIND from valgrind.h. */
|
|
const char *ld = getenv("LD_PRELOAD");
|
|
running_on_valgrind = (ld != NULL && strstr(ld, "vgpreload"));
|
|
}
|
|
#endif
|
|
|
|
if (size < align || running_on_valgrind) {
|
|
align = getpagesize();
|
|
}
|
|
ptr = qemu_memalign(align, size);
|
|
trace_qemu_vmalloc(size, ptr);
|
|
return ptr;
|
|
}
|
|
#endif
|
|
|
|
/*
 * Release memory at @ptr with free(), after emitting the qemu_vfree
 * trace event for it.
 */
void qemu_vfree(void *ptr)
{
    /* Trace first, while the pointer value still refers to live memory. */
    trace_qemu_vfree(ptr);

    free(ptr);
}
|
|
|
|
/*
 * Put @fd into blocking mode by clearing O_NONBLOCK in its file status
 * flags. All other status flags are preserved.
 *
 * Best effort: if the current flags cannot be read, @fd is left alone.
 */
void socket_set_block(int fd)
{
    int flags = fcntl(fd, F_GETFL);

    if (flags == -1) {
        /* -1 is an error indicator, not a flag set; do not feed it
           back into F_SETFL. */
        return;
    }
    fcntl(fd, F_SETFL, flags & ~O_NONBLOCK);
}
|
|
|
|
/*
 * Put @fd into non-blocking mode by setting O_NONBLOCK in its file
 * status flags. All other status flags are preserved.
 *
 * Best effort: if the current flags cannot be read, @fd is left alone.
 */
void socket_set_nonblock(int fd)
{
    int flags = fcntl(fd, F_GETFL);

    if (flags == -1) {
        /* -1 | O_NONBLOCK would ask F_SETFL to turn on every flag. */
        return;
    }
    fcntl(fd, F_SETFL, flags | O_NONBLOCK);
}
|
|
|
|
/*
 * Set the FD_CLOEXEC descriptor flag on @fd, keeping any other
 * descriptor flags that are already set.
 *
 * Best effort: if the current flags cannot be read, @fd is left alone.
 */
void qemu_set_cloexec(int fd)
{
    int flags = fcntl(fd, F_GETFD);

    if (flags == -1) {
        /* -1 is an error indicator, not a flag set; do not feed it
           back into F_SETFD. */
        return;
    }
    fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
}
|
|
|
|
/*
 * Open a pipe whose two descriptors both carry FD_CLOEXEC.
 *
 * With CONFIG_PIPE2, pipe2(O_CLOEXEC) is tried first; only an ENOSYS
 * failure falls through to the pipe() + qemu_set_cloexec() path.
 * Returns the result of the pipe call that was used (0 or -1).
 */
int qemu_pipe(int pipefd[2])
{
    int rc;

#ifdef CONFIG_PIPE2
    rc = pipe2(pipefd, O_CLOEXEC);
    if (!(rc == -1 && errno == ENOSYS)) {
        return rc;
    }
#endif

    rc = pipe(pipefd);
    if (rc != 0) {
        return rc;
    }

    qemu_set_cloexec(pipefd[0]);
    qemu_set_cloexec(pipefd[1]);
    return rc;
}
|
|
|
|
/*
 * Creates an eventfd that looks like a pipe: fds[0] is the eventfd and
 * fds[1] is a dup() of it. Both descriptors get FD_CLOEXEC set via
 * qemu_set_cloexec().
 *
 * When CONFIG_EVENTFD is not defined, or eventfd() fails with ENOSYS,
 * a real pipe from qemu_pipe() is used instead.
 *
 * Returns 0 on success, -1 on failure with errno describing the call
 * that failed.
 */
int qemu_eventfd(int fds[2])
{
#ifdef CONFIG_EVENTFD
    int ret;

    ret = eventfd(0, 0);
    if (ret >= 0) {
        fds[0] = ret;
        fds[1] = dup(ret);
        if (fds[1] == -1) {
            /* close() may overwrite errno; keep the reason dup() failed
               so the caller sees the real error. */
            int saved_errno = errno;

            close(ret);
            errno = saved_errno;
            return -1;
        }
        qemu_set_cloexec(ret);
        qemu_set_cloexec(fds[1]);
        return 0;
    }
    /* Any eventfd() failure other than "not implemented" is final. */
    if (errno != ENOSYS) {
        return -1;
    }
#endif

    return qemu_pipe(fds);
}
|
|
|
|
/*
 * Set the access (times[0]) and modification (times[1]) timestamps of
 * @path.
 *
 * With CONFIG_UTIMENSAT, utimensat(AT_FDCWD, ..., AT_SYMLINK_NOFOLLOW)
 * is used; only an ENOSYS failure falls through to the utimes()-based
 * emulation below. The emulation has microsecond resolution and
 * understands the special tv_nsec values UTIME_NOW and UTIME_OMIT; an
 * omitted timestamp is re-written from stat() with whole-second
 * precision.
 *
 * A NULL @times means "set both timestamps to the current time", as it
 * does for utimensat().
 *
 * Returns 0 on success, -1 on failure with errno set by the failing call.
 */
int qemu_utimens(const char *path, const struct timespec *times)
{
    struct timeval tv[2], tv_now;
    struct stat st;
    int i;
#ifdef CONFIG_UTIMENSAT
    int ret;

    ret = utimensat(AT_FDCWD, path, times, AT_SYMLINK_NOFOLLOW);
    if (ret != -1 || errno != ENOSYS) {
        return ret;
    }
#endif
    /* Fallback: use utimes() instead of utimensat() */

    /* utimensat() accepts NULL; the emulation must not dereference it */
    if (times == NULL) {
        return utimes(path, NULL);
    }

    /* happy if special cases */
    if (times[0].tv_nsec == UTIME_OMIT && times[1].tv_nsec == UTIME_OMIT) {
        return 0;
    }
    if (times[0].tv_nsec == UTIME_NOW && times[1].tv_nsec == UTIME_NOW) {
        return utimes(path, NULL);
    }

    /* prepare for hard cases */
    if (times[0].tv_nsec == UTIME_NOW || times[1].tv_nsec == UTIME_NOW) {
        gettimeofday(&tv_now, NULL);
    }
    if (times[0].tv_nsec == UTIME_OMIT || times[1].tv_nsec == UTIME_OMIT) {
        /* Without a successful stat() there is no current value to
           preserve; bail out rather than write back garbage. */
        if (stat(path, &st) == -1) {
            return -1;
        }
    }

    for (i = 0; i < 2; i++) {
        if (times[i].tv_nsec == UTIME_NOW) {
            tv[i].tv_sec = tv_now.tv_sec;
            tv[i].tv_usec = tv_now.tv_usec;
        } else if (times[i].tv_nsec == UTIME_OMIT) {
            tv[i].tv_sec = (i == 0) ? st.st_atime : st.st_mtime;
            tv[i].tv_usec = 0;
        } else {
            tv[i].tv_sec = times[i].tv_sec;
            tv[i].tv_usec = times[i].tv_nsec / 1000;
        }
    }

    return utimes(path, &tv[0]);
}
|