qemu/include/hw/boards.h
Laurent Vivier 3bfe57165b numa: equally distribute memory on nodes
When there are more nodes than available memory to put the minimum
allowed memory by node, all the memory is put on the last node.

This is because we put (ram_size / nb_numa_nodes) &
~((1 << mc->numa_mem_align_shift) - 1); on each node, and in this
case the value is 0. This is particularly true with pseries,
as the memory must be aligned to 256MB.

To avoid this problem, this patch uses an error diffusion algorithm [1]
to distribute equally the memory on nodes.

We introduce numa_auto_assign_ram() function in MachineClass
to keep compatibility between machine type versions.
The legacy function is used with pseries-2.9, pc-q35-2.9 and
pc-i440fx-2.9 (and previous), the new one with all others.

Example:

qemu-system-ppc64 -S -nographic  -nodefaults -monitor stdio -m 1G -smp 8 \
                  -numa node -numa node -numa node \
                  -numa node -numa node -numa node

Before:

(qemu) info numa
6 nodes
node 0 cpus: 0 6
node 0 size: 0 MB
node 1 cpus: 1 7
node 1 size: 0 MB
node 2 cpus: 2
node 2 size: 0 MB
node 3 cpus: 3
node 3 size: 0 MB
node 4 cpus: 4
node 4 size: 0 MB
node 5 cpus: 5
node 5 size: 1024 MB

After:
(qemu) info numa
6 nodes
node 0 cpus: 0 6
node 0 size: 0 MB
node 1 cpus: 1 7
node 1 size: 256 MB
node 2 cpus: 2
node 2 size: 0 MB
node 3 cpus: 3
node 3 size: 256 MB
node 4 cpus: 4
node 4 size: 256 MB
node 5 cpus: 5
node 5 size: 256 MB

[1] https://en.wikipedia.org/wiki/Error_diffusion

Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170502162955.1610-2-lvivier@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
[ehabkost: s/ram_size/size/ at numa_default_auto_assign_ram()]
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
2017-05-11 16:08:47 -03:00

224 lines
7.3 KiB
C

/* Declarations for use by board files for creating devices. */
#ifndef HW_BOARDS_H
#define HW_BOARDS_H
#include "sysemu/blockdev.h"
#include "sysemu/accel.h"
#include "hw/qdev.h"
#include "qom/object.h"
#include "qom/cpu.h"
void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner,
const char *name,
uint64_t ram_size);
#define TYPE_MACHINE_SUFFIX "-machine"
/* Machine class name that needs to be used for class-name-based machine
* type lookup to work.
*/
#define MACHINE_TYPE_NAME(machinename) (machinename TYPE_MACHINE_SUFFIX)
#define TYPE_MACHINE "machine"
#undef MACHINE /* BSD defines it and QEMU does not use it */
#define MACHINE(obj) \
OBJECT_CHECK(MachineState, (obj), TYPE_MACHINE)
#define MACHINE_GET_CLASS(obj) \
OBJECT_GET_CLASS(MachineClass, (obj), TYPE_MACHINE)
#define MACHINE_CLASS(klass) \
OBJECT_CLASS_CHECK(MachineClass, (klass), TYPE_MACHINE)
MachineClass *find_default_machine(void);
extern MachineState *current_machine;
bool machine_usb(MachineState *machine);
bool machine_kernel_irqchip_allowed(MachineState *machine);
bool machine_kernel_irqchip_required(MachineState *machine);
bool machine_kernel_irqchip_split(MachineState *machine);
int machine_kvm_shadow_mem(MachineState *machine);
int machine_phandle_start(MachineState *machine);
bool machine_dump_guest_core(MachineState *machine);
bool machine_mem_merge(MachineState *machine);
void machine_register_compat_props(MachineState *machine);
HotpluggableCPUList *machine_query_hotpluggable_cpus(MachineState *machine);
/**
* CPUArchId:
* @arch_id - architecture-dependent CPU ID of present or possible CPU
* @cpu - pointer to corresponding CPU object if it's present on NULL otherwise
* @props - CPU object properties, initialized by board
* #vcpus_count - number of threads provided by @cpu object
*/
typedef struct {
uint64_t arch_id;
int64_t vcpus_count;
CpuInstanceProperties props;
Object *cpu;
} CPUArchId;
/**
* CPUArchIdList:
* @len - number of @CPUArchId items in @cpus array
* @cpus - array of present or possible CPUs for current machine configuration
*/
typedef struct {
int len;
CPUArchId cpus[0];
} CPUArchIdList;
/**
* MachineClass:
* @get_hotplug_handler: this function is called during bus-less
* device hotplug. If defined it returns pointer to an instance
* of HotplugHandler object, which handles hotplug operation
* for a given @dev. It may return NULL if @dev doesn't require
* any actions to be performed by hotplug handler.
* @cpu_index_to_socket_id:
* used to provide @cpu_index to socket number mapping, allowing
* a machine to group CPU threads belonging to the same socket/package
* Returns: socket number given cpu_index belongs to.
* @hw_version:
* Value of QEMU_VERSION when the machine was added to QEMU.
* Set only by old machines because they need to keep
* compatibility on code that exposed QEMU_VERSION to guests in
* the past (and now use qemu_hw_version()).
* @possible_cpu_arch_ids:
* Returns an array of @CPUArchId architecture-dependent CPU IDs
* which includes CPU IDs for present and possible to hotplug CPUs.
* Caller is responsible for freeing returned list.
* @has_hotpluggable_cpus:
* If true, board supports CPUs creation with -device/device_add.
* @minimum_page_bits:
* If non-zero, the board promises never to create a CPU with a page size
* smaller than this, so QEMU can use a more efficient larger page
* size than the target architecture's minimum. (Attempting to create
* such a CPU will fail.) Note that changing this is a migration
* compatibility break for the machine.
*/
struct MachineClass {
/*< private >*/
ObjectClass parent_class;
/*< public >*/
const char *family; /* NULL iff @name identifies a standalone machtype */
char *name;
const char *alias;
const char *desc;
void (*init)(MachineState *state);
void (*reset)(void);
void (*hot_add_cpu)(const int64_t id, Error **errp);
int (*kvm_type)(const char *arg);
BlockInterfaceType block_default_type;
int units_per_default_bus;
int max_cpus;
unsigned int no_serial:1,
no_parallel:1,
use_virtcon:1,
use_sclp:1,
no_floppy:1,
no_cdrom:1,
no_sdcard:1,
has_dynamic_sysbus:1,
pci_allow_0_address:1,
legacy_fw_cfg_order:1;
int is_default;
const char *default_machine_opts;
const char *default_boot_order;
const char *default_display;
GArray *compat_props;
const char *hw_version;
ram_addr_t default_ram_size;
bool option_rom_has_mr;
bool rom_file_has_mr;
int minimum_page_bits;
bool has_hotpluggable_cpus;
int numa_mem_align_shift;
void (*numa_auto_assign_ram)(MachineClass *mc, NodeInfo *nodes,
int nb_nodes, ram_addr_t size);
HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
DeviceState *dev);
unsigned (*cpu_index_to_socket_id)(unsigned cpu_index);
const CPUArchIdList *(*possible_cpu_arch_ids)(MachineState *machine);
};
/**
* MachineState:
*/
struct MachineState {
/*< private >*/
Object parent_obj;
Notifier sysbus_notifier;
/*< public >*/
char *accel;
bool kernel_irqchip_allowed;
bool kernel_irqchip_required;
bool kernel_irqchip_split;
int kvm_shadow_mem;
char *dtb;
char *dumpdtb;
int phandle_start;
char *dt_compatible;
bool dump_guest_core;
bool mem_merge;
bool usb;
bool usb_disabled;
bool igd_gfx_passthru;
char *firmware;
bool iommu;
bool suppress_vmdesc;
bool enforce_config_section;
bool enable_graphics;
ram_addr_t ram_size;
ram_addr_t maxram_size;
uint64_t ram_slots;
const char *boot_order;
char *kernel_filename;
char *kernel_cmdline;
char *initrd_filename;
const char *cpu_model;
AccelState *accelerator;
CPUArchIdList *possible_cpus;
};
#define DEFINE_MACHINE(namestr, machine_initfn) \
static void machine_initfn##_class_init(ObjectClass *oc, void *data) \
{ \
MachineClass *mc = MACHINE_CLASS(oc); \
machine_initfn(mc); \
} \
static const TypeInfo machine_initfn##_typeinfo = { \
.name = MACHINE_TYPE_NAME(namestr), \
.parent = TYPE_MACHINE, \
.class_init = machine_initfn##_class_init, \
}; \
static void machine_initfn##_register_types(void) \
{ \
type_register_static(&machine_initfn##_typeinfo); \
} \
type_init(machine_initfn##_register_types)
#define SET_MACHINE_COMPAT(m, COMPAT) \
do { \
int i; \
static GlobalProperty props[] = { \
COMPAT \
{ /* end of list */ } \
}; \
if (!m->compat_props) { \
m->compat_props = g_array_new(false, false, sizeof(void *)); \
} \
for (i = 0; props[i].driver != NULL; i++) { \
GlobalProperty *prop = &props[i]; \
g_array_append_val(m->compat_props, prop); \
} \
} while (0)
#endif