mirror of
https://git.proxmox.com/git/mirror_ubuntu-kernels.git
synced 2025-11-18 09:31:19 +00:00
* Eager page splitting optimization for dirty logging, optionally
allowing for a VM to avoid the cost of hugepage splitting in the stage-2
fault path.
* Arm FF-A proxy for pKVM, allowing a pKVM host to safely interact with
services that live in the Secure world. pKVM intervenes on FF-A calls
to guarantee the host doesn't misuse memory donated to the hyp or a
pKVM guest.
* Support for running the split hypervisor with VHE enabled, known as
'hVHE' mode. This is extremely useful for testing the split
hypervisor on VHE-only systems, and paves the way for new use cases
that depend on having two TTBRs available at EL2.
* Generalized framework for configurable ID registers from userspace.
KVM/arm64 currently prevents arbitrary CPU feature set configuration
from userspace, but the intent is to relax this limitation and allow
userspace to select a feature set consistent with the CPU.
* Enable the use of Branch Target Identification (FEAT_BTI) in the
hypervisor.
* Use a separate set of pointer authentication keys for the hypervisor
when running in protected mode, as the host is untrusted at runtime.
* Ensure timer IRQs are consistently released in the init failure
paths.
* Avoid trapping CTR_EL0 on systems with Enhanced Virtualization Traps
(FEAT_EVT), as it is a register commonly read from userspace.
* Erratum workaround for the upcoming AmpereOne part, which has broken
hardware A/D state management.
RISC-V:
* Redirect AMO load/store misaligned traps to KVM guest
* Trap-n-emulate AIA in-kernel irqchip for KVM guest
* Svnapot support for KVM Guest
s390:
* New uvdevice secret API
* CMM selftest and fixes
* fix racy access to target CPU for diag 9c
x86:
* Fix missing/incorrect #GP checks on ENCLS
* Use standard mmu_notifier hooks for handling APIC access page
* Drop now unnecessary TR/TSS load after VM-Exit on AMD
* Print more descriptive information about the status of SEV and SEV-ES during
module load
* Add a test for splitting and reconstituting hugepages during and after
dirty logging
* Add support for CPU pinning in demand paging test
* Add support for AMD PerfMonV2, with a variety of cleanups and minor fixes
included along the way
* Add a "nx_huge_pages=never" option to effectively avoid creating NX hugepage
recovery threads (because nx_huge_pages=off can be toggled at runtime)
* Move handling of PAT out of MTRR code and dedup SVM+VMX code
* Fix output of PIC poll command emulation when there's an interrupt
* Add a maintainer's handbook to document KVM x86 processes, preferred coding
style, testing expectations, etc.
* Misc cleanups, fixes and comments
Generic:
* Miscellaneous bugfixes and cleanups
Selftests:
* Generate dependency files so that partial rebuilds work as expected
-----BEGIN PGP SIGNATURE-----
iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmSgHrIUHHBib256aW5p
QHJlZGhhdC5jb20ACgkQv/vSX3jHroORcAf+KkBlXwQMf+Q0Hy6Mfe0OtkKmh0Ae
6HJ6dsuMfOHhWv5kgukh+qvuGUGzHq+gpVKmZg2yP3h3cLHOLUAYMCDm+rjXyjsk
F4DbnJLfxq43Pe9PHRKFxxSecRcRYCNox0GD5UYL4PLKcH0FyfQrV+HVBK+GI8L3
FDzUcyJkR12Lcj1qf++7fsbzfOshL0AJPmidQCoc6wkLJpUEr/nYUqlI1Kx3YNuQ
LKmxFHS4l4/O/px3GKNDrLWDbrVlwciGIa3GZLS52PZdW3mAqT+cqcPcYK6SW71P
m1vE80VbNELX5q3YSRoOXtedoZ3Pk97LEmz/xQAsJ/jri0Z5Syk0Ok0m/Q==
=AMXp
-----END PGP SIGNATURE-----
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini:
"ARM64:
- Eager page splitting optimization for dirty logging, optionally
allowing for a VM to avoid the cost of hugepage splitting in the
stage-2 fault path.
- Arm FF-A proxy for pKVM, allowing a pKVM host to safely interact
with services that live in the Secure world. pKVM intervenes on
FF-A calls to guarantee the host doesn't misuse memory donated to
the hyp or a pKVM guest.
- Support for running the split hypervisor with VHE enabled, known as
'hVHE' mode. This is extremely useful for testing the split
hypervisor on VHE-only systems, and paves the way for new use cases
that depend on having two TTBRs available at EL2.
- Generalized framework for configurable ID registers from userspace.
KVM/arm64 currently prevents arbitrary CPU feature set
configuration from userspace, but the intent is to relax this
limitation and allow userspace to select a feature set consistent
with the CPU.
- Enable the use of Branch Target Identification (FEAT_BTI) in the
hypervisor.
- Use a separate set of pointer authentication keys for the
hypervisor when running in protected mode, as the host is untrusted
at runtime.
- Ensure timer IRQs are consistently released in the init failure
paths.
- Avoid trapping CTR_EL0 on systems with Enhanced Virtualization
Traps (FEAT_EVT), as it is a register commonly read from userspace.
- Erratum workaround for the upcoming AmpereOne part, which has
broken hardware A/D state management.
RISC-V:
- Redirect AMO load/store misaligned traps to KVM guest
- Trap-n-emulate AIA in-kernel irqchip for KVM guest
- Svnapot support for KVM Guest
s390:
- New uvdevice secret API
- CMM selftest and fixes
- fix racy access to target CPU for diag 9c
x86:
- Fix missing/incorrect #GP checks on ENCLS
- Use standard mmu_notifier hooks for handling APIC access page
- Drop now unnecessary TR/TSS load after VM-Exit on AMD
- Print more descriptive information about the status of SEV and
SEV-ES during module load
- Add a test for splitting and reconstituting hugepages during and
after dirty logging
- Add support for CPU pinning in demand paging test
- Add support for AMD PerfMonV2, with a variety of cleanups and minor
fixes included along the way
- Add a "nx_huge_pages=never" option to effectively avoid creating NX
hugepage recovery threads (because nx_huge_pages=off can be toggled
at runtime)
- Move handling of PAT out of MTRR code and dedup SVM+VMX code
- Fix output of PIC poll command emulation when there's an interrupt
- Add a maintainer's handbook to document KVM x86 processes,
preferred coding style, testing expectations, etc.
- Misc cleanups, fixes and comments
Generic:
- Miscellaneous bugfixes and cleanups
Selftests:
- Generate dependency files so that partial rebuilds work as
expected"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (153 commits)
Documentation/process: Add a maintainer handbook for KVM x86
Documentation/process: Add a label for the tip tree handbook's coding style
KVM: arm64: Fix misuse of KVM_ARM_VCPU_POWER_OFF bit index
RISC-V: KVM: Remove unneeded semicolon
RISC-V: KVM: Allow Svnapot extension for Guest/VM
riscv: kvm: define vcpu_sbi_ext_pmu in header
RISC-V: KVM: Expose IMSIC registers as attributes of AIA irqchip
RISC-V: KVM: Add in-kernel virtualization of AIA IMSIC
RISC-V: KVM: Expose APLIC registers as attributes of AIA irqchip
RISC-V: KVM: Add in-kernel emulation of AIA APLIC
RISC-V: KVM: Implement device interface for AIA irqchip
RISC-V: KVM: Skeletal in-kernel AIA irqchip support
RISC-V: KVM: Set kvm_riscv_aia_nr_hgei to zero
RISC-V: KVM: Add APLIC related defines
RISC-V: KVM: Add IMSIC related defines
RISC-V: KVM: Implement guest external interrupt line management
KVM: x86: Remove PRIx* definitions as they are solely for user space
s390/uv: Update query for secret-UVCs
s390/uv: replace scnprintf with sysfs_emit
s390/uvdevice: Add 'Lock Secret Store' UVC
...
339 lines
7.7 KiB
C
339 lines
7.7 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Early cpufeature override framework
|
|
*
|
|
* Copyright (C) 2020 Google LLC
|
|
* Author: Marc Zyngier <maz@kernel.org>
|
|
*/
|
|
|
|
#include <linux/ctype.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/libfdt.h>
|
|
|
|
#include <asm/cacheflush.h>
|
|
#include <asm/cpufeature.h>
|
|
#include <asm/setup.h>
|
|
|
|
#define FTR_DESC_NAME_LEN 20
|
|
#define FTR_DESC_FIELD_LEN 10
|
|
#define FTR_ALIAS_NAME_LEN 30
|
|
#define FTR_ALIAS_OPTION_LEN 116
|
|
|
|
static u64 __boot_status __initdata;
|
|
|
|
struct ftr_set_desc {
|
|
char name[FTR_DESC_NAME_LEN];
|
|
struct arm64_ftr_override *override;
|
|
struct {
|
|
char name[FTR_DESC_FIELD_LEN];
|
|
u8 shift;
|
|
u8 width;
|
|
bool (*filter)(u64 val);
|
|
} fields[];
|
|
};
|
|
|
|
/*
 * Build a field entry for a struct ftr_set_desc table: a 4-bit wide field
 * called @n, located at bit @s, with optional filter callback @f (or NULL).
 */
#define FIELD(n, s, f)			\
	{				\
		.name	= n,		\
		.shift	= (s),		\
		.width	= 4,		\
		.filter	= (f),		\
	}
|
|
|
|
/*
 * Filter for "id_aa64mmfr1.vh".
 *
 * Returns false (override rejected) only when the boot status is exactly
 * BOOT_CPU_FLAG_E2H | BOOT_CPU_MODE_EL2 and the requested value is 0;
 * every other combination is accepted.
 */
static bool __init mmfr1_vh_filter(u64 val)
{
	const u64 stuck_in_vhe = BOOT_CPU_FLAG_E2H | BOOT_CPU_MODE_EL2;

	/* Anything other than that specific boot status: nothing to veto */
	if (__boot_status != stuck_in_vhe)
		return true;

	/*
	 * Getting here while running VHE means this is one of the funky
	 * CPUs that are stuck in VHE. A request to force nVHE (vh=0)
	 * cannot be honoured on those, so refuse it.
	 */
	return val != 0;
}
|
|
|
|
/* "id_aa64mmfr1.*" overrides, accumulated in id_aa64mmfr1_override */
static const struct ftr_set_desc mmfr1 __initconst = {
	.name		= "id_aa64mmfr1",
	.override	= &id_aa64mmfr1_override,
	.fields		= {
		FIELD("vh", ID_AA64MMFR1_EL1_VH_SHIFT, mmfr1_vh_filter),
		{ /* end of list */ }
	},
};
|
|
|
|
/*
 * Filter for "id_aa64pfr0.sve". Never rejects the override, but has a side
 * effect when the requested value is 0: id_aa64zfr0_override is set to
 * force the whole register to 0.
 */
static bool __init pfr0_sve_filter(u64 val)
{
	/* Non-zero value: nothing else to adjust */
	if (val)
		return true;

	/*
	 * Turning SVE off implies turning off every feature that hangs
	 * off it; overriding all of id_aa64zfr0_el1 to 0 is the simplest
	 * way of achieving that.
	 */
	id_aa64zfr0_override.mask = GENMASK(63, 0);
	id_aa64zfr0_override.val = 0;

	return true;
}
|
|
|
|
/* "id_aa64pfr0.*" overrides, accumulated in id_aa64pfr0_override */
static const struct ftr_set_desc pfr0 __initconst = {
	.name		= "id_aa64pfr0",
	.override	= &id_aa64pfr0_override,
	.fields		= {
		FIELD("sve", ID_AA64PFR0_EL1_SVE_SHIFT, pfr0_sve_filter),
		{ /* end of list */ }
	},
};
|
|
|
|
/*
 * Filter for "id_aa64pfr1.sme". Never rejects the override, but has a side
 * effect when the requested value is 0: id_aa64smfr0_override is set to
 * force the whole register to 0.
 */
static bool __init pfr1_sme_filter(u64 val)
{
	/* Non-zero value: nothing else to adjust */
	if (val)
		return true;

	/*
	 * Same reasoning as for SVE: disabling SME means disabling all the
	 * features that depend on it, so id_aa64smfr0_el1 is overridden to
	 * 0 in its entirety.
	 */
	id_aa64smfr0_override.mask = GENMASK(63, 0);
	id_aa64smfr0_override.val = 0;

	return true;
}
|
|
|
|
/* "id_aa64pfr1.*" overrides, accumulated in id_aa64pfr1_override */
static const struct ftr_set_desc pfr1 __initconst = {
	.name		= "id_aa64pfr1",
	.override	= &id_aa64pfr1_override,
	.fields		= {
		FIELD("bt",  ID_AA64PFR1_EL1_BT_SHIFT,  NULL),
		FIELD("mte", ID_AA64PFR1_EL1_MTE_SHIFT, NULL),
		FIELD("sme", ID_AA64PFR1_EL1_SME_SHIFT, pfr1_sme_filter),
		{ /* end of list */ }
	},
};
|
|
|
|
/* "id_aa64isar1.*" overrides, accumulated in id_aa64isar1_override */
static const struct ftr_set_desc isar1 __initconst = {
	.name		= "id_aa64isar1",
	.override	= &id_aa64isar1_override,
	.fields		= {
		FIELD("gpi", ID_AA64ISAR1_EL1_GPI_SHIFT, NULL),
		FIELD("gpa", ID_AA64ISAR1_EL1_GPA_SHIFT, NULL),
		FIELD("api", ID_AA64ISAR1_EL1_API_SHIFT, NULL),
		FIELD("apa", ID_AA64ISAR1_EL1_APA_SHIFT, NULL),
		{ /* end of list */ }
	},
};
|
|
|
|
/* "id_aa64isar2.*" overrides, accumulated in id_aa64isar2_override */
static const struct ftr_set_desc isar2 __initconst = {
	.name		= "id_aa64isar2",
	.override	= &id_aa64isar2_override,
	.fields		= {
		FIELD("gpa3", ID_AA64ISAR2_EL1_GPA3_SHIFT, NULL),
		FIELD("apa3", ID_AA64ISAR2_EL1_APA3_SHIFT, NULL),
		FIELD("mops", ID_AA64ISAR2_EL1_MOPS_SHIFT, NULL),
		{ /* end of list */ }
	},
};
|
|
|
|
/* "id_aa64smfr0.*" overrides, accumulated in id_aa64smfr0_override */
static const struct ftr_set_desc smfr0 __initconst = {
	.name		= "id_aa64smfr0",
	.override	= &id_aa64smfr0_override,
	.fields		= {
		FIELD("smever", ID_AA64SMFR0_EL1_SMEver_SHIFT, NULL),
		/*
		 * FA64 is only one bit wide, so it cannot be described with
		 * FIELD(), which hard-codes a width of 4. No filter is set.
		 */
		{
			.name	= "fa64",
			.shift	= ID_AA64SMFR0_EL1_FA64_SHIFT,
			.width	= 1,
		},
		{ /* end of list */ }
	},
};
|
|
|
|
/*
 * Filter for "arm64_sw.hvhe".
 *
 * The override is accepted only if all of the following hold:
 *  - the requested value is exactly 1,
 *  - the low 32 bits of the boot status equal BOOT_CPU_MODE_EL2,
 *  - the VH field of ID_AA64MMFR1_EL1, as read from the CPU, is non-zero.
 */
static bool __init hvhe_filter(u64 val)
{
	u64 mmfr1 = read_sysreg(id_aa64mmfr1_el1);

	if (val != 1)
		return false;

	if (lower_32_bits(__boot_status) != BOOT_CPU_MODE_EL2)
		return false;

	return cpuid_feature_extract_unsigned_field(mmfr1,
						    ID_AA64MMFR1_EL1_VH_SHIFT) != 0;
}
|
|
|
|
/*
 * "arm64_sw.*" overrides, accumulated in arm64_sw_feature_override. The
 * field positions are given by the ARM64_SW_FEATURE_OVERRIDE_* constants.
 */
static const struct ftr_set_desc sw_features __initconst = {
	.name		= "arm64_sw",
	.override	= &arm64_sw_feature_override,
	.fields		= {
		FIELD("nokaslr", ARM64_SW_FEATURE_OVERRIDE_NOKASLR, NULL),
		FIELD("hvhe",    ARM64_SW_FEATURE_OVERRIDE_HVHE,    hvhe_filter),
		{ /* end of list */ }
	},
};
|
|
|
|
/*
 * Every descriptor known to the parser. match_options() walks this table
 * in order, and init_feature_override() uses it to reset and then flush
 * each override.
 */
static const struct ftr_set_desc * const regs[] __initconst = {
	&mmfr1,
	&pfr0,
	&pfr1,
	&isar1,
	&isar2,
	&smfr0,
	&sw_features,
};
|
|
|
|
static const struct {
|
|
char alias[FTR_ALIAS_NAME_LEN];
|
|
char feature[FTR_ALIAS_OPTION_LEN];
|
|
} aliases[] __initconst = {
|
|
{ "kvm-arm.mode=nvhe", "id_aa64mmfr1.vh=0" },
|
|
{ "kvm-arm.mode=protected", "id_aa64mmfr1.vh=0" },
|
|
{ "arm64.nosve", "id_aa64pfr0.sve=0" },
|
|
{ "arm64.nosme", "id_aa64pfr1.sme=0" },
|
|
{ "arm64.nobti", "id_aa64pfr1.bt=0" },
|
|
{ "arm64.nopauth",
|
|
"id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 "
|
|
"id_aa64isar1.api=0 id_aa64isar1.apa=0 "
|
|
"id_aa64isar2.gpa3=0 id_aa64isar2.apa3=0" },
|
|
{ "arm64.nomops", "id_aa64isar2.mops=0" },
|
|
{ "arm64.nomte", "id_aa64pfr1.mte=0" },
|
|
{ "nokaslr", "arm64_sw.nokaslr=1" },
|
|
};
|
|
|
|
static int __init parse_nokaslr(char *unused)
|
|
{
|
|
/* nokaslr param handling is done by early cpufeature code */
|
|
return 0;
|
|
}
|
|
early_param("nokaslr", parse_nokaslr);
|
|
|
|
/*
 * Check whether @cmdline is an assignment to field @f of @reg, i.e. whether
 * it starts with "<reg name>.<field name>=" (as compared by parameqn()).
 *
 * On a match, the text following the '=' is converted with kstrtou64()
 * (base auto-detected) into *@v and the conversion status is returned.
 * Without a match, -1 is returned and *@v is left untouched.
 *
 * Callers treat any non-zero return value as "no usable override".
 */
static int __init find_field(const char *cmdline,
			     const struct ftr_set_desc *reg, int f, u64 *v)
{
	/* Room for both names plus the '.' and '=' separators */
	char key[FTR_DESC_NAME_LEN + FTR_DESC_FIELD_LEN + 2];
	int key_len;

	key_len = snprintf(key, ARRAY_SIZE(key), "%s.%s=",
			   reg->name, reg->fields[f].name);

	if (parameqn(cmdline, key, key_len))
		return kstrtou64(cmdline + key_len, 0, v);

	return -1;
}
|
|
|
|
/*
 * Apply a single command line option to the override tables.
 *
 * @cmdline holds one option. Every field of every descriptor in regs[] is
 * tried in turn; for the first field that matches and whose filter (if any)
 * accepts the value, the field is written into the descriptor's override
 * (value bits replaced, mask bits set) and the search stops.
 *
 * A matching field whose filter rejects the value is flagged instead (see
 * below) and the search carries on with the remaining fields.
 */
static void __init match_options(const char *cmdline)
{
	int r;

	for (r = 0; r < ARRAY_SIZE(regs); r++) {
		const struct ftr_set_desc *reg = regs[r];
		struct arm64_ftr_override *ovr = reg->override;
		int f;

		if (!ovr)
			continue;

		/* The field list ends at the first entry with an empty name */
		for (f = 0; reg->fields[f].name[0] != '\0'; f++) {
			u64 lsb = reg->fields[f].shift;
			/* A width of 0 stands for the default of 4 bits */
			u64 nbits = reg->fields[f].width ?
				    reg->fields[f].width : 4;
			u64 field_mask = GENMASK_ULL(lsb + nbits - 1, lsb);
			u64 v;

			if (find_field(cmdline, reg, f, &v) != 0)
				continue;

			/*
			 * A filtered-out override is advertised by an
			 * otherwise impossible combination: all-ones in
			 * the value with the corresponding mask bits
			 * cleared. This is admittedly fragile.
			 */
			if (reg->fields[f].filter &&
			    !reg->fields[f].filter(v)) {
				ovr->val  |= field_mask;
				ovr->mask &= ~field_mask;
				continue;
			}

			/* Bits of v that do not fit in the field are dropped */
			ovr->val   = (ovr->val & ~field_mask) |
				     ((v << lsb) & field_mask);
			ovr->mask |= field_mask;

			return;
		}
	}
}
|
|
|
|
static __init void __parse_cmdline(const char *cmdline, bool parse_aliases)
|
|
{
|
|
do {
|
|
char buf[256];
|
|
size_t len;
|
|
int i;
|
|
|
|
cmdline = skip_spaces(cmdline);
|
|
|
|
for (len = 0; cmdline[len] && !isspace(cmdline[len]); len++);
|
|
if (!len)
|
|
return;
|
|
|
|
len = min(len, ARRAY_SIZE(buf) - 1);
|
|
strncpy(buf, cmdline, len);
|
|
buf[len] = 0;
|
|
|
|
if (strcmp(buf, "--") == 0)
|
|
return;
|
|
|
|
cmdline += len;
|
|
|
|
match_options(buf);
|
|
|
|
for (i = 0; parse_aliases && i < ARRAY_SIZE(aliases); i++)
|
|
if (parameq(buf, aliases[i].alias))
|
|
__parse_cmdline(aliases[i].feature, false);
|
|
} while (1);
|
|
}
|
|
|
|
/*
 * Fetch the "bootargs" property of the "/chosen" node from the early FDT.
 *
 * Returns a pointer to the property data, or NULL if there is no early FDT,
 * no "/chosen" node, no "bootargs" property, or if the property is an empty
 * string.
 */
static __init const u8 *get_bootargs_cmdline(void)
{
	void *fdt = get_early_fdt_ptr();
	const u8 *bootargs;
	int chosen;

	if (!fdt)
		return NULL;

	chosen = fdt_path_offset(fdt, "/chosen");
	if (chosen < 0)
		return NULL;

	bootargs = fdt_getprop(fdt, chosen, "bootargs", NULL);
	if (!bootargs || bootargs[0] == '\0')
		return NULL;

	return bootargs;
}
|
|
|
|
static __init void parse_cmdline(void)
|
|
{
|
|
const u8 *prop = get_bootargs_cmdline();
|
|
|
|
if (IS_ENABLED(CONFIG_CMDLINE_FORCE) || !prop)
|
|
__parse_cmdline(CONFIG_CMDLINE, true);
|
|
|
|
if (!IS_ENABLED(CONFIG_CMDLINE_FORCE) && prop)
|
|
__parse_cmdline(prop, true);
|
|
}
|
|
|
|
/* Keep checkers quiet */
|
|
void init_feature_override(u64 boot_status);
|
|
|
|
asmlinkage void __init init_feature_override(u64 boot_status)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < ARRAY_SIZE(regs); i++) {
|
|
if (regs[i]->override) {
|
|
regs[i]->override->val = 0;
|
|
regs[i]->override->mask = 0;
|
|
}
|
|
}
|
|
|
|
__boot_status = boot_status;
|
|
|
|
parse_cmdline();
|
|
|
|
for (i = 0; i < ARRAY_SIZE(regs); i++) {
|
|
if (regs[i]->override)
|
|
dcache_clean_inval_poc((unsigned long)regs[i]->override,
|
|
(unsigned long)regs[i]->override +
|
|
sizeof(*regs[i]->override));
|
|
}
|
|
}
|