From 38e2e370c8e485a32f202f3e90ded05fa8c24bbe Mon Sep 17 00:00:00 2001 From: Thomas Lamprecht Date: Fri, 17 Jan 2025 09:16:28 +0100 Subject: [PATCH] update submodule and patches for Ubuntu-6.8.0-53.54 Signed-off-by: Thomas Lamprecht --- ...ides-for-missing-ACS-capabilities-4..patch | 8 +- ...-default-dynamic-halt-polling-growth.patch | 2 +- ...de-unregister_netdevice-refcount-lea.patch | 4 +- ...sk-out-PKRU-bit-in-xfeatures-if-vCPU.patch | 16 +- ...allow-pass-through-on-broken-hardwar.patch | 2 +- ...UCE-iommu-intel-disable-DMAR-for-SKL.patch | 54 +++++- ...t-EOPNOTSUPP-for-IOCB_NOWAIT-like-EA.patch | 45 ----- ...t-subreq-iov-iter-before-tail-clean.patch} | 0 ...-virtualized-VMLOAD-VMSAVE-on-Zen4-.patch} | 0 ...UID.0xD-XSTATE-offsets-sizes-during-.patch | 165 ------------------ submodules/ubuntu-kernel | 2 +- 11 files changed, 67 insertions(+), 231 deletions(-) delete mode 100644 patches/kernel/0016-io_uring-rw-treat-EOPNOTSUPP-for-IOCB_NOWAIT-like-EA.patch rename patches/kernel/{0017-netfs-reset-subreq-iov-iter-before-tail-clean.patch => 0016-netfs-reset-subreq-iov-iter-before-tail-clean.patch} (100%) rename patches/kernel/{0018-x86-CPU-AMD-Clear-virtualized-VMLOAD-VMSAVE-on-Zen4-.patch => 0017-x86-CPU-AMD-Clear-virtualized-VMLOAD-VMSAVE-on-Zen4-.patch} (100%) delete mode 100644 patches/kernel/0019-KVM-x86-Cache-CPUID.0xD-XSTATE-offsets-sizes-during-.patch diff --git a/patches/kernel/0004-pci-Enable-overrides-for-missing-ACS-capabilities-4..patch b/patches/kernel/0004-pci-Enable-overrides-for-missing-ACS-capabilities-4..patch index d239cbf..6a807d6 100644 --- a/patches/kernel/0004-pci-Enable-overrides-for-missing-ACS-capabilities-4..patch +++ b/patches/kernel/0004-pci-Enable-overrides-for-missing-ACS-capabilities-4..patch @@ -55,7 +55,7 @@ Signed-off-by: Thomas Lamprecht 2 files changed, 111 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt -index e9be08602d4db4e9eeca3d3d8cc9ee2e8a330ee8..2ed9b047bbd8fab74596fbc3ccb035fdbb63aa4e 100644 +index 4d27542bebcbff40ae27446410207cd32272f2b7..87480be6e054df3927c5691a49c0dd185fba0410 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4397,6 +4397,15 @@ @@ -75,10 +75,10 @@ index e9be08602d4db4e9eeca3d3d8cc9ee2e8a330ee8..2ed9b047bbd8fab74596fbc3ccb035fd Safety option to keep boot IRQs enabled. This should never be necessary. diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c -index ce469d84ebaec642fdcffc66334b5d0c4ebb672c..4f163ef55e7b3272dc31fdb16aa9e16e46965496 100644 +index bd2470ec3922f13843f2adcf0df41b5be91fbf94..0d0781ede9235c154e061eb11b38815931c352f7 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c -@@ -287,6 +287,106 @@ static int __init pci_apply_final_quirks(void) +@@ -300,6 +300,106 @@ static int __init pci_apply_final_quirks(void) } fs_initcall_sync(pci_apply_final_quirks); @@ -185,7 +185,7 @@ index ce469d84ebaec642fdcffc66334b5d0c4ebb672c..4f163ef55e7b3272dc31fdb16aa9e16e /* * Decoding should be disabled for a PCI device during BAR sizing to avoid * conflict. But doing so may cause problems on host bridge and perhaps other -@@ -5100,6 +5200,8 @@ static const struct pci_dev_acs_enabled { +@@ -5113,6 +5213,8 @@ static const struct pci_dev_acs_enabled { { PCI_VENDOR_ID_CAVIUM, 0xA060, pci_quirk_mf_endpoint_acs }, /* APM X-Gene */ { PCI_VENDOR_ID_AMCC, 0xE004, pci_quirk_xgene_acs }, diff --git a/patches/kernel/0005-kvm-disable-default-dynamic-halt-polling-growth.patch b/patches/kernel/0005-kvm-disable-default-dynamic-halt-polling-growth.patch index e85f2ec..69afad9 100644 --- a/patches/kernel/0005-kvm-disable-default-dynamic-halt-polling-growth.patch +++ b/patches/kernel/0005-kvm-disable-default-dynamic-halt-polling-growth.patch @@ -13,7 +13,7 @@ Signed-off-by: Thomas Lamprecht 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c -index 6a56de7ff82e707035a1ade82c7f5fafba4b1f26..96bd40a73e0ed5872eca156ef1f61ab4bda6661f 100644 +index 6de2c9889d4722b39717896614f3ffc20a879d35..fef608f130d3ba0934dba0dd74c1dc1bd6446b87 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -82,7 +82,7 @@ module_param(halt_poll_ns, uint, 0644); diff --git a/patches/kernel/0006-net-core-downgrade-unregister_netdevice-refcount-lea.patch b/patches/kernel/0006-net-core-downgrade-unregister_netdevice-refcount-lea.patch index a75076e..60bac14 100644 --- a/patches/kernel/0006-net-core-downgrade-unregister_netdevice-refcount-lea.patch +++ b/patches/kernel/0006-net-core-downgrade-unregister_netdevice-refcount-lea.patch @@ -14,10 +14,10 @@ Signed-off-by: Fabian Grünbichler 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c -index a32811aebde5979c5ac2023b3925f6f076a42563..15078ab81ec824c56307b77f784fbe30b66d303e 100644 +index e578afa66bcd2189a5113ccef69227fa619dc16c..86adbe2df8e860d524c6a74e758502b80f64a79d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c -@@ -10471,7 +10471,7 @@ static struct net_device *netdev_wait_allrefs_any(struct list_head *list) +@@ -10475,7 +10475,7 @@ static struct net_device *netdev_wait_allrefs_any(struct list_head *list) if (time_after(jiffies, warning_time + READ_ONCE(netdev_unregister_timeout_secs) * HZ)) { list_for_each_entry(dev, list, todo_list) { diff --git a/patches/kernel/0008-kvm-xsave-set-mask-out-PKRU-bit-in-xfeatures-if-vCPU.patch b/patches/kernel/0008-kvm-xsave-set-mask-out-PKRU-bit-in-xfeatures-if-vCPU.patch index 9621750..643b7d0 100644 --- a/patches/kernel/0008-kvm-xsave-set-mask-out-PKRU-bit-in-xfeatures-if-vCPU.patch +++ b/patches/kernel/0008-kvm-xsave-set-mask-out-PKRU-bit-in-xfeatures-if-vCPU.patch @@ -73,15 +73,15 @@ maintenance burden is high. Signed-off-by: Thomas Lamprecht --- arch/x86/kvm/cpuid.c | 6 ++++++ - arch/x86/kvm/cpuid.h | 2 ++ + arch/x86/kvm/cpuid.h | 3 +++ arch/x86/kvm/x86.c | 13 +++++++++++++ - 3 files changed, 21 insertions(+) + 3 files changed, 22 insertions(+) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c -index ce1499732cb86f7a547ec319ef3430f6e463acdc..d68c04bde5ededcd2ffb24300646f4a9d9c1bc9a 100644 +index c4a369bb1444e07d8e580cd39e95fbc4c702a17c..be6e9995fb6dcd84aa21ff7a324b585d0a4a008f 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c -@@ -262,6 +262,12 @@ static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent) +@@ -283,6 +283,12 @@ static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent) return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0; } @@ -95,20 +95,22 @@ index ce1499732cb86f7a547ec319ef3430f6e463acdc..d68c04bde5ededcd2ffb24300646f4a9 int nent) { diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h -index 23dbb9eb277c7465f19bc5af137b79d5a2b894d1..07da153802e4d4f75219d77596e323347393eab7 100644 +index da4e23e32cffa430f04d1589d6fa2d4a856ed714..e33c2269c5a075d57c53d817ebeaf1b6a1d6a227 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h -@@ -32,6 +32,8 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, +@@ -32,7 +32,10 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, bool exact_only); +bool vcpu_supports_xsave_pkru(struct kvm_vcpu *vcpu); ++ + void __init kvm_init_xstate_sizes(void); + u32 xstate_required_size(u64 xstate_bv, bool compacted); int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index e36bf6162fc801d5ad505f0fd28baf5c7c2278ce..08ef6f01bf7e8c9c6cdb842f23adac09195c1aff 100644 +index 898b191954eb2423a42a62bae40be72a7e91f38c..558e267ae0def34227f6f854a0955a26766a6e2c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5580,6 +5580,19 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, diff --git a/patches/kernel/0009-allow-opt-in-to-allow-pass-through-on-broken-hardwar.patch b/patches/kernel/0009-allow-opt-in-to-allow-pass-through-on-broken-hardwar.patch index f308440..bb03c5b 100644 --- a/patches/kernel/0009-allow-opt-in-to-allow-pass-through-on-broken-hardwar.patch +++ b/patches/kernel/0009-allow-opt-in-to-allow-pass-through-on-broken-hardwar.patch @@ -11,7 +11,7 @@ Signed-off-by: Thomas Lamprecht 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c -index 94518306c6f96fdf590cdf638e7f1e73c176906a..d9cc1264e40afa9250305b960419185e23863ccc 100644 +index 3ea9bbaf77f1e7dd89326cadad13994257aed3aa..0cbec26d432e75098d6f6272647511efc6e1a517 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -234,6 +234,7 @@ static int dmar_map_gfx = 1; diff --git a/patches/kernel/0015-Revert-UBUNTU-SAUCE-iommu-intel-disable-DMAR-for-SKL.patch b/patches/kernel/0015-Revert-UBUNTU-SAUCE-iommu-intel-disable-DMAR-for-SKL.patch index 534e836..2d7eb12 100644 --- a/patches/kernel/0015-Revert-UBUNTU-SAUCE-iommu-intel-disable-DMAR-for-SKL.patch +++ b/patches/kernel/0015-Revert-UBUNTU-SAUCE-iommu-intel-disable-DMAR-for-SKL.patch @@ -8,16 +8,19 @@ Some of our users use the iGPU for PCI-passthrough on those plattforms, which seems broken with this commit added. https://forum.proxmox.com/threads/.157266 -This reverts commit b310f5f58c83756fc164f7d391d76f0df9cc65c3. +This reverts both, commit b310f5f58c83 ("UBUNTU: SAUCE: iommu/intel: +disable DMAR for SKL integrated gfx") and also commit 252bf1619fd5 +("UBUNTU: SAUCE: iommu/intel: disable DMAR for KBL and CML integrated +gfx"). --- - drivers/iommu/intel/iommu.c | 27 --------------------------- - 1 file changed, 27 deletions(-) + drivers/iommu/intel/iommu.c | 68 ------------------------------------- + 1 file changed, 68 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c -index d9cc1264e40afa9250305b960419185e23863ccc..89fdcaad20e1e8251fe379cded7701c762628871 100644 +index 0cbec26d432e75098d6f6272647511efc6e1a517..9d88215d0350691ec0a1ecbf76344668ef4fd740 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c -@@ -5034,33 +5034,6 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx); +@@ -5034,74 +5034,6 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x1632, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163A, quirk_iommu_igfx); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x163D, quirk_iommu_igfx); @@ -47,6 +50,47 @@ index d9cc1264e40afa9250305b960419185e23863ccc..89fdcaad20e1e8251fe379cded7701c7 -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x193A, quirk_iommu_igfx); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x193B, quirk_iommu_igfx); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x193D, quirk_iommu_igfx); +- +-/* KBL */ +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5902, quirk_iommu_igfx); +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5906, quirk_iommu_igfx); +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5908, quirk_iommu_igfx); +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x590A, quirk_iommu_igfx); +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x590B, quirk_iommu_igfx); +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x590E, quirk_iommu_igfx); +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5912, quirk_iommu_igfx); +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5913, quirk_iommu_igfx); +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5915, quirk_iommu_igfx); +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5916, quirk_iommu_igfx); +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5917, quirk_iommu_igfx); +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x591A, quirk_iommu_igfx); +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x591B, quirk_iommu_igfx); +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x591D, quirk_iommu_igfx); +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x591E, quirk_iommu_igfx); +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5921, quirk_iommu_igfx); +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5923, quirk_iommu_igfx); +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5926, quirk_iommu_igfx); +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x5927, quirk_iommu_igfx); +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x593B, quirk_iommu_igfx); +- +-/* CML */ +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9B21, quirk_iommu_igfx); +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9BA2, quirk_iommu_igfx); +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9BA4, quirk_iommu_igfx); +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9BA5, quirk_iommu_igfx); +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9BA8, quirk_iommu_igfx); +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9BAA, quirk_iommu_igfx); +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9BAC, quirk_iommu_igfx); +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9BC2, quirk_iommu_igfx); +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9BC4, quirk_iommu_igfx); +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9BC5, quirk_iommu_igfx); +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9BC6, quirk_iommu_igfx); +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9BC8, quirk_iommu_igfx); +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9BE6, quirk_iommu_igfx); +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9BF6, quirk_iommu_igfx); +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9B41, quirk_iommu_igfx); +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9BCA, quirk_iommu_igfx); +-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x9BCC, quirk_iommu_igfx); - /* disable IPU dmar support */ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, quirk_iommu_ipu); diff --git a/patches/kernel/0016-io_uring-rw-treat-EOPNOTSUPP-for-IOCB_NOWAIT-like-EA.patch b/patches/kernel/0016-io_uring-rw-treat-EOPNOTSUPP-for-IOCB_NOWAIT-like-EA.patch deleted file mode 100644 index 1f320cf..0000000 --- a/patches/kernel/0016-io_uring-rw-treat-EOPNOTSUPP-for-IOCB_NOWAIT-like-EA.patch +++ /dev/null @@ -1,45 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Jens Axboe -Date: Tue, 10 Sep 2024 08:30:57 -0600 -Subject: [PATCH] io_uring/rw: treat -EOPNOTSUPP for IOCB_NOWAIT like -EAGAIN - -Some file systems, ocfs2 in this case, will return -EOPNOTSUPP for -an IOCB_NOWAIT read/write attempt. While this can be argued to be -correct, the usual return value for something that requires blocking -issue is -EAGAIN. - -A refactoring io_uring commit dropped calling kiocb_done() for -negative return values, which is otherwise where we already do that -transformation. To ensure we catch it in both spots, check it in -__io_read() itself as well. - -Reported-by: Robert Sander -Link: https://fosstodon.org/@gurubert@mastodon.gurubert.de/113112431889638440 -Cc: stable@vger.kernel.org -Fixes: a08d195b586a ("io_uring/rw: split io_read() into a helper") -Signed-off-by: Jens Axboe -(cherry picked from commit c0a9d496e0fece67db777bd48550376cf2960c47) -Signed-off-by: Daniel Kral ---- - io_uring/rw.c | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/io_uring/rw.c b/io_uring/rw.c -index c3c154790e45230a1d503173e287c6305ab22916..ed7f6709757268faacd69ed4c83a17fa155b3c98 100644 ---- a/io_uring/rw.c -+++ b/io_uring/rw.c -@@ -825,6 +825,14 @@ static int __io_read(struct io_kiocb *req, unsigned int issue_flags) - - ret = io_iter_do_read(rw, &s->iter); - -+ /* -+ * Some file systems like to return -EOPNOTSUPP for an IOCB_NOWAIT -+ * issue, even though they should be returning -EAGAIN. To be safe, -+ * retry from blocking context for either. -+ */ -+ if (ret == -EOPNOTSUPP && force_nonblock) -+ ret = -EAGAIN; -+ - if (ret == -EAGAIN || (req->flags & REQ_F_REISSUE)) { - req->flags &= ~REQ_F_REISSUE; - /* diff --git a/patches/kernel/0017-netfs-reset-subreq-iov-iter-before-tail-clean.patch b/patches/kernel/0016-netfs-reset-subreq-iov-iter-before-tail-clean.patch similarity index 100% rename from patches/kernel/0017-netfs-reset-subreq-iov-iter-before-tail-clean.patch rename to patches/kernel/0016-netfs-reset-subreq-iov-iter-before-tail-clean.patch diff --git a/patches/kernel/0018-x86-CPU-AMD-Clear-virtualized-VMLOAD-VMSAVE-on-Zen4-.patch b/patches/kernel/0017-x86-CPU-AMD-Clear-virtualized-VMLOAD-VMSAVE-on-Zen4-.patch similarity index 100% rename from patches/kernel/0018-x86-CPU-AMD-Clear-virtualized-VMLOAD-VMSAVE-on-Zen4-.patch rename to patches/kernel/0017-x86-CPU-AMD-Clear-virtualized-VMLOAD-VMSAVE-on-Zen4-.patch diff --git a/patches/kernel/0019-KVM-x86-Cache-CPUID.0xD-XSTATE-offsets-sizes-during-.patch b/patches/kernel/0019-KVM-x86-Cache-CPUID.0xD-XSTATE-offsets-sizes-during-.patch deleted file mode 100644 index beda78e..0000000 --- a/patches/kernel/0019-KVM-x86-Cache-CPUID.0xD-XSTATE-offsets-sizes-during-.patch +++ /dev/null @@ -1,165 +0,0 @@ -From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 -From: Sean Christopherson -Date: Tue, 10 Dec 2024 17:32:58 -0800 -Subject: [PATCH] KVM: x86: Cache CPUID.0xD XSTATE offsets+sizes during module - init - -Snapshot the output of CPUID.0xD.[1..n] during kvm.ko initiliaization to -avoid the overead of CPUID during runtime. The offset, size, and metadata -for CPUID.0xD.[1..n] sub-leaves does not depend on XCR0 or XSS values, i.e. -is constant for a given CPU, and thus can be cached during module load. - -On Intel's Emerald Rapids, CPUID is *wildly* expensive, to the point where -recomputing XSAVE offsets and sizes results in a 4x increase in latency of -nested VM-Enter and VM-Exit (nested transitions can trigger -xstate_required_size() multiple times per transition), relative to using -cached values. The issue is easily visible by running `perf top` while -triggering nested transitions: kvm_update_cpuid_runtime() shows up at a -whopping 50%. - -As measured via RDTSC from L2 (using KVM-Unit-Test's CPUID VM-Exit test -and a slightly modified L1 KVM to handle CPUID in the fastpath), a nested -roundtrip to emulate CPUID on Skylake (SKX), Icelake (ICX), and Emerald -Rapids (EMR) takes: - - SKX 11650 - ICX 22350 - EMR 28850 - -Using cached values, the latency drops to: - - SKX 6850 - ICX 9000 - EMR 7900 - -The underlying issue is that CPUID itself is slow on ICX, and comically -slow on EMR. The problem is exacerbated on CPUs which support XSAVES -and/or XSAVEC, as KVM invokes xstate_required_size() twice on each -runtime CPUID update, and because there are more supported XSAVE features -(CPUID for supported XSAVE feature sub-leafs is significantly slower). - - SKX: - CPUID.0xD.2 = 348 cycles - CPUID.0xD.3 = 400 cycles - CPUID.0xD.4 = 276 cycles - CPUID.0xD.5 = 236 cycles - - - EMR: - CPUID.0xD.2 = 1138 cycles - CPUID.0xD.3 = 1362 cycles - CPUID.0xD.4 = 1068 cycles - CPUID.0xD.5 = 910 cycles - CPUID.0xD.6 = 914 cycles - CPUID.0xD.7 = 1350 cycles - CPUID.0xD.8 = 734 cycles - CPUID.0xD.9 = 766 cycles - CPUID.0xD.10 = 732 cycles - CPUID.0xD.11 = 718 cycles - CPUID.0xD.12 = 734 cycles - CPUID.0xD.13 = 1700 cycles - CPUID.0xD.14 = 1126 cycles - CPUID.0xD.15 = 898 cycles - CPUID.0xD.16 = 716 cycles - CPUID.0xD.17 = 748 cycles - CPUID.0xD.18 = 776 cycles - -Note, updating runtime CPUID information multiple times per nested -transition is itself a flaw, especially since CPUID is a mandotory -intercept on both Intel and AMD. E.g. KVM doesn't need to ensure emulated -CPUID state is up-to-date while running L2. That flaw will be fixed in a -future patch, as deferring runtime CPUID updates is more subtle than it -appears at first glance, the benefits aren't super critical to have once -the XSAVE issue is resolved, and caching CPUID output is desirable even if -KVM's updates are deferred. - -Cc: Jim Mattson -Cc: stable@vger.kernel.org -Signed-off-by: Sean Christopherson -Message-ID: <20241211013302.1347853-2-seanjc@google.com> -Signed-off-by: Paolo Bonzini -(cherry picked from commit 1201f226c863b7da739f7420ddba818cedf372fc) -Signed-off-by: Fiona Ebner ---- - arch/x86/kvm/cpuid.c | 31 ++++++++++++++++++++++++++----- - arch/x86/kvm/cpuid.h | 1 + - arch/x86/kvm/x86.c | 2 ++ - 3 files changed, 29 insertions(+), 5 deletions(-) - -diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c -index d68c04bde5ededcd2ffb24300646f4a9d9c1bc9a..be6e9995fb6dcd84aa21ff7a324b585d0a4a008f 100644 ---- a/arch/x86/kvm/cpuid.c -+++ b/arch/x86/kvm/cpuid.c -@@ -36,6 +36,26 @@ - u32 kvm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly; - EXPORT_SYMBOL_GPL(kvm_cpu_caps); - -+struct cpuid_xstate_sizes { -+ u32 eax; -+ u32 ebx; -+ u32 ecx; -+}; -+ -+static struct cpuid_xstate_sizes xstate_sizes[XFEATURE_MAX] __ro_after_init; -+ -+void __init kvm_init_xstate_sizes(void) -+{ -+ u32 ign; -+ int i; -+ -+ for (i = XFEATURE_YMM; i < ARRAY_SIZE(xstate_sizes); i++) { -+ struct cpuid_xstate_sizes *xs = &xstate_sizes[i]; -+ -+ cpuid_count(0xD, i, &xs->eax, &xs->ebx, &xs->ecx, &ign); -+ } -+} -+ - u32 xstate_required_size(u64 xstate_bv, bool compacted) - { - int feature_bit = 0; -@@ -44,14 +64,15 @@ u32 xstate_required_size(u64 xstate_bv, bool compacted) - xstate_bv &= XFEATURE_MASK_EXTEND; - while (xstate_bv) { - if (xstate_bv & 0x1) { -- u32 eax, ebx, ecx, edx, offset; -- cpuid_count(0xD, feature_bit, &eax, &ebx, &ecx, &edx); -+ struct cpuid_xstate_sizes *xs = &xstate_sizes[feature_bit]; -+ u32 offset; -+ - /* ECX[1]: 64B alignment in compacted form */ - if (compacted) -- offset = (ecx & 0x2) ? ALIGN(ret, 64) : ret; -+ offset = (xs->ecx & 0x2) ? ALIGN(ret, 64) : ret; - else -- offset = ebx; -- ret = max(ret, offset + eax); -+ offset = xs->ebx; -+ ret = max(ret, offset + xs->eax); - } - - xstate_bv >>= 1; -diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h -index 07da153802e4d4f75219d77596e323347393eab7..a50b57e5d40073027901249a355b65b9a8b54a74 100644 ---- a/arch/x86/kvm/cpuid.h -+++ b/arch/x86/kvm/cpuid.h -@@ -34,6 +34,7 @@ bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, - - bool vcpu_supports_xsave_pkru(struct kvm_vcpu *vcpu); - -+void __init kvm_init_xstate_sizes(void); - u32 xstate_required_size(u64 xstate_bv, bool compacted); - - int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu); -diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c -index 08ef6f01bf7e8c9c6cdb842f23adac09195c1aff..558e267ae0def34227f6f854a0955a26766a6e2c 100644 ---- a/arch/x86/kvm/x86.c -+++ b/arch/x86/kvm/x86.c -@@ -13930,6 +13930,8 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_exit); - - static int __init kvm_x86_init(void) - { -+ kvm_init_xstate_sizes(); -+ - kvm_mmu_x86_module_init(); - mitigate_smt_rsb &= boot_cpu_has_bug(X86_BUG_SMT_RSB) && cpu_smt_possible(); - return 0; diff --git a/submodules/ubuntu-kernel b/submodules/ubuntu-kernel index 94580ef..5748450 160000 --- a/submodules/ubuntu-kernel +++ b/submodules/ubuntu-kernel @@ -1 +1 @@ -Subproject commit 94580ef17da71a3b95f1ef05eb2f72b6f7934b1d +Subproject commit 5748450da0f43f2212cad2e3942c24db8a68913d