diff --git a/patches/kernel/0027-KVM-VMX-Heed-the-msr-argument-in-msr_write_intercept.patch b/patches/kernel/0027-KVM-VMX-Heed-the-msr-argument-in-msr_write_intercept.patch new file mode 100644 index 0000000..2000c9b --- /dev/null +++ b/patches/kernel/0027-KVM-VMX-Heed-the-msr-argument-in-msr_write_intercept.patch @@ -0,0 +1,41 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Jim Mattson +Date: Wed, 10 Aug 2022 14:30:50 -0700 +Subject: [PATCH] KVM: VMX: Heed the 'msr' argument in msr_write_intercepted() + +[ Upstream commit 020dac4187968535f089f83f376a72beb3451311 ] + +Regardless of the 'msr' argument passed to the VMX version of +msr_write_intercepted(), the function always checks to see if a +specific MSR (IA32_SPEC_CTRL) is intercepted for write. This behavior +seems unintentional and unexpected. + +Modify the function so that it checks to see if the provided 'msr' +index is intercepted for write. + +Fixes: 67f4b9969c30 ("KVM: nVMX: Handle dynamic MSR intercept toggling") +Cc: Sean Christopherson +Signed-off-by: Jim Mattson +Reviewed-by: Sean Christopherson +Message-Id: <20220810213050.2655000-1-jmattson@google.com> +Signed-off-by: Paolo Bonzini +Signed-off-by: Sasha Levin +Signed-off-by: Thomas Lamprecht +--- + arch/x86/kvm/vmx/vmx.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c +index fc0bb685283d..290f4d0aca7e 100644 +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -831,8 +831,7 @@ static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr) + if (!(exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS)) + return true; + +- return vmx_test_msr_bitmap_write(vmx->loaded_vmcs->msr_bitmap, +- MSR_IA32_SPEC_CTRL); ++ return vmx_test_msr_bitmap_write(vmx->loaded_vmcs->msr_bitmap, msr); + } + + unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx) diff --git a/patches/kernel/0028-skmsg-Fix-wrong-last-sg-check-in-sk_msg_recvmsg.patch b/patches/kernel/0028-skmsg-Fix-wrong-last-sg-check-in-sk_msg_recvmsg.patch new file mode 100644 index 0000000..c909c0f --- /dev/null +++ b/patches/kernel/0028-skmsg-Fix-wrong-last-sg-check-in-sk_msg_recvmsg.patch @@ -0,0 +1,63 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Liu Jian +Date: Tue, 9 Aug 2022 17:49:15 +0800 +Subject: [PATCH] skmsg: Fix wrong last sg check in sk_msg_recvmsg() + +[ Upstream commit 583585e48d965338e73e1eb383768d16e0922d73 ] + +Fix one kernel NULL pointer dereference as below: + +[ 224.462334] Call Trace: +[ 224.462394] __tcp_bpf_recvmsg+0xd3/0x380 +[ 224.462441] ? sock_has_perm+0x78/0xa0 +[ 224.462463] tcp_bpf_recvmsg+0x12e/0x220 +[ 224.462494] inet_recvmsg+0x5b/0xd0 +[ 224.462534] __sys_recvfrom+0xc8/0x130 +[ 224.462574] ? syscall_trace_enter+0x1df/0x2e0 +[ 224.462606] ? __do_page_fault+0x2de/0x500 +[ 224.462635] __x64_sys_recvfrom+0x24/0x30 +[ 224.462660] do_syscall_64+0x5d/0x1d0 +[ 224.462709] entry_SYSCALL_64_after_hwframe+0x65/0xca + +In commit 9974d37ea75f ("skmsg: Fix invalid last sg check in +sk_msg_recvmsg()"), we change last sg check to sg_is_last(), +but in sockmap redirection case (without stream_parser/stream_verdict/ +skb_verdict), we did not mark the end of the scatterlist. Check the +sk_msg_alloc, sk_msg_page_add, and bpf_msg_push_data functions, they all +do not mark the end of sg. They are expected to use sg.end for end +judgment. So the judgment of '(i != msg_rx->sg.end)' is added back here. + +Fixes: 9974d37ea75f ("skmsg: Fix invalid last sg check in sk_msg_recvmsg()") +Signed-off-by: Liu Jian +Signed-off-by: Daniel Borkmann +Acked-by: John Fastabend +Acked-by: Jakub Sitnicki +Link: https://lore.kernel.org/bpf/20220809094915.150391-1-liujian56@huawei.com +Signed-off-by: Sasha Levin +Signed-off-by: Thomas Lamprecht +--- + net/core/skmsg.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/net/core/skmsg.c b/net/core/skmsg.c +index f50f8d95b628..23d65fe160c3 100644 +--- a/net/core/skmsg.c ++++ b/net/core/skmsg.c +@@ -462,7 +462,7 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, + + if (copied == len) + break; +- } while (!sg_is_last(sge)); ++ } while ((i != msg_rx->sg.end) && !sg_is_last(sge)); + + if (unlikely(peek)) { + msg_rx = sk_psock_next_msg(psock, msg_rx); +@@ -472,7 +472,7 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, + } + + msg_rx->sg.start = i; +- if (!sge->length && sg_is_last(sge)) { ++ if (!sge->length && (i == msg_rx->sg.end || sg_is_last(sge))) { + msg_rx = sk_psock_dequeue_msg(psock); + kfree_sk_msg(msg_rx); + } diff --git a/patches/kernel/0029-drm-i915-gt-Skip-TLB-invalidations-once-wedged.patch b/patches/kernel/0029-drm-i915-gt-Skip-TLB-invalidations-once-wedged.patch new file mode 100644 index 0000000..4c46f83 --- /dev/null +++ b/patches/kernel/0029-drm-i915-gt-Skip-TLB-invalidations-once-wedged.patch @@ -0,0 +1,51 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Chris Wilson +Date: Wed, 27 Jul 2022 14:29:54 +0200 +Subject: [PATCH] drm/i915/gt: Skip TLB invalidations once wedged +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +[ Upstream commit e5a95c83ed1492c0f442b448b20c90c8faaf702b ] + +Skip all further TLB invalidations once the device is wedged and +had been reset, as, on such cases, it can no longer process instructions +on the GPU and the user no longer has access to the TLB's in each engine. + +So, an attempt to do a TLB cache invalidation will produce a timeout. + +That helps to reduce the performance regression introduced by TLB +invalidate logic. + +Cc: stable@vger.kernel.org +Fixes: 7938d61591d3 ("drm/i915: Flush TLBs before releasing backing store") +Signed-off-by: Chris Wilson +Cc: Fei Yang +Cc: Tvrtko Ursulin +Reviewed-by: Andi Shyti +Acked-by: Thomas Hellström +Signed-off-by: Mauro Carvalho Chehab +Signed-off-by: Andi Shyti +Link: https://patchwork.freedesktop.org/patch/msgid/5aa86564b9ec5fe7fe605c1dd7de76855401ed73.1658924372.git.mchehab@kernel.org +(cherry picked from commit be0366f168033374a93e4c43fdaa1a90ab905184) +Signed-off-by: Rodrigo Vivi +Signed-off-by: Sasha Levin +Signed-off-by: Thomas Lamprecht +--- + drivers/gpu/drm/i915/gt/intel_gt.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c +index 3a76000d15bf..ed8ad3b26395 100644 +--- a/drivers/gpu/drm/i915/gt/intel_gt.c ++++ b/drivers/gpu/drm/i915/gt/intel_gt.c +@@ -949,6 +949,9 @@ void intel_gt_invalidate_tlbs(struct intel_gt *gt) + if (I915_SELFTEST_ONLY(gt->awake == -ENODEV)) + return; + ++ if (intel_gt_is_wedged(gt)) ++ return; ++ + if (GRAPHICS_VER(i915) == 12) { + regs = gen12_regs; + num = ARRAY_SIZE(gen12_regs); diff --git a/patches/kernel/0030-net-phy-Don-t-WARN-for-PHY_UP-state-in-mdio_bus_phy_.patch b/patches/kernel/0030-net-phy-Don-t-WARN-for-PHY_UP-state-in-mdio_bus_phy_.patch new file mode 100644 index 0000000..e8c2f57 --- /dev/null +++ b/patches/kernel/0030-net-phy-Don-t-WARN-for-PHY_UP-state-in-mdio_bus_phy_.patch @@ -0,0 +1,60 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Lukas Wunner +Date: Fri, 23 Sep 2022 06:09:52 +0200 +Subject: [PATCH] net: phy: Don't WARN for PHY_UP state in + mdio_bus_phy_resume() + +[ Upstream commit ea64cdfad124922c931633e39287c5a31a9b14a1 ] + +Commit 744d23c71af3 ("net: phy: Warn about incorrect mdio_bus_phy_resume() +state") introduced a WARN() on resume from system sleep if a PHY is not +in PHY_HALTED state. + +Commit 6dbe852c379f ("net: phy: Don't WARN for PHY_READY state in +mdio_bus_phy_resume()") added an exemption for PHY_READY state from +the WARN(). + +It turns out PHY_UP state needs to be exempted as well because the +following may happen on suspend: + + mdio_bus_phy_suspend() + phy_stop_machine() + phydev->state = PHY_UP # if (phydev->state >= PHY_UP) + +Fixes: 744d23c71af3 ("net: phy: Warn about incorrect mdio_bus_phy_resume() state") +Reported-by: Marek Szyprowski +Tested-by: Marek Szyprowski +Link: https://lore.kernel.org/netdev/2b1a1588-505e-dff3-301d-bfc1fb14d685@samsung.com/ +Signed-off-by: Lukas Wunner +Acked-by: Florian Fainelli +Cc: Xiaolei Wang +Link: https://lore.kernel.org/r/8128fdb51eeebc9efbf3776a4097363a1317aaf1.1663905575.git.lukas@wunner.de +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +Signed-off-by: Thomas Lamprecht +--- + drivers/net/phy/phy_device.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c +index b616f55ea222..c5b92ffaffb9 100644 +--- a/drivers/net/phy/phy_device.c ++++ b/drivers/net/phy/phy_device.c +@@ -315,11 +315,13 @@ static __maybe_unused int mdio_bus_phy_resume(struct device *dev) + + phydev->suspended_by_mdio_bus = 0; + +- /* If we manged to get here with the PHY state machine in a state neither +- * PHY_HALTED nor PHY_READY this is an indication that something went wrong +- * and we should most likely be using MAC managed PM and we are not. ++ /* If we managed to get here with the PHY state machine in a state ++ * neither PHY_HALTED, PHY_READY nor PHY_UP, this is an indication ++ * that something went wrong and we should most likely be using ++ * MAC managed PM, but we are not. + */ +- WARN_ON(phydev->state != PHY_HALTED && phydev->state != PHY_READY); ++ WARN_ON(phydev->state != PHY_HALTED && phydev->state != PHY_READY && ++ phydev->state != PHY_UP); + + ret = phy_init_hw(phydev); + if (ret < 0) diff --git a/patches/kernel/0031-drm-amdgpu-Don-t-enable-LTR-if-not-supported.patch b/patches/kernel/0031-drm-amdgpu-Don-t-enable-LTR-if-not-supported.patch new file mode 100644 index 0000000..9ffa865 --- /dev/null +++ b/patches/kernel/0031-drm-amdgpu-Don-t-enable-LTR-if-not-supported.patch @@ -0,0 +1,169 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Lijo Lazar +Date: Thu, 8 Sep 2022 08:28:57 +0530 +Subject: [PATCH] drm/amdgpu: Don't enable LTR if not supported + +commit 6c20490663553cd7e07d8de8af482012329ab9d6 upstream. + +As per PCIE Base Spec r4.0 Section 6.18 +'Software must not enable LTR in an Endpoint unless the Root Complex +and all intermediate Switches indicate support for LTR.' + +This fixes the Unsupported Request error reported through AER during +ASPM enablement. + +Link: https://bugzilla.kernel.org/show_bug.cgi?id=216455 + +The error was unnoticed before and got visible because of the commit +referenced below. This doesn't fix anything in the commit below, rather +fixes the issue in amdgpu exposed by the commit. The reference is only +to associate this commit with below one so that both go together. + +Fixes: 8795e182b02d ("PCI/portdrv: Don't disable AER reporting in get_port_device_capability()") + +Reported-by: Gustaw Smolarczyk +Signed-off-by: Lijo Lazar +Reviewed-by: Alex Deucher +Signed-off-by: Greg Kroah-Hartman + +Cc: stable@vger.kernel.org +Signed-off-by: Alex Deucher +Signed-off-by: Thomas Lamprecht +--- + drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c | 9 ++++++++- + drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c | 9 ++++++++- + drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 9 ++++++++- + 3 files changed, 24 insertions(+), 3 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +index b184b656b9b6..6f21154d4891 100644 +--- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c ++++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +@@ -366,6 +366,7 @@ static void nbio_v2_3_enable_aspm(struct amdgpu_device *adev, + WREG32_PCIE(smnPCIE_LC_CNTL, data); + } + ++#ifdef CONFIG_PCIEASPM + static void nbio_v2_3_program_ltr(struct amdgpu_device *adev) + { + uint32_t def, data; +@@ -387,9 +388,11 @@ static void nbio_v2_3_program_ltr(struct amdgpu_device *adev) + if (def != data) + WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); + } ++#endif + + static void nbio_v2_3_program_aspm(struct amdgpu_device *adev) + { ++#ifdef CONFIG_PCIEASPM + uint32_t def, data; + + def = data = RREG32_PCIE(smnPCIE_LC_CNTL); +@@ -445,7 +448,10 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev) + if (def != data) + WREG32_PCIE(smnPCIE_LC_CNTL6, data); + +- nbio_v2_3_program_ltr(adev); ++ /* Don't bother about LTR if LTR is not enabled ++ * in the path */ ++ if (adev->pdev->ltr_path) ++ nbio_v2_3_program_ltr(adev); + + def = data = RREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP3); + data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT; +@@ -469,6 +475,7 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev) + data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK; + if (def != data) + WREG32_PCIE(smnPCIE_LC_CNTL3, data); ++#endif + } + + static void nbio_v2_3_apply_lc_spc_mode_wa(struct amdgpu_device *adev) +diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +index 0d2d629e2d6a..be3f6c52c3ff 100644 +--- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c ++++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +@@ -278,6 +278,7 @@ static void nbio_v6_1_init_registers(struct amdgpu_device *adev) + WREG32_PCIE(smnPCIE_CI_CNTL, data); + } + ++#ifdef CONFIG_PCIEASPM + static void nbio_v6_1_program_ltr(struct amdgpu_device *adev) + { + uint32_t def, data; +@@ -299,9 +300,11 @@ static void nbio_v6_1_program_ltr(struct amdgpu_device *adev) + if (def != data) + WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); + } ++#endif + + static void nbio_v6_1_program_aspm(struct amdgpu_device *adev) + { ++#ifdef CONFIG_PCIEASPM + uint32_t def, data; + + def = data = RREG32_PCIE(smnPCIE_LC_CNTL); +@@ -357,7 +360,10 @@ static void nbio_v6_1_program_aspm(struct amdgpu_device *adev) + if (def != data) + WREG32_PCIE(smnPCIE_LC_CNTL6, data); + +- nbio_v6_1_program_ltr(adev); ++ /* Don't bother about LTR if LTR is not enabled ++ * in the path */ ++ if (adev->pdev->ltr_path) ++ nbio_v6_1_program_ltr(adev); + + def = data = RREG32_PCIE(smnRCC_BIF_STRAP3); + data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT; +@@ -381,6 +387,7 @@ static void nbio_v6_1_program_aspm(struct amdgpu_device *adev) + data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK; + if (def != data) + WREG32_PCIE(smnPCIE_LC_CNTL3, data); ++#endif + } + + const struct amdgpu_nbio_funcs nbio_v6_1_funcs = { +diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +index f50045cebd44..74cd7543729b 100644 +--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c ++++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +@@ -630,6 +630,7 @@ const struct amdgpu_nbio_ras_funcs nbio_v7_4_ras_funcs = { + .ras_fini = amdgpu_nbio_ras_fini, + }; + ++#ifdef CONFIG_PCIEASPM + static void nbio_v7_4_program_ltr(struct amdgpu_device *adev) + { + uint32_t def, data; +@@ -651,9 +652,11 @@ static void nbio_v7_4_program_ltr(struct amdgpu_device *adev) + if (def != data) + WREG32_PCIE(smnBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); + } ++#endif + + static void nbio_v7_4_program_aspm(struct amdgpu_device *adev) + { ++#ifdef CONFIG_PCIEASPM + uint32_t def, data; + + def = data = RREG32_PCIE(smnPCIE_LC_CNTL); +@@ -709,7 +712,10 @@ static void nbio_v7_4_program_aspm(struct amdgpu_device *adev) + if (def != data) + WREG32_PCIE(smnPCIE_LC_CNTL6, data); + +- nbio_v7_4_program_ltr(adev); ++ /* Don't bother about LTR if LTR is not enabled ++ * in the path */ ++ if (adev->pdev->ltr_path) ++ nbio_v7_4_program_ltr(adev); + + def = data = RREG32_PCIE(smnRCC_BIF_STRAP3); + data |= 0x5DE0 << RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT; +@@ -733,6 +739,7 @@ static void nbio_v7_4_program_aspm(struct amdgpu_device *adev) + data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK; + if (def != data) + WREG32_PCIE(smnPCIE_LC_CNTL3, data); ++#endif + } + + const struct amdgpu_nbio_funcs nbio_v7_4_funcs = { diff --git a/patches/kernel/0032-drm-amdgpu-move-nbio-ih_doorbell_range-into-ih-code-.patch b/patches/kernel/0032-drm-amdgpu-move-nbio-ih_doorbell_range-into-ih-code-.patch new file mode 100644 index 0000000..879f2f1 --- /dev/null +++ b/patches/kernel/0032-drm-amdgpu-move-nbio-ih_doorbell_range-into-ih-code-.patch @@ -0,0 +1,87 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Alex Deucher +Date: Fri, 9 Sep 2022 11:47:20 -0400 +Subject: [PATCH] drm/amdgpu: move nbio ih_doorbell_range() into ih code for + vega +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit dc1d85cb790f2091eea074cee24a704b2d6c4a06 upstream. + +This mirrors what we do for other asics and this way we are +sure the ih doorbell range is properly initialized. + +There is a comment about the way doorbells on gfx9 work that +requires that they are initialized for other IPs before GFX +is initialized. In this case IH is initialized before GFX, +so there should be no issue. + +This is a prerequisite for fixing the Unsupported Request error +reported through AER during driver load. + +Link: https://bugzilla.kernel.org/show_bug.cgi?id=216373 + +The error was unnoticed before and got visible because of the commit +referenced below. This doesn't fix anything in the commit below, rather +fixes the issue in amdgpu exposed by the commit. The reference is only +to associate this commit with below one so that both go together. + +Fixes: 8795e182b02d ("PCI/portdrv: Don't disable AER reporting in get_port_device_capability()") + +Acked-by: Christian König +Reviewed-by: Lijo Lazar +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Thomas Lamprecht +--- + drivers/gpu/drm/amd/amdgpu/soc15.c | 3 --- + drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 4 ++++ + drivers/gpu/drm/amd/amdgpu/vega20_ih.c | 4 ++++ + 3 files changed, 8 insertions(+), 3 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c +index bdb47ae96ce6..723b088094f2 100644 +--- a/drivers/gpu/drm/amd/amdgpu/soc15.c ++++ b/drivers/gpu/drm/amd/amdgpu/soc15.c +@@ -1429,9 +1429,6 @@ static void soc15_doorbell_range_init(struct amdgpu_device *adev) + ring->use_doorbell, ring->doorbell_index, + adev->doorbell_index.sdma_doorbell_range); + } +- +- adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, +- adev->irq.ih.doorbell_index); + } + } + +diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +index a9ca6988009e..73728fa85997 100644 +--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c ++++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +@@ -289,6 +289,10 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) + } + } + ++ if (!amdgpu_sriov_vf(adev)) ++ adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, ++ adev->irq.ih.doorbell_index); ++ + pci_set_master(adev->pdev); + + /* enable interrupts */ +diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +index f51dfc38ac65..ac34af4cb178 100644 +--- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c ++++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +@@ -340,6 +340,10 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev) + } + } + ++ if (!amdgpu_sriov_vf(adev)) ++ adev->nbio.funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, ++ adev->irq.ih.doorbell_index); ++ + pci_set_master(adev->pdev); + + /* enable interrupts */ diff --git a/patches/kernel/0033-drm-amdgpu-move-nbio-sdma_doorbell_range-into-sdma-c.patch b/patches/kernel/0033-drm-amdgpu-move-nbio-sdma_doorbell_range-into-sdma-c.patch new file mode 100644 index 0000000..b1a9a85 --- /dev/null +++ b/patches/kernel/0033-drm-amdgpu-move-nbio-sdma_doorbell_range-into-sdma-c.patch @@ -0,0 +1,100 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Alex Deucher +Date: Fri, 9 Sep 2022 11:53:27 -0400 +Subject: [PATCH] drm/amdgpu: move nbio sdma_doorbell_range() into sdma code + for vega +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit e3163bc8ffdfdb405e10530b140135b2ee487f89 upstream. + +This mirrors what we do for other asics and this way we are +sure the sdma doorbell range is properly initialized. + +There is a comment about the way doorbells on gfx9 work that +requires that they are initialized for other IPs before GFX +is initialized. However, the statement says that it applies to +multimedia as well, but the VCN code currently initializes +doorbells after GFX and there are no known issues there. In my +testing at least I don't see any problems on SDMA. + +This is a prerequisite for fixing the Unsupported Request error +reported through AER during driver load. + +Link: https://bugzilla.kernel.org/show_bug.cgi?id=216373 + +The error was unnoticed before and got visible because of the commit +referenced below. This doesn't fix anything in the commit below, rather +fixes the issue in amdgpu exposed by the commit. The reference is only +to associate this commit with below one so that both go together. + +Fixes: 8795e182b02d ("PCI/portdrv: Don't disable AER reporting in get_port_device_capability()") + +Acked-by: Christian König +Reviewed-by: Lijo Lazar +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Thomas Lamprecht +--- + drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 5 +++++ + drivers/gpu/drm/amd/amdgpu/soc15.c | 22 ---------------------- + 2 files changed, 5 insertions(+), 22 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +index 9014f71d52dd..8b20326c4c05 100644 +--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +@@ -1507,6 +1507,11 @@ static int sdma_v4_0_start(struct amdgpu_device *adev) + WREG32_SDMA(i, mmSDMA0_CNTL, temp); + + if (!amdgpu_sriov_vf(adev)) { ++ ring = &adev->sdma.instance[i].ring; ++ adev->nbio.funcs->sdma_doorbell_range(adev, i, ++ ring->use_doorbell, ring->doorbell_index, ++ adev->doorbell_index.sdma_doorbell_range); ++ + /* unhalt engine */ + temp = RREG32_SDMA(i, mmSDMA0_F32_CNTL); + temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0); +diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c +index 723b088094f2..7d5ff50435e5 100644 +--- a/drivers/gpu/drm/amd/amdgpu/soc15.c ++++ b/drivers/gpu/drm/amd/amdgpu/soc15.c +@@ -1416,22 +1416,6 @@ static int soc15_common_sw_fini(void *handle) + return 0; + } + +-static void soc15_doorbell_range_init(struct amdgpu_device *adev) +-{ +- int i; +- struct amdgpu_ring *ring; +- +- /* sdma/ih doorbell range are programed by hypervisor */ +- if (!amdgpu_sriov_vf(adev)) { +- for (i = 0; i < adev->sdma.num_instances; i++) { +- ring = &adev->sdma.instance[i].ring; +- adev->nbio.funcs->sdma_doorbell_range(adev, i, +- ring->use_doorbell, ring->doorbell_index, +- adev->doorbell_index.sdma_doorbell_range); +- } +- } +-} +- + static int soc15_common_hw_init(void *handle) + { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; +@@ -1451,12 +1435,6 @@ static int soc15_common_hw_init(void *handle) + + /* enable the doorbell aperture */ + soc15_enable_doorbell_aperture(adev, true); +- /* HW doorbell routing policy: doorbell writing not +- * in SDMA/IH/MM/ACV range will be routed to CP. So +- * we need to init SDMA/IH/MM/ACV doorbell range prior +- * to CP ip block init and ring test. +- */ +- soc15_doorbell_range_init(adev); + + return 0; + } diff --git a/patches/kernel/0034-drm-amdgpu-Separate-vf2pf-work-item-init-from-virt-d.patch b/patches/kernel/0034-drm-amdgpu-Separate-vf2pf-work-item-init-from-virt-d.patch new file mode 100644 index 0000000..0046c03 --- /dev/null +++ b/patches/kernel/0034-drm-amdgpu-Separate-vf2pf-work-item-init-from-virt-d.patch @@ -0,0 +1,130 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Victor Skvortsov +Date: Thu, 16 Dec 2021 17:01:45 +0000 +Subject: [PATCH] drm/amdgpu: Separate vf2pf work item init from virt data + exchange + +[ Upstream commit 892deb48269c65376f3eeb5b4c032ff2c2979bd7 ] + +We want to be able to call virt data exchange conditionally +after gmc sw init to reserve bad pages as early as possible. +Since this is a conditional call, we will need +to call it again unconditionally later in the init sequence. + +Refactor the data exchange function so it can be +called multiple times without re-initializing the work item. + +v2: Cleaned up the code. Kept the original call to init_exchange_data() +inside early init to initialize the work item, afterwards call +exchange_data() when needed. + +Signed-off-by: Victor Skvortsov +Reviewed By: Shaoyun.liu +Signed-off-by: Alex Deucher +Signed-off-by: Sasha Levin +Signed-off-by: Thomas Lamprecht +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 +++- + drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 36 ++++++++++++++-------- + drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 1 + + 3 files changed, 30 insertions(+), 13 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +index d1af709cc7dc..f443b4630f9d 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +@@ -2390,6 +2390,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) + + /* need to do gmc hw init early so we can allocate gpu mem */ + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { ++ /* Try to reserve bad pages early */ ++ if (amdgpu_sriov_vf(adev)) ++ amdgpu_virt_exchange_data(adev); ++ + r = amdgpu_device_vram_scratch_init(adev); + if (r) { + DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r); +@@ -2421,7 +2425,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) + } + + if (amdgpu_sriov_vf(adev)) +- amdgpu_virt_init_data_exchange(adev); ++ amdgpu_virt_exchange_data(adev); + + r = amdgpu_ib_pool_init(adev); + if (r) { +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +index 16787c675f35..cce03aad5f0e 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +@@ -614,17 +614,35 @@ void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev) + + void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev) + { +- uint64_t bp_block_offset = 0; +- uint32_t bp_block_size = 0; +- struct amd_sriov_msg_pf2vf_info *pf2vf_v2 = NULL; +- + adev->virt.fw_reserve.p_pf2vf = NULL; + adev->virt.fw_reserve.p_vf2pf = NULL; + adev->virt.vf2pf_update_interval_ms = 0; + +- if (adev->mman.fw_vram_usage_va != NULL) { ++ if (adev->bios != NULL) { + adev->virt.vf2pf_update_interval_ms = 2000; + ++ adev->virt.fw_reserve.p_pf2vf = ++ (struct amd_sriov_msg_pf2vf_info_header *) ++ (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10)); ++ ++ amdgpu_virt_read_pf2vf_data(adev); ++ } ++ ++ if (adev->virt.vf2pf_update_interval_ms != 0) { ++ INIT_DELAYED_WORK(&adev->virt.vf2pf_work, amdgpu_virt_update_vf2pf_work_item); ++ schedule_delayed_work(&(adev->virt.vf2pf_work), msecs_to_jiffies(adev->virt.vf2pf_update_interval_ms)); ++ } ++} ++ ++ ++void amdgpu_virt_exchange_data(struct amdgpu_device *adev) ++{ ++ uint64_t bp_block_offset = 0; ++ uint32_t bp_block_size = 0; ++ struct amd_sriov_msg_pf2vf_info *pf2vf_v2 = NULL; ++ ++ if (adev->mman.fw_vram_usage_va != NULL) { ++ + adev->virt.fw_reserve.p_pf2vf = + (struct amd_sriov_msg_pf2vf_info_header *) + (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10)); +@@ -655,16 +673,10 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev) + (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10)); + + amdgpu_virt_read_pf2vf_data(adev); +- +- return; +- } +- +- if (adev->virt.vf2pf_update_interval_ms != 0) { +- INIT_DELAYED_WORK(&adev->virt.vf2pf_work, amdgpu_virt_update_vf2pf_work_item); +- schedule_delayed_work(&(adev->virt.vf2pf_work), adev->virt.vf2pf_update_interval_ms); + } + } + ++ + void amdgpu_detect_virtualization(struct amdgpu_device *adev) + { + uint32_t reg; +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +index 8d4c20bb71c5..9adfb8d63280 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +@@ -308,6 +308,7 @@ int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev); + void amdgpu_virt_free_mm_table(struct amdgpu_device *adev); + void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev); + void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev); ++void amdgpu_virt_exchange_data(struct amdgpu_device *adev); + void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev); + void amdgpu_detect_virtualization(struct amdgpu_device *adev); + diff --git a/patches/kernel/0035-drm-amdgpu-make-sure-to-init-common-IP-before-gmc.patch b/patches/kernel/0035-drm-amdgpu-make-sure-to-init-common-IP-before-gmc.patch new file mode 100644 index 0000000..8190183 --- /dev/null +++ b/patches/kernel/0035-drm-amdgpu-make-sure-to-init-common-IP-before-gmc.patch @@ -0,0 +1,69 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Alex Deucher +Date: Tue, 30 Aug 2022 10:59:49 -0400 +Subject: [PATCH] drm/amdgpu: make sure to init common IP before gmc +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +[ Upstream commit a8671493d2074950553da3cf07d1be43185ef6c6 ] + +Move common IP init before GMC init so that HDP gets +remapped before GMC init which uses it. + +This fixes the Unsupported Request error reported through +AER during driver load. The error happens as a write happens +to the remap offset before real remapping is done. + +Link: https://bugzilla.kernel.org/show_bug.cgi?id=216373 + +The error was unnoticed before and got visible because of the commit +referenced below. This doesn't fix anything in the commit below, rather +fixes the issue in amdgpu exposed by the commit. The reference is only +to associate this commit with below one so that both go together. + +Fixes: 8795e182b02d ("PCI/portdrv: Don't disable AER reporting in get_port_device_capability()") + +Acked-by: Christian König +Reviewed-by: Lijo Lazar +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org +Signed-off-by: Sasha Levin +Signed-off-by: Thomas Lamprecht +--- + drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +index f443b4630f9d..7450773821f4 100644 +--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c ++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +@@ -2388,8 +2388,16 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) + } + adev->ip_blocks[i].status.sw = true; + +- /* need to do gmc hw init early so we can allocate gpu mem */ +- if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { ++ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) { ++ /* need to do common hw init early so everything is set up for gmc */ ++ r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev); ++ if (r) { ++ DRM_ERROR("hw_init %d failed %d\n", i, r); ++ goto init_failed; ++ } ++ adev->ip_blocks[i].status.hw = true; ++ } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { ++ /* need to do gmc hw init early so we can allocate gpu mem */ + /* Try to reserve bad pages early */ + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_exchange_data(adev); +@@ -3037,8 +3045,8 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) + int i, r; + + static enum amd_ip_block_type ip_order[] = { +- AMD_IP_BLOCK_TYPE_GMC, + AMD_IP_BLOCK_TYPE_COMMON, ++ AMD_IP_BLOCK_TYPE_GMC, + AMD_IP_BLOCK_TYPE_PSP, + AMD_IP_BLOCK_TYPE_IH, + };