Merge tag 'amd-drm-next-6.11-2024-06-07' of https://gitlab.freedesktop.org/agd5f/linux into drm-next

amd-drm-next-6.11-2024-06-07:

amdgpu:
- DCN 4.0.x support
- DCN 3.5 updates
- GC 12.0 support
- DP MST fixes
- Cursor fixes
- MES11 updates
- MMHUB 4.1 support
- DML2 Updates
- DCN 3.1.5 fixes
- IPS fixes
- Various code cleanups
- GMC 12.0 support
- SDMA 7.0 support
- SMU 13 updates
- SR-IOV fixes
- VCN 5.x fixes
- MES12 support
- SMU 14.x updates
- Devcoredump improvements
- Fixes for HDP flush on platforms with >4k pages
- GC 9.4.3 fixes
- RAS ACA updates
- Silence UBSAN flex array warnings
- MMHUB 3.3 updates

amdkfd:
- Contiguous VRAM allocations
- GC 12.0 support
- SDMA 7.0 support
- SR-IOV fixes

radeon:
- Backlight workaround for iMac
- Silence UBSAN flex array warnings

UAPI:
- GFX12 modifier and DCC support
  Proposed Mesa changes:
  https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29510
- KFD GFX ALU exceptions
  Proposed ROCdebugger changes:
  08c760622b
  944fe1c141
- KFD Contiguous VRAM allocation flag
  Proposed ROCr/HIP changes:
  f7b4a26991
  26e8530d05
  1d48f2a1ab

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240607195900.902537-1-alexander.deucher@amd.com
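
As an illustration of the KFD contiguous VRAM allocation flag listed in the UAPI section above, here is a minimal userspace sketch. It is not taken from the proposed ROCr/HIP changes referenced there; it assumes the updated uapi/linux/kfd_ioctl.h exports KFD_IOC_ALLOC_MEM_FLAGS_CONTIGUOUS alongside the existing allocation flags, and that the usual KFD process setup (opening /dev/kfd, acquiring the VM for the target GPU) has already been done:

  #include <stdint.h>
  #include <string.h>
  #include <sys/ioctl.h>
  #include <linux/kfd_ioctl.h>

  /* Ask the KFD to back a buffer with physically contiguous VRAM. */
  static int alloc_contiguous_vram(int kfd_fd, uint32_t gpu_id,
                                   uint64_t va_addr, uint64_t size,
                                   uint64_t *handle)
  {
          struct kfd_ioctl_alloc_memory_of_gpu_args args;

          memset(&args, 0, sizeof(args));
          args.va_addr = va_addr;
          args.size = size;
          args.gpu_id = gpu_id;
          /* KFD_IOC_ALLOC_MEM_FLAGS_CONTIGUOUS is the new flag from this series
           * (assumed exported to userspace); the other two flags already exist. */
          args.flags = KFD_IOC_ALLOC_MEM_FLAGS_VRAM |
                       KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE |
                       KFD_IOC_ALLOC_MEM_FLAGS_CONTIGUOUS;

          if (ioctl(kfd_fd, AMDKFD_IOC_ALLOC_MEMORY_OF_GPU, &args) < 0)
                  return -1;

          /* args.handle is used by the later map/unmap/free ioctls. */
          *handle = args.handle;
          return 0;
  }

On the kernel side this flag shows up further down in the amdgpu_amdkfd_gpuvm changes, where KFD_IOC_ALLOC_MEM_FLAGS_CONTIGUOUS is translated into AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS for the buffer object.
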
commit 1ddaaa2440 (Dave Airlie, 2024-06-11 14:01:55 +10:00)
612 changed files with 334476 additions and 4695 deletions


@ -49,6 +49,12 @@ pp_power_profile_mode
.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
:doc: pp_power_profile_mode
pm_policy
---------------------
.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
:doc: pm_policy
\*_busy_percent
---------------


@ -19,7 +19,7 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
*/
#ifndef _ACP_GFX_IF_H
#define _ACP_GFX_IF_H


@ -17,6 +17,7 @@ config DRM_AMDGPU
select HWMON
select I2C
select I2C_ALGOBIT
select CRC16
select BACKLIGHT_CLASS_DEVICE
select INTERVAL_TREE
select DRM_BUDDY


@ -97,7 +97,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce
amdgpu-y += \
vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \
vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o arct_reg_init.o mxgpu_nv.o \
nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o \
nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o soc24.o \
sienna_cichlid.o smu_v13_0_10.o nbio_v4_3.o hdp_v6_0.o nbio_v7_7.o hdp_v5_2.o lsdma_v6_0.o \
nbio_v7_9.o aqua_vanjaram.o nbio_v7_11.o lsdma_v7_0.o hdp_v7_0.o nbif_v6_3_1.o
@ -116,7 +116,7 @@ amdgpu-y += \
gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o gfxhub_v2_1.o mmhub_v2_3.o \
mmhub_v1_7.o gfxhub_v3_0.o mmhub_v3_0.o mmhub_v3_0_2.o gmc_v11_0.o \
mmhub_v3_0_1.o gfxhub_v3_0_3.o gfxhub_v1_2.o mmhub_v1_8.o mmhub_v3_3.o \
gfxhub_v11_5_0.o
gfxhub_v11_5_0.o mmhub_v4_1_0.o gfxhub_v12_0.o gmc_v12_0.o
# add UMC block
amdgpu-y += \
@ -167,7 +167,9 @@ amdgpu-y += \
imu_v11_0.o \
gfx_v11_0.o \
gfx_v11_0_3.o \
imu_v11_0_3.o
imu_v11_0_3.o \
gfx_v12_0.o \
imu_v12_0.o
# add async DMA block
amdgpu-y += \
@ -179,13 +181,14 @@ amdgpu-y += \
sdma_v4_4_2.o \
sdma_v5_0.o \
sdma_v5_2.o \
sdma_v6_0.o
sdma_v6_0.o \
sdma_v7_0.o
# add MES block
amdgpu-y += \
amdgpu_mes.o \
mes_v10_1.o \
mes_v11_0.o
mes_v11_0.o \
mes_v12_0.o
# add UVD block
amdgpu-y += \
@ -277,7 +280,8 @@ amdgpu-y += \
amdgpu_amdkfd_gc_9_4_3.o \
amdgpu_amdkfd_gfx_v10.o \
amdgpu_amdkfd_gfx_v10_3.o \
amdgpu_amdkfd_gfx_v11.o
amdgpu_amdkfd_gfx_v11.o \
amdgpu_amdkfd_gfx_v12.o
ifneq ($(CONFIG_DRM_AMDGPU_CIK),)
amdgpu-y += amdgpu_amdkfd_gfx_v7.o


@ -220,6 +220,8 @@ extern int amdgpu_discovery;
extern int amdgpu_mes;
extern int amdgpu_mes_log_enable;
extern int amdgpu_mes_kiq;
extern int amdgpu_uni_mes;
extern int amdgpu_jpeg_test;
extern int amdgpu_noretry;
extern int amdgpu_force_asic_type;
extern int amdgpu_smartshift_bias;
@ -253,10 +255,12 @@ extern int amdgpu_cik_support;
extern int amdgpu_num_kcq;
#define AMDGPU_VCNFW_LOG_SIZE (32 * 1024)
#define AMDGPU_UMSCHFW_LOG_SIZE (32 * 1024)
extern int amdgpu_vcnfw_log;
extern int amdgpu_sg_display;
extern int amdgpu_umsch_mm;
extern int amdgpu_seamless;
extern int amdgpu_umsch_mm_fwlog;
extern int amdgpu_user_partt_mode;
extern int amdgpu_agp;
@ -341,9 +345,9 @@ enum amdgpu_kiq_irq {
AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0,
AMDGPU_CP_KIQ_IRQ_LAST
};
#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */
#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */
#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */
#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */
#define MAX_KIQ_REG_WAIT (amdgpu_sriov_vf(adev) ? 50000 : 5000) /* in usecs, extend for VF */
#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */
#define MAX_KIQ_REG_TRY 1000
int amdgpu_device_ip_set_clockgating_state(void *dev,
@ -1014,6 +1018,7 @@ struct amdgpu_device {
/* jpeg */
struct amdgpu_jpeg jpeg;
bool enable_jpeg_test;
/* vpe */
struct amdgpu_vpe vpe;
@ -1046,6 +1051,7 @@ struct amdgpu_device {
/* mes */
bool enable_mes;
bool enable_mes_kiq;
bool enable_uni_mes;
struct amdgpu_mes mes;
struct amdgpu_mqd mqds[AMDGPU_HW_IP_NUM];
@ -1161,6 +1167,7 @@ struct amdgpu_device {
bool debug_largebar;
bool debug_disable_soft_recovery;
bool debug_use_vram_fw_buf;
bool debug_enable_ras_aca;
};
static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,


@ -222,9 +222,9 @@ static struct aca_bank_error *new_bank_error(struct aca_error *aerr, struct aca_
INIT_LIST_HEAD(&bank_error->node);
memcpy(&bank_error->info, info, sizeof(*info));
mutex_lock(&aerr->lock);
spin_lock(&aerr->lock);
list_add_tail(&bank_error->node, &aerr->list);
mutex_unlock(&aerr->lock);
spin_unlock(&aerr->lock);
return bank_error;
}
@ -235,7 +235,7 @@ static struct aca_bank_error *find_bank_error(struct aca_error *aerr, struct aca
struct aca_bank_info *tmp_info;
bool found = false;
mutex_lock(&aerr->lock);
spin_lock(&aerr->lock);
list_for_each_entry(bank_error, &aerr->list, node) {
tmp_info = &bank_error->info;
if (tmp_info->socket_id == info->socket_id &&
@ -246,7 +246,7 @@ static struct aca_bank_error *find_bank_error(struct aca_error *aerr, struct aca
}
out_unlock:
mutex_unlock(&aerr->lock);
spin_unlock(&aerr->lock);
return found ? bank_error : NULL;
}
@ -474,7 +474,7 @@ static int aca_log_aca_error(struct aca_handle *handle, enum aca_error_type type
struct aca_error *aerr = &error_cache->errors[type];
struct aca_bank_error *bank_error, *tmp;
mutex_lock(&aerr->lock);
spin_lock(&aerr->lock);
if (list_empty(&aerr->list))
goto out_unlock;
@ -485,7 +485,7 @@ static int aca_log_aca_error(struct aca_handle *handle, enum aca_error_type type
}
out_unlock:
mutex_unlock(&aerr->lock);
spin_unlock(&aerr->lock);
return 0;
}
@ -534,7 +534,7 @@ int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *han
if (aca_handle_is_valid(handle))
return -EOPNOTSUPP;
if (!(BIT(type) & handle->mask))
if ((type < 0) || (!(BIT(type) & handle->mask)))
return 0;
return __aca_get_error_data(adev, handle, type, err_data, qctx);
@ -542,7 +542,7 @@ int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *han
static void aca_error_init(struct aca_error *aerr, enum aca_error_type type)
{
mutex_init(&aerr->lock);
spin_lock_init(&aerr->lock);
INIT_LIST_HEAD(&aerr->list);
aerr->type = type;
aerr->nr_errors = 0;
@ -561,11 +561,10 @@ static void aca_error_fini(struct aca_error *aerr)
{
struct aca_bank_error *bank_error, *tmp;
mutex_lock(&aerr->lock);
spin_lock(&aerr->lock);
list_for_each_entry_safe(bank_error, tmp, &aerr->list, node)
aca_bank_error_remove(aerr, bank_error);
mutex_destroy(&aerr->lock);
spin_unlock(&aerr->lock);
}
static void aca_fini_error_cache(struct aca_handle *handle)
@ -686,7 +685,8 @@ static void aca_manager_fini(struct aca_handle_manager *mgr)
bool amdgpu_aca_is_enabled(struct amdgpu_device *adev)
{
return adev->aca.is_enabled;
return (adev->aca.is_enabled ||
adev->debug_enable_ras_aca);
}
int amdgpu_aca_init(struct amdgpu_device *adev)
@ -712,13 +712,6 @@ void amdgpu_aca_fini(struct amdgpu_device *adev)
atomic_set(&aca->ue_update_flag, 0);
}
int amdgpu_aca_reset(struct amdgpu_device *adev)
{
amdgpu_aca_fini(adev);
return amdgpu_aca_init(adev);
}
void amdgpu_aca_set_smu_funcs(struct amdgpu_device *adev, const struct aca_smu_funcs *smu_funcs)
{
struct amdgpu_aca *aca = &adev->aca;
@ -892,7 +885,9 @@ DEFINE_DEBUGFS_ATTRIBUTE(aca_debug_mode_fops, NULL, amdgpu_aca_smu_debug_mode_se
void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root)
{
#if defined(CONFIG_DEBUG_FS)
if (!root || adev->ip_versions[MP1_HWIP][0] != IP_VERSION(13, 0, 6))
if (!root ||
(adev->ip_versions[MP1_HWIP][0] != IP_VERSION(13, 0, 6) &&
adev->ip_versions[MP1_HWIP][0] != IP_VERSION(13, 0, 14)))
return;
debugfs_create_file("aca_debug_mode", 0200, root, adev, &aca_debug_mode_fops);


@ -25,6 +25,7 @@
#define __AMDGPU_ACA_H__
#include <linux/list.h>
#include <linux/spinlock.h>
struct ras_err_data;
struct ras_query_context;
@ -133,7 +134,7 @@ struct aca_bank_error {
struct aca_error {
struct list_head list;
struct mutex lock;
spinlock_t lock;
enum aca_error_type type;
int nr_errors;
};
@ -191,7 +192,6 @@ struct aca_info {
int amdgpu_aca_init(struct amdgpu_device *adev);
void amdgpu_aca_fini(struct amdgpu_device *adev);
int amdgpu_aca_reset(struct amdgpu_device *adev);
void amdgpu_aca_set_smu_funcs(struct amdgpu_device *adev, const struct aca_smu_funcs *smu_funcs);
bool amdgpu_aca_is_enabled(struct amdgpu_device *adev);


@ -100,6 +100,7 @@ struct amdgpu_afmt_acr amdgpu_afmt_acr(uint32_t clock)
amdgpu_afmt_calc_cts(clock, &res.cts_32khz, &res.n_32khz, 32000);
amdgpu_afmt_calc_cts(clock, &res.cts_44_1khz, &res.n_44_1khz, 44100);
amdgpu_afmt_calc_cts(clock, &res.cts_48khz, &res.n_48khz, 48000);
res.clock = clock;
return res;
}


@ -133,6 +133,9 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work)
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
reset_context.src = adev->enable_mes ?
AMDGPU_RESET_SRC_MES :
AMDGPU_RESET_SRC_HWS;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
amdgpu_device_gpu_recover(adev, NULL, &reset_context);
@ -261,12 +264,13 @@ int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm)
return r;
}
int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev)
int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev,
struct amdgpu_reset_context *reset_context)
{
int r = 0;
if (adev->kfd.dev)
r = kgd2kfd_pre_reset(adev->kfd.dev);
r = kgd2kfd_pre_reset(adev->kfd.dev, reset_context);
return r;
}


@ -47,6 +47,7 @@ enum TLB_FLUSH_TYPE {
};
struct amdgpu_device;
struct amdgpu_reset_context;
enum kfd_mem_attachment_type {
KFD_MEM_ATT_SHARED, /* Share kgd_mem->bo or another attachment's */
@ -170,7 +171,8 @@ bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev);
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev);
int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev,
struct amdgpu_reset_context *reset_context);
int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev);
@ -416,7 +418,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
void kgd2kfd_device_exit(struct kfd_dev *kfd);
void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm);
int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm);
int kgd2kfd_pre_reset(struct kfd_dev *kfd);
int kgd2kfd_pre_reset(struct kfd_dev *kfd,
struct amdgpu_reset_context *reset_context);
int kgd2kfd_post_reset(struct kfd_dev *kfd);
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd);
@ -459,7 +462,8 @@ static inline int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
return 0;
}
static inline int kgd2kfd_pre_reset(struct kfd_dev *kfd)
static inline int kgd2kfd_pre_reset(struct kfd_dev *kfd,
struct amdgpu_reset_context *reset_context)
{
return 0;
}


@ -0,0 +1,377 @@
/*
* Copyright 2023 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "gc/gc_12_0_0_offset.h"
#include "gc/gc_12_0_0_sh_mask.h"
#include "soc24.h"
#include <uapi/linux/kfd_ioctl.h>
static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe,
uint32_t queue, uint32_t vmid)
{
mutex_lock(&adev->srbm_mutex);
soc24_grbm_select(adev, mec, pipe, queue, vmid);
}
static void unlock_srbm(struct amdgpu_device *adev)
{
soc24_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
}
static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
uint32_t queue_id)
{
uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
lock_srbm(adev, mec, pipe, queue_id, 0);
}
static void release_queue(struct amdgpu_device *adev)
{
unlock_srbm(adev);
}
static int init_interrupts_v12(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t inst)
{
uint32_t mec;
uint32_t pipe;
mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
lock_srbm(adev, mec, pipe, 0, 0);
WREG32_SOC15(GC, 0, regCPC_INT_CNTL,
CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
unlock_srbm(adev);
return 0;
}
static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev,
unsigned int engine_id,
unsigned int queue_id)
{
uint32_t sdma_engine_reg_base = 0;
uint32_t sdma_rlc_reg_offset;
switch (engine_id) {
case 0:
sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
regSDMA0_QUEUE0_RB_CNTL) - regSDMA0_QUEUE0_RB_CNTL;
break;
case 1:
sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
regSDMA1_QUEUE0_RB_CNTL) - regSDMA0_QUEUE0_RB_CNTL;
break;
default:
BUG();
}
sdma_rlc_reg_offset = sdma_engine_reg_base
+ queue_id * (regSDMA0_QUEUE1_RB_CNTL - regSDMA0_QUEUE0_RB_CNTL);
pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
queue_id, sdma_rlc_reg_offset);
return sdma_rlc_reg_offset;
}
static int hqd_dump_v12(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
{
uint32_t i = 0, reg;
#define HQD_N_REGS 56
#define DUMP_REG(addr) do { \
if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
break; \
(*dump)[i][0] = (addr) << 2; \
(*dump)[i++][1] = RREG32(addr); \
} while (0)
*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
acquire_queue(adev, pipe_id, queue_id);
for (reg = SOC15_REG_OFFSET(GC, 0, regCP_MQD_BASE_ADDR);
reg <= SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI); reg++)
DUMP_REG(reg);
release_queue(adev);
WARN_ON_ONCE(i != HQD_N_REGS);
*n_regs = i;
return 0;
}
static int hqd_sdma_dump_v12(struct amdgpu_device *adev,
uint32_t engine_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs)
{
uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev,
engine_id, queue_id);
uint32_t i = 0, reg;
const uint32_t first_reg = regSDMA0_QUEUE0_RB_CNTL;
const uint32_t last_reg = regSDMA0_QUEUE0_CONTEXT_STATUS;
#undef HQD_N_REGS
#define HQD_N_REGS (last_reg - first_reg + 1)
*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
for (reg = first_reg;
reg <= last_reg; reg++)
DUMP_REG(sdma_rlc_reg_offset + reg);
WARN_ON_ONCE(i != HQD_N_REGS);
*n_regs = i;
return 0;
}
static int wave_control_execute_v12(struct amdgpu_device *adev,
uint32_t gfx_index_val,
uint32_t sq_cmd, uint32_t inst)
{
uint32_t data = 0;
mutex_lock(&adev->grbm_idx_mutex);
WREG32(SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX), gfx_index_val);
WREG32(SOC15_REG_OFFSET(GC, 0, regSQ_CMD), sq_cmd);
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
INSTANCE_BROADCAST_WRITES, 1);
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
SA_BROADCAST_WRITES, 1);
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
SE_BROADCAST_WRITES, 1);
WREG32(SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX), data);
mutex_unlock(&adev->grbm_idx_mutex);
return 0;
}
/* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
static uint32_t kgd_gfx_v12_enable_debug_trap(struct amdgpu_device *adev,
bool restore_dbg_registers,
uint32_t vmid)
{
uint32_t data = 0;
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
return data;
}
/* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
static uint32_t kgd_gfx_v12_disable_debug_trap(struct amdgpu_device *adev,
bool keep_trap_enabled,
uint32_t vmid)
{
uint32_t data = 0;
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0);
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0);
return data;
}
static int kgd_gfx_v12_validate_trap_override_request(struct amdgpu_device *adev,
uint32_t trap_override,
uint32_t *trap_mask_supported)
{
*trap_mask_supported &= KFD_DBG_TRAP_MASK_FP_INVALID |
KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
KFD_DBG_TRAP_MASK_FP_OVERFLOW |
KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
KFD_DBG_TRAP_MASK_FP_INEXACT |
KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION |
KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START |
KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR &&
trap_override != KFD_DBG_TRAP_OVERRIDE_REPLACE)
return -EPERM;
return 0;
}
static uint32_t trap_mask_map_sw_to_hw(uint32_t mask)
{
uint32_t trap_on_start = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START) ? 1 : 0;
uint32_t trap_on_end = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END) ? 1 : 0;
uint32_t excp_en = mask & (KFD_DBG_TRAP_MASK_FP_INVALID |
KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL |
KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO |
KFD_DBG_TRAP_MASK_FP_OVERFLOW |
KFD_DBG_TRAP_MASK_FP_UNDERFLOW |
KFD_DBG_TRAP_MASK_FP_INEXACT |
KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO |
KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH |
KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION);
uint32_t ret;
ret = REG_SET_FIELD(0, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, excp_en);
ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START, trap_on_start);
ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END, trap_on_end);
return ret;
}
static uint32_t trap_mask_map_hw_to_sw(uint32_t mask)
{
uint32_t ret = REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, EXCP_EN);
if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START))
ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START;
if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END))
ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END;
return ret;
}
/* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */
static uint32_t kgd_gfx_v12_set_wave_launch_trap_override(struct amdgpu_device *adev,
uint32_t vmid,
uint32_t trap_override,
uint32_t trap_mask_bits,
uint32_t trap_mask_request,
uint32_t *trap_mask_prev,
uint32_t kfd_dbg_trap_cntl_prev)
{
uint32_t data = 0;
*trap_mask_prev = trap_mask_map_hw_to_sw(kfd_dbg_trap_cntl_prev);
data = (trap_mask_bits & trap_mask_request) | (*trap_mask_prev & ~trap_mask_request);
data = trap_mask_map_sw_to_hw(data);
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1);
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, trap_override);
return data;
}
/* returns STALL_VMID or LAUNCH_MODE. */
static uint32_t kgd_gfx_v12_set_wave_launch_mode(struct amdgpu_device *adev,
uint8_t wave_launch_mode,
uint32_t vmid)
{
uint32_t data = 0;
bool is_stall_mode = wave_launch_mode == 4;
if (is_stall_mode)
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, STALL_VMID,
1);
else
data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, LAUNCH_MODE,
wave_launch_mode);
return data;
}
#define TCP_WATCH_STRIDE (regTCP_WATCH1_ADDR_H - regTCP_WATCH0_ADDR_H)
static uint32_t kgd_gfx_v12_set_address_watch(struct amdgpu_device *adev,
uint64_t watch_address,
uint32_t watch_address_mask,
uint32_t watch_id,
uint32_t watch_mode,
uint32_t debug_vmid,
uint32_t inst)
{
uint32_t watch_address_high;
uint32_t watch_address_low;
uint32_t watch_address_cntl;
watch_address_cntl = 0;
watch_address_low = lower_32_bits(watch_address);
watch_address_high = upper_32_bits(watch_address) & 0xffff;
watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
TCP_WATCH0_CNTL,
MODE,
watch_mode);
watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
TCP_WATCH0_CNTL,
MASK,
watch_address_mask >> 7);
watch_address_cntl = REG_SET_FIELD(watch_address_cntl,
TCP_WATCH0_CNTL,
VALID,
1);
WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_H) +
(watch_id * TCP_WATCH_STRIDE)),
watch_address_high);
WREG32_RLC((SOC15_REG_OFFSET(GC, 0, regTCP_WATCH0_ADDR_L) +
(watch_id * TCP_WATCH_STRIDE)),
watch_address_low);
return watch_address_cntl;
}
static uint32_t kgd_gfx_v12_clear_address_watch(struct amdgpu_device *adev,
uint32_t watch_id)
{
return 0;
}
const struct kfd2kgd_calls gfx_v12_kfd2kgd = {
.init_interrupts = init_interrupts_v12,
.hqd_dump = hqd_dump_v12,
.hqd_sdma_dump = hqd_sdma_dump_v12,
.wave_control_execute = wave_control_execute_v12,
.get_atc_vmid_pasid_mapping_info = NULL,
.enable_debug_trap = kgd_gfx_v12_enable_debug_trap,
.disable_debug_trap = kgd_gfx_v12_disable_debug_trap,
.validate_trap_override_request = kgd_gfx_v12_validate_trap_override_request,
.set_wave_launch_trap_override = kgd_gfx_v12_set_wave_launch_trap_override,
.set_wave_launch_mode = kgd_gfx_v12_set_wave_launch_mode,
.set_address_watch = kgd_gfx_v12_set_address_watch,
.clear_address_watch = kgd_gfx_v12_clear_address_watch,
};


@ -172,6 +172,8 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
{
uint64_t reserved_for_pt =
ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
uint64_t reserved_for_ras = (con ? con->reserved_pages_in_bytes : 0);
size_t system_mem_needed, ttm_mem_needed, vram_needed;
int ret = 0;
uint64_t vram_size = 0;
@ -220,7 +222,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
(kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
kfd_mem_limit.max_ttm_mem_limit) ||
(adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] + vram_needed >
vram_size - reserved_for_pt - atomic64_read(&adev->vram_pin_size))) {
vram_size - reserved_for_pt - reserved_for_ras - atomic64_read(&adev->vram_pin_size))) {
ret = -ENOMEM;
goto release;
}
@ -415,6 +417,10 @@ static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
"Called with userptr BO"))
return -EINVAL;
/* bo has been pinned, not need validate it */
if (bo->tbo.pin_count)
return 0;
amdgpu_bo_placement_from_domain(bo, domain);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
@ -1088,7 +1094,10 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, &range);
if (ret) {
pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
if (ret == -EAGAIN)
pr_debug("Failed to get user pages, try again\n");
else
pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
goto unregister_out;
}
@ -1472,13 +1481,30 @@ static int amdgpu_amdkfd_gpuvm_pin_bo(struct amdgpu_bo *bo, u32 domain)
if (unlikely(ret))
return ret;
if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) {
/*
* If bo is not contiguous on VRAM, move to system memory first to ensure
* we can get contiguous VRAM space after evicting other BOs.
*/
if (!(bo->tbo.resource->placement & TTM_PL_FLAG_CONTIGUOUS)) {
struct ttm_operation_ctx ctx = { true, false };
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (unlikely(ret)) {
pr_debug("validate bo 0x%p to GTT failed %d\n", &bo->tbo, ret);
goto out;
}
}
}
ret = amdgpu_bo_pin_restricted(bo, domain, 0, 0);
if (ret)
pr_err("Error in Pinning BO to domain: %d\n", domain);
amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);
out:
amdgpu_bo_unreserve(bo);
return ret;
}
@ -1649,6 +1675,8 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
{
uint64_t reserved_for_pt =
ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
uint64_t reserved_for_ras = (con ? con->reserved_pages_in_bytes : 0);
ssize_t available;
uint64_t vram_available, system_mem_available, ttm_mem_available;
@ -1656,7 +1684,8 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
- adev->kfd.vram_used_aligned[xcp_id]
- atomic64_read(&adev->vram_pin_size)
- reserved_for_pt;
- reserved_for_pt
- reserved_for_ras;
if (adev->flags & AMD_IS_APU) {
system_mem_available = no_system_mem_limit ?
@ -1714,6 +1743,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 0;
/* For contiguous VRAM allocation */
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_CONTIGUOUS)
alloc_flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
}
xcp_id = fpriv->xcp_id == AMDGPU_XCP_NO_PARTITION ?
0 : fpriv->xcp_id;
@ -2712,7 +2745,7 @@ static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_i
/* keep mem without hmm range at userptr_inval_list */
if (!mem->range)
continue;
continue;
/* Only check mem with hmm range associated */
valid = amdgpu_ttm_tt_get_user_pages_done(
@ -2957,9 +2990,6 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu *
if (!attachment->is_mapped)
continue;
if (attachment->bo_va->base.bo->tbo.pin_count)
continue;
kfd_mem_dmaunmap_attachment(mem, attachment);
ret = update_gpuvm_pte(mem, attachment, &sync_obj);
if (ret) {


@ -1484,6 +1484,8 @@ int amdgpu_atombios_init_mc_reg_table(struct amdgpu_device *adev,
(u32)le32_to_cpu(*((u32 *)reg_data + j));
j++;
} else if ((reg_table->mc_reg_address[i].pre_reg_data & LOW_NIBBLE_MASK) == DATA_EQU_PREV) {
if (i == 0)
continue;
reg_table->mc_reg_table_entry[num_ranges].mc_data[i] =
reg_table->mc_reg_table_entry[num_ranges].mc_data[i - 1];
}


@ -290,7 +290,6 @@ static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev,
return vram_type;
}
int
amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
int *vram_width, int *vram_type,
@ -301,6 +300,7 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
u16 data_offset, size;
union igp_info *igp_info;
union vram_info *vram_info;
union umc_info *umc_info;
union vram_module *vram_module;
u8 frev, crev;
u8 mem_type;
@ -312,10 +312,16 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
if (adev->flags & AMD_IS_APU)
index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
integratedsysteminfo);
else
index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
vram_info);
else {
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, umc_info);
break;
default:
index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, vram_info);
}
}
if (amdgpu_atom_parse_data_header(mode_info->atom_context,
index, &size,
&frev, &crev, &data_offset)) {
@ -383,123 +389,150 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
return -EINVAL;
}
} else {
vram_info = (union vram_info *)
(mode_info->atom_context->bios + data_offset);
module_id = (RREG32(adev->bios_scratch_reg_offset + 4) & 0x00ff0000) >> 16;
if (frev == 3) {
switch (crev) {
/* v30 */
case 0:
vram_module = (union vram_module *)vram_info->v30.vram_module;
mem_vendor = (vram_module->v30.dram_vendor_id) & 0xF;
if (vram_vendor)
*vram_vendor = mem_vendor;
mem_type = vram_info->v30.memory_type;
if (vram_type)
*vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
mem_channel_number = vram_info->v30.channel_num;
mem_channel_width = vram_info->v30.channel_width;
if (vram_width)
*vram_width = mem_channel_number * (1 << mem_channel_width);
break;
default:
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
umc_info = (union umc_info *)(mode_info->atom_context->bios + data_offset);
if (frev == 4) {
switch (crev) {
case 0:
mem_channel_number = le32_to_cpu(umc_info->v40.channel_num);
mem_type = le32_to_cpu(umc_info->v40.vram_type);
mem_channel_width = le32_to_cpu(umc_info->v40.channel_width);
mem_vendor = RREG32(adev->bios_scratch_reg_offset + 4) & 0xF;
if (vram_vendor)
*vram_vendor = mem_vendor;
if (vram_type)
*vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
if (vram_width)
*vram_width = mem_channel_number * (1 << mem_channel_width);
break;
default:
return -EINVAL;
}
} else
return -EINVAL;
break;
default:
vram_info = (union vram_info *)
(mode_info->atom_context->bios + data_offset);
module_id = (RREG32(adev->bios_scratch_reg_offset + 4) & 0x00ff0000) >> 16;
if (frev == 3) {
switch (crev) {
/* v30 */
case 0:
vram_module = (union vram_module *)vram_info->v30.vram_module;
mem_vendor = (vram_module->v30.dram_vendor_id) & 0xF;
if (vram_vendor)
*vram_vendor = mem_vendor;
mem_type = vram_info->v30.memory_type;
if (vram_type)
*vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
mem_channel_number = vram_info->v30.channel_num;
mem_channel_width = vram_info->v30.channel_width;
if (vram_width)
*vram_width = mem_channel_number * (1 << mem_channel_width);
break;
default:
return -EINVAL;
}
} else if (frev == 2) {
switch (crev) {
/* v23 */
case 3:
if (module_id > vram_info->v23.vram_module_num)
module_id = 0;
vram_module = (union vram_module *)vram_info->v23.vram_module;
while (i < module_id) {
vram_module = (union vram_module *)
((u8 *)vram_module + vram_module->v9.vram_module_size);
i++;
}
mem_type = vram_module->v9.memory_type;
if (vram_type)
*vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
mem_channel_number = vram_module->v9.channel_num;
mem_channel_width = vram_module->v9.channel_width;
if (vram_width)
*vram_width = mem_channel_number * (1 << mem_channel_width);
mem_vendor = (vram_module->v9.vender_rev_id) & 0xF;
if (vram_vendor)
*vram_vendor = mem_vendor;
break;
/* v24 */
case 4:
if (module_id > vram_info->v24.vram_module_num)
module_id = 0;
vram_module = (union vram_module *)vram_info->v24.vram_module;
while (i < module_id) {
vram_module = (union vram_module *)
((u8 *)vram_module + vram_module->v10.vram_module_size);
i++;
}
mem_type = vram_module->v10.memory_type;
if (vram_type)
*vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
mem_channel_number = vram_module->v10.channel_num;
mem_channel_width = vram_module->v10.channel_width;
if (vram_width)
*vram_width = mem_channel_number * (1 << mem_channel_width);
mem_vendor = (vram_module->v10.vender_rev_id) & 0xF;
if (vram_vendor)
*vram_vendor = mem_vendor;
break;
/* v25 */
case 5:
if (module_id > vram_info->v25.vram_module_num)
module_id = 0;
vram_module = (union vram_module *)vram_info->v25.vram_module;
while (i < module_id) {
vram_module = (union vram_module *)
((u8 *)vram_module + vram_module->v11.vram_module_size);
i++;
}
mem_type = vram_module->v11.memory_type;
if (vram_type)
*vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
mem_channel_number = vram_module->v11.channel_num;
mem_channel_width = vram_module->v11.channel_width;
if (vram_width)
*vram_width = mem_channel_number * (1 << mem_channel_width);
mem_vendor = (vram_module->v11.vender_rev_id) & 0xF;
if (vram_vendor)
*vram_vendor = mem_vendor;
break;
/* v26 */
case 6:
if (module_id > vram_info->v26.vram_module_num)
module_id = 0;
vram_module = (union vram_module *)vram_info->v26.vram_module;
while (i < module_id) {
vram_module = (union vram_module *)
((u8 *)vram_module + vram_module->v9.vram_module_size);
i++;
}
mem_type = vram_module->v9.memory_type;
if (vram_type)
*vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
mem_channel_number = vram_module->v9.channel_num;
mem_channel_width = vram_module->v9.channel_width;
if (vram_width)
*vram_width = mem_channel_number * (1 << mem_channel_width);
mem_vendor = (vram_module->v9.vender_rev_id) & 0xF;
if (vram_vendor)
*vram_vendor = mem_vendor;
break;
default:
return -EINVAL;
}
} else {
/* invalid frev */
return -EINVAL;
}
} else if (frev == 2) {
switch (crev) {
/* v23 */
case 3:
if (module_id > vram_info->v23.vram_module_num)
module_id = 0;
vram_module = (union vram_module *)vram_info->v23.vram_module;
while (i < module_id) {
vram_module = (union vram_module *)
((u8 *)vram_module + vram_module->v9.vram_module_size);
i++;
}
mem_type = vram_module->v9.memory_type;
if (vram_type)
*vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
mem_channel_number = vram_module->v9.channel_num;
mem_channel_width = vram_module->v9.channel_width;
if (vram_width)
*vram_width = mem_channel_number * (1 << mem_channel_width);
mem_vendor = (vram_module->v9.vender_rev_id) & 0xF;
if (vram_vendor)
*vram_vendor = mem_vendor;
break;
/* v24 */
case 4:
if (module_id > vram_info->v24.vram_module_num)
module_id = 0;
vram_module = (union vram_module *)vram_info->v24.vram_module;
while (i < module_id) {
vram_module = (union vram_module *)
((u8 *)vram_module + vram_module->v10.vram_module_size);
i++;
}
mem_type = vram_module->v10.memory_type;
if (vram_type)
*vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
mem_channel_number = vram_module->v10.channel_num;
mem_channel_width = vram_module->v10.channel_width;
if (vram_width)
*vram_width = mem_channel_number * (1 << mem_channel_width);
mem_vendor = (vram_module->v10.vender_rev_id) & 0xF;
if (vram_vendor)
*vram_vendor = mem_vendor;
break;
/* v25 */
case 5:
if (module_id > vram_info->v25.vram_module_num)
module_id = 0;
vram_module = (union vram_module *)vram_info->v25.vram_module;
while (i < module_id) {
vram_module = (union vram_module *)
((u8 *)vram_module + vram_module->v11.vram_module_size);
i++;
}
mem_type = vram_module->v11.memory_type;
if (vram_type)
*vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
mem_channel_number = vram_module->v11.channel_num;
mem_channel_width = vram_module->v11.channel_width;
if (vram_width)
*vram_width = mem_channel_number * (1 << mem_channel_width);
mem_vendor = (vram_module->v11.vender_rev_id) & 0xF;
if (vram_vendor)
*vram_vendor = mem_vendor;
break;
/* v26 */
case 6:
if (module_id > vram_info->v26.vram_module_num)
module_id = 0;
vram_module = (union vram_module *)vram_info->v26.vram_module;
while (i < module_id) {
vram_module = (union vram_module *)
((u8 *)vram_module + vram_module->v9.vram_module_size);
i++;
}
mem_type = vram_module->v9.memory_type;
if (vram_type)
*vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
mem_channel_number = vram_module->v9.channel_num;
mem_channel_width = vram_module->v9.channel_width;
if (vram_width)
*vram_width = mem_channel_number * (1 << mem_channel_width);
mem_vendor = (vram_module->v9.vender_rev_id) & 0xF;
if (vram_vendor)
*vram_vendor = mem_vendor;
break;
default:
return -EINVAL;
}
} else {
/* invalid frev */
return -EINVAL;
}
}
}
return 0;


@ -213,6 +213,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
struct amdgpu_firmware_info *ucode;
id = fw_type_convert(cgs_device, type);
if (id >= AMDGPU_UCODE_ID_MAXIMUM)
return -EINVAL;
ucode = &adev->firmware.ucode[id];
if (ucode->fw == NULL)
return -EINVAL;


@ -918,7 +918,7 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
/* rev==1 */
config[no_regs++] = adev->rev_id;
config[no_regs++] = lower_32_bits(adev->pg_flags);
config[no_regs++] = adev->pg_flags;
config[no_regs++] = lower_32_bits(adev->cg_flags);
/* rev==2 */
@ -935,7 +935,7 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
config[no_regs++] = adev->flags & AMD_IS_APU ? 1 : 0;
/* rev==5 PG/CG flag upper 32bit */
config[no_regs++] = upper_32_bits(adev->pg_flags);
config[no_regs++] = 0;
config[no_regs++] = upper_32_bits(adev->cg_flags);
while (size && (*pos < no_regs * 4)) {
@ -2186,6 +2186,9 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
amdgpu_debugfs_vcn_fwlog_init(adev, i, &adev->vcn.inst[i]);
}
if (amdgpu_umsch_mm & amdgpu_umsch_mm_fwlog)
amdgpu_debugfs_umsch_fwlog_init(adev, &adev->umsch_mm);
amdgpu_ras_debugfs_create_all(adev);
amdgpu_rap_debugfs_init(adev);
amdgpu_securedisplay_debugfs_init(adev);


@ -224,12 +224,29 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
coredump->reset_task_info.process_name,
coredump->reset_task_info.pid);
/* GPU IP's information of the SOC */
drm_printf(&p, "\nIP Information\n");
/* SOC Information */
drm_printf(&p, "\nSOC Information\n");
drm_printf(&p, "SOC Device id: %d\n", coredump->adev->pdev->device);
drm_printf(&p, "SOC PCI Revision id: %d\n", coredump->adev->pdev->revision);
drm_printf(&p, "SOC Family: %d\n", coredump->adev->family);
drm_printf(&p, "SOC Revision id: %d\n", coredump->adev->rev_id);
drm_printf(&p, "SOC External Revision id: %d\n", coredump->adev->external_rev_id);
/* Memory Information */
drm_printf(&p, "\nSOC Memory Information\n");
drm_printf(&p, "real vram size: %llu\n", coredump->adev->gmc.real_vram_size);
drm_printf(&p, "visible vram size: %llu\n", coredump->adev->gmc.visible_vram_size);
drm_printf(&p, "visible vram size: %llu\n", coredump->adev->mman.gtt_mgr.manager.size);
/* GDS Config */
drm_printf(&p, "\nGDS Config\n");
drm_printf(&p, "gds: total size: %d\n", coredump->adev->gds.gds_size);
drm_printf(&p, "gds: compute partition size: %d\n", coredump->adev->gds.gds_size);
drm_printf(&p, "gds: gws per compute partition: %d\n", coredump->adev->gds.gws_size);
drm_printf(&p, "gds: os per compute partition: %d\n", coredump->adev->gds.oa_size);
/* HWIP Version Information */
drm_printf(&p, "\nHW IP Version Information\n");
for (int i = 1; i < MAX_HWIP; i++) {
for (int j = 0; j < HWIP_MAX_INSTANCE; j++) {
ver = coredump->adev->ip_versions[i][j];


@ -679,7 +679,7 @@ uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
GC_HWIP, false,
&rlcg_flag)) {
ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, xcc_id);
ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, GET_INST(GC, xcc_id));
} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
amdgpu_sriov_runtime(adev) &&
down_read_trylock(&adev->reset_domain->sem)) {
@ -810,7 +810,7 @@ void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
GC_HWIP, true,
&rlcg_flag)) {
amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, xcc_id);
amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, GET_INST(GC, xcc_id));
} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
amdgpu_sriov_runtime(adev) &&
down_read_trylock(&adev->reset_domain->sem)) {
@ -1308,6 +1308,7 @@ static int amdgpu_device_asic_init(struct amdgpu_device *adev)
amdgpu_asic_pre_asic_init(adev);
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
amdgpu_psp_wait_for_bootloader(adev);
ret = amdgpu_atomfirmware_asic_init(adev, true);
@ -4048,6 +4049,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->grbm_idx_mutex);
mutex_init(&adev->mn_lock);
mutex_init(&adev->virt.vf_errors.lock);
mutex_init(&adev->virt.rlcg_reg_lock);
hash_init(adev->mn_hash);
mutex_init(&adev->psp.mutex);
mutex_init(&adev->notifier_lock);
@ -5011,7 +5013,8 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
shadow = vmbo->shadow;
/* No need to recover an evicted BO */
if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
if (!shadow->tbo.resource ||
shadow->tbo.resource->mem_type != TTM_PL_TT ||
shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
continue;
@ -5055,29 +5058,26 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
* amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
*
* @adev: amdgpu_device pointer
* @from_hypervisor: request from hypervisor
* @reset_context: amdgpu reset context pointer
*
* do VF FLR and reinitialize Asic
* return 0 means succeeded otherwise failed
*/
static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
bool from_hypervisor)
struct amdgpu_reset_context *reset_context)
{
int r;
struct amdgpu_hive_info *hive = NULL;
int retry_limit = 0;
retry:
amdgpu_amdkfd_pre_reset(adev);
amdgpu_device_stop_pending_resets(adev);
if (from_hypervisor)
if (test_bit(AMDGPU_HOST_FLR, &reset_context->flags)) {
clear_bit(AMDGPU_HOST_FLR, &reset_context->flags);
r = amdgpu_virt_request_full_gpu(adev, true);
else
} else {
r = amdgpu_virt_reset_gpu(adev);
}
if (r)
return r;
amdgpu_ras_set_fed(adev, false);
amdgpu_irq_gpu_reset_resume_helper(adev);
@ -5087,7 +5087,7 @@ retry:
/* Resume IP prior to SMC */
r = amdgpu_device_ip_reinit_early_sriov(adev);
if (r)
goto error;
return r;
amdgpu_virt_init_data_exchange(adev);
@ -5098,38 +5098,41 @@ retry:
/* now we are okay to resume SMC/CP/SDMA */
r = amdgpu_device_ip_reinit_late_sriov(adev);
if (r)
goto error;
return r;
hive = amdgpu_get_xgmi_hive(adev);
/* Update PSP FW topology after reset */
if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
r = amdgpu_xgmi_update_topology(hive, adev);
if (hive)
amdgpu_put_xgmi_hive(hive);
if (r)
return r;
if (!r) {
r = amdgpu_ib_ring_tests(adev);
r = amdgpu_ib_ring_tests(adev);
if (r)
return r;
amdgpu_amdkfd_post_reset(adev);
}
error:
if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
amdgpu_inc_vram_lost(adev);
r = amdgpu_device_recover_vram(adev);
}
if (r)
return r;
/* need to be called during full access so we can't do it later like
* bare-metal does.
*/
amdgpu_amdkfd_post_reset(adev);
amdgpu_virt_release_full_gpu(adev, true);
if (AMDGPU_RETRY_SRIOV_RESET(r)) {
if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
retry_limit++;
goto retry;
} else
DRM_ERROR("GPU reset retry is beyond the retry limit\n");
}
return r;
/* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
amdgpu_ras_resume(adev);
return 0;
}
/**
@ -5371,11 +5374,13 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) {
amdgpu_reset_reg_dumps(tmp_adev);
dev_info(tmp_adev->dev, "Dumping IP State\n");
/* Trigger ip dump before we reset the asic */
for (i = 0; i < tmp_adev->num_ip_blocks; i++)
if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state)
tmp_adev->ip_blocks[i].version->funcs
->dump_ip_state((void *)tmp_adev);
dev_info(tmp_adev->dev, "Dumping IP State Completed\n");
}
reset_context->reset_device_list = device_list_handle;
@ -5688,6 +5693,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
int i, r = 0;
bool need_emergency_restart = false;
bool audio_suspended = false;
int retry_limit = AMDGPU_MAX_RETRY_LIMIT;
/*
* Special case: RAS triggered and full reset isn't supported
@ -5722,7 +5728,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
* to put adev in the 1st position.
*/
INIT_LIST_HEAD(&device_list);
if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) {
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
list_add_tail(&tmp_adev->reset_list, &device_list);
if (adev->shutdown)
@ -5769,8 +5775,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
if (!amdgpu_sriov_vf(tmp_adev))
amdgpu_amdkfd_pre_reset(tmp_adev);
amdgpu_amdkfd_pre_reset(tmp_adev, reset_context);
/*
* Mark these ASICs to be reseted as untracked first
@ -5823,34 +5828,34 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
r, adev_to_drm(tmp_adev)->unique);
tmp_adev->asic_reset_res = r;
}
if (!amdgpu_sriov_vf(tmp_adev))
/*
* Drop all pending non scheduler resets. Scheduler resets
* were already dropped during drm_sched_stop
*/
amdgpu_device_stop_pending_resets(tmp_adev);
}
/* Actual ASIC resets if needed.*/
/* Host driver will handle XGMI hive reset for SRIOV */
if (amdgpu_sriov_vf(adev)) {
r = amdgpu_device_reset_sriov(adev, job ? false : true);
r = amdgpu_device_reset_sriov(adev, reset_context);
if (AMDGPU_RETRY_SRIOV_RESET(r) && (retry_limit--) > 0) {
amdgpu_virt_release_full_gpu(adev, true);
goto retry;
}
if (r)
adev->asic_reset_res = r;
/* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
if (amdgpu_ip_version(adev, GC_HWIP, 0) ==
IP_VERSION(9, 4, 2) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3))
amdgpu_ras_resume(adev);
} else {
r = amdgpu_do_asic_reset(device_list_handle, reset_context);
if (r && r == -EAGAIN)
goto retry;
}
list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
/*
* Drop any pending non scheduler resets queued before reset is done.
* Any reset scheduled after this point would be valid. Scheduler resets
* were already dropped during drm_sched_stop and no new ones can come
* in before drm_sched_start.
*/
amdgpu_device_stop_pending_resets(tmp_adev);
}
skip_hw_reset:
/* Post ASIC reset for all devs .*/
@ -6170,7 +6175,7 @@ int amdgpu_device_baco_exit(struct drm_device *dev)
adev->nbio.funcs->enable_doorbell_interrupt)
adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
if (amdgpu_passthrough(adev) &&
if (amdgpu_passthrough(adev) && adev->nbio.funcs &&
adev->nbio.funcs->clear_doorbell_interrupt)
adev->nbio.funcs->clear_doorbell_interrupt(adev);
@ -6281,8 +6286,9 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
amdgpu_put_xgmi_hive(hive);
}
ras = amdgpu_ras_get_context(adev);
if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) &&
ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery))
if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) &&
ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery))
return PCI_ERS_RESULT_RECOVERED;
DRM_INFO("PCI error: slot reset callback!!\n");


@ -55,6 +55,7 @@
#include "smuio_v9_0.h"
#include "gmc_v10_0.h"
#include "gmc_v11_0.h"
#include "gmc_v12_0.h"
#include "gfxhub_v2_0.h"
#include "mmhub_v2_0.h"
#include "nbio_v2_3.h"
@ -68,15 +69,18 @@
#include "hdp_v7_0.h"
#include "nv.h"
#include "soc21.h"
#include "soc24.h"
#include "navi10_ih.h"
#include "ih_v6_0.h"
#include "ih_v6_1.h"
#include "ih_v7_0.h"
#include "gfx_v10_0.h"
#include "gfx_v11_0.h"
#include "gfx_v12_0.h"
#include "sdma_v5_0.h"
#include "sdma_v5_2.h"
#include "sdma_v6_0.h"
#include "sdma_v7_0.h"
#include "lsdma_v6_0.h"
#include "lsdma_v7_0.h"
#include "vcn_v2_0.h"
@ -90,8 +94,8 @@
#include "vcn_v4_0_5.h"
#include "jpeg_v4_0_5.h"
#include "amdgpu_vkms.h"
#include "mes_v10_1.h"
#include "mes_v11_0.h"
#include "mes_v12_0.h"
#include "smuio_v11_0.h"
#include "smuio_v11_0_6.h"
#include "smuio_v13_0.h"
@ -362,6 +366,35 @@ static void amdgpu_discovery_harvest_config_quirk(struct amdgpu_device *adev)
}
}
static int amdgpu_discovery_verify_npsinfo(struct amdgpu_device *adev,
struct binary_header *bhdr)
{
struct table_info *info;
uint16_t checksum;
uint16_t offset;
info = &bhdr->table_list[NPS_INFO];
offset = le16_to_cpu(info->offset);
checksum = le16_to_cpu(info->checksum);
struct nps_info_header *nhdr =
(struct nps_info_header *)(adev->mman.discovery_bin + offset);
if (le32_to_cpu(nhdr->table_id) != NPS_INFO_TABLE_ID) {
dev_dbg(adev->dev, "invalid ip discovery nps info table id\n");
return -EINVAL;
}
if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
le32_to_cpu(nhdr->size_bytes),
checksum)) {
dev_dbg(adev->dev, "invalid nps info data table checksum\n");
return -EINVAL;
}
return 0;
}
static int amdgpu_discovery_init(struct amdgpu_device *adev)
{
struct table_info *info;
@ -1421,7 +1454,8 @@ static void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
* harvest configuration.
*/
if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 2, 0) &&
amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 3)) {
amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 3) &&
amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 4)) {
if ((adev->pdev->device == 0x731E &&
(adev->pdev->revision == 0xC6 ||
adev->pdev->revision == 0xC7)) ||
@ -1595,7 +1629,7 @@ static int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev)
break;
case 2:
mall_size_per_umc = le32_to_cpu(mall_info->v2.mall_size_per_umc);
adev->gmc.mall_size = mall_size_per_umc * adev->gmc.num_umc;
adev->gmc.mall_size = (uint64_t)mall_size_per_umc * adev->gmc.num_umc;
break;
default:
dev_err(adev->dev,
@ -1661,6 +1695,69 @@ static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev)
return 0;
}
union nps_info {
struct nps_info_v1_0 v1;
};
int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev,
uint32_t *nps_type,
struct amdgpu_gmc_memrange **ranges,
int *range_cnt)
{
struct amdgpu_gmc_memrange *mem_ranges;
struct binary_header *bhdr;
union nps_info *nps_info;
u16 offset;
int i;
if (!nps_type || !range_cnt || !ranges)
return -EINVAL;
if (!adev->mman.discovery_bin) {
dev_err(adev->dev,
"fetch mem range failed, ip discovery uninitialized\n");
return -EINVAL;
}
bhdr = (struct binary_header *)adev->mman.discovery_bin;
offset = le16_to_cpu(bhdr->table_list[NPS_INFO].offset);
if (!offset)
return -ENOENT;
/* If verification fails, return as if NPS table doesn't exist */
if (amdgpu_discovery_verify_npsinfo(adev, bhdr))
return -ENOENT;
nps_info = (union nps_info *)(adev->mman.discovery_bin + offset);
switch (le16_to_cpu(nps_info->v1.header.version_major)) {
case 1:
*nps_type = nps_info->v1.nps_type;
*range_cnt = nps_info->v1.count;
mem_ranges = kvzalloc(
*range_cnt * sizeof(struct amdgpu_gmc_memrange),
GFP_KERNEL);
for (i = 0; i < *range_cnt; i++) {
mem_ranges[i].base_address =
nps_info->v1.instance_info[i].base_address;
mem_ranges[i].limit_address =
nps_info->v1.instance_info[i].limit_address;
mem_ranges[i].nid_mask = -1;
mem_ranges[i].flags = 0;
}
*ranges = mem_ranges;
break;
default:
dev_err(adev->dev, "Unhandled NPS info table %d.%d\n",
le16_to_cpu(nps_info->v1.header.version_major),
le16_to_cpu(nps_info->v1.header.version_minor));
return -EINVAL;
}
return 0;
}
static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev)
{
/* what IP to use for this? */
@ -1674,6 +1771,7 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(9, 4, 1):
case IP_VERSION(9, 4, 2):
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
break;
case IP_VERSION(10, 1, 10):
@ -1700,6 +1798,10 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 5, 1):
amdgpu_device_ip_block_add(adev, &soc21_common_ip_block);
break;
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
amdgpu_device_ip_block_add(adev, &soc24_common_ip_block);
break;
default:
dev_err(adev->dev,
"Failed to add common ip block(GC_HWIP:0x%x)\n",
@ -1722,6 +1824,7 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(9, 4, 1):
case IP_VERSION(9, 4, 2):
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
break;
case IP_VERSION(10, 1, 10):
@ -1748,6 +1851,10 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 5, 1):
amdgpu_device_ip_block_add(adev, &gmc_v11_0_ip_block);
break;
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
amdgpu_device_ip_block_add(adev, &gmc_v12_0_ip_block);
break;
default:
dev_err(adev->dev, "Failed to add gmc ip block(GC_HWIP:0x%x)\n",
amdgpu_ip_version(adev, GC_HWIP, 0));
@ -1770,6 +1877,7 @@ static int amdgpu_discovery_set_ih_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(4, 2, 1):
case IP_VERSION(4, 4, 0):
case IP_VERSION(4, 4, 2):
case IP_VERSION(4, 4, 5):
amdgpu_device_ip_block_add(adev, &vega20_ih_ip_block);
break;
case IP_VERSION(5, 0, 0):
@ -1839,6 +1947,7 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(13, 0, 8):
case IP_VERSION(13, 0, 10):
case IP_VERSION(13, 0, 11):
case IP_VERSION(13, 0, 14):
case IP_VERSION(14, 0, 0):
case IP_VERSION(14, 0, 1):
amdgpu_device_ip_block_add(adev, &psp_v13_0_ip_block);
@ -1897,6 +2006,7 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(13, 0, 8):
case IP_VERSION(13, 0, 10):
case IP_VERSION(13, 0, 11):
case IP_VERSION(13, 0, 14):
amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block);
break;
case IP_VERSION(14, 0, 0):
@ -1954,6 +2064,11 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(3, 2, 1):
case IP_VERSION(3, 5, 0):
case IP_VERSION(3, 5, 1):
case IP_VERSION(4, 1, 0):
/* TODO: Fix IP version. DC code expects version 4.0.1 */
if (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(4, 1, 0))
adev->ip_versions[DCE_HWIP][0] = IP_VERSION(4, 0, 1);
if (amdgpu_sriov_vf(adev))
amdgpu_discovery_set_sriov_display(adev);
else
@ -2000,6 +2115,7 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
break;
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
amdgpu_device_ip_block_add(adev, &gfx_v9_4_3_ip_block);
break;
case IP_VERSION(10, 1, 10):
@ -2026,6 +2142,12 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 5, 1):
amdgpu_device_ip_block_add(adev, &gfx_v11_0_ip_block);
break;
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
if (!amdgpu_exp_hw_support)
return -EINVAL;
amdgpu_device_ip_block_add(adev, &gfx_v12_0_ip_block);
break;
default:
dev_err(adev->dev, "Failed to add gfx ip block(GC_HWIP:0x%x)\n",
amdgpu_ip_version(adev, GC_HWIP, 0));
@ -2048,6 +2170,7 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block);
break;
case IP_VERSION(4, 4, 2):
case IP_VERSION(4, 4, 5):
amdgpu_device_ip_block_add(adev, &sdma_v4_4_2_ip_block);
break;
case IP_VERSION(5, 0, 0):
@ -2074,6 +2197,10 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(6, 1, 1):
amdgpu_device_ip_block_add(adev, &sdma_v6_0_ip_block);
break;
case IP_VERSION(7, 0, 0):
case IP_VERSION(7, 0, 1):
amdgpu_device_ip_block_add(adev, &sdma_v7_0_ip_block);
break;
default:
dev_err(adev->dev,
"Failed to add sdma ip block(SDMA0_HWIP:0x%x)\n",
@ -2165,6 +2292,8 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(5, 0, 0):
amdgpu_device_ip_block_add(adev, &vcn_v5_0_0_ip_block);
amdgpu_device_ip_block_add(adev, &jpeg_v5_0_0_ip_block);
if (amdgpu_jpeg_test)
adev->enable_jpeg_test = true;
break;
default:
dev_err(adev->dev,
@ -2179,25 +2308,6 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev)
static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev)
{
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(10, 1, 10):
case IP_VERSION(10, 1, 1):
case IP_VERSION(10, 1, 2):
case IP_VERSION(10, 1, 3):
case IP_VERSION(10, 1, 4):
case IP_VERSION(10, 3, 0):
case IP_VERSION(10, 3, 1):
case IP_VERSION(10, 3, 2):
case IP_VERSION(10, 3, 3):
case IP_VERSION(10, 3, 4):
case IP_VERSION(10, 3, 5):
case IP_VERSION(10, 3, 6):
if (amdgpu_mes) {
amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block);
adev->enable_mes = true;
if (amdgpu_mes_kiq)
adev->enable_mes_kiq = true;
}
break;
case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 1):
case IP_VERSION(11, 0, 2):
@ -2209,6 +2319,14 @@ static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev)
adev->enable_mes = true;
adev->enable_mes_kiq = true;
break;
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
amdgpu_device_ip_block_add(adev, &mes_v12_0_ip_block);
adev->enable_mes = true;
adev->enable_mes_kiq = true;
if (amdgpu_uni_mes)
adev->enable_uni_mes = true;
break;
default:
break;
}
@ -2219,6 +2337,7 @@ static void amdgpu_discovery_init_soc_config(struct amdgpu_device *adev)
{
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
aqua_vanjaram_init_soc_config(adev);
break;
default:
@ -2444,6 +2563,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(9, 4, 1):
case IP_VERSION(9, 4, 2):
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
adev->family = AMDGPU_FAMILY_AI;
break;
case IP_VERSION(9, 1, 0):
@ -2488,6 +2608,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(11, 5, 1):
adev->family = AMDGPU_FAMILY_GC_11_5_0;
break;
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
adev->family = AMDGPU_FAMILY_GC_12_0_0;
break;
default:
return -EINVAL;
}
@ -2515,7 +2639,8 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
if (amdgpu_ip_version(adev, XGMI_HWIP, 0) == IP_VERSION(4, 8, 0))
adev->gmc.xgmi.supported = true;
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3))
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4))
adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 4, 0);
/* set NBIO version */
@ -2596,6 +2721,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(4, 2, 1):
case IP_VERSION(4, 4, 0):
case IP_VERSION(4, 4, 2):
case IP_VERSION(4, 4, 5):
adev->hdp.funcs = &hdp_v4_0_funcs;
break;
case IP_VERSION(5, 0, 0):

View File

@ -30,4 +30,9 @@
void amdgpu_discovery_fini(struct amdgpu_device *adev);
int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev);
int amdgpu_discovery_get_nps_info(struct amdgpu_device *adev,
uint32_t *nps_type,
struct amdgpu_gmc_memrange **ranges,
int *range_cnt);
#endif /* __AMDGPU_DISCOVERY__ */

View File

@ -718,6 +718,37 @@ extract_render_dcc_offset(struct amdgpu_device *adev,
return 0;
}
static int convert_tiling_flags_to_modifier_gfx12(struct amdgpu_framebuffer *afb)
{
struct amdgpu_device *adev = drm_to_adev(afb->base.dev);
const struct drm_format_info *format_info;
u64 modifier = 0;
int tile = 0;
int swizzle = 0;
if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0)) {
tile = AMD_FMT_MOD_TILE_VER_GFX12;
swizzle = AMDGPU_TILING_GET(afb->tiling_flags, GFX12_SWIZZLE_MODE);
}
modifier =
AMD_FMT_MOD |
AMD_FMT_MOD_SET(TILE, swizzle) |
AMD_FMT_MOD_SET(TILE_VERSION, tile) |
AMD_FMT_MOD_SET(DCC, 0) |
AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, 0);
format_info = amdgpu_lookup_format_info(afb->base.format->format,
modifier);
if (!format_info)
return -EINVAL;
afb->base.modifier = modifier;
afb->base.flags |= DRM_MODE_FB_MODIFIERS;
return 0;
}
static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
{
struct amdgpu_device *adev = drm_to_adev(afb->base.dev);
@ -742,6 +773,12 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb)
int pipes = ilog2(num_pipes);
uint32_t dcc_offset = AMDGPU_TILING_GET(afb->tiling_flags, DCC_OFFSET_256B);
if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0)) {
convert_tiling_flags_to_modifier_gfx12(afb);
return 0;
}
switch (swizzle >> 2) {
case 0: /* 256B */
block_size_bits = 8;

View File

@ -165,8 +165,7 @@ static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
if (r)
return ERR_PTR(r);
} else if (!(amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type) &
AMDGPU_GEM_DOMAIN_GTT)) {
} else if (bo->tbo.resource->mem_type != TTM_PL_TT) {
return ERR_PTR(-EBUSY);
}

View File

@ -129,6 +129,7 @@ enum AMDGPU_DEBUG_MASK {
AMDGPU_DEBUG_LARGEBAR = BIT(1),
AMDGPU_DEBUG_DISABLE_GPU_SOFT_RECOVERY = BIT(2),
AMDGPU_DEBUG_USE_VRAM_FW_BUF = BIT(3),
AMDGPU_DEBUG_ENABLE_RAS_ACA = BIT(4),
};
unsigned int amdgpu_vram_limit = UINT_MAX;
@ -197,11 +198,13 @@ int amdgpu_discovery = -1;
int amdgpu_mes;
int amdgpu_mes_log_enable = 0;
int amdgpu_mes_kiq;
int amdgpu_uni_mes = 1;
int amdgpu_noretry = -1;
int amdgpu_force_asic_type = -1;
int amdgpu_tmz = -1; /* auto */
uint amdgpu_freesync_vid_mode;
int amdgpu_reset_method = -1; /* auto */
int amdgpu_jpeg_test;
int amdgpu_num_kcq = -1;
int amdgpu_smartshift_bias;
int amdgpu_use_xgmi_p2p = 1;
@ -214,6 +217,7 @@ uint amdgpu_debug_mask;
int amdgpu_agp = -1; /* auto */
int amdgpu_wbrf = -1;
int amdgpu_damage_clips = -1; /* auto */
int amdgpu_umsch_mm_fwlog;
static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
@ -686,6 +690,15 @@ MODULE_PARM_DESC(mes_kiq,
"Enable Micro Engine Scheduler KIQ (0 = disabled (default), 1 = enabled)");
module_param_named(mes_kiq, amdgpu_mes_kiq, int, 0444);
/**
* DOC: uni_mes (int)
* Enable the Unified Micro Engine Scheduler, a new engine pipe for the unified scheduler.
* (0 = disabled, 1 = enabled (default))
*/
MODULE_PARM_DESC(uni_mes,
"Enable Unified Micro Engine Scheduler (0 = disabled, 1 = enabled(default)");
module_param_named(uni_mes, amdgpu_uni_mes, int, 0444);
/**
* DOC: noretry (int)
* Disable XNACK retry in the SQ by default on GFXv9 hardware. On ASICs that
@ -927,6 +940,9 @@ module_param_named(freesync_video, amdgpu_freesync_vid_mode, uint, 0444);
MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco/bamaco)");
module_param_named(reset_method, amdgpu_reset_method, int, 0644);
MODULE_PARM_DESC(jpeg_test, "jpeg test (0 = disable (default), 1 = enable)");
module_param_named(jpeg_test, amdgpu_jpeg_test, int, 0444);
/**
* DOC: bad_page_threshold (int) Bad page threshold specifies the
* threshold value of faulty pages detected by RAS ECC, which may
@ -965,6 +981,13 @@ MODULE_PARM_DESC(umsch_mm,
"Enable Multi Media User Mode Scheduler (0 = disabled (default), 1 = enabled)");
module_param_named(umsch_mm, amdgpu_umsch_mm, int, 0444);
/**
* DOC: umsch_mm_fwlog (int)
* Enable umschfw log output for debugging; the default is disabled.
*/
MODULE_PARM_DESC(umsch_mm_fwlog, "Enable umschfw log (0 = disable (default), 1 = enable)");
module_param_named(umsch_mm_fwlog, amdgpu_umsch_mm_fwlog, int, 0444);
/**
* DOC: smu_pptable_id (int)
* Used to override pptable id. id = 0 use VBIOS pptable.
@ -2174,6 +2197,11 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev)
pr_info("debug: place fw in vram for frontdoor loading\n");
adev->debug_use_vram_fw_buf = true;
}
if (amdgpu_debug_mask & AMDGPU_DEBUG_ENABLE_RAS_ACA) {
pr_info("debug: enable RAS ACA\n");
adev->debug_enable_ras_aca = true;
}
}
static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags)

View File

@ -90,7 +90,7 @@
#define MAKE_I2C_ADDR(_aa) ((0xA << 3) | (((_aa) >> 16) & 0xF))
static int __amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr,
u8 *eeprom_buf, u16 buf_size, bool read)
u8 *eeprom_buf, u32 buf_size, bool read)
{
u8 eeprom_offset_buf[EEPROM_OFFSET_SIZE];
struct i2c_msg msgs[] = {
@ -133,15 +133,15 @@ static int __amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr,
* cycle begins. This is implied for the
* "i2c_transfer()" abstraction.
*/
len = min(EEPROM_PAGE_SIZE - (eeprom_addr &
EEPROM_PAGE_MASK),
(u32)buf_size);
len = min(EEPROM_PAGE_SIZE - (eeprom_addr & EEPROM_PAGE_MASK),
buf_size);
} else {
/* Reading from the EEPROM has no limitation
* on the number of bytes read from the EEPROM
* device--they are simply sequenced out.
* Keep in mind that i2c_msg.len is u16 type.
*/
len = buf_size;
len = min(U16_MAX, buf_size);
}
msgs[1].len = len;
msgs[1].buf = eeprom_buf;
@ -179,7 +179,7 @@ static int __amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr,
* Returns the number of bytes read/written; -errno on error.
*/
static int amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr,
u8 *eeprom_buf, u16 buf_size, bool read)
u8 *eeprom_buf, u32 buf_size, bool read)
{
const struct i2c_adapter_quirks *quirks = i2c_adap->quirks;
u16 limit;
@ -225,7 +225,7 @@ static int amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr,
int amdgpu_eeprom_read(struct i2c_adapter *i2c_adap,
u32 eeprom_addr, u8 *eeprom_buf,
u16 bytes)
u32 bytes)
{
return amdgpu_eeprom_xfer(i2c_adap, eeprom_addr, eeprom_buf, bytes,
true);
@ -233,7 +233,7 @@ int amdgpu_eeprom_read(struct i2c_adapter *i2c_adap,
int amdgpu_eeprom_write(struct i2c_adapter *i2c_adap,
u32 eeprom_addr, u8 *eeprom_buf,
u16 bytes)
u32 bytes)
{
return amdgpu_eeprom_xfer(i2c_adap, eeprom_addr, eeprom_buf, bytes,
false);

View File

@ -28,10 +28,10 @@
int amdgpu_eeprom_read(struct i2c_adapter *i2c_adap,
u32 eeprom_addr, u8 *eeprom_buf,
u16 bytes);
u32 bytes);
int amdgpu_eeprom_write(struct i2c_adapter *i2c_adap,
u32 eeprom_addr, u8 *eeprom_buf,
u16 bytes);
u32 bytes);
#endif
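
Illustrative sketch (not part of the patch): widening the length parameter from u16 to u32 lets a caller request more than 64 KiB in a single call, on the assumption that __amdgpu_eeprom_xfer() keeps splitting the request into i2c transfers of at most U16_MAX bytes, as the min(U16_MAX, buf_size) clamp above suggests. The helper name below is hypothetical.

/* Hypothetical caller: reads a table larger than 64 KiB in one request. */
static int example_read_large_table(struct amdgpu_device *adev,
				    struct i2c_adapter *i2c, u32 eeprom_addr)
{
	u32 len = 128 * 1024;	/* deliberately larger than U16_MAX */
	u8 *buf = kvmalloc(len, GFP_KERNEL);
	int res;

	if (!buf)
		return -ENOMEM;

	/* returns the number of bytes transferred, or -errno */
	res = amdgpu_eeprom_read(i2c, eeprom_addr, buf, len);
	if (res < 0)
		dev_err(adev->dev, "eeprom read failed: %d\n", res);

	kvfree(buf);
	return res < 0 ? res : 0;
}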

View File

@ -100,6 +100,7 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev, u32 *fru_addr)
*fru_addr = FRU_EEPROM_MADDR_6;
return true;
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 14):
if (fru_addr)
*fru_addr = FRU_EEPROM_MADDR_8;
return true;

View File

@ -174,7 +174,7 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj,
return -EPERM;
if (abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID &&
abo->tbo.base.resv != vm->root.bo->tbo.base.resv)
!amdgpu_vm_is_bo_always_valid(vm, abo))
return -EPERM;
r = amdgpu_bo_reserve(abo, false);
@ -683,7 +683,7 @@ uint64_t amdgpu_gem_va_map_flags(struct amdgpu_device *adev, uint32_t flags)
if (flags & AMDGPU_VM_PAGE_WRITEABLE)
pte_flag |= AMDGPU_PTE_WRITEABLE;
if (flags & AMDGPU_VM_PAGE_PRT)
pte_flag |= AMDGPU_PTE_PRT;
pte_flag |= AMDGPU_PTE_PRT_FLAG(adev);
if (flags & AMDGPU_VM_PAGE_NOALLOC)
pte_flag |= AMDGPU_PTE_NOALLOC;

View File

@ -329,8 +329,9 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, int xcc_id)
ring->eop_gpu_addr = kiq->eop_gpu_addr;
ring->no_scheduler = true;
snprintf(ring->name, sizeof(ring->name), "kiq_%d.%d.%d.%d",
xcc_id, ring->me, ring->pipe, ring->queue);
snprintf(ring->name, sizeof(ring->name), "kiq_%hhu.%hhu.%hhu.%hhu",
(unsigned char)xcc_id, (unsigned char)ring->me,
(unsigned char)ring->pipe, (unsigned char)ring->queue);
r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
@ -539,7 +540,8 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id)
}
ras = amdgpu_ras_get_context(adev);
if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) &&
if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) &&
ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery)) {
spin_unlock(&kiq->ring_lock);
return 0;
@ -598,6 +600,44 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
return set_resource_bit;
}
static int amdgpu_gfx_mes_enable_kcq(struct amdgpu_device *adev, int xcc_id)
{
struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
struct amdgpu_ring *kiq_ring = &kiq->ring;
uint64_t queue_mask = ~0ULL;
int r, i, j;
amdgpu_device_flush_hdp(adev, NULL);
if (!adev->enable_uni_mes) {
spin_lock(&kiq->ring_lock);
r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->set_resources_size);
if (r) {
dev_err(adev->dev, "Failed to lock KIQ (%d).\n", r);
spin_unlock(&kiq->ring_lock);
return r;
}
kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
r = amdgpu_ring_test_helper(kiq_ring);
spin_unlock(&kiq->ring_lock);
if (r)
dev_err(adev->dev, "KIQ failed to set resources\n");
}
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
j = i + xcc_id * adev->gfx.num_compute_rings;
r = amdgpu_mes_map_legacy_queue(adev,
&adev->gfx.compute_ring[j]);
if (r) {
dev_err(adev->dev, "failed to map compute queue\n");
return r;
}
}
return 0;
}
int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id)
{
struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id];
@ -605,6 +645,9 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id)
uint64_t queue_mask = 0;
int r, i, j;
if (adev->enable_mes)
return amdgpu_gfx_mes_enable_kcq(adev, xcc_id);
if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources)
return -EINVAL;
@ -623,10 +666,11 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id)
queue_mask |= (1ull << amdgpu_queue_mask_bit_to_set_resource_bit(adev, i));
}
DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
kiq_ring->queue);
amdgpu_device_flush_hdp(adev, NULL);
DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
kiq_ring->queue);
spin_lock(&kiq->ring_lock);
r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
adev->gfx.num_compute_rings +
@ -637,9 +681,6 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id)
return r;
}
if (adev->enable_mes)
queue_mask = ~0ULL;
kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
j = i + xcc_id * adev->gfx.num_compute_rings;
@ -666,6 +707,20 @@ int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id)
amdgpu_device_flush_hdp(adev, NULL);
if (adev->enable_mes) {
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
j = i + xcc_id * adev->gfx.num_gfx_rings;
r = amdgpu_mes_map_legacy_queue(adev,
&adev->gfx.gfx_ring[j]);
if (r) {
DRM_ERROR("failed to map gfx queue\n");
return r;
}
}
return 0;
}
spin_lock(&kiq->ring_lock);
/* No need to map kcq on the slave */
if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) {

View File

@ -435,8 +435,9 @@ struct amdgpu_gfx {
bool mcbp; /* mid command buffer preemption */
/* IP reg dump */
uint32_t *ip_dump;
uint32_t reg_count;
uint32_t *ip_dump_core;
uint32_t *ip_dump_compute_queues;
uint32_t *ip_dump_gfx_queues;
};
struct amdgpu_gfx_ras_reg_entry {
@ -555,8 +556,6 @@ static inline const char *amdgpu_gfx_compute_mode_desc(int mode)
default:
return "UNKNOWN";
}
return "UNKNOWN";
}
#endif

View File

@ -876,11 +876,11 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev)
struct amdgpu_gmc *gmc = &adev->gmc;
uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);
bool noretry_default = (gc_ver == IP_VERSION(9, 0, 1) ||
gc_ver == IP_VERSION(9, 3, 0) ||
gc_ver == IP_VERSION(9, 4, 0) ||
gc_ver == IP_VERSION(9, 4, 1) ||
gc_ver == IP_VERSION(9, 4, 2) ||
gc_ver == IP_VERSION(9, 4, 3) ||
gc_ver == IP_VERSION(9, 4, 4) ||
gc_ver >= IP_VERSION(10, 3, 0));
if (!amdgpu_sriov_xnack_support(adev))
@ -1015,7 +1015,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
flags |= AMDGPU_PTE_WRITEABLE;
flags |= AMDGPU_PTE_SNOOPED;
flags |= AMDGPU_PTE_FRAG((adev->gmc.vmid0_page_table_block_size + 9*1));
flags |= AMDGPU_PDE_PTE;
flags |= AMDGPU_PDE_PTE_FLAG(adev);
/* The first n PDE0 entries are used as PTE,
* pointing to vram
@ -1028,7 +1028,7 @@ void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev)
* pointing to a 4K system page
*/
flags = AMDGPU_PTE_VALID;
flags |= AMDGPU_PDE_BFS(0) | AMDGPU_PTE_SNOOPED;
flags |= AMDGPU_PTE_SNOOPED | AMDGPU_PDE_BFS_FLAG(adev, 0);
/* Requires gart_ptb_gpu_pa to be 4K aligned */
amdgpu_gmc_set_pte_pde(adev, adev->gmc.ptr_pdb0, i, gart_ptb_gpu_pa, flags);
drm_dev_exit(idx);
@ -1147,8 +1147,6 @@ static ssize_t current_memory_partition_show(
default:
return sysfs_emit(buf, "UNKNOWN\n");
}
return sysfs_emit(buf, "UNKNOWN\n");
}
static DEVICE_ATTR_RO(current_memory_partition);
@ -1166,3 +1164,79 @@ void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev)
{
device_remove_file(adev->dev, &dev_attr_current_memory_partition);
}
int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev,
struct amdgpu_mem_partition_info *mem_ranges,
int exp_ranges)
{
struct amdgpu_gmc_memrange *ranges;
int range_cnt, ret, i, j;
uint32_t nps_type;
if (!mem_ranges)
return -EINVAL;
ret = amdgpu_discovery_get_nps_info(adev, &nps_type, &ranges,
&range_cnt);
if (ret)
return ret;
/* TODO: For now, expect ranges and partition count to be the same.
* Adjust if there are holes expected in any NPS domain.
*/
if (range_cnt != exp_ranges) {
dev_warn(
adev->dev,
"NPS config mismatch - expected ranges: %d discovery - nps mode: %d, nps ranges: %d",
exp_ranges, nps_type, range_cnt);
ret = -EINVAL;
goto err;
}
for (i = 0; i < exp_ranges; ++i) {
if (ranges[i].base_address >= ranges[i].limit_address) {
dev_warn(
adev->dev,
"Invalid NPS range - nps mode: %d, range[%d]: base: %llx limit: %llx",
nps_type, i, ranges[i].base_address,
ranges[i].limit_address);
ret = -EINVAL;
goto err;
}
/* Check for overlaps, not expecting any now */
for (j = i - 1; j >= 0; j--) {
if (max(ranges[j].base_address,
ranges[i].base_address) <=
min(ranges[j].limit_address,
ranges[i].limit_address)) {
dev_warn(
adev->dev,
"overlapping ranges detected [ %llx - %llx ] | [%llx - %llx]",
ranges[j].base_address,
ranges[j].limit_address,
ranges[i].base_address,
ranges[i].limit_address);
ret = -EINVAL;
goto err;
}
}
mem_ranges[i].range.fpfn =
(ranges[i].base_address -
adev->vm_manager.vram_base_offset) >>
AMDGPU_GPU_PAGE_SHIFT;
mem_ranges[i].range.lpfn =
(ranges[i].limit_address -
adev->vm_manager.vram_base_offset) >>
AMDGPU_GPU_PAGE_SHIFT;
mem_ranges[i].size =
ranges[i].limit_address - ranges[i].base_address + 1;
}
err:
kfree(ranges);
return ret;
}
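
The overlap detection above is the standard closed-interval test: two ranges [base_a, limit_a] and [base_b, limit_b] intersect exactly when the larger base does not exceed the smaller limit. A minimal standalone sketch of the same predicate (illustrative only, not from the patch):

#include <stdbool.h>
#include <stdint.h>

static bool ranges_overlap(uint64_t base_a, uint64_t limit_a,
			   uint64_t base_b, uint64_t limit_b)
{
	/* [base_a, limit_a] and [base_b, limit_b] intersect iff the
	 * larger base does not pass the smaller limit. */
	uint64_t lo = base_a > base_b ? base_a : base_b;
	uint64_t hi = limit_a < limit_b ? limit_a : limit_b;

	return lo <= hi;
}

/* e.g. [0x0, 0xFFFF] vs [0x8000, 0x1FFFF]  -> true (share 0x8000..0xFFFF)
 *      [0x0, 0xFFFF] vs [0x10000, 0x1FFFF] -> false                      */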

View File

@ -199,6 +199,13 @@ struct amdgpu_mem_partition_info {
#define INVALID_PFN -1
struct amdgpu_gmc_memrange {
uint64_t base_address;
uint64_t limit_address;
uint32_t flags;
int nid_mask;
};
enum amdgpu_gart_placement {
AMDGPU_GART_PLACEMENT_BEST_FIT = 0,
AMDGPU_GART_PLACEMENT_HIGH,
@ -439,4 +446,8 @@ int amdgpu_gmc_vram_checking(struct amdgpu_device *adev);
int amdgpu_gmc_sysfs_init(struct amdgpu_device *adev);
void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev);
int amdgpu_gmc_get_nps_memranges(struct amdgpu_device *adev,
struct amdgpu_mem_partition_info *mem_ranges,
int exp_ranges);
#endif

View File

@ -202,20 +202,12 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
pr_debug("hmm range: start = 0x%lx, end = 0x%lx",
hmm_range->start, hmm_range->end);
/* Assuming 64MB takes maximum 1 second to fault page address */
timeout = max((hmm_range->end - hmm_range->start) >> 26, 1UL);
timeout *= HMM_RANGE_DEFAULT_TIMEOUT;
timeout = jiffies + msecs_to_jiffies(timeout);
timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
retry:
hmm_range->notifier_seq = mmu_interval_read_begin(notifier);
r = hmm_range_fault(hmm_range);
if (unlikely(r)) {
schedule();
/*
* FIXME: This timeout should encompass the retry from
* mmu_interval_read_retry() as well.
*/
if (r == -EBUSY && !time_after(jiffies, timeout))
goto retry;
goto out_free_pfns;
@ -247,6 +239,8 @@ out_free_pfns:
out_free_range:
kfree(hmm_range);
if (r == -EBUSY)
r = -EAGAIN;
return r;
}

View File

@ -618,30 +618,37 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
return -EINVAL;
if (adev->xcp_mgr && adev->xcp_mgr->num_xcps > 0 &&
fpriv->xcp_id >= 0 && fpriv->xcp_id < adev->xcp_mgr->num_xcps) {
fpriv->xcp_id < adev->xcp_mgr->num_xcps) {
xcp = &adev->xcp_mgr->xcp[fpriv->xcp_id];
switch (type) {
case AMD_IP_BLOCK_TYPE_GFX:
ret = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_GFX, &inst_mask);
if (ret)
return ret;
count = hweight32(inst_mask);
break;
case AMD_IP_BLOCK_TYPE_SDMA:
ret = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_SDMA, &inst_mask);
if (ret)
return ret;
count = hweight32(inst_mask);
break;
case AMD_IP_BLOCK_TYPE_JPEG:
ret = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_VCN, &inst_mask);
if (ret)
return ret;
count = hweight32(inst_mask) * adev->jpeg.num_jpeg_rings;
break;
case AMD_IP_BLOCK_TYPE_VCN:
ret = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_VCN, &inst_mask);
if (ret)
return ret;
count = hweight32(inst_mask);
break;
default:
return -EINVAL;
}
if (ret)
return ret;
return copy_to_user(out, &count, min(size, 4u)) ? -EFAULT : 0;
}
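
A worked illustration of the counting above (numbers are examples, not from the patch): the per-XCP count is a population count of inst_mask (hweight32() in the kernel), with JPEG further multiplied by the number of JPEG rings per instance. For inst_mask = 0x5 (instances 0 and 2), hweight32(0x5) = 2; with 2 JPEG rings per instance the reported JPEG count would be 4.

#include <stdint.h>

static unsigned int count_instances(uint32_t inst_mask)
{
	/* userspace stand-in for hweight32() */
	return (unsigned int)__builtin_popcount(inst_mask);
}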

View File

@ -153,7 +153,7 @@ int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev)
return 0;
}
void amdgpu_mca_bank_set_init(struct mca_bank_set *mca_set)
static void amdgpu_mca_bank_set_init(struct mca_bank_set *mca_set)
{
if (!mca_set)
return;
@ -162,7 +162,7 @@ void amdgpu_mca_bank_set_init(struct mca_bank_set *mca_set)
INIT_LIST_HEAD(&mca_set->list);
}
int amdgpu_mca_bank_set_add_entry(struct mca_bank_set *mca_set, struct mca_bank_entry *entry)
static int amdgpu_mca_bank_set_add_entry(struct mca_bank_set *mca_set, struct mca_bank_entry *entry)
{
struct mca_bank_node *node;
@ -183,7 +183,30 @@ int amdgpu_mca_bank_set_add_entry(struct mca_bank_set *mca_set, struct mca_bank_
return 0;
}
void amdgpu_mca_bank_set_release(struct mca_bank_set *mca_set)
static int amdgpu_mca_bank_set_merge(struct mca_bank_set *mca_set, struct mca_bank_set *new)
{
struct mca_bank_node *node;
list_for_each_entry(node, &new->list, node)
amdgpu_mca_bank_set_add_entry(mca_set, &node->entry);
return 0;
}
static int amdgpu_mca_bank_set_remove_node(struct mca_bank_set *mca_set, struct mca_bank_node *node)
{
if (!node)
return -EINVAL;
list_del(&node->node);
kvfree(node);
mca_set->nr_entries--;
return 0;
}
static void amdgpu_mca_bank_set_release(struct mca_bank_set *mca_set)
{
struct mca_bank_node *node, *tmp;
@ -200,6 +223,44 @@ void amdgpu_mca_smu_init_funcs(struct amdgpu_device *adev, const struct amdgpu_m
mca->mca_funcs = mca_funcs;
}
int amdgpu_mca_init(struct amdgpu_device *adev)
{
struct amdgpu_mca *mca = &adev->mca;
struct mca_bank_cache *mca_cache;
int i;
atomic_set(&mca->ue_update_flag, 0);
for (i = 0; i < ARRAY_SIZE(mca->mca_caches); i++) {
mca_cache = &mca->mca_caches[i];
spin_lock_init(&mca_cache->lock);
amdgpu_mca_bank_set_init(&mca_cache->mca_set);
}
return 0;
}
void amdgpu_mca_fini(struct amdgpu_device *adev)
{
struct amdgpu_mca *mca = &adev->mca;
struct mca_bank_cache *mca_cache;
int i;
atomic_set(&mca->ue_update_flag, 0);
for (i = 0; i < ARRAY_SIZE(mca->mca_caches); i++) {
mca_cache = &mca->mca_caches[i];
amdgpu_mca_bank_set_release(&mca_cache->mca_set);
}
}
int amdgpu_mca_reset(struct amdgpu_device *adev)
{
amdgpu_mca_fini(adev);
return amdgpu_mca_init(adev);
}
int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable)
{
const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
@ -228,66 +289,7 @@ static void amdgpu_mca_smu_mca_bank_dump(struct amdgpu_device *adev, int idx, st
idx, entry->regs[MCA_REG_IDX_SYND]);
}
int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type,
struct ras_err_data *err_data, struct ras_query_context *qctx)
{
struct amdgpu_smuio_mcm_config_info mcm_info;
struct ras_err_addr err_addr = {0};
struct mca_bank_set mca_set;
struct mca_bank_node *node;
struct mca_bank_entry *entry;
uint32_t count;
int ret, i = 0;
amdgpu_mca_bank_set_init(&mca_set);
ret = amdgpu_mca_smu_get_mca_set(adev, blk, type, &mca_set);
if (ret)
goto out_mca_release;
list_for_each_entry(node, &mca_set.list, node) {
entry = &node->entry;
amdgpu_mca_smu_mca_bank_dump(adev, i++, entry, qctx);
count = 0;
ret = amdgpu_mca_smu_parse_mca_error_count(adev, blk, type, entry, &count);
if (ret)
goto out_mca_release;
if (!count)
continue;
mcm_info.socket_id = entry->info.socket_id;
mcm_info.die_id = entry->info.aid;
if (blk == AMDGPU_RAS_BLOCK__UMC) {
err_addr.err_status = entry->regs[MCA_REG_IDX_STATUS];
err_addr.err_ipid = entry->regs[MCA_REG_IDX_IPID];
err_addr.err_addr = entry->regs[MCA_REG_IDX_ADDR];
}
if (type == AMDGPU_MCA_ERROR_TYPE_UE)
amdgpu_ras_error_statistic_ue_count(err_data,
&mcm_info, &err_addr, (uint64_t)count);
else {
if (amdgpu_mca_is_deferred_error(adev, entry->regs[MCA_REG_IDX_STATUS]))
amdgpu_ras_error_statistic_de_count(err_data,
&mcm_info, &err_addr, (uint64_t)count);
else
amdgpu_ras_error_statistic_ce_count(err_data,
&mcm_info, &err_addr, (uint64_t)count);
}
}
out_mca_release:
amdgpu_mca_bank_set_release(&mca_set);
return ret;
}
int amdgpu_mca_smu_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, uint32_t *count)
static int amdgpu_mca_smu_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, uint32_t *count)
{
const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
@ -300,81 +302,8 @@ int amdgpu_mca_smu_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_m
return -EOPNOTSUPP;
}
int amdgpu_mca_smu_get_mca_set_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
enum amdgpu_mca_error_type type, uint32_t *total)
{
const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
struct mca_bank_set mca_set;
struct mca_bank_node *node;
struct mca_bank_entry *entry;
uint32_t count;
int ret;
if (!total)
return -EINVAL;
if (!mca_funcs)
return -EOPNOTSUPP;
if (!mca_funcs->mca_get_ras_mca_set || !mca_funcs->mca_get_valid_mca_count)
return -EOPNOTSUPP;
amdgpu_mca_bank_set_init(&mca_set);
ret = mca_funcs->mca_get_ras_mca_set(adev, blk, type, &mca_set);
if (ret)
goto err_mca_set_release;
*total = 0;
list_for_each_entry(node, &mca_set.list, node) {
entry = &node->entry;
count = 0;
ret = mca_funcs->mca_parse_mca_error_count(adev, blk, type, entry, &count);
if (ret)
goto err_mca_set_release;
*total += count;
}
err_mca_set_release:
amdgpu_mca_bank_set_release(&mca_set);
return ret;
}
int amdgpu_mca_smu_parse_mca_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count)
{
const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
if (!count || !entry)
return -EINVAL;
if (!mca_funcs || !mca_funcs->mca_parse_mca_error_count)
return -EOPNOTSUPP;
return mca_funcs->mca_parse_mca_error_count(adev, blk, type, entry, count);
}
int amdgpu_mca_smu_get_mca_set(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set)
{
const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
if (!mca_set)
return -EINVAL;
if (!mca_funcs || !mca_funcs->mca_get_ras_mca_set)
return -EOPNOTSUPP;
WARN_ON(!list_empty(&mca_set->list));
return mca_funcs->mca_get_ras_mca_set(adev, blk, type, mca_set);
}
int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_error_type type,
int idx, struct mca_bank_entry *entry)
static int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_error_type type,
int idx, struct mca_bank_entry *entry)
{
const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
int count;
@ -399,6 +328,176 @@ int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_err
return mca_funcs->mca_get_mca_entry(adev, type, idx, entry);
}
static bool amdgpu_mca_bank_should_update(struct amdgpu_device *adev, enum amdgpu_mca_error_type type)
{
struct amdgpu_mca *mca = &adev->mca;
bool ret = true;
/*
* Because the UE valid MCA count is only cleared after reset,
* the mca bank set is refreshed only once during the gpu recovery
* stage to avoid counting the same errors repeatedly.
*/
if (type == AMDGPU_MCA_ERROR_TYPE_UE) {
if (amdgpu_ras_intr_triggered())
ret = atomic_cmpxchg(&mca->ue_update_flag, 0, 1) == 0;
else
atomic_set(&mca->ue_update_flag, 0);
}
return ret;
}
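
The atomic_cmpxchg(&mca->ue_update_flag, 0, 1) == 0 test above acts as a first-caller-wins latch: while recovery is in progress the UE bank list is harvested only once, and the latch is re-armed otherwise. A minimal userspace sketch of the same idiom using C11 atomics (illustrative only; the driver uses the kernel's atomic_t API):

#include <stdatomic.h>
#include <stdbool.h>

static atomic_int ue_update_flag;	/* zero-initialized */

static bool should_update_once(bool in_recovery)
{
	int expected = 0;

	if (!in_recovery) {
		atomic_store(&ue_update_flag, 0);	/* re-arm outside recovery */
		return true;
	}

	/* returns true only for the caller that flips 0 -> 1 */
	return atomic_compare_exchange_strong(&ue_update_flag, &expected, 1);
}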
static int amdgpu_mca_smu_get_mca_set(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set,
struct ras_query_context *qctx)
{
struct mca_bank_entry entry;
uint32_t count = 0, i;
int ret;
if (!mca_set)
return -EINVAL;
if (!amdgpu_mca_bank_should_update(adev, type))
return 0;
ret = amdgpu_mca_smu_get_valid_mca_count(adev, type, &count);
if (ret)
return ret;
for (i = 0; i < count; i++) {
memset(&entry, 0, sizeof(entry));
ret = amdgpu_mca_smu_get_mca_entry(adev, type, i, &entry);
if (ret)
return ret;
amdgpu_mca_bank_set_add_entry(mca_set, &entry);
amdgpu_mca_smu_mca_bank_dump(adev, i, &entry, qctx);
}
return 0;
}
static int amdgpu_mca_smu_parse_mca_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count)
{
const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
if (!count || !entry)
return -EINVAL;
if (!mca_funcs || !mca_funcs->mca_parse_mca_error_count)
return -EOPNOTSUPP;
return mca_funcs->mca_parse_mca_error_count(adev, blk, type, entry, count);
}
static int amdgpu_mca_dispatch_mca_set(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type,
struct mca_bank_set *mca_set, struct ras_err_data *err_data)
{
struct ras_err_addr err_addr;
struct amdgpu_smuio_mcm_config_info mcm_info;
struct mca_bank_node *node, *tmp;
struct mca_bank_entry *entry;
uint32_t count;
int ret;
if (!mca_set)
return -EINVAL;
if (!mca_set->nr_entries)
return 0;
list_for_each_entry_safe(node, tmp, &mca_set->list, node) {
entry = &node->entry;
count = 0;
ret = amdgpu_mca_smu_parse_mca_error_count(adev, blk, type, entry, &count);
if (ret && ret != -EOPNOTSUPP)
return ret;
if (!count)
continue;
memset(&mcm_info, 0, sizeof(mcm_info));
memset(&err_addr, 0, sizeof(err_addr));
mcm_info.socket_id = entry->info.socket_id;
mcm_info.die_id = entry->info.aid;
if (blk == AMDGPU_RAS_BLOCK__UMC) {
err_addr.err_status = entry->regs[MCA_REG_IDX_STATUS];
err_addr.err_ipid = entry->regs[MCA_REG_IDX_IPID];
err_addr.err_addr = entry->regs[MCA_REG_IDX_ADDR];
}
if (type == AMDGPU_MCA_ERROR_TYPE_UE) {
amdgpu_ras_error_statistic_ue_count(err_data,
&mcm_info, &err_addr, (uint64_t)count);
} else {
if (amdgpu_mca_is_deferred_error(adev, entry->regs[MCA_REG_IDX_STATUS]))
amdgpu_ras_error_statistic_de_count(err_data,
&mcm_info, &err_addr, (uint64_t)count);
else
amdgpu_ras_error_statistic_ce_count(err_data,
&mcm_info, &err_addr, (uint64_t)count);
}
amdgpu_mca_bank_set_remove_node(mca_set, node);
}
return 0;
}
static int amdgpu_mca_add_mca_set_to_cache(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, struct mca_bank_set *new)
{
struct mca_bank_cache *mca_cache = &adev->mca.mca_caches[type];
int ret;
spin_lock(&mca_cache->lock);
ret = amdgpu_mca_bank_set_merge(&mca_cache->mca_set, new);
spin_unlock(&mca_cache->lock);
return ret;
}
int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type,
struct ras_err_data *err_data, struct ras_query_context *qctx)
{
struct mca_bank_set mca_set;
struct mca_bank_cache *mca_cache = &adev->mca.mca_caches[type];
int ret;
amdgpu_mca_bank_set_init(&mca_set);
ret = amdgpu_mca_smu_get_mca_set(adev, type, &mca_set, qctx);
if (ret)
goto out_mca_release;
ret = amdgpu_mca_dispatch_mca_set(adev, blk, type, &mca_set, err_data);
if (ret)
goto out_mca_release;
/* add remaining mca banks to the mca cache */
if (mca_set.nr_entries) {
ret = amdgpu_mca_add_mca_set_to_cache(adev, type, &mca_set);
if (ret)
goto out_mca_release;
}
/* dispatch mca set again if mca cache has valid data */
spin_lock(&mca_cache->lock);
if (mca_cache->mca_set.nr_entries)
ret = amdgpu_mca_dispatch_mca_set(adev, blk, type, &mca_cache->mca_set, err_data);
spin_unlock(&mca_cache->lock);
out_mca_release:
amdgpu_mca_bank_set_release(&mca_set);
return ret;
}
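
For readers of the diff, the reworked logging path above can be summarized as follows (a descriptive outline, not code from the patch):

/*
 * amdgpu_mca_smu_log_ras_error(), as restructured:
 *   1. collect fresh banks from the SMU into a local set (for UE this
 *      happens only once per recovery, gated by ue_update_flag);
 *   2. parse each bank and fold its error counts into err_data; banks
 *      that produced a non-zero count are removed from the set;
 *   3. merge the remaining (zero-count) banks into the per-type
 *      mca_caches[] entry (amdgpu_mca_add_mca_set_to_cache() takes
 *      mca_cache->lock);
 *   4. if the cache holds entries, dispatch them as well under
 *      mca_cache->lock;
 *   5. release the local set.
 */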
#if defined(CONFIG_DEBUG_FS)
static int amdgpu_mca_smu_debug_mode_set(void *data, u64 val)
{
@ -437,36 +536,32 @@ static void mca_dump_entry(struct seq_file *m, struct mca_bank_entry *entry)
static int mca_dump_show(struct seq_file *m, enum amdgpu_mca_error_type type)
{
struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
struct mca_bank_entry *entry;
uint32_t count = 0;
int i, ret;
struct mca_bank_node *node;
struct mca_bank_set mca_set;
struct ras_query_context qctx;
int ret;
ret = amdgpu_mca_smu_get_valid_mca_count(adev, type, &count);
amdgpu_mca_bank_set_init(&mca_set);
qctx.event_id = 0ULL;
ret = amdgpu_mca_smu_get_mca_set(adev, type, &mca_set, &qctx);
if (ret)
return ret;
goto err_free_mca_set;
seq_printf(m, "amdgpu smu %s valid mca count: %d\n",
type == AMDGPU_MCA_ERROR_TYPE_UE ? "UE" : "CE", count);
type == AMDGPU_MCA_ERROR_TYPE_UE ? "UE" : "CE", mca_set.nr_entries);
if (!count)
return 0;
if (!mca_set.nr_entries)
goto err_free_mca_set;
entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
return -ENOMEM;
list_for_each_entry(node, &mca_set.list, node)
mca_dump_entry(m, &node->entry);
for (i = 0; i < count; i++) {
memset(entry, 0, sizeof(*entry));
/* add mca bank to mca bank cache */
ret = amdgpu_mca_add_mca_set_to_cache(adev, type, &mca_set);
ret = amdgpu_mca_smu_get_mca_entry(adev, type, i, entry);
if (ret)
goto err_free_entry;
mca_dump_entry(m, entry);
}
err_free_entry:
kfree(entry);
err_free_mca_set:
amdgpu_mca_bank_set_release(&mca_set);
return ret;
}
@ -513,7 +608,9 @@ DEFINE_DEBUGFS_ATTRIBUTE(mca_debug_mode_fops, NULL, amdgpu_mca_smu_debug_mode_se
void amdgpu_mca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root)
{
#if defined(CONFIG_DEBUG_FS)
if (!root || amdgpu_ip_version(adev, MP1_HWIP, 0) != IP_VERSION(13, 0, 6))
if (!root ||
(amdgpu_ip_version(adev, MP1_HWIP, 0) != IP_VERSION(13, 0, 6) &&
amdgpu_ip_version(adev, MP1_HWIP, 0) != IP_VERSION(13, 0, 14)))
return;
debugfs_create_file("mca_debug_mode", 0200, root, adev, &mca_debug_mode_fops);

View File

@ -77,11 +77,23 @@ struct amdgpu_mca_ras {
struct amdgpu_mca_ras_block *ras;
};
struct mca_bank_set {
int nr_entries;
struct list_head list;
};
struct mca_bank_cache {
struct mca_bank_set mca_set;
spinlock_t lock;
};
struct amdgpu_mca {
struct amdgpu_mca_ras mp0;
struct amdgpu_mca_ras mp1;
struct amdgpu_mca_ras mpio;
const struct amdgpu_mca_smu_funcs *mca_funcs;
struct mca_bank_cache mca_caches[AMDGPU_MCA_ERROR_TYPE_DE];
atomic_t ue_update_flag;
};
enum mca_reg_idx {
@ -113,17 +125,10 @@ struct mca_bank_node {
struct list_head node;
};
struct mca_bank_set {
int nr_entries;
struct list_head list;
};
struct amdgpu_mca_smu_funcs {
int max_ue_count;
int max_ce_count;
int (*mca_set_debug_mode)(struct amdgpu_device *adev, bool enable);
int (*mca_get_ras_mca_set)(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type,
struct mca_bank_set *mca_set);
int (*mca_parse_mca_error_count)(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type,
struct mca_bank_entry *entry, uint32_t *count);
int (*mca_get_valid_mca_count)(struct amdgpu_device *adev, enum amdgpu_mca_error_type type,
@ -151,24 +156,13 @@ int amdgpu_mca_mp1_ras_sw_init(struct amdgpu_device *adev);
int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev);
void amdgpu_mca_smu_init_funcs(struct amdgpu_device *adev, const struct amdgpu_mca_smu_funcs *mca_funcs);
int amdgpu_mca_init(struct amdgpu_device *adev);
void amdgpu_mca_fini(struct amdgpu_device *adev);
int amdgpu_mca_reset(struct amdgpu_device *adev);
int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable);
int amdgpu_mca_smu_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, uint32_t *count);
int amdgpu_mca_smu_get_mca_set_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
enum amdgpu_mca_error_type type, uint32_t *total);
int amdgpu_mca_smu_get_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
enum amdgpu_mca_error_type type, uint32_t *count);
int amdgpu_mca_smu_parse_mca_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count);
int amdgpu_mca_smu_get_mca_set(struct amdgpu_device *adev, enum amdgpu_ras_block blk,
enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set);
int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_error_type type,
int idx, struct mca_bank_entry *entry);
void amdgpu_mca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root);
void amdgpu_mca_bank_set_init(struct mca_bank_set *mca_set);
int amdgpu_mca_bank_set_add_entry(struct mca_bank_set *mca_set, struct mca_bank_entry *entry);
void amdgpu_mca_bank_set_release(struct mca_bank_set *mca_set);
int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type,
struct ras_err_data *err_data, struct ras_query_context *qctx);

View File

@ -156,7 +156,7 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) {
/* use only 1st MEC pipes */
if (i >= 4)
if (i >= adev->gfx.mec.num_pipe_per_mec)
continue;
adev->mes.compute_hqd_mask[i] = 0xc;
}
@ -1511,7 +1511,11 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe)
amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix,
sizeof(ucode_prefix));
if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
if (adev->enable_uni_mes && pipe == AMDGPU_MES_SCHED_PIPE) {
snprintf(fw_name, sizeof(fw_name),
"amdgpu/%s_uni_mes.bin", ucode_prefix);
} else if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) &&
amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0)) {
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin",
ucode_prefix,
pipe == AMDGPU_MES_SCHED_PIPE ? "_2" : "1");

View File

@ -102,6 +102,7 @@ struct amdgpu_nbio_funcs {
u32 (*get_memory_partition_mode)(struct amdgpu_device *adev,
u32 *supp_modes);
u64 (*get_pcie_replay_count)(struct amdgpu_device *adev);
void (*set_reg_remap)(struct amdgpu_device *adev);
};
struct amdgpu_nbio {

View File

@ -983,12 +983,11 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
ttm_bo_pin(&bo->tbo);
domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type);
if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
if (bo->tbo.resource->mem_type == TTM_PL_VRAM) {
atomic64_add(amdgpu_bo_size(bo), &adev->vram_pin_size);
atomic64_add(amdgpu_vram_mgr_bo_visible_size(bo),
&adev->visible_pin_size);
} else if (domain == AMDGPU_GEM_DOMAIN_GTT) {
} else if (bo->tbo.resource->mem_type == TTM_PL_TT) {
atomic64_add(amdgpu_bo_size(bo), &adev->gart_pin_size);
}
@ -1293,7 +1292,6 @@ void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
struct ttm_resource *res = bo->tbo.resource;
uint64_t size = amdgpu_bo_size(bo);
struct drm_gem_object *obj;
unsigned int domain;
bool shared;
/* Abort if the BO doesn't currently have a backing store */
@ -1303,21 +1301,20 @@ void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
obj = &bo->tbo.base;
shared = drm_gem_object_is_shared_for_memory_stats(obj);
domain = amdgpu_mem_type_to_domain(res->mem_type);
switch (domain) {
case AMDGPU_GEM_DOMAIN_VRAM:
switch (res->mem_type) {
case TTM_PL_VRAM:
stats->vram += size;
if (amdgpu_res_cpu_visible(adev, bo->tbo.resource))
if (amdgpu_res_cpu_visible(adev, res))
stats->visible_vram += size;
if (shared)
stats->vram_shared += size;
break;
case AMDGPU_GEM_DOMAIN_GTT:
case TTM_PL_TT:
stats->gtt += size;
if (shared)
stats->gtt_shared += size;
break;
case AMDGPU_GEM_DOMAIN_CPU:
case TTM_PL_SYSTEM:
default:
stats->cpu += size;
if (shared)
@ -1330,7 +1327,7 @@ void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
stats->requested_visible_vram += size;
if (domain != AMDGPU_GEM_DOMAIN_VRAM) {
if (res->mem_type != TTM_PL_VRAM) {
stats->evicted_vram += size;
if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
stats->evicted_visible_vram += size;
@ -1604,20 +1601,33 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m)
u64 size;
if (dma_resv_trylock(bo->tbo.base.resv)) {
unsigned int domain;
domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type);
switch (domain) {
case AMDGPU_GEM_DOMAIN_VRAM:
switch (bo->tbo.resource->mem_type) {
case TTM_PL_VRAM:
if (amdgpu_res_cpu_visible(adev, bo->tbo.resource))
placement = "VRAM VISIBLE";
else
placement = "VRAM";
break;
case AMDGPU_GEM_DOMAIN_GTT:
case TTM_PL_TT:
placement = "GTT";
break;
case AMDGPU_GEM_DOMAIN_CPU:
case AMDGPU_PL_GDS:
placement = "GDS";
break;
case AMDGPU_PL_GWS:
placement = "GWS";
break;
case AMDGPU_PL_OA:
placement = "OA";
break;
case AMDGPU_PL_PREEMPT:
placement = "PREEMPTIBLE";
break;
case AMDGPU_PL_DOORBELL:
placement = "DOORBELL";
break;
case TTM_PL_SYSTEM:
default:
placement = "CPU";
break;

View File

@ -145,6 +145,7 @@ static int psp_init_sriov_microcode(struct psp_context *psp)
adev->virt.autoload_ucode_id = 0;
break;
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 14):
ret = psp_init_cap_microcode(psp, ucode_prefix);
ret &= psp_init_ta_microcode(psp, ucode_prefix);
break;
@ -207,6 +208,7 @@ static int psp_early_init(void *handle)
psp->boot_time_tmr = false;
fallthrough;
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 14):
psp_v13_0_set_psp_funcs(psp);
psp->autoload_supported = false;
break;
@ -355,7 +357,8 @@ static bool psp_get_runtime_db_entry(struct amdgpu_device *adev,
bool ret = false;
int i;
if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6))
if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14))
return false;
db_header_pos = adev->gmc.mc_vram_size - PSP_RUNTIME_DB_OFFSET;
@ -847,6 +850,7 @@ static bool psp_skip_tmr(struct psp_context *psp)
case IP_VERSION(13, 0, 2):
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 10):
case IP_VERSION(13, 0, 14):
return true;
default:
return false;
@ -1358,6 +1362,9 @@ static void psp_xgmi_reflect_topology_info(struct psp_context *psp,
uint8_t dst_num_links = node_info.num_links;
hive = amdgpu_get_xgmi_hive(psp->adev);
if (WARN_ON(!hive))
return;
list_for_each_entry(mirror_adev, &hive->device_list, gmc.xgmi.head) {
struct psp_xgmi_topology_info *mirror_top_info;
int j;
@ -1450,7 +1457,9 @@ int psp_xgmi_get_topology_info(struct psp_context *psp,
(psp->xgmi_context.supports_extended_data &&
get_extended_data) ||
amdgpu_ip_version(psp->adev, MP0_HWIP, 0) ==
IP_VERSION(13, 0, 6);
IP_VERSION(13, 0, 6) ||
amdgpu_ip_version(psp->adev, MP0_HWIP, 0) ==
IP_VERSION(13, 0, 14);
bool ta_port_num_support = amdgpu_sriov_vf(psp->adev) ? 0 :
psp->xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG;
@ -2464,6 +2473,7 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,
*type = GFX_FW_TYPE_DMUB;
break;
case AMDGPU_UCODE_ID_SDMA_UCODE_TH0:
case AMDGPU_UCODE_ID_SDMA_RS64:
*type = GFX_FW_TYPE_SDMA_UCODE_TH0;
break;
case AMDGPU_UCODE_ID_SDMA_UCODE_TH1:
@ -2635,7 +2645,8 @@ static int psp_load_p2s_table(struct psp_context *psp)
(adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)))
return 0;
if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6)) {
if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) ||
amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)) {
uint32_t supp_vers = adev->flags & AMD_IS_APU ? 0x0036013D :
0x0036003C;
if (psp->sos.fw_version < supp_vers)

View File

@ -334,7 +334,7 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size
set_ta_context_funcs(psp, ta_type, &context);
if (!context->initialized) {
if (!context || !context->initialized) {
dev_err(adev->dev, "TA is not initialized\n");
ret = -EINVAL;
goto err_free_shared_buf;

View File

@ -1299,7 +1299,7 @@ ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *a
return -EINVAL;
return sysfs_emit(buf, "%s: %lu\n%s: %lu\n%s: %lu\n", "ue", info.ue_count,
"ce", info.ce_count, "de", info.ue_count);
"ce", info.ce_count, "de", info.de_count);
}
static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev,
@ -1759,6 +1759,9 @@ int amdgpu_ras_sysfs_create(struct amdgpu_device *adev,
{
struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
if (amdgpu_aca_is_enabled(adev))
return 0;
if (!obj || obj->attr_inuse)
return -EINVAL;
@ -1793,6 +1796,9 @@ int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev,
{
struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
if (amdgpu_aca_is_enabled(adev))
return 0;
if (!obj || !obj->attr_inuse)
return -EINVAL;
@ -2172,12 +2178,15 @@ static void amdgpu_ras_interrupt_process_handler(struct work_struct *work)
int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev,
struct ras_dispatch_if *info)
{
struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
struct ras_ih_data *data = &obj->ih_data;
struct ras_manager *obj;
struct ras_ih_data *data;
obj = amdgpu_ras_find_obj(adev, &info->head);
if (!obj)
return -EINVAL;
data = &obj->ih_data;
if (data->inuse == 0)
return 0;
@ -2804,8 +2813,8 @@ static void amdgpu_ras_do_page_retirement(struct work_struct *work)
mutex_unlock(&con->umc_ecc_log.lock);
}
static int amdgpu_ras_query_ecc_status(struct amdgpu_device *adev,
enum amdgpu_ras_block ras_block, uint32_t timeout_ms)
static void amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev,
uint32_t timeout_ms)
{
int ret = 0;
struct ras_ecc_log_info *ecc_log;
@ -2814,7 +2823,7 @@ static int amdgpu_ras_query_ecc_status(struct amdgpu_device *adev,
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
memset(&info, 0, sizeof(info));
info.head.block = ras_block;
info.head.block = AMDGPU_RAS_BLOCK__UMC;
ecc_log = &ras->umc_ecc_log;
ecc_log->de_updated = false;
@ -2822,7 +2831,7 @@ static int amdgpu_ras_query_ecc_status(struct amdgpu_device *adev,
ret = amdgpu_ras_query_error_status(adev, &info);
if (ret) {
dev_err(adev->dev, "Failed to query ras error! ret:%d\n", ret);
return ret;
return;
}
if (timeout && !ecc_log->de_updated) {
@ -2833,21 +2842,11 @@ static int amdgpu_ras_query_ecc_status(struct amdgpu_device *adev,
if (timeout_ms && !timeout) {
dev_warn(adev->dev, "Can't find deferred error\n");
return -ETIMEDOUT;
return;
}
return 0;
}
static void amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev,
uint32_t timeout)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
int ret;
ret = amdgpu_ras_query_ecc_status(adev, AMDGPU_RAS_BLOCK__UMC, timeout);
if (!ret)
schedule_delayed_work(&con->page_retirement_dwork, 0);
schedule_delayed_work(&ras->page_retirement_dwork, 0);
}
static int amdgpu_ras_poison_consumption_handler(struct amdgpu_device *adev,
@ -2896,7 +2895,7 @@ static int amdgpu_ras_page_retirement_thread(void *param)
ras_block = poison_msg.block;
dev_info(adev->dev, "Start processing ras block %s(%d)\n",
dev_dbg(adev->dev, "Start processing ras block %s(%d)\n",
ras_block_str(ras_block), ras_block);
if (ras_block == AMDGPU_RAS_BLOCK__UMC) {
@ -2927,7 +2926,6 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_err_handler_data **data;
u32 max_eeprom_records_count = 0;
bool exc_err_limit = false;
int ret;
if (!con || amdgpu_sriov_vf(adev))
@ -2964,12 +2962,12 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
*/
if (adev->gmc.xgmi.pending_reset)
return 0;
ret = amdgpu_ras_eeprom_init(&con->eeprom_control, &exc_err_limit);
ret = amdgpu_ras_eeprom_init(&con->eeprom_control);
/*
* This calling fails when exc_err_limit is true or
* This call fails when is_rma is true or
* ret != 0.
*/
if (exc_err_limit || ret)
if (con->is_rma || ret)
goto free;
if (con->eeprom_control.ras_num_recs) {
@ -3017,7 +3015,7 @@ out:
* Except for the error-threshold-exceeded case, other failures in this
* function do not fail amdgpu driver init.
*/
if (!exc_err_limit)
if (!con->is_rma)
ret = 0;
else
ret = -EINVAL;
@ -3063,6 +3061,7 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
case IP_VERSION(13, 0, 2):
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 14):
return true;
default:
return false;
@ -3074,6 +3073,7 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
case IP_VERSION(13, 0, 0):
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 10):
case IP_VERSION(13, 0, 14):
return true;
default:
return false;
@ -3297,6 +3297,24 @@ static void amdgpu_ras_event_mgr_init(struct amdgpu_device *adev)
amdgpu_put_xgmi_hive(hive);
}
static void amdgpu_ras_init_reserved_vram_size(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
if (!con || (adev->flags & AMD_IS_APU))
return;
switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
case IP_VERSION(13, 0, 2):
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 14):
con->reserved_pages_in_bytes = AMDGPU_RAS_RESERVED_VRAM_SIZE;
break;
default:
break;
}
}
int amdgpu_ras_init(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
@ -3402,6 +3420,8 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
/* Get RAS schema for particular SOC */
con->schema = amdgpu_get_ras_schema(adev);
amdgpu_ras_init_reserved_vram_size(adev);
if (amdgpu_ras_fs_init(adev)) {
r = -EINVAL;
goto release_con;
@ -3613,25 +3633,33 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev)
struct amdgpu_ras_block_object *obj;
int r;
/* Guest side doesn't need init ras feature */
if (amdgpu_sriov_vf(adev))
return 0;
amdgpu_ras_event_mgr_init(adev);
if (amdgpu_aca_is_enabled(adev)) {
if (amdgpu_in_reset(adev))
r = amdgpu_aca_reset(adev);
else
if (!amdgpu_in_reset(adev)) {
r = amdgpu_aca_init(adev);
if (r)
return r;
}
if (!amdgpu_sriov_vf(adev))
amdgpu_ras_set_aca_debug_mode(adev, false);
} else {
if (amdgpu_in_reset(adev))
r = amdgpu_mca_reset(adev);
else
r = amdgpu_mca_init(adev);
if (r)
return r;
amdgpu_ras_set_aca_debug_mode(adev, false);
} else {
amdgpu_ras_set_mca_debug_mode(adev, false);
if (!amdgpu_sriov_vf(adev))
amdgpu_ras_set_mca_debug_mode(adev, false);
}
/* Guest side doesn't need init ras feature */
if (amdgpu_sriov_vf(adev))
return 0;
list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
obj = node->ras_obj;
if (!obj) {
@ -3701,6 +3729,8 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
if (amdgpu_aca_is_enabled(adev))
amdgpu_aca_fini(adev);
else
amdgpu_mca_fini(adev);
WARN(AMDGPU_RAS_GET_FEATURES(con->features), "Feature mask is not cleared");
@ -4284,21 +4314,8 @@ static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_d
void amdgpu_ras_add_mca_err_addr(struct ras_err_info *err_info, struct ras_err_addr *err_addr)
{
struct ras_err_addr *mca_err_addr;
/* This function will be retired. */
return;
mca_err_addr = kzalloc(sizeof(*mca_err_addr), GFP_KERNEL);
if (!mca_err_addr)
return;
INIT_LIST_HEAD(&mca_err_addr->node);
mca_err_addr->err_status = err_addr->err_status;
mca_err_addr->err_ipid = err_addr->err_ipid;
mca_err_addr->err_addr = err_addr->err_addr;
list_add_tail(&mca_err_addr->node, &err_info->err_addr_list);
}
void amdgpu_ras_del_mca_err_addr(struct ras_err_info *err_info, struct ras_err_addr *mca_err_addr)
@ -4382,64 +4399,74 @@ int amdgpu_ras_error_statistic_de_count(struct ras_err_data *err_data,
#define mmMP0_SMN_C2PMSG_92 0x1609C
#define mmMP0_SMN_C2PMSG_126 0x160BE
static void amdgpu_ras_boot_time_error_reporting(struct amdgpu_device *adev,
u32 instance, u32 boot_error)
u32 instance)
{
u32 socket_id, aid_id, hbm_id;
u32 reg_data;
u32 fw_status;
u32 boot_error;
u64 reg_addr;
socket_id = AMDGPU_RAS_GPU_ERR_SOCKET_ID(boot_error);
aid_id = AMDGPU_RAS_GPU_ERR_AID_ID(boot_error);
hbm_id = AMDGPU_RAS_GPU_ERR_HBM_ID(boot_error);
/* The SMN addressing pattern on other SOCs could differ from the one
 * used for aqua_vanjaram. Revisit this code if the pattern changes; in
 * that case, replace the aqua_vanjaram implementation with a more
 * generic helper. */
reg_addr = (mmMP0_SMN_C2PMSG_92 << 2) +
aqua_vanjaram_encode_ext_smn_addressing(instance);
fw_status = amdgpu_device_indirect_rreg_ext(adev, reg_addr);
reg_data = amdgpu_device_indirect_rreg_ext(adev, reg_addr);
dev_err(adev->dev, "socket: %d, aid: %d, firmware boot failed, fw status is 0x%x\n",
socket_id, aid_id, reg_data);
reg_addr = (mmMP0_SMN_C2PMSG_126 << 2) +
aqua_vanjaram_encode_ext_smn_addressing(instance);
boot_error = amdgpu_device_indirect_rreg_ext(adev, reg_addr);
socket_id = AMDGPU_RAS_GPU_ERR_SOCKET_ID(boot_error);
aid_id = AMDGPU_RAS_GPU_ERR_AID_ID(boot_error);
hbm_id = AMDGPU_RAS_GPU_ERR_HBM_ID(boot_error);
if (AMDGPU_RAS_GPU_ERR_MEM_TRAINING(boot_error))
dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, memory training failed\n",
socket_id, aid_id, hbm_id);
dev_info(adev->dev,
"socket: %d, aid: %d, hbm: %d, fw_status: 0x%x, memory training failed\n",
socket_id, aid_id, hbm_id, fw_status);
if (AMDGPU_RAS_GPU_ERR_FW_LOAD(boot_error))
dev_info(adev->dev, "socket: %d, aid: %d, firmware load failed at boot time\n",
socket_id, aid_id);
dev_info(adev->dev,
"socket: %d, aid: %d, fw_status: 0x%x, firmware load failed at boot time\n",
socket_id, aid_id, fw_status);
if (AMDGPU_RAS_GPU_ERR_WAFL_LINK_TRAINING(boot_error))
dev_info(adev->dev, "socket: %d, aid: %d, wafl link training failed\n",
socket_id, aid_id);
dev_info(adev->dev,
"socket: %d, aid: %d, fw_status: 0x%x, wafl link training failed\n",
socket_id, aid_id, fw_status);
if (AMDGPU_RAS_GPU_ERR_XGMI_LINK_TRAINING(boot_error))
dev_info(adev->dev, "socket: %d, aid: %d, xgmi link training failed\n",
socket_id, aid_id);
dev_info(adev->dev,
"socket: %d, aid: %d, fw_status: 0x%x, xgmi link training failed\n",
socket_id, aid_id, fw_status);
if (AMDGPU_RAS_GPU_ERR_USR_CP_LINK_TRAINING(boot_error))
dev_info(adev->dev, "socket: %d, aid: %d, usr cp link training failed\n",
socket_id, aid_id);
dev_info(adev->dev,
"socket: %d, aid: %d, fw_status: 0x%x, usr cp link training failed\n",
socket_id, aid_id, fw_status);
if (AMDGPU_RAS_GPU_ERR_USR_DP_LINK_TRAINING(boot_error))
dev_info(adev->dev, "socket: %d, aid: %d, usr dp link training failed\n",
socket_id, aid_id);
dev_info(adev->dev,
"socket: %d, aid: %d, fw_status: 0x%x, usr dp link training failed\n",
socket_id, aid_id, fw_status);
if (AMDGPU_RAS_GPU_ERR_HBM_MEM_TEST(boot_error))
dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, hbm memory test failed\n",
socket_id, aid_id, hbm_id);
dev_info(adev->dev,
"socket: %d, aid: %d, hbm: %d, fw_status: 0x%x, hbm memory test failed\n",
socket_id, aid_id, hbm_id, fw_status);
if (AMDGPU_RAS_GPU_ERR_HBM_BIST_TEST(boot_error))
dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, hbm bist test failed\n",
socket_id, aid_id, hbm_id);
dev_info(adev->dev,
"socket: %d, aid: %d, hbm: %d, fw_status: 0x%x, hbm bist test failed\n",
socket_id, aid_id, hbm_id, fw_status);
}
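
A hedged illustration of the field decode used above. The bit positions come from the AMDGPU_RAS_GPU_ERR_* definitions in the header change later in this series (socket_id bits 8..10, aid_id bits 11..12, hbm_id bits 13..14), assuming AMDGPU_GET_REG_FIELD(x, h, l) extracts bits l..h inclusive; the standalone function below is only an example.

#include <stdint.h>
#include <stdio.h>

static void example_decode_boot_error(uint32_t boot_error)
{
	uint32_t socket_id = (boot_error >> 8) & 0x7;	/* bits 8..10  */
	uint32_t aid_id    = (boot_error >> 11) & 0x3;	/* bits 11..12 */
	uint32_t hbm_id    = (boot_error >> 13) & 0x3;	/* bits 13..14 */

	/* e.g. boot_error = 0x2900 -> socket 1, aid 1, hbm 1 */
	printf("socket %u, aid %u, hbm %u\n", socket_id, aid_id, hbm_id);
}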
static int amdgpu_ras_wait_for_boot_complete(struct amdgpu_device *adev,
u32 instance, u32 *boot_error)
static bool amdgpu_ras_boot_error_detected(struct amdgpu_device *adev,
u32 instance)
{
u32 reg_addr;
u64 reg_addr;
u32 reg_data;
int retry_loop;
@ -4448,41 +4475,22 @@ static int amdgpu_ras_wait_for_boot_complete(struct amdgpu_device *adev,
for (retry_loop = 0; retry_loop < AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT; retry_loop++) {
reg_data = amdgpu_device_indirect_rreg_ext(adev, reg_addr);
if ((reg_data & AMDGPU_RAS_BOOT_STATUS_MASK) == AMDGPU_RAS_BOOT_STEADY_STATUS) {
*boot_error = AMDGPU_RAS_BOOT_SUCEESS;
return 0;
}
msleep(1);
if ((reg_data & AMDGPU_RAS_BOOT_STATUS_MASK) == AMDGPU_RAS_BOOT_STEADY_STATUS)
return false;
else
msleep(1);
}
/* The pattern for smn addressing in other SOC could be different from
* the one for aqua_vanjaram. We should revisit the code if the pattern
* is changed. In such case, replace the aqua_vanjaram implementation
* with more common helper */
reg_addr = (mmMP0_SMN_C2PMSG_126 << 2) +
aqua_vanjaram_encode_ext_smn_addressing(instance);
for (retry_loop = 0; retry_loop < AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT; retry_loop++) {
reg_data = amdgpu_device_indirect_rreg_ext(adev, reg_addr);
if (AMDGPU_RAS_GPU_ERR_BOOT_STATUS(reg_data)) {
*boot_error = reg_data;
return 0;
}
msleep(1);
}
*boot_error = reg_data;
return -ETIME;
return true;
}
void amdgpu_ras_query_boot_status(struct amdgpu_device *adev, u32 num_instances)
{
u32 boot_error = 0;
u32 i;
for (i = 0; i < num_instances; i++) {
if (amdgpu_ras_wait_for_boot_complete(adev, i, &boot_error))
amdgpu_ras_boot_time_error_reporting(adev, i, boot_error);
if (amdgpu_ras_boot_error_detected(adev, i))
amdgpu_ras_boot_time_error_reporting(adev, i);
}
}
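
The boot-status check above polls a register, masks the low byte and compares it with the steady value 0xBA, giving up after a bounded number of retries. A minimal userspace sketch of the same pattern follows; the register read is simulated and usleep() stands in for msleep(), so this is an illustration rather than driver code.

#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

#define BOOT_STATUS_POLLING_LIMIT 100	/* mirrors AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT */
#define BOOT_STEADY_STATUS        0xBA
#define BOOT_STATUS_MASK          0xFF

/* stand-in for amdgpu_device_indirect_rreg_ext(): reports steady after a few reads */
static unsigned int read_boot_status(void)
{
	static int calls;

	return (++calls < 5) ? 0x00 : BOOT_STEADY_STATUS;
}

/* true if the instance never reached the steady state within the polling window */
static bool boot_error_detected(void)
{
	for (int retry = 0; retry < BOOT_STATUS_POLLING_LIMIT; retry++) {
		unsigned int reg = read_boot_status();

		if ((reg & BOOT_STATUS_MASK) == BOOT_STEADY_STATUS)
			return false;
		usleep(1000);	/* roughly msleep(1) */
	}
	return true;
}

int main(void)
{
	printf("boot error detected: %s\n", boot_error_detected() ? "yes" : "no");
	return 0;
}
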
@ -4501,3 +4509,21 @@ int amdgpu_ras_reserve_page(struct amdgpu_device *adev, uint64_t pfn)
return ret;
}
void amdgpu_ras_event_log_print(struct amdgpu_device *adev, u64 event_id,
const char *fmt, ...)
{
struct va_format vaf;
va_list args;
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
if (amdgpu_ras_event_id_is_valid(adev, event_id))
dev_printk(KERN_INFO, adev->dev, "{%llu}%pV", event_id, &vaf);
else
dev_printk(KERN_INFO, adev->dev, "%pV", &vaf);
va_end(args);
}
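
amdgpu_ras_event_log_print() above folds the old RAS_EVENT_LOG macro body into a function: it conditionally prefixes the message with "{event_id}" and hands the va_list to dev_printk() via %pV. The sketch below mirrors that shape in userspace, with a stand-in validity check and vsnprintf() instead of the kernel-only %pV.

#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* assumption for the sketch; stands in for amdgpu_ras_event_id_is_valid() */
static bool event_id_is_valid(uint64_t event_id)
{
	return event_id != 0;
}

__attribute__((format(printf, 2, 3)))
static void event_log_print(uint64_t event_id, const char *fmt, ...)
{
	char msg[256];
	va_list args;

	va_start(args, fmt);
	vsnprintf(msg, sizeof(msg), fmt, args);
	va_end(args);

	if (event_id_is_valid(event_id))
		printf("{%llu}%s", (unsigned long long)event_id, msg);
	else
		printf("%s", msg);
}

int main(void)
{
	event_log_print(42, "socket: %d, aid: %d, hbm memory test failed\n", 0, 1);
	event_log_print(0, "no event id for this message\n");
	return 0;
}
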

View File

@ -47,12 +47,10 @@ struct amdgpu_iv_entry;
#define AMDGPU_RAS_GPU_ERR_SOCKET_ID(x) AMDGPU_GET_REG_FIELD(x, 10, 8)
#define AMDGPU_RAS_GPU_ERR_AID_ID(x) AMDGPU_GET_REG_FIELD(x, 12, 11)
#define AMDGPU_RAS_GPU_ERR_HBM_ID(x) AMDGPU_GET_REG_FIELD(x, 14, 13)
#define AMDGPU_RAS_GPU_ERR_BOOT_STATUS(x) AMDGPU_GET_REG_FIELD(x, 31, 31)
#define AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT 1000
#define AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT 100
#define AMDGPU_RAS_BOOT_STEADY_STATUS 0xBA
#define AMDGPU_RAS_BOOT_STATUS_MASK 0xFF
#define AMDGPU_RAS_BOOT_SUCEESS 0x80000000
#define AMDGPU_RAS_FLAG_INIT_BY_VBIOS (0x1 << 0)
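
The AMDGPU_RAS_GPU_ERR_* accessors above pull small fields out of the boot error word with AMDGPU_GET_REG_FIELD(). That macro's body is not part of this hunk; the sketch below assumes a typical extract-bits-l..h definition, which may differ in detail from the driver's.

#include <stdint.h>
#include <stdio.h>

/* assumed definition: extract bits l..h (inclusive) of x */
#define GET_REG_FIELD(x, h, l) (((x) >> (l)) & ((1U << ((h) - (l) + 1)) - 1))

#define ERR_SOCKET_ID(x)  GET_REG_FIELD(x, 10, 8)
#define ERR_AID_ID(x)     GET_REG_FIELD(x, 12, 11)
#define ERR_HBM_ID(x)     GET_REG_FIELD(x, 14, 13)

int main(void)
{
	/* fabricated error word: socket 2, aid 1, hbm 1 */
	uint32_t boot_error = (2U << 8) | (1U << 11) | (1U << 13);

	printf("socket %u, aid %u, hbm %u\n",
	       ERR_SOCKET_ID(boot_error),
	       ERR_AID_ID(boot_error),
	       ERR_HBM_ID(boot_error));
	return 0;
}
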
/* position of instance value in sub_block_index of
@ -64,16 +62,14 @@ struct amdgpu_iv_entry;
#define AMDGPU_RAS_FEATURES_SOCKETID_SHIFT 29
#define AMDGPU_RAS_FEATURES_SOCKETID_MASK 0xe0000000
/* Reserve 8 physical dram rows for possible retirement.
 * In the worst case, it will lose 8 * 2MB of memory in the vram domain */
#define AMDGPU_RAS_RESERVED_VRAM_SIZE (16ULL << 20)
/* The high three bits indicate the socket id */
#define AMDGPU_RAS_GET_FEATURES(val) ((val) & ~AMDGPU_RAS_FEATURES_SOCKETID_MASK)
#define RAS_EVENT_LOG(_adev, _id, _fmt, ...) \
do { \
if (amdgpu_ras_event_id_is_valid((_adev), (_id))) \
dev_info((_adev)->dev, "{%llu}" _fmt, (_id), ##__VA_ARGS__); \
else \
dev_info((_adev)->dev, _fmt, ##__VA_ARGS__); \
} while (0)
#define RAS_EVENT_LOG(adev, id, fmt, ...) \
	amdgpu_ras_event_log_print((adev), (id), (fmt), ##__VA_ARGS__)
enum amdgpu_ras_block {
AMDGPU_RAS_BLOCK__UMC = 0,
@ -526,6 +522,7 @@ struct amdgpu_ras {
bool update_channel_flag;
/* Record status of smu mca debug mode */
bool is_aca_debug_mode;
bool is_rma;
/* Record special requirements of gpu reset caller */
uint32_t gpu_reset_flags;
@ -546,6 +543,7 @@ struct amdgpu_ras {
struct ras_event_manager __event_mgr;
struct ras_event_manager *event_mgr;
uint64_t reserved_pages_in_bytes;
};
struct ras_fs_data {
@ -956,4 +954,8 @@ int amdgpu_ras_put_poison_req(struct amdgpu_device *adev,
enum amdgpu_ras_block block, uint16_t pasid,
pasid_notify pasid_fn, void *data, uint32_t reset);
__printf(3, 4)
void amdgpu_ras_event_log_print(struct amdgpu_device *adev, u64 event_id,
const char *fmt, ...);
#endif

View File

@ -161,6 +161,7 @@ static bool __is_ras_eeprom_supported(struct amdgpu_device *adev)
case IP_VERSION(13, 0, 10):
return true;
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 14):
return (adev->gmc.is_app_apu) ? false : true;
default:
return false;
@ -222,6 +223,7 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev,
return true;
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 10):
case IP_VERSION(13, 0, 14):
control->i2c_address = EEPROM_I2C_MADDR_4;
return true;
default:
@ -748,6 +750,9 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
control->tbl_rai.health_percent = 0;
}
if (amdgpu_bad_page_threshold != -1)
ras->is_rma = true;
/* ignore the -ENOTSUPP return value */
amdgpu_dpm_send_rma_reason(adev);
}
@ -1319,8 +1324,7 @@ Out:
return res == RAS_TABLE_V2_1_INFO_SIZE ? 0 : res;
}
int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
bool *exceed_err_limit)
int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
{
struct amdgpu_device *adev = to_amdgpu_device(control);
unsigned char buf[RAS_TABLE_HEADER_SIZE] = { 0 };
@ -1328,7 +1332,7 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
int res;
*exceed_err_limit = false;
ras->is_rma = false;
if (!__is_ras_eeprom_supported(adev))
return 0;
@ -1420,7 +1424,7 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
dev_warn(adev->dev, "GPU will be initialized due to bad_page_threshold = -1.");
res = 0;
} else {
*exceed_err_limit = true;
ras->is_rma = true;
dev_err(adev->dev,
"RAS records:%d exceed threshold:%d, "
"GPU will not be initialized. Replace this GPU or increase the threshold",

View File

@ -129,8 +129,7 @@ struct eeprom_table_record {
unsigned char mcumc_id;
} __packed;
int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control,
bool *exceed_err_limit);
int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control);
int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control);

View File

@ -33,6 +33,7 @@ int amdgpu_reset_init(struct amdgpu_device *adev)
switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
case IP_VERSION(13, 0, 2):
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 14):
ret = aldebaran_reset_init(adev);
break;
case IP_VERSION(11, 0, 7):
@ -55,6 +56,7 @@ int amdgpu_reset_fini(struct amdgpu_device *adev)
switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) {
case IP_VERSION(13, 0, 2):
case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 14):
ret = aldebaran_reset_fini(adev);
break;
case IP_VERSION(11, 0, 7):
@ -158,3 +160,37 @@ void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain)
atomic_set(&reset_domain->in_gpu_reset, 0);
up_write(&reset_domain->sem);
}
void amdgpu_reset_get_desc(struct amdgpu_reset_context *rst_ctxt, char *buf,
size_t len)
{
struct amdgpu_ring *ring;
if (!buf || !len)
return;
switch (rst_ctxt->src) {
case AMDGPU_RESET_SRC_JOB:
if (rst_ctxt->job) {
ring = amdgpu_job_ring(rst_ctxt->job);
snprintf(buf, len, "job hang on ring:%s", ring->name);
} else {
strscpy(buf, "job hang", len);
}
break;
case AMDGPU_RESET_SRC_RAS:
strscpy(buf, "RAS error", len);
break;
case AMDGPU_RESET_SRC_MES:
strscpy(buf, "MES hang", len);
break;
case AMDGPU_RESET_SRC_HWS:
strscpy(buf, "HWS hang", len);
break;
case AMDGPU_RESET_SRC_USER:
strscpy(buf, "user trigger", len);
break;
default:
strscpy(buf, "unknown", len);
}
}
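
amdgpu_reset_get_desc() above turns the new reset-source enum into a short description in a caller-supplied buffer, which the devcoredump path can then embed. A reduced userspace sketch of the same shape, with snprintf() standing in for strscpy() and a made-up ring name:

#include <stdio.h>

enum reset_src { RESET_SRC_UNKNOWN, RESET_SRC_JOB, RESET_SRC_RAS, RESET_SRC_USER };

static void reset_get_desc(enum reset_src src, char *buf, size_t len)
{
	if (!buf || !len)
		return;

	switch (src) {
	case RESET_SRC_JOB:
		/* "gfx_0.0.0" is a placeholder for the hung ring's name */
		snprintf(buf, len, "job hang on ring:%s", "gfx_0.0.0");
		break;
	case RESET_SRC_RAS:
		snprintf(buf, len, "RAS error");
		break;
	case RESET_SRC_USER:
		snprintf(buf, len, "user trigger");
		break;
	default:
		snprintf(buf, len, "unknown");
	}
}

int main(void)
{
	char desc[64];

	reset_get_desc(RESET_SRC_RAS, desc, sizeof(desc));
	printf("reset reason: %s\n", desc);
	return 0;
}
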

View File

@ -33,6 +33,16 @@ enum AMDGPU_RESET_FLAGS {
AMDGPU_NEED_FULL_RESET = 0,
AMDGPU_SKIP_HW_RESET = 1,
AMDGPU_SKIP_COREDUMP = 2,
AMDGPU_HOST_FLR = 3,
};
enum AMDGPU_RESET_SRCS {
AMDGPU_RESET_SRC_UNKNOWN,
AMDGPU_RESET_SRC_JOB,
AMDGPU_RESET_SRC_RAS,
AMDGPU_RESET_SRC_MES,
AMDGPU_RESET_SRC_HWS,
AMDGPU_RESET_SRC_USER,
};
struct amdgpu_reset_context {
@ -42,6 +52,7 @@ struct amdgpu_reset_context {
struct amdgpu_hive_info *hive;
struct list_head *reset_device_list;
unsigned long flags;
enum AMDGPU_RESET_SRCS src;
};
struct amdgpu_reset_handler {
@ -129,6 +140,9 @@ void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain);
void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain);
void amdgpu_reset_get_desc(struct amdgpu_reset_context *rst_ctxt, char *buf,
size_t len);
#define for_each_handler(i, handler, reset_ctl) \
for (i = 0; (i < AMDGPU_RESET_MAX_HANDLERS) && \
(handler = (*reset_ctl->reset_handlers)[i]); \

View File

@ -352,7 +352,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
ring->max_dw = max_dw;
ring->hw_prio = hw_prio;
if (!ring->no_scheduler) {
if (!ring->no_scheduler && ring->funcs->type < AMDGPU_HW_IP_NUM) {
hw_ip = ring->funcs->type;
num_sched = &adev->gpu_sched[hw_ip][hw_prio].num_scheds;
adev->gpu_sched[hw_ip][hw_prio].sched[(*num_sched)++] =
@ -473,8 +473,9 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
struct amdgpu_ring *ring = file_inode(f)->i_private;
int r, i;
uint32_t value, result, early[3];
loff_t i;
int r;
if (*pos & 3 || size & 3)
return -EINVAL;

View File

@ -112,6 +112,53 @@ typedef enum _SOC21_FIRMWARE_ID_ {
SOC21_FIRMWARE_ID_MAX = 37
} SOC21_FIRMWARE_ID;
typedef enum _SOC24_FIRMWARE_ID_ {
SOC24_FIRMWARE_ID_INVALID = 0,
SOC24_FIRMWARE_ID_RLC_G_UCODE = 1,
SOC24_FIRMWARE_ID_RLC_TOC = 2,
SOC24_FIRMWARE_ID_RLCG_SCRATCH = 3,
SOC24_FIRMWARE_ID_RLC_SRM_ARAM = 4,
SOC24_FIRMWARE_ID_RLC_P_UCODE = 5,
SOC24_FIRMWARE_ID_RLC_V_UCODE = 6,
SOC24_FIRMWARE_ID_RLX6_UCODE = 7,
SOC24_FIRMWARE_ID_RLX6_UCODE_CORE1 = 8,
SOC24_FIRMWARE_ID_RLX6_DRAM_BOOT = 9,
SOC24_FIRMWARE_ID_RLX6_DRAM_BOOT_CORE1 = 10,
SOC24_FIRMWARE_ID_SDMA_UCODE_TH0 = 11,
SOC24_FIRMWARE_ID_SDMA_UCODE_TH1 = 12,
SOC24_FIRMWARE_ID_CP_PFP = 13,
SOC24_FIRMWARE_ID_CP_ME = 14,
SOC24_FIRMWARE_ID_CP_MEC = 15,
SOC24_FIRMWARE_ID_RS64_MES_P0 = 16,
SOC24_FIRMWARE_ID_RS64_MES_P1 = 17,
SOC24_FIRMWARE_ID_RS64_PFP = 18,
SOC24_FIRMWARE_ID_RS64_ME = 19,
SOC24_FIRMWARE_ID_RS64_MEC = 20,
SOC24_FIRMWARE_ID_RS64_MES_P0_STACK = 21,
SOC24_FIRMWARE_ID_RS64_MES_P1_STACK = 22,
SOC24_FIRMWARE_ID_RS64_PFP_P0_STACK = 23,
SOC24_FIRMWARE_ID_RS64_PFP_P1_STACK = 24,
SOC24_FIRMWARE_ID_RS64_ME_P0_STACK = 25,
SOC24_FIRMWARE_ID_RS64_ME_P1_STACK = 26,
SOC24_FIRMWARE_ID_RS64_MEC_P0_STACK = 27,
SOC24_FIRMWARE_ID_RS64_MEC_P1_STACK = 28,
SOC24_FIRMWARE_ID_RS64_MEC_P2_STACK = 29,
SOC24_FIRMWARE_ID_RS64_MEC_P3_STACK = 30,
SOC24_FIRMWARE_ID_RLC_SRM_DRAM_SR = 31,
SOC24_FIRMWARE_ID_RLCG_SCRATCH_SR = 32,
SOC24_FIRMWARE_ID_RLCP_SCRATCH_SR = 33,
SOC24_FIRMWARE_ID_RLCV_SCRATCH_SR = 34,
SOC24_FIRMWARE_ID_RLX6_DRAM_SR = 35,
SOC24_FIRMWARE_ID_RLX6_DRAM_SR_CORE1 = 36,
SOC24_FIRMWARE_ID_RLCDEBUGLOG = 37,
SOC24_FIRMWARE_ID_SRIOV_DEBUG = 38,
SOC24_FIRMWARE_ID_SRIOV_CSA_RLC = 39,
SOC24_FIRMWARE_ID_SRIOV_CSA_SDMA = 40,
SOC24_FIRMWARE_ID_SRIOV_CSA_CP = 41,
SOC24_FIRMWARE_ID_UMF_ZONE_PAD = 42,
SOC24_FIRMWARE_ID_MAX = 43
} SOC24_FIRMWARE_ID;
typedef struct _RLC_TABLE_OF_CONTENT {
union {
unsigned int DW0;
@ -155,6 +202,33 @@ typedef struct _RLC_TABLE_OF_CONTENT {
};
} RLC_TABLE_OF_CONTENT;
typedef struct _RLC_TABLE_OF_CONTENT_V2 {
union {
unsigned int DW0;
struct {
uint32_t offset : 25;
uint32_t id : 7;
};
};
union {
unsigned int DW1;
struct {
uint32_t reserved0 : 1;
uint32_t reserved1 : 1;
uint32_t reserved2 : 1;
uint32_t memory_destination : 2;
uint32_t vfflr_image_code : 4;
uint32_t reserved9 : 1;
uint32_t reserved10 : 1;
uint32_t reserved11 : 1;
uint32_t size_x16 : 1;
uint32_t reserved13 : 1;
uint32_t size : 18;
};
};
} RLC_TABLE_OF_CONTENT_V2;
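
The RLC_TABLE_OF_CONTENT_V2 entry above overlays named bitfields on each dword, which relies on LSB-first packing on little-endian hosts. The sketch below decodes DW0 (offset in bits 0..24, id in bits 25..31) with explicit shifts instead, expressing the same layout without that assumption.

#include <stdint.h>
#include <stdio.h>

static void decode_toc_dw0(uint32_t dw0, uint32_t *offset, uint32_t *id)
{
	*offset = dw0 & ((1U << 25) - 1);	/* bits 0..24 */
	*id     = dw0 >> 25;			/* bits 25..31 */
}

int main(void)
{
	uint32_t offset, id;
	uint32_t dw0 = (7U << 25) | 0x1000;	/* id 7 (RLX6_UCODE), offset 0x1000 */

	decode_toc_dw0(dw0, &offset, &id);
	printf("toc entry: id %u at offset 0x%x\n", id, offset);
	return 0;
}
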
#define RLC_TOC_MAX_SIZE 64
struct amdgpu_rlc_funcs {

View File

@ -158,6 +158,7 @@ static int amdgpu_sdma_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst)
const struct common_firmware_header *header = NULL;
const struct sdma_firmware_header_v1_0 *hdr;
const struct sdma_firmware_header_v2_0 *hdr_v2;
const struct sdma_firmware_header_v3_0 *hdr_v3;
header = (const struct common_firmware_header *)
sdma_inst->fw->data;
@ -174,6 +175,11 @@ static int amdgpu_sdma_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst)
sdma_inst->fw_version = le32_to_cpu(hdr_v2->header.ucode_version);
sdma_inst->feature_version = le32_to_cpu(hdr_v2->ucode_feature_version);
break;
case 3:
hdr_v3 = (const struct sdma_firmware_header_v3_0 *)sdma_inst->fw->data;
sdma_inst->fw_version = le32_to_cpu(hdr_v3->header.ucode_version);
sdma_inst->feature_version = le32_to_cpu(hdr_v3->ucode_feature_version);
break;
default:
return -EINVAL;
}
@ -206,6 +212,7 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
const struct common_firmware_header *header = NULL;
int err, i;
const struct sdma_firmware_header_v2_0 *sdma_hdr;
const struct sdma_firmware_header_v3_0 *sdma_hv3;
uint16_t version_major;
char ucode_prefix[30];
char fw_name[52];
@ -251,11 +258,12 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
else {
/* Use a single copy per SDMA firmware type. PSP uses the same instance for all
* groups of SDMAs */
if (amdgpu_ip_version(adev, SDMA0_HWIP,
0) ==
IP_VERSION(4, 4, 2) &&
if ((amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
IP_VERSION(4, 4, 2) ||
amdgpu_ip_version(adev, SDMA0_HWIP, 0) ==
IP_VERSION(4, 4, 5)) &&
adev->firmware.load_type ==
AMDGPU_FW_LOAD_PSP &&
AMDGPU_FW_LOAD_PSP &&
adev->sdma.num_inst_per_aid == i) {
break;
}
@ -281,6 +289,15 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev,
adev->firmware.fw_size +=
ALIGN(le32_to_cpu(sdma_hdr->ctl_ucode_size_bytes), PAGE_SIZE);
break;
case 3:
sdma_hv3 = (const struct sdma_firmware_header_v3_0 *)
adev->sdma.instance[0].fw->data;
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA_RS64];
info->ucode_id = AMDGPU_UCODE_ID_SDMA_RS64;
info->fw = adev->sdma.instance[0].fw;
adev->firmware.fw_size +=
ALIGN(le32_to_cpu(sdma_hv3->ucode_size_bytes), PAGE_SIZE);
break;
default:
err = -EINVAL;
}

View File

@ -60,6 +60,10 @@ struct amdgpu_sdma_instance {
struct amdgpu_ring page;
bool burst_nop;
uint32_t aid_id;
struct amdgpu_bo *sdma_fw_obj;
uint64_t sdma_fw_gpu_addr;
uint32_t *sdma_fw_ptr;
};
enum amdgpu_sdma_ras_memory_id {

View File

@ -135,6 +135,10 @@ static ssize_t amdgpu_securedisplay_debugfs_write(struct file *f, const char __u
mutex_unlock(&psp->securedisplay_context.mutex);
break;
case 2:
if (size < 3 || phy_id >= TA_SECUREDISPLAY_MAX_PHY) {
dev_err(adev->dev, "Invalid input: %s\n", str);
return -EINVAL;
}
mutex_lock(&psp->securedisplay_context.mutex);
psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd,
TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC);

View File

@ -845,8 +845,7 @@ static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
uint64_t page_idx, pages_per_xcc;
int i;
uint64_t ctrl_flags = (flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) |
AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
uint64_t ctrl_flags = AMDGPU_PTE_MTYPE_VG10(flags, AMDGPU_MTYPE_NC);
pages_per_xcc = total_pages;
do_div(pages_per_xcc, num_xcc);
@ -1401,7 +1400,8 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
*/
dma_resv_for_each_fence(&resv_cursor, bo->base.resv,
DMA_RESV_USAGE_BOOKKEEP, f) {
if (amdkfd_fence_check_mm(f, current->mm))
if (amdkfd_fence_check_mm(f, current->mm) &&
!(place->flags & TTM_PL_FLAG_CONTIGUOUS))
return false;
}
@ -1742,7 +1742,8 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
amdgpu_atomfirmware_get_fw_reserved_fb_size(adev);
if (!adev->bios &&
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3))
(amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)))
reserve_size = max(reserve_size, (uint32_t)280 << 20);
else if (!reserve_size)
reserve_size = DISCOVERY_TMR_OFFSET;

View File

@ -323,6 +323,12 @@ void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr)
DRM_DEBUG("ctl_ucode_offset: %u\n", le32_to_cpu(sdma_hdr->ctl_ucode_offset));
DRM_DEBUG("ctl_jt_offset: %u\n", le32_to_cpu(sdma_hdr->ctl_jt_offset));
DRM_DEBUG("ctl_jt_size: %u\n", le32_to_cpu(sdma_hdr->ctl_jt_size));
} else if (version_major == 3) {
const struct sdma_firmware_header_v3_0 *sdma_hdr =
container_of(hdr, struct sdma_firmware_header_v3_0, header);
DRM_DEBUG("ucode_reversion: %u\n",
le32_to_cpu(sdma_hdr->ucode_feature_version));
} else {
DRM_ERROR("Unknown SDMA ucode version: %u.%u\n",
version_major, version_minor);
@ -682,6 +688,30 @@ const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id)
return "UMSCH_MM_CMD_BUFFER";
case AMDGPU_UCODE_ID_JPEG_RAM:
return "JPEG";
case AMDGPU_UCODE_ID_SDMA_RS64:
return "RS64_SDMA";
case AMDGPU_UCODE_ID_CP_RS64_PFP:
return "RS64_PFP";
case AMDGPU_UCODE_ID_CP_RS64_ME:
return "RS64_ME";
case AMDGPU_UCODE_ID_CP_RS64_MEC:
return "RS64_MEC";
case AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK:
return "RS64_PFP_P0_STACK";
case AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK:
return "RS64_PFP_P1_STACK";
case AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK:
return "RS64_ME_P0_STACK";
case AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK:
return "RS64_ME_P1_STACK";
case AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK:
return "RS64_MEC_P0_STACK";
case AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK:
return "RS64_MEC_P1_STACK";
case AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK:
return "RS64_MEC_P2_STACK";
case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK:
return "RS64_MEC_P3_STACK";
default:
return "UNKNOWN UCODE";
}
@ -791,6 +821,7 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
const struct dmcub_firmware_header_v1_0 *dmcub_hdr = NULL;
const struct mes_firmware_header_v1_0 *mes_hdr = NULL;
const struct sdma_firmware_header_v2_0 *sdma_hdr = NULL;
const struct sdma_firmware_header_v3_0 *sdmav3_hdr = NULL;
const struct imu_firmware_header_v1_0 *imu_hdr = NULL;
const struct vpe_firmware_header_v1_0 *vpe_hdr = NULL;
const struct umsch_mm_firmware_header_v1_0 *umsch_mm_hdr = NULL;
@ -812,6 +843,7 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
dmcub_hdr = (const struct dmcub_firmware_header_v1_0 *)ucode->fw->data;
mes_hdr = (const struct mes_firmware_header_v1_0 *)ucode->fw->data;
sdma_hdr = (const struct sdma_firmware_header_v2_0 *)ucode->fw->data;
sdmav3_hdr = (const struct sdma_firmware_header_v3_0 *)ucode->fw->data;
imu_hdr = (const struct imu_firmware_header_v1_0 *)ucode->fw->data;
vpe_hdr = (const struct vpe_firmware_header_v1_0 *)ucode->fw->data;
umsch_mm_hdr = (const struct umsch_mm_firmware_header_v1_0 *)ucode->fw->data;
@ -828,6 +860,11 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
ucode_addr = (u8 *)ucode->fw->data +
le32_to_cpu(sdma_hdr->ctl_ucode_offset);
break;
case AMDGPU_UCODE_ID_SDMA_RS64:
ucode->ucode_size = le32_to_cpu(sdmav3_hdr->ucode_size_bytes);
ucode_addr = (u8 *)ucode->fw->data +
le32_to_cpu(sdmav3_hdr->header.ucode_array_offset_bytes);
break;
case AMDGPU_UCODE_ID_CP_MEC1:
case AMDGPU_UCODE_ID_CP_MEC2:
ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes) -

View File

@ -346,6 +346,14 @@ struct umsch_mm_firmware_header_v1_0 {
uint32_t umsch_mm_data_start_addr_hi;
};
/* version_major=3, version_minor=0 */
struct sdma_firmware_header_v3_0 {
struct common_firmware_header header;
uint32_t ucode_feature_version;
uint32_t ucode_offset_bytes;
uint32_t ucode_size_bytes;
};
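
The new sdma_firmware_header_v3_0 is consumed by switching on the major version in the common header, as amdgpu_sdma_init_inst_ctx() does above. A simplified sketch of that dispatch follows; the common header here is a cut-down stand-in and the le32_to_cpu() byte swapping is omitted.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct common_hdr {			/* simplified stand-in for common_firmware_header */
	uint32_t version_major;
	uint32_t ucode_version;
};

struct sdma_hdr_v3 {
	struct common_hdr header;
	uint32_t ucode_feature_version;
	uint32_t ucode_offset_bytes;
	uint32_t ucode_size_bytes;
};

static int parse_sdma_fw(const void *blob, uint32_t *fw_ver, uint32_t *feat_ver)
{
	struct common_hdr hdr;

	memcpy(&hdr, blob, sizeof(hdr));
	switch (hdr.version_major) {
	case 3: {
		struct sdma_hdr_v3 v3;

		memcpy(&v3, blob, sizeof(v3));
		*fw_ver = v3.header.ucode_version;
		*feat_ver = v3.ucode_feature_version;
		return 0;
	}
	default:
		return -1;	/* -EINVAL in the driver */
	}
}

int main(void)
{
	struct sdma_hdr_v3 fw = {
		.header = { .version_major = 3, .ucode_version = 0x700 },
		.ucode_feature_version = 20,
		.ucode_size_bytes = 0x4000,
	};
	uint32_t fw_ver, feat_ver;

	if (!parse_sdma_fw(&fw, &fw_ver, &feat_ver))
		printf("fw 0x%x, feature %u\n", (unsigned)fw_ver, (unsigned)feat_ver);
	return 0;
}
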
/* gpu info payload */
struct gpu_info_firmware_v1_0 {
uint32_t gc_num_se;
@ -431,6 +439,7 @@ union amdgpu_firmware_header {
struct sdma_firmware_header_v1_0 sdma;
struct sdma_firmware_header_v1_1 sdma_v1_1;
struct sdma_firmware_header_v2_0 sdma_v2_0;
struct sdma_firmware_header_v3_0 sdma_v3_0;
struct gpu_info_firmware_header_v1_0 gpu_info;
struct dmcu_firmware_header_v1_0 dmcu;
struct dmcub_firmware_header_v1_0 dmcub;
@ -455,6 +464,7 @@ enum AMDGPU_UCODE_ID {
AMDGPU_UCODE_ID_SDMA7,
AMDGPU_UCODE_ID_SDMA_UCODE_TH0,
AMDGPU_UCODE_ID_SDMA_UCODE_TH1,
AMDGPU_UCODE_ID_SDMA_RS64,
AMDGPU_UCODE_ID_CP_CE,
AMDGPU_UCODE_ID_CP_PFP,
AMDGPU_UCODE_ID_CP_ME,

View File

@ -23,7 +23,10 @@
*/
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/debugfs.h>
#include <drm/drm_exec.h>
#include <drm/drm_drv.h>
#include "amdgpu.h"
#include "amdgpu_umsch_mm.h"
@ -743,6 +746,17 @@ static int umsch_mm_init(struct amdgpu_device *adev)
return r;
}
r = amdgpu_bo_create_kernel(adev, AMDGPU_UMSCHFW_LOG_SIZE, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GTT,
&adev->umsch_mm.dbglog_bo,
&adev->umsch_mm.log_gpu_addr,
&adev->umsch_mm.log_cpu_addr);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate umsch debug bo\n", r);
return r;
}
mutex_init(&adev->umsch_mm.mutex_hidden);
umsch_mm_agdb_index_init(adev);
@ -789,6 +803,7 @@ static int umsch_mm_sw_init(void *handle)
if (r)
return r;
amdgpu_umsch_fwlog_init(&adev->umsch_mm);
r = umsch_mm_ring_init(&adev->umsch_mm);
if (r)
return r;
@ -815,6 +830,10 @@ static int umsch_mm_sw_fini(void *handle)
&adev->umsch_mm.cmd_buf_gpu_addr,
(void **)&adev->umsch_mm.cmd_buf_ptr);
amdgpu_bo_free_kernel(&adev->umsch_mm.dbglog_bo,
&adev->umsch_mm.log_gpu_addr,
(void **)&adev->umsch_mm.log_cpu_addr);
amdgpu_device_wb_free(adev, adev->umsch_mm.wb_index);
return 0;
@ -868,6 +887,106 @@ static int umsch_mm_resume(void *handle)
return umsch_mm_hw_init(adev);
}
void amdgpu_umsch_fwlog_init(struct amdgpu_umsch_mm *umsch_mm)
{
#if defined(CONFIG_DEBUG_FS)
void *fw_log_cpu_addr = umsch_mm->log_cpu_addr;
volatile struct amdgpu_umsch_fwlog *log_buf = fw_log_cpu_addr;
log_buf->header_size = sizeof(struct amdgpu_umsch_fwlog);
log_buf->buffer_size = AMDGPU_UMSCHFW_LOG_SIZE;
log_buf->rptr = log_buf->header_size;
log_buf->wptr = log_buf->header_size;
log_buf->wrapped = 0;
#endif
}
/*
* debugfs for mapping umsch firmware log buffer.
*/
#if defined(CONFIG_DEBUG_FS)
static ssize_t amdgpu_debugfs_umsch_fwlog_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
struct amdgpu_umsch_mm *umsch_mm;
void *log_buf;
volatile struct amdgpu_umsch_fwlog *plog;
unsigned int read_pos, write_pos, available, i, read_bytes = 0;
unsigned int read_num[2] = {0};
umsch_mm = file_inode(f)->i_private;
if (!umsch_mm)
return -ENODEV;
if (!umsch_mm->log_cpu_addr)
return -EFAULT;
log_buf = umsch_mm->log_cpu_addr;
plog = (volatile struct amdgpu_umsch_fwlog *)log_buf;
read_pos = plog->rptr;
write_pos = plog->wptr;
if (read_pos > AMDGPU_UMSCHFW_LOG_SIZE || write_pos > AMDGPU_UMSCHFW_LOG_SIZE)
return -EFAULT;
if (!size || (read_pos == write_pos))
return 0;
if (write_pos > read_pos) {
available = write_pos - read_pos;
read_num[0] = min_t(size_t, size, available);
} else {
read_num[0] = AMDGPU_UMSCHFW_LOG_SIZE - read_pos;
available = read_num[0] + write_pos - plog->header_size;
if (size > available)
read_num[1] = write_pos - plog->header_size;
else if (size > read_num[0])
read_num[1] = size - read_num[0];
else
read_num[0] = size;
}
for (i = 0; i < 2; i++) {
if (read_num[i]) {
if (read_pos == AMDGPU_UMSCHFW_LOG_SIZE)
read_pos = plog->header_size;
if (read_num[i] == copy_to_user((buf + read_bytes),
(log_buf + read_pos), read_num[i]))
return -EFAULT;
read_bytes += read_num[i];
read_pos += read_num[i];
}
}
plog->rptr = read_pos;
*pos += read_bytes;
return read_bytes;
}
static const struct file_operations amdgpu_debugfs_umschfwlog_fops = {
.owner = THIS_MODULE,
.read = amdgpu_debugfs_umsch_fwlog_read,
.llseek = default_llseek
};
#endif
void amdgpu_debugfs_umsch_fwlog_init(struct amdgpu_device *adev,
struct amdgpu_umsch_mm *umsch_mm)
{
#if defined(CONFIG_DEBUG_FS)
struct drm_minor *minor = adev_to_drm(adev)->primary;
struct dentry *root = minor->debugfs_root;
char name[32];
sprintf(name, "amdgpu_umsch_fwlog");
debugfs_create_file_size(name, S_IFREG | 0444, root, umsch_mm,
&amdgpu_debugfs_umschfwlog_fops,
AMDGPU_UMSCHFW_LOG_SIZE);
#endif
}
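
The firmware log read above treats the buffer as a ring whose first bytes hold the header, so a read that crosses the end is split into at most two copies and the read pointer wraps back to just past the header. A self-contained userspace sketch of that wrap logic, with memcpy() standing in for copy_to_user() and the bounds checks trimmed:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define LOG_SIZE 64

struct fwlog {
	uint32_t rptr;
	uint32_t wptr;
	uint32_t buffer_size;
	uint32_t header_size;
	uint32_t wrapped;
	uint8_t  data[LOG_SIZE - 5 * sizeof(uint32_t)];
};

static size_t fwlog_read(struct fwlog *log, uint8_t *out, size_t size)
{
	uint8_t *buf = (uint8_t *)log;
	uint32_t rd = log->rptr, wr = log->wptr;
	size_t chunk[2] = { 0, 0 }, done = 0;

	if (!size || rd == wr)
		return 0;

	if (wr > rd) {
		chunk[0] = (size < wr - rd) ? size : wr - rd;
	} else {
		chunk[0] = LOG_SIZE - rd;		/* up to the end of the buffer */
		size_t avail = chunk[0] + wr - log->header_size;
		if (size > avail)
			chunk[1] = wr - log->header_size;
		else if (size > chunk[0])
			chunk[1] = size - chunk[0];
		else
			chunk[0] = size;
	}

	for (int i = 0; i < 2; i++) {
		if (!chunk[i])
			continue;
		if (rd == LOG_SIZE)			/* wrapped past the end: skip the header */
			rd = log->header_size;
		memcpy(out + done, buf + rd, chunk[i]);
		done += chunk[i];
		rd += chunk[i];
	}
	log->rptr = rd;
	return done;
}

int main(void)
{
	struct fwlog log = { .header_size = 20, .buffer_size = LOG_SIZE };
	uint8_t out[LOG_SIZE];

	/* pretend firmware wrote 10 bytes right after the header */
	log.rptr = log.header_size;
	memcpy((uint8_t *)&log + log.header_size, "fw log msg", 10);
	log.wptr = log.header_size + 10;

	size_t n = fwlog_read(&log, out, sizeof(out));
	printf("read %zu bytes: %.*s\n", n, (int)n, (const char *)out);
	return 0;
}
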
static const struct amd_ip_funcs umsch_mm_v4_0_ip_funcs = {
.name = "umsch_mm_v4_0",
.early_init = umsch_mm_early_init,

View File

@ -58,6 +58,14 @@ struct umsch_mm_set_resource_input {
};
};
struct amdgpu_umsch_fwlog {
uint32_t rptr;
uint32_t wptr;
uint32_t buffer_size;
uint32_t header_size;
uint32_t wrapped;
};
struct umsch_mm_add_queue_input {
uint32_t process_id;
uint64_t page_table_base_addr;
@ -166,6 +174,11 @@ struct amdgpu_umsch_mm {
uint32_t agdb_index[CONTEXT_PRIORITY_NUM_LEVELS];
struct mutex mutex_hidden;
struct amdgpu_bo *dbglog_bo;
void *log_cpu_addr;
uint64_t log_gpu_addr;
uint32_t mem_size;
uint32_t log_offset;
};
int amdgpu_umsch_mm_submit_pkt(struct amdgpu_umsch_mm *umsch, void *pkt, int ndws);
@ -179,6 +192,11 @@ int amdgpu_umsch_mm_psp_execute_cmd_buf(struct amdgpu_umsch_mm *umsch);
int amdgpu_umsch_mm_ring_init(struct amdgpu_umsch_mm *umsch);
void amdgpu_debugfs_umsch_fwlog_init(struct amdgpu_device *adev,
struct amdgpu_umsch_mm *umsch);
void amdgpu_umsch_fwlog_init(struct amdgpu_umsch_mm *umsch_mm);
#define WREG32_SOC15_UMSCH(reg, value) \
do { \
uint32_t reg_offset = adev->reg_offset[VCN_HWIP][0][reg##_BASE_IDX] + reg; \

View File

@ -885,7 +885,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
ib->ptr[ib->length_dw++] = handle;
ib->ptr[ib->length_dw++] = upper_32_bits(addr);
ib->ptr[ib->length_dw++] = addr;
ib->ptr[ib->length_dw++] = 0x0000000b;
ib->ptr[ib->length_dw++] = 0x00000000;
ib->ptr[ib->length_dw++] = 0x00000014;
ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
@ -952,7 +952,7 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
ib->ptr[ib->length_dw++] = handle;
ib->ptr[ib->length_dw++] = upper_32_bits(addr);
ib->ptr[ib->length_dw++] = addr;
ib->ptr[ib->length_dw++] = 0x0000000b;
ib->ptr[ib->length_dw++] = 0x00000000;
ib->ptr[ib->length_dw++] = 0x00000014;
ib->ptr[ib->length_dw++] = 0x00000002;
@ -1078,7 +1078,6 @@ void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev)
IP_VERSION(4, 0, 3))
break;
}
dev_info(adev->dev, "Will use PSP to load VCN firmware\n");
}
}

View File

@ -395,6 +395,8 @@ static void amdgpu_virt_add_bad_page(struct amdgpu_device *adev,
else
vram_usage_va = adev->mman.drv_vram_usage_va;
memset(&bp, 0, sizeof(bp));
if (bp_block_size) {
bp_cnt = bp_block_size / sizeof(uint64_t);
for (bp_idx = 0; bp_idx < bp_cnt; bp_idx++) {
@ -583,7 +585,7 @@ static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev)
}
vf2pf_info->checksum =
amd_sriov_msg_checksum(
vf2pf_info, vf2pf_info->header.size, 0, 0);
vf2pf_info, sizeof(*vf2pf_info), 0, 0);
return 0;
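
The change above checksums the whole vf2pf struct (sizeof(*vf2pf_info)) rather than trusting header.size. The helper amd_sriov_msg_checksum() itself is not in this diff, so the byte-sum below is only an assumed illustration of covering the full struct, with a hypothetical message layout.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct vf2pf_msg {			/* hypothetical layout */
	uint32_t version;
	uint32_t fb_usage;
	uint32_t checksum;
};

static uint32_t byte_checksum(const void *obj, size_t size,
			      uint32_t key, uint32_t prev_sum)
{
	const uint8_t *p = obj;
	uint32_t sum = key + prev_sum;

	for (size_t i = 0; i < size; i++)
		sum += p[i];
	return sum;
}

int main(void)
{
	struct vf2pf_msg msg = { .version = 2, .fb_usage = 512 };

	/* checksum the whole struct while the checksum field is still zero */
	msg.checksum = byte_checksum(&msg, sizeof(msg), 0, 0);
	printf("checksum: 0x%x\n", (unsigned)msg.checksum);
	return 0;
}
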
}
@ -597,10 +599,10 @@ static void amdgpu_virt_update_vf2pf_work_item(struct work_struct *work)
if (ret) {
adev->virt.vf2pf_update_retry_cnt++;
if ((adev->virt.vf2pf_update_retry_cnt >= AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT) &&
amdgpu_sriov_runtime(adev) && !amdgpu_in_reset(adev)) {
amdgpu_sriov_runtime(adev)) {
amdgpu_ras_set_fed(adev, true);
if (amdgpu_reset_domain_schedule(adev->reset_domain,
&adev->virt.flr_work))
&adev->kfd.reset_work))
return;
else
dev_err(adev->dev, "Failed to queue work! at %s", __func__);
@ -820,6 +822,8 @@ void amdgpu_virt_post_reset(struct amdgpu_device *adev)
*/
adev->gfx.is_poweron = false;
}
adev->mes.ring.sched.ready = false;
}
bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, uint32_t ucode_id)
@ -980,6 +984,9 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 f
scratch_reg1 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg1;
scratch_reg2 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg2;
scratch_reg3 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg3;
mutex_lock(&adev->virt.rlcg_reg_lock);
if (reg_access_ctrl->spare_int)
spare_int = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->spare_int;
@ -1036,6 +1043,9 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 f
}
ret = readl(scratch_reg0);
mutex_unlock(&adev->virt.rlcg_reg_lock);
return ret;
}

View File

@ -272,6 +272,8 @@ struct amdgpu_virt {
/* the ucode id to signal the autoload */
uint32_t autoload_ucode_id;
struct mutex rlcg_reg_lock;
};
struct amdgpu_video_codec_info;

View File

@ -333,7 +333,7 @@ void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
base->next = bo->vm_bo;
bo->vm_bo = base;
if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv)
if (!amdgpu_vm_is_bo_always_valid(vm, bo))
return;
dma_resv_assert_held(vm->root.bo->tbo.base.resv);
@ -1055,7 +1055,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
params.pages_addr = NULL;
}
} else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
} else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT_FLAG(adev))) {
addr = vram_base + cursor.start;
} else {
addr = 0;
@ -1101,13 +1101,13 @@ static void amdgpu_vm_bo_get_memory(struct amdgpu_bo_va *bo_va,
* For now ignore BOs which are currently locked and potentially
* changing their location.
*/
if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv &&
if (!amdgpu_vm_is_bo_always_valid(vm, bo) &&
!dma_resv_trylock(bo->tbo.base.resv))
return;
amdgpu_bo_get_memory(bo, stats);
if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv)
dma_resv_unlock(bo->tbo.base.resv);
if (!amdgpu_vm_is_bo_always_valid(vm, bo))
dma_resv_unlock(bo->tbo.base.resv);
}
void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
@ -1203,8 +1203,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
uncached = false;
}
if (clear || (bo && bo->tbo.base.resv ==
vm->root.bo->tbo.base.resv))
if (clear || amdgpu_vm_is_bo_always_valid(vm, bo))
last_update = &vm->last_update;
else
last_update = &bo_va->last_pt_update;
@ -1246,7 +1245,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
* the evicted list so that it gets validated again on the
* next command submission.
*/
if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv) {
if (amdgpu_vm_is_bo_always_valid(vm, bo)) {
uint32_t mem_type = bo->tbo.resource->mem_type;
if (!(bo->preferred_domains &
@ -1369,7 +1368,7 @@ static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping *mapping,
struct dma_fence *fence)
{
if (mapping->flags & AMDGPU_PTE_PRT)
if (mapping->flags & AMDGPU_PTE_PRT_FLAG(adev))
amdgpu_vm_add_prt_cb(adev, fence);
kfree(mapping);
}
@ -1637,13 +1636,12 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
list_add(&mapping->list, &bo_va->invalids);
amdgpu_vm_it_insert(mapping, &vm->va);
if (mapping->flags & AMDGPU_PTE_PRT)
if (mapping->flags & AMDGPU_PTE_PRT_FLAG(adev))
amdgpu_vm_prt_get(adev);
if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
!bo_va->base.moved) {
if (amdgpu_vm_is_bo_always_valid(vm, bo) && !bo_va->base.moved)
amdgpu_vm_bo_moved(&bo_va->base);
}
trace_amdgpu_vm_bo_map(bo_va, mapping);
}
@ -1939,10 +1937,10 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
struct amdgpu_bo *bo = before->bo_va->base.bo;
amdgpu_vm_it_insert(before, &vm->va);
if (before->flags & AMDGPU_PTE_PRT)
if (before->flags & AMDGPU_PTE_PRT_FLAG(adev))
amdgpu_vm_prt_get(adev);
if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
if (amdgpu_vm_is_bo_always_valid(vm, bo) &&
!before->bo_va->base.moved)
amdgpu_vm_bo_moved(&before->bo_va->base);
} else {
@ -1954,10 +1952,10 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
struct amdgpu_bo *bo = after->bo_va->base.bo;
amdgpu_vm_it_insert(after, &vm->va);
if (after->flags & AMDGPU_PTE_PRT)
if (after->flags & AMDGPU_PTE_PRT_FLAG(adev))
amdgpu_vm_prt_get(adev);
if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
if (amdgpu_vm_is_bo_always_valid(vm, bo) &&
!after->bo_va->base.moved)
amdgpu_vm_bo_moved(&after->bo_va->base);
} else {
@ -2037,7 +2035,7 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev,
if (bo) {
dma_resv_assert_held(bo->tbo.base.resv);
if (bo->tbo.base.resv == vm->root.bo->tbo.base.resv)
if (amdgpu_vm_is_bo_always_valid(vm, bo))
ttm_bo_set_bulk_move(&bo->tbo, NULL);
for (base = &bo_va->base.bo->vm_bo; *base;
@ -2131,7 +2129,7 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
struct amdgpu_vm *vm = bo_base->vm;
if (evicted && bo->tbo.base.resv == vm->root.bo->tbo.base.resv) {
if (evicted && amdgpu_vm_is_bo_always_valid(vm, bo)) {
amdgpu_vm_bo_evicted(bo_base);
continue;
}
@ -2142,7 +2140,7 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
if (bo->tbo.type == ttm_bo_type_kernel)
amdgpu_vm_bo_relocated(bo_base);
else if (bo->tbo.base.resv == vm->root.bo->tbo.base.resv)
else if (amdgpu_vm_is_bo_always_valid(vm, bo))
amdgpu_vm_bo_moved(bo_base);
else
amdgpu_vm_bo_invalidated(bo_base);
@ -2605,7 +2603,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
dma_fence_put(vm->last_tlb_flush);
list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
if (mapping->flags & AMDGPU_PTE_PRT_FLAG(adev) && prt_fini_needed) {
amdgpu_vm_prt_fini(adev, vm);
prt_fini_needed = false;
}
@ -3006,3 +3004,16 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
xa_unlock_irqrestore(&adev->vm_manager.pasids, flags);
}
/**
* amdgpu_vm_is_bo_always_valid - check if the BO is VM always valid
*
* @vm: VM to test against.
* @bo: BO to be tested.
*
* Returns true if the BO shares the dma_resv object with the root PD and is
* always guaranteed to be valid inside the VM.
*/
bool amdgpu_vm_is_bo_always_valid(struct amdgpu_vm *vm, struct amdgpu_bo *bo)
{
return bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv;
}
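
amdgpu_vm_is_bo_always_valid() replaces the open-coded "bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv" checks scattered through the file, and being NULL-safe it also absorbs the explicit bo tests. A toy sketch of the refactor, with reduced stand-in structs:

#include <stdbool.h>
#include <stdio.h>

struct resv { int id; };
struct bo   { struct resv *resv; };
struct vm   { struct bo root; };

/* NULL-safe: a missing BO is never "always valid" */
static bool vm_is_bo_always_valid(struct vm *vm, struct bo *bo)
{
	return bo && bo->resv == vm->root.resv;
}

int main(void)
{
	struct resv shared = { 1 }, private = { 2 };
	struct vm vm = { .root = { .resv = &shared } };
	struct bo per_vm = { .resv = &shared }, exported = { .resv = &private };

	printf("per-vm bo always valid:   %d\n", vm_is_bo_always_valid(&vm, &per_vm));
	printf("exported bo always valid: %d\n", vm_is_bo_always_valid(&vm, &exported));
	printf("NULL bo always valid:     %d\n", vm_is_bo_always_valid(&vm, NULL));
	return 0;
}
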

View File

@ -94,8 +94,11 @@ struct amdgpu_mem_stats;
#define AMDGPU_VM_NORETRY_FLAGS_TF (AMDGPU_PTE_VALID | AMDGPU_PTE_SYSTEM | \
AMDGPU_PTE_PRT)
/* For GFX9 */
#define AMDGPU_PTE_MTYPE_VG10(a) ((uint64_t)(a) << 57)
#define AMDGPU_PTE_MTYPE_VG10_MASK AMDGPU_PTE_MTYPE_VG10(3ULL)
#define AMDGPU_PTE_MTYPE_VG10_SHIFT(mtype) ((uint64_t)(mtype) << 57)
#define AMDGPU_PTE_MTYPE_VG10_MASK AMDGPU_PTE_MTYPE_VG10_SHIFT(3ULL)
#define AMDGPU_PTE_MTYPE_VG10(flags, mtype) \
(((uint64_t)(flags) & (~AMDGPU_PTE_MTYPE_VG10_MASK)) | \
AMDGPU_PTE_MTYPE_VG10_SHIFT(mtype))
#define AMDGPU_MTYPE_NC 0
#define AMDGPU_MTYPE_CC 2
@ -108,8 +111,33 @@ struct amdgpu_mem_stats;
| AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_CC))
/* gfx10 */
#define AMDGPU_PTE_MTYPE_NV10(a) ((uint64_t)(a) << 48)
#define AMDGPU_PTE_MTYPE_NV10_MASK AMDGPU_PTE_MTYPE_NV10(7ULL)
#define AMDGPU_PTE_MTYPE_NV10_SHIFT(mtype) ((uint64_t)(mtype) << 48)
#define AMDGPU_PTE_MTYPE_NV10_MASK AMDGPU_PTE_MTYPE_NV10_SHIFT(7ULL)
#define AMDGPU_PTE_MTYPE_NV10(flags, mtype) \
(((uint64_t)(flags) & (~AMDGPU_PTE_MTYPE_NV10_MASK)) | \
AMDGPU_PTE_MTYPE_NV10_SHIFT(mtype))
/* gfx12 */
#define AMDGPU_PTE_PRT_GFX12 (1ULL << 56)
#define AMDGPU_PTE_PRT_FLAG(adev) \
((amdgpu_ip_version((adev), GC_HWIP, 0) >= IP_VERSION(12, 0, 0)) ? AMDGPU_PTE_PRT_GFX12 : AMDGPU_PTE_PRT)
#define AMDGPU_PTE_MTYPE_GFX12_SHIFT(mtype) ((uint64_t)(mtype) << 54)
#define AMDGPU_PTE_MTYPE_GFX12_MASK AMDGPU_PTE_MTYPE_GFX12_SHIFT(3ULL)
#define AMDGPU_PTE_MTYPE_GFX12(flags, mtype) \
(((uint64_t)(flags) & (~AMDGPU_PTE_MTYPE_GFX12_MASK)) | \
AMDGPU_PTE_MTYPE_GFX12_SHIFT(mtype))
#define AMDGPU_PTE_IS_PTE (1ULL << 63)
/* PDE Block Fragment Size for gfx v12 */
#define AMDGPU_PDE_BFS_GFX12(a) ((uint64_t)((a) & 0x1fULL) << 58)
#define AMDGPU_PDE_BFS_FLAG(adev, a) \
((amdgpu_ip_version((adev), GC_HWIP, 0) >= IP_VERSION(12, 0, 0)) ? AMDGPU_PDE_BFS_GFX12(a) : AMDGPU_PDE_BFS(a))
/* PDE is handled as PTE for gfx v12 */
#define AMDGPU_PDE_PTE_GFX12 (1ULL << 63)
#define AMDGPU_PDE_PTE_FLAG(adev) \
((amdgpu_ip_version((adev), GC_HWIP, 0) >= IP_VERSION(12, 0, 0)) ? AMDGPU_PDE_PTE_GFX12 : AMDGPU_PDE_PTE)
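
The reworked MTYPE helpers above take the existing flags, clear the MTYPE field and OR in the new value, so callers can no longer forget the masking step. A small sketch with the GFX9 (VG10) layout, a 2-bit field at bit 57:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define PTE_MTYPE_VG10_SHIFT(mtype) ((uint64_t)(mtype) << 57)
#define PTE_MTYPE_VG10_MASK         PTE_MTYPE_VG10_SHIFT(3ULL)
#define PTE_MTYPE_VG10(flags, mtype) \
	(((uint64_t)(flags) & ~PTE_MTYPE_VG10_MASK) | PTE_MTYPE_VG10_SHIFT(mtype))

#define MTYPE_NC 0
#define MTYPE_CC 2

int main(void)
{
	uint64_t flags = 0x1 | PTE_MTYPE_VG10_SHIFT(MTYPE_CC);	/* valid bit + CC */

	/* switch the mapping to NC without disturbing the other bits */
	flags = PTE_MTYPE_VG10(flags, MTYPE_NC);
	printf("pte flags: 0x%" PRIx64 "\n", flags);
	return 0;
}
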
/* How to program VM fault handling */
#define AMDGPU_VM_FAULT_STOP_NEVER 0
@ -561,6 +589,8 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m);
int amdgpu_vm_pt_map_tables(struct amdgpu_device *adev, struct amdgpu_vm *vm);
bool amdgpu_vm_is_bo_always_valid(struct amdgpu_vm *vm, struct amdgpu_bo *bo);
/**
* amdgpu_vm_tlb_seq - return tlb flush sequence number
* @vm: the amdgpu_vm structure to query

View File

@ -413,7 +413,7 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
if (adev->asic_type >= CHIP_VEGA10) {
if (level != AMDGPU_VM_PTB) {
/* Handle leaf PDEs as PTEs */
flags |= AMDGPU_PDE_PTE;
flags |= AMDGPU_PDE_PTE_FLAG(adev);
amdgpu_gmc_get_vm_pde(adev, level,
&value, &flags);
} else {
@ -761,12 +761,12 @@ static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params,
struct amdgpu_device *adev = params->adev;
if (level != AMDGPU_VM_PTB) {
flags |= AMDGPU_PDE_PTE;
flags |= AMDGPU_PDE_PTE_FLAG(params->adev);
amdgpu_gmc_get_vm_pde(adev, level, &addr, &flags);
} else if (adev->asic_type >= CHIP_VEGA10 &&
!(flags & AMDGPU_PTE_VALID) &&
!(flags & AMDGPU_PTE_PRT)) {
!(flags & AMDGPU_PTE_PRT_FLAG(params->adev))) {
/* Workaround for fault priority problem on GMC9 */
flags |= AMDGPU_PTE_EXECUTABLE;

View File

@ -102,6 +102,11 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
if (!r)
r = amdgpu_sync_push_to_job(&sync, p->job);
amdgpu_sync_free(&sync);
if (r) {
p->num_dw_left = 0;
amdgpu_job_free(p->job);
}
return r;
}

View File

@ -128,6 +128,7 @@ int amdgpu_vpe_configure_dpm(struct amdgpu_vpe *vpe)
struct dpm_clock *VPEClks;
struct dpm_clock *SOCClks;
uint32_t idx;
uint32_t vpeclk_enabled_num = 0;
uint32_t pratio_vmax_vnorm = 0, pratio_vnorm_vmid = 0, pratio_vmid_vmin = 0;
uint16_t pratio_vmin_freq = 0, pratio_vmid_freq = 0, pratio_vnorm_freq = 0, pratio_vmax_freq = 0;
@ -144,6 +145,14 @@ int amdgpu_vpe_configure_dpm(struct amdgpu_vpe *vpe)
SOCClks = clock_table.SocClocks;
VPEClks = clock_table.VPEClocks;
/* Confirm enabled vpe clk num
* Enabled VPE clocks are ordered from low to high in VPEClks
* The highest valid clock index+1 is the number of VPEClks
*/
for (idx = PP_SMU_NUM_VPECLK_DPM_LEVELS; idx && !vpeclk_enabled_num; idx--)
	if (VPEClks[idx - 1].Freq)
		vpeclk_enabled_num = idx;
/* vpe dpm only cares 4 levels. */
for (idx = 0; idx < VPE_MAX_DPM_LEVEL; idx++) {
uint32_t soc_dpm_level;
@ -155,8 +164,8 @@ int amdgpu_vpe_configure_dpm(struct amdgpu_vpe *vpe)
soc_dpm_level = (idx * 2) + 1;
/* clamp the max level */
if (soc_dpm_level > PP_SMU_NUM_VPECLK_DPM_LEVELS - 1)
soc_dpm_level = PP_SMU_NUM_VPECLK_DPM_LEVELS - 1;
if (soc_dpm_level > vpeclk_enabled_num - 1)
	soc_dpm_level = vpeclk_enabled_num - 1;
min_freq = (SOCClks[soc_dpm_level].Freq < VPEClks[soc_dpm_level].Freq) ?
SOCClks[soc_dpm_level].Freq : VPEClks[soc_dpm_level].Freq;
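
The scan added above walks the clock table down from the top until it finds a non-zero frequency; that index is the number of enabled VPE clock levels, which then bounds the requested DPM level. A standalone sketch with made-up table contents:

#include <stdint.h>
#include <stdio.h>

#define NUM_VPECLK_DPM_LEVELS 8

int main(void)
{
	/* only the first four levels are populated by the (simulated) SMU */
	uint32_t vpeclk_freq[NUM_VPECLK_DPM_LEVELS] = { 100, 200, 300, 400 };
	uint32_t enabled = 0;

	for (uint32_t idx = NUM_VPECLK_DPM_LEVELS; idx && !enabled; idx--)
		if (vpeclk_freq[idx - 1])
			enabled = idx;

	uint32_t requested_level = 6;
	if (requested_level > enabled - 1)
		requested_level = enabled - 1;	/* clamp to the highest enabled level */

	printf("%u enabled levels, using level %u (%u MHz)\n",
	       enabled, requested_level, vpeclk_freq[requested_level]);
	return 0;
}
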

View File

@ -31,6 +31,8 @@
#include "amdgpu_atomfirmware.h"
#include "atom.h"
#define AMDGPU_MAX_SG_SEGMENT_SIZE (2UL << 30)
struct amdgpu_vram_reservation {
u64 start;
u64 size;
@ -518,9 +520,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
else
min_block_size = mgr->default_page_size;
/* Limit maximum size to 2GiB due to SG table limitations */
size = min(remaining_size, 2ULL << 30);
size = remaining_size;
if ((size >= (u64)pages_per_block << PAGE_SHIFT) &&
!(size & (((u64)pages_per_block << PAGE_SHIFT) - 1)))
min_block_size = (u64)pages_per_block << PAGE_SHIFT;
@ -660,7 +660,7 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
amdgpu_res_first(res, offset, length, &cursor);
while (cursor.remaining) {
num_entries++;
amdgpu_res_next(&cursor, cursor.size);
amdgpu_res_next(&cursor, min(cursor.size, AMDGPU_MAX_SG_SEGMENT_SIZE));
}
r = sg_alloc_table(*sgt, num_entries, GFP_KERNEL);
@ -680,7 +680,7 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
amdgpu_res_first(res, offset, length, &cursor);
for_each_sgtable_sg((*sgt), sg, i) {
phys_addr_t phys = cursor.start + adev->gmc.aper_base;
size_t size = cursor.size;
unsigned long size = min(cursor.size, AMDGPU_MAX_SG_SEGMENT_SIZE);
dma_addr_t addr;
addr = dma_map_resource(dev, phys, size, dir,
@ -693,7 +693,7 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
sg_dma_address(sg) = addr;
sg_dma_len(sg) = size;
amdgpu_res_next(&cursor, cursor.size);
amdgpu_res_next(&cursor, size);
}
return 0;
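
With AMDGPU_MAX_SG_SEGMENT_SIZE the 2 GiB limit moves from the whole allocation to individual scatter-gather entries: large blocks are simply split across several entries. A sketch of that splitting over one contiguous range, in place of a real ttm resource cursor:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_SG_SEGMENT_SIZE (2ULL << 30)	/* 2 GiB */

int main(void)
{
	uint64_t start = 0, remaining = 5ULL << 30;	/* one 5 GiB block */
	unsigned int num_entries = 0;

	while (remaining) {
		uint64_t size = remaining < MAX_SG_SEGMENT_SIZE ?
				remaining : MAX_SG_SEGMENT_SIZE;

		printf("sg[%u]: offset 0x%" PRIx64 ", len 0x%" PRIx64 "\n",
		       num_entries++, start, size);
		start += size;
		remaining -= size;
	}
	printf("%u entries for the block\n", num_entries);
	return 0;
}
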

View File

@ -434,6 +434,9 @@ static ssize_t amdgpu_xgmi_show_connected_port_num(struct device *dev,
}
}
if (i == top->num_nodes)
return -EINVAL;
for (i = 0; i < top->num_nodes; i++) {
for (j = 0; j < top->nodes[i].num_links; j++)
/* node id in sysfs starts from 1 rather than 0 so +1 here */
@ -1443,7 +1446,7 @@ static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,
if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
dev_warn(adev->dev, "Failed to disallow df cstate");
ret1 = amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DISALLOW);
ret1 = amdgpu_dpm_set_pm_policy(adev, PP_PM_POLICY_XGMI_PLPD, XGMI_PLPD_DISALLOW);
if (ret1 && ret1 != -EOPNOTSUPP)
dev_warn(adev->dev, "Failed to disallow XGMI power down");
@ -1452,7 +1455,7 @@ static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev,
if (amdgpu_ras_intr_triggered())
return ret2;
ret1 = amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DEFAULT);
ret1 = amdgpu_dpm_set_pm_policy(adev, PP_PM_POLICY_XGMI_PLPD, XGMI_PLPD_DEFAULT);
if (ret1 && ret1 != -EOPNOTSUPP)
dev_warn(adev->dev, "Failed to allow XGMI power down");

View File

@ -501,6 +501,12 @@ static int aqua_vanjaram_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr,
if (mode == AMDGPU_AUTO_COMPUTE_PARTITION_MODE) {
mode = __aqua_vanjaram_get_auto_mode(xcp_mgr);
if (mode == AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) {
dev_err(adev->dev,
"Invalid config, no compatible compute partition mode found, available memory partitions: %d",
adev->gmc.num_mem_partitions);
return -EINVAL;
}
} else if (!__aqua_vanjaram_is_valid_mode(xcp_mgr, mode)) {
dev_err(adev->dev,
"Invalid compute partition mode requested, requested: %s, available memory partitions: %d",

View File

@ -0,0 +1,121 @@
/*
* Copyright 2023 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __CLEARSTATE_GFX12_H_
#define __CLEARSTATE_GFX12_H_
static const unsigned int gfx12_SECT_CONTEXT_def_1[] = {
0x00000000, //mmSC_MEM_TEMPORAL
0x00000000, //mmSC_MEM_SPEC_READ
0x00000000, //mmPA_SC_VPORT_0_TL
0x00000000, //mmPA_SC_VPORT_0_BR
0x00000000, //mmPA_SC_VPORT_1_TL
0x00000000, //mmPA_SC_VPORT_1_BR
0x00000000, //mmPA_SC_VPORT_2_TL
0x00000000, //mmPA_SC_VPORT_2_BR
0x00000000, //mmPA_SC_VPORT_3_TL
0x00000000, //mmPA_SC_VPORT_3_BR
0x00000000, //mmPA_SC_VPORT_4_TL
0x00000000, //mmPA_SC_VPORT_4_BR
0x00000000, //mmPA_SC_VPORT_5_TL
0x00000000, //mmPA_SC_VPORT_5_BR
0x00000000, //mmPA_SC_VPORT_6_TL
0x00000000, //mmPA_SC_VPORT_6_BR
0x00000000, //mmPA_SC_VPORT_7_TL
0x00000000, //mmPA_SC_VPORT_7_BR
0x00000000, //mmPA_SC_VPORT_8_TL
0x00000000, //mmPA_SC_VPORT_8_BR
0x00000000, //mmPA_SC_VPORT_9_TL
0x00000000, //mmPA_SC_VPORT_9_BR
0x00000000, //mmPA_SC_VPORT_10_TL
0x00000000, //mmPA_SC_VPORT_10_BR
0x00000000, //mmPA_SC_VPORT_11_TL
0x00000000, //mmPA_SC_VPORT_11_BR
0x00000000, //mmPA_SC_VPORT_12_TL
0x00000000, //mmPA_SC_VPORT_12_BR
0x00000000, //mmPA_SC_VPORT_13_TL
0x00000000, //mmPA_SC_VPORT_13_BR
0x00000000, //mmPA_SC_VPORT_14_TL
0x00000000, //mmPA_SC_VPORT_14_BR
0x00000000, //mmPA_SC_VPORT_15_TL
0x00000000, //mmPA_SC_VPORT_15_BR
};
static const unsigned int gfx12_SECT_CONTEXT_def_2[] = {
0x00000000, //mmPA_CL_PROG_NEAR_CLIP_Z
0x00000000, //mmPA_RATE_CNTL
};
static const unsigned int gfx12_SECT_CONTEXT_def_3[] = {
0x00000000, //mmCP_PERFMON_CNTX_CNTL
};
static const unsigned int gfx12_SECT_CONTEXT_def_4[] = {
0x00000000, //mmCONTEXT_RESERVED_REG0
0x00000000, //mmCONTEXT_RESERVED_REG1
0x00000000, //mmPA_SC_CLIPRECT_0_EXT
0x00000000, //mmPA_SC_CLIPRECT_1_EXT
0x00000000, //mmPA_SC_CLIPRECT_2_EXT
0x00000000, //mmPA_SC_CLIPRECT_3_EXT
};
static const unsigned int gfx12_SECT_CONTEXT_def_5[] = {
0x00000000, //mmPA_SC_HIZ_INFO
0x00000000, //mmPA_SC_HIS_INFO
0x00000000, //mmPA_SC_HIZ_BASE
0x00000000, //mmPA_SC_HIZ_BASE_EXT
0x00000000, //mmPA_SC_HIZ_SIZE_XY
0x00000000, //mmPA_SC_HIS_BASE
0x00000000, //mmPA_SC_HIS_BASE_EXT
0x00000000, //mmPA_SC_HIS_SIZE_XY
0x00000000, //mmPA_SC_BINNER_OUTPUT_TIMEOUT_CNTL
0x00000000, //mmPA_SC_BINNER_DYNAMIC_BATCH_LIMIT
0x00000000, //mmPA_SC_HISZ_CONTROL
};
static const unsigned int gfx12_SECT_CONTEXT_def_6[] = {
0x00000000, //mmCB_MEM0_INFO
0x00000000, //mmCB_MEM1_INFO
0x00000000, //mmCB_MEM2_INFO
0x00000000, //mmCB_MEM3_INFO
0x00000000, //mmCB_MEM4_INFO
0x00000000, //mmCB_MEM5_INFO
0x00000000, //mmCB_MEM6_INFO
0x00000000, //mmCB_MEM7_INFO
};
static const struct cs_extent_def gfx12_SECT_CONTEXT_defs[] = {
{gfx12_SECT_CONTEXT_def_1, 0x0000a03e, 34 },
{gfx12_SECT_CONTEXT_def_2, 0x0000a0cc, 2 },
{gfx12_SECT_CONTEXT_def_3, 0x0000a0d8, 1 },
{gfx12_SECT_CONTEXT_def_4, 0x0000a0db, 6 },
{gfx12_SECT_CONTEXT_def_5, 0x0000a2e5, 11 },
{gfx12_SECT_CONTEXT_def_6, 0x0000a3c0, 8 },
{ 0, 0, 0 }
};
static const struct cs_section_def gfx12_cs_data[] = {
{ gfx12_SECT_CONTEXT_defs, SECT_CONTEXT },
{ 0, SECT_NONE }
};
#endif /* __CLEARSTATE_GFX12_H_ */

View File

@ -70,6 +70,8 @@ static u32 df_v1_7_get_hbm_channel_number(struct amdgpu_device *adev)
int fb_channel_number;
fb_channel_number = adev->df.funcs->get_fb_channel_number(adev);
if (fb_channel_number >= ARRAY_SIZE(df_v1_7_channel_number))
fb_channel_number = 0;
return df_v1_7_channel_number[fb_channel_number];
}

View File

@ -366,7 +366,89 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_10_1[] = {
SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_A),
SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_B),
SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_ADDR),
SOC15_REG_ENTRY_STR(GC, 0, mmRLC_LX6_CORE_PDEBUG_INST)
SOC15_REG_ENTRY_STR(GC, 0, mmRLC_LX6_CORE_PDEBUG_INST),
/* cp header registers */
SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_HEADER_DUMP),
/* SE status registers */
SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0),
SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1),
SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2),
SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3)
};
static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_10[] = {
/* compute registers */
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE_HI),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_LO),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_HI),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_SUSPEND_WG_STATE_OFFSET),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_STATUS)
};
static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_10[] = {
/* gfx queue registers */
SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_ACTIVE),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_QUEUE_PRIORITY),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_BASE),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_BASE_HI),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_OFFSET),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CSMD_RPTR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_WPTR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_WPTR_HI),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_DEQUEUE_REQUEST),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_MAPPED),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_QUE_MGR_CONTROL),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_HQ_CONTROL0),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_HQ_STATUS0),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR_POLL_ADDR_LO),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR_POLL_ADDR_HI),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_OFFSET),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_CSMD_RPTR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HQD_CE_WPTR_HI),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_MQD_BASE_ADDR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_MQD_BASE_ADDR_HI),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI)
};
static const struct soc15_reg_golden golden_settings_gc_10_1[] = {
@ -3651,14 +3733,8 @@ static void gfx10_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
enum amdgpu_unmap_queues_action action,
u64 gpu_addr, u64 seq)
{
struct amdgpu_device *adev = kiq_ring->adev;
uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
if (adev->enable_mes && !adev->gfx.kiq[0].ring.sched.ready) {
amdgpu_mes_unmap_legacy_queue(adev, ring, action, gpu_addr, seq);
return;
}
amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
PACKET3_UNMAP_QUEUES_ACTION(action) |
@ -3916,33 +3992,18 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
memset(&ib, 0, sizeof(ib));
if (ring->is_mes_queue) {
uint32_t padding, offset;
r = amdgpu_device_wb_get(adev, &index);
if (r)
return r;
offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
padding = amdgpu_mes_ctx_get_offs(ring,
AMDGPU_MES_CTX_PADDING_OFFS);
gpu_addr = adev->wb.gpu_addr + (index * 4);
adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
cpu_ptr = &adev->wb.wb[index];
ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
ib.ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, padding);
cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, padding);
*cpu_ptr = cpu_to_le32(0xCAFEDEAD);
} else {
r = amdgpu_device_wb_get(adev, &index);
if (r)
return r;
gpu_addr = adev->wb.gpu_addr + (index * 4);
adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
cpu_ptr = &adev->wb.wb[index];
r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
goto err1;
}
r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
goto err1;
}
ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
@ -3969,12 +4030,10 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
else
r = -EINVAL;
err2:
if (!ring->is_mes_queue)
amdgpu_ib_free(adev, &ib, NULL);
amdgpu_ib_free(adev, &ib, NULL);
dma_fence_put(f);
err1:
if (!ring->is_mes_queue)
amdgpu_device_wb_free(adev, index);
amdgpu_device_wb_free(adev, index);
return r;
}
@ -4583,19 +4642,44 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
hw_prio, NULL);
}
static void gfx_v10_0_alloc_dump_mem(struct amdgpu_device *adev)
static void gfx_v10_0_alloc_ip_dump(struct amdgpu_device *adev)
{
uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1);
uint32_t *ptr;
uint32_t inst;
ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
if (ptr == NULL) {
DRM_ERROR("Failed to allocate memory for IP Dump\n");
adev->gfx.ip_dump = NULL;
adev->gfx.reg_count = 0;
DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
adev->gfx.ip_dump_core = NULL;
} else {
adev->gfx.ip_dump = ptr;
adev->gfx.reg_count = reg_count;
adev->gfx.ip_dump_core = ptr;
}
/* Allocate memory for compute queue registers for all the instances */
reg_count = ARRAY_SIZE(gc_cp_reg_list_10);
inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
adev->gfx.mec.num_queue_per_pipe;
ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
if (ptr == NULL) {
DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
adev->gfx.ip_dump_compute_queues = NULL;
} else {
adev->gfx.ip_dump_compute_queues = ptr;
}
/* Allocate memory for gfx queue registers for all the instances */
reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_10);
inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me *
adev->gfx.me.num_queue_per_pipe;
ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
if (ptr == NULL) {
DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n");
adev->gfx.ip_dump_gfx_queues = NULL;
} else {
adev->gfx.ip_dump_gfx_queues = ptr;
}
}
@ -4724,18 +4808,16 @@ static int gfx_v10_0_sw_init(void *handle)
}
}
if (!adev->enable_mes_kiq) {
r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE, 0);
if (r) {
DRM_ERROR("Failed to init KIQ BOs!\n");
return r;
}
r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
if (r)
return r;
r = amdgpu_gfx_kiq_init(adev, GFX10_MEC_HPD_SIZE, 0);
if (r) {
DRM_ERROR("Failed to init KIQ BOs!\n");
return r;
}
r = amdgpu_gfx_kiq_init_ring(adev, xcc_id);
if (r)
return r;
r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd), 0);
if (r)
return r;
@ -4751,7 +4833,7 @@ static int gfx_v10_0_sw_init(void *handle)
gfx_v10_0_gpu_early_init(adev);
gfx_v10_0_alloc_dump_mem(adev);
gfx_v10_0_alloc_ip_dump(adev);
return 0;
}
@ -4789,10 +4871,8 @@ static int gfx_v10_0_sw_fini(void *handle)
amdgpu_gfx_mqd_sw_fini(adev, 0);
if (!adev->enable_mes_kiq) {
amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
amdgpu_gfx_kiq_fini(adev, 0);
}
amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring);
amdgpu_gfx_kiq_fini(adev, 0);
gfx_v10_0_pfp_fini(adev);
gfx_v10_0_ce_fini(adev);
@ -4805,7 +4885,9 @@ static int gfx_v10_0_sw_fini(void *handle)
gfx_v10_0_free_microcode(adev);
kfree(adev->gfx.ip_dump);
kfree(adev->gfx.ip_dump_core);
kfree(adev->gfx.ip_dump_compute_queues);
kfree(adev->gfx.ip_dump_gfx_queues);
return 0;
}
@ -6994,10 +7076,7 @@ static int gfx_v10_0_cp_resume(struct amdgpu_device *adev)
return r;
}
if (adev->enable_mes_kiq && adev->mes.kiq_hw_init)
r = amdgpu_mes_kiq_hw_init(adev);
else
r = gfx_v10_0_kiq_resume(adev);
r = gfx_v10_0_kiq_resume(adev);
if (r)
return r;
@ -8052,15 +8131,24 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev,
static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev,
unsigned int vmid)
{
u32 data;
u32 reg, pre_data, data;
reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
/* not for *_SOC15 */
data = RREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL);
if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev))
pre_data = RREG32_NO_KIQ(reg);
else
pre_data = RREG32(reg);
data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK);
data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
if (pre_data != data) {
if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) {
WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
} else {
	WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
}
}
}
static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid)
@ -8309,45 +8397,17 @@ static u64 gfx_v10_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
static void gfx_v10_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
uint32_t *wptr_saved;
uint32_t *is_queue_unmap;
uint64_t aggregated_db_index;
uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size;
uint64_t wptr_tmp;
if (ring->is_mes_queue) {
wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
sizeof(uint32_t));
aggregated_db_index =
amdgpu_mes_get_aggregated_doorbell_index(adev,
AMDGPU_MES_PRIORITY_LEVEL_NORMAL);
wptr_tmp = ring->wptr & ring->buf_mask;
atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
*wptr_saved = wptr_tmp;
/* assume doorbell always being used by mes mapped queue */
if (*is_queue_unmap) {
WDOORBELL64(aggregated_db_index, wptr_tmp);
WDOORBELL64(ring->doorbell_index, wptr_tmp);
} else {
WDOORBELL64(ring->doorbell_index, wptr_tmp);
if (*is_queue_unmap)
WDOORBELL64(aggregated_db_index, wptr_tmp);
}
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
ring->wptr);
WDOORBELL64(ring->doorbell_index, ring->wptr);
} else {
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
ring->wptr);
WDOORBELL64(ring->doorbell_index, ring->wptr);
} else {
WREG32_SOC15(GC, 0, mmCP_RB0_WPTR,
lower_32_bits(ring->wptr));
WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI,
upper_32_bits(ring->wptr));
}
WREG32_SOC15(GC, 0, mmCP_RB0_WPTR,
lower_32_bits(ring->wptr));
WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI,
upper_32_bits(ring->wptr));
}
}
@ -8372,42 +8432,13 @@ static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
uint32_t *wptr_saved;
uint32_t *is_queue_unmap;
uint64_t aggregated_db_index;
uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size;
uint64_t wptr_tmp;
if (ring->is_mes_queue) {
wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
sizeof(uint32_t));
aggregated_db_index =
amdgpu_mes_get_aggregated_doorbell_index(adev,
AMDGPU_MES_PRIORITY_LEVEL_NORMAL);
wptr_tmp = ring->wptr & ring->buf_mask;
atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
*wptr_saved = wptr_tmp;
/* assume doorbell always used by mes mapped queue */
if (*is_queue_unmap) {
WDOORBELL64(aggregated_db_index, wptr_tmp);
WDOORBELL64(ring->doorbell_index, wptr_tmp);
} else {
WDOORBELL64(ring->doorbell_index, wptr_tmp);
if (*is_queue_unmap)
WDOORBELL64(aggregated_db_index, wptr_tmp);
}
if (ring->use_doorbell) {
atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
ring->wptr);
WDOORBELL64(ring->doorbell_index, ring->wptr);
} else {
/* XXX check if swapping is necessary on BE */
if (ring->use_doorbell) {
atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
ring->wptr);
WDOORBELL64(ring->doorbell_index, ring->wptr);
} else {
BUG(); /* only DOORBELL method supported on gfx10 now */
}
BUG(); /* only DOORBELL method supported on gfx10 now */
}
}
@ -8466,10 +8497,6 @@ static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
(!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
}
if (ring->is_mes_queue)
/* inherit vmid from mqd */
control |= 0x400000;
amdgpu_ring_write(ring, header);
BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
amdgpu_ring_write(ring,
@ -8489,10 +8516,6 @@ static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
unsigned int vmid = AMDGPU_JOB_GET_VMID(job);
u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
if (ring->is_mes_queue)
/* inherit vmid from mqd */
control |= 0x40000000;
/* Currently, there is a high possibility to get wave ID mismatch
* between ME and GDS, leading to a hw deadlock, because ME generates
* different wave IDs than the GDS expects. This situation happens
@ -8550,8 +8573,7 @@ static void gfx_v10_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
amdgpu_ring_write(ring, upper_32_bits(addr));
amdgpu_ring_write(ring, lower_32_bits(seq));
amdgpu_ring_write(ring, upper_32_bits(seq));
amdgpu_ring_write(ring, ring->is_mes_queue ?
(ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0);
amdgpu_ring_write(ring, 0);
}
static void gfx_v10_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
@ -8579,10 +8601,7 @@ static void gfx_v10_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
static void gfx_v10_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned int vmid, uint64_t pd_addr)
{
if (ring->is_mes_queue)
gfx_v10_0_ring_invalidate_tlbs(ring, 0, 0, false, 0);
else
amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
/* compute doesn't have PFP */
if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
@ -8733,19 +8752,9 @@ static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
if (ring->is_mes_queue) {
offset = offsetof(struct amdgpu_mes_ctx_meta_data,
gfx[0].gfx_meta_data) +
offsetof(struct v10_gfx_meta_data, ce_payload);
ce_payload_gpu_addr =
amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
ce_payload_cpu_addr =
amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
} else {
offset = offsetof(struct v10_gfx_meta_data, ce_payload);
ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
}
offset = offsetof(struct v10_gfx_meta_data, ce_payload);
ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
@ -8771,28 +8780,13 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
void *de_payload_cpu_addr;
int cnt;
if (ring->is_mes_queue) {
offset = offsetof(struct amdgpu_mes_ctx_meta_data,
gfx[0].gfx_meta_data) +
offsetof(struct v10_gfx_meta_data, de_payload);
de_payload_gpu_addr =
amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
de_payload_cpu_addr =
amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
offset = offsetof(struct v10_gfx_meta_data, de_payload);
de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
offset = offsetof(struct amdgpu_mes_ctx_meta_data,
gfx[0].gds_backup) +
offsetof(struct v10_gfx_meta_data, de_payload);
gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
} else {
offset = offsetof(struct v10_gfx_meta_data, de_payload);
de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
AMDGPU_CSA_SIZE - adev->gds.gds_size,
PAGE_SIZE);
}
gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
AMDGPU_CSA_SIZE - adev->gds.gds_size,
PAGE_SIZE);
de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
@ -9044,49 +9038,34 @@ static int gfx_v10_0_eop_irq(struct amdgpu_device *adev,
int i;
u8 me_id, pipe_id, queue_id;
struct amdgpu_ring *ring;
uint32_t mes_queue_id = entry->src_data[0];
DRM_DEBUG("IH: CP EOP\n");
if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
struct amdgpu_mes_queue *queue;
me_id = (entry->ring_id & 0x0c) >> 2;
pipe_id = (entry->ring_id & 0x03) >> 0;
queue_id = (entry->ring_id & 0x70) >> 4;
mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
spin_lock(&adev->mes.queue_id_lock);
queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
if (queue) {
DRM_DEBUG("process mes queue id = %d\n", mes_queue_id);
amdgpu_fence_process(queue->ring);
}
spin_unlock(&adev->mes.queue_id_lock);
} else {
me_id = (entry->ring_id & 0x0c) >> 2;
pipe_id = (entry->ring_id & 0x03) >> 0;
queue_id = (entry->ring_id & 0x70) >> 4;
switch (me_id) {
case 0:
if (pipe_id == 0)
amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
else
amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
break;
case 1:
case 2:
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
/* Per-queue interrupt is supported for MEC starting from VI.
* The interrupt can only be enabled/disabled per pipe instead
* of per queue.
*/
if ((ring->me == me_id) &&
(ring->pipe == pipe_id) &&
(ring->queue == queue_id))
amdgpu_fence_process(ring);
}
break;
switch (me_id) {
case 0:
if (pipe_id == 0)
amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
else
amdgpu_fence_process(&adev->gfx.gfx_ring[1]);
break;
case 1:
case 2:
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
/* Per-queue interrupt is supported for MEC starting from VI.
* The interrupt can only be enabled/disabled per pipe instead
* of per queue.
*/
if ((ring->me == me_id) &&
(ring->pipe == pipe_id) &&
(ring->queue == queue_id))
amdgpu_fence_process(ring);
}
break;
}
return 0;
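For reference, a small standalone sketch (illustrative, not part of this patch) of the ring_id decode used in the EOP handler above, with the same masks and shifts: me in bits [3:2], pipe in bits [1:0], queue in bits [6:4].

#include <stdint.h>
#include <stdio.h>

struct eop_src {
	uint8_t me_id;
	uint8_t pipe_id;
	uint8_t queue_id;
};

/* Same bit layout as the handler: me_id = (ring_id & 0x0c) >> 2, etc. */
static struct eop_src decode_ring_id(uint32_t ring_id)
{
	struct eop_src s = {
		.me_id    = (ring_id & 0x0c) >> 2,
		.pipe_id  = (ring_id & 0x03) >> 0,
		.queue_id = (ring_id & 0x70) >> 4,
	};
	return s;
}

int main(void)
{
	struct eop_src s = decode_ring_id(0x25);

	printf("me %u pipe %u queue %u\n", s.me_id, s.pipe_id, s.queue_id);
	return 0;
}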
@ -9270,30 +9249,129 @@ static void gfx_v10_0_emit_mem_sync(struct amdgpu_ring *ring)
static void gfx_v10_ip_print(void *handle, struct drm_printer *p)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
uint32_t i;
uint32_t i, j, k, reg, index = 0;
uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1);
if (!adev->gfx.ip_dump)
if (!adev->gfx.ip_dump_core)
return;
for (i = 0; i < reg_count; i++)
drm_printf(p, "%-50s \t 0x%08x\n",
gc_reg_list_10_1[i].reg_name,
adev->gfx.ip_dump[i]);
adev->gfx.ip_dump_core[i]);
/* print compute queue registers for all instances */
if (!adev->gfx.ip_dump_compute_queues)
return;
reg_count = ARRAY_SIZE(gc_cp_reg_list_10);
drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
adev->gfx.mec.num_mec,
adev->gfx.mec.num_pipe_per_mec,
adev->gfx.mec.num_queue_per_pipe);
for (i = 0; i < adev->gfx.mec.num_mec; i++) {
for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
for (reg = 0; reg < reg_count; reg++) {
drm_printf(p, "%-50s \t 0x%08x\n",
gc_cp_reg_list_10[reg].reg_name,
adev->gfx.ip_dump_compute_queues[index + reg]);
}
index += reg_count;
}
}
}
/* print gfx queue registers for all instances */
if (!adev->gfx.ip_dump_gfx_queues)
return;
reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_10);
drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n",
adev->gfx.me.num_me,
adev->gfx.me.num_pipe_per_me,
adev->gfx.me.num_queue_per_pipe);
for (i = 0; i < adev->gfx.me.num_me; i++) {
for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k);
for (reg = 0; reg < reg_count; reg++) {
drm_printf(p, "%-50s \t 0x%08x\n",
gc_gfx_queue_reg_list_10[reg].reg_name,
adev->gfx.ip_dump_gfx_queues[index + reg]);
}
index += reg_count;
}
}
}
}
static void gfx_v10_ip_dump(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
uint32_t i;
uint32_t i, j, k, reg, index = 0;
uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1);
if (!adev->gfx.ip_dump)
if (!adev->gfx.ip_dump_core)
return;
amdgpu_gfx_off_ctrl(adev, false);
for (i = 0; i < reg_count; i++)
adev->gfx.ip_dump[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_10_1[i]));
adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_10_1[i]));
amdgpu_gfx_off_ctrl(adev, true);
/* dump compute queue registers for all instances */
if (!adev->gfx.ip_dump_compute_queues)
return;
reg_count = ARRAY_SIZE(gc_cp_reg_list_10);
amdgpu_gfx_off_ctrl(adev, false);
mutex_lock(&adev->srbm_mutex);
for (i = 0; i < adev->gfx.mec.num_mec; i++) {
for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
/* ME0 is for GFX so start from 1 for CP */
nv_grbm_select(adev, 1 + i, j, k, 0);
for (reg = 0; reg < reg_count; reg++) {
adev->gfx.ip_dump_compute_queues[index + reg] =
RREG32(SOC15_REG_ENTRY_OFFSET(
gc_cp_reg_list_10[reg]));
}
index += reg_count;
}
}
}
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
amdgpu_gfx_off_ctrl(adev, true);
/* dump gfx queue registers for all instances */
if (!adev->gfx.ip_dump_gfx_queues)
return;
reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_10);
amdgpu_gfx_off_ctrl(adev, false);
mutex_lock(&adev->srbm_mutex);
for (i = 0; i < adev->gfx.me.num_me; i++) {
for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
nv_grbm_select(adev, i, j, k, 0);
for (reg = 0; reg < reg_count; reg++) {
adev->gfx.ip_dump_gfx_queues[index + reg] =
RREG32(SOC15_REG_ENTRY_OFFSET(
gc_gfx_queue_reg_list_10[reg]));
}
index += reg_count;
}
}
}
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
amdgpu_gfx_off_ctrl(adev, true);
}
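The per-queue dump buffers above are flat arrays walked in mec/pipe/queue order, with reg_count words per queue. A minimal sketch of the equivalent offset computation (helper name is illustrative, not driver API):

#include <stdint.h>
#include <stdio.h>

/* Offset of the first register word for queue (mec, pipe, queue) in a
 * buffer laid out exactly like the ip_dump loops above. */
static uint32_t queue_dump_offset(uint32_t mec, uint32_t pipe, uint32_t queue,
				  uint32_t num_pipe_per_mec,
				  uint32_t num_queue_per_pipe,
				  uint32_t reg_count)
{
	return ((mec * num_pipe_per_mec + pipe) * num_queue_per_pipe + queue) *
	       reg_count;
}

int main(void)
{
	/* e.g. 4 pipes per MEC, 8 queues per pipe, 38 registers per queue */
	printf("offset = %u\n", queue_dump_offset(1, 2, 3, 4, 8, 38));
	return 0;
}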

View File

@ -94,6 +94,150 @@ MODULE_FIRMWARE("amdgpu/gc_11_5_1_me.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_rlc.bin");
static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = {
SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2),
SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3),
SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1),
SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2),
SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3),
SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1),
SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1),
SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT),
SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT),
SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT),
SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2),
SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2),
SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0),
SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE),
SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE),
SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_BASE),
SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_RPTR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_RB1_WPTR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ),
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ),
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO),
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI),
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ),
SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, regGDS_PROTECTION_FAULT),
SOC15_REG_ENTRY_STR(GC, 0, regGDS_VM_PROTECTION_FAULT),
SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2),
SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, regRLC_UTCL1_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES),
SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL),
SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG),
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL),
SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL),
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC1_INSTR_PNTR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_DEBUG_INTERRUPT_INSTR_PNTR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_INSTR_PNTR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS),
/* cp header registers */
SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP),
/* SE status registers */
SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0),
SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1),
SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2),
SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3),
SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE4),
SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE5)
};
static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_11[] = {
/* compute registers */
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET),
SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS)
};
static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_11[] = {
/* gfx queue registers */
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_ACTIVE),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_VMID),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUANTUM),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE_HI),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_OFFSET),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CNTL),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CSMD_RPTR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR_HI),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_MAPPED),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUE_MGR_CONTROL),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_CONTROL0),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_STATUS0),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR_HI),
SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO),
SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI),
SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR),
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO),
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI),
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ),
SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ)
};
static const struct soc15_reg_golden golden_settings_gc_11_0[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000)
};
@ -929,9 +1073,9 @@ static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
int me, int pipe, int queue)
{
int r;
struct amdgpu_ring *ring;
unsigned int irq_type;
unsigned int hw_prio;
ring = &adev->gfx.gfx_ring[ring_id];
@ -950,11 +1094,10 @@ static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id,
sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue);
irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe;
r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
return r;
return 0;
hw_prio = amdgpu_gfx_is_high_priority_graphics_queue(adev, ring) ?
AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
hw_prio, NULL);
}
static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
@ -1331,6 +1474,47 @@ static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev)
return 0;
}
static void gfx_v11_0_alloc_ip_dump(struct amdgpu_device *adev)
{
uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
uint32_t *ptr;
uint32_t inst;
ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
if (ptr == NULL) {
DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
adev->gfx.ip_dump_core = NULL;
} else {
adev->gfx.ip_dump_core = ptr;
}
/* Allocate memory for compute queue registers for all the instances */
reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
adev->gfx.mec.num_queue_per_pipe;
ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
if (ptr == NULL) {
DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
adev->gfx.ip_dump_compute_queues = NULL;
} else {
adev->gfx.ip_dump_compute_queues = ptr;
}
/* Allocate memory for gfx queue registers for all the instances */
reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me *
adev->gfx.me.num_queue_per_pipe;
ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
if (ptr == NULL) {
DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n");
adev->gfx.ip_dump_gfx_queues = NULL;
} else {
adev->gfx.ip_dump_gfx_queues = ptr;
}
}
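The allocation above sizes each dump buffer as (number of registers) x (number of queue instances) 32-bit words. A hedged userspace analogue using calloc(), which zero-initializes like kcalloc() does (names and counts below are illustrative):

#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>

/* Mirrors the sizing logic: reg_count * (mec * pipes * queues) u32 words. */
static uint32_t *alloc_queue_dump(size_t reg_count, size_t num_mec,
				  size_t pipes_per_mec, size_t queues_per_pipe)
{
	size_t inst = num_mec * pipes_per_mec * queues_per_pipe;

	return calloc(reg_count * inst, sizeof(uint32_t));
}

int main(void)
{
	uint32_t *buf = alloc_queue_dump(38, 2, 4, 8);

	if (!buf) {
		fprintf(stderr, "allocation failed\n");
		return 1;
	}
	printf("allocated %zu words\n", (size_t)38 * 2 * 4 * 8);
	free(buf);
	return 0;
}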
static int gfx_v11_0_sw_init(void *handle)
{
int i, j, k, r, ring_id = 0;
@ -1485,6 +1669,8 @@ static int gfx_v11_0_sw_init(void *handle)
return -EINVAL;
}
gfx_v11_0_alloc_ip_dump(adev);
return 0;
}
@ -1544,6 +1730,10 @@ static int gfx_v11_0_sw_fini(void *handle)
gfx_v11_0_free_microcode(adev);
kfree(adev->gfx.ip_dump_core);
kfree(adev->gfx.ip_dump_compute_queues);
kfree(adev->gfx.ip_dump_gfx_queues);
return 0;
}
@ -3615,6 +3805,24 @@ static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev)
(adev->doorbell_index.userqueue_end * 2) << 2);
}
static void gfx_v11_0_gfx_mqd_set_priority(struct amdgpu_device *adev,
struct v11_gfx_mqd *mqd,
struct amdgpu_mqd_prop *prop)
{
bool priority = 0;
u32 tmp;
/* set up default queue priority level
* 0x0 = low priority, 0x1 = high priority
*/
if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH)
priority = 1;
tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY);
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority);
mqd->cp_gfx_hqd_queue_priority = tmp;
}
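The helper above folds the pipe priority into the single PRIORITY_LEVEL bit of CP_GFX_HQD_QUEUE_PRIORITY (0x0 = low, 0x1 = high). A tiny standalone sketch of the same mapping, with an illustrative enum value rather than the driver's real constant:

#include <stdio.h>

enum pipe_prio { PIPE_PRIO_NORMAL = 0, PIPE_PRIO_HIGH = 2 }; /* illustrative */

/* 0x0 = low priority, 0x1 = high priority, as in the comment above. */
static unsigned int hqd_queue_priority_level(enum pipe_prio pipe_prio)
{
	return pipe_prio == PIPE_PRIO_HIGH ? 1 : 0;
}

int main(void)
{
	printf("normal -> %u, high -> %u\n",
	       hqd_queue_priority_level(PIPE_PRIO_NORMAL),
	       hqd_queue_priority_level(PIPE_PRIO_HIGH));
	return 0;
}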
static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
struct amdgpu_mqd_prop *prop)
{
@ -3643,11 +3851,8 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0);
mqd->cp_gfx_hqd_vmid = 0;
/* set up default queue priority level
* 0x0 = low priority, 0x1 = high priority */
tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY);
tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0);
mqd->cp_gfx_hqd_queue_priority = tmp;
/* set up gfx queue priority */
gfx_v11_0_gfx_mqd_set_priority(adev, mqd, prop);
/* set up time quantum */
tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM);
@ -5030,24 +5235,31 @@ static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev,
static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid)
{
u32 data;
u32 reg, pre_data, data;
amdgpu_gfx_off_ctrl(adev, false);
reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev))
pre_data = RREG32_NO_KIQ(reg);
else
pre_data = RREG32(reg);
data = RREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL);
data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK);
data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
if (pre_data != data) {
if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) {
WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data);
} else
WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data);
}
amdgpu_gfx_off_ctrl(adev, true);
if (ring
&& amdgpu_sriov_is_pp_one_vf(adev)
&& (pre_data != data)
&& ((ring->funcs->type == AMDGPU_RING_TYPE_GFX)
|| (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) {
uint32_t reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL);
amdgpu_ring_emit_wreg(ring, reg, data);
}
}
@ -5293,7 +5505,7 @@ static void gfx_v11_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
}
reg_mem_engine = 0;
} else {
ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
ref_and_mask = nbio_hf_reg->ref_and_mask_cp0 << ring->pipe;
reg_mem_engine = 1; /* pfp */
}
@ -6151,6 +6363,134 @@ static void gfx_v11_0_emit_mem_sync(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
}
static void gfx_v11_ip_print(void *handle, struct drm_printer *p)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
uint32_t i, j, k, reg, index = 0;
uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
if (!adev->gfx.ip_dump_core)
return;
for (i = 0; i < reg_count; i++)
drm_printf(p, "%-50s \t 0x%08x\n",
gc_reg_list_11_0[i].reg_name,
adev->gfx.ip_dump_core[i]);
/* print compute queue registers for all instances */
if (!adev->gfx.ip_dump_compute_queues)
return;
reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
adev->gfx.mec.num_mec,
adev->gfx.mec.num_pipe_per_mec,
adev->gfx.mec.num_queue_per_pipe);
for (i = 0; i < adev->gfx.mec.num_mec; i++) {
for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
for (reg = 0; reg < reg_count; reg++) {
drm_printf(p, "%-50s \t 0x%08x\n",
gc_cp_reg_list_11[reg].reg_name,
adev->gfx.ip_dump_compute_queues[index + reg]);
}
index += reg_count;
}
}
}
/* print gfx queue registers for all instances */
if (!adev->gfx.ip_dump_gfx_queues)
return;
reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n",
adev->gfx.me.num_me,
adev->gfx.me.num_pipe_per_me,
adev->gfx.me.num_queue_per_pipe);
for (i = 0; i < adev->gfx.me.num_me; i++) {
for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k);
for (reg = 0; reg < reg_count; reg++) {
drm_printf(p, "%-50s \t 0x%08x\n",
gc_gfx_queue_reg_list_11[reg].reg_name,
adev->gfx.ip_dump_gfx_queues[index + reg]);
}
index += reg_count;
}
}
}
}
static void gfx_v11_ip_dump(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
uint32_t i, j, k, reg, index = 0;
uint32_t reg_count = ARRAY_SIZE(gc_reg_list_11_0);
if (!adev->gfx.ip_dump_core)
return;
amdgpu_gfx_off_ctrl(adev, false);
for (i = 0; i < reg_count; i++)
adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_11_0[i]));
amdgpu_gfx_off_ctrl(adev, true);
/* dump compute queue registers for all instances */
if (!adev->gfx.ip_dump_compute_queues)
return;
reg_count = ARRAY_SIZE(gc_cp_reg_list_11);
amdgpu_gfx_off_ctrl(adev, false);
mutex_lock(&adev->srbm_mutex);
for (i = 0; i < adev->gfx.mec.num_mec; i++) {
for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
/* ME0 is for GFX so start from 1 for CP */
soc21_grbm_select(adev, 1+i, j, k, 0);
for (reg = 0; reg < reg_count; reg++) {
adev->gfx.ip_dump_compute_queues[index + reg] =
RREG32(SOC15_REG_ENTRY_OFFSET(
gc_cp_reg_list_11[reg]));
}
index += reg_count;
}
}
}
soc21_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
amdgpu_gfx_off_ctrl(adev, true);
/* dump gfx queue registers for all instances */
if (!adev->gfx.ip_dump_gfx_queues)
return;
reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_11);
amdgpu_gfx_off_ctrl(adev, false);
mutex_lock(&adev->srbm_mutex);
for (i = 0; i < adev->gfx.me.num_me; i++) {
for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
soc21_grbm_select(adev, i, j, k, 0);
for (reg = 0; reg < reg_count; reg++) {
adev->gfx.ip_dump_gfx_queues[index + reg] =
RREG32(SOC15_REG_ENTRY_OFFSET(
gc_gfx_queue_reg_list_11[reg]));
}
index += reg_count;
}
}
}
soc21_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
amdgpu_gfx_off_ctrl(adev, true);
}
static const struct amd_ip_funcs gfx_v11_0_ip_funcs = {
.name = "gfx_v11_0",
.early_init = gfx_v11_0_early_init,
@ -6169,8 +6509,8 @@ static const struct amd_ip_funcs gfx_v11_0_ip_funcs = {
.set_clockgating_state = gfx_v11_0_set_clockgating_state,
.set_powergating_state = gfx_v11_0_set_powergating_state,
.get_clockgating_state = gfx_v11_0_get_clockgating_state,
.dump_ip_state = NULL,
.print_ip_state = NULL,
.dump_ip_state = gfx_v11_ip_dump,
.print_ip_state = gfx_v11_ip_print,
};
static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {

File diff suppressed because it is too large

View File

@ -1,5 +1,5 @@
/*
* Copyright 2019 Advanced Micro Devices, Inc.
* Copyright 2023 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -21,9 +21,9 @@
*
*/
#ifndef __MES_V10_1_H__
#define __MES_V10_1_H__
#ifndef __GFX_V12_0_H__
#define __GFX_V12_0_H__
extern const struct amdgpu_ip_block_version mes_v10_1_ip_block;
extern const struct amdgpu_ip_block_version gfx_v12_0_ip_block;
#endif

View File

@ -963,8 +963,6 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name);
if (err)
goto out;
out:
if (err) {
pr_err("gfx7: Failed to load firmware \"%s\"\n", fw_name);
@ -2757,44 +2755,6 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
return 0;
}
struct hqd_registers {
u32 cp_mqd_base_addr;
u32 cp_mqd_base_addr_hi;
u32 cp_hqd_active;
u32 cp_hqd_vmid;
u32 cp_hqd_persistent_state;
u32 cp_hqd_pipe_priority;
u32 cp_hqd_queue_priority;
u32 cp_hqd_quantum;
u32 cp_hqd_pq_base;
u32 cp_hqd_pq_base_hi;
u32 cp_hqd_pq_rptr;
u32 cp_hqd_pq_rptr_report_addr;
u32 cp_hqd_pq_rptr_report_addr_hi;
u32 cp_hqd_pq_wptr_poll_addr;
u32 cp_hqd_pq_wptr_poll_addr_hi;
u32 cp_hqd_pq_doorbell_control;
u32 cp_hqd_pq_wptr;
u32 cp_hqd_pq_control;
u32 cp_hqd_ib_base_addr;
u32 cp_hqd_ib_base_addr_hi;
u32 cp_hqd_ib_rptr;
u32 cp_hqd_ib_control;
u32 cp_hqd_iq_timer;
u32 cp_hqd_iq_rptr;
u32 cp_hqd_dequeue_request;
u32 cp_hqd_dma_offload;
u32 cp_hqd_sema_cmd;
u32 cp_hqd_msg_type;
u32 cp_hqd_atomic0_preop_lo;
u32 cp_hqd_atomic0_preop_hi;
u32 cp_hqd_atomic1_preop_lo;
u32 cp_hqd_atomic1_preop_hi;
u32 cp_hqd_hq_scheduler0;
u32 cp_hqd_hq_scheduler1;
u32 cp_mqd_control;
};
static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev,
int mec, int pipe)
{

View File

@ -149,6 +149,135 @@ MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin");
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir 0x0026
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX 1
static const struct amdgpu_hwip_reg_entry gc_reg_list_9[] = {
SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_RPTR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ),
SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT),
SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT),
SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_CNTL),
SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL1_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL1_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, mmSQ_UTCL1_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL1_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL),
SOC15_REG_ENTRY_STR(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS),
SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT),
SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND),
SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE),
SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1),
SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2),
SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE),
SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE),
SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE),
SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT),
SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6),
/* cp header registers */
SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP),
/* SE status registers */
SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0),
SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1),
SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2),
SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3)
};
static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = {
/* compute queue registers */
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_VMID),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ACTIVE),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PERSISTENT_STATE),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PIPE_PRIORITY),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUEUE_PRIORITY),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_QUANTUM),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_BASE_HI),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_RPTR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_CONTROL),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_BASE_ADDR_HI),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_RPTR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_IB_CONTROL),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_DEQUEUE_REQUEST),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_CONTROL),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_RPTR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_EVENTS),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_LO),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_BASE_ADDR_HI),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_CONTROL),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_OFFSET),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CNTL_STACK_SIZE),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_WG_STATE_OFFSET),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_CTX_SAVE_SIZE),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GDS_RESOURCE_STATE),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_ERROR),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_EOP_WPTR_MEM),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI),
SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GFX_STATUS),
};
enum ta_ras_gfx_subblock {
/*CPC*/
TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
@ -1994,6 +2123,34 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
hw_prio, NULL);
}
static void gfx_v9_0_alloc_ip_dump(struct amdgpu_device *adev)
{
uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
uint32_t *ptr;
uint32_t inst;
ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL);
if (ptr == NULL) {
DRM_ERROR("Failed to allocate memory for GFX IP Dump\n");
adev->gfx.ip_dump_core = NULL;
} else {
adev->gfx.ip_dump_core = ptr;
}
/* Allocate memory for compute queue registers for all the instances */
reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec *
adev->gfx.mec.num_queue_per_pipe;
ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL);
if (ptr == NULL) {
DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n");
adev->gfx.ip_dump_compute_queues = NULL;
} else {
adev->gfx.ip_dump_compute_queues = ptr;
}
}
static int gfx_v9_0_sw_init(void *handle)
{
int i, j, k, r, ring_id;
@ -2171,6 +2328,8 @@ static int gfx_v9_0_sw_init(void *handle)
return -EINVAL;
}
gfx_v9_0_alloc_ip_dump(adev);
return 0;
}
@ -2206,6 +2365,9 @@ static int gfx_v9_0_sw_fini(void *handle)
}
gfx_v9_0_free_microcode(adev);
kfree(adev->gfx.ip_dump_core);
kfree(adev->gfx.ip_dump_compute_queues);
return 0;
}
@ -6840,6 +7002,88 @@ static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
}
}
static void gfx_v9_ip_print(void *handle, struct drm_printer *p)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
uint32_t i, j, k, reg, index = 0;
uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
if (!adev->gfx.ip_dump_core)
return;
for (i = 0; i < reg_count; i++)
drm_printf(p, "%-50s \t 0x%08x\n",
gc_reg_list_9[i].reg_name,
adev->gfx.ip_dump_core[i]);
/* print compute queue registers for all instances */
if (!adev->gfx.ip_dump_compute_queues)
return;
reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
adev->gfx.mec.num_mec,
adev->gfx.mec.num_pipe_per_mec,
adev->gfx.mec.num_queue_per_pipe);
for (i = 0; i < adev->gfx.mec.num_mec; i++) {
for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
for (reg = 0; reg < reg_count; reg++) {
drm_printf(p, "%-50s \t 0x%08x\n",
gc_cp_reg_list_9[reg].reg_name,
adev->gfx.ip_dump_compute_queues[index + reg]);
}
index += reg_count;
}
}
}
}
static void gfx_v9_ip_dump(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
uint32_t i, j, k, reg, index = 0;
uint32_t reg_count = ARRAY_SIZE(gc_reg_list_9);
if (!adev->gfx.ip_dump_core || !adev->gfx.num_gfx_rings)
return;
amdgpu_gfx_off_ctrl(adev, false);
for (i = 0; i < reg_count; i++)
adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_9[i]));
amdgpu_gfx_off_ctrl(adev, true);
/* dump compute queue registers for all instances */
if (!adev->gfx.ip_dump_compute_queues)
return;
reg_count = ARRAY_SIZE(gc_cp_reg_list_9);
amdgpu_gfx_off_ctrl(adev, false);
mutex_lock(&adev->srbm_mutex);
for (i = 0; i < adev->gfx.mec.num_mec; i++) {
for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
/* ME0 is for GFX so start from 1 for CP */
soc15_grbm_select(adev, 1 + i, j, k, 0, 0);
for (reg = 0; reg < reg_count; reg++) {
adev->gfx.ip_dump_compute_queues[index + reg] =
RREG32(SOC15_REG_ENTRY_OFFSET(
gc_cp_reg_list_9[reg]));
}
index += reg_count;
}
}
}
soc15_grbm_select(adev, 0, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
amdgpu_gfx_off_ctrl(adev, true);
}
static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
.name = "gfx_v9_0",
.early_init = gfx_v9_0_early_init,
@ -6856,8 +7100,8 @@ static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
.set_clockgating_state = gfx_v9_0_set_clockgating_state,
.set_powergating_state = gfx_v9_0_set_powergating_state,
.get_clockgating_state = gfx_v9_0_get_clockgating_state,
.dump_ip_state = NULL,
.print_ip_state = NULL,
.dump_ip_state = gfx_v9_ip_dump,
.print_ip_state = gfx_v9_ip_print,
};
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {

View File

@ -41,7 +41,9 @@
#include "amdgpu_aca.h"
MODULE_FIRMWARE("amdgpu/gc_9_4_3_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_9_4_4_mec.bin");
MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin");
MODULE_FIRMWARE("amdgpu/gc_9_4_4_rlc.bin");
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
@ -755,6 +757,7 @@ static int gfx_v9_4_3_gpu_early_init(struct amdgpu_device *adev)
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
@ -1392,21 +1395,23 @@ static int gfx_v9_4_3_rlc_resume(struct amdgpu_device *adev)
static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring,
unsigned vmid)
{
u32 reg, data;
u32 reg, pre_data, data;
reg = SOC15_REG_OFFSET(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL);
if (amdgpu_sriov_is_pp_one_vf(adev))
data = RREG32_NO_KIQ(reg);
if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev))
pre_data = RREG32_NO_KIQ(reg);
else
data = RREG32(reg);
pre_data = RREG32(reg);
data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
data = pre_data & (~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK);
data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
if (amdgpu_sriov_is_pp_one_vf(adev))
WREG32_SOC15_NO_KIQ(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL, data);
else
WREG32_SOC15(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL, data);
if (pre_data != data) {
if (amdgpu_sriov_is_pp_one_vf(adev) && !amdgpu_sriov_runtime(adev)) {
WREG32_SOC15_NO_KIQ(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL, data);
} else
WREG32_SOC15(GC, GET_INST(GC, 0), regRLC_SPM_MC_CNTL, data);
}
}
static const struct soc15_reg_rlcg rlcg_access_gc_9_4_3[] = {
@ -2502,6 +2507,7 @@ static int gfx_v9_4_3_set_clockgating_state(void *handle,
num_xcc = NUM_XCC(adev->gfx.xcc_mask);
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
for (i = 0; i < num_xcc; i++)
gfx_v9_4_3_xcc_update_gfx_clock_gating(
adev, state == AMD_CG_STATE_GATE, i);
@ -4138,6 +4144,7 @@ static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev)
/* init asic gds info */
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
/* 9.4.3 removed all the GDS internal memory,
* only GWS opcodes are supported in the kernel, such as
* barrier and semaphore. */
@ -4150,6 +4157,7 @@ static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev)
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
/* deprecated for 9.4.3, no usage at all */
adev->gds.gds_compute_max_wave_id = 0;
break;

View File

@ -0,0 +1,501 @@
/*
* Copyright 2023 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "amdgpu.h"
#include "gfxhub_v12_0.h"
#include "gc/gc_12_0_0_offset.h"
#include "gc/gc_12_0_0_sh_mask.h"
#include "soc24_enum.h"
#include "soc15_common.h"
#define regGCVM_L2_CNTL3_DEFAULT 0x80120007
#define regGCVM_L2_CNTL4_DEFAULT 0x000000c1
#define regGCVM_L2_CNTL5_DEFAULT 0x00003fe0
#define regGRBM_GFX_INDEX_DEFAULT 0xe0000000
static const char *gfxhub_client_ids[] = {
/* TODO */
};
static uint32_t gfxhub_v12_0_get_invalidate_req(unsigned int vmid,
uint32_t flush_type)
{
u32 req = 0;
/* invalidate using legacy mode on vmid */
req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
PER_VMID_INVALIDATE_REQ, 1 << vmid);
req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ,
CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
return req;
}
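The request word above is assembled field by field with REG_SET_FIELD(): a one-hot per-VMID bit (1 << vmid), the flush type, and the invalidate-level flags. A hedged standalone sketch of the same packing with placeholder bit positions (the real offsets come from the GCVM_INVALIDATE_ENG0_REQ register definition):

#include <stdint.h>
#include <stdio.h>

/* Placeholder field positions, not the real GC 12.0 layout. */
#define PER_VMID_REQ_SHIFT   0   /* 16-bit one-hot VMID mask */
#define FLUSH_TYPE_SHIFT     16
#define INV_L2_PTES_SHIFT    19
#define INV_L2_PDE0_SHIFT    20
#define INV_L2_PDE1_SHIFT    21
#define INV_L2_PDE2_SHIFT    22
#define INV_L1_PTES_SHIFT    23

static uint32_t build_invalidate_req(unsigned int vmid, uint32_t flush_type)
{
	uint32_t req = 0;

	req |= (1u << vmid) << PER_VMID_REQ_SHIFT;     /* legacy per-VMID invalidate */
	req |= (flush_type & 0x7) << FLUSH_TYPE_SHIFT;
	req |= 1u << INV_L2_PTES_SHIFT;
	req |= 1u << INV_L2_PDE0_SHIFT;
	req |= 1u << INV_L2_PDE1_SHIFT;
	req |= 1u << INV_L2_PDE2_SHIFT;
	req |= 1u << INV_L1_PTES_SHIFT;
	return req;
}

int main(void)
{
	printf("req(vmid=3, flush=0) = 0x%08x\n", build_invalidate_req(3, 0));
	return 0;
}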
static void
gfxhub_v12_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
uint32_t status)
{
u32 cid = REG_GET_FIELD(status,
GCVM_L2_PROTECTION_FAULT_STATUS_LO32, CID);
dev_err(adev->dev,
"GCVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
status);
dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" : gfxhub_client_ids[cid],
cid);
dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
REG_GET_FIELD(status,
GCVM_L2_PROTECTION_FAULT_STATUS_LO32, MORE_FAULTS));
dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
REG_GET_FIELD(status,
GCVM_L2_PROTECTION_FAULT_STATUS_LO32, WALKER_ERROR));
dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
REG_GET_FIELD(status,
GCVM_L2_PROTECTION_FAULT_STATUS_LO32, PERMISSION_FAULTS));
dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
REG_GET_FIELD(status,
GCVM_L2_PROTECTION_FAULT_STATUS_LO32, MAPPING_ERROR));
dev_err(adev->dev, "\t RW: 0x%lx\n",
REG_GET_FIELD(status,
GCVM_L2_PROTECTION_FAULT_STATUS_LO32, RW));
}
static u64 gfxhub_v12_0_get_fb_location(struct amdgpu_device *adev)
{
u64 base = RREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_BASE);
base &= GCMC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
base <<= 24;
return base;
}
static u64 gfxhub_v12_0_get_mc_fb_offset(struct amdgpu_device *adev)
{
return (u64)RREG32_SOC15(GC, 0, regGCMC_VM_FB_OFFSET) << 24;
}
static void gfxhub_v12_0_setup_vm_pt_regs(struct amdgpu_device *adev,
uint32_t vmid,
uint64_t page_table_base)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
hub->ctx_addr_distance * vmid,
lower_32_bits(page_table_base));
WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
hub->ctx_addr_distance * vmid,
upper_32_bits(page_table_base));
}
static void gfxhub_v12_0_init_gart_aperture_regs(struct amdgpu_device *adev)
{
uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
gfxhub_v12_0_setup_vm_pt_regs(adev, 0, pt_base);
WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
(u32)(adev->gmc.gart_start >> 12));
WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
(u32)(adev->gmc.gart_start >> 44));
WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
(u32)(adev->gmc.gart_end >> 12));
WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
(u32)(adev->gmc.gart_end >> 44));
}
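The GART start/end addresses above are programmed as page numbers split across a LO32/HI32 register pair: bits [43:12] of the GPU VA go in the low register and the bits above that in the high one. A minimal sketch of that split (helper name is illustrative):

#include <stdint.h>
#include <stdio.h>

/* Split a GPU virtual address into the LO32/HI32 page-number halves used by
 * the CONTEXT0 page-table start/end registers (4 KiB pages, >>12 and >>44). */
static void split_gart_addr(uint64_t addr, uint32_t *lo, uint32_t *hi)
{
	*lo = (uint32_t)(addr >> 12);
	*hi = (uint32_t)(addr >> 44);
}

int main(void)
{
	uint32_t lo, hi;

	split_gart_addr(0x0000800012345000ULL, &lo, &hi);
	printf("lo=0x%08x hi=0x%08x\n", lo, hi);
	return 0;
}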
static void gfxhub_v12_0_init_system_aperture_regs(struct amdgpu_device *adev)
{
uint64_t value;
/* Program the AGP BAR */
WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BASE, 0);
WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
WREG32_SOC15(GC, 0, regGCMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
/* Program the system aperture low logical page number. */
WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR,
min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
/* Set default page address. */
value = adev->mem_scratch.gpu_addr - adev->gmc.vram_start
+ adev->vm_manager.vram_base_offset;
WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
(u32)(value >> 12));
WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
(u32)(value >> 44));
/* Program "protection fault". */
WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
(u32)(adev->dummy_page_addr >> 12));
WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
(u32)((u64)adev->dummy_page_addr >> 44));
WREG32_FIELD15_PREREG(GC, 0, GCVM_L2_PROTECTION_FAULT_CNTL2,
ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
}
static void gfxhub_v12_0_init_tlb_regs(struct amdgpu_device *adev)
{
uint32_t tmp;
/* Setup TLB control */
tmp = RREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL);
tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
ENABLE_ADVANCED_DRIVER_MODEL, 1);
tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
MTYPE, MTYPE_UC); /* UC, uncached */
WREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, tmp);
}
static void gfxhub_v12_0_init_cache_regs(struct amdgpu_device *adev)
{
uint32_t tmp;
/* These registers are not accessible to VF-SRIOV.
* The PF will program them instead.
*/
if (amdgpu_sriov_vf(adev))
return;
/* Setup L2 cache */
tmp = RREG32_SOC15(GC, 0, regGCVM_L2_CNTL);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_CACHE, 1);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
/* XXX for emulation, refer to closed source code. */
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL,
L2_PDE0_CACHE_TAG_GENERATION_MODE, 0);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
WREG32_SOC15(GC, 0, regGCVM_L2_CNTL, tmp);
tmp = RREG32_SOC15(GC, 0, regGCVM_L2_CNTL2);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
WREG32_SOC15(GC, 0, regGCVM_L2_CNTL2, tmp);
tmp = regGCVM_L2_CNTL3_DEFAULT;
if (adev->gmc.translate_further) {
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 12);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
} else {
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 9);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3,
L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
}
WREG32_SOC15(GC, 0, regGCVM_L2_CNTL3, tmp);
tmp = regGCVM_L2_CNTL4_DEFAULT;
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
WREG32_SOC15(GC, 0, regGCVM_L2_CNTL4, tmp);
tmp = regGCVM_L2_CNTL5_DEFAULT;
tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
WREG32_SOC15(GC, 0, regGCVM_L2_CNTL5, tmp);
}
static void gfxhub_v12_0_enable_system_domain(struct amdgpu_device *adev)
{
uint32_t tmp;
tmp = RREG32_SOC15(GC, 0, regGCVM_CONTEXT0_CNTL);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_CNTL, tmp);
}
static void gfxhub_v12_0_disable_identity_aperture(struct amdgpu_device *adev)
{
/* These registers are not accessible to VF-SRIOV.
* The PF will program them instead.
*/
if (amdgpu_sriov_vf(adev))
return;
WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
0xFFFFFFFF);
WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
0x0000000F);
WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32,
0);
WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32,
0);
WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0);
WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0);
}
static void gfxhub_v12_0_setup_vmid_config(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
int i;
uint32_t tmp;
for (i = 0; i <= 14; i++) {
tmp = RREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL, i);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
adev->vm_manager.num_level);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
PAGE_TABLE_BLOCK_SIZE,
adev->vm_manager.block_size - 9);
/* Send no-retry XNACK on fault to suppress VM fault storm. */
tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
!amdgpu_noretry);
WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL,
i * hub->ctx_distance, tmp);
WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
i * hub->ctx_addr_distance, 0);
WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
i * hub->ctx_addr_distance, 0);
WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
i * hub->ctx_addr_distance,
lower_32_bits(adev->vm_manager.max_pfn - 1));
WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
i * hub->ctx_addr_distance,
upper_32_bits(adev->vm_manager.max_pfn - 1));
}
hub->vm_cntx_cntl = tmp;
}
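In the loop above PAGE_TABLE_BLOCK_SIZE is programmed as adev->vm_manager.block_size - 9, i.e. the VM block size expressed relative to the 512-entry (2^9) minimum. A small sketch of that relationship, assuming block_size is log2 of the number of 4 KiB pages per page-table block:

#include <stdio.h>

/* Field value written to PAGE_TABLE_BLOCK_SIZE for a given VM block size,
 * where block_size is assumed to be log2(4 KiB pages per block), minimum 9. */
static unsigned int page_table_block_size_field(unsigned int block_size)
{
	return block_size - 9;
}

int main(void)
{
	/* default 512-entry (2 MiB) blocks -> field 0 */
	printf("block_size 9 -> %u\n", page_table_block_size_field(9));
	/* 2048-entry (8 MiB) blocks -> field 2 */
	printf("block_size 11 -> %u\n", page_table_block_size_field(11));
	return 0;
}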
static void gfxhub_v12_0_program_invalidation(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
unsigned i;
for (i = 0 ; i < 18; ++i) {
WREG32_SOC15_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
i * hub->eng_addr_distance, 0xffffffff);
WREG32_SOC15_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
i * hub->eng_addr_distance, 0x1f);
}
}
static int gfxhub_v12_0_gart_enable(struct amdgpu_device *adev)
{
if (amdgpu_sriov_vf(adev)) {
/*
* GCMC_VM_FB_LOCATION_BASE/TOP is NULL for VF, because they are
* VF copy registers, so the vbios post doesn't program them; for
* SR-IOV the driver needs to program them
*/
WREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_BASE,
adev->gmc.vram_start >> 24);
WREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_TOP,
adev->gmc.vram_end >> 24);
}
/* GART Enable. */
gfxhub_v12_0_init_gart_aperture_regs(adev);
gfxhub_v12_0_init_system_aperture_regs(adev);
gfxhub_v12_0_init_tlb_regs(adev);
gfxhub_v12_0_init_cache_regs(adev);
gfxhub_v12_0_enable_system_domain(adev);
gfxhub_v12_0_disable_identity_aperture(adev);
gfxhub_v12_0_setup_vmid_config(adev);
gfxhub_v12_0_program_invalidation(adev);
return 0;
}
static void gfxhub_v12_0_gart_disable(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
u32 tmp;
u32 i;
/* Disable all tables */
for (i = 0; i < 16; i++)
WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_CNTL,
i * hub->ctx_distance, 0);
/* Setup TLB control */
tmp = RREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL);
tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL,
ENABLE_ADVANCED_DRIVER_MODEL, 0);
WREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, tmp);
/* Setup L2 cache */
WREG32_FIELD15_PREREG(GC, 0, GCVM_L2_CNTL, ENABLE_L2_CACHE, 0);
WREG32_SOC15(GC, 0, regGCVM_L2_CNTL3, 0);
}
/**
* gfxhub_v12_0_set_fault_enable_default - update GART/VM fault handling
*
* @adev: amdgpu_device pointer
* @value: true redirects VM faults to the default page
*/
static void gfxhub_v12_0_set_fault_enable_default(struct amdgpu_device *adev,
bool value)
{
u32 tmp;
/* Do not halt the CP on a page fault */
tmp = RREG32_SOC15(GC, 0, regCP_DEBUG);
tmp = REG_SET_FIELD(tmp, CP_DEBUG, CPG_UTCL1_ERROR_HALT_DISABLE, 1);
WREG32_SOC15(GC, 0, regCP_DEBUG, tmp);
/* These registers are not accessible to VF-SRIOV.
* The PF will program them instead.
*/
if (amdgpu_sriov_vf(adev))
return;
tmp = RREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
value);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
if (!value) {
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
CRASH_ON_NO_RETRY_FAULT, 1);
tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL,
CRASH_ON_RETRY_FAULT, 1);
}
WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL, tmp);
}
static const struct amdgpu_vmhub_funcs gfxhub_v12_0_vmhub_funcs = {
.print_l2_protection_fault_status = gfxhub_v12_0_print_l2_protection_fault_status,
.get_invalidate_req = gfxhub_v12_0_get_invalidate_req,
};
static void gfxhub_v12_0_init(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(GC, 0,
regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
hub->ctx0_ptb_addr_hi32 =
SOC15_REG_OFFSET(GC, 0,
regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
hub->vm_inv_eng0_sem =
SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_SEM);
hub->vm_inv_eng0_req =
SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_REQ);
hub->vm_inv_eng0_ack =
SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ACK);
hub->vm_context0_cntl =
SOC15_REG_OFFSET(GC, 0, regGCVM_CONTEXT0_CNTL);
hub->vm_l2_pro_fault_status =
SOC15_REG_OFFSET(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS_LO32);
hub->vm_l2_pro_fault_cntl =
SOC15_REG_OFFSET(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL);
hub->ctx_distance = regGCVM_CONTEXT1_CNTL - regGCVM_CONTEXT0_CNTL;
hub->ctx_addr_distance = regGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
hub->eng_distance = regGCVM_INVALIDATE_ENG1_REQ -
regGCVM_INVALIDATE_ENG0_REQ;
hub->eng_addr_distance = regGCVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
hub->vm_cntx_cntl_vm_fault = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
GCVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
GCVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
GCVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
GCVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
hub->vmhub_funcs = &gfxhub_v12_0_vmhub_funcs;
}
const struct amdgpu_gfxhub_funcs gfxhub_v12_0_funcs = {
.get_fb_location = gfxhub_v12_0_get_fb_location,
.get_mc_fb_offset = gfxhub_v12_0_get_mc_fb_offset,
.setup_vm_pt_regs = gfxhub_v12_0_setup_vm_pt_regs,
.gart_enable = gfxhub_v12_0_gart_enable,
.gart_disable = gfxhub_v12_0_gart_disable,
.set_fault_enable_default = gfxhub_v12_0_set_fault_enable_default,
.init = gfxhub_v12_0_init,
};

View File

@ -0,0 +1,29 @@
/*
* Copyright 2023 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __GFXHUB_V12_0_H__
#define __GFXHUB_V12_0_H__
extern const struct amdgpu_gfxhub_funcs gfxhub_v12_0_funcs;
#endif

View File

@ -366,7 +366,9 @@ static void gfxhub_v1_2_xcc_setup_vmid_config(struct amdgpu_device *adev,
amdgpu_ip_version(adev, GC_HWIP, 0) ==
IP_VERSION(9, 4, 2) ||
amdgpu_ip_version(adev, GC_HWIP, 0) ==
IP_VERSION(9, 4, 3));
IP_VERSION(9, 4, 3) ||
amdgpu_ip_version(adev, GC_HWIP, 0) ==
IP_VERSION(9, 4, 4));
WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT1_CNTL,
i * hub->ctx_distance, tmp);
WREG32_SOC15_OFFSET(GC, GET_INST(GC, j),

View File

@ -473,17 +473,17 @@ static uint64_t gmc_v10_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
{
switch (flags) {
case AMDGPU_VM_MTYPE_DEFAULT:
return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_NC);
case AMDGPU_VM_MTYPE_NC:
return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_NC);
case AMDGPU_VM_MTYPE_WC:
return AMDGPU_PTE_MTYPE_NV10(MTYPE_WC);
return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_WC);
case AMDGPU_VM_MTYPE_CC:
return AMDGPU_PTE_MTYPE_NV10(MTYPE_CC);
return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_CC);
case AMDGPU_VM_MTYPE_UC:
return AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_UC);
default:
return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_NC);
}
}
@ -536,8 +536,7 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
AMDGPU_GEM_CREATE_EXT_COHERENT |
AMDGPU_GEM_CREATE_UNCACHED))
*flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) |
AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
*flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_UC);
}
static unsigned int gmc_v10_0_get_vbios_fb_size(struct amdgpu_device *adev)
@ -763,7 +762,7 @@ static int gmc_v10_0_gart_init(struct amdgpu_device *adev)
return r;
adev->gart.table_size = adev->gart.num_gpu_pages * 8;
adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_NV10(MTYPE_UC) |
adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_UC) |
AMDGPU_PTE_EXECUTABLE;
return amdgpu_gart_table_vram_alloc(adev);

View File

@ -438,17 +438,17 @@ static uint64_t gmc_v11_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
{
switch (flags) {
case AMDGPU_VM_MTYPE_DEFAULT:
return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_NC);
case AMDGPU_VM_MTYPE_NC:
return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_NC);
case AMDGPU_VM_MTYPE_WC:
return AMDGPU_PTE_MTYPE_NV10(MTYPE_WC);
return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_WC);
case AMDGPU_VM_MTYPE_CC:
return AMDGPU_PTE_MTYPE_NV10(MTYPE_CC);
return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_CC);
case AMDGPU_VM_MTYPE_UC:
return AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_UC);
default:
return AMDGPU_PTE_MTYPE_NV10(MTYPE_NC);
return AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_NC);
}
}
@ -501,8 +501,7 @@ static void gmc_v11_0_get_vm_pte(struct amdgpu_device *adev,
if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
AMDGPU_GEM_CREATE_EXT_COHERENT |
AMDGPU_GEM_CREATE_UNCACHED))
*flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) |
AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
*flags = AMDGPU_PTE_MTYPE_NV10(*flags, MTYPE_UC);
}
static unsigned int gmc_v11_0_get_vbios_fb_size(struct amdgpu_device *adev)
@ -723,7 +722,7 @@ static int gmc_v11_0_gart_init(struct amdgpu_device *adev)
return r;
adev->gart.table_size = adev->gart.num_gpu_pages * 8;
adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_NV10(MTYPE_UC) |
adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_NV10(0ULL, MTYPE_UC) |
AMDGPU_PTE_EXECUTABLE;
return amdgpu_gart_table_vram_alloc(adev);

File diff suppressed because it is too large

View File

@ -0,0 +1,30 @@
/*
* Copyright 2023 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __GMC_V12_0_H__
#define __GMC_V12_0_H__
extern const struct amd_ip_funcs gmc_v12_0_ip_funcs;
extern const struct amdgpu_ip_block_version gmc_v12_0_ip_block;
#endif

View File

@ -644,7 +644,8 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
addr, entry->client_id,
soc15_ih_clientid_name[entry->client_id]);
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3))
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4))
dev_err(adev->dev, " cookie node_id %d fault from die %s%d%s\n",
node_id, node_id % 4 == 3 ? "RSV" : "AID", node_id / 4,
node_id % 4 == 1 ? ".XCD0" : node_id % 4 == 2 ? ".XCD1" : "");
@ -786,7 +787,8 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
uint32_t vmhub)
{
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3))
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4))
return false;
return ((vmhub == AMDGPU_MMHUB0(0) ||
@ -842,7 +844,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
if (vmhub >= AMDGPU_MMHUB0(0))
inst = GET_INST(GC, 0);
inst = 0;
else
inst = vmhub;
@ -874,9 +876,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
for (j = 0; j < adev->usec_timeout; j++) {
/* a read return value of 1 means semaphore acquire */
if (vmhub >= AMDGPU_MMHUB0(0))
tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem, inst);
tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem, GET_INST(GC, inst));
else
tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem, inst);
tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem, GET_INST(GC, inst));
if (tmp & 0x1)
break;
udelay(1);
@ -887,9 +889,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
}
if (vmhub >= AMDGPU_MMHUB0(0))
WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req, inst);
WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req, GET_INST(GC, inst));
else
WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req, inst);
WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req, GET_INST(GC, inst));
/*
* Issue a dummy read to wait for the ACK register to
@ -902,9 +904,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
for (j = 0; j < adev->usec_timeout; j++) {
if (vmhub >= AMDGPU_MMHUB0(0))
tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack, inst);
tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack, GET_INST(GC, inst));
else
tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack, inst);
tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack, GET_INST(GC, inst));
if (tmp & (1 << vmid))
break;
udelay(1);
@ -917,9 +919,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
* write with 0 means semaphore release
*/
if (vmhub >= AMDGPU_MMHUB0(0))
WREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0, inst);
WREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0, GET_INST(GC, inst));
else
WREG32_SOC15_IP_NO_KIQ(GC, sem, 0, inst);
WREG32_SOC15_IP_NO_KIQ(GC, sem, 0, GET_INST(GC, inst));
}
spin_unlock(&adev->gmc.invalidate_lock);
@ -1073,19 +1075,19 @@ static uint64_t gmc_v9_0_map_mtype(struct amdgpu_device *adev, uint32_t flags)
{
switch (flags) {
case AMDGPU_VM_MTYPE_DEFAULT:
return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_NC);
case AMDGPU_VM_MTYPE_NC:
return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_NC);
case AMDGPU_VM_MTYPE_WC:
return AMDGPU_PTE_MTYPE_VG10(MTYPE_WC);
return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_WC);
case AMDGPU_VM_MTYPE_RW:
return AMDGPU_PTE_MTYPE_VG10(MTYPE_RW);
return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_RW);
case AMDGPU_VM_MTYPE_CC:
return AMDGPU_PTE_MTYPE_VG10(MTYPE_CC);
return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_CC);
case AMDGPU_VM_MTYPE_UC:
return AMDGPU_PTE_MTYPE_VG10(MTYPE_UC);
return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_UC);
default:
return AMDGPU_PTE_MTYPE_VG10(MTYPE_NC);
return AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_NC);
}
}
@ -1170,6 +1172,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
}
break;
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
/* Only local VRAM BOs or system memory on non-NUMA APUs
* can be assumed to be local in their entirety. Choose
* MTYPE_NC as safe fallback for all system memory BOs on
@ -1225,8 +1228,8 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
}
if (mtype != MTYPE_NC)
*flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) |
AMDGPU_PTE_MTYPE_VG10(mtype);
*flags = AMDGPU_PTE_MTYPE_VG10(*flags, mtype);
*flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
}
@ -1261,7 +1264,8 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev,
/* Only GFX 9.4.3 APUs associate GPUs with NUMA nodes. Local system
* memory can use more efficient MTYPEs.
*/
if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 3))
if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 3) &&
amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 4))
return;
/* Only direct-mapped memory allows us to determine the NUMA node from
@ -1277,9 +1281,9 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev,
* and can also be overridden.
*/
if ((*flags & AMDGPU_PTE_MTYPE_VG10_MASK) !=
AMDGPU_PTE_MTYPE_VG10(MTYPE_NC) &&
AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_NC) &&
(*flags & AMDGPU_PTE_MTYPE_VG10_MASK) !=
AMDGPU_PTE_MTYPE_VG10(MTYPE_UC)) {
AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_UC)) {
dev_dbg_ratelimited(adev->dev, "MTYPE is not NC or UC\n");
return;
}
@ -1308,7 +1312,7 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev,
if (nid == local_node) {
uint64_t old_flags = *flags;
if ((*flags & AMDGPU_PTE_MTYPE_VG10_MASK) ==
AMDGPU_PTE_MTYPE_VG10(MTYPE_NC)) {
AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_NC)) {
unsigned int mtype_local = MTYPE_RW;
if (amdgpu_mtype_local == 1)
@ -1316,12 +1320,10 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev,
else if (amdgpu_mtype_local == 2)
mtype_local = MTYPE_CC;
*flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) |
AMDGPU_PTE_MTYPE_VG10(mtype_local);
*flags = AMDGPU_PTE_MTYPE_VG10(*flags, mtype_local);
} else if (adev->rev_id) {
/* MTYPE_UC case */
*flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) |
AMDGPU_PTE_MTYPE_VG10(MTYPE_CC);
*flags = AMDGPU_PTE_MTYPE_VG10(*flags, MTYPE_CC);
}
dev_dbg_ratelimited(adev->dev, "flags updated from %llx to %llx\n",
@ -1506,7 +1508,8 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev)
static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev)
{
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3))
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4))
adev->gfxhub.funcs = &gfxhub_v1_2_funcs;
else
adev->gfxhub.funcs = &gfxhub_v1_0_funcs;
@ -1551,7 +1554,8 @@ static int gmc_v9_0_early_init(void *handle)
*/
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3))
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4))
adev->gmc.xgmi.supported = true;
if (amdgpu_ip_version(adev, XGMI_HWIP, 0) == IP_VERSION(6, 1, 0)) {
@ -1560,7 +1564,8 @@ static int gmc_v9_0_early_init(void *handle)
adev->smuio.funcs->is_host_gpu_xgmi_supported(adev);
}
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) {
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) {
enum amdgpu_pkg_type pkg_type =
adev->smuio.funcs->get_pkg_type(adev);
/* On GFXIP 9.4.3. APU, there is no physical VRAM domain present
@ -1722,6 +1727,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
case IP_VERSION(9, 4, 1):
case IP_VERSION(9, 4, 2):
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
default:
adev->gmc.gart_size = 512ULL << 20;
break;
@ -1764,7 +1770,7 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev)
if (r)
return r;
adev->gart.table_size = adev->gart.num_gpu_pages * 8;
adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(MTYPE_UC) |
adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(0ULL, MTYPE_UC) |
AMDGPU_PTE_EXECUTABLE;
if (!adev->gmc.real_vram_size) {
@ -1887,7 +1893,7 @@ gmc_v9_0_init_sw_mem_ranges(struct amdgpu_device *adev,
{
enum amdgpu_memory_partition mode;
u32 start_addr = 0, size;
int i;
int i, r, l;
mode = gmc_v9_0_query_memory_partition(adev);
@ -1910,23 +1916,39 @@ gmc_v9_0_init_sw_mem_ranges(struct amdgpu_device *adev,
break;
}
size = adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT;
size /= adev->gmc.num_mem_partitions;
/* Use NPS range info, if populated */
r = amdgpu_gmc_get_nps_memranges(adev, mem_ranges,
adev->gmc.num_mem_partitions);
if (!r) {
l = 0;
for (i = 1; i < adev->gmc.num_mem_partitions; ++i) {
if (mem_ranges[i].range.lpfn >
mem_ranges[i - 1].range.lpfn)
l = i;
}
for (i = 0; i < adev->gmc.num_mem_partitions; ++i) {
mem_ranges[i].range.fpfn = start_addr;
mem_ranges[i].size = ((u64)size << AMDGPU_GPU_PAGE_SHIFT);
mem_ranges[i].range.lpfn = start_addr + size - 1;
start_addr += size;
} else {
/* Fallback to sw based calculation */
size = (adev->gmc.real_vram_size + SZ_16M) >> AMDGPU_GPU_PAGE_SHIFT;
size /= adev->gmc.num_mem_partitions;
for (i = 0; i < adev->gmc.num_mem_partitions; ++i) {
mem_ranges[i].range.fpfn = start_addr;
mem_ranges[i].size =
((u64)size << AMDGPU_GPU_PAGE_SHIFT);
mem_ranges[i].range.lpfn = start_addr + size - 1;
start_addr += size;
}
l = adev->gmc.num_mem_partitions - 1;
}
/* Adjust the last one */
mem_ranges[adev->gmc.num_mem_partitions - 1].range.lpfn =
mem_ranges[l].range.lpfn =
(adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT) - 1;
mem_ranges[adev->gmc.num_mem_partitions - 1].size =
mem_ranges[l].size =
adev->gmc.real_vram_size -
((u64)mem_ranges[adev->gmc.num_mem_partitions - 1].range.fpfn
<< AMDGPU_GPU_PAGE_SHIFT);
((u64)mem_ranges[l].range.fpfn << AMDGPU_GPU_PAGE_SHIFT);
}
static int gmc_v9_0_init_mem_ranges(struct amdgpu_device *adev)
@ -1976,7 +1998,8 @@ static int gmc_v9_0_sw_init(void *handle)
spin_lock_init(&adev->gmc.invalidate_lock);
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) {
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) {
gmc_v9_4_3_init_vram_info(adev);
} else if (!adev->bios) {
if (adev->flags & AMD_IS_APU) {
@ -2059,6 +2082,7 @@ static int gmc_v9_0_sw_init(void *handle)
adev->gmc.translate_further = adev->vm_manager.num_level > 1;
break;
case IP_VERSION(9, 4, 3):
case IP_VERSION(9, 4, 4):
bitmap_set(adev->vmhubs_mask, AMDGPU_GFXHUB(0),
NUM_XCC(adev->gfx.xcc_mask));
@ -2124,7 +2148,8 @@ static int gmc_v9_0_sw_init(void *handle)
amdgpu_gmc_get_vbios_allocations(adev);
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) {
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) {
r = gmc_v9_0_init_mem_ranges(adev);
if (r)
return r;
@ -2152,7 +2177,8 @@ static int gmc_v9_0_sw_init(void *handle)
adev->vm_manager.first_kfd_vmid =
(amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) ?
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) ?
3 :
8;
@ -2164,7 +2190,8 @@ static int gmc_v9_0_sw_init(void *handle)
if (r)
return r;
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3))
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4))
amdgpu_gmc_sysfs_init(adev);
return 0;
@ -2174,7 +2201,8 @@ static int gmc_v9_0_sw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3))
if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4))
amdgpu_gmc_sysfs_fini(adev);
amdgpu_gmc_ras_fini(adev);

View File

@ -50,7 +50,8 @@ static void hdp_v4_0_invalidate_hdp(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 0) ||
amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 2))
amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 2) ||
amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 5))
return;
if (!ring || !ring->funcs->emit_wreg)
@ -129,7 +130,8 @@ static void hdp_v4_0_get_clockgating_state(struct amdgpu_device *adev,
{
int data;
if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 2)) {
if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 2) ||
amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 5)) {
/* Default enabled */
*flags |= AMD_CG_SUPPORT_HDP_MGCG;
return;

View File

@ -0,0 +1,401 @@
/*
* Copyright 2023 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include <linux/firmware.h>
#include "amdgpu.h"
#include "amdgpu_imu.h"
#include "amdgpu_dpm.h"
#include "imu_v12_0.h"
#include "gc/gc_12_0_0_offset.h"
#include "gc/gc_12_0_0_sh_mask.h"
#include "mmhub/mmhub_4_1_0_offset.h"
MODULE_FIRMWARE("amdgpu/gc_12_0_0_imu.bin");
MODULE_FIRMWARE("amdgpu/gc_12_0_1_imu.bin");
#define TRANSFER_RAM_MASK 0x001c0000
static int imu_v12_0_init_microcode(struct amdgpu_device *adev)
{
char fw_name[40];
char ucode_prefix[15];
int err;
const struct imu_firmware_header_v1_0 *imu_hdr;
struct amdgpu_firmware_info *info = NULL;
DRM_DEBUG("\n");
amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix));
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_imu.bin", ucode_prefix);
err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, fw_name);
if (err)
goto out;
imu_hdr = (const struct imu_firmware_header_v1_0 *)adev->gfx.imu_fw->data;
adev->gfx.imu_fw_version = le32_to_cpu(imu_hdr->header.ucode_version);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_IMU_I];
info->ucode_id = AMDGPU_UCODE_ID_IMU_I;
info->fw = adev->gfx.imu_fw;
adev->firmware.fw_size +=
ALIGN(le32_to_cpu(imu_hdr->imu_iram_ucode_size_bytes), PAGE_SIZE);
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_IMU_D];
info->ucode_id = AMDGPU_UCODE_ID_IMU_D;
info->fw = adev->gfx.imu_fw;
adev->firmware.fw_size +=
ALIGN(le32_to_cpu(imu_hdr->imu_dram_ucode_size_bytes), PAGE_SIZE);
}
out:
if (err) {
dev_err(adev->dev,
"gfx12: Failed to load firmware \"%s\"\n",
fw_name);
amdgpu_ucode_release(&adev->gfx.imu_fw);
}
return err;
}
static int imu_v12_0_load_microcode(struct amdgpu_device *adev)
{
const struct imu_firmware_header_v1_0 *hdr;
const __le32 *fw_data;
unsigned i, fw_size;
if (!adev->gfx.imu_fw)
return -EINVAL;
hdr = (const struct imu_firmware_header_v1_0 *)adev->gfx.imu_fw->data;
fw_data = (const __le32 *)(adev->gfx.imu_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
fw_size = le32_to_cpu(hdr->imu_iram_ucode_size_bytes) / 4;
WREG32_SOC15(GC, 0, regGFX_IMU_I_RAM_ADDR, 0);
for (i = 0; i < fw_size; i++)
WREG32_SOC15(GC, 0, regGFX_IMU_I_RAM_DATA, le32_to_cpup(fw_data++));
WREG32_SOC15(GC, 0, regGFX_IMU_I_RAM_ADDR, adev->gfx.imu_fw_version);
fw_data = (const __le32 *)(adev->gfx.imu_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes) +
le32_to_cpu(hdr->imu_iram_ucode_size_bytes));
fw_size = le32_to_cpu(hdr->imu_dram_ucode_size_bytes) / 4;
WREG32_SOC15(GC, 0, regGFX_IMU_D_RAM_ADDR, 0);
for (i = 0; i < fw_size; i++)
WREG32_SOC15(GC, 0, regGFX_IMU_D_RAM_DATA, le32_to_cpup(fw_data++));
WREG32_SOC15(GC, 0, regGFX_IMU_D_RAM_ADDR, adev->gfx.imu_fw_version);
return 0;
}
static int imu_v12_0_wait_for_reset_status(struct amdgpu_device *adev)
{
int i, imu_reg_val = 0;
for (i = 0; i < adev->usec_timeout; i++) {
imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_GFX_RESET_CTRL);
if ((imu_reg_val & 0x1f) == 0x1f)
break;
udelay(1);
}
if (i >= adev->usec_timeout) {
dev_err(adev->dev, "init imu: IMU start timeout\n");
return -ETIMEDOUT;
}
return 0;
}
static void imu_v12_0_setup(struct amdgpu_device *adev)
{
int imu_reg_val;
WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_ACCESS_CTRL0, 0xffffff);
WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_ACCESS_CTRL1, 0xffff);
if (adev->gfx.imu.mode == DEBUG_MODE) {
imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16);
imu_reg_val |= 0x1;
WREG32_SOC15(GC, 0, regGFX_IMU_C2PMSG_16, imu_reg_val);
imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10);
imu_reg_val |= 0x20010007;
WREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_10, imu_reg_val);
}
}
static int imu_v12_0_start(struct amdgpu_device *adev)
{
int imu_reg_val;
imu_reg_val = RREG32_SOC15(GC, 0, regGFX_IMU_CORE_CTRL);
imu_reg_val &= 0xfffffffe;
WREG32_SOC15(GC, 0, regGFX_IMU_CORE_CTRL, imu_reg_val);
if (adev->flags & AMD_IS_APU)
amdgpu_dpm_set_gfx_power_up_by_imu(adev);
return imu_v12_0_wait_for_reset_status(adev);
}
static const struct imu_rlc_ram_golden imu_rlc_ram_golden_12_0_1[] = {
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCH_PIPE_STEER, 0x1e4, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL1X_PIPE_STEER, 0x1e4, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL1_PIPE_STEER, 0x1e4, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_0, 0x13571357, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_1, 0x64206420, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_2, 0x2460246, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2_PIPE_STEER_3, 0x75317531, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xc0d41183, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regSDMA0_CHICKEN_BITS, 0x507d1c0, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regSDMA1_CHICKEN_BITS, 0x507d1c0, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCP_RB_WPTR_POLL_CNTL, 0x600100, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_CREDITS, 0x3f7fff, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_CREDITS, 0x3f7ebf, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_TAG_RESERVE0, 0x2e00000, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_TAG_RESERVE1, 0x1a078, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_TAG_RESERVE2, 0x0, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_TAG_RESERVE0, 0x0, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_TAG_RESERVE1, 0x12030, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_TAG_RESERVE2, 0x0, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_VCC_RESERVE0, 0x19041000, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_VCC_RESERVE1, 0x80000000, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_VCC_RESERVE0, 0x1e080000, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_VCC_RESERVE1, 0x80000000, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_PRIORITY, 0x880, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_PRIORITY, 0x8880, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_ARB_FINAL, 0x17, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_ARB_FINAL, 0x77, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_SDP_ENABLE, 0x00000001, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_SDP_ENABLE, 0x00000001, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL2, 0x20000, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_APT_CNTL, 0x0c, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_CACHEABLE_DRAM_ADDRESS_END, 0xfffff, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_CPWD_MISC, 0x0091, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGC_EA_SE_MISC, 0x0091, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGRBM_GFX_INDEX, 0xe0000000, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCR_GENERAL_CNTL, 0x00008500, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0x00880007, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regTD_CNTL, 0x00000001, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGRBM_GFX_INDEX, 0x00000000, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regRMI_GENERAL_CNTL, 0x01e00000, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGRBM_GFX_INDEX, 0x00000001, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regRMI_GENERAL_CNTL, 0x01e00000, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGRBM_GFX_INDEX, 0x00000100, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regRMI_GENERAL_CNTL, 0x01e00000, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGRBM_GFX_INDEX, 0x00000101, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regRMI_GENERAL_CNTL, 0x01e00000, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGRBM_GFX_INDEX, 0xe0000000, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0x08200545, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGRBMH_CP_PERFMON_CNTL, 0x00000000, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCB_PERFCOUNTER0_SELECT1, 0x000fffff, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCP_DEBUG_2, 0x00020000, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCP_CPC_DEBUG, 0x00500010, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, 0x00000500, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x00000001, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0x00000000, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_START, 0x00000000, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_END, 0x0000000f, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_FB_LOCATION_BASE, 0x00006000, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_FB_LOCATION_TOP, 0x0000600f, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT0_CNTL, 0x00000000, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_CONTEXT1_CNTL, 0x00000000, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_TOP_OF_DRAM_SLOT1, 0xff800000, 0xe0000000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_LOWER_TOP_OF_DRAM2, 0x00000001, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_NB_UPPER_TOP_OF_DRAM2, 0x0000ffff, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_BASE, 0x00000000, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_BOT, 0x00000002, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_AGP_TOP, 0x00000000, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL, 0x00001ffc, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, 0x00000551, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL, 0x00080603, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL2, 0x00000003, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL3, 0x00100003, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCVM_L2_CNTL5, 0x00003fe0, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR, 0x0003d000, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR, 0x0003d7ff, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, 0, 0x1c0000),
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, 0, 0x1c0000)
};
static void program_imu_rlc_ram_old(struct amdgpu_device *adev,
const struct imu_rlc_ram_golden *regs,
const u32 array_size)
{
const struct imu_rlc_ram_golden *entry;
u32 reg, data;
int i;
for (i = 0; i < array_size; ++i) {
entry = &regs[i];
reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
reg |= entry->addr_mask;
data = entry->data;
if (entry->reg == regGCMC_VM_AGP_BASE)
data = 0x00ffffff;
else if (entry->reg == regGCMC_VM_AGP_TOP)
data = 0x0;
else if (entry->reg == regGCMC_VM_FB_LOCATION_BASE)
data = adev->gmc.vram_start >> 24;
else if (entry->reg == regGCMC_VM_FB_LOCATION_TOP)
data = adev->gmc.vram_end >> 24;
WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_HIGH, 0);
WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_LOW, reg);
WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_DATA, data);
}
}
static u32 imu_v12_0_grbm_gfx_index_remap(struct amdgpu_device *adev,
u32 data, bool high)
{
u32 val, inst_index;
inst_index = REG_GET_FIELD(data, GRBM_GFX_INDEX, INSTANCE_INDEX);
if (high)
val = inst_index >> 5;
else
val = REG_GET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES) << 18 |
REG_GET_FIELD(data, GRBM_GFX_INDEX, SA_BROADCAST_WRITES) << 19 |
REG_GET_FIELD(data, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES) << 20 |
REG_GET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX) << 21 |
REG_GET_FIELD(data, GRBM_GFX_INDEX, SA_INDEX) << 25 |
(inst_index & 0x1f);
return val;
}
static u32 imu_v12_init_gfxhub_settings(struct amdgpu_device *adev,
u32 reg, u32 data)
{
if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_FB_LOCATION_BASE))
return RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_LOCATION_BASE);
else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_FB_LOCATION_TOP))
return RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_LOCATION_TOP);
else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_FB_OFFSET))
return RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_OFFSET);
else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_AGP_BASE))
return RREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BASE);
else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_AGP_BOT))
return RREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT);
else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_AGP_TOP))
return RREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP);
else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL))
return RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR))
return RREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR);
else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR))
return RREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR);
else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_START))
return RREG32_SOC15(MMHUB, 0, regMMMC_VM_LOCAL_FB_ADDRESS_START);
else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_LOCAL_FB_ADDRESS_END))
return RREG32_SOC15(MMHUB, 0, regMMMC_VM_LOCAL_FB_ADDRESS_END);
else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_LOCAL_SYSMEM_ADDRESS_START))
return RREG32_SOC15(MMHUB, 0, regMMMC_VM_LOCAL_SYSMEM_ADDRESS_START);
else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_LOCAL_SYSMEM_ADDRESS_END))
return RREG32_SOC15(MMHUB, 0, regMMMC_VM_LOCAL_SYSMEM_ADDRESS_END);
else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB))
return RREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB);
else if (reg == SOC15_REG_OFFSET(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB))
return RREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB);
else
return data;
}
static void program_imu_rlc_ram(struct amdgpu_device *adev,
const u32 *regs,
const u32 array_size)
{
u32 reg, data, val_h = 0, val_l = TRANSFER_RAM_MASK;
int i;
if (array_size % 3)
return;
for (i = 0; i < array_size; i += 3) {
reg = regs[i + 0];
data = regs[i + 2];
data = imu_v12_init_gfxhub_settings(adev, reg, data);
if (reg == SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_INDEX)) {
val_l = imu_v12_0_grbm_gfx_index_remap(adev, data, false);
val_h = imu_v12_0_grbm_gfx_index_remap(adev, data, true);
} else {
WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_HIGH, val_h);
WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_LOW, reg | val_l);
WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_DATA, data);
}
}
}
static void imu_v12_0_program_rlc_ram(struct amdgpu_device *adev)
{
u32 reg_data, size = 0;
const u32 *data;
int r = -EINVAL;
WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_INDEX, 0x2);
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
if (!r)
program_imu_rlc_ram(adev, data, (const u32)size);
else
program_imu_rlc_ram_old(adev, imu_rlc_ram_golden_12_0_1,
(const u32)ARRAY_SIZE(imu_rlc_ram_golden_12_0_1));
break;
default:
BUG();
break;
}
//Indicate the latest entry
WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_HIGH, 0);
WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_ADDR_LOW, 0);
WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_DATA, 0);
reg_data = RREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_INDEX);
reg_data |= GFX_IMU_RLC_RAM_INDEX__RAM_VALID_MASK;
WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_INDEX, reg_data);
}
const struct amdgpu_imu_funcs gfx_v12_0_imu_funcs = {
.init_microcode = imu_v12_0_init_microcode,
.load_microcode = imu_v12_0_load_microcode,
.setup_imu = imu_v12_0_setup,
.start_imu = imu_v12_0_start,
.program_rlc_ram = imu_v12_0_program_rlc_ram,
.wait_for_reset_status = imu_v12_0_wait_for_reset_status,
};

View File

@ -0,0 +1,30 @@
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __IMU_V12_0_H__
#define __IMU_V12_0_H__
extern const struct amdgpu_imu_funcs gfx_v12_0_imu_funcs;
#endif

View File

@ -187,7 +187,7 @@ static int jpeg_v4_0_5_hw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct amdgpu_ring *ring;
int r, i;
int i, r = 0;
// TODO: Enable ring test with DPG support
if (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG) {

View File

@ -145,8 +145,6 @@ static int jpeg_v5_0_0_hw_init(void *handle)
if (r)
return r;
DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully.\n");
return 0;
}
@ -549,7 +547,6 @@ static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = {
static void jpeg_v5_0_0_set_dec_ring_funcs(struct amdgpu_device *adev)
{
adev->jpeg.inst->ring_dec->funcs = &jpeg_v5_0_0_dec_ring_vm_funcs;
DRM_DEV_INFO(adev->dev, "JPEG decode is enabled in VM mode\n");
}
static const struct amdgpu_irq_src_funcs jpeg_v5_0_0_irq_funcs = {

View File

@ -52,7 +52,7 @@ MODULE_FIRMWARE("amdgpu/gc_11_5_0_mes1.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_mes_2.bin");
MODULE_FIRMWARE("amdgpu/gc_11_5_1_mes1.bin");
static int mes_v11_0_hw_init(void *handle);
static int mes_v11_0_hw_fini(void *handle);
static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev);
static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev);
@ -325,6 +325,31 @@ static int mes_v11_0_remove_hw_queue(struct amdgpu_mes *mes,
offsetof(union MESAPI__REMOVE_QUEUE, api_status));
}
static int mes_v11_0_map_legacy_queue(struct amdgpu_mes *mes,
struct mes_map_legacy_queue_input *input)
{
union MESAPI__ADD_QUEUE mes_add_queue_pkt;
memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt));
mes_add_queue_pkt.header.type = MES_API_TYPE_SCHEDULER;
mes_add_queue_pkt.header.opcode = MES_SCH_API_ADD_QUEUE;
mes_add_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
mes_add_queue_pkt.pipe_id = input->pipe_id;
mes_add_queue_pkt.queue_id = input->queue_id;
mes_add_queue_pkt.doorbell_offset = input->doorbell_offset;
mes_add_queue_pkt.mqd_addr = input->mqd_addr;
mes_add_queue_pkt.wptr_addr = input->wptr_addr;
mes_add_queue_pkt.queue_type =
convert_to_mes_queue_type(input->queue_type);
mes_add_queue_pkt.map_legacy_kq = 1;
return mes_v11_0_submit_pkt_and_poll_completion(mes,
&mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
offsetof(union MESAPI__ADD_QUEUE, api_status));
}
static int mes_v11_0_unmap_legacy_queue(struct amdgpu_mes *mes,
struct mes_unmap_legacy_queue_input *input)
{
@ -538,6 +563,7 @@ static int mes_v11_0_set_hw_resources_1(struct amdgpu_mes *mes)
static const struct amdgpu_mes_funcs mes_v11_0_funcs = {
.add_hw_queue = mes_v11_0_add_hw_queue,
.remove_hw_queue = mes_v11_0_remove_hw_queue,
.map_legacy_queue = mes_v11_0_map_legacy_queue,
.unmap_legacy_queue = mes_v11_0_unmap_legacy_queue,
.suspend_gang = mes_v11_0_suspend_gang,
.resume_gang = mes_v11_0_resume_gang,
@ -670,7 +696,7 @@ static void mes_v11_0_enable(struct amdgpu_device *adev, bool enable)
if (amdgpu_emu_mode)
msleep(100);
else
udelay(50);
udelay(500);
} else {
data = RREG32_SOC15(GC, 0, regCP_MES_CNTL);
data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_ACTIVE, 0);
@ -1266,6 +1292,10 @@ static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev)
if (r)
goto failure;
r = mes_v11_0_hw_init(adev);
if (r)
goto failure;
return r;
failure:
@ -1295,6 +1325,9 @@ static int mes_v11_0_hw_init(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (adev->mes.ring.sched.ready)
goto out;
if (!adev->enable_mes_kiq) {
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
r = mes_v11_0_load_microcode(adev,
@ -1330,6 +1363,7 @@ static int mes_v11_0_hw_init(void *handle)
goto failure;
}
out:
/*
* Disable KIQ ring usage from the driver once MES is enabled.
* MES uses KIQ ring exclusively so driver cannot access KIQ ring

View File

@ -0,0 +1,29 @@
/*
* Copyright 2021 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __MES_V12_0_H__
#define __MES_V12_0_H__
extern const struct amdgpu_ip_block_version mes_v12_0_ip_block;
#endif

View File

@ -33,6 +33,10 @@
#define regMMVM_L2_CNTL3_DEFAULT 0x80100007
#define regMMVM_L2_CNTL4_DEFAULT 0x000000c1
#define regMMVM_L2_CNTL5_DEFAULT 0x00003fe0
#define regDAGB0_L1TLB_REG_RW_3_3 0x00a4
#define regDAGB0_L1TLB_REG_RW_3_3_BASE_IDX 1
#define regDAGB1_L1TLB_REG_RW_3_3 0x0163
#define regDAGB1_L1TLB_REG_RW_3_3_BASE_IDX 1
static const char *mmhub_client_ids_v3_3[][2] = {
[0][0] = "VMC",
@ -359,6 +363,49 @@ static void mmhub_v3_3_program_invalidation(struct amdgpu_device *adev)
}
}
static void mmhub_v3_3_init_saw_regs(struct amdgpu_device *adev)
{
uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
uint32_t tmp;
/* Program page table base, gart start, gart end */
WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
lower_32_bits(pt_base >> 12));
WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
upper_32_bits(pt_base >> 12));
WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
(u32)(adev->gmc.gart_start >> 12));
WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
(u32)(adev->gmc.gart_start >> 44));
WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
(u32)(adev->gmc.gart_end >> 12));
WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
(u32)(adev->gmc.gart_end >> 44));
tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_CNTL);
tmp = REG_SET_FIELD(tmp, MMVM_L2_SAW_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_L2_SAW_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXT0_CNTL, tmp);
/* Disable all contexts except context 0 */
tmp = 0xfffe;
WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CONTEXTS_DISABLE, tmp);
/* Program saw cntl4 */
tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CNTL4);
tmp = REG_SET_FIELD(tmp, MMVM_L2_SAW_CNTL4, VMC_TAP_CONTEXT0_PDE_REQUEST_SNOOP, 1);
tmp = REG_SET_FIELD(tmp, MMVM_L2_SAW_CNTL4, VMC_TAP_CONTEXT0_PTE_REQUEST_SNOOP, 1);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_SAW_CNTL4, tmp);
}
static void mmhub_v3_3_enable_tls(struct amdgpu_device *adev)
{
WREG32_SOC15(MMHUB, 0, regDAGB0_L1TLB_REG_RW_3_3, 0);
WREG32_SOC15(MMHUB, 0, regDAGB1_L1TLB_REG_RW_3_3, 3);
}
static int mmhub_v3_3_gart_enable(struct amdgpu_device *adev)
{
/* GART Enable. */
@ -372,6 +419,12 @@ static int mmhub_v3_3_gart_enable(struct amdgpu_device *adev)
mmhub_v3_3_setup_vmid_config(adev);
mmhub_v3_3_program_invalidation(adev);
/* standalone walker init */
mmhub_v3_3_init_saw_regs(adev);
/* enable mmhub tls */
mmhub_v3_3_enable_tls(adev);
return 0;
}

View File

@ -0,0 +1,654 @@
/*
* Copyright 2023 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "amdgpu.h"
#include "mmhub_v4_1_0.h"
#include "mmhub/mmhub_4_1_0_offset.h"
#include "mmhub/mmhub_4_1_0_sh_mask.h"
#include "soc15_common.h"
#include "soc24_enum.h"
#define regMMVM_L2_CNTL3_DEFAULT 0x80100007
#define regMMVM_L2_CNTL4_DEFAULT 0x000000c1
#define regMMVM_L2_CNTL5_DEFAULT 0x00003fe0
static const char *mmhub_client_ids_v4_1_0[][2] = {
[0][0] = "VMC",
[4][0] = "DCEDMC",
[5][0] = "DCEVGA",
[6][0] = "MP0",
[7][0] = "MP1",
[8][0] = "MPIO",
[16][0] = "HDP",
[17][0] = "LSDMA",
[18][0] = "JPEG",
[19][0] = "VCNU0",
[21][0] = "VSCH",
[22][0] = "VCNU1",
[23][0] = "VCN1",
[32+20][0] = "VCN0",
[2][1] = "DBGUNBIO",
[3][1] = "DCEDWB",
[4][1] = "DCEDMC",
[5][1] = "DCEVGA",
[6][1] = "MP0",
[7][1] = "MP1",
[8][1] = "MPIO",
[10][1] = "DBGU0",
[11][1] = "DBGU1",
[12][1] = "DBGU2",
[13][1] = "DBGU3",
[14][1] = "XDP",
[15][1] = "OSSSYS",
[16][1] = "HDP",
[17][1] = "LSDMA",
[18][1] = "JPEG",
[19][1] = "VCNU0",
[20][1] = "VCN0",
[21][1] = "VSCH",
[22][1] = "VCNU1",
[23][1] = "VCN1",
};
static uint32_t mmhub_v4_1_0_get_invalidate_req(unsigned int vmid,
uint32_t flush_type)
{
u32 req = 0;
/* invalidate using legacy mode on vmid */
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
PER_VMID_INVALIDATE_REQ, 1 << vmid);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ,
CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0);
return req;
}
static void
mmhub_v4_1_0_print_l2_protection_fault_status(struct amdgpu_device *adev,
uint32_t status)
{
uint32_t cid, rw;
const char *mmhub_cid = NULL;
cid = REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS_LO32, CID);
rw = REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS_LO32, RW);
dev_err(adev->dev,
"MMVM_L2_PROTECTION_FAULT_STATUS_LO32:0x%08X\n",
status);
switch (adev->ip_versions[MMHUB_HWIP][0]) {
case IP_VERSION(4, 1, 0):
mmhub_cid = mmhub_client_ids_v4_1_0[cid][rw];
break;
default:
mmhub_cid = NULL;
break;
}
dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n",
mmhub_cid ? mmhub_cid : "unknown", cid);
dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS_LO32, MORE_FAULTS));
dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS_LO32, WALKER_ERROR));
dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS_LO32, PERMISSION_FAULTS));
dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
REG_GET_FIELD(status,
MMVM_L2_PROTECTION_FAULT_STATUS_LO32, MAPPING_ERROR));
dev_err(adev->dev, "\t RW: 0x%x\n", rw);
}
static void mmhub_v4_1_0_setup_vm_pt_regs(struct amdgpu_device *adev,
uint32_t vmid, uint64_t page_table_base)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32,
hub->ctx_addr_distance * vmid,
lower_32_bits(page_table_base));
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32,
hub->ctx_addr_distance * vmid,
upper_32_bits(page_table_base));
}
static void mmhub_v4_1_0_init_gart_aperture_regs(struct amdgpu_device *adev)
{
uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
mmhub_v4_1_0_setup_vm_pt_regs(adev, 0, pt_base);
WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
(u32)(adev->gmc.gart_start >> 12));
WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
(u32)(adev->gmc.gart_start >> 44));
WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
(u32)(adev->gmc.gart_end >> 12));
WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
(u32)(adev->gmc.gart_end >> 44));
}
static void mmhub_v4_1_0_init_system_aperture_regs(struct amdgpu_device *adev)
{
uint64_t value;
uint32_t tmp;
/*
* The new L1 policy blocks the SR-IOV guest from writing these
* registers; they are programmed on the host instead, so skip
* programming them here.
*/
if (amdgpu_sriov_vf(adev))
return;
/* Program the AGP BAR */
WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BASE, 0);
WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24);
WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24);
/* Program the system aperture low logical page number. */
WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR,
min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18);
WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18);
/* Set default page address. */
value = adev->mem_scratch.gpu_addr - adev->gmc.vram_start +
adev->vm_manager.vram_base_offset;
WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
(u32)(value >> 12));
WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB,
(u32)(value >> 44));
/* Program "protection fault". */
WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
(u32)(adev->dummy_page_addr >> 12));
WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
(u32)((u64)adev->dummy_page_addr >> 44));
tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2,
ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2, tmp);
}
static void mmhub_v4_1_0_init_tlb_regs(struct amdgpu_device *adev)
{
uint32_t tmp;
/* Setup TLB control */
tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
ENABLE_ADVANCED_DRIVER_MODEL, 1);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
SYSTEM_APERTURE_UNMAPPED_ACCESS, 0);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
MTYPE, MTYPE_UC); /* UC, uncached */
WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
}
static void mmhub_v4_1_0_init_cache_regs(struct amdgpu_device *adev)
{
uint32_t tmp;
/* These registers are not accessible to VF-SRIOV.
* The PF will program them instead.
*/
if (amdgpu_sriov_vf(adev))
return;
/* Setup L2 cache */
tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 1);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL,
ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1);
/* XXX for emulation, refer to closed source code. */
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE,
0);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2, tmp);
tmp = regMMVM_L2_CNTL3_DEFAULT;
if (adev->gmc.translate_further) {
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
} else {
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3,
L2_CACHE_BIGK_FRAGMENT_SIZE, 6);
}
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, tmp);
tmp = regMMVM_L2_CNTL4_DEFAULT;
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL4, tmp);
tmp = regMMVM_L2_CNTL5_DEFAULT;
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp);
}
static void mmhub_v4_1_0_enable_system_domain(struct amdgpu_device *adev)
{
uint32_t tmp;
tmp = RREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0);
WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL, tmp);
}
static void mmhub_v4_1_0_disable_identity_aperture(struct amdgpu_device *adev)
{
/* These registers are not accessible to VF-SRIOV.
* The PF will program them instead.
*/
if (amdgpu_sriov_vf(adev))
return;
WREG32_SOC15(MMHUB, 0,
regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32,
0xFFFFFFFF);
WREG32_SOC15(MMHUB, 0,
regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32,
0x0000000F);
WREG32_SOC15(MMHUB, 0,
regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0);
WREG32_SOC15(MMHUB, 0,
regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32,
0);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32,
0);
}
static void mmhub_v4_1_0_setup_vmid_config(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
int i;
uint32_t tmp;
for (i = 0; i <= 14; i++) {
tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, i);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH,
adev->vm_manager.num_level);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
PAGE_TABLE_BLOCK_SIZE,
adev->vm_manager.block_size - 9);
/* Send no-retry XNACK on fault to suppress VM fault storm. */
tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
!amdgpu_noretry);
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL,
i * hub->ctx_distance, tmp);
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
i * hub->ctx_addr_distance, 0);
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32,
i * hub->ctx_addr_distance, 0);
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32,
i * hub->ctx_addr_distance,
lower_32_bits(adev->vm_manager.max_pfn - 1));
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32,
i * hub->ctx_addr_distance,
upper_32_bits(adev->vm_manager.max_pfn - 1));
}
hub->vm_cntx_cntl = tmp;
}
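/*
 * Editor's note: the 15 per-process contexts (CONTEXT1..CONTEXT15) each have
 * their own copy of these registers at a fixed stride.  hub->ctx_distance and
 * hub->ctx_addr_distance (computed in mmhub_v4_1_0_init() below as the offset
 * between the CONTEXT1 and CONTEXT0 registers) are those strides, so the
 * write for context i lands roughly at:
 *
 *	reg = SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL)
 *	      + i * hub->ctx_distance;
 */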
static void mmhub_v4_1_0_program_invalidation(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
unsigned int i;
for (i = 0; i < 18; ++i) {
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32,
i * hub->eng_addr_distance, 0xffffffff);
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32,
i * hub->eng_addr_distance, 0x1f);
}
}
static int mmhub_v4_1_0_gart_enable(struct amdgpu_device *adev)
{
/* GART Enable. */
mmhub_v4_1_0_init_gart_aperture_regs(adev);
mmhub_v4_1_0_init_system_aperture_regs(adev);
mmhub_v4_1_0_init_tlb_regs(adev);
mmhub_v4_1_0_init_cache_regs(adev);
mmhub_v4_1_0_enable_system_domain(adev);
mmhub_v4_1_0_disable_identity_aperture(adev);
mmhub_v4_1_0_setup_vmid_config(adev);
mmhub_v4_1_0_program_invalidation(adev);
return 0;
}
static void mmhub_v4_1_0_gart_disable(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
u32 tmp;
u32 i;
/* Disable all tables */
for (i = 0; i < 16; i++)
WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL,
i * hub->ctx_distance, 0);
/* Setup TLB control */
tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL,
ENABLE_ADVANCED_DRIVER_MODEL, 0);
WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp);
/* Setup L2 cache */
tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL);
tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 0);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp);
WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, 0);
}
/**
* mmhub_v4_1_0_set_fault_enable_default - update GART/VM fault handling
*
* @adev: amdgpu_device pointer
* @value: true redirects VM faults to the default page
*/
static void
mmhub_v4_1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)
{
u32 tmp;
/* These registers are not accessible to VF-SRIOV.
* The PF will program them instead.
*/
if (amdgpu_sriov_vf(adev))
return;
tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
READ_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
if (!value) {
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
CRASH_ON_NO_RETRY_FAULT, 1);
tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL,
CRASH_ON_RETRY_FAULT, 1);
}
WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL, tmp);
}
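/*
 * Editor's note: a hypothetical caller sketch (the actual wiring lives in the
 * GMC code, which is not part of this excerpt).  The GMC block typically maps
 * the amdgpu_vm_fault_stop module parameter onto this hook, e.g.:
 *
 *	bool value = amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS;
 *
 *	if (adev->mmhub.funcs->set_fault_enable_default)
 *		adev->mmhub.funcs->set_fault_enable_default(adev, value);
 */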
static const struct amdgpu_vmhub_funcs mmhub_v4_1_0_vmhub_funcs = {
.print_l2_protection_fault_status = mmhub_v4_1_0_print_l2_protection_fault_status,
.get_invalidate_req = mmhub_v4_1_0_get_invalidate_req,
};
static void mmhub_v4_1_0_init(struct amdgpu_device *adev)
{
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
hub->ctx0_ptb_addr_lo32 =
SOC15_REG_OFFSET(MMHUB, 0,
regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32);
hub->ctx0_ptb_addr_hi32 =
SOC15_REG_OFFSET(MMHUB, 0,
regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32);
hub->vm_inv_eng0_sem =
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_SEM);
hub->vm_inv_eng0_req =
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_REQ);
hub->vm_inv_eng0_ack =
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ACK);
hub->vm_context0_cntl =
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL);
hub->vm_l2_pro_fault_status =
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_STATUS_LO32);
hub->vm_l2_pro_fault_cntl =
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL);
hub->ctx_distance = regMMVM_CONTEXT1_CNTL - regMMVM_CONTEXT0_CNTL;
hub->ctx_addr_distance = regMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 -
regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32;
hub->eng_distance = regMMVM_INVALIDATE_ENG1_REQ -
regMMVM_INVALIDATE_ENG0_REQ;
hub->eng_addr_distance = regMMVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 -
regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32;
hub->vm_cntx_cntl_vm_fault = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
hub->vm_l2_bank_select_reserved_cid2 =
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_BANK_SELECT_RESERVED_CID2);
hub->vm_contexts_disable =
SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXTS_DISABLE);
hub->vmhub_funcs = &mmhub_v4_1_0_vmhub_funcs;
}
static u64 mmhub_v4_1_0_get_fb_location(struct amdgpu_device *adev)
{
u64 base;
base = RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_LOCATION_BASE);
base &= MMMC_VM_FB_LOCATION_BASE__FB_BASE_MASK;
base <<= 24;
return base;
}
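/*
 * Editor's note: as the mask-and-shift above shows, MMMC_VM_FB_LOCATION_BASE
 * holds the framebuffer base in 16 MiB (1 << 24) units.  For example, a field
 * value of 0x40 corresponds to a base MC address of:
 *
 *	0x40ULL << 24 == 0x40000000	// 1 GiB
 *
 * The FB offset read in mmhub_v4_1_0_get_mc_fb_offset() below uses the same
 * 16 MiB granularity.
 */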
static u64 mmhub_v4_1_0_get_mc_fb_offset(struct amdgpu_device *adev)
{
return (u64)RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_OFFSET) << 24;
}
static void
mmhub_v4_1_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
bool enable)
{
#if 0
uint32_t def, data;
#endif
uint32_t def1, data1, def2 = 0, data2 = 0;
#if 0
def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
#endif
def1 = data1 = RREG32_SOC15(MMHUB, 0, regDAGB0_CNTL_MISC2);
def2 = data2 = RREG32_SOC15(MMHUB, 0, regDAGB1_CNTL_MISC2);
if (enable) {
#if 0
data |= MM_ATC_L2_MISC_CG__ENABLE_MASK;
#endif
data1 &= ~(DAGB0_CNTL_MISC2__DISABLE_RDRET_TAP_CHAIN_FGCG_MASK |
DAGB0_CNTL_MISC2__DISABLE_WRRET_TAP_CHAIN_FGCG_MASK);
data2 &= ~(DAGB1_CNTL_MISC2__DISABLE_RDRET_TAP_CHAIN_FGCG_MASK |
DAGB1_CNTL_MISC2__DISABLE_WRRET_TAP_CHAIN_FGCG_MASK);
} else {
#if 0
data &= ~MM_ATC_L2_MISC_CG__ENABLE_MASK;
#endif
data1 |= (DAGB0_CNTL_MISC2__DISABLE_RDRET_TAP_CHAIN_FGCG_MASK |
DAGB0_CNTL_MISC2__DISABLE_WRRET_TAP_CHAIN_FGCG_MASK);
data2 |= (DAGB1_CNTL_MISC2__DISABLE_RDRET_TAP_CHAIN_FGCG_MASK |
DAGB1_CNTL_MISC2__DISABLE_WRRET_TAP_CHAIN_FGCG_MASK);
}
#if 0
if (def != data)
WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data);
#endif
if (def1 != data1)
WREG32_SOC15(MMHUB, 0, regDAGB0_CNTL_MISC2, data1);
if (def2 != data2)
WREG32_SOC15(MMHUB, 0, regDAGB1_CNTL_MISC2, data2);
}
static void
mmhub_v4_1_0_update_medium_grain_light_sleep(struct amdgpu_device *adev,
bool enable)
{
#if 0
uint32_t def, data;
def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
if (enable)
data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
else
data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
if (def != data)
WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data);
#endif
}
static int mmhub_v4_1_0_set_clockgating(struct amdgpu_device *adev,
enum amd_clockgating_state state)
{
if (amdgpu_sriov_vf(adev))
return 0;
if (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)
mmhub_v4_1_0_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE);
if (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)
mmhub_v4_1_0_update_medium_grain_light_sleep(adev,
state == AMD_CG_STATE_GATE);
return 0;
}
static void mmhub_v4_1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
{
#if 0
int data;
if (amdgpu_sriov_vf(adev))
*flags = 0;
data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG);
/* AMD_CG_SUPPORT_MC_MGCG */
if (data & MM_ATC_L2_MISC_CG__ENABLE_MASK)
*flags |= AMD_CG_SUPPORT_MC_MGCG;
/* AMD_CG_SUPPORT_MC_LS */
if (data & MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK)
*flags |= AMD_CG_SUPPORT_MC_LS;
#endif
}
const struct amdgpu_mmhub_funcs mmhub_v4_1_0_funcs = {
.init = mmhub_v4_1_0_init,
.get_fb_location = mmhub_v4_1_0_get_fb_location,
.get_mc_fb_offset = mmhub_v4_1_0_get_mc_fb_offset,
.gart_enable = mmhub_v4_1_0_gart_enable,
.set_fault_enable_default = mmhub_v4_1_0_set_fault_enable_default,
.gart_disable = mmhub_v4_1_0_gart_disable,
.set_clockgating = mmhub_v4_1_0_set_clockgating,
.get_clockgating = mmhub_v4_1_0_get_clockgating,
.setup_vm_pt_regs = mmhub_v4_1_0_setup_vm_pt_regs,
};
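/*
 * Editor's note: this function table is how the rest of the driver reaches
 * the MMHUB 4.1.0 code.  A hedged sketch of the hookup (the real assignment
 * presumably lives in the GMC 12 init path elsewhere in this series, not in
 * this file):
 *
 *	adev->mmhub.funcs = &mmhub_v4_1_0_funcs;
 *	...
 *	adev->mmhub.funcs->init(adev);
 *	r = adev->mmhub.funcs->gart_enable(adev);
 */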

View File

@ -0,0 +1,28 @@
/*
* Copyright 2023 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __MMHUB_V4_1_0_H__
#define __MMHUB_V4_1_0_H__
extern const struct amdgpu_mmhub_funcs mmhub_v4_1_0_funcs;
#endif

View File

@ -292,6 +292,7 @@ flr_done:
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
set_bit(AMDGPU_HOST_FLR, &reset_context.flags);
amdgpu_device_gpu_recover(adev, NULL, &reset_context);
}
@ -319,7 +320,7 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev,
switch (event) {
case IDH_FLR_NOTIFICATION:
if (amdgpu_sriov_runtime(adev) && !amdgpu_in_reset(adev))
if (amdgpu_sriov_runtime(adev))
WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
&adev->virt.flr_work),
"Failed to queue work! at %s",

View File

@ -181,7 +181,7 @@ send_request:
if (event != -1) {
r = xgpu_nv_poll_msg(adev, event);
if (r) {
if (retry++ < 2)
if (retry++ < 5)
goto send_request;
if (req != IDH_REQ_GPU_INIT_DATA) {
@ -328,6 +328,7 @@ flr_done:
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
set_bit(AMDGPU_HOST_FLR, &reset_context.flags);
amdgpu_device_gpu_recover(adev, NULL, &reset_context);
}
@ -358,7 +359,7 @@ static int xgpu_nv_mailbox_rcv_irq(struct amdgpu_device *adev,
switch (event) {
case IDH_FLR_NOTIFICATION:
if (amdgpu_sriov_runtime(adev) && !amdgpu_in_reset(adev))
if (amdgpu_sriov_runtime(adev))
WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
&adev->virt.flr_work),
"Failed to queue work! at %s",

View File

@ -529,6 +529,7 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work)
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
set_bit(AMDGPU_HOST_FLR, &reset_context.flags);
amdgpu_device_gpu_recover(adev, NULL, &reset_context);
}
@ -560,7 +561,7 @@ static int xgpu_vi_mailbox_rcv_irq(struct amdgpu_device *adev,
r = xgpu_vi_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION);
/* only handle FLR_NOTIFY now */
if (!r && !amdgpu_in_reset(adev))
if (!r)
WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain,
&adev->virt.flr_work),
"Failed to queue work! at %s",

View File

@ -315,6 +315,7 @@ static u32 nbif_v6_3_1_get_rom_offset(struct amdgpu_device *adev)
static void nbif_v6_3_1_program_ltr(struct amdgpu_device *adev)
{
uint32_t def, data;
u16 devctl2;
def = RREG32_SOC15(NBIO, 0, regRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL);
data = 0x35EB;
@ -328,13 +329,15 @@ static void nbif_v6_3_1_program_ltr(struct amdgpu_device *adev)
if (def != data)
WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP2, data);
def = data = RREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2);
pcie_capability_read_word(adev->pdev, PCI_EXP_DEVCTL2, &devctl2);
if (adev->pdev->ltr_path == (devctl2 & PCI_EXP_DEVCTL2_LTR_EN))
return;
if (adev->pdev->ltr_path)
data |= BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK;
pcie_capability_set_word(adev->pdev, PCI_EXP_DEVCTL2, PCI_EXP_DEVCTL2_LTR_EN);
else
data &= ~BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK;
if (def != data)
WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data);
pcie_capability_clear_word(adev->pdev, PCI_EXP_DEVCTL2, PCI_EXP_DEVCTL2_LTR_EN);
}
#endif
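/*
 * Editor's note: the interleaved lines in the hunk above are the old
 * register-based LTR toggle being replaced by the pcie_capability_*()
 * helpers.  Reconstructed from the added and kept lines (a sketch, not
 * verbatim upstream), the new flow reads:
 *
 *	u16 devctl2;
 *
 *	pcie_capability_read_word(adev->pdev, PCI_EXP_DEVCTL2, &devctl2);
 *	if (adev->pdev->ltr_path == (devctl2 & PCI_EXP_DEVCTL2_LTR_EN))
 *		return;		// already in the desired state
 *
 *	if (adev->pdev->ltr_path)
 *		pcie_capability_set_word(adev->pdev, PCI_EXP_DEVCTL2,
 *					 PCI_EXP_DEVCTL2_LTR_EN);
 *	else
 *		pcie_capability_clear_word(adev->pdev, PCI_EXP_DEVCTL2,
 *					   PCI_EXP_DEVCTL2_LTR_EN);
 */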
@ -424,6 +427,20 @@ static void nbif_v6_3_1_program_aspm(struct amdgpu_device *adev)
#endif
}
#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
static void nbif_v6_3_1_set_reg_remap(struct amdgpu_device *adev)
{
if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) {
adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
} else {
adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
regBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
adev->rmmio_remap.bus_addr = 0;
}
}
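/*
 * Editor's note: this corresponds to the ">4k pages" HDP-flush item in the
 * merge changelog.  The remapped HDP flush register normally lives in a
 * one-page hole just below 512 KiB of the register BAR
 * (0x80000 - PAGE_SIZE); with larger page sizes that formula no longer
 * points at a safe one-page window (my reading), so the code falls back to
 * the real HDP_MEM_COHERENCY_FLUSH_CNTL register and clears the bus address.
 * SR-IOV VFs take the fallback unconditionally.  Quick check of the offset
 * arithmetic:
 *
 *	0x80000 - 0x1000  == 0x7f000	// 4 KiB pages
 *	0x80000 - 0x10000 == 0x70000	// 64 KiB pages
 */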
const struct amdgpu_nbio_funcs nbif_v6_3_1_funcs = {
.get_hdp_flush_req_offset = nbif_v6_3_1_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbif_v6_3_1_get_hdp_flush_done_offset,
@ -446,6 +463,7 @@ const struct amdgpu_nbio_funcs nbif_v6_3_1_funcs = {
.remap_hdp_registers = nbif_v6_3_1_remap_hdp_registers,
.get_rom_offset = nbif_v6_3_1_get_rom_offset,
.program_aspm = nbif_v6_3_1_program_aspm,
.set_reg_remap = nbif_v6_3_1_set_reg_remap,
};
@ -492,4 +510,5 @@ const struct amdgpu_nbio_funcs nbif_v6_3_1_sriov_funcs = {
.init_registers = nbif_v6_3_1_init_registers,
.remap_hdp_registers = nbif_v6_3_1_remap_hdp_registers,
.get_rom_offset = nbif_v6_3_1_get_rom_offset,
.set_reg_remap = nbif_v6_3_1_set_reg_remap,
};

View File

@ -339,10 +339,6 @@ static void nbio_v2_3_init_registers(struct amdgpu_device *adev)
if (def != data)
WREG32_PCIE(smnPCIE_CONFIG_CNTL, data);
if (amdgpu_sriov_vf(adev))
adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
}
#define NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT 0x00000000 // off by default, no gains over L1
@ -553,6 +549,20 @@ static void nbio_v2_3_clear_doorbell_interrupt(struct amdgpu_device *adev)
}
}
#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
static void nbio_v2_3_set_reg_remap(struct amdgpu_device *adev)
{
if (!amdgpu_sriov_vf(adev) && (PAGE_SIZE <= 4096)) {
adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
} else {
adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0,
mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2;
adev->rmmio_remap.bus_addr = 0;
}
}
const struct amdgpu_nbio_funcs nbio_v2_3_funcs = {
.get_hdp_flush_req_offset = nbio_v2_3_get_hdp_flush_req_offset,
.get_hdp_flush_done_offset = nbio_v2_3_get_hdp_flush_done_offset,
@ -577,4 +587,5 @@ const struct amdgpu_nbio_funcs nbio_v2_3_funcs = {
.apply_lc_spc_mode_wa = nbio_v2_3_apply_lc_spc_mode_wa,
.apply_l1_link_width_reconfig_wa = nbio_v2_3_apply_l1_link_width_reconfig_wa,
.clear_doorbell_interrupt = nbio_v2_3_clear_doorbell_interrupt,
.set_reg_remap = nbio_v2_3_set_reg_remap,
};

Some files were not shown because too many files have changed in this diff.