mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-08-17 20:18:38 +00:00

The bluefield_edac_readl() routine returns an uninitialized result on error
paths. In those cases the calling routine should not use the uninitialized
result. The driver should simply log the error, and then return early.
Fixes: e419675754 ("EDAC/bluefield: Use Arm SMC for EMI access on BlueField-2")
Signed-off-by: David Thompson <davthompson@nvidia.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Shravan Kumar Ramani <shravankr@nvidia.com>
Link: https://lore.kernel.org/20250318214747.12271-1-davthompson@nvidia.com
495 lines
12 KiB
C
495 lines
12 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Bluefield-specific EDAC driver.
|
|
*
|
|
* Copyright (c) 2019 Mellanox Technologies.
|
|
*/
|
|
|
|
#include <linux/acpi.h>
|
|
#include <linux/arm-smccc.h>
|
|
#include <linux/bitfield.h>
|
|
#include <linux/edac.h>
|
|
#include <linux/io.h>
|
|
#include <linux/module.h>
|
|
#include <linux/platform_device.h>
|
|
|
|
#include "edac_module.h"
|
|
|
|
#define DRIVER_NAME		"bluefield-edac"

/*
 * Mellanox BlueField EMI (External Memory Interface) register definitions.
 */

/* ECC error counters: single-error and double-error counts share one word. */
#define MLXBF_ECC_CNT			0x340
#define MLXBF_ECC_CNT__SERR_CNT		GENMASK(15, 0)
#define MLXBF_ECC_CNT__DERR_CNT		GENMASK(31, 16)

/* Written by the driver to clear the errors it has reported. */
#define MLXBF_ECC_ERR			0x348
#define MLXBF_ECC_ERR__SECC		BIT(0)
#define MLXBF_ECC_ERR__DECC		BIT(16)

/* Writing START asks the EMI to latch details of the last ECC error. */
#define MLXBF_ECC_LATCH_SEL		0x354
#define MLXBF_ECC_LATCH_SEL__START	BIT(24)

/* Low 32 bits of the latched error address. */
#define MLXBF_ERR_ADDR_0		0x358

/* High 32 bits of the latched error address. */
#define MLXBF_ERR_ADDR_1		0x37c

/* Latched error type flags and syndrome value. */
#define MLXBF_SYNDROM			0x35c
#define MLXBF_SYNDROM__DERR		BIT(0)
#define MLXBF_SYNDROM__SERR		BIT(1)
#define MLXBF_SYNDROM__SYN		GENMASK(25, 16)

/* Additional latched information; ERR_PRANK is used to pick the DIMM. */
#define MLXBF_ADD_INFO			0x364
#define MLXBF_ADD_INFO__ERR_PRANK	GENMASK(9, 8)

#define MLXBF_EDAC_MAX_DIMM_PER_MC	2
#define MLXBF_EDAC_ERROR_GRAIN		8

/* SMC function IDs used for secure register access and version query. */
#define MLXBF_WRITE_REG_32		(0x82000009)
#define MLXBF_READ_REG_32		(0x8200000A)
#define MLXBF_SIP_SVC_VERSION		(0x8200ff03)

/* SMC status value treated as a failed register access. */
#define MLXBF_SMCCC_ACCESS_VIOLATION	(-4)

/* Minimum SiP service version required for SMC-based register access. */
#define MLXBF_SVC_REQ_MAJOR		0
#define MLXBF_SVC_REQ_MINOR		3
|
|
|
|
/*
 * Request MLXBF_SIP_GET_DIMM_INFO
 *
 * Retrieve information about DIMM on a certain slot.
 *
 * Call register usage:
 * a0: MLXBF_SIP_GET_DIMM_INFO
 * a1: (Memory controller index) << 16 | (Dimm index in memory controller)
 * a2-7: not used.
 *
 * Return status:
 * a0: MLXBF_DIMM_INFO defined below describing the DIMM.
 * a1-3: not used.
 */
#define MLXBF_SIP_GET_DIMM_INFO		0x82000008

/* Format for the SMC response about the memory information */
#define MLXBF_DIMM_INFO__SIZE_GB	GENMASK_ULL(15, 0)
#define MLXBF_DIMM_INFO__IS_RDIMM	BIT(16)
#define MLXBF_DIMM_INFO__IS_LRDIMM	BIT(17)
#define MLXBF_DIMM_INFO__IS_NVDIMM	BIT(18)
#define MLXBF_DIMM_INFO__RANKS		GENMASK_ULL(23, 21)
#define MLXBF_DIMM_INFO__PACKAGE_X	GENMASK_ULL(31, 24)
|
|
|
|
struct bluefield_edac_priv {
|
|
/* pointer to device structure */
|
|
struct device *dev;
|
|
int dimm_ranks[MLXBF_EDAC_MAX_DIMM_PER_MC];
|
|
void __iomem *emi_base;
|
|
int dimm_per_mc;
|
|
/* access to secure regs supported */
|
|
bool svc_sreg_support;
|
|
/* SMC table# for secure regs access */
|
|
u32 sreg_tbl;
|
|
};
|
|
|
|
/*
 * Issue the SMC @smc_op with @smc_arg as its only argument (all remaining
 * argument registers are zero) and return the a0 register of the response.
 */
static u64 smc_call1(u64 smc_op, u64 smc_arg)
{
	struct arm_smccc_res smc_res;

	arm_smccc_smc(smc_op, smc_arg, 0, 0, 0, 0, 0, 0, &smc_res);

	return smc_res.a0;
}
|
|
|
|
/*
 * Read a 32-bit register through the MLXBF_READ_REG_32 SMC.
 *
 * On success the a1 register of the response is stored in @result and 0 is
 * returned. If the SMC reports "not supported" or an access violation,
 * -1 is returned and @result is left untouched.
 */
static int secure_readl(void __iomem *addr, u32 *result, u32 sreg_tbl)
{
	struct arm_smccc_res smc_res;
	int rc;

	arm_smccc_smc(MLXBF_READ_REG_32, sreg_tbl, (uintptr_t)addr,
		      0, 0, 0, 0, 0, &smc_res);

	rc = smc_res.a0;

	switch (rc) {
	case SMCCC_RET_NOT_SUPPORTED:
	case MLXBF_SMCCC_ACCESS_VIOLATION:
		return -1;
	default:
		break;
	}

	*result = (u32)smc_res.a1;

	return 0;
}
|
|
|
|
/*
 * Write a 32-bit value to a register through the MLXBF_WRITE_REG_32 SMC.
 *
 * Returns 0 on success, or -1 if the SMC reports "not supported" or an
 * access violation.
 */
static int secure_writel(void __iomem *addr, u32 data, u32 sreg_tbl)
{
	struct arm_smccc_res smc_res;
	int rc;

	arm_smccc_smc(MLXBF_WRITE_REG_32, sreg_tbl, data, (uintptr_t)addr,
		      0, 0, 0, 0, &smc_res);

	rc = smc_res.a0;

	switch (rc) {
	case SMCCC_RET_NOT_SUPPORTED:
	case MLXBF_SMCCC_ACCESS_VIOLATION:
		return -1;
	default:
		return 0;
	}
}
|
|
|
|
/*
 * Read the EMI register at @offset into @result, going through the secure
 * SMC path when it is enabled and through a plain readl() otherwise.
 *
 * Returns 0 on success or the non-zero status of secure_readl(); on failure
 * @result is not written and must not be used by the caller.
 */
static int bluefield_edac_readl(struct bluefield_edac_priv *priv, u32 offset, u32 *result)
{
	void __iomem *reg = priv->emi_base + offset;

	if (priv->svc_sreg_support)
		return secure_readl(reg, result, priv->sreg_tbl);

	*result = readl(reg);

	return 0;
}
|
|
|
|
/*
 * Write @data to the EMI register at @offset, going through the secure SMC
 * path when it is enabled and through a plain writel() otherwise.
 *
 * Returns 0 on success or the non-zero status of secure_writel().
 */
static int bluefield_edac_writel(struct bluefield_edac_priv *priv, u32 offset, u32 data)
{
	void __iomem *reg = priv->emi_base + offset;

	if (priv->svc_sreg_support)
		return secure_writel(reg, data, priv->sreg_tbl);

	writel(data, reg);

	return 0;
}
|
|
|
|
/*
|
|
* Gather the ECC information from the External Memory Interface registers
|
|
* and report it to the edac handler.
|
|
*/
|
|
static void bluefield_gather_report_ecc(struct mem_ctl_info *mci,
|
|
int error_cnt,
|
|
int is_single_ecc)
|
|
{
|
|
struct bluefield_edac_priv *priv = mci->pvt_info;
|
|
u32 dram_additional_info, err_prank, edea0, edea1;
|
|
u32 ecc_latch_select, dram_syndrom, serr, derr, syndrom;
|
|
enum hw_event_mc_err_type ecc_type;
|
|
u64 ecc_dimm_addr;
|
|
int ecc_dimm, err;
|
|
|
|
ecc_type = is_single_ecc ? HW_EVENT_ERR_CORRECTED :
|
|
HW_EVENT_ERR_UNCORRECTED;
|
|
|
|
/*
|
|
* Tell the External Memory Interface to populate the relevant
|
|
* registers with information about the last ECC error occurrence.
|
|
*/
|
|
ecc_latch_select = MLXBF_ECC_LATCH_SEL__START;
|
|
err = bluefield_edac_writel(priv, MLXBF_ECC_LATCH_SEL, ecc_latch_select);
|
|
if (err)
|
|
dev_err(priv->dev, "ECC latch select write failed.\n");
|
|
|
|
/*
|
|
* Verify that the ECC reported info in the registers is of the
|
|
* same type as the one asked to report. If not, just report the
|
|
* error without the detailed information.
|
|
*/
|
|
err = bluefield_edac_readl(priv, MLXBF_SYNDROM, &dram_syndrom);
|
|
if (err) {
|
|
dev_err(priv->dev, "DRAM syndrom read failed.\n");
|
|
return;
|
|
}
|
|
|
|
serr = FIELD_GET(MLXBF_SYNDROM__SERR, dram_syndrom);
|
|
derr = FIELD_GET(MLXBF_SYNDROM__DERR, dram_syndrom);
|
|
syndrom = FIELD_GET(MLXBF_SYNDROM__SYN, dram_syndrom);
|
|
|
|
if ((is_single_ecc && !serr) || (!is_single_ecc && !derr)) {
|
|
edac_mc_handle_error(ecc_type, mci, error_cnt, 0, 0, 0,
|
|
0, 0, -1, mci->ctl_name, "");
|
|
return;
|
|
}
|
|
|
|
err = bluefield_edac_readl(priv, MLXBF_ADD_INFO, &dram_additional_info);
|
|
if (err) {
|
|
dev_err(priv->dev, "DRAM additional info read failed.\n");
|
|
return;
|
|
}
|
|
|
|
err_prank = FIELD_GET(MLXBF_ADD_INFO__ERR_PRANK, dram_additional_info);
|
|
|
|
ecc_dimm = (err_prank >= 2 && priv->dimm_ranks[0] <= 2) ? 1 : 0;
|
|
|
|
err = bluefield_edac_readl(priv, MLXBF_ERR_ADDR_0, &edea0);
|
|
if (err) {
|
|
dev_err(priv->dev, "Error addr 0 read failed.\n");
|
|
return;
|
|
}
|
|
|
|
err = bluefield_edac_readl(priv, MLXBF_ERR_ADDR_1, &edea1);
|
|
if (err) {
|
|
dev_err(priv->dev, "Error addr 1 read failed.\n");
|
|
return;
|
|
}
|
|
|
|
ecc_dimm_addr = ((u64)edea1 << 32) | edea0;
|
|
|
|
edac_mc_handle_error(ecc_type, mci, error_cnt,
|
|
PFN_DOWN(ecc_dimm_addr),
|
|
offset_in_page(ecc_dimm_addr),
|
|
syndrom, ecc_dimm, 0, 0, mci->ctl_name, "");
|
|
}
|
|
|
|
static void bluefield_edac_check(struct mem_ctl_info *mci)
|
|
{
|
|
struct bluefield_edac_priv *priv = mci->pvt_info;
|
|
u32 ecc_count, single_error_count, double_error_count, ecc_error = 0;
|
|
int err;
|
|
|
|
/*
|
|
* The memory controller might not be initialized by the firmware
|
|
* when there isn't memory, which may lead to bad register readings.
|
|
*/
|
|
if (mci->edac_cap == EDAC_FLAG_NONE)
|
|
return;
|
|
|
|
err = bluefield_edac_readl(priv, MLXBF_ECC_CNT, &ecc_count);
|
|
if (err) {
|
|
dev_err(priv->dev, "ECC count read failed.\n");
|
|
return;
|
|
}
|
|
|
|
single_error_count = FIELD_GET(MLXBF_ECC_CNT__SERR_CNT, ecc_count);
|
|
double_error_count = FIELD_GET(MLXBF_ECC_CNT__DERR_CNT, ecc_count);
|
|
|
|
if (single_error_count) {
|
|
ecc_error |= MLXBF_ECC_ERR__SECC;
|
|
|
|
bluefield_gather_report_ecc(mci, single_error_count, 1);
|
|
}
|
|
|
|
if (double_error_count) {
|
|
ecc_error |= MLXBF_ECC_ERR__DECC;
|
|
|
|
bluefield_gather_report_ecc(mci, double_error_count, 0);
|
|
}
|
|
|
|
/* Write to clear reported errors. */
|
|
if (ecc_count) {
|
|
err = bluefield_edac_writel(priv, MLXBF_ECC_ERR, ecc_error);
|
|
if (err)
|
|
dev_err(priv->dev, "ECC Error write failed.\n");
|
|
}
|
|
}
|
|
|
|
/* Initialize the DIMMs information for the given memory controller. */
|
|
static void bluefield_edac_init_dimms(struct mem_ctl_info *mci)
|
|
{
|
|
struct bluefield_edac_priv *priv = mci->pvt_info;
|
|
u64 mem_ctrl_idx = mci->mc_idx;
|
|
struct dimm_info *dimm;
|
|
u64 smc_info, smc_arg;
|
|
int is_empty = 1, i;
|
|
|
|
for (i = 0; i < priv->dimm_per_mc; i++) {
|
|
dimm = mci->dimms[i];
|
|
|
|
smc_arg = mem_ctrl_idx << 16 | i;
|
|
smc_info = smc_call1(MLXBF_SIP_GET_DIMM_INFO, smc_arg);
|
|
|
|
if (!FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB, smc_info)) {
|
|
dimm->mtype = MEM_EMPTY;
|
|
continue;
|
|
}
|
|
|
|
is_empty = 0;
|
|
|
|
dimm->edac_mode = EDAC_SECDED;
|
|
|
|
if (FIELD_GET(MLXBF_DIMM_INFO__IS_NVDIMM, smc_info))
|
|
dimm->mtype = MEM_NVDIMM;
|
|
else if (FIELD_GET(MLXBF_DIMM_INFO__IS_LRDIMM, smc_info))
|
|
dimm->mtype = MEM_LRDDR4;
|
|
else if (FIELD_GET(MLXBF_DIMM_INFO__IS_RDIMM, smc_info))
|
|
dimm->mtype = MEM_RDDR4;
|
|
else
|
|
dimm->mtype = MEM_DDR4;
|
|
|
|
dimm->nr_pages =
|
|
FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB, smc_info) *
|
|
(SZ_1G / PAGE_SIZE);
|
|
dimm->grain = MLXBF_EDAC_ERROR_GRAIN;
|
|
|
|
/* Mem controller for BlueField only supports x4, x8 and x16 */
|
|
switch (FIELD_GET(MLXBF_DIMM_INFO__PACKAGE_X, smc_info)) {
|
|
case 4:
|
|
dimm->dtype = DEV_X4;
|
|
break;
|
|
case 8:
|
|
dimm->dtype = DEV_X8;
|
|
break;
|
|
case 16:
|
|
dimm->dtype = DEV_X16;
|
|
break;
|
|
default:
|
|
dimm->dtype = DEV_UNKNOWN;
|
|
}
|
|
|
|
priv->dimm_ranks[i] =
|
|
FIELD_GET(MLXBF_DIMM_INFO__RANKS, smc_info);
|
|
}
|
|
|
|
if (is_empty)
|
|
mci->edac_cap = EDAC_FLAG_NONE;
|
|
else
|
|
mci->edac_cap = EDAC_FLAG_SECDED;
|
|
}
|
|
|
|
static int bluefield_edac_mc_probe(struct platform_device *pdev)
|
|
{
|
|
struct bluefield_edac_priv *priv;
|
|
struct device *dev = &pdev->dev;
|
|
struct edac_mc_layer layers[1];
|
|
struct arm_smccc_res res;
|
|
struct mem_ctl_info *mci;
|
|
struct resource *emi_res;
|
|
unsigned int mc_idx, dimm_count;
|
|
int rc, ret;
|
|
|
|
/* Read the MSS (Memory SubSystem) index from ACPI table. */
|
|
if (device_property_read_u32(dev, "mss_number", &mc_idx)) {
|
|
dev_warn(dev, "bf_edac: MSS number unknown\n");
|
|
return -EINVAL;
|
|
}
|
|
|
|
/* Read the DIMMs per MC from ACPI table. */
|
|
if (device_property_read_u32(dev, "dimm_per_mc", &dimm_count)) {
|
|
dev_warn(dev, "bf_edac: DIMMs per MC unknown\n");
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (dimm_count > MLXBF_EDAC_MAX_DIMM_PER_MC) {
|
|
dev_warn(dev, "bf_edac: DIMMs per MC not valid\n");
|
|
return -EINVAL;
|
|
}
|
|
|
|
emi_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
|
|
if (!emi_res)
|
|
return -EINVAL;
|
|
|
|
layers[0].type = EDAC_MC_LAYER_SLOT;
|
|
layers[0].size = dimm_count;
|
|
layers[0].is_virt_csrow = true;
|
|
|
|
mci = edac_mc_alloc(mc_idx, ARRAY_SIZE(layers), layers, sizeof(*priv));
|
|
if (!mci)
|
|
return -ENOMEM;
|
|
|
|
priv = mci->pvt_info;
|
|
priv->dev = dev;
|
|
|
|
/*
|
|
* The "sec_reg_block" property in the ACPI table determines the method
|
|
* the driver uses to access the EMI registers:
|
|
* a) property is not present - directly access registers via readl/writel
|
|
* b) property is present - indirectly access registers via SMC calls
|
|
* (assuming required Silicon Provider service version found)
|
|
*/
|
|
if (device_property_read_u32(dev, "sec_reg_block", &priv->sreg_tbl)) {
|
|
priv->svc_sreg_support = false;
|
|
} else {
|
|
/*
|
|
* Check for minimum required Arm Silicon Provider (SiP) service
|
|
* version, ensuring support of required SMC function IDs.
|
|
*/
|
|
arm_smccc_smc(MLXBF_SIP_SVC_VERSION, 0, 0, 0, 0, 0, 0, 0, &res);
|
|
if (res.a0 == MLXBF_SVC_REQ_MAJOR &&
|
|
res.a1 >= MLXBF_SVC_REQ_MINOR) {
|
|
priv->svc_sreg_support = true;
|
|
} else {
|
|
dev_err(dev, "Required SMCs are not supported.\n");
|
|
ret = -EINVAL;
|
|
goto err;
|
|
}
|
|
}
|
|
|
|
priv->dimm_per_mc = dimm_count;
|
|
if (!priv->svc_sreg_support) {
|
|
priv->emi_base = devm_ioremap_resource(dev, emi_res);
|
|
if (IS_ERR(priv->emi_base)) {
|
|
dev_err(dev, "failed to map EMI IO resource\n");
|
|
ret = PTR_ERR(priv->emi_base);
|
|
goto err;
|
|
}
|
|
} else {
|
|
priv->emi_base = (void __iomem *)emi_res->start;
|
|
}
|
|
|
|
mci->pdev = dev;
|
|
mci->mtype_cap = MEM_FLAG_DDR4 | MEM_FLAG_RDDR4 |
|
|
MEM_FLAG_LRDDR4 | MEM_FLAG_NVDIMM;
|
|
mci->edac_ctl_cap = EDAC_FLAG_SECDED;
|
|
|
|
mci->mod_name = DRIVER_NAME;
|
|
mci->ctl_name = "BlueField_Memory_Controller";
|
|
mci->dev_name = dev_name(dev);
|
|
mci->edac_check = bluefield_edac_check;
|
|
|
|
/* Initialize mci with the actual populated DIMM information. */
|
|
bluefield_edac_init_dimms(mci);
|
|
|
|
platform_set_drvdata(pdev, mci);
|
|
|
|
/* Register with EDAC core */
|
|
rc = edac_mc_add_mc(mci);
|
|
if (rc) {
|
|
dev_err(dev, "failed to register with EDAC core\n");
|
|
ret = rc;
|
|
goto err;
|
|
}
|
|
|
|
/* Only POLL mode supported so far. */
|
|
edac_op_state = EDAC_OPSTATE_POLL;
|
|
|
|
return 0;
|
|
|
|
err:
|
|
edac_mc_free(mci);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static void bluefield_edac_mc_remove(struct platform_device *pdev)
|
|
{
|
|
struct mem_ctl_info *mci = platform_get_drvdata(pdev);
|
|
|
|
edac_mc_del_mc(&pdev->dev);
|
|
edac_mc_free(mci);
|
|
}
|
|
|
|
/* ACPI IDs this driver binds to; the empty entry terminates the table. */
static const struct acpi_device_id bluefield_mc_acpi_ids[] = {
	{ .id = "MLNXBF08", .driver_data = 0 },
	{ }
};

MODULE_DEVICE_TABLE(acpi, bluefield_mc_acpi_ids);
|
|
|
|
static struct platform_driver bluefield_edac_mc_driver = {
|
|
.driver = {
|
|
.name = DRIVER_NAME,
|
|
.acpi_match_table = bluefield_mc_acpi_ids,
|
|
},
|
|
.probe = bluefield_edac_mc_probe,
|
|
.remove = bluefield_edac_mc_remove,
|
|
};
|
|
|
|
module_platform_driver(bluefield_edac_mc_driver);
|
|
|
|
/* Module metadata. */
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Mellanox Technologies");
MODULE_DESCRIPTION("Mellanox BlueField memory edac driver");
|