drm/xe: Expose PCIe link downgrade attributes

Expose sysfs attributes for PCIe link downgrade capability and status.

v2: Move from debugfs to sysfs (Lucas, Rodrigo, Badal)
    Rework macros and their naming (Rodrigo)
v3: Use sysfs_create_files() (Riana)
    Fix checkpatch warning (Riana)
v4: s/downspeed/downgrade (Lucas, Rodrigo, Riana)
v5: Use PCIe Gen agnostic naming (Rodrigo)
v6: s/pcie_gen/auto_link (Lucas)

Signed-off-by: Raag Jadav <raag.jadav@intel.com>
Reviewed-by: Riana Tauro <riana.tauro@intel.com>
Link: https://lore.kernel.org/r/20250506054835.3395220-3-raag.jadav@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
This commit is contained in:
Raag Jadav 2025-05-06 11:18:34 +05:30 committed by Rodrigo Vivi
parent f3e875b3c0
commit 0e414bf7ad
No known key found for this signature in database
GPG Key ID: FA625F640EEB13CA
2 changed files with 96 additions and 2 deletions

View File

@ -3,14 +3,16 @@
* Copyright © 2023 Intel Corporation
*/
#include <linux/device.h>
#include <linux/kobject.h>
#include <linux/pci.h>
#include <linux/sysfs.h>
#include <drm/drm_managed.h>
#include "xe_device.h"
#include "xe_device_sysfs.h"
#include "xe_mmio.h"
#include "xe_pcode_api.h"
#include "xe_pcode.h"
#include "xe_pm.h"
/**
@ -63,12 +65,93 @@ vram_d3cold_threshold_store(struct device *dev, struct device_attribute *attr,
static DEVICE_ATTR_RW(vram_d3cold_threshold);
/**
* DOC: PCIe Gen5 Limitations
*
* Default link speed of discrete GPUs is determined by configuration parameters
* stored in their flash memory, which are subject to override through user
* initiated firmware updates. It has been observed that devices configured with
* PCIe Gen5 as their default link speed can come across link quality issues due
* to host or motherboard limitations and may have to auto-downgrade their link
* to PCIe Gen4 speed when faced with unstable link at Gen5, which makes
* firmware updates rather risky on such setups. It is required to ensure that
* the device is capable of auto-downgrading its link to PCIe Gen4 speed before
* pushing the firmware image with PCIe Gen5 as default configuration. This can
* be done by reading ``auto_link_downgrade_capable`` sysfs entry, which will
* denote if the device is capable of auto-downgrading its link to PCIe Gen4
* speed with boolean output value of ``0`` or ``1``, meaning `incapable` or
* `capable` respectively.
*
* .. code-block:: shell
*
* $ cat /sys/bus/pci/devices/<bdf>/auto_link_downgrade_capable
*
* Pushing the firmware image with PCIe Gen5 as default configuration on a auto
* link downgrade incapable device and facing link instability due to host or
* motherboard limitations can result in driver failing to bind to the device,
* making further firmware updates impossible with RMA being the only last
* resort.
*
* Link downgrade status of auto link downgrade capable devices is available
* through ``auto_link_downgrade_status`` sysfs entry with boolean output value
* of ``0`` or ``1``, where ``0`` means no auto-downgrading was required during
* link training (which is the optimal scenario) and ``1`` means the device has
* auto-downgraded its link to PCIe Gen4 speed due to unstable Gen5 link.
*
* .. code-block:: shell
*
* $ cat /sys/bus/pci/devices/<bdf>/auto_link_downgrade_status
*/
static ssize_t
auto_link_downgrade_capable_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct xe_device *xe = pdev_to_xe_device(pdev);
u32 cap, val;
xe_pm_runtime_get(xe);
val = xe_mmio_read32(xe_root_tile_mmio(xe), BMG_PCIE_CAP);
xe_pm_runtime_put(xe);
cap = REG_FIELD_GET(LINK_DOWNGRADE, val);
return sysfs_emit(buf, "%u\n", cap == DOWNGRADE_CAPABLE ? true : false);
}
static DEVICE_ATTR_ADMIN_RO(auto_link_downgrade_capable);
static ssize_t
auto_link_downgrade_status_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct xe_device *xe = pdev_to_xe_device(pdev);
u32 val;
int ret;
xe_pm_runtime_get(xe);
ret = xe_pcode_read(xe_device_get_root_tile(xe),
PCODE_MBOX(DGFX_PCODE_STATUS, DGFX_GET_INIT_STATUS, 0),
&val, NULL);
xe_pm_runtime_put(xe);
return ret ?: sysfs_emit(buf, "%u\n", REG_FIELD_GET(DGFX_LINK_DOWNGRADE_STATUS, val));
}
static DEVICE_ATTR_ADMIN_RO(auto_link_downgrade_status);
static const struct attribute *auto_link_downgrade_attrs[] = {
&dev_attr_auto_link_downgrade_capable.attr,
&dev_attr_auto_link_downgrade_status.attr,
NULL
};
static void xe_device_sysfs_fini(void *arg)
{
struct xe_device *xe = arg;
if (xe->d3cold.capable)
sysfs_remove_file(&xe->drm.dev->kobj, &dev_attr_vram_d3cold_threshold.attr);
if (xe->info.platform == XE_BATTLEMAGE)
sysfs_remove_files(&xe->drm.dev->kobj, auto_link_downgrade_attrs);
}
int xe_device_sysfs_init(struct xe_device *xe)
@ -82,5 +165,11 @@ int xe_device_sysfs_init(struct xe_device *xe)
return ret;
}
if (xe->info.platform == XE_BATTLEMAGE) {
ret = sysfs_create_files(&dev->kobj, auto_link_downgrade_attrs);
if (ret)
return ret;
}
return devm_add_action_or_reset(dev, xe_device_sysfs_fini, xe);
}

View File

@ -34,6 +34,7 @@
#define DGFX_PCODE_STATUS 0x7E
#define DGFX_GET_INIT_STATUS 0x0
#define DGFX_INIT_STATUS_COMPLETE 0x1
#define DGFX_LINK_DOWNGRADE_STATUS REG_BIT(31)
#define PCODE_POWER_SETUP 0x7C
#define POWER_SETUP_SUBCOMMAND_READ_I1 0x4
@ -66,6 +67,10 @@
/* Auxiliary info bits */
#define AUXINFO_HISTORY_OFFSET REG_GENMASK(31, 29)
#define BMG_PCIE_CAP XE_REG(0x138340)
#define LINK_DOWNGRADE REG_GENMASK(1, 0)
#define DOWNGRADE_CAPABLE 2
struct pcode_err_decode {
int errno;
const char *str;