mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson
synced 2025-08-27 06:50:37 +00:00

The memory uncorrected error could be signaled by asynchronous interrupt
(specifically, SPI in arm64 platform), e.g. when an error is detected by
a background scrubber, or signaled by synchronous exception
(specifically, data abort exception in arm64 platform), e.g. when a CPU
tries to access a poisoned cache line. Currently, both synchronous and
asynchronous errors use memory_failure_queue() to schedule
memory_failure() to execute in a kworker context.
As a result, when a user-space process is accessing a poisoned data, a
data abort is taken and the memory_failure() is executed in the kworker
context, which:
- will send wrong si_code by SIGBUS signal in early_kill mode, and
- cannot kill the user-space process in some cases, resulting in a
synchronous error infinite loop
Issue 1: send wrong si_code in early_kill mode
Since commit a70297d221 ("ACPI: APEI: set memory failure flags as
MF_ACTION_REQUIRED on synchronous events"), the flag MF_ACTION_REQUIRED
could be used to determine whether a synchronous exception occurs on
ARM64 platform. When a synchronous exception is detected, the kernel is
expected to terminate the current process which has accessed a poisoned
page. This is done by sending a SIGBUS signal with error code
BUS_MCEERR_AR, indicating an action-required machine check error on
read.
However, when kill_proc() is called to terminate the processes that have
the poisoned page mapped, it sends the incorrect SIGBUS error code
BUS_MCEERR_AO because the context in which it operates is not the one
where the error was triggered.
To reproduce this problem:
# STEP1: enable early kill mode
#sysctl -w vm.memory_failure_early_kill=1
vm.memory_failure_early_kill = 1
# STEP2: inject an UCE error and consume it to trigger a synchronous error
#einj_mem_uc single
0: single vaddr = 0xffffb0d75400 paddr = 4092d55b400
injecting ...
triggering ...
signal 7 code 5 addr 0xffffb0d75000
page not present
Test passed
The si_code (code 5) from einj_mem_uc indicates a BUS_MCEERR_AO error,
which is not factually correct.
After this change:
# STEP1: enable early kill mode
#sysctl -w vm.memory_failure_early_kill=1
vm.memory_failure_early_kill = 1
# STEP2: inject an UCE error and consume it to trigger a synchronous error
#einj_mem_uc single
0: single vaddr = 0xffffb0d75400 paddr = 4092d55b400
injecting ...
triggering ...
signal 7 code 4 addr 0xffffb0d75000
page not present
Test passed
The si_code (code 4) from einj_mem_uc indicates that it is a BUS_MCEERR_AR
error as expected.
Issue 2: a synchronous error infinite loop
If a user-space process, e.g. devmem, accesses a poisoned page for which
the HWPoison flag is set, kill_accessing_process() is called to send
SIGBUS to the current process with error info. Since the memory_failure()
is executed in the kworker context, it will just do nothing but return
EFAULT. So, devmem will access the poisoned page and trigger an
exception again, resulting in a synchronous error infinite loop. Such
exception loop may cause platform firmware to exceed some threshold and
reboot when Linux could have recovered from this error.
To reproduce this problem:
# STEP 1: inject an UCE error, and kernel will set HWPoison flag for related page
#einj_mem_uc single
0: single vaddr = 0xffffb0d75400 paddr = 4092d55b400
injecting ...
triggering ...
signal 7 code 4 addr 0xffffb0d75000
page not present
Test passed
# STEP 2: access the same page and it will trigger a synchronous error infinite loop
devmem 0x4092d55b400
To fix above two issues, queue memory_failure() as a task_work so that
it runs in the context of the process that is actually consuming the
poisoned data.
Signed-off-by: Shuai Xue <xueshuai@linux.alibaba.com>
Tested-by: Ma Wupeng <mawupeng1@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Reviewed-by: Xiaofei Tan <tanxiaofei@huawei.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Jane Chu <jane.chu@oracle.com>
Reviewed-by: Yazen Ghannam <yazen.ghannam@amd.com>
Reviewed-by: Hanjun Guo <guohanjun@huawei.com>
Link: https://patch.msgid.link/20250714114212.31660-3-xueshuai@linux.alibaba.com
[ rjw: Changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
134 lines
3.5 KiB
C
134 lines
3.5 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef GHES_H
|
|
#define GHES_H
|
|
|
|
#include <acpi/apei.h>
|
|
#include <acpi/hed.h>
|
|
|
|
/*
|
|
* One struct ghes is created for each generic hardware error source.
|
|
* It provides the context for APEI hardware error timer/IRQ/SCI/NMI
|
|
* handler.
|
|
*
|
|
* estatus: memory buffer for error status block, allocated during
|
|
* HEST parsing.
|
|
*/
|
|
/* NOTE(review): presumably a bit for the 'flags' member of struct ghes - confirm in ghes.c. */
#define GHES_EXITING 0x0002
|
|
|
|
struct ghes {
|
|
union {
|
|
struct acpi_hest_generic *generic;
|
|
struct acpi_hest_generic_v2 *generic_v2;
|
|
};
|
|
struct acpi_hest_generic_status *estatus;
|
|
unsigned long flags;
|
|
union {
|
|
struct list_head list;
|
|
struct timer_list timer;
|
|
unsigned int irq;
|
|
};
|
|
struct device *dev;
|
|
struct list_head elist;
|
|
};
|
|
|
|
struct ghes_estatus_node {
|
|
struct llist_node llnode;
|
|
struct acpi_hest_generic *generic;
|
|
struct ghes *ghes;
|
|
};
|
|
|
|
struct ghes_estatus_cache {
|
|
u32 estatus_len;
|
|
atomic_t count;
|
|
struct acpi_hest_generic *generic;
|
|
unsigned long long time_in;
|
|
struct rcu_head rcu;
|
|
};
|
|
|
|
/*
 * GHES severity codes. The numeric values increase from GHES_SEV_NO (0)
 * to GHES_SEV_PANIC (3).
 */
enum {
	GHES_SEV_NO = 0x0,
	GHES_SEV_CORRECTED = 0x1,
	GHES_SEV_RECOVERABLE = 0x2,
	GHES_SEV_PANIC = 0x3,
};
|
|
|
|
#ifdef CONFIG_ACPI_APEI_GHES
|
|
/**
|
|
* ghes_register_vendor_record_notifier - register a notifier for vendor
|
|
* records that the kernel would otherwise ignore.
|
|
* @nb: pointer to the notifier_block structure of the event handler.
|
|
*
|
|
* return 0 : SUCCESS, non-zero : FAIL
|
|
*/
|
|
int ghes_register_vendor_record_notifier(struct notifier_block *nb);
|
|
|
|
/**
|
|
* ghes_unregister_vendor_record_notifier - unregister the previously
|
|
* registered vendor record notifier.
|
|
* @nb: pointer to the notifier_block structure of the vendor record handler.
|
|
*/
|
|
void ghes_unregister_vendor_record_notifier(struct notifier_block *nb);
|
|
|
|
struct list_head *ghes_get_devices(void);
|
|
|
|
void ghes_estatus_pool_region_free(unsigned long addr, u32 size);
|
|
#else
|
|
static inline struct list_head *ghes_get_devices(void) { return NULL; }
|
|
|
|
static inline void ghes_estatus_pool_region_free(unsigned long addr, u32 size) { return; }
|
|
#endif
|
|
|
|
/*
 * Declared outside the CONFIG_ACPI_APEI_GHES conditional, so this prototype
 * is visible whether or not that option is set.
 * NOTE(review): judging by the name, sets up the estatus pool for @num_ghes
 * error sources and returns an error code - confirm in the implementation.
 */
int ghes_estatus_pool_init(unsigned int num_ghes);
|
|
|
|
static inline int acpi_hest_get_version(struct acpi_hest_generic_data *gdata)
|
|
{
|
|
return gdata->revision >> 8;
|
|
}
|
|
|
|
static inline void *acpi_hest_get_payload(struct acpi_hest_generic_data *gdata)
|
|
{
|
|
if (acpi_hest_get_version(gdata) >= 3)
|
|
return (void *)(((struct acpi_hest_generic_data_v300 *)(gdata)) + 1);
|
|
|
|
return gdata + 1;
|
|
}
|
|
|
|
static inline int acpi_hest_get_error_length(struct acpi_hest_generic_data *gdata)
|
|
{
|
|
return ((struct acpi_hest_generic_data *)(gdata))->error_data_length;
|
|
}
|
|
|
|
static inline int acpi_hest_get_size(struct acpi_hest_generic_data *gdata)
|
|
{
|
|
if (acpi_hest_get_version(gdata) >= 3)
|
|
return sizeof(struct acpi_hest_generic_data_v300);
|
|
|
|
return sizeof(struct acpi_hest_generic_data);
|
|
}
|
|
|
|
/*
 * Return the total size of one record: the version-dependent header size
 * plus the length of the error data that follows it.
 */
static inline int acpi_hest_get_record_size(struct acpi_hest_generic_data *gdata)
{
	return acpi_hest_get_size(gdata) + acpi_hest_get_error_length(gdata);
}
|
|
|
|
/*
 * Return the address immediately after this record, i.e. @gdata advanced by
 * acpi_hest_get_record_size() bytes. No bounds check is done here; the
 * caller decides whether that address is still inside the status block.
 */
static inline void *acpi_hest_get_next(struct acpi_hest_generic_data *gdata)
{
	/* Byte-granular arithmetic on void * (GNU C extension, as in the original). */
	return (void *)gdata + acpi_hest_get_record_size(gdata);
}
|
|
|
|
/*
 * apei_estatus_for_each_section - iterate over the sections of a status block
 * @estatus: pointer to the status block; the first section starts directly
 *           after it (estatus + 1)
 * @section: struct acpi_hest_generic_data * lvalue used as the loop cursor
 *
 * The loop runs while the byte offset of @section from the first section is
 * below estatus->data_length, advancing with acpi_hest_get_next().
 *
 * Both arguments are expanded several times, so they must be free of side
 * effects. Every use is parenthesised so that expression arguments such as
 * '&blk' or 'a ? b : c' expand with the intended precedence.
 */
#define apei_estatus_for_each_section(estatus, section)				\
	for ((section) = (struct acpi_hest_generic_data *)((estatus) + 1);	\
	     (void *)(section) - (void *)((estatus) + 1) < (estatus)->data_length; \
	     (section) = acpi_hest_get_next(section))
|
|
|
|
#ifdef CONFIG_ACPI_APEI_SEA
int ghes_notify_sea(void);
#else
/* Stub used when CONFIG_ACPI_APEI_SEA is not set: always returns -ENOENT. */
static inline int ghes_notify_sea(void)
{
	return -ENOENT;
}
#endif
|
|
|
|
/* Forward declaration: only pointers to the type are needed below. */
struct notifier_block;

/*
 * Add @nb to / remove @nb from the GHES report chain.
 * NOTE(review): when the chain is invoked and what data is passed to the
 * notifier are defined by the implementation, which is not visible here.
 */
void ghes_register_report_chain(struct notifier_block *nb);
void ghes_unregister_report_chain(struct notifier_block *nb);
|
|
#endif /* GHES_H */
|