cherry-pick fixes for AMD EPYC Genoa systems

both patches are queued for 6.14.2:
https://lore.kernel.org/all/20250409115934.968141886@linuxfoundation.org/
issue was reported in our community forum:
https://forum.proxmox.com/threads/.164497/post-762617

as we have access to a server where we could reproduce the issue
(crash+loop before the system was up [0]), I tested a kernel with those
two patches applied - and the system booted successfully.

FWIW: I also tried building with the original series (which additionally
contains a removal of some PCI IDs), and it resolved the issue as well:
https://lore.kernel.org/all/20250203162511.911946-1-Basavaraj.Natikar@amd.com/

[0] the crash happened before proxmox-boot-cleanup.service ran (so
pinning with --next-boot did not help)

Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com>
Link: https://lore.proxmox.com/20250410130834.1745644-1-s.ivanov@proxmox.com
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
Date: Mon, 3 Feb 2025 21:55:10 +0530
Subject: [PATCH] dmaengine: ae4dma: Use the MSI count and its corresponding
 IRQ number

Instead of using the defined maximum hardware queue, which can lead to
incorrect values if the counts mismatch, use the exact supported MSI
count and its corresponding IRQ number.

Fixes: 90a30e268d9b ("dmaengine: ae4dma: Add AMD ae4dma controller driver")
Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
---
 drivers/dma/amd/ae4dma/ae4dma-pci.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/amd/ae4dma/ae4dma-pci.c b/drivers/dma/amd/ae4dma/ae4dma-pci.c
index aad0dc4294a3945245737978c077eecf740ccb3a..587c5a10c1a8b2dbb925c31af86b1d0b23438b45 100644
--- a/drivers/dma/amd/ae4dma/ae4dma-pci.c
+++ b/drivers/dma/amd/ae4dma/ae4dma-pci.c
@@ -46,8 +46,8 @@ static int ae4_get_irqs(struct ae4_device *ae4)
 	} else {
 		ae4_msix->msix_count = ret;
 
-		for (i = 0; i < MAX_AE4_HW_QUEUES; i++)
-			ae4->ae4_irq[i] = ae4_msix->msix_entry[i].vector;
+		for (i = 0; i < ae4_msix->msix_count; i++)
+			ae4->ae4_irq[i] = pci_irq_vector(pdev, i);
 	}
 
 	return ret;

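For reference, the failure mode the first patch addresses:
pci_alloc_irq_vectors() may grant fewer vectors than the compile-time
maximum, and indexing past the granted count picks up IRQ numbers that
were never set up. Below is a minimal sketch of the corrected pattern;
the struct layout, the ae4_get_irqs_sketch() name, and the value of
MAX_AE4_HW_QUEUES are made up for illustration and are not the driver
source:

#include <linux/pci.h>

#define MAX_AE4_HW_QUEUES 16	/* assumed upper bound, illustration only */

/* simplified stand-in for the driver's device structure */
struct ae4_sketch {
	struct pci_dev *pdev;
	int msix_count;
	int irq[MAX_AE4_HW_QUEUES];
};

static int ae4_get_irqs_sketch(struct ae4_sketch *ae4)
{
	int i, ret;

	/* the PCI core may grant fewer vectors than requested; a
	 * non-negative ret is the number actually allocated */
	ret = pci_alloc_irq_vectors(ae4->pdev, 1, MAX_AE4_HW_QUEUES,
				    PCI_IRQ_MSIX | PCI_IRQ_MSI);
	if (ret < 0)
		return ret;

	ae4->msix_count = ret;

	/* iterate only over the granted count and let the PCI core map
	 * each index to its Linux IRQ number; looping up to the
	 * compile-time maximum would read entries that were never
	 * filled in */
	for (i = 0; i < ae4->msix_count; i++)
		ae4->irq[i] = pci_irq_vector(ae4->pdev, i);

	return ret;
}
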
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
Date: Mon, 3 Feb 2025 21:55:11 +0530
Subject: [PATCH] dmaengine: ptdma: Utilize the AE4DMA engine's multi-queue
 functionality
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As AE4DMA offers multi-channel functionality compared to PTDMA's single
queue, utilize multi-queue, which supports higher speeds than PTDMA, to
achieve higher performance using the AE4DMA workqueue-based mechanism.

Fixes: 69a47b16a51b ("dmaengine: ptdma: Extend ptdma to support multi-channel and version")
Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
---
 drivers/dma/amd/ae4dma/ae4dma.h         |  2 +
 drivers/dma/amd/ptdma/ptdma-dmaengine.c | 90 ++++++++++++++++++++++++-
 2 files changed, 89 insertions(+), 3 deletions(-)

diff --git a/drivers/dma/amd/ae4dma/ae4dma.h b/drivers/dma/amd/ae4dma/ae4dma.h
index 265c5d4360080d6a0cc77f2bab507fde761d5461..57f6048726bb68da03e145d0c69f4bdcd4012c6f 100644
--- a/drivers/dma/amd/ae4dma/ae4dma.h
+++ b/drivers/dma/amd/ae4dma/ae4dma.h
@@ -37,6 +37,8 @@
 #define AE4_DMA_VERSION 4
 #define CMD_AE4_DESC_DW0_VAL 2
 
+#define AE4_TIME_OUT 5000
+
 struct ae4_msix {
 	int msix_count;
 	struct msix_entry msix_entry[MAX_AE4_HW_QUEUES];
diff --git a/drivers/dma/amd/ptdma/ptdma-dmaengine.c b/drivers/dma/amd/ptdma/ptdma-dmaengine.c
index 35c84ec9608b4fd119972e3cd9abedf818dff743..715ac3ae067b857830db85e170787e30f3ae6b1d 100644
--- a/drivers/dma/amd/ptdma/ptdma-dmaengine.c
+++ b/drivers/dma/amd/ptdma/ptdma-dmaengine.c
@@ -198,8 +198,10 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
 {
 	struct dma_async_tx_descriptor *tx_desc;
 	struct virt_dma_desc *vd;
+	struct pt_device *pt;
 	unsigned long flags;
 
+	pt = chan->pt;
 	/* Loop over descriptors until one is found with commands */
 	do {
 		if (desc) {
@@ -217,7 +219,7 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
 
 	spin_lock_irqsave(&chan->vc.lock, flags);
 
-	if (desc) {
+	if (pt->ver != AE4_DMA_VERSION && desc) {
 		if (desc->status != DMA_COMPLETE) {
 			if (desc->status != DMA_ERROR)
 				desc->status = DMA_COMPLETE;
@@ -235,7 +237,7 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
 
 	spin_unlock_irqrestore(&chan->vc.lock, flags);
 
-	if (tx_desc) {
+	if (pt->ver != AE4_DMA_VERSION && tx_desc) {
 		dmaengine_desc_get_callback_invoke(tx_desc, NULL);
 		dma_run_dependencies(tx_desc);
 		vchan_vdesc_fini(vd);
@@ -245,11 +247,25 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
 	return NULL;
 }
 
+static inline bool ae4_core_queue_full(struct pt_cmd_queue *cmd_q)
+{
+	u32 front_wi = readl(cmd_q->reg_control + AE4_WR_IDX_OFF);
+	u32 rear_ri = readl(cmd_q->reg_control + AE4_RD_IDX_OFF);
+
+	if (((MAX_CMD_QLEN + front_wi - rear_ri) % MAX_CMD_QLEN) >= (MAX_CMD_QLEN - 1))
+		return true;
+
+	return false;
+}
+
 static void pt_cmd_callback(void *data, int err)
 {
 	struct pt_dma_desc *desc = data;
+	struct ae4_cmd_queue *ae4cmd_q;
 	struct dma_chan *dma_chan;
 	struct pt_dma_chan *chan;
+	struct ae4_device *ae4;
+	struct pt_device *pt;
 	int ret;
 
 	if (err == -EINPROGRESS)
@@ -257,11 +273,32 @@ static void pt_cmd_callback(void *data, int err)
 
 	dma_chan = desc->vd.tx.chan;
 	chan = to_pt_chan(dma_chan);
+	pt = chan->pt;
 
 	if (err)
 		desc->status = DMA_ERROR;
 
 	while (true) {
+		if (pt->ver == AE4_DMA_VERSION) {
+			ae4 = container_of(pt, struct ae4_device, pt);
+			ae4cmd_q = &ae4->ae4cmd_q[chan->id];
+
+			if (ae4cmd_q->q_cmd_count >= (CMD_Q_LEN - 1) ||
+			    ae4_core_queue_full(&ae4cmd_q->cmd_q)) {
+				wake_up(&ae4cmd_q->q_w);
+
+				if (wait_for_completion_timeout(&ae4cmd_q->cmp,
+								msecs_to_jiffies(AE4_TIME_OUT))
+						== 0) {
+					dev_err(pt->dev, "TIMEOUT %d:\n", ae4cmd_q->id);
+					break;
+				}
+
+				reinit_completion(&ae4cmd_q->cmp);
+				continue;
+			}
+		}
+
 		/* Check for DMA descriptor completion */
 		desc = pt_handle_active_desc(chan, desc);
 
@@ -296,6 +333,49 @@ static struct pt_dma_desc *pt_alloc_dma_desc(struct pt_dma_chan *chan,
 	return desc;
 }
 
+static void pt_cmd_callback_work(void *data, int err)
+{
+	struct dma_async_tx_descriptor *tx_desc;
+	struct pt_dma_desc *desc = data;
+	struct dma_chan *dma_chan;
+	struct virt_dma_desc *vd;
+	struct pt_dma_chan *chan;
+	unsigned long flags;
+
+	dma_chan = desc->vd.tx.chan;
+	chan = to_pt_chan(dma_chan);
+
+	if (err == -EINPROGRESS)
+		return;
+
+	tx_desc = &desc->vd.tx;
+	vd = &desc->vd;
+
+	if (err)
+		desc->status = DMA_ERROR;
+
+	spin_lock_irqsave(&chan->vc.lock, flags);
+	if (desc) {
+		if (desc->status != DMA_COMPLETE) {
+			if (desc->status != DMA_ERROR)
+				desc->status = DMA_COMPLETE;
+
+			dma_cookie_complete(tx_desc);
+			dma_descriptor_unmap(tx_desc);
+		} else {
+			tx_desc = NULL;
+		}
+	}
+	spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+	if (tx_desc) {
+		dmaengine_desc_get_callback_invoke(tx_desc, NULL);
+		dma_run_dependencies(tx_desc);
+		list_del(&desc->vd.node);
+		vchan_vdesc_fini(vd);
+	}
+}
+
 static struct pt_dma_desc *pt_create_desc(struct dma_chan *dma_chan,
 					  dma_addr_t dst,
 					  dma_addr_t src,
@@ -327,6 +407,7 @@ static struct pt_dma_desc *pt_create_desc(struct dma_chan *dma_chan,
 	desc->len = len;
 
 	if (pt->ver == AE4_DMA_VERSION) {
+		pt_cmd->pt_cmd_callback = pt_cmd_callback_work;
 		ae4 = container_of(pt, struct ae4_device, pt);
 		ae4cmd_q = &ae4->ae4cmd_q[chan->id];
 		mutex_lock(&ae4cmd_q->cmd_lock);
@@ -367,13 +448,16 @@ static void pt_issue_pending(struct dma_chan *dma_chan)
 {
 	struct pt_dma_chan *chan = to_pt_chan(dma_chan);
 	struct pt_dma_desc *desc;
+	struct pt_device *pt;
 	unsigned long flags;
 	bool engine_is_idle = true;
 
+	pt = chan->pt;
+
 	spin_lock_irqsave(&chan->vc.lock, flags);
 	desc = pt_next_dma_desc(chan);
 
-	if (desc)
+	if (desc && pt->ver != AE4_DMA_VERSION)
 		engine_is_idle = false;
 
 	vchan_issue_pending(&chan->vc);
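
A note on the queue-full test added above: the expression
(MAX_CMD_QLEN + front_wi - rear_ri) % MAX_CMD_QLEN in
ae4_core_queue_full() is the usual circular-buffer occupancy formula,
with one slot kept unused so a full ring is distinguishable from an
empty one. A standalone sketch with a worked example follows; the ring
size of 32 is an assumption for illustration only, not necessarily the
driver's MAX_CMD_QLEN:

#include <stdbool.h>
#include <stdio.h>

#define MAX_CMD_QLEN 32	/* assumed ring size, illustration only */

/* descriptors currently in flight: the write index leads the read
 * index, wrapping modulo the ring size */
static unsigned int ring_occupancy(unsigned int front_wi, unsigned int rear_ri)
{
	return (MAX_CMD_QLEN + front_wi - rear_ri) % MAX_CMD_QLEN;
}

static bool ring_full(unsigned int front_wi, unsigned int rear_ri)
{
	/* one slot stays free so that full and empty differ */
	return ring_occupancy(front_wi, rear_ri) >= MAX_CMD_QLEN - 1;
}

int main(void)
{
	/* wrapped case: writer at 2, reader at 4:
	 * (32 + 2 - 4) % 32 = 30 of the 31 usable slots, not yet full */
	printf("occupancy=%u full=%d\n", ring_occupancy(2, 4), ring_full(2, 4));
	/* one more submission: (32 + 3 - 4) % 32 = 31, now full */
	printf("occupancy=%u full=%d\n", ring_occupancy(3, 4), ring_full(3, 4));
	return 0;
}

So the producer side reports full one submission early and, as in the
pt_cmd_callback() hunk above, waits on the queue's completion instead
of overwriting in-flight descriptors.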