mirror of
https://git.proxmox.com/git/pve-kernel
synced 2025-04-28 05:52:04 +00:00
cherry-pick fixes for AMD Epyc genua systems
both patches are queued for 6.14.2: https://lore.kernel.org/all/20250409115934.968141886@linuxfoundation.org/ issue was reported in our community forum: https://forum.proxmox.com/threads/.164497/post-762617 as we have access to a server where we could reproduce the issue (crash+loop, before the system was up[0]) I tested a kernel with those 2 patches applied - and the system booted successfully. FWIW: I tried building with the original series as well (containing a removal of some PCI-ids), and it also resolved the issue: https://lore.kernel.org/all/20250203162511.911946-1-Basavaraj.Natikar@amd.com/ [0] before proxmox-boot-cleanup.service (so pinning with --next-boot did not help) Signed-off-by: Stoiko Ivanov <s.ivanov@proxmox.com> Link: https://lore.proxmox.com/20250410130834.1745644-1-s.ivanov@proxmox.com
This commit is contained in:
parent
44e828fe16
commit
4a6063d2f9
@ -0,0 +1,31 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
|
||||
Date: Mon, 3 Feb 2025 21:55:10 +0530
|
||||
Subject: [PATCH] dmaengine: ae4dma: Use the MSI count and its corresponding
|
||||
IRQ number
|
||||
|
||||
Instead of using the defined maximum hardware queue, which can lead to
|
||||
incorrect values if the counts mismatch, use the exact supported MSI
|
||||
count and its corresponding IRQ number.
|
||||
|
||||
Fixes: 90a30e268d9b ("dmaengine: ae4dma: Add AMD ae4dma controller driver")
|
||||
Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
|
||||
---
|
||||
drivers/dma/amd/ae4dma/ae4dma-pci.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/drivers/dma/amd/ae4dma/ae4dma-pci.c b/drivers/dma/amd/ae4dma/ae4dma-pci.c
|
||||
index aad0dc4294a3945245737978c077eecf740ccb3a..587c5a10c1a8b2dbb925c31af86b1d0b23438b45 100644
|
||||
--- a/drivers/dma/amd/ae4dma/ae4dma-pci.c
|
||||
+++ b/drivers/dma/amd/ae4dma/ae4dma-pci.c
|
||||
@@ -46,8 +46,8 @@ static int ae4_get_irqs(struct ae4_device *ae4)
|
||||
|
||||
} else {
|
||||
ae4_msix->msix_count = ret;
|
||||
- for (i = 0; i < MAX_AE4_HW_QUEUES; i++)
|
||||
- ae4->ae4_irq[i] = ae4_msix->msix_entry[i].vector;
|
||||
+ for (i = 0; i < ae4_msix->msix_count; i++)
|
||||
+ ae4->ae4_irq[i] = pci_irq_vector(pdev, i);
|
||||
}
|
||||
|
||||
return ret;
|
@ -0,0 +1,201 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
|
||||
Date: Mon, 3 Feb 2025 21:55:11 +0530
|
||||
Subject: [PATCH] dmaengine: ptdma: Utilize the AE4DMA engine's multi-queue
|
||||
functionality
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
As AE4DMA offers multi-channel functionality compared to PTDMA’s single
|
||||
queue, utilize multi-queue, which supports higher speeds than PTDMA, to
|
||||
achieve higher performance using the AE4DMA workqueue based mechanism.
|
||||
|
||||
Fixes: 69a47b16a51b ("dmaengine: ptdma: Extend ptdma to support multi-channel and version")
|
||||
Signed-off-by: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
|
||||
---
|
||||
drivers/dma/amd/ae4dma/ae4dma.h | 2 +
|
||||
drivers/dma/amd/ptdma/ptdma-dmaengine.c | 90 ++++++++++++++++++++++++-
|
||||
2 files changed, 89 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/drivers/dma/amd/ae4dma/ae4dma.h b/drivers/dma/amd/ae4dma/ae4dma.h
|
||||
index 265c5d4360080d6a0cc77f2bab507fde761d5461..57f6048726bb68da03e145d0c69f4bdcd4012c6f 100644
|
||||
--- a/drivers/dma/amd/ae4dma/ae4dma.h
|
||||
+++ b/drivers/dma/amd/ae4dma/ae4dma.h
|
||||
@@ -37,6 +37,8 @@
|
||||
#define AE4_DMA_VERSION 4
|
||||
#define CMD_AE4_DESC_DW0_VAL 2
|
||||
|
||||
+#define AE4_TIME_OUT 5000
|
||||
+
|
||||
struct ae4_msix {
|
||||
int msix_count;
|
||||
struct msix_entry msix_entry[MAX_AE4_HW_QUEUES];
|
||||
diff --git a/drivers/dma/amd/ptdma/ptdma-dmaengine.c b/drivers/dma/amd/ptdma/ptdma-dmaengine.c
|
||||
index 35c84ec9608b4fd119972e3cd9abedf818dff743..715ac3ae067b857830db85e170787e30f3ae6b1d 100644
|
||||
--- a/drivers/dma/amd/ptdma/ptdma-dmaengine.c
|
||||
+++ b/drivers/dma/amd/ptdma/ptdma-dmaengine.c
|
||||
@@ -198,8 +198,10 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
|
||||
{
|
||||
struct dma_async_tx_descriptor *tx_desc;
|
||||
struct virt_dma_desc *vd;
|
||||
+ struct pt_device *pt;
|
||||
unsigned long flags;
|
||||
|
||||
+ pt = chan->pt;
|
||||
/* Loop over descriptors until one is found with commands */
|
||||
do {
|
||||
if (desc) {
|
||||
@@ -217,7 +219,7 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
|
||||
|
||||
spin_lock_irqsave(&chan->vc.lock, flags);
|
||||
|
||||
- if (desc) {
|
||||
+ if (pt->ver != AE4_DMA_VERSION && desc) {
|
||||
if (desc->status != DMA_COMPLETE) {
|
||||
if (desc->status != DMA_ERROR)
|
||||
desc->status = DMA_COMPLETE;
|
||||
@@ -235,7 +237,7 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
|
||||
|
||||
spin_unlock_irqrestore(&chan->vc.lock, flags);
|
||||
|
||||
- if (tx_desc) {
|
||||
+ if (pt->ver != AE4_DMA_VERSION && tx_desc) {
|
||||
dmaengine_desc_get_callback_invoke(tx_desc, NULL);
|
||||
dma_run_dependencies(tx_desc);
|
||||
vchan_vdesc_fini(vd);
|
||||
@@ -245,11 +247,25 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
+static inline bool ae4_core_queue_full(struct pt_cmd_queue *cmd_q)
|
||||
+{
|
||||
+ u32 front_wi = readl(cmd_q->reg_control + AE4_WR_IDX_OFF);
|
||||
+ u32 rear_ri = readl(cmd_q->reg_control + AE4_RD_IDX_OFF);
|
||||
+
|
||||
+ if (((MAX_CMD_QLEN + front_wi - rear_ri) % MAX_CMD_QLEN) >= (MAX_CMD_QLEN - 1))
|
||||
+ return true;
|
||||
+
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
static void pt_cmd_callback(void *data, int err)
|
||||
{
|
||||
struct pt_dma_desc *desc = data;
|
||||
+ struct ae4_cmd_queue *ae4cmd_q;
|
||||
struct dma_chan *dma_chan;
|
||||
struct pt_dma_chan *chan;
|
||||
+ struct ae4_device *ae4;
|
||||
+ struct pt_device *pt;
|
||||
int ret;
|
||||
|
||||
if (err == -EINPROGRESS)
|
||||
@@ -257,11 +273,32 @@ static void pt_cmd_callback(void *data, int err)
|
||||
|
||||
dma_chan = desc->vd.tx.chan;
|
||||
chan = to_pt_chan(dma_chan);
|
||||
+ pt = chan->pt;
|
||||
|
||||
if (err)
|
||||
desc->status = DMA_ERROR;
|
||||
|
||||
while (true) {
|
||||
+ if (pt->ver == AE4_DMA_VERSION) {
|
||||
+ ae4 = container_of(pt, struct ae4_device, pt);
|
||||
+ ae4cmd_q = &ae4->ae4cmd_q[chan->id];
|
||||
+
|
||||
+ if (ae4cmd_q->q_cmd_count >= (CMD_Q_LEN - 1) ||
|
||||
+ ae4_core_queue_full(&ae4cmd_q->cmd_q)) {
|
||||
+ wake_up(&ae4cmd_q->q_w);
|
||||
+
|
||||
+ if (wait_for_completion_timeout(&ae4cmd_q->cmp,
|
||||
+ msecs_to_jiffies(AE4_TIME_OUT))
|
||||
+ == 0) {
|
||||
+ dev_err(pt->dev, "TIMEOUT %d:\n", ae4cmd_q->id);
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ reinit_completion(&ae4cmd_q->cmp);
|
||||
+ continue;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
/* Check for DMA descriptor completion */
|
||||
desc = pt_handle_active_desc(chan, desc);
|
||||
|
||||
@@ -296,6 +333,49 @@ static struct pt_dma_desc *pt_alloc_dma_desc(struct pt_dma_chan *chan,
|
||||
return desc;
|
||||
}
|
||||
|
||||
+static void pt_cmd_callback_work(void *data, int err)
|
||||
+{
|
||||
+ struct dma_async_tx_descriptor *tx_desc;
|
||||
+ struct pt_dma_desc *desc = data;
|
||||
+ struct dma_chan *dma_chan;
|
||||
+ struct virt_dma_desc *vd;
|
||||
+ struct pt_dma_chan *chan;
|
||||
+ unsigned long flags;
|
||||
+
|
||||
+ dma_chan = desc->vd.tx.chan;
|
||||
+ chan = to_pt_chan(dma_chan);
|
||||
+
|
||||
+ if (err == -EINPROGRESS)
|
||||
+ return;
|
||||
+
|
||||
+ tx_desc = &desc->vd.tx;
|
||||
+ vd = &desc->vd;
|
||||
+
|
||||
+ if (err)
|
||||
+ desc->status = DMA_ERROR;
|
||||
+
|
||||
+ spin_lock_irqsave(&chan->vc.lock, flags);
|
||||
+ if (desc) {
|
||||
+ if (desc->status != DMA_COMPLETE) {
|
||||
+ if (desc->status != DMA_ERROR)
|
||||
+ desc->status = DMA_COMPLETE;
|
||||
+
|
||||
+ dma_cookie_complete(tx_desc);
|
||||
+ dma_descriptor_unmap(tx_desc);
|
||||
+ } else {
|
||||
+ tx_desc = NULL;
|
||||
+ }
|
||||
+ }
|
||||
+ spin_unlock_irqrestore(&chan->vc.lock, flags);
|
||||
+
|
||||
+ if (tx_desc) {
|
||||
+ dmaengine_desc_get_callback_invoke(tx_desc, NULL);
|
||||
+ dma_run_dependencies(tx_desc);
|
||||
+ list_del(&desc->vd.node);
|
||||
+ vchan_vdesc_fini(vd);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
static struct pt_dma_desc *pt_create_desc(struct dma_chan *dma_chan,
|
||||
dma_addr_t dst,
|
||||
dma_addr_t src,
|
||||
@@ -327,6 +407,7 @@ static struct pt_dma_desc *pt_create_desc(struct dma_chan *dma_chan,
|
||||
desc->len = len;
|
||||
|
||||
if (pt->ver == AE4_DMA_VERSION) {
|
||||
+ pt_cmd->pt_cmd_callback = pt_cmd_callback_work;
|
||||
ae4 = container_of(pt, struct ae4_device, pt);
|
||||
ae4cmd_q = &ae4->ae4cmd_q[chan->id];
|
||||
mutex_lock(&ae4cmd_q->cmd_lock);
|
||||
@@ -367,13 +448,16 @@ static void pt_issue_pending(struct dma_chan *dma_chan)
|
||||
{
|
||||
struct pt_dma_chan *chan = to_pt_chan(dma_chan);
|
||||
struct pt_dma_desc *desc;
|
||||
+ struct pt_device *pt;
|
||||
unsigned long flags;
|
||||
bool engine_is_idle = true;
|
||||
|
||||
+ pt = chan->pt;
|
||||
+
|
||||
spin_lock_irqsave(&chan->vc.lock, flags);
|
||||
|
||||
desc = pt_next_dma_desc(chan);
|
||||
- if (desc)
|
||||
+ if (desc && pt->ver != AE4_DMA_VERSION)
|
||||
engine_is_idle = false;
|
||||
|
||||
vchan_issue_pending(&chan->vc);
|
Loading…
Reference in New Issue
Block a user