drm/xe: Add devcoredump chunking

Chunk devcoredump into 1.5G pieces to avoid hitting the kvmalloc limit
of 2G. A simple algorithm reads 1.5G at a time in the xe_devcoredump_read
callback as needed.

Some memory allocations are changed to GFP_ATOMIC as they are done in
xe_devcoredump_read, which holds a lock in the path of reclaim. The
allocations are small, so in practice they should never fail.
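
Below is a minimal, illustrative user-space sketch (not part of the patch)
of the offset arithmetic the read callback relies on: the chunk base is the
read offset rounded down to a 1.5G boundary, and the copy source is the
remainder of the offset within that chunk. CHUNK_MAX and the division-based
ALIGN_DOWN are local stand-ins for the kernel's XE_DEVCOREDUMP_CHUNK_MAX and
ALIGN_DOWN().

/* Illustrative sketch of the chunk arithmetic, user space only. */
#include <stdio.h>

#define CHUNK_MAX		(1536ULL << 20)	/* SZ_512M + SZ_1G */
#define ALIGN_DOWN(x, a)	((x) / (a) * (a))

int main(void)
{
	unsigned long long offsets[] = { 0, CHUNK_MAX - 1, CHUNK_MAX,
					 3 * CHUNK_MAX + 4096 };
	unsigned long long chunk_position = 0;

	for (int i = 0; i < 4; i++) {
		unsigned long long offset = offsets[i];

		/* Re-render the cached chunk only when offset leaves it */
		if (offset >= chunk_position + CHUNK_MAX ||
		    offset < chunk_position)
			chunk_position = ALIGN_DOWN(offset, CHUNK_MAX);

		/* The copy source is offset % CHUNK_MAX into that chunk */
		printf("offset %llu -> chunk %llu, index %llu\n",
		       offset, chunk_position, offset % CHUNK_MAX);
	}
	return 0;
}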

v2:
 - Update commit message wrt gfp atomic (John H)
v6:
 - Drop GFP_ATOMIC change for hwconfig (John H)

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Link: https://lore.kernel.org/r/20250423171725.597955-2-matthew.brost@intel.com
Matthew Brost 2025-04-23 10:17:22 -07:00
parent f9e4d8bb6a
commit c4a2e5f865
2 changed files with 48 additions and 11 deletions

--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c

@@ -80,7 +80,8 @@ static struct xe_guc *exec_queue_to_guc(struct xe_exec_queue *q)
 	return &q->gt->uc.guc;
 }
 
-static ssize_t __xe_devcoredump_read(char *buffer, size_t count,
+static ssize_t __xe_devcoredump_read(char *buffer, ssize_t count,
+				     ssize_t start,
 				     struct xe_devcoredump *coredump)
 {
 	struct xe_device *xe;
@@ -94,7 +95,7 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t count,
 	ss = &coredump->snapshot;
 
 	iter.data = buffer;
-	iter.start = 0;
+	iter.start = start;
 	iter.remain = count;
 
 	p = drm_coredump_printer(&iter);
@@ -168,6 +169,8 @@ static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss)
 	ss->vm = NULL;
 }
 
+#define XE_DEVCOREDUMP_CHUNK_MAX	(SZ_512M + SZ_1G)
+
 static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
 				   size_t count, void *data, size_t datalen)
 {
@@ -183,6 +186,9 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
 	/* Ensure delayed work is captured before continuing */
 	flush_work(&ss->work);
 
+	if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX)
+		xe_pm_runtime_get(gt_to_xe(ss->gt));
+
 	mutex_lock(&coredump->lock);
 
 	if (!ss->read.buffer) {
@@ -195,12 +201,26 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
 		return 0;
 	}
 
+	if (offset >= ss->read.chunk_position + XE_DEVCOREDUMP_CHUNK_MAX ||
+	    offset < ss->read.chunk_position) {
+		ss->read.chunk_position =
+			ALIGN_DOWN(offset, XE_DEVCOREDUMP_CHUNK_MAX);
+
+		__xe_devcoredump_read(ss->read.buffer,
+				      XE_DEVCOREDUMP_CHUNK_MAX,
+				      ss->read.chunk_position, coredump);
+	}
+
 	byte_copied = count < ss->read.size - offset ? count :
 		ss->read.size - offset;
-	memcpy(buffer, ss->read.buffer + offset, byte_copied);
+	memcpy(buffer, ss->read.buffer +
+	       (offset % XE_DEVCOREDUMP_CHUNK_MAX), byte_copied);
 
 	mutex_unlock(&coredump->lock);
 
+	if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX)
+		xe_pm_runtime_put(gt_to_xe(ss->gt));
+
 	return byte_copied;
 }
 
@@ -254,17 +274,32 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
 	xe_guc_exec_queue_snapshot_capture_delayed(ss->ge);
 	xe_force_wake_put(gt_to_fw(ss->gt), fw_ref);
 
-	xe_pm_runtime_put(xe);
+	ss->read.chunk_position = 0;
 
 	/* Calculate devcoredump size */
-	ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump);
+	ss->read.size = __xe_devcoredump_read(NULL, LONG_MAX, 0, coredump);
 
-	ss->read.buffer = kvmalloc(ss->read.size, GFP_USER);
-	if (!ss->read.buffer)
-		return;
+	if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) {
+		ss->read.buffer = kvmalloc(XE_DEVCOREDUMP_CHUNK_MAX,
+					   GFP_USER);
+		if (!ss->read.buffer)
+			goto put_pm;
 
-	__xe_devcoredump_read(ss->read.buffer, ss->read.size, coredump);
-	xe_devcoredump_snapshot_free(ss);
+		__xe_devcoredump_read(ss->read.buffer,
+				      XE_DEVCOREDUMP_CHUNK_MAX,
+				      0, coredump);
+	} else {
+		ss->read.buffer = kvmalloc(ss->read.size, GFP_USER);
+		if (!ss->read.buffer)
+			goto put_pm;
+
+		__xe_devcoredump_read(ss->read.buffer, ss->read.size, 0,
+				      coredump);
+		xe_devcoredump_snapshot_free(ss);
+	}
+
+put_pm:
+	xe_pm_runtime_put(xe);
 }
 
 static void devcoredump_snapshot(struct xe_devcoredump *coredump,
@@ -425,7 +460,7 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, char suffi
 	if (offset & 3)
 		drm_printf(p, "Offset not word aligned: %zu", offset);
 
-	line_buff = kzalloc(DMESG_MAX_LINE_LEN, GFP_KERNEL);
+	line_buff = kzalloc(DMESG_MAX_LINE_LEN, GFP_ATOMIC);
 	if (!line_buff) {
 		drm_printf(p, "Failed to allocate line buffer\n");
 		return;

--- a/drivers/gpu/drm/xe/xe_devcoredump_types.h
+++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h

@@ -66,6 +66,8 @@ struct xe_devcoredump_snapshot {
 	struct {
 		/** @read.size: size of devcoredump in human readable format */
 		ssize_t size;
+		/** @read.chunk_position: position of devcoredump chunk */
+		ssize_t chunk_position;
 		/** @read.buffer: buffer of devcoredump in human readable format */
 		char *buffer;
 	} read;
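
For context, a hedged user-space sketch of how such a dump is consumed: the
devcoredump class device exposes a data node that is read sequentially, and
each read() enters xe_devcoredump_read() with a growing offset, so the driver
re-renders the matching 1.5G chunk on demand. The devcd0 instance number and
the output file name below are illustrative only.

/* Illustrative reader; instance number and output path are examples. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[64 << 10];	/* 64K reads, far below the chunk size */
	ssize_t n;
	int in = open("/sys/class/devcoredump/devcd0/data", O_RDONLY);
	int out = open("xe_coredump.txt", O_WRONLY | O_CREAT | O_TRUNC, 0644);

	if (in < 0 || out < 0) {
		perror("open");
		return 1;
	}

	/* Each read() advances the offset; crossing a 1.5G boundary
	 * triggers a chunk re-render inside the driver. */
	while ((n = read(in, buf, sizeof(buf))) > 0)
		if (write(out, buf, n) != n)
			break;

	close(in);
	close(out);
	return 0;
}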