mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-08-29 22:34:46 +00:00

On current platforms with multiple GTs, all of the GT IDs are consecutive; as a result we know that the GT IDs range from 0 to gt_count-1 and can determine if a GT ID is valid by comparing against the count.

The consecutive nature of GT IDs may not hold true on future platforms if/when we have platforms that are both multi-tile and have multiple GTs within each tile. Once such platforms exist, it's quite possible that we could wind up with something like a GT list composed of IDs 0, 2, and 3 with no GT 1 (which would be a 2-tile platform with media only on the second tile).

To future-proof the code we should stop comparing against the GT count to determine whether a GT ID is valid or not. Instead we should do an actual lookup of the ID to determine whether the GT exists. This also means that our GT loop macro should not end at the GT count, but should rather examine the entire space up to (# of tiles) * (max GT per tile) to ensure it doesn't stop prematurely.

Reviewed-by: Jonathan Cavitt <jonathan.cavitt@intel.com>
Link: https://lore.kernel.org/r/20250701201320.2514369-15-matthew.d.roper@intel.com
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
967 lines
28 KiB
C
967 lines
28 KiB
C
// SPDX-License-Identifier: MIT
|
|
/*
|
|
* Copyright © 2025 Intel Corporation
|
|
*/
|
|
|
|
#include <linux/anon_inodes.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/poll.h>
|
|
#include <linux/types.h>
|
|
|
|
#include <drm/drm_drv.h>
|
|
#include <generated/xe_wa_oob.h>
|
|
#include <uapi/drm/xe_drm.h>
|
|
|
|
#include "xe_bo.h"
|
|
#include "xe_device.h"
|
|
#include "xe_eu_stall.h"
|
|
#include "xe_force_wake.h"
|
|
#include "xe_gt_mcr.h"
|
|
#include "xe_gt_printk.h"
|
|
#include "xe_gt_topology.h"
|
|
#include "xe_macros.h"
|
|
#include "xe_observation.h"
|
|
#include "xe_pm.h"
|
|
#include "xe_trace.h"
|
|
#include "xe_wa.h"
|
|
|
|
#include "regs/xe_eu_stall_regs.h"
|
|
#include "regs/xe_gt_regs.h"
|
|
|
|
#define POLL_PERIOD_MS 5
|
|
|
|
static size_t per_xecore_buf_size = SZ_512K;
|
|
|
|
struct per_xecore_buf {
|
|
/* Buffer vaddr */
|
|
u8 *vaddr;
|
|
/* Write pointer */
|
|
u32 write;
|
|
/* Read pointer */
|
|
u32 read;
|
|
};
|
|
|
|
struct xe_eu_stall_data_stream {
|
|
bool pollin;
|
|
bool enabled;
|
|
int wait_num_reports;
|
|
int sampling_rate_mult;
|
|
wait_queue_head_t poll_wq;
|
|
size_t data_record_size;
|
|
size_t per_xecore_buf_size;
|
|
|
|
struct xe_gt *gt;
|
|
struct xe_bo *bo;
|
|
/* Lock to protect data buffer pointers */
|
|
struct mutex xecore_buf_lock;
|
|
struct per_xecore_buf *xecore_buf;
|
|
struct {
|
|
bool reported_to_user;
|
|
xe_dss_mask_t mask;
|
|
} data_drop;
|
|
struct delayed_work buf_poll_work;
|
|
};
|
|
|
|
struct xe_eu_stall_gt {
|
|
/* Lock to protect stream */
|
|
struct mutex stream_lock;
|
|
/* EU stall data stream */
|
|
struct xe_eu_stall_data_stream *stream;
|
|
/* Workqueue to schedule buffer pointers polling work */
|
|
struct workqueue_struct *buf_ptr_poll_wq;
|
|
};
|
|
|
|
/**
 * struct eu_stall_open_properties - Properties supplied by user space when
 *				     an EU stall stream is opened.
 *
 * @sampling_rate_mult: Multiplier of the EU stall sampling rate; the HW
 *			takes a sample every (sampling_rate_mult x 251) cycles.
 * @wait_num_reports: Number of EU stall data reports that must be available
 *		      before poll() is unblocked.
 * @gt: GT whose EU stall data is to be captured.
 */
struct eu_stall_open_properties {
	int sampling_rate_mult;
	int wait_num_reports;
	struct xe_gt *gt;
};
|
|
|
|
/*
|
|
* EU stall data format for PVC
|
|
*/
|
|
struct xe_eu_stall_data_pvc {
|
|
__u64 ip_addr:29; /* Bits 0 to 28 */
|
|
__u64 active_count:8; /* Bits 29 to 36 */
|
|
__u64 other_count:8; /* Bits 37 to 44 */
|
|
__u64 control_count:8; /* Bits 45 to 52 */
|
|
__u64 pipestall_count:8; /* Bits 53 to 60 */
|
|
__u64 send_count:8; /* Bits 61 to 68 */
|
|
__u64 dist_acc_count:8; /* Bits 69 to 76 */
|
|
__u64 sbid_count:8; /* Bits 77 to 84 */
|
|
__u64 sync_count:8; /* Bits 85 to 92 */
|
|
__u64 inst_fetch_count:8; /* Bits 93 to 100 */
|
|
__u64 unused_bits:27;
|
|
__u64 unused[6];
|
|
} __packed;
|
|
|
|
/*
|
|
* EU stall data format for Xe2 arch GPUs (LNL, BMG).
|
|
*/
|
|
struct xe_eu_stall_data_xe2 {
|
|
__u64 ip_addr:29; /* Bits 0 to 28 */
|
|
__u64 tdr_count:8; /* Bits 29 to 36 */
|
|
__u64 other_count:8; /* Bits 37 to 44 */
|
|
__u64 control_count:8; /* Bits 45 to 52 */
|
|
__u64 pipestall_count:8; /* Bits 53 to 60 */
|
|
__u64 send_count:8; /* Bits 61 to 68 */
|
|
__u64 dist_acc_count:8; /* Bits 69 to 76 */
|
|
__u64 sbid_count:8; /* Bits 77 to 84 */
|
|
__u64 sync_count:8; /* Bits 85 to 92 */
|
|
__u64 inst_fetch_count:8; /* Bits 93 to 100 */
|
|
__u64 active_count:8; /* Bits 101 to 108 */
|
|
__u64 ex_id:3; /* Bits 109 to 111 */
|
|
__u64 end_flag:1; /* Bit 112 */
|
|
__u64 unused_bits:15;
|
|
__u64 unused[6];
|
|
} __packed;
|
|
|
|
/* Sampling rates reported to user space: the first seven multiples of 251 */
const u64 eu_stall_sampling_rates[] = {
	251 * 1, 251 * 2, 251 * 3, 251 * 4, 251 * 5, 251 * 6, 251 * 7,
};
|
|
|
|
/**
|
|
* xe_eu_stall_get_sampling_rates - get EU stall sampling rates information.
|
|
*
|
|
* @num_rates: Pointer to a u32 to return the number of sampling rates.
|
|
* @rates: double u64 pointer to point to an array of sampling rates.
|
|
*
|
|
* Stores the number of sampling rates and pointer to the array of
|
|
* sampling rates in the input pointers.
|
|
*
|
|
* Returns: Size of the EU stall sampling rates array.
|
|
*/
|
|
size_t xe_eu_stall_get_sampling_rates(u32 *num_rates, const u64 **rates)
|
|
{
|
|
*num_rates = ARRAY_SIZE(eu_stall_sampling_rates);
|
|
*rates = eu_stall_sampling_rates;
|
|
|
|
return sizeof(eu_stall_sampling_rates);
|
|
}
|
|
|
|
/**
|
|
* xe_eu_stall_get_per_xecore_buf_size - get per XeCore buffer size.
|
|
*
|
|
* Returns: The per XeCore buffer size used to allocate the per GT
|
|
* EU stall data buffer.
|
|
*/
|
|
size_t xe_eu_stall_get_per_xecore_buf_size(void)
|
|
{
|
|
return per_xecore_buf_size;
|
|
}
|
|
|
|
/**
|
|
* xe_eu_stall_data_record_size - get EU stall data record size.
|
|
*
|
|
* @xe: Pointer to a Xe device.
|
|
*
|
|
* Returns: EU stall data record size.
|
|
*/
|
|
size_t xe_eu_stall_data_record_size(struct xe_device *xe)
|
|
{
|
|
size_t record_size = 0;
|
|
|
|
if (xe->info.platform == XE_PVC)
|
|
record_size = sizeof(struct xe_eu_stall_data_pvc);
|
|
else if (GRAPHICS_VER(xe) >= 20)
|
|
record_size = sizeof(struct xe_eu_stall_data_xe2);
|
|
|
|
xe_assert(xe, is_power_of_2(record_size));
|
|
|
|
return record_size;
|
|
}
|
|
|
|
/**
|
|
* num_data_rows - Return the number of EU stall data rows of 64B each
|
|
* for a given data size.
|
|
*
|
|
* @data_size: EU stall data size
|
|
*/
|
|
static u32 num_data_rows(u32 data_size)
|
|
{
|
|
return data_size >> 6;
|
|
}
|
|
|
|
static void xe_eu_stall_fini(void *arg)
|
|
{
|
|
struct xe_gt *gt = arg;
|
|
|
|
destroy_workqueue(gt->eu_stall->buf_ptr_poll_wq);
|
|
mutex_destroy(>->eu_stall->stream_lock);
|
|
kfree(gt->eu_stall);
|
|
}
|
|
|
|
/**
|
|
* xe_eu_stall_init() - Allocate and initialize GT level EU stall data
|
|
* structure xe_eu_stall_gt within struct xe_gt.
|
|
*
|
|
* @gt: GT being initialized.
|
|
*
|
|
* Returns: zero on success or a negative error code.
|
|
*/
|
|
int xe_eu_stall_init(struct xe_gt *gt)
|
|
{
|
|
struct xe_device *xe = gt_to_xe(gt);
|
|
int ret;
|
|
|
|
if (!xe_eu_stall_supported_on_platform(xe))
|
|
return 0;
|
|
|
|
gt->eu_stall = kzalloc(sizeof(*gt->eu_stall), GFP_KERNEL);
|
|
if (!gt->eu_stall) {
|
|
ret = -ENOMEM;
|
|
goto exit;
|
|
}
|
|
|
|
mutex_init(>->eu_stall->stream_lock);
|
|
|
|
gt->eu_stall->buf_ptr_poll_wq = alloc_ordered_workqueue("xe_eu_stall", 0);
|
|
if (!gt->eu_stall->buf_ptr_poll_wq) {
|
|
ret = -ENOMEM;
|
|
goto exit_free;
|
|
}
|
|
|
|
return devm_add_action_or_reset(xe->drm.dev, xe_eu_stall_fini, gt);
|
|
exit_free:
|
|
mutex_destroy(>->eu_stall->stream_lock);
|
|
kfree(gt->eu_stall);
|
|
exit:
|
|
return ret;
|
|
}
|
|
|
|
/*
 * Property handler for DRM_XE_EU_STALL_PROP_SAMPLE_RATE.
 *
 * The user value is divided by 251 to obtain the sampling rate multiplier,
 * which must lie in the range 1..7. Note that the debug message prints the
 * multiplier, not the value originally passed in.
 *
 * Returns: 0 on success, -EINVAL if the multiplier is out of range.
 */
static int set_prop_eu_stall_sampling_rate(struct xe_device *xe, u64 value,
					   struct eu_stall_open_properties *props)
{
	u64 mult = div_u64(value, 251);

	if (mult < 1 || mult > 7) {
		drm_dbg(&xe->drm, "Invalid EU stall sampling rate %llu\n", mult);
		return -EINVAL;
	}

	props->sampling_rate_mult = mult;

	return 0;
}
|
|
|
|
/*
 * Property handler for DRM_XE_EU_STALL_PROP_WAIT_NUM_REPORTS.
 *
 * The property is stored in an int, so a value that does not fit is rejected
 * here instead of being silently truncated (a truncated value could otherwise
 * alias a small, valid report count). The check against the buffer capacity
 * is done later, when the stream is initialized.
 *
 * Returns: 0 on success, -EINVAL if @value does not fit in an int.
 */
static int set_prop_eu_stall_wait_num_reports(struct xe_device *xe, u64 value,
					      struct eu_stall_open_properties *props)
{
	if (value > INT_MAX) {
		drm_dbg(&xe->drm, "Invalid EU stall event report count %llu\n", value);
		return -EINVAL;
	}

	props->wait_num_reports = value;

	return 0;
}
|
|
|
|
/*
 * Property handler for DRM_XE_EU_STALL_PROP_GT_ID.
 *
 * The GT is looked up by ID rather than compared against a GT count, so
 * any ID for which no GT exists is rejected.
 *
 * NOTE(review): @value is handed to xe_device_get_gt() unchanged; confirm
 * that its ID parameter is wide enough that large values cannot wrap onto
 * a valid GT ID.
 *
 * Returns: 0 on success, -EINVAL if no GT with that ID exists.
 */
static int set_prop_eu_stall_gt_id(struct xe_device *xe, u64 value,
				   struct eu_stall_open_properties *props)
{
	struct xe_gt *gt = xe_device_get_gt(xe, value);

	if (gt) {
		props->gt = gt;
		return 0;
	}

	drm_dbg(&xe->drm, "Invalid GT ID %llu for EU stall sampling\n", value);

	return -EINVAL;
}
|
|
|
|
typedef int (*set_eu_stall_property_fn)(struct xe_device *xe, u64 value,
|
|
struct eu_stall_open_properties *props);
|
|
|
|
static const set_eu_stall_property_fn xe_set_eu_stall_property_funcs[] = {
|
|
[DRM_XE_EU_STALL_PROP_SAMPLE_RATE] = set_prop_eu_stall_sampling_rate,
|
|
[DRM_XE_EU_STALL_PROP_WAIT_NUM_REPORTS] = set_prop_eu_stall_wait_num_reports,
|
|
[DRM_XE_EU_STALL_PROP_GT_ID] = set_prop_eu_stall_gt_id,
|
|
};
|
|
|
|
/*
 * Handler for the DRM_XE_EU_STALL_EXTENSION_SET_PROPERTY user extension.
 *
 * Copies a struct drm_xe_ext_set_property from user space, validates the
 * property ID and padding, and dispatches to the matching property handler.
 *
 * Returns: 0 on success, -EFAULT if the copy from user space fails, -EINVAL
 * for an unknown property or non-zero padding, or the handler's error code.
 */
static int xe_eu_stall_user_ext_set_property(struct xe_device *xe, u64 extension,
					     struct eu_stall_open_properties *props)
{
	u64 __user *uptr = u64_to_user_ptr(extension);
	struct drm_xe_ext_set_property ext;
	u32 prop;
	int err;

	err = copy_from_user(&ext, uptr, sizeof(ext));
	if (XE_IOCTL_DBG(xe, err))
		return -EFAULT;

	if (XE_IOCTL_DBG(xe, ext.property >= ARRAY_SIZE(xe_set_eu_stall_property_funcs)))
		return -EINVAL;

	if (XE_IOCTL_DBG(xe, ext.pad))
		return -EINVAL;

	/* Clamp the user-controlled index against speculative execution */
	prop = array_index_nospec(ext.property, ARRAY_SIZE(xe_set_eu_stall_property_funcs));

	return xe_set_eu_stall_property_funcs[prop](xe, ext.value, props);
}
|
|
|
|
typedef int (*xe_eu_stall_user_extension_fn)(struct xe_device *xe, u64 extension,
|
|
struct eu_stall_open_properties *props);
|
|
static const xe_eu_stall_user_extension_fn xe_eu_stall_user_extension_funcs[] = {
|
|
[DRM_XE_EU_STALL_EXTENSION_SET_PROPERTY] = xe_eu_stall_user_ext_set_property,
|
|
};
|
|
|
|
#define MAX_USER_EXTENSIONS 5
|
|
static int xe_eu_stall_user_extensions(struct xe_device *xe, u64 extension,
|
|
int ext_number, struct eu_stall_open_properties *props)
|
|
{
|
|
u64 __user *address = u64_to_user_ptr(extension);
|
|
struct drm_xe_user_extension ext;
|
|
int err;
|
|
u32 idx;
|
|
|
|
if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
|
|
return -E2BIG;
|
|
|
|
err = copy_from_user(&ext, address, sizeof(ext));
|
|
if (XE_IOCTL_DBG(xe, err))
|
|
return -EFAULT;
|
|
|
|
if (XE_IOCTL_DBG(xe, ext.pad) ||
|
|
XE_IOCTL_DBG(xe, ext.name >= ARRAY_SIZE(xe_eu_stall_user_extension_funcs)))
|
|
return -EINVAL;
|
|
|
|
idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_eu_stall_user_extension_funcs));
|
|
err = xe_eu_stall_user_extension_funcs[idx](xe, extension, props);
|
|
if (XE_IOCTL_DBG(xe, err))
|
|
return err;
|
|
|
|
if (ext.next_extension)
|
|
return xe_eu_stall_user_extensions(xe, ext.next_extension, ++ext_number, props);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* buf_data_size - Calculate the number of bytes in a circular buffer
|
|
* given the read and write pointers and the size of
|
|
* the buffer.
|
|
*
|
|
* @buf_size: Size of the circular buffer
|
|
* @read_ptr: Read pointer with an additional overflow bit
|
|
* @write_ptr: Write pointer with an additional overflow bit
|
|
*
|
|
* Since the read and write pointers have an additional overflow bit,
|
|
* this function calculates the offsets from the pointers and use the
|
|
* offsets to calculate the data size in the buffer.
|
|
*
|
|
* Returns: number of bytes of data in the buffer
|
|
*/
|
|
static u32 buf_data_size(size_t buf_size, u32 read_ptr, u32 write_ptr)
|
|
{
|
|
u32 read_offset, write_offset, size = 0;
|
|
|
|
if (read_ptr == write_ptr)
|
|
goto exit;
|
|
|
|
read_offset = read_ptr & (buf_size - 1);
|
|
write_offset = write_ptr & (buf_size - 1);
|
|
|
|
if (write_offset > read_offset)
|
|
size = write_offset - read_offset;
|
|
else
|
|
size = buf_size - read_offset + write_offset;
|
|
exit:
|
|
return size;
|
|
}
|
|
|
|
/**
|
|
* eu_stall_data_buf_poll - Poll for EU stall data in the buffer.
|
|
*
|
|
* @stream: xe EU stall data stream instance
|
|
*
|
|
* Returns: true if the EU stall buffer contains minimum stall data as
|
|
* specified by the event report count, else false.
|
|
*/
|
|
static bool eu_stall_data_buf_poll(struct xe_eu_stall_data_stream *stream)
|
|
{
|
|
u32 read_ptr, write_ptr_reg, write_ptr, total_data = 0;
|
|
u32 buf_size = stream->per_xecore_buf_size;
|
|
struct per_xecore_buf *xecore_buf;
|
|
struct xe_gt *gt = stream->gt;
|
|
bool min_data_present = false;
|
|
u16 group, instance;
|
|
unsigned int xecore;
|
|
|
|
mutex_lock(&stream->xecore_buf_lock);
|
|
for_each_dss_steering(xecore, gt, group, instance) {
|
|
xecore_buf = &stream->xecore_buf[xecore];
|
|
read_ptr = xecore_buf->read;
|
|
write_ptr_reg = xe_gt_mcr_unicast_read(gt, XEHPC_EUSTALL_REPORT,
|
|
group, instance);
|
|
write_ptr = REG_FIELD_GET(XEHPC_EUSTALL_REPORT_WRITE_PTR_MASK, write_ptr_reg);
|
|
write_ptr <<= 6;
|
|
write_ptr &= ((buf_size << 1) - 1);
|
|
if (!min_data_present) {
|
|
total_data += buf_data_size(buf_size, read_ptr, write_ptr);
|
|
if (num_data_rows(total_data) >= stream->wait_num_reports)
|
|
min_data_present = true;
|
|
}
|
|
if (write_ptr_reg & XEHPC_EUSTALL_REPORT_OVERFLOW_DROP)
|
|
set_bit(xecore, stream->data_drop.mask);
|
|
xecore_buf->write = write_ptr;
|
|
}
|
|
mutex_unlock(&stream->xecore_buf_lock);
|
|
|
|
return min_data_present;
|
|
}
|
|
|
|
/*
 * Clear the overflow-drop bit of the XeCore addressed by @group/@instance.
 *
 * The polarity depends on the platform: on PVC the bit is cleared by writing
 * 1 to it, on Xe2 and later GPUs by writing 0 to it.
 */
static void clear_dropped_eviction_line_bit(struct xe_gt *gt, u16 group, u16 instance)
{
	struct xe_device *xe = gt_to_xe(gt);
	u32 write_ptr_reg = GRAPHICS_VER(xe) >= 20 ?
		_MASKED_BIT_DISABLE(XEHPC_EUSTALL_REPORT_OVERFLOW_DROP) :
		_MASKED_BIT_ENABLE(XEHPC_EUSTALL_REPORT_OVERFLOW_DROP);

	xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT, write_ptr_reg, group, instance);
}
|
|
|
|
static int xe_eu_stall_data_buf_read(struct xe_eu_stall_data_stream *stream,
|
|
char __user *buf, size_t count,
|
|
size_t *total_data_size, struct xe_gt *gt,
|
|
u16 group, u16 instance, unsigned int xecore)
|
|
{
|
|
size_t read_data_size, copy_size, buf_size;
|
|
u32 read_ptr_reg, read_ptr, write_ptr;
|
|
u8 *xecore_start_vaddr, *read_vaddr;
|
|
struct per_xecore_buf *xecore_buf;
|
|
u32 read_offset, write_offset;
|
|
|
|
/* Hardware increments the read and write pointers such that they can
|
|
* overflow into one additional bit. For example, a 256KB size buffer
|
|
* offset pointer needs 18 bits. But HW uses 19 bits for the read and
|
|
* write pointers. This technique avoids wasting a slot in the buffer.
|
|
* Read and write offsets are calculated from the pointers in order to
|
|
* check if the write pointer has wrapped around the array.
|
|
*/
|
|
xecore_buf = &stream->xecore_buf[xecore];
|
|
xecore_start_vaddr = xecore_buf->vaddr;
|
|
read_ptr = xecore_buf->read;
|
|
write_ptr = xecore_buf->write;
|
|
buf_size = stream->per_xecore_buf_size;
|
|
|
|
read_data_size = buf_data_size(buf_size, read_ptr, write_ptr);
|
|
/* Read only the data that the user space buffer can accommodate */
|
|
read_data_size = min_t(size_t, count - *total_data_size, read_data_size);
|
|
if (read_data_size == 0)
|
|
goto exit_drop;
|
|
|
|
read_offset = read_ptr & (buf_size - 1);
|
|
write_offset = write_ptr & (buf_size - 1);
|
|
read_vaddr = xecore_start_vaddr + read_offset;
|
|
|
|
if (write_offset > read_offset) {
|
|
if (copy_to_user(buf + *total_data_size, read_vaddr, read_data_size))
|
|
return -EFAULT;
|
|
} else {
|
|
if (read_data_size >= buf_size - read_offset)
|
|
copy_size = buf_size - read_offset;
|
|
else
|
|
copy_size = read_data_size;
|
|
if (copy_to_user(buf + *total_data_size, read_vaddr, copy_size))
|
|
return -EFAULT;
|
|
if (copy_to_user(buf + *total_data_size + copy_size,
|
|
xecore_start_vaddr, read_data_size - copy_size))
|
|
return -EFAULT;
|
|
}
|
|
|
|
*total_data_size += read_data_size;
|
|
read_ptr += read_data_size;
|
|
|
|
/* Read pointer can overflow into one additional bit */
|
|
read_ptr &= (buf_size << 1) - 1;
|
|
read_ptr_reg = REG_FIELD_PREP(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, (read_ptr >> 6));
|
|
read_ptr_reg = _MASKED_FIELD(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, read_ptr_reg);
|
|
xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT1, read_ptr_reg, group, instance);
|
|
xecore_buf->read = read_ptr;
|
|
trace_xe_eu_stall_data_read(group, instance, read_ptr, write_ptr,
|
|
read_data_size, *total_data_size);
|
|
exit_drop:
|
|
/* Clear drop bit (if set) after any data was read or if the buffer was empty.
|
|
* Drop bit can be set even if the buffer is empty as the buffer may have been emptied
|
|
* in the previous read() and the data drop bit was set during the previous read().
|
|
*/
|
|
if (test_bit(xecore, stream->data_drop.mask)) {
|
|
clear_dropped_eviction_line_bit(gt, group, instance);
|
|
clear_bit(xecore, stream->data_drop.mask);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* xe_eu_stall_stream_read_locked - copy EU stall counters data from the
|
|
* per xecore buffers to the userspace buffer
|
|
* @stream: A stream opened for EU stall count metrics
|
|
* @file: An xe EU stall data stream file
|
|
* @buf: destination buffer given by userspace
|
|
* @count: the number of bytes userspace wants to read
|
|
*
|
|
* Returns: Number of bytes copied or a negative error code
|
|
* If we've successfully copied any data then reporting that takes
|
|
* precedence over any internal error status, so the data isn't lost.
|
|
*/
|
|
static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *stream,
|
|
struct file *file, char __user *buf,
|
|
size_t count)
|
|
{
|
|
struct xe_gt *gt = stream->gt;
|
|
size_t total_size = 0;
|
|
u16 group, instance;
|
|
unsigned int xecore;
|
|
int ret = 0;
|
|
|
|
mutex_lock(&stream->xecore_buf_lock);
|
|
if (bitmap_weight(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS)) {
|
|
if (!stream->data_drop.reported_to_user) {
|
|
stream->data_drop.reported_to_user = true;
|
|
xe_gt_dbg(gt, "EU stall data dropped in XeCores: %*pb\n",
|
|
XE_MAX_DSS_FUSE_BITS, stream->data_drop.mask);
|
|
mutex_unlock(&stream->xecore_buf_lock);
|
|
return -EIO;
|
|
}
|
|
stream->data_drop.reported_to_user = false;
|
|
}
|
|
|
|
for_each_dss_steering(xecore, gt, group, instance) {
|
|
ret = xe_eu_stall_data_buf_read(stream, buf, count, &total_size,
|
|
gt, group, instance, xecore);
|
|
if (ret || count == total_size)
|
|
break;
|
|
}
|
|
mutex_unlock(&stream->xecore_buf_lock);
|
|
return total_size ?: (ret ?: -EAGAIN);
|
|
}
|
|
|
|
/*
|
|
* Userspace must enable the EU stall stream with DRM_XE_OBSERVATION_IOCTL_ENABLE
|
|
* before calling read().
|
|
*
|
|
* Returns: The number of bytes copied or a negative error code on failure.
|
|
* -EIO if HW drops any EU stall data when the buffer is full.
|
|
*/
|
|
static ssize_t xe_eu_stall_stream_read(struct file *file, char __user *buf,
|
|
size_t count, loff_t *ppos)
|
|
{
|
|
struct xe_eu_stall_data_stream *stream = file->private_data;
|
|
struct xe_gt *gt = stream->gt;
|
|
ssize_t ret, aligned_count;
|
|
|
|
aligned_count = ALIGN_DOWN(count, stream->data_record_size);
|
|
if (aligned_count == 0)
|
|
return -EINVAL;
|
|
|
|
if (!stream->enabled) {
|
|
xe_gt_dbg(gt, "EU stall data stream not enabled to read\n");
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (!(file->f_flags & O_NONBLOCK)) {
|
|
do {
|
|
ret = wait_event_interruptible(stream->poll_wq, stream->pollin);
|
|
if (ret)
|
|
return -EINTR;
|
|
|
|
mutex_lock(>->eu_stall->stream_lock);
|
|
ret = xe_eu_stall_stream_read_locked(stream, file, buf, aligned_count);
|
|
mutex_unlock(>->eu_stall->stream_lock);
|
|
} while (ret == -EAGAIN);
|
|
} else {
|
|
mutex_lock(>->eu_stall->stream_lock);
|
|
ret = xe_eu_stall_stream_read_locked(stream, file, buf, aligned_count);
|
|
mutex_unlock(>->eu_stall->stream_lock);
|
|
}
|
|
|
|
/*
|
|
* This may not work correctly if the user buffer is very small.
|
|
* We don't want to block the next read() when there is data in the buffer
|
|
* now, but couldn't be accommodated in the small user buffer.
|
|
*/
|
|
stream->pollin = false;
|
|
|
|
return ret;
|
|
}
|
|
|
|
static void xe_eu_stall_stream_free(struct xe_eu_stall_data_stream *stream)
|
|
{
|
|
struct xe_gt *gt = stream->gt;
|
|
|
|
mutex_destroy(&stream->xecore_buf_lock);
|
|
gt->eu_stall->stream = NULL;
|
|
kfree(stream);
|
|
}
|
|
|
|
static void xe_eu_stall_data_buf_destroy(struct xe_eu_stall_data_stream *stream)
|
|
{
|
|
xe_bo_unpin_map_no_vm(stream->bo);
|
|
kfree(stream->xecore_buf);
|
|
}
|
|
|
|
static int xe_eu_stall_data_buf_alloc(struct xe_eu_stall_data_stream *stream,
|
|
u16 last_xecore)
|
|
{
|
|
struct xe_tile *tile = stream->gt->tile;
|
|
struct xe_bo *bo;
|
|
u32 size;
|
|
|
|
stream->xecore_buf = kcalloc(last_xecore, sizeof(*stream->xecore_buf), GFP_KERNEL);
|
|
if (!stream->xecore_buf)
|
|
return -ENOMEM;
|
|
|
|
size = stream->per_xecore_buf_size * last_xecore;
|
|
|
|
bo = xe_bo_create_pin_map_at_aligned(tile->xe, tile, NULL,
|
|
size, ~0ull, ttm_bo_type_kernel,
|
|
XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, SZ_64);
|
|
if (IS_ERR(bo)) {
|
|
kfree(stream->xecore_buf);
|
|
return PTR_ERR(bo);
|
|
}
|
|
|
|
XE_WARN_ON(!IS_ALIGNED(xe_bo_ggtt_addr(bo), SZ_64));
|
|
stream->bo = bo;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int xe_eu_stall_stream_enable(struct xe_eu_stall_data_stream *stream)
|
|
{
|
|
u32 write_ptr_reg, write_ptr, read_ptr_reg, reg_value;
|
|
struct per_xecore_buf *xecore_buf;
|
|
struct xe_gt *gt = stream->gt;
|
|
u16 group, instance;
|
|
unsigned int fw_ref;
|
|
int xecore;
|
|
|
|
/* Take runtime pm ref and forcewake to disable RC6 */
|
|
xe_pm_runtime_get(gt_to_xe(gt));
|
|
fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_RENDER);
|
|
if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_RENDER)) {
|
|
xe_gt_err(gt, "Failed to get RENDER forcewake\n");
|
|
xe_pm_runtime_put(gt_to_xe(gt));
|
|
return -ETIMEDOUT;
|
|
}
|
|
|
|
if (XE_WA(gt, 22016596838))
|
|
xe_gt_mcr_multicast_write(gt, ROW_CHICKEN2,
|
|
_MASKED_BIT_ENABLE(DISABLE_DOP_GATING));
|
|
|
|
for_each_dss_steering(xecore, gt, group, instance) {
|
|
write_ptr_reg = xe_gt_mcr_unicast_read(gt, XEHPC_EUSTALL_REPORT, group, instance);
|
|
/* Clear any drop bits set and not cleared in the previous session. */
|
|
if (write_ptr_reg & XEHPC_EUSTALL_REPORT_OVERFLOW_DROP)
|
|
clear_dropped_eviction_line_bit(gt, group, instance);
|
|
write_ptr = REG_FIELD_GET(XEHPC_EUSTALL_REPORT_WRITE_PTR_MASK, write_ptr_reg);
|
|
read_ptr_reg = REG_FIELD_PREP(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, write_ptr);
|
|
read_ptr_reg = _MASKED_FIELD(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, read_ptr_reg);
|
|
/* Initialize the read pointer to the write pointer */
|
|
xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT1, read_ptr_reg, group, instance);
|
|
write_ptr <<= 6;
|
|
write_ptr &= (stream->per_xecore_buf_size << 1) - 1;
|
|
xecore_buf = &stream->xecore_buf[xecore];
|
|
xecore_buf->write = write_ptr;
|
|
xecore_buf->read = write_ptr;
|
|
}
|
|
stream->data_drop.reported_to_user = false;
|
|
bitmap_zero(stream->data_drop.mask, XE_MAX_DSS_FUSE_BITS);
|
|
|
|
reg_value = _MASKED_FIELD(EUSTALL_MOCS | EUSTALL_SAMPLE_RATE,
|
|
REG_FIELD_PREP(EUSTALL_MOCS, gt->mocs.uc_index << 1) |
|
|
REG_FIELD_PREP(EUSTALL_SAMPLE_RATE,
|
|
stream->sampling_rate_mult));
|
|
xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_CTRL, reg_value);
|
|
/* GGTT addresses can never be > 32 bits */
|
|
xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE_UPPER, 0);
|
|
reg_value = xe_bo_ggtt_addr(stream->bo);
|
|
reg_value |= REG_FIELD_PREP(XEHPC_EUSTALL_BASE_XECORE_BUF_SZ,
|
|
stream->per_xecore_buf_size / SZ_256K);
|
|
reg_value |= XEHPC_EUSTALL_BASE_ENABLE_SAMPLING;
|
|
xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE, reg_value);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static void eu_stall_data_buf_poll_work_fn(struct work_struct *work)
|
|
{
|
|
struct xe_eu_stall_data_stream *stream =
|
|
container_of(work, typeof(*stream), buf_poll_work.work);
|
|
struct xe_gt *gt = stream->gt;
|
|
|
|
if (eu_stall_data_buf_poll(stream)) {
|
|
stream->pollin = true;
|
|
wake_up(&stream->poll_wq);
|
|
}
|
|
queue_delayed_work(gt->eu_stall->buf_ptr_poll_wq,
|
|
&stream->buf_poll_work,
|
|
msecs_to_jiffies(POLL_PERIOD_MS));
|
|
}
|
|
|
|
static int xe_eu_stall_stream_init(struct xe_eu_stall_data_stream *stream,
|
|
struct eu_stall_open_properties *props)
|
|
{
|
|
unsigned int max_wait_num_reports, xecore, last_xecore, num_xecores;
|
|
struct per_xecore_buf *xecore_buf;
|
|
struct xe_gt *gt = stream->gt;
|
|
xe_dss_mask_t all_xecores;
|
|
u16 group, instance;
|
|
u32 vaddr_offset;
|
|
int ret;
|
|
|
|
bitmap_or(all_xecores, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask,
|
|
XE_MAX_DSS_FUSE_BITS);
|
|
num_xecores = bitmap_weight(all_xecores, XE_MAX_DSS_FUSE_BITS);
|
|
last_xecore = xe_gt_topology_mask_last_dss(all_xecores) + 1;
|
|
|
|
max_wait_num_reports = num_data_rows(per_xecore_buf_size * num_xecores);
|
|
if (props->wait_num_reports == 0 || props->wait_num_reports > max_wait_num_reports) {
|
|
xe_gt_dbg(gt, "Invalid EU stall event report count %u\n",
|
|
props->wait_num_reports);
|
|
xe_gt_dbg(gt, "Minimum event report count is 1, maximum is %u\n",
|
|
max_wait_num_reports);
|
|
return -EINVAL;
|
|
}
|
|
|
|
init_waitqueue_head(&stream->poll_wq);
|
|
mutex_init(&stream->xecore_buf_lock);
|
|
INIT_DELAYED_WORK(&stream->buf_poll_work, eu_stall_data_buf_poll_work_fn);
|
|
stream->per_xecore_buf_size = per_xecore_buf_size;
|
|
stream->sampling_rate_mult = props->sampling_rate_mult;
|
|
stream->wait_num_reports = props->wait_num_reports;
|
|
stream->data_record_size = xe_eu_stall_data_record_size(gt_to_xe(gt));
|
|
|
|
ret = xe_eu_stall_data_buf_alloc(stream, last_xecore);
|
|
if (ret)
|
|
return ret;
|
|
|
|
for_each_dss_steering(xecore, gt, group, instance) {
|
|
xecore_buf = &stream->xecore_buf[xecore];
|
|
vaddr_offset = xecore * stream->per_xecore_buf_size;
|
|
xecore_buf->vaddr = stream->bo->vmap.vaddr + vaddr_offset;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Register the caller on the stream's wait queue and report whether data
 * is ready to be read.
 *
 * Returns: EPOLLIN if the polling work has flagged data as available,
 * otherwise 0.
 */
static __poll_t xe_eu_stall_stream_poll_locked(struct xe_eu_stall_data_stream *stream,
					       struct file *file, poll_table *wait)
{
	poll_wait(file, &stream->poll_wq, wait);

	if (!stream->pollin)
		return 0;

	return EPOLLIN;
}
|
|
|
|
/*
 * poll() handler of the EU stall stream file; evaluates the stream state
 * under the per-GT stream lock.
 *
 * Returns: the poll event mask computed by xe_eu_stall_stream_poll_locked().
 */
static __poll_t xe_eu_stall_stream_poll(struct file *file, poll_table *wait)
{
	struct xe_eu_stall_data_stream *stream = file->private_data;
	struct xe_eu_stall_gt *eu_stall = stream->gt->eu_stall;
	__poll_t ret;

	mutex_lock(&eu_stall->stream_lock);
	ret = xe_eu_stall_stream_poll_locked(stream, file, wait);
	mutex_unlock(&eu_stall->stream_lock);

	return ret;
}
|
|
|
|
static int xe_eu_stall_enable_locked(struct xe_eu_stall_data_stream *stream)
|
|
{
|
|
struct xe_gt *gt = stream->gt;
|
|
int ret = 0;
|
|
|
|
if (stream->enabled)
|
|
return ret;
|
|
|
|
stream->enabled = true;
|
|
|
|
ret = xe_eu_stall_stream_enable(stream);
|
|
|
|
queue_delayed_work(gt->eu_stall->buf_ptr_poll_wq,
|
|
&stream->buf_poll_work,
|
|
msecs_to_jiffies(POLL_PERIOD_MS));
|
|
return ret;
|
|
}
|
|
|
|
static int xe_eu_stall_disable_locked(struct xe_eu_stall_data_stream *stream)
|
|
{
|
|
struct xe_gt *gt = stream->gt;
|
|
|
|
if (!stream->enabled)
|
|
return 0;
|
|
|
|
stream->enabled = false;
|
|
|
|
xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE, 0);
|
|
|
|
cancel_delayed_work_sync(&stream->buf_poll_work);
|
|
|
|
if (XE_WA(gt, 22016596838))
|
|
xe_gt_mcr_multicast_write(gt, ROW_CHICKEN2,
|
|
_MASKED_BIT_DISABLE(DISABLE_DOP_GATING));
|
|
|
|
xe_force_wake_put(gt_to_fw(gt), XE_FW_RENDER);
|
|
xe_pm_runtime_put(gt_to_xe(gt));
|
|
|
|
return 0;
|
|
}
|
|
|
|
static long xe_eu_stall_stream_ioctl_locked(struct xe_eu_stall_data_stream *stream,
|
|
unsigned int cmd, unsigned long arg)
|
|
{
|
|
switch (cmd) {
|
|
case DRM_XE_OBSERVATION_IOCTL_ENABLE:
|
|
return xe_eu_stall_enable_locked(stream);
|
|
case DRM_XE_OBSERVATION_IOCTL_DISABLE:
|
|
return xe_eu_stall_disable_locked(stream);
|
|
}
|
|
|
|
return -EINVAL;
|
|
}
|
|
|
|
static long xe_eu_stall_stream_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
|
{
|
|
struct xe_eu_stall_data_stream *stream = file->private_data;
|
|
struct xe_gt *gt = stream->gt;
|
|
long ret;
|
|
|
|
mutex_lock(>->eu_stall->stream_lock);
|
|
ret = xe_eu_stall_stream_ioctl_locked(stream, cmd, arg);
|
|
mutex_unlock(>->eu_stall->stream_lock);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static int xe_eu_stall_stream_close(struct inode *inode, struct file *file)
|
|
{
|
|
struct xe_eu_stall_data_stream *stream = file->private_data;
|
|
struct xe_gt *gt = stream->gt;
|
|
|
|
drm_dev_put(>->tile->xe->drm);
|
|
|
|
mutex_lock(>->eu_stall->stream_lock);
|
|
xe_eu_stall_disable_locked(stream);
|
|
xe_eu_stall_data_buf_destroy(stream);
|
|
xe_eu_stall_stream_free(stream);
|
|
mutex_unlock(>->eu_stall->stream_lock);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static const struct file_operations fops_eu_stall = {
|
|
.owner = THIS_MODULE,
|
|
.llseek = noop_llseek,
|
|
.release = xe_eu_stall_stream_close,
|
|
.poll = xe_eu_stall_stream_poll,
|
|
.read = xe_eu_stall_stream_read,
|
|
.unlocked_ioctl = xe_eu_stall_stream_ioctl,
|
|
.compat_ioctl = xe_eu_stall_stream_ioctl,
|
|
};
|
|
|
|
static int xe_eu_stall_stream_open_locked(struct drm_device *dev,
|
|
struct eu_stall_open_properties *props,
|
|
struct drm_file *file)
|
|
{
|
|
struct xe_eu_stall_data_stream *stream;
|
|
struct xe_gt *gt = props->gt;
|
|
unsigned long f_flags = 0;
|
|
int ret, stream_fd;
|
|
|
|
/* Only one session can be active at any time */
|
|
if (gt->eu_stall->stream) {
|
|
xe_gt_dbg(gt, "EU stall sampling session already active\n");
|
|
return -EBUSY;
|
|
}
|
|
|
|
stream = kzalloc(sizeof(*stream), GFP_KERNEL);
|
|
if (!stream)
|
|
return -ENOMEM;
|
|
|
|
gt->eu_stall->stream = stream;
|
|
stream->gt = gt;
|
|
|
|
ret = xe_eu_stall_stream_init(stream, props);
|
|
if (ret) {
|
|
xe_gt_dbg(gt, "EU stall stream init failed : %d\n", ret);
|
|
goto err_free;
|
|
}
|
|
|
|
stream_fd = anon_inode_getfd("[xe_eu_stall]", &fops_eu_stall, stream, f_flags);
|
|
if (stream_fd < 0) {
|
|
ret = stream_fd;
|
|
xe_gt_dbg(gt, "EU stall inode get fd failed : %d\n", ret);
|
|
goto err_destroy;
|
|
}
|
|
|
|
/* Take a reference on the driver that will be kept with stream_fd
|
|
* until its release.
|
|
*/
|
|
drm_dev_get(>->tile->xe->drm);
|
|
|
|
return stream_fd;
|
|
|
|
err_destroy:
|
|
xe_eu_stall_data_buf_destroy(stream);
|
|
err_free:
|
|
xe_eu_stall_stream_free(stream);
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* xe_eu_stall_stream_open - Open a xe EU stall data stream fd
|
|
*
|
|
* @dev: DRM device pointer
|
|
* @data: pointer to first struct @drm_xe_ext_set_property in
|
|
* the chain of input properties from the user space.
|
|
* @file: DRM file pointer
|
|
*
|
|
* This function opens a EU stall data stream with input properties from
|
|
* the user space.
|
|
*
|
|
* Returns: EU stall data stream fd on success or a negative error code.
|
|
*/
|
|
int xe_eu_stall_stream_open(struct drm_device *dev, u64 data, struct drm_file *file)
|
|
{
|
|
struct xe_device *xe = to_xe_device(dev);
|
|
struct eu_stall_open_properties props = {};
|
|
int ret;
|
|
|
|
if (!xe_eu_stall_supported_on_platform(xe)) {
|
|
drm_dbg(&xe->drm, "EU stall monitoring is not supported on this platform\n");
|
|
return -ENODEV;
|
|
}
|
|
|
|
if (xe_observation_paranoid && !perfmon_capable()) {
|
|
drm_dbg(&xe->drm, "Insufficient privileges for EU stall monitoring\n");
|
|
return -EACCES;
|
|
}
|
|
|
|
/* Initialize and set default values */
|
|
props.wait_num_reports = 1;
|
|
props.sampling_rate_mult = 4;
|
|
|
|
ret = xe_eu_stall_user_extensions(xe, data, 0, &props);
|
|
if (ret)
|
|
return ret;
|
|
|
|
if (!props.gt) {
|
|
drm_dbg(&xe->drm, "GT ID not provided for EU stall sampling\n");
|
|
return -EINVAL;
|
|
}
|
|
|
|
mutex_lock(&props.gt->eu_stall->stream_lock);
|
|
ret = xe_eu_stall_stream_open_locked(dev, &props, file);
|
|
mutex_unlock(&props.gt->eu_stall->stream_lock);
|
|
|
|
return ret;
|
|
}
|