drm/amdkfd: Move TLB flushing logic into amdgpu
This will make it possible for amdgpu GEM ioctls to flush TLBs on
compute VMs.

This removes VMID-based TLB flushing and always uses PASID-based
flushing. It still works because it scans the VMID-PASID mapping
registers to find the right VMID. It's only slightly less efficient,
and this is not a production use case.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
commit 94e2dae0a8
parent e6ed364efa
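For context before the diff: the point of the move is that amdgpu-side code (such as the GEM ioctls named in the message) can now flush a compute VM's TLBs directly instead of going through KFD. A minimal, hypothetical call-site sketch — example_gem_va_update is not part of this commit, and TLB_FLUSH_LEGACY / adev->gfx.xcc_mask are assumed from the surrounding driver:

	/* Hypothetical caller sketch, not part of this diff. */
	static int example_gem_va_update(struct amdgpu_device *adev,
					 struct amdgpu_vm *vm)
	{
		/* ...page tables updated, vm->tlb_seq bumped by the update... */

		if (!vm->is_compute_context)
			return 0;

		/* Flush on every XCC instance of the device; this is a no-op
		 * if the TLB sequence did not advance since the last flush.
		 */
		return amdgpu_vm_flush_compute_tlb(adev, vm, TLB_FLUSH_LEGACY,
						   adev->gfx.xcc_mask);
	}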
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -710,35 +710,6 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
 	return false;
 }
 
-int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
-				     uint16_t vmid)
-{
-	if (adev->family == AMDGPU_FAMILY_AI) {
-		int i;
-
-		for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS)
-			amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
-	} else {
-		amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0), 0);
-	}
-
-	return 0;
-}
-
-int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
-				      uint16_t pasid,
-				      enum TLB_FLUSH_TYPE flush_type,
-				      uint32_t inst)
-{
-	bool all_hub = false;
-
-	if (adev->family == AMDGPU_FAMILY_AI ||
-	    adev->family == AMDGPU_FAMILY_RV)
-		all_hub = true;
-
-	return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub, inst);
-}
-
 bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev)
 {
 	return adev->have_atomics_support;
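The removed VMID-based helper has no one-for-one replacement: as the commit message notes, the PASID-based path finds the right VMID by scanning the VMID-PASID mapping registers. A rough sketch of that lookup, simplified from the GMC backends — the EXAMPLE_* register/mask names and example_flush_vmid() are placeholders, not the real GFX9 definitions:

	/* Illustrative sketch of the PASID -> VMID scan. */
	static void example_flush_pasid(struct amdgpu_device *adev, uint16_t pasid,
					uint32_t flush_type, bool all_hub)
	{
		int vmid;

		for (vmid = 1; vmid < 16; vmid++) {
			uint32_t map = RREG32(EXAMPLE_PASID_MAPPING_REG(vmid));

			if ((map & EXAMPLE_MAPPING_VALID) &&
			    (map & EXAMPLE_PASID_MASK) == pasid)
				/* flush this VMID on the selected hub(s) */
				example_flush_vmid(adev, vmid, flush_type, all_hub);
		}
	}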
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -162,11 +162,6 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
 				uint32_t *ib_cmd, uint32_t ib_len);
 void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle);
 bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev);
-int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev,
-				     uint16_t vmid);
-int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
-				      uint16_t pasid, enum TLB_FLUSH_TYPE flush_type,
-				      uint32_t inst);
 
 bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
 
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1437,6 +1437,50 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
 	return 0;
 }
 
+/**
+ * amdgpu_vm_flush_compute_tlb - Flush TLB on compute VM
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requested vm
+ * @flush_type: flush type
+ *
+ * Flush TLB if needed for a compute VM.
+ *
+ * Returns:
+ * 0 for success.
+ */
+int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev,
+				struct amdgpu_vm *vm,
+				uint32_t flush_type,
+				uint32_t xcc_mask)
+{
+	uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
+	bool all_hub = false;
+	int xcc = 0, r = 0;
+
+	WARN_ON_ONCE(!vm->is_compute_context);
+
+	/*
+	 * It can be that we race and lose here, but that is extremely unlikely
+	 * and the worst thing which could happen is that we flush the changes
+	 * into the TLB once more which is harmless.
+	 */
+	if (atomic64_xchg(&vm->kfd_last_flushed_seq, tlb_seq) == tlb_seq)
+		return 0;
+
+	if (adev->family == AMDGPU_FAMILY_AI ||
+	    adev->family == AMDGPU_FAMILY_RV)
+		all_hub = true;
+
+	for_each_inst(xcc, xcc_mask) {
+		r = amdgpu_gmc_flush_gpu_tlb_pasid(adev, vm->pasid, flush_type,
+						   all_hub, xcc);
+		if (r)
+			break;
+	}
+	return r;
+}
+
 /**
  * amdgpu_vm_bo_add - add a bo to a specific vm
  *
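The atomic64_xchg() guard above is a lock-free "flush at most once per sequence number" filter: publish the current sequence and compare it with whatever was stored before. A standalone sketch of the idiom, with hypothetical names (example_last_flushed stands in for the per-VM kfd_last_flushed_seq):

	static atomic64_t example_last_flushed;

	static void example_maybe_flush(uint64_t cur_seq)
	{
		/* Atomically store cur_seq and fetch the old value. If the
		 * old value already equals cur_seq, an earlier caller flushed
		 * for this sequence. Two racing callers may both flush; that
		 * is redundant but harmless, as the patch's comment says.
		 */
		if (atomic64_xchg(&example_last_flushed, cur_seq) == cur_seq)
			return;

		example_do_flush();	/* hypothetical TLB flush */
	}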
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -324,6 +324,7 @@ struct amdgpu_vm {
 	/* Last finished delayed update */
 	atomic64_t		tlb_seq;
 	struct dma_fence	*last_tlb_flush;
+	atomic64_t		kfd_last_flushed_seq;
 
 	/* How many times we had to re-generate the page tables */
 	uint64_t		generation;
@@ -445,6 +446,10 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
 			   struct amdgpu_vm *vm,
 			   struct ww_acquire_ctx *ticket);
+int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev,
+				struct amdgpu_vm *vm,
+				uint32_t flush_type,
+				uint32_t xcc_mask);
 void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
 			    struct amdgpu_vm *vm, struct amdgpu_bo *bo);
 int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -748,7 +748,6 @@ struct kfd_process_device {
 	/* VM context for GPUVM allocations */
 	struct file *drm_file;
 	void *drm_priv;
-	atomic64_t tlb_seq;
 
 	/* GPUVM allocations storage */
 	struct idr alloc_idr;
@@ -1462,7 +1461,14 @@ void kfd_signal_reset_event(struct kfd_node *dev);
 
 void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid);
 
-void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type);
+static inline void kfd_flush_tlb(struct kfd_process_device *pdd,
+				 enum TLB_FLUSH_TYPE type)
+{
+	struct amdgpu_device *adev = pdd->dev->adev;
+	struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
+
+	amdgpu_vm_flush_compute_tlb(adev, vm, type, pdd->dev->xcc_mask);
+}
 
 static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev)
 {
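Because the wrapper keeps the old name and signature, existing KFD callers need no changes. A sketched unmap-path call, not taken from this diff, using kfd_flush_tlb_after_unmap() from the context above and the real TLB_FLUSH_HEAVYWEIGHT flush type:

	/* Sketched caller, unchanged by this refactor. */
	if (kfd_flush_tlb_after_unmap(pdd->dev->kfd))
		kfd_flush_tlb(pdd, TLB_FLUSH_HEAVYWEIGHT);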
drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1667,7 +1667,6 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
 		return ret;
 	}
 	pdd->drm_priv = drm_file->private_data;
-	atomic64_set(&pdd->tlb_seq, 0);
 
 	ret = kfd_process_device_reserve_ib_mem(pdd);
 	if (ret)
@@ -2059,36 +2058,6 @@ int kfd_reserved_mem_mmap(struct kfd_node *dev, struct kfd_process *process,
 			       KFD_CWSR_TBA_TMA_SIZE, vma->vm_page_prot);
 }
 
-void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
-{
-	struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv);
-	uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm);
-	struct kfd_node *dev = pdd->dev;
-	uint32_t xcc_mask = dev->xcc_mask;
-	int xcc = 0;
-
-	/*
-	 * It can be that we race and lose here, but that is extremely unlikely
-	 * and the worst thing which could happen is that we flush the changes
-	 * into the TLB once more which is harmless.
-	 */
-	if (atomic64_xchg(&pdd->tlb_seq, tlb_seq) == tlb_seq)
-		return;
-
-	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
-		/* Nothing to flush until a VMID is assigned, which
-		 * only happens when the first queue is created.
-		 */
-		if (pdd->qpd.vmid)
-			amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->adev,
-							 pdd->qpd.vmid);
-	} else {
-		for_each_inst(xcc, xcc_mask)
-			amdgpu_amdkfd_flush_gpu_tlb_pasid(
-				dev->adev, pdd->process->pasid, type, xcc);
-	}
-}
-
 /* assumes caller holds process lock. */
 int kfd_process_drain_interrupts(struct kfd_process_device *pdd)
 {