linux-loongson/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
Linus Torvalds c93529ad4f iommufd 6.17 merge window pull

Merge tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd

Pull iommufd updates from Jason Gunthorpe:
 "This broadly brings the assigned HW command queue support to iommufd.
  This feature is used to improve SVA performance in VMs by avoiding
  paravirtualization traps during SVA invalidations.

  Along the way I think some of the core logic is in a much better state
  to support future driver backed features.

  Summary:

   - IOMMU HW now has features to directly assign HW command queues to a
     guest VM. In this mode the command queue operates on a limited set
     of invalidation commands that are suitable for improving guest
     invalidation performance and easy for the HW to virtualize.

     This brings the generic infrastructure to allow IOMMU drivers to
     expose such command queues through the iommufd uAPI, mmap the
     doorbell pages, and get the guest physical range for the command
     queue ring itself.

   - An implementation for the NVIDIA SMMUv3 extension "cmdqv" is built
     on the new iommufd command queue features. It works with the
     existing SMMU driver support for cmdqv in guest VMs.

   - Many precursor cleanups and improvements to support the above
     cleanly, changes to the general ioctl and object helpers, driver
     support for VDEVICE, and mmap pgoff cookie infrastructure.

   - Sequence VDEVICE destruction to always happen before VFIO device
     destruction. When using the above type features, and also in future
     confidential compute, the internal virtual device representation
     becomes linked to HW or CC TSM configuration and objects. If a VFIO
     device is removed from iommufd those HW objects should also be
     cleaned up to prevent a sort of UAF. This became important now that
     we have HW backing the VDEVICE.

   - Fix one syzkaller found error related to math overflows during iova
     allocation"

* tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd: (57 commits)
  iommu/arm-smmu-v3: Replace vsmmu_size/type with get_viommu_size
  iommu/arm-smmu-v3: Do not bother impl_ops if IOMMU_VIOMMU_TYPE_ARM_SMMUV3
  iommufd: Rename some shortterm-related identifiers
  iommufd/selftest: Add coverage for vdevice tombstone
  iommufd/selftest: Explicitly skip tests for inapplicable variant
  iommufd/vdevice: Remove struct device reference from struct vdevice
  iommufd: Destroy vdevice on idevice destroy
  iommufd: Add a pre_destroy() op for objects
  iommufd: Add iommufd_object_tombstone_user() helper
  iommufd/viommu: Roll back to use iommufd_object_alloc() for vdevice
  iommufd/selftest: Test reserved regions near ULONG_MAX
  iommufd: Prevent ALIGN() overflow
  iommu/tegra241-cmdqv: import IOMMUFD module namespace
  iommufd: Do not allow _iommufd_object_alloc_ucmd if abort op is set
  iommu/tegra241-cmdqv: Add IOMMU_VEVENTQ_TYPE_TEGRA241_CMDQV support
  iommu/tegra241-cmdqv: Add user-space use support
  iommu/tegra241-cmdqv: Do not statically map LVCMDQs
  iommu/tegra241-cmdqv: Simplify deinit flow in tegra241_cmdqv_remove_vintf()
  iommu/tegra241-cmdqv: Use request_threaded_irq
  iommu/arm-smmu-v3-iommufd: Add hw_info to impl_ops
  ...
2025-07-31 12:43:08 -07:00


// SPDX-License-Identifier: GPL-2.0
/*
* IOMMU API for ARM architected SMMUv3 implementations.
*
* Copyright (C) 2015 ARM Limited
*
* Author: Will Deacon <will.deacon@arm.com>
*
* This driver is powered by bad coffee and bombay mix.
*/
#include <linux/acpi.h>
#include <linux/acpi_iort.h>
#include <linux/bitops.h>
#include <linux/crash_dump.h>
#include <linux/delay.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/io-pgtable.h>
#include <linux/iopoll.h>
#include <linux/module.h>
#include <linux/msi.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_platform.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/platform_device.h>
#include <linux/string_choices.h>
#include <kunit/visibility.h>
#include <uapi/linux/iommufd.h>
#include "arm-smmu-v3.h"
#include "../../dma-iommu.h"
static bool disable_msipolling;
module_param(disable_msipolling, bool, 0444);
MODULE_PARM_DESC(disable_msipolling,
"Disable MSI-based polling for CMD_SYNC completion.");
static const struct iommu_ops arm_smmu_ops;
static struct iommu_dirty_ops arm_smmu_dirty_ops;
enum arm_smmu_msi_index {
EVTQ_MSI_INDEX,
GERROR_MSI_INDEX,
PRIQ_MSI_INDEX,
ARM_SMMU_MAX_MSIS,
};
#define NUM_ENTRY_QWORDS 8
static_assert(sizeof(struct arm_smmu_ste) == NUM_ENTRY_QWORDS * sizeof(u64));
static_assert(sizeof(struct arm_smmu_cd) == NUM_ENTRY_QWORDS * sizeof(u64));
static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
[EVTQ_MSI_INDEX] = {
ARM_SMMU_EVTQ_IRQ_CFG0,
ARM_SMMU_EVTQ_IRQ_CFG1,
ARM_SMMU_EVTQ_IRQ_CFG2,
},
[GERROR_MSI_INDEX] = {
ARM_SMMU_GERROR_IRQ_CFG0,
ARM_SMMU_GERROR_IRQ_CFG1,
ARM_SMMU_GERROR_IRQ_CFG2,
},
[PRIQ_MSI_INDEX] = {
ARM_SMMU_PRIQ_IRQ_CFG0,
ARM_SMMU_PRIQ_IRQ_CFG1,
ARM_SMMU_PRIQ_IRQ_CFG2,
},
};
struct arm_smmu_option_prop {
u32 opt;
const char *prop;
};
DEFINE_XARRAY_ALLOC1(arm_smmu_asid_xa);
DEFINE_MUTEX(arm_smmu_asid_lock);
static struct arm_smmu_option_prop arm_smmu_options[] = {
{ ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
{ ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium,cn9900-broken-page1-regspace"},
{ 0, NULL},
};
static const char * const event_str[] = {
[EVT_ID_BAD_STREAMID_CONFIG] = "C_BAD_STREAMID",
[EVT_ID_STE_FETCH_FAULT] = "F_STE_FETCH",
[EVT_ID_BAD_STE_CONFIG] = "C_BAD_STE",
[EVT_ID_STREAM_DISABLED_FAULT] = "F_STREAM_DISABLED",
[EVT_ID_BAD_SUBSTREAMID_CONFIG] = "C_BAD_SUBSTREAMID",
[EVT_ID_CD_FETCH_FAULT] = "F_CD_FETCH",
[EVT_ID_BAD_CD_CONFIG] = "C_BAD_CD",
[EVT_ID_TRANSLATION_FAULT] = "F_TRANSLATION",
[EVT_ID_ADDR_SIZE_FAULT] = "F_ADDR_SIZE",
[EVT_ID_ACCESS_FAULT] = "F_ACCESS",
[EVT_ID_PERMISSION_FAULT] = "F_PERMISSION",
[EVT_ID_VMS_FETCH_FAULT] = "F_VMS_FETCH",
};
static const char * const event_class_str[] = {
[0] = "CD fetch",
[1] = "Stage 1 translation table fetch",
[2] = "Input address caused fault",
[3] = "Reserved",
};
static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master);
static void parse_driver_options(struct arm_smmu_device *smmu)
{
int i = 0;
do {
if (of_property_read_bool(smmu->dev->of_node,
arm_smmu_options[i].prop)) {
smmu->options |= arm_smmu_options[i].opt;
dev_notice(smmu->dev, "option %s\n",
arm_smmu_options[i].prop);
}
} while (arm_smmu_options[++i].opt);
}
/* Low-level queue manipulation functions */
static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
{
u32 space, prod, cons;
prod = Q_IDX(q, q->prod);
cons = Q_IDX(q, q->cons);
if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
space = (1 << q->max_n_shift) - (prod - cons);
else
space = cons - prod;
return space >= n;
}
static bool queue_full(struct arm_smmu_ll_queue *q)
{
return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
Q_WRP(q, q->prod) != Q_WRP(q, q->cons);
}
static bool queue_empty(struct arm_smmu_ll_queue *q)
{
return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
}
static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
{
return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
}
static void queue_sync_cons_out(struct arm_smmu_queue *q)
{
/*
* Ensure that all CPU accesses (reads and writes) to the queue
* are complete before we update the cons pointer.
*/
__iomb();
writel_relaxed(q->llq.cons, q->cons_reg);
}
static void queue_inc_cons(struct arm_smmu_ll_queue *q)
{
u32 cons = (Q_WRP(q, q->cons) | Q_IDX(q, q->cons)) + 1;
q->cons = Q_OVF(q->cons) | Q_WRP(q, cons) | Q_IDX(q, cons);
}
static void queue_sync_cons_ovf(struct arm_smmu_queue *q)
{
struct arm_smmu_ll_queue *llq = &q->llq;
if (likely(Q_OVF(llq->prod) == Q_OVF(llq->cons)))
return;
llq->cons = Q_OVF(llq->prod) | Q_WRP(llq, llq->cons) |
Q_IDX(llq, llq->cons);
queue_sync_cons_out(q);
}
static int queue_sync_prod_in(struct arm_smmu_queue *q)
{
u32 prod;
int ret = 0;
/*
* We can't use the _relaxed() variant here, as we must prevent
* speculative reads of the queue before we have determined that
* prod has indeed moved.
*/
prod = readl(q->prod_reg);
if (Q_OVF(prod) != Q_OVF(q->llq.prod))
ret = -EOVERFLOW;
q->llq.prod = prod;
return ret;
}
static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
{
u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
}
static void queue_poll_init(struct arm_smmu_device *smmu,
struct arm_smmu_queue_poll *qp)
{
qp->delay = 1;
qp->spin_cnt = 0;
qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
}
static int queue_poll(struct arm_smmu_queue_poll *qp)
{
if (ktime_compare(ktime_get(), qp->timeout) > 0)
return -ETIMEDOUT;
if (qp->wfe) {
wfe();
} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
cpu_relax();
} else {
udelay(qp->delay);
qp->delay *= 2;
qp->spin_cnt = 0;
}
return 0;
}
static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
{
int i;
for (i = 0; i < n_dwords; ++i)
*dst++ = cpu_to_le64(*src++);
}
static void queue_read(u64 *dst, __le64 *src, size_t n_dwords)
{
int i;
for (i = 0; i < n_dwords; ++i)
*dst++ = le64_to_cpu(*src++);
}
static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
{
if (queue_empty(&q->llq))
return -EAGAIN;
queue_read(ent, Q_ENT(q, q->llq.cons), q->ent_dwords);
queue_inc_cons(&q->llq);
queue_sync_cons_out(q);
return 0;
}
/* High-level queue accessors */
static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
{
memset(cmd, 0, 1 << CMDQ_ENT_SZ_SHIFT);
cmd[0] |= FIELD_PREP(CMDQ_0_OP, ent->opcode);
switch (ent->opcode) {
case CMDQ_OP_TLBI_EL2_ALL:
case CMDQ_OP_TLBI_NSNH_ALL:
break;
case CMDQ_OP_PREFETCH_CFG:
cmd[0] |= FIELD_PREP(CMDQ_PREFETCH_0_SID, ent->prefetch.sid);
break;
case CMDQ_OP_CFGI_CD:
cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid);
fallthrough;
case CMDQ_OP_CFGI_STE:
cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf);
break;
case CMDQ_OP_CFGI_CD_ALL:
cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid);
break;
case CMDQ_OP_CFGI_ALL:
/* Cover the entire SID range */
cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
break;
case CMDQ_OP_TLBI_NH_VA:
cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
fallthrough;
case CMDQ_OP_TLBI_EL2_VA:
cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
break;
case CMDQ_OP_TLBI_S2_IPA:
cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
break;
case CMDQ_OP_TLBI_NH_ASID:
cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
fallthrough;
case CMDQ_OP_TLBI_NH_ALL:
case CMDQ_OP_TLBI_S12_VMALL:
cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
break;
case CMDQ_OP_TLBI_EL2_ASID:
cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
break;
case CMDQ_OP_ATC_INV:
cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
cmd[0] |= FIELD_PREP(CMDQ_ATC_0_GLOBAL, ent->atc.global);
cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SSID, ent->atc.ssid);
cmd[0] |= FIELD_PREP(CMDQ_ATC_0_SID, ent->atc.sid);
cmd[1] |= FIELD_PREP(CMDQ_ATC_1_SIZE, ent->atc.size);
cmd[1] |= ent->atc.addr & CMDQ_ATC_1_ADDR_MASK;
break;
case CMDQ_OP_PRI_RESP:
cmd[0] |= FIELD_PREP(CMDQ_0_SSV, ent->substream_valid);
cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SSID, ent->pri.ssid);
cmd[0] |= FIELD_PREP(CMDQ_PRI_0_SID, ent->pri.sid);
cmd[1] |= FIELD_PREP(CMDQ_PRI_1_GRPID, ent->pri.grpid);
switch (ent->pri.resp) {
case PRI_RESP_DENY:
case PRI_RESP_FAIL:
case PRI_RESP_SUCC:
break;
default:
return -EINVAL;
}
cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
break;
case CMDQ_OP_RESUME:
cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
break;
case CMDQ_OP_CMD_SYNC:
if (ent->sync.msiaddr) {
cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
} else {
cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
}
cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
break;
default:
return -ENOENT;
}
return 0;
}
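/*
 * Editor's sketch: building a single TLBI entry by hand, exactly as
 * arm_smmu_cmdq_build_cmd() above does. The field values (asid 5, a 4K
 * leaf at 0x40000000) are made up for illustration:
 *
 *	u64 cmd[CMDQ_ENT_DWORDS];
 *	struct arm_smmu_cmdq_ent ent = {
 *		.opcode    = CMDQ_OP_TLBI_NH_VA,
 *		.tlbi.asid = 5,
 *		.tlbi.leaf = true,
 *		.tlbi.addr = 0x40000000,
 *	};
 *
 *	arm_smmu_cmdq_build_cmd(cmd, &ent);
 *	// cmd[0] now carries the opcode, VMID and ASID fields;
 *	// cmd[1] carries LEAF, TTL, TG and the VA bits.
 */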
static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_ent *ent)
{
struct arm_smmu_cmdq *cmdq = NULL;
if (smmu->impl_ops && smmu->impl_ops->get_secondary_cmdq)
cmdq = smmu->impl_ops->get_secondary_cmdq(smmu, ent);
return cmdq ?: &smmu->cmdq;
}
static bool arm_smmu_cmdq_needs_busy_polling(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq *cmdq)
{
if (cmdq == &smmu->cmdq)
return false;
return smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV;
}
static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
struct arm_smmu_cmdq *cmdq, u32 prod)
{
struct arm_smmu_queue *q = &cmdq->q;
struct arm_smmu_cmdq_ent ent = {
.opcode = CMDQ_OP_CMD_SYNC,
};
/*
* Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
* payload, so the write will zero the entire command on that platform.
*/
if (smmu->options & ARM_SMMU_OPT_MSIPOLL) {
ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
q->ent_dwords * 8;
}
arm_smmu_cmdq_build_cmd(cmd, &ent);
if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
}
void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq *cmdq)
{
static const char * const cerror_str[] = {
[CMDQ_ERR_CERROR_NONE_IDX] = "No error",
[CMDQ_ERR_CERROR_ILL_IDX] = "Illegal command",
[CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch",
[CMDQ_ERR_CERROR_ATC_INV_IDX] = "ATC invalidate timeout",
};
struct arm_smmu_queue *q = &cmdq->q;
int i;
u64 cmd[CMDQ_ENT_DWORDS];
u32 cons = readl_relaxed(q->cons_reg);
u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons);
struct arm_smmu_cmdq_ent cmd_sync = {
.opcode = CMDQ_OP_CMD_SYNC,
};
dev_err(smmu->dev, "CMDQ error (cons 0x%08x): %s\n", cons,
idx < ARRAY_SIZE(cerror_str) ? cerror_str[idx] : "Unknown");
switch (idx) {
case CMDQ_ERR_CERROR_ABT_IDX:
dev_err(smmu->dev, "retrying command fetch\n");
return;
case CMDQ_ERR_CERROR_NONE_IDX:
return;
case CMDQ_ERR_CERROR_ATC_INV_IDX:
/*
* ATC Invalidation Completion timeout. CONS is still pointing
* at the CMD_SYNC. Attempt to complete other pending commands
* by repeating the CMD_SYNC, though we might well end up back
* here since the ATC invalidation may still be pending.
*/
return;
case CMDQ_ERR_CERROR_ILL_IDX:
default:
break;
}
/*
* We may have concurrent producers, so we need to be careful
* not to touch any of the shadow cmdq state.
*/
queue_read(cmd, Q_ENT(q, cons), q->ent_dwords);
dev_err(smmu->dev, "skipping command in error state:\n");
for (i = 0; i < ARRAY_SIZE(cmd); ++i)
dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]);
/* Convert the erroneous command into a CMD_SYNC */
arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
}
static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
{
__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq);
}
/*
* Command queue locking.
* This is a form of bastardised rwlock with the following major changes:
*
* - The only LOCK routines are exclusive_trylock() and shared_lock().
* Neither have barrier semantics, and instead provide only a control
* dependency.
*
* - The UNLOCK routines are supplemented with shared_tryunlock(), which
* fails if the caller appears to be the last lock holder (yes, this is
* racy). All successful UNLOCK routines have RELEASE semantics.
*/
static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
{
int val;
/*
* We can try to avoid the cmpxchg() loop by simply incrementing the
* lock counter. When held in exclusive state, the lock counter is set
* to INT_MIN so these increments won't hurt as the value will remain
* negative.
*/
if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
return;
do {
val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
}
static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
{
(void)atomic_dec_return_release(&cmdq->lock);
}
static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
{
if (atomic_read(&cmdq->lock) == 1)
return false;
arm_smmu_cmdq_shared_unlock(cmdq);
return true;
}
#define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags) \
({ \
bool __ret; \
local_irq_save(flags); \
__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN); \
if (!__ret) \
local_irq_restore(flags); \
__ret; \
})
#define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags) \
({ \
atomic_set_release(&cmdq->lock, 0); \
local_irq_restore(flags); \
})
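/*
 * Editor's note: a sketch of the lock word states, assuming the atomic
 * counter starts at 0:
 *
 *	0        -> unlocked
 *	n > 0    -> held shared by n CPUs (shared_lock() is a fetch_inc)
 *	INT_MIN  -> held exclusive (cmpxchg(0, INT_MIN) succeeded)
 *
 * A shared_lock() racing with the exclusive holder still increments the
 * counter, but INT_MIN + n stays negative, so the fast path fails and
 * the CPU spins in atomic_cond_read_relaxed() until the value is >= 0
 * again. Exclusive unlock simply stores 0, erasing those increments,
 * which is why the waiters re-validate with a cmpxchg() loop.
 */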
/*
* Command queue insertion.
* This is made fiddly by our attempts to achieve some sort of scalability
* since there is one queue shared amongst all of the CPUs in the system. If
* you like mixed-size concurrency, dependency ordering and relaxed atomics,
* then you'll *love* this monstrosity.
*
* The basic idea is to split the queue up into ranges of commands that are
* owned by a given CPU; the owner may not have written all of the commands
* itself, but is responsible for advancing the hardware prod pointer when
* the time comes. The algorithm is roughly:
*
* 1. Allocate some space in the queue. At this point we also discover
* whether the head of the queue is currently owned by another CPU,
* or whether we are the owner.
*
* 2. Write our commands into our allocated slots in the queue.
*
* 3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
*
* 4. If we are an owner:
* a. Wait for the previous owner to finish.
* b. Mark the queue head as unowned, which tells us the range
* that we are responsible for publishing.
* c. Wait for all commands in our owned range to become valid.
* d. Advance the hardware prod pointer.
* e. Tell the next owner we've finished.
*
* 5. If we are inserting a CMD_SYNC (we may or may not have been an
* owner), then we need to stick around until it has completed:
* a. If we have MSIs, the SMMU can write back into the CMD_SYNC
* to clear the first 4 bytes.
* b. Otherwise, we spin waiting for the hardware cons pointer to
* advance past our command.
*
* The devil is in the details, particularly the use of locking for handling
* SYNC completion and freeing up space in the queue before we think that it is
* full.
*/
static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
u32 sprod, u32 eprod, bool set)
{
u32 swidx, sbidx, ewidx, ebidx;
struct arm_smmu_ll_queue llq = {
.max_n_shift = cmdq->q.llq.max_n_shift,
.prod = sprod,
};
ewidx = BIT_WORD(Q_IDX(&llq, eprod));
ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
while (llq.prod != eprod) {
unsigned long mask;
atomic_long_t *ptr;
u32 limit = BITS_PER_LONG;
swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
ptr = &cmdq->valid_map[swidx];
if ((swidx == ewidx) && (sbidx < ebidx))
limit = ebidx;
mask = GENMASK(limit - 1, sbidx);
/*
* The valid bit is the inverse of the wrap bit. This means
* that a zero-initialised queue is invalid and, after marking
* all entries as valid, they become invalid again when we
* wrap.
*/
if (set) {
atomic_long_xor(mask, ptr);
} else { /* Poll */
unsigned long valid;
valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
}
llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
}
}
/* Mark all entries in the range [sprod, eprod) as valid */
static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
u32 sprod, u32 eprod)
{
__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
}
/* Wait for all entries in the range [sprod, eprod) to become valid */
static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
u32 sprod, u32 eprod)
{
__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
}
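/*
 * Editor's worked example of the valid bitmap trick, assuming a
 * 32-entry queue covered by a single bitmap word: on the first pass
 * (wrap bit 0) producers mark entries valid by XOR-ing their bits from
 * 0 to 1, and pollers wait for (VAL & mask) == mask. After the producer
 * index wraps (wrap bit 1) the same XOR flips those bits back to 0,
 * and because ULONG_MAX + !!Q_WRP(...) overflows to 0, pollers now
 * wait for (VAL & mask) == 0 instead. The "valid" polarity therefore
 * alternates with the wrap bit and the map never needs a separate
 * clearing pass.
 */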
/* Wait for the command queue to become non-full */
static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq *cmdq,
struct arm_smmu_ll_queue *llq)
{
unsigned long flags;
struct arm_smmu_queue_poll qp;
int ret = 0;
/*
* Try to update our copy of cons by grabbing exclusive cmdq access. If
* that fails, spin until somebody else updates it for us.
*/
if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
llq->val = READ_ONCE(cmdq->q.llq.val);
return 0;
}
queue_poll_init(smmu, &qp);
do {
llq->val = READ_ONCE(cmdq->q.llq.val);
if (!queue_full(llq))
break;
ret = queue_poll(&qp);
} while (!ret);
return ret;
}
/*
* Wait until the SMMU signals a CMD_SYNC completion MSI.
* Must be called with the cmdq lock held in some capacity.
*/
static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq *cmdq,
struct arm_smmu_ll_queue *llq)
{
int ret = 0;
struct arm_smmu_queue_poll qp;
u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
queue_poll_init(smmu, &qp);
/*
* The MSI won't generate an event, since it's being written back
* into the command queue.
*/
qp.wfe = false;
smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
return ret;
}
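/*
 * Editor's note: with MSI polling the CMD_SYNC is its own completion
 * flag. arm_smmu_cmdq_build_sync_cmd() points sync.msiaddr at the queue
 * slot holding the CMD_SYNC itself, so when the SMMU completes it, the
 * "MSI" write lands on the first 32 bits of the command and zeroes
 * them; the smp_cond_load_relaxed() above simply spins until *cmd reads
 * as 0 (or the poll times out).
 */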
/*
* Wait until the SMMU cons index passes llq->prod.
* Must be called with the cmdq lock held in some capacity.
*/
static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq *cmdq,
struct arm_smmu_ll_queue *llq)
{
struct arm_smmu_queue_poll qp;
u32 prod = llq->prod;
int ret = 0;
queue_poll_init(smmu, &qp);
llq->val = READ_ONCE(cmdq->q.llq.val);
do {
if (queue_consumed(llq, prod))
break;
ret = queue_poll(&qp);
/*
* This needs to be a readl() so that our subsequent call
* to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
*
* Specifically, we need to ensure that we observe all
* shared_lock()s by other CMD_SYNCs that share our owner,
* so that a failing call to tryunlock() means that we're
* the last one out and therefore we can safely advance
* cmdq->q.llq.cons. Roughly speaking:
*
* CPU 0 CPU1 CPU2 (us)
*
* if (sync)
* shared_lock();
*
* dma_wmb();
* set_valid_map();
*
* if (owner) {
* poll_valid_map();
* <control dependency>
* writel(prod_reg);
*
* readl(cons_reg);
* tryunlock();
*
* Requires us to see CPU 0's shared_lock() acquisition.
*/
llq->cons = readl(cmdq->q.cons_reg);
} while (!ret);
return ret;
}
static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq *cmdq,
struct arm_smmu_ll_queue *llq)
{
if (smmu->options & ARM_SMMU_OPT_MSIPOLL &&
!arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq);
return __arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq);
}
static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
u32 prod, int n)
{
int i;
struct arm_smmu_ll_queue llq = {
.max_n_shift = cmdq->q.llq.max_n_shift,
.prod = prod,
};
for (i = 0; i < n; ++i) {
u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
prod = queue_inc_prod_n(&llq, i);
queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
}
}
/*
* This is the actual insertion function, and provides the following
* ordering guarantees to callers:
*
* - There is a dma_wmb() before publishing any commands to the queue.
* This can be relied upon to order prior writes to data structures
* in memory (such as a CD or an STE) before the command.
*
* - On completion of a CMD_SYNC, there is a control dependency.
* This can be relied upon to order subsequent writes to memory (e.g.
* freeing an IOVA) after completion of the CMD_SYNC.
*
* - Command insertion is totally ordered, so if two CPUs each race to
* insert their own list of commands then all of the commands from one
* CPU will appear before any of the commands from the other CPU.
*/
int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq *cmdq, u64 *cmds, int n,
bool sync)
{
u64 cmd_sync[CMDQ_ENT_DWORDS];
u32 prod;
unsigned long flags;
bool owner;
struct arm_smmu_ll_queue llq, head;
int ret = 0;
llq.max_n_shift = cmdq->q.llq.max_n_shift;
/* 1. Allocate some space in the queue */
local_irq_save(flags);
llq.val = READ_ONCE(cmdq->q.llq.val);
do {
u64 old;
while (!queue_has_space(&llq, n + sync)) {
local_irq_restore(flags);
if (arm_smmu_cmdq_poll_until_not_full(smmu, cmdq, &llq))
dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
local_irq_save(flags);
}
head.cons = llq.cons;
head.prod = queue_inc_prod_n(&llq, n + sync) |
CMDQ_PROD_OWNED_FLAG;
old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
if (old == llq.val)
break;
llq.val = old;
} while (1);
owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
head.prod &= ~CMDQ_PROD_OWNED_FLAG;
llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
/*
* 2. Write our commands into the queue
* Dependency ordering from the cmpxchg() loop above.
*/
arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
if (sync) {
prod = queue_inc_prod_n(&llq, n);
arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, cmdq, prod);
queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
/*
* In order to determine completion of our CMD_SYNC, we must
* ensure that the queue can't wrap twice without us noticing.
* We achieve that by taking the cmdq lock as shared before
* marking our slot as valid.
*/
arm_smmu_cmdq_shared_lock(cmdq);
}
/* 3. Mark our slots as valid, ensuring commands are visible first */
dma_wmb();
arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
/* 4. If we are the owner, take control of the SMMU hardware */
if (owner) {
/* a. Wait for previous owner to finish */
atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
/* b. Stop gathering work by clearing the owned flag */
prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
&cmdq->q.llq.atomic.prod);
prod &= ~CMDQ_PROD_OWNED_FLAG;
/*
* c. Wait for any gathered work to be written to the queue.
* Note that we read our own entries so that we have the control
* dependency required by (d).
*/
arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
/*
* d. Advance the hardware prod pointer
* Control dependency ordering from the entries becoming valid.
*/
writel_relaxed(prod, cmdq->q.prod_reg);
/*
* e. Tell the next owner we're done
* Make sure we've updated the hardware first, so that we don't
* race to update prod and potentially move it backwards.
*/
atomic_set_release(&cmdq->owner_prod, prod);
}
/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
if (sync) {
llq.prod = queue_inc_prod_n(&llq, n);
ret = arm_smmu_cmdq_poll_until_sync(smmu, cmdq, &llq);
if (ret) {
dev_err_ratelimited(smmu->dev,
"CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
llq.prod,
readl_relaxed(cmdq->q.prod_reg),
readl_relaxed(cmdq->q.cons_reg));
}
/*
* Try to unlock the cmdq lock. This will fail if we're the last
* reader, in which case we can safely update cmdq->q.llq.cons
*/
if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
arm_smmu_cmdq_shared_unlock(cmdq);
}
}
local_irq_restore(flags);
return ret;
}
static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_ent *ent,
bool sync)
{
u64 cmd[CMDQ_ENT_DWORDS];
if (unlikely(arm_smmu_cmdq_build_cmd(cmd, ent))) {
dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
ent->opcode);
return -EINVAL;
}
return arm_smmu_cmdq_issue_cmdlist(
smmu, arm_smmu_get_cmdq(smmu, ent), cmd, 1, sync);
}
static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_ent *ent)
{
return __arm_smmu_cmdq_issue_cmd(smmu, ent, false);
}
static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_ent *ent)
{
return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
}
static void arm_smmu_cmdq_batch_init(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_batch *cmds,
struct arm_smmu_cmdq_ent *ent)
{
cmds->num = 0;
cmds->cmdq = arm_smmu_get_cmdq(smmu, ent);
}
static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_batch *cmds,
struct arm_smmu_cmdq_ent *cmd)
{
bool unsupported_cmd = !arm_smmu_cmdq_supports_cmd(cmds->cmdq, cmd);
bool force_sync = (cmds->num == CMDQ_BATCH_ENTRIES - 1) &&
(smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC);
int index;
if (force_sync || unsupported_cmd) {
arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
cmds->num, true);
arm_smmu_cmdq_batch_init(smmu, cmds, cmd);
}
if (cmds->num == CMDQ_BATCH_ENTRIES) {
arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
cmds->num, false);
arm_smmu_cmdq_batch_init(smmu, cmds, cmd);
}
index = cmds->num * CMDQ_ENT_DWORDS;
if (unlikely(arm_smmu_cmdq_build_cmd(&cmds->cmds[index], cmd))) {
dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
cmd->opcode);
return;
}
cmds->num++;
}
static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_batch *cmds)
{
return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
cmds->num, true);
}
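/*
 * Editor's sketch of how these batch helpers are meant to be used,
 * modelled on arm_smmu_sync_cd() below: initialise a batch from a
 * template command, add one variant per stream ID, then submit once
 * (the submit issues the whole list with a trailing CMD_SYNC):
 *
 *	struct arm_smmu_cmdq_batch cmds;
 *	struct arm_smmu_cmdq_ent cmd = {
 *		.opcode = CMDQ_OP_CFGI_CD,
 *		.cfgi = { .ssid = ssid, .leaf = leaf },
 *	};
 *
 *	arm_smmu_cmdq_batch_init(smmu, &cmds, &cmd);
 *	for (i = 0; i < master->num_streams; i++) {
 *		cmd.cfgi.sid = master->streams[i].id;
 *		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
 *	}
 *	arm_smmu_cmdq_batch_submit(smmu, &cmds);
 */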
static void arm_smmu_page_response(struct device *dev, struct iopf_fault *unused,
struct iommu_page_response *resp)
{
struct arm_smmu_cmdq_ent cmd = {0};
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
int sid = master->streams[0].id;
if (WARN_ON(!master->stall_enabled))
return;
cmd.opcode = CMDQ_OP_RESUME;
cmd.resume.sid = sid;
cmd.resume.stag = resp->grpid;
switch (resp->code) {
case IOMMU_PAGE_RESP_INVALID:
case IOMMU_PAGE_RESP_FAILURE:
cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
break;
case IOMMU_PAGE_RESP_SUCCESS:
cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
break;
default:
break;
}
arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
/*
* Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
* RESUME consumption guarantees that the stalled transaction will be
* terminated... at some point in the future. PRI_RESP is fire and
* forget.
*/
}
/* Context descriptor manipulation functions */
void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
{
struct arm_smmu_cmdq_ent cmd = {
.opcode = smmu->features & ARM_SMMU_FEAT_E2H ?
CMDQ_OP_TLBI_EL2_ASID : CMDQ_OP_TLBI_NH_ASID,
.tlbi.asid = asid,
};
arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
}
/*
* Based on the value of ent, report which bits of the STE the HW will access. It
* would be nice if this was complete according to the spec, but minimally it
* has to capture the bits this driver uses.
*/
VISIBLE_IF_KUNIT
void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits)
{
unsigned int cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(ent[0]));
used_bits[0] = cpu_to_le64(STRTAB_STE_0_V);
if (!(ent[0] & cpu_to_le64(STRTAB_STE_0_V)))
return;
used_bits[0] |= cpu_to_le64(STRTAB_STE_0_CFG);
/* S1 translates */
if (cfg & BIT(0)) {
used_bits[0] |= cpu_to_le64(STRTAB_STE_0_S1FMT |
STRTAB_STE_0_S1CTXPTR_MASK |
STRTAB_STE_0_S1CDMAX);
used_bits[1] |=
cpu_to_le64(STRTAB_STE_1_S1DSS | STRTAB_STE_1_S1CIR |
STRTAB_STE_1_S1COR | STRTAB_STE_1_S1CSH |
STRTAB_STE_1_S1STALLD | STRTAB_STE_1_STRW |
STRTAB_STE_1_EATS | STRTAB_STE_1_MEV);
used_bits[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID);
/*
* See 13.5 Summary of attribute/permission configuration fields
* for the SHCFG behavior.
*/
if (FIELD_GET(STRTAB_STE_1_S1DSS, le64_to_cpu(ent[1])) ==
STRTAB_STE_1_S1DSS_BYPASS)
used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG);
}
/* S2 translates */
if (cfg & BIT(1)) {
used_bits[1] |=
cpu_to_le64(STRTAB_STE_1_S2FWB | STRTAB_STE_1_EATS |
STRTAB_STE_1_SHCFG | STRTAB_STE_1_MEV);
used_bits[2] |=
cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR |
STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI |
STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2S |
STRTAB_STE_2_S2R);
used_bits[3] |= cpu_to_le64(STRTAB_STE_3_S2TTB_MASK);
}
if (cfg == STRTAB_STE_0_CFG_BYPASS)
used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG);
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_ste_used);
/*
* Figure out if we can do a hitless update of entry to become target. Returns a
* bit mask where 1 indicates that qword needs to be set disruptively.
* unused_update is an intermediate value of entry that has unused bits set to
* their new values.
*/
static u8 arm_smmu_entry_qword_diff(struct arm_smmu_entry_writer *writer,
const __le64 *entry, const __le64 *target,
__le64 *unused_update)
{
__le64 target_used[NUM_ENTRY_QWORDS] = {};
__le64 cur_used[NUM_ENTRY_QWORDS] = {};
u8 used_qword_diff = 0;
unsigned int i;
writer->ops->get_used(entry, cur_used);
writer->ops->get_used(target, target_used);
for (i = 0; i != NUM_ENTRY_QWORDS; i++) {
/*
* Check that masks are up to date, the make functions are not
* allowed to set a bit to 1 if the used function doesn't say it
* is used.
*/
WARN_ON_ONCE(target[i] & ~target_used[i]);
/* Bits can change because they are not currently being used */
unused_update[i] = (entry[i] & cur_used[i]) |
(target[i] & ~cur_used[i]);
/*
* Each bit indicates that a used bit in a qword needs to be
* changed after unused_update is applied.
*/
if ((unused_update[i] & target_used[i]) != target[i])
used_qword_diff |= 1 << i;
}
return used_qword_diff;
}
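/*
 * Editor's worked example with made-up two-qword entries: suppose the
 * current entry only uses V and CFG in qword 0 (cur_used[1] == 0) and
 * the target changes CFG plus several qword 1 fields. Qword 1 is
 * entirely unused right now, so unused_update[1] absorbs all of its new
 * bits and contributes no diff; qword 0 keeps its old CFG value, so the
 * comparison against target fails there and used_qword_diff == 0b01.
 * A single set bit is exactly what arm_smmu_write_entry() below treats
 * as the hitless case.
 */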
static bool entry_set(struct arm_smmu_entry_writer *writer, __le64 *entry,
const __le64 *target, unsigned int start,
unsigned int len)
{
bool changed = false;
unsigned int i;
for (i = start; len != 0; len--, i++) {
if (entry[i] != target[i]) {
WRITE_ONCE(entry[i], target[i]);
changed = true;
}
}
if (changed)
writer->ops->sync(writer);
return changed;
}
/*
* Update the STE/CD to the target configuration. The transition from the
* current entry to the target entry takes place over multiple steps that
* attempt to make the transition hitless if possible. This function takes care
* not to create a situation where the HW can perceive a corrupted entry. HW is
* only required to provide 64-bit atomicity for stores from the CPU, while
* entries span several 64-bit values.
*
* The difference between the current value and the target value is analyzed to
* determine which of three updates are required - disruptive, hitless or no
* change.
*
* In the most general disruptive case we can make any update in three steps:
* - Disrupting the entry (V=0)
* - Fill the now-unused qwords, except qword 0 which contains V
* - Make qword 0 have the final value and valid (V=1) with a single 64
* bit store
*
* However this disrupts the HW while it is happening. There are several
* interesting cases where a STE/CD can be updated without disturbing the HW
* because only a small number of bits are changing (S1DSS, CONFIG, etc) or
* because the used bits don't intersect. We can detect this by calculating how
* many 64 bit values need update after adjusting the unused bits and skip the
* V=0 process. This relies on the IGNORED behavior described in the
* specification.
*/
VISIBLE_IF_KUNIT
void arm_smmu_write_entry(struct arm_smmu_entry_writer *writer, __le64 *entry,
const __le64 *target)
{
__le64 unused_update[NUM_ENTRY_QWORDS];
u8 used_qword_diff;
used_qword_diff =
arm_smmu_entry_qword_diff(writer, entry, target, unused_update);
if (hweight8(used_qword_diff) == 1) {
/*
* Only one qword needs its used bits to be changed. This is a
* hitless update, update all bits the current STE/CD is
* ignoring to their new values, then update a single "critical
* qword" to change the STE/CD and finally 0 out any bits that
* are now unused in the target configuration.
*/
unsigned int critical_qword_index = ffs(used_qword_diff) - 1;
/*
* Skip writing unused bits in the critical qword since we'll be
* writing it in the next step anyways. This can save a sync
* when the only change is in that qword.
*/
unused_update[critical_qword_index] =
entry[critical_qword_index];
entry_set(writer, entry, unused_update, 0, NUM_ENTRY_QWORDS);
entry_set(writer, entry, target, critical_qword_index, 1);
entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS);
} else if (used_qword_diff) {
/*
* At least two qwords need their inuse bits to be changed. This
* requires a breaking update, zero the V bit, write all qwords
* but 0, then set qword 0
*/
unused_update[0] = 0;
entry_set(writer, entry, unused_update, 0, 1);
entry_set(writer, entry, target, 1, NUM_ENTRY_QWORDS - 1);
entry_set(writer, entry, target, 0, 1);
} else {
/*
* No inuse bit changed. Sanity check that all unused bits are 0
* in the entry. The target was already sanity checked by
* arm_smmu_entry_qword_diff().
*/
WARN_ON_ONCE(
entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS));
}
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_write_entry);
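/*
 * Editor's note: a concrete instance of the above, assuming a bypass to
 * abort STE transition. Both configurations use only V and CFG in
 * qword 0 (bypass additionally uses SHCFG in qword 1, which the abort
 * target ignores), so used_qword_diff has a single bit set and the
 * switch is hitless: one 64-bit store flips CFG in qword 0, the
 * now-unused SHCFG is then zeroed, and V never drops to 0. By contrast,
 * a transition that must change used bits in two different qwords at
 * once (say, stage-1 to stage-2 where CFG in qword 0 and a shared
 * qword 1 field such as EATS both change) takes the breaking V=0 path.
 */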
static void arm_smmu_sync_cd(struct arm_smmu_master *master,
int ssid, bool leaf)
{
size_t i;
struct arm_smmu_cmdq_batch cmds;
struct arm_smmu_device *smmu = master->smmu;
struct arm_smmu_cmdq_ent cmd = {
.opcode = CMDQ_OP_CFGI_CD,
.cfgi = {
.ssid = ssid,
.leaf = leaf,
},
};
arm_smmu_cmdq_batch_init(smmu, &cmds, &cmd);
for (i = 0; i < master->num_streams; i++) {
cmd.cfgi.sid = master->streams[i].id;
arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
}
arm_smmu_cmdq_batch_submit(smmu, &cmds);
}
static void arm_smmu_write_cd_l1_desc(struct arm_smmu_cdtab_l1 *dst,
dma_addr_t l2ptr_dma)
{
u64 val = (l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) | CTXDESC_L1_DESC_V;
/* The HW has 64 bit atomicity with stores to the L2 CD table */
WRITE_ONCE(dst->l2ptr, cpu_to_le64(val));
}
static dma_addr_t arm_smmu_cd_l1_get_desc(const struct arm_smmu_cdtab_l1 *src)
{
return le64_to_cpu(src->l2ptr) & CTXDESC_L1_DESC_L2PTR_MASK;
}
struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master,
u32 ssid)
{
struct arm_smmu_cdtab_l2 *l2;
struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
if (!arm_smmu_cdtab_allocated(cd_table))
return NULL;
if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
return &cd_table->linear.table[ssid];
l2 = cd_table->l2.l2ptrs[arm_smmu_cdtab_l1_idx(ssid)];
if (!l2)
return NULL;
return &l2->cds[arm_smmu_cdtab_l2_idx(ssid)];
}
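/*
 * Editor's worked example of the two-level lookup above, assuming the
 * usual CTXDESC_L2_ENTRIES == 1024: for ssid 0x12345 (74565),
 * arm_smmu_cdtab_l1_idx() yields 74565 / 1024 = 72 and
 * arm_smmu_cdtab_l2_idx() yields 74565 % 1024 = 837, i.e. CD number 837
 * inside the 73rd leaf table. The linear format skips the split and
 * indexes cd_table->linear.table[ssid] directly.
 */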
static struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master,
u32 ssid)
{
struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
struct arm_smmu_device *smmu = master->smmu;
might_sleep();
iommu_group_mutex_assert(master->dev);
if (!arm_smmu_cdtab_allocated(cd_table)) {
if (arm_smmu_alloc_cd_tables(master))
return NULL;
}
if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_64K_L2) {
unsigned int idx = arm_smmu_cdtab_l1_idx(ssid);
struct arm_smmu_cdtab_l2 **l2ptr = &cd_table->l2.l2ptrs[idx];
if (!*l2ptr) {
dma_addr_t l2ptr_dma;
*l2ptr = dma_alloc_coherent(smmu->dev, sizeof(**l2ptr),
&l2ptr_dma, GFP_KERNEL);
if (!*l2ptr)
return NULL;
arm_smmu_write_cd_l1_desc(&cd_table->l2.l1tab[idx],
l2ptr_dma);
/* An invalid L1CD can be cached */
arm_smmu_sync_cd(master, ssid, false);
}
}
return arm_smmu_get_cd_ptr(master, ssid);
}
struct arm_smmu_cd_writer {
struct arm_smmu_entry_writer writer;
unsigned int ssid;
};
VISIBLE_IF_KUNIT
void arm_smmu_get_cd_used(const __le64 *ent, __le64 *used_bits)
{
used_bits[0] = cpu_to_le64(CTXDESC_CD_0_V);
if (!(ent[0] & cpu_to_le64(CTXDESC_CD_0_V)))
return;
memset(used_bits, 0xFF, sizeof(struct arm_smmu_cd));
/*
* If EPD0 is set by the make function it means
* T0SZ/TG0/IR0/OR0/SH0/TTB0 are IGNORED
*/
if (ent[0] & cpu_to_le64(CTXDESC_CD_0_TCR_EPD0)) {
used_bits[0] &= ~cpu_to_le64(
CTXDESC_CD_0_TCR_T0SZ | CTXDESC_CD_0_TCR_TG0 |
CTXDESC_CD_0_TCR_IRGN0 | CTXDESC_CD_0_TCR_ORGN0 |
CTXDESC_CD_0_TCR_SH0);
used_bits[1] &= ~cpu_to_le64(CTXDESC_CD_1_TTB0_MASK);
}
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_get_cd_used);
static void arm_smmu_cd_writer_sync_entry(struct arm_smmu_entry_writer *writer)
{
struct arm_smmu_cd_writer *cd_writer =
container_of(writer, struct arm_smmu_cd_writer, writer);
arm_smmu_sync_cd(writer->master, cd_writer->ssid, true);
}
static const struct arm_smmu_entry_writer_ops arm_smmu_cd_writer_ops = {
.sync = arm_smmu_cd_writer_sync_entry,
.get_used = arm_smmu_get_cd_used,
};
void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid,
struct arm_smmu_cd *cdptr,
const struct arm_smmu_cd *target)
{
bool target_valid = target->data[0] & cpu_to_le64(CTXDESC_CD_0_V);
bool cur_valid = cdptr->data[0] & cpu_to_le64(CTXDESC_CD_0_V);
struct arm_smmu_cd_writer cd_writer = {
.writer = {
.ops = &arm_smmu_cd_writer_ops,
.master = master,
},
.ssid = ssid,
};
if (ssid != IOMMU_NO_PASID && cur_valid != target_valid) {
if (cur_valid)
master->cd_table.used_ssids--;
else
master->cd_table.used_ssids++;
}
arm_smmu_write_entry(&cd_writer.writer, cdptr->data, target->data);
}
void arm_smmu_make_s1_cd(struct arm_smmu_cd *target,
struct arm_smmu_master *master,
struct arm_smmu_domain *smmu_domain)
{
struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
const struct io_pgtable_cfg *pgtbl_cfg =
&io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops)->cfg;
typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr =
&pgtbl_cfg->arm_lpae_s1_cfg.tcr;
memset(target, 0, sizeof(*target));
target->data[0] = cpu_to_le64(
FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
#ifdef __BIG_ENDIAN
CTXDESC_CD_0_ENDI |
#endif
CTXDESC_CD_0_TCR_EPD1 |
CTXDESC_CD_0_V |
FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
CTXDESC_CD_0_AA64 |
(master->stall_enabled ? CTXDESC_CD_0_S : 0) |
CTXDESC_CD_0_R |
CTXDESC_CD_0_A |
CTXDESC_CD_0_ASET |
FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid)
);
/* To enable dirty flag update, set both Access flag and dirty state update */
if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_HD)
target->data[0] |= cpu_to_le64(CTXDESC_CD_0_TCR_HA |
CTXDESC_CD_0_TCR_HD);
target->data[1] = cpu_to_le64(pgtbl_cfg->arm_lpae_s1_cfg.ttbr &
CTXDESC_CD_1_TTB0_MASK);
target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s1_cfg.mair);
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_s1_cd);
void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid)
{
struct arm_smmu_cd target = {};
struct arm_smmu_cd *cdptr;
if (!arm_smmu_cdtab_allocated(&master->cd_table))
return;
cdptr = arm_smmu_get_cd_ptr(master, ssid);
if (WARN_ON(!cdptr))
return;
arm_smmu_write_cd_entry(master, ssid, cdptr, &target);
}
static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master)
{
int ret;
size_t l1size;
size_t max_contexts;
struct arm_smmu_device *smmu = master->smmu;
struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
cd_table->s1cdmax = master->ssid_bits;
max_contexts = 1 << cd_table->s1cdmax;
if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
max_contexts <= CTXDESC_L2_ENTRIES) {
cd_table->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
cd_table->linear.num_ents = max_contexts;
l1size = max_contexts * sizeof(struct arm_smmu_cd);
cd_table->linear.table = dma_alloc_coherent(smmu->dev, l1size,
&cd_table->cdtab_dma,
GFP_KERNEL);
if (!cd_table->linear.table)
return -ENOMEM;
} else {
cd_table->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
cd_table->l2.num_l1_ents =
DIV_ROUND_UP(max_contexts, CTXDESC_L2_ENTRIES);
cd_table->l2.l2ptrs = kcalloc(cd_table->l2.num_l1_ents,
sizeof(*cd_table->l2.l2ptrs),
GFP_KERNEL);
if (!cd_table->l2.l2ptrs)
return -ENOMEM;
l1size = cd_table->l2.num_l1_ents * sizeof(struct arm_smmu_cdtab_l1);
cd_table->l2.l1tab = dma_alloc_coherent(smmu->dev, l1size,
&cd_table->cdtab_dma,
GFP_KERNEL);
if (!cd_table->l2.l1tab) {
ret = -ENOMEM;
goto err_free_l2ptrs;
}
}
return 0;
err_free_l2ptrs:
kfree(cd_table->l2.l2ptrs);
cd_table->l2.l2ptrs = NULL;
return ret;
}
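/*
 * Editor's note: a sizing example for the function above, assuming
 * CTXDESC_L2_ENTRIES == 1024 and 64-byte CDs. With ssid_bits == 10 the
 * 1024 contexts fit the linear format: a single 64KiB DMA allocation of
 * 1024 CDs. With ssid_bits == 16 the 65536 contexts go two-level: 64 L1
 * descriptors up front, with each 64KiB leaf table allocated lazily by
 * arm_smmu_alloc_cd_ptr() on first use of its SSID range.
 */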
static void arm_smmu_free_cd_tables(struct arm_smmu_master *master)
{
int i;
struct arm_smmu_device *smmu = master->smmu;
struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
if (cd_table->s1fmt != STRTAB_STE_0_S1FMT_LINEAR) {
for (i = 0; i < cd_table->l2.num_l1_ents; i++) {
if (!cd_table->l2.l2ptrs[i])
continue;
dma_free_coherent(smmu->dev,
sizeof(*cd_table->l2.l2ptrs[i]),
cd_table->l2.l2ptrs[i],
arm_smmu_cd_l1_get_desc(&cd_table->l2.l1tab[i]));
}
kfree(cd_table->l2.l2ptrs);
dma_free_coherent(smmu->dev,
cd_table->l2.num_l1_ents *
sizeof(struct arm_smmu_cdtab_l1),
cd_table->l2.l1tab, cd_table->cdtab_dma);
} else {
dma_free_coherent(smmu->dev,
cd_table->linear.num_ents *
sizeof(struct arm_smmu_cd),
cd_table->linear.table, cd_table->cdtab_dma);
}
}
/* Stream table manipulation functions */
static void arm_smmu_write_strtab_l1_desc(struct arm_smmu_strtab_l1 *dst,
dma_addr_t l2ptr_dma)
{
u64 val = 0;
val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, STRTAB_SPLIT + 1);
val |= l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;
/* The HW has 64 bit atomicity with stores to the L2 STE table */
WRITE_ONCE(dst->l2ptr, cpu_to_le64(val));
}
struct arm_smmu_ste_writer {
struct arm_smmu_entry_writer writer;
u32 sid;
};
static void arm_smmu_ste_writer_sync_entry(struct arm_smmu_entry_writer *writer)
{
struct arm_smmu_ste_writer *ste_writer =
container_of(writer, struct arm_smmu_ste_writer, writer);
struct arm_smmu_cmdq_ent cmd = {
.opcode = CMDQ_OP_CFGI_STE,
.cfgi = {
.sid = ste_writer->sid,
.leaf = true,
},
};
arm_smmu_cmdq_issue_cmd_with_sync(writer->master->smmu, &cmd);
}
static const struct arm_smmu_entry_writer_ops arm_smmu_ste_writer_ops = {
.sync = arm_smmu_ste_writer_sync_entry,
.get_used = arm_smmu_get_ste_used,
};
static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid,
struct arm_smmu_ste *ste,
const struct arm_smmu_ste *target)
{
struct arm_smmu_device *smmu = master->smmu;
struct arm_smmu_ste_writer ste_writer = {
.writer = {
.ops = &arm_smmu_ste_writer_ops,
.master = master,
},
.sid = sid,
};
arm_smmu_write_entry(&ste_writer.writer, ste->data, target->data);
/* It's likely that we'll want to use the new STE soon */
if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH)) {
struct arm_smmu_cmdq_ent
prefetch_cmd = { .opcode = CMDQ_OP_PREFETCH_CFG,
.prefetch = {
.sid = sid,
} };
arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd);
}
}
void arm_smmu_make_abort_ste(struct arm_smmu_ste *target)
{
memset(target, 0, sizeof(*target));
target->data[0] = cpu_to_le64(
STRTAB_STE_0_V |
FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT));
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_abort_ste);
VISIBLE_IF_KUNIT
void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu,
struct arm_smmu_ste *target)
{
memset(target, 0, sizeof(*target));
target->data[0] = cpu_to_le64(
STRTAB_STE_0_V |
FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS));
if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR)
target->data[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
STRTAB_STE_1_SHCFG_INCOMING));
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_bypass_ste);
VISIBLE_IF_KUNIT
void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target,
struct arm_smmu_master *master, bool ats_enabled,
unsigned int s1dss)
{
struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
struct arm_smmu_device *smmu = master->smmu;
memset(target, 0, sizeof(*target));
target->data[0] = cpu_to_le64(
STRTAB_STE_0_V |
FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
FIELD_PREP(STRTAB_STE_0_S1FMT, cd_table->s1fmt) |
(cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax));
target->data[1] = cpu_to_le64(
FIELD_PREP(STRTAB_STE_1_S1DSS, s1dss) |
FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) |
FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) |
FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) |
((smmu->features & ARM_SMMU_FEAT_STALLS &&
!master->stall_enabled) ?
STRTAB_STE_1_S1STALLD :
0) |
FIELD_PREP(STRTAB_STE_1_EATS,
ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
if ((smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR) &&
s1dss == STRTAB_STE_1_S1DSS_BYPASS)
target->data[1] |= cpu_to_le64(FIELD_PREP(
STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING));
if (smmu->features & ARM_SMMU_FEAT_E2H) {
/*
* To support BTM the streamworld needs to match the
* configuration of the CPU so that the ASID broadcasts are
* properly matched. This means either S/NS-EL2-E2H (hypervisor)
* or NS-EL1 (guest). Since an SVA domain can be installed in a
* PASID this should always use a BTM compatible configuration
* if the HW supports it.
*/
target->data[1] |= cpu_to_le64(
FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_EL2));
} else {
target->data[1] |= cpu_to_le64(
FIELD_PREP(STRTAB_STE_1_STRW, STRTAB_STE_1_STRW_NSEL1));
/*
* VMID 0 is reserved for stage-2 bypass EL1 STEs, see
* arm_smmu_domain_alloc_id()
*/
target->data[2] =
cpu_to_le64(FIELD_PREP(STRTAB_STE_2_S2VMID, 0));
}
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_cdtable_ste);
void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
struct arm_smmu_master *master,
struct arm_smmu_domain *smmu_domain,
bool ats_enabled)
{
struct arm_smmu_s2_cfg *s2_cfg = &smmu_domain->s2_cfg;
const struct io_pgtable_cfg *pgtbl_cfg =
&io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops)->cfg;
typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr =
&pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
u64 vtcr_val;
struct arm_smmu_device *smmu = master->smmu;
memset(target, 0, sizeof(*target));
target->data[0] = cpu_to_le64(
STRTAB_STE_0_V |
FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S2_TRANS));
target->data[1] = cpu_to_le64(
FIELD_PREP(STRTAB_STE_1_EATS,
ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_S2FWB)
target->data[1] |= cpu_to_le64(STRTAB_STE_1_S2FWB);
if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR)
target->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
STRTAB_STE_1_SHCFG_INCOMING));
vtcr_val = FIELD_PREP(STRTAB_STE_2_VTCR_S2T0SZ, vtcr->tsz) |
FIELD_PREP(STRTAB_STE_2_VTCR_S2SL0, vtcr->sl) |
FIELD_PREP(STRTAB_STE_2_VTCR_S2IR0, vtcr->irgn) |
FIELD_PREP(STRTAB_STE_2_VTCR_S2OR0, vtcr->orgn) |
FIELD_PREP(STRTAB_STE_2_VTCR_S2SH0, vtcr->sh) |
FIELD_PREP(STRTAB_STE_2_VTCR_S2TG, vtcr->tg) |
FIELD_PREP(STRTAB_STE_2_VTCR_S2PS, vtcr->ps);
target->data[2] = cpu_to_le64(
FIELD_PREP(STRTAB_STE_2_S2VMID, s2_cfg->vmid) |
FIELD_PREP(STRTAB_STE_2_VTCR, vtcr_val) |
STRTAB_STE_2_S2AA64 |
#ifdef __BIG_ENDIAN
STRTAB_STE_2_S2ENDI |
#endif
STRTAB_STE_2_S2PTW |
(master->stall_enabled ? STRTAB_STE_2_S2S : 0) |
STRTAB_STE_2_S2R);
target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s2_cfg.vttbr &
STRTAB_STE_3_S2TTB_MASK);
}
EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_s2_domain_ste);
/*
* This can safely directly manipulate the STE memory without a sync sequence
* because the STE table has not been installed in the SMMU yet.
*/
static void arm_smmu_init_initial_stes(struct arm_smmu_ste *strtab,
unsigned int nent)
{
unsigned int i;
for (i = 0; i < nent; ++i) {
arm_smmu_make_abort_ste(strtab);
strtab++;
}
}
static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
{
dma_addr_t l2ptr_dma;
struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
struct arm_smmu_strtab_l2 **l2table;
l2table = &cfg->l2.l2ptrs[arm_smmu_strtab_l1_idx(sid)];
if (*l2table)
return 0;
*l2table = dmam_alloc_coherent(smmu->dev, sizeof(**l2table),
&l2ptr_dma, GFP_KERNEL);
if (!*l2table) {
dev_err(smmu->dev,
"failed to allocate l2 stream table for SID %u\n",
sid);
return -ENOMEM;
}
arm_smmu_init_initial_stes((*l2table)->stes,
ARRAY_SIZE((*l2table)->stes));
arm_smmu_write_strtab_l1_desc(&cfg->l2.l1tab[arm_smmu_strtab_l1_idx(sid)],
l2ptr_dma);
return 0;
}
static int arm_smmu_streams_cmp_key(const void *lhs, const struct rb_node *rhs)
{
struct arm_smmu_stream *stream_rhs =
rb_entry(rhs, struct arm_smmu_stream, node);
const u32 *sid_lhs = lhs;
if (*sid_lhs < stream_rhs->id)
return -1;
if (*sid_lhs > stream_rhs->id)
return 1;
return 0;
}
static int arm_smmu_streams_cmp_node(struct rb_node *lhs,
const struct rb_node *rhs)
{
return arm_smmu_streams_cmp_key(
&rb_entry(lhs, struct arm_smmu_stream, node)->id, rhs);
}
static struct arm_smmu_master *
arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
{
struct rb_node *node;
lockdep_assert_held(&smmu->streams_mutex);
node = rb_find(&sid, &smmu->streams, arm_smmu_streams_cmp_key);
if (!node)
return NULL;
return rb_entry(node, struct arm_smmu_stream, node)->master;
}
/* IRQ and event handlers */
static void arm_smmu_decode_event(struct arm_smmu_device *smmu, u64 *raw,
struct arm_smmu_event *event)
{
struct arm_smmu_master *master;
event->id = FIELD_GET(EVTQ_0_ID, raw[0]);
event->sid = FIELD_GET(EVTQ_0_SID, raw[0]);
event->ssv = FIELD_GET(EVTQ_0_SSV, raw[0]);
event->ssid = event->ssv ? FIELD_GET(EVTQ_0_SSID, raw[0]) : IOMMU_NO_PASID;
event->privileged = FIELD_GET(EVTQ_1_PnU, raw[1]);
event->instruction = FIELD_GET(EVTQ_1_InD, raw[1]);
event->s2 = FIELD_GET(EVTQ_1_S2, raw[1]);
event->read = FIELD_GET(EVTQ_1_RnW, raw[1]);
event->stag = FIELD_GET(EVTQ_1_STAG, raw[1]);
event->stall = FIELD_GET(EVTQ_1_STALL, raw[1]);
event->class = FIELD_GET(EVTQ_1_CLASS, raw[1]);
event->iova = FIELD_GET(EVTQ_2_ADDR, raw[2]);
event->ipa = raw[3] & EVTQ_3_IPA;
event->fetch_addr = raw[3] & EVTQ_3_FETCH_ADDR;
event->ttrnw = FIELD_GET(EVTQ_1_TT_READ, raw[1]);
event->class_tt = false;
event->dev = NULL;
if (event->id == EVT_ID_PERMISSION_FAULT)
event->class_tt = (event->class == EVTQ_1_CLASS_TT);
mutex_lock(&smmu->streams_mutex);
master = arm_smmu_find_master(smmu, event->sid);
if (master)
event->dev = get_device(master->dev);
mutex_unlock(&smmu->streams_mutex);
}
static int arm_smmu_handle_event(struct arm_smmu_device *smmu, u64 *evt,
struct arm_smmu_event *event)
{
int ret = 0;
u32 perm = 0;
struct arm_smmu_master *master;
struct iopf_fault fault_evt = { };
struct iommu_fault *flt = &fault_evt.fault;
switch (event->id) {
case EVT_ID_BAD_STE_CONFIG:
case EVT_ID_STREAM_DISABLED_FAULT:
case EVT_ID_BAD_SUBSTREAMID_CONFIG:
case EVT_ID_BAD_CD_CONFIG:
case EVT_ID_TRANSLATION_FAULT:
case EVT_ID_ADDR_SIZE_FAULT:
case EVT_ID_ACCESS_FAULT:
case EVT_ID_PERMISSION_FAULT:
break;
default:
return -EOPNOTSUPP;
}
if (event->stall) {
if (event->read)
perm |= IOMMU_FAULT_PERM_READ;
else
perm |= IOMMU_FAULT_PERM_WRITE;
if (event->instruction)
perm |= IOMMU_FAULT_PERM_EXEC;
if (event->privileged)
perm |= IOMMU_FAULT_PERM_PRIV;
flt->type = IOMMU_FAULT_PAGE_REQ;
flt->prm = (struct iommu_fault_page_request){
.flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
.grpid = event->stag,
.perm = perm,
.addr = event->iova,
};
if (event->ssv) {
flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
flt->prm.pasid = event->ssid;
}
}
mutex_lock(&smmu->streams_mutex);
master = arm_smmu_find_master(smmu, event->sid);
if (!master) {
ret = -EINVAL;
goto out_unlock;
}
if (event->stall)
ret = iommu_report_device_fault(master->dev, &fault_evt);
else if (master->vmaster && !event->s2)
ret = arm_vmaster_report_event(master->vmaster, evt);
else
ret = -EOPNOTSUPP; /* Unhandled events should be pinned */
out_unlock:
mutex_unlock(&smmu->streams_mutex);
return ret;
}
static void arm_smmu_dump_raw_event(struct arm_smmu_device *smmu, u64 *raw,
struct arm_smmu_event *event)
{
int i;
dev_err(smmu->dev, "event 0x%02x received:\n", event->id);
for (i = 0; i < EVTQ_ENT_DWORDS; ++i)
dev_err(smmu->dev, "\t0x%016llx\n", raw[i]);
}
#define ARM_SMMU_EVT_KNOWN(e) ((e)->id < ARRAY_SIZE(event_str) && event_str[(e)->id])
#define ARM_SMMU_LOG_EVT_STR(e) ARM_SMMU_EVT_KNOWN(e) ? event_str[(e)->id] : "UNKNOWN"
#define ARM_SMMU_LOG_CLIENT(e) (e)->dev ? dev_name((e)->dev) : "(unassigned sid)"
static void arm_smmu_dump_event(struct arm_smmu_device *smmu, u64 *raw,
struct arm_smmu_event *evt,
struct ratelimit_state *rs)
{
if (!__ratelimit(rs))
return;
arm_smmu_dump_raw_event(smmu, raw, evt);
switch (evt->id) {
case EVT_ID_TRANSLATION_FAULT:
case EVT_ID_ADDR_SIZE_FAULT:
case EVT_ID_ACCESS_FAULT:
case EVT_ID_PERMISSION_FAULT:
dev_err(smmu->dev, "event: %s client: %s sid: %#x ssid: %#x iova: %#llx ipa: %#llx",
ARM_SMMU_LOG_EVT_STR(evt), ARM_SMMU_LOG_CLIENT(evt),
evt->sid, evt->ssid, evt->iova, evt->ipa);
dev_err(smmu->dev, "%s %s %s %s \"%s\"%s%s stag: %#x",
evt->privileged ? "priv" : "unpriv",
evt->instruction ? "inst" : "data",
str_read_write(evt->read),
evt->s2 ? "s2" : "s1", event_class_str[evt->class],
evt->class_tt ? (evt->ttrnw ? " ttd_read" : " ttd_write") : "",
evt->stall ? " stall" : "", evt->stag);
break;
case EVT_ID_STE_FETCH_FAULT:
case EVT_ID_CD_FETCH_FAULT:
case EVT_ID_VMS_FETCH_FAULT:
dev_err(smmu->dev, "event: %s client: %s sid: %#x ssid: %#x fetch_addr: %#llx",
ARM_SMMU_LOG_EVT_STR(evt), ARM_SMMU_LOG_CLIENT(evt),
evt->sid, evt->ssid, evt->fetch_addr);
break;
default:
dev_err(smmu->dev, "event: %s client: %s sid: %#x ssid: %#x",
ARM_SMMU_LOG_EVT_STR(evt), ARM_SMMU_LOG_CLIENT(evt),
evt->sid, evt->ssid);
}
}
static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
{
u64 evt[EVTQ_ENT_DWORDS];
struct arm_smmu_event event = {0};
struct arm_smmu_device *smmu = dev;
struct arm_smmu_queue *q = &smmu->evtq.q;
struct arm_smmu_ll_queue *llq = &q->llq;
static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
do {
while (!queue_remove_raw(q, evt)) {
arm_smmu_decode_event(smmu, evt, &event);
if (arm_smmu_handle_event(smmu, evt, &event))
arm_smmu_dump_event(smmu, evt, &event, &rs);
put_device(event.dev);
cond_resched();
}
/*
* Not much we can do on overflow, so scream and pretend we're
* trying harder.
*/
if (queue_sync_prod_in(q) == -EOVERFLOW)
dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
} while (!queue_empty(llq));
/* Sync our overflow flag, as we believe we're up to speed */
queue_sync_cons_ovf(q);
return IRQ_HANDLED;
}
static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
{
u32 sid, ssid;
u16 grpid;
bool ssv, last;
sid = FIELD_GET(PRIQ_0_SID, evt[0]);
ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]);
ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : IOMMU_NO_PASID;
last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]);
grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]);
dev_info(smmu->dev, "unexpected PRI request received:\n");
dev_info(smmu->dev,
"\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
sid, ssid, grpid, last ? "L" : "",
evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
evt[0] & PRIQ_0_PERM_READ ? "R" : "",
evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
evt[1] & PRIQ_1_ADDR_MASK);
if (last) {
struct arm_smmu_cmdq_ent cmd = {
.opcode = CMDQ_OP_PRI_RESP,
.substream_valid = ssv,
.pri = {
.sid = sid,
.ssid = ssid,
.grpid = grpid,
.resp = PRI_RESP_DENY,
},
};
arm_smmu_cmdq_issue_cmd(smmu, &cmd);
}
}
static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
{
struct arm_smmu_device *smmu = dev;
struct arm_smmu_queue *q = &smmu->priq.q;
struct arm_smmu_ll_queue *llq = &q->llq;
u64 evt[PRIQ_ENT_DWORDS];
do {
while (!queue_remove_raw(q, evt))
arm_smmu_handle_ppr(smmu, evt);
if (queue_sync_prod_in(q) == -EOVERFLOW)
dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
} while (!queue_empty(llq));
/* Sync our overflow flag, as we believe we're up to speed */
queue_sync_cons_ovf(q);
return IRQ_HANDLED;
}
static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
{
u32 gerror, gerrorn, active;
struct arm_smmu_device *smmu = dev;
gerror = readl_relaxed(smmu->base + ARM_SMMU_GERROR);
gerrorn = readl_relaxed(smmu->base + ARM_SMMU_GERRORN);
active = gerror ^ gerrorn;
if (!(active & GERROR_ERR_MASK))
return IRQ_NONE; /* No errors pending */
dev_warn(smmu->dev,
"unexpected global error reported (0x%08x), this could be serious\n",
active);
if (active & GERROR_SFM_ERR) {
dev_err(smmu->dev, "device has entered Service Failure Mode!\n");
arm_smmu_device_disable(smmu);
}
if (active & GERROR_MSI_GERROR_ABT_ERR)
dev_warn(smmu->dev, "GERROR MSI write aborted\n");
if (active & GERROR_MSI_PRIQ_ABT_ERR)
dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
if (active & GERROR_MSI_EVTQ_ABT_ERR)
dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
if (active & GERROR_MSI_CMDQ_ABT_ERR)
dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
if (active & GERROR_PRIQ_ABT_ERR)
dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
if (active & GERROR_EVTQ_ABT_ERR)
dev_err(smmu->dev, "EVTQ write aborted -- events may have been lost\n");
if (active & GERROR_CMDQ_ERR)
arm_smmu_cmdq_skip_err(smmu);
writel(gerror, smmu->base + ARM_SMMU_GERRORN);
return IRQ_HANDLED;
}
static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
{
struct arm_smmu_device *smmu = dev;
arm_smmu_evtq_thread(irq, dev);
if (smmu->features & ARM_SMMU_FEAT_PRI)
arm_smmu_priq_thread(irq, dev);
return IRQ_HANDLED;
}
static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
{
arm_smmu_gerror_handler(irq, dev);
return IRQ_WAKE_THREAD;
}
static void
arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
struct arm_smmu_cmdq_ent *cmd)
{
size_t log2_span;
size_t span_mask;
/* ATC invalidates are always on 4096-byte pages */
size_t inval_grain_shift = 12;
unsigned long page_start, page_end;
/*
* ATS and PASID:
*
* If substream_valid is clear, the PCIe TLP is sent without a PASID
* prefix. In that case all ATC entries within the address range are
* invalidated, including those that were requested with a PASID! There
* is no way to invalidate only entries without PASID.
*
* When using STRTAB_STE_1_S1DSS_SSID0 (reserving CD 0 for non-PASID
* traffic), translation requests without PASID create ATC entries
* without PASID, which must be invalidated with substream_valid clear.
* This has the unpleasant side-effect of invalidating all PASID-tagged
* ATC entries within the address range.
*/
*cmd = (struct arm_smmu_cmdq_ent) {
.opcode = CMDQ_OP_ATC_INV,
.substream_valid = (ssid != IOMMU_NO_PASID),
.atc.ssid = ssid,
};
if (!size) {
cmd->atc.size = ATC_INV_SIZE_ALL;
return;
}
page_start = iova >> inval_grain_shift;
page_end = (iova + size - 1) >> inval_grain_shift;
/*
* In an ATS Invalidate Request, the address must be aligned on the
* range size, which must be a power of two number of page sizes. We
* thus have to choose between grossly over-invalidating the region, or
* splitting the invalidation into multiple commands. For simplicity
* we'll go with the first solution, but should refine it in the future
* if multiple commands are shown to be more efficient.
*
* Find the smallest power of two that covers the range. The most
* significant differing bit between the start and end addresses,
* fls(start ^ end), indicates the required span. For example:
*
* We want to invalidate pages [8; 11]. This is already the ideal range:
* x = 0b1000 ^ 0b1011 = 0b11
* span = 1 << fls(x) = 4
*
* To invalidate pages [7; 10], we need to invalidate [0; 15]:
* x = 0b0111 ^ 0b1010 = 0b1101
* span = 1 << fls(x) = 16
*/
log2_span = fls_long(page_start ^ page_end);
span_mask = (1ULL << log2_span) - 1;
page_start &= ~span_mask;
cmd->atc.addr = page_start << inval_grain_shift;
cmd->atc.size = log2_span;
}
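/*
 * Illustrative trace of the encoding above (example values only): for
 * iova = 0x7000 and size = 0x4000 (pages [7; 10] at 4K granularity),
 * page_start = 0x7, page_end = 0xa and log2_span = fls_long(0xd) = 4,
 * so the command is issued with atc.addr = 0x0 and atc.size = 4,
 * invalidating the 16 pages [0; 15] as in the second example above.
 */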
static int arm_smmu_atc_inv_master(struct arm_smmu_master *master,
ioasid_t ssid)
{
int i;
struct arm_smmu_cmdq_ent cmd;
struct arm_smmu_cmdq_batch cmds;
arm_smmu_atc_inv_to_cmd(ssid, 0, 0, &cmd);
arm_smmu_cmdq_batch_init(master->smmu, &cmds, &cmd);
for (i = 0; i < master->num_streams; i++) {
cmd.atc.sid = master->streams[i].id;
arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
}
return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
}
int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
unsigned long iova, size_t size)
{
struct arm_smmu_master_domain *master_domain;
int i;
unsigned long flags;
struct arm_smmu_cmdq_ent cmd = {
.opcode = CMDQ_OP_ATC_INV,
};
struct arm_smmu_cmdq_batch cmds;
if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
return 0;
/*
* Ensure that we've completed prior invalidation of the main TLBs
* before we read 'nr_ats_masters' in case of a concurrent call to
* arm_smmu_enable_ats():
*
* // unmap() // arm_smmu_enable_ats()
* TLBI+SYNC atomic_inc(&nr_ats_masters);
* smp_mb(); [...]
* atomic_read(&nr_ats_masters); pci_enable_ats() // writel()
*
* Ensures that we always see the incremented 'nr_ats_masters' count if
* ATS was enabled at the PCI device before completion of the TLBI.
*/
smp_mb();
if (!atomic_read(&smmu_domain->nr_ats_masters))
return 0;
arm_smmu_cmdq_batch_init(smmu_domain->smmu, &cmds, &cmd);
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
list_for_each_entry(master_domain, &smmu_domain->devices,
devices_elm) {
struct arm_smmu_master *master = master_domain->master;
if (!master->ats_enabled)
continue;
if (master_domain->nested_ats_flush) {
/*
* If a S2 used as a nesting parent is changed we have
* no option but to completely flush the ATC.
*/
arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd);
} else {
arm_smmu_atc_inv_to_cmd(master_domain->ssid, iova, size,
&cmd);
}
for (i = 0; i < master->num_streams; i++) {
cmd.atc.sid = master->streams[i].id;
arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
}
}
spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
}
/* IO_PGTABLE API */
static void arm_smmu_tlb_inv_context(void *cookie)
{
struct arm_smmu_domain *smmu_domain = cookie;
struct arm_smmu_device *smmu = smmu_domain->smmu;
struct arm_smmu_cmdq_ent cmd;
/*
* NOTE: when io-pgtable is in non-strict mode, we may get here with
* PTEs previously cleared by unmaps on the current CPU not yet visible
* to the SMMU. We are relying on the dma_wmb() implicit during cmd
* insertion to guarantee those are observed before the TLBI. Do be
* careful, 007.
*/
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid);
} else {
cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
}
arm_smmu_atc_inv_domain(smmu_domain, 0, 0);
}
static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
unsigned long iova, size_t size,
size_t granule,
struct arm_smmu_domain *smmu_domain)
{
struct arm_smmu_device *smmu = smmu_domain->smmu;
unsigned long end = iova + size, num_pages = 0, tg = 0;
size_t inv_range = granule;
struct arm_smmu_cmdq_batch cmds;
if (!size)
return;
if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
/* Get the leaf page size */
tg = __ffs(smmu_domain->domain.pgsize_bitmap);
num_pages = size >> tg;
/* Convert page size of 12,14,16 (log2) to 1,2,3 */
cmd->tlbi.tg = (tg - 10) / 2;
/*
* Determine what level the granule is at. For non-leaf, both
* io-pgtable and SVA pass a nominal last-level granule because
* they don't know what level(s) actually apply, so ignore that
* and leave TTL=0. However for various errata reasons we still
* want to use a range command, so avoid the SVA corner case
* where both scale and num could be 0 as well.
*/
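/*
 * e.g. with a 4K leaf page size (tg = 12): a 4K granule gives TTL = 3,
 * a 2M block TTL = 2 and a 1G block TTL = 1.
 */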
if (cmd->tlbi.leaf)
cmd->tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));
else if ((num_pages & CMDQ_TLBI_RANGE_NUM_MAX) == 1)
num_pages++;
}
arm_smmu_cmdq_batch_init(smmu, &cmds, cmd);
while (iova < end) {
if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
/*
* On each iteration of the loop, the range is 5 bits
* worth of the aligned size remaining.
* The range in pages is:
*
* range = (num_pages & (0x1f << __ffs(num_pages)))
*/
unsigned long scale, num;
/* Find the largest power-of-2 factor of the remaining page count */
scale = __ffs(num_pages);
cmd->tlbi.scale = scale;
/* Determine how many chunks of 2^scale size we have */
num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
cmd->tlbi.num = num - 1;
/* range is num * 2^scale * pgsize */
inv_range = num << (scale + tg);
/* Clear out the lower order bits for the next iteration */
num_pages -= num << scale;
}
cmd->tlbi.addr = iova;
arm_smmu_cmdq_batch_add(smmu, &cmds, cmd);
iova += inv_range;
}
arm_smmu_cmdq_batch_submit(smmu, &cmds);
}
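/*
 * Illustrative decomposition (assuming 4K pages and RANGE_INV
 * support): invalidating 67 pages (num_pages = 0x43) takes two
 * commands. The first uses scale = 0, num = 3 to cover 3 pages,
 * leaving num_pages = 0x40; the second uses scale = 6, num = 1 to
 * cover the remaining 64 pages.
 */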
static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
size_t granule, bool leaf,
struct arm_smmu_domain *smmu_domain)
{
struct arm_smmu_cmdq_ent cmd = {
.tlbi = {
.leaf = leaf,
},
};
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
cmd.opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
cmd.tlbi.asid = smmu_domain->cd.asid;
} else {
cmd.opcode = CMDQ_OP_TLBI_S2_IPA;
cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
}
__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
if (smmu_domain->nest_parent) {
/*
* When the S2 domain changes all the nested S1 ASIDs have to be
* flushed too.
*/
cmd.opcode = CMDQ_OP_TLBI_NH_ALL;
arm_smmu_cmdq_issue_cmd_with_sync(smmu_domain->smmu, &cmd);
}
/*
* Unfortunately, this can't be leaf-only since we may have
* zapped an entire table.
*/
arm_smmu_atc_inv_domain(smmu_domain, iova, size);
}
void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
size_t granule, bool leaf,
struct arm_smmu_domain *smmu_domain)
{
struct arm_smmu_cmdq_ent cmd = {
.opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA,
.tlbi = {
.asid = asid,
.leaf = leaf,
},
};
__arm_smmu_tlb_inv_range(&cmd, iova, size, granule, smmu_domain);
}
static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather,
unsigned long iova, size_t granule,
void *cookie)
{
struct arm_smmu_domain *smmu_domain = cookie;
struct iommu_domain *domain = &smmu_domain->domain;
iommu_iotlb_gather_add_page(domain, gather, iova, granule);
}
static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size,
size_t granule, void *cookie)
{
arm_smmu_tlb_inv_range_domain(iova, size, granule, false, cookie);
}
static const struct iommu_flush_ops arm_smmu_flush_ops = {
.tlb_flush_all = arm_smmu_tlb_inv_context,
.tlb_flush_walk = arm_smmu_tlb_inv_walk,
.tlb_add_page = arm_smmu_tlb_inv_page_nosync,
};
static bool arm_smmu_dbm_capable(struct arm_smmu_device *smmu)
{
u32 features = (ARM_SMMU_FEAT_HD | ARM_SMMU_FEAT_COHERENCY);
return (smmu->features & features) == features;
}
/* IOMMU API */
static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
{
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
switch (cap) {
case IOMMU_CAP_CACHE_COHERENCY:
/* Assume that a coherent TCU implies coherent TBUs */
return master->smmu->features & ARM_SMMU_FEAT_COHERENCY;
case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
return arm_smmu_master_canwbs(master);
case IOMMU_CAP_NOEXEC:
case IOMMU_CAP_DEFERRED_FLUSH:
return true;
case IOMMU_CAP_DIRTY_TRACKING:
return arm_smmu_dbm_capable(master->smmu);
default:
return false;
}
}
static bool arm_smmu_enforce_cache_coherency(struct iommu_domain *domain)
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_master_domain *master_domain;
unsigned long flags;
bool ret = true;
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
list_for_each_entry(master_domain, &smmu_domain->devices,
devices_elm) {
if (!arm_smmu_master_canwbs(master_domain->master)) {
ret = false;
break;
}
}
smmu_domain->enforce_cache_coherency = ret;
spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
return ret;
}
struct arm_smmu_domain *arm_smmu_domain_alloc(void)
{
struct arm_smmu_domain *smmu_domain;
smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
if (!smmu_domain)
return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&smmu_domain->devices);
spin_lock_init(&smmu_domain->devices_lock);
return smmu_domain;
}
static void arm_smmu_domain_free_paging(struct iommu_domain *domain)
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_device *smmu = smmu_domain->smmu;
free_io_pgtable_ops(smmu_domain->pgtbl_ops);
/* Free the ASID or VMID */
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
/* Prevent SVA from touching the CD while we're freeing it */
mutex_lock(&arm_smmu_asid_lock);
xa_erase(&arm_smmu_asid_xa, smmu_domain->cd.asid);
mutex_unlock(&arm_smmu_asid_lock);
} else {
struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
if (cfg->vmid)
ida_free(&smmu->vmid_map, cfg->vmid);
}
kfree(smmu_domain);
}
static int arm_smmu_domain_finalise_s1(struct arm_smmu_device *smmu,
struct arm_smmu_domain *smmu_domain)
{
int ret;
u32 asid = 0;
struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
/* Prevent SVA from modifying the ASID until it is written to the CD */
mutex_lock(&arm_smmu_asid_lock);
ret = xa_alloc(&arm_smmu_asid_xa, &asid, smmu_domain,
XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
cd->asid = (u16)asid;
mutex_unlock(&arm_smmu_asid_lock);
return ret;
}
static int arm_smmu_domain_finalise_s2(struct arm_smmu_device *smmu,
struct arm_smmu_domain *smmu_domain)
{
int vmid;
struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
/* Reserve VMID 0 for stage-2 bypass STEs */
vmid = ida_alloc_range(&smmu->vmid_map, 1, (1 << smmu->vmid_bits) - 1,
GFP_KERNEL);
if (vmid < 0)
return vmid;
cfg->vmid = (u16)vmid;
return 0;
}
static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
struct arm_smmu_device *smmu, u32 flags)
{
int ret;
enum io_pgtable_fmt fmt;
struct io_pgtable_cfg pgtbl_cfg;
struct io_pgtable_ops *pgtbl_ops;
int (*finalise_stage_fn)(struct arm_smmu_device *smmu,
struct arm_smmu_domain *smmu_domain);
bool enable_dirty = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
pgtbl_cfg = (struct io_pgtable_cfg) {
.pgsize_bitmap = smmu->pgsize_bitmap,
.coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY,
.tlb = &arm_smmu_flush_ops,
.iommu_dev = smmu->dev,
};
switch (smmu_domain->stage) {
case ARM_SMMU_DOMAIN_S1: {
unsigned long ias = (smmu->features &
ARM_SMMU_FEAT_VAX) ? 52 : 48;
pgtbl_cfg.ias = min_t(unsigned long, ias, VA_BITS);
pgtbl_cfg.oas = smmu->ias;
if (enable_dirty)
pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD;
fmt = ARM_64_LPAE_S1;
finalise_stage_fn = arm_smmu_domain_finalise_s1;
break;
}
case ARM_SMMU_DOMAIN_S2:
if (enable_dirty)
return -EOPNOTSUPP;
pgtbl_cfg.ias = smmu->ias;
pgtbl_cfg.oas = smmu->oas;
fmt = ARM_64_LPAE_S2;
finalise_stage_fn = arm_smmu_domain_finalise_s2;
if ((smmu->features & ARM_SMMU_FEAT_S2FWB) &&
(flags & IOMMU_HWPT_ALLOC_NEST_PARENT))
pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_S2FWB;
break;
default:
return -EINVAL;
}
pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
if (!pgtbl_ops)
return -ENOMEM;
smmu_domain->domain.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
smmu_domain->domain.geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
smmu_domain->domain.geometry.force_aperture = true;
if (enable_dirty && smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
smmu_domain->domain.dirty_ops = &arm_smmu_dirty_ops;
ret = finalise_stage_fn(smmu, smmu_domain);
if (ret < 0) {
free_io_pgtable_ops(pgtbl_ops);
return ret;
}
smmu_domain->pgtbl_ops = pgtbl_ops;
smmu_domain->smmu = smmu;
return 0;
}
static struct arm_smmu_ste *
arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
{
struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
/* Two-level walk */
return &cfg->l2.l2ptrs[arm_smmu_strtab_l1_idx(sid)]
->stes[arm_smmu_strtab_l2_idx(sid)];
} else {
/* Simple linear lookup */
return &cfg->linear.table[sid];
}
}
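/*
 * e.g. with STRTAB_SPLIT = 8, SID 0x1234 resolves to L1 descriptor
 * 0x12 and STE 0x34 within the corresponding L2 table, whereas the
 * linear format indexes STE 0x1234 directly.
 */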
void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master,
const struct arm_smmu_ste *target)
{
int i, j;
struct arm_smmu_device *smmu = master->smmu;
master->cd_table.in_ste =
FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(target->data[0])) ==
STRTAB_STE_0_CFG_S1_TRANS;
master->ste_ats_enabled =
FIELD_GET(STRTAB_STE_1_EATS, le64_to_cpu(target->data[1])) ==
STRTAB_STE_1_EATS_TRANS;
for (i = 0; i < master->num_streams; ++i) {
u32 sid = master->streams[i].id;
struct arm_smmu_ste *step =
arm_smmu_get_step_for_sid(smmu, sid);
/* Bridged PCI devices may end up with duplicated IDs */
for (j = 0; j < i; j++)
if (master->streams[j].id == sid)
break;
if (j < i)
continue;
arm_smmu_write_ste(master, sid, step, target);
}
}
static bool arm_smmu_ats_supported(struct arm_smmu_master *master)
{
struct device *dev = master->dev;
struct arm_smmu_device *smmu = master->smmu;
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
if (!(smmu->features & ARM_SMMU_FEAT_ATS))
return false;
if (!(fwspec->flags & IOMMU_FWSPEC_PCI_RC_ATS))
return false;
return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev));
}
static void arm_smmu_enable_ats(struct arm_smmu_master *master)
{
size_t stu;
struct pci_dev *pdev;
struct arm_smmu_device *smmu = master->smmu;
/* Smallest Translation Unit: log2 of the smallest supported granule */
stu = __ffs(smmu->pgsize_bitmap);
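/* e.g. a smallest supported granule of 4K gives stu = 12 */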
pdev = to_pci_dev(master->dev);
/*
* ATC invalidation of PASID 0 causes the entire ATC to be flushed.
*/
arm_smmu_atc_inv_master(master, IOMMU_NO_PASID);
if (pci_enable_ats(pdev, stu))
dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu);
}
static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
{
int ret;
int features;
int num_pasids;
struct pci_dev *pdev;
if (!dev_is_pci(master->dev))
return -ENODEV;
pdev = to_pci_dev(master->dev);
features = pci_pasid_features(pdev);
if (features < 0)
return features;
num_pasids = pci_max_pasids(pdev);
if (num_pasids <= 0)
return num_pasids;
ret = pci_enable_pasid(pdev, features);
if (ret) {
dev_err(&pdev->dev, "Failed to enable PASID\n");
return ret;
}
master->ssid_bits = min_t(u8, ilog2(num_pasids),
master->smmu->ssid_bits);
return 0;
}
static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
{
struct pci_dev *pdev;
if (!dev_is_pci(master->dev))
return;
pdev = to_pci_dev(master->dev);
if (!pdev->pasid_enabled)
return;
master->ssid_bits = 0;
pci_disable_pasid(pdev);
}
static struct arm_smmu_master_domain *
arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain,
struct iommu_domain *domain,
struct arm_smmu_master *master,
ioasid_t ssid, bool nested_ats_flush)
{
struct arm_smmu_master_domain *master_domain;
lockdep_assert_held(&smmu_domain->devices_lock);
list_for_each_entry(master_domain, &smmu_domain->devices,
devices_elm) {
if (master_domain->master == master &&
master_domain->domain == domain &&
master_domain->ssid == ssid &&
master_domain->nested_ats_flush == nested_ats_flush)
return master_domain;
}
return NULL;
}
/*
* If the domain uses the smmu_domain->devices list, return the arm_smmu_domain
* structure, otherwise NULL. These domains track attached devices so they can
* issue invalidations.
*/
static struct arm_smmu_domain *
to_smmu_domain_devices(struct iommu_domain *domain)
{
/* The domain can be NULL only when processing the first attach */
if (!domain)
return NULL;
if ((domain->type & __IOMMU_DOMAIN_PAGING) ||
domain->type == IOMMU_DOMAIN_SVA)
return to_smmu_domain(domain);
if (domain->type == IOMMU_DOMAIN_NESTED)
return to_smmu_nested_domain(domain)->vsmmu->s2_parent;
return NULL;
}
static int arm_smmu_enable_iopf(struct arm_smmu_master *master,
struct arm_smmu_master_domain *master_domain)
{
int ret;
iommu_group_mutex_assert(master->dev);
if (!IS_ENABLED(CONFIG_ARM_SMMU_V3_SVA))
return -EOPNOTSUPP;
/*
* Drivers for devices supporting PRI or stall require IOPF; others
* have device-specific fault handlers and don't need IOPF, so this is
* not a failure.
*/
if (!master->stall_enabled)
return 0;
/* We're not keeping track of SIDs in fault events */
if (master->num_streams != 1)
return -EOPNOTSUPP;
if (master->iopf_refcount) {
master->iopf_refcount++;
master_domain->using_iopf = true;
return 0;
}
ret = iopf_queue_add_device(master->smmu->evtq.iopf, master->dev);
if (ret)
return ret;
master->iopf_refcount = 1;
master_domain->using_iopf = true;
return 0;
}
static void arm_smmu_disable_iopf(struct arm_smmu_master *master,
struct arm_smmu_master_domain *master_domain)
{
iommu_group_mutex_assert(master->dev);
if (!IS_ENABLED(CONFIG_ARM_SMMU_V3_SVA))
return;
if (!master_domain || !master_domain->using_iopf)
return;
master->iopf_refcount--;
if (master->iopf_refcount == 0)
iopf_queue_remove_device(master->smmu->evtq.iopf, master->dev);
}
static void arm_smmu_remove_master_domain(struct arm_smmu_master *master,
struct iommu_domain *domain,
ioasid_t ssid)
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain_devices(domain);
struct arm_smmu_master_domain *master_domain;
bool nested_ats_flush = false;
unsigned long flags;
if (!smmu_domain)
return;
if (domain->type == IOMMU_DOMAIN_NESTED)
nested_ats_flush = to_smmu_nested_domain(domain)->enable_ats;
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
master_domain = arm_smmu_find_master_domain(smmu_domain, domain, master,
ssid, nested_ats_flush);
if (master_domain) {
list_del(&master_domain->devices_elm);
if (master->ats_enabled)
atomic_dec(&smmu_domain->nr_ats_masters);
}
spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
arm_smmu_disable_iopf(master, master_domain);
kfree(master_domain);
}
/*
* Start the sequence to attach a domain to a master. The sequence contains three
* steps:
* arm_smmu_attach_prepare()
* arm_smmu_install_ste_for_dev()
* arm_smmu_attach_commit()
*
* If prepare succeeds then the sequence must be completed. The STE installed
* must set the STE.EATS field according to state.ats_enabled.
*
* If the device supports ATS then this determines if EATS should be enabled
* in the STE, and starts sequencing EATS disable if required.
*
* The change of the EATS in the STE and the PCI ATS config space is managed by
* this sequence to be in the right order so that if PCI ATS is enabled then
* STE.EATS is enabled.
*
* new_domain can be a non-paging domain. In this case ATS will not be enabled,
* and invalidations won't be tracked.
*/
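/*
 * A sketch of the canonical caller pattern (see arm_smmu_attach_dev()
 * below; error handling elided):
 *
 *	mutex_lock(&arm_smmu_asid_lock);
 *	ret = arm_smmu_attach_prepare(&state, new_domain);
 *	if (!ret) {
 *		... build an STE honouring state.ats_enabled ...
 *		arm_smmu_install_ste_for_dev(master, &target);
 *		arm_smmu_attach_commit(&state);
 *	}
 *	mutex_unlock(&arm_smmu_asid_lock);
 */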
int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state,
struct iommu_domain *new_domain)
{
struct arm_smmu_master *master = state->master;
struct arm_smmu_master_domain *master_domain;
struct arm_smmu_domain *smmu_domain =
to_smmu_domain_devices(new_domain);
unsigned long flags;
int ret;
/*
* arm_smmu_share_asid() must not see two domains pointing to the same
* arm_smmu_master_domain contents, otherwise it could randomly write one
* or the other to the CD.
*/
lockdep_assert_held(&arm_smmu_asid_lock);
if (smmu_domain || state->cd_needs_ats) {
/*
* The SMMU does not support enabling ATS with bypass/abort.
* When the STE is in bypass (STE.Config[2:0] == 0b100), ATS
* Translation Requests and Translated transactions are denied
* as though ATS is disabled for the stream (STE.EATS == 0b00),
* causing F_BAD_ATS_TREQ and F_TRANSL_FORBIDDEN events
* (IHI0070Ea 5.2 Stream Table Entry).
*
* However, if we have installed a CD table and are using S1DSS
* then ATS will work in S1DSS bypass. See "13.6.4 Full ATS
* skipping stage 1".
*
* Disable ATS if we are going to create a normal 0b100 bypass
* STE.
*/
state->ats_enabled = !state->disable_ats &&
arm_smmu_ats_supported(master);
}
if (smmu_domain) {
if (new_domain->type == IOMMU_DOMAIN_NESTED) {
ret = arm_smmu_attach_prepare_vmaster(
state, to_smmu_nested_domain(new_domain));
if (ret)
return ret;
}
master_domain = kzalloc(sizeof(*master_domain), GFP_KERNEL);
if (!master_domain) {
ret = -ENOMEM;
goto err_free_vmaster;
}
master_domain->domain = new_domain;
master_domain->master = master;
master_domain->ssid = state->ssid;
if (new_domain->type == IOMMU_DOMAIN_NESTED)
master_domain->nested_ats_flush =
to_smmu_nested_domain(new_domain)->enable_ats;
if (new_domain->iopf_handler) {
ret = arm_smmu_enable_iopf(master, master_domain);
if (ret)
goto err_free_master_domain;
}
/*
* During prepare we want the current smmu_domain and new
* smmu_domain to be in the devices list before we change any
* HW. This ensures that both domains will send ATS
* invalidations to the master until we are done.
*
* It is tempting to make this list only track masters that are
* using ATS, but arm_smmu_share_asid() also uses this to change
* the ASID of a domain, unrelated to ATS.
*
* Notice if we are re-attaching the same domain then the list
* will have two identical entries and commit will remove only
* one of them.
*/
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
if (smmu_domain->enforce_cache_coherency &&
!arm_smmu_master_canwbs(master)) {
spin_unlock_irqrestore(&smmu_domain->devices_lock,
flags);
ret = -EINVAL;
goto err_iopf;
}
if (state->ats_enabled)
atomic_inc(&smmu_domain->nr_ats_masters);
list_add(&master_domain->devices_elm, &smmu_domain->devices);
spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
}
if (!state->ats_enabled && master->ats_enabled) {
pci_disable_ats(to_pci_dev(master->dev));
/*
* This is probably overkill, but the config write for disabling
* ATS should complete before the STE is configured to generate
* UR to avoid AER noise.
*/
wmb();
}
return 0;
err_iopf:
arm_smmu_disable_iopf(master, master_domain);
err_free_master_domain:
kfree(master_domain);
err_free_vmaster:
kfree(state->vmaster);
return ret;
}
/*
* Commit is done after the STE/CD are configured with the EATS setting. It
* completes synchronizing the PCI device's ATC and finishes manipulating the
* smmu_domain->devices list.
*/
void arm_smmu_attach_commit(struct arm_smmu_attach_state *state)
{
struct arm_smmu_master *master = state->master;
lockdep_assert_held(&arm_smmu_asid_lock);
arm_smmu_attach_commit_vmaster(state);
if (state->ats_enabled && !master->ats_enabled) {
arm_smmu_enable_ats(master);
} else if (state->ats_enabled && master->ats_enabled) {
/*
* The translation has changed, flush the ATC. At this point the
* SMMU is translating for the new domain and both the old&new
* domain will issue invalidations.
*/
arm_smmu_atc_inv_master(master, state->ssid);
} else if (!state->ats_enabled && master->ats_enabled) {
/* ATS is being switched off, invalidate the entire ATC */
arm_smmu_atc_inv_master(master, IOMMU_NO_PASID);
}
master->ats_enabled = state->ats_enabled;
arm_smmu_remove_master_domain(master, state->old_domain, state->ssid);
}
static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
int ret = 0;
struct arm_smmu_ste target;
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
struct arm_smmu_device *smmu;
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_attach_state state = {
.old_domain = iommu_get_domain_for_dev(dev),
.ssid = IOMMU_NO_PASID,
};
struct arm_smmu_master *master;
struct arm_smmu_cd *cdptr;
if (!fwspec)
return -ENOENT;
state.master = master = dev_iommu_priv_get(dev);
smmu = master->smmu;
if (smmu_domain->smmu != smmu)
return -EINVAL;
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
cdptr = arm_smmu_alloc_cd_ptr(master, IOMMU_NO_PASID);
if (!cdptr)
return -ENOMEM;
} else if (arm_smmu_ssids_in_use(&master->cd_table))
return -EBUSY;
/*
* Prevent arm_smmu_share_asid() from trying to change the ASID
* of either the old or new domain while we are working on it.
* This allows the STE and the smmu_domain->devices list to
* be inconsistent during this routine.
*/
mutex_lock(&arm_smmu_asid_lock);
ret = arm_smmu_attach_prepare(&state, domain);
if (ret) {
mutex_unlock(&arm_smmu_asid_lock);
return ret;
}
switch (smmu_domain->stage) {
case ARM_SMMU_DOMAIN_S1: {
struct arm_smmu_cd target_cd;
arm_smmu_make_s1_cd(&target_cd, master, smmu_domain);
arm_smmu_write_cd_entry(master, IOMMU_NO_PASID, cdptr,
&target_cd);
arm_smmu_make_cdtable_ste(&target, master, state.ats_enabled,
STRTAB_STE_1_S1DSS_SSID0);
arm_smmu_install_ste_for_dev(master, &target);
break;
}
case ARM_SMMU_DOMAIN_S2:
arm_smmu_make_s2_domain_ste(&target, master, smmu_domain,
state.ats_enabled);
arm_smmu_install_ste_for_dev(master, &target);
arm_smmu_clear_cd(master, IOMMU_NO_PASID);
break;
}
arm_smmu_attach_commit(&state);
mutex_unlock(&arm_smmu_asid_lock);
return 0;
}
static int arm_smmu_s1_set_dev_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t id,
struct iommu_domain *old)
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
struct arm_smmu_device *smmu = master->smmu;
struct arm_smmu_cd target_cd;
if (smmu_domain->smmu != smmu)
return -EINVAL;
if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
return -EINVAL;
/*
* We can read cd.asid outside the lock because arm_smmu_set_pasid()
* will fix it.
*/
arm_smmu_make_s1_cd(&target_cd, master, smmu_domain);
return arm_smmu_set_pasid(master, to_smmu_domain(domain), id,
&target_cd, old);
}
static void arm_smmu_update_ste(struct arm_smmu_master *master,
struct iommu_domain *sid_domain,
bool ats_enabled)
{
unsigned int s1dss = STRTAB_STE_1_S1DSS_TERMINATE;
struct arm_smmu_ste ste;
if (master->cd_table.in_ste && master->ste_ats_enabled == ats_enabled)
return;
if (sid_domain->type == IOMMU_DOMAIN_IDENTITY)
s1dss = STRTAB_STE_1_S1DSS_BYPASS;
else
WARN_ON(sid_domain->type != IOMMU_DOMAIN_BLOCKED);
/*
* Change the STE into a cdtable one with SID IDENTITY/BLOCKED behavior
* using s1dss if necessary. If the cd_table is already installed then
* the S1DSS is correct and this will just update the EATS. Otherwise it
* installs the entire thing. This will be hitless.
*/
arm_smmu_make_cdtable_ste(&ste, master, ats_enabled, s1dss);
arm_smmu_install_ste_for_dev(master, &ste);
}
int arm_smmu_set_pasid(struct arm_smmu_master *master,
struct arm_smmu_domain *smmu_domain, ioasid_t pasid,
struct arm_smmu_cd *cd, struct iommu_domain *old)
{
struct iommu_domain *sid_domain = iommu_get_domain_for_dev(master->dev);
struct arm_smmu_attach_state state = {
.master = master,
.ssid = pasid,
.old_domain = old,
};
struct arm_smmu_cd *cdptr;
int ret;
/* The core code validates pasid */
if (smmu_domain->smmu != master->smmu)
return -EINVAL;
if (!master->cd_table.in_ste &&
sid_domain->type != IOMMU_DOMAIN_IDENTITY &&
sid_domain->type != IOMMU_DOMAIN_BLOCKED)
return -EINVAL;
cdptr = arm_smmu_alloc_cd_ptr(master, pasid);
if (!cdptr)
return -ENOMEM;
mutex_lock(&arm_smmu_asid_lock);
ret = arm_smmu_attach_prepare(&state, &smmu_domain->domain);
if (ret)
goto out_unlock;
/*
* We don't want to take the asid_lock too early, so fix up the
* caller-set ASID under the lock in case it changed.
*/
cd->data[0] &= ~cpu_to_le64(CTXDESC_CD_0_ASID);
cd->data[0] |= cpu_to_le64(
FIELD_PREP(CTXDESC_CD_0_ASID, smmu_domain->cd.asid));
arm_smmu_write_cd_entry(master, pasid, cdptr, cd);
arm_smmu_update_ste(master, sid_domain, state.ats_enabled);
arm_smmu_attach_commit(&state);
out_unlock:
mutex_unlock(&arm_smmu_asid_lock);
return ret;
}
static int arm_smmu_blocking_set_dev_pasid(struct iommu_domain *new_domain,
struct device *dev, ioasid_t pasid,
struct iommu_domain *old_domain)
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(old_domain);
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
mutex_lock(&arm_smmu_asid_lock);
arm_smmu_clear_cd(master, pasid);
if (master->ats_enabled)
arm_smmu_atc_inv_master(master, pasid);
arm_smmu_remove_master_domain(master, &smmu_domain->domain, pasid);
mutex_unlock(&arm_smmu_asid_lock);
/*
* When the last user of the CD table goes away, downgrade the STE back
* to a non-cd_table one.
*/
if (!arm_smmu_ssids_in_use(&master->cd_table)) {
struct iommu_domain *sid_domain =
iommu_get_domain_for_dev(master->dev);
if (sid_domain->type == IOMMU_DOMAIN_IDENTITY ||
sid_domain->type == IOMMU_DOMAIN_BLOCKED)
sid_domain->ops->attach_dev(sid_domain, dev);
}
return 0;
}
static void arm_smmu_attach_dev_ste(struct iommu_domain *domain,
struct device *dev,
struct arm_smmu_ste *ste,
unsigned int s1dss)
{
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
struct arm_smmu_attach_state state = {
.master = master,
.old_domain = iommu_get_domain_for_dev(dev),
.ssid = IOMMU_NO_PASID,
};
/*
* Do not allow any ASID to be changed while we are working on the STE,
* otherwise we could miss invalidations.
*/
mutex_lock(&arm_smmu_asid_lock);
/*
* If the CD table is not in use we can use the provided STE, otherwise
* we use a cdtable STE with the provided S1DSS.
*/
if (arm_smmu_ssids_in_use(&master->cd_table)) {
/*
* If a CD table has to be present then we need to run with ATS
* on because we have to assume a PASID is using ATS. For
* IDENTITY this will set things up so that S1DSS=bypass, which
* follows the explanation in "13.6.4 Full ATS skipping stage 1"
* and allows for ATS on the RID to work.
*/
state.cd_needs_ats = true;
arm_smmu_attach_prepare(&state, domain);
arm_smmu_make_cdtable_ste(ste, master, state.ats_enabled, s1dss);
} else {
arm_smmu_attach_prepare(&state, domain);
}
arm_smmu_install_ste_for_dev(master, ste);
arm_smmu_attach_commit(&state);
mutex_unlock(&arm_smmu_asid_lock);
/*
* This has to be done after removing the master from the
* arm_smmu_domain->devices to avoid races updating the same context
* descriptor from arm_smmu_share_asid().
*/
arm_smmu_clear_cd(master, IOMMU_NO_PASID);
}
static int arm_smmu_attach_dev_identity(struct iommu_domain *domain,
struct device *dev)
{
struct arm_smmu_ste ste;
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
arm_smmu_master_clear_vmaster(master);
arm_smmu_make_bypass_ste(master->smmu, &ste);
arm_smmu_attach_dev_ste(domain, dev, &ste, STRTAB_STE_1_S1DSS_BYPASS);
return 0;
}
static const struct iommu_domain_ops arm_smmu_identity_ops = {
.attach_dev = arm_smmu_attach_dev_identity,
};
static struct iommu_domain arm_smmu_identity_domain = {
.type = IOMMU_DOMAIN_IDENTITY,
.ops = &arm_smmu_identity_ops,
};
static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain,
struct device *dev)
{
struct arm_smmu_ste ste;
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
arm_smmu_master_clear_vmaster(master);
arm_smmu_make_abort_ste(&ste);
arm_smmu_attach_dev_ste(domain, dev, &ste,
STRTAB_STE_1_S1DSS_TERMINATE);
return 0;
}
static const struct iommu_domain_ops arm_smmu_blocked_ops = {
.attach_dev = arm_smmu_attach_dev_blocked,
.set_dev_pasid = arm_smmu_blocking_set_dev_pasid,
};
static struct iommu_domain arm_smmu_blocked_domain = {
.type = IOMMU_DOMAIN_BLOCKED,
.ops = &arm_smmu_blocked_ops,
};
static struct iommu_domain *
arm_smmu_domain_alloc_paging_flags(struct device *dev, u32 flags,
const struct iommu_user_data *user_data)
{
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
struct arm_smmu_device *smmu = master->smmu;
const u32 PAGING_FLAGS = IOMMU_HWPT_ALLOC_DIRTY_TRACKING |
IOMMU_HWPT_ALLOC_PASID |
IOMMU_HWPT_ALLOC_NEST_PARENT;
struct arm_smmu_domain *smmu_domain;
int ret;
if (flags & ~PAGING_FLAGS)
return ERR_PTR(-EOPNOTSUPP);
if (user_data)
return ERR_PTR(-EOPNOTSUPP);
smmu_domain = arm_smmu_domain_alloc();
if (IS_ERR(smmu_domain))
return ERR_CAST(smmu_domain);
switch (flags) {
case 0:
/* Prefer S1 if available */
if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
else
smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
break;
case IOMMU_HWPT_ALLOC_NEST_PARENT:
if (!(smmu->features & ARM_SMMU_FEAT_NESTING)) {
ret = -EOPNOTSUPP;
goto err_free;
}
smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
smmu_domain->nest_parent = true;
break;
case IOMMU_HWPT_ALLOC_DIRTY_TRACKING:
case IOMMU_HWPT_ALLOC_DIRTY_TRACKING | IOMMU_HWPT_ALLOC_PASID:
case IOMMU_HWPT_ALLOC_PASID:
if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1)) {
ret = -EOPNOTSUPP;
goto err_free;
}
smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
break;
default:
ret = -EOPNOTSUPP;
goto err_free;
}
smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED;
smmu_domain->domain.ops = arm_smmu_ops.default_domain_ops;
ret = arm_smmu_domain_finalise(smmu_domain, smmu, flags);
if (ret)
goto err_free;
return &smmu_domain->domain;
err_free:
kfree(smmu_domain);
return ERR_PTR(ret);
}
static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t pgsize, size_t pgcount,
int prot, gfp_t gfp, size_t *mapped)
{
struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
if (!ops)
return -ENODEV;
return ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
}
static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
size_t pgsize, size_t pgcount,
struct iommu_iotlb_gather *gather)
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
if (!ops)
return 0;
return ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
}
static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
if (smmu_domain->smmu)
arm_smmu_tlb_inv_context(smmu_domain);
}
static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
struct iommu_iotlb_gather *gather)
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
if (!gather->pgsize)
return;
arm_smmu_tlb_inv_range_domain(gather->start,
gather->end - gather->start + 1,
gather->pgsize, true, smmu_domain);
}
static phys_addr_t
arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
{
struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
if (!ops)
return 0;
return ops->iova_to_phys(ops, iova);
}
static struct platform_driver arm_smmu_driver;
static
struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
{
struct device *dev = bus_find_device_by_fwnode(&platform_bus_type, fwnode);
put_device(dev);
return dev ? dev_get_drvdata(dev) : NULL;
}
static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
{
if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
return arm_smmu_strtab_l1_idx(sid) < smmu->strtab_cfg.l2.num_l1_ents;
return sid < smmu->strtab_cfg.linear.num_ents;
}
static int arm_smmu_init_sid_strtab(struct arm_smmu_device *smmu, u32 sid)
{
/* Check the SIDs are in range of the SMMU and our stream table */
if (!arm_smmu_sid_in_range(smmu, sid))
return -ERANGE;
/* Ensure l2 strtab is initialised */
if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
return arm_smmu_init_l2_strtab(smmu, sid);
return 0;
}
static int arm_smmu_insert_master(struct arm_smmu_device *smmu,
struct arm_smmu_master *master)
{
int i;
int ret = 0;
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
master->streams = kcalloc(fwspec->num_ids, sizeof(*master->streams),
GFP_KERNEL);
if (!master->streams)
return -ENOMEM;
master->num_streams = fwspec->num_ids;
mutex_lock(&smmu->streams_mutex);
for (i = 0; i < fwspec->num_ids; i++) {
struct arm_smmu_stream *new_stream = &master->streams[i];
struct rb_node *existing;
u32 sid = fwspec->ids[i];
new_stream->id = sid;
new_stream->master = master;
ret = arm_smmu_init_sid_strtab(smmu, sid);
if (ret)
break;
/* Insert into SID tree */
existing = rb_find_add(&new_stream->node, &smmu->streams,
arm_smmu_streams_cmp_node);
if (existing) {
struct arm_smmu_master *existing_master =
rb_entry(existing, struct arm_smmu_stream, node)
->master;
/* Bridged PCI devices may end up with duplicated IDs */
if (existing_master == master)
continue;
dev_warn(master->dev,
"Aliasing StreamID 0x%x (from %s) unsupported, expect DMA to be broken\n",
sid, dev_name(existing_master->dev));
ret = -ENODEV;
break;
}
}
if (ret) {
for (i--; i >= 0; i--)
rb_erase(&master->streams[i].node, &smmu->streams);
kfree(master->streams);
}
mutex_unlock(&smmu->streams_mutex);
return ret;
}
static void arm_smmu_remove_master(struct arm_smmu_master *master)
{
int i;
struct arm_smmu_device *smmu = master->smmu;
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(master->dev);
if (!smmu || !master->streams)
return;
mutex_lock(&smmu->streams_mutex);
for (i = 0; i < fwspec->num_ids; i++)
rb_erase(&master->streams[i].node, &smmu->streams);
mutex_unlock(&smmu->streams_mutex);
kfree(master->streams);
}
static struct iommu_device *arm_smmu_probe_device(struct device *dev)
{
int ret;
struct arm_smmu_device *smmu;
struct arm_smmu_master *master;
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
if (WARN_ON_ONCE(dev_iommu_priv_get(dev)))
return ERR_PTR(-EBUSY);
smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
if (!smmu)
return ERR_PTR(-ENODEV);
master = kzalloc(sizeof(*master), GFP_KERNEL);
if (!master)
return ERR_PTR(-ENOMEM);
master->dev = dev;
master->smmu = smmu;
dev_iommu_priv_set(dev, master);
ret = arm_smmu_insert_master(smmu, master);
if (ret)
goto err_free_master;
device_property_read_u32(dev, "pasid-num-bits", &master->ssid_bits);
master->ssid_bits = min(smmu->ssid_bits, master->ssid_bits);
/*
* Note that PASID must be enabled before, and disabled after ATS:
* PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
*
* Behavior is undefined if this bit is Set and the value of the PASID
* Enable, Execute Requested Enable, or Privileged Mode Requested bits
* are changed.
*/
arm_smmu_enable_pasid(master);
if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
master->ssid_bits = min_t(u8, master->ssid_bits,
CTXDESC_LINEAR_CDMAX);
if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
device_property_read_bool(dev, "dma-can-stall")) ||
smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
master->stall_enabled = true;
if (dev_is_pci(dev)) {
unsigned int stu = __ffs(smmu->pgsize_bitmap);
pci_prepare_ats(to_pci_dev(dev), stu);
}
return &smmu->iommu;
err_free_master:
kfree(master);
return ERR_PTR(ret);
}
static void arm_smmu_release_device(struct device *dev)
{
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
WARN_ON(master->iopf_refcount);
/* Put the STE back to what arm_smmu_init_strtab() sets */
if (dev->iommu->require_direct)
arm_smmu_attach_dev_identity(&arm_smmu_identity_domain, dev);
else
arm_smmu_attach_dev_blocked(&arm_smmu_blocked_domain, dev);
arm_smmu_disable_pasid(master);
arm_smmu_remove_master(master);
if (arm_smmu_cdtab_allocated(&master->cd_table))
arm_smmu_free_cd_tables(master);
kfree(master);
}
static int arm_smmu_read_and_clear_dirty(struct iommu_domain *domain,
unsigned long iova, size_t size,
unsigned long flags,
struct iommu_dirty_bitmap *dirty)
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
return ops->read_and_clear_dirty(ops, iova, size, flags, dirty);
}
static int arm_smmu_set_dirty_tracking(struct iommu_domain *domain,
bool enabled)
{
/*
* Always enabled and the dirty bitmap is cleared prior to
* set_dirty_tracking().
*/
return 0;
}
static struct iommu_group *arm_smmu_device_group(struct device *dev)
{
struct iommu_group *group;
/*
* We don't support devices sharing stream IDs other than PCI RID
* aliases, since the necessary ID-to-device lookup becomes rather
* impractical given a potentially sparse 32-bit stream ID space.
*/
if (dev_is_pci(dev))
group = pci_device_group(dev);
else
group = generic_device_group(dev);
return group;
}
static int arm_smmu_of_xlate(struct device *dev,
const struct of_phandle_args *args)
{
return iommu_fwspec_add_ids(dev, args->args, 1);
}
static void arm_smmu_get_resv_regions(struct device *dev,
struct list_head *head)
{
struct iommu_resv_region *region;
int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
prot, IOMMU_RESV_SW_MSI, GFP_KERNEL);
if (!region)
return;
list_add_tail(&region->list, head);
iommu_dma_get_resv_regions(dev, head);
}
/*
* HiSilicon PCIe tune and trace device can be used to trace TLP headers on the
* PCIe link and save the data to memory by DMA. The hardware is restricted to
* use identity mapping only.
*/
#define IS_HISI_PTT_DEVICE(pdev) ((pdev)->vendor == PCI_VENDOR_ID_HUAWEI && \
(pdev)->device == 0xa12e)
static int arm_smmu_def_domain_type(struct device *dev)
{
if (dev_is_pci(dev)) {
struct pci_dev *pdev = to_pci_dev(dev);
if (IS_HISI_PTT_DEVICE(pdev))
return IOMMU_DOMAIN_IDENTITY;
}
return 0;
}
static const struct iommu_ops arm_smmu_ops = {
.identity_domain = &arm_smmu_identity_domain,
.blocked_domain = &arm_smmu_blocked_domain,
.capable = arm_smmu_capable,
.hw_info = arm_smmu_hw_info,
.domain_alloc_sva = arm_smmu_sva_domain_alloc,
.domain_alloc_paging_flags = arm_smmu_domain_alloc_paging_flags,
.probe_device = arm_smmu_probe_device,
.release_device = arm_smmu_release_device,
.device_group = arm_smmu_device_group,
.of_xlate = arm_smmu_of_xlate,
.get_resv_regions = arm_smmu_get_resv_regions,
.page_response = arm_smmu_page_response,
.def_domain_type = arm_smmu_def_domain_type,
.get_viommu_size = arm_smmu_get_viommu_size,
.viommu_init = arm_vsmmu_init,
.user_pasid_table = 1,
.owner = THIS_MODULE,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = arm_smmu_attach_dev,
.enforce_cache_coherency = arm_smmu_enforce_cache_coherency,
.set_dev_pasid = arm_smmu_s1_set_dev_pasid,
.map_pages = arm_smmu_map_pages,
.unmap_pages = arm_smmu_unmap_pages,
.flush_iotlb_all = arm_smmu_flush_iotlb_all,
.iotlb_sync = arm_smmu_iotlb_sync,
.iova_to_phys = arm_smmu_iova_to_phys,
.free = arm_smmu_domain_free_paging,
}
};
static struct iommu_dirty_ops arm_smmu_dirty_ops = {
.read_and_clear_dirty = arm_smmu_read_and_clear_dirty,
.set_dirty_tracking = arm_smmu_set_dirty_tracking,
};
/* Probing and initialisation functions */
int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
struct arm_smmu_queue *q, void __iomem *page,
unsigned long prod_off, unsigned long cons_off,
size_t dwords, const char *name)
{
size_t qsz;
do {
qsz = ((1 << q->llq.max_n_shift) * dwords) << 3;
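/* e.g. max_n_shift = 8 with dwords = 2: 256 entries * 16 bytes = 4K */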
q->base = dmam_alloc_coherent(smmu->dev, qsz, &q->base_dma,
GFP_KERNEL);
if (q->base || qsz < PAGE_SIZE)
break;
q->llq.max_n_shift--;
} while (1);
if (!q->base) {
dev_err(smmu->dev,
"failed to allocate queue (0x%zx bytes) for %s\n",
qsz, name);
return -ENOMEM;
}
if (!WARN_ON(q->base_dma & (qsz - 1))) {
dev_info(smmu->dev, "allocated %u entries for %s\n",
1 << q->llq.max_n_shift, name);
}
q->prod_reg = page + prod_off;
q->cons_reg = page + cons_off;
q->ent_dwords = dwords;
q->q_base = Q_BASE_RWA;
q->q_base |= q->base_dma & Q_BASE_ADDR_MASK;
q->q_base |= FIELD_PREP(Q_BASE_LOG2SIZE, q->llq.max_n_shift);
q->llq.prod = q->llq.cons = 0;
return 0;
}
int arm_smmu_cmdq_init(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq *cmdq)
{
unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
atomic_set(&cmdq->owner_prod, 0);
atomic_set(&cmdq->lock, 0);
cmdq->valid_map = (atomic_long_t *)devm_bitmap_zalloc(smmu->dev, nents,
GFP_KERNEL);
if (!cmdq->valid_map)
return -ENOMEM;
return 0;
}
static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
{
int ret;
/* cmdq */
ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, smmu->base,
ARM_SMMU_CMDQ_PROD, ARM_SMMU_CMDQ_CONS,
CMDQ_ENT_DWORDS, "cmdq");
if (ret)
return ret;
ret = arm_smmu_cmdq_init(smmu, &smmu->cmdq);
if (ret)
return ret;
/* evtq */
ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, smmu->page1,
ARM_SMMU_EVTQ_PROD, ARM_SMMU_EVTQ_CONS,
EVTQ_ENT_DWORDS, "evtq");
if (ret)
return ret;
if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
(smmu->features & ARM_SMMU_FEAT_STALLS)) {
smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
if (!smmu->evtq.iopf)
return -ENOMEM;
}
/* priq */
if (!(smmu->features & ARM_SMMU_FEAT_PRI))
return 0;
return arm_smmu_init_one_queue(smmu, &smmu->priq.q, smmu->page1,
ARM_SMMU_PRIQ_PROD, ARM_SMMU_PRIQ_CONS,
PRIQ_ENT_DWORDS, "priq");
}
static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
{
u32 l1size;
struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
unsigned int last_sid_idx =
arm_smmu_strtab_l1_idx((1ULL << smmu->sid_bits) - 1);
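/*
 * e.g. with STRTAB_SPLIT = 8 and sid_bits = 24, last_sid_idx is
 * 0xffffff >> 8 = 0xffff, so 0x10000 L1 descriptors cover the whole
 * SID space.
 */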
/* Calculate the L1 size, capped to the SIDSIZE. */
cfg->l2.num_l1_ents = min(last_sid_idx + 1, STRTAB_MAX_L1_ENTRIES);
if (cfg->l2.num_l1_ents <= last_sid_idx)
dev_warn(smmu->dev,
"2-level strtab only covers %u/%u bits of SID\n",
ilog2(cfg->l2.num_l1_ents * STRTAB_NUM_L2_STES),
smmu->sid_bits);
l1size = cfg->l2.num_l1_ents * sizeof(struct arm_smmu_strtab_l1);
cfg->l2.l1tab = dmam_alloc_coherent(smmu->dev, l1size, &cfg->l2.l1_dma,
GFP_KERNEL);
if (!cfg->l2.l1tab) {
dev_err(smmu->dev,
"failed to allocate l1 stream table (%u bytes)\n",
l1size);
return -ENOMEM;
}
cfg->l2.l2ptrs = devm_kcalloc(smmu->dev, cfg->l2.num_l1_ents,
sizeof(*cfg->l2.l2ptrs), GFP_KERNEL);
if (!cfg->l2.l2ptrs)
return -ENOMEM;
return 0;
}
static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
{
u32 size;
struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
size = (1 << smmu->sid_bits) * sizeof(struct arm_smmu_ste);
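/* e.g. sid_bits = 16: 65536 STEs * 64 bytes = a 4MB table */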
cfg->linear.table = dmam_alloc_coherent(smmu->dev, size,
&cfg->linear.ste_dma,
GFP_KERNEL);
if (!cfg->linear.table) {
dev_err(smmu->dev,
"failed to allocate linear stream table (%u bytes)\n",
size);
return -ENOMEM;
}
cfg->linear.num_ents = 1 << smmu->sid_bits;
arm_smmu_init_initial_stes(cfg->linear.table, cfg->linear.num_ents);
return 0;
}
static int arm_smmu_init_strtab(struct arm_smmu_device *smmu)
{
int ret;
if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB)
ret = arm_smmu_init_strtab_2lvl(smmu);
else
ret = arm_smmu_init_strtab_linear(smmu);
if (ret)
return ret;
ida_init(&smmu->vmid_map);
return 0;
}
static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
{
int ret;
mutex_init(&smmu->streams_mutex);
smmu->streams = RB_ROOT;
ret = arm_smmu_init_queues(smmu);
if (ret)
return ret;
ret = arm_smmu_init_strtab(smmu);
if (ret)
return ret;
if (smmu->impl_ops && smmu->impl_ops->init_structures)
return smmu->impl_ops->init_structures(smmu);
return 0;
}
static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
unsigned int reg_off, unsigned int ack_off)
{
u32 reg;
writel_relaxed(val, smmu->base + reg_off);
return readl_relaxed_poll_timeout(smmu->base + ack_off, reg, reg == val,
1, ARM_SMMU_POLL_TIMEOUT_US);
}
/* GBPA is "special" */
static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
{
int ret;
u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
1, ARM_SMMU_POLL_TIMEOUT_US);
if (ret)
return ret;
reg &= ~clr;
reg |= set;
writel_relaxed(reg | GBPA_UPDATE, gbpa);
ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
1, ARM_SMMU_POLL_TIMEOUT_US);
if (ret)
dev_err(smmu->dev, "GBPA not responding to update\n");
return ret;
}
static void arm_smmu_free_msis(void *data)
{
struct device *dev = data;
platform_device_msi_free_irqs_all(dev);
}
static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
{
phys_addr_t doorbell;
struct device *dev = msi_desc_to_dev(desc);
struct arm_smmu_device *smmu = dev_get_drvdata(dev);
phys_addr_t *cfg = arm_smmu_msi_cfg[desc->msi_index];
doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
doorbell &= MSI_CFG0_ADDR_MASK;
writeq_relaxed(doorbell, smmu->base + cfg[0]);
writel_relaxed(msg->data, smmu->base + cfg[1]);
writel_relaxed(ARM_SMMU_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
}
static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
{
int ret, nvec = ARM_SMMU_MAX_MSIS;
struct device *dev = smmu->dev;
/* Clear the MSI address regs */
writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
if (smmu->features & ARM_SMMU_FEAT_PRI)
writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
else
nvec--;
if (!(smmu->features & ARM_SMMU_FEAT_MSI))
return;
if (!dev->msi.domain) {
dev_info(smmu->dev, "msi_domain absent - falling back to wired irqs\n");
return;
}
/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
ret = platform_device_msi_init_and_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
if (ret) {
dev_warn(dev, "failed to allocate MSIs - falling back to wired irqs\n");
return;
}
smmu->evtq.q.irq = msi_get_virq(dev, EVTQ_MSI_INDEX);
smmu->gerr_irq = msi_get_virq(dev, GERROR_MSI_INDEX);
smmu->priq.q.irq = msi_get_virq(dev, PRIQ_MSI_INDEX);
/* Add callback to free MSIs on teardown */
devm_add_action_or_reset(dev, arm_smmu_free_msis, dev);
}
static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
{
	int irq, ret;

	arm_smmu_setup_msis(smmu);

	/* Request interrupt lines */
	irq = smmu->evtq.q.irq;
	if (irq) {
		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
						arm_smmu_evtq_thread,
						IRQF_ONESHOT,
						"arm-smmu-v3-evtq", smmu);
		if (ret < 0)
			dev_warn(smmu->dev, "failed to enable evtq irq\n");
	} else {
		dev_warn(smmu->dev, "no evtq irq - events will not be reported!\n");
	}

	irq = smmu->gerr_irq;
	if (irq) {
		ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
				       0, "arm-smmu-v3-gerror", smmu);
		if (ret < 0)
			dev_warn(smmu->dev, "failed to enable gerror irq\n");
	} else {
		dev_warn(smmu->dev, "no gerr irq - errors will not be reported!\n");
	}

	if (smmu->features & ARM_SMMU_FEAT_PRI) {
		irq = smmu->priq.q.irq;
		if (irq) {
			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
							arm_smmu_priq_thread,
							IRQF_ONESHOT,
							"arm-smmu-v3-priq",
							smmu);
			if (ret < 0)
				dev_warn(smmu->dev,
					 "failed to enable priq irq\n");
		} else {
			dev_warn(smmu->dev, "no priq irq - PRI will be broken\n");
		}
	}
}

static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
{
	int ret, irq;
	u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;

	/* Disable IRQs first */
	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,
				      ARM_SMMU_IRQ_CTRLACK);
	if (ret) {
		dev_err(smmu->dev, "failed to disable irqs\n");
		return ret;
	}

	irq = smmu->combined_irq;
	if (irq) {
		/*
		 * Cavium ThunderX2 implementation doesn't support unique irq
		 * lines. Use a single irq line for all the SMMUv3 interrupts.
		 */
		ret = devm_request_threaded_irq(smmu->dev, irq,
					arm_smmu_combined_irq_handler,
					arm_smmu_combined_irq_thread,
					IRQF_ONESHOT,
					"arm-smmu-v3-combined-irq", smmu);
		if (ret < 0)
			dev_warn(smmu->dev, "failed to enable combined irq\n");
	} else
		arm_smmu_setup_unique_irqs(smmu);

	if (smmu->features & ARM_SMMU_FEAT_PRI)
		irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;

	/* Enable interrupt generation on the SMMU */
	ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
				      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
	if (ret)
		dev_warn(smmu->dev, "failed to enable irqs\n");

	return 0;
}

static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
{
	int ret;

	ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_CR0, ARM_SMMU_CR0ACK);
	if (ret)
		dev_err(smmu->dev, "failed to clear cr0\n");

	return ret;
}

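/*
 * Program the stream table base registers. A 2-level table points the
 * hardware at the level-1 descriptor array; a linear table points it at
 * the STE array directly, sized by the number of StreamID bits.
 */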
static void arm_smmu_write_strtab(struct arm_smmu_device *smmu)
{
	struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;
	dma_addr_t dma;
	u32 reg;

	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
		reg = FIELD_PREP(STRTAB_BASE_CFG_FMT,
				 STRTAB_BASE_CFG_FMT_2LVL) |
		      FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE,
				 ilog2(cfg->l2.num_l1_ents) + STRTAB_SPLIT) |
		      FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT);
		dma = cfg->l2.l1_dma;
	} else {
		reg = FIELD_PREP(STRTAB_BASE_CFG_FMT,
				 STRTAB_BASE_CFG_FMT_LINEAR) |
		      FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, smmu->sid_bits);
		dma = cfg->linear.ste_dma;
	}
	writeq_relaxed((dma & STRTAB_BASE_ADDR_MASK) | STRTAB_BASE_RA,
		       smmu->base + ARM_SMMU_STRTAB_BASE);
	writel_relaxed(reg, smmu->base + ARM_SMMU_STRTAB_BASE_CFG);
}

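/*
 * Full device bring-up: quiesce the SMMU, set table/queue memory
 * attributes, program the stream table and queues, invalidate stale
 * configuration and TLB state, wire up interrupts and finally flip
 * SMMUEN. Queue and SMMU enables are acknowledged via CR0/CR0ACK
 * before proceeding to the next step.
 */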
static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
{
	int ret;
	u32 reg, enables;
	struct arm_smmu_cmdq_ent cmd;

	/* Clear CR0 and sync (disables SMMU and queue processing) */
	reg = readl_relaxed(smmu->base + ARM_SMMU_CR0);
	if (reg & CR0_SMMUEN) {
		dev_warn(smmu->dev, "SMMU currently enabled! Resetting...\n");
		arm_smmu_update_gbpa(smmu, GBPA_ABORT, 0);
	}

	ret = arm_smmu_device_disable(smmu);
	if (ret)
		return ret;

	/* CR1 (table and queue memory attributes) */
	reg = FIELD_PREP(CR1_TABLE_SH, ARM_SMMU_SH_ISH) |
	      FIELD_PREP(CR1_TABLE_OC, CR1_CACHE_WB) |
	      FIELD_PREP(CR1_TABLE_IC, CR1_CACHE_WB) |
	      FIELD_PREP(CR1_QUEUE_SH, ARM_SMMU_SH_ISH) |
	      FIELD_PREP(CR1_QUEUE_OC, CR1_CACHE_WB) |
	      FIELD_PREP(CR1_QUEUE_IC, CR1_CACHE_WB);
	writel_relaxed(reg, smmu->base + ARM_SMMU_CR1);

	/* CR2 (random crap) */
	reg = CR2_PTM | CR2_RECINVSID;

	if (smmu->features & ARM_SMMU_FEAT_E2H)
		reg |= CR2_E2H;

	writel_relaxed(reg, smmu->base + ARM_SMMU_CR2);

	/* Stream table */
	arm_smmu_write_strtab(smmu);

	/* Command queue */
	writeq_relaxed(smmu->cmdq.q.q_base, smmu->base + ARM_SMMU_CMDQ_BASE);
	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);

	enables = CR0_CMDQEN;
	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
				      ARM_SMMU_CR0ACK);
	if (ret) {
		dev_err(smmu->dev, "failed to enable command queue\n");
		return ret;
	}

	/* Invalidate any cached configuration */
	cmd.opcode = CMDQ_OP_CFGI_ALL;
	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);

	/* Invalidate any stale TLB entries */
	if (smmu->features & ARM_SMMU_FEAT_HYP) {
		cmd.opcode = CMDQ_OP_TLBI_EL2_ALL;
		arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
	}

	cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
	arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);

	/* Event queue */
	writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
	writel_relaxed(smmu->evtq.q.llq.prod, smmu->page1 + ARM_SMMU_EVTQ_PROD);
	writel_relaxed(smmu->evtq.q.llq.cons, smmu->page1 + ARM_SMMU_EVTQ_CONS);

	enables |= CR0_EVTQEN;
	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
				      ARM_SMMU_CR0ACK);
	if (ret) {
		dev_err(smmu->dev, "failed to enable event queue\n");
		return ret;
	}

	/* PRI queue */
	if (smmu->features & ARM_SMMU_FEAT_PRI) {
		writeq_relaxed(smmu->priq.q.q_base,
			       smmu->base + ARM_SMMU_PRIQ_BASE);
		writel_relaxed(smmu->priq.q.llq.prod,
			       smmu->page1 + ARM_SMMU_PRIQ_PROD);
		writel_relaxed(smmu->priq.q.llq.cons,
			       smmu->page1 + ARM_SMMU_PRIQ_CONS);

		enables |= CR0_PRIQEN;
		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
					      ARM_SMMU_CR0ACK);
		if (ret) {
			dev_err(smmu->dev, "failed to enable PRI queue\n");
			return ret;
		}
	}

	if (smmu->features & ARM_SMMU_FEAT_ATS) {
		enables |= CR0_ATSCHK;
		ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
					      ARM_SMMU_CR0ACK);
		if (ret) {
			dev_err(smmu->dev, "failed to enable ATS check\n");
			return ret;
		}
	}

	ret = arm_smmu_setup_irqs(smmu);
	if (ret) {
		dev_err(smmu->dev, "failed to setup irqs\n");
		return ret;
	}

	if (is_kdump_kernel())
		enables &= ~(CR0_EVTQEN | CR0_PRIQEN);

	/* Enable the SMMU interface */
	enables |= CR0_SMMUEN;
	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
				      ARM_SMMU_CR0ACK);
	if (ret) {
		dev_err(smmu->dev, "failed to enable SMMU interface\n");
		return ret;
	}

	if (smmu->impl_ops && smmu->impl_ops->device_reset) {
		ret = smmu->impl_ops->device_reset(smmu);
		if (ret) {
			dev_err(smmu->dev, "failed to reset impl\n");
			return ret;
		}
	}

	return 0;
}

#define IIDR_IMPLEMENTER_ARM		0x43b
#define IIDR_PRODUCTID_ARM_MMU_600	0x483
#define IIDR_PRODUCTID_ARM_MMU_700	0x487

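/*
 * Apply erratum workarounds keyed off the IIDR: trim features that are
 * unsafe on affected MMU-600/MMU-700 revisions rather than failing probe.
 */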
static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu)
{
	u32 reg;
	unsigned int implementer, productid, variant, revision;

	reg = readl_relaxed(smmu->base + ARM_SMMU_IIDR);
	implementer = FIELD_GET(IIDR_IMPLEMENTER, reg);
	productid = FIELD_GET(IIDR_PRODUCTID, reg);
	variant = FIELD_GET(IIDR_VARIANT, reg);
	revision = FIELD_GET(IIDR_REVISION, reg);

	switch (implementer) {
	case IIDR_IMPLEMENTER_ARM:
		switch (productid) {
		case IIDR_PRODUCTID_ARM_MMU_600:
			/* Arm erratum 1076982 */
			if (variant == 0 && revision <= 2)
				smmu->features &= ~ARM_SMMU_FEAT_SEV;
			/* Arm erratum 1209401 */
			if (variant < 2)
				smmu->features &= ~ARM_SMMU_FEAT_NESTING;
			break;
		case IIDR_PRODUCTID_ARM_MMU_700:
			/* Arm erratum 2812531 */
			smmu->features &= ~ARM_SMMU_FEAT_BTM;
			smmu->options |= ARM_SMMU_OPT_CMDQ_FORCE_SYNC;
			/* Arm errata 2268618, 2812531 */
			smmu->features &= ~ARM_SMMU_FEAT_NESTING;
			break;
		}
		break;
	}
}

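/*
 * Reconcile the hardware HTTU (hardware translation table update, i.e.
 * access/dirty flag management) capability with what firmware described.
 * DT has no HTTU override, so trust IDR0; for ACPI, IORT has already set
 * the feature bits, so only warn if the ID register disagrees.
 */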
static void arm_smmu_get_httu(struct arm_smmu_device *smmu, u32 reg)
{
	u32 fw_features = smmu->features & (ARM_SMMU_FEAT_HA | ARM_SMMU_FEAT_HD);
	u32 hw_features = 0;

	switch (FIELD_GET(IDR0_HTTU, reg)) {
	case IDR0_HTTU_ACCESS_DIRTY:
		hw_features |= ARM_SMMU_FEAT_HD;
		fallthrough;
	case IDR0_HTTU_ACCESS:
		hw_features |= ARM_SMMU_FEAT_HA;
	}

	if (smmu->dev->of_node)
		smmu->features |= hw_features;
	else if (hw_features != fw_features)
		/* ACPI IORT sets the HTTU bits */
		dev_warn(smmu->dev,
			 "IDR0.HTTU features(0x%x) overridden by FW configuration (0x%x)\n",
			 hw_features, fw_features);
}

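/*
 * Discover hardware capabilities from the ID registers: feature flags and
 * endianness from IDR0, queue/SID/SSID sizes from IDR1, invalidation and
 * stage-2 details from IDR3, and page sizes plus address sizes from IDR5.
 */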
static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
{
	u32 reg;
	bool coherent = smmu->features & ARM_SMMU_FEAT_COHERENCY;

	/* IDR0 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR0);

	/* 2-level structures */
	if (FIELD_GET(IDR0_ST_LVL, reg) == IDR0_ST_LVL_2LVL)
		smmu->features |= ARM_SMMU_FEAT_2_LVL_STRTAB;

	if (reg & IDR0_CD2L)
		smmu->features |= ARM_SMMU_FEAT_2_LVL_CDTAB;

	/*
	 * Translation table endianness.
	 * We currently require the same endianness as the CPU, but this
	 * could be changed later by adding a new IO_PGTABLE_QUIRK.
	 */
	switch (FIELD_GET(IDR0_TTENDIAN, reg)) {
	case IDR0_TTENDIAN_MIXED:
		smmu->features |= ARM_SMMU_FEAT_TT_LE | ARM_SMMU_FEAT_TT_BE;
		break;
#ifdef __BIG_ENDIAN
	case IDR0_TTENDIAN_BE:
		smmu->features |= ARM_SMMU_FEAT_TT_BE;
		break;
#else
	case IDR0_TTENDIAN_LE:
		smmu->features |= ARM_SMMU_FEAT_TT_LE;
		break;
#endif
	default:
		dev_err(smmu->dev, "unknown/unsupported TT endianness!\n");
		return -ENXIO;
	}

	/* Boolean feature flags */
	if (IS_ENABLED(CONFIG_PCI_PRI) && reg & IDR0_PRI)
		smmu->features |= ARM_SMMU_FEAT_PRI;

	if (IS_ENABLED(CONFIG_PCI_ATS) && reg & IDR0_ATS)
		smmu->features |= ARM_SMMU_FEAT_ATS;

	if (reg & IDR0_SEV)
		smmu->features |= ARM_SMMU_FEAT_SEV;

	if (reg & IDR0_MSI) {
		smmu->features |= ARM_SMMU_FEAT_MSI;
		if (coherent && !disable_msipolling)
			smmu->options |= ARM_SMMU_OPT_MSIPOLL;
	}

	if (reg & IDR0_HYP) {
		smmu->features |= ARM_SMMU_FEAT_HYP;
		if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
			smmu->features |= ARM_SMMU_FEAT_E2H;
	}

	arm_smmu_get_httu(smmu, reg);

	/*
	 * The coherency feature as set by FW is used in preference to the ID
	 * register, but warn on mismatch.
	 */
	if (!!(reg & IDR0_COHACC) != coherent)
		dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
			 str_true_false(coherent));

	switch (FIELD_GET(IDR0_STALL_MODEL, reg)) {
	case IDR0_STALL_MODEL_FORCE:
		smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
		fallthrough;
	case IDR0_STALL_MODEL_STALL:
		smmu->features |= ARM_SMMU_FEAT_STALLS;
	}

	if (reg & IDR0_S1P)
		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;

	if (reg & IDR0_S2P)
		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;

	if (!(reg & (IDR0_S1P | IDR0_S2P))) {
		dev_err(smmu->dev, "no translation support!\n");
		return -ENXIO;
	}

	/* We only support the AArch64 table format at present */
	switch (FIELD_GET(IDR0_TTF, reg)) {
	case IDR0_TTF_AARCH32_64:
		smmu->ias = 40;
		fallthrough;
	case IDR0_TTF_AARCH64:
		break;
	default:
		dev_err(smmu->dev, "AArch64 table format not supported!\n");
		return -ENXIO;
	}

	/* ASID/VMID sizes */
	smmu->asid_bits = reg & IDR0_ASID16 ? 16 : 8;
	smmu->vmid_bits = reg & IDR0_VMID16 ? 16 : 8;

	/* IDR1 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
	if (reg & (IDR1_TABLES_PRESET | IDR1_QUEUES_PRESET | IDR1_REL)) {
		dev_err(smmu->dev, "embedded implementation not supported\n");
		return -ENXIO;
	}

	if (reg & IDR1_ATTR_TYPES_OVR)
		smmu->features |= ARM_SMMU_FEAT_ATTR_TYPES_OVR;

	/* Queue sizes, capped to ensure natural alignment */
	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
					     FIELD_GET(IDR1_CMDQS, reg));
	if (smmu->cmdq.q.llq.max_n_shift <= ilog2(CMDQ_BATCH_ENTRIES)) {
		/*
		 * We don't support splitting up batches, so one batch of
		 * commands plus an extra sync needs to fit inside the command
		 * queue. There's also no way we can handle the weird alignment
		 * restrictions on the base pointer for a unit-length queue.
		 */
		dev_err(smmu->dev, "command queue size <= %d entries not supported\n",
			CMDQ_BATCH_ENTRIES);
		return -ENXIO;
	}

	smmu->evtq.q.llq.max_n_shift = min_t(u32, EVTQ_MAX_SZ_SHIFT,
					     FIELD_GET(IDR1_EVTQS, reg));
	smmu->priq.q.llq.max_n_shift = min_t(u32, PRIQ_MAX_SZ_SHIFT,
					     FIELD_GET(IDR1_PRIQS, reg));

	/* SID/SSID sizes */
	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
	smmu->iommu.max_pasids = 1UL << smmu->ssid_bits;

	/*
	 * If the SMMU supports fewer bits than would fill a single L2 stream
	 * table, use a linear table instead.
	 */
	if (smmu->sid_bits <= STRTAB_SPLIT)
		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;

	/* IDR3 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
	if (FIELD_GET(IDR3_RIL, reg))
		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
	if (FIELD_GET(IDR3_FWB, reg))
		smmu->features |= ARM_SMMU_FEAT_S2FWB;
	if (FIELD_GET(IDR3_BBM, reg) == 2)
		smmu->features |= ARM_SMMU_FEAT_BBML2;

	/* IDR5 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);

	/* Maximum number of outstanding stalls */
	smmu->evtq.max_stalls = FIELD_GET(IDR5_STALL_MAX, reg);

	/* Page sizes */
	if (reg & IDR5_GRAN64K)
		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;
	if (reg & IDR5_GRAN16K)
		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
	if (reg & IDR5_GRAN4K)
		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;

	/* Input address size */
	if (FIELD_GET(IDR5_VAX, reg) == IDR5_VAX_52_BIT)
		smmu->features |= ARM_SMMU_FEAT_VAX;

	/* Output address size */
	switch (FIELD_GET(IDR5_OAS, reg)) {
	case IDR5_OAS_32_BIT:
		smmu->oas = 32;
		break;
	case IDR5_OAS_36_BIT:
		smmu->oas = 36;
		break;
	case IDR5_OAS_40_BIT:
		smmu->oas = 40;
		break;
	case IDR5_OAS_42_BIT:
		smmu->oas = 42;
		break;
	case IDR5_OAS_44_BIT:
		smmu->oas = 44;
		break;
	case IDR5_OAS_52_BIT:
		smmu->oas = 52;
		smmu->pgsize_bitmap |= 1ULL << 42; /* 4TB */
		break;
	default:
		dev_info(smmu->dev,
			 "unknown output address size. Truncating to 48-bit\n");
		fallthrough;
	case IDR5_OAS_48_BIT:
		smmu->oas = 48;
	}

	/* Set the DMA mask for our table walker */
	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(smmu->oas)))
		dev_warn(smmu->dev,
			 "failed to set DMA mask for table walker\n");

	smmu->ias = max(smmu->ias, smmu->oas);

	if ((smmu->features & ARM_SMMU_FEAT_TRANS_S1) &&
	    (smmu->features & ARM_SMMU_FEAT_TRANS_S2))
		smmu->features |= ARM_SMMU_FEAT_NESTING;

	arm_smmu_device_iidr_probe(smmu);

	if (arm_smmu_sva_supported(smmu))
		smmu->features |= ARM_SMMU_FEAT_SVA;

	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
		 smmu->ias, smmu->oas, smmu->features);
	return 0;
}

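/*
 * Firmware probing: an SMMU is described either by an ACPI IORT node or by
 * a devicetree node. The helpers below extract the model-specific quirks
 * and coherency/HTTU overrides from whichever description is present.
 */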
#ifdef CONFIG_ACPI
#ifdef CONFIG_TEGRA241_CMDQV
static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node,
						struct arm_smmu_device *smmu)
{
	const char *uid = kasprintf(GFP_KERNEL, "%u", node->identifier);
	struct acpi_device *adev;

	/* Look for an NVDA200C node whose _UID matches the SMMU node ID */
	adev = acpi_dev_get_first_match_dev("NVDA200C", uid, -1);
	if (adev) {
		/* Tegra241 CMDQV driver is responsible for put_device() */
		smmu->impl_dev = &adev->dev;
		smmu->options |= ARM_SMMU_OPT_TEGRA241_CMDQV;
		dev_info(smmu->dev, "found companion CMDQV device: %s\n",
			 dev_name(smmu->impl_dev));
	}
	kfree(uid);
}
#else
static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node,
						struct arm_smmu_device *smmu)
{
}
#endif

static int acpi_smmu_iort_probe_model(struct acpi_iort_node *node,
				      struct arm_smmu_device *smmu)
{
	struct acpi_iort_smmu_v3 *iort_smmu =
		(struct acpi_iort_smmu_v3 *)node->node_data;

	switch (iort_smmu->model) {
	case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
		smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
		break;
	case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
		smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
		break;
	case ACPI_IORT_SMMU_V3_GENERIC:
		/*
		 * Tegra241 implementation stores its SMMU options and impl_dev
		 * in DSDT. Thus, go through the ACPI tables unconditionally.
		 */
		acpi_smmu_dsdt_probe_tegra241_cmdqv(node, smmu);
		break;
	}

	dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
	return 0;
}

static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
				      struct arm_smmu_device *smmu)
{
	struct acpi_iort_smmu_v3 *iort_smmu;
	struct device *dev = smmu->dev;
	struct acpi_iort_node *node;

	node = *(struct acpi_iort_node **)dev_get_platdata(dev);

	/* Retrieve SMMUv3 specific data */
	iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;

	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
		smmu->features |= ARM_SMMU_FEAT_COHERENCY;

	switch (FIELD_GET(ACPI_IORT_SMMU_V3_HTTU_OVERRIDE, iort_smmu->flags)) {
	case IDR0_HTTU_ACCESS_DIRTY:
		smmu->features |= ARM_SMMU_FEAT_HD;
		fallthrough;
	case IDR0_HTTU_ACCESS:
		smmu->features |= ARM_SMMU_FEAT_HA;
	}

	return acpi_smmu_iort_probe_model(node, smmu);
}
#else
static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
					     struct arm_smmu_device *smmu)
{
	return -ENODEV;
}
#endif

static int arm_smmu_device_dt_probe(struct platform_device *pdev,
				    struct arm_smmu_device *smmu)
{
	struct device *dev = &pdev->dev;
	u32 cells;
	int ret = -EINVAL;

	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
		dev_err(dev, "missing #iommu-cells property\n");
	else if (cells != 1)
		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
	else
		ret = 0;

	parse_driver_options(smmu);

	if (of_dma_is_coherent(dev->of_node))
		smmu->features |= ARM_SMMU_FEAT_COHERENCY;

	return ret;
}

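/*
 * Implementations with the PAGE0_REGS_ONLY quirk (e.g. Cavium CN99xx, see
 * acpi_smmu_iort_probe_model()) only expose register page 0; everyone else
 * has two 64K pages, with the evtq/priq doorbells living in page 1.
 */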
static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
{
	if (smmu->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
		return SZ_64K;
	else
		return SZ_128K;
}

static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
				      resource_size_t size)
{
	struct resource res = DEFINE_RES_MEM(start, size);

	return devm_ioremap_resource(dev, &res);
}

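/*
 * Reserved Memory Regions (RMRs) describe streams with DMA already set up
 * by firmware. Pre-populate bypass STEs for those StreamIDs so the traffic
 * keeps flowing once the SMMU is enabled.
 */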
static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
{
	struct list_head rmr_list;
	struct iommu_resv_region *e;

	INIT_LIST_HEAD(&rmr_list);
	iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);

	list_for_each_entry(e, &rmr_list, list) {
		struct iommu_iort_rmr_data *rmr;
		int ret, i;

		rmr = container_of(e, struct iommu_iort_rmr_data, rr);
		for (i = 0; i < rmr->num_sids; i++) {
			ret = arm_smmu_init_sid_strtab(smmu, rmr->sids[i]);
			if (ret) {
				dev_err(smmu->dev, "RMR SID(0x%x) bypass failed\n",
					rmr->sids[i]);
				continue;
			}

			/*
			 * STE table is not programmed to HW, see
			 * arm_smmu_initial_bypass_stes()
			 */
			arm_smmu_make_bypass_ste(smmu,
				arm_smmu_get_step_for_sid(smmu, rmr->sids[i]));
		}
	}

	iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
}

static void arm_smmu_impl_remove(void *data)
{
	struct arm_smmu_device *smmu = data;

	if (smmu->impl_ops && smmu->impl_ops->device_remove)
		smmu->impl_ops->device_remove(smmu);
}

/*
 * Probe all the compiled in implementations. Each one checks to see if it
 * matches this HW and if so returns a devm_krealloc'd arm_smmu_device which
 * replaces the caller's. Otherwise the original is returned or ERR_PTR.
 */
static struct arm_smmu_device *arm_smmu_impl_probe(struct arm_smmu_device *smmu)
{
	struct arm_smmu_device *new_smmu = ERR_PTR(-ENODEV);
	const struct arm_smmu_impl_ops *ops;
	int ret;

	if (smmu->impl_dev && (smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV))
		new_smmu = tegra241_cmdqv_probe(smmu);

	if (new_smmu == ERR_PTR(-ENODEV))
		return smmu;
	if (IS_ERR(new_smmu))
		return new_smmu;

	ops = new_smmu->impl_ops;
	if (ops) {
		/* get_viommu_size and vsmmu_init ops must be paired */
		if (WARN_ON(!ops->get_viommu_size != !ops->vsmmu_init)) {
			ret = -EINVAL;
			goto err_remove;
		}
	}

	ret = devm_add_action_or_reset(new_smmu->dev, arm_smmu_impl_remove,
				       new_smmu);
	if (ret)
		return ERR_PTR(ret);
	return new_smmu;

err_remove:
	arm_smmu_impl_remove(new_smmu);
	return ERR_PTR(ret);
}

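/*
 * Platform probe path: parse the firmware description, give implementation
 * drivers a chance to take over the arm_smmu_device, map the register
 * pages, read the hardware IDs, allocate the in-memory structures, reset
 * the device and finally register with the IOMMU core.
 */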
static int arm_smmu_device_probe(struct platform_device *pdev)
{
	int irq, ret;
	struct resource *res;
	resource_size_t ioaddr;
	struct arm_smmu_device *smmu;
	struct device *dev = &pdev->dev;

	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
	if (!smmu)
		return -ENOMEM;
	smmu->dev = dev;

	if (dev->of_node) {
		ret = arm_smmu_device_dt_probe(pdev, smmu);
	} else {
		ret = arm_smmu_device_acpi_probe(pdev, smmu);
	}
	if (ret)
		return ret;

	smmu = arm_smmu_impl_probe(smmu);
	if (IS_ERR(smmu))
		return PTR_ERR(smmu);

	/* Base address */
	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (!res)
		return -EINVAL;
	if (resource_size(res) < arm_smmu_resource_size(smmu)) {
		dev_err(dev, "MMIO region too small (%pr)\n", res);
		return -EINVAL;
	}
	ioaddr = res->start;

	/*
	 * Don't map the IMPLEMENTATION DEFINED regions, since they may contain
	 * the PMCG registers which are reserved by the PMU driver.
	 */
	smmu->base = arm_smmu_ioremap(dev, ioaddr, ARM_SMMU_REG_SZ);
	if (IS_ERR(smmu->base))
		return PTR_ERR(smmu->base);

	if (arm_smmu_resource_size(smmu) > SZ_64K) {
		smmu->page1 = arm_smmu_ioremap(dev, ioaddr + SZ_64K,
					       ARM_SMMU_REG_SZ);
		if (IS_ERR(smmu->page1))
			return PTR_ERR(smmu->page1);
	} else {
		smmu->page1 = smmu->base;
	}

	/* Interrupt lines */
	irq = platform_get_irq_byname_optional(pdev, "combined");
	if (irq > 0)
		smmu->combined_irq = irq;
	else {
		irq = platform_get_irq_byname_optional(pdev, "eventq");
		if (irq > 0)
			smmu->evtq.q.irq = irq;

		irq = platform_get_irq_byname_optional(pdev, "priq");
		if (irq > 0)
			smmu->priq.q.irq = irq;

		irq = platform_get_irq_byname_optional(pdev, "gerror");
		if (irq > 0)
			smmu->gerr_irq = irq;
	}

	/* Probe the h/w */
	ret = arm_smmu_device_hw_probe(smmu);
	if (ret)
		return ret;

	/* Initialise in-memory data structures */
	ret = arm_smmu_init_structures(smmu);
	if (ret)
		goto err_free_iopf;

	/* Record our private device structure */
	platform_set_drvdata(pdev, smmu);

	/* Check for RMRs and install bypass STEs if any */
	arm_smmu_rmr_install_bypass_ste(smmu);

	/* Reset the device */
	ret = arm_smmu_device_reset(smmu);
	if (ret)
		goto err_disable;

	/* And we're up. Go go go! */
	ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
				     "smmu3.%pa", &ioaddr);
	if (ret)
		goto err_disable;

	ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
	if (ret) {
		dev_err(dev, "Failed to register iommu\n");
		goto err_free_sysfs;
	}

	return 0;

err_free_sysfs:
	iommu_device_sysfs_remove(&smmu->iommu);
err_disable:
	arm_smmu_device_disable(smmu);
err_free_iopf:
	iopf_queue_free(smmu->evtq.iopf);
	return ret;
}

static void arm_smmu_device_remove(struct platform_device *pdev)
{
	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);

	iommu_device_unregister(&smmu->iommu);
	iommu_device_sysfs_remove(&smmu->iommu);
	arm_smmu_device_disable(smmu);
	iopf_queue_free(smmu->evtq.iopf);
	ida_destroy(&smmu->vmid_map);
}

static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);

	arm_smmu_device_disable(smmu);
}

static const struct of_device_id arm_smmu_of_match[] = {
	{ .compatible = "arm,smmu-v3", },
	{ },
};
MODULE_DEVICE_TABLE(of, arm_smmu_of_match);

static void arm_smmu_driver_unregister(struct platform_driver *drv)
{
	arm_smmu_sva_notifier_synchronize();
	platform_driver_unregister(drv);
}

static struct platform_driver arm_smmu_driver = {
	.driver	= {
		.name			= "arm-smmu-v3",
		.of_match_table		= arm_smmu_of_match,
		.suppress_bind_attrs	= true,
	},
	.probe	= arm_smmu_device_probe,
	.remove	= arm_smmu_device_remove,
	.shutdown = arm_smmu_device_shutdown,
};
module_driver(arm_smmu_driver, platform_driver_register,
	      arm_smmu_driver_unregister);

MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
MODULE_AUTHOR("Will Deacon <will@kernel.org>");
MODULE_ALIAS("platform:arm-smmu-v3");
MODULE_LICENSE("GPL v2");