iommu/arm-smmu-v3: Reduce contention during command-queue insertion
The SMMU command queue is a bottleneck in large systems, thanks to the
spin_lock which serialises accesses from all CPUs to the single queue
supported by the hardware.

Attempt to improve this situation by moving to a new algorithm for
inserting commands into the queue, which is lock-free on the fast-path.

Tested-by: Ganapatrao Kulkarni <gkulkarni@marvell.com>
Signed-off-by: Will Deacon <will@kernel.org>
parent 7c288a5b27
commit 587e6c10a7
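Not part of the commit itself: below is a minimal user-space sketch, using C11 atomics, of the space-reservation step (step 1 of the algorithm described in the patch), which the real code implements with cmpxchg_relaxed() on the packed prod/cons word (llq.val). All toy_* names, the 16-entry queue size and the main() driver are invented for illustration; the real code also packs cons into the same 64-bit word, carries an owner flag in prod and tracks written entries in a valid bitmap, none of which is modelled here.

/*
 * Illustrative only -- not the kernel's arm-smmu-v3 implementation.
 * CPUs claim slots in a shared ring with a compare-exchange loop
 * instead of a spinlock.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_N_SHIFT	4				/* 16-entry toy queue */
#define IDX(p)		((p) & ((1u << MAX_N_SHIFT) - 1))
#define WRP(p)		((p) & (1u << MAX_N_SHIFT))	/* wrap bit, as in Q_WRP() */

struct toy_llq {
	_Atomic uint32_t prod;				/* shared producer pointer */
	uint32_t cons;					/* advanced by the consumer */
};

/* Reserve n consecutive slots; returns the old prod value (our base). */
static uint32_t toy_reserve(struct toy_llq *q, uint32_t n)
{
	uint32_t old = atomic_load_explicit(&q->prod, memory_order_relaxed);
	uint32_t new;

	do {
		/* wrap-aware increment, mirroring queue_inc_prod_n() */
		new = (WRP(old) | IDX(old)) + n;
		new = WRP(new) | IDX(new);
	} while (!atomic_compare_exchange_weak_explicit(&q->prod, &old, new,
							memory_order_relaxed,
							memory_order_relaxed));
	return old;	/* commands are then written at IDX(old) onwards */
}

int main(void)
{
	struct toy_llq q = { .cons = 0 };

	atomic_init(&q.prod, 0);
	uint32_t a = toy_reserve(&q, 3);
	uint32_t b = toy_reserve(&q, 2);
	printf("first batch at idx %u, second at idx %u\n",
	       (unsigned)IDX(a), (unsigned)IDX(b));
	return 0;
}

The point of the compare-exchange loop is that a CPU which loses the race simply retries with the updated prod value, so no CPU ever blocks another while claiming space; queue-full handling, ownership hand-off and the valid bitmap in the patch below build on top of this.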
@@ -183,7 +183,7 @@

 #define Q_IDX(llq, p)			((p) & ((1 << (llq)->max_n_shift) - 1))
 #define Q_WRP(llq, p)			((p) & (1 << (llq)->max_n_shift))
-#define Q_OVERFLOW_FLAG			(1 << 31)
+#define Q_OVERFLOW_FLAG			(1U << 31)
 #define Q_OVF(p)			((p) & Q_OVERFLOW_FLAG)
 #define Q_ENT(q, p)			((q)->base +			\
 					 Q_IDX(&((q)->llq), p) *	\
@@ -307,6 +307,8 @@
 #define CMDQ_ERR_CERROR_ABT_IDX		2
 #define CMDQ_ERR_CERROR_ATC_INV_IDX	3

+#define CMDQ_PROD_OWNED_FLAG		Q_OVERFLOW_FLAG
+
 #define CMDQ_0_OP			GENMASK_ULL(7, 0)
 #define CMDQ_0_SSV			(1UL << 11)

@@ -369,9 +371,8 @@
 #define PRIQ_1_ADDR_MASK		GENMASK_ULL(63, 12)

 /* High-level queue structures */
-#define ARM_SMMU_POLL_TIMEOUT_US	100
-#define ARM_SMMU_CMDQ_SYNC_TIMEOUT_US	1000000 /* 1s! */
-#define ARM_SMMU_CMDQ_SYNC_SPIN_COUNT	10
+#define ARM_SMMU_POLL_TIMEOUT_US	1000000 /* 1s! */
+#define ARM_SMMU_POLL_SPIN_COUNT	10

 #define MSI_IOVA_BASE			0x8000000
 #define MSI_IOVA_LENGTH			0x100000
@@ -473,15 +474,24 @@ struct arm_smmu_cmdq_ent {

 		#define CMDQ_OP_CMD_SYNC	0x46
 		struct {
-			u32			msidata;
 			u64			msiaddr;
 		} sync;
 	};
 };

 struct arm_smmu_ll_queue {
-	u32				prod;
-	u32				cons;
+	union {
+		u64			val;
+		struct {
+			u32		prod;
+			u32		cons;
+		};
+		struct {
+			atomic_t	prod;
+			atomic_t	cons;
+		} atomic;
+		u8			__pad[SMP_CACHE_BYTES];
+	} ____cacheline_aligned_in_smp;
 	u32				max_n_shift;
 };

@@ -499,9 +509,18 @@ struct arm_smmu_queue {
 	u32 __iomem			*cons_reg;
 };

+struct arm_smmu_queue_poll {
+	ktime_t				timeout;
+	unsigned int			delay;
+	unsigned int			spin_cnt;
+	bool				wfe;
+};
+
 struct arm_smmu_cmdq {
 	struct arm_smmu_queue		q;
-	spinlock_t			lock;
+	atomic_long_t			*valid_map;
+	atomic_t			owner_prod;
+	atomic_t			lock;
 };

 struct arm_smmu_evtq {
@@ -581,8 +600,6 @@ struct arm_smmu_device {

 	int				gerr_irq;
 	int				combined_irq;
-	u32				sync_nr;
-	u8				prev_cmd_opcode;

 	unsigned long			ias; /* IPA */
 	unsigned long			oas; /* PA */
@@ -601,12 +618,6 @@ struct arm_smmu_device {

 	struct arm_smmu_strtab_cfg	strtab_cfg;

-	/* Hi16xx adds an extra 32 bits of goodness to its MSI payload */
-	union {
-		u32			sync_count;
-		u64			padding;
-	};
-
 	/* IOMMU core code handle */
 	struct iommu_device		iommu;
 };
@@ -690,6 +701,21 @@ static void parse_driver_options(struct arm_smmu_device *smmu)
 }

 /* Low-level queue manipulation functions */
+static bool queue_has_space(struct arm_smmu_ll_queue *q, u32 n)
+{
+	u32 space, prod, cons;
+
+	prod = Q_IDX(q, q->prod);
+	cons = Q_IDX(q, q->cons);
+
+	if (Q_WRP(q, q->prod) == Q_WRP(q, q->cons))
+		space = (1 << q->max_n_shift) - (prod - cons);
+	else
+		space = cons - prod;
+
+	return space >= n;
+}
+
 static bool queue_full(struct arm_smmu_ll_queue *q)
 {
 	return Q_IDX(q, q->prod) == Q_IDX(q, q->cons) &&
@@ -702,9 +728,12 @@ static bool queue_empty(struct arm_smmu_ll_queue *q)
 	       Q_WRP(q, q->prod) == Q_WRP(q, q->cons);
 }

-static void queue_sync_cons_in(struct arm_smmu_queue *q)
+static bool queue_consumed(struct arm_smmu_ll_queue *q, u32 prod)
 {
-	q->llq.cons = readl_relaxed(q->cons_reg);
+	return ((Q_WRP(q, q->cons) == Q_WRP(q, prod)) &&
+		(Q_IDX(q, q->cons) > Q_IDX(q, prod))) ||
+	       ((Q_WRP(q, q->cons) != Q_WRP(q, prod)) &&
+		(Q_IDX(q, q->cons) <= Q_IDX(q, prod)));
 }

 static void queue_sync_cons_out(struct arm_smmu_queue *q)
@@ -735,46 +764,34 @@ static int queue_sync_prod_in(struct arm_smmu_queue *q)
 	return ret;
 }

-static void queue_sync_prod_out(struct arm_smmu_queue *q)
+static u32 queue_inc_prod_n(struct arm_smmu_ll_queue *q, int n)
 {
-	writel(q->llq.prod, q->prod_reg);
+	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + n;
+	return Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
 }

-static void queue_inc_prod(struct arm_smmu_ll_queue *q)
+static void queue_poll_init(struct arm_smmu_device *smmu,
+			    struct arm_smmu_queue_poll *qp)
 {
-	u32 prod = (Q_WRP(q, q->prod) | Q_IDX(q, q->prod)) + 1;
-	q->prod = Q_OVF(q->prod) | Q_WRP(q, prod) | Q_IDX(q, prod);
+	qp->delay = 1;
+	qp->spin_cnt = 0;
+	qp->wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
+	qp->timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
 }

-/*
- * Wait for the SMMU to consume items. If sync is true, wait until the queue
- * is empty. Otherwise, wait until there is at least one free slot.
- */
-static int queue_poll_cons(struct arm_smmu_queue *q, bool sync, bool wfe)
+static int queue_poll(struct arm_smmu_queue_poll *qp)
 {
-	ktime_t timeout;
-	unsigned int delay = 1, spin_cnt = 0;
+	if (ktime_compare(ktime_get(), qp->timeout) > 0)
+		return -ETIMEDOUT;

-	/* Wait longer if it's a CMD_SYNC */
-	timeout = ktime_add_us(ktime_get(), sync ?
-					    ARM_SMMU_CMDQ_SYNC_TIMEOUT_US :
-					    ARM_SMMU_POLL_TIMEOUT_US);
-
-	while (queue_sync_cons_in(q),
-	      (sync ? !queue_empty(&q->llq) : queue_full(&q->llq))) {
-		if (ktime_compare(ktime_get(), timeout) > 0)
-			return -ETIMEDOUT;
-
-		if (wfe) {
-			wfe();
-		} else if (++spin_cnt < ARM_SMMU_CMDQ_SYNC_SPIN_COUNT) {
-			cpu_relax();
-			continue;
-		} else {
-			udelay(delay);
-			delay *= 2;
-			spin_cnt = 0;
-		}
+	if (qp->wfe) {
+		wfe();
+	} else if (++qp->spin_cnt < ARM_SMMU_POLL_SPIN_COUNT) {
+		cpu_relax();
+	} else {
+		udelay(qp->delay);
+		qp->delay *= 2;
+		qp->spin_cnt = 0;
 	}

 	return 0;
@@ -788,17 +805,6 @@ static void queue_write(__le64 *dst, u64 *src, size_t n_dwords)
 		*dst++ = cpu_to_le64(*src++);
 }

-static int queue_insert_raw(struct arm_smmu_queue *q, u64 *ent)
-{
-	if (queue_full(&q->llq))
-		return -ENOSPC;
-
-	queue_write(Q_ENT(q, q->llq.prod), ent, q->ent_dwords);
-	queue_inc_prod(&q->llq);
-	queue_sync_prod_out(q);
-	return 0;
-}
-
 static void queue_read(__le64 *dst, u64 *src, size_t n_dwords)
 {
 	int i;
@@ -881,20 +887,14 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
 		cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
 		break;
 	case CMDQ_OP_CMD_SYNC:
-		if (ent->sync.msiaddr)
+		if (ent->sync.msiaddr) {
 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
-		else
+			cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
+		} else {
 			cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_SEV);
+		}
 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSH, ARM_SMMU_SH_ISH);
 		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIATTR, ARM_SMMU_MEMATTR_OIWB);
-		/*
-		 * Commands are written little-endian, but we want the SMMU to
-		 * receive MSIData, and thus write it back to memory, in CPU
-		 * byte order, so big-endian needs an extra byteswap here.
-		 */
-		cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_MSIDATA,
-				     cpu_to_le32(ent->sync.msidata));
-		cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
 		break;
 	default:
 		return -ENOENT;
@@ -903,6 +903,27 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
 	return 0;
 }

+static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
+					 u32 prod)
+{
+	struct arm_smmu_queue *q = &smmu->cmdq.q;
+	struct arm_smmu_cmdq_ent ent = {
+		.opcode = CMDQ_OP_CMD_SYNC,
+	};
+
+	/*
+	 * Beware that Hi16xx adds an extra 32 bits of goodness to its MSI
+	 * payload, so the write will zero the entire command on that platform.
+	 */
+	if (smmu->features & ARM_SMMU_FEAT_MSI &&
+	    smmu->features & ARM_SMMU_FEAT_COHERENCY) {
+		ent.sync.msiaddr = q->base_dma + Q_IDX(&q->llq, prod) *
+				   q->ent_dwords * 8;
+	}
+
+	arm_smmu_cmdq_build_cmd(cmd, &ent);
+}
+
 static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
 {
 	static const char *cerror_str[] = {
@@ -961,109 +982,440 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
 	queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
 }

-static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd)
+/*
+ * Command queue locking.
+ * This is a form of bastardised rwlock with the following major changes:
+ *
+ * - The only LOCK routines are exclusive_trylock() and shared_lock().
+ *   Neither have barrier semantics, and instead provide only a control
+ *   dependency.
+ *
+ * - The UNLOCK routines are supplemented with shared_tryunlock(), which
+ *   fails if the caller appears to be the last lock holder (yes, this is
+ *   racy). All successful UNLOCK routines have RELEASE semantics.
+ */
+static void arm_smmu_cmdq_shared_lock(struct arm_smmu_cmdq *cmdq)
 {
-	struct arm_smmu_queue *q = &smmu->cmdq.q;
-	bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
+	int val;

-	smmu->prev_cmd_opcode = FIELD_GET(CMDQ_0_OP, cmd[0]);
+	/*
+	 * We can try to avoid the cmpxchg() loop by simply incrementing the
+	 * lock counter. When held in exclusive state, the lock counter is set
+	 * to INT_MIN so these increments won't hurt as the value will remain
+	 * negative.
+	 */
+	if (atomic_fetch_inc_relaxed(&cmdq->lock) >= 0)
+		return;

-	while (queue_insert_raw(q, cmd) == -ENOSPC) {
-		if (queue_poll_cons(q, false, wfe))
-			dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
+	do {
+		val = atomic_cond_read_relaxed(&cmdq->lock, VAL >= 0);
+	} while (atomic_cmpxchg_relaxed(&cmdq->lock, val, val + 1) != val);
+}
+
+static void arm_smmu_cmdq_shared_unlock(struct arm_smmu_cmdq *cmdq)
+{
+	(void)atomic_dec_return_release(&cmdq->lock);
+}
+
+static bool arm_smmu_cmdq_shared_tryunlock(struct arm_smmu_cmdq *cmdq)
+{
+	if (atomic_read(&cmdq->lock) == 1)
+		return false;
+
+	arm_smmu_cmdq_shared_unlock(cmdq);
+	return true;
+}
+
+#define arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)		\
+({									\
+	bool __ret;							\
+	local_irq_save(flags);						\
+	__ret = !atomic_cmpxchg_relaxed(&cmdq->lock, 0, INT_MIN);	\
+	if (!__ret)							\
+		local_irq_restore(flags);				\
+	__ret;								\
+})
+
+#define arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags)		\
+({									\
+	atomic_set_release(&cmdq->lock, 0);				\
+	local_irq_restore(flags);					\
+})
+
+
+/*
+ * Command queue insertion.
+ * This is made fiddly by our attempts to achieve some sort of scalability
+ * since there is one queue shared amongst all of the CPUs in the system. If
+ * you like mixed-size concurrency, dependency ordering and relaxed atomics,
+ * then you'll *love* this monstrosity.
+ *
+ * The basic idea is to split the queue up into ranges of commands that are
+ * owned by a given CPU; the owner may not have written all of the commands
+ * itself, but is responsible for advancing the hardware prod pointer when
+ * the time comes. The algorithm is roughly:
+ *
+ *	1. Allocate some space in the queue. At this point we also discover
+ *	   whether the head of the queue is currently owned by another CPU,
+ *	   or whether we are the owner.
+ *
+ *	2. Write our commands into our allocated slots in the queue.
+ *
+ *	3. Mark our slots as valid in arm_smmu_cmdq.valid_map.
+ *
+ *	4. If we are an owner:
+ *		a. Wait for the previous owner to finish.
+ *		b. Mark the queue head as unowned, which tells us the range
+ *		   that we are responsible for publishing.
+ *		c. Wait for all commands in our owned range to become valid.
+ *		d. Advance the hardware prod pointer.
+ *		e. Tell the next owner we've finished.
+ *
+ *	5. If we are inserting a CMD_SYNC (we may or may not have been an
+ *	   owner), then we need to stick around until it has completed:
+ *		a. If we have MSIs, the SMMU can write back into the CMD_SYNC
+ *		   to clear the first 4 bytes.
+ *		b. Otherwise, we spin waiting for the hardware cons pointer to
+ *		   advance past our command.
+ *
+ * The devil is in the details, particularly the use of locking for handling
+ * SYNC completion and freeing up space in the queue before we think that it is
+ * full.
+ */
+static void __arm_smmu_cmdq_poll_set_valid_map(struct arm_smmu_cmdq *cmdq,
+					       u32 sprod, u32 eprod, bool set)
+{
+	u32 swidx, sbidx, ewidx, ebidx;
+	struct arm_smmu_ll_queue llq = {
+		.max_n_shift	= cmdq->q.llq.max_n_shift,
+		.prod		= sprod,
+	};
+
+	ewidx = BIT_WORD(Q_IDX(&llq, eprod));
+	ebidx = Q_IDX(&llq, eprod) % BITS_PER_LONG;
+
+	while (llq.prod != eprod) {
+		unsigned long mask;
+		atomic_long_t *ptr;
+		u32 limit = BITS_PER_LONG;
+
+		swidx = BIT_WORD(Q_IDX(&llq, llq.prod));
+		sbidx = Q_IDX(&llq, llq.prod) % BITS_PER_LONG;
+
+		ptr = &cmdq->valid_map[swidx];
+
+		if ((swidx == ewidx) && (sbidx < ebidx))
+			limit = ebidx;
+
+		mask = GENMASK(limit - 1, sbidx);
+
+		/*
+		 * The valid bit is the inverse of the wrap bit. This means
+		 * that a zero-initialised queue is invalid and, after marking
+		 * all entries as valid, they become invalid again when we
+		 * wrap.
+		 */
+		if (set) {
+			atomic_long_xor(mask, ptr);
+		} else { /* Poll */
+			unsigned long valid;
+
+			valid = (ULONG_MAX + !!Q_WRP(&llq, llq.prod)) & mask;
+			atomic_long_cond_read_relaxed(ptr, (VAL & mask) == valid);
+		}
+
+		llq.prod = queue_inc_prod_n(&llq, limit - sbidx);
 	}
 }

-static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
-				    struct arm_smmu_cmdq_ent *ent)
+/* Mark all entries in the range [sprod, eprod) as valid */
+static void arm_smmu_cmdq_set_valid_map(struct arm_smmu_cmdq *cmdq,
+					u32 sprod, u32 eprod)
+{
+	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, true);
+}
+
+/* Wait for all entries in the range [sprod, eprod) to become valid */
+static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
+					 u32 sprod, u32 eprod)
+{
+	__arm_smmu_cmdq_poll_set_valid_map(cmdq, sprod, eprod, false);
+}
+
+/* Wait for the command queue to become non-full */
+static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
+					     struct arm_smmu_ll_queue *llq)
+{
+	unsigned long flags;
+	struct arm_smmu_queue_poll qp;
+	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
+	int ret = 0;
+
+	/*
+	 * Try to update our copy of cons by grabbing exclusive cmdq access. If
+	 * that fails, spin until somebody else updates it for us.
+	 */
+	if (arm_smmu_cmdq_exclusive_trylock_irqsave(cmdq, flags)) {
+		WRITE_ONCE(cmdq->q.llq.cons, readl_relaxed(cmdq->q.cons_reg));
+		arm_smmu_cmdq_exclusive_unlock_irqrestore(cmdq, flags);
+		llq->val = READ_ONCE(cmdq->q.llq.val);
+		return 0;
+	}
+
+	queue_poll_init(smmu, &qp);
+	do {
+		llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
+		if (!queue_full(llq))
+			break;
+
+		ret = queue_poll(&qp);
+	} while (!ret);
+
+	return ret;
+}
+
+/*
+ * Wait until the SMMU signals a CMD_SYNC completion MSI.
+ * Must be called with the cmdq lock held in some capacity.
+ */
+static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
+					  struct arm_smmu_ll_queue *llq)
+{
+	int ret = 0;
+	struct arm_smmu_queue_poll qp;
+	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
+	u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
+
+	queue_poll_init(smmu, &qp);
+
+	/*
+	 * The MSI won't generate an event, since it's being written back
+	 * into the command queue.
+	 */
+	qp.wfe = false;
+	smp_cond_load_relaxed(cmd, !VAL || (ret = queue_poll(&qp)));
+	llq->cons = ret ? llq->prod : queue_inc_prod_n(llq, 1);
+	return ret;
+}
+
+/*
+ * Wait until the SMMU cons index passes llq->prod.
+ * Must be called with the cmdq lock held in some capacity.
+ */
+static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
+					       struct arm_smmu_ll_queue *llq)
+{
+	struct arm_smmu_queue_poll qp;
+	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
+	u32 prod = llq->prod;
+	int ret = 0;
+
+	queue_poll_init(smmu, &qp);
+	llq->val = READ_ONCE(smmu->cmdq.q.llq.val);
+	do {
+		if (queue_consumed(llq, prod))
+			break;
+
+		ret = queue_poll(&qp);
+
+		/*
+		 * This needs to be a readl() so that our subsequent call
+		 * to arm_smmu_cmdq_shared_tryunlock() can fail accurately.
+		 *
+		 * Specifically, we need to ensure that we observe all
+		 * shared_lock()s by other CMD_SYNCs that share our owner,
+		 * so that a failing call to tryunlock() means that we're
+		 * the last one out and therefore we can safely advance
+		 * cmdq->q.llq.cons. Roughly speaking:
+		 *
+		 * CPU 0		CPU1			CPU2 (us)
+		 *
+		 * if (sync)
+		 *	shared_lock();
+		 *
+		 * dma_wmb();
+		 * set_valid_map();
+		 *
+		 *			if (owner) {
+		 *				poll_valid_map();
+		 *				<control dependency>
+		 *				writel(prod_reg);
+		 *
+		 *						readl(cons_reg);
+		 *						tryunlock();
+		 *
+		 * Requires us to see CPU 0's shared_lock() acquisition.
+		 */
+		llq->cons = readl(cmdq->q.cons_reg);
+	} while (!ret);
+
+	return ret;
+}
+
+static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
+					 struct arm_smmu_ll_queue *llq)
+{
+	if (smmu->features & ARM_SMMU_FEAT_MSI &&
+	    smmu->features & ARM_SMMU_FEAT_COHERENCY)
+		return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
+
+	return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
+}
+
+static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
+					u32 prod, int n)
+{
+	int i;
+	struct arm_smmu_ll_queue llq = {
+		.max_n_shift	= cmdq->q.llq.max_n_shift,
+		.prod		= prod,
+	};
+
+	for (i = 0; i < n; ++i) {
+		u64 *cmd = &cmds[i * CMDQ_ENT_DWORDS];
+
+		prod = queue_inc_prod_n(&llq, i);
+		queue_write(Q_ENT(&cmdq->q, prod), cmd, CMDQ_ENT_DWORDS);
+	}
+}
+
+static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
+				       u64 *cmds, int n, bool sync)
+{
+	u64 cmd_sync[CMDQ_ENT_DWORDS];
+	u32 prod;
+	unsigned long flags;
+	bool owner;
+	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
+	struct arm_smmu_ll_queue llq = {
+		.max_n_shift = cmdq->q.llq.max_n_shift,
+	}, head = llq;
+	int ret = 0;
+
+	/* 1. Allocate some space in the queue */
+	local_irq_save(flags);
+	llq.val = READ_ONCE(cmdq->q.llq.val);
+	do {
+		u64 old;
+
+		while (!queue_has_space(&llq, n + sync)) {
+			local_irq_restore(flags);
+			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
+				dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
+			local_irq_save(flags);
+		}
+
+		head.cons = llq.cons;
+		head.prod = queue_inc_prod_n(&llq, n + sync) |
+					     CMDQ_PROD_OWNED_FLAG;
+
+		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
+		if (old == llq.val)
+			break;
+
+		llq.val = old;
+	} while (1);
+	owner = !(llq.prod & CMDQ_PROD_OWNED_FLAG);
+	head.prod &= ~CMDQ_PROD_OWNED_FLAG;
+	llq.prod &= ~CMDQ_PROD_OWNED_FLAG;
+
+	/*
+	 * 2. Write our commands into the queue
+	 * Dependency ordering from the cmpxchg() loop above.
+	 */
+	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
+	if (sync) {
+		prod = queue_inc_prod_n(&llq, n);
+		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
+		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
+
+		/*
+		 * In order to determine completion of our CMD_SYNC, we must
+		 * ensure that the queue can't wrap twice without us noticing.
+		 * We achieve that by taking the cmdq lock as shared before
+		 * marking our slot as valid.
+		 */
+		arm_smmu_cmdq_shared_lock(cmdq);
+	}
+
+	/* 3. Mark our slots as valid, ensuring commands are visible first */
+	dma_wmb();
+	arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
+
+	/* 4. If we are the owner, take control of the SMMU hardware */
+	if (owner) {
+		/* a. Wait for previous owner to finish */
+		atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
+
+		/* b. Stop gathering work by clearing the owned flag */
+		prod = atomic_fetch_andnot_relaxed(CMDQ_PROD_OWNED_FLAG,
+						   &cmdq->q.llq.atomic.prod);
+		prod &= ~CMDQ_PROD_OWNED_FLAG;
+
+		/*
+		 * c. Wait for any gathered work to be written to the queue.
+		 * Note that we read our own entries so that we have the control
+		 * dependency required by (d).
+		 */
+		arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
+
+		/*
+		 * d. Advance the hardware prod pointer
+		 * Control dependency ordering from the entries becoming valid.
+		 */
+		writel_relaxed(prod, cmdq->q.prod_reg);
+
+		/*
+		 * e. Tell the next owner we're done
+		 * Make sure we've updated the hardware first, so that we don't
+		 * race to update prod and potentially move it backwards.
+		 */
+		atomic_set_release(&cmdq->owner_prod, prod);
+	}
+
+	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
+	if (sync) {
+		llq.prod = queue_inc_prod_n(&llq, n);
+		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
+		if (ret) {
+			dev_err_ratelimited(smmu->dev,
+					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
+					    llq.prod,
+					    readl_relaxed(cmdq->q.prod_reg),
+					    readl_relaxed(cmdq->q.cons_reg));
+		}
+
+		/*
+		 * Try to unlock the cmq lock. This will fail if we're the last
+		 * reader, in which case we can safely update cmdq->q.llq.cons
+		 */
+		if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
+			WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
+			arm_smmu_cmdq_shared_unlock(cmdq);
+		}
+	}
+
+	local_irq_restore(flags);
+	return ret;
+}
+
+static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
+				   struct arm_smmu_cmdq_ent *ent)
 {
 	u64 cmd[CMDQ_ENT_DWORDS];
-	unsigned long flags;

 	if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
 		dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
 			 ent->opcode);
-		return;
+		return -EINVAL;
 	}

-	spin_lock_irqsave(&smmu->cmdq.lock, flags);
-	arm_smmu_cmdq_insert_cmd(smmu, cmd);
-	spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
-}
-
-/*
- * The difference between val and sync_idx is bounded by the maximum size of
- * a queue at 2^20 entries, so 32 bits is plenty for wrap-safe arithmetic.
- */
-static int __arm_smmu_sync_poll_msi(struct arm_smmu_device *smmu, u32 sync_idx)
-{
-	ktime_t timeout;
-	u32 val;
-
-	timeout = ktime_add_us(ktime_get(), ARM_SMMU_CMDQ_SYNC_TIMEOUT_US);
-	val = smp_cond_load_acquire(&smmu->sync_count,
-				    (int)(VAL - sync_idx) >= 0 ||
-				    !ktime_before(ktime_get(), timeout));
-
-	return (int)(val - sync_idx) < 0 ? -ETIMEDOUT : 0;
-}
-
-static int __arm_smmu_cmdq_issue_sync_msi(struct arm_smmu_device *smmu)
-{
-	u64 cmd[CMDQ_ENT_DWORDS];
-	unsigned long flags;
-	struct arm_smmu_cmdq_ent ent = {
-		.opcode = CMDQ_OP_CMD_SYNC,
-		.sync	= {
-			.msiaddr = virt_to_phys(&smmu->sync_count),
-		},
-	};
-
-	spin_lock_irqsave(&smmu->cmdq.lock, flags);
-
-	/* Piggy-back on the previous command if it's a SYNC */
-	if (smmu->prev_cmd_opcode == CMDQ_OP_CMD_SYNC) {
-		ent.sync.msidata = smmu->sync_nr;
-	} else {
-		ent.sync.msidata = ++smmu->sync_nr;
-		arm_smmu_cmdq_build_cmd(cmd, &ent);
-		arm_smmu_cmdq_insert_cmd(smmu, cmd);
-	}
-
-	spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
-
-	return __arm_smmu_sync_poll_msi(smmu, ent.sync.msidata);
-}
-
-static int __arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
-{
-	u64 cmd[CMDQ_ENT_DWORDS];
-	unsigned long flags;
-	bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
-	struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC };
-	int ret;
-
-	arm_smmu_cmdq_build_cmd(cmd, &ent);
-
-	spin_lock_irqsave(&smmu->cmdq.lock, flags);
-	arm_smmu_cmdq_insert_cmd(smmu, cmd);
-	ret = queue_poll_cons(&smmu->cmdq.q, true, wfe);
-	spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
-
-	return ret;
+	return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false);
 }

 static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
 {
-	int ret;
-	bool msi = (smmu->features & ARM_SMMU_FEAT_MSI) &&
-		   (smmu->features & ARM_SMMU_FEAT_COHERENCY);
-
-	ret = msi ? __arm_smmu_cmdq_issue_sync_msi(smmu)
-		  : __arm_smmu_cmdq_issue_sync(smmu);
-	if (ret)
-		dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
-	return ret;
+	return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
 }

 /* Context descriptor manipulation functions */
@@ -1580,9 +1932,9 @@ static void arm_smmu_tlb_inv_context(void *cookie)
 	/*
 	 * NOTE: when io-pgtable is in non-strict mode, we may get here with
 	 * PTEs previously cleared by unmaps on the current CPU not yet visible
-	 * to the SMMU. We are relying on the DSB implicit in
-	 * queue_sync_prod_out() to guarantee those are observed before the
-	 * TLBI. Do be careful, 007.
+	 * to the SMMU. We are relying on the dma_wmb() implicit during cmd
+	 * insertion to guarantee those are observed before the TLBI. Do be
+	 * careful, 007.
 	 */
 	arm_smmu_cmdq_issue_cmd(smmu, &cmd);
 	arm_smmu_cmdq_issue_sync(smmu);
@@ -2359,18 +2711,49 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
 	return 0;
 }

+static void arm_smmu_cmdq_free_bitmap(void *data)
+{
+	unsigned long *bitmap = data;
+	bitmap_free(bitmap);
+}
+
+static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
+{
+	int ret = 0;
+	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
+	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
+	atomic_long_t *bitmap;
+
+	atomic_set(&cmdq->owner_prod, 0);
+	atomic_set(&cmdq->lock, 0);
+
+	bitmap = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
+	if (!bitmap) {
+		dev_err(smmu->dev, "failed to allocate cmdq bitmap\n");
+		ret = -ENOMEM;
+	} else {
+		cmdq->valid_map = bitmap;
+		devm_add_action(smmu->dev, arm_smmu_cmdq_free_bitmap, bitmap);
+	}
+
+	return ret;
+}
+
 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
 {
 	int ret;

 	/* cmdq */
-	spin_lock_init(&smmu->cmdq.lock);
 	ret = arm_smmu_init_one_queue(smmu, &smmu->cmdq.q, ARM_SMMU_CMDQ_PROD,
 				      ARM_SMMU_CMDQ_CONS, CMDQ_ENT_DWORDS,
 				      "cmdq");
 	if (ret)
 		return ret;

+	ret = arm_smmu_cmdq_init(smmu);
+	if (ret)
+		return ret;
+
 	/* evtq */
 	ret = arm_smmu_init_one_queue(smmu, &smmu->evtq.q, ARM_SMMU_EVTQ_PROD,
 				      ARM_SMMU_EVTQ_CONS, EVTQ_ENT_DWORDS,
@@ -2951,9 +3334,15 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 	/* Queue sizes, capped to ensure natural alignment */
 	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
 					     FIELD_GET(IDR1_CMDQS, reg));
-	if (!smmu->cmdq.q.llq.max_n_shift) {
-		/* Odd alignment restrictions on the base, so ignore for now */
-		dev_err(smmu->dev, "unit-length command queue not supported\n");
+	if (smmu->cmdq.q.llq.max_n_shift < ilog2(BITS_PER_LONG)) {
+		/*
+		 * The cmdq valid_map relies on the total number of entries
+		 * being a multiple of BITS_PER_LONG. There's also no way
+		 * we can handle the weird alignment restrictions on the
+		 * base pointer for a unit-length queue.
+		 */
+		dev_err(smmu->dev, "command queue size < %d entries not supported\n",
+			BITS_PER_LONG);
 		return -ENXIO;
 	}
