mirror_ubuntu-kernels/drivers/gpu/drm/i915/intel_ringbuffer.h
Chris Wilson f6168e3304 drm/i915: Convert breadcrumbs spinlock to be irqsafe
The breadcrumbs are about to be used from within IRQ context sections
(e.g. nouveau signals a fence from an interrupt handler causing us to
submit a new request) and/or from bottom-half tasklets (i.e.
intel_lrc_irq_handler), therefore we need to employ the irqsafe spinlock
variants.

For example, deferring the request submission to the
intel_lrc_irq_handler generates this trace:

[   66.388639] =================================
[   66.388650] [ INFO: inconsistent lock state ]
[   66.388663] 4.9.0-rc2+ #56 Not tainted
[   66.388672] ---------------------------------
[   66.388682] inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
[   66.388695] swapper/1/0 [HC0[0]:SC1[1]:HE0:SE0] takes:
[   66.388706]  (&(&b->lock)->rlock){+.?...} , at: [<ffffffff81401c88>] intel_engine_enable_signaling+0x78/0x150
[   66.388761] {SOFTIRQ-ON-W} state was registered at:
[   66.388772]   [   66.388783] [<ffffffff810bd842>] __lock_acquire+0x682/0x1870
[   66.388795]   [   66.388803] [<ffffffff810bedbc>] lock_acquire+0x6c/0xb0
[   66.388814]   [   66.388824] [<ffffffff8161753a>] _raw_spin_lock+0x2a/0x40
[   66.388835]   [   66.388845] [<ffffffff81401e41>] intel_engine_reset_breadcrumbs+0x21/0xb0
[   66.388857]   [   66.388866] [<ffffffff81403ae7>] gen8_init_common_ring+0x67/0x100
[   66.388878]   [   66.388887] [<ffffffff81403b92>] gen8_init_render_ring+0x12/0x60
[   66.388903]   [   66.388912] [<ffffffff813f8707>] i915_gem_init_hw+0xf7/0x2a0
[   66.388927]   [   66.388936] [<ffffffff813f899b>] i915_gem_init+0xbb/0xf0
[   66.388950]   [   66.388959] [<ffffffff813b4980>] i915_driver_load+0x7e0/0x1330
[   66.388978]   [   66.388988] [<ffffffff813c09d8>] i915_pci_probe+0x28/0x40
[   66.389003]   [   66.389013] [<ffffffff812fa0db>] pci_device_probe+0x8b/0xf0
[   66.389028]   [   66.389037] [<ffffffff8147737e>] driver_probe_device+0x21e/0x430
[   66.389056]   [   66.389065] [<ffffffff8147766e>] __driver_attach+0xde/0xe0
[   66.389080]   [   66.389090] [<ffffffff814751ad>] bus_for_each_dev+0x5d/0x90
[   66.389105]   [   66.389113] [<ffffffff81477799>] driver_attach+0x19/0x20
[   66.389134]   [   66.389144] [<ffffffff81475ced>] bus_add_driver+0x15d/0x260
[   66.389159]   [   66.389168] [<ffffffff81477e3b>] driver_register+0x5b/0xd0
[   66.389183]   [   66.389281] [<ffffffff812fa19b>] __pci_register_driver+0x5b/0x60
[   66.389301]   [   66.389312] [<ffffffff81aed333>] i915_init+0x3e/0x45
[   66.389326]   [   66.389336] [<ffffffff81ac2ffa>] do_one_initcall+0x8b/0x118
[   66.389350]   [   66.389359] [<ffffffff81ac323a>] kernel_init_freeable+0x1b3/0x23b
[   66.389378]   [   66.389387] [<ffffffff8160fc39>] kernel_init+0x9/0x100
[   66.389402]   [   66.389411] [<ffffffff816180e7>] ret_from_fork+0x27/0x40
[   66.389426] irq event stamp: 315865
[   66.389438] hardirqs last  enabled at (315864): [<ffffffff816178f1>] _raw_spin_unlock_irqrestore+0x31/0x50
[   66.389469] hardirqs last disabled at (315865): [<ffffffff816176b3>] _raw_spin_lock_irqsave+0x13/0x50
[   66.389499] softirqs last  enabled at (315818): [<ffffffff8107a04c>] _local_bh_enable+0x1c/0x50
[   66.389530] softirqs last disabled at (315819): [<ffffffff8107a50e>] irq_exit+0xbe/0xd0
[   66.389559]
[   66.389559] other info that might help us debug this:
[   66.389580]  Possible unsafe locking scenario:
[   66.389580]
[   66.389598]        CPU0
[   66.389609]        ----
[   66.389620]   lock(&(&b->lock)->rlock);
[   66.389650]   <Interrupt>
[   66.389661]     lock(&(&b->lock)->rlock);
[   66.389690]
[   66.389690]  *** DEADLOCK ***
[   66.389690]
[   66.389715] 2 locks held by swapper/1/0:
[   66.389728]  #0: (&(&tl->lock)->rlock){..-...}, at: [<ffffffff81403e01>] intel_lrc_irq_handler+0x201/0x3c0
[   66.389785]  #1: (&(&req->lock)->rlock/1){..-...}, at: [<ffffffff813fc0af>] __i915_gem_request_submit+0x8f/0x170
[   66.389854]
[   66.389854] stack backtrace:
[   66.389959] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.9.0-rc2+ #56
[   66.389976] Hardware name:                  /        , BIOS PYBSWCEL.86A.0027.2015.0507.1758 05/07/2015
[   66.389999]  ffff88027fd03c58 ffffffff812beae5 ffff88027696e680 ffffffff822afe20
[   66.390036]  ffff88027fd03ca8 ffffffff810bb420 0000000000000001 0000000000000000
[   66.390070]  0000000000000000 0000000000000006 0000000000000004 ffff88027696ee10
[   66.390104] Call Trace:
[   66.390117]  <IRQ>
[   66.390128]  [<ffffffff812beae5>] dump_stack+0x68/0x93
[   66.390147]  [<ffffffff810bb420>] print_usage_bug+0x1d0/0x1e0
[   66.390164]  [<ffffffff810bb8a0>] mark_lock+0x470/0x4f0
[   66.390181]  [<ffffffff810ba9d0>] ? print_shortest_lock_dependencies+0x1b0/0x1b0
[   66.390203]  [<ffffffff810bd75d>] __lock_acquire+0x59d/0x1870
[   66.390221]  [<ffffffff810bedbc>] lock_acquire+0x6c/0xb0
[   66.390237]  [<ffffffff810bedbc>] ? lock_acquire+0x6c/0xb0
[   66.390255]  [<ffffffff81401c88>] ? intel_engine_enable_signaling+0x78/0x150
[   66.390273]  [<ffffffff8161753a>] _raw_spin_lock+0x2a/0x40
[   66.390291]  [<ffffffff81401c88>] ? intel_engine_enable_signaling+0x78/0x150
[   66.390309]  [<ffffffff81401c88>] intel_engine_enable_signaling+0x78/0x150
[   66.390327]  [<ffffffff813fc170>] __i915_gem_request_submit+0x150/0x170
[   66.390345]  [<ffffffff81403e8b>] intel_lrc_irq_handler+0x28b/0x3c0
[   66.390363]  [<ffffffff81079d97>] tasklet_action+0x57/0xc0
[   66.390380]  [<ffffffff8107a249>] __do_softirq+0x119/0x240
[   66.390396]  [<ffffffff8107a50e>] irq_exit+0xbe/0xd0
[   66.390414]  [<ffffffff8101afd5>] do_IRQ+0x65/0x110
[   66.390431]  [<ffffffff81618806>] common_interrupt+0x86/0x86
[   66.390446]  <EOI>
[   66.390457]  [<ffffffff814ec6d1>] ? cpuidle_enter_state+0x151/0x200
[   66.390480]  [<ffffffff814ec7a2>] cpuidle_enter+0x12/0x20
[   66.390498]  [<ffffffff810b639e>] call_cpuidle+0x1e/0x40
[   66.390516]  [<ffffffff810b65ae>] cpu_startup_entry+0x10e/0x1f0
[   66.390534]  [<ffffffff81036133>] start_secondary+0x103/0x130

(This is split out of the defer global seqno allocation patch due to
realisation that we need a more complete conversion if we want to defer
request submission even further.)

v2: lockdep was warning about mixed SOFTIRQ contexts not HARDIRQ
contexts so we only need to use spin_lock_bh and not disable interrupts.

v3: We need full irq protection as we may be called from a third party
interrupt handler (via fences).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-32-chris@chris-wilson.co.uk
2016-10-28 20:53:55 +01:00

578 lines
20 KiB
C

#ifndef _INTEL_RINGBUFFER_H_
#define _INTEL_RINGBUFFER_H_
#include <linux/hashtable.h>
#include "i915_gem_batch_pool.h"
#include "i915_gem_request.h"
#include "i915_gem_timeline.h"
#define I915_CMD_HASH_ORDER 9
/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
* but keeps the logic simple. Indeed, the whole purpose of this macro is just
* to give some inclination as to some of the magic values used in the various
* workarounds!
*/
#define CACHELINE_BYTES 64
#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(uint32_t))
/*
* Gen2 BSpec "1. Programming Environment" / 1.4.4.6 "Ring Buffer Use"
* Gen3 BSpec "vol1c Memory Interface Functions" / 2.3.4.5 "Ring Buffer Use"
* Gen4+ BSpec "vol1c Memory Interface and Command Stream" / 5.3.4.5 "Ring Buffer Use"
*
* "If the Ring Buffer Head Pointer and the Tail Pointer are on the same
* cacheline, the Head Pointer must not be greater than the Tail
* Pointer."
*/
#define I915_RING_FREE_SPACE 64
struct intel_hw_status_page {
struct i915_vma *vma;
u32 *page_addr;
u32 ggtt_offset;
};
#define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base))
#define I915_WRITE_TAIL(engine, val) I915_WRITE(RING_TAIL((engine)->mmio_base), val)
#define I915_READ_START(engine) I915_READ(RING_START((engine)->mmio_base))
#define I915_WRITE_START(engine, val) I915_WRITE(RING_START((engine)->mmio_base), val)
#define I915_READ_HEAD(engine) I915_READ(RING_HEAD((engine)->mmio_base))
#define I915_WRITE_HEAD(engine, val) I915_WRITE(RING_HEAD((engine)->mmio_base), val)
#define I915_READ_CTL(engine) I915_READ(RING_CTL((engine)->mmio_base))
#define I915_WRITE_CTL(engine, val) I915_WRITE(RING_CTL((engine)->mmio_base), val)
#define I915_READ_IMR(engine) I915_READ(RING_IMR((engine)->mmio_base))
#define I915_WRITE_IMR(engine, val) I915_WRITE(RING_IMR((engine)->mmio_base), val)
#define I915_READ_MODE(engine) I915_READ(RING_MI_MODE((engine)->mmio_base))
#define I915_WRITE_MODE(engine, val) I915_WRITE(RING_MI_MODE((engine)->mmio_base), val)
/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
* do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
*/
#define gen8_semaphore_seqno_size sizeof(uint64_t)
#define GEN8_SEMAPHORE_OFFSET(__from, __to) \
(((__from) * I915_NUM_ENGINES + (__to)) * gen8_semaphore_seqno_size)
#define GEN8_SIGNAL_OFFSET(__ring, to) \
(dev_priv->semaphore->node.start + \
GEN8_SEMAPHORE_OFFSET((__ring)->id, (to)))
#define GEN8_WAIT_OFFSET(__ring, from) \
(dev_priv->semaphore->node.start + \
GEN8_SEMAPHORE_OFFSET(from, (__ring)->id))
enum intel_engine_hangcheck_action {
HANGCHECK_IDLE = 0,
HANGCHECK_WAIT,
HANGCHECK_ACTIVE,
HANGCHECK_KICK,
HANGCHECK_HUNG,
};
#define HANGCHECK_SCORE_RING_HUNG 31
#define I915_MAX_SLICES 3
#define I915_MAX_SUBSLICES 3
#define instdone_slice_mask(dev_priv__) \
(INTEL_GEN(dev_priv__) == 7 ? \
1 : INTEL_INFO(dev_priv__)->sseu.slice_mask)
#define instdone_subslice_mask(dev_priv__) \
(INTEL_GEN(dev_priv__) == 7 ? \
1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask)
#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
for ((slice__) = 0, (subslice__) = 0; \
(slice__) < I915_MAX_SLICES; \
(subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \
(slice__) += ((subslice__) == 0)) \
for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \
(BIT(subslice__) & instdone_subslice_mask(dev_priv__)))
struct intel_instdone {
u32 instdone;
/* The following exist only in the RCS engine */
u32 slice_common;
u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES];
u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES];
};
struct intel_engine_hangcheck {
u64 acthd;
u32 seqno;
int score;
enum intel_engine_hangcheck_action action;
int deadlock;
struct intel_instdone instdone;
};
struct intel_ring {
struct i915_vma *vma;
void *vaddr;
struct intel_engine_cs *engine;
struct list_head request_list;
u32 head;
u32 tail;
int space;
int size;
int effective_size;
/** We track the position of the requests in the ring buffer, and
* when each is retired we increment last_retired_head as the GPU
* must have finished processing the request and so we know we
* can advance the ringbuffer up to that position.
*
* last_retired_head is set to -1 after the value is consumed so
* we can detect new retirements.
*/
u32 last_retired_head;
};
struct i915_gem_context;
struct drm_i915_reg_table;
/*
* we use a single page to load ctx workarounds so all of these
* values are referred in terms of dwords
*
* struct i915_wa_ctx_bb:
* offset: specifies batch starting position, also helpful in case
* if we want to have multiple batches at different offsets based on
* some criteria. It is not a requirement at the moment but provides
* an option for future use.
* size: size of the batch in DWORDS
*/
struct i915_ctx_workarounds {
struct i915_wa_ctx_bb {
u32 offset;
u32 size;
} indirect_ctx, per_ctx;
struct i915_vma *vma;
};
struct drm_i915_gem_request;
struct intel_render_state;
struct intel_engine_cs {
struct drm_i915_private *i915;
const char *name;
enum intel_engine_id {
RCS = 0,
BCS,
VCS,
VCS2, /* Keep instances of the same type engine together. */
VECS
} id;
#define _VCS(n) (VCS + (n))
unsigned int exec_id;
enum intel_engine_hw_id {
RCS_HW = 0,
VCS_HW,
BCS_HW,
VECS_HW,
VCS2_HW
} hw_id;
enum intel_engine_hw_id guc_id; /* XXX same as hw_id? */
u32 mmio_base;
unsigned int irq_shift;
struct intel_ring *buffer;
struct intel_timeline *timeline;
struct intel_render_state *render_state;
/* Rather than have every client wait upon all user interrupts,
* with the herd waking after every interrupt and each doing the
* heavyweight seqno dance, we delegate the task (of being the
* bottom-half of the user interrupt) to the first client. After
* every interrupt, we wake up one client, who does the heavyweight
* coherent seqno read and either goes back to sleep (if incomplete),
* or wakes up all the completed clients in parallel, before then
* transferring the bottom-half status to the next client in the queue.
*
* Compared to walking the entire list of waiters in a single dedicated
* bottom-half, we reduce the latency of the first waiter by avoiding
* a context switch, but incur additional coherent seqno reads when
* following the chain of request breadcrumbs. Since it is most likely
* that we have a single client waiting on each seqno, then reducing
* the overhead of waking that client is much preferred.
*/
struct intel_breadcrumbs {
struct task_struct __rcu *irq_seqno_bh; /* bh for interrupts */
bool irq_posted;
spinlock_t lock; /* protects the lists of requests; irqsafe */
struct rb_root waiters; /* sorted by retirement, priority */
struct rb_root signals; /* sorted by retirement */
struct intel_wait *first_wait; /* oldest waiter by retirement */
struct task_struct *signaler; /* used for fence signalling */
struct drm_i915_gem_request *first_signal;
struct timer_list fake_irq; /* used after a missed interrupt */
struct timer_list hangcheck; /* detect missed interrupts */
unsigned long timeout;
bool irq_enabled : 1;
bool rpm_wakelock : 1;
} breadcrumbs;
/*
* A pool of objects to use as shadow copies of client batch buffers
* when the command parser is enabled. Prevents the client from
* modifying the batch contents after software parsing.
*/
struct i915_gem_batch_pool batch_pool;
struct intel_hw_status_page status_page;
struct i915_ctx_workarounds wa_ctx;
struct i915_vma *scratch;
u32 irq_keep_mask; /* always keep these interrupts */
u32 irq_enable_mask; /* bitmask to enable ring interrupt */
void (*irq_enable)(struct intel_engine_cs *engine);
void (*irq_disable)(struct intel_engine_cs *engine);
int (*init_hw)(struct intel_engine_cs *engine);
void (*reset_hw)(struct intel_engine_cs *engine,
struct drm_i915_gem_request *req);
int (*init_context)(struct drm_i915_gem_request *req);
int (*emit_flush)(struct drm_i915_gem_request *request,
u32 mode);
#define EMIT_INVALIDATE BIT(0)
#define EMIT_FLUSH BIT(1)
#define EMIT_BARRIER (EMIT_INVALIDATE | EMIT_FLUSH)
int (*emit_bb_start)(struct drm_i915_gem_request *req,
u64 offset, u32 length,
unsigned int dispatch_flags);
#define I915_DISPATCH_SECURE BIT(0)
#define I915_DISPATCH_PINNED BIT(1)
#define I915_DISPATCH_RS BIT(2)
void (*emit_breadcrumb)(struct drm_i915_gem_request *req,
u32 *out);
int emit_breadcrumb_sz;
/* Pass the request to the hardware queue (e.g. directly into
* the legacy ringbuffer or to the end of an execlist).
*
* This is called from an atomic context with irqs disabled; must
* be irq safe.
*/
void (*submit_request)(struct drm_i915_gem_request *req);
/* Some chipsets are not quite as coherent as advertised and need
* an expensive kick to force a true read of the up-to-date seqno.
* However, the up-to-date seqno is not always required and the last
* seen value is good enough. Note that the seqno will always be
* monotonic, even if not coherent.
*/
void (*irq_seqno_barrier)(struct intel_engine_cs *engine);
void (*cleanup)(struct intel_engine_cs *engine);
/* GEN8 signal/wait table - never trust comments!
* signal to signal to signal to signal to signal to
* RCS VCS BCS VECS VCS2
* --------------------------------------------------------------------
* RCS | NOP (0x00) | VCS (0x08) | BCS (0x10) | VECS (0x18) | VCS2 (0x20) |
* |-------------------------------------------------------------------
* VCS | RCS (0x28) | NOP (0x30) | BCS (0x38) | VECS (0x40) | VCS2 (0x48) |
* |-------------------------------------------------------------------
* BCS | RCS (0x50) | VCS (0x58) | NOP (0x60) | VECS (0x68) | VCS2 (0x70) |
* |-------------------------------------------------------------------
* VECS | RCS (0x78) | VCS (0x80) | BCS (0x88) | NOP (0x90) | VCS2 (0x98) |
* |-------------------------------------------------------------------
* VCS2 | RCS (0xa0) | VCS (0xa8) | BCS (0xb0) | VECS (0xb8) | NOP (0xc0) |
* |-------------------------------------------------------------------
*
* Generalization:
* f(x, y) := (x->id * NUM_RINGS * seqno_size) + (seqno_size * y->id)
* ie. transpose of g(x, y)
*
* sync from sync from sync from sync from sync from
* RCS VCS BCS VECS VCS2
* --------------------------------------------------------------------
* RCS | NOP (0x00) | VCS (0x28) | BCS (0x50) | VECS (0x78) | VCS2 (0xa0) |
* |-------------------------------------------------------------------
* VCS | RCS (0x08) | NOP (0x30) | BCS (0x58) | VECS (0x80) | VCS2 (0xa8) |
* |-------------------------------------------------------------------
* BCS | RCS (0x10) | VCS (0x38) | NOP (0x60) | VECS (0x88) | VCS2 (0xb0) |
* |-------------------------------------------------------------------
* VECS | RCS (0x18) | VCS (0x40) | BCS (0x68) | NOP (0x90) | VCS2 (0xb8) |
* |-------------------------------------------------------------------
* VCS2 | RCS (0x20) | VCS (0x48) | BCS (0x70) | VECS (0x98) | NOP (0xc0) |
* |-------------------------------------------------------------------
*
* Generalization:
* g(x, y) := (y->id * NUM_RINGS * seqno_size) + (seqno_size * x->id)
* ie. transpose of f(x, y)
*/
struct {
union {
#define GEN6_SEMAPHORE_LAST VECS_HW
#define GEN6_NUM_SEMAPHORES (GEN6_SEMAPHORE_LAST + 1)
#define GEN6_SEMAPHORES_MASK GENMASK(GEN6_SEMAPHORE_LAST, 0)
struct {
/* our mbox written by others */
u32 wait[GEN6_NUM_SEMAPHORES];
/* mboxes this ring signals to */
i915_reg_t signal[GEN6_NUM_SEMAPHORES];
} mbox;
u64 signal_ggtt[I915_NUM_ENGINES];
};
/* AKA wait() */
int (*sync_to)(struct drm_i915_gem_request *req,
struct drm_i915_gem_request *signal);
u32 *(*signal)(struct drm_i915_gem_request *req, u32 *out);
} semaphore;
/* Execlists */
struct tasklet_struct irq_tasklet;
spinlock_t execlist_lock; /* used inside tasklet, use spin_lock_bh */
struct execlist_port {
struct drm_i915_gem_request *request;
unsigned int count;
} execlist_port[2];
struct list_head execlist_queue;
unsigned int fw_domains;
bool disable_lite_restore_wa;
bool preempt_wa;
u32 ctx_desc_template;
struct i915_gem_context *last_context;
struct intel_engine_hangcheck hangcheck;
bool needs_cmd_parser;
/*
* Table of commands the command parser needs to know about
* for this engine.
*/
DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER);
/*
* Table of registers allowed in commands that read/write registers.
*/
const struct drm_i915_reg_table *reg_tables;
int reg_table_count;
/*
* Returns the bitmask for the length field of the specified command.
* Return 0 for an unrecognized/invalid command.
*
* If the command parser finds an entry for a command in the engine's
* cmd_tables, it gets the command's length based on the table entry.
* If not, it calls this function to determine the per-engine length
* field encoding for the command (i.e. different opcode ranges use
* certain bits to encode the command length in the header).
*/
u32 (*get_cmd_length_mask)(u32 cmd_header);
};
static inline unsigned
intel_engine_flag(const struct intel_engine_cs *engine)
{
return 1 << engine->id;
}
static inline void
intel_flush_status_page(struct intel_engine_cs *engine, int reg)
{
mb();
clflush(&engine->status_page.page_addr[reg]);
mb();
}
static inline u32
intel_read_status_page(struct intel_engine_cs *engine, int reg)
{
/* Ensure that the compiler doesn't optimize away the load. */
return READ_ONCE(engine->status_page.page_addr[reg]);
}
static inline void
intel_write_status_page(struct intel_engine_cs *engine,
int reg, u32 value)
{
engine->status_page.page_addr[reg] = value;
}
/*
* Reads a dword out of the status page, which is written to from the command
* queue by automatic updates, MI_REPORT_HEAD, MI_STORE_DATA_INDEX, or
* MI_STORE_DATA_IMM.
*
* The following dwords have a reserved meaning:
* 0x00: ISR copy, updated when an ISR bit not set in the HWSTAM changes.
* 0x04: ring 0 head pointer
* 0x05: ring 1 head pointer (915-class)
* 0x06: ring 2 head pointer (915-class)
* 0x10-0x1b: Context status DWords (GM45)
* 0x1f: Last written status offset. (GM45)
* 0x20-0x2f: Reserved (Gen6+)
*
* The area from dword 0x30 to 0x3ff is available for driver usage.
*/
#define I915_GEM_HWS_INDEX 0x30
#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
#define I915_GEM_HWS_SCRATCH_INDEX 0x40
#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine, int size);
int intel_ring_pin(struct intel_ring *ring);
void intel_ring_unpin(struct intel_ring *ring);
void intel_ring_free(struct intel_ring *ring);
void intel_engine_stop(struct intel_engine_cs *engine);
void intel_engine_cleanup(struct intel_engine_cs *engine);
void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);
int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request);
int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n);
int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req);
static inline void intel_ring_emit(struct intel_ring *ring, u32 data)
{
*(uint32_t *)(ring->vaddr + ring->tail) = data;
ring->tail += 4;
}
static inline void intel_ring_emit_reg(struct intel_ring *ring, i915_reg_t reg)
{
intel_ring_emit(ring, i915_mmio_reg_offset(reg));
}
static inline void intel_ring_advance(struct intel_ring *ring)
{
/* Dummy function.
*
* This serves as a placeholder in the code so that the reader
* can compare against the preceding intel_ring_begin() and
* check that the number of dwords emitted matches the space
* reserved for the command packet (i.e. the value passed to
* intel_ring_begin()).
*/
}
static inline u32 intel_ring_offset(struct intel_ring *ring, void *addr)
{
/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
u32 offset = addr - ring->vaddr;
return offset & (ring->size - 1);
}
int __intel_ring_space(int head, int tail, int size);
void intel_ring_update_space(struct intel_ring *ring);
void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno);
void intel_engine_setup_common(struct intel_engine_cs *engine);
int intel_engine_init_common(struct intel_engine_cs *engine);
int intel_engine_create_scratch(struct intel_engine_cs *engine, int size);
void intel_engine_cleanup_common(struct intel_engine_cs *engine);
int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine);
int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
int intel_init_vebox_ring_buffer(struct intel_engine_cs *engine);
u64 intel_engine_get_active_head(struct intel_engine_cs *engine);
u64 intel_engine_get_last_batch_head(struct intel_engine_cs *engine);
static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine)
{
return intel_read_status_page(engine, I915_GEM_HWS_INDEX);
}
int init_workarounds_ring(struct intel_engine_cs *engine);
void intel_engine_get_instdone(struct intel_engine_cs *engine,
struct intel_instdone *instdone);
/*
* Arbitrary size for largest possible 'add request' sequence. The code paths
* are complex and variable. Empirical measurement shows that the worst case
* is BDW at 192 bytes (6 + 6 + 36 dwords), then ILK at 136 bytes. However,
* we need to allocate double the largest single packet within that emission
* to account for tail wraparound (so 6 + 6 + 72 dwords for BDW).
*/
#define MIN_SPACE_FOR_ADD_REQUEST 336
static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
{
return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR;
}
/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
static inline void intel_wait_init(struct intel_wait *wait, u32 seqno)
{
wait->tsk = current;
wait->seqno = seqno;
}
static inline bool intel_wait_complete(const struct intel_wait *wait)
{
return RB_EMPTY_NODE(&wait->node);
}
bool intel_engine_add_wait(struct intel_engine_cs *engine,
struct intel_wait *wait);
void intel_engine_remove_wait(struct intel_engine_cs *engine,
struct intel_wait *wait);
void intel_engine_enable_signaling(struct drm_i915_gem_request *request);
static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
{
return rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh);
}
static inline bool intel_engine_wakeup(const struct intel_engine_cs *engine)
{
bool wakeup = false;
/* Note that for this not to dangerously chase a dangling pointer,
* we must hold the rcu_read_lock here.
*
* Also note that tsk is likely to be in !TASK_RUNNING state so an
* early test for tsk->state != TASK_RUNNING before wake_up_process()
* is unlikely to be beneficial.
*/
if (intel_engine_has_waiter(engine)) {
struct task_struct *tsk;
rcu_read_lock();
tsk = rcu_dereference(engine->breadcrumbs.irq_seqno_bh);
if (tsk)
wakeup = wake_up_process(tsk);
rcu_read_unlock();
}
return wakeup;
}
void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
unsigned int intel_kick_waiters(struct drm_i915_private *i915);
unsigned int intel_kick_signalers(struct drm_i915_private *i915);
static inline bool intel_engine_is_active(struct intel_engine_cs *engine)
{
return i915_gem_active_isset(&engine->timeline->last_request);
}
#endif /* _INTEL_RINGBUFFER_H_ */