blob: aec7db39c061e65206d7c3b5453d452428e46eb6 [file] [log] [blame]
// SPDX-License-Identifier: MIT
/*
* Copyright © 2021 Intel Corporation
*/
#include "xe_lrc.h"
#include <generated/xe_wa_oob.h>
#include <linux/ascii85.h>
#include "instructions/xe_mi_commands.h"
#include "instructions/xe_gfxpipe_commands.h"
#include "instructions/xe_gfx_state_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_lrc_layout.h"
#include "xe_bb.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue_types.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_hw_fence.h"
#include "xe_map.h"
#include "xe_memirq.h"
#include "xe_sriov.h"
#include "xe_vm.h"
#include "xe_wa.h"
/*
* Bit fields OR'ed into lrc->desc by xe_lrc_init().
* LRC_ENGINE_CLASS and LRC_ENGINE_INSTANCE are only filled in when
* GRAPHICS_VERx100 is below 1250.
*/
#define LRC_VALID BIT_ULL(0)
#define LRC_PRIVILEGE BIT_ULL(8)
#define LRC_ADDRESSING_MODE GENMASK_ULL(4, 3)
#define LRC_LEGACY_64B_CONTEXT 3
#define LRC_ENGINE_CLASS GENMASK_ULL(63, 61)
#define LRC_ENGINE_INSTANCE GENMASK_ULL(53, 48)
/* Size of the optional indirect ring state area placed at the end of the LRC */
#define LRC_INDIRECT_RING_STATE_SIZE SZ_4K
/*
* struct xe_lrc_snapshot - captured state of one LRC.
*
* NOTE(review): the code that fills this structure is not part of this
* section; the per-field notes below are inferred from the field names and
* should be confirmed against the capture/print functions.
*/
struct xe_lrc_snapshot {
/* presumably the BO backing the LRC and a copy of its contents */
struct xe_bo *lrc_bo;
void *lrc_snapshot;
/* presumably size of the copied region and its offset within the BO */
unsigned long lrc_size, lrc_offset;
u32 context_desc;
u32 indirect_context_desc;
u32 head;
/* two views of the ring tail; presumably driver-side vs. context image */
struct {
u32 internal;
u32 memory;
} tail;
u32 start_seqno;
u32 seqno;
u32 ctx_timestamp;
u32 ctx_job_timestamp;
};
/* Look up the xe device for @lrc through the GT stored in its fence context. */
static struct xe_device *
lrc_to_xe(struct xe_lrc *lrc)
{
	struct xe_gt *gt = lrc->fence_ctx.gt;

	return gt_to_xe(gt);
}
/*
 * xe_gt_lrc_size() - size in bytes of the context image for an engine class
 * @gt: GT the engine belongs to
 * @class: engine class
 *
 * The base size is a per-class number of 4K pages; one extra
 * LRC_INDIRECT_RING_STATE_SIZE area is added when the GT uses indirect ring
 * state. An unrecognised class triggers a WARN and is sized like the
 * copy/video/other classes.
 */
size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
{
	struct xe_device *xe = gt_to_xe(gt);
	size_t pages;

	switch (class) {
	case XE_ENGINE_CLASS_RENDER:
		pages = GRAPHICS_VER(xe) >= 20 ? 4 : 14;
		break;
	case XE_ENGINE_CLASS_COMPUTE:
		/* 14 pages since graphics_ver == 11 */
		pages = GRAPHICS_VER(xe) >= 20 ? 3 : 14;
		break;
	default:
		WARN(1, "Unknown engine class: %d", class);
		fallthrough;
	case XE_ENGINE_CLASS_COPY:
	case XE_ENGINE_CLASS_VIDEO_DECODE:
	case XE_ENGINE_CLASS_VIDEO_ENHANCE:
	case XE_ENGINE_CLASS_OTHER:
		pages = 2;
		break;
	}

	/* The indirect ring state, when present, takes one more area */
	if (!xe_gt_has_indirect_ring_state(gt))
		return pages * SZ_4K;

	return pages * SZ_4K + LRC_INDIRECT_RING_STATE_SIZE;
}
/*
* The per-platform tables are u8-encoded in @data. Decode @data and set the
* addresses' offset and commands in @regs. The following encoding is used
* for each byte. There are 2 steps: decoding commands and decoding addresses.
*
* Commands:
* [7]: create NOPs - number of NOPs are set in lower bits
* [6]: When creating MI_LOAD_REGISTER_IMM command, allow to set
* MI_LRI_FORCE_POSTED
* [5:0]: Number of NOPs or registers to set values to in case of
* MI_LOAD_REGISTER_IMM
*
* Addresses: these are decoded after a MI_LOAD_REGISTER_IMM command by "count"
* number of registers. They are set by using the REG/REG16 macros: the former
* is used for offsets smaller than 0x200 while the latter is for values bigger
* than that. Those macros already set all the bits documented below correctly:
*
* [7]: When a register offset (in dwords) needs more than 7 bits, use
* additional bytes, to follow, for the lower bits
* [6:0]: Register offset, without considering the engine base.
*
* A zero command byte terminates the table. Each NOP advances @regs by one
* dword, each MI_LOAD_REGISTER_IMM header takes one dword and each register
* takes two dwords (offset, then an untouched value slot).
*
* This function only tweaks the commands and register offsets. Values are not
* filled out.
*/
static void set_offsets(u32 *regs,
const u8 *data,
const struct xe_hw_engine *hwe)
/* Table-building macros; they stay defined for the tables that follow */
#define NOP(x) (BIT(7) | (x))
#define LRI(count, flags) ((flags) << 6 | (count) | \
BUILD_BUG_ON_ZERO(count >= BIT(6)))
#define POSTED BIT(0)
#define REG(x) (((x) >> 2) | BUILD_BUG_ON_ZERO(x >= 0x200))
#define REG16(x) \
(((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \
(((x) >> 2) & 0x7f)
{
const u32 base = hwe->mmio_base;
while (*data) {
u8 count, flags;
if (*data & BIT(7)) { /* skip */
/* NOP entry: leave 'count' dwords of @regs untouched */
count = *data++ & ~BIT(7);
regs += count;
continue;
}
/* LRI entry: low 6 bits are the register count, bit 6 the POSTED flag */
count = *data & 0x3f;
flags = *data >> 6;
data++;
*regs = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
if (flags & POSTED)
*regs |= MI_LRI_FORCE_POSTED;
*regs |= MI_LRI_LRM_CS_MMIO;
regs++;
/* A zero count would make the do/while below wrap around */
xe_gt_assert(hwe->gt, count);
do {
u32 offset = 0;
u8 v;
/* Accumulate 7 bits per byte; bit 7 set means more bytes follow */
do {
v = *data++;
offset <<= 7;
offset |= v & ~BIT(7);
} while (v & BIT(7));
/* Stored offsets are in dwords; convert to bytes and add the engine base */
regs[0] = base + (offset << 2);
/* Skip the value dword, which is not written here */
regs += 2;
} while (--count);
}
/* Close the decoded state with a batch buffer end */
*regs = MI_BATCH_BUFFER_END | BIT(0);
}
/*
* Encoded register layout decoded by set_offsets(). reg_offsets() returns it
* for copy engines below graphics version 20 and for the other non-render
* classes below GRAPHICS_VERx100 1255.
*/
static const u8 gen12_xcs_offsets[] = {
NOP(1),
LRI(13, POSTED),
REG16(0x244),
REG(0x034),
REG(0x030),
REG(0x038),
REG(0x03c),
REG(0x168),
REG(0x140),
REG(0x110),
REG(0x1c0),
REG(0x1c4),
REG(0x1c8),
REG(0x180),
REG16(0x2b4),
NOP(5),
LRI(9, POSTED),
REG16(0x3a8),
REG16(0x28c),
REG16(0x288),
REG16(0x284),
REG16(0x280),
REG16(0x27c),
REG16(0x278),
REG16(0x274),
REG16(0x270),
0
};
/*
* Encoded register layout decoded by set_offsets(). reg_offsets() returns it
* for non-render, non-copy classes with GRAPHICS_VERx100 >= 1255 and graphics
* version below 20.
*/
static const u8 dg2_xcs_offsets[] = {
NOP(1),
LRI(15, POSTED),
REG16(0x244),
REG(0x034),
REG(0x030),
REG(0x038),
REG(0x03c),
REG(0x168),
REG(0x140),
REG(0x110),
REG(0x1c0),
REG(0x1c4),
REG(0x1c8),
REG(0x180),
REG16(0x2b4),
REG(0x120),
REG(0x124),
NOP(1),
LRI(9, POSTED),
REG16(0x3a8),
REG16(0x28c),
REG16(0x288),
REG16(0x284),
REG16(0x280),
REG16(0x27c),
REG16(0x278),
REG16(0x274),
REG16(0x270),
0
};
/*
* Encoded register layout decoded by set_offsets(). reg_offsets() returns it
* for the render class when GRAPHICS_VERx100 is below 1250.
*/
static const u8 gen12_rcs_offsets[] = {
NOP(1),
LRI(13, POSTED),
REG16(0x244),
REG(0x034),
REG(0x030),
REG(0x038),
REG(0x03c),
REG(0x168),
REG(0x140),
REG(0x110),
REG(0x1c0),
REG(0x1c4),
REG(0x1c8),
REG(0x180),
REG16(0x2b4),
NOP(5),
LRI(9, POSTED),
REG16(0x3a8),
REG16(0x28c),
REG16(0x288),
REG16(0x284),
REG16(0x280),
REG16(0x27c),
REG16(0x278),
REG16(0x274),
REG16(0x270),
LRI(3, POSTED),
REG(0x1b0),
REG16(0x5a8),
REG16(0x5ac),
NOP(6),
LRI(1, 0),
REG(0x0c8),
NOP(3 + 9 + 1),
LRI(51, POSTED),
REG16(0x588),
REG16(0x588),
REG16(0x588),
REG16(0x588),
REG16(0x588),
REG16(0x588),
REG(0x028),
REG(0x09c),
REG(0x0c0),
REG(0x178),
REG(0x17c),
REG16(0x358),
REG(0x170),
REG(0x150),
REG(0x154),
REG(0x158),
REG16(0x41c),
REG16(0x600),
REG16(0x604),
REG16(0x608),
REG16(0x60c),
REG16(0x610),
REG16(0x614),
REG16(0x618),
REG16(0x61c),
REG16(0x620),
REG16(0x624),
REG16(0x628),
REG16(0x62c),
REG16(0x630),
REG16(0x634),
REG16(0x638),
REG16(0x63c),
REG16(0x640),
REG16(0x644),
REG16(0x648),
REG16(0x64c),
REG16(0x650),
REG16(0x654),
REG16(0x658),
REG16(0x65c),
REG16(0x660),
REG16(0x664),
REG16(0x668),
REG16(0x66c),
REG16(0x670),
REG16(0x674),
REG16(0x678),
REG16(0x67c),
REG(0x068),
REG(0x084),
NOP(1),
0
};
/*
* Encoded register layout decoded by set_offsets(). reg_offsets() returns it
* for the render class when GRAPHICS_VERx100 is at least 1250 and below 1255.
*/
static const u8 xehp_rcs_offsets[] = {
NOP(1),
LRI(13, POSTED),
REG16(0x244),
REG(0x034),
REG(0x030),
REG(0x038),
REG(0x03c),
REG(0x168),
REG(0x140),
REG(0x110),
REG(0x1c0),
REG(0x1c4),
REG(0x1c8),
REG(0x180),
REG16(0x2b4),
NOP(5),
LRI(9, POSTED),
REG16(0x3a8),
REG16(0x28c),
REG16(0x288),
REG16(0x284),
REG16(0x280),
REG16(0x27c),
REG16(0x278),
REG16(0x274),
REG16(0x270),
LRI(3, POSTED),
REG(0x1b0),
REG16(0x5a8),
REG16(0x5ac),
NOP(6),
LRI(1, 0),
REG(0x0c8),
0
};
/*
* Encoded register layout decoded by set_offsets(). reg_offsets() returns it
* for the render class when GRAPHICS_VERx100 is at least 1255 and below 1270.
*/
static const u8 dg2_rcs_offsets[] = {
NOP(1),
LRI(15, POSTED),
REG16(0x244),
REG(0x034),
REG(0x030),
REG(0x038),
REG(0x03c),
REG(0x168),
REG(0x140),
REG(0x110),
REG(0x1c0),
REG(0x1c4),
REG(0x1c8),
REG(0x180),
REG16(0x2b4),
REG(0x120),
REG(0x124),
NOP(1),
LRI(9, POSTED),
REG16(0x3a8),
REG16(0x28c),
REG16(0x288),
REG16(0x284),
REG16(0x280),
REG16(0x27c),
REG16(0x278),
REG16(0x274),
REG16(0x270),
LRI(3, POSTED),
REG(0x1b0),
REG16(0x5a8),
REG16(0x5ac),
NOP(6),
LRI(1, 0),
REG(0x0c8),
0
};
/*
* Encoded register layout decoded by set_offsets(). reg_offsets() returns it
* for the render class when GRAPHICS_VERx100 is at least 1270 and the graphics
* version is below 20.
*/
static const u8 mtl_rcs_offsets[] = {
NOP(1),
LRI(15, POSTED),
REG16(0x244),
REG(0x034),
REG(0x030),
REG(0x038),
REG(0x03c),
REG(0x168),
REG(0x140),
REG(0x110),
REG(0x1c0),
REG(0x1c4),
REG(0x1c8),
REG(0x180),
REG16(0x2b4),
REG(0x120),
REG(0x124),
NOP(1),
LRI(9, POSTED),
REG16(0x3a8),
REG16(0x28c),
REG16(0x288),
REG16(0x284),
REG16(0x280),
REG16(0x27c),
REG16(0x278),
REG16(0x274),
REG16(0x270),
NOP(2),
LRI(2, POSTED),
REG16(0x5a8),
REG16(0x5ac),
NOP(6),
LRI(1, 0),
REG(0x0c8),
0
};
/*
* Leading part shared by the xe2_*_offsets tables. The bracketed numbers are
* the dword index each entry lands on once decoded by set_offsets(); the
* last register occupies dwords 0x32-0x33, so the next entry starts at 0x34.
*/
#define XE2_CTX_COMMON \
NOP(1), /* [0x00] */ \
LRI(15, POSTED), /* [0x01] */ \
REG16(0x244), /* [0x02] CTXT_SR_CTL */ \
REG(0x034), /* [0x04] RING_BUFFER_HEAD */ \
REG(0x030), /* [0x06] RING_BUFFER_TAIL */ \
REG(0x038), /* [0x08] RING_BUFFER_START */ \
REG(0x03c), /* [0x0a] RING_BUFFER_CONTROL */ \
REG(0x168), /* [0x0c] BB_ADDR_UDW */ \
REG(0x140), /* [0x0e] BB_ADDR */ \
REG(0x110), /* [0x10] BB_STATE */ \
REG(0x1c0), /* [0x12] BB_PER_CTX_PTR */ \
REG(0x1c4), /* [0x14] RCS_INDIRECT_CTX */ \
REG(0x1c8), /* [0x16] RCS_INDIRECT_CTX_OFFSET */ \
REG(0x180), /* [0x18] CCID */ \
REG16(0x2b4), /* [0x1a] SEMAPHORE_TOKEN */ \
REG(0x120), /* [0x1c] PRT_BB_STATE */ \
REG(0x124), /* [0x1e] PRT_BB_STATE_UDW */ \
\
NOP(1), /* [0x20] */ \
LRI(9, POSTED), /* [0x21] */ \
REG16(0x3a8), /* [0x22] CTX_TIMESTAMP */ \
REG16(0x3ac), /* [0x24] CTX_TIMESTAMP_UDW */ \
REG(0x108), /* [0x26] INDIRECT_RING_STATE */ \
REG16(0x284), /* [0x28] dummy reg */ \
REG16(0x280), /* [0x2a] CS_ACC_CTR_THOLD */ \
REG16(0x27c), /* [0x2c] CS_CTX_SYS_PASID */ \
REG16(0x278), /* [0x2e] CS_CTX_ASID */ \
REG16(0x274), /* [0x30] PTBP_UDW */ \
REG16(0x270) /* [0x32] PTBP_LDW */
/*
* Render-class layout returned by reg_offsets() for graphics version 20 and
* later. Bracketed numbers are the decoded dword index of each entry
* (NOP(n) takes n dwords, an LRI header one, each register two).
*/
static const u8 xe2_rcs_offsets[] = {
XE2_CTX_COMMON,
NOP(2), /* [0x34] */
LRI(2, POSTED), /* [0x36] */
REG16(0x5a8), /* [0x37] CONTEXT_SCHEDULING_ATTRIBUTES */
REG16(0x5ac), /* [0x39] PREEMPTION_STATUS */
NOP(6), /* [0x3b] */
LRI(1, 0), /* [0x41] */
REG(0x0c8), /* [0x42] R_PWR_CLK_STATE */
0
};
/*
* Copy-class layout returned by reg_offsets() for graphics version 20 and
* later. Bracketed numbers are the decoded dword index of each entry.
*/
static const u8 xe2_bcs_offsets[] = {
XE2_CTX_COMMON,
NOP(4 + 8 + 1), /* [0x34] */
LRI(2, POSTED), /* [0x41] */
REG16(0x200), /* [0x42] BCS_SWCTRL */
REG16(0x204), /* [0x44] BLIT_CCTL */
0
};
/*
* Layout returned by reg_offsets() for non-render, non-copy classes on
* graphics version 20 and later: only the common part.
*/
static const u8 xe2_xcs_offsets[] = {
XE2_CTX_COMMON,
0
};
/*
* Layout of the indirect ring state area; empty_lrc_data() decodes it with
* set_offsets() into the last LRC_INDIRECT_RING_STATE_SIZE bytes of the
* image. Bracketed numbers are the decoded dword index of each entry
* (NOP(n) takes n dwords, an LRI header one, each register two).
*/
static const u8 xe2_indirect_ring_state_offsets[] = {
NOP(1), /* [0x00] */
LRI(5, POSTED), /* [0x01] */
REG(0x034), /* [0x02] RING_BUFFER_HEAD */
REG(0x030), /* [0x04] RING_BUFFER_TAIL */
REG(0x038), /* [0x06] RING_BUFFER_START */
REG(0x048), /* [0x08] RING_BUFFER_START_UDW */
REG(0x03c), /* [0x0a] RING_BUFFER_CONTROL */
NOP(5), /* [0x0c] */
LRI(9, POSTED), /* [0x11] */
REG(0x168), /* [0x12] BB_ADDR_UDW */
REG(0x140), /* [0x14] BB_ADDR */
REG(0x110), /* [0x16] BB_STATE */
REG16(0x588), /* [0x18] BB_STACK_WRITE_PORT */
REG16(0x588), /* [0x1a] BB_STACK_WRITE_PORT */
REG16(0x588), /* [0x1c] BB_STACK_WRITE_PORT */
REG16(0x588), /* [0x1e] BB_STACK_WRITE_PORT */
REG16(0x588), /* [0x20] BB_STACK_WRITE_PORT */
REG16(0x588), /* [0x22] BB_STACK_WRITE_PORT */
NOP(12), /* [0x24] */
0
};
/* The table-building macros are not needed past this point (POSTED is left defined) */
#undef REG16
#undef REG
#undef LRI
#undef NOP
/*
 * Pick the encoded register-offset table for an engine class on this device.
 * Newest platforms are tested first, so each table covers the version range
 * between its own threshold and the next one up.
 */
static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
{
	switch (class) {
	case XE_ENGINE_CLASS_RENDER:
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_rcs_offsets;
		if (GRAPHICS_VERx100(xe) >= 1270)
			return mtl_rcs_offsets;
		if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_rcs_offsets;
		if (GRAPHICS_VERx100(xe) >= 1250)
			return xehp_rcs_offsets;
		return gen12_rcs_offsets;
	case XE_ENGINE_CLASS_COPY:
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_bcs_offsets;
		return gen12_xcs_offsets;
	default:
		if (GRAPHICS_VER(xe) >= 20)
			return xe2_xcs_offsets;
		if (GRAPHICS_VERx100(xe) >= 1255)
			return dg2_xcs_offsets;
		return gen12_xcs_offsets;
	}
}
/*
 * Fill the CTX_CONTEXT_CONTROL dword of the register state: always set the
 * two inhibit bits, and additionally enable indirect ring state when the
 * engine's GT uses it.
 */
static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
{
	u32 ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
				     CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);

	if (xe_gt_has_indirect_ring_state(hwe->gt))
		ctl |= _MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE);

	regs[CTX_CONTEXT_CONTROL] = ctl;

	/* TODO: Timestamp */
}
/*
 * Program the memory-based interrupt entries of the register state. Only
 * done for an SR-IOV VF on a device that has memirq; otherwise @regs is left
 * untouched.
 */
static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
{
	struct xe_device *xe = gt_to_xe(hwe->gt);
	struct xe_memirq *memirq;

	if (!(IS_SRIOV_VF(xe) && xe_device_has_memirq(xe)))
		return;

	memirq = &gt_to_tile(hwe->gt)->sriov.vf.memirq;

	/* Load the interrupt mask register from the memirq enable location */
	regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
					MI_LRI_LRM_CS_MMIO | MI_LRM_USE_GGTT;
	regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
	regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);

	/* One LRI sets both the status and the source report pointers */
	regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
				       MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
	regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
	regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq);
	regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
	regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq);
}
/*
 * Dword index used by reset_stop_ring() to locate the MI_MODE entry in the
 * register state: 0x70 from GRAPHICS_VERx100 1250 on, 0x60 before that.
 */
static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
{
	struct xe_device *xe = gt_to_xe(hwe->gt);

	return GRAPHICS_VERx100(xe) >= 1250 ? 0x70 : 0x60;
}
/*
 * Clear STOP_RING in the value dword that follows the MI_MODE entry and set
 * the same bit in the upper 16 bits of that dword.
 */
static void reset_stop_ring(u32 *regs, struct xe_hw_engine *hwe)
{
	u32 *mi_mode_val = &regs[lrc_ring_mi_mode(hwe) + 1];

	*mi_mode_val = (*mi_mode_val & ~STOP_RING) | (STOP_RING << 16);
}
/* True when @lrc was created with XE_LRC_FLAG_INDIRECT_RING_STATE set. */
static inline bool xe_lrc_has_indirect_ring_state(struct xe_lrc *lrc)
{
	return !!(lrc->flags & XE_LRC_FLAG_INDIRECT_RING_STATE);
}
/* The ring sits at the very start of the LRC buffer object. */
static inline u32 __xe_lrc_ring_offset(struct xe_lrc *lrc)
{
	const u32 ring_start = 0;

	return ring_start;
}
/* The PPHWSP follows the ring, so its offset equals the ring size. */
u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
{
	const u32 after_ring = lrc->ring.size;

	return after_ring;
}
/*
* Make the magic macros work: DECL_MAP_ADDR_HELPERS(elem) looks up
* __xe_lrc_<elem>_offset(), so alias the two public offset functions to that
* naming scheme.
*/
#define __xe_lrc_pphwsp_offset xe_lrc_pphwsp_offset
#define __xe_lrc_regs_offset xe_lrc_regs_offset
/* Byte offsets of driver-owned fields inside the PPHWSP */
#define LRC_SEQNO_PPHWSP_OFFSET 512
#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_PARALLEL_PPHWSP_OFFSET 2048
/* The PPHWSP occupies one 4K area ahead of the register state */
#define LRC_PPHWSP_SIZE SZ_4K
/* The register state starts right after the PPHWSP. */
u32 xe_lrc_regs_offset(struct xe_lrc *lrc)
{
	u32 offset = xe_lrc_pphwsp_offset(lrc);

	offset += LRC_PPHWSP_SIZE;
	return offset;
}
/*
 * Byte size counted as register state by xe_lrc_skip_size(): 96 dwords from
 * GRAPHICS_VERx100 1250 on, 80 dwords before.
 */
static size_t lrc_reg_size(struct xe_device *xe)
{
	const size_t num_dw = GRAPHICS_VERx100(xe) >= 1250 ? 96 : 80;

	return num_dw * sizeof(u32);
}
/* Combined size of the PPHWSP and the register state for @xe. */
size_t xe_lrc_skip_size(struct xe_device *xe)
{
	const size_t regs_bytes = lrc_reg_size(xe);

	return LRC_PPHWSP_SIZE + regs_bytes;
}
/* Offset of the seqno, kept in the driver-defined portion of the PPHWSP. */
static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
{
	const u32 pphwsp = xe_lrc_pphwsp_offset(lrc);

	return pphwsp + LRC_SEQNO_PPHWSP_OFFSET;
}
/* Offset of the start seqno, kept in the driver-defined portion of the PPHWSP. */
static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
{
	const u32 pphwsp = xe_lrc_pphwsp_offset(lrc);

	return pphwsp + LRC_START_SEQNO_PPHWSP_OFFSET;
}
/* Offset of the ctx job timestamp, kept in the driver-defined portion of the PPHWSP. */
static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc)
{
	const u32 pphwsp = xe_lrc_pphwsp_offset(lrc);

	return pphwsp + LRC_CTX_JOB_TIMESTAMP_OFFSET;
}
/* Offset of the parallel area, kept in the driver-defined portion of the PPHWSP. */
static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
{
	const u32 pphwsp = xe_lrc_pphwsp_offset(lrc);

	return pphwsp + LRC_PARALLEL_PPHWSP_OFFSET;
}
/* Offset of the CTX_TIMESTAMP dword inside the register state. */
static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc)
{
	const u32 regs_start = __xe_lrc_regs_offset(lrc);

	return regs_start + CTX_TIMESTAMP * sizeof(u32);
}
/* The indirect ring state occupies the last LRC_INDIRECT_RING_STATE_SIZE bytes of the LRC. */
static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc)
{
	const u32 total = lrc->size;

	return total - LRC_INDIRECT_RING_STATE_SIZE;
}
#define DECL_MAP_ADDR_HELPERS(elem) \
static inline struct iosys_map __xe_lrc_##elem##_map(struct xe_lrc *lrc) \
{ \
struct iosys_map map = lrc->bo->vmap; \
\
xe_assert(lrc_to_xe(lrc), !iosys_map_is_null(&map)); \
iosys_map_incr(&map, __xe_lrc_##elem##_offset(lrc)); \
return map; \
} \
static inline u32 __maybe_unused __xe_lrc_##elem##_ggtt_addr(struct xe_lrc *lrc) \
{ \
return xe_bo_ggtt_addr(lrc->bo) + __xe_lrc_##elem##_offset(lrc); \
} \
DECL_MAP_ADDR_HELPERS(ring)
DECL_MAP_ADDR_HELPERS(pphwsp)
DECL_MAP_ADDR_HELPERS(seqno)
DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(ctx_job_timestamp)
DECL_MAP_ADDR_HELPERS(ctx_timestamp)
DECL_MAP_ADDR_HELPERS(parallel)
DECL_MAP_ADDR_HELPERS(indirect_ring)
#undef DECL_MAP_ADDR_HELPERS
/**
 * xe_lrc_ctx_timestamp_ggtt_addr() - Get ctx timestamp GGTT address
 * @lrc: Pointer to the lrc.
 *
 * Returns: GGTT address of the CTX_TIMESTAMP dword in the LRC register state
 */
u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc)
{
	const u32 addr = __xe_lrc_ctx_timestamp_ggtt_addr(lrc);

	return addr;
}
/**
* xe_lrc_ctx_timestamp() - Read ctx timestamp value
* @lrc: Pointer to the lrc.
*
* Returns: ctx timestamp value
*/
u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc)
{
struct xe_device *xe = lrc_to_xe(lrc);
struct iosys_map map;
map = __xe_lrc_ctx_timestamp_map(lrc);
return xe_map_read32(xe, &map);
}
/**
 * xe_lrc_ctx_job_timestamp_ggtt_addr() - Get ctx job timestamp GGTT address
 * @lrc: Pointer to the lrc.
 *
 * Returns: GGTT address of the ctx job timestamp slot in the PPHWSP
 */
u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc)
{
	const u32 addr = __xe_lrc_ctx_job_timestamp_ggtt_addr(lrc);

	return addr;
}
/**
* xe_lrc_ctx_job_timestamp() - Read ctx job timestamp value
* @lrc: Pointer to the lrc.
*
* Returns: ctx timestamp job value
*/
u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc)
{
struct xe_device *xe = lrc_to_xe(lrc);
struct iosys_map map;
map = __xe_lrc_ctx_job_timestamp_map(lrc);
return xe_map_read32(xe, &map);
}
/* GGTT address of the LRC, which is the address of its PPHWSP. */
u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc)
{
	const u32 pphwsp_addr = __xe_lrc_pphwsp_ggtt_addr(lrc);

	return pphwsp_addr;
}
/* GGTT address of the indirect ring state, or 0 when @lrc has none. */
u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc)
{
	if (xe_lrc_has_indirect_ring_state(lrc))
		return __xe_lrc_indirect_ring_ggtt_addr(lrc);

	return 0;
}
static u32 xe_lrc_read_indirect_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
struct xe_device *xe = lrc_to_xe(lrc);
struct iosys_map map;
map = __xe_lrc_indirect_ring_map(lrc);
iosys_map_incr(&map, reg_nr * sizeof(u32));
return xe_map_read32(xe, &map);
}
/* Store @val into dword @reg_nr of the indirect ring state area. */
static void xe_lrc_write_indirect_ctx_reg(struct xe_lrc *lrc,
					  int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map slot = __xe_lrc_indirect_ring_map(lrc);

	iosys_map_incr(&slot, reg_nr * sizeof(u32));
	xe_map_write32(xe, &slot, val);
}
u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr)
{
struct xe_device *xe = lrc_to_xe(lrc);
struct iosys_map map;
map = __xe_lrc_regs_map(lrc);
iosys_map_incr(&map, reg_nr * sizeof(u32));
return xe_map_read32(xe, &map);
}
/* Store @val into dword @reg_nr of the LRC register state. */
void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map slot = __xe_lrc_regs_map(lrc);

	iosys_map_incr(&slot, reg_nr * sizeof(u32));
	xe_map_write32(xe, &slot, val);
}
/*
 * Build a zeroed context image for @hwe in kernel memory and fill in the
 * command/offset skeleton, context control, memory-based interrupt entries
 * and the STOP_RING reset. When the GT uses indirect ring state, its layout
 * is decoded into the tail of the image as well.
 *
 * Returns the kzalloc'ed image (the caller frees it) or NULL on allocation
 * failure.
 */
static void *empty_lrc_data(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	void *image = kzalloc(xe_gt_lrc_size(gt, hwe->class), GFP_KERNEL);
	u32 *state;

	if (!image)
		return NULL;

	/* 1st page: Per-Process of HW status Page */
	state = image + LRC_PPHWSP_SIZE;
	set_offsets(state, reg_offsets(gt_to_xe(gt), hwe->class), hwe);
	set_context_control(state, hwe);
	set_memory_based_intr(state, hwe);
	reset_stop_ring(state, hwe);

	if (!xe_gt_has_indirect_ring_state(gt))
		return image;

	/* The indirect ring state lives at the end of the image */
	state = image + xe_gt_lrc_size(gt, hwe->class) -
		LRC_INDIRECT_RING_STATE_SIZE;
	set_offsets(state, xe2_indirect_ring_state_offsets, hwe);

	return image;
}
static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
{
u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);
xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
}
/*
 * Tear down what xe_lrc_init() set up: finish the fence context, then unpin
 * the backing BO under its lock and drop the reference to it.
 */
static void xe_lrc_finish(struct xe_lrc *lrc)
{
	struct xe_bo *bo = lrc->bo;

	xe_hw_fence_ctx_finish(&lrc->fence_ctx);

	xe_bo_lock(bo, false);
	xe_bo_unpin(bo);
	xe_bo_unlock(bo);

	xe_bo_put(bo);
}
/*
* Dword indices into the LRC register state. set_offsets() stores a register
* offset at an even slot and leaves the following dword for the value, so the
* "+ 1" presumably selects the value dword of the entry - TODO confirm the
* base indices against the platform's context layout.
*/
#define PVC_CTX_ASID (0x2e + 1)
#define PVC_CTX_ACC_CTR_THOLD (0x2a + 1)
/*
* xe_lrc_init() - allocate the backing BO and initialise a zeroed LRC
* @lrc: LRC to initialise
* @hwe: hardware engine the context is for
* @vm: address space to bind, may be NULL
* @ring_size: size of the ring placed at the start of the BO
*
* The BO holds the ring followed by the context image of
* xe_gt_lrc_size() bytes. The image is seeded from the GT's default LRC for
* the engine class when one exists, otherwise from empty_lrc_data().
*
* Returns 0 on success or a negative error code; on failure after the BO was
* created, everything set up so far is released through xe_lrc_finish().
*/
static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
struct xe_vm *vm, u32 ring_size)
{
struct xe_gt *gt = hwe->gt;
struct xe_tile *tile = gt_to_tile(gt);
struct xe_device *xe = gt_to_xe(gt);
struct iosys_map map;
void *init_data = NULL;
u32 arb_enable;
u32 lrc_size;
int err;
kref_init(&lrc->refcount);
lrc->flags = 0;
lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class);
if (xe_gt_has_indirect_ring_state(gt))
lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;
/*
* FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
* via VM bind calls.
*/
lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size,
ttm_bo_type_kernel,
XE_BO_FLAG_VRAM_IF_DGFX(tile) |
XE_BO_FLAG_GGTT |
XE_BO_FLAG_GGTT_INVALIDATE);
if (IS_ERR(lrc->bo))
return PTR_ERR(lrc->bo);
lrc->size = lrc_size;
lrc->tile = gt_to_tile(hwe->gt);
lrc->ring.size = ring_size;
lrc->ring.tail = 0;
lrc->ctx_timestamp = 0;
xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
hwe->fence_irq, hwe->name);
/* No recorded default image for this class yet: build a blank one */
if (!gt->default_lrc[hwe->class]) {
init_data = empty_lrc_data(hwe);
if (!init_data) {
err = -ENOMEM;
goto err_lrc_finish;
}
}
/*
* Init Per-Process of HW status Page, LRC / context state to known
* values
*/
map = __xe_lrc_pphwsp_map(lrc);
if (!init_data) {
/* Zero the PPHWSP and copy everything after it from the default image */
xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */
xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE);
} else {
/* The blank image already starts with a zeroed PPHWSP; copy it whole */
xe_map_memcpy_to(xe, &map, 0, init_data,
xe_gt_lrc_size(gt, hwe->class));
kfree(init_data);
}
if (vm) {
xe_lrc_set_ppgtt(lrc, vm);
if (vm->xef)
xe_drm_client_add_bo(vm->xef->client, lrc->bo);
}
/* Ring start/head/tail/control go either to the indirect area or the main state */
if (xe_gt_has_indirect_ring_state(gt)) {
xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
__xe_lrc_indirect_ring_ggtt_addr(lrc));
xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START,
__xe_lrc_ring_ggtt_addr(lrc));
xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0);
xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0);
xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail);
xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL,
RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
} else {
xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, 0);
xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
xe_lrc_write_ctx_reg(lrc, CTX_RING_CTL,
RING_CTL_SIZE(lrc->ring.size) | RING_VALID);
}
xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0);
if (xe->info.has_asid && vm)
xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);
/* Build the descriptor flags; the GGTT address is OR'ed in by xe_lrc_descriptor() */
lrc->desc = LRC_VALID;
lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT);
/* TODO: Priority */
/* While this appears to have something about privileged batches or
* some such, it really just means PPGTT mode.
*/
if (vm)
lrc->desc |= LRC_PRIVILEGE;
if (GRAPHICS_VERx100(xe) < 1250) {
lrc->desc |= FIELD_PREP(LRC_ENGINE_INSTANCE, hwe->instance);
lrc->desc |= FIELD_PREP(LRC_ENGINE_CLASS, hwe->class);
}
/* The first ring entry turns arbitration on */
arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));
/* Seed both seqno slots with the value just before the next seqno to be emitted */
map = __xe_lrc_seqno_map(lrc);
xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);
map = __xe_lrc_start_seqno_map(lrc);
xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);
return 0;
err_lrc_finish:
xe_lrc_finish(lrc);
return err;
}
/**
 * xe_lrc_create - Create a LRC
 * @hwe: Hardware Engine
 * @vm: The VM (address space)
 * @ring_size: LRC ring size
 *
 * Allocate a zeroed LRC object and initialize the Logical Ring Context. The
 * object is freed again if initialization fails.
 *
 * Return pointer to created LRC upon success and an error pointer
 * upon failure.
 */
struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
			     u32 ring_size)
{
	struct xe_lrc *lrc = kzalloc(sizeof(*lrc), GFP_KERNEL);
	int err;

	if (!lrc)
		return ERR_PTR(-ENOMEM);

	err = xe_lrc_init(lrc, hwe, vm, ring_size);
	if (!err)
		return lrc;

	kfree(lrc);
	return ERR_PTR(err);
}
/**
 * xe_lrc_destroy - Destroy the LRC
 * @ref: reference to LRC
 *
 * Called when ref == 0, release resources held by the Logical Ring Context
 * (LRC) and free the LRC memory.
 */
void xe_lrc_destroy(struct kref *ref)
{
	struct xe_lrc *dead = container_of(ref, struct xe_lrc, refcount);

	xe_lrc_finish(dead);
	kfree(dead);
}
/* Store @tail in the ring tail slot, in the indirect ring state when @lrc uses one. */
void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail)
{
	if (!xe_lrc_has_indirect_ring_state(lrc)) {
		xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, tail);
		return;
	}

	xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, tail);
}
/* Read the ring tail slot (indirect or direct) and keep only the TAIL_ADDR bits. */
u32 xe_lrc_ring_tail(struct xe_lrc *lrc)
{
	u32 raw;

	if (xe_lrc_has_indirect_ring_state(lrc))
		raw = xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL);
	else
		raw = xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL);

	return raw & TAIL_ADDR;
}
/* Store @head in the ring head slot, in the indirect ring state when @lrc uses one. */
void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head)
{
	if (!xe_lrc_has_indirect_ring_state(lrc)) {
		xe_lrc_write_ctx_reg(lrc, CTX_RING_HEAD, head);
		return;
	}

	xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, head);
}
/* Read the ring head slot (indirect or direct) and keep only the HEAD_ADDR bits. */
u32 xe_lrc_ring_head(struct xe_lrc *lrc)
{
	u32 raw;

	if (xe_lrc_has_indirect_ring_state(lrc))
		raw = xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD);
	else
		raw = xe_lrc_read_ctx_reg(lrc, CTX_RING_HEAD);

	return raw & HEAD_ADDR;
}
/*
 * Bytes available between the driver's tail and the head read back from the
 * context. The mask arithmetic relies on the ring size being a power of two;
 * head == tail yields the full ring size.
 */
u32 xe_lrc_ring_space(struct xe_lrc *lrc)
{
	const u32 head = xe_lrc_ring_head(lrc);
	const u32 wrap_mask = lrc->ring.size - 1;
	const u32 gap = head - lrc->ring.tail - 1;

	return (gap & wrap_mask) + 1;
}
/*
 * Copy @size bytes to the ring at the current tail, then advance the tail,
 * wrapping at the ring size. @ring is taken by value, so the caller's map is
 * not moved. The copy itself does not wrap; the caller splits writes that
 * cross the end of the ring.
 */
static void __xe_lrc_write_ring(struct xe_lrc *lrc, struct iosys_map ring,
				const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	const u32 wrap_mask = lrc->ring.size - 1;

	iosys_map_incr(&ring, lrc->ring.tail);
	xe_map_memcpy_to(xe, &ring, 0, data, size);

	lrc->ring.tail = (lrc->ring.tail + size) & wrap_mask;
}
/*
 * Append @size bytes (a multiple of 4) to the ring, splitting the copy in
 * two when it crosses the end of the ring. If @size is not a multiple of 8,
 * one MI_NOOP dword is appended afterwards.
 */
void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size)
{
	struct xe_device *xe = lrc_to_xe(lrc);
	struct iosys_map ring;
	u32 until_wrap;

	xe_assert(xe, IS_ALIGNED(size, 4));

	ring = __xe_lrc_ring_map(lrc);

	xe_assert(xe, lrc->ring.tail < lrc->ring.size);
	until_wrap = lrc->ring.size - lrc->ring.tail;

	if (size <= until_wrap) {
		__xe_lrc_write_ring(lrc, ring, data, size);
	} else {
		/* Fill up to the end of the ring, then continue from its start */
		__xe_lrc_write_ring(lrc, ring, data, until_wrap);
		__xe_lrc_write_ring(lrc, ring, data + until_wrap,
				    size - until_wrap);
	}

	/* Pad to the next 8-byte boundary */
	if (!IS_ALIGNED(size, 8)) {
		u32 pad = MI_NOOP;

		__xe_lrc_write_ring(lrc, ring, &pad, sizeof(pad));
	}
}
/* Context descriptor: the flags built in xe_lrc_init() combined with the LRC GGTT address. */
u64 xe_lrc_descriptor(struct xe_lrc *lrc)
{
	u64 desc = lrc->desc;

	desc |= xe_lrc_ggtt_addr(lrc);
	return desc;
}
/* GGTT address of the seqno slot in the PPHWSP. */
u32 xe_lrc_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	const u32 addr = __xe_lrc_seqno_ggtt_addr(lrc);

	return addr;
}
/**
 * xe_lrc_alloc_seqno_fence() - Allocate an lrc seqno fence.
 *
 * Allocate but don't initialize an lrc seqno fence; this forwards to
 * xe_hw_fence_alloc().
 *
 * Return: Pointer to the allocated fence or
 * negative error pointer on error.
 */
struct dma_fence *xe_lrc_alloc_seqno_fence(void)
{
	struct dma_fence *fence = xe_hw_fence_alloc();

	return fence;
}
/**
 * xe_lrc_free_seqno_fence() - Free an lrc seqno fence.
 * @fence: Pointer to the fence to free.
 *
 * Frees an lrc seqno fence that hasn't yet been
 * initialized; this forwards to xe_hw_fence_free().
 */
void xe_lrc_free_seqno_fence(struct dma_fence *fence)
{
	xe_hw_fence_free(fence);
}
/**
* xe_lrc_init_seqno_fence() - Initialize an lrc seqno fence.
* @lrc: Pointer to the lrc.
* @fence: Pointer to the fence to initialize.
*
* Initializes a pre-allocated lrc seqno fence.
* After initialization, the fence is subject to normal
* dma-fence refcounting.
*/
void xe_lrc_init_seqno_fence(struct xe_lrc *lrc, struct dma_fence *fence)
{
xe_hw_fence_init(fence, &lrc->fence_ctx, __xe_lrc_seqno_map(lrc));
}
s32 xe_lrc_seqno(struct xe_lrc *lrc)
{
struct iosys_map map = __xe_lrc_seqno_map(lrc);
return xe_map_read32(lrc_to_xe(lrc), &map);
}
s32 xe_lrc_start_seqno(struct xe_lrc *lrc)
{
struct iosys_map map = __xe_lrc_start_seqno_map(lrc);
return xe_map_read32(lrc_to_xe(lrc), &map);
}
/* GGTT address of the start seqno slot in the PPHWSP. */
u32 xe_lrc_start_seqno_ggtt_addr(struct xe_lrc *lrc)
{
	const u32 addr = __xe_lrc_start_seqno_ggtt_addr(lrc);

	return addr;
}
/* GGTT address of the parallel area in the PPHWSP. */
u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc)
{
	const u32 addr = __xe_lrc_parallel_ggtt_addr(lrc);

	return addr;
}
struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
{
return __xe_lrc_parallel_map(lrc);
}
/*
 * Length in dwords of the instruction whose header is @cmd_header, as
 * encoded in the header itself.
 */
static int instr_dw(u32 cmd_header)
{
	const u32 type_and_pipeline = cmd_header &
				      (XE_INSTR_CMD_TYPE | GFXPIPE_PIPELINE);
	u32 len_field;

	/* GFXPIPE "SINGLE_DW" opcodes are a single dword */
	if (type_and_pipeline == GFXPIPE_SINGLE_DW_CMD(0, 0))
		return 1;

	if ((cmd_header & GFXPIPE_MATCH_MASK) == CMD_3DSTATE_SO_DECL_LIST)
		/* 3DSTATE_SO_DECL_LIST has a 9-bit dword length rather than 8 */
		len_field = REG_FIELD_GET(CMD_3DSTATE_SO_DECL_LIST_DW_LEN,
					  cmd_header);
	else
		/* Most instructions have the # of dwords (minus 2) in 7:0 */
		len_field = REG_FIELD_GET(XE_INSTR_LEN_MASK, cmd_header);

	return len_field + 2;
}
/*
 * dump_mi_command() - decode and print one MI instruction
 * @p: printer receiving the decoded text
 * @gt: GT, used for the length-overrun warning
 * @dw: pointer to the instruction header dword
 * @remaining_dw: number of valid dwords starting at @dw
 *
 * Returns the number of dwords consumed, never more than @remaining_dw.
 * MI_BATCH_BUFFER_END consumes everything that is left.
 */
static int dump_mi_command(struct drm_printer *p,
			   struct xe_gt *gt,
			   u32 *dw,
			   int remaining_dw)
{
	u32 inst_header = *dw;
	u32 numdw = instr_dw(inst_header);
	u32 opcode = REG_FIELD_GET(MI_OPCODE, inst_header);
	int num_noop;

	/* First check for commands that don't have/use a '# DW' field */
	switch (inst_header & MI_OPCODE) {
	case MI_NOOP:
		/* Collapse a run of consecutive NOOPs into one line */
		num_noop = 1;
		while (num_noop < remaining_dw &&
		       (*(++dw) & REG_GENMASK(31, 23)) == MI_NOOP)
			num_noop++;
		drm_printf(p, "[%#010x] MI_NOOP (%d dwords)\n", inst_header, num_noop);
		return num_noop;

	case MI_TOPOLOGY_FILTER:
		drm_printf(p, "[%#010x] MI_TOPOLOGY_FILTER\n", inst_header);
		return 1;

	case MI_BATCH_BUFFER_END:
		drm_printf(p, "[%#010x] MI_BATCH_BUFFER_END\n", inst_header);
		/* Return 'remaining_dw' to consume the rest of the LRC */
		return remaining_dw;
	}

	/*
	 * Any remaining commands include a # of dwords.  We should make sure
	 * it doesn't exceed the remaining size of the LRC.
	 */
	if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
		numdw = remaining_dw;

	switch (inst_header & MI_OPCODE) {
	case MI_LOAD_REGISTER_IMM:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_IMM: %d regs\n",
			   inst_header, (numdw - 1) / 2);
		/*
		 * Print complete (offset, value) pairs only.  After the clamp
		 * above numdw may be even, in which case the last offset has
		 * no value dword inside the buffer and must not be read.
		 */
		for (u32 i = 1; i + 1 < numdw; i += 2)
			drm_printf(p, " - %#6x = %#010x\n", dw[i], dw[i + 1]);
		return numdw;

	case MI_LOAD_REGISTER_MEM & MI_OPCODE:
		drm_printf(p, "[%#010x] MI_LOAD_REGISTER_MEM: %s%s\n",
			   inst_header,
			   dw[0] & MI_LRI_LRM_CS_MMIO ? "CS_MMIO " : "",
			   dw[0] & MI_LRM_USE_GGTT ? "USE_GGTT " : "");
		if (numdw == 4)
			drm_printf(p, " - %#6x = %#010llx\n",
				   dw[1], ((u64)(dw[3]) << 32 | (u64)(dw[2])));
		else
			/* Unexpected length: hex-dump what is there */
			drm_printf(p, " - %*ph (%s)\n",
				   (int)sizeof(u32) * (numdw - 1), dw + 1,
				   numdw < 4 ? "truncated" : "malformed");
		return numdw;

	case MI_FORCE_WAKEUP:
		drm_printf(p, "[%#010x] MI_FORCE_WAKEUP\n", inst_header);
		return numdw;

	default:
		drm_printf(p, "[%#010x] unknown MI opcode %#x, likely %d dwords\n",
			   inst_header, opcode, numdw);
		return numdw;
	}
}
/*
* dump_gfxpipe_command() - print one GFXPIPE instruction by name
* @p: printer receiving the decoded text
* @gt: GT, used for the length-overrun warning
* @dw: pointer to the instruction header dword
* @remaining_dw: number of valid dwords starting at @dw
*
* Only the header is decoded; the payload is not printed. Returns the
* instruction length in dwords, clamped to @remaining_dw.
*/
static int dump_gfxpipe_command(struct drm_printer *p,
struct xe_gt *gt,
u32 *dw,
int remaining_dw)
{
u32 numdw = instr_dw(*dw);
u32 pipeline = REG_FIELD_GET(GFXPIPE_PIPELINE, *dw);
u32 opcode = REG_FIELD_GET(GFXPIPE_OPCODE, *dw);
u32 subopcode = REG_FIELD_GET(GFXPIPE_SUBOPCODE, *dw);
/*
* Make sure we haven't mis-parsed a number of dwords that exceeds the
* remaining size of the LRC.
*/
if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
numdw = remaining_dw;
switch (*dw & GFXPIPE_MATCH_MASK) {
/*
* MATCH()/MATCH3D() each expand to a complete case that prints the command
* name and returns numdw. They refer to the locals p, dw and numdw by name
* and stay defined after this function.
*/
#define MATCH(cmd) \
case cmd: \
drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
return numdw
#define MATCH3D(cmd) \
case CMD_##cmd: \
drm_printf(p, "[%#010x] " #cmd " (%d dwords)\n", *dw, numdw); \
return numdw
MATCH(STATE_BASE_ADDRESS);
MATCH(STATE_SIP);
MATCH(GPGPU_CSR_BASE_ADDRESS);
MATCH(STATE_COMPUTE_MODE);
MATCH3D(3DSTATE_BTD);
MATCH(STATE_SYSTEM_MEM_FENCE_ADDRESS);
MATCH(STATE_CONTEXT_DATA_BASE_ADDRESS);
MATCH3D(3DSTATE_VF_STATISTICS);
MATCH(PIPELINE_SELECT);
MATCH3D(3DSTATE_DRAWING_RECTANGLE_FAST);
MATCH3D(3DSTATE_CLEAR_PARAMS);
MATCH3D(3DSTATE_DEPTH_BUFFER);
MATCH3D(3DSTATE_STENCIL_BUFFER);
MATCH3D(3DSTATE_HIER_DEPTH_BUFFER);
MATCH3D(3DSTATE_VERTEX_BUFFERS);
MATCH3D(3DSTATE_VERTEX_ELEMENTS);
MATCH3D(3DSTATE_INDEX_BUFFER);
MATCH3D(3DSTATE_VF);
MATCH3D(3DSTATE_MULTISAMPLE);
MATCH3D(3DSTATE_CC_STATE_POINTERS);
MATCH3D(3DSTATE_SCISSOR_STATE_POINTERS);
MATCH3D(3DSTATE_VS);
MATCH3D(3DSTATE_GS);
MATCH3D(3DSTATE_CLIP);
MATCH3D(3DSTATE_SF);
MATCH3D(3DSTATE_WM);
MATCH3D(3DSTATE_CONSTANT_VS);
MATCH3D(3DSTATE_CONSTANT_GS);
MATCH3D(3DSTATE_CONSTANT_PS);
MATCH3D(3DSTATE_SAMPLE_MASK);
MATCH3D(3DSTATE_CONSTANT_HS);
MATCH3D(3DSTATE_CONSTANT_DS);
MATCH3D(3DSTATE_HS);
MATCH3D(3DSTATE_TE);
MATCH3D(3DSTATE_DS);
MATCH3D(3DSTATE_STREAMOUT);
MATCH3D(3DSTATE_SBE);
MATCH3D(3DSTATE_PS);
MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP);
MATCH3D(3DSTATE_CPS_POINTERS);
MATCH3D(3DSTATE_VIEWPORT_STATE_POINTERS_CC);
MATCH3D(3DSTATE_BLEND_STATE_POINTERS);
MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_VS);
MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_HS);
MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_DS);
MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_GS);
MATCH3D(3DSTATE_BINDING_TABLE_POINTERS_PS);
MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_VS);
MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_HS);
MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_DS);
MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_GS);
MATCH3D(3DSTATE_SAMPLER_STATE_POINTERS_PS);
MATCH3D(3DSTATE_VF_INSTANCING);
MATCH3D(3DSTATE_VF_SGVS);
MATCH3D(3DSTATE_VF_TOPOLOGY);
MATCH3D(3DSTATE_WM_CHROMAKEY);
MATCH3D(3DSTATE_PS_BLEND);
MATCH3D(3DSTATE_WM_DEPTH_STENCIL);
MATCH3D(3DSTATE_PS_EXTRA);
MATCH3D(3DSTATE_RASTER);
MATCH3D(3DSTATE_SBE_SWIZ);
MATCH3D(3DSTATE_WM_HZ_OP);
MATCH3D(3DSTATE_VF_COMPONENT_PACKING);
MATCH3D(3DSTATE_VF_SGVS_2);
MATCH3D(3DSTATE_VFG);
MATCH3D(3DSTATE_URB_ALLOC_VS);
MATCH3D(3DSTATE_URB_ALLOC_HS);
MATCH3D(3DSTATE_URB_ALLOC_DS);
MATCH3D(3DSTATE_URB_ALLOC_GS);
MATCH3D(3DSTATE_SO_BUFFER_INDEX_0);
MATCH3D(3DSTATE_SO_BUFFER_INDEX_1);
MATCH3D(3DSTATE_SO_BUFFER_INDEX_2);
MATCH3D(3DSTATE_SO_BUFFER_INDEX_3);
MATCH3D(3DSTATE_PRIMITIVE_REPLICATION);
MATCH3D(3DSTATE_TBIMR_TILE_PASS_INFO);
MATCH3D(3DSTATE_AMFS);
MATCH3D(3DSTATE_DEPTH_BOUNDS);
MATCH3D(3DSTATE_AMFS_TEXTURE_POINTERS);
MATCH3D(3DSTATE_CONSTANT_TS_POINTER);
MATCH3D(3DSTATE_MESH_CONTROL);
MATCH3D(3DSTATE_MESH_DISTRIB);
MATCH3D(3DSTATE_TASK_REDISTRIB);
MATCH3D(3DSTATE_MESH_SHADER);
MATCH3D(3DSTATE_MESH_SHADER_DATA);
MATCH3D(3DSTATE_TASK_CONTROL);
MATCH3D(3DSTATE_TASK_SHADER);
MATCH3D(3DSTATE_TASK_SHADER_DATA);
MATCH3D(3DSTATE_URB_ALLOC_MESH);
MATCH3D(3DSTATE_URB_ALLOC_TASK);
MATCH3D(3DSTATE_CLIP_MESH);
MATCH3D(3DSTATE_SBE_MESH);
MATCH3D(3DSTATE_CPSIZE_CONTROL_BUFFER);
MATCH3D(3DSTATE_DRAWING_RECTANGLE);
MATCH3D(3DSTATE_CHROMA_KEY);
MATCH3D(3DSTATE_POLY_STIPPLE_OFFSET);
MATCH3D(3DSTATE_POLY_STIPPLE_PATTERN);
MATCH3D(3DSTATE_LINE_STIPPLE);
MATCH3D(3DSTATE_AA_LINE_PARAMETERS);
MATCH3D(3DSTATE_MONOFILTER_SIZE);
MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_VS);
MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_HS);
MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_DS);
MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_GS);
MATCH3D(3DSTATE_PUSH_CONSTANT_ALLOC_PS);
MATCH3D(3DSTATE_SO_DECL_LIST);
MATCH3D(3DSTATE_SO_BUFFER);
MATCH3D(3DSTATE_BINDING_TABLE_POOL_ALLOC);
MATCH3D(3DSTATE_SAMPLE_PATTERN);
MATCH3D(3DSTATE_3D_MODE);
MATCH3D(3DSTATE_SUBSLICE_HASH_TABLE);
MATCH3D(3DSTATE_SLICE_TABLE_STATE_POINTERS);
MATCH3D(3DSTATE_PTBR_TILE_PASS_INFO);
default:
/* Not in the list above: print the raw header fields instead of a name */
drm_printf(p, "[%#010x] unknown GFXPIPE command (pipeline=%#x, opcode=%#x, subopcode=%#x), likely %d dwords\n",
*dw, pipeline, opcode, subopcode, numdw);
return numdw;
}
}
/*
* Decode a single GFX_STATE-type instruction whose header is at @dw and
* print a one-line description of it to @p.
*
* @remaining_dw is the number of dwords left in the buffer being walked; the
* decoded length is clamped to it (with a GT warning) so the caller never
* advances past the end.
*
* For an unrecognized opcode the return value is the (possibly clamped)
* number of dwords this instruction occupies.
*
* NOTE(review): MATCH() is defined outside this chunk; it presumably expands
* to a case label that prints the instruction name and returns numdw, and so
* depends on the local names p, dw and numdw -- confirm before renaming any
* of them.
*/
static int dump_gfx_state_command(struct drm_printer *p,
struct xe_gt *gt,
u32 *dw,
int remaining_dw)
{
/* Length as encoded in the instruction header */
u32 numdw = instr_dw(*dw);
/* Opcode field, only reported for commands we do not recognize */
u32 opcode = REG_FIELD_GET(GFX_STATE_OPCODE, *dw);
/*
* Make sure we haven't mis-parsed a number of dwords that exceeds the
* remaining size of the LRC.
*/
if (xe_gt_WARN_ON(gt, numdw > remaining_dw))
numdw = remaining_dw;
/* Match on command type plus opcode, ignoring the remaining header bits */
switch (*dw & (XE_INSTR_GFX_STATE | GFX_STATE_OPCODE)) {
MATCH(STATE_WRITE_INLINE);
default:
drm_printf(p, "[%#010x] unknown GFX_STATE command (opcode=%#x), likely %d dwords\n",
*dw, opcode, numdw);
return numdw;
}
}
/**
 * xe_lrc_dump_default() - Decode and print the default LRC of an engine class
 * @p: Printer the decoded instruction list is written to
 * @gt: GT owning the default LRC
 * @hwe_class: Engine class whose default LRC should be dumped
 *
 * Walks the default LRC image one instruction at a time, starting after the
 * per-process hardware status page, and prints one line per instruction.
 * Each instruction is handed to the decoder matching its command type;
 * instructions of any other type are reported as unknown using the length
 * found in their header, clamped to the space that is left.
 *
 * If no default LRC has been recorded for @hwe_class a message saying so is
 * printed instead.
 */
void xe_lrc_dump_default(struct drm_printer *p,
			 struct xe_gt *gt,
			 enum xe_engine_class hwe_class)
{
	u32 *dw;
	int remaining_dw, num_dw;

	if (!gt->default_lrc[hwe_class]) {
		drm_printf(p, "No default LRC for class %d\n", hwe_class);
		return;
	}

	/*
	 * Skip the beginning of the LRC since it contains the per-process
	 * hardware status page.
	 */
	dw = gt->default_lrc[hwe_class] + LRC_PPHWSP_SIZE;
	remaining_dw = (xe_gt_lrc_size(gt, hwe_class) - LRC_PPHWSP_SIZE) / 4;

	while (remaining_dw > 0) {
		u32 cmd_type = *dw & XE_INSTR_CMD_TYPE;

		if (cmd_type == XE_INSTR_MI) {
			num_dw = dump_mi_command(p, gt, dw, remaining_dw);
		} else if (cmd_type == XE_INSTR_GFXPIPE) {
			num_dw = dump_gfxpipe_command(p, gt, dw, remaining_dw);
		} else if (cmd_type == XE_INSTR_GFX_STATE) {
			num_dw = dump_gfx_state_command(p, gt, dw, remaining_dw);
		} else {
			num_dw = min(instr_dw(*dw), remaining_dw);
			/*
			 * Zero-pad the header to the same fixed width the
			 * per-type decoders use so the dump stays aligned.
			 */
			drm_printf(p, "[%#010x] Unknown instruction of type %#x, likely %d dwords\n",
				   *dw, REG_FIELD_GET(XE_INSTR_CMD_TYPE, *dw),
				   num_dw);
		}

		dw += num_dw;
		remaining_dw -= num_dw;
	}
}
/*
* One entry of a table of state instructions to emit into a batch buffer.
*/
struct instr_state {
/* Instruction header dword (without the length field filled in) */
u32 instr;
/* Total size of the instruction in dwords, header included */
u16 num_dw;
};
/*
* SVG state instructions emitted by xe_lrc_emit_hwe_state_instructions() for
* render engines when Wa_14019789679 applies. Each entry gives the
* instruction header and its total length in dwords; only the header is
* written, the remaining dwords are skipped over.
*
* On graphics version 20 and later the emitter substitutes
* 3DSTATE_DRAWING_RECTANGLE_FAST for the 3DSTATE_DRAWING_RECTANGLE entry.
*/
static const struct instr_state xe_hpg_svg_state[] = {
{ .instr = CMD_3DSTATE_CONSTANT_VS, .num_dw = 11 },
{ .instr = CMD_3DSTATE_CONSTANT_HS, .num_dw = 11 },
{ .instr = CMD_3DSTATE_CONSTANT_DS, .num_dw = 11 },
{ .instr = CMD_3DSTATE_CONSTANT_GS, .num_dw = 11 },
{ .instr = CMD_3DSTATE_VERTEX_ELEMENTS, .num_dw = 69 },
{ .instr = CMD_3DSTATE_VF_COMPONENT_PACKING, .num_dw = 5 },
{ .instr = CMD_3DSTATE_VF_SGVS, .num_dw = 2 },
{ .instr = CMD_3DSTATE_VF_SGVS_2, .num_dw = 3 },
{ .instr = CMD_3DSTATE_VS, .num_dw = 9 },
{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_VS, .num_dw = 2 },
{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_VS, .num_dw = 2 },
{ .instr = CMD_3DSTATE_URB_ALLOC_VS, .num_dw = 3 },
{ .instr = CMD_3DSTATE_STREAMOUT, .num_dw = 5 },
{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_0, .num_dw = 8 },
{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_1, .num_dw = 8 },
{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_2, .num_dw = 8 },
{ .instr = CMD_3DSTATE_SO_BUFFER_INDEX_3, .num_dw = 8 },
{ .instr = CMD_3DSTATE_CLIP, .num_dw = 4 },
{ .instr = CMD_3DSTATE_PRIMITIVE_REPLICATION, .num_dw = 6 },
{ .instr = CMD_3DSTATE_CLIP_MESH, .num_dw = 2 },
{ .instr = CMD_3DSTATE_SF, .num_dw = 4 },
{ .instr = CMD_3DSTATE_SCISSOR_STATE_POINTERS, .num_dw = 2 },
{ .instr = CMD_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, .num_dw = 2 },
{ .instr = CMD_3DSTATE_RASTER, .num_dw = 5 },
{ .instr = CMD_3DSTATE_TBIMR_TILE_PASS_INFO, .num_dw = 4 },
{ .instr = CMD_3DSTATE_WM_HZ_OP, .num_dw = 6 },
{ .instr = CMD_3DSTATE_MULTISAMPLE, .num_dw = 2 },
{ .instr = CMD_3DSTATE_HS, .num_dw = 9 },
{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_HS, .num_dw = 2 },
{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_HS, .num_dw = 2 },
{ .instr = CMD_3DSTATE_URB_ALLOC_HS, .num_dw = 3 },
{ .instr = CMD_3DSTATE_TASK_CONTROL, .num_dw = 3 },
{ .instr = CMD_3DSTATE_TASK_SHADER, .num_dw = 7 },
{ .instr = CMD_3DSTATE_TASK_SHADER_DATA, .num_dw = 10 },
{ .instr = CMD_3DSTATE_URB_ALLOC_TASK, .num_dw = 3 },
{ .instr = CMD_3DSTATE_TE, .num_dw = 5 },
{ .instr = CMD_3DSTATE_TASK_REDISTRIB, .num_dw = 2 },
{ .instr = CMD_3DSTATE_DS, .num_dw = 11 },
{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_DS, .num_dw = 2 },
{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_DS, .num_dw = 2 },
{ .instr = CMD_3DSTATE_URB_ALLOC_DS, .num_dw = 3 },
{ .instr = CMD_3DSTATE_GS, .num_dw = 10 },
{ .instr = CMD_3DSTATE_BINDING_TABLE_POINTERS_GS, .num_dw = 2 },
{ .instr = CMD_3DSTATE_SAMPLER_STATE_POINTERS_GS, .num_dw = 2 },
{ .instr = CMD_3DSTATE_URB_ALLOC_GS, .num_dw = 3 },
{ .instr = CMD_3DSTATE_MESH_CONTROL, .num_dw = 3 },
{ .instr = CMD_3DSTATE_MESH_SHADER_DATA, .num_dw = 10 },
{ .instr = CMD_3DSTATE_URB_ALLOC_MESH, .num_dw = 3 },
{ .instr = CMD_3DSTATE_MESH_SHADER, .num_dw = 8 },
{ .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
};
void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
{
struct xe_gt *gt = q->hwe->gt;
struct xe_device *xe = gt_to_xe(gt);
const struct instr_state *state_table = NULL;
int state_table_size = 0;
/*
* Wa_14019789679
*
* If the driver doesn't explicitly emit the SVG instructions while
* setting up the default LRC, the context switch will write 0's
* (noops) into the LRC memory rather than the expected instruction
* headers. Application contexts start out as a copy of the default
* LRC, and if they also do not emit specific settings for some SVG
* state, then on context restore they'll unintentionally inherit
* whatever state setting the previous context had programmed into the
* hardware (i.e., the lack of a 3DSTATE_* instruction in the LRC will
* prevent the hardware from resetting that state back to any specific
* value).
*
* The official workaround only requires emitting 3DSTATE_MESH_CONTROL
* since that's a specific state setting that can easily cause GPU
* hangs if unintentionally inherited. However to be safe we'll
* continue to emit all of the SVG state since it's best not to leak
* any of the state between contexts, even if that leakage is harmless.
*/
if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) {
state_table = xe_hpg_svg_state;
state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
}
if (!state_table) {
xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
return;
}
for (int i = 0; i < state_table_size; i++) {
u32 instr = state_table[i].instr;
u16 num_dw = state_table[i].num_dw;
bool is_single_dw = ((instr & GFXPIPE_PIPELINE) == PIPELINE_SINGLE_DW);
xe_gt_assert(gt, (instr & XE_INSTR_CMD_TYPE) == XE_INSTR_GFXPIPE);
xe_gt_assert(gt, num_dw != 0);
xe_gt_assert(gt, is_single_dw ^ (num_dw > 1));
/*
* Xe2's SVG context is the same as the one on DG2 / MTL
* except that 3DSTATE_DRAWING_RECTANGLE (non-pipelined) has
* been replaced by 3DSTATE_DRAWING_RECTANGLE_FAST (pipelined).
* Just make the replacement here rather than defining a
* whole separate table for the single trivial change.
*/
if (GRAPHICS_VER(xe) >= 20 &&
instr == CMD_3DSTATE_DRAWING_RECTANGLE)
instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;
bb->cs[bb->len] = instr;
if (!is_single_dw)
bb->cs[bb->len] |= (num_dw - 2);
bb->len += num_dw;
}
}
struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
{
struct xe_lrc_snapshot *snapshot = kmalloc(sizeof(*snapshot), GFP_NOWAIT);
if (!snapshot)
return NULL;
if (lrc->bo->vm)
xe_vm_get(lrc->bo->vm);
snapshot->context_desc = xe_lrc_ggtt_addr(lrc);
snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc);
snapshot->head = xe_lrc_ring_head(lrc);
snapshot->tail.internal = lrc->ring.tail;
snapshot->tail.memory = xe_lrc_ring_tail(lrc);
snapshot->start_seqno = xe_lrc_start_seqno(lrc);
snapshot->seqno = xe_lrc_seqno(lrc);
snapshot->lrc_bo = xe_bo_get(lrc->bo);
snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset;
snapshot->lrc_snapshot = NULL;
snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
return snapshot;
}
/**
 * xe_lrc_snapshot_capture_delayed() - Copy the LRC contents into a snapshot
 * @snapshot: Snapshot from xe_lrc_snapshot_capture(), may be NULL
 *
 * Allocates a buffer with GFP_KERNEL and copies the LRC image out of the BO
 * recorded in @snapshot. The BO reference (and the reference on its VM, if
 * it has one) held by the snapshot is dropped before returning, whether or
 * not the copy succeeded. On failure the snapshot's lrc_snapshot pointer is
 * left NULL.
 *
 * Calling this again on a snapshot whose BO has already been consumed is a
 * no-op.
 */
void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot)
{
	struct xe_bo *bo;
	struct xe_vm *vm;
	struct iosys_map src;

	if (!snapshot)
		return;

	/*
	 * lrc_bo is cleared once it has been consumed below, so a NULL BO
	 * means there is nothing left to capture (and nothing to release).
	 */
	bo = snapshot->lrc_bo;
	if (!bo)
		return;

	/* Read the VM before dropping the BO reference that keeps bo valid */
	vm = bo->vm;
	snapshot->lrc_bo = NULL;

	snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL);
	if (!snapshot->lrc_snapshot)
		goto put_bo;

	xe_bo_lock(bo, false);
	if (!ttm_bo_vmap(&bo->ttm, &src)) {
		xe_map_memcpy_from(xe_bo_device(bo),
				   snapshot->lrc_snapshot, &src, snapshot->lrc_offset,
				   snapshot->lrc_size);
		ttm_bo_vunmap(&bo->ttm, &src);
	} else {
		/* Mapping failed: report "no data" rather than garbage */
		kvfree(snapshot->lrc_snapshot);
		snapshot->lrc_snapshot = NULL;
	}
	xe_bo_unlock(bo);

put_bo:
	xe_bo_put(bo);
	if (vm)
		xe_vm_put(vm);
}
void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p)
{
unsigned long i;
if (!snapshot)
return;
drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc);
drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n",
snapshot->indirect_context_desc);
drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head);
drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n",
snapshot->tail.internal, snapshot->tail.memory);
drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno);
drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno);
drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp);
drm_printf(p, "\tJob Timestamp: 0x%08x\n", snapshot->ctx_job_timestamp);
if (!snapshot->lrc_snapshot)
return;
drm_printf(p, "\t[HWSP].length: 0x%x\n", LRC_PPHWSP_SIZE);
drm_puts(p, "\t[HWSP].data: ");
for (i = 0; i < LRC_PPHWSP_SIZE; i += sizeof(u32)) {
u32 *val = snapshot->lrc_snapshot + i;
char dumped[ASCII85_BUFSZ];
drm_puts(p, ascii85_encode(*val, dumped));
}
drm_printf(p, "\n\t[HWCTX].length: 0x%lx\n", snapshot->lrc_size - LRC_PPHWSP_SIZE);
drm_puts(p, "\t[HWCTX].data: ");
for (; i < snapshot->lrc_size; i += sizeof(u32)) {
u32 *val = snapshot->lrc_snapshot + i;
char dumped[ASCII85_BUFSZ];
drm_puts(p, ascii85_encode(*val, dumped));
}
drm_puts(p, "\n");
}
void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot)
{
if (!snapshot)
return;
kvfree(snapshot->lrc_snapshot);
if (snapshot->lrc_bo) {
struct xe_vm *vm;
vm = snapshot->lrc_bo->vm;
xe_bo_put(snapshot->lrc_bo);
if (vm)
xe_vm_put(vm);
}
kfree(snapshot);
}
/**
 * xe_lrc_update_timestamp() - Refresh the saved ctx timestamp
 * @lrc: Pointer to the lrc.
 * @old_ts: Output; receives the ctx timestamp that was saved before this call
 *
 * Store the currently saved ctx timestamp in @old_ts, then read the new ctx
 * timestamp with xe_lrc_ctx_timestamp() and save it in @lrc.
 *
 * Returns: New ctx timestamp value
 */
u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts)
{
	u32 new_ts;

	*old_ts = lrc->ctx_timestamp;

	new_ts = xe_lrc_ctx_timestamp(lrc);
	lrc->ctx_timestamp = new_ts;

	return new_ts;
}