/* blob: b367ecfa42de9fdd0f0f54cfd79dc3375e2dc382 [file] [log] [blame] */
// SPDX-License-Identifier: MIT
/*
* Copyright © 2018 Intel Corporation
*/
#include <linux/prime_numbers.h>
#include "gem/i915_gem_pm.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_reset.h"
#include "gt/selftest_engine_heartbeat.h"
#include "i915_selftest.h"
#include "selftests/i915_random.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_live_test.h"
#include "selftests/igt_spinner.h"
#include "selftests/lib_sw_fence.h"
#include "gem/selftests/igt_gem_utils.h"
#include "gem/selftests/mock_context.h"
#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
#define NUM_GPR 16
#define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
/*
 * Decide whether @rq should be treated as in flight by the submission
 * waiters in this file.
 *
 * Returns true if the request is flagged active, is on hold, or carries an
 * initial breadcrumb and has already started; false otherwise.
 */
static bool is_active(struct i915_request *rq)
{
	return i915_request_is_active(rq) ||
	       i915_request_on_hold(rq) ||
	       (i915_request_has_initial_breadcrumb(rq) &&
		i915_request_started(rq));
}
/*
 * Kick the submission tasklet and poll until @rq has either completed or
 * is active (see is_active()) with no submission still pending on @engine.
 *
 * @timeout is a relative duration in jiffies.
 *
 * Returns 0 on success, -ETIME if the deadline passed first.
 */
static int wait_for_submit(struct intel_engine_cs *engine,
			   struct i915_request *rq,
			   unsigned long timeout)
{
	unsigned long deadline;

	/* Ignore our own attempts to suppress excess tasklets */
	tasklet_hi_schedule(&engine->sched_engine->tasklet);

	deadline = jiffies + timeout;
	for (;;) {
		/*
		 * Sample the expiry before checking, so that one complete
		 * round of checks always runs after the deadline elapsed.
		 */
		const bool expired = time_after(jiffies, deadline);

		if (i915_request_completed(rq)) /* that was quick! */
			return 0;

		/* Wait until the HW has acknowledged the submission (or err) */
		intel_engine_flush_submission(engine);
		if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
			return 0;

		if (expired)
			return -ETIME;

		cond_resched();
	}
}
static int wait_for_reset(struct intel_engine_cs *engine,
struct i915_request *rq,
unsigned long timeout)
{
timeout += jiffies;
do {
cond_resched();
intel_engine_flush_submission(engine);
if (READ_ONCE(engine->execlists.pending[0]))
continue;
if (i915_request_completed(rq))
break;
if (READ_ONCE(rq->fence.error))
break;
} while (time_before(jiffies, timeout));
flush_scheduled_work();
if (rq->fence.error != -EIO) {
pr_err("%s: hanging request %llx:%lld not reset\n",
engine->name,
rq->fence.context,
rq->fence.seqno);
return -EINVAL;
}
/* Give the request a jiffie to complete after flushing the worker */
if (i915_request_wait(rq, 0,
max(0l, (long)(timeout - jiffies)) + 1) < 0) {
pr_err("%s: hanging request %llx:%lld did not complete\n",
engine->name,
rq->fence.context,
rq->fence.seqno);
return -ETIME;
}
return 0;
}
/*
 * Basic smoke test: on every engine, start a spinner on a fresh context,
 * check that it begins executing, end it and flush.
 *
 * @arg is the struct intel_gt under test. Skipped (returns 0) when the
 * device has no logical ring contexts.
 *
 * Returns 0 on success, -ENOMEM if the spinner cannot be initialised, a
 * context/request creation error, or -EIO if the spinner fails to start
 * (the GT is wedged in that case) or the flush fails.
 */
static int live_sanitycheck(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
		} else {
			i915_request_add(rq);

			if (igt_wait_for_spinner(&spin, rq)) {
				igt_spinner_end(&spin);
				if (igt_flush_test(gt->i915))
					err = -EIO;
			} else {
				GEM_TRACE("spinner failed to start\n");
				GEM_TRACE_DUMP();
				intel_gt_set_wedged(gt);
				err = -EIO;
			}
		}

		/* The context reference is dropped on every path */
		intel_context_put(ce);
		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	return err;
}
/*
 * Check that switching between two contexts on the same engine is never
 * turned into a lite-restore that crosses contexts.
 *
 * For each suitable engine, two contexts are created with poisoned rings;
 * ce[1]'s ring is offset to its midpoint so that its RING_TAIL lies beyond
 * the request written into ce[0]. A spinner runs on ce[0], followed by a
 * request on ce[1] and then another on ce[0].
 *
 * @gt:   GT under test
 * @prio: 0 to order the requests by a fence dependency; non-zero to
 *        reschedule the ce[1] request at that priority instead
 *        (engines without preemption are then skipped)
 *
 * Returns 0 on success, -ENOMEM if the spinner cannot be initialised,
 * -ETIME if the spinner never starts, -EIO if the live test bracket
 * reports a failure, or an error from context/request creation.
 */
static int live_unlite_restore(struct intel_gt *gt, int prio)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = -ENOMEM;

	/*
	 * Check that we can correctly context switch between 2 instances
	 * on the same engine from the same parent context.
	 */

	if (igt_spinner_init(&spin, gt))
		return err;

	err = 0;
	for_each_engine(engine, gt, id) {
		struct intel_context *ce[2] = {};
		struct i915_request *rq[2];
		struct igt_live_test t;
		int n;

		if (prio && !intel_engine_has_preemption(engine))
			continue;

		if (!intel_engine_can_store_dword(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
			err = -EIO;
			break;
		}
		st_engine_heartbeat_disable(engine);

		for (n = 0; n < ARRAY_SIZE(ce); n++) {
			struct intel_context *tmp;

			tmp = intel_context_create(engine);
			if (IS_ERR(tmp)) {
				err = PTR_ERR(tmp);
				goto err_ce;
			}

			err = intel_context_pin(tmp);
			if (err) {
				intel_context_put(tmp);
				goto err_ce;
			}

			/*
			 * Setup the pair of contexts such that if we
			 * lite-restore using the RING_TAIL from ce[1] it
			 * will execute garbage from ce[0]->ring.
			 */
			memset(tmp->ring->vaddr,
			       POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */
			       tmp->ring->vma->size);

			/* Only published once pinned; cleanup relies on this */
			ce[n] = tmp;
		}
		GEM_BUG_ON(!ce[1]->ring->size);
		intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2);
		lrc_update_regs(ce[1], engine, ce[1]->ring->head);

		rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
		if (IS_ERR(rq[0])) {
			err = PTR_ERR(rq[0]);
			goto err_ce;
		}

		i915_request_get(rq[0]);
		i915_request_add(rq[0]);
		GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit);

		if (!igt_wait_for_spinner(&spin, rq[0])) {
			/*
			 * Without a running spinner nothing below is
			 * exercised, so report the failure rather than
			 * letting this engine pass untested.
			 */
			pr_err("%s: spinner failed to start\n", engine->name);
			i915_request_put(rq[0]);
			err = -ETIME;
			goto err_ce;
		}

		rq[1] = i915_request_create(ce[1]);
		if (IS_ERR(rq[1])) {
			err = PTR_ERR(rq[1]);
			i915_request_put(rq[0]);
			goto err_ce;
		}

		if (!prio) {
			/*
			 * Ensure we do the switch to ce[1] on completion.
			 *
			 * rq[0] is already submitted, so this should reduce
			 * to a no-op (a wait on a request on the same engine
			 * uses the submit fence, not the completion fence),
			 * but it will install a dependency on rq[1] for rq[0]
			 * that will prevent the pair being reordered by
			 * timeslicing.
			 */
			i915_request_await_dma_fence(rq[1], &rq[0]->fence);
		}

		i915_request_get(rq[1]);
		i915_request_add(rq[1]);
		GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix);
		i915_request_put(rq[0]);

		if (prio) {
			struct i915_sched_attr attr = {
				.priority = prio,
			};

			/* Alternatively preempt the spinner with ce[1] */
			engine->sched_engine->schedule(rq[1], &attr);
		}

		/* And switch back to ce[0] for good measure */
		rq[0] = i915_request_create(ce[0]);
		if (IS_ERR(rq[0])) {
			err = PTR_ERR(rq[0]);
			i915_request_put(rq[1]);
			goto err_ce;
		}

		i915_request_await_dma_fence(rq[0], &rq[1]->fence);
		i915_request_get(rq[0]);
		i915_request_add(rq[0]);
		GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix);
		i915_request_put(rq[1]);
		i915_request_put(rq[0]);

err_ce:
		intel_engine_flush_submission(engine);
		igt_spinner_end(&spin);
		for (n = 0; n < ARRAY_SIZE(ce); n++) {
			/* Entries are filled in order; stop at the first gap */
			if (IS_ERR_OR_NULL(ce[n]))
				break;

			intel_context_unpin(ce[n]);
			intel_context_put(ce[n]);
		}

		st_engine_heartbeat_enable(engine);
		if (igt_live_test_end(&t))
			err = -EIO;
		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	return err;
}
/*
 * Selftest entry point: run live_unlite_restore() with a priority of 0,
 * so the requests are ordered by a fence dependency rather than by
 * rescheduling. @arg is the struct intel_gt under test.
 */
static int live_unlite_switch(void *arg)
{
	struct intel_gt *gt = arg;

	return live_unlite_restore(gt, 0);
}
/*
 * Selftest entry point: run live_unlite_restore() with I915_PRIORITY_MAX,
 * so the second context is rescheduled ahead of the spinner.
 * @arg is the struct intel_gt under test.
 */
static int live_unlite_preempt(void *arg)
{
	struct intel_gt *gt = arg;

	return live_unlite_restore(gt, I915_PRIORITY_MAX);
}
/*
 * Selftest: preempt a context whose ring has been filled until it wraps.
 *
 * For each engine with preemption that can store a dword, a barrier
 * priority spinner is started on ce[0] and further requests are added to
 * ce[0] until intel_ring_direction() reports the ring tail has moved past
 * the spinner's wa_tail. A barrier priority request on ce[1] is then
 * submitted and must be accepted within HZ / 2.
 *
 * @arg is the struct intel_gt under test.
 *
 * Returns 0 on success, -ENOMEM if the spinner cannot be initialised,
 * -ETIME if the spinner does not start (GT is wedged) or the preempting
 * request is not submitted, -EIO from the live test bracket, or an error
 * from context/request creation.
 */
static int live_unlite_ring(void *arg)
{
struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
struct igt_spinner spin;
enum intel_engine_id id;
int err = 0;
/*
* Setup a preemption event that will cause almost the entire ring
* to be unwound, potentially fooling our intel_ring_direction()
* into emitting a forward lite-restore instead of the rollback.
*/
if (igt_spinner_init(&spin, gt))
return -ENOMEM;
for_each_engine(engine, gt, id) {
struct intel_context *ce[2] = {};
struct i915_request *rq;
struct igt_live_test t;
int n;
if (!intel_engine_has_preemption(engine))
continue;
if (!intel_engine_can_store_dword(engine))
continue;
if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
err = -EIO;
break;
}
st_engine_heartbeat_disable(engine);
/* Create and pin both contexts, poisoning each ring */
for (n = 0; n < ARRAY_SIZE(ce); n++) {
struct intel_context *tmp;
tmp = intel_context_create(engine);
if (IS_ERR(tmp)) {
err = PTR_ERR(tmp);
goto err_ce;
}
err = intel_context_pin(tmp);
if (err) {
intel_context_put(tmp);
goto err_ce;
}
memset32(tmp->ring->vaddr,
0xdeadbeef, /* trigger a hang if executed */
tmp->ring->vma->size / sizeof(u32));
/* Only stored once pinned, so cleanup can unpin every non-NULL entry */
ce[n] = tmp;
}
/* Create max prio spinner, followed by N low prio nops */
rq = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto err_ce;
}
i915_request_get(rq);
rq->sched.attr.priority = I915_PRIORITY_BARRIER;
i915_request_add(rq);
if (!igt_wait_for_spinner(&spin, rq)) {
intel_gt_set_wedged(gt);
i915_request_put(rq);
err = -ETIME;
goto err_ce;
}
/* Fill the ring, until we will cause a wrap */
n = 0;
while (intel_ring_direction(ce[0]->ring,
rq->wa_tail,
ce[0]->ring->tail) <= 0) {
struct i915_request *tmp;
tmp = intel_context_create_request(ce[0]);
if (IS_ERR(tmp)) {
err = PTR_ERR(tmp);
i915_request_put(rq);
goto err_ce;
}
i915_request_add(tmp);
intel_engine_flush_submission(engine);
n++;
}
intel_engine_flush_submission(engine);
pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
engine->name, n,
ce[0]->ring->size,
ce[0]->ring->tail,
ce[0]->ring->emit,
rq->tail);
GEM_BUG_ON(intel_ring_direction(ce[0]->ring,
rq->tail,
ce[0]->ring->tail) <= 0);
/* Done with the spinner reference; rq is reused below */
i915_request_put(rq);
/* Create a second ring to preempt the first ring after rq[0] */
rq = intel_context_create_request(ce[1]);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto err_ce;
}
rq->sched.attr.priority = I915_PRIORITY_BARRIER;
i915_request_get(rq);
i915_request_add(rq);
err = wait_for_submit(engine, rq, HZ / 2);
i915_request_put(rq);
if (err) {
pr_err("%s: preemption request was not submitted\n",
engine->name);
err = -ETIME;
}
pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
engine->name,
ce[0]->ring->tail, ce[0]->ring->emit,
ce[1]->ring->tail, ce[1]->ring->emit);
/* Common per-engine cleanup, reached on success and on failure */
err_ce:
intel_engine_flush_submission(engine);
igt_spinner_end(&spin);
for (n = 0; n < ARRAY_SIZE(ce); n++) {
if (IS_ERR_OR_NULL(ce[n]))
break;
intel_context_unpin(ce[n]);
intel_context_put(ce[n]);
}
st_engine_heartbeat_enable(engine);
if (igt_live_test_end(&t))
err = -EIO;
if (err)
break;
}
igt_spinner_fini(&spin);
return err;
}
/*
 * Selftest: re-pinning a context must not rely on a stale ring->head.
 *
 * For each engine, a context is pinned, its ring is poisoned and
 * ring->emit/ring->tail are moved to the midpoint while ring->head is
 * still 0. The context is then unpinned and a new request is created on
 * it (which must start at a non-zero head) and submitted.
 *
 * @arg is the struct intel_gt under test.
 *
 * Returns 0 on success, -EIO if the live test bracket reports a failure,
 * or an error from context creation, pinning or request creation.
 */
static int live_pin_rewind(void *arg)
{
struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
enum intel_engine_id id;
int err = 0;
/*
* We have to be careful not to trust intel_ring too much, for example
* ring->head is updated upon retire which is out of sync with pinning
* the context. Thus we cannot use ring->head to set CTX_RING_HEAD,
* or else we risk writing an older, stale value.
*
* To simulate this, let's apply a bit of deliberate sabotage.
*/
for_each_engine(engine, gt, id) {
struct intel_context *ce;
struct i915_request *rq;
struct intel_ring *ring;
struct igt_live_test t;
if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
err = -EIO;
break;
}
ce = intel_context_create(engine);
if (IS_ERR(ce)) {
err = PTR_ERR(ce);
break;
}
err = intel_context_pin(ce);
if (err) {
intel_context_put(ce);
break;
}
/* Keep the context awake while we play games */
err = i915_active_acquire(&ce->active);
if (err) {
intel_context_unpin(ce);
intel_context_put(ce);
break;
}
ring = ce->ring;
/* Poison the ring, and offset the next request from HEAD */
memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32));
ring->emit = ring->size / 2;
ring->tail = ring->emit;
GEM_BUG_ON(ring->head);
intel_context_unpin(ce);
/* Submit a simple nop request */
GEM_BUG_ON(intel_context_is_pinned(ce));
rq = intel_context_create_request(ce);
i915_active_release(&ce->active); /* e.g. async retire */
/* Our context reference is dropped before rq is checked, so the error path below has nothing left to release */
intel_context_put(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
break;
}
/* The request must begin at the offset emit position, not at 0 */
GEM_BUG_ON(!rq->head);
i915_request_add(rq);
/* Expect not to hang! */
if (igt_live_test_end(&t)) {
err = -EIO;
break;
}
}
return err;
}
static int engine_lock_reset_tasklet(struct intel_engine_cs *engine)
{
tasklet_disable(&engine->sched_engine->tasklet);
local_bh_disable();
if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
&engine->gt->reset.flags)) {
local_bh_enable();
tasklet_enable(&engine->sched_engine->tasklet);
intel_gt_set_wedged(engine->gt);
return -EBUSY;
}
return 0;
}
/*
 * Counterpart of engine_lock_reset_tasklet(): clear the per-engine reset
 * bit (waking any waiters on it), then re-enable bottom halves and the
 * engine's submission tasklet.
 */
static void engine_unlock_reset_tasklet(struct intel_engine_cs *engine)
{
	const int reset_bit = I915_RESET_ENGINE + engine->id;

	clear_and_wake_up_bit(reset_bit, &engine->gt->reset.flags);

	local_bh_enable();
	tasklet_enable(&engine->sched_engine->tasklet);
}
/*
 * Selftest: a request put on hold must survive an engine reset without
 * being resubmitted, and must only run again once it is released.
 *
 * For each engine, a spinner is started, the tasklet/reset lock is taken,
 * the active request is put on hold and the engine is reset. The request
 * must then carry -EIO, must NOT complete while held, and must complete
 * within HZ / 5 after execlists_unhold().
 *
 * @arg is the struct intel_gt under test. Skipped (returns 0) when
 * per-engine reset is unavailable.
 *
 * Returns 0 on success, -ENOMEM if the spinner cannot be initialised,
 * -ETIME if the spinner does not start or the released request does not
 * complete (GT is wedged), -EIO if the held request completed, -EBUSY if
 * the reset lock could not be taken, or a context/request creation error.
 */
static int live_hold_reset(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * In order to support offline error capture for fast preempt reset,
	 * we need to decouple the guilty request and ensure that it and its
	 * descendants are not executed while the capture is in progress.
	 */

	if (!intel_has_reset_engine(gt))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out;
		}
		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			intel_gt_set_wedged(gt);
			err = -ETIME;
			goto out;
		}

		/* We have our request executing, now remove it and reset */

		err = engine_lock_reset_tasklet(engine);
		if (err)
			goto out;

		/* Run the tasklet by hand; it is disabled while we hold the lock */
		engine->sched_engine->tasklet.callback(&engine->sched_engine->tasklet);
		GEM_BUG_ON(execlists_active(&engine->execlists) != rq);

		i915_request_get(rq);
		execlists_hold(engine, rq);
		GEM_BUG_ON(!i915_request_on_hold(rq));

		__intel_engine_reset_bh(engine, NULL);
		GEM_BUG_ON(rq->fence.error != -EIO);

		engine_unlock_reset_tasklet(engine);

		/*
		 * Check that we do not resubmit the held request.
		 *
		 * i915_request_wait() reports the remaining time (>= 0) on
		 * completion and a negative error on timeout, so any
		 * non-negative result means the held request ran.
		 */
		if (i915_request_wait(rq, 0, HZ / 5) >= 0) {
			pr_err("%s: on hold request completed!\n",
			       engine->name);
			i915_request_put(rq);
			err = -EIO;
			goto out;
		}
		GEM_BUG_ON(!i915_request_on_hold(rq));

		/* But is resubmitted on release */
		execlists_unhold(engine, rq);
		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
			pr_err("%s: held request did not complete!\n",
			       engine->name);
			intel_gt_set_wedged(gt);
			err = -ETIME;
		}
		i915_request_put(rq);

out:
		st_engine_heartbeat_enable(engine);
		intel_context_put(ce);
		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	return err;
}
/*
 * Describe an expected error code for log messages: "bad" for any
 * non-zero @err, "good" for zero.
 */
static const char *error_repr(int err)
{
	if (err)
		return "bad";

	return "good";
}
/*
 * Selftest: invalid commands must be detected and the offending request
 * (and only that request) flagged with -EIO.
 *
 * For each engine, and for each phase in the table below, two requests
 * are submitted on separate contexts. Each carries either two invalid
 * dwords (BAD) or two MI_NOOPs (GOOD). After both have started, the fence
 * error of each request must equal the value expected by the phase.
 *
 * @arg is the struct intel_gt under test. Skipped (returns 0) when
 * per-engine reset is unavailable.
 *
 * Returns 0 on success; on any failure the GT is wedged and the error is
 * returned (-ETIME if a request did not start, -EINVAL on a wrong fence
 * error, or a context/request/ring error).
 */
static int live_error_interrupt(void *arg)
{
	static const struct error_phase {
		enum { GOOD = 0, BAD = -EIO } error[2];
	} phases[] = {
		{ { BAD,  GOOD } },
		{ { BAD,  BAD  } },
		{ { BAD,  GOOD } },
		{ { GOOD, GOOD } }, /* sentinel */
	};
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning
	 * of invalid commands in user batches that will cause a GPU hang.
	 * This is a faster mechanism than using hangcheck/heartbeats, but
	 * only detects problems the HW knows about -- it will not warn when
	 * we kill the HW!
	 *
	 * To verify our detection and reset, we throw some invalid commands
	 * at the HW and wait for the interrupt.
	 */

	if (!intel_has_reset_engine(gt))
		return 0;

	for_each_engine(engine, gt, id) {
		const struct error_phase *p;
		int err = 0;

		st_engine_heartbeat_disable(engine);

		/* The table ends at the first phase whose first entry is GOOD */
		for (p = phases; p->error[0] != GOOD; p++) {
			struct i915_request *client[ARRAY_SIZE(phases->error)];
			u32 *cs;
			int i;

			/*
			 * Clear the whole array, not just its first element:
			 * the cleanup at "out" inspects every slot, including
			 * those never reached when we bail out early.
			 */
			memset(client, 0, sizeof(client));
			for (i = 0; i < ARRAY_SIZE(client); i++) {
				struct intel_context *ce;
				struct i915_request *rq;

				ce = intel_context_create(engine);
				if (IS_ERR(ce)) {
					err = PTR_ERR(ce);
					goto out;
				}

				rq = intel_context_create_request(ce);
				intel_context_put(ce);
				if (IS_ERR(rq)) {
					err = PTR_ERR(rq);
					goto out;
				}

				if (rq->engine->emit_init_breadcrumb) {
					err = rq->engine->emit_init_breadcrumb(rq);
					if (err) {
						i915_request_add(rq);
						goto out;
					}
				}

				cs = intel_ring_begin(rq, 2);
				if (IS_ERR(cs)) {
					i915_request_add(rq);
					err = PTR_ERR(cs);
					goto out;
				}

				if (p->error[i]) {
					*cs++ = 0xdeadbeef;
					*cs++ = 0xdeadbeef;
				} else {
					*cs++ = MI_NOOP;
					*cs++ = MI_NOOP;
				}

				client[i] = i915_request_get(rq);
				i915_request_add(rq);
			}

			err = wait_for_submit(engine, client[0], HZ / 2);
			if (err) {
				pr_err("%s: first request did not start within time!\n",
				       engine->name);
				err = -ETIME;
				goto out;
			}

			for (i = 0; i < ARRAY_SIZE(client); i++) {
				/* Not completing is only noted; not starting is fatal */
				if (i915_request_wait(client[i], 0, HZ / 5) < 0)
					pr_debug("%s: %s request incomplete!\n",
						 engine->name,
						 error_repr(p->error[i]));

				if (!i915_request_started(client[i])) {
					pr_err("%s: %s request not started!\n",
					       engine->name,
					       error_repr(p->error[i]));
					err = -ETIME;
					goto out;
				}

				/* Kick the tasklet to process the error */
				intel_engine_flush_submission(engine);
				if (client[i]->fence.error != p->error[i]) {
					pr_err("%s: %s request (%s) with wrong error code: %d\n",
					       engine->name,
					       error_repr(p->error[i]),
					       i915_request_completed(client[i]) ? "completed" : "running",
					       client[i]->fence.error);
					err = -EINVAL;
					goto out;
				}
			}

out:
			for (i = 0; i < ARRAY_SIZE(client); i++)
				if (client[i])
					i915_request_put(client[i]);
			if (err) {
				pr_err("%s: failed at phase[%zd] { %d, %d }\n",
				       engine->name, p - phases,
				       p->error[0], p->error[1]);
				break;
			}
		}

		st_engine_heartbeat_enable(engine);
		if (err) {
			intel_gt_set_wedged(gt);
			return err;
		}
	}

	return 0;
}
/*
 * Emit one link of a semaphore chain into @rq (10 dwords).
 *
 * The request polls dword @idx of @vma (GGTT) until it is non-zero.
 * For @idx > 0 it then writes 1 into dword @idx - 1; for @idx == 0 the
 * store is replaced by four MI_NOOPs so every link has the same length.
 * Arbitration is enabled around the wait and disabled afterwards.
 *
 * Returns 0 on success or the error from intel_ring_begin().
 */
static int
emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
{
	u32 base;
	u32 *cs;

	cs = intel_ring_begin(rq, 10);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	base = i915_ggtt_offset(vma);

	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;

	/* Poll our own slot until it differs from the semaphore data of 0 */
	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_NEQ_SDD;
	*cs++ = 0;
	*cs++ = base + 4 * idx;
	*cs++ = 0;

	if (idx > 0) {
		/* Write 1 into the previous link's slot */
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = base + 4 * (idx - 1);
		*cs++ = 0;
		*cs++ = 1;
	} else {
		int pad;

		/* Nothing before the head of the chain: pad instead */
		for (pad = 0; pad < 4; pad++)
			*cs++ = MI_NOOP;
	}

	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;

	intel_ring_advance(rq, cs);
	return 0;
}
/*
 * Create a fresh context on @engine and submit a request on it holding
 * link @idx of the semaphore chain in @vma (see emit_semaphore_chain()).
 *
 * Returns the request with an extra reference held for the caller, or an
 * ERR_PTR. If emitting the commands fails the request is still added (so
 * it is not leaked) but the error is returned instead.
 */
static struct i915_request *
semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx)
{
	struct i915_request *rq;
	struct intel_context *ce;
	int err = 0;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return ERR_CAST(ce);

	rq = intel_context_create_request(ce);
	if (!IS_ERR(rq)) {
		if (rq->engine->emit_init_breadcrumb)
			err = rq->engine->emit_init_breadcrumb(rq);
		if (!err)
			err = emit_semaphore_chain(rq, vma, idx);

		if (err) {
			/* Must still be added, but nobody keeps it */
			i915_request_add(rq);
			rq = ERR_PTR(err);
		} else {
			i915_request_get(rq);
			i915_request_add(rq);
		}
	}

	/* The context reference is dropped on success and on failure */
	intel_context_put(ce);
	return rq;
}
/*
 * Submit a kernel request on @engine that writes 1 into dword @idx - 1 of
 * @vma (GGTT), then reschedule that request at priority @prio.
 *
 * Returns 0 on success, or the error from creating the request or from
 * reserving ring space (the request is still added in the latter case).
 */
static int
release_queue(struct intel_engine_cs *engine,
	      struct i915_vma *vma,
	      int idx, int prio)
{
	struct i915_sched_attr attr = { .priority = prio };
	struct i915_request *rq;
	u32 *cs;

	rq = intel_engine_create_kernel_request(engine);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs)) {
		i915_request_add(rq);
		return PTR_ERR(cs);
	}

	cs[0] = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	cs[1] = i915_ggtt_offset(vma) + 4 * (idx - 1);
	cs[2] = 0;
	cs[3] = 1;
	intel_ring_advance(rq, cs + 4);

	i915_request_get(rq);
	i915_request_add(rq);

	/* Re-enabling bottom halves after the reschedule kicks the tasklet */
	local_bh_disable();
	engine->sched_engine->schedule(rq, &attr);
	local_bh_enable(); /* kick tasklet */

	i915_request_put(rq);
	return 0;
}
/*
 * Build a chain of semaphore waits across the engines and check that it
 * unwinds within a bounded time.
 *
 * The head of the chain (index 0) is queued on @outer. Then, on every
 * engine of the same GT that supports preemption, @count further links are
 * queued with increasing indices. Finally a barrier priority request on
 * @outer writes to the last link's slot, and the head request is expected
 * to complete within the timeout computed below.
 *
 * Returns 0 on success, -EIO (after wedging the GT) if the head does not
 * complete in time, or an error from semaphore_queue()/release_queue().
 */
static int
slice_semaphore_queue(struct intel_engine_cs *outer,
struct i915_vma *vma,
int count)
{
struct intel_engine_cs *engine;
struct i915_request *head;
enum intel_engine_id id;
int err, i, n = 0;
/* n counts the links queued so far; the head takes index 0 */
head = semaphore_queue(outer, vma, n++);
if (IS_ERR(head))
return PTR_ERR(head);
for_each_engine(engine, outer->gt, id) {
if (!intel_engine_has_preemption(engine))
continue;
for (i = 0; i < count; i++) {
struct i915_request *rq;
rq = semaphore_queue(engine, vma, n++);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto out;
}
/* Only the head is waited upon; drop the reference to this link */
i915_request_put(rq);
}
}
/* release_queue() writes slot n - 1, i.e. the last link queued */
err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER);
if (err)
goto out;
if (i915_request_wait(head, 0,
2 * outer->gt->info.num_engines * (count + 2) * (count + 3)) < 0) {
pr_err("%s: Failed to slice along semaphore chain of length (%d, %d)!\n",
outer->name, count, n);
GEM_TRACE_DUMP();
intel_gt_set_wedged(outer->gt);
err = -EIO;
}
out:
i915_request_put(head);
return err;
}
/*
 * Selftest: a chain of semaphore waits spread across the engines must be
 * resolved, see slice_semaphore_queue().
 *
 * A single page is allocated, mapped for the CPU and pinned into the
 * global GTT to hold the semaphore slots. For each engine with preemption
 * the page is cleared and a chain with 5 links per engine is run with the
 * heartbeat disabled.
 *
 * @arg is the struct intel_gt under test. Skipped (returns 0) when
 * CONFIG_DRM_I915_TIMESLICE_DURATION is 0.
 *
 * Returns 0 on success, -EIO if flushing fails, or an error from object
 * setup or from slice_semaphore_queue().
 */
static int live_timeslice_preempt(void *arg)
{
struct intel_gt *gt = arg;
struct drm_i915_gem_object *obj;
struct intel_engine_cs *engine;
enum intel_engine_id id;
struct i915_vma *vma;
void *vaddr;
int err = 0;
/*
* If a request takes too long, we would like to give other users
* a fair go on the GPU. In particular, users may create batches
* that wait upon external input, where that input may even be
* supplied by another GPU job. To avoid blocking forever, we
* need to preempt the current task and replace it with another
* ready task.
*/
if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
return 0;
obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
if (IS_ERR(obj))
return PTR_ERR(obj);
vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto err_obj;
}
vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
if (IS_ERR(vaddr)) {
err = PTR_ERR(vaddr);
goto err_obj;
}
err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
if (err)
goto err_map;
err = i915_vma_sync(vma);
if (err)
goto err_pin;
for_each_engine(engine, gt, id) {
if (!intel_engine_has_preemption(engine))
continue;
/* Start every engine's run from all-zero semaphore slots */
memset(vaddr, 0, PAGE_SIZE);
st_engine_heartbeat_disable(engine);
err = slice_semaphore_queue(engine, vma, 5);
st_engine_heartbeat_enable(engine);
if (err)
goto err_pin;
if (igt_flush_test(gt->i915)) {
err = -EIO;
goto err_pin;
}
}
/* Unwind in reverse order of setup; also reached on success */
err_pin:
i915_vma_unpin(vma);
err_map:
i915_gem_object_unpin_map(obj);
err_obj:
i915_gem_object_put(obj);
return err;
}
/*
 * Create and submit a request on @ce that waits on a semaphore and then
 * records when it ran.
 *
 * @slot is a CPU pointer; only its offset within a page is used, and is
 * added to the GGTT offset of the engine's status page. The request (14
 * dwords) polls that address with a greater-or-equal semaphore compare
 * against @idx, stores the engine's RING_TIMESTAMP register into dword
 * @idx of the slot, and finally writes @idx + 1 to the first dword.
 *
 * If @wait is not NULL the new request first awaits @wait's fence.
 *
 * Returns the request with an extra reference for the caller, or an
 * ERR_PTR. Once the request exists it is always added, even on error.
 */
static struct i915_request *
create_rewinder(struct intel_context *ce,
struct i915_request *wait,
void *slot, int idx)
{
const u32 offset =
i915_ggtt_offset(ce->engine->status_page.vma) +
offset_in_page(slot);
struct i915_request *rq;
u32 *cs;
int err;
rq = intel_context_create_request(ce);
if (IS_ERR(rq))
return rq;
if (wait) {
err = i915_request_await_dma_fence(rq, &wait->fence);
if (err)
goto err;
}
cs = intel_ring_begin(rq, 14);
if (IS_ERR(cs)) {
err = PTR_ERR(cs);
goto err;
}
*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
*cs++ = MI_NOOP;
/* Semaphore wait on the first dword of the slot, compared against idx */
*cs++ = MI_SEMAPHORE_WAIT |
MI_SEMAPHORE_GLOBAL_GTT |
MI_SEMAPHORE_POLL |
MI_SEMAPHORE_SAD_GTE_SDD;
*cs++ = idx;
*cs++ = offset;
*cs++ = 0;
/* Record the engine timestamp into slot[idx] */
*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
*cs++ = offset + idx * sizeof(u32);
*cs++ = 0;
/* Advance the semaphore value in slot[0] to idx + 1 */
*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
*cs++ = offset;
*cs++ = 0;
*cs++ = idx + 1;
intel_ring_advance(rq, cs);
err = 0;
/* Reached on success too: the request is added regardless of err */
err:
i915_request_get(rq);
i915_request_add(rq);
if (err) {
i915_request_put(rq);
return ERR_PTR(err);
}
return rq;
}
/*
 * Selftest: a timeslice that replaces a context with (part of) itself
 * must rewind RING_TAIL correctly.
 *
 * For each engine with timeslices, three requests built by
 * create_rewinder() are used: A1 and A2 on one context, B1 on a second
 * context awaiting A1. They record timestamps into slots X (1), Z (2) and
 * Y (3) respectively. After forcing timeslicing until A2 is no longer
 * active, the semaphore is released and the recorded timestamps must show
 * B1 (Z) running before A2 (Y).
 *
 * @arg is the struct intel_gt under test. Skipped (returns 0) when
 * CONFIG_DRM_I915_TIMESLICE_DURATION is 0.
 *
 * Returns 0 on success, -ETIME if a request is not submitted or a slot is
 * never written, -EINVAL on the wrong execution order, -EIO if flushing
 * fails, or an error from context/request creation.
 */
static int live_timeslice_rewind(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * The usual presumption on timeslice expiration is that we replace
	 * the active context with another. However, given a chain of
	 * dependencies we may end up with replacing the context with itself,
	 * but only a few of those requests, forcing us to rewind the
	 * RING_TAIL of the original request.
	 */
	if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
		return 0;

	for_each_engine(engine, gt, id) {
		enum { A1, A2, B1 };
		enum { X = 1, Z, Y };
		struct i915_request *rq[3] = {};
		struct intel_context *ce;
		unsigned long timeslice;
		int i, err = 0;
		u32 *slot;

		if (!intel_engine_has_timeslices(engine))
			continue;

		/*
		 * A:rq1 -- semaphore wait, timestamp X
		 * A:rq2 -- write timestamp Y
		 *
		 * B:rq1 [await A:rq1] -- write timestamp Z
		 *
		 * Force timeslice, release semaphore.
		 *
		 * Expect execution/evaluation order XZY
		 */

		st_engine_heartbeat_disable(engine);
		timeslice = xchg(&engine->props.timeslice_duration_ms, 1);

		slot = memset32(engine->status_page.addr + 1000, 0, 4);

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto err;
		}

		/*
		 * On failure, record the error and clear the slot: the
		 * cleanup below puts every rq[] entry and must never be
		 * handed an ERR_PTR.
		 */
		rq[A1] = create_rewinder(ce, NULL, slot, X);
		if (IS_ERR(rq[A1])) {
			err = PTR_ERR(rq[A1]);
			rq[A1] = NULL;
			intel_context_put(ce);
			goto err;
		}

		rq[A2] = create_rewinder(ce, NULL, slot, Y);
		intel_context_put(ce);
		if (IS_ERR(rq[A2])) {
			err = PTR_ERR(rq[A2]);
			rq[A2] = NULL;
			goto err;
		}

		err = wait_for_submit(engine, rq[A2], HZ / 2);
		if (err) {
			pr_err("%s: failed to submit first context\n",
			       engine->name);
			goto err;
		}

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto err;
		}

		rq[B1] = create_rewinder(ce, rq[A1], slot, Z);
		intel_context_put(ce);
		if (IS_ERR(rq[B1])) {
			err = PTR_ERR(rq[B1]);
			rq[B1] = NULL;
			goto err;
		}

		err = wait_for_submit(engine, rq[B1], HZ / 2);
		if (err) {
			pr_err("%s: failed to submit second context\n",
			       engine->name);
			goto err;
		}

		/* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */
		ENGINE_TRACE(engine, "forcing tasklet for rewind\n");
		while (i915_request_is_active(rq[A2])) { /* semaphore yield! */
			/* Wait for the timeslice to kick in */
			del_timer(&engine->execlists.timer);
			tasklet_hi_schedule(&engine->sched_engine->tasklet);
			intel_engine_flush_submission(engine);
		}
		/* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */
		GEM_BUG_ON(!i915_request_is_active(rq[A1]));
		GEM_BUG_ON(!i915_request_is_active(rq[B1]));
		GEM_BUG_ON(i915_request_is_active(rq[A2]));

		/* Release the hounds! */
		slot[0] = 1;
		wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */

		/* slot[1..3] receive the timestamps written by X, Z and Y */
		for (i = 1; i <= 3; i++) {
			unsigned long timeout = jiffies + HZ / 2;

			while (!READ_ONCE(slot[i]) &&
			       time_before(jiffies, timeout))
				;

			if (!time_before(jiffies, timeout)) {
				pr_err("%s: rq[%d] timed out\n",
				       engine->name, i - 1);
				err = -ETIME;
				goto err;
			}

			pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]);
		}

		/* XZY: XZ < XY */
		if (slot[Z] - slot[X] >= slot[Y] - slot[X]) {
			pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n",
			       engine->name,
			       slot[Z] - slot[X],
			       slot[Y] - slot[X]);
			err = -EINVAL;
		}

err:
		/* Set every slot to -1 so any request still waiting can finish */
		memset32(&slot[0], -1, 4);
		wmb();

		engine->props.timeslice_duration_ms = timeslice;
		st_engine_heartbeat_enable(engine);
		/* Entries never filled in (or cleared above) are NULL */
		for (i = 0; i < 3; i++)
			i915_request_put(rq[i]);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			return err;
	}

	return 0;
}
/*
 * Submit an empty kernel request on @engine.
 *
 * Returns the request with an extra reference held for the caller, or the
 * ERR_PTR from intel_engine_create_kernel_request().
 */
static struct i915_request *nop_request(struct intel_engine_cs *engine)
{
	struct i915_request *rq = intel_engine_create_kernel_request(engine);

	if (!IS_ERR(rq)) {
		i915_request_get(rq);
		i915_request_add(rq);
	}

	return rq;
}
/*
 * Compute how long (in jiffies) to wait for timeslicing on @engine to
 * take effect: two timeslice periods, plus HZ / 5 for a nop request to
 * complete, plus one jiffie.
 */
static long slice_timeout(struct intel_engine_cs *engine)
{
	/* Enough time for a timeslice to kick in, and kick out */
	const long slices = 2 * msecs_to_jiffies_timeout(timeslice(engine));
	/* Enough time for the nop request to complete */
	const long nop = HZ / 5;

	return slices + nop + 1;
}
/*
 * Selftest: timeslicing must be enabled on behalf of a queued request
 * even when both ELSP ports are already occupied.
 *
 * For each engine with preemption: a semaphore-waiting request is
 * submitted and raised to I915_PRIORITY_MAX, followed by a nop request.
 * A request that signals the semaphore is then queued at the effective
 * priority of the waiter, and the waiter must complete within
 * slice_timeout().
 *
 * @arg is the struct intel_gt under test. Skipped (returns 0) when
 * CONFIG_DRM_I915_TIMESLICE_DURATION is 0.
 *
 * Returns 0 on success, -EIO if the waiter is not timesliced out in time,
 * or an error from object setup, request creation or submission waits.
 */
static int live_timeslice_queue(void *arg)
{
struct intel_gt *gt = arg;
struct drm_i915_gem_object *obj;
struct intel_engine_cs *engine;
enum intel_engine_id id;
struct i915_vma *vma;
void *vaddr;
int err = 0;
/*
* Make sure that even if ELSP[0] and ELSP[1] are filled with
* timeslicing between them disabled, we *do* enable timeslicing
* if the queue demands it. (Normally, we do not submit if
* ELSP[1] is already occupied, so must rely on timeslicing to
* eject ELSP[0] in favour of the queue.)
*/
if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
return 0;
obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
if (IS_ERR(obj))
return PTR_ERR(obj);
vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto err_obj;
}
vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
if (IS_ERR(vaddr)) {
err = PTR_ERR(vaddr);
goto err_obj;
}
err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
if (err)
goto err_map;
err = i915_vma_sync(vma);
if (err)
goto err_pin;
for_each_engine(engine, gt, id) {
struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
struct i915_request *rq, *nop;
if (!intel_engine_has_preemption(engine))
continue;
st_engine_heartbeat_disable(engine);
memset(vaddr, 0, PAGE_SIZE);
/* ELSP[0]: semaphore wait */
rq = semaphore_queue(engine, vma, 0);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto err_heartbeat;
}
engine->sched_engine->schedule(rq, &attr);
err = wait_for_submit(engine, rq, HZ / 2);
if (err) {
pr_err("%s: Timed out trying to submit semaphores\n",
engine->name);
goto err_rq;
}
/* ELSP[1]: nop request */
nop = nop_request(engine);
if (IS_ERR(nop)) {
err = PTR_ERR(nop);
goto err_rq;
}
err = wait_for_submit(engine, nop, HZ / 2);
/* The nop is only needed for the submission wait above */
i915_request_put(nop);
if (err) {
pr_err("%s: Timed out trying to submit nop\n",
engine->name);
goto err_rq;
}
GEM_BUG_ON(i915_request_completed(rq));
GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
/* Queue: semaphore signal, matching priority as semaphore */
err = release_queue(engine, vma, 1, effective_prio(rq));
if (err)
goto err_rq;
/* Wait until we ack the release_queue and start timeslicing */
do {
cond_resched();
intel_engine_flush_submission(engine);
} while (READ_ONCE(engine->execlists.pending[0]));
/* Timeslice every jiffy, so within 2 we should signal */
if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) {
struct drm_printer p =
drm_info_printer(gt->i915->drm.dev);
pr_err("%s: Failed to timeslice into queue\n",
engine->name);
intel_engine_dump(engine, &p,
"%s\n", engine->name);
/* Overwrite every slot with non-zero data, which satisfies the semaphore's not-equal-to-zero wait */
memset(vaddr, 0xff, PAGE_SIZE);
err = -EIO;
}
err_rq:
i915_request_put(rq);
err_heartbeat:
st_engine_heartbeat_enable(engine);
if (err)
break;
}
/* Unwind in reverse order of setup; also reached on success */
err_pin:
i915_vma_unpin(vma);
err_map:
i915_gem_object_unpin_map(obj);
err_obj:
i915_gem_object_put(obj);
return err;
}
/*
 * Selftest: a request flagged I915_FENCE_FLAG_NOPREEMPT must not be
 * timesliced out, even by a barrier priority request.
 *
 * For each engine with preemption, with a timeslice duration of 1ms: a
 * spinner is started and flagged no-preempt, then a barrier priority
 * request is submitted on another context. That second request must NOT
 * complete within slice_timeout().
 *
 * @arg is the struct intel_gt under test. Skipped (returns 0) when
 * CONFIG_DRM_I915_TIMESLICE_DURATION is 0.
 *
 * Returns 0 on success, -ENOMEM if the spinner cannot be initialised,
 * -ETIME if the spinner does not start or the barrier is not submitted,
 * -EINVAL if the barrier completed, -EIO if flushing fails, or an error
 * from context/request creation.
 */
static int live_timeslice_nopreempt(void *arg)
{
struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
enum intel_engine_id id;
struct igt_spinner spin;
int err = 0;
/*
* We should not timeslice into a request that is marked with
* I915_REQUEST_NOPREEMPT.
*/
if (!CONFIG_DRM_I915_TIMESLICE_DURATION)
return 0;
if (igt_spinner_init(&spin, gt))
return -ENOMEM;
for_each_engine(engine, gt, id) {
struct intel_context *ce;
struct i915_request *rq;
unsigned long timeslice;
if (!intel_engine_has_preemption(engine))
continue;
ce = intel_context_create(engine);
if (IS_ERR(ce)) {
err = PTR_ERR(ce);
break;
}
st_engine_heartbeat_disable(engine);
/* Remember the previous duration; it is restored at out_heartbeat */
timeslice = xchg(&engine->props.timeslice_duration_ms, 1);
/* Create an unpreemptible spinner */
rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
intel_context_put(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto out_heartbeat;
}
i915_request_get(rq);
i915_request_add(rq);
if (!igt_wait_for_spinner(&spin, rq)) {
i915_request_put(rq);
err = -ETIME;
goto out_spin;
}
/* Flagged only after the spinner has been seen running */
set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
i915_request_put(rq);
/* Followed by a maximum priority barrier (heartbeat) */
ce = intel_context_create(engine);
if (IS_ERR(ce)) {
err = PTR_ERR(ce);
goto out_spin;
}
rq = intel_context_create_request(ce);
intel_context_put(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto out_spin;
}
rq->sched.attr.priority = I915_PRIORITY_BARRIER;
i915_request_get(rq);
i915_request_add(rq);
/*
* Wait until the barrier is in ELSP, and we know timeslicing
* will have been activated.
*/
if (wait_for_submit(engine, rq, HZ / 2)) {
i915_request_put(rq);
err = -ETIME;
goto out_spin;
}
/*
* Since the ELSP[0] request is unpreemptible, it should not
* allow the maximum priority barrier through. Wait long
* enough to see if it is timesliced in by mistake.
*/
if (i915_request_wait(rq, 0, slice_timeout(engine)) >= 0) {
pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n",
engine->name);
err = -EINVAL;
}
i915_request_put(rq);
out_spin:
igt_spinner_end(&spin);
out_heartbeat:
xchg(&engine->props.timeslice_duration_ms, timeslice);
st_engine_heartbeat_enable(engine);
if (err)
break;
if (igt_flush_test(gt->i915)) {
err = -EIO;
break;
}
}
igt_spinner_fini(&spin);
return err;
}
/*
 * live_busywait_preempt - selftest: a high priority request must be able to
 * run while a low priority request is busywaiting on a semaphore.
 * @arg: the struct intel_gt under test
 *
 * A single page is pinned into the global GTT and mapped for the CPU. On
 * every engine that has preemption and can store a dword, a low priority
 * request stores 1 into that page and then polls it with MI_SEMAPHORE_WAIT;
 * a high priority request then stores 0 into the same dword. The low
 * priority request is expected to complete within HZ / 5 jiffies.
 *
 * Returns 0 on success or a negative error code. The GT is wedged if the
 * low priority request does not complete after the high priority request
 * has been submitted.
 */
static int live_busywait_preempt(void *arg)
{
struct intel_gt *gt = arg;
struct i915_gem_context *ctx_hi, *ctx_lo;
struct intel_engine_cs *engine;
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
enum intel_engine_id id;
int err = -ENOMEM;
u32 *map;
/*
 * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
 * preempt the busywaits used to synchronise between rings.
 */
ctx_hi = kernel_context(gt->i915, NULL);
if (!ctx_hi)
return -ENOMEM;
ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
ctx_lo = kernel_context(gt->i915, NULL);
if (!ctx_lo)
goto err_ctx_hi;
ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
/* One page shared by CPU (via map) and GPU (via the GGTT vma) */
obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
goto err_ctx_lo;
}
map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
if (IS_ERR(map)) {
err = PTR_ERR(map);
goto err_obj;
}
vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto err_map;
}
err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
if (err)
goto err_map;
err = i915_vma_sync(vma);
if (err)
goto err_vma;
for_each_engine(engine, gt, id) {
struct i915_request *lo, *hi;
struct igt_live_test t;
u32 *cs;
if (!intel_engine_has_preemption(engine))
continue;
if (!intel_engine_can_store_dword(engine))
continue;
if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
err = -EIO;
goto err_vma;
}
/*
 * We create two requests. The low priority request
 * busywaits on a semaphore (inside the ringbuffer where
 * it should be preemptible) and the high priority request
 * uses a MI_STORE_DWORD_IMM to update the semaphore value
 * allowing the first request to complete. If preemption
 * fails, we hang instead.
 */
lo = igt_request_alloc(ctx_lo, engine);
if (IS_ERR(lo)) {
err = PTR_ERR(lo);
goto err_vma;
}
/* 4 dwords for the store + 4 dwords for the semaphore wait */
cs = intel_ring_begin(lo, 8);
if (IS_ERR(cs)) {
err = PTR_ERR(cs);
i915_request_add(lo);
goto err_vma;
}
/* Store 1 into the shared dword to signal that lo is executing */
*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
*cs++ = i915_ggtt_offset(vma);
*cs++ = 0;
*cs++ = 1;
/* XXX Do we need a flush + invalidate here? */
/* Poll the same dword, comparing it against the data dword 0 */
*cs++ = MI_SEMAPHORE_WAIT |
MI_SEMAPHORE_GLOBAL_GTT |
MI_SEMAPHORE_POLL |
MI_SEMAPHORE_SAD_EQ_SDD;
*cs++ = 0;
*cs++ = i915_ggtt_offset(vma);
*cs++ = 0;
intel_ring_advance(lo, cs);
/* Keep our own reference so lo can be waited upon after submission */
i915_request_get(lo);
i915_request_add(lo);
/* Wait for the CPU to observe the non-zero value stored by lo */
if (wait_for(READ_ONCE(*map), 10)) {
i915_request_put(lo);
err = -ETIMEDOUT;
goto err_vma;
}
/* Low priority request should be busywaiting now */
if (i915_request_wait(lo, 0, 1) != -ETIME) {
i915_request_put(lo);
pr_err("%s: Busywaiting request did not!\n",
engine->name);
err = -EIO;
goto err_vma;
}
hi = igt_request_alloc(ctx_hi, engine);
if (IS_ERR(hi)) {
err = PTR_ERR(hi);
i915_request_put(lo);
goto err_vma;
}
cs = intel_ring_begin(hi, 4);
if (IS_ERR(cs)) {
err = PTR_ERR(cs);
i915_request_add(hi);
i915_request_put(lo);
goto err_vma;
}
/* Store 0 into the shared dword, the value lo is polling for */
*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
*cs++ = i915_ggtt_offset(vma);
*cs++ = 0;
*cs++ = 0;
intel_ring_advance(hi, cs);
i915_request_add(hi);
/* lo can only complete once hi has executed its store */
if (i915_request_wait(lo, 0, HZ / 5) < 0) {
struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
pr_err("%s: Failed to preempt semaphore busywait!\n",
engine->name);
intel_engine_dump(engine, &p, "%s\n", engine->name);
GEM_TRACE_DUMP();
i915_request_put(lo);
intel_gt_set_wedged(gt);
err = -EIO;
goto err_vma;
}
/* The dword must read back as zero now that lo has completed */
GEM_BUG_ON(READ_ONCE(*map));
i915_request_put(lo);
if (igt_live_test_end(&t)) {
err = -EIO;
goto err_vma;
}
}
err = 0;
/* Unwind in the reverse order of acquisition */
err_vma:
i915_vma_unpin(vma);
err_map:
i915_gem_object_unpin_map(obj);
err_obj:
i915_gem_object_put(obj);
err_ctx_lo:
kernel_context_close(ctx_lo);
err_ctx_hi:
kernel_context_close(ctx_hi);
return err;
}
/*
 * spinner_create_request - build a spinner request for a GEM context
 * @spin: spinner used to construct the request
 * @ctx: GEM context whose engine map is consulted
 * @engine: engine to run on; looked up in @ctx by engine->legacy_idx
 * @arb: arbitration command forwarded to igt_spinner_create_request()
 *
 * The intel_context reference taken for the lookup is dropped again before
 * returning.
 *
 * Returns the request from igt_spinner_create_request(), or the ERR_PTR
 * from the failed engine lookup.
 */
static struct i915_request *
spinner_create_request(struct igt_spinner *spin,
		       struct i915_gem_context *ctx,
		       struct intel_engine_cs *engine,
		       u32 arb)
{
	struct intel_context *engine_ctx =
		i915_gem_context_get_engine(ctx, engine->legacy_idx);
	struct i915_request *spinner_rq;

	if (!IS_ERR(engine_ctx)) {
		spinner_rq = igt_spinner_create_request(spin, engine_ctx, arb);
		intel_context_put(engine_ctx);
	} else {
		spinner_rq = ERR_CAST(engine_ctx);
	}

	return spinner_rq;
}
static int live_preempt(void *arg)
{
struct intel_gt *gt = arg;
struct i915_gem_context *ctx_hi, *ctx_lo;
struct igt_spinner spin_hi, spin_lo;
struct intel_engine_cs *engine;
enum intel_engine_id id;
int err = -ENOMEM;
if (igt_spinner_init(&spin_hi, gt))
return -ENOMEM;
if (igt_spinner_init(&spin_lo, gt))
goto err_spin_hi;
ctx_hi = kernel_context(gt->i915, NULL);
if (!ctx_hi)
goto err_spin_lo;
ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY;
ctx_lo = kernel_context(gt->i915, NULL);
if (!ctx_lo)
goto err_ctx_hi;
ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY;
for_each_engine(engine, gt, id) {
struct igt_live_test t;
struct i915_request *rq;
if (!intel_engine_has_preemption(engine))
continue;
if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
err = -EIO;
goto err_ctx_lo;
}
rq = spinner_create_request(&spin_lo, ctx_lo, engine,
MI_ARB_CHECK);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto err_ctx_lo;
}
i915_request_add(rq);
if (!igt_wait_for_spinner(&spin_lo, rq)) {
GEM_TRACE("lo spinner failed to start\n");
GEM_TRACE_DUMP();
intel_gt_set_wedged(gt);
err = -EIO;
goto err_ctx_lo;
}
rq = spinner_create_request(&spin_hi, ctx_hi, engine,
MI_ARB_CHECK);
if (IS_ERR(rq)) {
igt_spinner_end(&spin_lo);
err = PTR_ERR(rq);
goto err_ctx_lo;
}
i915_request_add(rq);
if (!igt_wait_for_spinner(&spin_hi, rq)) {
GEM_TRACE("hi spinner failed to start\n");
GEM_TRACE_DUMP();
intel_gt_set_wedged(gt);
err = -EIO;
goto err_ctx_lo;
}
igt_spinner_end(&spin_hi);
igt_spinner_end(&spin_lo);
if (igt_live_test_end(&t)) {
err = -EIO;
goto err_ctx_lo;
}
}
err = 0;
err_ctx_lo:
kernel_context_close(ctx_lo);
err_ctx_hi:
kernel_context_close(ctx_hi);
err_spin_lo:
igt_spinner_fini(&spin_lo);
err_spin_hi:
igt_spinner_fini(&spin_hi);
return err;
}
static int live_late_preempt(void *arg)
{
struct intel_gt *gt = arg;
struct i915_gem_context *ctx_hi, *ctx_lo;
struct igt_spinner spin_hi, spin_lo;
struct intel_engine_cs *engine;
struct i915_sched_attr attr = {};
enum intel_engine_id id;
int err = -ENOMEM;
if (igt_spinner_init(&spin_hi, gt))
return -ENOMEM;
if (igt_spinner_init(&spin_lo, gt))
goto err_spin_hi;
ctx_hi = kernel_context(gt->i915, NULL);
if (!ctx_hi)
goto err_spin_lo;
ctx_lo = kernel_context(gt->i915, NULL);
if (!ctx_lo)
goto err_ctx_hi;
/* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */
ctx_lo->sched.priority = 1;
for_each_engine(engine, gt, id) {
struct igt_live_test t;
struct i915_request *rq;
if (!intel_engine_has_preemption(engine))
continue;
if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
err = -EIO;
goto err_ctx_lo;
}
rq = spinner_create_request(&spin_lo, ctx_lo, engine,
MI_ARB_CHECK);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto err_ctx_lo;
}
i915_request_add(rq);
if (!igt_wait_for_spinner(&spin_lo, rq)) {
pr_err("First context failed to start\n");
goto err_wedged;
}
rq = spinner_create_request(&spin_hi, ctx_hi, engine,
MI_NOOP);
if (IS_ERR(rq)) {
igt_spinner_end(&spin_lo);
err = PTR_ERR(rq);
goto err_ctx_lo;
}
i915_request_add(rq);
if (igt_wait_for_spinner(&spin_hi, rq)) {
pr_err("Second context overtook first?\n");
goto err_wedged;
}
attr.priority = I915_PRIORITY_MAX;
engine->sched_engine->schedule(rq, &attr);
if (!igt_wait_for_spinner(&spin_hi, rq)) {
pr_err("High priority context failed to preempt the low priority context\n");
GEM_TRACE_DUMP();
goto err_wedged;
}
igt_spinner_end(&spin_hi);
igt_spinner_end(&spin_lo);
if (igt_live_test_end(&t)) {
err = -EIO;
goto err_ctx_lo;
}
}
err = 0;
err_ctx_lo:
kernel_context_close(ctx_lo);
err_ctx_hi:
kernel_context_close(ctx_hi);
err_spin_lo:
igt_spinner_fini(&spin_lo);
err_spin_hi:
igt_spinner_fini(&spin_hi);
return err;
err_wedged:
igt_spinner_end(&spin_hi);
igt_spinner_end(&spin_lo);
intel_gt_set_wedged(gt);
err = -EIO;
goto err_ctx_lo;
}
/*
 * A spinner paired with the GEM context its requests are created from.
 * Set up by preempt_client_init() and torn down by preempt_client_fini().
 */
struct preempt_client {
/* spinner used to build this client's spinning requests */
struct igt_spinner spin;
/* context obtained from kernel_context() */
struct i915_gem_context *ctx;
};
/*
 * preempt_client_init - create the context and spinner for a client
 * @gt: GT the context and spinner are created for
 * @c: client to initialise
 *
 * Returns 0 on success or -ENOMEM if either the context or the spinner
 * could not be set up; on spinner failure the context is closed again.
 */
static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c)
{
	struct i915_gem_context *ctx = kernel_context(gt->i915, NULL);

	c->ctx = ctx;
	if (!ctx)
		return -ENOMEM;

	if (!igt_spinner_init(&c->spin, gt))
		return 0;

	/* No spinner, so release the context we just created */
	kernel_context_close(ctx);
	return -ENOMEM;
}
/*
 * preempt_client_fini - release a client set up by preempt_client_init()
 * @c: client to tear down
 *
 * The spinner is finalised before the context is closed.
 */
static void preempt_client_fini(struct preempt_client *c)
{
	struct i915_gem_context *ctx = c->ctx;
	struct igt_spinner *spin = &c->spin;

	igt_spinner_fini(spin);
	kernel_context_close(ctx);
}
/*
 * live_nopreempt - selftest: a request flagged I915_FENCE_FLAG_NOPREEMPT
 * must not be displaced by a higher priority request.
 * @arg: the struct intel_gt under test
 *
 * Client A submits a spinner with the NOPREEMPT fence flag set; client B
 * (context priority I915_PRIORITY_MAX) then submits its own spinner. B must
 * not be seen running until A's spinner has been ended, and the engine's
 * preempt_hang.count must still be zero afterwards.
 *
 * Returns 0 on success or a negative error code. The GT is wedged on any
 * failed expectation.
 */
static int live_nopreempt(void *arg)
{
struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
struct preempt_client a, b;
enum intel_engine_id id;
int err = -ENOMEM;
/*
 * Verify that we can disable preemption for an individual request
 * that may be being observed and not want to be interrupted.
 */
if (preempt_client_init(gt, &a))
return -ENOMEM;
if (preempt_client_init(gt, &b))
goto err_client_a;
b.ctx->sched.priority = I915_PRIORITY_MAX;
for_each_engine(engine, gt, id) {
struct i915_request *rq_a, *rq_b;
if (!intel_engine_has_preemption(engine))
continue;
/* Reset the counter that is checked at the end of this iteration */
engine->execlists.preempt_hang.count = 0;
rq_a = spinner_create_request(&a.spin,
a.ctx, engine,
MI_ARB_CHECK);
if (IS_ERR(rq_a)) {
err = PTR_ERR(rq_a);
goto err_client_b;
}
/* Low priority client, but unpreemptable! */
__set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags);
i915_request_add(rq_a);
if (!igt_wait_for_spinner(&a.spin, rq_a)) {
pr_err("First client failed to start\n");
goto err_wedged;
}
rq_b = spinner_create_request(&b.spin,
b.ctx, engine,
MI_ARB_CHECK);
if (IS_ERR(rq_b)) {
err = PTR_ERR(rq_b);
goto err_client_b;
}
i915_request_add(rq_b);
/* B is much more important than A! (But A is unpreemptable.) */
GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a));
/* Wait long enough for preemption and timeslicing */
if (igt_wait_for_spinner(&b.spin, rq_b)) {
pr_err("Second client started too early!\n");
goto err_wedged;
}
/* Only once A is released may B begin executing */
igt_spinner_end(&a.spin);
if (!igt_wait_for_spinner(&b.spin, rq_b)) {
pr_err("Second client failed to start\n");
goto err_wedged;
}
igt_spinner_end(&b.spin);
if (engine->execlists.preempt_hang.count) {
pr_err("Preemption recorded x%d; should have been suppressed!\n",
engine->execlists.preempt_hang.count);
/* NOTE(review): this -EINVAL is overwritten with -EIO at err_wedged */
err = -EINVAL;
goto err_wedged;
}
if (igt_flush_test(gt->i915))
goto err_wedged;
}
err = 0;
err_client_b:
preempt_client_fini(&b);
err_client_a:
preempt_client_fini(&a);
return err;
/* Failure path: stop both spinners, wedge the GT and report -EIO */
err_wedged:
igt_spinner_end(&b.spin);
igt_spinner_end(&a.spin);
intel_gt_set_wedged(gt);
err = -EIO;
goto err_client_b;
}
/*
 * Shared state handed to each __cancel_*() phase of live_preempt_cancel():
 * the engine currently under test and two clients to submit from.
 */
struct live_preempt_cancel {
/* engine the current phase runs on */
struct intel_engine_cs *engine;
/* two independent context + spinner pairs */
struct preempt_client a, b;
};
static int __cancel_active0(struct live_preempt_cancel *arg)
{
struct i915_request *rq;
struct igt_live_test t;
int err;
/* Preempt cancel of ELSP0 */
GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
if (igt_live_test_begin(&t, arg->engine->i915,
__func__, arg->engine->name))
return -EIO;
rq = spinner_create_request(&arg->a.spin,
arg->a.ctx, arg->engine,
MI_ARB_CHECK);
if (IS_ERR(rq))
return PTR_ERR(rq);
clear_bit(CONTEXT_BANNED, &rq->context->flags);
i915_request_get(rq);
i915_request_add(rq);
if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
err = -EIO;
goto out;
}
intel_context_set_banned(rq->context);
err = intel_engine_pulse(arg->engine);
if (err)
goto out;
err = wait_for_reset(arg->engine, rq, HZ / 2);
if (err) {
pr_err("Cancelled inflight0 request did not reset\n");
goto out;
}
out:
i915_request_put(rq);
if (igt_live_test_end(&t))
err = -EIO;
return err;
}
/*
 * __cancel_active1 - ban the context of a request queued behind a running
 * spinner and check that only the banned request is cancelled.
 * @arg: engine and clients to use
 *
 * rq[0] is a non-preemptible (MI_NOOP) spinner from client A. rq[1] is a
 * spinner from client B that awaits rq[0]'s fence. B's context is banned,
 * an engine pulse is sent, A's spinner is ended and wait_for_reset() is run
 * on rq[1]. rq[0] must finish without a fence error and rq[1] must report
 * -EIO.
 *
 * Returns 0 on success or a negative error code.
 */
static int __cancel_active1(struct live_preempt_cancel *arg)
{
	struct i915_request *rq[2] = {};
	struct igt_live_test t;
	int err;
	int n;

	/* Preempt cancel of ELSP1 */
	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);

	if (igt_live_test_begin(&t, arg->engine->i915,
				__func__, arg->engine->name))
		return -EIO;

	rq[0] = spinner_create_request(&arg->a.spin,
				       arg->a.ctx, arg->engine,
				       MI_NOOP); /* no preemption */
	if (IS_ERR(rq[0]))
		return PTR_ERR(rq[0]);

	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
	i915_request_get(rq[0]);
	i915_request_add(rq[0]);
	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
		err = -EIO;
		goto out;
	}

	rq[1] = spinner_create_request(&arg->b.spin,
				       arg->b.ctx, arg->engine,
				       MI_ARB_CHECK);
	if (IS_ERR(rq[1])) {
		err = PTR_ERR(rq[1]);
		goto out;
	}

	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
	i915_request_get(rq[1]);
	/* Order rq[1] after rq[0]; it is submitted even if the await failed */
	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
	i915_request_add(rq[1]);
	if (err)
		goto out;

	intel_context_set_banned(rq[1]->context);
	err = intel_engine_pulse(arg->engine);
	if (err)
		goto out;

	/* Let the first spinner finish normally */
	igt_spinner_end(&arg->a.spin);
	err = wait_for_reset(arg->engine, rq[1], HZ / 2);
	if (err)
		goto out;

	if (rq[0]->fence.error != 0) {
		pr_err("Normal inflight0 request did not complete\n");
		err = -EINVAL;
		goto out;
	}

	if (rq[1]->fence.error != -EIO) {
		pr_err("Cancelled inflight1 request did not report -EIO\n");
		err = -EINVAL;
		goto out;
	}

out:
	/*
	 * A slot is NULL if we bailed out before creating it, or holds an
	 * ERR_PTR if its creation failed; neither is a reference to drop.
	 * Release in reverse order of acquisition.
	 */
	for (n = ARRAY_SIZE(rq); n--; ) {
		if (!IS_ERR_OR_NULL(rq[n]))
			i915_request_put(rq[n]);
	}
	if (igt_live_test_end(&t))
		err = -EIO;
	return err;
}
/*
 * __cancel_queued - ban a context that has one request running and another
 * still queued, with an innocent request from a second context in between.
 * @arg: engine and clients to use
 *
 * rq[0] is a spinner from client A's context, rq[1] is a plain request from
 * client B awaiting rq[0], and rq[2] is a second spinner created from
 * client A's context (using client B's spinner object) awaiting rq[1].
 * The context of rq[2] is then banned and an engine pulse is sent. Both
 * rq[0] and rq[2] must report -EIO while rq[1] must complete without error.
 *
 * Returns 0 on success or a negative error code.
 */
static int __cancel_queued(struct live_preempt_cancel *arg)
{
	struct i915_request *rq[3] = {};
	struct igt_live_test t;
	int err;
	int n;

	/* Full ELSP and one in the wings */
	GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);

	if (igt_live_test_begin(&t, arg->engine->i915,
				__func__, arg->engine->name))
		return -EIO;

	rq[0] = spinner_create_request(&arg->a.spin,
				       arg->a.ctx, arg->engine,
				       MI_ARB_CHECK);
	if (IS_ERR(rq[0]))
		return PTR_ERR(rq[0]);

	clear_bit(CONTEXT_BANNED, &rq[0]->context->flags);
	i915_request_get(rq[0]);
	i915_request_add(rq[0]);
	if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) {
		err = -EIO;
		goto out;
	}

	rq[1] = igt_request_alloc(arg->b.ctx, arg->engine);
	if (IS_ERR(rq[1])) {
		err = PTR_ERR(rq[1]);
		goto out;
	}

	clear_bit(CONTEXT_BANNED, &rq[1]->context->flags);
	i915_request_get(rq[1]);
	/* Order rq[1] after rq[0]; it is submitted even if the await failed */
	err = i915_request_await_dma_fence(rq[1], &rq[0]->fence);
	i915_request_add(rq[1]);
	if (err)
		goto out;

	/* Deliberately a.ctx again: rq[2] shares its context with rq[0] */
	rq[2] = spinner_create_request(&arg->b.spin,
				       arg->a.ctx, arg->engine,
				       MI_ARB_CHECK);
	if (IS_ERR(rq[2])) {
		err = PTR_ERR(rq[2]);
		goto out;
	}

	i915_request_get(rq[2]);
	err = i915_request_await_dma_fence(rq[2], &rq[1]->fence);
	i915_request_add(rq[2]);
	if (err)
		goto out;

	intel_context_set_banned(rq[2]->context);
	err = intel_engine_pulse(arg->engine);
	if (err)
		goto out;

	err = wait_for_reset(arg->engine, rq[2], HZ / 2);
	if (err)
		goto out;

	if (rq[0]->fence.error != -EIO) {
		pr_err("Cancelled inflight0 request did not report -EIO\n");
		err = -EINVAL;
		goto out;
	}

	if (rq[1]->fence.error != 0) {
		pr_err("Normal inflight1 request did not complete\n");
		err = -EINVAL;
		goto out;
	}

	if (rq[2]->fence.error != -EIO) {
		pr_err("Cancelled queued request did not report -EIO\n");
		err = -EINVAL;
		goto out;
	}

out:
	/*
	 * A slot is NULL if we bailed out before creating it, or holds an
	 * ERR_PTR if its creation failed; neither is a reference to drop.
	 * Release in reverse order of acquisition.
	 */
	for (n = ARRAY_SIZE(rq); n--; ) {
		if (!IS_ERR_OR_NULL(rq[n]))
			i915_request_put(rq[n]);
	}
	if (igt_live_test_end(&t))
		err = -EIO;
	return err;
}
static int __cancel_hostile(struct live_preempt_cancel *arg)
{
struct i915_request *rq;
int err;
/* Preempt cancel non-preemptible spinner in ELSP0 */
if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
return 0;
if (!intel_has_reset_engine(arg->engine->gt))
return 0;
GEM_TRACE("%s(%s)\n", __func__, arg->engine->name);
rq = spinner_create_request(&arg->a.spin,
arg->a.ctx, arg->engine,
MI_NOOP); /* preemption disabled */
if (IS_ERR(rq))
return PTR_ERR(rq);
clear_bit(CONTEXT_BANNED, &rq->context->flags);
i915_request_get(rq);
i915_request_add(rq);
if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
err = -EIO;
goto out;
}
intel_context_set_banned(rq->context);
err = intel_engine_pulse(arg->engine); /* force reset */
if (err)
goto out;
err = wait_for_reset(arg->engine, rq, HZ / 2);
if (err) {
pr_err("Cancelled inflight0 request did not reset\n");
goto out;
}
out:
i915_request_put(rq);
if (igt_flush_test(arg->engine->i915))
err = -EIO;
return err;
}
static void force_reset_timeout(struct intel_engine_cs *engine)
{
engine->reset_timeout.probability = 999;
atomic_set(&engine->reset_timeout.times, -1);
}
static void cancel_reset_timeout(struct intel_engine_cs *engine)
{
memset(&engine->reset_timeout, 0, sizeof(engine->reset_timeout));
}
/*
 * __cancel_fail - ban the context of a running MI_NOOP spinner while the
 * engine's reset_timeout injection is armed, then check that the request is
 * still reset once the heartbeat interval has been lowered.
 * @arg: engine and clients to use
 *
 * Skipped (returns 0) when CONFIG_DRM_I915_PREEMPT_TIMEOUT is zero or the
 * GT has no per-engine reset.
 *
 * Returns 0 on success or a negative error code.
 */
static int __cancel_fail(struct live_preempt_cancel *arg)
{
struct intel_engine_cs *engine = arg->engine;
struct i915_request *rq;
int err;
if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT)
return 0;
if (!intel_has_reset_engine(engine->gt))
return 0;
GEM_TRACE("%s(%s)\n", __func__, engine->name);
rq = spinner_create_request(&arg->a.spin,
arg->a.ctx, engine,
MI_NOOP); /* preemption disabled */
if (IS_ERR(rq))
return PTR_ERR(rq);
/* Start from an unbanned context, whatever an earlier phase left */
clear_bit(CONTEXT_BANNED, &rq->context->flags);
i915_request_get(rq);
i915_request_add(rq);
if (!igt_wait_for_spinner(&arg->a.spin, rq)) {
err = -EIO;
goto out;
}
intel_context_set_banned(rq->context);
err = intel_engine_pulse(engine);
if (err)
goto out;
/* Arm the injection before the preemption attempt is processed */
force_reset_timeout(engine);
/* force preempt reset [failure] */
/* Busy-loop until a submission is pending on the engine */
while (!engine->execlists.pending[0])
intel_engine_flush_submission(engine);
/* NOTE(review): presumably flushes the preempt timeout by hand — confirm */
del_timer_sync(&engine->execlists.preempt);
intel_engine_flush_submission(engine);
cancel_reset_timeout(engine);
/* after failure, require heartbeats to reset device */
intel_engine_set_heartbeat(engine, 1);
err = wait_for_reset(engine, rq, HZ / 2);
/* Restore the default heartbeat interval whatever the outcome */
intel_engine_set_heartbeat(engine,
engine->defaults.heartbeat_interval_ms);
if (err) {
pr_err("Cancelled inflight0 request did not reset\n");
goto out;
}
out:
i915_request_put(rq);
if (igt_flush_test(engine->i915))
err = -EIO;
return err;
}
static int live_preempt_cancel(void *arg)
{
struct intel_gt *gt = arg;
struct live_preempt_cancel data;
enum intel_engine_id id;
int err = -ENOMEM;
/*
* To cancel an inflight context, we need to first remove it from the
* GPU. That sounds like preemption! Plus a little bit of bookkeeping.
*/
if (preempt_client_init(gt, &data.a))
return -ENOMEM;
if (preempt_client_init(gt, &data.b))
goto err_client_a;
for_each_engine(data.engine, gt, id) {
if (!intel_engine_has_preemption(data.engine))
continue;
err = __cancel_active0(&data);
if (err)
goto err_wedged;
err = __cancel_active1(&data);
if (err)
goto err_wedged;
err = __cancel_queued(&data);
if (err)
goto err_wedged;
err = __cancel_hostile(&data);
if (err)
goto err_wedged;
err = __cancel_fail(&data);
if (err)
goto err_wedged;
}
err = 0;
err_client_b:
preempt_client_fini(&data.b);
err_client_a:
preempt_client_fini(&data.a);
return err;
err_wedged:
GEM_TRACE_DUMP();
igt_spinner_end(&data.b.spin);
igt_spinner_end(&data.a.spin);
intel_gt_set_wedged(gt);
goto err_client_b;
}
/*
 * live_suppress_self_preempt - selftest: boosting the priority of the
 * request that is already running must not be recorded as a preemption.
 * @arg: the struct intel_gt under test
 *
 * Skipped (returns 0) with GuC submission or when running as a vGPU.
 *
 * On every engine with preemption, a spinner from client A is started with
 * the heartbeat disabled. Eight times over, a spinner from client B is
 * queued, the running request is rescheduled at I915_PRIORITY_MAX and then
 * ended so that B takes over; the two clients then swap roles. At the end
 * engine->execlists.preempt_hang.count must still be zero.
 *
 * Returns 0 on success or a negative error code.
 */
static int live_suppress_self_preempt(void *arg)
{
struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
struct preempt_client a, b;
enum intel_engine_id id;
int err = -ENOMEM;
/*
 * Verify that if a preemption request does not cause a change in
 * the current execution order, the preempt-to-idle injection is
 * skipped and that we do not accidentally apply it after the CS
 * completion event.
 */
if (intel_uc_uses_guc_submission(&gt->uc))
return 0; /* presume black box */
if (intel_vgpu_active(gt->i915))
return 0; /* GVT forces single port & request submission */
if (preempt_client_init(gt, &a))
return -ENOMEM;
if (preempt_client_init(gt, &b))
goto err_client_a;
for_each_engine(engine, gt, id) {
struct i915_request *rq_a, *rq_b;
int depth;
if (!intel_engine_has_preemption(engine))
continue;
if (igt_flush_test(gt->i915))
goto err_wedged;
/* Every exit from here on must re-enable the heartbeat */
st_engine_heartbeat_disable(engine);
/* Reset the counter that is checked after the depth loop */
engine->execlists.preempt_hang.count = 0;
rq_a = spinner_create_request(&a.spin,
a.ctx, engine,
MI_NOOP);
if (IS_ERR(rq_a)) {
err = PTR_ERR(rq_a);
st_engine_heartbeat_enable(engine);
goto err_client_b;
}
i915_request_add(rq_a);
if (!igt_wait_for_spinner(&a.spin, rq_a)) {
pr_err("First client failed to start\n");
st_engine_heartbeat_enable(engine);
goto err_wedged;
}
/* Keep postponing the timer to avoid premature slicing */
mod_timer(&engine->execlists.timer, jiffies + HZ);
for (depth = 0; depth < 8; depth++) {
rq_b = spinner_create_request(&b.spin,
b.ctx, engine,
MI_NOOP);
if (IS_ERR(rq_b)) {
err = PTR_ERR(rq_b);
st_engine_heartbeat_enable(engine);
goto err_client_b;
}
i915_request_add(rq_b);
GEM_BUG_ON(i915_request_completed(rq_a));
/* Boost the request that is already running, then release it */
engine->sched_engine->schedule(rq_a, &attr);
igt_spinner_end(&a.spin);
if (!igt_wait_for_spinner(&b.spin, rq_b)) {
pr_err("Second client failed to start\n");
st_engine_heartbeat_enable(engine);
goto err_wedged;
}
/* B is now the running client; swap roles for the next round */
swap(a, b);
rq_a = rq_b;
}
igt_spinner_end(&a.spin);
if (engine->execlists.preempt_hang.count) {
pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n",
engine->name,
engine->execlists.preempt_hang.count,
depth);
st_engine_heartbeat_enable(engine);
err = -EINVAL;
goto err_client_b;
}
st_engine_heartbeat_enable(engine);
if (igt_flush_test(gt->i915))
goto err_wedged;
}
err = 0;
err_client_b:
preempt_client_fini(&b);
err_client_a:
preempt_client_fini(&a);
return err;
/* Failure path: stop both spinners, wedge the GT and report -EIO */
err_wedged:
igt_spinner_end(&b.spin);
igt_spinner_end(&a.spin);
intel_gt_set_wedged(gt);
err = -EIO;
goto err_client_b;
}
/*
 * live_chain_preempt - selftest: a boosted request must complete ahead of a
 * long chain of earlier low priority requests.
 * @arg: the struct intel_gt under test
 *
 * On every engine with preemption, the size of one spinner request in the
 * ring is measured to derive an upper bound on the chain length. Then, for
 * each prime count up to that bound: a "hi" spinner is started, a "lo"
 * spinner plus count empty "lo" requests are queued behind it, and a final
 * "hi" request is queued and rescheduled at I915_PRIORITY_MAX. After the
 * "hi" spinner is ended, the boosted request must complete within HZ / 5
 * while the "lo" spinner is still spinning. The "lo" spinner is then ended
 * and a trailing "lo" request is used to flush the chain.
 *
 * Returns 0 on success or -EIO on any failure, in which case the GT is
 * wedged.
 */
static int live_chain_preempt(void *arg)
{
struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
struct preempt_client hi, lo;
enum intel_engine_id id;
int err = -ENOMEM;
/*
 * Build a chain AB...BA between two contexts (A, B) and request
 * preemption of the last request. It should then complete before
 * the previously submitted spinner in B.
 */
if (preempt_client_init(gt, &hi))
return -ENOMEM;
if (preempt_client_init(gt, &lo))
goto err_client_hi;
for_each_engine(engine, gt, id) {
struct i915_sched_attr attr = { .priority = I915_PRIORITY_MAX };
struct igt_live_test t;
struct i915_request *rq;
int ring_size, count, i;
if (!intel_engine_has_preemption(engine))
continue;
/* Probe request: only used to measure how much ring a spinner takes */
rq = spinner_create_request(&lo.spin,
lo.ctx, engine,
MI_ARB_CHECK);
if (IS_ERR(rq))
goto err_wedged;
i915_request_get(rq);
i915_request_add(rq);
/* Ring space consumed by the request; a negative value means it wrapped */
ring_size = rq->wa_tail - rq->head;
if (ring_size < 0)
ring_size += rq->ring->size;
/* Turn that into the number of such requests the ring can hold */
ring_size = rq->ring->size / ring_size;
pr_debug("%s(%s): Using maximum of %d requests\n",
__func__, engine->name, ring_size);
igt_spinner_end(&lo.spin);
if (i915_request_wait(rq, 0, HZ / 2) < 0) {
pr_err("Timed out waiting to flush %s\n", engine->name);
i915_request_put(rq);
goto err_wedged;
}
i915_request_put(rq);
if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) {
err = -EIO;
goto err_wedged;
}
for_each_prime_number_from(count, 1, ring_size) {
/* Head of the chain: the hi spinner keeps the engine busy */
rq = spinner_create_request(&hi.spin,
hi.ctx, engine,
MI_ARB_CHECK);
if (IS_ERR(rq))
goto err_wedged;
i915_request_add(rq);
if (!igt_wait_for_spinner(&hi.spin, rq))
goto err_wedged;
/* Queue the lo spinner followed by count empty lo requests */
rq = spinner_create_request(&lo.spin,
lo.ctx, engine,
MI_ARB_CHECK);
if (IS_ERR(rq))
goto err_wedged;
i915_request_add(rq);
for (i = 0; i < count; i++) {
rq = igt_request_alloc(lo.ctx, engine);
if (IS_ERR(rq))
goto err_wedged;
i915_request_add(rq);
}
/* Tail of the chain: a hi request that is boosted past the lo chain */
rq = igt_request_alloc(hi.ctx, engine);
if (IS_ERR(rq))
goto err_wedged;
i915_request_get(rq);
i915_request_add(rq);
engine->sched_engine->schedule(rq, &attr);
igt_spinner_end(&hi.spin);
/* The lo spinner is still spinning, so rq must overtake it to finish */
if (i915_request_wait(rq, 0, HZ / 5) < 0) {
struct drm_printer p =
drm_info_printer(gt->i915->drm.dev);
pr_err("Failed to preempt over chain of %d\n",
count);
intel_engine_dump(engine, &p,
"%s\n", engine->name);
i915_request_put(rq);
goto err_wedged;
}
igt_spinner_end(&lo.spin);
i915_request_put(rq);
/* Flush: a final lo request completes only after the whole lo chain */
rq = igt_request_alloc(lo.ctx, engine);
if (IS_ERR(rq))
goto err_wedged;
i915_request_get(rq);
i915_request_add(rq);
if (i915_request_wait(rq, 0, HZ / 5) < 0) {
struct drm_printer p =
drm_info_printer(gt->i915->drm.dev);
pr_err("Failed to flush low priority chain of %d requests\n",
count);
intel_engine_dump(engine, &p,
"%s\n", engine->name);
i915_request_put(rq);
goto err_wedged;
}
i915_request_put(rq);
}
if (igt_live_test_end(&t)) {
err = -EIO;
goto err_wedged;
}
}
err = 0;
err_client_lo:
preempt_client_fini(&lo);
err_client_hi:
preempt_client_fini(&hi);
return err;
/* Failure path: stop both spinners, wedge the GT and report -EIO */
err_wedged:
igt_spinner_end(&hi.spin);
igt_spinner_end(&lo.spin);
intel_gt_set_wedged(gt);
err = -EIO;
goto err_client_lo;
}
/*
 * create_gang - submit one more spinning batch and link it onto a chain
 * @engine: engine to create the context and request on
 * @prev: in/out head of the chain; NULL-valued on the first call, updated
 *        to the new request on success
 *
 * A new context and a 4096 byte batch object are created. The batch
 * enables arbitration and polls its own first dword with MI_SEMAPHORE_WAIT
 * (compared against 0); if a previous request exists it then stores 0 into
 * the start of that request's batch, and finally ends the batch.
 *
 * On success the new request carries one extra request reference and one
 * vma reference in rq->batch; live_preempt_gang() drops both.
 *
 * Returns 0 on success or a negative error code.
 */
static int create_gang(struct intel_engine_cs *engine,
struct i915_request **prev)
{
struct drm_i915_gem_object *obj;
struct intel_context *ce;
struct i915_request *rq;
struct i915_vma *vma;
u32 *cs;
int err;
ce = intel_context_create(engine);
if (IS_ERR(ce))
return PTR_ERR(ce);
obj = i915_gem_object_create_internal(engine->i915, 4096);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
goto err_ce;
}
vma = i915_vma_instance(obj, ce->vm, NULL);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto err_obj;
}
/* NOTE(review): no matching i915_vma_unpin() in this function — confirm intended */
err = i915_vma_pin(vma, 0, 0, PIN_USER);
if (err)
goto err_obj;
cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
if (IS_ERR(cs)) {
err = PTR_ERR(cs);
goto err_obj;
}
/* Semaphore target: spin until zero */
/* The first dword (the MI_ARB_ON_OFF command itself) is what gets polled */
*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
*cs++ = MI_SEMAPHORE_WAIT |
MI_SEMAPHORE_POLL |
MI_SEMAPHORE_SAD_EQ_SDD;
*cs++ = 0;
*cs++ = lower_32_bits(vma->node.start);
*cs++ = upper_32_bits(vma->node.start);
if (*prev) {
u64 offset = (*prev)->batch->node.start;
/* Terminate the spinner in the next lower priority batch. */
*cs++ = MI_STORE_DWORD_IMM_GEN4;
*cs++ = lower_32_bits(offset);
*cs++ = upper_32_bits(offset);
*cs++ = 0;
}
*cs++ = MI_BATCH_BUFFER_END;
i915_gem_object_flush_map(obj);
i915_gem_object_unpin_map(obj);
rq = intel_context_create_request(ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto err_obj;
}
/* Both references are kept on success and dropped by the caller */
rq->batch = i915_vma_get(vma);
i915_request_get(rq);
i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, false);
if (!err)
err = i915_vma_move_to_active(vma, rq, 0);
if (!err)
err = rq->engine->emit_bb_start(rq,
vma->node.start,
PAGE_SIZE, 0);
i915_vma_unlock(vma);
/* The request is submitted even when building it failed */
i915_request_add(rq);
if (err)
goto err_rq;
i915_gem_object_put(obj);
intel_context_put(ce);
/*
 * Chain the requests through mock.link.next.
 * NOTE(review): when *prev is NULL this takes a member address from a
 * NULL base; live_preempt_gang() appears to rely on list_next_entry()
 * mapping it back to NULL to terminate the walk — confirm.
 */
rq->mock.link.next = &(*prev)->mock.link;
*prev = rq;
return 0;
err_rq:
i915_vma_put(rq->batch);
i915_request_put(rq);
err_obj:
i915_gem_object_put(obj);
err_ce:
intel_context_put(ce);
return err;
}
/*
 * __live_preempt_ring - fill part of one context's ring behind a spinner and
 * then submit a barrier-priority request from a second context.
 * @engine: engine under test
 * @spin: spinner used for the first request on ce[0]
 * @queue_sz: requests are added to ce[0] until the ring tail has advanced
 *            more than this far past the spinner's wa_tail
 * @ring_sz: ring size assigned to both contexts before pinning
 *
 * Both rings are pre-filled with 0xdeadbeef before use. The test passes if
 * the request on ce[1] is acknowledged by wait_for_submit() within HZ / 2.
 *
 * Returns 0 on success or a negative error code.
 */
static int __live_preempt_ring(struct intel_engine_cs *engine,
struct igt_spinner *spin,
int queue_sz, int ring_sz)
{
struct intel_context *ce[2] = {};
struct i915_request *rq;
struct igt_live_test t;
int err = 0;
int n;
if (igt_live_test_begin(&t, engine->i915, __func__, engine->name))
return -EIO;
for (n = 0; n < ARRAY_SIZE(ce); n++) {
struct intel_context *tmp;
tmp = intel_context_create(engine);
if (IS_ERR(tmp)) {
err = PTR_ERR(tmp);
goto err_ce;
}
tmp->ring_size = ring_sz;
err = intel_context_pin(tmp);
if (err) {
intel_context_put(tmp);
goto err_ce;
}
memset32(tmp->ring->vaddr,
0xdeadbeef, /* trigger a hang if executed */
tmp->ring->vma->size / sizeof(u32));
/* Only fully set up contexts are recorded for the cleanup loop */
ce[n] = tmp;
}
rq = igt_spinner_create_request(spin, ce[0], MI_ARB_CHECK);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto err_ce;
}
i915_request_get(rq);
rq->sched.attr.priority = I915_PRIORITY_BARRIER;
i915_request_add(rq);
if (!igt_wait_for_spinner(spin, rq)) {
intel_gt_set_wedged(engine->gt);
i915_request_put(rq);
err = -ETIME;
goto err_ce;
}
/* Fill the ring, until we will cause a wrap */
n = 0;
while (ce[0]->ring->tail - rq->wa_tail <= queue_sz) {
struct i915_request *tmp;
tmp = intel_context_create_request(ce[0]);
if (IS_ERR(tmp)) {
err = PTR_ERR(tmp);
i915_request_put(rq);
goto err_ce;
}
i915_request_add(tmp);
intel_engine_flush_submission(engine);
n++;
}
intel_engine_flush_submission(engine);
pr_debug("%s: Filled %d with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n",
engine->name, queue_sz, n,
ce[0]->ring->size,
ce[0]->ring->tail,
ce[0]->ring->emit,
rq->tail);
/* Done with the spinner request; rq is reused for ce[1] below */
i915_request_put(rq);
/* Create a second request to preempt the first ring */
rq = intel_context_create_request(ce[1]);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto err_ce;
}
rq->sched.attr.priority = I915_PRIORITY_BARRIER;
i915_request_get(rq);
i915_request_add(rq);
err = wait_for_submit(engine, rq, HZ / 2);
i915_request_put(rq);
if (err) {
pr_err("%s: preemption request was not submitted\n",
engine->name);
err = -ETIME;
}
pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n",
engine->name,
ce[0]->ring->tail, ce[0]->ring->emit,
ce[1]->ring->tail, ce[1]->ring->emit);
/* Common exit, also reached on success */
err_ce:
intel_engine_flush_submission(engine);
igt_spinner_end(spin);
for (n = 0; n < ARRAY_SIZE(ce); n++) {
/* Slots are filled in order, so the first empty one ends the walk */
if (IS_ERR_OR_NULL(ce[n]))
break;
intel_context_unpin(ce[n]);
intel_context_put(ce[n]);
}
if (igt_live_test_end(&t))
err = -EIO;
return err;
}
/*
 * live_preempt_ring - selftest: run __live_preempt_ring() with four queue
 * sizes on every suitable engine.
 * @arg: the struct intel_gt under test
 *
 * Engines without preemption or that cannot store a dword are skipped. The
 * heartbeat is disabled around the runs on each engine. Queue sizes are
 * n * SZ_4K / 4 for n = 0..3, with a ring size of SZ_4K.
 *
 * Returns 0 on success, -ENOMEM if the spinner cannot be initialised, or
 * the first error reported by __live_preempt_ring().
 */
static int live_preempt_ring(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct igt_spinner spin;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check that we rollback large chunks of a ring in order to do a
	 * preemption event. Similar to live_unlite_ring, but looking at
	 * ring size rather than the impact of intel_ring_direction().
	 */

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	for_each_engine(engine, gt, id) {
		int step;

		if (!intel_engine_has_preemption(engine) ||
		    !intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		/* err is zero on entry; stop at the first failing size */
		for (step = 0; !err && step <= 3; step++)
			err = __live_preempt_ring(engine, &spin,
						  step * SZ_4K / 4, SZ_4K);

		st_engine_heartbeat_enable(engine);

		if (err)
			break;
	}

	igt_spinner_fini(&spin);
	return err;
}
/*
 * live_preempt_gang - selftest: build a chain of spinning batches, each at
 * a higher priority than the last, and check they all complete once the
 * newest one is released.
 * @arg: the struct intel_gt under test
 *
 * For every engine with preemption, create_gang() is called repeatedly
 * until I915_PRIORITY_MAX is passed, the IGT timeout expires or creation
 * fails. The newest batch is then released by the CPU writing 0 into its
 * first dword, and every request is waited upon (HZ / 5 each) from the
 * newest to the oldest while the references taken by create_gang() are
 * dropped.
 *
 * Returns 0 on success or a negative error code.
 */
static int live_preempt_gang(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * Build as long a chain of preempters as we can, with each
	 * request higher priority than the last. Once we are ready, we release
	 * the last batch which then percolates down the chain, each releasing
	 * the next oldest in turn. The intent is to simply push as hard as we
	 * can with the number of preemptions, trying to exceed narrow HW
	 * limits. At a minimum, we insist that we can sort all the user
	 * high priority levels into execution order.
	 */

	for_each_engine(engine, gt, id) {
		struct i915_request *rq = NULL;
		struct igt_live_test t;
		IGT_TIMEOUT(end_time);
		int prio = 0;
		int err = 0;
		u32 *cs;

		if (!intel_engine_has_preemption(engine))
			continue;

		if (igt_live_test_begin(&t, gt->i915, __func__, engine->name))
			return -EIO;

		do {
			struct i915_sched_attr attr = { .priority = prio++ };

			err = create_gang(engine, &rq);
			if (err)
				break;

			/* Submit each spinner at increasing priority */
			engine->sched_engine->schedule(rq, &attr);
		} while (prio <= I915_PRIORITY_MAX &&
			 !__igt_timeout(end_time, NULL));
		pr_debug("%s: Preempt chain of %d requests\n",
			 engine->name, prio);

		/*
		 * Such that the last spinner is the highest priority and
		 * should execute first. When that spinner completes,
		 * it will terminate the next lowest spinner until there
		 * are no more spinners and the gang is complete.
		 *
		 * If the very first create_gang() failed there is no request
		 * at all, hence no batch to release.
		 */
		if (rq) {
			cs = i915_gem_object_pin_map_unlocked(rq->batch->obj,
							      I915_MAP_WC);
			if (!IS_ERR(cs)) {
				*cs = 0;
				i915_gem_object_unpin_map(rq->batch->obj);
			} else {
				err = PTR_ERR(cs);
				intel_gt_set_wedged(gt);
			}
		}

		while (rq) { /* wait for each rq from highest to lowest prio */
			struct i915_request *n = list_next_entry(rq, mock.link);

			/* After the first error only the references are dropped */
			if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) {
				struct drm_printer p =
					drm_info_printer(engine->i915->drm.dev);

				pr_err("Failed to flush chain of %d requests, at %d\n",
				       prio, rq_prio(rq));
				intel_engine_dump(engine, &p,
						  "%s\n", engine->name);

				err = -ETIME;
			}

			i915_vma_put(rq->batch);
			i915_request_put(rq);
			rq = n;
		}

		if (igt_live_test_end(&t))
			err = -EIO;
		if (err)
			return err;
	}

	return 0;
}
/*
 * create_gpr_user - build a batch that increments GPR[1..NUM_GPR-1] one at a
 * time, storing each result to memory and waiting on a semaphore in between.
 * @engine: engine whose mmio_base selects the CS_GPR registers
 * @result: vma receiving the stored register values; its first dword is the
 *          semaphore polled by the batch, and its vm is used for the batch
 * @offset: byte offset into @result at which this batch's values are stored
 *
 * Returns the pinned batch vma, or an ERR_PTR on failure.
 */
static struct i915_vma *
create_gpr_user(struct intel_engine_cs *engine,
struct i915_vma *result,
unsigned int offset)
{
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
u32 *cs;
int err;
int i;
obj = i915_gem_object_create_internal(engine->i915, 4096);
if (IS_ERR(obj))
return ERR_CAST(obj);
vma = i915_vma_instance(obj, result->vm, NULL);
if (IS_ERR(vma)) {
i915_gem_object_put(obj);
return vma;
}
err = i915_vma_pin(vma, 0, 0, PIN_USER);
if (err) {
i915_vma_put(vma);
return ERR_PTR(err);
}
cs = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
if (IS_ERR(cs)) {
/* NOTE(review): the vma pinned above is not unpinned on this path — confirm */
i915_vma_put(vma);
return ERR_CAST(cs);
}
/* All GPR are clear for new contexts. We use GPR(0) as a constant */
*cs++ = MI_LOAD_REGISTER_IMM(1);
*cs++ = CS_GPR(engine, 0);
*cs++ = 1;
for (i = 1; i < NUM_GPR; i++) {
u64 addr;
/*
 * Perform: GPR[i]++
 *
 * As we read and write into the context saved GPR[i], if
 * we restart this batch buffer from an earlier point, we
 * will repeat the increment and store a value > 1.
 */
*cs++ = MI_MATH(4);
*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i));
*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0));
*cs++ = MI_MATH_ADD;
*cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU);
/* Store the register to dword slot i of this batch's result area */
addr = result->node.start + offset + i * sizeof(*cs);
*cs++ = MI_STORE_REGISTER_MEM_GEN8;
/* 2 * i because each GPR occupies two dword registers */
*cs++ = CS_GPR(engine, 2 * i);
*cs++ = lower_32_bits(addr);
*cs++ = upper_32_bits(addr);
/* Poll the first dword of result, compared (SAD_GTE_SDD) against i */
*cs++ = MI_SEMAPHORE_WAIT |
MI_SEMAPHORE_POLL |
MI_SEMAPHORE_SAD_GTE_SDD;
*cs++ = i;
*cs++ = lower_32_bits(result->node.start);
*cs++ = upper_32_bits(result->node.start);
}
*cs++ = MI_BATCH_BUFFER_END;
i915_gem_object_flush_map(obj);
i915_gem_object_unpin_map(obj);
return vma;
}
/*
 * create_global - allocate an internal object and pin it into the GGTT
 * @gt: GT whose global GTT is used
 * @sz: size of the object in bytes
 *
 * Returns the pinned vma, or an ERR_PTR on failure.
 */
static struct i915_vma *create_global(struct intel_gt *gt, size_t sz)
{
	struct drm_i915_gem_object *backing;
	struct i915_vma *global;
	int ret;

	backing = i915_gem_object_create_internal(gt->i915, sz);
	if (IS_ERR(backing))
		return ERR_CAST(backing);

	global = i915_vma_instance(backing, &gt->ggtt->vm, NULL);
	if (IS_ERR(global)) {
		i915_gem_object_put(backing);
		return global;
	}

	ret = i915_ggtt_pin(global, NULL, 0, 0);
	if (!ret)
		return global;

	/* Pinning failed: drop the vma reference and report the error */
	i915_vma_put(global);
	return ERR_PTR(ret);
}
static struct i915_request *
create_gpr_client(struct intel_engine_cs *engine,
struct i915_vma *global,