| // SPDX-License-Identifier: MIT |
| /* |
| * Copyright © 2017-2018 Intel Corporation |
| */ |
| |
| #include <linux/prime_numbers.h> |
| #include <linux/string_helpers.h> |
| |
| #include "intel_context.h" |
| #include "intel_engine_heartbeat.h" |
| #include "intel_engine_pm.h" |
| #include "intel_engine_regs.h" |
| #include "intel_gpu_commands.h" |
| #include "intel_gt.h" |
| #include "intel_gt_requests.h" |
| #include "intel_ring.h" |
| #include "selftest_engine_heartbeat.h" |
| |
| #include "../selftests/i915_random.h" |
| #include "../i915_selftest.h" |
| |
| #include "selftests/igt_flush_test.h" |
| #include "selftests/lib_sw_fence.h" |
| #include "selftests/mock_gem_device.h" |
| #include "selftests/mock_timeline.h" |
| |
| static struct page *hwsp_page(struct intel_timeline *tl) |
| { |
| struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj; |
| |
| GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); |
| return sg_page(obj->mm.pages->sgl); |
| } |
| |
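| /* |
| * Convert the timeline's HWSP slot into a global index (kernel address of |
| * the slot divided by TIMELINE_SEQNO_BYTES), so that two timelines handed |
| * the same slot show up as duplicates. |
| */ |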
| static unsigned long hwsp_cacheline(struct intel_timeline *tl) |
| { |
| unsigned long address = (unsigned long)page_address(hwsp_page(tl)); |
| |
| return (address + offset_in_page(tl->hwsp_offset)) / TIMELINE_SEQNO_BYTES; |
| } |
| |
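| /* |
| * Pin the timeline's HWSP under a ww transaction, backing off and retrying |
| * on -EDEADLK as the ww mutex protocol requires. |
| */ |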
| static int selftest_tl_pin(struct intel_timeline *tl) |
| { |
| struct i915_gem_ww_ctx ww; |
| int err; |
| |
| i915_gem_ww_ctx_init(&ww, false); |
| retry: |
| err = i915_gem_object_lock(tl->hwsp_ggtt->obj, &ww); |
| if (!err) |
| err = intel_timeline_pin(tl, &ww); |
| |
| if (err == -EDEADLK) { |
| err = i915_gem_ww_ctx_backoff(&ww); |
| if (!err) |
| goto retry; |
| } |
| i915_gem_ww_ctx_fini(&ww); |
| return err; |
| } |
| |
| /* Only half of the seqno slots are usable, see __intel_timeline_get_seqno() */ |
| #define CACHELINES_PER_PAGE (PAGE_SIZE / TIMELINE_SEQNO_BYTES / 2) |
| |
| struct mock_hwsp_freelist { |
| struct intel_gt *gt; |
| struct radix_tree_root cachelines; |
| struct intel_timeline **history; |
| unsigned long count, max; |
| struct rnd_state prng; |
| }; |
| |
| enum { |
| SHUFFLE = BIT(0), |
| }; |
| |
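| /* |
| * Swap @tl into history slot @idx, releasing whichever timeline previously |
| * occupied that slot along with its cacheline bookkeeping. |
| */ |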
| static void __mock_hwsp_record(struct mock_hwsp_freelist *state, |
| unsigned int idx, |
| struct intel_timeline *tl) |
| { |
| tl = xchg(&state->history[idx], tl); |
| if (tl) { |
| radix_tree_delete(&state->cachelines, hwsp_cacheline(tl)); |
| intel_timeline_unpin(tl); |
| intel_timeline_put(tl); |
| } |
| } |
| |
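| /* |
| * Create @count pinned timelines, checking that each is handed a HWSP |
| * cacheline not already in use, then release a random number of the |
| * tracked timelines so their cachelines can be recycled. |
| */ |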
| static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state, |
| unsigned int count, |
| unsigned int flags) |
| { |
| struct intel_timeline *tl; |
| unsigned int idx; |
| |
| while (count--) { |
| unsigned long cacheline; |
| int err; |
| |
| tl = intel_timeline_create(state->gt); |
| if (IS_ERR(tl)) |
| return PTR_ERR(tl); |
| |
| err = selftest_tl_pin(tl); |
| if (err) { |
| intel_timeline_put(tl); |
| return err; |
| } |
| |
| cacheline = hwsp_cacheline(tl); |
| err = radix_tree_insert(&state->cachelines, cacheline, tl); |
| if (err) { |
| if (err == -EEXIST) { |
| pr_err("HWSP cacheline %lu already used; duplicate allocation!\n", |
| cacheline); |
| } |
| intel_timeline_unpin(tl); |
| intel_timeline_put(tl); |
| return err; |
| } |
| |
| idx = state->count++ % state->max; |
| __mock_hwsp_record(state, idx, tl); |
| } |
| |
| if (flags & SHUFFLE) |
| i915_prandom_shuffle(state->history, |
| sizeof(*state->history), |
| min(state->count, state->max), |
| &state->prng); |
| |
| count = i915_prandom_u32_max_state(min(state->count, state->max), |
| &state->prng); |
| while (count--) { |
| idx = --state->count % state->max; |
| __mock_hwsp_record(state, idx, NULL); |
| } |
| |
| return 0; |
| } |
| |
| static int mock_hwsp_freelist(void *arg) |
| { |
| struct mock_hwsp_freelist state; |
| struct drm_i915_private *i915; |
| const struct { |
| const char *name; |
| unsigned int flags; |
| } phases[] = { |
| { "linear", 0 }, |
| { "shuffled", SHUFFLE }, |
| { }, |
| }, *p; |
| unsigned int na; |
| int err = 0; |
| |
| i915 = mock_gem_device(); |
| if (!i915) |
| return -ENOMEM; |
| |
| INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL); |
| state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed); |
| |
| state.gt = to_gt(i915); |
| |
| /* |
| * Create a bunch of timelines and check that their HWSP cachelines |
| * do not overlap. Free some, and try again. |
| */ |
| |
| state.max = PAGE_SIZE / sizeof(*state.history); |
| state.count = 0; |
| state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL); |
| if (!state.history) { |
| err = -ENOMEM; |
| goto err_put; |
| } |
| |
| for (p = phases; p->name; p++) { |
| pr_debug("%s(%s)\n", __func__, p->name); |
| for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) { |
| err = __mock_hwsp_timeline(&state, na, p->flags); |
| if (err) |
| goto out; |
| } |
| } |
| |
| out: |
| for (na = 0; na < state.max; na++) |
| __mock_hwsp_record(&state, na, NULL); |
| kfree(state.history); |
| err_put: |
| mock_destroy_device(i915); |
| return err; |
| } |
| |
| struct __igt_sync { |
| const char *name; |
| u32 seqno; |
| bool expected; |
| bool set; |
| }; |
| |
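| /* |
| * Check that sync_is_later() reports the expected result for (ctx, seqno) |
| * and, if requested, record the seqno in the sync map for the next step. |
| */ |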
| static int __igt_sync(struct intel_timeline *tl, |
| u64 ctx, |
| const struct __igt_sync *p, |
| const char *name) |
| { |
| int ret; |
| |
| if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) { |
| pr_err("%s: %s(ctx=%llu, seqno=%u) expected sync_is_later() to report %s, but it did not\n", |
| name, p->name, ctx, p->seqno, str_yes_no(p->expected)); |
| return -EINVAL; |
| } |
| |
| if (p->set) { |
| ret = __intel_timeline_sync_set(tl, ctx, p->seqno); |
| if (ret) |
| return ret; |
| } |
| |
| return 0; |
| } |
| |
| static int igt_sync(void *arg) |
| { |
| const struct __igt_sync pass[] = { |
| { "unset", 0, false, false }, |
| { "new", 0, false, true }, |
| { "0a", 0, true, true }, |
| { "1a", 1, false, true }, |
| { "1b", 1, true, true }, |
| { "0b", 0, true, false }, |
| { "2a", 2, false, true }, |
| { "4", 4, false, true }, |
| { "INT_MAX", INT_MAX, false, true }, |
| { "INT_MAX-1", INT_MAX-1, true, false }, |
| { "INT_MAX+1", (u32)INT_MAX+1, false, true }, |
| { "INT_MAX", INT_MAX, true, false }, |
| { "UINT_MAX", UINT_MAX, false, true }, |
| { "wrap", 0, false, true }, |
| { "unwrap", UINT_MAX, true, false }, |
| {}, |
| }, *p; |
| struct intel_timeline tl; |
| int order, offset; |
| int ret = -ENODEV; |
| |
| mock_timeline_init(&tl, 0); |
| for (p = pass; p->name; p++) { |
| for (order = 1; order < 64; order++) { |
| for (offset = -1; offset <= (order > 1); offset++) { |
| u64 ctx = BIT_ULL(order) + offset; |
| |
| ret = __igt_sync(&tl, ctx, p, "1"); |
| if (ret) |
| goto out; |
| } |
| } |
| } |
| mock_timeline_fini(&tl); |
| |
| mock_timeline_init(&tl, 0); |
| for (order = 1; order < 64; order++) { |
| for (offset = -1; offset <= (order > 1); offset++) { |
| u64 ctx = BIT_ULL(order) + offset; |
| |
| for (p = pass; p->name; p++) { |
| ret = __igt_sync(&tl, ctx, p, "2"); |
| if (ret) |
| goto out; |
| } |
| } |
| } |
| |
| out: |
| mock_timeline_fini(&tl); |
| return ret; |
| } |
| |
| static unsigned int random_engine(struct rnd_state *rnd) |
| { |
| return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd); |
| } |
| |
| static int bench_sync(void *arg) |
| { |
| struct rnd_state prng; |
| struct intel_timeline tl; |
| unsigned long end_time, count; |
| u64 prng32_1M; |
| ktime_t kt; |
| int order, last_order; |
| |
| mock_timeline_init(&tl, 0); |
| |
| /* |
| * Lookups from cache are very fast and so the random number generation |
| * and the loop itself become a significant factor in the per-iteration |
| * timings. We try to compensate the results by measuring the overhead |
| * of the prng and subtracting it from the reported results. |
| */ |
| prandom_seed_state(&prng, i915_selftest.random_seed); |
| count = 0; |
| kt = ktime_get(); |
| end_time = jiffies + HZ/10; |
| do { |
| u32 x; |
| |
| /* Make sure the compiler doesn't optimise away the prng call */ |
| WRITE_ONCE(x, prandom_u32_state(&prng)); |
| |
| count++; |
| } while (!time_after(jiffies, end_time)); |
| kt = ktime_sub(ktime_get(), kt); |
| pr_debug("%s: %lu random evaluations, %lluns/prng\n", |
| __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); |
| prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count); |
| |
| /* Benchmark (only) setting random context ids */ |
| prandom_seed_state(&prng, i915_selftest.random_seed); |
| count = 0; |
| kt = ktime_get(); |
| end_time = jiffies + HZ/10; |
| do { |
| u64 id = i915_prandom_u64_state(&prng); |
| |
| __intel_timeline_sync_set(&tl, id, 0); |
| count++; |
| } while (!time_after(jiffies, end_time)); |
| kt = ktime_sub(ktime_get(), kt); |
| kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); |
| pr_info("%s: %lu random insertions, %lluns/insert\n", |
| __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); |
| |
| /* Benchmark looking up the exact same context ids as we just set */ |
| prandom_seed_state(&prng, i915_selftest.random_seed); |
| end_time = count; |
| kt = ktime_get(); |
| while (end_time--) { |
| u64 id = i915_prandom_u64_state(&prng); |
| |
| if (!__intel_timeline_sync_is_later(&tl, id, 0)) { |
| mock_timeline_fini(&tl); |
| pr_err("Lookup of %llu failed\n", id); |
| return -EINVAL; |
| } |
| } |
| kt = ktime_sub(ktime_get(), kt); |
| kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); |
| pr_info("%s: %lu random lookups, %lluns/lookup\n", |
| __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); |
| |
| mock_timeline_fini(&tl); |
| cond_resched(); |
| |
| mock_timeline_init(&tl, 0); |
| |
| /* Benchmark setting the first N (in order) contexts */ |
| count = 0; |
| kt = ktime_get(); |
| end_time = jiffies + HZ/10; |
| do { |
| __intel_timeline_sync_set(&tl, count++, 0); |
| } while (!time_after(jiffies, end_time)); |
| kt = ktime_sub(ktime_get(), kt); |
| pr_info("%s: %lu in-order insertions, %lluns/insert\n", |
| __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); |
| |
| /* Benchmark looking up the exact same context ids as we just set */ |
| end_time = count; |
| kt = ktime_get(); |
| while (end_time--) { |
| if (!__intel_timeline_sync_is_later(&tl, end_time, 0)) { |
| pr_err("Lookup of %lu failed\n", end_time); |
| mock_timeline_fini(&tl); |
| return -EINVAL; |
| } |
| } |
| kt = ktime_sub(ktime_get(), kt); |
| pr_info("%s: %lu in-order lookups, %lluns/lookup\n", |
| __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); |
| |
| mock_timeline_fini(&tl); |
| cond_resched(); |
| |
| mock_timeline_init(&tl, 0); |
| |
| /* Benchmark searching for a random context id and maybe changing it */ |
| prandom_seed_state(&prng, i915_selftest.random_seed); |
| count = 0; |
| kt = ktime_get(); |
| end_time = jiffies + HZ/10; |
| do { |
| u32 id = random_engine(&prng); |
| u32 seqno = prandom_u32_state(&prng); |
| |
| if (!__intel_timeline_sync_is_later(&tl, id, seqno)) |
| __intel_timeline_sync_set(&tl, id, seqno); |
| |
| count++; |
| } while (!time_after(jiffies, end_time)); |
| kt = ktime_sub(ktime_get(), kt); |
| kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); |
| pr_info("%s: %lu repeated insert/lookups, %lluns/op\n", |
| __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); |
| mock_timeline_fini(&tl); |
| cond_resched(); |
| |
| /* Benchmark searching for a known context id and changing the seqno */ |
| for (last_order = 1, order = 1; order < 32; |
| ({ int tmp = last_order; last_order = order; order += tmp; })) { |
| unsigned int mask = BIT(order) - 1; |
| |
| mock_timeline_init(&tl, 0); |
| |
| count = 0; |
| kt = ktime_get(); |
| end_time = jiffies + HZ/10; |
| do { |
| /* Without assuming too many details of the underlying |
| * implementation, try to identify its phase-changes |
| * (if any)! |
| */ |
| u64 id = (u64)(count & mask) << order; |
| |
| __intel_timeline_sync_is_later(&tl, id, 0); |
| __intel_timeline_sync_set(&tl, id, 0); |
| |
| count++; |
| } while (!time_after(jiffies, end_time)); |
| kt = ktime_sub(ktime_get(), kt); |
| pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n", |
| __func__, count, order, |
| (long long)div64_ul(ktime_to_ns(kt), count)); |
| mock_timeline_fini(&tl); |
| cond_resched(); |
| } |
| |
| return 0; |
| } |
| |
| int intel_timeline_mock_selftests(void) |
| { |
| static const struct i915_subtest tests[] = { |
| SUBTEST(mock_hwsp_freelist), |
| SUBTEST(igt_sync), |
| SUBTEST(bench_sync), |
| }; |
| |
| return i915_subtests(tests, NULL); |
| } |
| |
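| /* |
| * Emit an MI_STORE_DWORD_IMM that writes @value to the GGTT address @addr, |
| * using the command layout appropriate to the GPU generation. |
| */ |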
| static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value) |
| { |
| u32 *cs; |
| |
| cs = intel_ring_begin(rq, 4); |
| if (IS_ERR(cs)) |
| return PTR_ERR(cs); |
| |
| if (GRAPHICS_VER(rq->i915) >= 8) { |
| *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; |
| *cs++ = addr; |
| *cs++ = 0; |
| *cs++ = value; |
| } else if (GRAPHICS_VER(rq->i915) >= 4) { |
| *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; |
| *cs++ = 0; |
| *cs++ = addr; |
| *cs++ = value; |
| } else { |
| *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; |
| *cs++ = addr; |
| *cs++ = value; |
| *cs++ = MI_NOOP; |
| } |
| |
| intel_ring_advance(rq, cs); |
| |
| return 0; |
| } |
| |
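| /* |
| * Pin @tl, verify its HWSP starts out holding the timeline's current seqno, |
| * then submit a request on @engine that writes @value into the HWSP slot. |
| */ |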
| static struct i915_request * |
| checked_tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value) |
| { |
| struct i915_request *rq; |
| int err; |
| |
| err = selftest_tl_pin(tl); |
| if (err) { |
| rq = ERR_PTR(err); |
| goto out; |
| } |
| |
| if (READ_ONCE(*tl->hwsp_seqno) != tl->seqno) { |
| pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n", |
| *tl->hwsp_seqno, tl->seqno); |
| intel_timeline_unpin(tl); |
| return ERR_PTR(-EINVAL); |
| } |
| |
| rq = intel_engine_create_kernel_request(engine); |
| if (IS_ERR(rq)) |
| goto out_unpin; |
| |
| i915_request_get(rq); |
| |
| err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value); |
| i915_request_add(rq); |
| if (err) { |
| i915_request_put(rq); |
| rq = ERR_PTR(err); |
| } |
| |
| out_unpin: |
| intel_timeline_unpin(tl); |
| out: |
| if (IS_ERR(rq)) |
| pr_err("Failed to write to timeline!\n"); |
| return rq; |
| } |
| |
| static int live_hwsp_engine(void *arg) |
| { |
| #define NUM_TIMELINES 4096 |
| struct intel_gt *gt = arg; |
| struct intel_timeline **timelines; |
| struct intel_engine_cs *engine; |
| enum intel_engine_id id; |
| unsigned long count, n; |
| int err = 0; |
| |
| /* |
| * Create a bunch of timelines and check we can write |
| * independently to each of their breadcrumb slots. |
| */ |
| |
| timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES, |
| sizeof(*timelines), |
| GFP_KERNEL); |
| if (!timelines) |
| return -ENOMEM; |
| |
| count = 0; |
| for_each_engine(engine, gt, id) { |
| if (!intel_engine_can_store_dword(engine)) |
| continue; |
| |
| intel_engine_pm_get(engine); |
| |
| for (n = 0; n < NUM_TIMELINES; n++) { |
| struct intel_timeline *tl; |
| struct i915_request *rq; |
| |
| tl = intel_timeline_create(gt); |
| if (IS_ERR(tl)) { |
| err = PTR_ERR(tl); |
| break; |
| } |
| |
| rq = checked_tl_write(tl, engine, count); |
| if (IS_ERR(rq)) { |
| intel_timeline_put(tl); |
| err = PTR_ERR(rq); |
| break; |
| } |
| |
| timelines[count++] = tl; |
| i915_request_put(rq); |
| } |
| |
| intel_engine_pm_put(engine); |
| if (err) |
| break; |
| } |
| |
| if (igt_flush_test(gt->i915)) |
| err = -EIO; |
| |
| for (n = 0; n < count; n++) { |
| struct intel_timeline *tl = timelines[n]; |
| |
| if (!err && READ_ONCE(*tl->hwsp_seqno) != n) { |
| GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n", |
| n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno); |
| GEM_TRACE_DUMP(); |
| err = -EINVAL; |
| } |
| intel_timeline_put(tl); |
| } |
| |
| kvfree(timelines); |
| return err; |
| #undef NUM_TIMELINES |
| } |
| |
| static int live_hwsp_alternate(void *arg) |
| { |
| #define NUM_TIMELINES 4096 |
| struct intel_gt *gt = arg; |
| struct intel_timeline **timelines; |
| struct intel_engine_cs *engine; |
| enum intel_engine_id id; |
| unsigned long count, n; |
| int err = 0; |
| |
| /* |
| * Create a bunch of timelines and check we can write |
| * independently to each of their breadcrumb slots, alternating |
| * engines between adjacent timelines. |
| */ |
| |
| timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES, |
| sizeof(*timelines), |
| GFP_KERNEL); |
| if (!timelines) |
| return -ENOMEM; |
| |
| count = 0; |
| for (n = 0; n < NUM_TIMELINES; n++) { |
| for_each_engine(engine, gt, id) { |
| struct intel_timeline *tl; |
| struct i915_request *rq; |
| |
| if (!intel_engine_can_store_dword(engine)) |
| continue; |
| |
| tl = intel_timeline_create(gt); |
| if (IS_ERR(tl)) { |
| err = PTR_ERR(tl); |
| goto out; |
| } |
| |
| intel_engine_pm_get(engine); |
| rq = checked_tl_write(tl, engine, count); |
| intel_engine_pm_put(engine); |
| if (IS_ERR(rq)) { |
| intel_timeline_put(tl); |
| err = PTR_ERR(rq); |
| goto out; |
| } |
| |
| timelines[count++] = tl; |
| i915_request_put(rq); |
| } |
| } |
| |
| out: |
| if (igt_flush_test(gt->i915)) |
| err = -EIO; |
| |
| for (n = 0; n < count; n++) { |
| struct intel_timeline *tl = timelines[n]; |
| |
| if (!err && READ_ONCE(*tl->hwsp_seqno) != n) { |
| GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n", |
| n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno); |
| GEM_TRACE_DUMP(); |
| err = -EINVAL; |
| } |
| intel_timeline_put(tl); |
| } |
| |
| kvfree(timelines); |
| return err; |
| #undef NUM_TIMELINES |
| } |
| |
| static int live_hwsp_wrap(void *arg) |
| { |
| struct intel_gt *gt = arg; |
| struct intel_engine_cs *engine; |
| struct intel_timeline *tl; |
| enum intel_engine_id id; |
| int err = 0; |
| |
| /* |
| * Across a seqno wrap, we need to keep the old cacheline alive for |
| * foreign GPU references. |
| */ |
| |
| tl = intel_timeline_create(gt); |
| if (IS_ERR(tl)) |
| return PTR_ERR(tl); |
| |
| if (!tl->has_initial_breadcrumb) |
| goto out_free; |
| |
| err = selftest_tl_pin(tl); |
| if (err) |
| goto out_free; |
| |
| for_each_engine(engine, gt, id) { |
| const u32 *hwsp_seqno[2]; |
| struct i915_request *rq; |
| u32 seqno[2]; |
| |
| if (!intel_engine_can_store_dword(engine)) |
| continue; |
| |
| rq = intel_engine_create_kernel_request(engine); |
| if (IS_ERR(rq)) { |
| err = PTR_ERR(rq); |
| goto out; |
| } |
| |
| tl->seqno = -4u; |
| |
| mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING); |
| err = intel_timeline_get_seqno(tl, rq, &seqno[0]); |
| mutex_unlock(&tl->mutex); |
| if (err) { |
| i915_request_add(rq); |
| goto out; |
| } |
| pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n", |
| seqno[0], tl->hwsp_offset); |
| |
| err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]); |
| if (err) { |
| i915_request_add(rq); |
| goto out; |
| } |
| hwsp_seqno[0] = tl->hwsp_seqno; |
| |
| mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING); |
| err = intel_timeline_get_seqno(tl, rq, &seqno[1]); |
| mutex_unlock(&tl->mutex); |
| if (err) { |
| i915_request_add(rq); |
| goto out; |
| } |
| pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n", |
| seqno[1], tl->hwsp_offset); |
| |
| err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]); |
| if (err) { |
| i915_request_add(rq); |
| goto out; |
| } |
| hwsp_seqno[1] = tl->hwsp_seqno; |
| |
| /* A seqno wrap should move us onto a new hwsp */ |
| GEM_BUG_ON(seqno[1] >= seqno[0]); |
| GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]); |
| |
| i915_request_add(rq); |
| |
| if (i915_request_wait(rq, 0, HZ / 5) < 0) { |
| pr_err("Wait for timeline writes timed out!\n"); |
| err = -EIO; |
| goto out; |
| } |
| |
| if (READ_ONCE(*hwsp_seqno[0]) != seqno[0] || |
| READ_ONCE(*hwsp_seqno[1]) != seqno[1]) { |
| pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n", |
| *hwsp_seqno[0], *hwsp_seqno[1], |
| seqno[0], seqno[1]); |
| err = -EINVAL; |
| goto out; |
| } |
| |
| intel_gt_retire_requests(gt); /* recycle HWSP */ |
| } |
| |
| out: |
| if (igt_flush_test(gt->i915)) |
| err = -EIO; |
| |
| intel_timeline_unpin(tl); |
| out_free: |
| intel_timeline_put(tl); |
| return err; |
| } |
| |
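| /* |
| * Emit commands that store @seqno and then copy the current HWSP value |
| * alongside it (via a GPR), producing a pair for check_watcher() to compare. |
| */ |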
| static int emit_read_hwsp(struct i915_request *rq, |
| u32 seqno, u32 hwsp, |
| u32 *addr) |
| { |
| const u32 gpr = i915_mmio_reg_offset(GEN8_RING_CS_GPR(rq->engine->mmio_base, 0)); |
| u32 *cs; |
| |
| cs = intel_ring_begin(rq, 12); |
| if (IS_ERR(cs)) |
| return PTR_ERR(cs); |
| |
| *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; |
| *cs++ = *addr; |
| *cs++ = 0; |
| *cs++ = seqno; |
| *addr += 4; |
| |
| *cs++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_USE_GGTT; |
| *cs++ = gpr; |
| *cs++ = hwsp; |
| *cs++ = 0; |
| |
| *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; |
| *cs++ = gpr; |
| *cs++ = *addr; |
| *cs++ = 0; |
| *addr += 4; |
| |
| intel_ring_advance(rq, cs); |
| |
| return 0; |
| } |
| |
| struct hwsp_watcher { |
| struct i915_vma *vma; |
| struct i915_request *rq; |
| u32 addr; |
| u32 *map; |
| }; |
| |
| static bool cmp_lt(u32 a, u32 b) |
| { |
| return a < b; |
| } |
| |
| static bool cmp_gte(u32 a, u32 b) |
| { |
| return a >= b; |
| } |
| |
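| /* |
| * Allocate a scratch buffer for the watcher, mapped with the same caching |
| * attributes as the timeline's HWSP, and pin it into the GGTT so the GPU |
| * can write the seqno/HWSP pairs into it. |
| */ |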
| static int setup_watcher(struct hwsp_watcher *w, struct intel_gt *gt, |
| struct intel_timeline *tl) |
| { |
| struct drm_i915_gem_object *obj; |
| struct i915_vma *vma; |
| |
| obj = i915_gem_object_create_internal(gt->i915, SZ_2M); |
| if (IS_ERR(obj)) |
| return PTR_ERR(obj); |
| |
| /* keep the same cache settings as timeline */ |
| i915_gem_object_set_pat_index(obj, tl->hwsp_ggtt->obj->pat_index); |
| w->map = i915_gem_object_pin_map_unlocked(obj, |
| page_unmask_bits(tl->hwsp_ggtt->obj->mm.mapping)); |
| if (IS_ERR(w->map)) { |
| i915_gem_object_put(obj); |
| return PTR_ERR(w->map); |
| } |
| |
| vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); |
| if (IS_ERR(vma)) { |
| i915_gem_object_put(obj); |
| return PTR_ERR(vma); |
| } |
| |
| w->vma = vma; |
| w->addr = i915_ggtt_offset(vma); |
| return 0; |
| } |
| |
| static void switch_tl_lock(struct i915_request *from, struct i915_request *to) |
| { |
| /* some light mutex juggling required; think co-routines */ |
| |
| if (from) { |
| lockdep_unpin_lock(&from->context->timeline->mutex, from->cookie); |
| mutex_unlock(&from->context->timeline->mutex); |
| } |
| |
| if (to) { |
| mutex_lock(&to->context->timeline->mutex); |
| to->cookie = lockdep_pin_lock(&to->context->timeline->mutex); |
| } |
| } |
| |
| static int create_watcher(struct hwsp_watcher *w, |
| struct intel_engine_cs *engine, |
| int ringsz) |
| { |
| struct intel_context *ce; |
| |
| ce = intel_context_create(engine); |
| if (IS_ERR(ce)) |
| return PTR_ERR(ce); |
| |
| ce->ring_size = ringsz; |
| w->rq = intel_context_create_request(ce); |
| intel_context_put(ce); |
| if (IS_ERR(w->rq)) |
| return PTR_ERR(w->rq); |
| |
| w->addr = i915_ggtt_offset(w->vma); |
| |
| switch_tl_lock(w->rq, NULL); |
| |
| return 0; |
| } |
| |
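| /* |
| * Submit the watcher's request, wait for it to complete, and verify that |
| * every seqno/HWSP pair it recorded satisfies @op. |
| */ |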
| static int check_watcher(struct hwsp_watcher *w, const char *name, |
| bool (*op)(u32 hwsp, u32 seqno)) |
| { |
| struct i915_request *rq = fetch_and_zero(&w->rq); |
| u32 offset, end; |
| int err; |
| |
| GEM_BUG_ON(w->addr - i915_ggtt_offset(w->vma) > w->vma->size); |
| |
| i915_request_get(rq); |
| switch_tl_lock(NULL, rq); |
| i915_request_add(rq); |
| |
| if (i915_request_wait(rq, 0, HZ) < 0) { |
| err = -ETIME; |
| goto out; |
| } |
| |
| err = 0; |
| offset = 0; |
| end = (w->addr - i915_ggtt_offset(w->vma)) / sizeof(*w->map); |
| while (offset < end) { |
| if (!op(w->map[offset + 1], w->map[offset])) { |
| pr_err("Watcher '%s' found HWSP value %x for seqno %x\n", |
| name, w->map[offset + 1], w->map[offset]); |
| err = -EINVAL; |
| } |
| |
| offset += 2; |
| } |
| |
| out: |
| i915_request_put(rq); |
| return err; |
| } |
| |
| static void cleanup_watcher(struct hwsp_watcher *w) |
| { |
| if (w->rq) { |
| switch_tl_lock(NULL, w->rq); |
| |
| i915_request_add(w->rq); |
| } |
| |
| i915_vma_unpin_and_release(&w->vma, I915_VMA_RELEASE_MAP); |
| } |
| |
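| /* |
| * Retire completed requests on @tl in order; returns true if the timeline |
| * is now idle. |
| */ |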
| static bool retire_requests(struct intel_timeline *tl) |
| { |
| struct i915_request *rq, *rn; |
| |
| mutex_lock(&tl->mutex); |
| list_for_each_entry_safe(rq, rn, &tl->requests, link) |
| if (!i915_request_retire(rq)) |
| break; |
| mutex_unlock(&tl->mutex); |
| |
| return !i915_active_fence_isset(&tl->last_request); |
| } |
| |
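| /* |
| * Keep submitting requests on @rq's context until the timeline seqno wraps |
| * below the original request, then return a fresh request from beyond the |
| * wrap. |
| */ |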
| static struct i915_request *wrap_timeline(struct i915_request *rq) |
| { |
| struct intel_context *ce = rq->context; |
| struct intel_timeline *tl = ce->timeline; |
| u32 seqno = rq->fence.seqno; |
| |
| while (tl->seqno >= seqno) { /* Cause a wrap */ |
| i915_request_put(rq); |
| rq = intel_context_create_request(ce); |
| if (IS_ERR(rq)) |
| return rq; |
| |
| i915_request_get(rq); |
| i915_request_add(rq); |
| } |
| |
| i915_request_put(rq); |
| rq = i915_request_create(ce); |
| if (IS_ERR(rq)) |
| return rq; |
| |
| i915_request_get(rq); |
| i915_request_add(rq); |
| |
| return rq; |
| } |
| |
| static int live_hwsp_read(void *arg) |
| { |
| struct intel_gt *gt = arg; |
| struct hwsp_watcher watcher[2] = {}; |
| struct intel_engine_cs *engine; |
| struct intel_timeline *tl; |
| enum intel_engine_id id; |
| int err = 0; |
| int i; |
| |
| /* |
| * If we take a reference to the HWSP for reading on the GPU, that |
| * read may be arbitrarily delayed (either by foreign fence or |
| * priority saturation) and a wrap can happen within 30 minutes. |
| * When the GPU read is finally submitted it should be correct, |
| * even across multiple wraps. |
| */ |
| |
| if (GRAPHICS_VER(gt->i915) < 8) /* CS convenience [SRM/LRM] */ |
| return 0; |
| |
| tl = intel_timeline_create(gt); |
| if (IS_ERR(tl)) |
| return PTR_ERR(tl); |
| |
| if (!tl->has_initial_breadcrumb) |
| goto out_free; |
| |
| err = selftest_tl_pin(tl); |
| if (err) |
| goto out_free; |
| |
| for (i = 0; i < ARRAY_SIZE(watcher); i++) { |
| err = setup_watcher(&watcher[i], gt, tl); |
| if (err) |
| goto out; |
| } |
| |
| for_each_engine(engine, gt, id) { |
| struct intel_context *ce; |
| unsigned long count = 0; |
| IGT_TIMEOUT(end_time); |
| |
| /* Create a request we can use for remote reading of the HWSP */ |
| err = create_watcher(&watcher[1], engine, SZ_512K); |
| if (err) |
| goto out; |
| |
| do { |
| struct i915_sw_fence *submit; |
| struct i915_request *rq; |
| u32 hwsp, dummy; |
| |
| submit = heap_fence_create(GFP_KERNEL); |
| if (!submit) { |
| err = -ENOMEM; |
| goto out; |
| } |
| |
| err = create_watcher(&watcher[0], engine, SZ_4K); |
| if (err) |
| goto out; |
| |
| ce = intel_context_create(engine); |
| if (IS_ERR(ce)) { |
| err = PTR_ERR(ce); |
| goto out; |
| } |
| |
| ce->timeline = intel_timeline_get(tl); |
| |
| /* Ensure timeline is mapped, done during first pin */ |
| err = intel_context_pin(ce); |
| if (err) { |
| intel_context_put(ce); |
| goto out; |
| } |
| |
| /* |
| * Start at a new wrap, and set seqno right before another wrap, |
| * saving 30 minutes of nops |
| */ |
| tl->seqno = -12u + 2 * (count & 3); |
| __intel_timeline_get_seqno(tl, &dummy); |
| |
| rq = i915_request_create(ce); |
| if (IS_ERR(rq)) { |
| err = PTR_ERR(rq); |
| intel_context_unpin(ce); |
| intel_context_put(ce); |
| goto out; |
| } |
| |
| err = i915_sw_fence_await_dma_fence(&rq->submit, |
| &watcher[0].rq->fence, 0, |
| GFP_KERNEL); |
| if (err < 0) { |
| i915_request_add(rq); |
| intel_context_unpin(ce); |
| intel_context_put(ce); |
| goto out; |
| } |
| |
| switch_tl_lock(rq, watcher[0].rq); |
| err = intel_timeline_read_hwsp(rq, watcher[0].rq, &hwsp); |
| if (err == 0) |
| err = emit_read_hwsp(watcher[0].rq, /* before */ |
| rq->fence.seqno, hwsp, |
| &watcher[0].addr); |
| switch_tl_lock(watcher[0].rq, rq); |
| if (err) { |
| i915_request_add(rq); |
| intel_context_unpin(ce); |
| intel_context_put(ce); |
| goto out; |
| } |
| |
| switch_tl_lock(rq, watcher[1].rq); |
| err = intel_timeline_read_hwsp(rq, watcher[1].rq, &hwsp); |
| if (err == 0) |
| err = emit_read_hwsp(watcher[1].rq, /* after */ |
| rq->fence.seqno, hwsp, |
| &watcher[1].addr); |
| switch_tl_lock(watcher[1].rq, rq); |
| if (err) { |
| i915_request_add(rq); |
| intel_context_unpin(ce); |
| intel_context_put(ce); |
| goto out; |
| } |
| |
| i915_request_get(rq); |
| i915_request_add(rq); |
| |
| rq = wrap_timeline(rq); |
| intel_context_unpin(ce); |
| intel_context_put(ce); |
| if (IS_ERR(rq)) { |
| err = PTR_ERR(rq); |
| goto out; |
| } |
| |
| err = i915_sw_fence_await_dma_fence(&watcher[1].rq->submit, |
| &rq->fence, 0, |
| GFP_KERNEL); |
| if (err < 0) { |
| i915_request_put(rq); |
| goto out; |
| } |
| |
| err = check_watcher(&watcher[0], "before", cmp_lt); |
| i915_sw_fence_commit(submit); |
| heap_fence_put(submit); |
| if (err) { |
| i915_request_put(rq); |
| goto out; |
| } |
| count++; |
| |
| /* Flush the timeline before manually wrapping again */ |
| if (i915_request_wait(rq, |
| I915_WAIT_INTERRUPTIBLE, |
| HZ) < 0) { |
| err = -ETIME; |
| i915_request_put(rq); |
| goto out; |
| } |
| retire_requests(tl); |
| i915_request_put(rq); |
| |
| /* Single requests are limited to half a ring at most */ |
| if (8 * watcher[1].rq->ring->emit > |
| 3 * watcher[1].rq->ring->size) |
| break; |
| |
| } while (!__igt_timeout(end_time, NULL) && |
| count < (PAGE_SIZE / TIMELINE_SEQNO_BYTES - 1) / 2); |
| |
| pr_info("%s: simulated %lu wraps\n", engine->name, count); |
| err = check_watcher(&watcher[1], "after", cmp_gte); |
| if (err) |
| goto out; |
| } |
| |
| out: |
| for (i = 0; i < ARRAY_SIZE(watcher); i++) |
| cleanup_watcher(&watcher[i]); |
| |
| intel_timeline_unpin(tl); |
| |
| if (igt_flush_test(gt->i915)) |
| err = -EIO; |
| |
| out_free: |
| intel_timeline_put(tl); |
| return err; |
| } |
| |
| static int live_hwsp_rollover_kernel(void *arg) |
| { |
| struct intel_gt *gt = arg; |
| struct intel_engine_cs *engine; |
| enum intel_engine_id id; |
| int err = 0; |
| |
| /* |
| * Run the host for long enough, and even the kernel context will |
| * see a seqno rollover. |
| */ |
| |
| for_each_engine(engine, gt, id) { |
| struct intel_context *ce = engine->kernel_context; |
| struct intel_timeline *tl = ce->timeline; |
| struct i915_request *rq[3] = {}; |
| int i; |
| |
| st_engine_heartbeat_disable(engine); |
| if (intel_gt_wait_for_idle(gt, HZ / 2)) { |
| err = -EIO; |
| goto out; |
| } |
| |
| GEM_BUG_ON(i915_active_fence_isset(&tl->last_request)); |
| tl->seqno = -2u; |
| WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno); |
| |
| for (i = 0; i < ARRAY_SIZE(rq); i++) { |
| struct i915_request *this; |
| |
| this = i915_request_create(ce); |
| if (IS_ERR(this)) { |
| err = PTR_ERR(this); |
| goto out; |
| } |
| |
| pr_debug("%s: create fence.seqno:%d\n", |
| engine->name, |
| lower_32_bits(this->fence.seqno)); |
| |
| GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl); |
| |
| rq[i] = i915_request_get(this); |
| i915_request_add(this); |
| } |
| |
| /* We expected a wrap! */ |
| GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno); |
| |
| if (i915_request_wait(rq[2], 0, HZ / 5) < 0) { |
| pr_err("Wait for timeline wrap timed out!\n"); |
| err = -EIO; |
| goto out; |
| } |
| |
| for (i = 0; i < ARRAY_SIZE(rq); i++) { |
| if (!i915_request_completed(rq[i])) { |
| pr_err("Pre-wrap request not completed!\n"); |
| err = -EINVAL; |
| goto out; |
| } |
| } |
| |
| out: |
| for (i = 0; i < ARRAY_SIZE(rq); i++) |
| i915_request_put(rq[i]); |
| st_engine_heartbeat_enable(engine); |
| if (err) |
| break; |
| } |
| |
| if (igt_flush_test(gt->i915)) |
| err = -EIO; |
| |
| return err; |
| } |
| |
| static int live_hwsp_rollover_user(void *arg) |
| { |
| struct intel_gt *gt = arg; |
| struct intel_engine_cs *engine; |
| enum intel_engine_id id; |
| int err = 0; |
| |
| /* |
| * Simulate a long running user context, and force the seqno wrap |
| * on the user's timeline. |
| */ |
| |
| for_each_engine(engine, gt, id) { |
| struct i915_request *rq[3] = {}; |
| struct intel_timeline *tl; |
| struct intel_context *ce; |
| int i; |
| |
| ce = intel_context_create(engine); |
| if (IS_ERR(ce)) |
| return PTR_ERR(ce); |
| |
| err = intel_context_alloc_state(ce); |
| if (err) |
| goto out; |
| |
| tl = ce->timeline; |
| if (!tl->has_initial_breadcrumb) |
| goto out; |
| |
| err = intel_context_pin(ce); |
| if (err) |
| goto out; |
| |
| tl->seqno = -4u; |
| WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno); |
| |
| for (i = 0; i < ARRAY_SIZE(rq); i++) { |
| struct i915_request *this; |
| |
| this = intel_context_create_request(ce); |
| if (IS_ERR(this)) { |
| err = PTR_ERR(this); |
| goto out_unpin; |
| } |
| |
| pr_debug("%s: create fence.seqno:%d\n", |
| engine->name, |
| lower_32_bits(this->fence.seqno)); |
| |
| GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl); |
| |
| rq[i] = i915_request_get(this); |
| i915_request_add(this); |
| } |
| |
| /* We expected a wrap! */ |
| GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno); |
| |
| if (i915_request_wait(rq[2], 0, HZ / 5) < 0) { |
| pr_err("Wait for timeline wrap timed out!\n"); |
| err = -EIO; |
| goto out_unpin; |
| } |
| |
| for (i = 0; i < ARRAY_SIZE(rq); i++) { |
| if (!i915_request_completed(rq[i])) { |
| pr_err("Pre-wrap request not completed!\n"); |
| err = -EINVAL; |
| goto out_unpin; |
| } |
| } |
| out_unpin: |
| intel_context_unpin(ce); |
| out: |
| for (i = 0; i < ARRAY_SIZE(rq); i++) |
| i915_request_put(rq[i]); |
| intel_context_put(ce); |
| if (err) |
| break; |
| } |
| |
| if (igt_flush_test(gt->i915)) |
| err = -EIO; |
| |
| return err; |
| } |
| |
| static int live_hwsp_recycle(void *arg) |
| { |
| struct intel_gt *gt = arg; |
| struct intel_engine_cs *engine; |
| enum intel_engine_id id; |
| unsigned long count; |
| int err = 0; |
| |
| /* |
| * Check seqno writes into one timeline at a time. We expect to |
| * recycle the breadcrumb slot between iterations and neither |
| * want to confuse ourselves or the GPU. |
| */ |
| |
| count = 0; |
| for_each_engine(engine, gt, id) { |
| IGT_TIMEOUT(end_time); |
| |
| if (!intel_engine_can_store_dword(engine)) |
| continue; |
| |
| intel_engine_pm_get(engine); |
| |
| do { |
| struct intel_timeline *tl; |
| struct i915_request *rq; |
| |
| tl = intel_timeline_create(gt); |
| if (IS_ERR(tl)) { |
| err = PTR_ERR(tl); |
| break; |
| } |
| |
| rq = checked_tl_write(tl, engine, count); |
| if (IS_ERR(rq)) { |
| intel_timeline_put(tl); |
| err = PTR_ERR(rq); |
| break; |
| } |
| |
| if (i915_request_wait(rq, 0, HZ / 5) < 0) { |
| pr_err("Wait for timeline writes timed out!\n"); |
| i915_request_put(rq); |
| intel_timeline_put(tl); |
| err = -EIO; |
| break; |
| } |
| |
| if (READ_ONCE(*tl->hwsp_seqno) != count) { |
| GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n", |
| count, tl->fence_context, |
| tl->hwsp_offset, *tl->hwsp_seqno); |
| GEM_TRACE_DUMP(); |
| err = -EINVAL; |
| } |
| |
| i915_request_put(rq); |
| intel_timeline_put(tl); |
| count++; |
| |
| if (err) |
| break; |
| } while (!__igt_timeout(end_time, NULL)); |
| |
| intel_engine_pm_put(engine); |
| if (err) |
| break; |
| } |
| |
| return err; |
| } |
| |
| int intel_timeline_live_selftests(struct drm_i915_private *i915) |
| { |
| static const struct i915_subtest tests[] = { |
| SUBTEST(live_hwsp_recycle), |
| SUBTEST(live_hwsp_engine), |
| SUBTEST(live_hwsp_alternate), |
| SUBTEST(live_hwsp_wrap), |
| SUBTEST(live_hwsp_read), |
| SUBTEST(live_hwsp_rollover_kernel), |
| SUBTEST(live_hwsp_rollover_user), |
| }; |
| |
| if (intel_gt_is_wedged(to_gt(i915))) |
| return 0; |
| |
| return intel_gt_live_subtests(tests, to_gt(i915)); |
| } |