| // SPDX-License-Identifier: MIT |
| /* |
| * Copyright © 2022 Intel Corporation |
| */ |
| |
| #include "xe_bb.h" |
| |
| #include "instructions/xe_mi_commands.h" |
| #include "regs/xe_gpu_commands.h" |
| #include "xe_device.h" |
| #include "xe_exec_queue_types.h" |
| #include "xe_gt.h" |
| #include "xe_hw_fence.h" |
| #include "xe_sa.h" |
| #include "xe_sched_job.h" |
| #include "xe_vm_types.h" |
| |
| static int bb_prefetch(struct xe_gt *gt) |
| { |
| struct xe_device *xe = gt_to_xe(gt); |
| |
| if (GRAPHICS_VERx100(xe) >= 1250 && !xe_gt_is_media_type(gt)) |
| /* |
| * RCS and CCS require 1K, although other engines would be |
| * okay with 512. |
| */ |
| return SZ_1K; |
| else |
| return SZ_512; |
| } |
| |
| struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm) |
| { |
| struct xe_tile *tile = gt_to_tile(gt); |
| struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL); |
| int err; |
| |
| if (!bb) |
| return ERR_PTR(-ENOMEM); |
| |
| /* |
| * We need to allocate space for the requested number of dwords, |
| * one additional MI_BATCH_BUFFER_END dword, and additional buffer |
| * space to accomodate the platform-specific hardware prefetch |
| * requirements. |
| */ |
| bb->bo = xe_sa_bo_new(!usm ? tile->mem.kernel_bb_pool : gt->usm.bb_pool, |
| 4 * (dwords + 1) + bb_prefetch(gt)); |
| if (IS_ERR(bb->bo)) { |
| err = PTR_ERR(bb->bo); |
| goto err; |
| } |
| |
| bb->cs = xe_sa_bo_cpu_addr(bb->bo); |
| bb->len = 0; |
| |
| return bb; |
| err: |
| kfree(bb); |
| return ERR_PTR(err); |
| } |
| |
| static struct xe_sched_job * |
| __xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb, u64 *addr) |
| { |
| u32 size = drm_suballoc_size(bb->bo); |
| |
| bb->cs[bb->len++] = MI_BATCH_BUFFER_END; |
| |
| xe_gt_assert(q->gt, bb->len * 4 + bb_prefetch(q->gt) <= size); |
| |
| xe_sa_bo_flush_write(bb->bo); |
| |
| return xe_sched_job_create(q, addr); |
| } |
| |
| struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q, |
| struct xe_bb *bb, |
| u64 batch_base_ofs, |
| u32 second_idx) |
| { |
| u64 addr[2] = { |
| batch_base_ofs + drm_suballoc_soffset(bb->bo), |
| batch_base_ofs + drm_suballoc_soffset(bb->bo) + |
| 4 * second_idx, |
| }; |
| |
| xe_gt_assert(q->gt, second_idx <= bb->len); |
| xe_gt_assert(q->gt, q->vm->flags & XE_VM_FLAG_MIGRATION); |
| |
| return __xe_bb_create_job(q, bb, addr); |
| } |
| |
| struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q, |
| struct xe_bb *bb) |
| { |
| u64 addr = xe_sa_bo_gpu_addr(bb->bo); |
| |
| xe_gt_assert(q->gt, !(q->vm && q->vm->flags & XE_VM_FLAG_MIGRATION)); |
| return __xe_bb_create_job(q, bb, &addr); |
| } |
| |
| void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence) |
| { |
| if (!bb) |
| return; |
| |
| xe_sa_bo_free(bb->bo, fence); |
| kfree(bb); |
| } |