| // SPDX-License-Identifier: GPL-2.0-only |
| /* |
| * Copyright (C) 2017 - Linaro Ltd |
| * Author: Jintack Lim <jintack.lim@linaro.org> |
| */ |
| |
| #include <linux/kvm_host.h> |
| |
| #include <asm/esr.h> |
| #include <asm/kvm_hyp.h> |
| #include <asm/kvm_mmu.h> |
| |
| enum trans_regime { |
| TR_EL10, |
| TR_EL20, |
| TR_EL2, |
| }; |
| |
| struct s1_walk_info { |
| u64 baddr; /* table base address */ |
| enum trans_regime regime; /* translation regime being walked */ |
| unsigned int max_oa_bits; /* effective output address size */ |
| unsigned int pgshift; /* log2 of the granule size */ |
| unsigned int txsz; /* T0SZ/T1SZ for the range being walked */ |
| int sl; /* start level of the walk */ |
| bool hpd; /* hierarchical permissions disabled */ |
| bool be; /* big-endian table accesses */ |
| bool s2; /* stage 2 applies to the table walk */ |
| }; |
| |
| struct s1_walk_result { |
| union { |
| struct { |
| u64 desc; /* leaf descriptor */ |
| u64 pa; /* output address */ |
| s8 level; /* final level, or S1_MMU_DISABLED */ |
| u8 APTable; /* accumulated APTable[1:0] */ |
| bool UXNTable; /* accumulated table UXN */ |
| bool PXNTable; /* accumulated table PXN */ |
| }; |
| struct { |
| u8 fst; /* fault status code */ |
| bool ptw; /* fault on the S2 walk of a S1 table access */ |
| bool s2; /* fault taken at stage 2 */ |
| }; |
| }; |
| bool failed; |
| }; |
| |
| static void fail_s1_walk(struct s1_walk_result *wr, u8 fst, bool ptw, bool s2) |
| { |
| wr->fst = fst; |
| wr->ptw = ptw; |
| wr->s2 = s2; |
| wr->failed = true; |
| } |
| |
| #define S1_MMU_DISABLED (-127) |
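| /* |
| * Magic value stored in s1_walk_result.level when the S1 MMU is off |
| * (SCTLR_ELx.M == 0, or HCR_EL2.{DC,TGE} forcing it off): the VA is |
| * then passed through as a flat output address. |
| */ |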
| |
| static int get_ia_size(struct s1_walk_info *wi) |
| { |
| return 64 - wi->txsz; |
| } |
| |
| /* Return true if the address is out of the configured OA range */ |
| static bool check_output_size(u64 ipa, struct s1_walk_info *wi) |
| { |
| return wi->max_oa_bits < 48 && (ipa & GENMASK_ULL(47, wi->max_oa_bits)); |
| } |
| |
| /* Return the translation regime that applies to an AT instruction */ |
| static enum trans_regime compute_translation_regime(struct kvm_vcpu *vcpu, u32 op) |
| { |
| /* |
| * We only get here from guest EL2, so the translation |
| * regime AT applies to is solely defined by {E2H,TGE}. |
| */ |
| switch (op) { |
| case OP_AT_S1E2R: |
| case OP_AT_S1E2W: |
| case OP_AT_S1E2A: |
| return vcpu_el2_e2h_is_set(vcpu) ? TR_EL20 : TR_EL2; |
| default: |
| return (vcpu_el2_e2h_is_set(vcpu) && |
| vcpu_el2_tge_is_set(vcpu)) ? TR_EL20 : TR_EL10; |
| } |
| } |
| |
| static int setup_s1_walk(struct kvm_vcpu *vcpu, u32 op, struct s1_walk_info *wi, |
| struct s1_walk_result *wr, u64 va) |
| { |
| u64 hcr, sctlr, tcr, tg, ps, ia_bits, ttbr; |
| unsigned int stride, x; |
| bool va55, tbi, lva, as_el0; |
| |
| hcr = __vcpu_sys_reg(vcpu, HCR_EL2); |
| |
| wi->regime = compute_translation_regime(vcpu, op); |
| as_el0 = (op == OP_AT_S1E0R || op == OP_AT_S1E0W); |
| |
| va55 = va & BIT(55); |
| |
| if (wi->regime == TR_EL2 && va55) |
| goto addrsz; |
| |
| wi->s2 = wi->regime == TR_EL10 && (hcr & (HCR_VM | HCR_DC)); |
| |
| switch (wi->regime) { |
| case TR_EL10: |
| sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); |
| tcr = vcpu_read_sys_reg(vcpu, TCR_EL1); |
| ttbr = (va55 ? |
| vcpu_read_sys_reg(vcpu, TTBR1_EL1) : |
| vcpu_read_sys_reg(vcpu, TTBR0_EL1)); |
| break; |
| case TR_EL2: |
| case TR_EL20: |
| sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2); |
| tcr = vcpu_read_sys_reg(vcpu, TCR_EL2); |
| ttbr = (va55 ? |
| vcpu_read_sys_reg(vcpu, TTBR1_EL2) : |
| vcpu_read_sys_reg(vcpu, TTBR0_EL2)); |
| break; |
| default: |
| BUG(); |
| } |
| |
| tbi = (wi->regime == TR_EL2 ? |
| FIELD_GET(TCR_EL2_TBI, tcr) : |
| (va55 ? |
| FIELD_GET(TCR_TBI1, tcr) : |
| FIELD_GET(TCR_TBI0, tcr))); |
| |
| if (!tbi && (u64)sign_extend64(va, 55) != va) |
| goto addrsz; |
| |
| va = (u64)sign_extend64(va, 55); |
| |
| /* Let's put the MMU disabled case aside immediately */ |
| switch (wi->regime) { |
| case TR_EL10: |
| /* |
| * If dealing with the EL1&0 translation regime, 3 things |
| * can disable the S1 translation: |
| * |
| * - HCR_EL2.DC = 1 |
| * - HCR_EL2.{E2H,TGE} = {0,1} |
| * - SCTLR_EL1.M = 0 |
| * |
| * The TGE part is interesting. If we have decided that this |
| * is EL1&0, then it means that either {E2H,TGE} == {1,0} or |
| * {0,x}, and we only need to test for TGE == 1. |
| */ |
| if (hcr & (HCR_DC | HCR_TGE)) { |
| wr->level = S1_MMU_DISABLED; |
| break; |
| } |
| fallthrough; |
| case TR_EL2: |
| case TR_EL20: |
| if (!(sctlr & SCTLR_ELx_M)) |
| wr->level = S1_MMU_DISABLED; |
| break; |
| } |
| |
| if (wr->level == S1_MMU_DISABLED) { |
| if (va >= BIT(kvm_get_pa_bits(vcpu->kvm))) |
| goto addrsz; |
| |
| wr->pa = va; |
| return 0; |
| } |
| |
| wi->be = sctlr & SCTLR_ELx_EE; |
| |
| wi->hpd = kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, HPDS, IMP); |
| wi->hpd &= (wi->regime == TR_EL2 ? |
| FIELD_GET(TCR_EL2_HPD, tcr) : |
| (va55 ? |
| FIELD_GET(TCR_HPD1, tcr) : |
| FIELD_GET(TCR_HPD0, tcr))); |
| |
| /* Someone was silly enough to encode TG0/TG1 differently */ |
| if (va55) { |
| wi->txsz = FIELD_GET(TCR_T1SZ_MASK, tcr); |
| tg = FIELD_GET(TCR_TG1_MASK, tcr); |
| |
| switch (tg << TCR_TG1_SHIFT) { |
| case TCR_TG1_4K: |
| wi->pgshift = 12; break; |
| case TCR_TG1_16K: |
| wi->pgshift = 14; break; |
| case TCR_TG1_64K: |
| default: /* IMPDEF: treat any other value as 64k */ |
| wi->pgshift = 16; break; |
| } |
| } else { |
| wi->txsz = FIELD_GET(TCR_T0SZ_MASK, tcr); |
| tg = FIELD_GET(TCR_TG0_MASK, tcr); |
| |
| switch (tg << TCR_TG0_SHIFT) { |
| case TCR_TG0_4K: |
| wi->pgshift = 12; break; |
| case TCR_TG0_16K: |
| wi->pgshift = 14; break; |
| case TCR_TG0_64K: |
| default: /* IMPDEF: treat any other value as 64k */ |
| wi->pgshift = 16; break; |
| } |
| } |
| |
| /* R_PLCGL, R_YXNYW */ |
| if (!kvm_has_feat_enum(vcpu->kvm, ID_AA64MMFR2_EL1, ST, 48_47)) { |
| if (wi->txsz > 39) |
| goto transfault_l0; |
| } else { |
| if (wi->txsz > 48 || (BIT(wi->pgshift) == SZ_64K && wi->txsz > 47)) |
| goto transfault_l0; |
| } |
| |
| /* R_GTJBY, R_SXWGM */ |
| switch (BIT(wi->pgshift)) { |
| case SZ_4K: |
| lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN4, 52_BIT); |
| lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS); |
| break; |
| case SZ_16K: |
| lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR0_EL1, TGRAN16, 52_BIT); |
| lva &= tcr & (wi->regime == TR_EL2 ? TCR_EL2_DS : TCR_DS); |
| break; |
| case SZ_64K: |
| lva = kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, VARange, 52); |
| break; |
| } |
| |
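| /* Minimum T[01]SZ is 16, or 12 when 52bit VAs are supported */ |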
| if ((lva && wi->txsz < 12) || (!lva && wi->txsz < 16)) |
| goto transfault_l0; |
| |
| ia_bits = get_ia_size(wi); |
| |
| /* R_YYVYV, I_THCZK */ |
| if ((!va55 && va > GENMASK(ia_bits - 1, 0)) || |
| (va55 && va < GENMASK(63, ia_bits))) |
| goto transfault_l0; |
| |
| /* I_ZFSYQ */ |
| if (wi->regime != TR_EL2 && |
| (tcr & (va55 ? TCR_EPD1_MASK : TCR_EPD0_MASK))) |
| goto transfault_l0; |
| |
| /* R_BNDVG and following statements */ |
| if (kvm_has_feat(vcpu->kvm, ID_AA64MMFR2_EL1, E0PD, IMP) && |
| as_el0 && (tcr & (va55 ? TCR_E0PD1 : TCR_E0PD0))) |
| goto transfault_l0; |
| |
| /* AArch64.S1StartLevel() */ |
| stride = wi->pgshift - 3; |
| wi->sl = 3 - (((ia_bits - 1) - wi->pgshift) / stride); |
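| /* |
| * Example: a 4kB granule (pgshift = 12, stride = 9) with T0SZ = 16 |
| * gives ia_bits = 48, hence sl = 3 - ((47 - 12) / 9) = 0, i.e. a |
| * walk starting at level 0. |
| */ |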
| |
| ps = (wi->regime == TR_EL2 ? |
| FIELD_GET(TCR_EL2_PS_MASK, tcr) : FIELD_GET(TCR_IPS_MASK, tcr)); |
| |
| wi->max_oa_bits = min(get_kvm_ipa_limit(), ps_to_output_size(ps)); |
| |
| /* Compute minimal alignment */ |
| x = 3 + ia_bits - ((3 - wi->sl) * stride + wi->pgshift); |
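| /* |
| * With the 4kB/48bit example above, x = 3 + 48 - (3 * 9 + 12) = 12: |
| * the level 0 table holds 512 8-byte descriptors, hence a 4kB |
| * minimum alignment for TTBR_ELx.BADDR. |
| */ |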
| |
| wi->baddr = ttbr & TTBRx_EL1_BADDR; |
| |
| /* R_VPBBF */ |
| if (check_output_size(wi->baddr, wi)) |
| goto addrsz; |
| |
| wi->baddr &= GENMASK_ULL(wi->max_oa_bits - 1, x); |
| |
| return 0; |
| |
| addrsz: /* Address Size Fault level 0 */ |
| fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(0), false, false); |
| return -EFAULT; |
| |
| transfault_l0: /* Translation Fault level 0 */ |
| fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(0), false, false); |
| return -EFAULT; |
| } |
| |
| static int walk_s1(struct kvm_vcpu *vcpu, struct s1_walk_info *wi, |
| struct s1_walk_result *wr, u64 va) |
| { |
| u64 va_top, va_bottom, baddr, desc; |
| int level, stride, ret; |
| |
| level = wi->sl; |
| stride = wi->pgshift - 3; |
| baddr = wi->baddr; |
| |
| va_top = get_ia_size(wi) - 1; |
| |
| while (1) { |
| u64 index, ipa; |
| |
| va_bottom = (3 - level) * stride + wi->pgshift; |
| index = (va & GENMASK_ULL(va_top, va_bottom)) >> (va_bottom - 3); |
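| /* |
| * Example: a 4kB granule at level 0: va_bottom = 3 * 9 + 12 = 39, |
| * so this extracts va[47:39] and scales it by the 8-byte descriptor |
| * size to form the offset into the table. |
| */ |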
| |
| ipa = baddr | index; |
| |
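| /* |
| * With a stage 2 in play, the table address computed at stage 1 is |
| * an IPA: translate it (and check that it is readable) through the |
| * guest's stage 2 before fetching the descriptor. |
| */ |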
| if (wi->s2) { |
| struct kvm_s2_trans s2_trans = {}; |
| |
| ret = kvm_walk_nested_s2(vcpu, ipa, &s2_trans); |
| if (ret) { |
| fail_s1_walk(wr, |
| (s2_trans.esr & ~ESR_ELx_FSC_LEVEL) | level, |
| true, true); |
| return ret; |
| } |
| |
| if (!kvm_s2_trans_readable(&s2_trans)) { |
| fail_s1_walk(wr, ESR_ELx_FSC_PERM_L(level), |
| true, true); |
| |
| return -EPERM; |
| } |
| |
| ipa = kvm_s2_trans_output(&s2_trans); |
| } |
| |
| ret = kvm_read_guest(vcpu->kvm, ipa, &desc, sizeof(desc)); |
| if (ret) { |
| fail_s1_walk(wr, ESR_ELx_FSC_SEA_TTW(level), |
| true, false); |
| return ret; |
| } |
| |
| if (wi->be) |
| desc = be64_to_cpu((__force __be64)desc); |
| else |
| desc = le64_to_cpu((__force __le64)desc); |
| |
| /* Invalid descriptor */ |
| if (!(desc & BIT(0))) |
| goto transfault; |
| |
| /* Block mapping, check validity down the line */ |
| if (!(desc & BIT(1))) |
| break; |
| |
| /* Page mapping */ |
| if (level == 3) |
| break; |
| |
| /* Table handling */ |
| if (!wi->hpd) { |
| wr->APTable |= FIELD_GET(S1_TABLE_AP, desc); |
| wr->UXNTable |= FIELD_GET(PMD_TABLE_UXN, desc); |
| wr->PXNTable |= FIELD_GET(PMD_TABLE_PXN, desc); |
| } |
| |
| baddr = desc & GENMASK_ULL(47, wi->pgshift); |
| |
| /* Check for out-of-range OA */ |
| if (check_output_size(baddr, wi)) |
| goto addrsz; |
| |
| /* Prepare for next round */ |
| va_top = va_bottom - 1; |
| level++; |
| } |
| |
| /* Block mapping, check the validity of the level */ |
| if (!(desc & BIT(1))) { |
| bool valid_block = false; |
| |
| switch (BIT(wi->pgshift)) { |
| case SZ_4K: |
| valid_block = level == 1 || level == 2; |
| break; |
| case SZ_16K: |
| case SZ_64K: |
| valid_block = level == 2; |
| break; |
| } |
| |
| if (!valid_block) |
| goto transfault; |
| } |
| |
| if (check_output_size(desc & GENMASK(47, va_bottom), wi)) |
| goto addrsz; |
| |
| va_bottom += contiguous_bit_shift(desc, wi, level); |
| |
| wr->failed = false; |
| wr->level = level; |
| wr->desc = desc; |
| wr->pa = desc & GENMASK(47, va_bottom); |
| wr->pa |= va & GENMASK_ULL(va_bottom - 1, 0); |
| |
| return 0; |
| |
| addrsz: |
| fail_s1_walk(wr, ESR_ELx_FSC_ADDRSZ_L(level), true, false); |
| return -EINVAL; |
| transfault: |
| fail_s1_walk(wr, ESR_ELx_FSC_FAULT_L(level), true, false); |
| return -ENOENT; |
| } |
| |
| struct mmu_config { |
| u64 ttbr0; |
| u64 ttbr1; |
| u64 tcr; |
| u64 mair; |
| u64 sctlr; |
| u64 vttbr; |
| u64 vtcr; |
| u64 hcr; |
| }; |
| |
| static void __mmu_config_save(struct mmu_config *config) |
| { |
| config->ttbr0 = read_sysreg_el1(SYS_TTBR0); |
| config->ttbr1 = read_sysreg_el1(SYS_TTBR1); |
| config->tcr = read_sysreg_el1(SYS_TCR); |
| config->mair = read_sysreg_el1(SYS_MAIR); |
| config->sctlr = read_sysreg_el1(SYS_SCTLR); |
| config->vttbr = read_sysreg(vttbr_el2); |
| config->vtcr = read_sysreg(vtcr_el2); |
| config->hcr = read_sysreg(hcr_el2); |
| } |
| |
| static void __mmu_config_restore(struct mmu_config *config) |
| { |
| write_sysreg(config->hcr, hcr_el2); |
| |
| /* |
| * ARM errata 1165522 and 1530923 require TGE to be 1 before |
| * we update the guest state. |
| */ |
| asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); |
| |
| write_sysreg_el1(config->ttbr0, SYS_TTBR0); |
| write_sysreg_el1(config->ttbr1, SYS_TTBR1); |
| write_sysreg_el1(config->tcr, SYS_TCR); |
| write_sysreg_el1(config->mair, SYS_MAIR); |
| write_sysreg_el1(config->sctlr, SYS_SCTLR); |
| write_sysreg(config->vttbr, vttbr_el2); |
| write_sysreg(config->vtcr, vtcr_el2); |
| } |
| |
| static bool at_s1e1p_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) |
| { |
| u64 host_pan; |
| bool fail; |
| |
| host_pan = read_sysreg_s(SYS_PSTATE_PAN); |
| write_sysreg_s(*vcpu_cpsr(vcpu) & PSTATE_PAN, SYS_PSTATE_PAN); |
| |
| switch (op) { |
| case OP_AT_S1E1RP: |
| fail = __kvm_at(OP_AT_S1E1RP, vaddr); |
| break; |
| case OP_AT_S1E1WP: |
| fail = __kvm_at(OP_AT_S1E1WP, vaddr); |
| break; |
| } |
| |
| write_sysreg_s(host_pan, SYS_PSTATE_PAN); |
| |
| return fail; |
| } |
| |
| #define MEMATTR(ic, oc) (MEMATTR_##oc << 4 | MEMATTR_##ic) |
| #define MEMATTR_NC 0b0100 |
| #define MEMATTR_Wt 0b1000 |
| #define MEMATTR_Wb 0b1100 |
| #define MEMATTR_WbRaWa 0b1111 |
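| /* e.g. MEMATTR(WbRaWa, WbRaWa) == 0xff: Normal Inner+Outer WB RaWa */ |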
| |
| #define MEMATTR_IS_DEVICE(m) (((m) & GENMASK(7, 4)) == 0) |
| |
| static u8 s2_memattr_to_attr(u8 memattr) |
| { |
| memattr &= 0b1111; |
| |
| switch (memattr) { |
| case 0b0000: /* Device-nGnRnE */ |
| case 0b0001: /* Device-nGnRE */ |
| case 0b0010: /* Device-nGRE */ |
| case 0b0011: /* Device-GRE */ |
| /* Device type ends up in MemAttr[3:2] of the S1-format attribute */ |
| return memattr << 2; |
| case 0b0100: |
| return MEMATTR(Wb, Wb); |
| case 0b0101: |
| return MEMATTR(NC, NC); |
| case 0b0110: |
| return MEMATTR(Wt, NC); |
| case 0b0111: |
| return MEMATTR(Wb, NC); |
| case 0b1000: |
| /* Reserved, assume NC */ |
| return MEMATTR(NC, NC); |
| case 0b1001: |
| return MEMATTR(NC, Wt); |
| case 0b1010: |
| return MEMATTR(Wt, Wt); |
| case 0b1011: |
| return MEMATTR(Wb, Wt); |
| case 0b1100: |
| /* Reserved, assume NC */ |
| return MEMATTR(NC, NC); |
| case 0b1101: |
| return MEMATTR(NC, Wb); |
| case 0b1110: |
| return MEMATTR(Wt, Wb); |
| case 0b1111: |
| return MEMATTR(Wb, Wb); |
| default: |
| unreachable(); |
| } |
| } |
| |
| static u8 combine_s1_s2_attr(u8 s1, u8 s2) |
| { |
| bool transient; |
| u8 final = 0; |
| |
| /* Upgrade transient s1 to non-transient to simplify things */ |
| switch (s1) { |
| case 0b0001 ... 0b0011: /* Normal, Write-Through Transient */ |
| transient = true; |
| s1 = MEMATTR_Wt | (s1 & GENMASK(1,0)); |
| break; |
| case 0b0101 ... 0b0111: /* Normal, Write-Back Transient */ |
| transient = true; |
| s1 = MEMATTR_Wb | (s1 & GENMASK(1,0)); |
| break; |
| default: |
| transient = false; |
| } |
| |
| /* S2CombineS1AttrHints() */ |
| if ((s1 & GENMASK(3, 2)) == MEMATTR_NC || |
| (s2 & GENMASK(3, 2)) == MEMATTR_NC) |
| final = MEMATTR_NC; |
| else if ((s1 & GENMASK(3, 2)) == MEMATTR_Wt || |
| (s2 & GENMASK(3, 2)) == MEMATTR_Wt) |
| final = MEMATTR_Wt; |
| else |
| final = MEMATTR_Wb; |
| |
| if (final != MEMATTR_NC) { |
| /* Inherit RaWa hints from S1 */ |
| if (transient) { |
| switch (s1 & GENMASK(3, 2)) { |
| case MEMATTR_Wt: |
| final = 0; |
| break; |
| case MEMATTR_Wb: |
| final = MEMATTR_NC; |
| break; |
| } |
| } |
| |
| final |= s1 & GENMASK(1, 0); |
| } |
| |
| return final; |
| } |
| |
| #define ATTR_NSH 0b00 |
| #define ATTR_RSV 0b01 |
| #define ATTR_OSH 0b10 |
| #define ATTR_ISH 0b11 |
| |
| static u8 compute_sh(u8 attr, u64 desc) |
| { |
| u8 sh; |
| |
| /* Any form of Device, as well as Normal NC, has SH[1:0]=0b10 */ |
| if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC)) |
| return ATTR_OSH; |
| |
| sh = FIELD_GET(PTE_SHARED, desc); |
| if (sh == ATTR_RSV) /* Reserved, mapped to NSH */ |
| sh = ATTR_NSH; |
| |
| return sh; |
| } |
| |
| static u8 combine_sh(u8 s1_sh, u8 s2_sh) |
| { |
| if (s1_sh == ATTR_OSH || s2_sh == ATTR_OSH) |
| return ATTR_OSH; |
| if (s1_sh == ATTR_ISH || s2_sh == ATTR_ISH) |
| return ATTR_ISH; |
| |
| return ATTR_NSH; |
| } |
| |
| static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par, |
| struct kvm_s2_trans *tr) |
| { |
| u8 s1_parattr, s2_memattr, final_attr; |
| u64 par; |
| |
| /* If S2 has failed to translate, report the damage */ |
| if (tr->esr) { |
| par = SYS_PAR_EL1_RES1; |
| par |= SYS_PAR_EL1_F; |
| par |= SYS_PAR_EL1_S; |
| par |= FIELD_PREP(SYS_PAR_EL1_FST, tr->esr); |
| return par; |
| } |
| |
| s1_parattr = FIELD_GET(SYS_PAR_EL1_ATTR, s1_par); |
| s2_memattr = FIELD_GET(GENMASK(5, 2), tr->desc); |
| |
| if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_FWB) { |
| if (!kvm_has_feat(vcpu->kvm, ID_AA64PFR2_EL1, MTEPERM, IMP)) |
| s2_memattr &= ~BIT(3); |
| |
| /* Combination of R_VRJSW and R_RHWZM */ |
| switch (s2_memattr) { |
| case 0b0101: |
| if (MEMATTR_IS_DEVICE(s1_parattr)) |
| final_attr = s1_parattr; |
| else |
| final_attr = MEMATTR(NC, NC); |
| break; |
| case 0b0110: |
| case 0b1110: |
| final_attr = MEMATTR(WbRaWa, WbRaWa); |
| break; |
| case 0b0111: |
| case 0b1111: |
| /* Preserve S1 attribute */ |
| final_attr = s1_parattr; |
| break; |
| case 0b0100: |
| case 0b1100: |
| case 0b1101: |
| /* Reserved, do something non-silly */ |
| final_attr = s1_parattr; |
| break; |
| default: |
| /* MemAttr[2]=0, Device from S2 */ |
| final_attr = (s2_memattr & GENMASK(1, 0)) << 2; |
| } |
| } else { |
| /* Combination of R_HMNDG, R_TNHFM and R_GQFSF */ |
| u8 s2_parattr = s2_memattr_to_attr(s2_memattr); |
| |
| if (MEMATTR_IS_DEVICE(s1_parattr) || |
| MEMATTR_IS_DEVICE(s2_parattr)) { |
| final_attr = min(s1_parattr, s2_parattr); |
| } else { |
| /* At this stage, this is memory vs memory */ |
| final_attr = combine_s1_s2_attr(s1_parattr & 0xf, |
| s2_parattr & 0xf); |
| final_attr |= combine_s1_s2_attr(s1_parattr >> 4, |
| s2_parattr >> 4) << 4; |
| } |
| } |
| |
| if ((__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_CD) && |
| !MEMATTR_IS_DEVICE(final_attr)) |
| final_attr = MEMATTR(NC, NC); |
| |
| par = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr); |
| par |= tr->output & GENMASK(47, 12); |
| par |= FIELD_PREP(SYS_PAR_EL1_SH, |
| combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par), |
| compute_sh(final_attr, tr->desc))); |
| |
| return par; |
| } |
| |
| static u64 compute_par_s1(struct kvm_vcpu *vcpu, struct s1_walk_result *wr, |
| enum trans_regime regime) |
| { |
| u64 par; |
| |
| if (wr->failed) { |
| par = SYS_PAR_EL1_RES1; |
| par |= SYS_PAR_EL1_F; |
| par |= FIELD_PREP(SYS_PAR_EL1_FST, wr->fst); |
| par |= wr->ptw ? SYS_PAR_EL1_PTW : 0; |
| par |= wr->s2 ? SYS_PAR_EL1_S : 0; |
| } else if (wr->level == S1_MMU_DISABLED) { |
| /* MMU off or HCR_EL2.DC == 1 */ |
| par = SYS_PAR_EL1_NSE; |
| par |= wr->pa & GENMASK_ULL(47, 12); |
| |
| if (regime == TR_EL10 && |
| (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_DC)) { |
| par |= FIELD_PREP(SYS_PAR_EL1_ATTR, |
| MEMATTR(WbRaWa, WbRaWa)); |
| par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_NSH); |
| } else { |
| par |= FIELD_PREP(SYS_PAR_EL1_ATTR, 0); /* nGnRnE */ |
| par |= FIELD_PREP(SYS_PAR_EL1_SH, ATTR_OSH); |
| } |
| } else { |
| u64 mair, sctlr; |
| u8 sh; |
| |
| par = SYS_PAR_EL1_NSE; |
| |
| mair = (regime == TR_EL10 ? |
| vcpu_read_sys_reg(vcpu, MAIR_EL1) : |
| vcpu_read_sys_reg(vcpu, MAIR_EL2)); |
| |
| mair >>= FIELD_GET(PTE_ATTRINDX_MASK, wr->desc) * 8; |
| mair &= 0xff; |
| |
| sctlr = (regime == TR_EL10 ? |
| vcpu_read_sys_reg(vcpu, SCTLR_EL1) : |
| vcpu_read_sys_reg(vcpu, SCTLR_EL2)); |
| |
| /* Force NC for memory if SCTLR_ELx.C is clear */ |
| if (!(sctlr & SCTLR_EL1_C) && !MEMATTR_IS_DEVICE(mair)) |
| mair = MEMATTR(NC, NC); |
| |
| par |= FIELD_PREP(SYS_PAR_EL1_ATTR, mair); |
| par |= wr->pa & GENMASK_ULL(47, 12); |
| |
| sh = compute_sh(mair, wr->desc); |
| par |= FIELD_PREP(SYS_PAR_EL1_SH, sh); |
| } |
| |
| return par; |
| } |
| |
| static bool pan3_enabled(struct kvm_vcpu *vcpu, enum trans_regime regime) |
| { |
| u64 sctlr; |
| |
| if (!kvm_has_feat(vcpu->kvm, ID_AA64MMFR1_EL1, PAN, PAN3)) |
| return false; |
| |
| if (regime == TR_EL10) |
| sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); |
| else |
| sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2); |
| |
| return sctlr & SCTLR_EL1_EPAN; |
| } |
| |
| static u64 handle_at_slow(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) |
| { |
| bool perm_fail, ur, uw, ux, pr, pw, px; |
| struct s1_walk_result wr = {}; |
| struct s1_walk_info wi = {}; |
| int ret, idx; |
| |
| ret = setup_s1_walk(vcpu, op, &wi, &wr, vaddr); |
| if (ret) |
| goto compute_par; |
| |
| if (wr.level == S1_MMU_DISABLED) |
| goto compute_par; |
| |
| idx = srcu_read_lock(&vcpu->kvm->srcu); |
| |
| ret = walk_s1(vcpu, &wi, &wr, vaddr); |
| |
| srcu_read_unlock(&vcpu->kvm->srcu, idx); |
| |
| if (ret) |
| goto compute_par; |
| |
| /* FIXME: revisit when adding indirect permission support */ |
| /* AArch64.S1DirectBasePermissions() */ |
| if (wi.regime != TR_EL2) { |
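| /* |
| * The 2-bit value decoded below is {AP[2], AP[1]}, i.e. |
| * {read-only, EL0 accessible}, extracted from PTE_RDONLY and |
| * PTE_USER. |
| */ |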
| switch (FIELD_GET(PTE_USER | PTE_RDONLY, wr.desc)) { |
| case 0b00: |
| pr = pw = true; |
| ur = uw = false; |
| break; |
| case 0b01: |
| pr = pw = ur = uw = true; |
| break; |
| case 0b10: |
| pr = true; |
| pw = ur = uw = false; |
| break; |
| case 0b11: |
| pr = ur = true; |
| pw = uw = false; |
| break; |
| } |
| |
| switch (wr.APTable) { |
| case 0b00: |
| break; |
| case 0b01: |
| ur = uw = false; |
| break; |
| case 0b10: |
| pw = uw = false; |
| break; |
| case 0b11: |
| pw = ur = uw = false; |
| break; |
| } |
| |
| /* We don't use px for anything yet, but hey... */ |
| px = !((wr.desc & PTE_PXN) || wr.PXNTable || uw); |
| ux = !((wr.desc & PTE_UXN) || wr.UXNTable); |
| |
| if (op == OP_AT_S1E1RP || op == OP_AT_S1E1WP) { |
| bool pan; |
| |
| pan = *vcpu_cpsr(vcpu) & PSR_PAN_BIT; |
| pan &= ur || uw || (pan3_enabled(vcpu, wi.regime) && ux); |
| pw &= !pan; |
| pr &= !pan; |
| } |
| } else { |
| ur = uw = ux = false; |
| |
| if (!(wr.desc & PTE_RDONLY)) { |
| pr = pw = true; |
| } else { |
| pr = true; |
| pw = false; |
| } |
| |
| if (wr.APTable & BIT(1)) |
| pw = false; |
| |
| /* XN maps to UXN */ |
| px = !((wr.desc & PTE_UXN) || wr.UXNTable); |
| } |
| |
| perm_fail = false; |
| |
| switch (op) { |
| case OP_AT_S1E1RP: |
| case OP_AT_S1E1R: |
| case OP_AT_S1E2R: |
| perm_fail = !pr; |
| break; |
| case OP_AT_S1E1WP: |
| case OP_AT_S1E1W: |
| case OP_AT_S1E2W: |
| perm_fail = !pw; |
| break; |
| case OP_AT_S1E0R: |
| perm_fail = !ur; |
| break; |
| case OP_AT_S1E0W: |
| perm_fail = !uw; |
| break; |
| case OP_AT_S1E1A: |
| case OP_AT_S1E2A: |
| break; |
| default: |
| BUG(); |
| } |
| |
| if (perm_fail) |
| fail_s1_walk(&wr, ESR_ELx_FSC_PERM_L(wr.level), false, false); |
| |
| compute_par: |
| return compute_par_s1(vcpu, &wr, wi.regime); |
| } |
| |
| /* |
| * Return the PAR_EL1 value resulting from the fast-path translation. |
| * |
| * If the translation is unsuccessful, the value may only contain |
| * PAR_EL1.F, and cannot be taken at face value. It isn't an |
| * indication of the translation having failed, only that the fast |
| * path did not succeed, *unless* it indicates a S1 permission fault. |
| */ |
| static u64 __kvm_at_s1e01_fast(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) |
| { |
| struct mmu_config config; |
| struct kvm_s2_mmu *mmu; |
| bool fail; |
| u64 par; |
| |
| par = SYS_PAR_EL1_F; |
| |
| /* |
| * We've trapped, so everything is live on the CPU. As we will |
| * be switching contexts behind everybody's back, disable |
| * interrupts while holding the mmu lock. |
| */ |
| guard(write_lock_irqsave)(&vcpu->kvm->mmu_lock); |
| |
| /* |
| * If HCR_EL2.{E2H,TGE} == {1,1}, the MMU context is already |
| * the right one (as we trapped from vEL2). If not, save the |
| * full MMU context. |
| */ |
| if (vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)) |
| goto skip_mmu_switch; |
| |
| /* |
| * Obtaining the S2 MMU for an L2 is horribly racy, and we may not |
| * find it (recycled by another vcpu, for example). When this |
| * happens, admit defeat immediately and use the SW (slow) path. |
| */ |
| mmu = lookup_s2_mmu(vcpu); |
| if (!mmu) |
| return par; |
| |
| __mmu_config_save(&config); |
| |
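| /* Install the vEL1 stage-1 context and the matching shadow stage-2 */ |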
| write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR0_EL1), SYS_TTBR0); |
| write_sysreg_el1(vcpu_read_sys_reg(vcpu, TTBR1_EL1), SYS_TTBR1); |
| write_sysreg_el1(vcpu_read_sys_reg(vcpu, TCR_EL1), SYS_TCR); |
| write_sysreg_el1(vcpu_read_sys_reg(vcpu, MAIR_EL1), SYS_MAIR); |
| write_sysreg_el1(vcpu_read_sys_reg(vcpu, SCTLR_EL1), SYS_SCTLR); |
| __load_stage2(mmu, mmu->arch); |
| |
| skip_mmu_switch: |
| /* Clear TGE, enable S2 translation, we're rolling */ |
| write_sysreg((read_sysreg(hcr_el2) & ~HCR_TGE) | HCR_VM, hcr_el2); |
| isb(); |
| |
| switch (op) { |
| case OP_AT_S1E1RP: |
| case OP_AT_S1E1WP: |
| fail = at_s1e1p_fast(vcpu, op, vaddr); |
| break; |
| case OP_AT_S1E1R: |
| fail = __kvm_at(OP_AT_S1E1R, vaddr); |
| break; |
| case OP_AT_S1E1W: |
| fail = __kvm_at(OP_AT_S1E1W, vaddr); |
| break; |
| case OP_AT_S1E0R: |
| fail = __kvm_at(OP_AT_S1E0R, vaddr); |
| break; |
| case OP_AT_S1E0W: |
| fail = __kvm_at(OP_AT_S1E0W, vaddr); |
| break; |
| case OP_AT_S1E1A: |
| fail = __kvm_at(OP_AT_S1E1A, vaddr); |
| break; |
| default: |
| WARN_ON_ONCE(1); |
| fail = true; |
| break; |
| } |
| |
| if (!fail) |
| par = read_sysreg_par(); |
| |
| if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu))) |
| __mmu_config_restore(&config); |
| |
| return par; |
| } |
| |
| static bool par_check_s1_perm_fault(u64 par) |
| { |
| u8 fst = FIELD_GET(SYS_PAR_EL1_FST, par); |
| |
| return ((fst & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM && |
| !(par & SYS_PAR_EL1_S)); |
| } |
| |
| void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) |
| { |
| u64 par = __kvm_at_s1e01_fast(vcpu, op, vaddr); |
| |
| /* |
| * If PAR_EL1 reports that AT failed on a S1 permission fault, we |
| * know for sure that the PTW was able to walk the S1 tables and |
| * there's nothing else to do. |
| * |
| * If AT failed for any other reason, then we must walk the guest S1 |
| * to emulate the instruction. |
| */ |
| if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par)) |
| par = handle_at_slow(vcpu, op, vaddr); |
| |
| vcpu_write_sys_reg(vcpu, par, PAR_EL1); |
| } |
| |
| void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) |
| { |
| u64 par; |
| |
| /* |
| * We've trapped, so everything is live on the CPU. As we will be |
| * switching context behind everybody's back, disable interrupts... |
| */ |
| scoped_guard(write_lock_irqsave, &vcpu->kvm->mmu_lock) { |
| struct kvm_s2_mmu *mmu; |
| u64 val, hcr; |
| bool fail; |
| |
| mmu = &vcpu->kvm->arch.mmu; |
| |
| val = hcr = read_sysreg(hcr_el2); |
| val &= ~HCR_TGE; |
| val |= HCR_VM; |
| |
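| /* |
| * With E2H=0, the guest's EL2 page tables follow the EL2 regime |
| * format; NV/NV1 make the EL1 regime interpret them accordingly |
| * while we issue the equivalent S1E1 AT on the guest's behalf. |
| */ |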
| if (!vcpu_el2_e2h_is_set(vcpu)) |
| val |= HCR_NV | HCR_NV1; |
| |
| write_sysreg(val, hcr_el2); |
| isb(); |
| |
| par = SYS_PAR_EL1_F; |
| |
| switch (op) { |
| case OP_AT_S1E2R: |
| fail = __kvm_at(OP_AT_S1E1R, vaddr); |
| break; |
| case OP_AT_S1E2W: |
| fail = __kvm_at(OP_AT_S1E1W, vaddr); |
| break; |
| case OP_AT_S1E2A: |
| fail = __kvm_at(OP_AT_S1E1A, vaddr); |
| break; |
| default: |
| WARN_ON_ONCE(1); |
| fail = true; |
| } |
| |
| isb(); |
| |
| if (!fail) |
| par = read_sysreg_par(); |
| |
| write_sysreg(hcr, hcr_el2); |
| isb(); |
| } |
| |
| /* We failed the translation, let's replay it in slow motion */ |
| if ((par & SYS_PAR_EL1_F) && !par_check_s1_perm_fault(par)) |
| par = handle_at_slow(vcpu, op, vaddr); |
| |
| vcpu_write_sys_reg(vcpu, par, PAR_EL1); |
| } |
| |
| void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) |
| { |
| struct kvm_s2_trans out = {}; |
| u64 ipa, par; |
| bool write; |
| int ret; |
| |
| /* Do the stage-1 translation */ |
| switch (op) { |
| case OP_AT_S12E1R: |
| op = OP_AT_S1E1R; |
| write = false; |
| break; |
| case OP_AT_S12E1W: |
| op = OP_AT_S1E1W; |
| write = true; |
| break; |
| case OP_AT_S12E0R: |
| op = OP_AT_S1E0R; |
| write = false; |
| break; |
| case OP_AT_S12E0W: |
| op = OP_AT_S1E0W; |
| write = true; |
| break; |
| default: |
| WARN_ON_ONCE(1); |
| return; |
| } |
| |
| __kvm_at_s1e01(vcpu, op, vaddr); |
| par = vcpu_read_sys_reg(vcpu, PAR_EL1); |
| if (par & SYS_PAR_EL1_F) |
| return; |
| |
| /* |
| * If we only have a single stage of translation (TGE=1 disables |
| * both the EL1&0 stage 1 and stage 2), exit early. Same thing if |
| * {VM,DC}=={0,0}. |
| */ |
| if (vcpu_el2_tge_is_set(vcpu) || |
| !(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC))) |
| return; |
| |
| /* Do the stage-2 translation */ |
| ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0)); |
| out.esr = 0; |
| ret = kvm_walk_nested_s2(vcpu, ipa, &out); |
| if (ret < 0) |
| return; |
| |
| /* Check the access permission */ |
| if (!out.esr && |
| ((!write && !out.readable) || (write && !out.writable))) |
| out.esr = ESR_ELx_FSC_PERM_L(out.level & 0x3); |
| |
| par = compute_par_s12(vcpu, par, &out); |
| vcpu_write_sys_reg(vcpu, par, PAR_EL1); |
| } |