arch/riscv/kvm/vcpu_pmu.c - linux - Git at Google

 // SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (c) 2023 Rivos Inc
  *
  * Authors:
  *     Atish Patra <atishp@rivosinc.com>
  */

 #define pr_fmt(fmt)	"riscv-kvm-pmu: " fmt
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <linux/kvm_host.h>
 #include <linux/perf/riscv_pmu.h>
 #include <asm/csr.h>
 #include <asm/kvm_vcpu_sbi.h>
 #include <asm/kvm_vcpu_pmu.h>
 #include <linux/bitops.h>

 #define kvm_pmu_num_counters(pmu) ((pmu)->num_hw_ctrs + (pmu)->num_fw_ctrs)
 #define get_event_type(x) (((x) & SBI_PMU_EVENT_IDX_TYPE_MASK) >> 16)
 #define get_event_code(x) ((x) & SBI_PMU_EVENT_IDX_CODE_MASK)

 static enum perf_hw_id hw_event_perf_map[SBI_PMU_HW_GENERAL_MAX] = {
 	[SBI_PMU_HW_CPU_CYCLES] = PERF_COUNT_HW_CPU_CYCLES,
 	[SBI_PMU_HW_INSTRUCTIONS] = PERF_COUNT_HW_INSTRUCTIONS,
 	[SBI_PMU_HW_CACHE_REFERENCES] = PERF_COUNT_HW_CACHE_REFERENCES,
 	[SBI_PMU_HW_CACHE_MISSES] = PERF_COUNT_HW_CACHE_MISSES,
 	[SBI_PMU_HW_BRANCH_INSTRUCTIONS] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
 	[SBI_PMU_HW_BRANCH_MISSES] = PERF_COUNT_HW_BRANCH_MISSES,
 	[SBI_PMU_HW_BUS_CYCLES] = PERF_COUNT_HW_BUS_CYCLES,
 	[SBI_PMU_HW_STALLED_CYCLES_FRONTEND] = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND,
 	[SBI_PMU_HW_STALLED_CYCLES_BACKEND] = PERF_COUNT_HW_STALLED_CYCLES_BACKEND,
 	[SBI_PMU_HW_REF_CPU_CYCLES] = PERF_COUNT_HW_REF_CPU_CYCLES,
 };

 static u64 kvm_pmu_get_sample_period(struct kvm_pmc *pmc)
 {
 	u64 counter_val_mask = GENMASK(pmc->cinfo.width, 0);
 	u64 sample_period;

 	if (!pmc->counter_val)
 		sample_period = counter_val_mask + 1;
 	else
 		sample_period = (-pmc->counter_val) & counter_val_mask;

 	return sample_period;
 }

 static u32 kvm_pmu_get_perf_event_type(unsigned long eidx)
 {
 	enum sbi_pmu_event_type etype = get_event_type(eidx);
 	u32 type = PERF_TYPE_MAX;

 	switch (etype) {
 	case SBI_PMU_EVENT_TYPE_HW:
 		type = PERF_TYPE_HARDWARE;
 		break;
 	case SBI_PMU_EVENT_TYPE_CACHE:
 		type = PERF_TYPE_HW_CACHE;
 		break;
 	case SBI_PMU_EVENT_TYPE_RAW:
 	case SBI_PMU_EVENT_TYPE_FW:
 		type = PERF_TYPE_RAW;
 		break;
 	default:
 		break;
 	}

 	return type;
 }

 static bool kvm_pmu_is_fw_event(unsigned long eidx)
 {
 	return get_event_type(eidx) == SBI_PMU_EVENT_TYPE_FW;
 }

 static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
 {
 	if (pmc->perf_event) {
 		perf_event_disable(pmc->perf_event);
 		perf_event_release_kernel(pmc->perf_event);
 		pmc->perf_event = NULL;
 	}
 }

 static u64 kvm_pmu_get_perf_event_hw_config(u32 sbi_event_code)
 {
 	return hw_event_perf_map[sbi_event_code];
 }

 static u64 kvm_pmu_get_perf_event_cache_config(u32 sbi_event_code)
 {
 	u64 config = U64_MAX;
 	unsigned int cache_type, cache_op, cache_result;

 	/* All the cache event masks lie within 0xFF. No separate masking is necessary */
 	cache_type = (sbi_event_code & SBI_PMU_EVENT_CACHE_ID_CODE_MASK) >>
 		      SBI_PMU_EVENT_CACHE_ID_SHIFT;
 	cache_op = (sbi_event_code & SBI_PMU_EVENT_CACHE_OP_ID_CODE_MASK) >>
 		    SBI_PMU_EVENT_CACHE_OP_SHIFT;
 	cache_result = sbi_event_code & SBI_PMU_EVENT_CACHE_RESULT_ID_CODE_MASK;

 	if (cache_type >= PERF_COUNT_HW_CACHE_MAX ||
 	    cache_op >= PERF_COUNT_HW_CACHE_OP_MAX ||
 	    cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
 		return config;

 	config = cache_type | (cache_op << 8) | (cache_result << 16);

 	return config;
 }

 static u64 kvm_pmu_get_perf_event_config(unsigned long eidx, uint64_t evt_data)
 {
 	enum sbi_pmu_event_type etype = get_event_type(eidx);
 	u32 ecode = get_event_code(eidx);
 	u64 config = U64_MAX;

 	switch (etype) {
 	case SBI_PMU_EVENT_TYPE_HW:
 		if (ecode < SBI_PMU_HW_GENERAL_MAX)
 			config = kvm_pmu_get_perf_event_hw_config(ecode);
 		break;
 	case SBI_PMU_EVENT_TYPE_CACHE:
 		config = kvm_pmu_get_perf_event_cache_config(ecode);
 		break;
 	case SBI_PMU_EVENT_TYPE_RAW:
 		config = evt_data & RISCV_PMU_RAW_EVENT_MASK;
 		break;
 	case SBI_PMU_EVENT_TYPE_FW:
 		if (ecode < SBI_PMU_FW_MAX)
 			config = (1ULL << 63) | ecode;
 		break;
 	default:
 		break;
 	}

 	return config;
 }

 static int kvm_pmu_get_fixed_pmc_index(unsigned long eidx)
 {
 	u32 etype = kvm_pmu_get_perf_event_type(eidx);
 	u32 ecode = get_event_code(eidx);

 	if (etype != SBI_PMU_EVENT_TYPE_HW)
 		return -EINVAL;

 	if (ecode == SBI_PMU_HW_CPU_CYCLES)
 		return 0;
 	else if (ecode == SBI_PMU_HW_INSTRUCTIONS)
 		return 2;
 	else
 		return -EINVAL;
 }

 static int kvm_pmu_get_programmable_pmc_index(struct kvm_pmu *kvpmu, unsigned long eidx,
 					      unsigned long cbase, unsigned long cmask)
 {
 	int ctr_idx = -1;
 	int i, pmc_idx;
 	int min, max;

 	if (kvm_pmu_is_fw_event(eidx)) {
 		/* Firmware counters are mapped 1:1 starting from num_hw_ctrs for simplicity */
 		min = kvpmu->num_hw_ctrs;
 		max = min + kvpmu->num_fw_ctrs;
 	} else {
 		/* First 3 counters are reserved for fixed counters */
 		min = 3;
 		max = kvpmu->num_hw_ctrs;
 	}

 	for_each_set_bit(i, &cmask, BITS_PER_LONG) {
 		pmc_idx = i + cbase;
 		if ((pmc_idx >= min && pmc_idx < max) &&
 		    !test_bit(pmc_idx, kvpmu->pmc_in_use)) {
 			ctr_idx = pmc_idx;
 			break;
 		}
 	}

 	return ctr_idx;
 }

 static int pmu_get_pmc_index(struct kvm_pmu *pmu, unsigned long eidx,
 			     unsigned long cbase, unsigned long cmask)
 {
 	int ret;

 	/* Fixed counters need to be have fixed mapping as they have different width */
 	ret = kvm_pmu_get_fixed_pmc_index(eidx);
 	if (ret >= 0)
 		return ret;

 	return kvm_pmu_get_programmable_pmc_index(pmu, eidx, cbase, cmask);
 }

 static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
 			unsigned long *out_val)
 {
 	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
 	struct kvm_pmc *pmc;
 	u64 enabled, running;
 	int fevent_code;

 	pmc = &kvpmu->pmc[cidx];

 	if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
 		fevent_code = get_event_code(pmc->event_idx);
 		pmc->counter_val = kvpmu->fw_event[fevent_code].value;
 	} else if (pmc->perf_event) {
 		pmc->counter_val += perf_event_read_value(pmc->perf_event, &enabled, &running);
 	} else {
 		return -EINVAL;
 	}
 	*out_val = pmc->counter_val;

 	return 0;
 }

 static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ctr_base,
 					 unsigned long ctr_mask)
 {
 	/* Make sure the we have a valid counter mask requested from the caller */
 	if (!ctr_mask || (ctr_base + __fls(ctr_mask) >= kvm_pmu_num_counters(kvpmu)))
 		return -EINVAL;

 	return 0;
 }

 static int kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr,
 				     unsigned long flags, unsigned long eidx, unsigned long evtdata)
 {
 	struct perf_event *event;

 	kvm_pmu_release_perf_event(pmc);
 	attr->config = kvm_pmu_get_perf_event_config(eidx, evtdata);
 	if (flags & SBI_PMU_CFG_FLAG_CLEAR_VALUE) {
 		//TODO: Do we really want to clear the value in hardware counter
 		pmc->counter_val = 0;
 	}

 	/*
 	 * Set the default sample_period for now. The guest specified value
 	 * will be updated in the start call.
 	 */
 	attr->sample_period = kvm_pmu_get_sample_period(pmc);

 	event = perf_event_create_kernel_counter(attr, -1, current, NULL, pmc);
 	if (IS_ERR(event)) {
 		pr_err("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
 		return PTR_ERR(event);
 	}

 	pmc->perf_event = event;
 	if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
 		perf_event_enable(pmc->perf_event);

 	return 0;
 }

 int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid)
 {
 	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
 	struct kvm_fw_event *fevent;

 	if (!kvpmu || fid >= SBI_PMU_FW_MAX)
 		return -EINVAL;

 	fevent = &kvpmu->fw_event[fid];
 	if (fevent->started)
 		fevent->value++;

 	return 0;
 }

 int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num,
 				unsigned long *val, unsigned long new_val,
 				unsigned long wr_mask)
 {
 	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
 	int cidx, ret = KVM_INSN_CONTINUE_NEXT_SEPC;

 	if (!kvpmu || !kvpmu->init_done) {
 		/*
 		 * In absence of sscofpmf in the platform, the guest OS may use
 		 * the legacy PMU driver to read cycle/instret. In that case,
 		 * just return 0 to avoid any illegal trap. However, any other
 		 * hpmcounter access should result in illegal trap as they must
 		 * be access through SBI PMU only.
 		 */
 		if (csr_num == CSR_CYCLE || csr_num == CSR_INSTRET) {
 			*val = 0;
 			return ret;
 		} else {
 			return KVM_INSN_ILLEGAL_TRAP;
 		}
 	}

 	/* The counter CSR are read only. Thus, any write should result in illegal traps */
 	if (wr_mask)
 		return KVM_INSN_ILLEGAL_TRAP;

 	cidx = csr_num - CSR_CYCLE;

 	if (pmu_ctr_read(vcpu, cidx, val) < 0)
 		return KVM_INSN_ILLEGAL_TRAP;

 	return ret;
 }

 int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu,
 				struct kvm_vcpu_sbi_return *retdata)
 {
 	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);

 	retdata->out_val = kvm_pmu_num_counters(kvpmu);

 	return 0;
 }

 int kvm_riscv_vcpu_pmu_ctr_info(struct kvm_vcpu *vcpu, unsigned long cidx,
 				struct kvm_vcpu_sbi_return *retdata)
 {
 	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);

 	if (cidx > RISCV_KVM_MAX_COUNTERS || cidx == 1) {
 		retdata->err_val = SBI_ERR_INVALID_PARAM;
 		return 0;
 	}

 	retdata->out_val = kvpmu->pmc[cidx].cinfo.value;

 	return 0;
 }

 int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base,
 				 unsigned long ctr_mask, unsigned long flags, u64 ival,
 				 struct kvm_vcpu_sbi_return *retdata)
 {
 	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
 	int i, pmc_index, sbiret = 0;
 	struct kvm_pmc *pmc;
 	int fevent_code;

 	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
 		sbiret = SBI_ERR_INVALID_PARAM;
 		goto out;
 	}

 	/* Start the counters that have been configured and requested by the guest */
 	for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
 		pmc_index = i + ctr_base;
 		if (!test_bit(pmc_index, kvpmu->pmc_in_use))
 			continue;
 		pmc = &kvpmu->pmc[pmc_index];
 		if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE)
 			pmc->counter_val = ival;
 		if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
 			fevent_code = get_event_code(pmc->event_idx);
 			if (fevent_code >= SBI_PMU_FW_MAX) {
 				sbiret = SBI_ERR_INVALID_PARAM;
 				goto out;
 			}

 			/* Check if the counter was already started for some reason */
 			if (kvpmu->fw_event[fevent_code].started) {
 				sbiret = SBI_ERR_ALREADY_STARTED;
 				continue;
 			}

 			kvpmu->fw_event[fevent_code].started = true;
 			kvpmu->fw_event[fevent_code].value = pmc->counter_val;
 		} else if (pmc->perf_event) {
 			if (unlikely(pmc->started)) {
 				sbiret = SBI_ERR_ALREADY_STARTED;
 				continue;
 			}
 			perf_event_period(pmc->perf_event, kvm_pmu_get_sample_period(pmc));
 			perf_event_enable(pmc->perf_event);
 			pmc->started = true;
 		} else {
 			sbiret = SBI_ERR_INVALID_PARAM;
 		}
 	}

 out:
 	retdata->err_val = sbiret;

 	return 0;
 }

 int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
 				unsigned long ctr_mask, unsigned long flags,
 				struct kvm_vcpu_sbi_return *retdata)
 {
 	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
 	int i, pmc_index, sbiret = 0;
 	u64 enabled, running;
 	struct kvm_pmc *pmc;
 	int fevent_code;

 	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
 		sbiret = SBI_ERR_INVALID_PARAM;
 		goto out;
 	}

 	/* Stop the counters that have been configured and requested by the guest */
 	for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
 		pmc_index = i + ctr_base;
 		if (!test_bit(pmc_index, kvpmu->pmc_in_use))
 			continue;
 		pmc = &kvpmu->pmc[pmc_index];
 		if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
 			fevent_code = get_event_code(pmc->event_idx);
 			if (fevent_code >= SBI_PMU_FW_MAX) {
 				sbiret = SBI_ERR_INVALID_PARAM;
 				goto out;
 			}

 			if (!kvpmu->fw_event[fevent_code].started)
 				sbiret = SBI_ERR_ALREADY_STOPPED;

 			kvpmu->fw_event[fevent_code].started = false;
 		} else if (pmc->perf_event) {
 			if (pmc->started) {
 				/* Stop counting the counter */
 				perf_event_disable(pmc->perf_event);
 				pmc->started = false;
 			} else {
 				sbiret = SBI_ERR_ALREADY_STOPPED;
 			}

 			if (flags & SBI_PMU_STOP_FLAG_RESET) {
 				/* Relase the counter if this is a reset request */
 				pmc->counter_val += perf_event_read_value(pmc->perf_event,
 									  &enabled, &running);
 				kvm_pmu_release_perf_event(pmc);
 			}
 		} else {
 			sbiret = SBI_ERR_INVALID_PARAM;
 		}
 		if (flags & SBI_PMU_STOP_FLAG_RESET) {
 			pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
 			clear_bit(pmc_index, kvpmu->pmc_in_use);
 		}
 	}

 out:
 	retdata->err_val = sbiret;

 	return 0;
 }

 int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_base,
 				     unsigned long ctr_mask, unsigned long flags,
 				     unsigned long eidx, u64 evtdata,
 				     struct kvm_vcpu_sbi_return *retdata)
 {
 	int ctr_idx, ret, sbiret = 0;
 	bool is_fevent;
 	unsigned long event_code;
 	u32 etype = kvm_pmu_get_perf_event_type(eidx);
 	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
 	struct kvm_pmc *pmc = NULL;
 	struct perf_event_attr attr = {
 		.type = etype,
 		.size = sizeof(struct perf_event_attr),
 		.pinned = true,
 		/*
 		 * It should never reach here if the platform doesn't support the sscofpmf
 		 * extension as mode filtering won't work without it.
 		 */
 		.exclude_host = true,
 		.exclude_hv = true,
 		.exclude_user = !!(flags & SBI_PMU_CFG_FLAG_SET_UINH),
 		.exclude_kernel = !!(flags & SBI_PMU_CFG_FLAG_SET_SINH),
 		.config1 = RISCV_PMU_CONFIG1_GUEST_EVENTS,
 	};

 	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
 		sbiret = SBI_ERR_INVALID_PARAM;
 		goto out;
 	}

 	event_code = get_event_code(eidx);
 	is_fevent = kvm_pmu_is_fw_event(eidx);
 	if (is_fevent && event_code >= SBI_PMU_FW_MAX) {
 		sbiret = SBI_ERR_NOT_SUPPORTED;
 		goto out;
 	}

 	/*
 	 * SKIP_MATCH flag indicates the caller is aware of the assigned counter
 	 * for this event. Just do a sanity check if it already marked used.
 	 */
 	if (flags & SBI_PMU_CFG_FLAG_SKIP_MATCH) {
 		if (!test_bit(ctr_base + __ffs(ctr_mask), kvpmu->pmc_in_use)) {
 			sbiret = SBI_ERR_FAILURE;
 			goto out;
 		}
 		ctr_idx = ctr_base + __ffs(ctr_mask);
 	} else  {
 		ctr_idx = pmu_get_pmc_index(kvpmu, eidx, ctr_base, ctr_mask);
 		if (ctr_idx < 0) {
 			sbiret = SBI_ERR_NOT_SUPPORTED;
 			goto out;
 		}
 	}

 	pmc = &kvpmu->pmc[ctr_idx];
 	pmc->idx = ctr_idx;

 	if (is_fevent) {
 		if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
 			kvpmu->fw_event[event_code].started = true;
 	} else {
 		ret = kvm_pmu_create_perf_event(pmc, &attr, flags, eidx, evtdata);
 		if (ret)
 			return ret;
 	}

 	set_bit(ctr_idx, kvpmu->pmc_in_use);
 	pmc->event_idx = eidx;
 	retdata->out_val = ctr_idx;
 out:
 	retdata->err_val = sbiret;

 	return 0;
 }

 int kvm_riscv_vcpu_pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
 				struct kvm_vcpu_sbi_return *retdata)
 {
 	int ret;

 	ret = pmu_ctr_read(vcpu, cidx, &retdata->out_val);
 	if (ret == -EINVAL)
 		retdata->err_val = SBI_ERR_INVALID_PARAM;

 	return 0;
 }

 void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
 {
 	int i = 0, ret, num_hw_ctrs = 0, hpm_width = 0;
 	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
 	struct kvm_pmc *pmc;

 	/*
 	 * PMU functionality should be only available to guests if privilege mode
 	 * filtering is available in the host. Otherwise, guest will always count
 	 * events while the execution is in hypervisor mode.
 	 */
 	if (!riscv_isa_extension_available(NULL, SSCOFPMF))
 		return;

 	ret = riscv_pmu_get_hpm_info(&hpm_width, &num_hw_ctrs);
 	if (ret < 0 || !hpm_width || !num_hw_ctrs)
 		return;

 	/*
 	 * Increase the number of hardware counters to offset the time counter.
 	 */
 	kvpmu->num_hw_ctrs = num_hw_ctrs + 1;
 	kvpmu->num_fw_ctrs = SBI_PMU_FW_MAX;
 	memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));

 	if (kvpmu->num_hw_ctrs > RISCV_KVM_MAX_HW_CTRS) {
 		pr_warn_once("Limiting the hardware counters to 32 as specified by the ISA");
 		kvpmu->num_hw_ctrs = RISCV_KVM_MAX_HW_CTRS;
 	}

 	/*
 	 * There is no correlation between the logical hardware counter and virtual counters.
 	 * However, we need to encode a hpmcounter CSR in the counter info field so that
 	 * KVM can trap n emulate the read. This works well in the migration use case as
 	 * KVM doesn't care if the actual hpmcounter is available in the hardware or not.
 	 */
 	for (i = 0; i < kvm_pmu_num_counters(kvpmu); i++) {
 		/* TIME CSR shouldn't be read from perf interface */
 		if (i == 1)
 			continue;
 		pmc = &kvpmu->pmc[i];
 		pmc->idx = i;
 		pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
 		if (i < kvpmu->num_hw_ctrs) {
 			pmc->cinfo.type = SBI_PMU_CTR_TYPE_HW;
 			if (i < 3)
 				/* CY, IR counters */
 				pmc->cinfo.width = 63;
 			else
 				pmc->cinfo.width = hpm_width;
 			/*
 			 * The CSR number doesn't have any relation with the logical
 			 * hardware counters. The CSR numbers are encoded sequentially
 			 * to avoid maintaining a map between the virtual counter
 			 * and CSR number.
 			 */
 			pmc->cinfo.csr = CSR_CYCLE + i;
 		} else {
 			pmc->cinfo.type = SBI_PMU_CTR_TYPE_FW;
 			pmc->cinfo.width = BITS_PER_LONG - 1;
 		}
 	}

 	kvpmu->init_done = true;
 }

 void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu)
 {
 	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
 	struct kvm_pmc *pmc;
 	int i;

 	if (!kvpmu)
 		return;

 	for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_MAX_COUNTERS) {
 		pmc = &kvpmu->pmc[i];
 		pmc->counter_val = 0;
 		kvm_pmu_release_perf_event(pmc);
 		pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
 	}
 	bitmap_zero(kvpmu->pmc_in_use, RISCV_MAX_COUNTERS);
 	memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
 }

 void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu)
 {
 	kvm_riscv_vcpu_pmu_deinit(vcpu);
 }
	// SPDX-License-Identifier: GPL-2.0
	/*
	* Copyright (c) 2023 Rivos Inc
	*
	* Authors:
	* Atish Patra <atishp@rivosinc.com>
	*/

	#define pr_fmt(fmt) "riscv-kvm-pmu: " fmt
	#include <linux/errno.h>
	#include <linux/err.h>
	#include <linux/kvm_host.h>
	#include <linux/perf/riscv_pmu.h>
	#include <asm/csr.h>
	#include <asm/kvm_vcpu_sbi.h>
	#include <asm/kvm_vcpu_pmu.h>
	#include <linux/bitops.h>

	#define kvm_pmu_num_counters(pmu) ((pmu)->num_hw_ctrs + (pmu)->num_fw_ctrs)
	#define get_event_type(x) (((x) & SBI_PMU_EVENT_IDX_TYPE_MASK) >> 16)
	#define get_event_code(x) ((x) & SBI_PMU_EVENT_IDX_CODE_MASK)

	static enum perf_hw_id hw_event_perf_map[SBI_PMU_HW_GENERAL_MAX] = {
	[SBI_PMU_HW_CPU_CYCLES] = PERF_COUNT_HW_CPU_CYCLES,
	[SBI_PMU_HW_INSTRUCTIONS] = PERF_COUNT_HW_INSTRUCTIONS,
	[SBI_PMU_HW_CACHE_REFERENCES] = PERF_COUNT_HW_CACHE_REFERENCES,
	[SBI_PMU_HW_CACHE_MISSES] = PERF_COUNT_HW_CACHE_MISSES,
	[SBI_PMU_HW_BRANCH_INSTRUCTIONS] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
	[SBI_PMU_HW_BRANCH_MISSES] = PERF_COUNT_HW_BRANCH_MISSES,
	[SBI_PMU_HW_BUS_CYCLES] = PERF_COUNT_HW_BUS_CYCLES,
	[SBI_PMU_HW_STALLED_CYCLES_FRONTEND] = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND,
	[SBI_PMU_HW_STALLED_CYCLES_BACKEND] = PERF_COUNT_HW_STALLED_CYCLES_BACKEND,
	[SBI_PMU_HW_REF_CPU_CYCLES] = PERF_COUNT_HW_REF_CPU_CYCLES,
	};

	static u64 kvm_pmu_get_sample_period(struct kvm_pmc *pmc)
	{
	u64 counter_val_mask = GENMASK(pmc->cinfo.width, 0);
	u64 sample_period;

	if (!pmc->counter_val)
	sample_period = counter_val_mask + 1;
	else
	sample_period = (-pmc->counter_val) & counter_val_mask;

	return sample_period;
	}

	static u32 kvm_pmu_get_perf_event_type(unsigned long eidx)
	{
	enum sbi_pmu_event_type etype = get_event_type(eidx);
	u32 type = PERF_TYPE_MAX;

	switch (etype) {
	case SBI_PMU_EVENT_TYPE_HW:
	type = PERF_TYPE_HARDWARE;
	break;
	case SBI_PMU_EVENT_TYPE_CACHE:
	type = PERF_TYPE_HW_CACHE;
	break;
	case SBI_PMU_EVENT_TYPE_RAW:
	case SBI_PMU_EVENT_TYPE_FW:
	type = PERF_TYPE_RAW;
	break;
	default:
	break;
	}

	return type;
	}

	static bool kvm_pmu_is_fw_event(unsigned long eidx)
	{
	return get_event_type(eidx) == SBI_PMU_EVENT_TYPE_FW;
	}

	static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
	{
	if (pmc->perf_event) {
	perf_event_disable(pmc->perf_event);
	perf_event_release_kernel(pmc->perf_event);
	pmc->perf_event = NULL;
	}
	}

	static u64 kvm_pmu_get_perf_event_hw_config(u32 sbi_event_code)
	{
	return hw_event_perf_map[sbi_event_code];
	}

	static u64 kvm_pmu_get_perf_event_cache_config(u32 sbi_event_code)
	{
	u64 config = U64_MAX;
	unsigned int cache_type, cache_op, cache_result;

	/* All the cache event masks lie within 0xFF. No separate masking is necessary */
	cache_type = (sbi_event_code & SBI_PMU_EVENT_CACHE_ID_CODE_MASK) >>
	SBI_PMU_EVENT_CACHE_ID_SHIFT;
	cache_op = (sbi_event_code & SBI_PMU_EVENT_CACHE_OP_ID_CODE_MASK) >>
	SBI_PMU_EVENT_CACHE_OP_SHIFT;
	cache_result = sbi_event_code & SBI_PMU_EVENT_CACHE_RESULT_ID_CODE_MASK;

	if (cache_type >= PERF_COUNT_HW_CACHE_MAX \|\|
	cache_op >= PERF_COUNT_HW_CACHE_OP_MAX \|\|
	cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
	return config;

	config = cache_type \| (cache_op << 8) \| (cache_result << 16);

	return config;
	}

	static u64 kvm_pmu_get_perf_event_config(unsigned long eidx, uint64_t evt_data)
	{
	enum sbi_pmu_event_type etype = get_event_type(eidx);
	u32 ecode = get_event_code(eidx);
	u64 config = U64_MAX;

	switch (etype) {
	case SBI_PMU_EVENT_TYPE_HW:
	if (ecode < SBI_PMU_HW_GENERAL_MAX)
	config = kvm_pmu_get_perf_event_hw_config(ecode);
	break;
	case SBI_PMU_EVENT_TYPE_CACHE:
	config = kvm_pmu_get_perf_event_cache_config(ecode);
	break;
	case SBI_PMU_EVENT_TYPE_RAW:
	config = evt_data & RISCV_PMU_RAW_EVENT_MASK;
	break;
	case SBI_PMU_EVENT_TYPE_FW:
	if (ecode < SBI_PMU_FW_MAX)
	config = (1ULL << 63) \| ecode;
	break;
	default:
	break;
	}

	return config;
	}

	static int kvm_pmu_get_fixed_pmc_index(unsigned long eidx)
	{
	u32 etype = kvm_pmu_get_perf_event_type(eidx);
	u32 ecode = get_event_code(eidx);

	if (etype != SBI_PMU_EVENT_TYPE_HW)
	return -EINVAL;

	if (ecode == SBI_PMU_HW_CPU_CYCLES)
	return 0;
	else if (ecode == SBI_PMU_HW_INSTRUCTIONS)
	return 2;
	else
	return -EINVAL;
	}

	static int kvm_pmu_get_programmable_pmc_index(struct kvm_pmu *kvpmu, unsigned long eidx,
	unsigned long cbase, unsigned long cmask)
	{
	int ctr_idx = -1;
	int i, pmc_idx;
	int min, max;

	if (kvm_pmu_is_fw_event(eidx)) {
	/* Firmware counters are mapped 1:1 starting from num_hw_ctrs for simplicity */
	min = kvpmu->num_hw_ctrs;
	max = min + kvpmu->num_fw_ctrs;
	} else {
	/* First 3 counters are reserved for fixed counters */
	min = 3;
	max = kvpmu->num_hw_ctrs;
	}

	for_each_set_bit(i, &cmask, BITS_PER_LONG) {
	pmc_idx = i + cbase;
	if ((pmc_idx >= min && pmc_idx < max) &&
	!test_bit(pmc_idx, kvpmu->pmc_in_use)) {
	ctr_idx = pmc_idx;
	break;
	}
	}

	return ctr_idx;
	}

	static int pmu_get_pmc_index(struct kvm_pmu *pmu, unsigned long eidx,
	unsigned long cbase, unsigned long cmask)
	{
	int ret;

	/* Fixed counters need to be have fixed mapping as they have different width */
	ret = kvm_pmu_get_fixed_pmc_index(eidx);
	if (ret >= 0)
	return ret;

	return kvm_pmu_get_programmable_pmc_index(pmu, eidx, cbase, cmask);
	}

	static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
	unsigned long *out_val)
	{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	u64 enabled, running;
	int fevent_code;

	pmc = &kvpmu->pmc[cidx];

	if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
	fevent_code = get_event_code(pmc->event_idx);
	pmc->counter_val = kvpmu->fw_event[fevent_code].value;
	} else if (pmc->perf_event) {
	pmc->counter_val += perf_event_read_value(pmc->perf_event, &enabled, &running);
	} else {
	return -EINVAL;
	}
	*out_val = pmc->counter_val;

	return 0;
	}

	static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ctr_base,
	unsigned long ctr_mask)
	{
	/* Make sure the we have a valid counter mask requested from the caller */
	if (!ctr_mask \|\| (ctr_base + __fls(ctr_mask) >= kvm_pmu_num_counters(kvpmu)))
	return -EINVAL;

	return 0;
	}

	static int kvm_pmu_create_perf_event(struct kvm_pmc pmc, struct perf_event_attr attr,
	unsigned long flags, unsigned long eidx, unsigned long evtdata)
	{
	struct perf_event *event;

	kvm_pmu_release_perf_event(pmc);
	attr->config = kvm_pmu_get_perf_event_config(eidx, evtdata);
	if (flags & SBI_PMU_CFG_FLAG_CLEAR_VALUE) {
	//TODO: Do we really want to clear the value in hardware counter
	pmc->counter_val = 0;
	}

	/*
	* Set the default sample_period for now. The guest specified value
	* will be updated in the start call.
	*/
	attr->sample_period = kvm_pmu_get_sample_period(pmc);

	event = perf_event_create_kernel_counter(attr, -1, current, NULL, pmc);
	if (IS_ERR(event)) {
	pr_err("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
	return PTR_ERR(event);
	}

	pmc->perf_event = event;
	if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
	perf_event_enable(pmc->perf_event);

	return 0;
	}

	int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid)
	{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_fw_event *fevent;

	if (!kvpmu \|\| fid >= SBI_PMU_FW_MAX)
	return -EINVAL;

	fevent = &kvpmu->fw_event[fid];
	if (fevent->started)
	fevent->value++;

	return 0;
	}

	int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num,
	unsigned long *val, unsigned long new_val,
	unsigned long wr_mask)
	{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int cidx, ret = KVM_INSN_CONTINUE_NEXT_SEPC;

	if (!kvpmu \|\| !kvpmu->init_done) {
	/*
	* In absence of sscofpmf in the platform, the guest OS may use
	* the legacy PMU driver to read cycle/instret. In that case,
	* just return 0 to avoid any illegal trap. However, any other
	* hpmcounter access should result in illegal trap as they must
	* be access through SBI PMU only.
	*/
	if (csr_num == CSR_CYCLE \|\| csr_num == CSR_INSTRET) {
	*val = 0;
	return ret;
	} else {
	return KVM_INSN_ILLEGAL_TRAP;
	}
	}

	/* The counter CSR are read only. Thus, any write should result in illegal traps */
	if (wr_mask)
	return KVM_INSN_ILLEGAL_TRAP;

	cidx = csr_num - CSR_CYCLE;

	if (pmu_ctr_read(vcpu, cidx, val) < 0)
	return KVM_INSN_ILLEGAL_TRAP;

	return ret;
	}

	int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu,
	struct kvm_vcpu_sbi_return *retdata)
	{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);

	retdata->out_val = kvm_pmu_num_counters(kvpmu);

	return 0;
	}

	int kvm_riscv_vcpu_pmu_ctr_info(struct kvm_vcpu *vcpu, unsigned long cidx,
	struct kvm_vcpu_sbi_return *retdata)
	{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);

	if (cidx > RISCV_KVM_MAX_COUNTERS \|\| cidx == 1) {
	retdata->err_val = SBI_ERR_INVALID_PARAM;
	return 0;
	}

	retdata->out_val = kvpmu->pmc[cidx].cinfo.value;

	return 0;
	}

	int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base,
	unsigned long ctr_mask, unsigned long flags, u64 ival,
	struct kvm_vcpu_sbi_return *retdata)
	{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int i, pmc_index, sbiret = 0;
	struct kvm_pmc *pmc;
	int fevent_code;

	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
	sbiret = SBI_ERR_INVALID_PARAM;
	goto out;
	}

	/* Start the counters that have been configured and requested by the guest */
	for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
	pmc_index = i + ctr_base;
	if (!test_bit(pmc_index, kvpmu->pmc_in_use))
	continue;
	pmc = &kvpmu->pmc[pmc_index];
	if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE)
	pmc->counter_val = ival;
	if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
	fevent_code = get_event_code(pmc->event_idx);
	if (fevent_code >= SBI_PMU_FW_MAX) {
	sbiret = SBI_ERR_INVALID_PARAM;
	goto out;
	}

	/* Check if the counter was already started for some reason */
	if (kvpmu->fw_event[fevent_code].started) {
	sbiret = SBI_ERR_ALREADY_STARTED;
	continue;
	}

	kvpmu->fw_event[fevent_code].started = true;
	kvpmu->fw_event[fevent_code].value = pmc->counter_val;
	} else if (pmc->perf_event) {
	if (unlikely(pmc->started)) {
	sbiret = SBI_ERR_ALREADY_STARTED;
	continue;
	}
	perf_event_period(pmc->perf_event, kvm_pmu_get_sample_period(pmc));
	perf_event_enable(pmc->perf_event);
	pmc->started = true;
	} else {
	sbiret = SBI_ERR_INVALID_PARAM;
	}
	}

	out:
	retdata->err_val = sbiret;

	return 0;
	}

	int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
	unsigned long ctr_mask, unsigned long flags,
	struct kvm_vcpu_sbi_return *retdata)
	{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	int i, pmc_index, sbiret = 0;
	u64 enabled, running;
	struct kvm_pmc *pmc;
	int fevent_code;

	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
	sbiret = SBI_ERR_INVALID_PARAM;
	goto out;
	}

	/* Stop the counters that have been configured and requested by the guest */
	for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) {
	pmc_index = i + ctr_base;
	if (!test_bit(pmc_index, kvpmu->pmc_in_use))
	continue;
	pmc = &kvpmu->pmc[pmc_index];
	if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) {
	fevent_code = get_event_code(pmc->event_idx);
	if (fevent_code >= SBI_PMU_FW_MAX) {
	sbiret = SBI_ERR_INVALID_PARAM;
	goto out;
	}

	if (!kvpmu->fw_event[fevent_code].started)
	sbiret = SBI_ERR_ALREADY_STOPPED;

	kvpmu->fw_event[fevent_code].started = false;
	} else if (pmc->perf_event) {
	if (pmc->started) {
	/* Stop counting the counter */
	perf_event_disable(pmc->perf_event);
	pmc->started = false;
	} else {
	sbiret = SBI_ERR_ALREADY_STOPPED;
	}

	if (flags & SBI_PMU_STOP_FLAG_RESET) {
	/* Relase the counter if this is a reset request */
	pmc->counter_val += perf_event_read_value(pmc->perf_event,
	&enabled, &running);
	kvm_pmu_release_perf_event(pmc);
	}
	} else {
	sbiret = SBI_ERR_INVALID_PARAM;
	}
	if (flags & SBI_PMU_STOP_FLAG_RESET) {
	pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
	clear_bit(pmc_index, kvpmu->pmc_in_use);
	}
	}

	out:
	retdata->err_val = sbiret;

	return 0;
	}

	int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_base,
	unsigned long ctr_mask, unsigned long flags,
	unsigned long eidx, u64 evtdata,
	struct kvm_vcpu_sbi_return *retdata)
	{
	int ctr_idx, ret, sbiret = 0;
	bool is_fevent;
	unsigned long event_code;
	u32 etype = kvm_pmu_get_perf_event_type(eidx);
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc = NULL;
	struct perf_event_attr attr = {
	.type = etype,
	.size = sizeof(struct perf_event_attr),
	.pinned = true,
	/*
	* It should never reach here if the platform doesn't support the sscofpmf
	* extension as mode filtering won't work without it.
	*/
	.exclude_host = true,
	.exclude_hv = true,
	.exclude_user = !!(flags & SBI_PMU_CFG_FLAG_SET_UINH),
	.exclude_kernel = !!(flags & SBI_PMU_CFG_FLAG_SET_SINH),
	.config1 = RISCV_PMU_CONFIG1_GUEST_EVENTS,
	};

	if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) {
	sbiret = SBI_ERR_INVALID_PARAM;
	goto out;
	}

	event_code = get_event_code(eidx);
	is_fevent = kvm_pmu_is_fw_event(eidx);
	if (is_fevent && event_code >= SBI_PMU_FW_MAX) {
	sbiret = SBI_ERR_NOT_SUPPORTED;
	goto out;
	}

	/*
	* SKIP_MATCH flag indicates the caller is aware of the assigned counter
	* for this event. Just do a sanity check if it already marked used.
	*/
	if (flags & SBI_PMU_CFG_FLAG_SKIP_MATCH) {
	if (!test_bit(ctr_base + __ffs(ctr_mask), kvpmu->pmc_in_use)) {
	sbiret = SBI_ERR_FAILURE;
	goto out;
	}
	ctr_idx = ctr_base + __ffs(ctr_mask);
	} else {
	ctr_idx = pmu_get_pmc_index(kvpmu, eidx, ctr_base, ctr_mask);
	if (ctr_idx < 0) {
	sbiret = SBI_ERR_NOT_SUPPORTED;
	goto out;
	}
	}

	pmc = &kvpmu->pmc[ctr_idx];
	pmc->idx = ctr_idx;

	if (is_fevent) {
	if (flags & SBI_PMU_CFG_FLAG_AUTO_START)
	kvpmu->fw_event[event_code].started = true;
	} else {
	ret = kvm_pmu_create_perf_event(pmc, &attr, flags, eidx, evtdata);
	if (ret)
	return ret;
	}

	set_bit(ctr_idx, kvpmu->pmc_in_use);
	pmc->event_idx = eidx;
	retdata->out_val = ctr_idx;
	out:
	retdata->err_val = sbiret;

	return 0;
	}

	int kvm_riscv_vcpu_pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx,
	struct kvm_vcpu_sbi_return *retdata)
	{
	int ret;

	ret = pmu_ctr_read(vcpu, cidx, &retdata->out_val);
	if (ret == -EINVAL)
	retdata->err_val = SBI_ERR_INVALID_PARAM;

	return 0;
	}

	void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
	{
	int i = 0, ret, num_hw_ctrs = 0, hpm_width = 0;
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;

	/*
	* PMU functionality should be only available to guests if privilege mode
	* filtering is available in the host. Otherwise, guest will always count
	* events while the execution is in hypervisor mode.
	*/
	if (!riscv_isa_extension_available(NULL, SSCOFPMF))
	return;

	ret = riscv_pmu_get_hpm_info(&hpm_width, &num_hw_ctrs);
	if (ret < 0 \|\| !hpm_width \|\| !num_hw_ctrs)
	return;

	/*
	* Increase the number of hardware counters to offset the time counter.
	*/
	kvpmu->num_hw_ctrs = num_hw_ctrs + 1;
	kvpmu->num_fw_ctrs = SBI_PMU_FW_MAX;
	memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));

	if (kvpmu->num_hw_ctrs > RISCV_KVM_MAX_HW_CTRS) {
	pr_warn_once("Limiting the hardware counters to 32 as specified by the ISA");
	kvpmu->num_hw_ctrs = RISCV_KVM_MAX_HW_CTRS;
	}

	/*
	* There is no correlation between the logical hardware counter and virtual counters.
	* However, we need to encode a hpmcounter CSR in the counter info field so that
	* KVM can trap n emulate the read. This works well in the migration use case as
	* KVM doesn't care if the actual hpmcounter is available in the hardware or not.
	*/
	for (i = 0; i < kvm_pmu_num_counters(kvpmu); i++) {
	/* TIME CSR shouldn't be read from perf interface */
	if (i == 1)
	continue;
	pmc = &kvpmu->pmc[i];
	pmc->idx = i;
	pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
	if (i < kvpmu->num_hw_ctrs) {
	pmc->cinfo.type = SBI_PMU_CTR_TYPE_HW;
	if (i < 3)
	/* CY, IR counters */
	pmc->cinfo.width = 63;
	else
	pmc->cinfo.width = hpm_width;
	/*
	* The CSR number doesn't have any relation with the logical
	* hardware counters. The CSR numbers are encoded sequentially
	* to avoid maintaining a map between the virtual counter
	* and CSR number.
	*/
	pmc->cinfo.csr = CSR_CYCLE + i;
	} else {
	pmc->cinfo.type = SBI_PMU_CTR_TYPE_FW;
	pmc->cinfo.width = BITS_PER_LONG - 1;
	}
	}

	kvpmu->init_done = true;
	}

	void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu)
	{
	struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
	struct kvm_pmc *pmc;
	int i;

	if (!kvpmu)
	return;

	for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_MAX_COUNTERS) {
	pmc = &kvpmu->pmc[i];
	pmc->counter_val = 0;
	kvm_pmu_release_perf_event(pmc);
	pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
	}
	bitmap_zero(kvpmu->pmc_in_use, RISCV_MAX_COUNTERS);
	memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
	}

	void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu)
	{
	kvm_riscv_vcpu_pmu_deinit(vcpu);
	}