| // SPDX-License-Identifier: GPL-2.0-only |
| /* |
| * Support Intel IOMMU PerfMon |
| * Copyright(c) 2023 Intel Corporation. |
| */ |
| #define pr_fmt(fmt) "DMAR: " fmt |
| #define dev_fmt(fmt) pr_fmt(fmt) |
| |
| #include <linux/dmar.h> |
| #include "iommu.h" |
| #include "perfmon.h" |
| |
| PMU_FORMAT_ATTR(event, "config:0-27"); /* ES: Events Select */ |
| PMU_FORMAT_ATTR(event_group, "config:28-31"); /* EGI: Event Group Index */ |
| |
| static struct attribute *iommu_pmu_format_attrs[] = { |
| &format_attr_event_group.attr, |
| &format_attr_event.attr, |
| NULL |
| }; |
| |
| static struct attribute_group iommu_pmu_format_attr_group = { |
| .name = "format", |
| .attrs = iommu_pmu_format_attrs, |
| }; |
| |
| /* The available events are added in attr_update later */ |
| static struct attribute *attrs_empty[] = { |
| NULL |
| }; |
| |
| static struct attribute_group iommu_pmu_events_attr_group = { |
| .name = "events", |
| .attrs = attrs_empty, |
| }; |
| |
| static cpumask_t iommu_pmu_cpu_mask; |
| |
| static ssize_t |
| cpumask_show(struct device *dev, struct device_attribute *attr, char *buf) |
| { |
| return cpumap_print_to_pagebuf(true, buf, &iommu_pmu_cpu_mask); |
| } |
| static DEVICE_ATTR_RO(cpumask); |
| |
| static struct attribute *iommu_pmu_cpumask_attrs[] = { |
| &dev_attr_cpumask.attr, |
| NULL |
| }; |
| |
| static struct attribute_group iommu_pmu_cpumask_attr_group = { |
| .attrs = iommu_pmu_cpumask_attrs, |
| }; |
| |
| static const struct attribute_group *iommu_pmu_attr_groups[] = { |
| &iommu_pmu_format_attr_group, |
| &iommu_pmu_events_attr_group, |
| &iommu_pmu_cpumask_attr_group, |
| NULL |
| }; |
| |
| static inline struct iommu_pmu *dev_to_iommu_pmu(struct device *dev) |
| { |
| /* |
| * The perf_event creates its own dev for each PMU. |
| * See pmu_dev_alloc() |
| */ |
| return container_of(dev_get_drvdata(dev), struct iommu_pmu, pmu); |
| } |
| |
/*
 * Define a filter "format" attribute and its own attribute group.
 * Input: _name:   attribute name (also the name of the generated group)
 *        _format: format string shown in sysfs
 *        _filter: filter capability bit(s); the attribute is visible only
 *                 when iommu_pmu->filter has one of them set
 */
#define IOMMU_PMU_ATTR(_name, _format, _filter)				\
	PMU_FORMAT_ATTR(_name, _format);				\
									\
static struct attribute *_name##_attr[] = {				\
	&format_attr_##_name.attr,					\
	NULL								\
};									\
									\
static umode_t								\
_name##_is_visible(struct kobject *kobj, struct attribute *attr, int i)	\
{									\
	struct iommu_pmu *ipmu = dev_to_iommu_pmu(kobj_to_dev(kobj));	\
									\
	if (ipmu && (ipmu->filter & _filter))				\
		return attr->mode;					\
	return 0;							\
}									\
									\
static struct attribute_group _name = {					\
	.name		= "format",					\
	.attrs		= _name##_attr,					\
	.is_visible	= _name##_is_visible,				\
};
| |
| IOMMU_PMU_ATTR(filter_requester_id_en, "config1:0", IOMMU_PMU_FILTER_REQUESTER_ID); |
| IOMMU_PMU_ATTR(filter_domain_en, "config1:1", IOMMU_PMU_FILTER_DOMAIN); |
| IOMMU_PMU_ATTR(filter_pasid_en, "config1:2", IOMMU_PMU_FILTER_PASID); |
| IOMMU_PMU_ATTR(filter_ats_en, "config1:3", IOMMU_PMU_FILTER_ATS); |
| IOMMU_PMU_ATTR(filter_page_table_en, "config1:4", IOMMU_PMU_FILTER_PAGE_TABLE); |
| IOMMU_PMU_ATTR(filter_requester_id, "config1:16-31", IOMMU_PMU_FILTER_REQUESTER_ID); |
| IOMMU_PMU_ATTR(filter_domain, "config1:32-47", IOMMU_PMU_FILTER_DOMAIN); |
| IOMMU_PMU_ATTR(filter_pasid, "config2:0-21", IOMMU_PMU_FILTER_PASID); |
| IOMMU_PMU_ATTR(filter_ats, "config2:24-28", IOMMU_PMU_FILTER_ATS); |
| IOMMU_PMU_ATTR(filter_page_table, "config2:32-36", IOMMU_PMU_FILTER_PAGE_TABLE); |
| |
/* Extract a @width-bit field starting at bit @lsb from @val. */
#define iommu_pmu_field(val, lsb, width)	(((val) >> (lsb)) & ((1 << (width)) - 1))

/* Filter enable flags (config1 bits 0-4, matching the *_en format attrs). */
#define iommu_pmu_en_requester_id(e)		iommu_pmu_field(e, 0, 1)
#define iommu_pmu_en_domain(e)			iommu_pmu_field(e, 1, 1)
#define iommu_pmu_en_pasid(e)			iommu_pmu_field(e, 2, 1)
#define iommu_pmu_en_ats(e)			iommu_pmu_field(e, 3, 1)
#define iommu_pmu_en_page_table(e)		iommu_pmu_field(e, 4, 1)

/* Filter values (bit ranges match the filter_* format attrs). */
#define iommu_pmu_get_requester_id(filter)	iommu_pmu_field(filter, 16, 16)
#define iommu_pmu_get_domain(filter)		iommu_pmu_field(filter, 32, 16)
#define iommu_pmu_get_pasid(filter)		iommu_pmu_field(filter, 0, 22)
#define iommu_pmu_get_ats(filter)		iommu_pmu_field(filter, 24, 5)
#define iommu_pmu_get_page_table(filter)	iommu_pmu_field(filter, 32, 5)
| |
/*
 * Program one filter register of counter _idx.
 * Input: _name:    filter name, selects iommu_pmu_en_<name>() and
 *                  iommu_pmu_get_<name>()
 *        _config:  value the filter payload is extracted from
 *        _filter:  filter capability bit, also selects the register slot
 *        _idx:     counter index
 *        _econfig: value the filter enable flag is extracted from
 *
 * Nothing is written unless the PMU supports the filter and the enable
 * flag is set. Relies on a variable named 'iommu_pmu' in the caller's
 * scope. Wrapped in do { } while (0) so that it behaves as a single
 * statement, e.g. inside an unbraced if/else.
 */
#define iommu_pmu_set_filter(_name, _config, _filter, _idx, _econfig)		\
do {										\
	if ((iommu_pmu->filter & (_filter)) && iommu_pmu_en_##_name(_econfig)) { \
		dmar_writel(iommu_pmu->cfg_reg + (_idx) * IOMMU_PMU_CFG_OFFSET + \
			    IOMMU_PMU_CFG_SIZE +				\
			    (ffs(_filter) - 1) * IOMMU_PMU_CFG_FILTERS_OFFSET,	\
			    iommu_pmu_get_##_name(_config) | IOMMU_PMU_FILTER_EN); \
	}									\
} while (0)

/*
 * Write 0 to the filter register selected by _filter for counter _idx,
 * provided the PMU supports that filter. Relies on a variable named
 * 'iommu_pmu' in the caller's scope.
 */
#define iommu_pmu_clear_filter(_filter, _idx)					\
do {										\
	if (iommu_pmu->filter & (_filter)) {					\
		dmar_writel(iommu_pmu->cfg_reg + (_idx) * IOMMU_PMU_CFG_OFFSET + \
			    IOMMU_PMU_CFG_SIZE +				\
			    (ffs(_filter) - 1) * IOMMU_PMU_CFG_FILTERS_OFFSET,	\
			    0);							\
	}									\
} while (0)
| |
/*
 * Define the event attr related functions
 * Input: _name: event attr name
 *        _string: string of the event in sysfs
 *        _g_idx: event group encoding
 *        _event: event encoding
 *
 * The generated attribute lives in its own "events" group and is only
 * visible when the PMU's event capability for group _g_idx has the
 * _event bit set. evcap[] holds only num_eg entries, so a group index
 * at or beyond num_eg is treated as "not supported" instead of being
 * used as an index.
 */
#define IOMMU_PMU_EVENT_ATTR(_name, _string, _g_idx, _event)		\
	PMU_EVENT_ATTR_STRING(_name, event_attr_##_name, _string)	\
									\
static struct attribute *_name##_attr[] = {				\
	&event_attr_##_name.attr.attr,					\
	NULL								\
};									\
									\
static umode_t								\
_name##_is_visible(struct kobject *kobj, struct attribute *attr, int i)	\
{									\
	struct device *dev = kobj_to_dev(kobj);				\
	struct iommu_pmu *iommu_pmu = dev_to_iommu_pmu(dev);		\
									\
	if (!iommu_pmu)							\
		return 0;						\
	/* Do not read past the enumerated event groups */		\
	if ((_g_idx) >= iommu_pmu->num_eg)				\
		return 0;						\
	return (iommu_pmu->evcap[_g_idx] & _event) ? attr->mode : 0;	\
}									\
									\
static struct attribute_group _name = {					\
	.name = "events",						\
	.attrs = _name##_attr,						\
	.is_visible = _name##_is_visible,				\
};
| |
| IOMMU_PMU_EVENT_ATTR(iommu_clocks, "event_group=0x0,event=0x001", 0x0, 0x001) |
| IOMMU_PMU_EVENT_ATTR(iommu_requests, "event_group=0x0,event=0x002", 0x0, 0x002) |
| IOMMU_PMU_EVENT_ATTR(pw_occupancy, "event_group=0x0,event=0x004", 0x0, 0x004) |
| IOMMU_PMU_EVENT_ATTR(ats_blocked, "event_group=0x0,event=0x008", 0x0, 0x008) |
| IOMMU_PMU_EVENT_ATTR(iommu_mrds, "event_group=0x1,event=0x001", 0x1, 0x001) |
| IOMMU_PMU_EVENT_ATTR(iommu_mem_blocked, "event_group=0x1,event=0x020", 0x1, 0x020) |
| IOMMU_PMU_EVENT_ATTR(pg_req_posted, "event_group=0x1,event=0x040", 0x1, 0x040) |
| IOMMU_PMU_EVENT_ATTR(ctxt_cache_lookup, "event_group=0x2,event=0x001", 0x2, 0x001) |
| IOMMU_PMU_EVENT_ATTR(ctxt_cache_hit, "event_group=0x2,event=0x002", 0x2, 0x002) |
| IOMMU_PMU_EVENT_ATTR(pasid_cache_lookup, "event_group=0x2,event=0x004", 0x2, 0x004) |
| IOMMU_PMU_EVENT_ATTR(pasid_cache_hit, "event_group=0x2,event=0x008", 0x2, 0x008) |
| IOMMU_PMU_EVENT_ATTR(ss_nonleaf_lookup, "event_group=0x2,event=0x010", 0x2, 0x010) |
| IOMMU_PMU_EVENT_ATTR(ss_nonleaf_hit, "event_group=0x2,event=0x020", 0x2, 0x020) |
| IOMMU_PMU_EVENT_ATTR(fs_nonleaf_lookup, "event_group=0x2,event=0x040", 0x2, 0x040) |
| IOMMU_PMU_EVENT_ATTR(fs_nonleaf_hit, "event_group=0x2,event=0x080", 0x2, 0x080) |
| IOMMU_PMU_EVENT_ATTR(hpt_nonleaf_lookup, "event_group=0x2,event=0x100", 0x2, 0x100) |
| IOMMU_PMU_EVENT_ATTR(hpt_nonleaf_hit, "event_group=0x2,event=0x200", 0x2, 0x200) |
| IOMMU_PMU_EVENT_ATTR(iotlb_lookup, "event_group=0x3,event=0x001", 0x3, 0x001) |
| IOMMU_PMU_EVENT_ATTR(iotlb_hit, "event_group=0x3,event=0x002", 0x3, 0x002) |
| IOMMU_PMU_EVENT_ATTR(hpt_leaf_lookup, "event_group=0x3,event=0x004", 0x3, 0x004) |
| IOMMU_PMU_EVENT_ATTR(hpt_leaf_hit, "event_group=0x3,event=0x008", 0x3, 0x008) |
| IOMMU_PMU_EVENT_ATTR(int_cache_lookup, "event_group=0x4,event=0x001", 0x4, 0x001) |
| IOMMU_PMU_EVENT_ATTR(int_cache_hit_nonposted, "event_group=0x4,event=0x002", 0x4, 0x002) |
| IOMMU_PMU_EVENT_ATTR(int_cache_hit_posted, "event_group=0x4,event=0x004", 0x4, 0x004) |
| |
| static const struct attribute_group *iommu_pmu_attr_update[] = { |
| &filter_requester_id_en, |
| &filter_domain_en, |
| &filter_pasid_en, |
| &filter_ats_en, |
| &filter_page_table_en, |
| &filter_requester_id, |
| &filter_domain, |
| &filter_pasid, |
| &filter_ats, |
| &filter_page_table, |
| &iommu_clocks, |
| &iommu_requests, |
| &pw_occupancy, |
| &ats_blocked, |
| &iommu_mrds, |
| &iommu_mem_blocked, |
| &pg_req_posted, |
| &ctxt_cache_lookup, |
| &ctxt_cache_hit, |
| &pasid_cache_lookup, |
| &pasid_cache_hit, |
| &ss_nonleaf_lookup, |
| &ss_nonleaf_hit, |
| &fs_nonleaf_lookup, |
| &fs_nonleaf_hit, |
| &hpt_nonleaf_lookup, |
| &hpt_nonleaf_hit, |
| &iotlb_lookup, |
| &iotlb_hit, |
| &hpt_leaf_lookup, |
| &hpt_leaf_hit, |
| &int_cache_lookup, |
| &int_cache_hit_nonposted, |
| &int_cache_hit_posted, |
| NULL |
| }; |
| |
| static inline void __iomem * |
| iommu_event_base(struct iommu_pmu *iommu_pmu, int idx) |
| { |
| return iommu_pmu->cntr_reg + idx * iommu_pmu->cntr_stride; |
| } |
| |
| static inline void __iomem * |
| iommu_config_base(struct iommu_pmu *iommu_pmu, int idx) |
| { |
| return iommu_pmu->cfg_reg + idx * IOMMU_PMU_CFG_OFFSET; |
| } |
| |
| static inline struct iommu_pmu *iommu_event_to_pmu(struct perf_event *event) |
| { |
| return container_of(event->pmu, struct iommu_pmu, pmu); |
| } |
| |
| static inline u64 iommu_event_config(struct perf_event *event) |
| { |
| u64 config = event->attr.config; |
| |
| return (iommu_event_select(config) << IOMMU_EVENT_CFG_ES_SHIFT) | |
| (iommu_event_group(config) << IOMMU_EVENT_CFG_EGI_SHIFT) | |
| IOMMU_EVENT_CFG_INT; |
| } |
| |
| static inline bool is_iommu_pmu_event(struct iommu_pmu *iommu_pmu, |
| struct perf_event *event) |
| { |
| return event->pmu == &iommu_pmu->pmu; |
| } |
| |
| static int iommu_pmu_validate_event(struct perf_event *event) |
| { |
| struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event); |
| u32 event_group = iommu_event_group(event->attr.config); |
| |
| if (event_group >= iommu_pmu->num_eg) |
| return -EINVAL; |
| |
| return 0; |
| } |
| |
| static int iommu_pmu_validate_group(struct perf_event *event) |
| { |
| struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event); |
| struct perf_event *sibling; |
| int nr = 0; |
| |
| /* |
| * All events in a group must be scheduled simultaneously. |
| * Check whether there is enough counters for all the events. |
| */ |
| for_each_sibling_event(sibling, event->group_leader) { |
| if (!is_iommu_pmu_event(iommu_pmu, sibling) || |
| sibling->state <= PERF_EVENT_STATE_OFF) |
| continue; |
| |
| if (++nr > iommu_pmu->num_cntr) |
| return -EINVAL; |
| } |
| |
| return 0; |
| } |
| |
| static int iommu_pmu_event_init(struct perf_event *event) |
| { |
| struct hw_perf_event *hwc = &event->hw; |
| |
| if (event->attr.type != event->pmu->type) |
| return -ENOENT; |
| |
| /* sampling not supported */ |
| if (event->attr.sample_period) |
| return -EINVAL; |
| |
| if (event->cpu < 0) |
| return -EINVAL; |
| |
| if (iommu_pmu_validate_event(event)) |
| return -EINVAL; |
| |
| hwc->config = iommu_event_config(event); |
| |
| return iommu_pmu_validate_group(event); |
| } |
| |
| static void iommu_pmu_event_update(struct perf_event *event) |
| { |
| struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event); |
| struct hw_perf_event *hwc = &event->hw; |
| u64 prev_count, new_count, delta; |
| int shift = 64 - iommu_pmu->cntr_width; |
| |
| again: |
| prev_count = local64_read(&hwc->prev_count); |
| new_count = dmar_readq(iommu_event_base(iommu_pmu, hwc->idx)); |
| if (local64_xchg(&hwc->prev_count, new_count) != prev_count) |
| goto again; |
| |
| /* |
| * The counter width is enumerated. Always shift the counter |
| * before using it. |
| */ |
| delta = (new_count << shift) - (prev_count << shift); |
| delta >>= shift; |
| |
| local64_add(delta, &event->count); |
| } |
| |
| static void iommu_pmu_start(struct perf_event *event, int flags) |
| { |
| struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event); |
| struct intel_iommu *iommu = iommu_pmu->iommu; |
| struct hw_perf_event *hwc = &event->hw; |
| u64 count; |
| |
| if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) |
| return; |
| |
| if (WARN_ON_ONCE(hwc->idx < 0 || hwc->idx >= IOMMU_PMU_IDX_MAX)) |
| return; |
| |
| if (flags & PERF_EF_RELOAD) |
| WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); |
| |
| hwc->state = 0; |
| |
| /* Always reprogram the period */ |
| count = dmar_readq(iommu_event_base(iommu_pmu, hwc->idx)); |
| local64_set((&hwc->prev_count), count); |
| |
| /* |
| * The error of ecmd will be ignored. |
| * - The existing perf_event subsystem doesn't handle the error. |
| * Only IOMMU PMU returns runtime HW error. We don't want to |
| * change the existing generic interfaces for the specific case. |
| * - It's a corner case caused by HW, which is very unlikely to |
| * happen. There is nothing SW can do. |
| * - The worst case is that the user will get <not count> with |
| * perf command, which can give the user some hints. |
| */ |
| ecmd_submit_sync(iommu, DMA_ECMD_ENABLE, hwc->idx, 0); |
| |
| perf_event_update_userpage(event); |
| } |
| |
| static void iommu_pmu_stop(struct perf_event *event, int flags) |
| { |
| struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event); |
| struct intel_iommu *iommu = iommu_pmu->iommu; |
| struct hw_perf_event *hwc = &event->hw; |
| |
| if (!(hwc->state & PERF_HES_STOPPED)) { |
| ecmd_submit_sync(iommu, DMA_ECMD_DISABLE, hwc->idx, 0); |
| |
| iommu_pmu_event_update(event); |
| |
| hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; |
| } |
| } |
| |
| static inline int |
| iommu_pmu_validate_per_cntr_event(struct iommu_pmu *iommu_pmu, |
| int idx, struct perf_event *event) |
| { |
| u32 event_group = iommu_event_group(event->attr.config); |
| u32 select = iommu_event_select(event->attr.config); |
| |
| if (!(iommu_pmu->cntr_evcap[idx][event_group] & select)) |
| return -EINVAL; |
| |
| return 0; |
| } |
| |
| static int iommu_pmu_assign_event(struct iommu_pmu *iommu_pmu, |
| struct perf_event *event) |
| { |
| struct hw_perf_event *hwc = &event->hw; |
| int idx; |
| |
| /* |
| * The counters which support limited events are usually at the end. |
| * Schedule them first to accommodate more events. |
| */ |
| for (idx = iommu_pmu->num_cntr - 1; idx >= 0; idx--) { |
| if (test_and_set_bit(idx, iommu_pmu->used_mask)) |
| continue; |
| /* Check per-counter event capabilities */ |
| if (!iommu_pmu_validate_per_cntr_event(iommu_pmu, idx, event)) |
| break; |
| clear_bit(idx, iommu_pmu->used_mask); |
| } |
| if (idx < 0) |
| return -EINVAL; |
| |
| iommu_pmu->event_list[idx] = event; |
| hwc->idx = idx; |
| |
| /* config events */ |
| dmar_writeq(iommu_config_base(iommu_pmu, idx), hwc->config); |
| |
| iommu_pmu_set_filter(requester_id, event->attr.config1, |
| IOMMU_PMU_FILTER_REQUESTER_ID, idx, |
| event->attr.config1); |
| iommu_pmu_set_filter(domain, event->attr.config1, |
| IOMMU_PMU_FILTER_DOMAIN, idx, |
| event->attr.config1); |
| iommu_pmu_set_filter(pasid, event->attr.config1, |
| IOMMU_PMU_FILTER_PASID, idx, |
| event->attr.config1); |
| iommu_pmu_set_filter(ats, event->attr.config2, |
| IOMMU_PMU_FILTER_ATS, idx, |
| event->attr.config1); |
| iommu_pmu_set_filter(page_table, event->attr.config2, |
| IOMMU_PMU_FILTER_PAGE_TABLE, idx, |
| event->attr.config1); |
| |
| return 0; |
| } |
| |
| static int iommu_pmu_add(struct perf_event *event, int flags) |
| { |
| struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event); |
| struct hw_perf_event *hwc = &event->hw; |
| int ret; |
| |
| ret = iommu_pmu_assign_event(iommu_pmu, event); |
| if (ret < 0) |
| return ret; |
| |
| hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; |
| |
| if (flags & PERF_EF_START) |
| iommu_pmu_start(event, 0); |
| |
| return 0; |
| } |
| |
| static void iommu_pmu_del(struct perf_event *event, int flags) |
| { |
| struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event); |
| int idx = event->hw.idx; |
| |
| iommu_pmu_stop(event, PERF_EF_UPDATE); |
| |
| iommu_pmu_clear_filter(IOMMU_PMU_FILTER_REQUESTER_ID, idx); |
| iommu_pmu_clear_filter(IOMMU_PMU_FILTER_DOMAIN, idx); |
| iommu_pmu_clear_filter(IOMMU_PMU_FILTER_PASID, idx); |
| iommu_pmu_clear_filter(IOMMU_PMU_FILTER_ATS, idx); |
| iommu_pmu_clear_filter(IOMMU_PMU_FILTER_PAGE_TABLE, idx); |
| |
| iommu_pmu->event_list[idx] = NULL; |
| event->hw.idx = -1; |
| clear_bit(idx, iommu_pmu->used_mask); |
| |
| perf_event_update_userpage(event); |
| } |
| |
| static void iommu_pmu_enable(struct pmu *pmu) |
| { |
| struct iommu_pmu *iommu_pmu = container_of(pmu, struct iommu_pmu, pmu); |
| struct intel_iommu *iommu = iommu_pmu->iommu; |
| |
| ecmd_submit_sync(iommu, DMA_ECMD_UNFREEZE, 0, 0); |
| } |
| |
| static void iommu_pmu_disable(struct pmu *pmu) |
| { |
| struct iommu_pmu *iommu_pmu = container_of(pmu, struct iommu_pmu, pmu); |
| struct intel_iommu *iommu = iommu_pmu->iommu; |
| |
| ecmd_submit_sync(iommu, DMA_ECMD_FREEZE, 0, 0); |
| } |
| |
| static void iommu_pmu_counter_overflow(struct iommu_pmu *iommu_pmu) |
| { |
| struct perf_event *event; |
| u64 status; |
| int i; |
| |
| /* |
| * Two counters may be overflowed very close. Always check |
| * whether there are more to handle. |
| */ |
| while ((status = dmar_readq(iommu_pmu->overflow))) { |
| for_each_set_bit(i, (unsigned long *)&status, iommu_pmu->num_cntr) { |
| /* |
| * Find the assigned event of the counter. |
| * Accumulate the value into the event->count. |
| */ |
| event = iommu_pmu->event_list[i]; |
| if (!event) { |
| pr_warn_once("Cannot find the assigned event for counter %d\n", i); |
| continue; |
| } |
| iommu_pmu_event_update(event); |
| } |
| |
| dmar_writeq(iommu_pmu->overflow, status); |
| } |
| } |
| |
| static irqreturn_t iommu_pmu_irq_handler(int irq, void *dev_id) |
| { |
| struct intel_iommu *iommu = dev_id; |
| |
| if (!dmar_readl(iommu->reg + DMAR_PERFINTRSTS_REG)) |
| return IRQ_NONE; |
| |
| iommu_pmu_counter_overflow(iommu->pmu); |
| |
| /* Clear the status bit */ |
| dmar_writel(iommu->reg + DMAR_PERFINTRSTS_REG, DMA_PERFINTRSTS_PIS); |
| |
| return IRQ_HANDLED; |
| } |
| |
| static int __iommu_pmu_register(struct intel_iommu *iommu) |
| { |
| struct iommu_pmu *iommu_pmu = iommu->pmu; |
| |
| iommu_pmu->pmu.name = iommu->name; |
| iommu_pmu->pmu.task_ctx_nr = perf_invalid_context; |
| iommu_pmu->pmu.event_init = iommu_pmu_event_init; |
| iommu_pmu->pmu.pmu_enable = iommu_pmu_enable; |
| iommu_pmu->pmu.pmu_disable = iommu_pmu_disable; |
| iommu_pmu->pmu.add = iommu_pmu_add; |
| iommu_pmu->pmu.del = iommu_pmu_del; |
| iommu_pmu->pmu.start = iommu_pmu_start; |
| iommu_pmu->pmu.stop = iommu_pmu_stop; |
| iommu_pmu->pmu.read = iommu_pmu_event_update; |
| iommu_pmu->pmu.attr_groups = iommu_pmu_attr_groups; |
| iommu_pmu->pmu.attr_update = iommu_pmu_attr_update; |
| iommu_pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE; |
| iommu_pmu->pmu.module = THIS_MODULE; |
| |
| return perf_pmu_register(&iommu_pmu->pmu, iommu_pmu->pmu.name, -1); |
| } |
| |
| static inline void __iomem * |
| get_perf_reg_address(struct intel_iommu *iommu, u32 offset) |
| { |
| u32 off = dmar_readl(iommu->reg + offset); |
| |
| return iommu->reg + off; |
| } |
| |
| int alloc_iommu_pmu(struct intel_iommu *iommu) |
| { |
| struct iommu_pmu *iommu_pmu; |
| int i, j, ret; |
| u64 perfcap; |
| u32 cap; |
| |
| if (!ecap_pms(iommu->ecap)) |
| return 0; |
| |
| /* The IOMMU PMU requires the ECMD support as well */ |
| if (!cap_ecmds(iommu->cap)) |
| return -ENODEV; |
| |
| perfcap = dmar_readq(iommu->reg + DMAR_PERFCAP_REG); |
| /* The performance monitoring is not supported. */ |
| if (!perfcap) |
| return -ENODEV; |
| |
| /* Sanity check for the number of the counters and event groups */ |
| if (!pcap_num_cntr(perfcap) || !pcap_num_event_group(perfcap)) |
| return -ENODEV; |
| |
| /* The interrupt on overflow is required */ |
| if (!pcap_interrupt(perfcap)) |
| return -ENODEV; |
| |
| /* Check required Enhanced Command Capability */ |
| if (!ecmd_has_pmu_essential(iommu)) |
| return -ENODEV; |
| |
| iommu_pmu = kzalloc(sizeof(*iommu_pmu), GFP_KERNEL); |
| if (!iommu_pmu) |
| return -ENOMEM; |
| |
| iommu_pmu->num_cntr = pcap_num_cntr(perfcap); |
| if (iommu_pmu->num_cntr > IOMMU_PMU_IDX_MAX) { |
| pr_warn_once("The number of IOMMU counters %d > max(%d), clipping!", |
| iommu_pmu->num_cntr, IOMMU_PMU_IDX_MAX); |
| iommu_pmu->num_cntr = IOMMU_PMU_IDX_MAX; |
| } |
| |
| iommu_pmu->cntr_width = pcap_cntr_width(perfcap); |
| iommu_pmu->filter = pcap_filters_mask(perfcap); |
| iommu_pmu->cntr_stride = pcap_cntr_stride(perfcap); |
| iommu_pmu->num_eg = pcap_num_event_group(perfcap); |
| |
| iommu_pmu->evcap = kcalloc(iommu_pmu->num_eg, sizeof(u64), GFP_KERNEL); |
| if (!iommu_pmu->evcap) { |
| ret = -ENOMEM; |
| goto free_pmu; |
| } |
| |
| /* Parse event group capabilities */ |
| for (i = 0; i < iommu_pmu->num_eg; i++) { |
| u64 pcap; |
| |
| pcap = dmar_readq(iommu->reg + DMAR_PERFEVNTCAP_REG + |
| i * IOMMU_PMU_CAP_REGS_STEP); |
| iommu_pmu->evcap[i] = pecap_es(pcap); |
| } |
| |
| iommu_pmu->cntr_evcap = kcalloc(iommu_pmu->num_cntr, sizeof(u32 *), GFP_KERNEL); |
| if (!iommu_pmu->cntr_evcap) { |
| ret = -ENOMEM; |
| goto free_pmu_evcap; |
| } |
| for (i = 0; i < iommu_pmu->num_cntr; i++) { |
| iommu_pmu->cntr_evcap[i] = kcalloc(iommu_pmu->num_eg, sizeof(u32), GFP_KERNEL); |
| if (!iommu_pmu->cntr_evcap[i]) { |
| ret = -ENOMEM; |
| goto free_pmu_cntr_evcap; |
| } |
| /* |
| * Set to the global capabilities, will adjust according |
| * to per-counter capabilities later. |
| */ |
| for (j = 0; j < iommu_pmu->num_eg; j++) |
| iommu_pmu->cntr_evcap[i][j] = (u32)iommu_pmu->evcap[j]; |
| } |
| |
| iommu_pmu->cfg_reg = get_perf_reg_address(iommu, DMAR_PERFCFGOFF_REG); |
| iommu_pmu->cntr_reg = get_perf_reg_address(iommu, DMAR_PERFCNTROFF_REG); |
| iommu_pmu->overflow = get_perf_reg_address(iommu, DMAR_PERFOVFOFF_REG); |
| |
| /* |
| * Check per-counter capabilities. All counters should have the |
| * same capabilities on Interrupt on Overflow Support and Counter |
| * Width. |
| */ |
| for (i = 0; i < iommu_pmu->num_cntr; i++) { |
| cap = dmar_readl(iommu_pmu->cfg_reg + |
| i * IOMMU_PMU_CFG_OFFSET + |
| IOMMU_PMU_CFG_CNTRCAP_OFFSET); |
| if (!iommu_cntrcap_pcc(cap)) |
| continue; |
| |
| /* |
| * It's possible that some counters have a different |
| * capability because of e.g., HW bug. Check the corner |
| * case here and simply drop those counters. |
| */ |
| if ((iommu_cntrcap_cw(cap) != iommu_pmu->cntr_width) || |
| !iommu_cntrcap_ios(cap)) { |
| iommu_pmu->num_cntr = i; |
| pr_warn("PMU counter capability inconsistent, counter number reduced to %d\n", |
| iommu_pmu->num_cntr); |
| } |
| |
| /* Clear the pre-defined events group */ |
| for (j = 0; j < iommu_pmu->num_eg; j++) |
| iommu_pmu->cntr_evcap[i][j] = 0; |
| |
| /* Override with per-counter event capabilities */ |
| for (j = 0; j < iommu_cntrcap_egcnt(cap); j++) { |
| cap = dmar_readl(iommu_pmu->cfg_reg + i * IOMMU_PMU_CFG_OFFSET + |
| IOMMU_PMU_CFG_CNTREVCAP_OFFSET + |
| (j * IOMMU_PMU_OFF_REGS_STEP)); |
| iommu_pmu->cntr_evcap[i][iommu_event_group(cap)] = iommu_event_select(cap); |
| /* |
| * Some events may only be supported by a specific counter. |
| * Track them in the evcap as well. |
| */ |
| iommu_pmu->evcap[iommu_event_group(cap)] |= iommu_event_select(cap); |
| } |
| } |
| |
| iommu_pmu->iommu = iommu; |
| iommu->pmu = iommu_pmu; |
| |
| return 0; |
| |
| free_pmu_cntr_evcap: |
| for (i = 0; i < iommu_pmu->num_cntr; i++) |
| kfree(iommu_pmu->cntr_evcap[i]); |
| kfree(iommu_pmu->cntr_evcap); |
| free_pmu_evcap: |
| kfree(iommu_pmu->evcap); |
| free_pmu: |
| kfree(iommu_pmu); |
| |
| return ret; |
| } |
| |
| void free_iommu_pmu(struct intel_iommu *iommu) |
| { |
| struct iommu_pmu *iommu_pmu = iommu->pmu; |
| |
| if (!iommu_pmu) |
| return; |
| |
| if (iommu_pmu->evcap) { |
| int i; |
| |
| for (i = 0; i < iommu_pmu->num_cntr; i++) |
| kfree(iommu_pmu->cntr_evcap[i]); |
| kfree(iommu_pmu->cntr_evcap); |
| } |
| kfree(iommu_pmu->evcap); |
| kfree(iommu_pmu); |
| iommu->pmu = NULL; |
| } |
| |
| static int iommu_pmu_set_interrupt(struct intel_iommu *iommu) |
| { |
| struct iommu_pmu *iommu_pmu = iommu->pmu; |
| int irq, ret; |
| |
| irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PERF + iommu->seq_id, iommu->node, iommu); |
| if (irq <= 0) |
| return -EINVAL; |
| |
| snprintf(iommu_pmu->irq_name, sizeof(iommu_pmu->irq_name), "dmar%d-perf", iommu->seq_id); |
| |
| iommu->perf_irq = irq; |
| ret = request_threaded_irq(irq, NULL, iommu_pmu_irq_handler, |
| IRQF_ONESHOT, iommu_pmu->irq_name, iommu); |
| if (ret) { |
| dmar_free_hwirq(irq); |
| iommu->perf_irq = 0; |
| return ret; |
| } |
| return 0; |
| } |
| |
| static void iommu_pmu_unset_interrupt(struct intel_iommu *iommu) |
| { |
| if (!iommu->perf_irq) |
| return; |
| |
| free_irq(iommu->perf_irq, iommu); |
| dmar_free_hwirq(iommu->perf_irq); |
| iommu->perf_irq = 0; |
| } |
| |
| static int iommu_pmu_cpu_online(unsigned int cpu, struct hlist_node *node) |
| { |
| struct iommu_pmu *iommu_pmu = hlist_entry_safe(node, typeof(*iommu_pmu), cpuhp_node); |
| |
| if (cpumask_empty(&iommu_pmu_cpu_mask)) |
| cpumask_set_cpu(cpu, &iommu_pmu_cpu_mask); |
| |
| if (cpumask_test_cpu(cpu, &iommu_pmu_cpu_mask)) |
| iommu_pmu->cpu = cpu; |
| |
| return 0; |
| } |
| |
| static int iommu_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node) |
| { |
| struct iommu_pmu *iommu_pmu = hlist_entry_safe(node, typeof(*iommu_pmu), cpuhp_node); |
| int target = cpumask_first(&iommu_pmu_cpu_mask); |
| |
| /* |
| * The iommu_pmu_cpu_mask has been updated when offline the CPU |
| * for the first iommu_pmu. Migrate the other iommu_pmu to the |
| * new target. |
| */ |
| if (target < nr_cpu_ids && target != iommu_pmu->cpu) { |
| perf_pmu_migrate_context(&iommu_pmu->pmu, cpu, target); |
| iommu_pmu->cpu = target; |
| return 0; |
| } |
| |
| if (!cpumask_test_and_clear_cpu(cpu, &iommu_pmu_cpu_mask)) |
| return 0; |
| |
| target = cpumask_any_but(cpu_online_mask, cpu); |
| |
| if (target < nr_cpu_ids) |
| cpumask_set_cpu(target, &iommu_pmu_cpu_mask); |
| else |
| return 0; |
| |
| perf_pmu_migrate_context(&iommu_pmu->pmu, cpu, target); |
| iommu_pmu->cpu = target; |
| |
| return 0; |
| } |
| |
| static int nr_iommu_pmu; |
| static enum cpuhp_state iommu_cpuhp_slot; |
| |
| static int iommu_pmu_cpuhp_setup(struct iommu_pmu *iommu_pmu) |
| { |
| int ret; |
| |
| if (!nr_iommu_pmu) { |
| ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, |
| "driver/iommu/intel/perfmon:online", |
| iommu_pmu_cpu_online, |
| iommu_pmu_cpu_offline); |
| if (ret < 0) |
| return ret; |
| iommu_cpuhp_slot = ret; |
| } |
| |
| ret = cpuhp_state_add_instance(iommu_cpuhp_slot, &iommu_pmu->cpuhp_node); |
| if (ret) { |
| if (!nr_iommu_pmu) |
| cpuhp_remove_multi_state(iommu_cpuhp_slot); |
| return ret; |
| } |
| nr_iommu_pmu++; |
| |
| return 0; |
| } |
| |
| static void iommu_pmu_cpuhp_free(struct iommu_pmu *iommu_pmu) |
| { |
| cpuhp_state_remove_instance(iommu_cpuhp_slot, &iommu_pmu->cpuhp_node); |
| |
| if (--nr_iommu_pmu) |
| return; |
| |
| cpuhp_remove_multi_state(iommu_cpuhp_slot); |
| } |
| |
| void iommu_pmu_register(struct intel_iommu *iommu) |
| { |
| struct iommu_pmu *iommu_pmu = iommu->pmu; |
| |
| if (!iommu_pmu) |
| return; |
| |
| if (__iommu_pmu_register(iommu)) |
| goto err; |
| |
| if (iommu_pmu_cpuhp_setup(iommu_pmu)) |
| goto unregister; |
| |
| /* Set interrupt for overflow */ |
| if (iommu_pmu_set_interrupt(iommu)) |
| goto cpuhp_free; |
| |
| return; |
| |
| cpuhp_free: |
| iommu_pmu_cpuhp_free(iommu_pmu); |
| unregister: |
| perf_pmu_unregister(&iommu_pmu->pmu); |
| err: |
| pr_err("Failed to register PMU for iommu (seq_id = %d)\n", iommu->seq_id); |
| free_iommu_pmu(iommu); |
| } |
| |
| void iommu_pmu_unregister(struct intel_iommu *iommu) |
| { |
| struct iommu_pmu *iommu_pmu = iommu->pmu; |
| |
| if (!iommu_pmu) |
| return; |
| |
| iommu_pmu_unset_interrupt(iommu); |
| iommu_pmu_cpuhp_free(iommu_pmu); |
| perf_pmu_unregister(&iommu_pmu->pmu); |
| } |