| // SPDX-License-Identifier: GPL-2.0-or-later |
| /* |
| * Hypervisor supplied "gpci" ("get performance counter info") performance |
| * counter support |
| * |
| * Author: Cody P Schafer <cody@linux.vnet.ibm.com> |
| * Copyright 2014 IBM Corporation. |
| */ |
| |
| #define pr_fmt(fmt) "hv-gpci: " fmt |
| |
| #include <linux/init.h> |
| #include <linux/perf_event.h> |
| #include <asm/firmware.h> |
| #include <asm/hvcall.h> |
| #include <asm/io.h> |
| |
| #include "hv-gpci.h" |
| #include "hv-common.h" |
| |
| /* |
| * Example usage: |
| * perf stat -e 'hv_gpci/counter_info_version=3,offset=0,length=8, |
| * secondary_index=0,starting_index=0xffffffff,request=0x10/' ... |
| */ |
| |
| /* |
| * perf event format fields. Going by the argument lists, each invocation |
| * names a field, the config word that carries it (config or config1) and |
| * the first and last bit it occupies. The macros are not defined in this |
| * file (presumably they come from hv-common.h - confirm). The event_get_*() |
| * accessors used later in this file carry the same field names. |
| */ |
| /* u32 */ |
| EVENT_DEFINE_RANGE_FORMAT(request, config, 0, 31); |
| /* u32 */ |
| /* |
| * Note that starting_index, phys_processor_idx, sibling_part_id, |
| * hw_chip_id, partition_id all refer to the same bit range. They |
| * are basically aliases for the starting_index. The specific alias |
| * used depends on the event. See REQUEST_IDX_KIND in hv-gpci-requests.h |
| */ |
| EVENT_DEFINE_RANGE_FORMAT(starting_index, config, 32, 63); |
| EVENT_DEFINE_RANGE_FORMAT_LITE(phys_processor_idx, config, 32, 63); |
| EVENT_DEFINE_RANGE_FORMAT_LITE(sibling_part_id, config, 32, 63); |
| EVENT_DEFINE_RANGE_FORMAT_LITE(hw_chip_id, config, 32, 63); |
| EVENT_DEFINE_RANGE_FORMAT_LITE(partition_id, config, 32, 63); |
| |
| /* u16 */ |
| EVENT_DEFINE_RANGE_FORMAT(secondary_index, config1, 0, 15); |
| /* u8 */ |
| EVENT_DEFINE_RANGE_FORMAT(counter_info_version, config1, 16, 23); |
| /* u8, bytes of data (1-8) */ |
| EVENT_DEFINE_RANGE_FORMAT(length, config1, 24, 31); |
| /* u32, byte offset */ |
| EVENT_DEFINE_RANGE_FORMAT(offset, config1, 32, 63); |
| |
| /* |
| * CPU mask reported through the "cpumask" attribute. The hotplug callbacks |
| * in this file keep at most one CPU set in it. |
| */ |
| static cpumask_t hv_gpci_cpumask; |
| |
| /* NULL-terminated list of the format attributes defined above. */ |
| static struct attribute *format_attrs[] = { |
| &format_attr_request.attr, |
| &format_attr_starting_index.attr, |
| &format_attr_phys_processor_idx.attr, |
| &format_attr_sibling_part_id.attr, |
| &format_attr_hw_chip_id.attr, |
| &format_attr_partition_id.attr, |
| &format_attr_secondary_index.attr, |
| &format_attr_counter_info_version.attr, |
| |
| &format_attr_offset.attr, |
| &format_attr_length.attr, |
| NULL, |
| }; |
| |
| /* sysfs group "format", exposing format_attrs[]. */ |
| static const struct attribute_group format_group = { |
| .name = "format", |
| .attrs = format_attrs, |
| }; |
| |
| /* |
| * sysfs group "events". It is not const because hv_gpci_init() picks the |
| * attribute list at run time. |
| */ |
| static struct attribute_group event_group = { |
| .name = "events", |
| /* .attrs is set in init */ |
| }; |
| |
| /* |
| * HV_CAPS_ATTR(_name, _format) - define a read-only capability attribute. |
| * |
| * Expands to a <_name>_show() callback and a device attribute named |
| * hv_caps_attr_<_name>. The callback queries the capabilities with |
| * hv_perf_caps_get() on every read and prints the caps field called |
| * <_name> using <_format>. It returns -EIO if the query reports an error. |
| */ |
| #define HV_CAPS_ATTR(_name, _format) \ |
| static ssize_t _name##_show(struct device *dev, \ |
| struct device_attribute *attr, \ |
| char *page) \ |
| { \ |
| struct hv_perf_caps caps; \ |
| unsigned long hret = hv_perf_caps_get(&caps); \ |
| if (hret) \ |
| return -EIO; \ |
| \ |
| return sprintf(page, _format, caps._name); \ |
| } \ |
| static struct device_attribute hv_caps_attr_##_name = __ATTR_RO(_name) |
| |
| static ssize_t kernel_version_show(struct device *dev, |
| struct device_attribute *attr, |
| char *page) |
| { |
| return sprintf(page, "0x%x\n", COUNTER_INFO_VERSION_CURRENT); |
| } |
| |
| static ssize_t cpumask_show(struct device *dev, |
| struct device_attribute *attr, char *buf) |
| { |
| return cpumap_print_to_pagebuf(true, buf, &hv_gpci_cpumask); |
| } |
| |
| /* |
| * Interface attribute array index to store system information. |
| * These are positions in interface_attrs[]: 6..10 are the NULL placeholders |
| * filled in by add_sysinfo_interface_files(), 11 is the terminating NULL. |
| */ |
| #define INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR 6 |
| #define INTERFACE_PROCESSOR_CONFIG_ATTR 7 |
| #define INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR 8 |
| #define INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR 9 |
| #define INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR 10 |
| #define INTERFACE_NULL_ATTR 11 |
| |
| /* |
| * Counter request value to retrieve system information. |
| * The enumerators index sysinfo_counter_request[]. Their order must match |
| * the INTERFACE_*_ATTR order above, because add_sysinfo_interface_files() |
| * pairs entry i of the table with interface slot |
| * INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR + i. |
| */ |
| enum { |
| PROCESSOR_BUS_TOPOLOGY, |
| PROCESSOR_CONFIG, |
| AFFINITY_DOMAIN_VIA_VP, /* affinity domain via virtual processor */ |
| AFFINITY_DOMAIN_VIA_DOM, /* affinity domain via domain */ |
| AFFINITY_DOMAIN_VIA_PAR, /* affinity domain via partition */ |
| }; |
| |
| /* counter_request value passed to H_GET_PERF_COUNTER_INFO for each kind */ |
| static int sysinfo_counter_request[] = { |
| [PROCESSOR_BUS_TOPOLOGY] = 0xD0, |
| [PROCESSOR_CONFIG] = 0x90, |
| [AFFINITY_DOMAIN_VIA_VP] = 0xA0, |
| [AFFINITY_DOMAIN_VIA_DOM] = 0xB0, |
| [AFFINITY_DOMAIN_VIA_PAR] = 0xB1, |
| }; |
| |
| /* |
| * Per-CPU request buffer of HGPCI_REQ_BUFFER_SIZE bytes, aligned to |
| * sizeof(uint64_t). Users take it with get_cpu_var(), pass its physical |
| * address to the hcall and must release it with put_cpu_var(). |
| */ |
| static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t)); |
| |
| static unsigned long systeminfo_gpci_request(u32 req, u32 starting_index, |
| u16 secondary_index, char *buf, |
| size_t *n, struct hv_gpci_request_buffer *arg) |
| { |
| unsigned long ret; |
| size_t i, j; |
| |
| arg->params.counter_request = cpu_to_be32(req); |
| arg->params.starting_index = cpu_to_be32(starting_index); |
| arg->params.secondary_index = cpu_to_be16(secondary_index); |
| |
| ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, |
| virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); |
| |
| /* |
| * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', |
| * which means that the current buffer size cannot accommodate |
| * all the information and a partial buffer returned. |
| * hcall fails incase of ret value other than H_SUCCESS or H_PARAMETER. |
| * |
| * ret value as H_AUTHORITY implies that partition is not permitted to retrieve |
| * performance information, and required to set |
| * "Enable Performance Information Collection" option. |
| */ |
| if (ret == H_AUTHORITY) |
| return -EPERM; |
| |
| /* |
| * hcall can fail with other possible ret value like H_PRIVILEGE/H_HARDWARE |
| * because of invalid buffer-length/address or due to some hardware |
| * error. |
| */ |
| if (ret && (ret != H_PARAMETER)) |
| return -EIO; |
| |
| /* |
| * hcall H_GET_PERF_COUNTER_INFO populates the 'returned_values' |
| * to show the total number of counter_value array elements |
| * returned via hcall. |
| * hcall also populates 'cv_element_size' corresponds to individual |
| * counter_value array element size. Below loop go through all |
| * counter_value array elements as per their size and add it to |
| * the output buffer. |
| */ |
| for (i = 0; i < be16_to_cpu(arg->params.returned_values); i++) { |
| j = i * be16_to_cpu(arg->params.cv_element_size); |
| |
| for (; j < (i + 1) * be16_to_cpu(arg->params.cv_element_size); j++) |
| *n += sprintf(buf + *n, "%02x", (u8)arg->bytes[j]); |
| *n += sprintf(buf + *n, "\n"); |
| } |
| |
| if (*n >= PAGE_SIZE) { |
| pr_info("System information exceeds PAGE_SIZE\n"); |
| return -EFBIG; |
| } |
| |
| return ret; |
| } |
| |
| static ssize_t processor_bus_topology_show(struct device *dev, struct device_attribute *attr, |
| char *buf) |
| { |
| struct hv_gpci_request_buffer *arg; |
| unsigned long ret; |
| size_t n = 0; |
| |
| arg = (void *)get_cpu_var(hv_gpci_reqb); |
| memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); |
| |
| /* |
| * Pass the counter request value 0xD0 corresponds to request |
| * type 'Processor_bus_topology', to retrieve |
| * the system topology information. |
| * starting_index value implies the starting hardware |
| * chip id. |
| */ |
| ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY], |
| 0, 0, buf, &n, arg); |
| |
| if (!ret) |
| return n; |
| |
| if (ret != H_PARAMETER) |
| goto out; |
| |
| /* |
| * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which |
| * implies that buffer can't accommodate all information, and a partial buffer |
| * returned. To handle that, we need to make subsequent requests |
| * with next starting index to retrieve additional (missing) data. |
| * Below loop do subsequent hcalls with next starting index and add it |
| * to buffer util we get all the information. |
| */ |
| while (ret == H_PARAMETER) { |
| int returned_values = be16_to_cpu(arg->params.returned_values); |
| int elementsize = be16_to_cpu(arg->params.cv_element_size); |
| int last_element = (returned_values - 1) * elementsize; |
| |
| /* |
| * Since the starting index value is part of counter_value |
| * buffer elements, use the starting index value in the last |
| * element and add 1 to make subsequent hcalls. |
| */ |
| u32 starting_index = arg->bytes[last_element + 3] + |
| (arg->bytes[last_element + 2] << 8) + |
| (arg->bytes[last_element + 1] << 16) + |
| (arg->bytes[last_element] << 24) + 1; |
| |
| memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); |
| |
| ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY], |
| starting_index, 0, buf, &n, arg); |
| |
| if (!ret) |
| return n; |
| |
| if (ret != H_PARAMETER) |
| goto out; |
| } |
| |
| return n; |
| |
| out: |
| put_cpu_var(hv_gpci_reqb); |
| return ret; |
| } |
| |
| static ssize_t processor_config_show(struct device *dev, struct device_attribute *attr, |
| char *buf) |
| { |
| struct hv_gpci_request_buffer *arg; |
| unsigned long ret; |
| size_t n = 0; |
| |
| arg = (void *)get_cpu_var(hv_gpci_reqb); |
| memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); |
| |
| /* |
| * Pass the counter request value 0x90 corresponds to request |
| * type 'Processor_config', to retrieve |
| * the system processor information. |
| * starting_index value implies the starting hardware |
| * processor index. |
| */ |
| ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_CONFIG], |
| 0, 0, buf, &n, arg); |
| |
| if (!ret) |
| return n; |
| |
| if (ret != H_PARAMETER) |
| goto out; |
| |
| /* |
| * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which |
| * implies that buffer can't accommodate all information, and a partial buffer |
| * returned. To handle that, we need to take subsequent requests |
| * with next starting index to retrieve additional (missing) data. |
| * Below loop do subsequent hcalls with next starting index and add it |
| * to buffer util we get all the information. |
| */ |
| while (ret == H_PARAMETER) { |
| int returned_values = be16_to_cpu(arg->params.returned_values); |
| int elementsize = be16_to_cpu(arg->params.cv_element_size); |
| int last_element = (returned_values - 1) * elementsize; |
| |
| /* |
| * Since the starting index is part of counter_value |
| * buffer elements, use the starting index value in the last |
| * element and add 1 to subsequent hcalls. |
| */ |
| u32 starting_index = arg->bytes[last_element + 3] + |
| (arg->bytes[last_element + 2] << 8) + |
| (arg->bytes[last_element + 1] << 16) + |
| (arg->bytes[last_element] << 24) + 1; |
| |
| memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); |
| |
| ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_CONFIG], |
| starting_index, 0, buf, &n, arg); |
| |
| if (!ret) |
| return n; |
| |
| if (ret != H_PARAMETER) |
| goto out; |
| } |
| |
| return n; |
| |
| out: |
| put_cpu_var(hv_gpci_reqb); |
| return ret; |
| } |
| |
| static ssize_t affinity_domain_via_virtual_processor_show(struct device *dev, |
| struct device_attribute *attr, char *buf) |
| { |
| struct hv_gpci_request_buffer *arg; |
| unsigned long ret; |
| size_t n = 0; |
| |
| arg = (void *)get_cpu_var(hv_gpci_reqb); |
| memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); |
| |
| /* |
| * Pass the counter request 0xA0 corresponds to request |
| * type 'Affinity_domain_information_by_virutal_processor', |
| * to retrieve the system affinity domain information. |
| * starting_index value refers to the starting hardware |
| * processor index. |
| */ |
| ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP], |
| 0, 0, buf, &n, arg); |
| |
| if (!ret) |
| return n; |
| |
| if (ret != H_PARAMETER) |
| goto out; |
| |
| /* |
| * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which |
| * implies that buffer can't accommodate all information, and a partial buffer |
| * returned. To handle that, we need to take subsequent requests |
| * with next secondary index to retrieve additional (missing) data. |
| * Below loop do subsequent hcalls with next secondary index and add it |
| * to buffer util we get all the information. |
| */ |
| while (ret == H_PARAMETER) { |
| int returned_values = be16_to_cpu(arg->params.returned_values); |
| int elementsize = be16_to_cpu(arg->params.cv_element_size); |
| int last_element = (returned_values - 1) * elementsize; |
| |
| /* |
| * Since the starting index and secondary index type is part of the |
| * counter_value buffer elements, use the starting index value in the |
| * last array element as subsequent starting index, and use secondary index |
| * value in the last array element plus 1 as subsequent secondary index. |
| * For counter request '0xA0', starting index points to partition id |
| * and secondary index points to corresponding virtual processor index. |
| */ |
| u32 starting_index = arg->bytes[last_element + 1] + (arg->bytes[last_element] << 8); |
| u16 secondary_index = arg->bytes[last_element + 3] + |
| (arg->bytes[last_element + 2] << 8) + 1; |
| |
| memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); |
| |
| ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP], |
| starting_index, secondary_index, buf, &n, arg); |
| |
| if (!ret) |
| return n; |
| |
| if (ret != H_PARAMETER) |
| goto out; |
| } |
| |
| return n; |
| |
| out: |
| put_cpu_var(hv_gpci_reqb); |
| return ret; |
| } |
| |
| static ssize_t affinity_domain_via_domain_show(struct device *dev, struct device_attribute *attr, |
| char *buf) |
| { |
| struct hv_gpci_request_buffer *arg; |
| unsigned long ret; |
| size_t n = 0; |
| |
| arg = (void *)get_cpu_var(hv_gpci_reqb); |
| memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); |
| |
| /* |
| * Pass the counter request 0xB0 corresponds to request |
| * type 'Affinity_domain_information_by_domain', |
| * to retrieve the system affinity domain information. |
| * starting_index value refers to the starting hardware |
| * processor index. |
| */ |
| ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM], |
| 0, 0, buf, &n, arg); |
| |
| if (!ret) |
| return n; |
| |
| if (ret != H_PARAMETER) |
| goto out; |
| |
| /* |
| * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which |
| * implies that buffer can't accommodate all information, and a partial buffer |
| * returned. To handle that, we need to take subsequent requests |
| * with next starting index to retrieve additional (missing) data. |
| * Below loop do subsequent hcalls with next starting index and add it |
| * to buffer util we get all the information. |
| */ |
| while (ret == H_PARAMETER) { |
| int returned_values = be16_to_cpu(arg->params.returned_values); |
| int elementsize = be16_to_cpu(arg->params.cv_element_size); |
| int last_element = (returned_values - 1) * elementsize; |
| |
| /* |
| * Since the starting index value is part of counter_value |
| * buffer elements, use the starting index value in the last |
| * element and add 1 to make subsequent hcalls. |
| */ |
| u32 starting_index = arg->bytes[last_element + 1] + |
| (arg->bytes[last_element] << 8) + 1; |
| |
| memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); |
| |
| ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM], |
| starting_index, 0, buf, &n, arg); |
| |
| if (!ret) |
| return n; |
| |
| if (ret != H_PARAMETER) |
| goto out; |
| } |
| |
| return n; |
| |
| out: |
| put_cpu_var(hv_gpci_reqb); |
| return ret; |
| } |
| |
| static void affinity_domain_via_partition_result_parse(int returned_values, |
| int element_size, char *buf, size_t *last_element, |
| size_t *n, struct hv_gpci_request_buffer *arg) |
| { |
| size_t i = 0, j = 0; |
| size_t k, l, m; |
| uint16_t total_affinity_domain_ele, size_of_each_affinity_domain_ele; |
| |
| /* |
| * hcall H_GET_PERF_COUNTER_INFO populates the 'returned_values' |
| * to show the total number of counter_value array elements |
| * returned via hcall. |
| * Unlike other request types, the data structure returned by this |
| * request is variable-size. For this counter request type, |
| * hcall populates 'cv_element_size' corresponds to minimum size of |
| * the structure returned i.e; the size of the structure with no domain |
| * information. Below loop go through all counter_value array |
| * to determine the number and size of each domain array element and |
| * add it to the output buffer. |
| */ |
| while (i < returned_values) { |
| k = j; |
| for (; k < j + element_size; k++) |
| *n += sprintf(buf + *n, "%02x", (u8)arg->bytes[k]); |
| *n += sprintf(buf + *n, "\n"); |
| |
| total_affinity_domain_ele = (u8)arg->bytes[k - 2] << 8 | (u8)arg->bytes[k - 3]; |
| size_of_each_affinity_domain_ele = (u8)arg->bytes[k] << 8 | (u8)arg->bytes[k - 1]; |
| |
| for (l = 0; l < total_affinity_domain_ele; l++) { |
| for (m = 0; m < size_of_each_affinity_domain_ele; m++) { |
| *n += sprintf(buf + *n, "%02x", (u8)arg->bytes[k]); |
| k++; |
| } |
| *n += sprintf(buf + *n, "\n"); |
| } |
| |
| *n += sprintf(buf + *n, "\n"); |
| i++; |
| j = k; |
| } |
| |
| *last_element = k; |
| } |
| |
| static ssize_t affinity_domain_via_partition_show(struct device *dev, struct device_attribute *attr, |
| char *buf) |
| { |
| struct hv_gpci_request_buffer *arg; |
| unsigned long ret; |
| size_t n = 0; |
| size_t last_element = 0; |
| u32 starting_index; |
| |
| arg = (void *)get_cpu_var(hv_gpci_reqb); |
| memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); |
| |
| /* |
| * Pass the counter request value 0xB1 corresponds to counter request |
| * type 'Affinity_domain_information_by_partition', |
| * to retrieve the system affinity domain by partition information. |
| * starting_index value refers to the starting hardware |
| * processor index. |
| */ |
| arg->params.counter_request = cpu_to_be32(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_PAR]); |
| arg->params.starting_index = cpu_to_be32(0); |
| |
| ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, |
| virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); |
| |
| if (!ret) |
| goto parse_result; |
| |
| if (ret && (ret != H_PARAMETER)) |
| goto out; |
| |
| /* |
| * ret value as 'H_PARAMETER' implies that the current buffer size |
| * can't accommodate all the information, and a partial buffer |
| * returned. To handle that, we need to make subsequent requests |
| * with next starting index to retrieve additional (missing) data. |
| * Below loop do subsequent hcalls with next starting index and add it |
| * to buffer util we get all the information. |
| */ |
| while (ret == H_PARAMETER) { |
| affinity_domain_via_partition_result_parse( |
| be16_to_cpu(arg->params.returned_values) - 1, |
| be16_to_cpu(arg->params.cv_element_size), buf, |
| &last_element, &n, arg); |
| |
| if (n >= PAGE_SIZE) { |
| put_cpu_var(hv_gpci_reqb); |
| pr_debug("System information exceeds PAGE_SIZE\n"); |
| return -EFBIG; |
| } |
| |
| /* |
| * Since the starting index value is part of counter_value |
| * buffer elements, use the starting_index value in the last |
| * element and add 1 to make subsequent hcalls. |
| */ |
| starting_index = (u8)arg->bytes[last_element] << 8 | |
| (u8)arg->bytes[last_element + 1]; |
| |
| memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); |
| arg->params.counter_request = cpu_to_be32( |
| sysinfo_counter_request[AFFINITY_DOMAIN_VIA_PAR]); |
| arg->params.starting_index = cpu_to_be32(starting_index); |
| |
| ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, |
| virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); |
| |
| if (ret && (ret != H_PARAMETER)) |
| goto out; |
| } |
| |
| parse_result: |
| affinity_domain_via_partition_result_parse( |
| be16_to_cpu(arg->params.returned_values), |
| be16_to_cpu(arg->params.cv_element_size), |
| buf, &last_element, &n, arg); |
| |
| put_cpu_var(hv_gpci_reqb); |
| return n; |
| |
| out: |
| put_cpu_var(hv_gpci_reqb); |
| |
| /* |
| * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', |
| * which means that the current buffer size cannot accommodate |
| * all the information and a partial buffer returned. |
| * hcall fails incase of ret value other than H_SUCCESS or H_PARAMETER. |
| * |
| * ret value as H_AUTHORITY implies that partition is not permitted to retrieve |
| * performance information, and required to set |
| * "Enable Performance Information Collection" option. |
| */ |
| if (ret == H_AUTHORITY) |
| return -EPERM; |
| |
| /* |
| * hcall can fail with other possible ret value like H_PRIVILEGE/H_HARDWARE |
| * because of invalid buffer-length/address or due to some hardware |
| * error. |
| */ |
| return -EIO; |
| } |
| |
| /* Read-only device attributes backed by kernel_version_show()/cpumask_show(). */ |
| static DEVICE_ATTR_RO(kernel_version); |
| static DEVICE_ATTR_RO(cpumask); |
| |
| /* One read-only attribute per capability field, with its print format. */ |
| HV_CAPS_ATTR(version, "0x%x\n"); |
| HV_CAPS_ATTR(ga, "%d\n"); |
| HV_CAPS_ATTR(expanded, "%d\n"); |
| HV_CAPS_ATTR(lab, "%d\n"); |
| HV_CAPS_ATTR(collect_privileged, "%d\n"); |
| |
| /* |
| * Attributes of the "interface" group. Slots 0-5 are fixed; slots 6-10 are |
| * placeholders addressed by the INTERFACE_*_ATTR indexes and slot 11 |
| * (INTERFACE_NULL_ATTR) is the terminator. While a placeholder is NULL it |
| * also terminates the list early. |
| */ |
| static struct attribute *interface_attrs[] = { |
| &dev_attr_kernel_version.attr, |
| &hv_caps_attr_version.attr, |
| &hv_caps_attr_ga.attr, |
| &hv_caps_attr_expanded.attr, |
| &hv_caps_attr_lab.attr, |
| &hv_caps_attr_collect_privileged.attr, |
| /* |
| * This NULL is a placeholder for the processor_bus_topology |
| * attribute, set in init function if applicable. |
| */ |
| NULL, |
| /* |
| * This NULL is a placeholder for the processor_config |
| * attribute, set in init function if applicable. |
| */ |
| NULL, |
| /* |
| * This NULL is a placeholder for the affinity_domain_via_virtual_processor |
| * attribute, set in init function if applicable. |
| */ |
| NULL, |
| /* |
| * This NULL is a placeholder for the affinity_domain_via_domain |
| * attribute, set in init function if applicable. |
| */ |
| NULL, |
| /* |
| * This NULL is a placeholder for the affinity_domain_via_partition |
| * attribute, set in init function if applicable. |
| */ |
| NULL, |
| NULL, |
| }; |
| |
| /* NULL-terminated list holding only the cpumask attribute. */ |
| static struct attribute *cpumask_attrs[] = { |
| &dev_attr_cpumask.attr, |
| NULL, |
| }; |
| |
| /* Unnamed group carrying cpumask_attrs[]. */ |
| static const struct attribute_group cpumask_attr_group = { |
| .attrs = cpumask_attrs, |
| }; |
| |
| /* sysfs group "interface", exposing interface_attrs[]. */ |
| static const struct attribute_group interface_group = { |
| .name = "interface", |
| .attrs = interface_attrs, |
| }; |
| |
| /* All attribute groups handed to the PMU through h_gpci_pmu.attr_groups. */ |
| static const struct attribute_group *attr_groups[] = { |
| &format_group, |
| &event_group, |
| &interface_group, |
| &cpumask_attr_group, |
| NULL, |
| }; |
| |
| static unsigned long single_gpci_request(u32 req, u32 starting_index, |
| u16 secondary_index, u8 version_in, u32 offset, u8 length, |
| u64 *value) |
| { |
| unsigned long ret; |
| size_t i; |
| u64 count; |
| struct hv_gpci_request_buffer *arg; |
| |
| arg = (void *)get_cpu_var(hv_gpci_reqb); |
| memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); |
| |
| arg->params.counter_request = cpu_to_be32(req); |
| arg->params.starting_index = cpu_to_be32(starting_index); |
| arg->params.secondary_index = cpu_to_be16(secondary_index); |
| arg->params.counter_info_version_in = version_in; |
| |
| ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, |
| virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); |
| |
| /* |
| * ret value as 'H_PARAMETER' with detail_rc as 'GEN_BUF_TOO_SMALL', |
| * specifies that the current buffer size cannot accommodate |
| * all the information and a partial buffer returned. |
| * Since in this function we are only accessing data for a given starting index, |
| * we don't need to accommodate whole data and can get required count by |
| * accessing first entry data. |
| * Hence hcall fails only incase the ret value is other than H_SUCCESS or |
| * H_PARAMETER with detail_rc value as GEN_BUF_TOO_SMALL(0x1B). |
| */ |
| if (ret == H_PARAMETER && be32_to_cpu(arg->params.detail_rc) == 0x1B) |
| ret = 0; |
| |
| if (ret) { |
| pr_devel("hcall failed: 0x%lx\n", ret); |
| goto out; |
| } |
| |
| /* |
| * we verify offset and length are within the zeroed buffer at event |
| * init. |
| */ |
| count = 0; |
| for (i = offset; i < offset + length; i++) |
| count |= (u64)(arg->bytes[i]) << ((length - 1 - (i - offset)) * 8); |
| |
| *value = count; |
| out: |
| put_cpu_var(hv_gpci_reqb); |
| return ret; |
| } |
| |
| static u64 h_gpci_get_value(struct perf_event *event) |
| { |
| u64 count; |
| unsigned long ret = single_gpci_request(event_get_request(event), |
| event_get_starting_index(event), |
| event_get_secondary_index(event), |
| event_get_counter_info_version(event), |
| event_get_offset(event), |
| event_get_length(event), |
| &count); |
| if (ret) |
| return 0; |
| return count; |
| } |
| |
| static void h_gpci_event_update(struct perf_event *event) |
| { |
| s64 prev; |
| u64 now = h_gpci_get_value(event); |
| prev = local64_xchg(&event->hw.prev_count, now); |
| local64_add(now - prev, &event->count); |
| } |
| |
| static void h_gpci_event_start(struct perf_event *event, int flags) |
| { |
| local64_set(&event->hw.prev_count, h_gpci_get_value(event)); |
| } |
| |
/*
 * Stop counting: accumulate whatever was counted since the last reading.
 * @flags is unused. Also installed as the PMU's .del callback.
 */
static void h_gpci_event_stop(struct perf_event *event, int flags)
{
	h_gpci_event_update(event);
}
| |
| static int h_gpci_event_add(struct perf_event *event, int flags) |
| { |
| if (flags & PERF_EF_START) |
| h_gpci_event_start(event, flags); |
| |
| return 0; |
| } |
| |
| static int h_gpci_event_init(struct perf_event *event) |
| { |
| u64 count; |
| u8 length; |
| unsigned long ret; |
| |
| /* Not our event */ |
| if (event->attr.type != event->pmu->type) |
| return -ENOENT; |
| |
| /* config2 is unused */ |
| if (event->attr.config2) { |
| pr_devel("config2 set when reserved\n"); |
| return -EINVAL; |
| } |
| |
| /* no branch sampling */ |
| if (has_branch_stack(event)) |
| return -EOPNOTSUPP; |
| |
| length = event_get_length(event); |
| if (length < 1 || length > 8) { |
| pr_devel("length invalid\n"); |
| return -EINVAL; |
| } |
| |
| /* last byte within the buffer? */ |
| if ((event_get_offset(event) + length) > HGPCI_MAX_DATA_BYTES) { |
| pr_devel("request outside of buffer: %zu > %zu\n", |
| (size_t)event_get_offset(event) + length, |
| HGPCI_MAX_DATA_BYTES); |
| return -EINVAL; |
| } |
| |
| /* check if the request works... */ |
| ret = single_gpci_request(event_get_request(event), |
| event_get_starting_index(event), |
| event_get_secondary_index(event), |
| event_get_counter_info_version(event), |
| event_get_offset(event), |
| length, |
| &count); |
| |
| /* |
| * ret value as H_AUTHORITY implies that partition is not permitted to retrieve |
| * performance information, and required to set |
| * "Enable Performance Information Collection" option. |
| */ |
| if (ret == H_AUTHORITY) |
| return -EPERM; |
| |
| if (ret) { |
| pr_devel("gpci hcall failed\n"); |
| return -EINVAL; |
| } |
| |
| return 0; |
| } |
| |
| /* |
| * PMU descriptor registered by hv_gpci_init(). .del shares |
| * h_gpci_event_stop() with .stop and .read is h_gpci_event_update(). |
| * hv_gpci_init() additionally sets PERF_PMU_CAP_NO_INTERRUPT in |
| * .capabilities before registering. |
| */ |
| static struct pmu h_gpci_pmu = { |
| .task_ctx_nr = perf_invalid_context, |
| |
| .name = "hv_gpci", |
| .attr_groups = attr_groups, |
| .event_init = h_gpci_event_init, |
| .add = h_gpci_event_add, |
| .del = h_gpci_event_stop, |
| .start = h_gpci_event_start, |
| .stop = h_gpci_event_stop, |
| .read = h_gpci_event_update, |
| .capabilities = PERF_PMU_CAP_NO_EXCLUDE, |
| }; |
| |
| static int ppc_hv_gpci_cpu_online(unsigned int cpu) |
| { |
| if (cpumask_empty(&hv_gpci_cpumask)) |
| cpumask_set_cpu(cpu, &hv_gpci_cpumask); |
| |
| return 0; |
| } |
| |
| static int ppc_hv_gpci_cpu_offline(unsigned int cpu) |
| { |
| int target; |
| |
| /* Check if exiting cpu is used for collecting gpci events */ |
| if (!cpumask_test_and_clear_cpu(cpu, &hv_gpci_cpumask)) |
| return 0; |
| |
| /* Find a new cpu to collect gpci events */ |
| target = cpumask_last(cpu_active_mask); |
| |
| if (target < 0 || target >= nr_cpu_ids) { |
| pr_err("hv_gpci: CPU hotplug init failed\n"); |
| return -1; |
| } |
| |
| /* Migrate gpci events to the new target */ |
| cpumask_set_cpu(target, &hv_gpci_cpumask); |
| perf_pmu_migrate_context(&h_gpci_pmu, cpu, target); |
| |
| return 0; |
| } |
| |
| static int hv_gpci_cpu_hotplug_init(void) |
| { |
| return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE, |
| "perf/powerpc/hv_gcpi:online", |
| ppc_hv_gpci_cpu_online, |
| ppc_hv_gpci_cpu_offline); |
| } |
| |
| static struct device_attribute *sysinfo_device_attr_create(int |
| sysinfo_interface_group_index, u32 req) |
| { |
| struct device_attribute *attr = NULL; |
| unsigned long ret; |
| struct hv_gpci_request_buffer *arg; |
| |
| if (sysinfo_interface_group_index < INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR || |
| sysinfo_interface_group_index >= INTERFACE_NULL_ATTR) { |
| pr_info("Wrong interface group index for system information\n"); |
| return NULL; |
| } |
| |
| /* Check for given counter request value support */ |
| arg = (void *)get_cpu_var(hv_gpci_reqb); |
| memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); |
| |
| arg->params.counter_request = cpu_to_be32(req); |
| |
| ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, |
| virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); |
| |
| put_cpu_var(hv_gpci_reqb); |
| |
| /* |
| * Add given counter request value attribute in the interface_attrs |
| * attribute array, only for valid return types. |
| */ |
| if (!ret || ret == H_AUTHORITY || ret == H_PARAMETER) { |
| attr = kzalloc(sizeof(*attr), GFP_KERNEL); |
| if (!attr) |
| return NULL; |
| |
| sysfs_attr_init(&attr->attr); |
| attr->attr.mode = 0444; |
| |
| switch (sysinfo_interface_group_index) { |
| case INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR: |
| attr->attr.name = "processor_bus_topology"; |
| attr->show = processor_bus_topology_show; |
| break; |
| case INTERFACE_PROCESSOR_CONFIG_ATTR: |
| attr->attr.name = "processor_config"; |
| attr->show = processor_config_show; |
| break; |
| case INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR: |
| attr->attr.name = "affinity_domain_via_virtual_processor"; |
| attr->show = affinity_domain_via_virtual_processor_show; |
| break; |
| case INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR: |
| attr->attr.name = "affinity_domain_via_domain"; |
| attr->show = affinity_domain_via_domain_show; |
| break; |
| case INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR: |
| attr->attr.name = "affinity_domain_via_partition"; |
| attr->show = affinity_domain_via_partition_show; |
| break; |
| } |
| } else |
| pr_devel("hcall failed, with error: 0x%lx\n", ret); |
| |
| return attr; |
| } |
| |
| static void add_sysinfo_interface_files(void) |
| { |
| int sysfs_count; |
| struct device_attribute *attr[INTERFACE_NULL_ATTR - INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR]; |
| int i; |
| |
| sysfs_count = INTERFACE_NULL_ATTR - INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR; |
| |
| /* Get device attribute for a given counter request value */ |
| for (i = 0; i < sysfs_count; i++) { |
| attr[i] = sysinfo_device_attr_create(i + INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR, |
| sysinfo_counter_request[i]); |
| |
| if (!attr[i]) |
| goto out; |
| } |
| |
| /* Add sysinfo interface attributes in the interface_attrs attribute array */ |
| for (i = 0; i < sysfs_count; i++) |
| interface_attrs[i + INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR] = &attr[i]->attr; |
| |
| return; |
| |
| out: |
| /* |
| * The sysinfo interface attributes will be added, only if hcall passed for |
| * all the counter request values. Free the device attribute array incase |
| * of any hcall failure. |
| */ |
| if (i > 0) { |
| while (i >= 0) { |
| kfree(attr[i]); |
| i--; |
| } |
| } |
| } |
| |
| static int hv_gpci_init(void) |
| { |
| int r; |
| unsigned long hret; |
| struct hv_perf_caps caps; |
| struct hv_gpci_request_buffer *arg; |
| |
| hv_gpci_assert_offsets_correct(); |
| |
| if (!firmware_has_feature(FW_FEATURE_LPAR)) { |
| pr_debug("not a virtualized system, not enabling\n"); |
| return -ENODEV; |
| } |
| |
| hret = hv_perf_caps_get(&caps); |
| if (hret) { |
| pr_debug("could not obtain capabilities, not enabling, rc=%ld\n", |
| hret); |
| return -ENODEV; |
| } |
| |
| /* init cpuhotplug */ |
| r = hv_gpci_cpu_hotplug_init(); |
| if (r) |
| return r; |
| |
| /* sampling not supported */ |
| h_gpci_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; |
| |
| arg = (void *)get_cpu_var(hv_gpci_reqb); |
| memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); |
| |
| /* |
| * hcall H_GET_PERF_COUNTER_INFO populates the output |
| * counter_info_version value based on the system hypervisor. |
| * Pass the counter request 0x10 corresponds to request type |
| * 'Dispatch_timebase_by_processor', to get the supported |
| * counter_info_version. |
| */ |
| arg->params.counter_request = cpu_to_be32(0x10); |
| |
| r = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, |
| virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); |
| if (r) { |
| pr_devel("hcall failed, can't get supported counter_info_version: 0x%x\n", r); |
| arg->params.counter_info_version_out = 0x8; |
| } |
| |
| /* |
| * Use counter_info_version_out value to assign |
| * required hv-gpci event list. |
| */ |
| if (arg->params.counter_info_version_out >= 0x8) |
| event_group.attrs = hv_gpci_event_attrs; |
| else |
| event_group.attrs = hv_gpci_event_attrs_v6; |
| |
| put_cpu_var(hv_gpci_reqb); |
| |
| r = perf_pmu_register(&h_gpci_pmu, h_gpci_pmu.name, -1); |
| if (r) |
| return r; |
| |
| /* sysinfo interface files are only available for power10 and above platforms */ |
| if (PVR_VER(mfspr(SPRN_PVR)) >= PVR_POWER10) |
| add_sysinfo_interface_files(); |
| |
| return 0; |
| } |
| |
| /* Run hv_gpci_init() at device_initcall time. */ |
| device_initcall(hv_gpci_init); |