blob: 30f1525639b5736e94ee047e14a950a59864c577 [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0
/*
* Driver for HiSilicon PCIe tune and trace device
*
* Copyright (c) 2022 HiSilicon Technologies Co., Ltd.
* Author: Yicong Yang <yangyicong@hisilicon.com>
*/
#include <linux/bitfield.h>
#include <linux/bitops.h>
#include <linux/cpuhotplug.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/iommu.h>
#include <linux/iopoll.h>
#include <linux/module.h>
#include <linux/sysfs.h>
#include <linux/vmalloc.h>
#include "hisi_ptt.h"
/* Dynamic CPU hotplug state used by PTT */
static enum cpuhp_state hisi_ptt_pmu_online;
static bool hisi_ptt_wait_tuning_finish(struct hisi_ptt *hisi_ptt)
{
u32 val;
return !readl_poll_timeout(hisi_ptt->iobase + HISI_PTT_TUNING_INT_STAT,
val, !(val & HISI_PTT_TUNING_INT_STAT_MASK),
HISI_PTT_WAIT_POLL_INTERVAL_US,
HISI_PTT_WAIT_TUNE_TIMEOUT_US);
}
static ssize_t hisi_ptt_tune_attr_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct hisi_ptt *hisi_ptt = to_hisi_ptt(dev_get_drvdata(dev));
struct dev_ext_attribute *ext_attr;
struct hisi_ptt_tune_desc *desc;
u32 reg;
u16 val;
ext_attr = container_of(attr, struct dev_ext_attribute, attr);
desc = ext_attr->var;
mutex_lock(&hisi_ptt->tune_lock);
reg = readl(hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
reg &= ~(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB);
reg |= FIELD_PREP(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB,
desc->event_code);
writel(reg, hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
/* Write all 1 to indicates it's the read process */
writel(~0U, hisi_ptt->iobase + HISI_PTT_TUNING_DATA);
if (!hisi_ptt_wait_tuning_finish(hisi_ptt)) {
mutex_unlock(&hisi_ptt->tune_lock);
return -ETIMEDOUT;
}
reg = readl(hisi_ptt->iobase + HISI_PTT_TUNING_DATA);
reg &= HISI_PTT_TUNING_DATA_VAL_MASK;
val = FIELD_GET(HISI_PTT_TUNING_DATA_VAL_MASK, reg);
mutex_unlock(&hisi_ptt->tune_lock);
return sysfs_emit(buf, "%u\n", val);
}
static ssize_t hisi_ptt_tune_attr_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
struct hisi_ptt *hisi_ptt = to_hisi_ptt(dev_get_drvdata(dev));
struct dev_ext_attribute *ext_attr;
struct hisi_ptt_tune_desc *desc;
u32 reg;
u16 val;
ext_attr = container_of(attr, struct dev_ext_attribute, attr);
desc = ext_attr->var;
if (kstrtou16(buf, 10, &val))
return -EINVAL;
mutex_lock(&hisi_ptt->tune_lock);
reg = readl(hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
reg &= ~(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB);
reg |= FIELD_PREP(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB,
desc->event_code);
writel(reg, hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
writel(FIELD_PREP(HISI_PTT_TUNING_DATA_VAL_MASK, val),
hisi_ptt->iobase + HISI_PTT_TUNING_DATA);
if (!hisi_ptt_wait_tuning_finish(hisi_ptt)) {
mutex_unlock(&hisi_ptt->tune_lock);
return -ETIMEDOUT;
}
mutex_unlock(&hisi_ptt->tune_lock);
return count;
}
#define HISI_PTT_TUNE_ATTR(_name, _val, _show, _store) \
static struct hisi_ptt_tune_desc _name##_desc = { \
.name = #_name, \
.event_code = (_val), \
}; \
static struct dev_ext_attribute hisi_ptt_##_name##_attr = { \
.attr = __ATTR(_name, 0600, _show, _store), \
.var = &_name##_desc, \
}
#define HISI_PTT_TUNE_ATTR_COMMON(_name, _val) \
HISI_PTT_TUNE_ATTR(_name, _val, \
hisi_ptt_tune_attr_show, \
hisi_ptt_tune_attr_store)
/*
* The value of the tuning event are composed of two parts: main event code
* in BIT[0,15] and subevent code in BIT[16,23]. For example, qox_tx_cpl is
* a subevent of 'Tx path QoS control' which for tuning the weight of Tx
* completion TLPs. See hisi_ptt.rst documentation for more information.
*/
#define HISI_PTT_TUNE_QOS_TX_CPL (0x4 | (3 << 16))
#define HISI_PTT_TUNE_QOS_TX_NP (0x4 | (4 << 16))
#define HISI_PTT_TUNE_QOS_TX_P (0x4 | (5 << 16))
#define HISI_PTT_TUNE_RX_ALLOC_BUF_LEVEL (0x5 | (6 << 16))
#define HISI_PTT_TUNE_TX_ALLOC_BUF_LEVEL (0x5 | (7 << 16))
HISI_PTT_TUNE_ATTR_COMMON(qos_tx_cpl, HISI_PTT_TUNE_QOS_TX_CPL);
HISI_PTT_TUNE_ATTR_COMMON(qos_tx_np, HISI_PTT_TUNE_QOS_TX_NP);
HISI_PTT_TUNE_ATTR_COMMON(qos_tx_p, HISI_PTT_TUNE_QOS_TX_P);
HISI_PTT_TUNE_ATTR_COMMON(rx_alloc_buf_level, HISI_PTT_TUNE_RX_ALLOC_BUF_LEVEL);
HISI_PTT_TUNE_ATTR_COMMON(tx_alloc_buf_level, HISI_PTT_TUNE_TX_ALLOC_BUF_LEVEL);
static struct attribute *hisi_ptt_tune_attrs[] = {
&hisi_ptt_qos_tx_cpl_attr.attr.attr,
&hisi_ptt_qos_tx_np_attr.attr.attr,
&hisi_ptt_qos_tx_p_attr.attr.attr,
&hisi_ptt_rx_alloc_buf_level_attr.attr.attr,
&hisi_ptt_tx_alloc_buf_level_attr.attr.attr,
NULL,
};
static struct attribute_group hisi_ptt_tune_group = {
.name = "tune",
.attrs = hisi_ptt_tune_attrs,
};
static u16 hisi_ptt_get_filter_val(u16 devid, bool is_port)
{
if (is_port)
return BIT(HISI_PCIE_CORE_PORT_ID(devid & 0xff));
return devid;
}
static bool hisi_ptt_wait_trace_hw_idle(struct hisi_ptt *hisi_ptt)
{
u32 val;
return !readl_poll_timeout_atomic(hisi_ptt->iobase + HISI_PTT_TRACE_STS,
val, val & HISI_PTT_TRACE_IDLE,
HISI_PTT_WAIT_POLL_INTERVAL_US,
HISI_PTT_WAIT_TRACE_TIMEOUT_US);
}
static void hisi_ptt_wait_dma_reset_done(struct hisi_ptt *hisi_ptt)
{
u32 val;
readl_poll_timeout_atomic(hisi_ptt->iobase + HISI_PTT_TRACE_WR_STS,
val, !val, HISI_PTT_RESET_POLL_INTERVAL_US,
HISI_PTT_RESET_TIMEOUT_US);
}
static void hisi_ptt_trace_end(struct hisi_ptt *hisi_ptt)
{
writel(0, hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
hisi_ptt->trace_ctrl.started = false;
}
static int hisi_ptt_trace_start(struct hisi_ptt *hisi_ptt)
{
struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
u32 val;
int i;
/* Check device idle before start trace */
if (!hisi_ptt_wait_trace_hw_idle(hisi_ptt)) {
pci_err(hisi_ptt->pdev, "Failed to start trace, the device is still busy\n");
return -EBUSY;
}
ctrl->started = true;
/* Reset the DMA before start tracing */
val = readl(hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
val |= HISI_PTT_TRACE_CTRL_RST;
writel(val, hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
hisi_ptt_wait_dma_reset_done(hisi_ptt);
val = readl(hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
val &= ~HISI_PTT_TRACE_CTRL_RST;
writel(val, hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
/* Reset the index of current buffer */
hisi_ptt->trace_ctrl.buf_index = 0;
/* Zero the trace buffers */
for (i = 0; i < HISI_PTT_TRACE_BUF_CNT; i++)
memset(ctrl->trace_buf[i].addr, 0, HISI_PTT_TRACE_BUF_SIZE);
/* Clear the interrupt status */
writel(HISI_PTT_TRACE_INT_STAT_MASK, hisi_ptt->iobase + HISI_PTT_TRACE_INT_STAT);
writel(0, hisi_ptt->iobase + HISI_PTT_TRACE_INT_MASK);
/* Set the trace control register */
val = FIELD_PREP(HISI_PTT_TRACE_CTRL_TYPE_SEL, ctrl->type);
val |= FIELD_PREP(HISI_PTT_TRACE_CTRL_RXTX_SEL, ctrl->direction);
val |= FIELD_PREP(HISI_PTT_TRACE_CTRL_DATA_FORMAT, ctrl->format);
val |= FIELD_PREP(HISI_PTT_TRACE_CTRL_TARGET_SEL, hisi_ptt->trace_ctrl.filter);
if (!hisi_ptt->trace_ctrl.is_port)
val |= HISI_PTT_TRACE_CTRL_FILTER_MODE;
/* Start the Trace */
val |= HISI_PTT_TRACE_CTRL_EN;
writel(val, hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
return 0;
}
static int hisi_ptt_update_aux(struct hisi_ptt *hisi_ptt, int index, bool stop)
{
struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
struct perf_output_handle *handle = &ctrl->handle;
struct perf_event *event = handle->event;
struct hisi_ptt_pmu_buf *buf;
size_t size;
void *addr;
buf = perf_get_aux(handle);
if (!buf || !handle->size)
return -EINVAL;
addr = ctrl->trace_buf[ctrl->buf_index].addr;
/*
* If we're going to stop, read the size of already traced data from
* HISI_PTT_TRACE_WR_STS. Otherwise we're coming from the interrupt,
* the data size is always HISI_PTT_TRACE_BUF_SIZE.
*/
if (stop) {
u32 reg;
reg = readl(hisi_ptt->iobase + HISI_PTT_TRACE_WR_STS);
size = FIELD_GET(HISI_PTT_TRACE_WR_STS_WRITE, reg);
} else {
size = HISI_PTT_TRACE_BUF_SIZE;
}
memcpy(buf->base + buf->pos, addr, size);
buf->pos += size;
/*
* Just commit the traced data if we're going to stop. Otherwise if the
* resident AUX buffer cannot contain the data of next trace buffer,
* apply a new one.
*/
if (stop) {
perf_aux_output_end(handle, buf->pos);
} else if (buf->length - buf->pos < HISI_PTT_TRACE_BUF_SIZE) {
perf_aux_output_end(handle, buf->pos);
buf = perf_aux_output_begin(handle, event);
if (!buf)
return -EINVAL;
buf->pos = handle->head % buf->length;
if (buf->length - buf->pos < HISI_PTT_TRACE_BUF_SIZE) {
perf_aux_output_end(handle, 0);
return -EINVAL;
}
}
return 0;
}
static irqreturn_t hisi_ptt_isr(int irq, void *context)
{
struct hisi_ptt *hisi_ptt = context;
u32 status, buf_idx;
status = readl(hisi_ptt->iobase + HISI_PTT_TRACE_INT_STAT);
if (!(status & HISI_PTT_TRACE_INT_STAT_MASK))
return IRQ_NONE;
buf_idx = ffs(status) - 1;
/* Clear the interrupt status of buffer @buf_idx */
writel(status, hisi_ptt->iobase + HISI_PTT_TRACE_INT_STAT);
/*
* Update the AUX buffer and cache the current buffer index,
* as we need to know this and save the data when the trace
* is ended out of the interrupt handler. End the trace
* if the updating fails.
*/
if (hisi_ptt_update_aux(hisi_ptt, buf_idx, false))
hisi_ptt_trace_end(hisi_ptt);
else
hisi_ptt->trace_ctrl.buf_index = (buf_idx + 1) % HISI_PTT_TRACE_BUF_CNT;
return IRQ_HANDLED;
}
static void hisi_ptt_irq_free_vectors(void *pdev)
{
pci_free_irq_vectors(pdev);
}
static int hisi_ptt_register_irq(struct hisi_ptt *hisi_ptt)
{
struct pci_dev *pdev = hisi_ptt->pdev;
int ret;
ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
if (ret < 0) {
pci_err(pdev, "failed to allocate irq vector, ret = %d\n", ret);
return ret;
}
ret = devm_add_action_or_reset(&pdev->dev, hisi_ptt_irq_free_vectors, pdev);
if (ret < 0)
return ret;
ret = devm_request_threaded_irq(&pdev->dev,
pci_irq_vector(pdev, HISI_PTT_TRACE_DMA_IRQ),
NULL, hisi_ptt_isr, 0,
DRV_NAME, hisi_ptt);
if (ret) {
pci_err(pdev, "failed to request irq %d, ret = %d\n",
pci_irq_vector(pdev, HISI_PTT_TRACE_DMA_IRQ), ret);
return ret;
}
return 0;
}
static int hisi_ptt_init_filters(struct pci_dev *pdev, void *data)
{
struct pci_dev *root_port = pcie_find_root_port(pdev);
struct hisi_ptt_filter_desc *filter;
struct hisi_ptt *hisi_ptt = data;
u32 port_devid;
if (!root_port)
return 0;
port_devid = PCI_DEVID(root_port->bus->number, root_port->devfn);
if (port_devid < hisi_ptt->lower_bdf ||
port_devid > hisi_ptt->upper_bdf)
return 0;
/*
* We won't fail the probe if filter allocation failed here. The filters
* should be partial initialized and users would know which filter fails
* through the log. Other functions of PTT device are still available.
*/
filter = kzalloc(sizeof(*filter), GFP_KERNEL);
if (!filter) {
pci_err(hisi_ptt->pdev, "failed to add filter %s\n", pci_name(pdev));
return -ENOMEM;
}
filter->devid = PCI_DEVID(pdev->bus->number, pdev->devfn);
if (pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT) {
filter->is_port = true;
list_add_tail(&filter->list, &hisi_ptt->port_filters);
/* Update the available port mask */
hisi_ptt->port_mask |= hisi_ptt_get_filter_val(filter->devid, true);
} else {
list_add_tail(&filter->list, &hisi_ptt->req_filters);
}
return 0;
}
static void hisi_ptt_release_filters(void *data)
{
struct hisi_ptt_filter_desc *filter, *tmp;
struct hisi_ptt *hisi_ptt = data;
list_for_each_entry_safe(filter, tmp, &hisi_ptt->req_filters, list) {
list_del(&filter->list);
kfree(filter);
}
list_for_each_entry_safe(filter, tmp, &hisi_ptt->port_filters, list) {
list_del(&filter->list);
kfree(filter);
}
}
static int hisi_ptt_config_trace_buf(struct hisi_ptt *hisi_ptt)
{
struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
struct device *dev = &hisi_ptt->pdev->dev;
int i;
ctrl->trace_buf = devm_kcalloc(dev, HISI_PTT_TRACE_BUF_CNT,
sizeof(*ctrl->trace_buf), GFP_KERNEL);
if (!ctrl->trace_buf)
return -ENOMEM;
for (i = 0; i < HISI_PTT_TRACE_BUF_CNT; ++i) {
ctrl->trace_buf[i].addr = dmam_alloc_coherent(dev, HISI_PTT_TRACE_BUF_SIZE,
&ctrl->trace_buf[i].dma,
GFP_KERNEL);
if (!ctrl->trace_buf[i].addr)
return -ENOMEM;
}
/* Configure the trace DMA buffer */
for (i = 0; i < HISI_PTT_TRACE_BUF_CNT; i++) {
writel(lower_32_bits(ctrl->trace_buf[i].dma),
hisi_ptt->iobase + HISI_PTT_TRACE_ADDR_BASE_LO_0 +
i * HISI_PTT_TRACE_ADDR_STRIDE);
writel(upper_32_bits(ctrl->trace_buf[i].dma),
hisi_ptt->iobase + HISI_PTT_TRACE_ADDR_BASE_HI_0 +
i * HISI_PTT_TRACE_ADDR_STRIDE);
}
writel(HISI_PTT_TRACE_BUF_SIZE, hisi_ptt->iobase + HISI_PTT_TRACE_ADDR_SIZE);
return 0;
}
static int hisi_ptt_init_ctrls(struct hisi_ptt *hisi_ptt)
{
struct pci_dev *pdev = hisi_ptt->pdev;
struct pci_bus *bus;
int ret;
u32 reg;
INIT_LIST_HEAD(&hisi_ptt->port_filters);
INIT_LIST_HEAD(&hisi_ptt->req_filters);
ret = hisi_ptt_config_trace_buf(hisi_ptt);
if (ret)
return ret;
/*
* The device range register provides the information about the root
* ports which the RCiEP can control and trace. The RCiEP and the root
* ports which it supports are on the same PCIe core, with same domain
* number but maybe different bus number. The device range register
* will tell us which root ports we can support, Bit[31:16] indicates
* the upper BDF numbers of the root port, while Bit[15:0] indicates
* the lower.
*/
reg = readl(hisi_ptt->iobase + HISI_PTT_DEVICE_RANGE);
hisi_ptt->upper_bdf = FIELD_GET(HISI_PTT_DEVICE_RANGE_UPPER, reg);
hisi_ptt->lower_bdf = FIELD_GET(HISI_PTT_DEVICE_RANGE_LOWER, reg);
bus = pci_find_bus(pci_domain_nr(pdev->bus), PCI_BUS_NUM(hisi_ptt->upper_bdf));
if (bus)
pci_walk_bus(bus, hisi_ptt_init_filters, hisi_ptt);
ret = devm_add_action_or_reset(&pdev->dev, hisi_ptt_release_filters, hisi_ptt);
if (ret)
return ret;
hisi_ptt->trace_ctrl.on_cpu = -1;
return 0;
}
static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct hisi_ptt *hisi_ptt = to_hisi_ptt(dev_get_drvdata(dev));
const cpumask_t *cpumask = cpumask_of_node(dev_to_node(&hisi_ptt->pdev->dev));
return cpumap_print_to_pagebuf(true, buf, cpumask);
}
static DEVICE_ATTR_RO(cpumask);
static struct attribute *hisi_ptt_cpumask_attrs[] = {
&dev_attr_cpumask.attr,
NULL
};
static const struct attribute_group hisi_ptt_cpumask_attr_group = {
.attrs = hisi_ptt_cpumask_attrs,
};
/*
* Bit 19 indicates the filter type, 1 for Root Port filter and 0 for Requester
* filter. Bit[15:0] indicates the filter value, for Root Port filter it's
* a bit mask of desired ports and for Requester filter it's the Requester ID
* of the desired PCIe function. Bit[18:16] is reserved for extension.
*
* See hisi_ptt.rst documentation for detailed information.
*/
PMU_FORMAT_ATTR(filter, "config:0-19");
PMU_FORMAT_ATTR(direction, "config:20-23");
PMU_FORMAT_ATTR(type, "config:24-31");
PMU_FORMAT_ATTR(format, "config:32-35");
static struct attribute *hisi_ptt_pmu_format_attrs[] = {
&format_attr_filter.attr,
&format_attr_direction.attr,
&format_attr_type.attr,
&format_attr_format.attr,
NULL
};
static struct attribute_group hisi_ptt_pmu_format_group = {
.name = "format",
.attrs = hisi_ptt_pmu_format_attrs,
};
static const struct attribute_group *hisi_ptt_pmu_groups[] = {
&hisi_ptt_cpumask_attr_group,
&hisi_ptt_pmu_format_group,
&hisi_ptt_tune_group,
NULL
};
static int hisi_ptt_trace_valid_direction(u32 val)
{
/*
* The direction values have different effects according to the data
* format (specified in the parentheses). TLP set A/B means different
* set of TLP types. See hisi_ptt.rst documentation for more details.
*/
static const u32 hisi_ptt_trace_available_direction[] = {
0, /* inbound(4DW) or reserved(8DW) */
1, /* outbound(4DW) */
2, /* {in, out}bound(4DW) or inbound(8DW), TLP set A */
3, /* {in, out}bound(4DW) or inbound(8DW), TLP set B */
};
int i;
for (i = 0; i < ARRAY_SIZE(hisi_ptt_trace_available_direction); i++) {
if (val == hisi_ptt_trace_available_direction[i])
return 0;
}
return -EINVAL;
}
static int hisi_ptt_trace_valid_type(u32 val)
{
/* Different types can be set simultaneously */
static const u32 hisi_ptt_trace_available_type[] = {
1, /* posted_request */
2, /* non-posted_request */
4, /* completion */
};
int i;
if (!val)
return -EINVAL;
/*
* Walk the available list and clear the valid bits of
* the config. If there is any resident bit after the
* walk then the config is invalid.
*/
for (i = 0; i < ARRAY_SIZE(hisi_ptt_trace_available_type); i++)
val &= ~hisi_ptt_trace_available_type[i];
if (val)
return -EINVAL;
return 0;
}
static int hisi_ptt_trace_valid_format(u32 val)
{
static const u32 hisi_ptt_trace_availble_format[] = {
0, /* 4DW */
1, /* 8DW */
};
int i;
for (i = 0; i < ARRAY_SIZE(hisi_ptt_trace_availble_format); i++) {
if (val == hisi_ptt_trace_availble_format[i])
return 0;
}
return -EINVAL;
}
static int hisi_ptt_trace_valid_filter(struct hisi_ptt *hisi_ptt, u64 config)
{
unsigned long val, port_mask = hisi_ptt->port_mask;
struct hisi_ptt_filter_desc *filter;
hisi_ptt->trace_ctrl.is_port = FIELD_GET(HISI_PTT_PMU_FILTER_IS_PORT, config);
val = FIELD_GET(HISI_PTT_PMU_FILTER_VAL_MASK, config);
/*
* Port filters are defined as bit mask. For port filters, check
* the bits in the @val are within the range of hisi_ptt->port_mask
* and whether it's empty or not, otherwise user has specified
* some unsupported root ports.
*
* For Requester ID filters, walk the available filter list to see
* whether we have one matched.
*/
if (!hisi_ptt->trace_ctrl.is_port) {
list_for_each_entry(filter, &hisi_ptt->req_filters, list) {
if (val == hisi_ptt_get_filter_val(filter->devid, filter->is_port))
return 0;
}
} else if (bitmap_subset(&val, &port_mask, BITS_PER_LONG)) {
return 0;
}
return -EINVAL;
}
static void hisi_ptt_pmu_init_configs(struct hisi_ptt *hisi_ptt, struct perf_event *event)
{
struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
u32 val;
val = FIELD_GET(HISI_PTT_PMU_FILTER_VAL_MASK, event->attr.config);
hisi_ptt->trace_ctrl.filter = val;
val = FIELD_GET(HISI_PTT_PMU_DIRECTION_MASK, event->attr.config);
ctrl->direction = val;
val = FIELD_GET(HISI_PTT_PMU_TYPE_MASK, event->attr.config);
ctrl->type = val;
val = FIELD_GET(HISI_PTT_PMU_FORMAT_MASK, event->attr.config);
ctrl->format = val;
}
static int hisi_ptt_pmu_event_init(struct perf_event *event)
{
struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
int ret;
u32 val;
if (event->cpu < 0) {
dev_dbg(event->pmu->dev, "Per-task mode not supported\n");
return -EOPNOTSUPP;
}
if (event->attr.type != hisi_ptt->hisi_ptt_pmu.type)
return -ENOENT;
ret = hisi_ptt_trace_valid_filter(hisi_ptt, event->attr.config);
if (ret < 0)
return ret;
val = FIELD_GET(HISI_PTT_PMU_DIRECTION_MASK, event->attr.config);
ret = hisi_ptt_trace_valid_direction(val);
if (ret < 0)
return ret;
val = FIELD_GET(HISI_PTT_PMU_TYPE_MASK, event->attr.config);
ret = hisi_ptt_trace_valid_type(val);
if (ret < 0)
return ret;
val = FIELD_GET(HISI_PTT_PMU_FORMAT_MASK, event->attr.config);
return hisi_ptt_trace_valid_format(val);
}
static void *hisi_ptt_pmu_setup_aux(struct perf_event *event, void **pages,
int nr_pages, bool overwrite)
{
struct hisi_ptt_pmu_buf *buf;
struct page **pagelist;
int i;
if (overwrite) {
dev_warn(event->pmu->dev, "Overwrite mode is not supported\n");
return NULL;
}
/* If the pages size less than buffers, we cannot start trace */
if (nr_pages < HISI_PTT_TRACE_TOTAL_BUF_SIZE / PAGE_SIZE)
return NULL;
buf = kzalloc(sizeof(*buf), GFP_KERNEL);
if (!buf)
return NULL;
pagelist = kcalloc(nr_pages, sizeof(*pagelist), GFP_KERNEL);
if (!pagelist)
goto err;
for (i = 0; i < nr_pages; i++)
pagelist[i] = virt_to_page(pages[i]);
buf->base = vmap(pagelist, nr_pages, VM_MAP, PAGE_KERNEL);
if (!buf->base) {
kfree(pagelist);
goto err;
}
buf->nr_pages = nr_pages;
buf->length = nr_pages * PAGE_SIZE;
buf->pos = 0;
kfree(pagelist);
return buf;
err:
kfree(buf);
return NULL;
}
static void hisi_ptt_pmu_free_aux(void *aux)
{
struct hisi_ptt_pmu_buf *buf = aux;
vunmap(buf->base);
kfree(buf);
}
static void hisi_ptt_pmu_start(struct perf_event *event, int flags)
{
struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
struct perf_output_handle *handle = &hisi_ptt->trace_ctrl.handle;
struct hw_perf_event *hwc = &event->hw;
struct device *dev = event->pmu->dev;
struct hisi_ptt_pmu_buf *buf;
int cpu = event->cpu;
int ret;
hwc->state = 0;
/* Serialize the perf process if user specified several CPUs */
spin_lock(&hisi_ptt->pmu_lock);
if (hisi_ptt->trace_ctrl.started) {
dev_dbg(dev, "trace has already started\n");
goto stop;
}
/*
* Handle the interrupt on the same cpu which starts the trace to avoid
* context mismatch. Otherwise we'll trigger the WARN from the perf
* core in event_function_local(). If CPU passed is offline we'll fail
* here, just log it since we can do nothing here.
*/
ret = irq_set_affinity(pci_irq_vector(hisi_ptt->pdev, HISI_PTT_TRACE_DMA_IRQ),
cpumask_of(cpu));
if (ret)
dev_warn(dev, "failed to set the affinity of trace interrupt\n");
hisi_ptt->trace_ctrl.on_cpu = cpu;
buf = perf_aux_output_begin(handle, event);
if (!buf) {
dev_dbg(dev, "aux output begin failed\n");
goto stop;
}
buf->pos = handle->head % buf->length;
hisi_ptt_pmu_init_configs(hisi_ptt, event);
ret = hisi_ptt_trace_start(hisi_ptt);
if (ret) {
dev_dbg(dev, "trace start failed, ret = %d\n", ret);
perf_aux_output_end(handle, 0);
goto stop;
}
spin_unlock(&hisi_ptt->pmu_lock);
return;
stop:
event->hw.state |= PERF_HES_STOPPED;
spin_unlock(&hisi_ptt->pmu_lock);
}
static void hisi_ptt_pmu_stop(struct perf_event *event, int flags)
{
struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
struct hw_perf_event *hwc = &event->hw;
if (hwc->state & PERF_HES_STOPPED)
return;
spin_lock(&hisi_ptt->pmu_lock);
if (hisi_ptt->trace_ctrl.started) {
hisi_ptt_trace_end(hisi_ptt);
if (!hisi_ptt_wait_trace_hw_idle(hisi_ptt))
dev_warn(event->pmu->dev, "Device is still busy\n");
hisi_ptt_update_aux(hisi_ptt, hisi_ptt->trace_ctrl.buf_index, true);
}
spin_unlock(&hisi_ptt->pmu_lock);
hwc->state |= PERF_HES_STOPPED;
perf_event_update_userpage(event);
hwc->state |= PERF_HES_UPTODATE;
}
static int hisi_ptt_pmu_add(struct perf_event *event, int flags)
{
struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
struct hw_perf_event *hwc = &event->hw;
int cpu = event->cpu;
/* Only allow the cpus on the device's node to add the event */
if (!cpumask_test_cpu(cpu, cpumask_of_node(dev_to_node(&hisi_ptt->pdev->dev))))
return 0;
hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
if (flags & PERF_EF_START) {
hisi_ptt_pmu_start(event, PERF_EF_RELOAD);
if (hwc->state & PERF_HES_STOPPED)
return -EINVAL;
}
return 0;
}
static void hisi_ptt_pmu_del(struct perf_event *event, int flags)
{
hisi_ptt_pmu_stop(event, PERF_EF_UPDATE);
}
static void hisi_ptt_remove_cpuhp_instance(void *hotplug_node)
{
cpuhp_state_remove_instance_nocalls(hisi_ptt_pmu_online, hotplug_node);
}
static void hisi_ptt_unregister_pmu(void *pmu)
{
perf_pmu_unregister(pmu);
}
static int hisi_ptt_register_pmu(struct hisi_ptt *hisi_ptt)
{
u16 core_id, sicl_id;
char *pmu_name;
u32 reg;
int ret;
ret = cpuhp_state_add_instance_nocalls(hisi_ptt_pmu_online,
&hisi_ptt->hotplug_node);
if (ret)
return ret;
ret = devm_add_action_or_reset(&hisi_ptt->pdev->dev,
hisi_ptt_remove_cpuhp_instance,
&hisi_ptt->hotplug_node);
if (ret)
return ret;
mutex_init(&hisi_ptt->tune_lock);
spin_lock_init(&hisi_ptt->pmu_lock);
hisi_ptt->hisi_ptt_pmu = (struct pmu) {
.module = THIS_MODULE,
.capabilities = PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE,
.task_ctx_nr = perf_sw_context,
.attr_groups = hisi_ptt_pmu_groups,
.event_init = hisi_ptt_pmu_event_init,
.setup_aux = hisi_ptt_pmu_setup_aux,
.free_aux = hisi_ptt_pmu_free_aux,
.start = hisi_ptt_pmu_start,
.stop = hisi_ptt_pmu_stop,
.add = hisi_ptt_pmu_add,
.del = hisi_ptt_pmu_del,
};
reg = readl(hisi_ptt->iobase + HISI_PTT_LOCATION);
core_id = FIELD_GET(HISI_PTT_CORE_ID, reg);
sicl_id = FIELD_GET(HISI_PTT_SICL_ID, reg);
pmu_name = devm_kasprintf(&hisi_ptt->pdev->dev, GFP_KERNEL, "hisi_ptt%u_%u",
sicl_id, core_id);
if (!pmu_name)
return -ENOMEM;
ret = perf_pmu_register(&hisi_ptt->hisi_ptt_pmu, pmu_name, -1);
if (ret)
return ret;
return devm_add_action_or_reset(&hisi_ptt->pdev->dev,
hisi_ptt_unregister_pmu,
&hisi_ptt->hisi_ptt_pmu);
}
/*
* The DMA of PTT trace can only use direct mappings due to some
* hardware restriction. Check whether there is no IOMMU or the
* policy of the IOMMU domain is passthrough, otherwise the trace
* cannot work.
*
* The PTT device is supposed to behind an ARM SMMUv3, which
* should have passthrough the device by a quirk.
*/
static int hisi_ptt_check_iommu_mapping(struct pci_dev *pdev)
{
struct iommu_domain *iommu_domain;
iommu_domain = iommu_get_domain_for_dev(&pdev->dev);
if (!iommu_domain || iommu_domain->type == IOMMU_DOMAIN_IDENTITY)
return 0;
return -EOPNOTSUPP;
}
static int hisi_ptt_probe(struct pci_dev *pdev,
const struct pci_device_id *id)
{
struct hisi_ptt *hisi_ptt;
int ret;
ret = hisi_ptt_check_iommu_mapping(pdev);
if (ret) {
pci_err(pdev, "requires direct DMA mappings\n");
return ret;
}
hisi_ptt = devm_kzalloc(&pdev->dev, sizeof(*hisi_ptt), GFP_KERNEL);
if (!hisi_ptt)
return -ENOMEM;
hisi_ptt->pdev = pdev;
pci_set_drvdata(pdev, hisi_ptt);
ret = pcim_enable_device(pdev);
if (ret) {
pci_err(pdev, "failed to enable device, ret = %d\n", ret);
return ret;
}
ret = pcim_iomap_regions(pdev, BIT(2), DRV_NAME);
if (ret) {
pci_err(pdev, "failed to remap io memory, ret = %d\n", ret);
return ret;
}
hisi_ptt->iobase = pcim_iomap_table(pdev)[2];
ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
if (ret) {
pci_err(pdev, "failed to set 64 bit dma mask, ret = %d\n", ret);
return ret;
}
pci_set_master(pdev);
ret = hisi_ptt_register_irq(hisi_ptt);
if (ret)
return ret;
ret = hisi_ptt_init_ctrls(hisi_ptt);
if (ret) {
pci_err(pdev, "failed to init controls, ret = %d\n", ret);
return ret;
}
ret = hisi_ptt_register_pmu(hisi_ptt);
if (ret) {
pci_err(pdev, "failed to register PMU device, ret = %d", ret);
return ret;
}
return 0;
}
static const struct pci_device_id hisi_ptt_id_tbl[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, 0xa12e) },
{ }
};
MODULE_DEVICE_TABLE(pci, hisi_ptt_id_tbl);
static struct pci_driver hisi_ptt_driver = {
.name = DRV_NAME,
.id_table = hisi_ptt_id_tbl,
.probe = hisi_ptt_probe,
};
static int hisi_ptt_cpu_teardown(unsigned int cpu, struct hlist_node *node)
{
struct hisi_ptt *hisi_ptt;
struct device *dev;
int target, src;
hisi_ptt = hlist_entry_safe(node, struct hisi_ptt, hotplug_node);
src = hisi_ptt->trace_ctrl.on_cpu;
dev = hisi_ptt->hisi_ptt_pmu.dev;
if (!hisi_ptt->trace_ctrl.started || src != cpu)
return 0;
target = cpumask_any_but(cpumask_of_node(dev_to_node(&hisi_ptt->pdev->dev)), cpu);
if (target >= nr_cpu_ids) {
dev_err(dev, "no available cpu for perf context migration\n");
return 0;
}
perf_pmu_migrate_context(&hisi_ptt->hisi_ptt_pmu, src, target);
/*
* Also make sure the interrupt bind to the migrated CPU as well. Warn
* the user on failure here.
*/
if (irq_set_affinity(pci_irq_vector(hisi_ptt->pdev, HISI_PTT_TRACE_DMA_IRQ),
cpumask_of(target)))
dev_warn(dev, "failed to set the affinity of trace interrupt\n");
hisi_ptt->trace_ctrl.on_cpu = target;
return 0;
}
static int __init hisi_ptt_init(void)
{
int ret;
ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, DRV_NAME, NULL,
hisi_ptt_cpu_teardown);
if (ret < 0)
return ret;
hisi_ptt_pmu_online = ret;
ret = pci_register_driver(&hisi_ptt_driver);
if (ret)
cpuhp_remove_multi_state(hisi_ptt_pmu_online);
return ret;
}
module_init(hisi_ptt_init);
static void __exit hisi_ptt_exit(void)
{
pci_unregister_driver(&hisi_ptt_driver);
cpuhp_remove_multi_state(hisi_ptt_pmu_online);
}
module_exit(hisi_ptt_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Yicong Yang <yangyicong@hisilicon.com>");
MODULE_DESCRIPTION("Driver for HiSilicon PCIe tune and trace device");