irqchip: Add per-cpu interrupt partitioning library

We've unfortunately started seeing a situation where percpu interrupts
are partitioned in the system: one arbitrary set of CPUs has an
interrupt connected to a type of device, while another disjoint
set of CPUs has the same interrupt connected to another type of device.

This makes it impossible to have a device driver requesting this interrupt
using the current percpu-interrupt abstraction, as the same interrupt number
is now potentially claimed by at least two drivers, and we forbid interrupt
sharing on per-cpu interrupt.

A solution to this is to turn things upside down. Let's assume that our
system describes all the possible partitions for a given interrupt, and
give each of them a unique identifier. It is then possible to create
a namespace where the affinity identifier itself is a form of interrupt
number. At this point, it becomes easy to implement a set of partitions
as a cascaded irqchip, each affinity identifier being the HW irq.

This allows us to keep a number of nice properties:
- Each partition results in a separate percpu-interrupt (with a restrictied
  affinity), which keeps drivers happy.
- Because the underlying interrupt is still per-cpu, the overhead of
  the indirection can be kept pretty minimal.
- The core code can ignore most of that crap.

For that purpose, we implement a small library that deals with some of
the boilerplate code, relying on platform-specific drivers to provide
a description of the affinity sets and a set of callbacks.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: devicetree@vger.kernel.org
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Rob Herring <robh+dt@kernel.org>
Link: http://lkml.kernel.org/r/1460365075-7316-4-git-send-email-marc.zyngier@arm.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 3e12479..ea1836b 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -244,3 +244,6 @@
 config MVEBU_ODMI
 	bool
 	select GENERIC_MSI_IRQ_DOMAIN
+
+config PARTITION_PERCPU
+	bool
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index b03cfcb..e354b00c 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -27,6 +27,7 @@
 obj-$(CONFIG_ARM_GIC_V2M)		+= irq-gic-v2m.o
 obj-$(CONFIG_ARM_GIC_V3)		+= irq-gic-v3.o irq-gic-common.o
 obj-$(CONFIG_ARM_GIC_V3_ITS)		+= irq-gic-v3-its.o irq-gic-v3-its-pci-msi.o irq-gic-v3-its-platform-msi.o
+obj-$(CONFIG_PARTITION_PERCPU)		+= irq-partition-percpu.o
 obj-$(CONFIG_HISILICON_IRQ_MBIGEN)	+= irq-mbigen.o
 obj-$(CONFIG_ARM_NVIC)			+= irq-nvic.o
 obj-$(CONFIG_ARM_VIC)			+= irq-vic.o
diff --git a/drivers/irqchip/irq-partition-percpu.c b/drivers/irqchip/irq-partition-percpu.c
new file mode 100644
index 0000000..ccd72c2
--- /dev/null
+++ b/drivers/irqchip/irq-partition-percpu.c
@@ -0,0 +1,256 @@
+/*
+ * Copyright (C) 2016 ARM Limited, All Rights Reserved.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqchip/irq-partition-percpu.h>
+#include <linux/irqdomain.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+
+struct partition_desc {
+	int				nr_parts;
+	struct partition_affinity	*parts;
+	struct irq_domain		*domain;
+	struct irq_desc			*chained_desc;
+	unsigned long			*bitmap;
+	struct irq_domain_ops		ops;
+};
+
+static bool partition_check_cpu(struct partition_desc *part,
+				unsigned int cpu, unsigned int hwirq)
+{
+	return cpumask_test_cpu(cpu, &part->parts[hwirq].mask);
+}
+
+static void partition_irq_mask(struct irq_data *d)
+{
+	struct partition_desc *part = irq_data_get_irq_chip_data(d);
+	struct irq_chip *chip = irq_desc_get_chip(part->chained_desc);
+	struct irq_data *data = irq_desc_get_irq_data(part->chained_desc);
+
+	if (partition_check_cpu(part, smp_processor_id(), d->hwirq) &&
+	    chip->irq_mask)
+		chip->irq_mask(data);
+}
+
+static void partition_irq_unmask(struct irq_data *d)
+{
+	struct partition_desc *part = irq_data_get_irq_chip_data(d);
+	struct irq_chip *chip = irq_desc_get_chip(part->chained_desc);
+	struct irq_data *data = irq_desc_get_irq_data(part->chained_desc);
+
+	if (partition_check_cpu(part, smp_processor_id(), d->hwirq) &&
+	    chip->irq_unmask)
+		chip->irq_unmask(data);
+}
+
+static int partition_irq_set_irqchip_state(struct irq_data *d,
+					   enum irqchip_irq_state which,
+					   bool val)
+{
+	struct partition_desc *part = irq_data_get_irq_chip_data(d);
+	struct irq_chip *chip = irq_desc_get_chip(part->chained_desc);
+	struct irq_data *data = irq_desc_get_irq_data(part->chained_desc);
+
+	if (partition_check_cpu(part, smp_processor_id(), d->hwirq) &&
+	    chip->irq_set_irqchip_state)
+		return chip->irq_set_irqchip_state(data, which, val);
+
+	return -EINVAL;
+}
+
+static int partition_irq_get_irqchip_state(struct irq_data *d,
+					   enum irqchip_irq_state which,
+					   bool *val)
+{
+	struct partition_desc *part = irq_data_get_irq_chip_data(d);
+	struct irq_chip *chip = irq_desc_get_chip(part->chained_desc);
+	struct irq_data *data = irq_desc_get_irq_data(part->chained_desc);
+
+	if (partition_check_cpu(part, smp_processor_id(), d->hwirq) &&
+	    chip->irq_get_irqchip_state)
+		return chip->irq_get_irqchip_state(data, which, val);
+
+	return -EINVAL;
+}
+
+static int partition_irq_set_type(struct irq_data *d, unsigned int type)
+{
+	struct partition_desc *part = irq_data_get_irq_chip_data(d);
+	struct irq_chip *chip = irq_desc_get_chip(part->chained_desc);
+	struct irq_data *data = irq_desc_get_irq_data(part->chained_desc);
+
+	if (chip->irq_set_type)
+		return chip->irq_set_type(data, type);
+
+	return -EINVAL;
+}
+
+static void partition_irq_print_chip(struct irq_data *d, struct seq_file *p)
+{
+	struct partition_desc *part = irq_data_get_irq_chip_data(d);
+	struct irq_chip *chip = irq_desc_get_chip(part->chained_desc);
+	struct irq_data *data = irq_desc_get_irq_data(part->chained_desc);
+
+	seq_printf(p, " %5s-%lu", chip->name, data->hwirq);
+}
+
+static struct irq_chip partition_irq_chip = {
+	.irq_mask		= partition_irq_mask,
+	.irq_unmask		= partition_irq_unmask,
+	.irq_set_type		= partition_irq_set_type,
+	.irq_get_irqchip_state	= partition_irq_get_irqchip_state,
+	.irq_set_irqchip_state	= partition_irq_set_irqchip_state,
+	.irq_print_chip		= partition_irq_print_chip,
+};
+
+static void partition_handle_irq(struct irq_desc *desc)
+{
+	struct partition_desc *part = irq_desc_get_handler_data(desc);
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	int cpu = smp_processor_id();
+	int hwirq;
+
+	chained_irq_enter(chip, desc);
+
+	for_each_set_bit(hwirq, part->bitmap, part->nr_parts) {
+		if (partition_check_cpu(part, cpu, hwirq))
+			break;
+	}
+
+	if (unlikely(hwirq == part->nr_parts)) {
+		handle_bad_irq(desc);
+	} else {
+		unsigned int irq;
+		irq = irq_find_mapping(part->domain, hwirq);
+		generic_handle_irq(irq);
+	}
+
+	chained_irq_exit(chip, desc);
+}
+
+static int partition_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				  unsigned int nr_irqs, void *arg)
+{
+	int ret;
+	irq_hw_number_t hwirq;
+	unsigned int type;
+	struct irq_fwspec *fwspec = arg;
+	struct partition_desc *part;
+
+	BUG_ON(nr_irqs != 1);
+	ret = domain->ops->translate(domain, fwspec, &hwirq, &type);
+	if (ret)
+		return ret;
+
+	part = domain->host_data;
+
+	set_bit(hwirq, part->bitmap);
+	irq_set_chained_handler_and_data(irq_desc_get_irq(part->chained_desc),
+					 partition_handle_irq, part);
+	irq_set_percpu_devid_partition(virq, &part->parts[hwirq].mask);
+	irq_domain_set_info(domain, virq, hwirq, &partition_irq_chip, part,
+			    handle_percpu_devid_irq, NULL, NULL);
+	irq_set_status_flags(virq, IRQ_NOAUTOEN);
+
+	return 0;
+}
+
+static void partition_domain_free(struct irq_domain *domain, unsigned int virq,
+				  unsigned int nr_irqs)
+{
+	struct irq_data *d;
+
+	BUG_ON(nr_irqs != 1);
+
+	d = irq_domain_get_irq_data(domain, virq);
+	irq_set_handler(virq, NULL);
+	irq_domain_reset_irq_data(d);
+}
+
+int partition_translate_id(struct partition_desc *desc, void *partition_id)
+{
+	struct partition_affinity *part = NULL;
+	int i;
+
+	for (i = 0; i < desc->nr_parts; i++) {
+		if (desc->parts[i].partition_id == partition_id) {
+			part = &desc->parts[i];
+			break;
+		}
+	}
+
+	if (WARN_ON(!part)) {
+		pr_err("Failed to find partition\n");
+		return -EINVAL;
+	}
+
+	return i;
+}
+
+struct partition_desc *partition_create_desc(struct fwnode_handle *fwnode,
+					     struct partition_affinity *parts,
+					     int nr_parts,
+					     int chained_irq,
+					     const struct irq_domain_ops *ops)
+{
+	struct partition_desc *desc;
+	struct irq_domain *d;
+
+	BUG_ON(!ops->select || !ops->translate);
+
+	desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+	if (!desc)
+		return NULL;
+
+	desc->ops = *ops;
+	desc->ops.free = partition_domain_free;
+	desc->ops.alloc = partition_domain_alloc;
+
+	d = irq_domain_create_linear(fwnode, nr_parts, &desc->ops, desc);
+	if (!d)
+		goto out;
+	desc->domain = d;
+
+	desc->bitmap = kzalloc(sizeof(long) * BITS_TO_LONGS(nr_parts),
+			       GFP_KERNEL);
+	if (WARN_ON(!desc->bitmap))
+		goto out;
+
+	desc->chained_desc = irq_to_desc(chained_irq);
+	desc->nr_parts = nr_parts;
+	desc->parts = parts;
+
+	return desc;
+out:
+	if (d)
+		irq_domain_remove(d);
+	kfree(desc);
+
+	return NULL;
+}
+
+struct irq_domain *partition_get_domain(struct partition_desc *dsc)
+{
+	if (dsc)
+		return dsc->domain;
+
+	return NULL;
+}