#include "kvm/devices.h"
#include "kvm/fdt.h"
#include "kvm/ioeventfd.h"
#include "kvm/ioport.h"
#include "kvm/kvm.h"
#include "kvm/kvm-cpu.h"
#include "kvm/irq.h"
#include "kvm/mutex.h"
#include <linux/byteorder.h>
#include <linux/kernel.h>
#include <linux/kvm.h>
#include <linux/sizes.h>
/*
* From the RISC-V Privileged Spec v1.10:
*
* Global interrupt sources are assigned small unsigned integer identifiers,
* beginning at the value 1. An interrupt ID of 0 is reserved to mean no
* interrupt. Interrupt identifiers are also used to break ties when two or
* more interrupt sources have the same assigned priority. Smaller values of
* interrupt ID take precedence over larger values of interrupt ID.
*
* While the RISC-V supervisor spec doesn't define the maximum number of
* devices supported by the PLIC, the largest number supported by devices
* marked as 'riscv,plic0' (which is the only device type this driver supports,
* and is the only extant PLIC as of now) is 1024. As mentioned above, device
* 0 is defined to be non-existent, so this device really only supports 1023
* devices.
*/
#define MAX_DEVICES 1024
#define MAX_CONTEXTS 15872
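/*
 * Illustrative arithmetic (not used by the code): the enable words for
 * MAX_CONTEXTS contexts, using the offsets defined below, end at
 * ENABLE_BASE + MAX_CONTEXTS * ENABLE_PER_HART = 0x2000 + 15872 * 0x80
 * = 0x1F2000, which sits below CONTEXT_BASE (0x200000); the reserved
 * gap up to 0x1FFFFC is visible in the map below.
 */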
/*
* The PLIC consists of memory-mapped control registers, with a memory map as
* follows:
*
* base + 0x000000: Reserved (interrupt source 0 does not exist)
* base + 0x000004: Interrupt source 1 priority
* base + 0x000008: Interrupt source 2 priority
* ...
* base + 0x000FFC: Interrupt source 1023 priority
* base + 0x001000: Pending bits for sources 0-31
* ...
* base + 0x00107C: Pending bits for sources 992-1023
* base + 0x001080: Reserved
* ...
* base + 0x001FFC: Reserved
* base + 0x002000: Enable bits for sources 0-31 on context 0
* base + 0x002004: Enable bits for sources 32-63 on context 0
* ...
* base + 0x00207C: Enable bits for sources 992-1023 on context 0
* base + 0x002080: Enable bits for sources 0-31 on context 1
* ...
* base + 0x002100: Enable bits for sources 0-31 on context 2
* ...
* base + 0x1F1F80: Enable bits for sources 0-31 on context 15871
* ...
* base + 0x1F1FFC: Enable bits for sources 992-1023 on context 15871
* base + 0x1F2000: Reserved
* ... (higher context IDs would fit here, but wouldn't fit
* inside the per-context register space below)
* base + 0x1FFFFC: Reserved
* base + 0x200000: Priority threshold for context 0
* base + 0x200004: Claim/complete for context 0
* base + 0x200008: Reserved
* ...
* base + 0x200FFC: Reserved
* base + 0x201000: Priority threshold for context 1
* base + 0x201004: Claim/complete for context 1
* ...
* base + 0xFFF000: Priority threshold for context 3583
* base + 0xFFF004: Claim/complete for context 3583
* base + 0xFFF008: Reserved
* ...
* base + 0xFFFFFC: Reserved
*/
/* Each interrupt source has a priority register associated with it. */
#define PRIORITY_BASE 0
#define PRIORITY_PER_ID 4
/*
* Each hart context has a vector of interrupt enable bits associated with it.
* There's one bit for each interrupt source.
*/
#define ENABLE_BASE 0x2000
#define ENABLE_PER_HART 0x80
/*
* Each hart context has a set of control registers associated with it. Right
* now there are only two: a source priority threshold over which the hart will
* take an interrupt, and a register to claim interrupts.
*/
#define CONTEXT_BASE 0x200000
#define CONTEXT_PER_HART 0x1000
#define CONTEXT_THRESHOLD 0
#define CONTEXT_CLAIM 4
#define REG_SIZE 0x1000000
#define IRQCHIP_PLIC_NR 0
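/*
 * Worked example (illustration only): with the offsets above, the
 * claim/complete register of context 1 lives at
 *
 *   base + CONTEXT_BASE + 1 * CONTEXT_PER_HART + CONTEXT_CLAIM
 *   = base + 0x200000 + 0x1000 + 0x4 = base + 0x201004
 *
 * matching the "Claim/complete for context 1" line in the map above.
 */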
struct plic_state;
struct plic_context {
/* State to which this belongs */
struct plic_state *s;
/* Static Configuration */
u32 num;
struct kvm_cpu *vcpu;
/* Local IRQ state */
struct mutex irq_lock;
u8 irq_priority_threshold;
u32 irq_enable[MAX_DEVICES/32];
u32 irq_pending[MAX_DEVICES/32];
u8 irq_pending_priority[MAX_DEVICES];
u32 irq_claimed[MAX_DEVICES/32];
u32 irq_autoclear[MAX_DEVICES/32];
};
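/*
 * The u32 arrays above are bitmaps: source N is tracked by bit (N % 32)
 * of word (N / 32). A sketch of the indexing used throughout this file:
 *
 *   u32 irq_word = irq / 32;
 *   u32 irq_mask = 1 << (irq % 32);
 *   bool pending = !!(c->irq_pending[irq_word] & irq_mask);
 */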
struct plic_state {
bool ready;
struct kvm *kvm;
/* Static Configuration */
u32 num_irq;
u32 num_irq_word;
u32 max_prio;
/* Context Array */
u32 num_context;
struct plic_context *contexts;
/* Global IRQ state */
struct mutex irq_lock;
u8 irq_priority[MAX_DEVICES];
u32 irq_level[MAX_DEVICES/32];
};
static struct plic_state plic;
/* Note: Must be called with c->irq_lock held */
static u32 __plic_context_best_pending_irq(struct plic_state *s,
struct plic_context *c)
{
u8 best_irq_prio = 0;
u32 i, j, irq, best_irq = 0;
for (i = 0; i < s->num_irq_word; i++) {
if (!c->irq_pending[i])
continue;
for (j = 0; j < 32; j++) {
irq = i * 32 + j;
if ((s->num_irq <= irq) ||
!(c->irq_pending[i] & (1 << j)) ||
(c->irq_claimed[i] & (1 << j)))
continue;
if (!best_irq ||
(best_irq_prio < c->irq_pending_priority[irq])) {
best_irq = irq;
best_irq_prio = c->irq_pending_priority[irq];
}
}
}
return best_irq;
}
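/*
 * Tie-break example: if sources 5 and 9 are both pending at priority 7,
 * the scan above reaches source 5 first and the strict '<' comparison
 * never replaces it with source 9, so the smaller ID wins on equal
 * priority, exactly as the spec text at the top of this file requires.
 */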
/* Note: Must be called with c->irq_lock held */
static void __plic_context_irq_update(struct plic_state *s,
struct plic_context *c)
{
u32 best_irq = __plic_context_best_pending_irq(s, c);
u32 virq = (best_irq) ? KVM_INTERRUPT_SET : KVM_INTERRUPT_UNSET;
if (ioctl(c->vcpu->vcpu_fd, KVM_INTERRUPT, &virq) < 0)
pr_warning("KVM_INTERRUPT failed");
}
/* Note: Must be called with c->irq_lock held */
static u32 __plic_context_irq_claim(struct plic_state *s,
struct plic_context *c)
{
u32 virq = KVM_INTERRUPT_UNSET;
u32 best_irq = __plic_context_best_pending_irq(s, c);
u32 best_irq_word = best_irq / 32;
u32 best_irq_mask = (1 << (best_irq % 32));
if (ioctl(c->vcpu->vcpu_fd, KVM_INTERRUPT, &virq) < 0)
pr_warning("KVM_INTERRUPT failed");
if (best_irq) {
if (c->irq_autoclear[best_irq_word] & best_irq_mask) {
c->irq_pending[best_irq_word] &= ~best_irq_mask;
c->irq_pending_priority[best_irq] = 0;
c->irq_claimed[best_irq_word] &= ~best_irq_mask;
c->irq_autoclear[best_irq_word] &= ~best_irq_mask;
} else
c->irq_claimed[best_irq_word] |= best_irq_mask;
}
__plic_context_irq_update(s, c);
return best_irq;
}
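/*
 * Guest-side sketch (illustration, not part of this file): a hart
 * services an external interrupt by reading its claim/complete
 * register, which lands in __plic_context_irq_claim() via
 * plic__context_read() below, then completes it by writing the ID
 * back. Using hypothetical readl()/writel() MMIO accessors:
 *
 *   id = readl(plic_base + CONTEXT_BASE +
 *              ctx * CONTEXT_PER_HART + CONTEXT_CLAIM);
 *   if (id)
 *           handle_source(id);      // hypothetical guest handler
 *   writel(id, plic_base + CONTEXT_BASE +
 *              ctx * CONTEXT_PER_HART + CONTEXT_CLAIM);
 */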
static void plic__irq_trig(struct kvm *kvm, int irq, int level, bool edge)
{
bool irq_marked = false;
u8 i, irq_prio, irq_word;
u32 irq_mask;
struct plic_context *c = NULL;
struct plic_state *s = &plic;
if (!s->ready)
return;
mutex_lock(&s->irq_lock);
if (irq <= 0 || s->num_irq <= (u32)irq)
goto done;
irq_prio = s->irq_priority[irq];
irq_word = irq / 32;
irq_mask = 1 << (irq % 32);
if (level)
s->irq_level[irq_word] |= irq_mask;
else
s->irq_level[irq_word] &= ~irq_mask;
/*
* Note: The PLIC spec only defines level-triggered interrupt
* sources; there is no notion of edge-triggered interrupts. To
* emulate edge-triggered sources, we auto-clear them when the
* PLIC context CLAIM register is read.
*/
for (i = 0; i < s->num_context; i++) {
c = &s->contexts[i];
mutex_lock(&c->irq_lock);
if (c->irq_enable[irq_word] & irq_mask) {
if (level) {
c->irq_pending[irq_word] |= irq_mask;
c->irq_pending_priority[irq] = irq_prio;
if (edge)
c->irq_autoclear[irq_word] |= irq_mask;
} else {
c->irq_pending[irq_word] &= ~irq_mask;
c->irq_pending_priority[irq] = 0;
c->irq_claimed[irq_word] &= ~irq_mask;
c->irq_autoclear[irq_word] &= ~irq_mask;
}
__plic_context_irq_update(s, c);
irq_marked = true;
}
mutex_unlock(&c->irq_lock);
if (irq_marked)
break;
}
done:
mutex_unlock(&s->irq_lock);
}
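/*
 * Usage sketch: device models do not call plic__irq_trig() directly;
 * it is reached through the riscv_irqchip_trigger hook installed by
 * plic__create() below. Assuming kvmtool's generic IRQ helpers:
 *
 *   kvm__irq_line(kvm, irq, 1);     // assert a level-triggered source
 *   kvm__irq_line(kvm, irq, 0);     // ...and deassert it later
 *   kvm__irq_trigger(kvm, irq);     // pulse an edge (auto-clear on claim)
 */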
static void plic__priority_read(struct plic_state *s,
u64 offset, void *data)
{
u32 irq = (offset >> 2);
if (irq == 0 || irq >= s->num_irq)
return;
mutex_lock(&s->irq_lock);
ioport__write32(data, s->irq_priority[irq]);
mutex_unlock(&s->irq_lock);
}
static void plic__priority_write(struct plic_state *s,
u64 offset, void *data)
{
u32 val, irq = (offset >> 2);
if (irq == 0 || irq >= s->num_irq)
return;
mutex_lock(&s->irq_lock);
val = ioport__read32(data);
val &= ((1 << PRIORITY_PER_ID) - 1);
s->irq_priority[irq] = val;
mutex_unlock(&s->irq_lock);
}
static void plic__context_enable_read(struct plic_state *s,
struct plic_context *c,
u64 offset, void *data)
{
u32 irq_word = offset >> 2;
if (irq_word >= s->num_irq_word)
return;
mutex_lock(&c->irq_lock);
ioport__write32(data, c->irq_enable[irq_word]);
mutex_unlock(&c->irq_lock);
}
static void plic__context_enable_write(struct plic_state *s,
struct plic_context *c,
u64 offset, void *data)
{
u8 irq_prio;
u32 i, irq, irq_mask;
u32 irq_word = offset >> 2;
u32 old_val, new_val, xor_val;
if (irq_word >= s->num_irq_word)
return;
mutex_lock(&s->irq_lock);
mutex_lock(&c->irq_lock);
old_val = c->irq_enable[irq_word];
new_val = ioport__read32(data);
if (irq_word == 0)
new_val &= ~0x1;
c->irq_enable[irq_word] = new_val;
xor_val = old_val ^ new_val;
for (i = 0; i < 32; i++) {
irq = irq_word * 32 + i;
irq_mask = 1 << i;
irq_prio = s->irq_priority[irq];
if (!(xor_val & irq_mask))
continue;
if ((new_val & irq_mask) &&
(s->irq_level[irq_word] & irq_mask)) {
c->irq_pending[irq_word] |= irq_mask;
c->irq_pending_priority[irq] = irq_prio;
} else if (!(new_val & irq_mask)) {
c->irq_pending[irq_word] &= ~irq_mask;
c->irq_pending_priority[irq] = 0;
c->irq_claimed[irq_word] &= ~irq_mask;
}
}
__plic_context_irq_update(s, c);
mutex_unlock(&c->irq_lock);
mutex_unlock(&s->irq_lock);
}
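/*
 * Example: if old_val is 0x2 (source 1 enabled) and the guest writes
 * 0x6 (sources 1 and 2), then xor_val is 0x4 and only source 2 is
 * re-evaluated: it becomes pending immediately iff its line is already
 * high in s->irq_level, while source 1's state is left untouched.
 */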
static void plic__context_read(struct plic_state *s,
struct plic_context *c,
u64 offset, void *data)
{
mutex_lock(&c->irq_lock);
switch (offset) {
case CONTEXT_THRESHOLD:
ioport__write32(data, c->irq_priority_threshold);
break;
case CONTEXT_CLAIM:
ioport__write32(data, __plic_context_irq_claim(s, c));
break;
default:
break;
}
mutex_unlock(&c->irq_lock);
}
static void plic__context_write(struct plic_state *s,
struct plic_context *c,
u64 offset, void *data)
{
u32 val, irq_word, irq_mask;
bool irq_update = false;
mutex_lock(&c->irq_lock);
switch (offset) {
case CONTEXT_THRESHOLD:
val = ioport__read32(data);
val &= ((1 << PRIORITY_PER_ID) - 1);
if (val <= s->max_prio)
c->irq_priority_threshold = val;
else
irq_update = true;
break;
case CONTEXT_CLAIM:
val = ioport__read32(data);
irq_word = val / 32;
irq_mask = 1 << (val % 32);
if ((val < plic.num_irq) &&
(c->irq_enable[irq_word] & irq_mask)) {
c->irq_claimed[irq_word] &= ~irq_mask;
irq_update = true;
}
break;
default:
irq_update = true;
break;
}
if (irq_update)
__plic_context_irq_update(s, c);
mutex_unlock(&c->irq_lock);
}
static void plic__mmio_callback(struct kvm_cpu *vcpu,
u64 addr, u8 *data, u32 len,
u8 is_write, void *ptr)
{
u32 cntx;
struct plic_state *s = ptr;
if (len != 4)
die("plic: invalid len=%d", len);
addr &= ~0x3;
addr -= RISCV_IRQCHIP;
if (is_write) {
if (PRIORITY_BASE <= addr && addr < ENABLE_BASE) {
plic__priority_write(s, addr, data);
} else if (ENABLE_BASE <= addr && addr < CONTEXT_BASE) {
cntx = (addr - ENABLE_BASE) / ENABLE_PER_HART;
addr -= cntx * ENABLE_PER_HART + ENABLE_BASE;
if (cntx < s->num_context)
plic__context_enable_write(s,
&s->contexts[cntx],
addr, data);
} else if (CONTEXT_BASE <= addr && addr < REG_SIZE) {
cntx = (addr - CONTEXT_BASE) / CONTEXT_PER_HART;
addr -= cntx * CONTEXT_PER_HART + CONTEXT_BASE;
if (cntx < s->num_context)
plic__context_write(s, &s->contexts[cntx],
addr, data);
}
} else {
if (PRIORITY_BASE <= addr && addr < ENABLE_BASE) {
plic__priority_read(s, addr, data);
} else if (ENABLE_BASE <= addr && addr < CONTEXT_BASE) {
cntx = (addr - ENABLE_BASE) / ENABLE_PER_HART;
addr -= cntx * ENABLE_PER_HART + ENABLE_BASE;
if (cntx < s->num_context)
plic__context_enable_read(s,
&s->contexts[cntx],
addr, data);
} else if (CONTEXT_BASE <= addr && addr < REG_SIZE) {
cntx = (addr - CONTEXT_BASE) / CONTEXT_PER_HART;
addr -= cntx * CONTEXT_PER_HART + CONTEXT_BASE;
if (cntx < s->num_context)
plic__context_read(s, &s->contexts[cntx],
addr, data);
}
}
}
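/*
 * Decode example (illustration): a 4-byte write at RISCV_IRQCHIP +
 * 0x2084 falls in the enable range, so
 *
 *   cntx = (0x2084 - ENABLE_BASE) / ENABLE_PER_HART = 0x84 / 0x80 = 1
 *   addr = 0x2084 - (1 * ENABLE_PER_HART + ENABLE_BASE) = 0x4
 *
 * and it programs the enable bits for sources 32-63 on context 1.
 */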
static void plic__generate_fdt_node(void *fdt, struct kvm *kvm)
{
u32 i;
char name[64];
u32 reg_cells[4], *irq_cells;
reg_cells[0] = 0;
reg_cells[1] = cpu_to_fdt32(RISCV_IRQCHIP);
reg_cells[2] = 0;
reg_cells[3] = cpu_to_fdt32(RISCV_IRQCHIP_SIZE);
irq_cells = calloc(plic.num_context * 2, sizeof(u32));
if (!irq_cells)
die("Failed to alloc irq_cells");
sprintf(name, "interrupt-controller@%08x", (u32)RISCV_IRQCHIP);
_FDT(fdt_begin_node(fdt, name));
_FDT(fdt_property_string(fdt, "compatible", "riscv,plic0"));
_FDT(fdt_property(fdt, "reg", reg_cells, sizeof(reg_cells)));
_FDT(fdt_property_cell(fdt, "#interrupt-cells", 1));
_FDT(fdt_property(fdt, "interrupt-controller", NULL, 0));
_FDT(fdt_property_cell(fdt, "riscv,max-priority", plic.max_prio));
_FDT(fdt_property_cell(fdt, "riscv,ndev", MAX_DEVICES - 1));
_FDT(fdt_property_cell(fdt, "phandle", PHANDLE_PLIC));
for (i = 0; i < (plic.num_context / 2); i++) {
irq_cells[4*i + 0] = cpu_to_fdt32(PHANDLE_CPU_INTC_BASE + i);
irq_cells[4*i + 1] = cpu_to_fdt32(0xffffffff);
irq_cells[4*i + 2] = cpu_to_fdt32(PHANDLE_CPU_INTC_BASE + i);
irq_cells[4*i + 3] = cpu_to_fdt32(9);
}
_FDT(fdt_property(fdt, "interrupts-extended", irq_cells,
sizeof(u32) * plic.num_context * 2));
_FDT(fdt_end_node(fdt));
free(irq_cells);
}
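/*
 * The node built above corresponds roughly to this DTS fragment (a
 * sketch; the unit address and phandle values come from kvmtool's
 * kvm-arch and fdt headers):
 *
 *   interrupt-controller@... {
 *           compatible = "riscv,plic0";
 *           #interrupt-cells = <1>;
 *           interrupt-controller;
 *           riscv,max-priority = <15>;
 *           riscv,ndev = <1023>;
 *           interrupts-extended = <&cpu0_intc 0xffffffff>, <&cpu0_intc 9>,
 *                                 <&cpu1_intc 0xffffffff>, <&cpu1_intc 9>;
 *   };
 *
 * Each hart contributes two context entries: 0xffffffff (-1) leaves the
 * M-mode context unwired, and 9 wires the S-mode context to the
 * supervisor external interrupt.
 */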
static int plic__irq_routing_init(struct kvm *kvm)
{
int r;
/*
* This describes the default routing that the kernel uses without
* any routing explicitly set up via KVM_SET_GSI_ROUTING. So we
* don't need to commit these settings right now. The first actual
* user (MSI routing) will engage these mappings then.
*/
for (next_gsi = 0; next_gsi < MAX_DEVICES; next_gsi++) {
r = irq__allocate_routing_entry();
if (r)
return r;
irq_routing->entries[irq_routing->nr++] =
(struct kvm_irq_routing_entry) {
.gsi = next_gsi,
.type = KVM_IRQ_ROUTING_IRQCHIP,
.u.irqchip.irqchip = IRQCHIP_PLIC_NR,
.u.irqchip.pin = next_gsi,
};
}
return 0;
}
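/*
 * Each generated entry is an identity GSI-to-pin mapping, e.g.
 * (illustration):
 *
 *   { .gsi = 5, .type = KVM_IRQ_ROUTING_IRQCHIP,
 *     .u.irqchip = { .irqchip = IRQCHIP_PLIC_NR, .pin = 5 } }
 */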
static int plic__init(struct kvm *kvm)
{
u32 i;
int ret;
struct plic_context *c;
if (riscv_irqchip != IRQCHIP_PLIC)
return 0;
plic.kvm = kvm;
plic.num_irq = MAX_DEVICES;
plic.num_irq_word = plic.num_irq / 32;
if ((plic.num_irq_word * 32) < plic.num_irq)
plic.num_irq_word++;
plic.max_prio = (1UL << PRIORITY_PER_ID) - 1;
plic.num_context = kvm->nrcpus * 2;
plic.contexts = calloc(plic.num_context, sizeof(struct plic_context));
if (!plic.contexts)
return -ENOMEM;
for (i = 0; i < plic.num_context; i++) {
c = &plic.contexts[i];
c->s = &plic;
c->num = i;
c->vcpu = kvm->cpus[i / 2];
mutex_init(&c->irq_lock);
}
mutex_init(&plic.irq_lock);
ret = kvm__register_mmio(kvm, RISCV_IRQCHIP, RISCV_IRQCHIP_SIZE,
false, plic__mmio_callback, &plic);
if (ret)
return ret;
/* Setup default IRQ routing */
ret = plic__irq_routing_init(kvm);
if (ret)
return ret;
plic.ready = true;
return 0;
}
dev_init(plic__init);
static int plic__exit(struct kvm *kvm)
{
if (riscv_irqchip != IRQCHIP_PLIC)
return 0;
plic.ready = false;
kvm__deregister_mmio(kvm, RISCV_IRQCHIP);
free(plic.contexts);
return 0;
}
dev_exit(plic__exit);
void plic__create(struct kvm *kvm)
{
if (riscv_irqchip != IRQCHIP_UNKNOWN)
return;
riscv_irqchip = IRQCHIP_PLIC;
riscv_irqchip_inkernel = false;
riscv_irqchip_trigger = plic__irq_trig;
riscv_irqchip_generate_fdt_node = plic__generate_fdt_node;
riscv_irqchip_phandle = PHANDLE_PLIC;
riscv_irqchip_msi_phandle = PHANDLE_RESERVED;
riscv_irqchip_line_sensing = false;
}