| #include "kvm/devices.h" |
| #include "kvm/pci.h" |
| #include "kvm/ioport.h" |
| #include "kvm/irq.h" |
| #include "kvm/util.h" |
| #include "kvm/kvm.h" |
| |
| #include <linux/err.h> |
| #include <assert.h> |
| |
| static u32 pci_config_address_bits; |
| |
/*
 * This is within our PCI gap - in an unused area.
 * Note that this is a PCI *bus address*; it is used to assign BARs etc.
 * (That's why it can still be 32-bit even with 64-bit guests -- 64-bit
 * PCI isn't currently supported.)
 */
| static u32 mmio_blocks = KVM_PCI_MMIO_AREA; |
| static u16 io_port_blocks = PCI_IOPORT_START; |
| |
| u16 pci_get_io_port_block(u32 size) |
| { |
| u16 port = ALIGN(io_port_blocks, PCI_IO_SIZE); |
| |
| io_port_blocks = port + size; |
| return port; |
| } |
| |
| /* |
| * BARs must be naturally aligned, so enforce this in the allocator. |
| */ |
| u32 pci_get_mmio_block(u32 size) |
| { |
| u32 block = ALIGN(mmio_blocks, size); |
| mmio_blocks = block + size; |
| return block; |
| } |
| |
| void *pci_find_cap(struct pci_device_header *hdr, u8 cap_type) |
| { |
| u8 pos; |
| struct pci_cap_hdr *cap; |
| |
| pci_for_each_cap(pos, cap, hdr) { |
| if (cap->type == cap_type) |
| return cap; |
| } |
| |
| return NULL; |
| } |
| |
| int pci__assign_irq(struct pci_device_header *pci_hdr) |
| { |
| /* |
| * PCI supports only INTA#,B#,C#,D# per device. |
| * |
| * A#,B#,C#,D# are allowed for multifunctional devices so stick |
| * with A# for our single function devices. |
| */ |
| pci_hdr->irq_pin = 1; |
| pci_hdr->irq_line = irq__alloc_line(); |
| |
| if (!pci_hdr->irq_type) |
| pci_hdr->irq_type = IRQ_TYPE_LEVEL_HIGH; |
| |
| return pci_hdr->irq_line; |
| } |
| |
| static bool pci_bar_is_implemented(struct pci_device_header *pci_hdr, int bar_num) |
| { |
| return pci__bar_size(pci_hdr, bar_num); |
| } |
| |
| static bool pci_bar_is_active(struct pci_device_header *pci_hdr, int bar_num) |
| { |
| return pci_hdr->bar_active[bar_num]; |
| } |
| |
| static void *pci_config_address_ptr(u16 port) |
| { |
| unsigned long offset; |
| void *base; |
| |
| offset = port - PCI_CONFIG_ADDRESS; |
| base = &pci_config_address_bits; |
| |
| return base + offset; |
| } |
| |
| static void pci_config_address_mmio(struct kvm_cpu *vcpu, u64 addr, u8 *data, |
| u32 len, u8 is_write, void *ptr) |
| { |
| void *p = pci_config_address_ptr(addr); |
| |
| if (is_write) |
| memcpy(p, data, len); |
| else |
| memcpy(data, p, len); |
| } |
| static bool pci_device_exists(u8 bus_number, u8 device_number, u8 function_number) |
| { |
| union pci_config_address pci_config_address; |
| |
| pci_config_address.w = ioport__read32(&pci_config_address_bits); |
| |
| if (pci_config_address.bus_number != bus_number) |
| return false; |
| |
| if (pci_config_address.function_number != function_number) |
| return false; |
| |
| return !IS_ERR_OR_NULL(device__find_dev(DEVICE_BUS_PCI, device_number)); |
| } |
| |
| static void pci_config_data_mmio(struct kvm_cpu *vcpu, u64 addr, u8 *data, |
| u32 len, u8 is_write, void *kvm) |
| { |
| union pci_config_address pci_config_address; |
| |
| pci_config_address.w = ioport__read32(&pci_config_address_bits); |
| /* |
| * If someone accesses PCI configuration space offsets that are not |
| * aligned to 4 bytes, it uses ioports to signify that. |
| */ |
| pci_config_address.reg_offset = addr - PCI_CONFIG_DATA; |
| |
| /* Ensure the access does not cross a 4-byte boundary */ |
| len = min(len, 4U - pci_config_address.reg_offset); |
| |
| if (is_write) |
| pci__config_wr(vcpu->kvm, pci_config_address, data, len); |
| else |
| pci__config_rd(vcpu->kvm, pci_config_address, data, len); |
| } |
| |
| static int pci_activate_bar(struct kvm *kvm, struct pci_device_header *pci_hdr, |
| int bar_num) |
| { |
| int r = 0; |
| |
| if (pci_bar_is_active(pci_hdr, bar_num)) |
| goto out; |
| |
| r = pci_hdr->bar_activate_fn(kvm, pci_hdr, bar_num, pci_hdr->data); |
| if (r < 0) { |
| pci_dev_warn(pci_hdr, "Error activating emulation for BAR %d", |
| bar_num); |
| goto out; |
| } |
| pci_hdr->bar_active[bar_num] = true; |
| |
| out: |
| return r; |
| } |
| |
| static int pci_deactivate_bar(struct kvm *kvm, struct pci_device_header *pci_hdr, |
| int bar_num) |
| { |
| int r = 0; |
| |
| if (!pci_bar_is_active(pci_hdr, bar_num)) |
| goto out; |
| |
| r = pci_hdr->bar_deactivate_fn(kvm, pci_hdr, bar_num, pci_hdr->data); |
| if (r < 0) { |
| pci_dev_warn(pci_hdr, "Error deactivating emulation for BAR %d", |
| bar_num); |
| goto out; |
| } |
| pci_hdr->bar_active[bar_num] = false; |
| |
| out: |
| return r; |
| } |
| |
| static void pci_config_command_wr(struct kvm *kvm, |
| struct pci_device_header *pci_hdr, |
| u16 new_command) |
| { |
| int i; |
| bool toggle_io, toggle_mem; |
| |
| toggle_io = (pci_hdr->command ^ new_command) & PCI_COMMAND_IO; |
| toggle_mem = (pci_hdr->command ^ new_command) & PCI_COMMAND_MEMORY; |
| |
| for (i = 0; i < 6; i++) { |
| if (!pci_bar_is_implemented(pci_hdr, i)) |
| continue; |
| |
| if (toggle_io && pci__bar_is_io(pci_hdr, i)) { |
| if (__pci__io_space_enabled(new_command)) |
| pci_activate_bar(kvm, pci_hdr, i); |
| else |
| pci_deactivate_bar(kvm, pci_hdr, i); |
| } |
| |
| if (toggle_mem && pci__bar_is_memory(pci_hdr, i)) { |
| if (__pci__memory_space_enabled(new_command)) |
| pci_activate_bar(kvm, pci_hdr, i); |
| else |
| pci_deactivate_bar(kvm, pci_hdr, i); |
| } |
| } |
| |
| pci_hdr->command = new_command; |
| } |
| |
| static int pci_toggle_bar_regions(bool activate, struct kvm *kvm, u32 start, u32 size) |
| { |
| struct device_header *dev_hdr; |
| struct pci_device_header *tmp_hdr; |
| u32 tmp_start, tmp_size; |
| int i, r; |
| |
| dev_hdr = device__first_dev(DEVICE_BUS_PCI); |
| while (dev_hdr) { |
| tmp_hdr = dev_hdr->data; |
| for (i = 0; i < 6; i++) { |
| if (!pci_bar_is_implemented(tmp_hdr, i)) |
| continue; |
| |
| tmp_start = pci__bar_address(tmp_hdr, i); |
| tmp_size = pci__bar_size(tmp_hdr, i); |
| if (tmp_start + tmp_size <= start || |
| tmp_start >= start + size) |
| continue; |
| |
| if (activate) |
| r = pci_activate_bar(kvm, tmp_hdr, i); |
| else |
| r = pci_deactivate_bar(kvm, tmp_hdr, i); |
| if (r < 0) |
| return r; |
| } |
| dev_hdr = device__next_dev(dev_hdr); |
| } |
| |
| return 0; |
| } |
| |
| static inline int pci_activate_bar_regions(struct kvm *kvm, u32 start, u32 size) |
| { |
| return pci_toggle_bar_regions(true, kvm, start, size); |
| } |
| |
| static inline int pci_deactivate_bar_regions(struct kvm *kvm, u32 start, u32 size) |
| { |
| return pci_toggle_bar_regions(false, kvm, start, size); |
| } |
| |
| static void pci_config_bar_wr(struct kvm *kvm, |
| struct pci_device_header *pci_hdr, int bar_num, |
| u32 value) |
| { |
| u32 old_addr, new_addr, bar_size; |
| u32 mask; |
| int r; |
| |
| if (pci__bar_is_io(pci_hdr, bar_num)) |
| mask = (u32)PCI_BASE_ADDRESS_IO_MASK; |
| else |
| mask = (u32)PCI_BASE_ADDRESS_MEM_MASK; |
| |
| /* |
| * If the kernel masks the BAR, it will expect to find the size of the |
| * BAR there next time it reads from it. After the kernel reads the |
| * size, it will write the address back. |
| * |
| * According to the PCI local bus specification REV 3.0: The number of |
| * upper bits that a device actually implements depends on how much of |
| * the address space the device will respond to. A device that wants a 1 |
| * MB memory address space (using a 32-bit base address register) would |
| * build the top 12 bits of the address register, hardwiring the other |
| * bits to 0. |
| * |
| * Furthermore, software can determine how much address space the device |
| * requires by writing a value of all 1's to the register and then |
| * reading the value back. The device will return 0's in all don't-care |
| * address bits, effectively specifying the address space required. |
| * |
| * Software computes the size of the address space with the formula |
| * S = ~B + 1, where S is the memory size and B is the value read from |
| * the BAR. This means that the BAR value that kvmtool should return is |
| * B = ~(S - 1). |
| */ |
| if (value == 0xffffffff) { |
| value = ~(pci__bar_size(pci_hdr, bar_num) - 1); |
| /* Preserve the special bits. */ |
| value = (value & mask) | (pci_hdr->bar[bar_num] & ~mask); |
| pci_hdr->bar[bar_num] = value; |
| return; |
| } |
| |
| value = (value & mask) | (pci_hdr->bar[bar_num] & ~mask); |
| |
	/* Don't toggle emulation when region type access is disabled. */
| if (pci__bar_is_io(pci_hdr, bar_num) && |
| !pci__io_space_enabled(pci_hdr)) { |
| pci_hdr->bar[bar_num] = value; |
| return; |
| } |
| |
| if (pci__bar_is_memory(pci_hdr, bar_num) && |
| !pci__memory_space_enabled(pci_hdr)) { |
| pci_hdr->bar[bar_num] = value; |
| return; |
| } |
| |
| /* |
| * BAR reassignment can be done while device access is enabled and |
| * memory regions for different devices can overlap as long as no access |
| * is made to the overlapping memory regions. To implement BAR |
| * reasignment, we deactivate emulation for the region described by the |
| * BAR value that the guest is changing, we disable emulation for the |
| * regions that overlap with the new one (by scanning through all PCI |
| * devices), we enable emulation for the new BAR value and finally we |
| * enable emulation for all device regions that were overlapping with |
| * the old value. |
| */ |
| old_addr = pci__bar_address(pci_hdr, bar_num); |
| new_addr = __pci__bar_address(value); |
| bar_size = pci__bar_size(pci_hdr, bar_num); |
| |
| r = pci_deactivate_bar(kvm, pci_hdr, bar_num); |
| if (r < 0) |
| return; |
| |
| r = pci_deactivate_bar_regions(kvm, new_addr, bar_size); |
| if (r < 0) { |
| /* |
| * We cannot update the BAR because of an overlapping region |
| * that failed to deactivate emulation, so keep the old BAR |
| * value and re-activate emulation for it. |
| */ |
| pci_activate_bar(kvm, pci_hdr, bar_num); |
| return; |
| } |
| |
| pci_hdr->bar[bar_num] = value; |
| r = pci_activate_bar(kvm, pci_hdr, bar_num); |
| if (r < 0) { |
| /* |
| * New region cannot be emulated, re-enable the regions that |
| * were overlapping. |
| */ |
| pci_activate_bar_regions(kvm, new_addr, bar_size); |
| return; |
| } |
| |
| pci_activate_bar_regions(kvm, old_addr, bar_size); |
| } |
| |
| /* |
| * Bits that are writable in the config space header. |
| * Write-1-to-clear Status bits are missing since we never set them. |
| */ |
| static const u8 pci_config_writable[PCI_STD_HEADER_SIZEOF] = { |
| [PCI_COMMAND] = |
| PCI_COMMAND_IO | |
| PCI_COMMAND_MEMORY | |
| PCI_COMMAND_MASTER | |
| PCI_COMMAND_PARITY, |
| [PCI_COMMAND + 1] = |
| (PCI_COMMAND_SERR | |
| PCI_COMMAND_INTX_DISABLE) >> 8, |
| [PCI_INTERRUPT_LINE] = 0xff, |
| [PCI_BASE_ADDRESS_0 ... PCI_BASE_ADDRESS_5 + 3] = 0xff, |
| [PCI_CACHE_LINE_SIZE] = 0xff, |
| }; |
| |
| void pci__config_wr(struct kvm *kvm, union pci_config_address addr, void *data, int size) |
| { |
| void *base; |
| u8 bar; |
| u16 offset; |
| struct pci_device_header *pci_hdr; |
| u8 dev_num = addr.device_number; |
| u32 value = 0, mask = 0; |
| |
| if (!pci_device_exists(addr.bus_number, dev_num, 0)) |
| return; |
| |
| offset = addr.w & PCI_DEV_CFG_MASK; |
| base = pci_hdr = device__find_dev(DEVICE_BUS_PCI, dev_num)->data; |
| |
| /* We don't sanity-check capabilities for the moment */ |
| if (offset < PCI_STD_HEADER_SIZEOF) { |
| memcpy(&mask, pci_config_writable + offset, size); |
| if (!mask) |
| return; |
| } |
| |
| if (pci_hdr->cfg_ops.write) |
| pci_hdr->cfg_ops.write(kvm, pci_hdr, offset, data, size); |
| |
| if (offset == PCI_COMMAND) { |
| memcpy(&value, data, size); |
| pci_config_command_wr(kvm, pci_hdr, (u16)value & mask); |
| return; |
| } |
| |
| bar = (offset - PCI_BAR_OFFSET(0)) / sizeof(u32); |
| if (bar < 6) { |
| memcpy(&value, data, size); |
| pci_config_bar_wr(kvm, pci_hdr, bar, value); |
| return; |
| } |
| |
| memcpy(base + offset, data, size); |
| } |
| |
| void pci__config_rd(struct kvm *kvm, union pci_config_address addr, void *data, int size) |
| { |
| u16 offset; |
| struct pci_device_header *pci_hdr; |
| u8 dev_num = addr.device_number; |
| |
| if (pci_device_exists(addr.bus_number, dev_num, 0)) { |
| pci_hdr = device__find_dev(DEVICE_BUS_PCI, dev_num)->data; |
| offset = addr.w & PCI_DEV_CFG_MASK; |
| |
| if (pci_hdr->cfg_ops.read) |
| pci_hdr->cfg_ops.read(kvm, pci_hdr, offset, data, size); |
| |
| memcpy(data, (void *)pci_hdr + offset, size); |
| } else { |
| memset(data, 0xff, size); |
| } |
| } |
| |
| static void pci_config_mmio_access(struct kvm_cpu *vcpu, u64 addr, u8 *data, |
| u32 len, u8 is_write, void *kvm) |
| { |
| union pci_config_address cfg_addr; |
| |
| addr -= KVM_PCI_CFG_AREA; |
| cfg_addr.w = (u32)addr; |
| cfg_addr.enable_bit = 1; |
| |
| /* |
| * To prevent some overflows, reject accesses that cross a 4-byte |
| * boundary. The PCIe specification says: |
| * |
| * "Root Complex implementations are not required to support the |
| * generation of Configuration Requests from accesses that cross DW |
| * [4 bytes] boundaries." |
| */ |
| if ((addr & 3) + len > 4) |
| return; |
| |
| if (is_write) |
| pci__config_wr(kvm, cfg_addr, data, len); |
| else |
| pci__config_rd(kvm, cfg_addr, data, len); |
| } |
| |
| struct pci_device_header *pci__find_dev(u8 dev_num) |
| { |
| struct device_header *hdr = device__find_dev(DEVICE_BUS_PCI, dev_num); |
| |
| if (IS_ERR_OR_NULL(hdr)) |
| return NULL; |
| |
| return hdr->data; |
| } |
| |
| int pci__register_bar_regions(struct kvm *kvm, struct pci_device_header *pci_hdr, |
| bar_activate_fn_t bar_activate_fn, |
| bar_deactivate_fn_t bar_deactivate_fn, void *data) |
| { |
| int i, r; |
| |
| assert(bar_activate_fn && bar_deactivate_fn); |
| |
| pci_hdr->bar_activate_fn = bar_activate_fn; |
| pci_hdr->bar_deactivate_fn = bar_deactivate_fn; |
| pci_hdr->data = data; |
| |
| for (i = 0; i < 6; i++) { |
| if (!pci_bar_is_implemented(pci_hdr, i)) |
| continue; |
| |
| assert(!pci_bar_is_active(pci_hdr, i)); |
| |
| if (pci__bar_is_io(pci_hdr, i) && |
| pci__io_space_enabled(pci_hdr)) { |
| r = pci_activate_bar(kvm, pci_hdr, i); |
| if (r < 0) |
| return r; |
| } |
| |
| if (pci__bar_is_memory(pci_hdr, i) && |
| pci__memory_space_enabled(pci_hdr)) { |
| r = pci_activate_bar(kvm, pci_hdr, i); |
| if (r < 0) |
| return r; |
| } |
| } |
| |
| return 0; |
| } |
| |
| int pci__init(struct kvm *kvm) |
| { |
| int r; |
| |
| r = kvm__register_pio(kvm, PCI_CONFIG_DATA, 4, |
| pci_config_data_mmio, NULL); |
| if (r < 0) |
| return r; |
| r = kvm__register_pio(kvm, PCI_CONFIG_ADDRESS, 4, |
| pci_config_address_mmio, NULL); |
| if (r < 0) |
| goto err_unregister_data; |
| |
| r = kvm__register_mmio(kvm, KVM_PCI_CFG_AREA, PCI_CFG_SIZE, false, |
| pci_config_mmio_access, kvm); |
| if (r < 0) |
| goto err_unregister_addr; |
| |
| return 0; |
| |
| err_unregister_addr: |
| kvm__deregister_pio(kvm, PCI_CONFIG_ADDRESS); |
| err_unregister_data: |
| kvm__deregister_pio(kvm, PCI_CONFIG_DATA); |
| return r; |
| } |
| dev_base_init(pci__init); |
| |
| int pci__exit(struct kvm *kvm) |
| { |
| kvm__deregister_pio(kvm, PCI_CONFIG_DATA); |
| kvm__deregister_pio(kvm, PCI_CONFIG_ADDRESS); |
| |
| return 0; |
| } |
| dev_base_exit(pci__exit); |