// SPDX-License-Identifier: BSD-3-Clause
/*
 * Copyright (c) 2009-2012,2016,2023 Microsoft Corp.
 * Copyright (c) 2012 NetApp Inc.
 * Copyright (c) 2012 Citrix Inc.
 * All rights reserved.
 */

#include <errno.h>
#include <fcntl.h>
#include <emmintrin.h>
#include <linux/limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/uio.h>
#include <unistd.h>
#include "vmbus_bufring.h"

/**
 * Compiler barrier.
 *
 * Prevents the compiler from reordering memory operations across the
 * barrier.  It emits no CPU instruction, so it does not order the
 * hardware itself; rte_smp_mb() below provides a full barrier.
 */
#define rte_compiler_barrier() ({ asm volatile ("" : : : "memory"); })

#define VMBUS_RQST_ERROR 0xFFFFFFFFFFFFFFFFULL
/*
 * Round val up to the next multiple of align (a power of two), e.g.
 * ALIGN(13, 8) == 16.  Rounding up rather than down is required so that
 * the padding length computed in rte_vmbus_chan_send() is never negative.
 */
#define ALIGN(val, align) \
        ((typeof(val))(((val) + ((typeof(val))(align) - 1)) & (~((typeof(val))((align) - 1)))))

void *vmbus_uio_map(int *fd, int size)
{
        void *map;

        /*
         * One mapping covers both ring buffers: the uio_hv_generic driver
         * exposes the TX ring immediately followed by the RX ring, each
         * 'size' bytes long, hence the doubled length.
         */
        map = mmap(NULL, 2 * size, PROT_READ | PROT_WRITE, MAP_SHARED, *fd, 0);
        if (map == MAP_FAILED)
                return NULL;

        return map;
}

/* Increase bufring index by inc with wraparound */
static inline uint32_t vmbus_br_idxinc(uint32_t idx, uint32_t inc, uint32_t sz)
{
        idx += inc;
        if (idx >= sz)
                idx -= sz;

        return idx;
}

/*
 * Attach a ring descriptor to a mapped ring buffer.  The usable data
 * size excludes the leading struct vmbus_bufring control header.
 */
void vmbus_br_setup(struct vmbus_br *br, void *buf, unsigned int blen)
{
        br->vbr = buf;
        br->windex = br->vbr->windex;
        br->dsize = blen - sizeof(struct vmbus_bufring);
}

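/*
 * Illustrative sketch, not part of the original file: one plausible way
 * to map a channel's rings and set up both descriptors.  The "/dev/uio0"
 * path and the page-aligned ring_size are assumptions for the example;
 * real callers discover both from sysfs.
 */
static __attribute__((unused)) int
vmbus_example_open(struct vmbus_br *txbr, struct vmbus_br *rxbr, int ring_size)
{
        void *ring;
        int fd;

        fd = open("/dev/uio0", O_RDWR);         /* hypothetical device node */
        if (fd < 0)
                return -errno;

        /* One mapping holds both rings: TX first, then RX. */
        ring = vmbus_uio_map(&fd, ring_size);
        if (!ring) {
                close(fd);
                return -ENOMEM;
        }

        vmbus_br_setup(txbr, ring, ring_size);
        vmbus_br_setup(rxbr, (uint8_t *)ring + ring_size, ring_size);
        return fd;
}
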
/*
 * Full memory barrier: a locked add to a dead slot in the x86-64 red
 * zone orders both loads and stores, and is cheaper than an mfence.
 */
static inline void
rte_smp_mb(void)
{
        asm volatile("lock addl $0, -128(%%rsp); " ::: "memory");
}

/* Atomic compare-and-set: if *dst == exp, store src and return 1, else 0. */
static inline int
rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src)
{
        uint8_t res;

        asm volatile("lock ; "
                     "cmpxchgl %[src], %[dst];"
                     "sete %[res];"
                     : [res] "=a" (res),        /* output */
                       [dst] "=m" (*dst)
                     : [src] "r" (src),         /* input */
                       "a" (exp),
                       "m" (*dst)
                     : "memory");               /* clobber list */
        return res;
}

static inline uint32_t
vmbus_txbr_copyto(const struct vmbus_br *tbr, uint32_t windex,
                  const void *src0, uint32_t cplen)
{
        uint8_t *br_data = tbr->vbr->data;
        uint32_t br_dsize = tbr->dsize;
        const uint8_t *src = src0;

        /* XXX use double mapping like Linux kernel? */
        if (cplen > br_dsize - windex) {
                uint32_t fraglen = br_dsize - windex;

                /* Wrap-around detected */
                memcpy(br_data + windex, src, fraglen);
                memcpy(br_data, src + fraglen, cplen - fraglen);
        } else {
                memcpy(br_data + windex, src, cplen);
        }

        return vmbus_br_idxinc(windex, cplen, br_dsize);
}

/*
 * Write a scattered channel packet to the TX bufring.
 *
 * The start offset of the packet is written as a 64-bit value
 * immediately after the packet itself.
 *
 * The write goes through three stages:
 * 1. Reserve space in the ring buffer for the new data.
 *    The writer atomically advances its private write index (tbr->windex).
 * 2. Copy the new data into the ring.
 * 3. Publish the host-visible write index (vbr->windex), which tells the
 *    host how far it may read.
 */
static int
vmbus_txbr_write(struct vmbus_br *tbr, const struct iovec iov[], int iovlen)
{
        struct vmbus_bufring *vbr = tbr->vbr;
        uint32_t ring_size = tbr->dsize;
        uint32_t old_windex, next_windex, windex, total;
        uint64_t save_windex;
        int i;

        total = 0;
        for (i = 0; i < iovlen; i++)
                total += iov[i].iov_len;
        total += sizeof(save_windex);

        /* Reserve space in ring */
        do {
                uint32_t avail;

                /* Snapshot the current private write index */
                old_windex = tbr->windex;

                /*
                 * Prevent the compiler from reordering the snapshot with
                 * the availability calculation below.
                 */
                rte_compiler_barrier();

                avail = vmbus_br_availwrite(tbr, old_windex);

                /*
                 * If there is not enough space, tell the caller.  Note the
                 * <=: one byte always stays free so that windex == rindex
                 * unambiguously means an empty ring.
                 */
                if (avail <= total)
                        return -EAGAIN;

                next_windex = vmbus_br_idxinc(old_windex, total, ring_size);

                /* Atomic update of next write_index for other threads */
        } while (!rte_atomic32_cmpset(&tbr->windex, old_windex, next_windex));

        /* Space from old..new is now reserved */
        windex = old_windex;
        for (i = 0; i < iovlen; i++)
                windex = vmbus_txbr_copyto(tbr, windex, iov[i].iov_base, iov[i].iov_len);

        /* Append the packet's start offset, kept in the upper 32 bits. */
        save_windex = ((uint64_t)old_windex) << 32;
        windex = vmbus_txbr_copyto(tbr, windex, &save_windex,
                                   sizeof(save_windex));

        /* The reserved region must exactly match the region written */
        if (windex != next_windex)
                return -EINVAL;

        /*
         * Ensure the copied data is visible before publishing the new host
         * index; x86 keeps stores ordered, so a compiler barrier suffices.
         */
        rte_compiler_barrier();

        /*
         * Check in our reservation: wait until it is our turn, then publish
         * the host-visible write index.
         */
        while (!rte_atomic32_cmpset(&vbr->windex, old_windex, next_windex))
                _mm_pause();

        return 0;
}

int rte_vmbus_chan_send(struct vmbus_br *txbr, uint16_t type, void *data,
                        uint32_t dlen, uint32_t flags)
{
        struct vmbus_chanpkt pkt;
        unsigned int pktlen, pad_pktlen;
        const uint32_t hlen = sizeof(pkt);
        uint64_t pad = 0;
        struct iovec iov[3];
        int error;

        pktlen = hlen + dlen;
        pad_pktlen = ALIGN(pktlen, sizeof(uint64_t));

        pkt.hdr.type = type;
        pkt.hdr.flags = flags;
        pkt.hdr.hlen = hlen >> VMBUS_CHANPKT_SIZE_SHIFT;
        pkt.hdr.tlen = pad_pktlen >> VMBUS_CHANPKT_SIZE_SHIFT;
        pkt.hdr.xactid = VMBUS_RQST_ERROR;

        iov[0].iov_base = &pkt;
        iov[0].iov_len = hlen;
        iov[1].iov_base = data;
        iov[1].iov_len = dlen;
        iov[2].iov_base = &pad;
        iov[2].iov_len = pad_pktlen - pktlen;

        error = vmbus_txbr_write(txbr, iov, 3);

        return error;
}

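/*
 * Illustrative sketch, not part of the original file: sending an inband
 * request and retrying while the ring is momentarily full.  The packet
 * type value 6 (VMBUS_CHANPKT_TYPE_INBAND in the VMBus protocol) and the
 * completion-requested flag value 1 (VMBUS_CHANPKT_FLAG_RC) are stated
 * here as assumptions rather than taken from vmbus_bufring.h.
 */
static __attribute__((unused)) int
vmbus_example_send(struct vmbus_br *txbr, void *req, uint32_t reqlen)
{
        const uint16_t inband = 6;      /* assumed VMBUS_CHANPKT_TYPE_INBAND */
        const uint32_t flag_rc = 1;     /* assumed VMBUS_CHANPKT_FLAG_RC */
        int ret;

        /* -EAGAIN means the host has not yet drained enough of the ring. */
        do {
                ret = rte_vmbus_chan_send(txbr, inband, req, reqlen, flag_rc);
                if (ret == -EAGAIN)
                        _mm_pause();
        } while (ret == -EAGAIN);

        return ret;
}
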
static inline uint32_t
vmbus_rxbr_copyfrom(const struct vmbus_br *rbr, uint32_t rindex,
                    void *dst0, size_t cplen)
{
        const uint8_t *br_data = rbr->vbr->data;
        uint32_t br_dsize = rbr->dsize;
        uint8_t *dst = dst0;

        if (cplen > br_dsize - rindex) {
                uint32_t fraglen = br_dsize - rindex;

                /* Wrap-around detected. */
                memcpy(dst, br_data + rindex, fraglen);
                memcpy(dst + fraglen, br_data, cplen - fraglen);
        } else {
                memcpy(dst, br_data + rindex, cplen);
        }

        return vmbus_br_idxinc(rindex, cplen, br_dsize);
}

/* Copy data from the receive ring without advancing the read index */
static int
vmbus_rxbr_peek(const struct vmbus_br *rbr, void *data, size_t dlen)
{
        uint32_t avail;

        /*
         * At minimum, the requested data plus the trailing 64-bit channel
         * packet offset must be available.
         */
        avail = vmbus_br_availread(rbr);
        if (avail < dlen + sizeof(uint64_t))
                return -EAGAIN;

        vmbus_rxbr_copyfrom(rbr, rbr->vbr->rindex, data, dlen);
        return 0;
}

/*
 * Copy data from the receive ring and advance the read index.
 * NOTE:
 * We assume (dlen + skip) == sizeof(channel packet).
 */
static int
vmbus_rxbr_read(struct vmbus_br *rbr, void *data, size_t dlen, size_t skip)
{
        struct vmbus_bufring *vbr = rbr->vbr;
        uint32_t br_dsize = rbr->dsize;
        uint32_t rindex;

        if (vmbus_br_availread(rbr) < dlen + skip + sizeof(uint64_t))
                return -EAGAIN;

        /* Record where the host was when the read started (for debugging) */
        rbr->windex = rbr->vbr->windex;

        /* Copy the channel packet from the RX bufring. */
        rindex = vmbus_br_idxinc(rbr->vbr->rindex, skip, br_dsize);
        rindex = vmbus_rxbr_copyfrom(rbr, rindex, data, dlen);

        /* Skip the packet's trailing 64-bit offset, which is of no use here. */
        rindex = vmbus_br_idxinc(rindex, sizeof(uint64_t), br_dsize);

        /* Update the read index _after_ the channel packet is fetched. */
        rte_compiler_barrier();

        vbr->rindex = rindex;

        return 0;
}

int rte_vmbus_chan_recv_raw(struct vmbus_br *rxbr,
                            void *data, uint32_t *len)
{
        struct vmbus_chanpkt_hdr pkt;
        uint32_t dlen, bufferlen = *len;
        int error;

        error = vmbus_rxbr_peek(rxbr, &pkt, sizeof(pkt));
        if (error)
                return error;

        if (unlikely(pkt.hlen < VMBUS_CHANPKT_HLEN_MIN))
                /* XXX: the channel is effectively dead at this point. */
                return -EIO;

        if (unlikely(pkt.hlen > pkt.tlen))
                return -EIO;

        /* Lengths are in quadwords (8-byte units) */
        dlen = pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT;
        *len = dlen;

        /* If the caller's buffer is not large enough */
        if (unlikely(dlen > bufferlen))
                return -ENOBUFS;

        /* Read the whole packet, header included (hence skip = 0) */
        error = vmbus_rxbr_read(rxbr, data, dlen, 0);
        if (error)
                return error;

        /* Return bytes consumed from the ring: packet plus 64-bit offset */
        return dlen + sizeof(uint64_t);
}
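
/*
 * Illustrative sketch, not part of the original file: draining the RX
 * ring until it reports empty.  The 4 KiB scratch buffer and the
 * handle_pkt() callback are assumptions made for the example.
 */
static __attribute__((unused)) void
vmbus_example_recv(struct vmbus_br *rxbr,
                   void (*handle_pkt)(const void *buf, uint32_t len))
{
        uint8_t buf[4096];      /* assumed large enough for one packet */
        uint32_t len;
        int ret;

        for (;;) {
                len = sizeof(buf);
                ret = rte_vmbus_chan_recv_raw(rxbr, buf, &len);
                if (ret == -EAGAIN)
                        break;          /* ring is empty */
                if (ret < 0)
                        break;          /* -EIO or -ENOBUFS */

                /* len now holds the packet length, header included. */
                handle_pkt(buf, len);
        }
}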