// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2023 Google LLC
* Author: Vincent Donnefort <vdonnefort@google.com>
*/
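
/*
 * Hypervisor-side writer for the pKVM tracing ring buffer. The host donates
 * the trace descriptor, shares the data pages and consumes events through
 * the reader-page swap; the hypervisor produces events in the kernel
 * ring-buffer binary format.
 */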
#include <nvhe/alloc.h>
#include <nvhe/clock.h>
#include <nvhe/mem_protect.h>
#include <nvhe/mm.h>
#include <nvhe/trace.h>
#include <asm/percpu.h>
#include <asm/kvm_mmu.h>
#include <asm/local.h>
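
/*
 * Pages are linked through their ->list.next pointer; the two low bits of
 * that pointer encode the state of the page it points to: HYP_RB_PAGE_HEAD
 * marks the head page (the next one to be read), HYP_RB_PAGE_UPDATE marks a
 * head move in progress.
 */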
#define HYP_RB_PAGE_HEAD 1UL
#define HYP_RB_PAGE_UPDATE 2UL
#define HYP_RB_FLAG_MASK 3UL
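
/* Hypervisor-side state of a single data page shared with the host */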
struct hyp_buffer_page {
struct list_head list;
struct buffer_data_page *page;
unsigned long write;
unsigned long entries;
u32 id;
};
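
/* Hypervisor-side state of a per-CPU ring buffer */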
struct hyp_rb_per_cpu {
struct ring_buffer_meta *meta;
struct hyp_buffer_page *tail_page;
struct hyp_buffer_page *reader_page;
struct hyp_buffer_page *head_page;
struct hyp_buffer_page *bpages;
unsigned long nr_pages;
unsigned long last_overrun;
u64 write_stamp;
atomic_t status;
};
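
/* Values for hyp_rb_per_cpu.status */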
#define HYP_RB_UNAVAILABLE 0
#define HYP_RB_READY 1
#define HYP_RB_WRITING 2
DEFINE_PER_CPU(struct hyp_rb_per_cpu, trace_rb);
DEFINE_HYP_SPINLOCK(trace_rb_lock);
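
/*
 * Atomically set the state bits of bpage->list.next. Fails (returns false)
 * if the pointer was concurrently modified, e.g. by the reader swapping
 * pages.
 */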
static bool rb_set_flag(struct hyp_buffer_page *bpage, int new_flag)
{
unsigned long ret, val = (unsigned long)bpage->list.next;
ret = cmpxchg((unsigned long *)&bpage->list.next,
val, (val & ~HYP_RB_FLAG_MASK) | new_flag);
return ret == val;
}
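
/* Strip the state bits to recover the hyp_buffer_page from a ->list pointer */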
static struct hyp_buffer_page *rb_hyp_buffer_page(struct list_head *list)
{
unsigned long ptr = (unsigned long)list & ~HYP_RB_FLAG_MASK;
return container_of((struct list_head *)ptr, struct hyp_buffer_page, list);
}
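
/* Follow the ->next link, ignoring the state bits */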
static struct hyp_buffer_page *rb_next_page(struct hyp_buffer_page *bpage)
{
return rb_hyp_buffer_page(bpage->list.next);
}
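
/*
 * A page is the head page when its predecessor's ->next link carries
 * HYP_RB_PAGE_HEAD.
 */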
static bool rb_is_head_page(struct hyp_buffer_page *bpage)
{
return (unsigned long)bpage->list.prev->next & HYP_RB_PAGE_HEAD;
}
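
/*
 * Walk the ring from the cached head_page until the page flagged as head is
 * found, then update the cache. A concurrent head move can make the walk
 * miss, so retry a few times before giving up.
 */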
static struct hyp_buffer_page *rb_set_head_page(struct hyp_rb_per_cpu *cpu_buffer)
{
struct hyp_buffer_page *bpage, *prev_head;
int cnt = 0;
again:
bpage = prev_head = cpu_buffer->head_page;
do {
if (rb_is_head_page(bpage)) {
cpu_buffer->head_page = bpage;
return bpage;
}
bpage = rb_next_page(bpage);
} while (bpage != prev_head);
	/* We might have raced with the writer; let's try again */
if (++cnt < 3)
goto again;
return NULL;
}
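
/*
 * Swap the reader page with the head page: give the reader page the head's
 * neighbours, then publish the swap by flipping the previous page's ->next
 * link with a cmpxchg. If the writer moved the head in the meantime, retry
 * against the new head.
 */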
static int rb_swap_reader_page(struct hyp_rb_per_cpu *cpu_buffer)
{
unsigned long *old_head_link, old_link_val, new_link_val, overrun;
struct hyp_buffer_page *head, *reader = cpu_buffer->reader_page;
spin:
	/* Update cpu_buffer->head_page according to HYP_RB_PAGE_HEAD */
head = rb_set_head_page(cpu_buffer);
if (!head)
return -ENODEV;
	/* Connect the reader page around the head page */
reader->list.next = head->list.next;
reader->list.prev = head->list.prev;
/* The reader page points to the new header page */
rb_set_flag(reader, HYP_RB_PAGE_HEAD);
/*
* Paired with the cmpxchg in rb_move_tail(). Order the read of the head
* page and overrun.
*/
smp_mb();
overrun = READ_ONCE(cpu_buffer->meta->overrun);
/* Try to swap the prev head link to the reader page */
old_head_link = (unsigned long *)&reader->list.prev->next;
old_link_val = (*old_head_link & ~HYP_RB_FLAG_MASK) | HYP_RB_PAGE_HEAD;
new_link_val = (unsigned long)&reader->list;
if (cmpxchg(old_head_link, old_link_val, new_link_val)
!= old_link_val)
goto spin;
cpu_buffer->head_page = rb_hyp_buffer_page(reader->list.next);
cpu_buffer->head_page->list.prev = &reader->list;
cpu_buffer->reader_page = head;
cpu_buffer->meta->reader_page.lost_events = overrun - cpu_buffer->last_overrun;
cpu_buffer->meta->reader_page.id = cpu_buffer->reader_page->id;
cpu_buffer->last_overrun = overrun;
return 0;
}
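
/*
 * Advance the tail page once it is full. If the tail has caught up with the
 * head page, push the head forward too: the entries of the reclaimed page
 * are accounted as overrun and the page itself as lost.
 */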
static struct hyp_buffer_page *
rb_move_tail(struct hyp_rb_per_cpu *cpu_buffer)
{
	struct hyp_buffer_page *tail_page, *new_tail;
tail_page = cpu_buffer->tail_page;
new_tail = rb_next_page(tail_page);
again:
/*
* We caught the reader ... Let's try to move the head page.
* The writer can only rely on ->next links to check if this is head.
*/
if ((unsigned long)tail_page->list.next & HYP_RB_PAGE_HEAD) {
/* The reader moved the head in between */
if (!rb_set_flag(tail_page, HYP_RB_PAGE_UPDATE))
goto again;
WRITE_ONCE(cpu_buffer->meta->overrun,
cpu_buffer->meta->overrun + new_tail->entries);
WRITE_ONCE(cpu_buffer->meta->pages_lost,
cpu_buffer->meta->pages_lost + 1);
/* Move the head */
rb_set_flag(new_tail, HYP_RB_PAGE_HEAD);
/* The new head is in place, reset the update flag */
rb_set_flag(tail_page, 0);
}
local_set(&new_tail->page->commit, 0);
new_tail->write = 0;
new_tail->entries = 0;
WRITE_ONCE(cpu_buffer->meta->pages_touched,
cpu_buffer->meta->pages_touched + 1);
cpu_buffer->tail_page = new_tail;
return new_tail;
}
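
/* Size of an event on the page: payload, header and the array[0] length slot */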
unsigned long rb_event_size(unsigned long length)
{
struct ring_buffer_event *event;
return length + RB_EVNT_HDR_SIZE + sizeof(event->array[0]);
}
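
/*
 * Write a TIME_EXTEND event carrying the full delta and return where the
 * payload event must go, 8 bytes further: a TIME_EXTEND event is the 4-byte
 * header plus array[0].
 */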
static struct ring_buffer_event *
rb_add_ts_extend(struct ring_buffer_event *event, u64 delta)
{
event->type_len = RINGBUF_TYPE_TIME_EXTEND;
event->time_delta = delta & TS_MASK;
event->array[0] = delta >> TS_SHIFT;
return (struct ring_buffer_event *)((unsigned long)event + 8);
}
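
/*
 * Reserve room on the tail page for an event of @length bytes, moving the
 * tail first if the page would overflow. A delta too large for the 27-bit
 * time_delta field gets a TIME_EXTEND event prepended.
 */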
static struct ring_buffer_event *
rb_reserve_next(struct hyp_rb_per_cpu *cpu_buffer, unsigned long length)
{
unsigned long ts_ext_size = 0, event_size = rb_event_size(length);
struct hyp_buffer_page *tail_page = cpu_buffer->tail_page;
struct ring_buffer_event *event;
unsigned long write, prev_write;
u64 ts, time_delta;
ts = trace_clock();
time_delta = ts - cpu_buffer->write_stamp;
if (test_time_stamp(time_delta))
ts_ext_size = 8;
prev_write = tail_page->write;
write = prev_write + event_size + ts_ext_size;
if (unlikely(write > BUF_PAGE_SIZE))
tail_page = rb_move_tail(cpu_buffer);
if (!tail_page->entries) {
tail_page->page->time_stamp = ts;
time_delta = 0;
ts_ext_size = 0;
write = event_size;
prev_write = 0;
}
tail_page->write = write;
tail_page->entries++;
cpu_buffer->write_stamp = ts;
event = (struct ring_buffer_event *)(tail_page->page->data +
prev_write);
if (ts_ext_size) {
event = rb_add_ts_extend(event, time_delta);
time_delta = 0;
}
event->type_len = 0;
event->time_delta = time_delta;
event->array[0] = event_size - RB_EVNT_HDR_SIZE;
return event;
}
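
/*
 * Reserve a @length-byte payload in this CPU's ring buffer. On success the
 * CPU holds write ownership (HYP_RB_WRITING) until tracing_commit_entry();
 * returns NULL if the buffer is unavailable.
 */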
void *tracing_reserve_entry(unsigned long length)
{
struct hyp_rb_per_cpu *cpu_buffer = this_cpu_ptr(&trace_rb);
struct ring_buffer_event *rb_event;
if (atomic_cmpxchg(&cpu_buffer->status, HYP_RB_READY, HYP_RB_WRITING)
== HYP_RB_UNAVAILABLE)
return NULL;
rb_event = rb_reserve_next(cpu_buffer, length);
return &rb_event->array[1];
}
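
/*
 * Publish the event reserved by tracing_reserve_entry(): update the page
 * commit offset and the global entry count, then release write ownership.
 */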
void tracing_commit_entry(void)
{
struct hyp_rb_per_cpu *cpu_buffer = this_cpu_ptr(&trace_rb);
local_set(&cpu_buffer->tail_page->page->commit,
cpu_buffer->tail_page->write);
WRITE_ONCE(cpu_buffer->meta->entries,
cpu_buffer->meta->entries + 1);
/* Paired with rb_cpu_disable_writing() */
atomic_set_release(&cpu_buffer->status, HYP_RB_READY);
}
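
/* Pin a page shared by the host and initialize it as a data page */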
static int rb_page_init(struct hyp_buffer_page *bpage, unsigned long hva)
{
void *hyp_va = (void *)kern_hyp_va(hva);
int ret;
ret = hyp_pin_shared_mem(hyp_va, hyp_va + PAGE_SIZE);
if (ret)
return ret;
INIT_LIST_HEAD(&bpage->list);
bpage->page = (struct buffer_data_page *)hyp_va;
local_set(&bpage->page->commit, 0);
return 0;
}
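
/* A per-CPU buffer is loaded once its bpages array has been allocated */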
static bool rb_cpu_loaded(struct hyp_rb_per_cpu *cpu_buffer)
{
return !!cpu_buffer->bpages;
}
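
/*
 * Move the buffer to HYP_RB_UNAVAILABLE, waiting first for any in-flight
 * writer to release it. The acquire pairs with the release in
 * tracing_commit_entry().
 */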
static void rb_cpu_disable_writing(struct hyp_rb_per_cpu *cpu_buffer)
{
int prev_status;
/* Wait for release of the buffer */
do {
prev_status = atomic_cmpxchg_acquire(&cpu_buffer->status,
HYP_RB_READY,
HYP_RB_UNAVAILABLE);
} while (prev_status == HYP_RB_WRITING);
}
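
/* Allow writing again on a loaded buffer */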
static int rb_cpu_enable_writing(struct hyp_rb_per_cpu *cpu_buffer)
{
if (!rb_cpu_loaded(cpu_buffer))
return -ENODEV;
atomic_cmpxchg(&cpu_buffer->status, HYP_RB_UNAVAILABLE, HYP_RB_READY);
return 0;
}
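
/* Stop the writer, unpin all the shared pages and free the page array */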
static void rb_cpu_teardown(struct hyp_rb_per_cpu *cpu_buffer)
{
int i;
if (!rb_cpu_loaded(cpu_buffer))
return;
rb_cpu_disable_writing(cpu_buffer);
hyp_unpin_shared_mem((void *)cpu_buffer->meta,
(void *)(cpu_buffer->meta) + PAGE_SIZE);
for (i = 0; i < cpu_buffer->nr_pages; i++) {
struct hyp_buffer_page *bpage = &cpu_buffer->bpages[i];
if (!bpage->page)
continue;
hyp_unpin_shared_mem((void *)bpage->page,
(void *)bpage->page + PAGE_SIZE);
}
hyp_free(cpu_buffer->bpages);
	cpu_buffer->bpages = NULL;
}
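
/*
 * Check that the rb_page_desc, including its page_va array, fits within the
 * donated descriptor.
 */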
static bool rb_cpu_fits_desc(struct rb_page_desc *pdesc,
unsigned long desc_end)
{
unsigned long *end;
	/* Check we can at least read nr_page_va */
if ((unsigned long)&pdesc->nr_page_va >= desc_end)
return false;
end = &pdesc->page_va[pdesc->nr_page_va];
return (unsigned long)end <= desc_end;
}
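
/*
 * Build a ring buffer from the host-provided pages: page_va[0] becomes the
 * reader page (outside of the ring), the others form a circular list whose
 * last ->next link is flagged HYP_RB_PAGE_HEAD.
 */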
static int rb_cpu_init(struct rb_page_desc *pdesc, struct hyp_rb_per_cpu *cpu_buffer)
{
struct hyp_buffer_page *bpage;
int i, ret;
	/* At least one reader page and one head page */
if (pdesc->nr_page_va < 2)
return -EINVAL;
if (rb_cpu_loaded(cpu_buffer))
return -EBUSY;
bpage = hyp_zalloc(sizeof(*bpage) * pdesc->nr_page_va);
if (!bpage)
return hyp_alloc_errno();
cpu_buffer->bpages = bpage;
cpu_buffer->meta = (struct ring_buffer_meta *)kern_hyp_va(pdesc->meta_va);
ret = hyp_pin_shared_mem((void *)cpu_buffer->meta,
((void *)cpu_buffer->meta) + PAGE_SIZE);
if (ret) {
hyp_free(cpu_buffer->bpages);
return ret;
}
memset(cpu_buffer->meta, 0, sizeof(*cpu_buffer->meta));
cpu_buffer->meta->meta_page_size = PAGE_SIZE;
cpu_buffer->meta->nr_data_pages = cpu_buffer->nr_pages;
/* The reader page is not part of the ring initially */
ret = rb_page_init(bpage, pdesc->page_va[0]);
if (ret)
goto err;
cpu_buffer->nr_pages = 1;
cpu_buffer->reader_page = bpage;
cpu_buffer->tail_page = bpage + 1;
cpu_buffer->head_page = bpage + 1;
for (i = 1; i < pdesc->nr_page_va; i++) {
ret = rb_page_init(++bpage, pdesc->page_va[i]);
if (ret)
goto err;
bpage->list.next = &(bpage + 1)->list;
bpage->list.prev = &(bpage - 1)->list;
bpage->id = i;
cpu_buffer->nr_pages = i + 1;
}
/* Close the ring */
bpage->list.next = &cpu_buffer->tail_page->list;
cpu_buffer->tail_page->list.prev = &bpage->list;
/* The last init'ed page points to the head page */
rb_set_flag(bpage, HYP_RB_PAGE_HEAD);
cpu_buffer->last_overrun = 0;
return 0;
err:
rb_cpu_teardown(cpu_buffer);
return ret;
}
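
/* Swap the reader page of @cpu's ring buffer, on behalf of the host */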
int __pkvm_swap_reader_tracing(int cpu)
{
	struct hyp_rb_per_cpu *cpu_buffer;
int ret = 0;
hyp_spin_lock(&trace_rb_lock);
if (cpu >= hyp_nr_cpus) {
ret = -EINVAL;
goto err;
}
cpu_buffer = per_cpu_ptr(&trace_rb, cpu);
if (!rb_cpu_loaded(cpu_buffer))
ret = -ENODEV;
else
ret = rb_swap_reader_page(cpu_buffer);
err:
hyp_spin_unlock(&trace_rb_lock);
return ret;
}
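
/* Tear down every per-CPU buffer; trace_rb_lock must be held */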
static void __pkvm_teardown_tracing_locked(void)
{
int cpu;
hyp_assert_lock_held(&trace_rb_lock);
for (cpu = 0; cpu < hyp_nr_cpus; cpu++) {
struct hyp_rb_per_cpu *cpu_buffer = per_cpu_ptr(&trace_rb, cpu);
rb_cpu_teardown(cpu_buffer);
}
}
void __pkvm_teardown_tracing(void)
{
hyp_spin_lock(&trace_rb_lock);
__pkvm_teardown_tracing_locked();
hyp_spin_unlock(&trace_rb_lock);
}
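
/*
 * Load the tracing buffers from a host-donated descriptor: pin and
 * initialize a per-CPU ring buffer for each rb_page_desc found, then hand
 * the descriptor pages back to the host.
 */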
int __pkvm_load_tracing(unsigned long desc_hva, size_t desc_size)
{
struct hyp_trace_desc *desc = (struct hyp_trace_desc *)kern_hyp_va(desc_hva);
struct trace_page_desc *trace_pdesc = &desc->page_desc;
struct rb_page_desc *pdesc;
int ret, cpu;
if (!desc_size || !PAGE_ALIGNED(desc_hva) || !PAGE_ALIGNED(desc_size))
return -EINVAL;
ret = __pkvm_host_donate_hyp(hyp_virt_to_pfn((void *)desc),
desc_size >> PAGE_SHIFT);
if (ret)
return ret;
hyp_spin_lock(&trace_rb_lock);
trace_clock_update(&desc->clock_data);
for_each_rb_page_desc(pdesc, cpu, trace_pdesc) {
struct hyp_rb_per_cpu *cpu_buffer;
int cpu;
ret = -EINVAL;
if (!rb_cpu_fits_desc(pdesc, desc_hva + desc_size))
break;
cpu = pdesc->cpu;
if (cpu >= hyp_nr_cpus)
break;
cpu_buffer = per_cpu_ptr(&trace_rb, cpu);
ret = rb_cpu_init(pdesc, cpu_buffer);
if (ret)
break;
}
if (ret)
__pkvm_teardown_tracing_locked();
hyp_spin_unlock(&trace_rb_lock);
WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn((void *)desc),
desc_size >> PAGE_SHIFT));
return ret;
}
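
/*
 * Enable or disable writing on every loaded buffer. Enabling fails with
 * -EINVAL if no buffer is loaded at all.
 */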
int __pkvm_enable_tracing(bool enable)
{
int cpu, ret = enable ? -EINVAL : 0;
hyp_spin_lock(&trace_rb_lock);
for (cpu = 0; cpu < hyp_nr_cpus; cpu++) {
struct hyp_rb_per_cpu *cpu_buffer = per_cpu_ptr(&trace_rb, cpu);
if (enable) {
if (!rb_cpu_enable_writing(cpu_buffer))
ret = 0;
} else {
rb_cpu_disable_writing(cpu_buffer);
}
}
hyp_spin_unlock(&trace_rb_lock);
return ret;
}