tools/perf/examples/bpf/augmented_raw_syscalls.c - linux - Git at Google

 // SPDX-License-Identifier: GPL-2.0
 /*
  * Augment the raw_syscalls tracepoints with the contents of the pointer arguments.
  *
  * Test it with:
  *
  * perf trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c cat /etc/passwd > /dev/null
  *
  * This exactly matches what is marshalled into the raw_syscalls:sys_enter
  * payload expected by the 'perf trace' beautifiers.
  *
  * For now it just uses the existing tracepoint augmentation code in 'perf
  * trace', in the next csets we'll hook up these with the sys_enter/sys_exit
  * code that will combine entry/exit in a strace like way.
  */

 #include <stdio.h>
 #include <linux/socket.h>

 /*
  * bpf-output associated map: the perf event array that sys_enter() passes
  * to perf_event_output() together with BPF_F_CURRENT_CPU.
  */
 struct bpf_map SEC("maps") __augmented_syscalls__ = {
 	.type	     = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
 	.key_size    = sizeof(int),
 	.value_size  = sizeof(u32),
 	/* __NR_CPUS__ is defined outside this file; presumably the CPU count. */
 	.max_entries = __NR_CPUS__,
 };

 /*
  * Context handed to the raw_syscalls:sys_enter program (see sys_enter()):
  * an 8-byte leading field, the syscall number and six argument slots.
  */
 struct syscall_enter_args {
 	/* Opaque to this file; presumably the common tracepoint header. */
 	unsigned long long common_tp_fields;
 	/* Compared against SYS_OPEN / SYS_OPENAT in sys_enter(). */
 	long syscall_nr;
 	/* Raw syscall arguments; pointer arguments are read from here. */
 	unsigned long args[6];
 };

 /*
  * Context handed to the raw_syscalls:sys_exit program (see sys_exit()):
  * an 8-byte leading field, the syscall number and its return value.
  */
 struct syscall_exit_args {
 	/* Opaque to this file; presumably the common tracepoint header. */
 	unsigned long long common_tp_fields;
 	long syscall_nr;
 	long ret;
 };

 /*
  * Filename record that sys_enter() appends after the raw syscall arguments.
  */
 struct augmented_filename {
 	/* Set from the value returned by probe_read_str() in sys_enter(). */
 	unsigned int size;
 	/* Written as 0 by sys_enter(). */
 	int reserved;
 	/* Destination buffer for the string copied by probe_read_str(). */
 	char value[256];
 };

 /*
  * Syscall numbers whose filename argument sys_enter() augments.
  * NOTE(review): hard-coded here; presumably the x86_64 values - confirm
  * before using on another architecture.
  */
 #define SYS_OPEN	2
 #define SYS_OPENAT	257

 /*
  * sys_enter - emit a raw_syscalls:sys_enter record, augmented for open/openat.
  *
  * @args: tracepoint context; copied with probe_read() into the output record.
  *
  * For SYS_OPEN and SYS_OPENAT the filename pointer argument is copied with
  * probe_read_str() and appended to the raw arguments as a struct
  * augmented_filename, trimmed to the bytes of 'value' actually in use. Every
  * other syscall, and any filename that could not be read, is emitted as the
  * raw arguments only.
  *
  * Always returns 0.
  */
 SEC("raw_syscalls:sys_enter")
 int sys_enter(struct syscall_enter_args *args)
 {
 	struct {
 		struct syscall_enter_args args;
 		struct augmented_filename filename;
 	} augmented_args;
 	/* Default payload: just the raw syscall arguments. */
 	unsigned int len = sizeof(augmented_args.args);
 	const void *filename_arg = NULL;

 	probe_read(&augmented_args.args, sizeof(augmented_args.args), args);
 	/*
 	 * Yonghong and Edward Cree sayz:
 	 *
 	 * https://www.spinics.net/lists/netdev/msg531645.html
 	 *
 	 * >>   R0=inv(id=0) R1=inv2 R6=ctx(id=0,off=0,imm=0) R7=inv64 R10=fp0,call_-1
 	 * >> 10: (bf) r1 = r6
 	 * >> 11: (07) r1 += 16
 	 * >> 12: (05) goto pc+2
 	 * >> 15: (79) r3 = *(u64 *)(r1 +0)
 	 * >> dereference of modified ctx ptr R1 off=16 disallowed
 	 * > Aha, we at least got a different error message this time.
 	 * > And indeed llvm has done that optimisation, rather than the more obvious
 	 * > 11: r3 = *(u64 *)(r1 +16)
 	 * > because it wants to have lots of reads share a single insn.  You may be able
 	 * > to defeat that optimisation by adding compiler barriers, idk.  Maybe someone
 	 * > with llvm knowledge can figure out how to stop it (ideally, llvm would know
 	 * > when it's generating for bpf backend and not do that).  -O0?  ¯\_(ツ)_/¯
 	 *
 	 * The optimization mostly looks like below:
 	 *
 	 *	br1:
 	 * 	...
 	 *	r1 += 16
 	 *	goto merge
 	 *	br2:
 	 *	...
 	 *	r1 += 20
 	 *	goto merge
 	 *	merge:
 	 *	*(u64 *)(r1 + 0)
 	 *
 	 * The compiler tries to merge common loads. There is no easy way to
 	 * stop this compiler optimization without turning off a lot of other
 	 * optimizations. The easiest way is to add barriers:
 	 *
 	 * 	 __asm__ __volatile__("": : :"memory")
 	 *
 	 * 	 after the ctx memory access to prevent their down stream merging.
 	 */
 	switch (augmented_args.args.syscall_nr) {
 	case SYS_OPEN:
 		filename_arg = (const void *)args->args[0];
 		__asm__ __volatile__("": : :"memory");
 		break;
 	case SYS_OPENAT:
 		filename_arg = (const void *)args->args[1];
 		break;
 	default:
 		break;
 	}

 	if (filename_arg != NULL) {
 		/*
 		 * Keep the result signed: probe_read_str() returns a negative
 		 * error (e.g. -EFAULT) on failure. Stored straight into the
 		 * unsigned 'size' field that would look like a huge length and
 		 * the whole, partly meaningless, record would be emitted.
 		 */
 		int size = probe_read_str(&augmented_args.filename.value,
 					  sizeof(augmented_args.filename.value),
 					  filename_arg);

 		if (size > 0) {
 			augmented_args.filename.reserved = 0;
 			augmented_args.filename.size = size;
 			/*
 			 * Fixed part (raw args + filename header) plus the
 			 * bytes of 'value' in use. Only the variable part is
 			 * masked: (size - 1) & (sizeof(value) - 1) is bounded
 			 * to 0..255, so len stays within
 			 * sizeof(augmented_args). Masking the total length
 			 * instead wraps it for strings of 184 bytes or more.
 			 * Relies on sizeof(value) being a power of two.
 			 */
 			len = sizeof(augmented_args) - sizeof(augmented_args.filename.value);
 			len += ((size - 1) & (sizeof(augmented_args.filename.value) - 1)) + 1;
 		}
 		/* On failure len still covers only the raw arguments. */
 	}

 	perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len);
 	return 0;
 }

 /*
  * sys_exit - raw_syscalls:sys_exit program; performs no augmentation yet.
  *
  * @args: tracepoint context, currently unused.
  *
  * Returns 1 unconditionally. This is meant to become 0 as soon as data
  * returned by the kernel (e.g. for 'read') starts being copied here.
  */
 SEC("raw_syscalls:sys_exit")
 int sys_exit(struct syscall_exit_args *args)
 {
 	return 1;
 }

 /*
  * License declaration for this program. license() is defined outside this
  * file; presumably a macro that emits the BPF license section - confirm.
  */
 license(GPL);
	// SPDX-License-Identifier: GPL-2.0
	/*
	* Augment the raw_syscalls tracepoints with the contents of the pointer arguments.
	*
	* Test it with:
	*
	* perf trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c cat /etc/passwd > /dev/null
	*
	* This exactly matches what is marshalled into the raw_syscalls:sys_enter
	* payload expected by the 'perf trace' beautifiers.
	*
	* For now it just uses the existing tracepoint augmentation code in 'perf
	* trace', in the next csets we'll hook up these with the sys_enter/sys_exit
	* code that will combine entry/exit in a strace like way.
	*/

	#include <stdio.h>
	#include <linux/socket.h>

	/*
	 * bpf-output associated map: the perf event array that sys_enter() passes
	 * to perf_event_output() together with BPF_F_CURRENT_CPU.
	 */
	struct bpf_map SEC("maps") __augmented_syscalls__ = {
		.type	     = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
		.key_size    = sizeof(int),
		.value_size  = sizeof(u32),
		/* __NR_CPUS__ is defined outside this file; presumably the CPU count. */
		.max_entries = __NR_CPUS__,
	};

	/*
	 * Context handed to the raw_syscalls:sys_enter program (see sys_enter()):
	 * an 8-byte leading field, the syscall number and six argument slots.
	 */
	struct syscall_enter_args {
		/* Opaque to this file; presumably the common tracepoint header. */
		unsigned long long common_tp_fields;
		/* Compared against SYS_OPEN / SYS_OPENAT in sys_enter(). */
		long syscall_nr;
		/* Raw syscall arguments; pointer arguments are read from here. */
		unsigned long args[6];
	};

	/*
	 * Context handed to the raw_syscalls:sys_exit program (see sys_exit()):
	 * an 8-byte leading field, the syscall number and its return value.
	 */
	struct syscall_exit_args {
		/* Opaque to this file; presumably the common tracepoint header. */
		unsigned long long common_tp_fields;
		long syscall_nr;
		long ret;
	};

	/*
	 * Filename record that sys_enter() appends after the raw syscall arguments.
	 */
	struct augmented_filename {
		/* Set from the value returned by probe_read_str() in sys_enter(). */
		unsigned int size;
		/* Written as 0 by sys_enter(). */
		int reserved;
		/* Destination buffer for the string copied by probe_read_str(). */
		char value[256];
	};

	/*
	 * Syscall numbers whose filename argument sys_enter() augments.
	 * NOTE(review): hard-coded here; presumably the x86_64 values - confirm
	 * before using on another architecture.
	 */
	#define SYS_OPEN	2
	#define SYS_OPENAT	257

	/*
	 * sys_enter - emit a raw_syscalls:sys_enter record, augmented for open/openat.
	 *
	 * @args: tracepoint context; copied with probe_read() into the output record.
	 *
	 * For SYS_OPEN and SYS_OPENAT the filename pointer argument is copied with
	 * probe_read_str() and appended to the raw arguments as a struct
	 * augmented_filename, trimmed to the bytes of 'value' actually in use. Every
	 * other syscall, and any filename that could not be read, is emitted as the
	 * raw arguments only.
	 *
	 * Always returns 0.
	 */
	SEC("raw_syscalls:sys_enter")
	int sys_enter(struct syscall_enter_args *args)
	{
		struct {
			struct syscall_enter_args args;
			struct augmented_filename filename;
		} augmented_args;
		/* Default payload: just the raw syscall arguments. */
		unsigned int len = sizeof(augmented_args.args);
		const void *filename_arg = NULL;

		probe_read(&augmented_args.args, sizeof(augmented_args.args), args);
		/*
		 * Yonghong and Edward Cree sayz:
		 *
		 * https://www.spinics.net/lists/netdev/msg531645.html
		 *
		 * >> R0=inv(id=0) R1=inv2 R6=ctx(id=0,off=0,imm=0) R7=inv64 R10=fp0,call_-1
		 * >> 10: (bf) r1 = r6
		 * >> 11: (07) r1 += 16
		 * >> 12: (05) goto pc+2
		 * >> 15: (79) r3 = *(u64 *)(r1 +0)
		 * >> dereference of modified ctx ptr R1 off=16 disallowed
		 * > Aha, we at least got a different error message this time.
		 * > And indeed llvm has done that optimisation, rather than the more obvious
		 * > 11: r3 = *(u64 *)(r1 +16)
		 * > because it wants to have lots of reads share a single insn. You may be able
		 * > to defeat that optimisation by adding compiler barriers, idk. Maybe someone
		 * > with llvm knowledge can figure out how to stop it (ideally, llvm would know
		 * > when it's generating for bpf backend and not do that). -O0? ¯\_(ツ)_/¯
		 *
		 * The optimization mostly looks like below:
		 *
		 *	br1:
		 *	...
		 *	r1 += 16
		 *	goto merge
		 *	br2:
		 *	...
		 *	r1 += 20
		 *	goto merge
		 *	merge:
		 *	*(u64 *)(r1 + 0)
		 *
		 * The compiler tries to merge common loads. There is no easy way to
		 * stop this compiler optimization without turning off a lot of other
		 * optimizations. The easiest way is to add barriers:
		 *
		 *	 __asm__ __volatile__("": : :"memory")
		 *
		 *	 after the ctx memory access to prevent their down stream merging.
		 */
		switch (augmented_args.args.syscall_nr) {
		case SYS_OPEN:
			filename_arg = (const void *)args->args[0];
			__asm__ __volatile__("": : :"memory");
			break;
		case SYS_OPENAT:
			filename_arg = (const void *)args->args[1];
			break;
		default:
			break;
		}

		if (filename_arg != NULL) {
			/*
			 * Keep the result signed: probe_read_str() returns a negative
			 * error (e.g. -EFAULT) on failure. Stored straight into the
			 * unsigned 'size' field that would look like a huge length and
			 * the whole, partly meaningless, record would be emitted.
			 */
			int size = probe_read_str(&augmented_args.filename.value,
						  sizeof(augmented_args.filename.value),
						  filename_arg);

			if (size > 0) {
				augmented_args.filename.reserved = 0;
				augmented_args.filename.size = size;
				/*
				 * Fixed part (raw args + filename header) plus the
				 * bytes of 'value' in use. Only the variable part is
				 * masked: (size - 1) & (sizeof(value) - 1) is bounded
				 * to 0..255, so len stays within
				 * sizeof(augmented_args). Masking the total length
				 * instead wraps it for strings of 184 bytes or more.
				 * Relies on sizeof(value) being a power of two.
				 */
				len = sizeof(augmented_args) - sizeof(augmented_args.filename.value);
				len += ((size - 1) & (sizeof(augmented_args.filename.value) - 1)) + 1;
			}
			/* On failure len still covers only the raw arguments. */
		}

		perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len);
		return 0;
	}

	/*
	 * sys_exit - raw_syscalls:sys_exit program; performs no augmentation yet.
	 *
	 * @args: tracepoint context, currently unused.
	 *
	 * Returns 1 unconditionally. This is meant to become 0 as soon as data
	 * returned by the kernel (e.g. for 'read') starts being copied here.
	 */
	SEC("raw_syscalls:sys_exit")
	int sys_exit(struct syscall_exit_args *args)
	{
		return 1;
	}

	/*
	 * License declaration for this program. license() is defined outside this
	 * file; presumably a macro that emits the BPF license section - confirm.
	 */
	license(GPL);