Program type BPF_PROG_TYPE_PERF_EVENT
Perf event programs that can be attached to hardware and software perf events. Once attached the BPF program is executed each time the perf event is triggered.
Usage
Perf event programs are typically used for profiling and tracing. These programs are called with the CPU register state at the time of the event. This allows the programs to collect information for each event and aggregate it in a customized way.
Perf event programs are typically placed in the perf_event
ELF section.
Context
C Structure
struct bpf_perf_event_data {
bpf_user_pt_regs_t regs;
__u64 sample_period;
__u64 addr;
};
regs
This field contains the CPU registers at the time of the event. The type of the field is different for each architecture since each architecture has different registers. The helpers in tools/lib/bpf/bpf_tracing.h
can be used to access the registers in a portable way.
sample_period
This field contains the number of times this perf event has been triggered.
addr
Docs could be improved
This part of the docs is incomplete, contributions are very welcome
Attachment
There are three methods of attaching perf event programs, from oldest and least recommended to newest and most recommended; however, all methods share this first part in common.
The first step is to open a new perf event using the perf_event_open
syscall:
/* Open a sampling hardware event (CPU cycles) on CPU 0, across all
 * processes (pid = -1), sampling at SAMPLE_FREQ Hz (.freq = 1 means
 * sample_freq is a frequency, not a period). */
struct perf_event_attr attr = {
    .sample_freq = SAMPLE_FREQ,
    .freq = 1,
    .type = PERF_TYPE_HARDWARE,
    .config = PERF_COUNT_HW_CPU_CYCLES,
};
syscall(SYS_perf_event_open,
    &attr,                /* struct perf_event_attr * */
    -1,                   /* pid_t pid */
    0,                    /* int cpu */
    -1,                   /* int group_fd */
    PERF_FLAG_FD_CLOEXEC  /* unsigned long flags */
);
This syscall will return a file descriptor on success. Perf event programs can be attached to any event, as long as it is of type PERF_TYPE_HARDWARE
or PERF_TYPE_SOFTWARE
.
ioctl method
This is the oldest and least recommended method. After we have the perf event file descriptor we execute two ioctl
syscalls to attach our BPF program to the perf event and to enable it.
ioctl(perf_event_fd, PERF_EVENT_IOC_SET_BPF, bpf_prog_fd);
to attach.
ioctl(perf_event_fd, PERF_EVENT_IOC_ENABLE, 0);
to enable.
The perf event program can be temporarily disabled with the PERF_EVENT_IOC_DISABLE
ioctl option. Otherwise the perf event program stays attached until the perf_event goes away due to the closing of the perf_event FD or the program exiting. The perf event holds a reference to the BPF program so it will stay loaded until no perf events reference it anymore.
perf_event_open
PMU
Docs could be improved
This part of the docs is incomplete, contributions are very welcome
BPF link
This is the newest and most recommended method of attaching perf event programs.
After we have gotten the perf event file descriptor we attach the program by making a bpf link via the link create syscall command.
We call the syscall command with the BPF_PERF_EVENT
attach_type
, target_fd
set to the perf event file descriptor, prog_fd
to the file descriptor of the perf event program, and optionally a cookie, which can later be retrieved in the program with the bpf_get_attach_cookie helper.
Examples
profiling example
/* Copyright (c) 2016 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
#include <linux/ptrace.h>
#include <uapi/linux/bpf.h>
#include <uapi/linux/bpf_perf_event.h>
#include <uapi/linux/perf_event.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
/* Aggregation key: samples are counted per unique
 * (task command name, kernel stack id, user stack id) triple. */
struct key_t {
	char comm[TASK_COMM_LEN]; /* executable name of the current task */
	u32 kernstack;            /* id of the kernel stack stored in stackmap */
	u32 userstack;            /* id of the user stack stored in stackmap */
};
/* Hash map counting how many samples were observed for each key_t. */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, struct key_t);
	__type(value, u64);         /* number of samples seen for this key */
	__uint(max_entries, 10000);
} counts SEC(".maps");
/* Stack trace storage: bpf_get_stackid() writes the captured stack
 * into this map and returns the id (the key) it was stored under. */
struct {
	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
	__uint(key_size, sizeof(u32));
	__uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64)); /* one u64 per frame */
	__uint(max_entries, 10000);
} stackmap SEC(".maps");
/* Flags for bpf_get_stackid(): compare stacks by hash only
 * (BPF_F_FAST_STACK_CMP); the user variant additionally requests
 * the user-space stack instead of the kernel stack. */
#define KERN_STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP)
#define USER_STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP | BPF_F_USER_STACK)
SEC("perf_event")
/* Perf event handler: aggregates samples per (comm, kernel stack, user
 * stack) into the `counts` map and logs timing/address diagnostics via
 * the trace pipe. Always returns 0. */
int bpf_prog1(struct bpf_perf_event_data *ctx)
{
	/* Format strings are stack-resident, as required by bpf_trace_printk(). */
	char time_fmt1[] = "Time Enabled: %llu, Time Running: %llu";
	char time_fmt2[] = "Get Time Failed, ErrCode: %d";
	char addr_fmt[] = "Address recorded on event: %llx";
	char fmt[] = "CPU-%d period %lld ip %llx";
	u32 cpu = bpf_get_smp_processor_id();
	struct bpf_perf_event_value value_buf;
	struct key_t key;
	u64 *val, one = 1;
	int ret;

	if (ctx->sample_period < 10000)
		/* ignore warmup */
		return 0;
	bpf_get_current_comm(&key.comm, sizeof(key.comm));
	/* Capture both stacks; each call returns a stack id or a negative error. */
	key.kernstack = bpf_get_stackid(ctx, &stackmap, KERN_STACKID_FLAGS);
	key.userstack = bpf_get_stackid(ctx, &stackmap, USER_STACKID_FLAGS);
	/* If BOTH stack captures failed, log the raw sample info and bail out. */
	if ((int)key.kernstack < 0 && (int)key.userstack < 0) {
		bpf_trace_printk(fmt, sizeof(fmt), cpu, ctx->sample_period,
		PT_REGS_IP(&ctx->regs));
		return 0;
	}

	/* Report the event's enabled/running time, or the error code on failure. */
	ret = bpf_perf_prog_read_value(ctx, (void *)&value_buf, sizeof(struct bpf_perf_event_value));
	if (!ret)
		bpf_trace_printk(time_fmt1, sizeof(time_fmt1), value_buf.enabled, value_buf.running);
	else
		bpf_trace_printk(time_fmt2, sizeof(time_fmt2), ret);

	if (ctx->addr != 0)
		bpf_trace_printk(addr_fmt, sizeof(addr_fmt), ctx->addr);

	/* Count one sample for this key; create the entry on first sighting. */
	val = bpf_map_lookup_elem(&counts, &key);
	if (val)
		(*val)++;
	else
		bpf_map_update_elem(&counts, &key, &one, BPF_NOEXIST);
	return 0;
}
char _license[] SEC("license") = "GPL";
recording instruction pointer
/* Copyright 2016 Netflix, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
#include <linux/ptrace.h>
#include <uapi/linux/bpf.h>
#include <uapi/linux/bpf_perf_event.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#define MAX_IPS 8192
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, u64);
__type(value, u32);
__uint(max_entries, MAX_IPS);
} ip_map SEC(".maps");
SEC("perf_event")
/* Perf event handler: records a per-address histogram of sampled
 * instruction pointers in the `ip_map` hash map. Always returns 0. */
int do_sample(struct bpf_perf_event_data *ctx)
{
	u32 *count, first_hit = 1;
	u64 instr_ptr = PT_REGS_IP(&ctx->regs);

	count = bpf_map_lookup_elem(&ip_map, &instr_ptr);
	if (!count)
		/* E2BIG not tested for this example only */
		bpf_map_update_elem(&ip_map, &instr_ptr, &first_hit, BPF_NOEXIST);
	else
		*count += 1;
	return 0;
}
char _license[] SEC("license") = "GPL";
Helper functions
Supported helper functions
bpf_cgrp_storage_delete
bpf_cgrp_storage_get
bpf_copy_from_user
bpf_copy_from_user_task
bpf_current_task_under_cgroup
bpf_dynptr_data
bpf_dynptr_from_mem
bpf_dynptr_read
bpf_dynptr_write
bpf_find_vma
bpf_for_each_map_elem
bpf_get_attach_cookie
bpf_get_branch_snapshot
bpf_get_current_ancestor_cgroup_id
bpf_get_current_cgroup_id
bpf_get_current_comm
bpf_get_current_pid_tgid
bpf_get_current_task
bpf_get_current_task_btf
bpf_get_current_uid_gid
bpf_get_func_ip
bpf_get_ns_current_pid_tgid
bpf_get_numa_node_id
bpf_get_prandom_u32
bpf_get_smp_processor_id
bpf_get_stack
bpf_get_stackid
bpf_get_task_stack
bpf_jiffies64
bpf_kptr_xchg
bpf_ktime_get_boot_ns
bpf_ktime_get_ns
bpf_ktime_get_tai_ns
bpf_loop
bpf_map_delete_elem
bpf_map_lookup_elem
bpf_map_lookup_percpu_elem
bpf_map_peek_elem
bpf_map_pop_elem
bpf_map_push_elem
bpf_map_update_elem
bpf_per_cpu_ptr
bpf_perf_event_output
bpf_perf_event_read
bpf_perf_event_read_value
bpf_perf_prog_read_value
bpf_probe_read
bpf_probe_read_kernel
bpf_probe_read_kernel_str
bpf_probe_read_str
bpf_probe_read_user
bpf_probe_read_user_str
bpf_probe_write_user
bpf_read_branch_records
bpf_ringbuf_discard
bpf_ringbuf_discard_dynptr
bpf_ringbuf_output
bpf_ringbuf_query
bpf_ringbuf_reserve
bpf_ringbuf_reserve_dynptr
bpf_ringbuf_submit
bpf_ringbuf_submit_dynptr
bpf_send_signal
bpf_send_signal_thread
bpf_snprintf
bpf_snprintf_btf
bpf_spin_lock
bpf_spin_unlock
bpf_strncmp
bpf_tail_call
bpf_task_pt_regs
bpf_task_storage_delete
bpf_task_storage_get
bpf_this_cpu_ptr
bpf_timer_cancel
bpf_timer_init
bpf_timer_set_callback
bpf_timer_start
bpf_trace_printk
bpf_trace_vprintk
bpf_user_ringbuf_drain
KFuncs
Supported kfuncs
bpf_arena_alloc_pages
v6.12 -bpf_arena_free_pages
v6.12 -bpf_cast_to_kern_ctx
v6.12 -bpf_cgroup_acquire
v6.12 -bpf_cgroup_ancestor
v6.12 -bpf_cgroup_from_id
v6.12 -bpf_cgroup_release
v6.12 -bpf_copy_from_user_str
v6.12 -bpf_cpumask_acquire
v6.12 -bpf_cpumask_and
v6.12 -bpf_cpumask_any_and_distribute
v6.12 -bpf_cpumask_any_distribute
v6.12 -bpf_cpumask_clear
v6.12 -bpf_cpumask_clear_cpu
v6.12 -bpf_cpumask_copy
v6.12 -bpf_cpumask_create
v6.12 -bpf_cpumask_empty
v6.12 -bpf_cpumask_equal
v6.12 -bpf_cpumask_first
v6.12 -bpf_cpumask_first_and
v6.12 -bpf_cpumask_first_zero
v6.12 -bpf_cpumask_full
v6.12 -bpf_cpumask_intersects
v6.12 -bpf_cpumask_or
v6.12 -bpf_cpumask_release
v6.12 -bpf_cpumask_set_cpu
v6.12 -bpf_cpumask_setall
v6.12 -bpf_cpumask_subset
v6.12 -bpf_cpumask_test_and_clear_cpu
v6.12 -bpf_cpumask_test_and_set_cpu
v6.12 -bpf_cpumask_test_cpu
v6.12 -bpf_cpumask_weight
v6.12 -bpf_cpumask_xor
v6.12 -bpf_dynptr_adjust
v6.12 -bpf_dynptr_clone
v6.12 -bpf_dynptr_from_skb
v6.12 -bpf_dynptr_is_null
v6.12 -bpf_dynptr_is_rdonly
v6.12 -bpf_dynptr_size
v6.12 -bpf_dynptr_slice
v6.12 -bpf_dynptr_slice_rdwr
v6.12 -bpf_get_dentry_xattr
v6.12 -bpf_get_file_xattr
v6.12 -bpf_get_fsverity_digest
v6.12 -bpf_get_kmem_cache
v6.12 -bpf_get_task_exe_file
v6.12 -bpf_iter_bits_destroy
v6.12 -bpf_iter_bits_new
v6.12 -bpf_iter_bits_next
v6.12 -bpf_iter_css_destroy
v6.12 -bpf_iter_css_new
v6.12 -bpf_iter_css_next
v6.12 -bpf_iter_css_task_destroy
v6.12 -bpf_iter_css_task_new
v6.12 -bpf_iter_css_task_next
v6.12 -bpf_iter_kmem_cache_destroy
v6.12 -bpf_iter_kmem_cache_new
v6.12 -bpf_iter_kmem_cache_next
v6.12 -bpf_iter_num_destroy
v6.12 -bpf_iter_num_new
v6.12 -bpf_iter_num_next
v6.12 -bpf_iter_scx_dsq_destroy
v6.12 -bpf_iter_scx_dsq_new
v6.12 -bpf_iter_scx_dsq_next
v6.12 -bpf_iter_task_destroy
v6.12 -bpf_iter_task_new
v6.12 -bpf_iter_task_next
v6.12 -bpf_iter_task_vma_destroy
v6.12 -bpf_iter_task_vma_new
v6.12 -bpf_iter_task_vma_next
v6.12 -bpf_key_put
v6.12 -bpf_list_pop_back
v6.12 -bpf_list_pop_front
v6.12 -bpf_list_push_back_impl
v6.12 -bpf_list_push_front_impl
v6.12 -bpf_local_irq_restore
v6.12 -bpf_local_irq_save
v6.12 -bpf_lookup_system_key
v6.12 -bpf_lookup_user_key
v6.12 -bpf_map_sum_elem_count
v6.12 -bpf_obj_drop_impl
v6.12 -bpf_obj_new_impl
v6.12 -bpf_path_d_path
v6.12 -bpf_percpu_obj_drop_impl
v6.12 -bpf_percpu_obj_new_impl
v6.12 -bpf_preempt_disable
v6.12 -bpf_preempt_enable
v6.12 -bpf_put_file
v6.12 -bpf_rbtree_add_impl
v6.12 -bpf_rbtree_first
v6.12 -bpf_rbtree_remove
v6.12 -bpf_rcu_read_lock
v6.12 -bpf_rcu_read_unlock
v6.12 -bpf_rdonly_cast
v6.12 -bpf_refcount_acquire_impl
v6.12 -bpf_send_signal_task
v6.12 -bpf_sock_destroy
v6.12 -bpf_task_acquire
v6.12 -bpf_task_from_pid
v6.12 -bpf_task_from_vpid
v6.12 -bpf_task_get_cgroup1
v6.12 -bpf_task_release
v6.12 -bpf_task_under_cgroup
v6.12 -bpf_throw
v6.12 -bpf_verify_pkcs7_signature
v6.12 -bpf_wq_init
v6.12 -bpf_wq_set_callback_impl
v6.12 -bpf_wq_start
v6.12 -cgroup_rstat_flush
v6.12 -cgroup_rstat_updated
v6.12 -crash_kexec
v6.12 -scx_bpf_cpu_rq
v6.12 -scx_bpf_cpuperf_cap
v6.12 -scx_bpf_cpuperf_cur
v6.12 -scx_bpf_cpuperf_set
v6.12 -scx_bpf_destroy_dsq
v6.12 -scx_bpf_dsq_nr_queued
v6.12 -scx_bpf_dump_bstr
v6.12 -scx_bpf_error_bstr
v6.12 -scx_bpf_exit_bstr
v6.12 -scx_bpf_get_idle_cpumask
v6.12 -scx_bpf_get_idle_smtmask
v6.12 -scx_bpf_get_online_cpumask
v6.12 -scx_bpf_get_possible_cpumask
v6.12 -scx_bpf_kick_cpu
v6.12 -scx_bpf_now
v6.12 -scx_bpf_nr_cpu_ids
v6.12 -scx_bpf_pick_any_cpu
v6.12 -scx_bpf_pick_idle_cpu
v6.12 -scx_bpf_put_cpumask
v6.12 -scx_bpf_put_idle_cpumask
v6.12 -scx_bpf_task_cgroup
v6.12 -scx_bpf_task_cpu
v6.12 -scx_bpf_task_running
v6.12 -scx_bpf_test_and_clear_cpu_idle
v6.12 -