Program type BPF_PROG_TYPE_NETFILTER
Usage
This program type is used to implement a netfilter (aka iptables / nftables) hook in eBPF.
The hook can make a decision to drop or accept the packet by returning NF_DROP (0) or NF_ACCEPT (1) respectively.
Context
The context that is passed in contains pointers to the hook state and to a full sk_buff as opposed to the __sk_buff projection type we typically see as the context in other program types.
/* Context passed to BPF_PROG_TYPE_NETFILTER programs. The whole context is
 * read-only and the layout is kernel-internal and intentionally unstable.
 */
struct bpf_nf_ctx {
/* Hook state: information about the current hook and the state of the packet. */
const struct nf_hook_state *state;
/* Full kernel sk_buff, not the __sk_buff projection used by other program types. */
struct sk_buff *skb;
};
The whole ctx is read-only. struct bpf_nf_ctx is defined in an internal linux kernel header file and is intentionally unstable. Users are expected to get definitions from a vmlinux.h or to copy the relevant parts of the definition into their own code. Since both the context struct and its field types are kernel internal, users should use CO-RE to access any field to ensure programs work on multiple kernel versions.
The ctx->skb pointer can be used in combination with the bpf_dynptr_from_skb kfunc to access the packet data. The returned dynptr will be read-only.
The hook state contains a lot of information about the current hook and state of the packet.
/* Hook state reachable through bpf_nf_ctx::state. Kernel-internal type; the
 * per-field notes below are reviewer assumptions, not established by this page.
 */
struct nf_hook_state {
/* NOTE(review): presumably the hook number (cf. hooknum in the attach attributes) - confirm. */
u8 hook;
/* NOTE(review): presumably the protocol family (cf. pf in the attach attributes) - confirm. */
u8 pf;
/* NOTE(review): presumably the input / output network devices - confirm. */
struct net_device *in;
struct net_device *out;
/* NOTE(review): presumably the socket associated with the packet, if any - confirm. */
struct sock *sk;
/* NOTE(review): presumably the network namespace the hook runs in - confirm. */
struct net *net;
/* NOTE(review): presumably the continuation invoked when the packet is accepted - confirm. */
int (*okfn)(struct net *, struct sock *, struct sk_buff *);
};
Attachment
These programs are attached via the link API. The netfilter portion of the link create attributes looks like:
/* Netfilter-specific part of the link create attributes. */
struct {
/* Protocol family: NFPROTO_IPV4 (2) or NFPROTO_IPV6 (10). */
__u32 pf;
/* Hook number: NF_INET_PRE_ROUTING (0) through NF_INET_POST_ROUTING (4). */
__u32 hooknum;
/* Hook priority; lower values are called first. NF_IP_PRI_FIRST and
 * NF_IP_PRI_LAST are not allowed.
 */
__s32 priority;
/* Bitmask of flags, e.g. BPF_F_NETFILTER_IP_DEFRAG. */
__u32 flags;
} netfilter;
pf is the protocol family, supported values are NFPROTO_IPV4 (2) and NFPROTO_IPV6 (10).
hooknum is the hook number, supported values are NF_INET_PRE_ROUTING (0), NF_INET_LOCAL_IN (1), NF_INET_FORWARD (2), NF_INET_LOCAL_OUT (3), and NF_INET_POST_ROUTING (4).
priority is the priority of the hook, lower values are called first. NF_IP_PRI_FIRST (-2147483648) and NF_IP_PRI_LAST (2147483647) are not allowed.
flags is a bitmask of flags. Supported flags are:
BPF_F_NETFILTER_IP_DEFRAG - Enable defragmentation of IP fragments; this hook will only see defragmented packets. If the BPF_F_NETFILTER_IP_DEFRAG (v6.6) flag is set, the priority must be higher than NF_IP_PRI_CONNTRACK_DEFRAG (-400) to ensure the prog runs after nf_defrag.
Example
// SPDX-License-Identifier: GPL-2.0-only
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include "bpf_tracing_net.h"
/* Netfilter verdicts returned by the program: drop or accept the packet. */
#define NF_DROP 0
#define NF_ACCEPT 1
/* Values compared against the host-order skb->protocol in defrag() to select
 * the IPv4 or IPv6 handler.
 */
#define ETH_P_IP 0x0800
#define ETH_P_IPV6 0x86DD
/* Masks applied to the host-order IPv4 frag_off field in is_frag_v4(). */
#define IP_MF 0x2000
#define IP_OFFSET 0x1FFF
/* IPv6 next-header value that is_frag_v6() treats as a fragment header. */
#define NEXTHDR_FRAGMENT 44
/* kfunc: initialise a dynptr over the packet data of skb. */
extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
struct bpf_dynptr *ptr__uninit) __ksym;
/* kfunc: obtain a pointer to buffer__sz bytes at offset within the dynptr;
 * the handlers below treat a NULL result as failure.
 */
extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset,
void *buffer, uint32_t buffer__sz) __ksym;
/* Number of packets dropped by the handlers because they were fragments. */
volatile int shootdowns = 0;
/*
 * Report whether an IPv4 header belongs to a fragment.
 *
 * The frag_off field is converted to host byte order and split into its
 * flag bits and its offset bits (IP_OFFSET mask). The header is a fragment
 * when the IP_MF flag is set or the offset is non-zero.
 *
 * @iph: IPv4 header to inspect.
 * Returns true for a fragment, false otherwise.
 */
static bool is_frag_v4(struct iphdr *iph)
{
	int frag_field = bpf_ntohs(iph->frag_off);
	int more_frags = (frag_field & ~IP_OFFSET) & IP_MF;
	/* Offset bits scaled by 8; only its zero/non-zero state matters here. */
	int byte_offset = (frag_field & IP_OFFSET) << 3;

	return more_frags != 0 || byte_offset != 0;
}
/*
 * Report whether an IPv6 packet is a fragment.
 *
 * @ip6h: fixed IPv6 header to inspect.
 * Returns true when the header's nexthdr field is NEXTHDR_FRAGMENT.
 */
static bool is_frag_v6(struct ipv6hdr *ip6h)
{
	/* Only the header immediately following the fixed header is checked.
	 * This relies on the simplifying assumption that no other extension
	 * header sits between the fixed header and the fragmentation header;
	 * that assumption holds only for this test case and spares us from
	 * walking every possible extension header.
	 */
	bool next_is_frag_hdr = (ip6h->nexthdr == NEXTHDR_FRAGMENT);

	return next_is_frag_hdr;
}
static int handle_v4(struct __sk_buff *skb)
{
struct bpf_dynptr ptr;
u8 iph_buf[20] = {};
struct iphdr *iph;
if (bpf_dynptr_from_skb(skb, 0, &ptr))
return NF_DROP;
iph = bpf_dynptr_slice(&ptr, 0, iph_buf, sizeof(iph_buf));
if (!iph)
return NF_DROP;
/* Shootdown any frags */
if (is_frag_v4(iph)) {
shootdowns++;
return NF_DROP;
}
return NF_ACCEPT;
}
static int handle_v6(struct __sk_buff *skb)
{
struct bpf_dynptr ptr;
struct ipv6hdr *ip6h;
u8 ip6h_buf[40] = {};
if (bpf_dynptr_from_skb(skb, 0, &ptr))
return NF_DROP;
ip6h = bpf_dynptr_slice(&ptr, 0, ip6h_buf, sizeof(ip6h_buf));
if (!ip6h)
return NF_DROP;
/* Shootdown any frags */
if (is_frag_v6(ip6h)) {
shootdowns++;
return NF_DROP;
}
return NF_ACCEPT;
}
/*
 * Netfilter hook entry point.
 *
 * Dispatches on the packet's protocol (converted to host byte order):
 * IPv4 and IPv6 packets go to their respective handlers, everything else
 * is accepted unchanged.
 *
 * @ctx: netfilter program context carrying the sk_buff.
 * Returns NF_ACCEPT or NF_DROP.
 */
SEC("netfilter")
int defrag(struct bpf_nf_ctx *ctx)
{
	/* The dynptr kfunc is declared as taking struct __sk_buff *. */
	struct __sk_buff *pkt = (struct __sk_buff *)ctx->skb;
	int l3_proto = bpf_ntohs(ctx->skb->protocol);

	if (l3_proto == ETH_P_IP)
		return handle_v4(pkt);
	if (l3_proto == ETH_P_IPV6)
		return handle_v6(pkt);

	return NF_ACCEPT;
}
/* License string for the program, emitted via SEC("license"). */
char _license[] SEC("license") = "GPL";
Helper functions
Not all helper functions are available in all program types. These are the helper calls available for BPF_PROG_TYPE_NETFILTER programs:
Supported helper functions
bpf_cgrp_storage_delete, bpf_cgrp_storage_get, bpf_dynptr_data, bpf_dynptr_from_mem, bpf_dynptr_read, bpf_dynptr_write, bpf_for_each_map_elem, bpf_get_current_ancestor_cgroup_id (v6.4), bpf_get_current_cgroup_id (v6.4), bpf_get_current_pid_tgid (v6.10), bpf_get_current_task, bpf_get_current_task_btf, bpf_get_ns_current_pid_tgid (v6.10), bpf_get_numa_node_id, bpf_get_prandom_u32, bpf_get_smp_processor_id, bpf_jiffies64, bpf_kptr_xchg, bpf_ktime_get_boot_ns, bpf_ktime_get_ns, bpf_ktime_get_tai_ns, bpf_loop, bpf_map_delete_elem, bpf_map_lookup_elem, bpf_map_lookup_percpu_elem, bpf_map_peek_elem, bpf_map_pop_elem, bpf_map_push_elem, bpf_map_update_elem, bpf_per_cpu_ptr, bpf_probe_read_kernel, bpf_probe_read_kernel_str, bpf_probe_read_user, bpf_probe_read_user_str, bpf_ringbuf_discard, bpf_ringbuf_discard_dynptr, bpf_ringbuf_output, bpf_ringbuf_query, bpf_ringbuf_reserve, bpf_ringbuf_reserve_dynptr, bpf_ringbuf_submit, bpf_ringbuf_submit_dynptr, bpf_snprintf, bpf_snprintf_btf, bpf_spin_lock, bpf_spin_unlock, bpf_strncmp, bpf_tail_call, bpf_task_pt_regs, bpf_this_cpu_ptr, bpf_timer_cancel, bpf_timer_init, bpf_timer_set_callback, bpf_timer_start, bpf_trace_printk, bpf_trace_vprintk, bpf_user_ringbuf_drain
KFuncs
Supported kfuncs
__bpf_trap, bpf_arena_alloc_pages, bpf_arena_free_pages, bpf_arena_reserve_pages, bpf_cast_to_kern_ctx, bpf_cgroup_read_xattr, bpf_copy_from_user_dynptr, bpf_copy_from_user_str, bpf_copy_from_user_str_dynptr, bpf_copy_from_user_task_dynptr, bpf_copy_from_user_task_str, bpf_copy_from_user_task_str_dynptr, bpf_dynptr_adjust, bpf_dynptr_clone, bpf_dynptr_copy, bpf_dynptr_from_skb, bpf_dynptr_is_null, bpf_dynptr_is_rdonly, bpf_dynptr_memset, bpf_dynptr_size, bpf_dynptr_slice, bpf_dynptr_slice_rdwr, bpf_get_kmem_cache, bpf_iter_bits_destroy, bpf_iter_bits_new, bpf_iter_bits_next, bpf_iter_css_destroy, bpf_iter_css_new, bpf_iter_css_next, bpf_iter_css_task_destroy, bpf_iter_css_task_new, bpf_iter_css_task_next, bpf_iter_dmabuf_destroy, bpf_iter_dmabuf_new, bpf_iter_dmabuf_next, bpf_iter_kmem_cache_destroy, bpf_iter_kmem_cache_new, bpf_iter_kmem_cache_next, bpf_iter_num_destroy, bpf_iter_num_new, bpf_iter_num_next, bpf_iter_task_destroy, bpf_iter_task_new, bpf_iter_task_next, bpf_iter_task_vma_destroy, bpf_iter_task_vma_new, bpf_iter_task_vma_next, bpf_local_irq_restore, bpf_local_irq_save, bpf_map_sum_elem_count, bpf_preempt_disable, bpf_preempt_enable, bpf_probe_read_kernel_dynptr, bpf_probe_read_kernel_str_dynptr, bpf_probe_read_user_dynptr, bpf_probe_read_user_str_dynptr, bpf_rcu_read_lock, bpf_rcu_read_unlock, bpf_rdonly_cast, bpf_res_spin_lock, bpf_res_spin_lock_irqsave, bpf_res_spin_unlock, bpf_res_spin_unlock_irqrestore, bpf_strchr, bpf_strchrnul, bpf_strcmp, bpf_strcspn, bpf_stream_vprintk, bpf_strlen, bpf_strnchr, bpf_strnlen, bpf_strnstr, bpf_strrchr, bpf_strspn, bpf_strstr, bpf_wq_init, bpf_wq_set_callback_impl, bpf_wq_start