Program type BPF_PROG_TYPE_NETFILTER
Usage
This program type is used to implement a netfilter (aka iptables
/ nftables
) hook in eBPF.
The hook can make a decision to drop or accept the packet by returning NF_DROP
(0) or NF_ACCEPT
(1) respectively.
Context
The context that is passed in contains pointers to the hook state and to a full sk_buff
as opposed to the __sk_buff
we typically see as the context in other program types. The whole ctx is read-only.
/* Context passed to BPF_PROG_TYPE_NETFILTER programs. The whole ctx is read-only. */
struct bpf_nf_ctx {
/* Pointer to the netfilter hook state (see struct nf_hook_state). */
const struct nf_hook_state *state;
/* Pointer to the full sk_buff, not the __sk_buff used as context by other program types. */
struct sk_buff *skb;
};
The ctx->skb
pointer can be used in combination with the bpf_dynptr_from_skb
kfunc to access the packet data. The returned dynptr will be read-only.
The hook state contains a lot of information about the current hook and state of the packet.
/* Hook state reachable through ctx->state; describes the current hook and packet state. */
struct nf_hook_state {
/* NOTE(review): presumably the hook number currently running (NF_INET_*) - confirm. */
u8 hook;
/* NOTE(review): presumably the protocol family of the hook (NFPROTO_*) - confirm. */
u8 pf;
/* NOTE(review): presumably the input and output network devices for the packet - confirm. */
struct net_device *in;
struct net_device *out;
/* NOTE(review): presumably the socket associated with the packet, if any - confirm. */
struct sock *sk;
/* NOTE(review): presumably the network namespace the hook runs in - confirm. */
struct net *net;
/* Callback taking (net, sock, sk_buff); NOTE(review): when it is invoked is not shown here - confirm. */
int (*okfn)(struct net *, struct sock *, struct sk_buff *);
};
Attachment
These programs are attached via the link API. The netfilter portion of the link create attributes looks like:
/* Netfilter-specific attributes supplied when creating the link. */
struct {
/* Protocol family; supported values are NFPROTO_IPV4 (2) and NFPROTO_IPV6 (10). */
__u32 pf;
/* Hook number; supported values run from NF_INET_PRE_ROUTING (0) to NF_INET_POST_ROUTING (4). */
__u32 hooknum;
/*
 * Hook priority; lower values are called first. NF_IP_PRI_FIRST and
 * NF_IP_PRI_LAST are not allowed. With BPF_F_NETFILTER_IP_DEFRAG set the
 * priority must be higher than NF_IP_PRI_CONNTRACK_DEFRAG (-400).
 */
__s32 priority;
/* Bitmask of flags, e.g. BPF_F_NETFILTER_IP_DEFRAG. */
__u32 flags;
} netfilter;
pf
is the protocol family, supported values are NFPROTO_IPV4
(2) and NFPROTO_IPV6
(10).
hooknum
is the hook number, supported values are NF_INET_PRE_ROUTING
(0), NF_INET_LOCAL_IN
(1), NF_INET_FORWARD
(2), NF_INET_LOCAL_OUT
(3), and NF_INET_POST_ROUTING
(4).
priority
is the priority of the hook, lower values are called first. NF_IP_PRI_FIRST
(-2147483648) and NF_IP_PRI_LAST
(2147483647) are not allowed. If the BPF_F_NETFILTER_IP_DEFRAG
flag is set, the priority must be higher than NF_IP_PRI_CONNTRACK_DEFRAG
(-400).
flags
is a bitmask of flags. Supported flags are:
BPF_F_NETFILTER_IP_DEFRAG
- Enable defragmentation of IP fragments, this hook will only see defragmented packets.
Example
// SPDX-License-Identifier: GPL-2.0-only
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include "bpf_tracing_net.h"
/* Netfilter verdicts returned by the program: drop or accept the packet. */
#define NF_DROP 0
#define NF_ACCEPT 1
/* Values compared against skb->protocol (after bpf_ntohs) to select the L3 handler. */
#define ETH_P_IP 0x0800
#define ETH_P_IPV6 0x86DD
/* Masks applied to the host-order IPv4 frag_off field in is_frag_v4(). */
#define IP_MF 0x2000
#define IP_OFFSET 0x1FFF
/* IPv6 next-header value that is_frag_v6() treats as a fragment header. */
#define NEXTHDR_FRAGMENT 44
/* kfunc declarations; handle_v4()/handle_v6() use them to read the packet headers. */
extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
struct bpf_dynptr *ptr__uninit) __ksym;
extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset,
void *buffer, uint32_t buffer__sz) __ksym;
/* Incremented each time a fragment is dropped by handle_v4()/handle_v6(). */
volatile int shootdowns = 0;
/*
 * Report whether an IPv4 header belongs to a fragment.
 *
 * The frag_off field is converted with bpf_ntohs() and split into its flag
 * bits (everything outside IP_OFFSET) and its offset bits (IP_OFFSET). The
 * header is treated as a fragment when IP_MF is set among the flag bits or
 * when the offset is non-zero.
 */
static bool is_frag_v4(struct iphdr *iph)
{
	const int frag_field = bpf_ntohs(iph->frag_off);
	const int flag_bits = frag_field & ~(IP_OFFSET);
	/* The offset bits are shifted left by 3, as in the reference code. */
	const int byte_offset = (frag_field & (IP_OFFSET)) << 3;

	return (flag_bits & (IP_MF)) != 0 || byte_offset != 0;
}
/*
 * Report whether an IPv6 packet is a fragment.
 *
 * Only the next-header field of the fixed header is inspected: the example
 * assumes the fragmentation header, if present, immediately follows the
 * fixed header with no other extension headers in between. That assumption
 * holds only for this test case and avoids walking the extension header
 * chain.
 */
static bool is_frag_v6(struct ipv6hdr *ip6h)
{
	const unsigned int first_next_hdr = ip6h->nexthdr;

	return first_next_hdr == NEXTHDR_FRAGMENT;
}
static int handle_v4(struct __sk_buff *skb)
{
struct bpf_dynptr ptr;
u8 iph_buf[20] = {};
struct iphdr *iph;
if (bpf_dynptr_from_skb(skb, 0, &ptr))
return NF_DROP;
iph = bpf_dynptr_slice(&ptr, 0, iph_buf, sizeof(iph_buf));
if (!iph)
return NF_DROP;
/* Shootdown any frags */
if (is_frag_v4(iph)) {
shootdowns++;
return NF_DROP;
}
return NF_ACCEPT;
}
static int handle_v6(struct __sk_buff *skb)
{
struct bpf_dynptr ptr;
struct ipv6hdr *ip6h;
u8 ip6h_buf[40] = {};
if (bpf_dynptr_from_skb(skb, 0, &ptr))
return NF_DROP;
ip6h = bpf_dynptr_slice(&ptr, 0, ip6h_buf, sizeof(ip6h_buf));
if (!ip6h)
return NF_DROP;
/* Shootdown any frags */
if (is_frag_v6(ip6h)) {
shootdowns++;
return NF_DROP;
}
return NF_ACCEPT;
}
/*
 * Netfilter program entry point.
 *
 * Dispatches on the host-order value of skb->protocol: IPv4 packets go to
 * handle_v4(), IPv6 packets go to handle_v6(), and everything else is
 * accepted. The sk_buff pointer from the context is cast to
 * struct __sk_buff * because that is the type the handlers (and the
 * bpf_dynptr_from_skb declaration in this file) take.
 */
SEC("netfilter")
int defrag(struct bpf_nf_ctx *ctx)
{
	struct __sk_buff *skb = (struct __sk_buff *)ctx->skb;
	const int l3_proto = bpf_ntohs(ctx->skb->protocol);

	if (l3_proto == ETH_P_IP)
		return handle_v4(skb);

	if (l3_proto == ETH_P_IPV6)
		return handle_v6(skb);

	return NF_ACCEPT;
}
/* License string placed in the "license" section of the object file. */
char _license[] SEC("license") = "GPL";
Helper functions
Not all helper functions are available in all program types. These are the helper calls available for BPF_PROG_TYPE_NETFILTER
programs:
Supported helper functions
bpf_map_lookup_elem
bpf_map_update_elem
bpf_map_delete_elem
bpf_map_push_elem
bpf_map_pop_elem
bpf_map_peek_elem
bpf_map_lookup_percpu_elem
bpf_get_prandom_u32
bpf_get_smp_processor_id
bpf_get_numa_node_id
bpf_tail_call
bpf_ktime_get_ns
bpf_ktime_get_boot_ns
bpf_ringbuf_output
bpf_ringbuf_reserve
bpf_ringbuf_submit
bpf_ringbuf_discard
bpf_ringbuf_query
bpf_for_each_map_elem
bpf_loop
bpf_strncmp
bpf_spin_lock
bpf_spin_unlock
bpf_jiffies64
bpf_per_cpu_ptr
bpf_this_cpu_ptr
bpf_timer_init
bpf_timer_set_callback
bpf_timer_start
bpf_timer_cancel
bpf_trace_printk
bpf_get_current_task
bpf_get_current_task_btf
bpf_probe_read_user
bpf_probe_read_kernel
bpf_probe_read_user_str
bpf_probe_read_kernel_str
bpf_snprintf_btf
bpf_snprintf
bpf_task_pt_regs
bpf_trace_vprintk
bpf_cgrp_storage_get
bpf_cgrp_storage_delete
bpf_dynptr_data
bpf_dynptr_from_mem
bpf_dynptr_read
bpf_dynptr_write
bpf_kptr_xchg
bpf_ktime_get_tai_ns
bpf_ringbuf_discard_dynptr
bpf_ringbuf_reserve_dynptr
bpf_ringbuf_submit_dynptr
bpf_user_ringbuf_drain
KFuncs
Supported kfuncs
bpf_arena_alloc_pages
bpf_arena_free_pages
bpf_cast_to_kern_ctx
bpf_dynptr_adjust
bpf_dynptr_clone
bpf_dynptr_from_skb
bpf_dynptr_is_null
bpf_dynptr_is_rdonly
bpf_dynptr_size
bpf_dynptr_slice
bpf_dynptr_slice_rdwr
bpf_iter_bits_destroy
bpf_iter_bits_new
bpf_iter_bits_next
bpf_iter_css_destroy
bpf_iter_css_new
bpf_iter_css_next
bpf_iter_css_task_destroy
bpf_iter_css_task_new
bpf_iter_css_task_next
bpf_iter_num_destroy
bpf_iter_num_new
bpf_iter_num_next
bpf_iter_task_destroy
bpf_iter_task_new
bpf_iter_task_next
bpf_iter_task_vma_destroy
bpf_iter_task_vma_new
bpf_iter_task_vma_next
bpf_map_sum_elem_count
bpf_preempt_disable
bpf_preempt_enable
bpf_rcu_read_lock
bpf_rcu_read_unlock
bpf_rdonly_cast
bpf_wq_init
bpf_wq_set_callback_impl
bpf_wq_start