Program type `BPF_PROG_TYPE_NETFILTER`

Usage

This program type is used to implement a netfilter (aka iptables / nftables) hook in eBPF.

The hook can make a decision to drop or accept the packet by returning NF_DROP (0) or NF_ACCEPT (1) respectively.

Context

The context that is passed in contains pointers to the hook state and to a full sk_buff as opposed to the __sk_buff projection type we typically see as the context in other program types.

/* Context passed to BPF_PROG_TYPE_NETFILTER programs. The whole context is
 * read-only.
 */
struct bpf_nf_ctx {
    /* Hook state for the current invocation. */
    const struct nf_hook_state *state;
    /* Full kernel sk_buff (not the __sk_buff projection used by other
     * program types); can be handed to the bpf_dynptr_from_skb kfunc to
     * read packet data.
     */
    struct sk_buff *skb;
};

The whole ctx is read-only. struct bpf_nf_ctx is defined in an internal Linux kernel header file and is intentionally unstable. Users are expected to get the definitions from a vmlinux.h or to copy the relevant parts of the definitions into their own code. Since both the context struct and its field types are kernel-internal, users should use CO-RE to access any field so that programs work across multiple kernel versions.

The ctx->skb pointer can be used in combination with the bpf_dynptr_from_skb kfunc to access the packet data. The returned dynptr will be read-only.

The hook state contains a lot of information about the current hook and state of the packet.

/* Information about the current hook and the state of the packet. */
struct nf_hook_state {
    /* NOTE(review): presumably the hook number (NF_INET_* value) - confirm
     * against the kernel headers.
     */
    u8 hook;
    /* NOTE(review): presumably the protocol family (NFPROTO_* value) -
     * confirm against the kernel headers.
     */
    u8 pf;
    /* NOTE(review): presumably the input and output network devices for
     * the packet; either may not be set at every hook - confirm.
     */
    struct net_device *in;
    struct net_device *out;
    /* NOTE(review): presumably the socket associated with the packet, if
     * any - confirm.
     */
    struct sock *sk;
    /* NOTE(review): presumably the network namespace the hook runs in -
     * confirm.
     */
    struct net *net;
    /* Function pointer taking (net, sock, skb); its role is not described
     * here - TODO confirm against the kernel sources.
     */
    int (*okfn)(struct net *, struct sock *, struct sk_buff *);
};

Attachment

These programs are attached via the link API. The netfilter portion of the link create attributes looks like:

/* Netfilter-specific part of the link create attributes. */
struct {
    /* Protocol family: NFPROTO_IPV4 (2) or NFPROTO_IPV6 (10). */
    __u32       pf;
    /* Hook number: NF_INET_PRE_ROUTING (0), NF_INET_LOCAL_IN (1),
     * NF_INET_FORWARD (2), NF_INET_LOCAL_OUT (3) or
     * NF_INET_POST_ROUTING (4).
     */
    __u32       hooknum;
    /* Hook priority; lower values are called first. NF_IP_PRI_FIRST and
     * NF_IP_PRI_LAST are not allowed.
     */
    __s32       priority;
    /* Bitmask of flags, e.g. BPF_F_NETFILTER_IP_DEFRAG. */
    __u32       flags;
} netfilter;

pf is the protocol family, supported values are NFPROTO_IPV4 (2) and NFPROTO_IPV6 (10).

hooknum is the hook number, supported values are NF_INET_PRE_ROUTING (0), NF_INET_LOCAL_IN (1), NF_INET_FORWARD (2), NF_INET_LOCAL_OUT (3), and NF_INET_POST_ROUTING (4).

priority is the priority of the hook; hooks with lower values are called first. NF_IP_PRI_FIRST (-2147483648) and NF_IP_PRI_LAST (2147483647) are not allowed.

flags is a bitmask of flags. Supported flags are:

BPF_F_NETFILTER_IP_DEFRAG (since v6.6) - Enables defragmentation of IP fragments, so this hook will only see defragmented packets. If the BPF_F_NETFILTER_IP_DEFRAG flag is set, the priority must be higher than NF_IP_PRI_CONNTRACK_DEFRAG (-400) to ensure the program runs after nf_defrag.

Example

// SPDX-License-Identifier: GPL-2.0-only
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include "bpf_tracing_net.h"

/* Verdicts returned by the hook: drop or accept the packet. */
#define NF_DROP         0
#define NF_ACCEPT       1
/* Values compared against the byte-swapped skb->protocol in defrag(). */
#define ETH_P_IP        0x0800
#define ETH_P_IPV6      0x86DD
/* Used by is_frag_v4() on the host-order frag_off field: IP_MF is the flag
 * bit tested, IP_OFFSET masks the fragment offset bits.
 */
#define IP_MF           0x2000
#define IP_OFFSET       0x1FFF
/* IPv6 next-header value that is_frag_v6() treats as a fragmentation header. */
#define NEXTHDR_FRAGMENT    44

/* kfunc: initialises a dynptr over the skb's packet data. The handlers
 * below treat a non-zero return as failure.
 */
extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
                  struct bpf_dynptr *ptr__uninit) __ksym;
/* kfunc: yields a pointer to buffer__sz bytes at offset within the dynptr,
 * with a caller-supplied backing buffer. The handlers below treat a NULL
 * return as failure.
 */
extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset,
                  void *buffer, uint32_t buffer__sz) __ksym;

/* Number of fragments dropped by handle_v4()/handle_v6().
 * NOTE(review): presumably volatile so the counter can be observed from
 * outside the program - confirm.
 */
volatile int shootdowns = 0;

/* Tell whether an IPv4 header belongs to a fragment.
 *
 * The frag_off field is converted to host order and split into its flag
 * bits and its offset bits. The packet counts as a fragment when the IP_MF
 * flag is set or when the (byte-scaled) fragment offset is non-zero.
 */
static bool is_frag_v4(struct iphdr *iph)
{
    int frag_field = bpf_ntohs(iph->frag_off);
    int flag_bits = frag_field & ~IP_OFFSET;
    /* The offset is stored in units of 8 bytes; scale it to bytes. */
    int byte_offset = (frag_field & IP_OFFSET) << 3;

    if (flag_bits & IP_MF)
        return 1;

    return byte_offset != 0;
}

/* Tell whether an IPv6 packet is a fragment.
 *
 * Only the next-header field of the fixed header is inspected: this
 * example assumes that no other extension header sits between the fixed
 * header and the fragmentation header. That shortcut holds only for this
 * test case and avoids walking the whole extension header chain.
 */
static bool is_frag_v6(struct ipv6hdr *ip6h)
{
    const bool next_is_frag_hdr = (ip6h->nexthdr == NEXTHDR_FRAGMENT);

    return next_is_frag_hdr;
}

static int handle_v4(struct __sk_buff *skb)
{
    struct bpf_dynptr ptr;
    u8 iph_buf[20] = {};
    struct iphdr *iph;

    if (bpf_dynptr_from_skb(skb, 0, &ptr))
        return NF_DROP;

    iph = bpf_dynptr_slice(&ptr, 0, iph_buf, sizeof(iph_buf));
    if (!iph)
        return NF_DROP;

    /* Shootdown any frags */
    if (is_frag_v4(iph)) {
        shootdowns++;
        return NF_DROP;
    }

    return NF_ACCEPT;
}

static int handle_v6(struct __sk_buff *skb)
{
    struct bpf_dynptr ptr;
    struct ipv6hdr *ip6h;
    u8 ip6h_buf[40] = {};

    if (bpf_dynptr_from_skb(skb, 0, &ptr))
        return NF_DROP;

    ip6h = bpf_dynptr_slice(&ptr, 0, ip6h_buf, sizeof(ip6h_buf));
    if (!ip6h)
        return NF_DROP;

    /* Shootdown any frags */
    if (is_frag_v6(ip6h)) {
        shootdowns++;
        return NF_DROP;
    }

    return NF_ACCEPT;
}

/* Netfilter hook entry point.
 *
 * Dispatches on the packet's protocol field (converted to host order):
 * IPv4 and IPv6 packets go to their handlers, which drop fragments; every
 * other protocol is accepted unchanged.
 */
SEC("netfilter")
int defrag(struct bpf_nf_ctx *ctx)
{
    /* The dynptr kfunc is declared with the __sk_buff type, so the full
     * sk_buff pointer from the context is cast before being passed on.
     */
    struct __sk_buff *skb = (struct __sk_buff *)ctx->skb;
    int l3_proto = bpf_ntohs(ctx->skb->protocol);

    if (l3_proto == ETH_P_IP)
        return handle_v4(skb);

    if (l3_proto == ETH_P_IPV6)
        return handle_v6(skb);

    return NF_ACCEPT;
}

/* License string of this program, emitted into the "license" section. */
char _license[] SEC("license") = "GPL";

Helper functions

Not all helper functions are available in all program types. These are the helper calls available for BPF_PROG_TYPE_NETFILTER programs:

Supported helper functions

KFuncs

Supported kfuncs

Program type BPF_PROG_TYPE_NETFILTER