eBPF入门系列:实现快速响应Ping包

通过使用 XDP、TC 两种 eBPF 类型程序实现快速响应 Ping 包

XDP

学习项目

xdpping

编译及运行

# 使用bpf2go生成对应的golang类型的eBPF数据结构及函数
$ go generate
# 编译
$ go build

# 运行,加载eBPF程序到enp0s3网卡
$ sudo ./xdpping --dev enp0s3

源码

加载eBPF程序的用户态代码程序主要是调用Cilium社区的ebpf库实现,这里不做介绍,下面主要讲一下C语言实现的eBPF代码片段:

//go:build ignore

#include "../headers/bpf_all.h"

// 挂载点为xdp,函数名xdp_ping(可自定义),参数类型xdp_md(由内核xdp函数参数决定)
/*
 * xdp_ping - answer ICMP echo requests directly at the XDP hook.
 *
 * The section name "xdp" selects the attach point; the function name is
 * arbitrary and the struct xdp_md parameter is dictated by the XDP hook.
 *
 * A packet is rewritten in place into an echo reply and sent back out of
 * the receiving interface (XDP_TX) only when it is an IPv4 ICMP echo
 * request whose IP header carries no options and whose ICMP message is
 * exactly CSUM_SIZE bytes long. Every other packet is returned untouched
 * with XDP_PASS.
 */
SEC("xdp")
int xdp_ping(struct xdp_md *ctx)
{
    bpf_printk("xdpping starting\n");
    void *data = ctx_ptr(ctx, data);
    void *data_end = ctx_ptr(ctx, data_end);

    // Parse the Ethernet header from the raw packet data.
    struct ethhdr *eth;
    eth = (typeof(eth))data;
    // Equivalent form: struct ethhdr *eth = data;
    if ((void *)(eth + 1) > data_end)
        return XDP_PASS;

    // Only IPv4 frames are handled.
    if (eth->h_proto != bpf_htons(ETH_P_IP))
        return XDP_PASS;

    // Parse the IP header.
    struct iphdr *iph;
    iph = (typeof(iph))(eth + 1);
    // Equivalent form: struct iphdr *iph = (void *)(eth + 1);
    if ((void *)(iph + 1) > data_end)
        return XDP_PASS;

    // Only ICMP is handled.
    if (iph->protocol != IPPROTO_ICMP)
        return XDP_PASS;

    // The ICMP header is located with (iph + 1) below, which is only
    // correct for a 20-byte IP header. Packets carrying IP options
    // (ihl > 5) are left to the kernel instead of being misparsed.
    if (iph->ihl != 5)
        return XDP_PASS;

// CSUM_SIZE is used both to validate the ICMP message length and as the
// number of bytes covered when recomputing the ICMP checksum.
#define CSUM_SIZE 40
    int csum_size = CSUM_SIZE;

    // The checksum below covers exactly CSUM_SIZE bytes, so replying to a
    // longer echo request would emit a reply with a wrong checksum. Let
    // the kernel answer any request whose ICMP message is not exactly
    // CSUM_SIZE bytes long.
    if (iph->tot_len != bpf_htons(sizeof(*iph) + CSUM_SIZE))
        return XDP_PASS;

    // Parse the ICMP header.
    struct icmphdr *icmph;
    // icmph = (typeof(icmph))((void *)iph + (iph->ihl * 4)); // FAILED: R3 offset is outside of the packet
    icmph = (typeof(icmph))(iph + 1);
    // Equivalent form: struct icmphdr *icmph = (void *)(iph + 1);
    if ((void *)(icmph) + csum_size > data_end)
        return XDP_PASS;

    // Only echo requests are answered.
    if (icmph->type != ICMP_ECHO)
        return XDP_PASS;

    // FAILED: int csum_size = iph->tot_len - sizeof(*iph); // R3 offset is outside of the packet
    // FAILED: int csum_size = data_end - (void *)icmph;    // R4 unbounded memory access, use 'var &= const' or 'if (var < const)'

    // Turn the ICMP header into an echo reply.
    icmph->type = ICMP_ECHOREPLY;
    icmph->checksum = 0; // Note: reset and then checksum
    icmph->checksum = ipv4_csum(icmph, csum_size);

    // Rewrite the IP header: swap the addresses and reset the TTL.
    __be32 daddr = iph->daddr;
    iph->daddr = iph->saddr;
    iph->saddr = daddr;
    iph->ttl = 64;
    iph->check = 0; // Note: reset and then checksum
    iph->check = ipv4_csum(iph, sizeof(*iph));

    // Rewrite the Ethernet header: swap source and destination MAC.
    char dmac[ETH_ALEN];
    __builtin_memcpy(dmac, eth->h_dest, ETH_ALEN);
    __builtin_memcpy(eth->h_dest, eth->h_source, ETH_ALEN);
    __builtin_memcpy(eth->h_source, dmac, ETH_ALEN);

    bpf_printk("xdpping replay icmp echo reply\n");

    // Transmit the rewritten packet from the receiving NIC; it no longer
    // enters the kernel network stack.
    return XDP_TX;
}

TC

学习项目

在群里听到大佬说使用 TC 层模拟 ICMP 包响应不需要硬编码 CSUM_SIZE,于是搜了一圈,终于找到了一个现成的工具 ebpf-icmp-ping,测试过以后发现可以正常运行。想了一下刚好可以把这个纯 C 的工具改造为 Go+eBPF CO-RE 的项目,也可以作为自己第一个写的 Go+eBPF 的玩具。

项目地址:tc-icmp-ping

编译及运行

# 更新ebpf c程序后执行
$ go generate
$ go build

# 运行
$ sudo ./tc-icmp-ping -d enp0s3

源码

部分 eBPF C 代码片段:

/* ICMP type value that pingpong() compares against icmp->type to detect an
 * incoming ping. */
#define ICMP_PING 8

/* Byte offsets of the IPv4 source and destination address fields, counted
 * from the start of the Ethernet header. */
#define IP_SRC_OFF (ETH_HLEN + offsetof(struct iphdr, saddr))
#define IP_DST_OFF (ETH_HLEN + offsetof(struct iphdr, daddr))

/* Byte offsets of the ICMP checksum and type fields. Both skip exactly
 * sizeof(struct iphdr) after the Ethernet header, i.e. they assume an IP
 * header without options. */
#define ICMP_CSUM_OFF (ETH_HLEN + sizeof(struct iphdr) + offsetof(struct icmphdr, checksum))
#define ICMP_TYPE_OFF (ETH_HLEN + sizeof(struct iphdr) + offsetof(struct icmphdr, type))
/* Width in bytes of the value replaced when the checksum is updated. */
#define ICMP_CSUM_SIZE sizeof(__u16)

/*
 * pingpong - answer ICMP echo requests at the TC hook.
 *
 * For an IPv4 ICMP packet of type ICMP_PING whose IP header carries no
 * options, the MAC and IP addresses are swapped, the ICMP type is set to 0
 * (echo reply) with the checksum adjusted to match, a clone of the skb is
 * redirected to the interface it arrived on, and TC_ACT_SHOT is returned
 * for the original. Any other packet returns TC_ACT_UNSPEC without being
 * modified.
 */
SEC("tc")
int pingpong(struct __sk_buff *skb)
{
	/* We will access all data through pointers to structs */
	void *data = (void *)(long)skb->data;
	void *data_end = (void *)(long)skb->data_end;

	/* first we check that the packet has enough data,
	 * so we can access the three different headers of ethernet, ip and icmp
	 */
	if (data + sizeof(struct ethhdr) + sizeof(struct iphdr) + sizeof(struct icmphdr) > data_end)
		return TC_ACT_UNSPEC;

	/* for easy access we re-use the Kernel's struct definitions */
	struct ethhdr  *eth  = data;
	struct iphdr   *ip   = (data + sizeof(struct ethhdr));
	struct icmphdr *icmp = (data + sizeof(struct ethhdr) + sizeof(struct iphdr));

	/* Only actual IP packets are allowed */
	if (eth->h_proto != __constant_htons(ETH_P_IP))
		return TC_ACT_UNSPEC;

	/* We handle only ICMP traffic */
	if (ip->protocol != IPPROTO_ICMP)
		return TC_ACT_UNSPEC;

	/* The icmp pointer above and the ICMP_*_OFF offsets used below assume
	 * a 20-byte IP header. With IP options present they would point into
	 * the options, so leave such packets to the kernel. */
	if (ip->ihl != 5)
		return TC_ACT_UNSPEC;

	/* ...and only if it is an actual incoming ping */
	if (icmp->type != ICMP_PING)
		return TC_ACT_UNSPEC;

	/* Let's grab the MAC address.
	 * We need to copy them out, as they are 48 bits long */
	__u8 src_mac[ETH_ALEN];
	__u8 dst_mac[ETH_ALEN];
	bpf_memcpy(src_mac, eth->h_source, ETH_ALEN);
	bpf_memcpy(dst_mac, eth->h_dest, ETH_ALEN);

	/* Let's grab the IP addresses.
	 * They are 32-bit, so it is easy to access.
	 * They are copied into locals here because every later write goes
	 * through skb helpers using these copies rather than the pointers. */
	__u32 src_ip = ip->saddr;
	__u32 dst_ip = ip->daddr;

	trace_printk("[action1] IP Packet, proto= %d, src= %lu, dst= %lu\n", ip->protocol, src_ip, dst_ip);

	/* Swap the MAC addresses */
	bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_source), dst_mac, ETH_ALEN, 0);
	bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_dest), src_mac, ETH_ALEN, 0);

	/* Swap the IP addresses.
	 * IP contains a checksum, but just swapping bytes does not change it.
	 * so no need to recalculate */
	bpf_skb_store_bytes(skb, IP_SRC_OFF, &dst_ip, sizeof(dst_ip), 0);
	bpf_skb_store_bytes(skb, IP_DST_OFF, &src_ip, sizeof(src_ip), 0);

	/* Change the type of the ICMP packet to 0 (ICMP Echo Reply).
	 * This changes the data, so we need to re-calculate the checksum
	 */
	__u8 new_type = 0;
	/* We need to pass the full size of the checksum here (2 bytes).
	 * NOTE(review): the old/new values are passed as plain host integers;
	 * confirm the byte order is right on big-endian targets. */
	bpf_l4_csum_replace(skb, ICMP_CSUM_OFF, ICMP_PING, new_type, ICMP_CSUM_SIZE);
	bpf_skb_store_bytes(skb, ICMP_TYPE_OFF, &new_type, sizeof(new_type), 0);

	/* Now redirecting the modified skb on the same interface to be transmitted again */
	bpf_clone_redirect(skb, skb->ifindex, 0);

	/* We modified the packet and redirected it, it can be dropped here */
	return TC_ACT_SHOT;
}

前面的逻辑还是做数据包头解析以及协议判断,MAC 地址和 IP 地址交换以及 ICMP TYPE 字段更新这里使用了 bpf_skb_store_bytes bpf helper 函数,计算 ICMP checksum 使用了 bpf_l4_csum_replace helper 函数。

小结

XDP 挂载点计算ICMP checksum逻辑需要的ICMP payload大小没有通用的计算方式,暂时只能写死,后续想到更好的方式再来更新;后面研究一下 bpf_l4_csum_replace 函数逻辑是否可以复用在 XDP 场景。

Last updated