[bpf-next,v2,5/8] bpf: net: ipv6: Add bpf_ipv6_frag_rcv() kfunc
Commit Message
This helper is used to defragment IPv6 packets. Similar to the previous
bpf_ip_check_defrag() kfunc, this kfunc:
* Returns 0 on defrag + skb update success
* Returns < 0 on error
* Takes care to ensure ctx (skb) remains valid no matter what the
underlying call to _ipv6_frag_rcv() does
* Is only callable from TC clsact progs
Please see bpf_ip_check_defrag() commit for more details / suggestions.
Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
---
include/net/ipv6_frag.h | 1 +
include/net/transp_v6.h | 1 +
net/ipv6/Makefile | 1 +
net/ipv6/af_inet6.c | 4 ++
net/ipv6/reassembly_bpf.c | 143 ++++++++++++++++++++++++++++++++++++++
5 files changed, 150 insertions(+)
create mode 100644 net/ipv6/reassembly_bpf.c
Comments
Hi Daniel,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on bpf-next/master]
url: https://github.com/intel-lab-lkp/linux/commits/Daniel-Xu/ip-frags-Return-actual-error-codes-from-ip_check_defrag/20230228-035449
base: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
patch link: https://lore.kernel.org/r/bce083a4293eefb048a700b5a6086e8d8c957700.1677526810.git.dxu%40dxuuu.xyz
patch subject: [PATCH bpf-next v2 5/8] bpf: net: ipv6: Add bpf_ipv6_frag_rcv() kfunc
config: i386-defconfig (https://download.01.org/0day-ci/archive/20230228/202302281646.GYE1qnGb-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-8) 11.3.0
reproduce (this is a W=1 build):
# https://github.com/intel-lab-lkp/linux/commit/be4610312351d4a658435bd4649a3a830322396d
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Daniel-Xu/ip-frags-Return-actual-error-codes-from-ip_check_defrag/20230228-035449
git checkout be4610312351d4a658435bd4649a3a830322396d
# save the config file
mkdir build_dir && cp config build_dir/.config
make W=1 O=build_dir ARCH=i386 olddefconfig
make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash
If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202302281646.GYE1qnGb-lkp@intel.com/
All errors (new ones prefixed by >>):
ld: net/ipv6/af_inet6.o: in function `inet6_init':
>> af_inet6.c:(.init.text+0x22a): undefined reference to `register_ipv6_reassembly_bpf'
Hi Daniel,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on bpf-next/master]
url: https://github.com/intel-lab-lkp/linux/commits/Daniel-Xu/ip-frags-Return-actual-error-codes-from-ip_check_defrag/20230228-035449
base: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
patch link: https://lore.kernel.org/r/bce083a4293eefb048a700b5a6086e8d8c957700.1677526810.git.dxu%40dxuuu.xyz
patch subject: [PATCH bpf-next v2 5/8] bpf: net: ipv6: Add bpf_ipv6_frag_rcv() kfunc
config: i386-debian-10.3 (https://download.01.org/0day-ci/archive/20230228/202302281707.5vUL3boJ-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-8) 11.3.0
reproduce (this is a W=1 build):
# https://github.com/intel-lab-lkp/linux/commit/be4610312351d4a658435bd4649a3a830322396d
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Daniel-Xu/ip-frags-Return-actual-error-codes-from-ip_check_defrag/20230228-035449
git checkout be4610312351d4a658435bd4649a3a830322396d
# save the config file
mkdir build_dir && cp config build_dir/.config
make W=1 O=build_dir ARCH=i386 olddefconfig
make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash
If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202302281707.5vUL3boJ-lkp@intel.com/
All errors (new ones prefixed by >>):
ld: net/ipv6/af_inet6.o: in function `inet6_init':
>> net/ipv6/af_inet6.c:1177: undefined reference to `register_ipv6_reassembly_bpf'
vim +1177 net/ipv6/af_inet6.c
1061
1062 static int __init inet6_init(void)
1063 {
1064 struct list_head *r;
1065 int err = 0;
1066
1067 sock_skb_cb_check_size(sizeof(struct inet6_skb_parm));
1068
1069 /* Register the socket-side information for inet6_create. */
1070 for (r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r)
1071 INIT_LIST_HEAD(r);
1072
1073 raw_hashinfo_init(&raw_v6_hashinfo);
1074
1075 if (disable_ipv6_mod) {
1076 pr_info("Loaded, but administratively disabled, reboot required to enable\n");
1077 goto out;
1078 }
1079
1080 err = proto_register(&tcpv6_prot, 1);
1081 if (err)
1082 goto out;
1083
1084 err = proto_register(&udpv6_prot, 1);
1085 if (err)
1086 goto out_unregister_tcp_proto;
1087
1088 err = proto_register(&udplitev6_prot, 1);
1089 if (err)
1090 goto out_unregister_udp_proto;
1091
1092 err = proto_register(&rawv6_prot, 1);
1093 if (err)
1094 goto out_unregister_udplite_proto;
1095
1096 err = proto_register(&pingv6_prot, 1);
1097 if (err)
1098 goto out_unregister_raw_proto;
1099
1100 /* We MUST register RAW sockets before we create the ICMP6,
1101 * IGMP6, or NDISC control sockets.
1102 */
1103 err = rawv6_init();
1104 if (err)
1105 goto out_unregister_ping_proto;
1106
1107 /* Register the family here so that the init calls below will
1108 * be able to create sockets. (?? is this dangerous ??)
1109 */
1110 err = sock_register(&inet6_family_ops);
1111 if (err)
1112 goto out_sock_register_fail;
1113
1114 /*
1115 * ipngwg API draft makes clear that the correct semantics
1116 * for TCP and UDP is to consider one TCP and UDP instance
1117 * in a host available by both INET and INET6 APIs and
1118 * able to communicate via both network protocols.
1119 */
1120
1121 err = register_pernet_subsys(&inet6_net_ops);
1122 if (err)
1123 goto register_pernet_fail;
1124 err = ip6_mr_init();
1125 if (err)
1126 goto ipmr_fail;
1127 err = icmpv6_init();
1128 if (err)
1129 goto icmp_fail;
1130 err = ndisc_init();
1131 if (err)
1132 goto ndisc_fail;
1133 err = igmp6_init();
1134 if (err)
1135 goto igmp_fail;
1136
1137 err = ipv6_netfilter_init();
1138 if (err)
1139 goto netfilter_fail;
1140 /* Create /proc/foo6 entries. */
1141 #ifdef CONFIG_PROC_FS
1142 err = -ENOMEM;
1143 if (raw6_proc_init())
1144 goto proc_raw6_fail;
1145 if (udplite6_proc_init())
1146 goto proc_udplite6_fail;
1147 if (ipv6_misc_proc_init())
1148 goto proc_misc6_fail;
1149 if (if6_proc_init())
1150 goto proc_if6_fail;
1151 #endif
1152 err = ip6_route_init();
1153 if (err)
1154 goto ip6_route_fail;
1155 err = ndisc_late_init();
1156 if (err)
1157 goto ndisc_late_fail;
1158 err = ip6_flowlabel_init();
1159 if (err)
1160 goto ip6_flowlabel_fail;
1161 err = ipv6_anycast_init();
1162 if (err)
1163 goto ipv6_anycast_fail;
1164 err = addrconf_init();
1165 if (err)
1166 goto addrconf_fail;
1167
1168 /* Init v6 extension headers. */
1169 err = ipv6_exthdrs_init();
1170 if (err)
1171 goto ipv6_exthdrs_fail;
1172
1173 err = ipv6_frag_init();
1174 if (err)
1175 goto ipv6_frag_fail;
1176
> 1177 err = register_ipv6_reassembly_bpf();
1178 if (err)
1179 goto ipv6_frag_fail;
1180
1181 /* Init v6 transport protocols. */
1182 err = udpv6_init();
1183 if (err)
1184 goto udpv6_fail;
1185
1186 err = udplitev6_init();
1187 if (err)
1188 goto udplitev6_fail;
1189
1190 err = udpv6_offload_init();
1191 if (err)
1192 goto udpv6_offload_fail;
1193
1194 err = tcpv6_init();
1195 if (err)
1196 goto tcpv6_fail;
1197
1198 err = ipv6_packet_init();
1199 if (err)
1200 goto ipv6_packet_fail;
1201
1202 err = pingv6_init();
1203 if (err)
1204 goto pingv6_fail;
1205
1206 err = calipso_init();
1207 if (err)
1208 goto calipso_fail;
1209
1210 err = seg6_init();
1211 if (err)
1212 goto seg6_fail;
1213
1214 err = rpl_init();
1215 if (err)
1216 goto rpl_fail;
1217
1218 err = ioam6_init();
1219 if (err)
1220 goto ioam6_fail;
1221
1222 err = igmp6_late_init();
1223 if (err)
1224 goto igmp6_late_err;
1225
@@ -15,6 +15,7 @@ enum ip6_defrag_users {
__IP6_DEFRAG_CONNTRACK_OUT = IP6_DEFRAG_CONNTRACK_OUT + USHRT_MAX,
IP6_DEFRAG_CONNTRACK_BRIDGE_IN,
__IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX,
+ IP6_DEFRAG_BPF,
};
/*
@@ -20,6 +20,7 @@ int ipv6_exthdrs_init(void);
void ipv6_exthdrs_exit(void);
int ipv6_frag_init(void);
void ipv6_frag_exit(void);
+int register_ipv6_reassembly_bpf(void);
/* transport protocols */
int pingv6_init(void);
@@ -26,6 +26,7 @@ ipv6-$(CONFIG_IPV6_SEG6_LWTUNNEL) += seg6_iptunnel.o seg6_local.o
ipv6-$(CONFIG_IPV6_SEG6_HMAC) += seg6_hmac.o
ipv6-$(CONFIG_IPV6_RPL_LWTUNNEL) += rpl_iptunnel.o
ipv6-$(CONFIG_IPV6_IOAM6_LWTUNNEL) += ioam6_iptunnel.o
+ipv6-$(CONFIG_DEBUG_INFO_BTF) += reassembly_bpf.o
obj-$(CONFIG_INET6_AH) += ah6.o
obj-$(CONFIG_INET6_ESP) += esp6.o
@@ -1174,6 +1174,10 @@ static int __init inet6_init(void)
if (err)
goto ipv6_frag_fail;
+ err = register_ipv6_reassembly_bpf();
+ if (err)
+ goto ipv6_frag_fail;
+
/* Init v6 transport protocols. */
err = udpv6_init();
if (err)
new file mode 100644
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Unstable ipv6 fragmentation helpers for TC-BPF hook
+ *
+ * These are called from SCHED_CLS BPF programs. Note that it is allowed to
+ * break compatibility for these functions since the interface they are exposed
+ * through to BPF programs is explicitly unstable.
+ */
+
+#include <linux/bpf.h>
+#include <linux/btf_ids.h>
+#include <linux/filter.h>
+#include <linux/netdevice.h>
+#include <net/ipv6.h>
+#include <net/ipv6_frag.h>
+#include <net/ipv6_stubs.h>
+
+/* Attach a route to @skb, looked up in @net from the packet's IPv6 header.
+ *
+ * The flow key is built from the skb mark, the skb's input interface index
+ * and the IPv6 header's flow info, next header and addresses.
+ *
+ * Return: 0 on success, or the negative errno carried by the error pointer
+ * that ipv6_dst_lookup_flow() returned. On failure @skb is left untouched.
+ */
+static int set_dst(struct sk_buff *skb, struct net *net)
+{
+	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+	struct dst_entry *dst;
+
+	struct flowi6 fl6 = {
+		.flowi6_flags = FLOWI_FLAG_ANYSRC,
+		.flowi6_mark = skb->mark,
+		.flowlabel = ip6_flowinfo(ip6h),
+		.flowi6_iif = skb->skb_iif,
+		.flowi6_proto = ip6h->nexthdr,
+		.daddr = ip6h->daddr,
+		.saddr = ip6h->saddr,
+	};
+
+	dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
+	if (IS_ERR(dst))
+		return PTR_ERR(dst);
+
+	/* skb_dst_set() overwrites the dst slot without releasing what was
+	 * there, so drop any dst the skb already carries to avoid leaking
+	 * its reference.
+	 */
+	skb_dst_drop(skb);
+	skb_dst_set(skb, dst);
+
+	return 0;
+}
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+		  "Global functions as their definitions will be in reassembly BTF");
+
+/* bpf_ipv6_frag_rcv - Defragment an ipv6 packet
+ *
+ * This helper takes an skb as input. If this skb successfully reassembles
+ * the original packet, the skb is updated to contain the original, reassembled
+ * packet.
+ *
+ * Otherwise (on error or incomplete reassembly), the packet contents of the
+ * input skb are not replaced. Note that set_dst() is applied to the input skb
+ * before reassembly is attempted.
+ *
+ * Parameters:
+ * @ctx   - Pointer to program context (skb)
+ * @netns - Child network namespace id. If value is a negative signed
+ *          32-bit integer, the netns of the device in the skb is used.
+ *
+ * Return:
+ * 0 on successful reassembly. Negative value on error or incomplete
+ * reassembly.
+ *
+ * NOTE(review): a packet that carries no fragment header makes
+ * ipv6_find_hdr() fail, so it is reported as an error rather than 0 -
+ * confirm this is the intended contract.
+ */
+int bpf_ipv6_frag_rcv(struct __sk_buff *ctx, u64 netns)
+{
+	struct sk_buff *skb = (struct sk_buff *)ctx;
+	struct sk_buff *skb_cpy;
+	struct net *caller_net;
+	/* ipv6_find_hdr() reads *offset on entry (non-zero means "start at an
+	 * inner IPv6 header"), so this must start out as 0.
+	 */
+	unsigned int foff = 0;
+	bool netns_held = false;
+	struct net *net;
+	int mac_len;
+	void *mac;
+	int err;
+
+	if (unlikely(!((s32)netns < 0 || netns <= S32_MAX)))
+		return -EINVAL;
+
+	caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
+	if ((s32)netns < 0) {
+		net = caller_net;
+	} else {
+		net = get_net_ns_by_id(caller_net, netns);
+		if (unlikely(!net))
+			return -EINVAL;
+		/* get_net_ns_by_id() hands back a reference we must put. */
+		netns_held = true;
+	}
+
+	err = set_dst(skb, net);
+	if (err < 0)
+		goto out;
+
+	/* Work on a private copy so the caller's skb stays valid whatever
+	 * _ipv6_frag_rcv() does with the buffer it is given.
+	 */
+	mac_len = skb->mac_len;
+	skb_cpy = skb_copy(skb, GFP_ATOMIC);
+	if (!skb_cpy) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/* _ipv6_frag_rcv() expects skb->transport_header to be set to start of
+	 * the frag header and nhoff to be set.
+	 */
+	err = ipv6_find_hdr(skb_cpy, &foff, NEXTHDR_FRAGMENT, NULL, NULL);
+	if (err < 0) {
+		/* The copy has not been handed to anyone yet; free it here. */
+		kfree_skb(skb_cpy);
+		goto out;
+	}
+	skb_set_transport_header(skb_cpy, foff);
+	IP6CB(skb_cpy)->nhoff = offsetof(struct ipv6hdr, nexthdr);
+
+	/* inet6_protocol handlers return >0 on success, 0 on out of band
+	 * consumption, <0 on error. We never expect to see 0 here.
+	 *
+	 * NOTE(review): assumes _ipv6_frag_rcv() has consumed skb_cpy whenever
+	 * it returns <= 0, so it is not freed on those paths - confirm.
+	 */
+	err = _ipv6_frag_rcv(net, skb_cpy, IP6_DEFRAG_BPF);
+	if (err < 0)
+		goto out;
+	if (err == 0) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	skb_morph(skb, skb_cpy);
+	kfree_skb(skb_cpy);
+
+	/* _ipv6_frag_rcv() does not maintain mac header, so push empty header
+	 * in so prog sees the correct layout. The empty mac header will be
+	 * later pulled from cls_bpf.
+	 */
+	skb->mac_len = mac_len;
+	mac = skb_push(skb, mac_len);
+	memset(mac, 0, mac_len);
+	bpf_compute_data_pointers(skb);
+
+	err = 0;
+out:
+	if (netns_held)
+		put_net(net);
+	return err;
+}
+
+__diag_pop();
+
+BTF_SET8_START(ipv6_reassembly_kfunc_set) /* BTF IDs of the kfuncs defined in this file */
+BTF_ID_FLAGS(func, bpf_ipv6_frag_rcv, KF_CHANGES_PKT) /* presumably tells the verifier the packet may change - confirm */
+BTF_SET8_END(ipv6_reassembly_kfunc_set)
+
+static const struct btf_kfunc_id_set ipv6_reassembly_bpf_kfunc_set = { /* passed to register_btf_kfunc_id_set() below */
+ .owner = THIS_MODULE,
+ .set = &ipv6_reassembly_kfunc_set,
+};
+
+/* Register the IPv6 reassembly kfunc set for BPF_PROG_TYPE_SCHED_CLS
+ * programs.
+ *
+ * Return: whatever register_btf_kfunc_id_set() returns.
+ */
+int register_ipv6_reassembly_bpf(void)
+{
+	const struct btf_kfunc_id_set *kset = &ipv6_reassembly_bpf_kfunc_set;
+
+	return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, kset);
+}