[bpf-next,v2,5/8] bpf: net: ipv6: Add bpf_ipv6_frag_rcv() kfunc

Message ID bce083a4293eefb048a700b5a6086e8d8c957700.1677526810.git.dxu@dxuuu.xyz
State New
Headers
Series Support defragmenting IPv(4|6) packets in BPF |

Commit Message

Daniel Xu Feb. 27, 2023, 7:51 p.m. UTC
  This helper is used to defragment IPv6 packets. Similar to the previous
bpf_ip_check_defrag() kfunc, this kfunc:

* Returns 0 on defrag + skb update success
* Returns < 0 on error
* Takes care to ensure ctx (skb) remains valid no matter what the
  underlying call to _ipv6_frag_rcv() does
* Is only callable from TC clsact progs

Please see bpf_ip_check_defrag() commit for more details / suggestions.

Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
---
 include/net/ipv6_frag.h   |   1 +
 include/net/transp_v6.h   |   1 +
 net/ipv6/Makefile         |   1 +
 net/ipv6/af_inet6.c       |   4 ++
 net/ipv6/reassembly_bpf.c | 143 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 150 insertions(+)
 create mode 100644 net/ipv6/reassembly_bpf.c
  

Comments

kernel test robot Feb. 28, 2023, 8:15 a.m. UTC | #1
Hi Daniel,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on bpf-next/master]

url:    https://github.com/intel-lab-lkp/linux/commits/Daniel-Xu/ip-frags-Return-actual-error-codes-from-ip_check_defrag/20230228-035449
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
patch link:    https://lore.kernel.org/r/bce083a4293eefb048a700b5a6086e8d8c957700.1677526810.git.dxu%40dxuuu.xyz
patch subject: [PATCH bpf-next v2 5/8] bpf: net: ipv6: Add bpf_ipv6_frag_rcv() kfunc
config: i386-defconfig (https://download.01.org/0day-ci/archive/20230228/202302281646.GYE1qnGb-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-8) 11.3.0
reproduce (this is a W=1 build):
        # https://github.com/intel-lab-lkp/linux/commit/be4610312351d4a658435bd4649a3a830322396d
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Daniel-Xu/ip-frags-Return-actual-error-codes-from-ip_check_defrag/20230228-035449
        git checkout be4610312351d4a658435bd4649a3a830322396d
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        make W=1 O=build_dir ARCH=i386 olddefconfig
        make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash

If you fix the issue, kindly add the following tags where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202302281646.GYE1qnGb-lkp@intel.com/

All errors (new ones prefixed by >>):

   ld: net/ipv6/af_inet6.o: in function `inet6_init':
>> af_inet6.c:(.init.text+0x22a): undefined reference to `register_ipv6_reassembly_bpf'
  
kernel test robot Feb. 28, 2023, 9:37 a.m. UTC | #2
Hi Daniel,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on bpf-next/master]

url:    https://github.com/intel-lab-lkp/linux/commits/Daniel-Xu/ip-frags-Return-actual-error-codes-from-ip_check_defrag/20230228-035449
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
patch link:    https://lore.kernel.org/r/bce083a4293eefb048a700b5a6086e8d8c957700.1677526810.git.dxu%40dxuuu.xyz
patch subject: [PATCH bpf-next v2 5/8] bpf: net: ipv6: Add bpf_ipv6_frag_rcv() kfunc
config: i386-debian-10.3 (https://download.01.org/0day-ci/archive/20230228/202302281707.5vUL3boJ-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-8) 11.3.0
reproduce (this is a W=1 build):
        # https://github.com/intel-lab-lkp/linux/commit/be4610312351d4a658435bd4649a3a830322396d
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Daniel-Xu/ip-frags-Return-actual-error-codes-from-ip_check_defrag/20230228-035449
        git checkout be4610312351d4a658435bd4649a3a830322396d
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        make W=1 O=build_dir ARCH=i386 olddefconfig
        make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash

If you fix the issue, kindly add the following tags where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202302281707.5vUL3boJ-lkp@intel.com/

All errors (new ones prefixed by >>):

   ld: net/ipv6/af_inet6.o: in function `inet6_init':
>> net/ipv6/af_inet6.c:1177: undefined reference to `register_ipv6_reassembly_bpf'


vim +1177 net/ipv6/af_inet6.c

  1061	
  1062	static int __init inet6_init(void)
  1063	{
  1064		struct list_head *r;
  1065		int err = 0;
  1066	
  1067		sock_skb_cb_check_size(sizeof(struct inet6_skb_parm));
  1068	
  1069		/* Register the socket-side information for inet6_create.  */
  1070		for (r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r)
  1071			INIT_LIST_HEAD(r);
  1072	
  1073		raw_hashinfo_init(&raw_v6_hashinfo);
  1074	
  1075		if (disable_ipv6_mod) {
  1076			pr_info("Loaded, but administratively disabled, reboot required to enable\n");
  1077			goto out;
  1078		}
  1079	
  1080		err = proto_register(&tcpv6_prot, 1);
  1081		if (err)
  1082			goto out;
  1083	
  1084		err = proto_register(&udpv6_prot, 1);
  1085		if (err)
  1086			goto out_unregister_tcp_proto;
  1087	
  1088		err = proto_register(&udplitev6_prot, 1);
  1089		if (err)
  1090			goto out_unregister_udp_proto;
  1091	
  1092		err = proto_register(&rawv6_prot, 1);
  1093		if (err)
  1094			goto out_unregister_udplite_proto;
  1095	
  1096		err = proto_register(&pingv6_prot, 1);
  1097		if (err)
  1098			goto out_unregister_raw_proto;
  1099	
  1100		/* We MUST register RAW sockets before we create the ICMP6,
  1101		 * IGMP6, or NDISC control sockets.
  1102		 */
  1103		err = rawv6_init();
  1104		if (err)
  1105			goto out_unregister_ping_proto;
  1106	
  1107		/* Register the family here so that the init calls below will
  1108		 * be able to create sockets. (?? is this dangerous ??)
  1109		 */
  1110		err = sock_register(&inet6_family_ops);
  1111		if (err)
  1112			goto out_sock_register_fail;
  1113	
  1114		/*
  1115		 *	ipngwg API draft makes clear that the correct semantics
  1116		 *	for TCP and UDP is to consider one TCP and UDP instance
  1117		 *	in a host available by both INET and INET6 APIs and
  1118		 *	able to communicate via both network protocols.
  1119		 */
  1120	
  1121		err = register_pernet_subsys(&inet6_net_ops);
  1122		if (err)
  1123			goto register_pernet_fail;
  1124		err = ip6_mr_init();
  1125		if (err)
  1126			goto ipmr_fail;
  1127		err = icmpv6_init();
  1128		if (err)
  1129			goto icmp_fail;
  1130		err = ndisc_init();
  1131		if (err)
  1132			goto ndisc_fail;
  1133		err = igmp6_init();
  1134		if (err)
  1135			goto igmp_fail;
  1136	
  1137		err = ipv6_netfilter_init();
  1138		if (err)
  1139			goto netfilter_fail;
  1140		/* Create /proc/foo6 entries. */
  1141	#ifdef CONFIG_PROC_FS
  1142		err = -ENOMEM;
  1143		if (raw6_proc_init())
  1144			goto proc_raw6_fail;
  1145		if (udplite6_proc_init())
  1146			goto proc_udplite6_fail;
  1147		if (ipv6_misc_proc_init())
  1148			goto proc_misc6_fail;
  1149		if (if6_proc_init())
  1150			goto proc_if6_fail;
  1151	#endif
  1152		err = ip6_route_init();
  1153		if (err)
  1154			goto ip6_route_fail;
  1155		err = ndisc_late_init();
  1156		if (err)
  1157			goto ndisc_late_fail;
  1158		err = ip6_flowlabel_init();
  1159		if (err)
  1160			goto ip6_flowlabel_fail;
  1161		err = ipv6_anycast_init();
  1162		if (err)
  1163			goto ipv6_anycast_fail;
  1164		err = addrconf_init();
  1165		if (err)
  1166			goto addrconf_fail;
  1167	
  1168		/* Init v6 extension headers. */
  1169		err = ipv6_exthdrs_init();
  1170		if (err)
  1171			goto ipv6_exthdrs_fail;
  1172	
  1173		err = ipv6_frag_init();
  1174		if (err)
  1175			goto ipv6_frag_fail;
  1176	
> 1177		err = register_ipv6_reassembly_bpf();
  1178		if (err)
  1179			goto ipv6_frag_fail;
  1180	
  1181		/* Init v6 transport protocols. */
  1182		err = udpv6_init();
  1183		if (err)
  1184			goto udpv6_fail;
  1185	
  1186		err = udplitev6_init();
  1187		if (err)
  1188			goto udplitev6_fail;
  1189	
  1190		err = udpv6_offload_init();
  1191		if (err)
  1192			goto udpv6_offload_fail;
  1193	
  1194		err = tcpv6_init();
  1195		if (err)
  1196			goto tcpv6_fail;
  1197	
  1198		err = ipv6_packet_init();
  1199		if (err)
  1200			goto ipv6_packet_fail;
  1201	
  1202		err = pingv6_init();
  1203		if (err)
  1204			goto pingv6_fail;
  1205	
  1206		err = calipso_init();
  1207		if (err)
  1208			goto calipso_fail;
  1209	
  1210		err = seg6_init();
  1211		if (err)
  1212			goto seg6_fail;
  1213	
  1214		err = rpl_init();
  1215		if (err)
  1216			goto rpl_fail;
  1217	
  1218		err = ioam6_init();
  1219		if (err)
  1220			goto ioam6_fail;
  1221	
  1222		err = igmp6_late_init();
  1223		if (err)
  1224			goto igmp6_late_err;
  1225
  

Patch

diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h
index 7321ffe3a108..cf4763cd3886 100644
--- a/include/net/ipv6_frag.h
+++ b/include/net/ipv6_frag.h
@@ -15,6 +15,7 @@  enum ip6_defrag_users {
 	__IP6_DEFRAG_CONNTRACK_OUT	= IP6_DEFRAG_CONNTRACK_OUT + USHRT_MAX,
 	IP6_DEFRAG_CONNTRACK_BRIDGE_IN,
 	__IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX,
+	IP6_DEFRAG_BPF,
 };
 
 /*
diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h
index d27b1caf3753..244123a74349 100644
--- a/include/net/transp_v6.h
+++ b/include/net/transp_v6.h
@@ -20,6 +20,7 @@  int ipv6_exthdrs_init(void);
 void ipv6_exthdrs_exit(void);
 int ipv6_frag_init(void);
 void ipv6_frag_exit(void);
+int register_ipv6_reassembly_bpf(void);
 
 /* transport protocols */
 int pingv6_init(void);
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 3036a45e8a1e..6e90ff1d20c0 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -26,6 +26,7 @@  ipv6-$(CONFIG_IPV6_SEG6_LWTUNNEL) += seg6_iptunnel.o seg6_local.o
 ipv6-$(CONFIG_IPV6_SEG6_HMAC) += seg6_hmac.o
 ipv6-$(CONFIG_IPV6_RPL_LWTUNNEL) += rpl_iptunnel.o
 ipv6-$(CONFIG_IPV6_IOAM6_LWTUNNEL) += ioam6_iptunnel.o
+ipv6-$(CONFIG_DEBUG_INFO_BTF) += reassembly_bpf.o
 
 obj-$(CONFIG_INET6_AH) += ah6.o
 obj-$(CONFIG_INET6_ESP) += esp6.o
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 38689bedfce7..39663de75fbd 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -1174,6 +1174,10 @@  static int __init inet6_init(void)
 	if (err)
 		goto ipv6_frag_fail;
 
+	err = register_ipv6_reassembly_bpf();
+	if (err)
+		goto ipv6_frag_fail;
+
 	/* Init v6 transport protocols. */
 	err = udpv6_init();
 	if (err)
diff --git a/net/ipv6/reassembly_bpf.c b/net/ipv6/reassembly_bpf.c
new file mode 100644
index 000000000000..c6c804d4f636
--- /dev/null
+++ b/net/ipv6/reassembly_bpf.c
@@ -0,0 +1,143 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/* Unstable ipv6 fragmentation helpers for TC-BPF hook
+ *
+ * These are called from SCHED_CLS BPF programs. Note that it is allowed to
+ * break compatibility for these functions since the interface they are exposed
+ * through to BPF programs is explicitly unstable.
+ */
+
+#include <linux/bpf.h>
+#include <linux/btf_ids.h>
+#include <linux/filter.h>
+#include <linux/netdevice.h>
+#include <net/ipv6.h>
+#include <net/ipv6_frag.h>
+#include <net/ipv6_stubs.h>
+
+/* Route skb in net by its IPv6 header and attach the result as skb's dst.
+ * Returns 0 on success, else the error carried by the lookup result.
+ */
+static int set_dst(struct sk_buff *skb, struct net *net)
+{
+	const struct ipv6hdr *hdr = ipv6_hdr(skb);
+	struct flowi6 key = {
+		.saddr	      = hdr->saddr,
+		.daddr	      = hdr->daddr,
+		.flowi6_proto = hdr->nexthdr,
+		.flowlabel    = ip6_flowinfo(hdr),
+		.flowi6_iif   = skb->skb_iif,
+		.flowi6_mark  = skb->mark,
+		.flowi6_flags = FLOWI_FLAG_ANYSRC,
+	};
+	struct dst_entry *route;
+
+	route = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &key, NULL);
+	if (!IS_ERR(route))
+		skb_dst_set(skb, route);
+
+	return PTR_ERR_OR_ZERO(route);
+}
+
+__diag_push(); /* opens a diagnostic scope; closed by __diag_pop() below */
+__diag_ignore_all("-Wmissing-prototypes",
+		  "Global functions as their definitions will be in reassembly BTF");
+
+/* bpf_ipv6_frag_rcv - Defragment an ipv6 packet
+ *
+ * This helper takes an skb as input. If this skb successfully reassembles
+ * the original packet, the skb is updated to contain the original, reassembled
+ * packet. Otherwise (error or incomplete reassembly) its data is unchanged.
+ *
+ * Parameters:
+ * @ctx		- Pointer to program context (skb)
+ * @netns	- Child network namespace id. If value is a negative signed
+ *		  32-bit integer, the netns of the device in the skb is used.
+ *
+ * Return:
+ * 0 on successful reassembly. Negative value on error or incomplete
+ * reassembly. NOTE(review): a packet without a fragment header looks like it
+ * takes the ipv6_find_hdr() error path instead of returning 0 - confirm.
+ */
+int bpf_ipv6_frag_rcv(struct __sk_buff *ctx, u64 netns)
+{
+	struct sk_buff *skb = (struct sk_buff *)ctx;
+	struct net *caller_net, *net;
+	struct sk_buff *skb_cpy;
+	unsigned int foff;
+	int mac_len, err;
+
+	if (unlikely(!((s32)netns < 0 || netns <= S32_MAX)))
+		return -EINVAL;
+
+	caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
+	if ((s32)netns < 0) {
+		net = caller_net;
+	} else {
+		net = get_net_ns_by_id(caller_net, netns);
+		if (unlikely(!net))
+			return -EINVAL;
+	}
+
+	err = set_dst(skb, net);
+	if (err < 0)
+		goto out;
+
+	mac_len = skb->mac_len;
+	err = -ENOMEM;
+	skb_cpy = skb_copy(skb, GFP_ATOMIC);
+	if (!skb_cpy)
+		goto out;
+
+	/* _ipv6_frag_rcv() expects skb->transport_header to be set to start of
+	 * the frag header and nhoff to be set. If no frag header is found the
+	 * copy has not been handed off yet, so free it here.
+	 */
+	err = ipv6_find_hdr(skb_cpy, &foff, NEXTHDR_FRAGMENT, NULL, NULL);
+	if (err < 0) {
+		kfree_skb(skb_cpy);
+		goto out;
+	}
+	skb_set_transport_header(skb_cpy, foff);
+	IP6CB(skb_cpy)->nhoff = offsetof(struct ipv6hdr, nexthdr);
+
+	/* inet6_protocol handlers return >0 on success, 0 on out of band
+	 * consumption, <0 on error. 0 is never expected, so map it to -EINVAL.
+	 * NOTE(review): assumes skb_cpy is consumed unless >0 - confirm.
+	 */
+	err = _ipv6_frag_rcv(net, skb_cpy, IP6_DEFRAG_BPF) ?: -EINVAL;
+	if (err < 0)
+		goto out;
+
+	skb_morph(skb, skb_cpy);
+	kfree_skb(skb_cpy);
+
+	/* _ipv6_frag_rcv() does not maintain mac header, so push empty header
+	 * in so prog sees the correct layout. The empty mac header will be
+	 * later pulled from cls_bpf.
+	 */
+	skb->mac_len = mac_len;
+	memset(skb_push(skb, mac_len), 0, mac_len);
+	bpf_compute_data_pointers(skb);
+	err = 0;
+out:
+	if ((s32)netns >= 0)
+		put_net(net); /* drop the get_net_ns_by_id() reference */
+	return err;
+}
+
+__diag_pop() /* closes the diagnostic scope opened by __diag_push() above */
+
+BTF_SET8_START(ipv6_reassembly_kfunc_set) /* kfuncs exposed by this file */
+BTF_ID_FLAGS(func, bpf_ipv6_frag_rcv, KF_CHANGES_PKT)
+BTF_SET8_END(ipv6_reassembly_kfunc_set)
+/* Wraps the id set above; passed to register_btf_kfunc_id_set() below. */
+static const struct btf_kfunc_id_set ipv6_reassembly_bpf_kfunc_set = {
+	.owner = THIS_MODULE,
+	.set   = &ipv6_reassembly_kfunc_set,
+};
+
+/* Register the reassembly kfunc set for BPF_PROG_TYPE_SCHED_CLS programs. */
+int register_ipv6_reassembly_bpf(void)
+{
+	return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &ipv6_reassembly_bpf_kfunc_set);
+}