[bpf-next,4/7] netfilter: bpf: Support BPF_F_NETFILTER_IP_DEFRAG in netfilter link
Commit Message
This commit adds support for enabling IP defrag using pre-existing
netfilter defrag support. Basically all the flag does is bump a refcnt
while the link is active. Checks are also added to ensure the prog
requesting defrag support is run _after_ netfilter defrag hooks.
Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
---
include/uapi/linux/bpf.h | 5 ++
net/netfilter/nf_bpf_link.c | 108 +++++++++++++++++++++++++++++----
tools/include/uapi/linux/bpf.h | 5 ++
3 files changed, 107 insertions(+), 11 deletions(-)
Comments
Daniel Xu <dxu@dxuuu.xyz> wrote:
> +static int bpf_nf_enable_defrag(struct bpf_nf_link *link)
> +{
> + int err;
> +
> + switch (link->hook_ops.pf) {
> +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
> + case NFPROTO_IPV4:
> + const struct nf_defrag_v4_hook *v4_hook;
> +
> + err = request_module("nf_defrag_ipv4");
> + if (err)
> + return err;
> +
> + rcu_read_lock();
> + v4_hook = rcu_dereference(nf_defrag_v4_hook);
> + err = v4_hook->enable(link->net);
> + rcu_read_unlock();
I'd reverse this: first try rcu_dereference(), then modprobe
only if that returned NULL.
> +static void bpf_nf_disable_defrag(struct bpf_nf_link *link)
> +{
> + switch (link->hook_ops.pf) {
> +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
> + case NFPROTO_IPV4:
> + const struct nf_defrag_v4_hook *v4_hook;
> +
> + rcu_read_lock();
> + v4_hook = rcu_dereference(nf_defrag_v4_hook);
> + v4_hook->disable(link->net);
> + rcu_read_unlock();
if (v4_hook)
v4_hook->disable()
Else we get trouble on manual 'rmmod'.
> + /* make sure conntrack confirm is always last */
> + prio = attr->link_create.netfilter.priority;
> + if (prio == NF_IP_PRI_FIRST)
> + return -ERANGE; /* sabotage_in and other warts */
> + else if (prio == NF_IP_PRI_LAST)
> + return -ERANGE; /* e.g. conntrack confirm */
> + else if ((attr->link_create.netfilter.flags & BPF_F_NETFILTER_IP_DEFRAG) &&
> + (prio > NF_IP_PRI_FIRST && prio <= NF_IP_PRI_CONNTRACK_DEFRAG))
> + return -ERANGE; /* cannot use defrag if prog runs before nf_defrag */
You could elide the `prio > NF_IP_PRI_FIRST` check; it's already handled by
the first conditional. Otherwise this looks good to me.
On Tue, Jun 27, 2023 at 01:12:48PM +0200, Florian Westphal wrote:
> Daniel Xu <dxu@dxuuu.xyz> wrote:
> > +static int bpf_nf_enable_defrag(struct bpf_nf_link *link)
> > +{
> > + int err;
> > +
> > + switch (link->hook_ops.pf) {
> > +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
> > + case NFPROTO_IPV4:
> > + const struct nf_defrag_v4_hook *v4_hook;
> > +
> > + err = request_module("nf_defrag_ipv4");
> > + if (err)
> > + return err;
> > +
> > + rcu_read_lock();
> > + v4_hook = rcu_dereference(nf_defrag_v4_hook);
> > + err = v4_hook->enable(link->net);
> > + rcu_read_unlock();
>
> I'd reverse this, first try rcu_dereference(), then modprobe
> if thats returned NULL.
Ack.
>
> > +static void bpf_nf_disable_defrag(struct bpf_nf_link *link)
> > +{
> > + switch (link->hook_ops.pf) {
> > +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
> > + case NFPROTO_IPV4:
> > + const struct nf_defrag_v4_hook *v4_hook;
> > +
> > + rcu_read_lock();
> > + v4_hook = rcu_dereference(nf_defrag_v4_hook);
> > + v4_hook->disable(link->net);
> > + rcu_read_unlock();
>
> if (v4_hook)
> v4_hook->disable()
>
> Else we get trouble on manual 'rmmod'.
Ah good catch, thanks.
>
> > + /* make sure conntrack confirm is always last */
> > + prio = attr->link_create.netfilter.priority;
> > + if (prio == NF_IP_PRI_FIRST)
> > + return -ERANGE; /* sabotage_in and other warts */
> > + else if (prio == NF_IP_PRI_LAST)
> > + return -ERANGE; /* e.g. conntrack confirm */
> > + else if ((attr->link_create.netfilter.flags & BPF_F_NETFILTER_IP_DEFRAG) &&
> > + (prio > NF_IP_PRI_FIRST && prio <= NF_IP_PRI_CONNTRACK_DEFRAG))
> > + return -ERANGE; /* cannot use defrag if prog runs before nf_defrag */
>
> You could elide the (prio > NF_IP_PRI_FIRST, its already handled by
> first conditional. Otherwise this looks good to me.
>
Ah, right. It's INT_MIN.
Thanks,
Daniel
@@ -1170,6 +1170,11 @@ enum bpf_link_type {
*/
#define BPF_F_KPROBE_MULTI_RETURN (1U << 0)
+/* link_create.netfilter.flags used in LINK_CREATE command for
+ * BPF_PROG_TYPE_NETFILTER to enable IP packet defragmentation.
+ */
+#define BPF_F_NETFILTER_IP_DEFRAG (1U << 0)
+
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
* the following extensions:
*
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <linux/filter.h>
+#include <linux/kmod.h>
#include <linux/netfilter.h>
#include <net/netfilter/nf_bpf_link.h>
@@ -23,8 +24,77 @@ struct bpf_nf_link {
struct nf_hook_ops hook_ops;
struct net *net;
u32 dead;
+ bool defrag;
};
+/* Enable netfilter IP defragmentation in link->net for the protocol
+ * family recorded in link->hook_ops.pf.
+ *
+ * The defrag hook pointer is looked up first; the backing module is only
+ * requested when no hook is registered yet. The pointer is re-read after
+ * the module request and checked for NULL before it is dereferenced,
+ * because a successful request_module() does not guarantee that the
+ * hook has been published.
+ *
+ * Returns the result of the hook's enable() callback, or a negative errno:
+ *   -EAFNOSUPPORT  family is not handled (or its defrag support is not
+ *                  built in),
+ *   -EINVAL        request_module() reported a positive (modprobe) status,
+ *   -ENOENT        the module was requested but no hook is registered.
+ *
+ * NOTE(review): enable() is invoked inside the RCU read-side critical
+ * section; confirm that the callbacks cannot sleep, otherwise a module
+ * reference should be taken instead.
+ */
+static int bpf_nf_enable_defrag(struct bpf_nf_link *link)
+{
+	int err;
+
+	switch (link->hook_ops.pf) {
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
+	case NFPROTO_IPV4: {
+		/* braces: a declaration may not directly follow a case label */
+		const struct nf_defrag_v4_hook *v4_hook;
+
+		rcu_read_lock();
+		v4_hook = rcu_dereference(nf_defrag_v4_hook);
+		if (!v4_hook) {
+			/* request_module() may sleep: drop RCU around it */
+			rcu_read_unlock();
+			err = request_module("nf_defrag_ipv4");
+			if (err)
+				return err < 0 ? err : -EINVAL;
+
+			rcu_read_lock();
+			v4_hook = rcu_dereference(nf_defrag_v4_hook);
+			if (!v4_hook) {
+				rcu_read_unlock();
+				return -ENOENT;
+			}
+		}
+		err = v4_hook->enable(link->net);
+		rcu_read_unlock();
+
+		return err;
+	}
+#endif
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
+	case NFPROTO_IPV6: {
+		const struct nf_defrag_v6_hook *v6_hook;
+
+		rcu_read_lock();
+		v6_hook = rcu_dereference(nf_defrag_v6_hook);
+		if (!v6_hook) {
+			rcu_read_unlock();
+			/* the module is nf_defrag_ipv6; nf_defrag_ipv6_hooks
+			 * is only the name of one of its object files
+			 */
+			err = request_module("nf_defrag_ipv6");
+			if (err)
+				return err < 0 ? err : -EINVAL;
+
+			rcu_read_lock();
+			v6_hook = rcu_dereference(nf_defrag_v6_hook);
+			if (!v6_hook) {
+				rcu_read_unlock();
+				return -ENOENT;
+			}
+		}
+		err = v6_hook->enable(link->net);
+		rcu_read_unlock();
+
+		return err;
+	}
+#endif
+	default:
+		return -EAFNOSUPPORT;
+	}
+}
+
+/* Drop the defrag enablement taken by bpf_nf_enable_defrag() for the
+ * protocol family recorded in link->hook_ops.pf.
+ *
+ * The hook pointer is checked for NULL before use: the defrag module may
+ * have been removed manually (rmmod) while the link was still alive, in
+ * which case there is nothing left to disable.
+ *
+ * The switch is closed outside of the CONFIG_NF_DEFRAG_IPV6 conditional
+ * so the function also builds when IPv6 defrag support is disabled.
+ */
+static void bpf_nf_disable_defrag(struct bpf_nf_link *link)
+{
+	switch (link->hook_ops.pf) {
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
+	case NFPROTO_IPV4: {
+		/* braces: a declaration may not directly follow a case label */
+		const struct nf_defrag_v4_hook *v4_hook;
+
+		rcu_read_lock();
+		v4_hook = rcu_dereference(nf_defrag_v4_hook);
+		if (v4_hook)
+			v4_hook->disable(link->net);
+		rcu_read_unlock();
+
+		break;
+	}
+#endif
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
+	case NFPROTO_IPV6: {
+		const struct nf_defrag_v6_hook *v6_hook;
+
+		rcu_read_lock();
+		v6_hook = rcu_dereference(nf_defrag_v6_hook);
+		if (v6_hook)
+			v6_hook->disable(link->net);
+		rcu_read_unlock();
+
+		break;
+	}
+#endif
+	default:
+		/* families without defrag support have nothing to undo */
+		break;
+	}
+}
+
static void bpf_nf_link_release(struct bpf_link *link)
{
struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
@@ -37,6 +107,9 @@ static void bpf_nf_link_release(struct bpf_link *link)
*/
if (!cmpxchg(&nf_link->dead, 0, 1))
nf_unregister_net_hook(nf_link->net, &nf_link->hook_ops);
+
+ if (nf_link->defrag)
+ bpf_nf_disable_defrag(nf_link);
}
static void bpf_nf_link_dealloc(struct bpf_link *link)
@@ -92,6 +165,8 @@ static const struct bpf_link_ops bpf_nf_link_lops = {
static int bpf_nf_check_pf_and_hooks(const union bpf_attr *attr)
{
+ int prio;
+
switch (attr->link_create.netfilter.pf) {
case NFPROTO_IPV4:
case NFPROTO_IPV6:
@@ -102,19 +177,18 @@ static int bpf_nf_check_pf_and_hooks(const union bpf_attr *attr)
return -EAFNOSUPPORT;
}
- if (attr->link_create.netfilter.flags)
+ if (attr->link_create.netfilter.flags & ~BPF_F_NETFILTER_IP_DEFRAG)
return -EOPNOTSUPP;
- /* make sure conntrack confirm is always last.
- *
- * In the future, if userspace can e.g. request defrag, then
- * "defrag_requested && prio before NF_IP_PRI_CONNTRACK_DEFRAG"
- * should fail.
- */
- switch (attr->link_create.netfilter.priority) {
- case NF_IP_PRI_FIRST: return -ERANGE; /* sabotage_in and other warts */
- case NF_IP_PRI_LAST: return -ERANGE; /* e.g. conntrack confirm */
- }
+ /* make sure conntrack confirm is always last */
+ prio = attr->link_create.netfilter.priority;
+ if (prio == NF_IP_PRI_FIRST)
+ return -ERANGE; /* sabotage_in and other warts */
+ else if (prio == NF_IP_PRI_LAST)
+ return -ERANGE; /* e.g. conntrack confirm */
+ else if ((attr->link_create.netfilter.flags & BPF_F_NETFILTER_IP_DEFRAG) &&
+ (prio > NF_IP_PRI_FIRST && prio <= NF_IP_PRI_CONNTRACK_DEFRAG))
+ return -ERANGE; /* cannot use defrag if prog runs before nf_defrag */
return 0;
}
@@ -156,6 +230,18 @@ int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
return err;
}
+ if (attr->link_create.netfilter.flags & BPF_F_NETFILTER_IP_DEFRAG) {
+ err = bpf_nf_enable_defrag(link);
+ if (err) {
+ bpf_link_cleanup(&link_primer);
+ return err;
+ }
+ /* only mark defrag enabled if enabling succeeds so cleanup path
+ * doesn't disable without a corresponding enable
+ */
+ link->defrag = true;
+ }
+
err = nf_register_net_hook(net, &link->hook_ops);
if (err) {
bpf_link_cleanup(&link_primer);
@@ -1170,6 +1170,11 @@ enum bpf_link_type {
*/
#define BPF_F_KPROBE_MULTI_RETURN (1U << 0)
+/* link_create.netfilter.flags used in LINK_CREATE command for
+ * BPF_PROG_TYPE_NETFILTER to enable IP packet defragmentation.
+ */
+#define BPF_F_NETFILTER_IP_DEFRAG (1U << 0)
+
/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
* the following extensions:
*