[RFC,nf-next,v2,1/2] netfilter: bpf: support prog update

Message ID 1702873101-77522-2-git-send-email-alibuda@linux.alibaba.com
State New
Headers
Series netfilter: bpf: support prog update |

Commit Message

D. Wythe Dec. 18, 2023, 4:18 a.m. UTC
  From: "D. Wythe" <alibuda@linux.alibaba.com>

To support prog update, we need to ensure that the prog seen
within the hook is always valid. Hooks are always protected by
rcu_read_lock(), which gives us the ability to access the prog
under RCU.

Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
---
 net/netfilter/nf_bpf_link.c | 63 ++++++++++++++++++++++++++++++++++-----------
 1 file changed, 48 insertions(+), 15 deletions(-)
  

Comments

Simon Horman Dec. 18, 2023, 7:06 p.m. UTC | #1
On Mon, Dec 18, 2023 at 12:18:20PM +0800, D. Wythe wrote:
> From: "D. Wythe" <alibuda@linux.alibaba.com>
> 
> To support the prog update, we need to ensure that the prog seen
> within the hook is always valid. Considering that hooks are always
> protected by rcu_read_lock(), which provide us the ability to
> access the prog under rcu.
> 
> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>

...

> @@ -26,8 +17,20 @@ struct bpf_nf_link {
>  	struct net *net;
>  	u32 dead;
>  	const struct nf_defrag_hook *defrag_hook;
> +	struct rcu_head head;
>  };
>  
> +static unsigned int nf_hook_run_bpf(void *bpf_link, struct sk_buff *skb,
> +				    const struct nf_hook_state *s)
> +{
> +	const struct bpf_nf_link *nf_link = bpf_link;
> +	struct bpf_nf_ctx ctx = {
> +		.state = s,
> +		.skb = skb,
> +	};
> +	return bpf_prog_run(rcu_dereference(nf_link->link.prog), &ctx);

Hi,

AFAICT nf_link->link.prog isn't annotated as __rcu,
so perhaps rcu_dereference() is not correct here?

In any case, sparse seems a bit unhappy:

  .../nf_bpf_link.c:31:29: error: incompatible types in comparison expression (different address spaces):
  .../nf_bpf_link.c:31:29:    struct bpf_prog [noderef] __rcu *
  .../nf_bpf_link.c:31:29:    struct bpf_prog *

> +}
> +
>  #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
>  static const struct nf_defrag_hook *
>  get_proto_defrag_hook(struct bpf_nf_link *link,

...
  
D. Wythe Dec. 19, 2023, 12:50 p.m. UTC | #2
On 12/19/23 3:06 AM, Simon Horman wrote:
> On Mon, Dec 18, 2023 at 12:18:20PM +0800, D. Wythe wrote:
>> From: "D. Wythe" <alibuda@linux.alibaba.com>
>>
>> To support the prog update, we need to ensure that the prog seen
>> within the hook is always valid. Considering that hooks are always
>> protected by rcu_read_lock(), which provide us the ability to
>> access the prog under rcu.
>>
>> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
> ...
>
>> @@ -26,8 +17,20 @@ struct bpf_nf_link {
>>   	struct net *net;
>>   	u32 dead;
>>   	const struct nf_defrag_hook *defrag_hook;
>> +	struct rcu_head head;
>>   };
>>   
>> +static unsigned int nf_hook_run_bpf(void *bpf_link, struct sk_buff *skb,
>> +				    const struct nf_hook_state *s)
>> +{
>> +	const struct bpf_nf_link *nf_link = bpf_link;
>> +	struct bpf_nf_ctx ctx = {
>> +		.state = s,
>> +		.skb = skb,
>> +	};
>> +	return bpf_prog_run(rcu_dereference(nf_link->link.prog), &ctx);
> Hi,
>
> AFAICT nf_link->link.prog isn't annotated as __rcu,
> so perhaps rcu_dereference() is not correct here?
>
> In any case, sparse seems a bit unhappy:
>
>    .../nf_bpf_link.c:31:29: error: incompatible types in comparison expression (different address spaces):
>    .../nf_bpf_link.c:31:29:    struct bpf_prog [noderef] __rcu *
>    .../nf_bpf_link.c:31:29:    struct bpf_prog *

Hi Simon,

Thanks for the report.

Yes, I had anticipated that sparse would report an error. I tried to
cast the type, but that produced a compile error like this:


net/netfilter/nf_bpf_link.c: In function ‘nf_hook_run_bpf’:
./include/asm-generic/rwonce.h:44:70: error: lvalue required as unary 
‘&’ operand
    44 | #define __READ_ONCE(x) (*(const volatile 
__unqual_scalar_typeof(x) *)&(x))
| ^
./include/asm-generic/rwonce.h:50:2: note: in expansion of macro 
‘__READ_ONCE’
    50 |  __READ_ONCE(x);       \
       |  ^~~~~~~~~~~
./include/linux/rcupdate.h:436:43: note: in expansion of macro ‘READ_ONCE’
   436 |  typeof(*p) *local = (typeof(*p) *__force)READ_ONCE(p); \
       |                                           ^~~~~~~~~
./include/linux/rcupdate.h:584:2: note: in expansion of macro 
‘__rcu_dereference_check’
   584 |  __rcu_dereference_check((p), __UNIQUE_ID(rcu), \
       |  ^~~~~~~~~~~~~~~~~~~~~~~
./include/linux/rcupdate.h:656:28: note: in expansion of macro 
‘rcu_dereference_check’
   656 | #define rcu_dereference(p) rcu_dereference_check(p, 0)
       |                            ^~~~~~~~~~~~~~~~~~~~~
net/netfilter/nf_bpf_link.c:31:22: note: in expansion of macro 
‘rcu_dereference’
    31 |  return bpf_prog_run(rcu_dereference((const struct bpf_prog 
__rcu *)nf_link->link.prog), &ctx);
       |                      ^~~~~~~~~~~~~~~

So, I think we might need to go back to version 1.

@ Florian , what do you think ?

D. Wythe

>> +}
>> +
>>   #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
>>   static const struct nf_defrag_hook *
>>   get_proto_defrag_hook(struct bpf_nf_link *link,
> ...
  
Florian Westphal Dec. 19, 2023, 2:58 p.m. UTC | #3
D. Wythe <alibuda@linux.alibaba.com> wrote:
> net/netfilter/nf_bpf_link.c:31:22: note: in expansion of macro
> ‘rcu_dereference’
>    31 |  return bpf_prog_run(rcu_dereference((const struct bpf_prog __rcu
> *)nf_link->link.prog), &ctx);
>       |                      ^~~~~~~~~~~~~~~
> 
> So, I think we might need to go back to version 1.
> 
> @ Florian , what do you think ?

Use rcu_dereference_raw().
  
D. Wythe Dec. 20, 2023, 12:40 p.m. UTC | #4
On 12/19/23 10:58 PM, Florian Westphal wrote:
> D. Wythe <alibuda@linux.alibaba.com> wrote:
>> net/netfilter/nf_bpf_link.c:31:22: note: in expansion of macro
>> ‘rcu_dereference’
>>     31 |  return bpf_prog_run(rcu_dereference((const struct bpf_prog __rcu
>> *)nf_link->link.prog), &ctx);
>>        |                      ^~~~~~~~~~~~~~~
>>
>> So, I think we might need to go back to version 1.
>>
>> @ Florian , what do you think ?
> Use rcu_dereference_raw().

Got it. I'm also good with that.

D. Wythe
  

Patch

diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c
index e502ec0..8eed7cf 100644
--- a/net/netfilter/nf_bpf_link.c
+++ b/net/netfilter/nf_bpf_link.c
@@ -8,17 +8,8 @@ 
 #include <net/netfilter/nf_bpf_link.h>
 #include <uapi/linux/netfilter_ipv4.h>
 
-static unsigned int nf_hook_run_bpf(void *bpf_prog, struct sk_buff *skb,
-				    const struct nf_hook_state *s)
-{
-	const struct bpf_prog *prog = bpf_prog;
-	struct bpf_nf_ctx ctx = {
-		.state = s,
-		.skb = skb,
-	};
-
-	return bpf_prog_run(prog, &ctx);
-}
+/* protect link update in parallel */
+static DEFINE_MUTEX(bpf_nf_mutex);
 
 struct bpf_nf_link {
 	struct bpf_link link;
@@ -26,8 +17,20 @@  struct bpf_nf_link {
 	struct net *net;
 	u32 dead;
 	const struct nf_defrag_hook *defrag_hook;
+	struct rcu_head head;
 };
 
+static unsigned int nf_hook_run_bpf(void *bpf_link, struct sk_buff *skb,
+				    const struct nf_hook_state *s)
+{
+	const struct bpf_nf_link *nf_link = bpf_link;
+	struct bpf_nf_ctx ctx = {
+		.state = s,
+		.skb = skb,
+	};
+	return bpf_prog_run(rcu_dereference(nf_link->link.prog), &ctx);
+}
+
 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
 static const struct nf_defrag_hook *
 get_proto_defrag_hook(struct bpf_nf_link *link,
@@ -126,8 +129,7 @@  static void bpf_nf_link_release(struct bpf_link *link)
 static void bpf_nf_link_dealloc(struct bpf_link *link)
 {
 	struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
-
-	kfree(nf_link);
+	kfree_rcu(nf_link, head);
 }
 
 static int bpf_nf_link_detach(struct bpf_link *link)
@@ -162,7 +164,34 @@  static int bpf_nf_link_fill_link_info(const struct bpf_link *link,
 static int bpf_nf_link_update(struct bpf_link *link, struct bpf_prog *new_prog,
 			      struct bpf_prog *old_prog)
 {
-	return -EOPNOTSUPP;
+	struct bpf_nf_link *nf_link = container_of(link, struct bpf_nf_link, link);
+	int err = 0;
+
+	mutex_lock(&bpf_nf_mutex);
+
+	if (nf_link->dead) {
+		err = -EPERM;
+		goto out;
+	}
+
+	/* target old_prog mismatch */
+	if (old_prog && link->prog != old_prog) {
+		err = -EPERM;
+		goto out;
+	}
+
+	old_prog = link->prog;
+	if (old_prog == new_prog) {
+		/* don't need update */
+		bpf_prog_put(new_prog);
+		goto out;
+	}
+
+	old_prog = xchg(&link->prog, new_prog);
+	bpf_prog_put(old_prog);
+out:
+	mutex_unlock(&bpf_nf_mutex);
+	return err;
 }
 
 static const struct bpf_link_ops bpf_nf_link_lops = {
@@ -226,7 +255,11 @@  int bpf_nf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 
 	link->hook_ops.hook = nf_hook_run_bpf;
 	link->hook_ops.hook_ops_type = NF_HOOK_OP_BPF;
-	link->hook_ops.priv = prog;
+
+	/* bpf_nf_link_release() and bpf_nf_link_dealloc() ensure that the link
+	 * remains valid at all times within nf_hook_run_bpf().
+	 */
+	link->hook_ops.priv = link;
 
 	link->hook_ops.pf = attr->link_create.netfilter.pf;
 	link->hook_ops.priority = attr->link_create.netfilter.priority;