bpf: force inc_active()/dec_active() to be inline functions

Message ID 20230722074753.568696-1-arnd@kernel.org
State New
Headers
Series bpf: force inc_active()/dec_active() to be inline functions |

Commit Message

Arnd Bergmann July 22, 2023, 7:47 a.m. UTC
  From: Arnd Bergmann <arnd@arndb.de>

Splitting these out into separate helper functions means that we
actually pass an uninitialized variable into another function call
if dec_active() happens to not be inlined, and CONFIG_PREEMPT_RT
is disabled:

kernel/bpf/memalloc.c: In function 'add_obj_to_free_list':
kernel/bpf/memalloc.c:200:9: error: 'flags' is used uninitialized [-Werror=uninitialized]
  200 |         dec_active(c, flags);

Mark the two functions as __always_inline so gcc can see through
this regardless of optimizations and other options that may
prevent it otherwise.

Fixes: 18e027b1c7c6d ("bpf: Factor out inc/dec of active flag into helpers.")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 kernel/bpf/memalloc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
  

Comments

Yafang Shao July 23, 2023, 2:24 p.m. UTC | #1
On Sat, Jul 22, 2023 at 3:48 PM Arnd Bergmann <arnd@kernel.org> wrote:
>
> From: Arnd Bergmann <arnd@arndb.de>
>
> Splitting these out into separate helper functions means that we
> actually pass an uninitialized variable into another function call
> if dec_active() happens to not be inlined, and CONFIG_PREEMPT_RT
> is disabled:

Do you mean that the compiler can remove the flags automatically when
dec_active() is inlined, but can't remove it automatically when
dec_active() is not inlined ?
If so, why can't we improve the compiler ?

If we have to change the kernel, what about the change below?

index 51d6389..17191ee 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -165,15 +165,17 @@ static struct mem_cgroup *get_memcg(const struct
bpf_mem_cache *c)
 #endif
 }

-static void inc_active(struct bpf_mem_cache *c, unsigned long *flags)
+static unsigned long inc_active(struct bpf_mem_cache *c)
 {
+       unsigned long flags = 0;
+
        if (IS_ENABLED(CONFIG_PREEMPT_RT))
                /* In RT irq_work runs in per-cpu kthread, so disable
                 * interrupts to avoid preemption and interrupts and
                 * reduce the chance of bpf prog executing on this cpu
                 * when active counter is busy.
                 */
-               local_irq_save(*flags);
+               local_irq_save(flags);
        /* alloc_bulk runs from irq_work which will not preempt a bpf
         * program that does unit_alloc/unit_free since IRQs are
         * disabled there. There is no race to increment 'active'
@@ -181,6 +183,7 @@ static void inc_active(struct bpf_mem_cache *c,
unsigned long *flags)
         * bpf prog preempted this loop.
         */
        WARN_ON_ONCE(local_inc_return(&c->active) != 1);
+       return flags;
 }


>
> kernel/bpf/memalloc.c: In function 'add_obj_to_free_list':
> kernel/bpf/memalloc.c:200:9: error: 'flags' is used uninitialized [-Werror=uninitialized]
>   200 |         dec_active(c, flags);
>
> Mark the two functions as __always_inline so gcc can see through
> this regardless of optimizations and other options that may
> prevent it otherwise.
>
> Fixes: 18e027b1c7c6d ("bpf: Factor out inc/dec of active flag into helpers.")
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>
> ---
>  kernel/bpf/memalloc.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
> index 51d6389e5152e..23906325298da 100644
> --- a/kernel/bpf/memalloc.c
> +++ b/kernel/bpf/memalloc.c
> @@ -165,7 +165,7 @@ static struct mem_cgroup *get_memcg(const struct bpf_mem_cache *c)
>  #endif
>  }
>
> -static void inc_active(struct bpf_mem_cache *c, unsigned long *flags)
> +static __always_inline void inc_active(struct bpf_mem_cache *c, unsigned long *flags)
>  {
>         if (IS_ENABLED(CONFIG_PREEMPT_RT))
>                 /* In RT irq_work runs in per-cpu kthread, so disable
> @@ -183,7 +183,7 @@ static void inc_active(struct bpf_mem_cache *c, unsigned long *flags)
>         WARN_ON_ONCE(local_inc_return(&c->active) != 1);
>  }
>
> -static void dec_active(struct bpf_mem_cache *c, unsigned long flags)
> +static __always_inline void dec_active(struct bpf_mem_cache *c, unsigned long flags)
>  {
>         local_dec(&c->active);
>         if (IS_ENABLED(CONFIG_PREEMPT_RT))
> --
> 2.39.2
>
>
  
Alexei Starovoitov July 23, 2023, 4:46 p.m. UTC | #2
On Sun, Jul 23, 2023 at 7:25 AM Yafang Shao <laoar.shao@gmail.com> wrote:
>
> On Sat, Jul 22, 2023 at 3:48 PM Arnd Bergmann <arnd@kernel.org> wrote:
> >
> > From: Arnd Bergmann <arnd@arndb.de>
> >
> > Splitting these out into separate helper functions means that we
> > actually pass an uninitialized variable into another function call
> > if dec_active() happens to not be inlined, and CONFIG_PREEMPT_RT
> > is disabled:
>
> Do you mean that the compiler can remove the flags automatically when
> dec_active() is inlined, but can't remove it automatically when
> dec_active() is not inlined ?
> If so, why can't we improve the compiler ?

Agree.
Sounds like a compiler bug.

> If we have to change the kernel, what about the change below?

To workaround the compiler bug we can simply init flag=0 to silence
the warn, but even that is silly. Passing flag=0 into irqrestore is buggy.
  
Arnd Bergmann July 23, 2023, 6:31 p.m. UTC | #3
On Sun, Jul 23, 2023, at 18:46, Alexei Starovoitov wrote:
> On Sun, Jul 23, 2023 at 7:25 AM Yafang Shao <laoar.shao@gmail.com> wrote:
>> On Sat, Jul 22, 2023 at 3:48 PM Arnd Bergmann <arnd@kernel.org> wrote:
>> > From: Arnd Bergmann <arnd@arndb.de>
>> >
>> > Splitting these out into separate helper functions means that we
>> > actually pass an uninitialized variable into another function call
>> > if dec_active() happens to not be inlined, and CONFIG_PREEMPT_RT
>> > is disabled:
>>
>> Do you mean that the compiler can remove the flags automatically when
>> dec_active() is inlined, but can't remove it automatically when
>> dec_active() is not inlined ?

My educated guess is that it's fine when neither of them are inlined,
since then gcc can assume that 'flags' gets initialized by
inc_active(), and it's fine when both are inlined since dead code
elimination then gets rid of both the initialization and the use.

The only broken case should be when inc_active() is inlined and
gcc can tell that there is never an initialization, but 
dec_active() is not inlined, so gcc assumes it is actually used.

>> If so, why can't we improve the compiler ?
>
> Agree.
> Sounds like a compiler bug.

I don't know what you might want to change in the compiler
to avoid this. Compilers are free to decide which functions to
inline in the absence of noinline or always_inline flags.

One difference between gcc and clang is that gcc tries to
be smart about warnings by using information from inlining
to produce better warnings, while clang never uses information
across function boundaries for generated warnings, so it won't
find this one, but also would ignore an unconditional use
of the uninitialized variable. 

>> If we have to change the kernel, what about the change below?
>
> To workaround the compiler bug we can simply init flag=0 to silence
> the warn, but even that is silly. Passing flag=0 into irqrestore is buggy.

Maybe inc_active() could return the flags instead of modifying
the stack variable? that would also result in slightly better
code when it's not inlined.

     Arnd
  
Alexei Starovoitov July 24, 2023, 6 p.m. UTC | #4
On Sun, Jul 23, 2023 at 11:32 AM Arnd Bergmann <arnd@arndb.de> wrote:
>
> >> If so, why can't we improve the compiler ?
> >
> > Agree.
> > Sounds like a compiler bug.
>
> I don't know what you might want to change in the compiler
> to avoid this. Compilers are free to decide which functions to
> inline in the absence of noinline or always_inline flags.

Clearly a compiler bug.
Compilers should not produce false positive warnings regardless
how inlining went and optimizations performed.


> One difference between gcc and clang is that gcc tries to
> be smart about warnings by using information from inlining
> to produce better warnings, while clang never uses information
> across function boundaries for generated warnings, so it won't
> find this one, but also would ignore an unconditional use
> of the uninitialized variable.
>
> >> If we have to change the kernel, what about the change below?
> >
> > To workaround the compiler bug we can simply init flag=0 to silence
> > the warn, but even that is silly. Passing flag=0 into irqrestore is buggy.
>
> Maybe inc_active() could return the flags instead of modifying
> the stack variable? that would also result in slightly better
> code when it's not inlined.

Which gcc are we talking about here that is so buggy?
  
Arnd Bergmann July 24, 2023, 6:13 p.m. UTC | #5
On Mon, Jul 24, 2023, at 20:00, Alexei Starovoitov wrote:
> On Sun, Jul 23, 2023 at 11:32 AM Arnd Bergmann <arnd@arndb.de> wrote:
>>
>> >> If so, why can't we improve the compiler ?
>> >
>> > Agree.
>> > Sounds like a compiler bug.
>>
>> I don't know what you might want to change in the compiler
>> to avoid this. Compilers are free to decide which functions to
>> inline in the absence of noinline or always_inline flags.
>
> Clearly a compiler bug.
> Compilers should not produce false positive warnings regardless
> how inlining went and optimizations performed.

That would be a nice idea, but until we force everyone to
migrate to clang, that's not something in our power. gcc is
well known to throw tons of warnings that depend on inlining:
-Wnull-dereference, -Wmaybe-uninitialized, -Wdiv-by-zero
and other inherently depend on how much gcc can infer from
inlining and dead code elimination.

In this case, it doesn't even require a lot of imagination,
the code is literally written as undefined behavior when
the first call is inlined and the second one is not, I don't
see what one would do in gcc to /not/ warn about passing
an uninitialized register into a function call, other than
moving the warning before inlining and DCE as clang does.

>> One difference between gcc and clang is that gcc tries to
>> be smart about warnings by using information from inlining
>> to produce better warnings, while clang never uses information
>> across function boundaries for generated warnings, so it won't
>> find this one, but also would ignore an unconditional use
>> of the uninitialized variable.
>>
>> >> If we have to change the kernel, what about the change below?
>> >
>> > To workaround the compiler bug we can simply init flag=0 to silence
>> > the warn, but even that is silly. Passing flag=0 into irqrestore is buggy.
>>
>> Maybe inc_active() could return the flags instead of modifying
>> the stack variable? that would also result in slightly better
>> code when it's not inlined.
>
> Which gcc are we talking about here that is so buggy?

I think I only tried versions 8 through 13 for this one, but
can check others as well.

     Arnd
  
Arnd Bergmann July 24, 2023, 6:29 p.m. UTC | #6
On Mon, Jul 24, 2023, at 20:13, Arnd Bergmann wrote:

>>> One difference between gcc and clang is that gcc tries to
>>> be smart about warnings by using information from inlining
>>> to produce better warnings, while clang never uses information
>>> across function boundaries for generated warnings, so it won't
>>> find this one, but also would ignore an unconditional use
>>> of the uninitialized variable.
>>>
>>> >> If we have to change the kernel, what about the change below?
>>> >
>>> > To workaround the compiler bug we can simply init flag=0 to silence
>>> > the warn, but even that is silly. Passing flag=0 into irqrestore is buggy.
>>>
>>> Maybe inc_active() could return the flags instead of modifying
>>> the stack variable? that would also result in slightly better
>>> code when it's not inlined.
>>
>> Which gcc are we talking about here that is so buggy?
>
> I think I only tried versions 8 through 13 for this one, but
> can check others as well.

I have a minimized test case at https://godbolt.org/z/hK4ev17fv
that shows the problem happening with all versions of gcc
(4.1 through 14.0) if I force the dec_active() function to be
inline and force inc_active() to be non-inline.

With clang, I only see the warning if I turn dec_active() into
a macro instead of an inline function. This is the expected
behavior in clang.

     Arnd
  
Alexei Starovoitov July 24, 2023, 7:15 p.m. UTC | #7
On Mon, Jul 24, 2023 at 11:30 AM Arnd Bergmann <arnd@kernel.org> wrote:
>
> On Mon, Jul 24, 2023, at 20:13, Arnd Bergmann wrote:
>
> >>> One difference between gcc and clang is that gcc tries to
> >>> be smart about warnings by using information from inlining
> >>> to produce better warnings, while clang never uses information
> >>> across function boundaries for generated warnings, so it won't
> >>> find this one, but also would ignore an unconditional use
> >>> of the uninitialized variable.
> >>>
> >>> >> If we have to change the kernel, what about the change below?
> >>> >
> >>> > To workaround the compiler bug we can simply init flag=0 to silence
> >>> > the warn, but even that is silly. Passing flag=0 into irqrestore is buggy.
> >>>
> >>> Maybe inc_active() could return the flags instead of modifying
> >>> the stack variable? that would also result in slightly better
> >>> code when it's not inlined.
> >>
> >> Which gcc are we talking about here that is so buggy?
> >
> > I think I only tried versions 8 through 13 for this one, but
> > can check others as well.
>
> I have a minimized test case at https://godbolt.org/z/hK4ev17fv
> that shows the problem happening with all versions of gcc
> (4.1 through 14.0) if I force the dec_active() function to be
> inline and force inc_active() to be non-inline.

That's a bit of cheating, but I see your point now.
How about we do:
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index 51d6389e5152..3fa0944cb975 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -183,11 +183,11 @@ static void inc_active(struct bpf_mem_cache *c,
unsigned long *flags)
        WARN_ON_ONCE(local_inc_return(&c->active) != 1);
 }

-static void dec_active(struct bpf_mem_cache *c, unsigned long flags)
+static void dec_active(struct bpf_mem_cache *c, unsigned long *flags)
 {
        local_dec(&c->active);
        if (IS_ENABLED(CONFIG_PREEMPT_RT))
-               local_irq_restore(flags);
+               local_irq_restore(*flags);
 }

 static void add_obj_to_free_list(struct bpf_mem_cache *c, void *obj)
@@ -197,7 +197,7 @@ static void add_obj_to_free_list(struct
bpf_mem_cache *c, void *obj)
        inc_active(c, &flags);
        __llist_add(obj, &c->free_llist);
        c->free_cnt++;
-       dec_active(c, flags);
+       dec_active(c, &flags);


It's symmetrical and there is no 'flags = 0' ugliness.
  
Arnd Bergmann July 24, 2023, 8:41 p.m. UTC | #8
On Mon, Jul 24, 2023, at 21:15, Alexei Starovoitov wrote:
> On Mon, Jul 24, 2023 at 11:30 AM Arnd Bergmann <arnd@kernel.org> wrote:
>> On Mon, Jul 24, 2023, at 20:13, Arnd Bergmann wrote:
>>
>> I have a minimized test case at https://godbolt.org/z/hK4ev17fv
>> that shows the problem happening with all versions of gcc
>> (4.1 through 14.0) if I force the dec_active() function to be
>> inline and force inc_active() to be non-inline.
>
> That's a bit of cheating, but I see your point now.
> How about we do:
> diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
> index 51d6389e5152..3fa0944cb975 100644
> --- a/kernel/bpf/memalloc.c
> +++ b/kernel/bpf/memalloc.c
> @@ -183,11 +183,11 @@ static void inc_active(struct bpf_mem_cache *c,
> unsigned long *flags)
>         WARN_ON_ONCE(local_inc_return(&c->active) != 1);
>  }
>
> -static void dec_active(struct bpf_mem_cache *c, unsigned long flags)
> +static void dec_active(struct bpf_mem_cache *c, unsigned long *flags)
>  {
>         local_dec(&c->active);
>         if (IS_ENABLED(CONFIG_PREEMPT_RT))
> -               local_irq_restore(flags);
> +               local_irq_restore(*flags);
>  }


Sure, that's fine. Between this and the two suggestions I had
(__always_inline or passing the flags from  inc_active as a
return code), I don't have a strong preference, so pick whichever
you like.

      Arnd
  
Alexei Starovoitov July 25, 2023, 6:15 p.m. UTC | #9
On Mon, Jul 24, 2023 at 1:41 PM Arnd Bergmann <arnd@arndb.de> wrote:
>
> On Mon, Jul 24, 2023, at 21:15, Alexei Starovoitov wrote:
> > On Mon, Jul 24, 2023 at 11:30 AM Arnd Bergmann <arnd@kernel.org> wrote:
> >> On Mon, Jul 24, 2023, at 20:13, Arnd Bergmann wrote:
> >>
> >> I have a minimized test case at https://godbolt.org/z/hK4ev17fv
> >> that shows the problem happening with all versions of gcc
> >> (4.1 through 14.0) if I force the dec_active() function to be
> >> inline and force inc_active() to be non-inline.
> >
> > That's a bit of cheating, but I see your point now.
> > How about we do:
> > diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
> > index 51d6389e5152..3fa0944cb975 100644
> > --- a/kernel/bpf/memalloc.c
> > +++ b/kernel/bpf/memalloc.c
> > @@ -183,11 +183,11 @@ static void inc_active(struct bpf_mem_cache *c,
> > unsigned long *flags)
> >         WARN_ON_ONCE(local_inc_return(&c->active) != 1);
> >  }
> >
> > -static void dec_active(struct bpf_mem_cache *c, unsigned long flags)
> > +static void dec_active(struct bpf_mem_cache *c, unsigned long *flags)
> >  {
> >         local_dec(&c->active);
> >         if (IS_ENABLED(CONFIG_PREEMPT_RT))
> > -               local_irq_restore(flags);
> > +               local_irq_restore(*flags);
> >  }
>
>
> Sure, that's fine. Between this and the two suggestions I had
> (__always_inline or passing the flags from  inc_active as a
> return code), I don't have a strong preference, so pick whichever
> you like.

I think:
static void dec_active(struct bpf_mem_cache *c, unsigned long *flags)
is cleaner.
Could you send a patch?
  
Arnd Bergmann July 25, 2023, 8:27 p.m. UTC | #10
On Tue, Jul 25, 2023, at 20:15, Alexei Starovoitov wrote:
> On Mon, Jul 24, 2023 at 1:41 PM Arnd Bergmann <arnd@arndb.de> wrote:
>>
>> Sure, that's fine. Between this and the two suggestions I had
>> (__always_inline or passing the flags from  inc_active as a
>> return code), I don't have a strong preference, so pick whichever
>> you like.
>
> I think:
> static void dec_active(struct bpf_mem_cache *c, unsigned long *flags)
> is cleaner.
> Could you send a patch?

Ok, done,

     Arnd
  

Patch

diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index 51d6389e5152e..23906325298da 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -165,7 +165,7 @@  static struct mem_cgroup *get_memcg(const struct bpf_mem_cache *c)
 #endif
 }
 
-static void inc_active(struct bpf_mem_cache *c, unsigned long *flags)
+static __always_inline void inc_active(struct bpf_mem_cache *c, unsigned long *flags)
 {
 	if (IS_ENABLED(CONFIG_PREEMPT_RT))
 		/* In RT irq_work runs in per-cpu kthread, so disable
@@ -183,7 +183,7 @@  static void inc_active(struct bpf_mem_cache *c, unsigned long *flags)
 	WARN_ON_ONCE(local_inc_return(&c->active) != 1);
 }
 
-static void dec_active(struct bpf_mem_cache *c, unsigned long flags)
+static __always_inline void dec_active(struct bpf_mem_cache *c, unsigned long flags)
 {
 	local_dec(&c->active);
 	if (IS_ENABLED(CONFIG_PREEMPT_RT))