[net-next,20/24] net: intel: Use nested-BH locking for XDP redirect.
Commit Message
The per-CPU variables used during bpf_prog_run_xdp() invocation and
later during xdp_do_redirect() rely on disabled BH for their protection.
Without locking in local_bh_disable() on PREEMPT_RT these data structure
require explicit locking.
This is a follow-up on the previous change which introduced
bpf_run_lock.redirect_lock and uses it now within drivers.
The simple way is to acquire the lock before bpf_prog_run_xdp() is
invoked and hold it until the end of function.
This does not always work because some drivers (cpsw, atlantic) invoke
xdp_do_flush() in the same context.
Acquiring the lock in bpf_prog_run_xdp() and dropping in
xdp_do_redirect() (without touching drivers) does not work because not
all driver, which use bpf_prog_run_xdp(), do support XDP_REDIRECT (and
invoke xdp_do_redirect()).
Ideally the minimal locking scope would be bpf_prog_run_xdp() +
xdp_do_redirect() and everything else (error recovery, DMA unmapping,
free/ alloc of memory, …) would happen outside of the locked section.
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Jesper Dangaard Brouer <hawk@kernel.org>
Cc: Jesse Brandeburg <jesse.brandeburg@intel.com>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: Tony Nguyen <anthony.l.nguyen@intel.com>
Cc: bpf@vger.kernel.org (open list:XDP
Cc: intel-wired-lan@lists.osuosl.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
drivers/net/ethernet/intel/i40e/i40e_txrx.c | 1 +
drivers/net/ethernet/intel/i40e/i40e_xsk.c | 22 +++++++++--------
drivers/net/ethernet/intel/ice/ice_txrx.c | 1 +
drivers/net/ethernet/intel/ice/ice_xsk.c | 21 ++++++++--------
drivers/net/ethernet/intel/igb/igb_main.c | 1 +
drivers/net/ethernet/intel/igc/igc_main.c | 5 +++-
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 1 +
drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 24 ++++++++++---------
.../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 3 ++-
9 files changed, 46 insertions(+), 33 deletions(-)
Comments
Hi Sebastian,
kernel test robot noticed the following build errors:
[auto build test ERROR on net-next/main]
url: https://github.com/intel-lab-lkp/linux/commits/Sebastian-Andrzej-Siewior/locking-local_lock-Introduce-guard-definition-for-local_lock/20231216-011911
base: net-next/main
patch link: https://lore.kernel.org/r/20231215171020.687342-21-bigeasy%40linutronix.de
patch subject: [PATCH net-next 20/24] net: intel: Use nested-BH locking for XDP redirect.
config: arm-defconfig (https://download.01.org/0day-ci/archive/20231216/202312161212.D5tju5i6-lkp@intel.com/config)
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project.git f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231216/202312161212.D5tju5i6-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202312161212.D5tju5i6-lkp@intel.com/
All errors (new ones prefixed by >>):
>> drivers/net/ethernet/intel/igb/igb_main.c:8620:3: error: cannot jump from this goto statement to its label
goto xdp_out;
^
drivers/net/ethernet/intel/igb/igb_main.c:8624:2: note: jump bypasses initialization of variable with __attribute__((cleanup))
guard(local_lock_nested_bh)(&bpf_run_lock.redirect_lock);
^
include/linux/cleanup.h:142:15: note: expanded from macro 'guard'
CLASS(_name, __UNIQUE_ID(guard))
^
include/linux/compiler.h:180:29: note: expanded from macro '__UNIQUE_ID'
#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
^
include/linux/compiler_types.h:84:22: note: expanded from macro '__PASTE'
#define __PASTE(a,b) ___PASTE(a,b)
^
include/linux/compiler_types.h:83:23: note: expanded from macro '___PASTE'
#define ___PASTE(a,b) a##b
^
<scratch space>:52:1: note: expanded from here
__UNIQUE_ID_guard753
^
1 error generated.
vim +8620 drivers/net/ethernet/intel/igb/igb_main.c
b1bb2eb0a0deb0 Alexander Duyck 2017-02-06 8608
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8609 static struct sk_buff *igb_run_xdp(struct igb_adapter *adapter,
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8610 struct igb_ring *rx_ring,
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8611 struct xdp_buff *xdp)
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8612 {
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8613 int err, result = IGB_XDP_PASS;
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8614 struct bpf_prog *xdp_prog;
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8615 u32 act;
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8616
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8617 xdp_prog = READ_ONCE(rx_ring->xdp_prog);
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8618
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8619 if (!xdp_prog)
9cbc948b5a20c9 Sven Auhagen 2020-09-02 @8620 goto xdp_out;
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8621
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8622 prefetchw(xdp->data_hard_start); /* xdp_frame write */
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8623
d568b111738dbb Sebastian Andrzej Siewior 2023-12-15 8624 guard(local_lock_nested_bh)(&bpf_run_lock.redirect_lock);
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8625 act = bpf_prog_run_xdp(xdp_prog, xdp);
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8626 switch (act) {
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8627 case XDP_PASS:
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8628 break;
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8629 case XDP_TX:
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8630 result = igb_xdp_xmit_back(adapter, xdp);
74431c40b9c5fa Magnus Karlsson 2021-05-10 8631 if (result == IGB_XDP_CONSUMED)
74431c40b9c5fa Magnus Karlsson 2021-05-10 8632 goto out_failure;
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8633 break;
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8634 case XDP_REDIRECT:
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8635 err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog);
74431c40b9c5fa Magnus Karlsson 2021-05-10 8636 if (err)
74431c40b9c5fa Magnus Karlsson 2021-05-10 8637 goto out_failure;
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8638 result = IGB_XDP_REDIR;
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8639 break;
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8640 default:
c8064e5b4adac5 Paolo Abeni 2021-11-30 8641 bpf_warn_invalid_xdp_action(adapter->netdev, xdp_prog, act);
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8642 fallthrough;
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8643 case XDP_ABORTED:
74431c40b9c5fa Magnus Karlsson 2021-05-10 8644 out_failure:
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8645 trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8646 fallthrough;
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8647 case XDP_DROP:
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8648 result = IGB_XDP_CONSUMED;
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8649 break;
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8650 }
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8651 xdp_out:
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8652 return ERR_PTR(-result);
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8653 }
9cbc948b5a20c9 Sven Auhagen 2020-09-02 8654
On Sat, Dec 16, 2023 at 12:53:43PM +0800, kernel test robot wrote:
> Hi Sebastian,
>
> kernel test robot noticed the following build errors:
>
> [auto build test ERROR on net-next/main]
>
> url: https://github.com/intel-lab-lkp/linux/commits/Sebastian-Andrzej-Siewior/locking-local_lock-Introduce-guard-definition-for-local_lock/20231216-011911
> base: net-next/main
> patch link: https://lore.kernel.org/r/20231215171020.687342-21-bigeasy%40linutronix.de
> patch subject: [PATCH net-next 20/24] net: intel: Use nested-BH locking for XDP redirect.
> config: arm-defconfig (https://download.01.org/0day-ci/archive/20231216/202312161212.D5tju5i6-lkp@intel.com/config)
> compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project.git f28c006a5895fc0e329fe15fead81e37457cb1d1)
> reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231216/202312161212.D5tju5i6-lkp@intel.com/reproduce)
>
> If you fix the issue in a separate patch/commit (i.e. not just a new version of
> the same patch/commit), kindly add following tags
> | Reported-by: kernel test robot <lkp@intel.com>
> | Closes: https://lore.kernel.org/oe-kbuild-all/202312161212.D5tju5i6-lkp@intel.com/
>
> All errors (new ones prefixed by >>):
>
> >> drivers/net/ethernet/intel/igb/igb_main.c:8620:3: error: cannot jump from this goto statement to its label
> goto xdp_out;
> ^
> drivers/net/ethernet/intel/igb/igb_main.c:8624:2: note: jump bypasses initialization of variable with __attribute__((cleanup))
> guard(local_lock_nested_bh)(&bpf_run_lock.redirect_lock);
> ^
> include/linux/cleanup.h:142:15: note: expanded from macro 'guard'
> CLASS(_name, __UNIQUE_ID(guard))
> ^
> include/linux/compiler.h:180:29: note: expanded from macro '__UNIQUE_ID'
> #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
> ^
> include/linux/compiler_types.h:84:22: note: expanded from macro '__PASTE'
> #define __PASTE(a,b) ___PASTE(a,b)
> ^
> include/linux/compiler_types.h:83:23: note: expanded from macro '___PASTE'
> #define ___PASTE(a,b) a##b
> ^
> <scratch space>:52:1: note: expanded from here
> __UNIQUE_ID_guard753
> ^
> 1 error generated.
I initially thought that this may have been
https://github.com/ClangBuiltLinux/linux/issues/1886 but asm goto is not
involved here.
This error occurs because jumping over the initialization of a variable
declared with __attribute__((__cleanup__(...))) does not prevent the
clean up function from running as one may expect it to, but could
instead result in the clean up function getting run on uninitialized
memory. A contrived example (see the bottom of the "Output" tabs for the
execution output):
https://godbolt.org/z/9bvGboxvc
While there is a warning from GCC in that example, I don't see one in
the kernel's case. I see there is an open GCC issue around this problem:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91951
While it is possible that there may not actually be a problem with how
the kernel uses __attribute__((__cleanup__(...))) and gotos, I think
clang's behavior is reasonable given the potential footguns that this
construct has.
Cheers,
Nathan
On Mon, Dec 18, 2023 at 4:01 PM Nathan Chancellor <nathan@kernel.org> wrote:
>
> On Sat, Dec 16, 2023 at 12:53:43PM +0800, kernel test robot wrote:
> > Hi Sebastian,
> >
> > kernel test robot noticed the following build errors:
> >
> > [auto build test ERROR on net-next/main]
> >
> > url: https://github.com/intel-lab-lkp/linux/commits/Sebastian-Andrzej-Siewior/locking-local_lock-Introduce-guard-definition-for-local_lock/20231216-011911
> > base: net-next/main
> > patch link: https://lore.kernel.org/r/20231215171020.687342-21-bigeasy%40linutronix.de
> > patch subject: [PATCH net-next 20/24] net: intel: Use nested-BH locking for XDP redirect.
> > config: arm-defconfig (https://download.01.org/0day-ci/archive/20231216/202312161212.D5tju5i6-lkp@intel.com/config)
> > compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project.git f28c006a5895fc0e329fe15fead81e37457cb1d1)
> > reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231216/202312161212.D5tju5i6-lkp@intel.com/reproduce)
> >
> > If you fix the issue in a separate patch/commit (i.e. not just a new version of
> > the same patch/commit), kindly add following tags
> > | Reported-by: kernel test robot <lkp@intel.com>
> > | Closes: https://lore.kernel.org/oe-kbuild-all/202312161212.D5tju5i6-lkp@intel.com/
> >
> > All errors (new ones prefixed by >>):
> >
> > >> drivers/net/ethernet/intel/igb/igb_main.c:8620:3: error: cannot jump from this goto statement to its label
> > goto xdp_out;
> > ^
^ The problematic goto should be replaced with an early return. (and
perhaps a comment that you can't jump over __cleanup variable
initialization).
Otherwise the compiler cannot put the cleanup in the destination basic
block; it would have to split the edges and have all the happy paths
go to a synthesized basic block that runs the cleanup, then jumps to
the original destination.
> > drivers/net/ethernet/intel/igb/igb_main.c:8624:2: note: jump bypasses initialization of variable with __attribute__((cleanup))
> > guard(local_lock_nested_bh)(&bpf_run_lock.redirect_lock);
> > ^
> > include/linux/cleanup.h:142:15: note: expanded from macro 'guard'
> > CLASS(_name, __UNIQUE_ID(guard))
> > ^
> > include/linux/compiler.h:180:29: note: expanded from macro '__UNIQUE_ID'
> > #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
> > ^
> > include/linux/compiler_types.h:84:22: note: expanded from macro '__PASTE'
> > #define __PASTE(a,b) ___PASTE(a,b)
> > ^
> > include/linux/compiler_types.h:83:23: note: expanded from macro '___PASTE'
> > #define ___PASTE(a,b) a##b
> > ^
> > <scratch space>:52:1: note: expanded from here
> > __UNIQUE_ID_guard753
> > ^
> > 1 error generated.
>
> I initially thought that this may have been
> https://github.com/ClangBuiltLinux/linux/issues/1886 but asm goto is not
> involved here.
>
> This error occurs because jumping over the initialization of a variable
> declared with __attribute__((__cleanup__(...))) does not prevent the
> clean up function from running as one may expect it to, but could
> instead result in the clean up function getting run on uninitialized
> memory. A contrived example (see the bottom of the "Output" tabs for the
> execution output):
>
> https://godbolt.org/z/9bvGboxvc
>
> While there is a warning from GCC in that example, I don't see one in
> the kernel's case. I see there is an open GCC issue around this problem:
>
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91951
>
> While it is possible that there may not actually be a problem with how
> the kernel uses __attribute__((__cleanup__(...))) and gotos, I think
> clang's behavior is reasonable given the potential footguns that this
> construct has.
>
> Cheers,
> Nathan
>
@@ -2326,6 +2326,7 @@ static int i40e_run_xdp(struct i40e_ring *rx_ring, struct xdp_buff *xdp, struct
prefetchw(xdp->data_hard_start); /* xdp_frame write */
+ guard(local_lock_nested_bh)(&bpf_run_lock.redirect_lock);
act = bpf_prog_run_xdp(xdp_prog, xdp);
switch (act) {
case XDP_PASS:
@@ -201,17 +201,19 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp,
struct i40e_ring *xdp_ring;
u32 act;
- act = bpf_prog_run_xdp(xdp_prog, xdp);
+ scoped_guard(local_lock_nested_bh, &bpf_run_lock.redirect_lock) {
+ act = bpf_prog_run_xdp(xdp_prog, xdp);
- if (likely(act == XDP_REDIRECT)) {
- err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
- if (!err)
- return I40E_XDP_REDIR;
- if (xsk_uses_need_wakeup(rx_ring->xsk_pool) && err == -ENOBUFS)
- result = I40E_XDP_EXIT;
- else
- result = I40E_XDP_CONSUMED;
- goto out_failure;
+ if (likely(act == XDP_REDIRECT)) {
+ err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
+ if (!err)
+ return I40E_XDP_REDIR;
+ if (xsk_uses_need_wakeup(rx_ring->xsk_pool) && err == -ENOBUFS)
+ result = I40E_XDP_EXIT;
+ else
+ result = I40E_XDP_CONSUMED;
+ goto out_failure;
+ }
}
switch (act) {
@@ -571,6 +571,7 @@ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
if (!xdp_prog)
goto exit;
+ guard(local_lock_nested_bh)(&bpf_run_lock.redirect_lock);
act = bpf_prog_run_xdp(xdp_prog, xdp);
switch (act) {
case XDP_PASS:
@@ -762,17 +762,18 @@ ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp,
int err, result = ICE_XDP_PASS;
u32 act;
+ scoped_guard(local_lock_nested_bh, &bpf_run_lock.redirect_lock) {
act = bpf_prog_run_xdp(xdp_prog, xdp);
-
- if (likely(act == XDP_REDIRECT)) {
- err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
- if (!err)
- return ICE_XDP_REDIR;
- if (xsk_uses_need_wakeup(rx_ring->xsk_pool) && err == -ENOBUFS)
- result = ICE_XDP_EXIT;
- else
- result = ICE_XDP_CONSUMED;
- goto out_failure;
+ if (likely(act == XDP_REDIRECT)) {
+ err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
+ if (!err)
+ return ICE_XDP_REDIR;
+ if (xsk_uses_need_wakeup(rx_ring->xsk_pool) && err == -ENOBUFS)
+ result = ICE_XDP_EXIT;
+ else
+ result = ICE_XDP_CONSUMED;
+ goto out_failure;
+ }
}
switch (act) {
@@ -8621,6 +8621,7 @@ static struct sk_buff *igb_run_xdp(struct igb_adapter *adapter,
prefetchw(xdp->data_hard_start); /* xdp_frame write */
+ guard(local_lock_nested_bh)(&bpf_run_lock.redirect_lock);
act = bpf_prog_run_xdp(xdp_prog, xdp);
switch (act) {
case XDP_PASS:
@@ -2485,7 +2485,10 @@ static int __igc_xdp_run_prog(struct igc_adapter *adapter,
struct bpf_prog *prog,
struct xdp_buff *xdp)
{
- u32 act = bpf_prog_run_xdp(prog, xdp);
+ u32 act;
+
+ guard(local_lock_nested_bh)(&bpf_run_lock.redirect_lock);
+ act = bpf_prog_run_xdp(prog, xdp);
switch (act) {
case XDP_PASS:
@@ -2203,6 +2203,7 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
prefetchw(xdp->data_hard_start); /* xdp_frame write */
+ guard(local_lock_nested_bh)(&bpf_run_lock.redirect_lock);
act = bpf_prog_run_xdp(xdp_prog, xdp);
switch (act) {
case XDP_PASS:
@@ -104,18 +104,20 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter,
struct xdp_frame *xdpf;
u32 act;
- xdp_prog = READ_ONCE(rx_ring->xdp_prog);
- act = bpf_prog_run_xdp(xdp_prog, xdp);
+ scoped_guard(local_lock_nested_bh, &bpf_run_lock.redirect_lock) {
+ xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+ act = bpf_prog_run_xdp(xdp_prog, xdp);
- if (likely(act == XDP_REDIRECT)) {
- err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
- if (!err)
- return IXGBE_XDP_REDIR;
- if (xsk_uses_need_wakeup(rx_ring->xsk_pool) && err == -ENOBUFS)
- result = IXGBE_XDP_EXIT;
- else
- result = IXGBE_XDP_CONSUMED;
- goto out_failure;
+ if (likely(act == XDP_REDIRECT)) {
+ err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
+ if (!err)
+ return IXGBE_XDP_REDIR;
+ if (xsk_uses_need_wakeup(rx_ring->xsk_pool) && err == -ENOBUFS)
+ result = IXGBE_XDP_EXIT;
+ else
+ result = IXGBE_XDP_CONSUMED;
+ goto out_failure;
+ }
}
switch (act) {
@@ -1059,7 +1059,8 @@ static struct sk_buff *ixgbevf_run_xdp(struct ixgbevf_adapter *adapter,
if (!xdp_prog)
goto xdp_out;
- act = bpf_prog_run_xdp(xdp_prog, xdp);
+ scoped_guard(local_lock_nested_bh, &bpf_run_lock.redirect_lock)
+ act = bpf_prog_run_xdp(xdp_prog, xdp);
switch (act) {
case XDP_PASS:
break;