[v6] net/core: Introduce netdev_core_stats_inc()

Message ID 20230928100418.521594-1-yajun.deng@linux.dev
State New
Series [v6] net/core: Introduce netdev_core_stats_inc()

Commit Message

Yajun Deng Sept. 28, 2023, 10:04 a.m. UTC
Although there is a kfree_skb_reason() helper function that can be used to
find the reason why an skb is dropped, most callers don't increment one of
rx_dropped, tx_dropped, rx_nohandler and rx_otherhost_dropped.

For users, what matters is why the drop counters reported by ip are
increasing.

Introduce netdev_core_stats_inc() to trace the caller of a dropped skb.
Also, add __cold to netdev_core_stats_alloc(), as it is unlikely to be
called.

Signed-off-by: Yajun Deng <yajun.deng@linux.dev>
Suggested-by: Alexander Lobakin <aleksander.lobakin@intel.com>
---
v6: merge netdev_core_stats and netdev_core_stats_inc together
v5: Access the per cpu pointer before reaching the relevant offset.
v4: Introduce netdev_core_stats_inc() instead of export dev_core_stats_*_inc()
v3: __cold should be added to the netdev_core_stats_alloc().
v2: use __cold instead of inline in dev_core_stats().
v1: https://lore.kernel.org/netdev/20230911082016.3694700-1-yajun.deng@linux.dev/
---
 include/linux/netdevice.h | 21 ++++-----------------
 net/core/dev.c            | 17 +++++++++++++++--
 2 files changed, 19 insertions(+), 19 deletions(-)
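
As a quick illustration (not part of the patch): after this change, each
dev_core_stats_*_inc() inline collapses into a call to one out-of-line
symbol,

	/* dev_core_stats_rx_dropped_inc(dev) now expands to */
	netdev_core_stats_inc(dev,
			offsetof(struct net_device_core_stats, rx_dropped));

so all four core drop counters can be attributed to their call sites with a
single probe, e.g. with bpftrace (if available):

	bpftrace -e 'kprobe:netdev_core_stats_inc { @[kstack] = count(); }'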
  

Comments

Eric Dumazet Sept. 28, 2023, 2:18 p.m. UTC | #1
On Thu, Sep 28, 2023 at 12:04 PM Yajun Deng <yajun.deng@linux.dev> wrote:
>
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 7e520c14eb8c..eb1fa04fbccc 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -4002,32 +4002,19 @@ static __always_inline bool __is_skb_forwardable(const struct net_device *dev,
>         return false;
>  }
>
> -struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device *dev);
> -
> -static inline struct net_device_core_stats __percpu *dev_core_stats(struct net_device *dev)
> -{
> -       /* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */
> -       struct net_device_core_stats __percpu *p = READ_ONCE(dev->core_stats);
> -
> -       if (likely(p))
> -               return p;
> -
> -       return netdev_core_stats_alloc(dev);
> -}
> +void netdev_core_stats_inc(struct net_device *dev, u32 offset);
>
>  #define DEV_CORE_STATS_INC(FIELD)                                              \
>  static inline void dev_core_stats_##FIELD##_inc(struct net_device *dev)                \
>  {                                                                              \
> -       struct net_device_core_stats __percpu *p;                               \
> -                                                                               \
> -       p = dev_core_stats(dev);                                                \
> -       if (p)                                                                  \
> -               this_cpu_inc(p->FIELD);                                         \

Note that we were using this_cpu_inc() which implied:
- IRQ safety, and
- a barrier paired with:

net/core/dev.c:10548: storage->rx_dropped += READ_ONCE(core_stats->rx_dropped);
net/core/dev.c:10549: storage->tx_dropped += READ_ONCE(core_stats->tx_dropped);
net/core/dev.c:10550: storage->rx_nohandler += READ_ONCE(core_stats->rx_nohandler);
net/core/dev.c:10551: storage->rx_otherhost_dropped += READ_ONCE(core_stats->rx_otherhost_dropped);
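
Concretely (sketch; the single-instruction claim is x86-64):

	/* irq-safe: one instruction ("incq %gs:off"), or an
	 * irq-disabled section on architectures without one
	 */
	this_cpu_inc(p->rx_dropped);

	/* open-coded RMW: load, add, store; an irq that bumps the
	 * same counter between the load and the store loses an update
	 */
	unsigned long *ptr = this_cpu_ptr(&p->rx_dropped);
	*ptr = *ptr + 1;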


> +       netdev_core_stats_inc(dev,                                              \
> +                       offsetof(struct net_device_core_stats, FIELD));         \
>  }
>  DEV_CORE_STATS_INC(rx_dropped)
>  DEV_CORE_STATS_INC(tx_dropped)
>  DEV_CORE_STATS_INC(rx_nohandler)
>  DEV_CORE_STATS_INC(rx_otherhost_dropped)
> +#undef DEV_CORE_STATS_INC
>
>  static __always_inline int ____dev_forward_skb(struct net_device *dev,
>                                                struct sk_buff *skb,
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 606a366cc209..88a32c392c1d 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -10497,7 +10497,8 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
>  }
>  EXPORT_SYMBOL(netdev_stats_to_stats64);
>
> -struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device *dev)
> +static __cold struct net_device_core_stats __percpu *netdev_core_stats_alloc(
> +               struct net_device *dev)
>  {
>         struct net_device_core_stats __percpu *p;
>
> @@ -10510,7 +10511,19 @@ struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device
>         /* This READ_ONCE() pairs with the cmpxchg() above */
>         return READ_ONCE(dev->core_stats);
>  }
> -EXPORT_SYMBOL(netdev_core_stats_alloc);
> +
> +void netdev_core_stats_inc(struct net_device *dev, u32 offset)
> +{
> +       /* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */
> +       struct net_device_core_stats __percpu *p = READ_ONCE(dev->core_stats);
> +
> +       if (unlikely(!p))
> +               p = netdev_core_stats_alloc(dev);
> +
> +       if (p)
> +               (*(unsigned long *)((void *)this_cpu_ptr(p) + offset))++;

While here you are using a ++ operation that:

- is not irq safe
- might cause store-tearing.

I would suggest a preliminary patch converting the "unsigned long" fields in
struct net_device_core_stats to local_t
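
i.e. something like this (illustrative sketch of the suggested conversion):

	#include <asm/local.h>

	struct net_device_core_stats {
		local_t	rx_dropped;
		local_t	tx_dropped;
		local_t	rx_nohandler;
		local_t	rx_otherhost_dropped;
	};

	/* the increment stays safe against local irqs: */
	local_inc(this_cpu_ptr(&p->rx_dropped));

	/* and dev_get_stats() would fold with local_read() instead of
	 * READ_ONCE():
	 *	storage->rx_dropped += local_read(&core_stats->rx_dropped);
	 */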

You might be able to tweak this to

unsigned long __percpu *field = (unsigned long __percpu *)((u8 *)p + offset);
this_cpu_inc(field);
  
Yajun Deng Sept. 28, 2023, 3:40 p.m. UTC | #2
On 2023/9/28 22:18, Eric Dumazet wrote:
> While here you are using a ++ operation that:
>
> - is not irq safe
> - might cause store-tearing.
>
> I would suggest a preliminary patch converting the "unsigned long" fields in
> struct net_device_core_stats to local_t

Do you mean it needs to revert commit 6510ea973d8d ("net: Use
this_cpu_inc() to increment net->core_stats") first? But it would allocate
memory, which breaks on PREEMPT_RT.

>
> > You might be able to tweak this to
> >
> > unsigned long __percpu *field = (unsigned long __percpu *)((u8 *)p + offset);
> > this_cpu_inc(field);
  
Eric Dumazet Sept. 28, 2023, 3:44 p.m. UTC | #3
On Thu, Sep 28, 2023 at 5:40 PM Yajun Deng <yajun.deng@linux.dev> wrote:
>
> > I would suggest a preliminary patch converting the "unsigned long" fields in
> > struct net_device_core_stats to local_t
>
> Do you mean it needs to revert commit 6510ea973d8d ("net: Use
> this_cpu_inc() to increment net->core_stats") first? But it would allocate
> memory, which breaks on PREEMPT_RT.

I think I provided an (untested) alternative.

unsigned long __percpu *field = (__force unsigned long __percpu *)((__force u8 *)p + offset);
this_cpu_inc(field);


  
Yajun Deng Sept. 28, 2023, 4:16 p.m. UTC | #4
On 2023/9/28 23:44, Eric Dumazet wrote:
> I think I provided an (untested) alternative.
>
> unsigned long __percpu *field = (__force unsigned long __percpu *)((__force u8 *)p + offset);
> this_cpu_inc(field);

unsigned long __percpu *field = (__force unsigned long __percpu *)((__force u8 *)p + offset);
this_cpu_inc(*(int *)field);

This compiles successfully, but I didn't test it.
This could look complex.
Should I go back to v3 and export dev_core_stats_*_inc() instead of
introducing netdev_core_stats_inc()? That would be easy.

  
Eric Dumazet Sept. 28, 2023, 4:23 p.m. UTC | #5
On Thu, Sep 28, 2023 at 6:16 PM Yajun Deng <yajun.deng@linux.dev> wrote:
>
> > I think I provided an (untested) alternative.
> >
> > unsigned long __percpu *field = (__force unsigned long __percpu *)((__force u8 *)p + offset);
> > this_cpu_inc(field);
>
> unsigned long __percpu *field = (__force unsigned long __percpu *)((__force u8 *)p + offset);
> this_cpu_inc(*(int *)field);
>
> This compiles successfully, but I didn't test it.
> This could look complex.

Why exactly? Not very different from the cast you already had.

> Should I go back to v3 and export dev_core_stats_*_inc() instead of
> introducing netdev_core_stats_inc()? That would be easy.

Well, you tell me, but this does not look incremental to me.

I do not think we need 4 different (and maybe more to come if struct
net_device_core_stats grows in the future) functions for some hardly used
path.
  
Yajun Deng Sept. 28, 2023, 4:32 p.m. UTC | #6
On 2023/9/29 00:23, Eric Dumazet wrote:
>> unsigned long __percpu *field = (__force unsigned long __percpu *)((__force u8 *)p + offset);
>> this_cpu_inc(*(int *)field);
>>
>> This compiles successfully, but I didn't test it.
>> This could look complex.
> Why exactly? Not very different from the cast you already had.
Okay, I'll test it.
  
Yajun Deng Sept. 29, 2023, 5:38 a.m. UTC | #7
On 2023/9/29 00:32, Yajun Deng wrote:
>>> unsigned long __percpu *field = (__force unsigned long __percpu *)((__force u8 *)p + offset);
>>> this_cpu_inc(*(int *)field);
>>>
>>> This compiles successfully, but I didn't test it.
>>> This could look complex.
>> Why exactly? Not very different from the cast you already had.
> Okay, I'll test it.


It seems something is wrong.

"ip -s a" shows the 'dropped' counter increasing, but I can't trace
anything with the following command:

"sudo python3 /usr/share/bcc/tools/trace netdev_core_stats_inc"

If I change back to "(*(unsigned long *)((void *)this_cpu_ptr(p) + offset))++;",
I can trace the caller.

So the following code accidentally changes something:

unsigned long __percpu *field = (__force unsigned long __percpu *)((__force u8 *)p + offset);
this_cpu_inc(*field);
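
Assembled into the full helper, the variant under test reads (sketch: the
posted netdev_core_stats_inc() body plus the casts above):

	void netdev_core_stats_inc(struct net_device *dev, u32 offset)
	{
		/* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */
		struct net_device_core_stats __percpu *p = READ_ONCE(dev->core_stats);
		unsigned long __percpu *field;

		if (unlikely(!p))
			p = netdev_core_stats_alloc(dev);

		if (p) {
			field = (__force unsigned long __percpu *)((__force u8 *)p + offset);
			this_cpu_inc(*field);
		}
	}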

  

Patch

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7e520c14eb8c..eb1fa04fbccc 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4002,32 +4002,19 @@  static __always_inline bool __is_skb_forwardable(const struct net_device *dev,
 	return false;
 }
 
-struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device *dev);
-
-static inline struct net_device_core_stats __percpu *dev_core_stats(struct net_device *dev)
-{
-	/* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */
-	struct net_device_core_stats __percpu *p = READ_ONCE(dev->core_stats);
-
-	if (likely(p))
-		return p;
-
-	return netdev_core_stats_alloc(dev);
-}
+void netdev_core_stats_inc(struct net_device *dev, u32 offset);
 
 #define DEV_CORE_STATS_INC(FIELD)						\
 static inline void dev_core_stats_##FIELD##_inc(struct net_device *dev)		\
 {										\
-	struct net_device_core_stats __percpu *p;				\
-										\
-	p = dev_core_stats(dev);						\
-	if (p)									\
-		this_cpu_inc(p->FIELD);						\
+	netdev_core_stats_inc(dev,						\
+			offsetof(struct net_device_core_stats, FIELD));		\
 }
 DEV_CORE_STATS_INC(rx_dropped)
 DEV_CORE_STATS_INC(tx_dropped)
 DEV_CORE_STATS_INC(rx_nohandler)
 DEV_CORE_STATS_INC(rx_otherhost_dropped)
+#undef DEV_CORE_STATS_INC
 
 static __always_inline int ____dev_forward_skb(struct net_device *dev,
 					       struct sk_buff *skb,
diff --git a/net/core/dev.c b/net/core/dev.c
index 606a366cc209..88a32c392c1d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -10497,7 +10497,8 @@  void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
 }
 EXPORT_SYMBOL(netdev_stats_to_stats64);
 
-struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device *dev)
+static __cold struct net_device_core_stats __percpu *netdev_core_stats_alloc(
+		struct net_device *dev)
 {
 	struct net_device_core_stats __percpu *p;
 
@@ -10510,7 +10511,19 @@  struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device
 	/* This READ_ONCE() pairs with the cmpxchg() above */
 	return READ_ONCE(dev->core_stats);
 }
-EXPORT_SYMBOL(netdev_core_stats_alloc);
+
+void netdev_core_stats_inc(struct net_device *dev, u32 offset)
+{
+	/* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */
+	struct net_device_core_stats __percpu *p = READ_ONCE(dev->core_stats);
+
+	if (unlikely(!p))
+		p = netdev_core_stats_alloc(dev);
+
+	if (p)
+		(*(unsigned long *)((void *)this_cpu_ptr(p) + offset))++;
+}
+EXPORT_SYMBOL_GPL(netdev_core_stats_inc);
 
 /**
  *	dev_get_stats	- get network device statistics