[RESEND,bpf-next,09/15] xdp: Add VLAN tag hint

Message ID 20230512152607.992209-10-larysa.zaremba@intel.com
State New
Headers
Series new kfunc XDP hints and ice implementation |

Commit Message

Larysa Zaremba May 12, 2023, 3:26 p.m. UTC
  Implement functionality that enables drivers to expose VLAN tag
to XDP code.

Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
---
 Documentation/networking/xdp-rx-metadata.rst | 11 ++++++++-
 include/linux/netdevice.h                    |  2 ++
 include/net/xdp.h                            |  4 ++++
 kernel/bpf/offload.c                         |  4 ++++
 net/core/xdp.c                               | 24 ++++++++++++++++++++
 5 files changed, 44 insertions(+), 1 deletion(-)
  

Comments

Stanislav Fomichev May 12, 2023, 6:28 p.m. UTC | #1
On 05/12, Larysa Zaremba wrote:
> Implement functionality that enables drivers to expose VLAN tag
> to XDP code.
> 
> Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>

Acked-by: Stanislav Fomichev <sdf@google.com>

> ---
>  Documentation/networking/xdp-rx-metadata.rst | 11 ++++++++-
>  include/linux/netdevice.h                    |  2 ++
>  include/net/xdp.h                            |  4 ++++
>  kernel/bpf/offload.c                         |  4 ++++
>  net/core/xdp.c                               | 24 ++++++++++++++++++++
>  5 files changed, 44 insertions(+), 1 deletion(-)
> 
> diff --git a/Documentation/networking/xdp-rx-metadata.rst b/Documentation/networking/xdp-rx-metadata.rst
> index 25ce72af81c2..73a78029c596 100644
> --- a/Documentation/networking/xdp-rx-metadata.rst
> +++ b/Documentation/networking/xdp-rx-metadata.rst
> @@ -18,7 +18,16 @@ Currently, the following kfuncs are supported. In the future, as more
>  metadata is supported, this set will grow:
>  
>  .. kernel-doc:: net/core/xdp.c
> -   :identifiers: bpf_xdp_metadata_rx_timestamp bpf_xdp_metadata_rx_hash
> +   :identifiers: bpf_xdp_metadata_rx_timestamp
> +
> +.. kernel-doc:: net/core/xdp.c
> +   :identifiers: bpf_xdp_metadata_rx_hash
> +
> +.. kernel-doc:: net/core/xdp.c
> +   :identifiers: bpf_xdp_metadata_rx_ctag
> +
> +.. kernel-doc:: net/core/xdp.c
> +   :identifiers: bpf_xdp_metadata_rx_stag
>  
>  An XDP program can use these kfuncs to read the metadata into stack
>  variables for its own consumption. Or, to pass the metadata on to other
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 08fbd4622ccf..fdae37fe11f5 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -1655,6 +1655,8 @@ struct xdp_metadata_ops {
>  	int	(*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp);
>  	int	(*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash,
>  			       enum xdp_rss_hash_type *rss_type);
> +	int	(*xmo_rx_ctag)(const struct xdp_md *ctx, u16 *vlan_tag);
> +	int	(*xmo_rx_stag)(const struct xdp_md *ctx, u16 *vlan_tag);
>  };
>  
>  /**
> diff --git a/include/net/xdp.h b/include/net/xdp.h
> index 6381560efae2..2db7439fc60f 100644
> --- a/include/net/xdp.h
> +++ b/include/net/xdp.h
> @@ -389,6 +389,10 @@ void xdp_attachment_setup(struct xdp_attachment_info *info,
>  			   bpf_xdp_metadata_rx_timestamp) \
>  	XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_HASH, \
>  			   bpf_xdp_metadata_rx_hash) \
> +	XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_CTAG, \
> +			   bpf_xdp_metadata_rx_ctag) \
> +	XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_STAG, \
> +			   bpf_xdp_metadata_rx_stag) \
>  
>  enum {
>  #define XDP_METADATA_KFUNC(name, _) name,
> diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
> index d9c9f45e3529..2c6b6e82cfac 100644
> --- a/kernel/bpf/offload.c
> +++ b/kernel/bpf/offload.c
> @@ -848,6 +848,10 @@ void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog, u32 func_id)
>  		p = ops->xmo_rx_timestamp;
>  	else if (func_id == bpf_xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_HASH))
>  		p = ops->xmo_rx_hash;
> +	else if (func_id == bpf_xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_CTAG))
> +		p = ops->xmo_rx_ctag;
> +	else if (func_id == bpf_xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_STAG))
> +		p = ops->xmo_rx_stag;
>  out:
>  	up_read(&bpf_devs_lock);
>  
> diff --git a/net/core/xdp.c b/net/core/xdp.c
> index 41e5ca8643ec..eff21501609f 100644
> --- a/net/core/xdp.c
> +++ b/net/core/xdp.c
> @@ -738,6 +738,30 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash,
>  	return -EOPNOTSUPP;
>  }
>  
> +/**
> + * bpf_xdp_metadata_rx_ctag - Read XDP packet inner vlan tag.
> + * @ctx: XDP context pointer.
> + * @vlan_tag: Return value pointer.
> + *
> + * Returns 0 on success or ``-errno`` on error.
> + */
> +__bpf_kfunc int bpf_xdp_metadata_rx_ctag(const struct xdp_md *ctx, u16 *vlan_tag)
> +{
> +	return -EOPNOTSUPP;
> +}
> +
> +/**
> + * bpf_xdp_metadata_rx_stag - Read XDP packet outer vlan tag.
> + * @ctx: XDP context pointer.
> + * @vlan_tag: Return value pointer.
> + *
> + * Returns 0 on success or ``-errno`` on error.
> + */
> +__bpf_kfunc int bpf_xdp_metadata_rx_stag(const struct xdp_md *ctx, u16 *vlan_tag)
> +{
> +	return -EOPNOTSUPP;
> +}
> +
>  __diag_pop();
>  
>  BTF_SET8_START(xdp_metadata_kfunc_ids)
> -- 
> 2.35.3
>
  
Jesper Dangaard Brouer May 15, 2023, 3:36 p.m. UTC | #2
On 12/05/2023 17.26, Larysa Zaremba wrote:
> Implement functionality that enables drivers to expose VLAN tag
> to XDP code.
> 
> Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
> ---
[...]

> diff --git a/net/core/xdp.c b/net/core/xdp.c
> index 41e5ca8643ec..eff21501609f 100644
> --- a/net/core/xdp.c
> +++ b/net/core/xdp.c
> @@ -738,6 +738,30 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash,
>   	return -EOPNOTSUPP;
>   }
>   

Remember below becomes part of main documentation on HW metadata hints:
  - https://kernel.org/doc/html/latest/networking/xdp-rx-metadata.html

Hint: to compile the documentation locally I use:
  make SPHINXDIRS="networking" htmldocs

> +/**
> + * bpf_xdp_metadata_rx_ctag - Read XDP packet inner vlan tag.

Is bpf_xdp_metadata_rx_ctag a good function name for the inner VLAN tag?
Likewise for "stag" below.

I cannot remember if the C-tag or S-tag is the inner or outer vlan tag.

When reading BPF code that uses these function names, I would have
to ask Google for help, or find and read this doc.

Can we come-up with a more intuitive name, that e.g. helps when reading
the BPF-prog code?

> + * @ctx: XDP context pointer.
> + * @vlan_tag: Return value pointer.
> + *

IMHO right here, there should be a description.

E.g. for what a VLAN "tag" means.  I assume a "tag" isn't the VLAN id,
but the raw VLAN tag that also contains the prio numbers etc.

Is this VLAN tag expected to be in network byte order?
IMHO this doc should define what is expected (and driver developers must
follow this).

> + * Returns 0 on success or ``-errno`` on error.
> + */
> +__bpf_kfunc int bpf_xdp_metadata_rx_ctag(const struct xdp_md *ctx, u16 *vlan_tag)
> +{
> +	return -EOPNOTSUPP;
> +}
> +
> +/**
> + * bpf_xdp_metadata_rx_stag - Read XDP packet outer vlan tag.
> + * @ctx: XDP context pointer.
> + * @vlan_tag: Return value pointer.
> + *
> + * Returns 0 on success or ``-errno`` on error.

IMHO we should provide more guidance on the expected return codes and what
they mean.  IMHO driver developers must only return codes that are
described here, and if they invent a new one, add it as part of their patch.

See, formatting in bpf_xdp_metadata_rx_hash and check how this gets
compiled into HTML.


> + */
> +__bpf_kfunc int bpf_xdp_metadata_rx_stag(const struct xdp_md *ctx, u16 *vlan_tag)
> +{
> +	return -EOPNOTSUPP;
> +}
> +
  
Larysa Zaremba May 15, 2023, 4:09 p.m. UTC | #3
On Mon, May 15, 2023 at 05:36:12PM +0200, Jesper Dangaard Brouer wrote:
> 
> 
> On 12/05/2023 17.26, Larysa Zaremba wrote:
> > Implement functionality that enables drivers to expose VLAN tag
> > to XDP code.
> > 
> > Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
> > ---
> [...]
> 
> > diff --git a/net/core/xdp.c b/net/core/xdp.c
> > index 41e5ca8643ec..eff21501609f 100644
> > --- a/net/core/xdp.c
> > +++ b/net/core/xdp.c
> > @@ -738,6 +738,30 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash,
> >   	return -EOPNOTSUPP;
> >   }
> 
> Remember below becomes part of main documentation on HW metadata hints:
>  - https://kernel.org/doc/html/latest/networking/xdp-rx-metadata.html
> 
> Hint compiling locally I use:
>  make SPHINXDIRS="networking" htmldocs
> 
> > +/**
> > + * bpf_xdp_metadata_rx_ctag - Read XDP packet inner vlan tag.
> 
> Is bpf_xdp_metadata_rx_ctag a good function name for the inner vlan tag?
> Like wise below "stag".
> 
> I cannot remember if the C-tag or S-tag is the inner or outer vlan tag.
> 
> When reading BPF code that use these function names, then I would have
> to ask Google for help, or find-and-read this doc.
> 
> Can we come-up with a more intuitive name, that e.g. helps when reading
> the BPF-prog code?

Well, my reasoning for such naming is that if someone can configure s-tag 
stripping in ethtool with 'rx-vlan-stag-hw-parse', they shouldn't have any 
problem with understanding those function names.

One possible improvement that comes to mind is maybe (similarly to ethtool) calling
c-tag just 'tag' and letting s-tag stay 'stag', because c-tag is the default
802.1Q tag, which is supported by various hardware, while s-tag is significantly
less widespread.

But there are many options, really.

What are your suggestions?

> 
> > + * @ctx: XDP context pointer.
> > + * @vlan_tag: Return value pointer.
> > + *
> 
> IMHO right here, there should be a description.
> 
> E.g. for what a VLAN "tag" means.  I assume a "tag" isn't the VLAN id,
> but the raw VLAN tag that also contains the prio numbers etc.
> 
> It this VLAN tag expected to be in network-byte-order ?
> IMHO this doc should define what is expected (and driver devel must
> follow this).

Will specify that.

> 
> > + * Returns 0 on success or ``-errno`` on error.
> > + */
> > +__bpf_kfunc int bpf_xdp_metadata_rx_ctag(const struct xdp_md *ctx, u16 *vlan_tag)
> > +{
> > +	return -EOPNOTSUPP;
> > +}
> > +
> > +/**
> > + * bpf_xdp_metadata_rx_stag - Read XDP packet outer vlan tag.
> > + * @ctx: XDP context pointer.
> > + * @vlan_tag: Return value pointer.
> > + *
> > + * Returns 0 on success or ``-errno`` on error.
> 
> IMHO we should provide more guidance to expected return codes, and what
> they mean.  IMHO driver developers must only return codes that are
> described here, and if they invent a new, add it as part of their patch.

That's a good suggestion, I will expand the comment to describe error codes used 
so far.

> 
> See, formatting in bpf_xdp_metadata_rx_hash and check how this gets
> compiled into HTML.
> 
> 
> > + */
> > +__bpf_kfunc int bpf_xdp_metadata_rx_stag(const struct xdp_md *ctx, u16 *vlan_tag)
> > +{
> > +	return -EOPNOTSUPP;
> > +}
> > +
>
  
Jesper Dangaard Brouer May 22, 2023, 8:37 a.m. UTC | #4
On 15/05/2023 18.09, Larysa Zaremba wrote:
> On Mon, May 15, 2023 at 05:36:12PM +0200, Jesper Dangaard Brouer wrote:
>>
>>
>> On 12/05/2023 17.26, Larysa Zaremba wrote:
>>> Implement functionality that enables drivers to expose VLAN tag
>>> to XDP code.
>>>
>>> Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
>>> ---
>> [...]
>>
>>> diff --git a/net/core/xdp.c b/net/core/xdp.c
>>> index 41e5ca8643ec..eff21501609f 100644
>>> --- a/net/core/xdp.c
>>> +++ b/net/core/xdp.c
>>> @@ -738,6 +738,30 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash,
>>>    	return -EOPNOTSUPP;
>>>    }
>>
>> Remember below becomes part of main documentation on HW metadata hints:
>>   - https://kernel.org/doc/html/latest/networking/xdp-rx-metadata.html
>>
>> Hint compiling locally I use:
>>   make SPHINXDIRS="networking" htmldocs
>>
>>> +/**
>>> + * bpf_xdp_metadata_rx_ctag - Read XDP packet inner vlan tag.
>>
>> Is bpf_xdp_metadata_rx_ctag a good function name for the inner vlan tag?
>> Like wise below "stag".
>>
>> I cannot remember if the C-tag or S-tag is the inner or outer vlan tag.
>>
>> When reading BPF code that use these function names, then I would have
>> to ask Google for help, or find-and-read this doc.
>>
>> Can we come-up with a more intuitive name, that e.g. helps when reading
>> the BPF-prog code?
> 
> Well, my reasoning for such naming is that if someone can configure s-tag
> stripping in ethtool with 'rx-vlan-stag-hw-parse', they shouldn't have any
> problem with understanding those function names.
> 

Naming is hard.  My perspective is conveying the meaning without having
to be knowledgeable about ethtool VLAN commands.  My perspective is that of a
casual BPF-programmer who reads "bpf_xdp_metadata_rx_stag()".
Hopefully we can choose a name that says "vlan" somewhere, such that the
person reading this doesn't have to look up and find the documentation to
deduce that this code is related to VLANs.

> One possible improvement that comes to mind is maybe (similarly ethtool) calling
> c-tag just 'tag' and letting s-tag stay 'stag'. Because c-tag is this default
> 802.1q tag, which is supported by various hardware, while s-tag is significantly
> less widespread.
> 
> But there are many options, really.
> 
> What are your suggestions?
>

One suggestion is (the symmetrical):
  * bpf_xdp_metadata_rx_vlan_inner_tag
  * bpf_xdp_metadata_rx_vlan_outer_tag

As you say above the first "inner" VLAN tag is just the regular 802.1Q
VLAN tag.  The concept of C-tag and S-tag is from 802.1ad that
introduced the concept of double tagging.

Thus one could argue for shorter names like:
  * bpf_xdp_metadata_rx_vlan_tag
  * bpf_xdp_metadata_rx_vlan_outer_tag


>>
>>> + * @ctx: XDP context pointer.
>>> + * @vlan_tag: Return value pointer.
>>> + *
>>
>> IMHO right here, there should be a description.
>>
>> E.g. for what a VLAN "tag" means.  I assume a "tag" isn't the VLAN id,
>> but the raw VLAN tag that also contains the prio numbers etc.
>>
>> It this VLAN tag expected to be in network-byte-order ?
>> IMHO this doc should define what is expected (and driver devel must
>> follow this).
> 
> Will specify that.
> 
>>
>>> + * Returns 0 on success or ``-errno`` on error.
>>> + */
>>> +__bpf_kfunc int bpf_xdp_metadata_rx_ctag(const struct xdp_md *ctx, u16 *vlan_tag)
>>> +{
>>> +	return -EOPNOTSUPP;
>>> +}
>>> +
>>> +/**
>>> + * bpf_xdp_metadata_rx_stag - Read XDP packet outer vlan tag.
>>> + * @ctx: XDP context pointer.
>>> + * @vlan_tag: Return value pointer.
>>> + *

(p.s. Googling I find multiple definitions of what the "S" in S-tag
means. The most reliable or statistically consistent seems to be
"Service tag", or "Service provider tag".)

The description for the renamed "bpf_xdp_metadata_rx_vlan_outer_tag"
should IMHO explain that the outer VLAN tag is often referred to as the
S-tag (or Service-tag) in Q-in-Q (802.1ad) terminology.  Perhaps we can
even spell out that some hardware supports extracting this S-tag (and must
be configured via ethtool to do so).

A dump of the ethtool rx-vlan related features:

   $ ethtool -k i40e2 | grep rx-vlan
   rx-vlan-offload: on
   rx-vlan-filter: on [fixed]
   rx-vlan-stag-hw-parse: off [fixed]
   rx-vlan-stag-filter: off [fixed]




>>> + * Returns 0 on success or ``-errno`` on error.
>>
>> IMHO we should provide more guidance to expected return codes, and what
>> they mean.  IMHO driver developers must only return codes that are
>> described here, and if they invent a new, add it as part of their patch.
> 
> That's a good suggestion, I will expand the comment to describe error codes used
> so far.
> 
>>
>> See, formatting in bpf_xdp_metadata_rx_hash and check how this gets
>> compiled into HTML.
>>
>>
>>> + */
>>> +__bpf_kfunc int bpf_xdp_metadata_rx_stag(const struct xdp_md *ctx, u16 *vlan_tag)
>>> +{
>>> +	return -EOPNOTSUPP;
>>> +}
>>> +
>>
>
  
Larysa Zaremba May 22, 2023, 3:48 p.m. UTC | #5
On Mon, May 22, 2023 at 10:37:33AM +0200, Jesper Dangaard Brouer wrote:
> 
> 
> On 15/05/2023 18.09, Larysa Zaremba wrote:
> > On Mon, May 15, 2023 at 05:36:12PM +0200, Jesper Dangaard Brouer wrote:
> > > 
> > > 
> > > On 12/05/2023 17.26, Larysa Zaremba wrote:
> > > > Implement functionality that enables drivers to expose VLAN tag
> > > > to XDP code.
> > > > 
> > > > Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
> > > > ---
> > > [...]
> > > 
> > > > diff --git a/net/core/xdp.c b/net/core/xdp.c
> > > > index 41e5ca8643ec..eff21501609f 100644
> > > > --- a/net/core/xdp.c
> > > > +++ b/net/core/xdp.c
> > > > @@ -738,6 +738,30 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash,
> > > >    	return -EOPNOTSUPP;
> > > >    }
> > > 
> > > Remember below becomes part of main documentation on HW metadata hints:
> > >   - https://kernel.org/doc/html/latest/networking/xdp-rx-metadata.html
> > > 
> > > Hint compiling locally I use:
> > >   make SPHINXDIRS="networking" htmldocs
> > > 
> > > > +/**
> > > > + * bpf_xdp_metadata_rx_ctag - Read XDP packet inner vlan tag.
> > > 
> > > Is bpf_xdp_metadata_rx_ctag a good function name for the inner vlan tag?
> > > Like wise below "stag".
> > > 
> > > I cannot remember if the C-tag or S-tag is the inner or outer vlan tag.
> > > 
> > > When reading BPF code that use these function names, then I would have
> > > to ask Google for help, or find-and-read this doc.
> > > 
> > > Can we come-up with a more intuitive name, that e.g. helps when reading
> > > the BPF-prog code?
> > 
> > Well, my reasoning for such naming is that if someone can configure s-tag
> > stripping in ethtool with 'rx-vlan-stag-hw-parse', they shouldn't have any
> > problem with understanding those function names.
> > 
> 
> Naming is hard.  My perspective is conveying the meaning without having
> to be knowledgeable about ethtool VLAN commands.  My perspective is a
> casual BPF-programmer that reads "bpf_xdp_metadata_rx_stag()".
> Hopefully we can choose a name that says "vlan" somewhere, such that the
> person reading this doesn't have to lookup and find the documentation to
> deduct this code is related to VLANs.
> 
> > One possible improvement that comes to mind is maybe (similarly ethtool) calling
> > c-tag just 'tag' and letting s-tag stay 'stag'. Because c-tag is this default
> > 802.1q tag, which is supported by various hardware, while s-tag is significantly
> > less widespread.
> > 
> > But there are many options, really.
> > 
> > What are your suggestions?
> > 
> 
> One suggestion is (the symmetrical):
>  * bpf_xdp_metadata_rx_vlan_inner_tag
>  * bpf_xdp_metadata_rx_vlan_outer_tag
> 
> As you say above the first "inner" VLAN tag is just the regular 802.1Q
> VLAN tag.  The concept of C-tag and S-tag is from 802.1ad that
> introduced the concept of double tagging.
> 
> Thus one could argue for shorter names like:
>  * bpf_xdp_metadata_rx_vlan_tag
>  * bpf_xdp_metadata_rx_vlan_outer_tag
>

AFAIK, outer tag is a broader term; it's pretty often used for stacked 802.1Q
headers. I can't find what exactly the expected behavior is for rxvlan and
rx-vlan-stag-hw-parse in ethtool, but the iavf documentation states that rxvlan
"enables outer or single 802.1Q VLAN stripping" and rx-vlan-stag-hw-parse
"enables outer or single 802.1ad VLAN stripping". This is consistent with how
ice hardware behaves. More credible sources would be welcome.

What about:
  * bpf_xdp_metadata_rx_vlan_tag
  * bpf_xdp_metadata_rx_vlan_qinq_tag

> 
> > > 
> > > > + * @ctx: XDP context pointer.
> > > > + * @vlan_tag: Return value pointer.
> > > > + *
> > > 
> > > IMHO right here, there should be a description.
> > > 
> > > E.g. for what a VLAN "tag" means.  I assume a "tag" isn't the VLAN id,
> > > but the raw VLAN tag that also contains the prio numbers etc.
> > > 
> > > It this VLAN tag expected to be in network-byte-order ?
> > > IMHO this doc should define what is expected (and driver devel must
> > > follow this).
> > 
> > Will specify that.
> > 
> > > 
> > > > + * Returns 0 on success or ``-errno`` on error.
> > > > + */
> > > > +__bpf_kfunc int bpf_xdp_metadata_rx_ctag(const struct xdp_md *ctx, u16 *vlan_tag)
> > > > +{
> > > > +	return -EOPNOTSUPP;
> > > > +}
> > > > +
> > > > +/**
> > > > + * bpf_xdp_metadata_rx_stag - Read XDP packet outer vlan tag.
> > > > + * @ctx: XDP context pointer.
> > > > + * @vlan_tag: Return value pointer.
> > > > + *
> 
> (p.s. Googling I find multiple definitions of what the "S" in S-tag
> means. The most reliable or statistically consistent seems to be
> "Service tag", or "Service provider tag".)
> 
> The description for the renamed "bpf_xdp_metadata_rx_vlan_outer_tag"
> should IMHO explain that the outer VLAN tag is often refered to as the S-tag
> (or Service-tag) in Q-in-Q (802.1ad) terminology.  Perhaps we can even spell
> out that some hardware support (and must be configured via ethtool) to
> extract this stag.
> 
> A dump of the tool rx-vlan related commands:
> 
>   $ ethtool -k i40e2 | grep rx-vlan
>   rx-vlan-offload: on
>   rx-vlan-filter: on [fixed]
>   rx-vlan-stag-hw-parse: off [fixed]
>   rx-vlan-stag-filter: off [fixed]
> 
> 
> 
> 
> > > > + * Returns 0 on success or ``-errno`` on error.
> > > 
> > > IMHO we should provide more guidance to expected return codes, and what
> > > they mean.  IMHO driver developers must only return codes that are
> > > described here, and if they invent a new, add it as part of their patch.
> > 
> > That's a good suggestion, I will expand the comment to describe error codes used
> > so far.
> > 
> > > 
> > > See, formatting in bpf_xdp_metadata_rx_hash and check how this gets
> > > compiled into HTML.
> > > 
> > > 
> > > > + */
> > > > +__bpf_kfunc int bpf_xdp_metadata_rx_stag(const struct xdp_md *ctx, u16 *vlan_tag)
> > > > +{
> > > > +	return -EOPNOTSUPP;
> > > > +}
> > > > +
> > > 
> > 
>
  
Jesper Dangaard Brouer May 23, 2023, 10:16 a.m. UTC | #6
On 22/05/2023 17.48, Larysa Zaremba wrote:
> On Mon, May 22, 2023 at 10:37:33AM +0200, Jesper Dangaard Brouer wrote:
>>
>>
>> On 15/05/2023 18.09, Larysa Zaremba wrote:
>>> On Mon, May 15, 2023 at 05:36:12PM +0200, Jesper Dangaard Brouer wrote:
>>>>
>>>>
>>>> On 12/05/2023 17.26, Larysa Zaremba wrote:
>>>>> Implement functionality that enables drivers to expose VLAN tag
>>>>> to XDP code.
>>>>>
>>>>> Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
>>>>> ---
>>>> [...]
>>>>
>>>>> diff --git a/net/core/xdp.c b/net/core/xdp.c
>>>>> index 41e5ca8643ec..eff21501609f 100644
>>>>> --- a/net/core/xdp.c
>>>>> +++ b/net/core/xdp.c
>>>>> @@ -738,6 +738,30 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash,
>>>>>     	return -EOPNOTSUPP;
>>>>>     }
>>>>
>>>> Remember below becomes part of main documentation on HW metadata hints:
>>>>    - https://kernel.org/doc/html/latest/networking/xdp-rx-metadata.html
>>>>
>>>> Hint compiling locally I use:
>>>>    make SPHINXDIRS="networking" htmldocs
>>>>
>>>>> +/**
>>>>> + * bpf_xdp_metadata_rx_ctag - Read XDP packet inner vlan tag.
>>>>
>>>> Is bpf_xdp_metadata_rx_ctag a good function name for the inner vlan tag?
>>>> Like wise below "stag".
>>>>
>>>> I cannot remember if the C-tag or S-tag is the inner or outer vlan tag.
>>>>
>>>> When reading BPF code that use these function names, then I would have
>>>> to ask Google for help, or find-and-read this doc.
>>>>
>>>> Can we come-up with a more intuitive name, that e.g. helps when reading
>>>> the BPF-prog code?
>>>
>>> Well, my reasoning for such naming is that if someone can configure s-tag
>>> stripping in ethtool with 'rx-vlan-stag-hw-parse', they shouldn't have any
>>> problem with understanding those function names.
>>>
>>
>> Naming is hard.  My perspective is conveying the meaning without having
>> to be knowledgeable about ethtool VLAN commands.  My perspective is a
>> casual BPF-programmer that reads "bpf_xdp_metadata_rx_stag()".
>> Hopefully we can choose a name that says "vlan" somewhere, such that the
>> person reading this doesn't have to lookup and find the documentation to
>> deduct this code is related to VLANs.
>>
>>> One possible improvement that comes to mind is maybe (similarly ethtool) calling
>>> c-tag just 'tag' and letting s-tag stay 'stag'. Because c-tag is this default
>>> 802.1q tag, which is supported by various hardware, while s-tag is significantly
>>> less widespread.
>>>
>>> But there are many options, really.
>>>
>>> What are your suggestions?
>>>
>>
>> One suggestion is (the symmetrical):
>>   * bpf_xdp_metadata_rx_vlan_inner_tag
>>   * bpf_xdp_metadata_rx_vlan_outer_tag
>>
>> As you say above the first "inner" VLAN tag is just the regular 802.1Q
>> VLAN tag.  The concept of C-tag and S-tag is from 802.1ad that
>> introduced the concept of double tagging.
>>
>> Thus one could argue for shorter names like:
>>   * bpf_xdp_metadata_rx_vlan_tag
>>   * bpf_xdp_metadata_rx_vlan_outer_tag
>>
> 
> AFAIK, outer tag is a broader term, it's pretty often used for stacked 802.1Q
> headers. I can't find what exactly is an expected behavior for rxvlan and
> rx-vlan-stag-hw-parse in ethtool, but iavf documentation states that rxvlan
> "enables outer or single 802.1Q VLAN stripping" and rx-vlan-stag-hw-parse
> "enables outer or single 802.1ad VLAN stripping". This is in consistent with how
> ice hardware behaves. More credible sources would be welcome.
> 

It would be good to figure out how other hardware behaves.

The iavf doc describes very similar behavior for both functions, just
802.1Q vs 802.1ad.
It sounds like both will just pop/strip the outer VLAN tag.
I have seen Ethertype 802.1Q being used (in practice) for double-tagged
packets, even though 802.1ad should have been used to comply with the
standard.

> What about:
>    * bpf_xdp_metadata_rx_vlan_tag
>    * bpf_xdp_metadata_rx_vlan_qinq_tag
> 

This sounds good to me.

I do wonder if we really need two functions for this?
Would one function be enough?

Given the (iavf) description, the functions basically do the same thing.
Looking at your ice driver implementation, they could be merged into one
function, as it is the same location in the descriptor.

>>
>>>>
>>>>> + * @ctx: XDP context pointer.
>>>>> + * @vlan_tag: Return value pointer.
>>>>> + *
>>>>
>>>> IMHO right here, there should be a description.
>>>>
>>>> E.g. for what a VLAN "tag" means.  I assume a "tag" isn't the VLAN id,
>>>> but the raw VLAN tag that also contains the prio numbers etc.
>>>>
>>>> It this VLAN tag expected to be in network-byte-order ?
>>>> IMHO this doc should define what is expected (and driver devel must
>>>> follow this).
>>>
>>> Will specify that.
>>>
>>>>
>>>>> + * Returns 0 on success or ``-errno`` on error.
>>>>> + */
>>>>> +__bpf_kfunc int bpf_xdp_metadata_rx_ctag(const struct xdp_md *ctx, u16 *vlan_tag)
>>>>> +{
>>>>> +	return -EOPNOTSUPP;
>>>>> +}
>>>>> +
>>>>> +/**
>>>>> + * bpf_xdp_metadata_rx_stag - Read XDP packet outer vlan tag.
>>>>> + * @ctx: XDP context pointer.
>>>>> + * @vlan_tag: Return value pointer.
>>>>> + *
>>
>> (p.s. Googling I find multiple definitions of what the "S" in S-tag
>> means. The most reliable or statistically consistent seems to be
>> "Service tag", or "Service provider tag".)
>>
>> The description for the renamed "bpf_xdp_metadata_rx_vlan_outer_tag"
>> should IMHO explain that the outer VLAN tag is often refered to as the S-tag
>> (or Service-tag) in Q-in-Q (802.1ad) terminology.  Perhaps we can even spell
>> out that some hardware support (and must be configured via ethtool) to
>> extract this stag.
>>
>> A dump of the tool rx-vlan related commands:
>>
>>    $ ethtool -k i40e2 | grep rx-vlan
>>    rx-vlan-offload: on
>>    rx-vlan-filter: on [fixed]
>>    rx-vlan-stag-hw-parse: off [fixed]
>>    rx-vlan-stag-filter: off [fixed]
>>
[...]
  
Larysa Zaremba May 23, 2023, 5:35 p.m. UTC | #7
On Tue, May 23, 2023 at 12:16:46PM +0200, Jesper Dangaard Brouer wrote:
> 
> 
> On 22/05/2023 17.48, Larysa Zaremba wrote:
> > On Mon, May 22, 2023 at 10:37:33AM +0200, Jesper Dangaard Brouer wrote:
> > > 
> > > 
> > > On 15/05/2023 18.09, Larysa Zaremba wrote:
> > > > On Mon, May 15, 2023 at 05:36:12PM +0200, Jesper Dangaard Brouer wrote:
> > > > > 
> > > > > 
> > > > > On 12/05/2023 17.26, Larysa Zaremba wrote:
> > > > > > Implement functionality that enables drivers to expose VLAN tag
> > > > > > to XDP code.
> > > > > > 
> > > > > > Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
> > > > > > ---
> > > > > [...]
> > > > > 
> > > > > > diff --git a/net/core/xdp.c b/net/core/xdp.c
> > > > > > index 41e5ca8643ec..eff21501609f 100644
> > > > > > --- a/net/core/xdp.c
> > > > > > +++ b/net/core/xdp.c
> > > > > > @@ -738,6 +738,30 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash,
> > > > > >     	return -EOPNOTSUPP;
> > > > > >     }
> > > > > 
> > > > > Remember below becomes part of main documentation on HW metadata hints:
> > > > >    - https://kernel.org/doc/html/latest/networking/xdp-rx-metadata.html
> > > > > 
> > > > > Hint compiling locally I use:
> > > > >    make SPHINXDIRS="networking" htmldocs
> > > > > 
> > > > > > +/**
> > > > > > + * bpf_xdp_metadata_rx_ctag - Read XDP packet inner vlan tag.
> > > > > 
> > > > > Is bpf_xdp_metadata_rx_ctag a good function name for the inner vlan tag?
> > > > > Like wise below "stag".
> > > > > 
> > > > > I cannot remember if the C-tag or S-tag is the inner or outer vlan tag.
> > > > > 
> > > > > When reading BPF code that use these function names, then I would have
> > > > > to ask Google for help, or find-and-read this doc.
> > > > > 
> > > > > Can we come-up with a more intuitive name, that e.g. helps when reading
> > > > > the BPF-prog code?
> > > > 
> > > > Well, my reasoning for such naming is that if someone can configure s-tag
> > > > stripping in ethtool with 'rx-vlan-stag-hw-parse', they shouldn't have any
> > > > problem with understanding those function names.
> > > > 
> > > 
> > > Naming is hard.  My perspective is conveying the meaning without having
> > > to be knowledgeable about ethtool VLAN commands.  My perspective is a
> > > casual BPF-programmer that reads "bpf_xdp_metadata_rx_stag()".
> > > Hopefully we can choose a name that says "vlan" somewhere, such that the
> > > person reading this doesn't have to lookup and find the documentation to
> > > deduct this code is related to VLANs.
> > > 
> > > > One possible improvement that comes to mind is maybe (similarly ethtool) calling
> > > > c-tag just 'tag' and letting s-tag stay 'stag'. Because c-tag is this default
> > > > 802.1q tag, which is supported by various hardware, while s-tag is significantly
> > > > less widespread.
> > > > 
> > > > But there are many options, really.
> > > > 
> > > > What are your suggestions?
> > > > 
> > > 
> > > One suggestion is (the symmetrical):
> > >   * bpf_xdp_metadata_rx_vlan_inner_tag
> > >   * bpf_xdp_metadata_rx_vlan_outer_tag
> > > 
> > > As you say above the first "inner" VLAN tag is just the regular 802.1Q
> > > VLAN tag.  The concept of C-tag and S-tag is from 802.1ad that
> > > introduced the concept of double tagging.
> > > 
> > > Thus one could argue for shorter names like:
> > >   * bpf_xdp_metadata_rx_vlan_tag
> > >   * bpf_xdp_metadata_rx_vlan_outer_tag
> > > 
> > 
> > AFAIK, outer tag is a broader term, it's pretty often used for stacked 802.1Q
> > headers. I can't find what exactly is an expected behavior for rxvlan and
> > rx-vlan-stag-hw-parse in ethtool, but iavf documentation states that rxvlan
> > "enables outer or single 802.1Q VLAN stripping" and rx-vlan-stag-hw-parse
> > "enables outer or single 802.1ad VLAN stripping". This is in consistent with how
> > ice hardware behaves. More credible sources would be welcome.
> > 
> 
> It would be good to figure out how other hardware behaves.
> 
> The iavf doc sounds like very similar behavior from both functions, just
> 802.1Q vs 802.1ad.
> Sounds like both will just pop/strip the outer vlan tag.
> I have seen Ethertype 802.1Q being used (in practice) for double tagged
> packets, even-though 802.1ad should have been used to comply with the
> standard.
> 
> > What about:
> >    * bpf_xdp_metadata_rx_vlan_tag
> >    * bpf_xdp_metadata_rx_vlan_qinq_tag
> > 
> 
> This sounds good to me.
> 
> I do wonder if we really need two functions for this?
> Would one function be enough?
> 
> Given the (iavf) description, the functions basically do the same.
> Looking at your ice driver implementation, they could be merged into one
> function, as it is the same location in the descriptor.
>

This design was very debatable in the first place.
I looked at different in-tree driver implementations of the
NETIF_F_HW_VLAN_STAG_RX feature once more. Among those I could comprehend, it
seems like none has c-tag and s-tag stored separately. Actually, there are 2
situations:

1. (e.g. mlx4) HW always strips the outer or single VLAN tag, without
distinction between 802.1Q and 802.1ad. The TPID in such a case is deduced from
the descriptor. NETIF_F_HW_VLAN_STAG_RX and NETIF_F_HW_VLAN_CTAG_RX must be
enabled together.

2. (e.g. ice) HW strips the outer or single VLAN tag with a configured TPID. In
such a case the descriptor doesn't carry info about the TPID, because it's the
same for all stripped tags. C-tag and s-tag stripping are mutually exclusive.
Example:
 - 802.1Q double VLAN: with s-tag stripping enabled, the packet arrives
   untouched; with c-tag stripping enabled, the outermost tag gets stripped.
 - 802.1ad+802.1Q: with s-tag stripping enabled, the 802.1ad header gets
   stripped; with c-tag stripping enabled, the packet arrives untouched.

Obviously, I can be sure only about our hardware.

Long story short, re-inventing the wheel probably wasn't a good idea on my part.
Now I am much more inclined to just copy the logic from skb, so the function
would look like this:

  bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx, __u16 *vlan_tag,
			       __u16 *tpid);

Maybe some applications would make use of just:

  bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx, __u16 *vlan_tag);

Both of the above functions would return information about the outermost tag,
if it was stripped. I would have to think about the naming.

Comments are welcome!

> > > 
> > > > > 
> > > > > > + * @ctx: XDP context pointer.
> > > > > > + * @vlan_tag: Return value pointer.
> > > > > > + *
> > > > > 
> > > > > IMHO right here, there should be a description.
> > > > > 
> > > > > E.g. for what a VLAN "tag" means.  I assume a "tag" isn't the VLAN id,
> > > > > but the raw VLAN tag that also contains the prio numbers etc.
> > > > > 
> > > > > > Is this VLAN tag expected to be in network byte order?
> > > > > IMHO this doc should define what is expected (and driver devel must
> > > > > follow this).
> > > > 
> > > > Will specify that.
> > > > 
> > > > > 
> > > > > > + * Returns 0 on success or ``-errno`` on error.
> > > > > > + */
> > > > > > +__bpf_kfunc int bpf_xdp_metadata_rx_ctag(const struct xdp_md *ctx, u16 *vlan_tag)
> > > > > > +{
> > > > > > +	return -EOPNOTSUPP;
> > > > > > +}
> > > > > > +
> > > > > > +/**
> > > > > > + * bpf_xdp_metadata_rx_stag - Read XDP packet outer vlan tag.
> > > > > > + * @ctx: XDP context pointer.
> > > > > > + * @vlan_tag: Return value pointer.
> > > > > > + *
> > > 
> > > (p.s. Googling I find multiple definitions of what the "S" in S-tag
> > > means. The most reliable or statistically consistent seems to be
> > > "Service tag", or "Service provider tag".)
> > > 
> > > The description for the renamed "bpf_xdp_metadata_rx_vlan_outer_tag"
> > > should IMHO explain that the outer VLAN tag is often referred to as the S-tag
> > > (or Service-tag) in Q-in-Q (802.1ad) terminology.  Perhaps we can even spell
> > > out that some hardware supports (and must be configured via ethtool) to
> > > extract this stag.
> > > 
> > > A dump of the tool rx-vlan related commands:
> > > 
> > >    $ ethtool -k i40e2 | grep rx-vlan
> > >    rx-vlan-offload: on
> > >    rx-vlan-filter: on [fixed]
> > >    rx-vlan-stag-hw-parse: off [fixed]
> > >    rx-vlan-stag-filter: off [fixed]
> > > 
> [...]
>
  

Patch

diff --git a/Documentation/networking/xdp-rx-metadata.rst b/Documentation/networking/xdp-rx-metadata.rst
index 25ce72af81c2..73a78029c596 100644
--- a/Documentation/networking/xdp-rx-metadata.rst
+++ b/Documentation/networking/xdp-rx-metadata.rst
@@ -18,7 +18,16 @@  Currently, the following kfuncs are supported. In the future, as more
 metadata is supported, this set will grow:
 
 .. kernel-doc:: net/core/xdp.c
-   :identifiers: bpf_xdp_metadata_rx_timestamp bpf_xdp_metadata_rx_hash
+   :identifiers: bpf_xdp_metadata_rx_timestamp
+
+.. kernel-doc:: net/core/xdp.c
+   :identifiers: bpf_xdp_metadata_rx_hash
+
+.. kernel-doc:: net/core/xdp.c
+   :identifiers: bpf_xdp_metadata_rx_ctag
+
+.. kernel-doc:: net/core/xdp.c
+   :identifiers: bpf_xdp_metadata_rx_stag
 
 An XDP program can use these kfuncs to read the metadata into stack
 variables for its own consumption. Or, to pass the metadata on to other
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 08fbd4622ccf..fdae37fe11f5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1655,6 +1655,8 @@  struct xdp_metadata_ops {
 	int	(*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp);
 	int	(*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash,
 			       enum xdp_rss_hash_type *rss_type);
+	int	(*xmo_rx_ctag)(const struct xdp_md *ctx, u16 *vlan_tag);
+	int	(*xmo_rx_stag)(const struct xdp_md *ctx, u16 *vlan_tag);
 };
 
 /**
diff --git a/include/net/xdp.h b/include/net/xdp.h
index 6381560efae2..2db7439fc60f 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -389,6 +389,10 @@  void xdp_attachment_setup(struct xdp_attachment_info *info,
 			   bpf_xdp_metadata_rx_timestamp) \
 	XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_HASH, \
 			   bpf_xdp_metadata_rx_hash) \
+	XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_CTAG, \
+			   bpf_xdp_metadata_rx_ctag) \
+	XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_STAG, \
+			   bpf_xdp_metadata_rx_stag) \
 
 enum {
 #define XDP_METADATA_KFUNC(name, _) name,
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index d9c9f45e3529..2c6b6e82cfac 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -848,6 +848,10 @@  void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog, u32 func_id)
 		p = ops->xmo_rx_timestamp;
 	else if (func_id == bpf_xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_HASH))
 		p = ops->xmo_rx_hash;
+	else if (func_id == bpf_xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_CTAG))
+		p = ops->xmo_rx_ctag;
+	else if (func_id == bpf_xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_STAG))
+		p = ops->xmo_rx_stag;
 out:
 	up_read(&bpf_devs_lock);
 
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 41e5ca8643ec..eff21501609f 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -738,6 +738,30 @@  __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash,
 	return -EOPNOTSUPP;
 }
 
+/**
+ * bpf_xdp_metadata_rx_ctag - Read XDP packet inner vlan tag.
+ * @ctx: XDP context pointer.
+ * @vlan_tag: Return value pointer.
+ *
+ * Returns 0 on success or ``-errno`` on error.
+ */
+__bpf_kfunc int bpf_xdp_metadata_rx_ctag(const struct xdp_md *ctx, u16 *vlan_tag)
+{
+	return -EOPNOTSUPP;
+}
+
+/**
+ * bpf_xdp_metadata_rx_stag - Read XDP packet outer vlan tag.
+ * @ctx: XDP context pointer.
+ * @vlan_tag: Return value pointer.
+ *
+ * Returns 0 on success or ``-errno`` on error.
+ */
+__bpf_kfunc int bpf_xdp_metadata_rx_stag(const struct xdp_md *ctx, u16 *vlan_tag)
+{
+	return -EOPNOTSUPP;
+}
+
 __diag_pop();
 
 BTF_SET8_START(xdp_metadata_kfunc_ids)