[v2,1/3] rust: macros: add `decl_generics` to `parse_generics()`
Commit Message
The generic parameters on a type definition can specify default values.
Currently `parse_generics()` cannot handle this though. For example when
parsing the following generics:
<T: Clone, const N: usize = 0>
The `impl_generics` will be set to `T: Clone, const N: usize = 0` and
`ty_generics` will be set to `T, N`. Now using the `impl_generics` on an
impl block:
impl<$($impl_generics)*> Foo {}
will result in invalid Rust code, because default values are only
available on type definitions.
Therefore add parsing support for generic parameter default values using
a new kind of generics called `decl_generics` and change the old
behavior of `impl_generics` to not contain the generic parameter default
values.
Now `Generics` has three fields:
- `impl_generics`: the generics with bounds
(e.g. `T: Clone, const N: usize`)
- `decl_generics`: the generics with bounds and default values
(e.g. `T: Clone, const N: usize = 0`)
- `ty_generics`: contains the generics without bounds and without
default values (e.g. `T, N`)
`impl_generics` is designed to be used on `impl<$impl_generics>`,
`decl_generics` for the type definition, so `struct Foo<$decl_generics>`
and `ty_generics` whenever you use the type, so `Foo<$ty_generics>`.
Here is an example that uses all three different types of generics:
let (Generics { decl_generics, impl_generics, ty_generics }, rest) = parse_generics(input);
quote! {
struct Foo<$($decl_generics)*> {
// ...
}
impl<$impl_generics> Foo<$ty_generics> {
fn foo() {
// ...
}
}
}
The next commit contains a fix to the `#[pin_data]` macro making it
compatible with generic parameter default values by relying on this new
behavior.
Signed-off-by: Benno Lossin <benno.lossin@proton.me>
---
v1 -> v2:
- improve documentation and commit message explanation
- add motivation to commit message
rust/macros/helpers.rs | 122 ++++++++++++++++++++++++++++++----------
rust/macros/pin_data.rs | 1 +
rust/macros/zeroable.rs | 1 +
3 files changed, 94 insertions(+), 30 deletions(-)
base-commit: d9857c16cfc6bce7764e1b79956c6a028f97f4d0
Comments
On 12/13/23 19:08, Benno Lossin wrote:
> The generic parameters on a type definition can specify default values.
> Currently `parse_generics()` cannot handle this though. For example when
> parsing the following generics:
>
> <T: Clone, const N: usize = 0>
>
> The `impl_generics` will be set to `T: Clone, const N: usize = 0` and
> `ty_generics` will be set to `T, N`. Now using the `impl_generics` on an
> impl block:
>
> impl<$($impl_generics)*> Foo {}
>
> will result in invalid Rust code, because default values are only
> available on type definitions.
>
> Therefore add parsing support for generic parameter default values using
> a new kind of generics called `decl_generics` and change the old
> behavior of `impl_generics` to not contain the generic parameter default
> values.
>
> Now `Generics` has three fields:
> - `impl_generics`: the generics with bounds
> (e.g. `T: Clone, const N: usize`)
> - `decl_generics`: the generics with bounds and default values
> (e.g. `T: Clone, const N: usize = 0`)
> - `ty_generics`: contains the generics without bounds and without
> default values (e.g. `T, N`)
>
> `impl_generics` is designed to be used on `impl<$impl_generics>`,
> `decl_generics` for the type definition, so `struct Foo<$decl_generics>`
> and `ty_generics` whenever you use the type, so `Foo<$ty_generics>`.
>
> Here is an example that uses all three different types of generics:
>
> let (Generics { decl_generics, impl_generics, ty_generics }, rest) = parse_generics(input);
> quote! {
> struct Foo<$($decl_generics)*> {
> // ...
> }
>
> impl<$impl_generics> Foo<$ty_generics> {
> fn foo() {
> // ...
> }
> }
> }
>
> The next commit contains a fix to the `#[pin_data]` macro making it
> compatible with generic parameter default values by relying on this new
> behavior.
>
> Signed-off-by: Benno Lossin <benno.lossin@proton.me>
> ---
> [...]
Reviewed-by: Martin Rodriguez Reboredo <yakoyoku@gmail.com>
The logic looks good to me, some nits:
> +/// Parsed generics.
> +///
> +/// See the field documentation for an explanation what each of the fields represents.
> +///
> +/// # Examples
> +///
> +/// ```rust,ignore
> +/// # let input = todo!();
> +/// let (Generics { decl_generics, impl_generics, ty_generics }, rest) = parse_generics(input);
> +/// quote! {
> +/// struct Foo<$($decl_generics)*> {
> +/// // ...
> +/// }
> +///
> +/// impl<$impl_generics> Foo<$ty_generics> {
> +/// fn foo() {
> +/// // ...
> +/// }
> +/// }
> +/// }
> +/// ```
> pub(crate) struct Generics {
> + /// The generics with bounds and default values (e.g. `T: Clone, const N: usize = 0`).
> + ///
> + /// Use this on type definitions e.g. `struct Foo<$decl_generics> ...` (or `union`/`enum`).
> + pub(crate) decl_generics: Vec<TokenTree>,
> + /// The generics with bounds (e.g. `T: Clone, const N: usize`).
> + ///
> + /// Use this on `impl` blocks e.g. `impl<$impl_generics> Trait for ...`.
> pub(crate) impl_generics: Vec<TokenTree>,
> + /// The generics without bounds and without default values (e.g. `T, N`).
> + ///
> + /// Use this when you use the type that is declared with these generics e.g.
> + /// `Foo<$ty_generics>`.
> pub(crate) ty_generics: Vec<TokenTree>,
> }
>
> @@ -81,6 +113,8 @@ pub(crate) struct Generics {
> pub(crate) fn parse_generics(input: TokenStream) -> (Generics, Vec<TokenTree>) {
> // `impl_generics`, the declared generics with their bounds.
> let mut impl_generics = vec![];
> + // The generics with bounds and default values.
> + let mut decl_generics = vec![];
It'll be great if the order if the same as the declaration in struct
above.
> // Only the names of the generics, without any bounds.
> let mut ty_generics = vec![];
> // Tokens not related to the generics e.g. the `where` token and definition.
> @@ -90,10 +124,17 @@ pub(crate) fn parse_generics(input: TokenStream) -> (Generics, Vec<TokenTree>) {
> let mut toks = input.into_iter();
> // If we are at the beginning of a generic parameter.
> let mut at_start = true;
> - for tt in &mut toks {
> + let mut skip_until_comma = false;
> + while let Some(tt) = toks.next() {
> + if nesting == 1 && matches!(&tt, TokenTree::Punct(p) if p.as_char() == '>') {
> + // Found the end of the generics.
> + break;
> + } else if nesting >= 1 {
> + decl_generics.push(tt.clone());
> + }
> match tt.clone() {
> TokenTree::Punct(p) if p.as_char() == '<' => {
> - if nesting >= 1 {
> + if nesting >= 1 && !skip_until_comma {
> // This is inside of the generics and part of some bound.
> impl_generics.push(tt);
> }
> @@ -105,49 +146,70 @@ pub(crate) fn parse_generics(input: TokenStream) -> (Generics, Vec<TokenTree>) {
> break;
> } else {
> nesting -= 1;
> - if nesting >= 1 {
> + if nesting >= 1 && !skip_until_comma {
> // We are still inside of the generics and part of some bound.
> impl_generics.push(tt);
> }
> - if nesting == 0 {
> - break;
> - }
> }
> }
> - tt => {
> + TokenTree::Punct(p) if skip_until_comma && p.as_char() == ',' => {
> if nesting == 1 {
> - // Here depending on the token, it might be a generic variable name.
> - match &tt {
> - // Ignore const.
> - TokenTree::Ident(i) if i.to_string() == "const" => {}
> - TokenTree::Ident(_) if at_start => {
> - ty_generics.push(tt.clone());
> - // We also already push the `,` token, this makes it easier to append
> - // generics.
> - ty_generics.push(TokenTree::Punct(Punct::new(',', Spacing::Alone)));
> - at_start = false;
> - }
> - TokenTree::Punct(p) if p.as_char() == ',' => at_start = true,
> - // Lifetimes begin with `'`.
> - TokenTree::Punct(p) if p.as_char() == '\'' && at_start => {
> - ty_generics.push(tt.clone());
> - }
> - _ => {}
> - }
> + impl_generics.push(TokenTree::Punct(p.clone()));
> + ty_generics.push(TokenTree::Punct(p));
This could just be
impl_generics.push(tt.clone());
ty_generics.push(tt);
?
> + skip_until_comma = false;
> }
> - if nesting >= 1 {
> - impl_generics.push(tt);
> - } else if nesting == 0 {
> + }
> + tt if !skip_until_comma => {
> + match nesting {
> // If we haven't entered the generics yet, we still want to keep these tokens.
> - rest.push(tt);
> + 0 => rest.push(tt),
> + 1 => {
> + // Here depending on the token, it might be a generic variable name.
> + match tt {
> + TokenTree::Ident(i) if at_start && i.to_string() == "const" => {
> + let Some(name) = toks.next() else {
> + // Parsing error.
> + break;
> + };
> + impl_generics.push(TokenTree::Ident(i));
Similar, this could just be push tt.
> + impl_generics.push(name.clone());
> + ty_generics.push(name.clone());
> + decl_generics.push(name);
> + at_start = false;
> + }
> + tt @ TokenTree::Ident(_) if at_start => {
> + impl_generics.push(tt.clone());
> + ty_generics.push(tt);
> + at_start = false;
> + }
> + TokenTree::Punct(p) if p.as_char() == ',' => {
> + impl_generics.push(TokenTree::Punct(p.clone()));
> + ty_generics.push(TokenTree::Punct(p));
> + at_start = true;
> + }
I am not sure why the ident above uses tt, but this spells thing all
out.
> + // Lifetimes begin with `'`.
> + TokenTree::Punct(p) if p.as_char() == '\'' && at_start => {
> + ty_generics.push(TokenTree::Punct(p.clone()));
> + impl_generics.push(TokenTree::Punct(p));
> + }
ditto
> + // Generics can have default values, we skip these.
> + TokenTree::Punct(p) if p.as_char() == '=' => {
> + skip_until_comma = true;
> + }
> + tt => impl_generics.push(tt),
> + }
> + }
> + _ => impl_generics.push(tt),
> }
> }
> + _ => {}
> }
> }
> rest.extend(toks);
> (
> Generics {
> impl_generics,
> + decl_generics,
> ty_generics,
> },
> rest,
> diff --git a/rust/macros/pin_data.rs b/rust/macros/pin_data.rs
> index 6d58cfda9872..022e68e9720d 100644
> --- a/rust/macros/pin_data.rs
> +++ b/rust/macros/pin_data.rs
> @@ -10,6 +10,7 @@ pub(crate) fn pin_data(args: TokenStream, input: TokenStream) -> TokenStream {
> let (
> Generics {
> impl_generics,
> + decl_generics: _,
> ty_generics,
> },
> rest,
> diff --git a/rust/macros/zeroable.rs b/rust/macros/zeroable.rs
> index 0d605c46ab3b..cfee2cec18d5 100644
> --- a/rust/macros/zeroable.rs
> +++ b/rust/macros/zeroable.rs
> @@ -7,6 +7,7 @@ pub(crate) fn derive(input: TokenStream) -> TokenStream {
> let (
> Generics {
> impl_generics,
> + decl_generics: _,
> ty_generics,
> },
> mut rest,
>
> base-commit: d9857c16cfc6bce7764e1b79956c6a028f97f4d0
On Wed, Dec 13, 2023 at 11:08 PM Benno Lossin <benno.lossin@proton.me> wrote:
>
> The generic parameters on a type definition can specify default values.
> Currently `parse_generics()` cannot handle this though. For example when
> parsing the following generics:
>
> <T: Clone, const N: usize = 0>
>
> The `impl_generics` will be set to `T: Clone, const N: usize = 0` and
> `ty_generics` will be set to `T, N`. Now using the `impl_generics` on an
> impl block:
>
> impl<$($impl_generics)*> Foo {}
>
> will result in invalid Rust code, because default values are only
> available on type definitions.
>
> Therefore add parsing support for generic parameter default values using
> a new kind of generics called `decl_generics` and change the old
> behavior of `impl_generics` to not contain the generic parameter default
> values.
>
> Now `Generics` has three fields:
> - `impl_generics`: the generics with bounds
> (e.g. `T: Clone, const N: usize`)
> - `decl_generics`: the generics with bounds and default values
> (e.g. `T: Clone, const N: usize = 0`)
> - `ty_generics`: contains the generics without bounds and without
> default values (e.g. `T, N`)
>
> `impl_generics` is designed to be used on `impl<$impl_generics>`,
> `decl_generics` for the type definition, so `struct Foo<$decl_generics>`
> and `ty_generics` whenever you use the type, so `Foo<$ty_generics>`.
>
> Here is an example that uses all three different types of generics:
>
> let (Generics { decl_generics, impl_generics, ty_generics }, rest) = parse_generics(input);
> quote! {
> struct Foo<$($decl_generics)*> {
> // ...
> }
>
> impl<$impl_generics> Foo<$ty_generics> {
> fn foo() {
> // ...
> }
> }
> }
>
> The next commit contains a fix to the `#[pin_data]` macro making it
> compatible with generic parameter default values by relying on this new
> behavior.
>
> Signed-off-by: Benno Lossin <benno.lossin@proton.me>
Tested-by: Alice Ryhl <aliceryhl@google.com>
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
-use proc_macro::{token_stream, Group, Punct, Spacing, TokenStream, TokenTree};
+use proc_macro::{token_stream, Group, TokenStream, TokenTree};
pub(crate) fn try_ident(it: &mut token_stream::IntoIter) -> Option<String> {
if let Some(TokenTree::Ident(ident)) = it.next() {
@@ -70,8 +70,40 @@ pub(crate) fn expect_end(it: &mut token_stream::IntoIter) {
}
}
+/// Parsed generics.
+///
+/// See the field documentation for an explanation what each of the fields represents.
+///
+/// # Examples
+///
+/// ```rust,ignore
+/// # let input = todo!();
+/// let (Generics { decl_generics, impl_generics, ty_generics }, rest) = parse_generics(input);
+/// quote! {
+/// struct Foo<$($decl_generics)*> {
+/// // ...
+/// }
+///
+/// impl<$impl_generics> Foo<$ty_generics> {
+/// fn foo() {
+/// // ...
+/// }
+/// }
+/// }
+/// ```
pub(crate) struct Generics {
+ /// The generics with bounds and default values (e.g. `T: Clone, const N: usize = 0`).
+ ///
+ /// Use this on type definitions e.g. `struct Foo<$decl_generics> ...` (or `union`/`enum`).
+ pub(crate) decl_generics: Vec<TokenTree>,
+ /// The generics with bounds (e.g. `T: Clone, const N: usize`).
+ ///
+ /// Use this on `impl` blocks e.g. `impl<$impl_generics> Trait for ...`.
pub(crate) impl_generics: Vec<TokenTree>,
+ /// The generics without bounds and without default values (e.g. `T, N`).
+ ///
+ /// Use this when you use the type that is declared with these generics e.g.
+ /// `Foo<$ty_generics>`.
pub(crate) ty_generics: Vec<TokenTree>,
}
@@ -81,6 +113,8 @@ pub(crate) struct Generics {
pub(crate) fn parse_generics(input: TokenStream) -> (Generics, Vec<TokenTree>) {
// `impl_generics`, the declared generics with their bounds.
let mut impl_generics = vec![];
+ // The generics with bounds and default values.
+ let mut decl_generics = vec![];
// Only the names of the generics, without any bounds.
let mut ty_generics = vec![];
// Tokens not related to the generics e.g. the `where` token and definition.
@@ -90,10 +124,17 @@ pub(crate) fn parse_generics(input: TokenStream) -> (Generics, Vec<TokenTree>) {
let mut toks = input.into_iter();
// If we are at the beginning of a generic parameter.
let mut at_start = true;
- for tt in &mut toks {
+ let mut skip_until_comma = false;
+ while let Some(tt) = toks.next() {
+ if nesting == 1 && matches!(&tt, TokenTree::Punct(p) if p.as_char() == '>') {
+ // Found the end of the generics.
+ break;
+ } else if nesting >= 1 {
+ decl_generics.push(tt.clone());
+ }
match tt.clone() {
TokenTree::Punct(p) if p.as_char() == '<' => {
- if nesting >= 1 {
+ if nesting >= 1 && !skip_until_comma {
// This is inside of the generics and part of some bound.
impl_generics.push(tt);
}
@@ -105,49 +146,70 @@ pub(crate) fn parse_generics(input: TokenStream) -> (Generics, Vec<TokenTree>) {
break;
} else {
nesting -= 1;
- if nesting >= 1 {
+ if nesting >= 1 && !skip_until_comma {
// We are still inside of the generics and part of some bound.
impl_generics.push(tt);
}
- if nesting == 0 {
- break;
- }
}
}
- tt => {
+ TokenTree::Punct(p) if skip_until_comma && p.as_char() == ',' => {
if nesting == 1 {
- // Here depending on the token, it might be a generic variable name.
- match &tt {
- // Ignore const.
- TokenTree::Ident(i) if i.to_string() == "const" => {}
- TokenTree::Ident(_) if at_start => {
- ty_generics.push(tt.clone());
- // We also already push the `,` token, this makes it easier to append
- // generics.
- ty_generics.push(TokenTree::Punct(Punct::new(',', Spacing::Alone)));
- at_start = false;
- }
- TokenTree::Punct(p) if p.as_char() == ',' => at_start = true,
- // Lifetimes begin with `'`.
- TokenTree::Punct(p) if p.as_char() == '\'' && at_start => {
- ty_generics.push(tt.clone());
- }
- _ => {}
- }
+ impl_generics.push(TokenTree::Punct(p.clone()));
+ ty_generics.push(TokenTree::Punct(p));
+ skip_until_comma = false;
}
- if nesting >= 1 {
- impl_generics.push(tt);
- } else if nesting == 0 {
+ }
+ tt if !skip_until_comma => {
+ match nesting {
// If we haven't entered the generics yet, we still want to keep these tokens.
- rest.push(tt);
+ 0 => rest.push(tt),
+ 1 => {
+ // Here depending on the token, it might be a generic variable name.
+ match tt {
+ TokenTree::Ident(i) if at_start && i.to_string() == "const" => {
+ let Some(name) = toks.next() else {
+ // Parsing error.
+ break;
+ };
+ impl_generics.push(TokenTree::Ident(i));
+ impl_generics.push(name.clone());
+ ty_generics.push(name.clone());
+ decl_generics.push(name);
+ at_start = false;
+ }
+ tt @ TokenTree::Ident(_) if at_start => {
+ impl_generics.push(tt.clone());
+ ty_generics.push(tt);
+ at_start = false;
+ }
+ TokenTree::Punct(p) if p.as_char() == ',' => {
+ impl_generics.push(TokenTree::Punct(p.clone()));
+ ty_generics.push(TokenTree::Punct(p));
+ at_start = true;
+ }
+ // Lifetimes begin with `'`.
+ TokenTree::Punct(p) if p.as_char() == '\'' && at_start => {
+ ty_generics.push(TokenTree::Punct(p.clone()));
+ impl_generics.push(TokenTree::Punct(p));
+ }
+ // Generics can have default values, we skip these.
+ TokenTree::Punct(p) if p.as_char() == '=' => {
+ skip_until_comma = true;
+ }
+ tt => impl_generics.push(tt),
+ }
+ }
+ _ => impl_generics.push(tt),
}
}
+ _ => {}
}
}
rest.extend(toks);
(
Generics {
impl_generics,
+ decl_generics,
ty_generics,
},
rest,
@@ -10,6 +10,7 @@ pub(crate) fn pin_data(args: TokenStream, input: TokenStream) -> TokenStream {
let (
Generics {
impl_generics,
+ decl_generics: _,
ty_generics,
},
rest,
@@ -7,6 +7,7 @@ pub(crate) fn derive(input: TokenStream) -> TokenStream {
let (
Generics {
impl_generics,
+ decl_generics: _,
ty_generics,
},
mut rest,