i386: fix assert (__builtin_cpu_supports ("x86-64") >= 0)

Message ID d68f00ea-199b-2980-0ae6-df53da370a5c@suse.cz
State Accepted
Headers
Series i386: fix assert (__builtin_cpu_supports ("x86-64") >= 0) |

Checks

Context Check Description
snail/gcc-patch-check success Github commit url

Commit Message

Martin Liška Nov. 25, 2022, 12:57 p.m. UTC
  Similar story as PR103661, we again return a negative number
for __builtin_cpu_supports:

Documentation says:

int __builtin_cpu_supports(const char *feature)
This function returns a positive integer if the run-time CPU supports feature and returns 0 otherwise.
while we return -2147483648.

Moreover, I noticed "x86-64" is not a valid option for __builtin_cpu_is,
but for __builtin_cpu_supports.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

PR target/107551

gcc/ChangeLog:

	* config/i386/i386-builtins.cc (fold_builtin_cpu): Use same path
	as for PR103661.
	* doc/extend.texi: Fix "x86-64" use.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/builtin_target.c: Add more checks.
---
 gcc/config/i386/i386-builtins.cc              | 25 ++++++++-----------
 gcc/doc/extend.texi                           | 22 ++++++++--------
 .../gcc.target/i386/builtin_target.c          |  5 ++++
 3 files changed, 26 insertions(+), 26 deletions(-)
  

Comments

Martin Liška Dec. 2, 2022, 9:46 a.m. UTC | #1
PING^1

On 11/25/22 13:57, Martin Liška wrote:
> Similar story as PR103661, we again return a negative number
> for __builtin_cpu_supports:
> 
> Documentation says:
> 
> int __builtin_cpu_supports(const char *feature)
> This function returns a positive integer if the run-time CPU supports feature and returns 0 otherwise.
> while we return -2147483648.
> 
> Moreover, I noticed "x86-64" is not a valid option for __builtin_cpu_is,
> but for __builtin_cpu_supports.
> 
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
> 
> Ready to be installed?
> Thanks,
> Martin
> 
> PR target/107551
> 
> gcc/ChangeLog:
> 
> 	* config/i386/i386-builtins.cc (fold_builtin_cpu): Use same path
> 	as for PR103661.
> 	* doc/extend.texi: Fix "x86-64" use.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/i386/builtin_target.c: Add more checks.
> ---
>  gcc/config/i386/i386-builtins.cc              | 25 ++++++++-----------
>  gcc/doc/extend.texi                           | 22 ++++++++--------
>  .../gcc.target/i386/builtin_target.c          |  5 ++++
>  3 files changed, 26 insertions(+), 26 deletions(-)
> 
> diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc
> index eacdf072244..c57c1380298 100644
> --- a/gcc/config/i386/i386-builtins.cc
> +++ b/gcc/config/i386/i386-builtins.cc
> @@ -2181,18 +2181,14 @@ fold_builtin_cpu (tree fndecl, tree *args)
>  	      varpool_node::add (ix86_cpu_features2_var);
>  	    }
>  
> +	  /* Skip __cpu_features[0].  */
>  	  feature -= INT_TYPE_SIZE;
> -	  field_val = 1U << (feature % INT_TYPE_SIZE);
>  	  tree index = size_int (feature / INT_TYPE_SIZE);
> +	  feature = feature % INT_TYPE_SIZE;
>  	  array_elt = build4 (ARRAY_REF, unsigned_type_node,
>  			      ix86_cpu_features2_var,
>  			      index, NULL_TREE, NULL_TREE);
>  	  /* Return __cpu_features2[index] & field_val  */
> -	  final = build2 (BIT_AND_EXPR, unsigned_type_node,
> -			  array_elt,
> -			  build_int_cstu (unsigned_type_node,
> -					  field_val));
> -	  return build1 (NOP_EXPR, integer_type_node, final);
>  	}
>        else
>  	{
> @@ -2209,16 +2205,17 @@ fold_builtin_cpu (tree fndecl, tree *args)
>  	  array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
>  			      integer_zero_node, NULL_TREE, NULL_TREE);
>  
> -	  field_val = (1U << feature);
>  	  /* Return __cpu_model.__cpu_features[0] & field_val  */
> -	  final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
> -			  build_int_cstu (unsigned_type_node, field_val));
> -	  if (feature == (INT_TYPE_SIZE - 1))
> -	    return build2 (NE_EXPR, integer_type_node, final,
> -			   build_int_cst (unsigned_type_node, 0));
> -	  else
> -	    return build1 (NOP_EXPR, integer_type_node, final);
>  	}
> +
> +      field_val = (1U << feature);
> +      final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
> +		      build_int_cstu (unsigned_type_node, field_val));
> +      if (feature == (INT_TYPE_SIZE - 1))
> +	return build2 (NE_EXPR, integer_type_node, final,
> +		       build_int_cst (unsigned_type_node, 0));
> +      else
> +	return build1 (NOP_EXPR, integer_type_node, final);
>      }
>    gcc_unreachable ();
>  }
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index b1dd39e64b8..d3812fa55b0 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -21897,18 +21897,6 @@ AMD Family 19h Zen version 3.
>  
>  @item znver4
>  AMD Family 19h Zen version 4.
> -
> -@item x86-64
> -Baseline x86-64 microarchitecture level (as defined in x86-64 psABI).
> -
> -@item x86-64-v2
> -x86-64-v2 microarchitecture level.
> -
> -@item x86-64-v3
> -x86-64-v3 microarchitecture level.
> -
> -@item x86-64-v4
> -x86-64-v4 microarchitecture level.
>  @end table
>  
>  Here is an example:
> @@ -22002,6 +21990,16 @@ VPCLMULQDQ instructions.
>  AVX512VNNI instructions.
>  @item avx512bitalg
>  AVX512BITALG instructions.
> +@item x86-64
> +Baseline x86-64 microarchitecture level (as defined in x86-64 psABI).
> +@item x86-64-v2
> +x86-64-v2 microarchitecture level.
> +@item x86-64-v3
> +x86-64-v3 microarchitecture level.
> +@item x86-64-v4
> +x86-64-v4 microarchitecture level.
> +
> +
>  @end table
>  
>  Here is an example:
> diff --git a/gcc/testsuite/gcc.target/i386/builtin_target.c b/gcc/testsuite/gcc.target/i386/builtin_target.c
> index 3e7505a8c3a..fff643c13b0 100644
> --- a/gcc/testsuite/gcc.target/i386/builtin_target.c
> +++ b/gcc/testsuite/gcc.target/i386/builtin_target.c
> @@ -95,6 +95,11 @@ quick_check ()
>  
>    assert (__builtin_cpu_supports ("avx512vpopcntdq") >= 0);
>  
> +  assert (__builtin_cpu_supports ("x86-64") >= 0);
> +  assert (__builtin_cpu_supports ("x86-64-v2") >= 0);
> +  assert (__builtin_cpu_supports ("x86-64-v3") >= 0);
> +  assert (__builtin_cpu_supports ("x86-64-v4") >= 0);
> +
>    /* Check CPU type.  */
>    assert (__builtin_cpu_is ("amd") >= 0);
>
  
Uros Bizjak Dec. 2, 2022, 9:54 a.m. UTC | #2
On Fri, Dec 2, 2022 at 10:46 AM Martin Liška <mliska@suse.cz> wrote:
>
> PING^1
>
> On 11/25/22 13:57, Martin Liška wrote:
> > Similar story as PR103661, we again return a negative number
> > for __builtin_cpu_supports:
> >
> > Documentation says:
> >
> > int __builtin_cpu_supports(const char *feature)
> > This function returns a positive integer if the run-time CPU supports feature and returns 0 otherwise.
> > while we return -2147483648.
> >
> > Moreover, I noticed "x86-64" is not a valid option for __builtin_cpu_is,
> > but for __builtin_cpu_supports.
> >
> > Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
> >
> > Ready to be installed?
> > Thanks,
> > Martin
> >
> > PR target/107551
> >
> > gcc/ChangeLog:
> >
> >       * config/i386/i386-builtins.cc (fold_builtin_cpu): Use same path
> >       as for PR103661.
> >       * doc/extend.texi: Fix "x86-64" use.
> >
> > gcc/testsuite/ChangeLog:
> >
> >       * gcc.target/i386/builtin_target.c: Add more checks.

I'm not quite familiar with this part of the compiler, but if Jakub is
OK with the patch, consider it rubber-stamped OK.

Thanks,
Uros.

> > ---
> >  gcc/config/i386/i386-builtins.cc              | 25 ++++++++-----------
> >  gcc/doc/extend.texi                           | 22 ++++++++--------
> >  .../gcc.target/i386/builtin_target.c          |  5 ++++
> >  3 files changed, 26 insertions(+), 26 deletions(-)
> >
> > diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc
> > index eacdf072244..c57c1380298 100644
> > --- a/gcc/config/i386/i386-builtins.cc
> > +++ b/gcc/config/i386/i386-builtins.cc
> > @@ -2181,18 +2181,14 @@ fold_builtin_cpu (tree fndecl, tree *args)
> >             varpool_node::add (ix86_cpu_features2_var);
> >           }
> >
> > +       /* Skip __cpu_features[0].  */
> >         feature -= INT_TYPE_SIZE;
> > -       field_val = 1U << (feature % INT_TYPE_SIZE);
> >         tree index = size_int (feature / INT_TYPE_SIZE);
> > +       feature = feature % INT_TYPE_SIZE;
> >         array_elt = build4 (ARRAY_REF, unsigned_type_node,
> >                             ix86_cpu_features2_var,
> >                             index, NULL_TREE, NULL_TREE);
> >         /* Return __cpu_features2[index] & field_val  */
> > -       final = build2 (BIT_AND_EXPR, unsigned_type_node,
> > -                       array_elt,
> > -                       build_int_cstu (unsigned_type_node,
> > -                                       field_val));
> > -       return build1 (NOP_EXPR, integer_type_node, final);
> >       }
> >        else
> >       {
> > @@ -2209,16 +2205,17 @@ fold_builtin_cpu (tree fndecl, tree *args)
> >         array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
> >                             integer_zero_node, NULL_TREE, NULL_TREE);
> >
> > -       field_val = (1U << feature);
> >         /* Return __cpu_model.__cpu_features[0] & field_val  */
> > -       final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
> > -                       build_int_cstu (unsigned_type_node, field_val));
> > -       if (feature == (INT_TYPE_SIZE - 1))
> > -         return build2 (NE_EXPR, integer_type_node, final,
> > -                        build_int_cst (unsigned_type_node, 0));
> > -       else
> > -         return build1 (NOP_EXPR, integer_type_node, final);
> >       }
> > +
> > +      field_val = (1U << feature);
> > +      final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
> > +                   build_int_cstu (unsigned_type_node, field_val));
> > +      if (feature == (INT_TYPE_SIZE - 1))
> > +     return build2 (NE_EXPR, integer_type_node, final,
> > +                    build_int_cst (unsigned_type_node, 0));
> > +      else
> > +     return build1 (NOP_EXPR, integer_type_node, final);
> >      }
> >    gcc_unreachable ();
> >  }
> > diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> > index b1dd39e64b8..d3812fa55b0 100644
> > --- a/gcc/doc/extend.texi
> > +++ b/gcc/doc/extend.texi
> > @@ -21897,18 +21897,6 @@ AMD Family 19h Zen version 3.
> >
> >  @item znver4
> >  AMD Family 19h Zen version 4.
> > -
> > -@item x86-64
> > -Baseline x86-64 microarchitecture level (as defined in x86-64 psABI).
> > -
> > -@item x86-64-v2
> > -x86-64-v2 microarchitecture level.
> > -
> > -@item x86-64-v3
> > -x86-64-v3 microarchitecture level.
> > -
> > -@item x86-64-v4
> > -x86-64-v4 microarchitecture level.
> >  @end table
> >
> >  Here is an example:
> > @@ -22002,6 +21990,16 @@ VPCLMULQDQ instructions.
> >  AVX512VNNI instructions.
> >  @item avx512bitalg
> >  AVX512BITALG instructions.
> > +@item x86-64
> > +Baseline x86-64 microarchitecture level (as defined in x86-64 psABI).
> > +@item x86-64-v2
> > +x86-64-v2 microarchitecture level.
> > +@item x86-64-v3
> > +x86-64-v3 microarchitecture level.
> > +@item x86-64-v4
> > +x86-64-v4 microarchitecture level.
> > +
> > +
> >  @end table
> >
> >  Here is an example:
> > diff --git a/gcc/testsuite/gcc.target/i386/builtin_target.c b/gcc/testsuite/gcc.target/i386/builtin_target.c
> > index 3e7505a8c3a..fff643c13b0 100644
> > --- a/gcc/testsuite/gcc.target/i386/builtin_target.c
> > +++ b/gcc/testsuite/gcc.target/i386/builtin_target.c
> > @@ -95,6 +95,11 @@ quick_check ()
> >
> >    assert (__builtin_cpu_supports ("avx512vpopcntdq") >= 0);
> >
> > +  assert (__builtin_cpu_supports ("x86-64") >= 0);
> > +  assert (__builtin_cpu_supports ("x86-64-v2") >= 0);
> > +  assert (__builtin_cpu_supports ("x86-64-v3") >= 0);
> > +  assert (__builtin_cpu_supports ("x86-64-v4") >= 0);
> > +
> >    /* Check CPU type.  */
> >    assert (__builtin_cpu_is ("amd") >= 0);
> >
>
  
Martin Liška Dec. 7, 2022, 10:30 a.m. UTC | #3
On 12/2/22 10:54, Uros Bizjak wrote:
> I'm not quite familiar with this part of the compiler, but if Jakub is
> OK with the patch, consider it rubber-stamped OK.

Thanks Uros, Jakub can you please approve it?

Thanks,
Martin

> 
> Thanks,
> Uros.
  
Jakub Jelinek Dec. 7, 2022, 11:27 a.m. UTC | #4
On Fri, Nov 25, 2022 at 01:57:35PM +0100, Martin Liška wrote:
> PR target/107551
> 
> gcc/ChangeLog:
> 
> 	* config/i386/i386-builtins.cc (fold_builtin_cpu): Use same path
> 	as for PR103661.
> 	* doc/extend.texi: Fix "x86-64" use.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/i386/builtin_target.c: Add more checks.
> +
> +      field_val = (1U << feature);

Just
      field_val = 1U << feature;
?

> +      final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
> +		      build_int_cstu (unsigned_type_node, field_val));
> +      if (feature == (INT_TYPE_SIZE - 1))

Just
      if (feature == INT_TYPE_SIZE - 1)
?

> +	return build2 (NE_EXPR, integer_type_node, final,
> +		       build_int_cst (unsigned_type_node, 0));
> +      else
> +	return build1 (NOP_EXPR, integer_type_node, final);
>      }
>    gcc_unreachable ();
>  }

Otherwise LGTM, though I must say the destinction for when
__builtin_cpu_is and __builtin_cpu_supports works looks completely random.

	Jakub
  
Martin Liška Dec. 9, 2022, 9:18 a.m. UTC | #5
On 12/7/22 12:27, Jakub Jelinek wrote:
> On Fri, Nov 25, 2022 at 01:57:35PM +0100, Martin Liška wrote:
>> PR target/107551
>>
>> gcc/ChangeLog:
>>
>> 	* config/i386/i386-builtins.cc (fold_builtin_cpu): Use same path
>> 	as for PR103661.
>> 	* doc/extend.texi: Fix "x86-64" use.
>>
>> gcc/testsuite/ChangeLog:
>>
>> 	* gcc.target/i386/builtin_target.c: Add more checks.
>> +
>> +      field_val = (1U << feature);
> 
> Just
>       field_val = 1U << feature;
> ?
> 
>> +      final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
>> +		      build_int_cstu (unsigned_type_node, field_val));
>> +      if (feature == (INT_TYPE_SIZE - 1))
> 
> Just
>       if (feature == INT_TYPE_SIZE - 1)
> ?

Sure, included the 2 aforementioned suggestions.

> 
>> +	return build2 (NE_EXPR, integer_type_node, final,
>> +		       build_int_cst (unsigned_type_node, 0));
>> +      else
>> +	return build1 (NOP_EXPR, integer_type_node, final);
>>      }
>>    gcc_unreachable ();
>>  }
> 
> Otherwise LGTM, though I must say the destinction for when
> __builtin_cpu_is and __builtin_cpu_supports works looks completely random.

I'm going to push the revision.

What exactly do you mean by random? I just know there are differences in between
x86 and ppc:

int __builtin_cpu_supports(const char *feature)
This function returns a positive integer if the run-time CPU supports feature and returns 0 otherwise.

This function returns a value of 1 if the run-time CPU supports the HWCAP feature feature and returns 0 otherwise.

Martin

> 
> 	Jakub
>
  
Jakub Jelinek Dec. 9, 2022, 9:31 a.m. UTC | #6
On Fri, Dec 09, 2022 at 10:18:34AM +0100, Martin Liška wrote:
> I'm going to push the revision.
> 
> What exactly do you mean by random? I just know there are differences in between
> x86 and ppc:
> 
> int __builtin_cpu_supports(const char *feature)
> This function returns a positive integer if the run-time CPU supports feature and returns 0 otherwise.
> 
> This function returns a value of 1 if the run-time CPU supports the HWCAP feature feature and returns 0 otherwise.

Because x86-64-v2 etc. isn't a HWCAP feature, but rather an architecture
(or set of canned HWCAP features).  So __builtin_cpu_is for those,
or especially for __builtin_cpu_is ("x86-64") would make more sense.
Though I see that many of the valid -march= values actually aren't supported
by __builtin_cpu_is either, whether it is pentium, nocona, lakemont etc.
but does support say some vendors (intel, amd).

	Jakub
  

Patch

diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc
index eacdf072244..c57c1380298 100644
--- a/gcc/config/i386/i386-builtins.cc
+++ b/gcc/config/i386/i386-builtins.cc
@@ -2181,18 +2181,14 @@  fold_builtin_cpu (tree fndecl, tree *args)
 	      varpool_node::add (ix86_cpu_features2_var);
 	    }
 
+	  /* Skip __cpu_features[0].  */
 	  feature -= INT_TYPE_SIZE;
-	  field_val = 1U << (feature % INT_TYPE_SIZE);
 	  tree index = size_int (feature / INT_TYPE_SIZE);
+	  feature = feature % INT_TYPE_SIZE;
 	  array_elt = build4 (ARRAY_REF, unsigned_type_node,
 			      ix86_cpu_features2_var,
 			      index, NULL_TREE, NULL_TREE);
 	  /* Return __cpu_features2[index] & field_val  */
-	  final = build2 (BIT_AND_EXPR, unsigned_type_node,
-			  array_elt,
-			  build_int_cstu (unsigned_type_node,
-					  field_val));
-	  return build1 (NOP_EXPR, integer_type_node, final);
 	}
       else
 	{
@@ -2209,16 +2205,17 @@  fold_builtin_cpu (tree fndecl, tree *args)
 	  array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
 			      integer_zero_node, NULL_TREE, NULL_TREE);
 
-	  field_val = (1U << feature);
 	  /* Return __cpu_model.__cpu_features[0] & field_val  */
-	  final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
-			  build_int_cstu (unsigned_type_node, field_val));
-	  if (feature == (INT_TYPE_SIZE - 1))
-	    return build2 (NE_EXPR, integer_type_node, final,
-			   build_int_cst (unsigned_type_node, 0));
-	  else
-	    return build1 (NOP_EXPR, integer_type_node, final);
 	}
+
+      field_val = (1U << feature);
+      final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
+		      build_int_cstu (unsigned_type_node, field_val));
+      if (feature == (INT_TYPE_SIZE - 1))
+	return build2 (NE_EXPR, integer_type_node, final,
+		       build_int_cst (unsigned_type_node, 0));
+      else
+	return build1 (NOP_EXPR, integer_type_node, final);
     }
   gcc_unreachable ();
 }
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index b1dd39e64b8..d3812fa55b0 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -21897,18 +21897,6 @@  AMD Family 19h Zen version 3.
 
 @item znver4
 AMD Family 19h Zen version 4.
-
-@item x86-64
-Baseline x86-64 microarchitecture level (as defined in x86-64 psABI).
-
-@item x86-64-v2
-x86-64-v2 microarchitecture level.
-
-@item x86-64-v3
-x86-64-v3 microarchitecture level.
-
-@item x86-64-v4
-x86-64-v4 microarchitecture level.
 @end table
 
 Here is an example:
@@ -22002,6 +21990,16 @@  VPCLMULQDQ instructions.
 AVX512VNNI instructions.
 @item avx512bitalg
 AVX512BITALG instructions.
+@item x86-64
+Baseline x86-64 microarchitecture level (as defined in x86-64 psABI).
+@item x86-64-v2
+x86-64-v2 microarchitecture level.
+@item x86-64-v3
+x86-64-v3 microarchitecture level.
+@item x86-64-v4
+x86-64-v4 microarchitecture level.
+
+
 @end table
 
 Here is an example:
diff --git a/gcc/testsuite/gcc.target/i386/builtin_target.c b/gcc/testsuite/gcc.target/i386/builtin_target.c
index 3e7505a8c3a..fff643c13b0 100644
--- a/gcc/testsuite/gcc.target/i386/builtin_target.c
+++ b/gcc/testsuite/gcc.target/i386/builtin_target.c
@@ -95,6 +95,11 @@  quick_check ()
 
   assert (__builtin_cpu_supports ("avx512vpopcntdq") >= 0);
 
+  assert (__builtin_cpu_supports ("x86-64") >= 0);
+  assert (__builtin_cpu_supports ("x86-64-v2") >= 0);
+  assert (__builtin_cpu_supports ("x86-64-v3") >= 0);
+  assert (__builtin_cpu_supports ("x86-64-v4") >= 0);
+
   /* Check CPU type.  */
   assert (__builtin_cpu_is ("amd") >= 0);