i386: fix assert (__builtin_cpu_supports ("x86-64") >= 0)
Checks
Commit Message
Similar story as PR103661, we again return a negative number
for __builtin_cpu_supports:
Documentation says:
int __builtin_cpu_supports(const char *feature)
This function returns a positive integer if the run-time CPU supports feature and returns 0 otherwise.
while we return -2147483648.
Moreover, I noticed "x86-64" is not a valid option for __builtin_cpu_is,
but for __builtin_cpu_supports.
Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
Ready to be installed?
Thanks,
Martin
PR target/107551
gcc/ChangeLog:
* config/i386/i386-builtins.cc (fold_builtin_cpu): Use same path
as for PR103661.
* doc/extend.texi: Fix "x86-64" use.
gcc/testsuite/ChangeLog:
* gcc.target/i386/builtin_target.c: Add more checks.
---
gcc/config/i386/i386-builtins.cc | 25 ++++++++-----------
gcc/doc/extend.texi | 22 ++++++++--------
.../gcc.target/i386/builtin_target.c | 5 ++++
3 files changed, 26 insertions(+), 26 deletions(-)
Comments
PING^1
On 11/25/22 13:57, Martin Liška wrote:
> Similar story as PR103661, we again return a negative number
> for __builtin_cpu_supports:
>
> Documentation says:
>
> int __builtin_cpu_supports(const char *feature)
> This function returns a positive integer if the run-time CPU supports feature and returns 0 otherwise.
> while we return -2147483648.
>
> Moreover, I noticed "x86-64" is not a valid option for __builtin_cpu_is,
> but for __builtin_cpu_supports.
>
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>
> Ready to be installed?
> Thanks,
> Martin
>
> PR target/107551
>
> gcc/ChangeLog:
>
> * config/i386/i386-builtins.cc (fold_builtin_cpu): Use same path
> as for PR103661.
> * doc/extend.texi: Fix "x86-64" use.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/builtin_target.c: Add more checks.
> ---
> gcc/config/i386/i386-builtins.cc | 25 ++++++++-----------
> gcc/doc/extend.texi | 22 ++++++++--------
> .../gcc.target/i386/builtin_target.c | 5 ++++
> 3 files changed, 26 insertions(+), 26 deletions(-)
>
> diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc
> index eacdf072244..c57c1380298 100644
> --- a/gcc/config/i386/i386-builtins.cc
> +++ b/gcc/config/i386/i386-builtins.cc
> @@ -2181,18 +2181,14 @@ fold_builtin_cpu (tree fndecl, tree *args)
> varpool_node::add (ix86_cpu_features2_var);
> }
>
> + /* Skip __cpu_features[0]. */
> feature -= INT_TYPE_SIZE;
> - field_val = 1U << (feature % INT_TYPE_SIZE);
> tree index = size_int (feature / INT_TYPE_SIZE);
> + feature = feature % INT_TYPE_SIZE;
> array_elt = build4 (ARRAY_REF, unsigned_type_node,
> ix86_cpu_features2_var,
> index, NULL_TREE, NULL_TREE);
> /* Return __cpu_features2[index] & field_val */
> - final = build2 (BIT_AND_EXPR, unsigned_type_node,
> - array_elt,
> - build_int_cstu (unsigned_type_node,
> - field_val));
> - return build1 (NOP_EXPR, integer_type_node, final);
> }
> else
> {
> @@ -2209,16 +2205,17 @@ fold_builtin_cpu (tree fndecl, tree *args)
> array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
> integer_zero_node, NULL_TREE, NULL_TREE);
>
> - field_val = (1U << feature);
> /* Return __cpu_model.__cpu_features[0] & field_val */
> - final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
> - build_int_cstu (unsigned_type_node, field_val));
> - if (feature == (INT_TYPE_SIZE - 1))
> - return build2 (NE_EXPR, integer_type_node, final,
> - build_int_cst (unsigned_type_node, 0));
> - else
> - return build1 (NOP_EXPR, integer_type_node, final);
> }
> +
> + field_val = (1U << feature);
> + final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
> + build_int_cstu (unsigned_type_node, field_val));
> + if (feature == (INT_TYPE_SIZE - 1))
> + return build2 (NE_EXPR, integer_type_node, final,
> + build_int_cst (unsigned_type_node, 0));
> + else
> + return build1 (NOP_EXPR, integer_type_node, final);
> }
> gcc_unreachable ();
> }
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index b1dd39e64b8..d3812fa55b0 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -21897,18 +21897,6 @@ AMD Family 19h Zen version 3.
>
> @item znver4
> AMD Family 19h Zen version 4.
> -
> -@item x86-64
> -Baseline x86-64 microarchitecture level (as defined in x86-64 psABI).
> -
> -@item x86-64-v2
> -x86-64-v2 microarchitecture level.
> -
> -@item x86-64-v3
> -x86-64-v3 microarchitecture level.
> -
> -@item x86-64-v4
> -x86-64-v4 microarchitecture level.
> @end table
>
> Here is an example:
> @@ -22002,6 +21990,16 @@ VPCLMULQDQ instructions.
> AVX512VNNI instructions.
> @item avx512bitalg
> AVX512BITALG instructions.
> +@item x86-64
> +Baseline x86-64 microarchitecture level (as defined in x86-64 psABI).
> +@item x86-64-v2
> +x86-64-v2 microarchitecture level.
> +@item x86-64-v3
> +x86-64-v3 microarchitecture level.
> +@item x86-64-v4
> +x86-64-v4 microarchitecture level.
> +
> +
> @end table
>
> Here is an example:
> diff --git a/gcc/testsuite/gcc.target/i386/builtin_target.c b/gcc/testsuite/gcc.target/i386/builtin_target.c
> index 3e7505a8c3a..fff643c13b0 100644
> --- a/gcc/testsuite/gcc.target/i386/builtin_target.c
> +++ b/gcc/testsuite/gcc.target/i386/builtin_target.c
> @@ -95,6 +95,11 @@ quick_check ()
>
> assert (__builtin_cpu_supports ("avx512vpopcntdq") >= 0);
>
> + assert (__builtin_cpu_supports ("x86-64") >= 0);
> + assert (__builtin_cpu_supports ("x86-64-v2") >= 0);
> + assert (__builtin_cpu_supports ("x86-64-v3") >= 0);
> + assert (__builtin_cpu_supports ("x86-64-v4") >= 0);
> +
> /* Check CPU type. */
> assert (__builtin_cpu_is ("amd") >= 0);
>
On Fri, Dec 2, 2022 at 10:46 AM Martin Liška <mliska@suse.cz> wrote:
>
> PING^1
>
> On 11/25/22 13:57, Martin Liška wrote:
> > Similar story as PR103661, we again return a negative number
> > for __builtin_cpu_supports:
> >
> > Documentation says:
> >
> > int __builtin_cpu_supports(const char *feature)
> > This function returns a positive integer if the run-time CPU supports feature and returns 0 otherwise.
> > while we return -2147483648.
> >
> > Moreover, I noticed "x86-64" is not a valid option for __builtin_cpu_is,
> > but for __builtin_cpu_supports.
> >
> > Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
> >
> > Ready to be installed?
> > Thanks,
> > Martin
> >
> > PR target/107551
> >
> > gcc/ChangeLog:
> >
> > * config/i386/i386-builtins.cc (fold_builtin_cpu): Use same path
> > as for PR103661.
> > * doc/extend.texi: Fix "x86-64" use.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.target/i386/builtin_target.c: Add more checks.
I'm not quite familiar with this part of the compiler, but if Jakub is
OK with the patch, consider it rubber-stamped OK.
Thanks,
Uros.
> > ---
> > gcc/config/i386/i386-builtins.cc | 25 ++++++++-----------
> > gcc/doc/extend.texi | 22 ++++++++--------
> > .../gcc.target/i386/builtin_target.c | 5 ++++
> > 3 files changed, 26 insertions(+), 26 deletions(-)
> >
> > diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc
> > index eacdf072244..c57c1380298 100644
> > --- a/gcc/config/i386/i386-builtins.cc
> > +++ b/gcc/config/i386/i386-builtins.cc
> > @@ -2181,18 +2181,14 @@ fold_builtin_cpu (tree fndecl, tree *args)
> > varpool_node::add (ix86_cpu_features2_var);
> > }
> >
> > + /* Skip __cpu_features[0]. */
> > feature -= INT_TYPE_SIZE;
> > - field_val = 1U << (feature % INT_TYPE_SIZE);
> > tree index = size_int (feature / INT_TYPE_SIZE);
> > + feature = feature % INT_TYPE_SIZE;
> > array_elt = build4 (ARRAY_REF, unsigned_type_node,
> > ix86_cpu_features2_var,
> > index, NULL_TREE, NULL_TREE);
> > /* Return __cpu_features2[index] & field_val */
> > - final = build2 (BIT_AND_EXPR, unsigned_type_node,
> > - array_elt,
> > - build_int_cstu (unsigned_type_node,
> > - field_val));
> > - return build1 (NOP_EXPR, integer_type_node, final);
> > }
> > else
> > {
> > @@ -2209,16 +2205,17 @@ fold_builtin_cpu (tree fndecl, tree *args)
> > array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
> > integer_zero_node, NULL_TREE, NULL_TREE);
> >
> > - field_val = (1U << feature);
> > /* Return __cpu_model.__cpu_features[0] & field_val */
> > - final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
> > - build_int_cstu (unsigned_type_node, field_val));
> > - if (feature == (INT_TYPE_SIZE - 1))
> > - return build2 (NE_EXPR, integer_type_node, final,
> > - build_int_cst (unsigned_type_node, 0));
> > - else
> > - return build1 (NOP_EXPR, integer_type_node, final);
> > }
> > +
> > + field_val = (1U << feature);
> > + final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
> > + build_int_cstu (unsigned_type_node, field_val));
> > + if (feature == (INT_TYPE_SIZE - 1))
> > + return build2 (NE_EXPR, integer_type_node, final,
> > + build_int_cst (unsigned_type_node, 0));
> > + else
> > + return build1 (NOP_EXPR, integer_type_node, final);
> > }
> > gcc_unreachable ();
> > }
> > diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> > index b1dd39e64b8..d3812fa55b0 100644
> > --- a/gcc/doc/extend.texi
> > +++ b/gcc/doc/extend.texi
> > @@ -21897,18 +21897,6 @@ AMD Family 19h Zen version 3.
> >
> > @item znver4
> > AMD Family 19h Zen version 4.
> > -
> > -@item x86-64
> > -Baseline x86-64 microarchitecture level (as defined in x86-64 psABI).
> > -
> > -@item x86-64-v2
> > -x86-64-v2 microarchitecture level.
> > -
> > -@item x86-64-v3
> > -x86-64-v3 microarchitecture level.
> > -
> > -@item x86-64-v4
> > -x86-64-v4 microarchitecture level.
> > @end table
> >
> > Here is an example:
> > @@ -22002,6 +21990,16 @@ VPCLMULQDQ instructions.
> > AVX512VNNI instructions.
> > @item avx512bitalg
> > AVX512BITALG instructions.
> > +@item x86-64
> > +Baseline x86-64 microarchitecture level (as defined in x86-64 psABI).
> > +@item x86-64-v2
> > +x86-64-v2 microarchitecture level.
> > +@item x86-64-v3
> > +x86-64-v3 microarchitecture level.
> > +@item x86-64-v4
> > +x86-64-v4 microarchitecture level.
> > +
> > +
> > @end table
> >
> > Here is an example:
> > diff --git a/gcc/testsuite/gcc.target/i386/builtin_target.c b/gcc/testsuite/gcc.target/i386/builtin_target.c
> > index 3e7505a8c3a..fff643c13b0 100644
> > --- a/gcc/testsuite/gcc.target/i386/builtin_target.c
> > +++ b/gcc/testsuite/gcc.target/i386/builtin_target.c
> > @@ -95,6 +95,11 @@ quick_check ()
> >
> > assert (__builtin_cpu_supports ("avx512vpopcntdq") >= 0);
> >
> > + assert (__builtin_cpu_supports ("x86-64") >= 0);
> > + assert (__builtin_cpu_supports ("x86-64-v2") >= 0);
> > + assert (__builtin_cpu_supports ("x86-64-v3") >= 0);
> > + assert (__builtin_cpu_supports ("x86-64-v4") >= 0);
> > +
> > /* Check CPU type. */
> > assert (__builtin_cpu_is ("amd") >= 0);
> >
>
On 12/2/22 10:54, Uros Bizjak wrote:
> I'm not quite familiar with this part of the compiler, but if Jakub is
> OK with the patch, consider it rubber-stamped OK.
Thanks Uros, Jakub can you please approve it?
Thanks,
Martin
>
> Thanks,
> Uros.
On Fri, Nov 25, 2022 at 01:57:35PM +0100, Martin Liška wrote:
> PR target/107551
>
> gcc/ChangeLog:
>
> * config/i386/i386-builtins.cc (fold_builtin_cpu): Use same path
> as for PR103661.
> * doc/extend.texi: Fix "x86-64" use.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/builtin_target.c: Add more checks.
> +
> + field_val = (1U << feature);
Just
field_val = 1U << feature;
?
> + final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
> + build_int_cstu (unsigned_type_node, field_val));
> + if (feature == (INT_TYPE_SIZE - 1))
Just
if (feature == INT_TYPE_SIZE - 1)
?
> + return build2 (NE_EXPR, integer_type_node, final,
> + build_int_cst (unsigned_type_node, 0));
> + else
> + return build1 (NOP_EXPR, integer_type_node, final);
> }
> gcc_unreachable ();
> }
Otherwise LGTM, though I must say the destinction for when
__builtin_cpu_is and __builtin_cpu_supports works looks completely random.
Jakub
On 12/7/22 12:27, Jakub Jelinek wrote:
> On Fri, Nov 25, 2022 at 01:57:35PM +0100, Martin Liška wrote:
>> PR target/107551
>>
>> gcc/ChangeLog:
>>
>> * config/i386/i386-builtins.cc (fold_builtin_cpu): Use same path
>> as for PR103661.
>> * doc/extend.texi: Fix "x86-64" use.
>>
>> gcc/testsuite/ChangeLog:
>>
>> * gcc.target/i386/builtin_target.c: Add more checks.
>> +
>> + field_val = (1U << feature);
>
> Just
> field_val = 1U << feature;
> ?
>
>> + final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
>> + build_int_cstu (unsigned_type_node, field_val));
>> + if (feature == (INT_TYPE_SIZE - 1))
>
> Just
> if (feature == INT_TYPE_SIZE - 1)
> ?
Sure, included the 2 aforementioned suggestions.
>
>> + return build2 (NE_EXPR, integer_type_node, final,
>> + build_int_cst (unsigned_type_node, 0));
>> + else
>> + return build1 (NOP_EXPR, integer_type_node, final);
>> }
>> gcc_unreachable ();
>> }
>
> Otherwise LGTM, though I must say the destinction for when
> __builtin_cpu_is and __builtin_cpu_supports works looks completely random.
I'm going to push the revision.
What exactly do you mean by random? I just know there are differences in between
x86 and ppc:
int __builtin_cpu_supports(const char *feature)
This function returns a positive integer if the run-time CPU supports feature and returns 0 otherwise.
This function returns a value of 1 if the run-time CPU supports the HWCAP feature feature and returns 0 otherwise.
Martin
>
> Jakub
>
On Fri, Dec 09, 2022 at 10:18:34AM +0100, Martin Liška wrote:
> I'm going to push the revision.
>
> What exactly do you mean by random? I just know there are differences in between
> x86 and ppc:
>
> int __builtin_cpu_supports(const char *feature)
> This function returns a positive integer if the run-time CPU supports feature and returns 0 otherwise.
>
> This function returns a value of 1 if the run-time CPU supports the HWCAP feature feature and returns 0 otherwise.
Because x86-64-v2 etc. isn't a HWCAP feature, but rather an architecture
(or set of canned HWCAP features). So __builtin_cpu_is for those,
or especially for __builtin_cpu_is ("x86-64") would make more sense.
Though I see that many of the valid -march= values actually aren't supported
by __builtin_cpu_is either, whether it is pentium, nocona, lakemont etc.
but does support say some vendors (intel, amd).
Jakub
@@ -2181,18 +2181,14 @@ fold_builtin_cpu (tree fndecl, tree *args)
varpool_node::add (ix86_cpu_features2_var);
}
+ /* Skip __cpu_features[0]. */
feature -= INT_TYPE_SIZE;
- field_val = 1U << (feature % INT_TYPE_SIZE);
tree index = size_int (feature / INT_TYPE_SIZE);
+ feature = feature % INT_TYPE_SIZE;
array_elt = build4 (ARRAY_REF, unsigned_type_node,
ix86_cpu_features2_var,
index, NULL_TREE, NULL_TREE);
/* Return __cpu_features2[index] & field_val */
- final = build2 (BIT_AND_EXPR, unsigned_type_node,
- array_elt,
- build_int_cstu (unsigned_type_node,
- field_val));
- return build1 (NOP_EXPR, integer_type_node, final);
}
else
{
@@ -2209,16 +2205,17 @@ fold_builtin_cpu (tree fndecl, tree *args)
array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
integer_zero_node, NULL_TREE, NULL_TREE);
- field_val = (1U << feature);
/* Return __cpu_model.__cpu_features[0] & field_val */
- final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
- build_int_cstu (unsigned_type_node, field_val));
- if (feature == (INT_TYPE_SIZE - 1))
- return build2 (NE_EXPR, integer_type_node, final,
- build_int_cst (unsigned_type_node, 0));
- else
- return build1 (NOP_EXPR, integer_type_node, final);
}
+
+ field_val = (1U << feature);
+ final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
+ build_int_cstu (unsigned_type_node, field_val));
+ if (feature == (INT_TYPE_SIZE - 1))
+ return build2 (NE_EXPR, integer_type_node, final,
+ build_int_cst (unsigned_type_node, 0));
+ else
+ return build1 (NOP_EXPR, integer_type_node, final);
}
gcc_unreachable ();
}
@@ -21897,18 +21897,6 @@ AMD Family 19h Zen version 3.
@item znver4
AMD Family 19h Zen version 4.
-
-@item x86-64
-Baseline x86-64 microarchitecture level (as defined in x86-64 psABI).
-
-@item x86-64-v2
-x86-64-v2 microarchitecture level.
-
-@item x86-64-v3
-x86-64-v3 microarchitecture level.
-
-@item x86-64-v4
-x86-64-v4 microarchitecture level.
@end table
Here is an example:
@@ -22002,6 +21990,16 @@ VPCLMULQDQ instructions.
AVX512VNNI instructions.
@item avx512bitalg
AVX512BITALG instructions.
+@item x86-64
+Baseline x86-64 microarchitecture level (as defined in x86-64 psABI).
+@item x86-64-v2
+x86-64-v2 microarchitecture level.
+@item x86-64-v3
+x86-64-v3 microarchitecture level.
+@item x86-64-v4
+x86-64-v4 microarchitecture level.
+
+
@end table
Here is an example:
@@ -95,6 +95,11 @@ quick_check ()
assert (__builtin_cpu_supports ("avx512vpopcntdq") >= 0);
+ assert (__builtin_cpu_supports ("x86-64") >= 0);
+ assert (__builtin_cpu_supports ("x86-64-v2") >= 0);
+ assert (__builtin_cpu_supports ("x86-64-v3") >= 0);
+ assert (__builtin_cpu_supports ("x86-64-v4") >= 0);
+
/* Check CPU type. */
assert (__builtin_cpu_is ("amd") >= 0);