Support vector conversion for AVX512 vcvtudq2pd/vcvttps2udq/vcvttpd2udq.
Checks
Commit Message
The standard pattern names for unsigned_{float,fix} are misspelled in
current trunk: they should be floatunsmn2/fixuns_truncmn2, not
ufloatmn2/ufix_truncmn2.  This patch fixes the typo.
Also vcvttps2udq is available under AVX512VL, so it can be generated
directly instead of being emulated via vcvttps2dq.
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}
Ok for GCC14 stage1 (or maybe for trunk)?
gcc/ChangeLog:
PR target/85048
* config/i386/sse.md (fixuns_trunc<mode><sseintvecmodelower>2):
Generate vcvttps2udq under AVX512VL.
(fixuns_truncv4dfv4si2): New expander.
(floatuns<si2dfmodelower><mode>2): New expander.
gcc/testsuite/ChangeLog:
* g++.target/i386/pr85048.C: New test.
---
gcc/config/i386/sse.md | 18 ++++++++++++--
gcc/testsuite/g++.target/i386/pr85048.C | 33 +++++++++++++++++++++++++
2 files changed, 49 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/g++.target/i386/pr85048.C
Comments
On Thu, Mar 30, 2023 at 3:47 AM liuhongt <hongtao.liu@intel.com> wrote:
>
> There's some typo for the standard pattern name for unsigned_{float,fix},
> it should be floatunsmn2/fixuns_truncmn2, not ufloatmn2/ufix_truncmn2
> in current trunk, the patch fix the typo.
>
> Also vcvttps2udq is available under AVX512VL, so it can be generated
> directly instead of being emulated via vcvttps2dq.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}
> Ok for GCC14 stage1{or maybe for trunk)?
>
> gcc/ChangeLog:
>
> PR target/85048
> * config/i386/sse.md (floatuns<si2dfmodelower><mode>2):
> Generate vcvtudq2ps under AVX512VL.
> (fixuns_truncv4dfv4si2): New expander.
> (floatuns<si2dfmodelower><mode>2): New expander.
>
> gcc/testsuite/ChangeLog:
>
> * g++.target/i386/pr85048.C: New test.
> ---
> gcc/config/i386/sse.md | 18 ++++++++++++--
> gcc/testsuite/g++.target/i386/pr85048.C | 33 +++++++++++++++++++++++++
> 2 files changed, 49 insertions(+), 2 deletions(-)
> create mode 100644 gcc/testsuite/g++.target/i386/pr85048.C
>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 172ec3bea4f..9c2bd468c65 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -8014,8 +8014,9 @@ (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
> (match_operand:VF1 1 "register_operand")]
> "TARGET_SSE2"
> {
> - if (<MODE>mode == V16SFmode)
> - emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
> + /* AVX512 support vcvttps2udq for all 128/256/512-bit vectors. */
> + if (<MODE>mode == V16SFmode || TARGET_AVX512VL)
> + emit_insn (gen_ufix_trunc<mode><sseintvecmodelower>2 (operands[0],
> operands[1]));
> else
> {
> @@ -8413,6 +8414,12 @@ (define_insn "*float<floatunssuffix>v2div2sf2_mask_1"
> (set_attr "prefix" "evex")
> (set_attr "mode" "V4SF")])
>
> +(define_expand "floatuns<si2dfmodelower><mode>2"
> + [(set (match_operand:VF2_512_256VL 0 "register_operand")
> + (unsigned_float:VF2_512_256VL
> + (match_operand:<si2dfmode> 1 "nonimmediate_operand")))]
> + "TARGET_AVX512F")
> +
Just rename the instruction and fix all its call sites. The name of
the insn pattern is internal to the compiler and can be renamed at
will.
> (define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
> [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
> (unsigned_float:VF2_512_256VL
> @@ -8694,6 +8701,13 @@ (define_insn "fix_truncv4dfv4si2<mask_name>"
> (set_attr "prefix" "maybe_evex")
> (set_attr "mode" "OI")])
>
> +
> +/* The standard pattern name is fixuns_truncmn2. */
> +(define_expand "fixuns_truncv4dfv4si2"
> + [(set (match_operand:V4SI 0 "register_operand")
> + (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand")))]
> + "TARGET_AVX512VL && TARGET_AVX512F")
> +
Also the above.
Uros.
> (define_insn "ufix_truncv4dfv4si2<mask_name>"
> [(set (match_operand:V4SI 0 "register_operand" "=v")
> (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
> diff --git a/gcc/testsuite/g++.target/i386/pr85048.C b/gcc/testsuite/g++.target/i386/pr85048.C
> new file mode 100644
> index 00000000000..52973c18ebd
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr85048.C
> @@ -0,0 +1,33 @@
> +/* PR target/85048 */
> +/* { dg-do compile } */
> +/* { dg-options "-std=c++17 -O2 -mavx512vl -mavx512dq -mprefer-vector-width=512" } */
> +/* { dg-final { scan-assembler-times {(?n)vcvtudq2pd[ \t]+} 2 } } */
> +/* { dg-final { scan-assembler-times {(?n)vcvttps2udq[ \t]+} 2 } } */
> +/* { dg-final { scan-assembler-times {(?n)vcvttpd2udqy?[ \t]+} 1 } } */
> +
> +#include <cstdint>
> +
> +template <class T, int N, int Size = N * sizeof(T)>
> +using V [[gnu::vector_size(Size)]] = T;
> +
> +template <class From, class To> V<To, 4> cvt4(V<From, 4> x) {
> + return V<To, 4>{To(x[0]), To(x[1]), To(x[2]), To(x[3])};
> +}
> +template <class From, class To> V<To, 8> cvt8(V<From, 8> x) {
> + return V<To, 8>{
> + To(x[0]), To(x[1]), To(x[2]), To(x[3]),
> + To(x[4]), To(x[5]), To(x[6]), To(x[7])
> + };
> +}
> +
> +#define _(name, from, to, size) \
> +auto name(V<from, size> x) { return cvt##size<from, to>(x); }
> +// integral -> double
> +_(vcvtudq2pd, uint32_t, double, 4)
> +_(vcvtudq2pd, uint32_t, double, 8)
> +
> +_( cvttps2udq, float, uint32_t, 4)
> +_(vcvttps2udq, float, uint32_t, 8)
> +
> +// double -> integral
> +_(vcvttpd2udq, double, uint32_t, 4)
> --
> 2.39.1.388.g2fc9e9ca3c
>
On Thu, Mar 30, 2023 at 8:17 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Thu, Mar 30, 2023 at 3:47 AM liuhongt <hongtao.liu@intel.com> wrote:
> >
> > There's some typo for the standard pattern name for unsigned_{float,fix},
> > it should be floatunsmn2/fixuns_truncmn2, not ufloatmn2/ufix_truncmn2
> > in current trunk, the patch fix the typo.
> >
> > Also vcvttps2udq is available under AVX512VL, so it can be generated
> > directly instead of being emulated via vcvttps2dq.
> >
> > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}
> > Ok for GCC14 stage1{or maybe for trunk)?
> >
> > gcc/ChangeLog:
> >
> > PR target/85048
> > * config/i386/sse.md (floatuns<si2dfmodelower><mode>2):
> > Generate vcvtudq2ps under AVX512VL.
> > (fixuns_truncv4dfv4si2): New expander.
> > (floatuns<si2dfmodelower><mode>2): New expander.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * g++.target/i386/pr85048.C: New test.
> > ---
> > gcc/config/i386/sse.md | 18 ++++++++++++--
> > gcc/testsuite/g++.target/i386/pr85048.C | 33 +++++++++++++++++++++++++
> > 2 files changed, 49 insertions(+), 2 deletions(-)
> > create mode 100644 gcc/testsuite/g++.target/i386/pr85048.C
> >
> > diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> > index 172ec3bea4f..9c2bd468c65 100644
> > --- a/gcc/config/i386/sse.md
> > +++ b/gcc/config/i386/sse.md
> > @@ -8014,8 +8014,9 @@ (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
> > (match_operand:VF1 1 "register_operand")]
> > "TARGET_SSE2"
> > {
> > - if (<MODE>mode == V16SFmode)
> > - emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
> > + /* AVX512 support vcvttps2udq for all 128/256/512-bit vectors. */
> > + if (<MODE>mode == V16SFmode || TARGET_AVX512VL)
> > + emit_insn (gen_ufix_trunc<mode><sseintvecmodelower>2 (operands[0],
> > operands[1]));
> > else
> > {
> > @@ -8413,6 +8414,12 @@ (define_insn "*float<floatunssuffix>v2div2sf2_mask_1"
> > (set_attr "prefix" "evex")
> > (set_attr "mode" "V4SF")])
> >
> > +(define_expand "floatuns<si2dfmodelower><mode>2"
> > + [(set (match_operand:VF2_512_256VL 0 "register_operand")
> > + (unsigned_float:VF2_512_256VL
> > + (match_operand:<si2dfmode> 1 "nonimmediate_operand")))]
> > + "TARGET_AVX512F")
> > +
>
> Just rename the instruction and fix all its call sites. The name of
> the insn pattern is internal to the compiler and can be renamed at
> will.
Ideally, we should standardize all the names to a standard name, so
e.g. ufix_ -> fixuns_ and ufloat -> floatuns.
Uros.
> > (define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
> > [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
> > (unsigned_float:VF2_512_256VL
> > @@ -8694,6 +8701,13 @@ (define_insn "fix_truncv4dfv4si2<mask_name>"
> > (set_attr "prefix" "maybe_evex")
> > (set_attr "mode" "OI")])
> >
> > +
> > +/* The standard pattern name is fixuns_truncmn2. */
> > +(define_expand "fixuns_truncv4dfv4si2"
> > + [(set (match_operand:V4SI 0 "register_operand")
> > + (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand")))]
> > + "TARGET_AVX512VL && TARGET_AVX512F")
> > +
>
> Also the above.
>
> Uros.
>
> > (define_insn "ufix_truncv4dfv4si2<mask_name>"
> > [(set (match_operand:V4SI 0 "register_operand" "=v")
> > (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
> > diff --git a/gcc/testsuite/g++.target/i386/pr85048.C b/gcc/testsuite/g++.target/i386/pr85048.C
> > new file mode 100644
> > index 00000000000..52973c18ebd
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.target/i386/pr85048.C
> > @@ -0,0 +1,33 @@
> > +/* PR target/85048 */
> > +/* { dg-do compile } */
> > +/* { dg-options "-std=c++17 -O2 -mavx512vl -mavx512dq -mprefer-vector-width=512" } */
> > +/* { dg-final { scan-assembler-times {(?n)vcvtudq2pd[ \t]+} 2 } } */
> > +/* { dg-final { scan-assembler-times {(?n)vcvttps2udq[ \t]+} 2 } } */
> > +/* { dg-final { scan-assembler-times {(?n)vcvttpd2udqy?[ \t]+} 1 } } */
> > +
> > +#include <cstdint>
> > +
> > +template <class T, int N, int Size = N * sizeof(T)>
> > +using V [[gnu::vector_size(Size)]] = T;
> > +
> > +template <class From, class To> V<To, 4> cvt4(V<From, 4> x) {
> > + return V<To, 4>{To(x[0]), To(x[1]), To(x[2]), To(x[3])};
> > +}
> > +template <class From, class To> V<To, 8> cvt8(V<From, 8> x) {
> > + return V<To, 8>{
> > + To(x[0]), To(x[1]), To(x[2]), To(x[3]),
> > + To(x[4]), To(x[5]), To(x[6]), To(x[7])
> > + };
> > +}
> > +
> > +#define _(name, from, to, size) \
> > +auto name(V<from, size> x) { return cvt##size<from, to>(x); }
> > +// integral -> double
> > +_(vcvtudq2pd, uint32_t, double, 4)
> > +_(vcvtudq2pd, uint32_t, double, 8)
> > +
> > +_( cvttps2udq, float, uint32_t, 4)
> > +_(vcvttps2udq, float, uint32_t, 8)
> > +
> > +// double -> integral
> > +_(vcvttpd2udq, double, uint32_t, 4)
> > --
> > 2.39.1.388.g2fc9e9ca3c
> >
@@ -8014,8 +8014,9 @@ (define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
(match_operand:VF1 1 "register_operand")]
"TARGET_SSE2"
{
- if (<MODE>mode == V16SFmode)
- emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
+ /* AVX512 support vcvttps2udq for all 128/256/512-bit vectors. */
+ if (<MODE>mode == V16SFmode || TARGET_AVX512VL)
+ emit_insn (gen_ufix_trunc<mode><sseintvecmodelower>2 (operands[0],
operands[1]));
else
{
@@ -8413,6 +8414,12 @@ (define_insn "*float<floatunssuffix>v2div2sf2_mask_1"
(set_attr "prefix" "evex")
(set_attr "mode" "V4SF")])
+(define_expand "floatuns<si2dfmodelower><mode>2"
+ [(set (match_operand:VF2_512_256VL 0 "register_operand")
+ (unsigned_float:VF2_512_256VL
+ (match_operand:<si2dfmode> 1 "nonimmediate_operand")))]
+ "TARGET_AVX512F")
+
(define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
[(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
(unsigned_float:VF2_512_256VL
@@ -8694,6 +8701,13 @@ (define_insn "fix_truncv4dfv4si2<mask_name>"
(set_attr "prefix" "maybe_evex")
(set_attr "mode" "OI")])
+
+/* The standard pattern name is fixuns_truncmn2. */
+(define_expand "fixuns_truncv4dfv4si2"
+ [(set (match_operand:V4SI 0 "register_operand")
+ (unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand")))]
+ "TARGET_AVX512VL && TARGET_AVX512F")
+
(define_insn "ufix_truncv4dfv4si2<mask_name>"
[(set (match_operand:V4SI 0 "register_operand" "=v")
(unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
new file mode 100644
@@ -0,0 +1,33 @@
+/* PR target/85048 */
+/* { dg-do compile } */
+/* { dg-options "-std=c++17 -O2 -mavx512vl -mavx512dq -mprefer-vector-width=512" } */
+/* { dg-final { scan-assembler-times {(?n)vcvtudq2pd[ \t]+} 2 } } */
+/* { dg-final { scan-assembler-times {(?n)vcvttps2udq[ \t]+} 2 } } */
+/* { dg-final { scan-assembler-times {(?n)vcvttpd2udqy?[ \t]+} 1 } } */
+
+#include <cstdint>
+
+template <class T, int N, int Size = N * sizeof(T)>
+using V [[gnu::vector_size(Size)]] = T;
+
+template <class From, class To> V<To, 4> cvt4(V<From, 4> x) {
+ return V<To, 4>{To(x[0]), To(x[1]), To(x[2]), To(x[3])};
+}
+template <class From, class To> V<To, 8> cvt8(V<From, 8> x) {
+ return V<To, 8>{
+ To(x[0]), To(x[1]), To(x[2]), To(x[3]),
+ To(x[4]), To(x[5]), To(x[6]), To(x[7])
+ };
+}
+
+#define _(name, from, to, size) \
+auto name(V<from, size> x) { return cvt##size<from, to>(x); }
+// integral -> double
+_(vcvtudq2pd, uint32_t, double, 4)
+_(vcvtudq2pd, uint32_t, double, 8)
+
+_( cvttps2udq, float, uint32_t, 4)
+_(vcvttps2udq, float, uint32_t, 8)
+
+// double -> integral
+_(vcvttpd2udq, double, uint32_t, 4)