i386: Fix undefined masks in vpopcnt tests
Checks
Commit Message
The files changed in this patch had tests for masked and unmasked
popcnt. However, the mask inputs to the masked forms were undefined,
and would be set to zero by init_regs. Any combine-like pass that
ran after init_regs could then fold the masked forms into the
unmasked ones. I saw this while testing the late-combine pass
on x86.
Tested on x86_64-linux-gnu. OK to install? (I didn't think this
counted as obvious because there are other ways of initialising
the mask.)
Richard
gcc/testsuite/
* gcc.target/i386/avx512bitalg-vpopcntb.c: Use an asm to define
the mask.
* gcc.target/i386/avx512bitalg-vpopcntbvl.c: Likewise.
* gcc.target/i386/avx512bitalg-vpopcntw.c: Likewise.
* gcc.target/i386/avx512bitalg-vpopcntwvl.c: Likewise.
* gcc.target/i386/avx512vpopcntdq-vpopcntd.c: Likewise.
* gcc.target/i386/avx512vpopcntdq-vpopcntq.c: Likewise.
---
gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntb.c | 1 +
gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntbvl.c | 1 +
gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntw.c | 1 +
gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntwvl.c | 1 +
gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntd.c | 1 +
gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntq.c | 1 +
6 files changed, 6 insertions(+)
Comments
On Tue, Oct 24, 2023 at 6:10 PM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> The files changed in this patch had tests for masked and unmasked
> popcnt. However, the mask inputs to the masked forms were undefined,
> and would be set to zero by init_regs. Any combine-like pass that
> ran after init_regs could then fold the masked forms into the
> unmasked ones. I saw this while testing the late-combine pass
> on x86.
>
> Tested on x86_64-linux-gnu. OK to install? (I didn't think this
> counted as obvious because there are other ways of initialising
> the mask.)
Maybe just move the definition of the mask outside of the functions, e.g.:
  extern __mmask16 msk;
But of course your approach is also OK, so either way is fine with me.
>
> Richard
>
>
> gcc/testsuite/
> * gcc.target/i386/avx512bitalg-vpopcntb.c: Use an asm to define
> the mask.
> * gcc.target/i386/avx512bitalg-vpopcntbvl.c: Likewise.
> * gcc.target/i386/avx512bitalg-vpopcntw.c: Likewise.
> * gcc.target/i386/avx512bitalg-vpopcntwvl.c: Likewise.
> * gcc.target/i386/avx512vpopcntdq-vpopcntd.c: Likewise.
> * gcc.target/i386/avx512vpopcntdq-vpopcntq.c: Likewise.
> ---
> gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntb.c | 1 +
> gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntbvl.c | 1 +
> gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntw.c | 1 +
> gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntwvl.c | 1 +
> gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntd.c | 1 +
> gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntq.c | 1 +
> 6 files changed, 6 insertions(+)
>
> diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntb.c b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntb.c
> index 44b82c0519d..c52088161a0 100644
> --- a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntb.c
> +++ b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntb.c
> @@ -11,6 +11,7 @@ extern __m512i z, z1;
> int foo ()
> {
> __mmask16 msk;
> + asm volatile ("" : "=k" (msk));
> __m512i c = _mm512_popcnt_epi8 (z);
> asm volatile ("" : "+v" (c));
> c = _mm512_mask_popcnt_epi8 (z1, msk, z);
> diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntbvl.c b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntbvl.c
> index 8c2dfaba9c6..7d11c6c4623 100644
> --- a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntbvl.c
> +++ b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntbvl.c
> @@ -16,6 +16,7 @@ int foo ()
> {
> __mmask32 msk32;
> __mmask16 msk16;
> + asm volatile ("" : "=k" (msk16), "=k" (msk32));
> __m256i c256 = _mm256_popcnt_epi8 (y);
> asm volatile ("" : "+v" (c256));
> c256 = _mm256_mask_popcnt_epi8 (y_1, msk32, y);
> diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntw.c b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntw.c
> index 2ef8589f6c1..bc470415e9b 100644
> --- a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntw.c
> +++ b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntw.c
> @@ -11,6 +11,7 @@ extern __m512i z, z1;
> int foo ()
> {
> __mmask16 msk;
> + asm volatile ("" : "=k" (msk));
> __m512i c = _mm512_popcnt_epi16 (z);
> asm volatile ("" : "+v" (c));
> c = _mm512_mask_popcnt_epi16 (z1, msk, z);
> diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntwvl.c b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntwvl.c
> index c976461b12e..3a6af3ed8a1 100644
> --- a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntwvl.c
> +++ b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntwvl.c
> @@ -16,6 +16,7 @@ int foo ()
> {
> __mmask16 msk16;
> __mmask8 msk8;
> + asm volatile ("" : "=k" (msk16), "=k" (msk8));
> __m256i c256 = _mm256_popcnt_epi16 (y);
> asm volatile ("" : "+v" (c256));
> c256 = _mm256_mask_popcnt_epi16 (y_1, msk16, y);
> diff --git a/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntd.c b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntd.c
> index b4d82f97032..0a54ae83055 100644
> --- a/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntd.c
> +++ b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntd.c
> @@ -20,6 +20,7 @@ int foo ()
> {
> __mmask16 msk;
> __mmask8 msk8;
> + asm volatile ("" : "=k" (msk), "=k" (msk8));
> __m128i a = _mm_popcnt_epi32 (x);
> asm volatile ("" : "+v" (a));
> a = _mm_mask_popcnt_epi32 (x_1, msk8, x);
> diff --git a/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntq.c b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntq.c
> index e87d6c999b6..c11e6e00998 100644
> --- a/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntq.c
> +++ b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntq.c
> @@ -19,6 +19,7 @@ extern __m512i z, z_1;
> int foo ()
> {
> __mmask8 msk;
> + asm volatile ("" : "=k" (msk));
> __m128i a = _mm_popcnt_epi64 (x);
> asm volatile ("" : "+v" (a));
> a = _mm_mask_popcnt_epi64 (x_1, msk, x);
> --
> 2.25.1
>
@@ -11,6 +11,7 @@ extern __m512i z, z1;
int foo ()
{
__mmask16 msk;
+ asm volatile ("" : "=k" (msk));
__m512i c = _mm512_popcnt_epi8 (z);
asm volatile ("" : "+v" (c));
c = _mm512_mask_popcnt_epi8 (z1, msk, z);
@@ -16,6 +16,7 @@ int foo ()
{
__mmask32 msk32;
__mmask16 msk16;
+ asm volatile ("" : "=k" (msk16), "=k" (msk32));
__m256i c256 = _mm256_popcnt_epi8 (y);
asm volatile ("" : "+v" (c256));
c256 = _mm256_mask_popcnt_epi8 (y_1, msk32, y);
@@ -11,6 +11,7 @@ extern __m512i z, z1;
int foo ()
{
__mmask16 msk;
+ asm volatile ("" : "=k" (msk));
__m512i c = _mm512_popcnt_epi16 (z);
asm volatile ("" : "+v" (c));
c = _mm512_mask_popcnt_epi16 (z1, msk, z);
@@ -16,6 +16,7 @@ int foo ()
{
__mmask16 msk16;
__mmask8 msk8;
+ asm volatile ("" : "=k" (msk16), "=k" (msk8));
__m256i c256 = _mm256_popcnt_epi16 (y);
asm volatile ("" : "+v" (c256));
c256 = _mm256_mask_popcnt_epi16 (y_1, msk16, y);
@@ -20,6 +20,7 @@ int foo ()
{
__mmask16 msk;
__mmask8 msk8;
+ asm volatile ("" : "=k" (msk), "=k" (msk8));
__m128i a = _mm_popcnt_epi32 (x);
asm volatile ("" : "+v" (a));
a = _mm_mask_popcnt_epi32 (x_1, msk8, x);
@@ -19,6 +19,7 @@ extern __m512i z, z_1;
int foo ()
{
__mmask8 msk;
+ asm volatile ("" : "=k" (msk));
__m128i a = _mm_popcnt_epi64 (x);
asm volatile ("" : "+v" (a));
a = _mm_mask_popcnt_epi64 (x_1, msk, x);