[V4,2/2] rs6000: use mtvsrws to move sf from si p9
Checks
Commit Message
Hi,
As mentioned in PR108338, on p9, we could use mtvsrws to implement
the bitcast from SI to SF (or lowpart DI to SF).
For code:
*(long long*)buff = di;
float f = *(float*)(buff);
"sldi 9,3,32 ; mtvsrd 1,9 ; xscvspdpn 1,1" is generated.
A better one would be "mtvsrws 1,3 ; xscvspdpn 1,1".
Compare with previous patch:
https://gcc.gnu.org/pipermail/gcc-patches/2023-July/623533.html
"highpart DI-->SF" is put to a seperate patch.
Pass bootstrap and regression on ppc64{,le}.
Is this ok for trunk?
BR,
Jeff (Jiufu Guo)
gcc/ChangeLog:
* config/rs6000/rs6000.md (movsf_from_si): Update to generate mtvsrws
for P9.
gcc/testsuite/ChangeLog:
* gcc.target/powerpc/pr108338.c: Updated to check mtvsrws for p9.
---
gcc/config/rs6000/rs6000.md | 25 ++++++++++++++++-----
gcc/testsuite/gcc.target/powerpc/pr108338.c | 6 +++--
2 files changed, 23 insertions(+), 8 deletions(-)
Comments
Hi,
I would like to have a ping.....
BR,
Jeff (Jiufu Guo)
Jiufu Guo <guojiufu@linux.ibm.com> writes:
> Hi,
>
> As mentioned in PR108338, on p9, we could use mtvsrws to implement
> the bitcast from SI to SF (or lowpart DI to SF).
>
> For code:
> *(long long*)buff = di;
> float f = *(float*)(buff);
>
> "sldi 9,3,32 ; mtvsrd 1,9 ; xscvspdpn 1,1" is generated.
> A better one would be "mtvsrws 1,3 ; xscvspdpn 1,1".
>
> Compare with previous patch:
> https://gcc.gnu.org/pipermail/gcc-patches/2023-July/623533.html
> "highpart DI-->SF" is put to a seperate patch.
>
> Pass bootstrap and regression on ppc64{,le}.
> Is this ok for trunk?
>
> BR,
> Jeff (Jiufu Guo)
>
> gcc/ChangeLog:
>
> * config/rs6000/rs6000.md (movsf_from_si): Update to generate mtvsrws
> for P9.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/powerpc/pr108338.c: Updated to check mtvsrws for p9.
>
> ---
> gcc/config/rs6000/rs6000.md | 25 ++++++++++++++++-----
> gcc/testsuite/gcc.target/powerpc/pr108338.c | 6 +++--
> 2 files changed, 23 insertions(+), 8 deletions(-)
>
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 8c92cbf976de915136ad5dba24e69a363d21438d..c03e677bca79e8fb1acb276d07d0acfae009f6d8 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -8280,13 +8280,26 @@ (define_insn_and_split "movsf_from_si"
> {
> rtx op0 = operands[0];
> rtx op1 = operands[1];
> - rtx op2 = operands[2];
> - rtx op1_di = gen_rtx_REG (DImode, REGNO (op1));
>
> - /* Move SF value to upper 32-bits for xscvspdpn. */
> - emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
> - emit_insn (gen_p8_mtvsrd_sf (op0, op2));
> - emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
> + /* Move lowpart 32-bits from register for SFmode. */
> + if (TARGET_P9_VECTOR)
> + {
> + /* Using mtvsrws;xscvspdpn. */
> + rtx op0_v = gen_rtx_REG (V4SImode, REGNO (op0));
> + emit_insn (gen_vsx_splat_v4si (op0_v, op1));
> + emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
> + }
> + else
> + {
> + rtx op2 = operands[2];
> + rtx op1_di = gen_rtx_REG (DImode, REGNO (op1));
> +
> + /* Using ashl;mtvsrd;xscvspdpn. */
> + emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
> + emit_insn (gen_p8_mtvsrd_sf (op0, op2));
> + emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
> + }
> +
> DONE;
> }
> [(set_attr "length"
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr108338.c b/gcc/testsuite/gcc.target/powerpc/pr108338.c
> index 6db65595343c2407fc32f68f5f52a1f7196c371d..0565e5254ed0a8cc579cf505a3f865426dcf62ae 100644
> --- a/gcc/testsuite/gcc.target/powerpc/pr108338.c
> +++ b/gcc/testsuite/gcc.target/powerpc/pr108338.c
> @@ -19,9 +19,11 @@ float __attribute__ ((noipa)) sf_from_di_off4 (long long l)
>
> /* Under lp64, parameter 'l' is in one DI reg, then bitcast sub DI to SF. */
> /* { dg-final { scan-assembler-times {\mxscvspdpn\M} 2 { target { lp64 && has_arch_pwr8 } } } } */
> -/* { dg-final { scan-assembler-times {\mmtvsrd\M} 2 { target { lp64 && has_arch_pwr8 } } } } */
> +/* { dg-final { scan-assembler-times {\mmtvsrd\M} 2 { target { lp64 && { has_arch_pwr8 && { ! has_arch_pwr9 } } } } } } */
> +/* { dg-final { scan-assembler-times {\msldi\M} 1 { target { lp64 && { has_arch_pwr8 && { ! has_arch_pwr9 } } } } } } */
> +/* { dg-final { scan-assembler-times {\mmtvsrd\M} 1 { target { lp64 && has_arch_pwr9 } } } } */
> +/* { dg-final { scan-assembler-times {\mmtvsrws\M} 1 { target { lp64 && has_arch_pwr9 } } } } */
> /* { dg-final { scan-assembler-times {\mrldicr\M} 1 { target { lp64 && has_arch_pwr8 } } } } */
> -/* { dg-final { scan-assembler-times {\msldi\M} 1 { target { lp64 && has_arch_pwr8 } } } } */
>
> union di_sf_sf
> {
Hi Jeff,
on 2023/8/30 15:43, Jiufu Guo wrote:
> Hi,
>
> As mentioned in PR108338, on p9, we could use mtvsrws to implement
> the bitcast from SI to SF (or lowpart DI to SF).
>
> For code:
> *(long long*)buff = di;
> float f = *(float*)(buff);
>
> "sldi 9,3,32 ; mtvsrd 1,9 ; xscvspdpn 1,1" is generated.
> A better one would be "mtvsrws 1,3 ; xscvspdpn 1,1".
>
> Compare with previous patch:
> https://gcc.gnu.org/pipermail/gcc-patches/2023-July/623533.html
> "highpart DI-->SF" is put to a seperate patch.
>
> Pass bootstrap and regression on ppc64{,le}.
> Is this ok for trunk?
>
> BR,
> Jeff (Jiufu Guo)
>
Nit: Missing a PR marker line.
> gcc/ChangeLog:
>
> * config/rs6000/rs6000.md (movsf_from_si): Update to generate mtvsrws
> for P9.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/powerpc/pr108338.c: Updated to check mtvsrws for p9.
>
> ---
> gcc/config/rs6000/rs6000.md | 25 ++++++++++++++++-----
> gcc/testsuite/gcc.target/powerpc/pr108338.c | 6 +++--
> 2 files changed, 23 insertions(+), 8 deletions(-)
>
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 8c92cbf976de915136ad5dba24e69a363d21438d..c03e677bca79e8fb1acb276d07d0acfae009f6d8 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -8280,13 +8280,26 @@ (define_insn_and_split "movsf_from_si"
> {
> rtx op0 = operands[0];
> rtx op1 = operands[1];
> - rtx op2 = operands[2];
> - rtx op1_di = gen_rtx_REG (DImode, REGNO (op1));
>
> - /* Move SF value to upper 32-bits for xscvspdpn. */
> - emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
> - emit_insn (gen_p8_mtvsrd_sf (op0, op2));
> - emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
> + /* Move lowpart 32-bits from register for SFmode. */
> + if (TARGET_P9_VECTOR)
> + {
> + /* Using mtvsrws;xscvspdpn. */
> + rtx op0_v = gen_rtx_REG (V4SImode, REGNO (op0));
> + emit_insn (gen_vsx_splat_v4si (op0_v, op1));
> + emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
> + }
> + else
> + {
> + rtx op2 = operands[2];
> + rtx op1_di = gen_rtx_REG (DImode, REGNO (op1));
> +
> + /* Using ashl;mtvsrd;xscvspdpn. */
Nit: Use sldi instead of ashl as the others are actual
mnemonics but ashl isn't.
> + emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
> + emit_insn (gen_p8_mtvsrd_sf (op0, op2));
> + emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
> + }
> +
> DONE;
> }
> [(set_attr "length"
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr108338.c b/gcc/testsuite/gcc.target/powerpc/pr108338.c
> index 6db65595343c2407fc32f68f5f52a1f7196c371d..0565e5254ed0a8cc579cf505a3f865426dcf62ae 100644
> --- a/gcc/testsuite/gcc.target/powerpc/pr108338.c
> +++ b/gcc/testsuite/gcc.target/powerpc/pr108338.c
> @@ -19,9 +19,11 @@ float __attribute__ ((noipa)) sf_from_di_off4 (long long l)
>
> /* Under lp64, parameter 'l' is in one DI reg, then bitcast sub DI to SF. */
> /* { dg-final { scan-assembler-times {\mxscvspdpn\M} 2 { target { lp64 && has_arch_pwr8 } } } } */
> -/* { dg-final { scan-assembler-times {\mmtvsrd\M} 2 { target { lp64 && has_arch_pwr8 } } } } */
> +/* { dg-final { scan-assembler-times {\mmtvsrd\M} 2 { target { lp64 && { has_arch_pwr8 && { ! has_arch_pwr9 } } } } } } */
> +/* { dg-final { scan-assembler-times {\msldi\M} 1 { target { lp64 && { has_arch_pwr8 && { ! has_arch_pwr9 } } } } } } */
> +/* { dg-final { scan-assembler-times {\mmtvsrd\M} 1 { target { lp64 && has_arch_pwr9 } } } } */
> +/* { dg-final { scan-assembler-times {\mmtvsrws\M} 1 { target { lp64 && has_arch_pwr9 } } } } */
> /* { dg-final { scan-assembler-times {\mrldicr\M} 1 { target { lp64 && has_arch_pwr8 } } } } */
> -/* { dg-final { scan-assembler-times {\msldi\M} 1 { target { lp64 && has_arch_pwr8 } } } } */
>
This part might need a fresh as the comments to patch 1/2.
The others look good to me, thanks!
BR,
Kewen
Hi,
"Kewen.Lin" <linkw@linux.ibm.com> writes:
> Hi Jeff,
>
> on 2023/8/30 15:43, Jiufu Guo wrote:
>> Hi,
>>
>> As mentioned in PR108338, on p9, we could use mtvsrws to implement
>> the bitcast from SI to SF (or lowpart DI to SF).
>>
>> For code:
>> *(long long*)buff = di;
>> float f = *(float*)(buff);
>>
>> "sldi 9,3,32 ; mtvsrd 1,9 ; xscvspdpn 1,1" is generated.
>> A better one would be "mtvsrws 1,3 ; xscvspdpn 1,1".
>>
>> Compare with previous patch:
>> https://gcc.gnu.org/pipermail/gcc-patches/2023-July/623533.html
>> "highpart DI-->SF" is put to a seperate patch.
>>
>> Pass bootstrap and regression on ppc64{,le}.
>> Is this ok for trunk?
>>
>> BR,
>> Jeff (Jiufu Guo)
>>
> Nit: Missing a PR marker line.
Ok, this patch would share the PR108338.
>
>> gcc/ChangeLog:
>>
>> * config/rs6000/rs6000.md (movsf_from_si): Update to generate mtvsrws
>> for P9.
>>
>> gcc/testsuite/ChangeLog:
>>
>> * gcc.target/powerpc/pr108338.c: Updated to check mtvsrws for p9.
>>
>> ---
>> gcc/config/rs6000/rs6000.md | 25 ++++++++++++++++-----
>> gcc/testsuite/gcc.target/powerpc/pr108338.c | 6 +++--
>> 2 files changed, 23 insertions(+), 8 deletions(-)
>>
>> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
>> index 8c92cbf976de915136ad5dba24e69a363d21438d..c03e677bca79e8fb1acb276d07d0acfae009f6d8 100644
>> --- a/gcc/config/rs6000/rs6000.md
>> +++ b/gcc/config/rs6000/rs6000.md
>> @@ -8280,13 +8280,26 @@ (define_insn_and_split "movsf_from_si"
>> {
>> rtx op0 = operands[0];
>> rtx op1 = operands[1];
>> - rtx op2 = operands[2];
>> - rtx op1_di = gen_rtx_REG (DImode, REGNO (op1));
>>
>> - /* Move SF value to upper 32-bits for xscvspdpn. */
>> - emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
>> - emit_insn (gen_p8_mtvsrd_sf (op0, op2));
>> - emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
>> + /* Move lowpart 32-bits from register for SFmode. */
>> + if (TARGET_P9_VECTOR)
>> + {
>> + /* Using mtvsrws;xscvspdpn. */
>> + rtx op0_v = gen_rtx_REG (V4SImode, REGNO (op0));
>> + emit_insn (gen_vsx_splat_v4si (op0_v, op1));
>> + emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
>> + }
>> + else
>> + {
>> + rtx op2 = operands[2];
>> + rtx op1_di = gen_rtx_REG (DImode, REGNO (op1));
>> +
>> + /* Using ashl;mtvsrd;xscvspdpn. */
>
> Nit: Use sldi instead of ashl as the others are actual
> mnemonics but ashl isn't.
Oh, yes, thanks for your insight review!
>
>> + emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
>> + emit_insn (gen_p8_mtvsrd_sf (op0, op2));
>> + emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
>> + }
>> +
>> DONE;
>> }
>> [(set_attr "length"
>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr108338.c b/gcc/testsuite/gcc.target/powerpc/pr108338.c
>> index 6db65595343c2407fc32f68f5f52a1f7196c371d..0565e5254ed0a8cc579cf505a3f865426dcf62ae 100644
>> --- a/gcc/testsuite/gcc.target/powerpc/pr108338.c
>> +++ b/gcc/testsuite/gcc.target/powerpc/pr108338.c
>> @@ -19,9 +19,11 @@ float __attribute__ ((noipa)) sf_from_di_off4 (long long l)
>>
>> /* Under lp64, parameter 'l' is in one DI reg, then bitcast sub DI to SF. */
>> /* { dg-final { scan-assembler-times {\mxscvspdpn\M} 2 { target { lp64 && has_arch_pwr8 } } } } */
>> -/* { dg-final { scan-assembler-times {\mmtvsrd\M} 2 { target { lp64 && has_arch_pwr8 } } } } */
>> +/* { dg-final { scan-assembler-times {\mmtvsrd\M} 2 { target { lp64 && { has_arch_pwr8 && { ! has_arch_pwr9 } } } } } } */
>> +/* { dg-final { scan-assembler-times {\msldi\M} 1 { target { lp64 && { has_arch_pwr8 && { ! has_arch_pwr9 } } } } } } */
>> +/* { dg-final { scan-assembler-times {\mmtvsrd\M} 1 { target { lp64 && has_arch_pwr9 } } } } */
>> +/* { dg-final { scan-assembler-times {\mmtvsrws\M} 1 { target { lp64 && has_arch_pwr9 } } } } */
>> /* { dg-final { scan-assembler-times {\mrldicr\M} 1 { target { lp64 && has_arch_pwr8 } } } } */
>> -/* { dg-final { scan-assembler-times {\msldi\M} 1 { target { lp64 && has_arch_pwr8 } } } } */
>>
>
> This part might need a fresh as the comments to patch 1/2.
Yes, thanks!
>
> The others look good to me, thanks!
BR,
Jeff (Jiufu Guo)
>
> BR,
> Kewen
@@ -8280,13 +8280,26 @@ (define_insn_and_split "movsf_from_si"
{
rtx op0 = operands[0];
rtx op1 = operands[1];
- rtx op2 = operands[2];
- rtx op1_di = gen_rtx_REG (DImode, REGNO (op1));
- /* Move SF value to upper 32-bits for xscvspdpn. */
- emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
- emit_insn (gen_p8_mtvsrd_sf (op0, op2));
- emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
+ /* Move lowpart 32-bits from register for SFmode. */
+ if (TARGET_P9_VECTOR)
+ {
+ /* Using mtvsrws;xscvspdpn. */
+ rtx op0_v = gen_rtx_REG (V4SImode, REGNO (op0));
+ emit_insn (gen_vsx_splat_v4si (op0_v, op1));
+ emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
+ }
+ else
+ {
+ rtx op2 = operands[2];
+ rtx op1_di = gen_rtx_REG (DImode, REGNO (op1));
+
+ /* Using ashl;mtvsrd;xscvspdpn. */
+ emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
+ emit_insn (gen_p8_mtvsrd_sf (op0, op2));
+ emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
+ }
+
DONE;
}
[(set_attr "length"
@@ -19,9 +19,11 @@ float __attribute__ ((noipa)) sf_from_di_off4 (long long l)
/* Under lp64, parameter 'l' is in one DI reg, then bitcast sub DI to SF. */
/* { dg-final { scan-assembler-times {\mxscvspdpn\M} 2 { target { lp64 && has_arch_pwr8 } } } } */
-/* { dg-final { scan-assembler-times {\mmtvsrd\M} 2 { target { lp64 && has_arch_pwr8 } } } } */
+/* { dg-final { scan-assembler-times {\mmtvsrd\M} 2 { target { lp64 && { has_arch_pwr8 && { ! has_arch_pwr9 } } } } } } */
+/* { dg-final { scan-assembler-times {\msldi\M} 1 { target { lp64 && { has_arch_pwr8 && { ! has_arch_pwr9 } } } } } } */
+/* { dg-final { scan-assembler-times {\mmtvsrd\M} 1 { target { lp64 && has_arch_pwr9 } } } } */
+/* { dg-final { scan-assembler-times {\mmtvsrws\M} 1 { target { lp64 && has_arch_pwr9 } } } } */
/* { dg-final { scan-assembler-times {\mrldicr\M} 1 { target { lp64 && has_arch_pwr8 } } } } */
-/* { dg-final { scan-assembler-times {\msldi\M} 1 { target { lp64 && has_arch_pwr8 } } } } */
union di_sf_sf
{