[v2] LoongArch: Fix incorrect code generation for sad pattern

Message ID 20231214124904.5801-1-xujiahao@loongson.cn
State Unresolved
Headers
Series [v2] LoongArch: Fix incorrect code generation for sad pattern |

Checks

Context Check Description
snail/gcc-patch-check warning Git am fail log

Commit Message

Jiahao Xu Dec. 14, 2023, 12:49 p.m. UTC
  When I attempt to enable the vect_usad_char effective target for LoongArch, the
slp-reduc-sad.c and vect-reduc-sad*.c tests fail. These tests fail because the sad
patterns generate bad code. This patch fixes them: for the sad patterns, use zero
extension instead of sign extension for the reduction.

We are currently fixing failing vectorization tests, and in the future we will
enable more of the "vect" tests for LoongArch.

gcc/ChangeLog:

	* config/loongarch/lasx.md: Use zero expansion instruction.
	* config/loongarch/lsx.md: Ditto.
  

Comments

chenglulu Dec. 21, 2023, 9:22 a.m. UTC | #1
Pushed to r14-6773.

在 2023/12/14 下午8:49, Jiahao Xu 写道:
> When I attempt to enable vect_usad_char effective target for LoongArch, slp-reduc-sad.c
> and vect-reduc-sad*.c tests fail. These tests fail because the sad pattern generates bad
> code. This patch to fixed them, for sad patterns, use zero expansion instead of sign
> expansion for reduction.
>
> Currently, we are fixing failed vectorized tests, and in the future, we will
> enable more tests of "vect" for LoongArch.
>
> gcc/ChangeLog:
>
> 	* config/loongarch/lasx.md: Use zero expansion instruction.
> 	* config/loongarch/lsx.md: Ditto.
>
> diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
> index eeac8cd984b..db6871507e2 100644
> --- a/gcc/config/loongarch/lasx.md
> +++ b/gcc/config/loongarch/lasx.md
> @@ -5097,8 +5097,8 @@ (define_expand "usadv32qi"
>     rtx t2 = gen_reg_rtx (V16HImode);
>     rtx t3 = gen_reg_rtx (V8SImode);
>     emit_insn (gen_lasx_xvabsd_u_bu (t1, operands[1], operands[2]));
> -  emit_insn (gen_lasx_xvhaddw_h_b (t2, t1, t1));
> -  emit_insn (gen_lasx_xvhaddw_w_h (t3, t2, t2));
> +  emit_insn (gen_lasx_xvhaddw_hu_bu (t2, t1, t1));
> +  emit_insn (gen_lasx_xvhaddw_wu_hu (t3, t2, t2));
>     emit_insn (gen_addv8si3 (operands[0], t3, operands[3]));
>     DONE;
>   })
> @@ -5114,8 +5114,8 @@ (define_expand "ssadv32qi"
>     rtx t2 = gen_reg_rtx (V16HImode);
>     rtx t3 = gen_reg_rtx (V8SImode);
>     emit_insn (gen_lasx_xvabsd_s_b (t1, operands[1], operands[2]));
> -  emit_insn (gen_lasx_xvhaddw_h_b (t2, t1, t1));
> -  emit_insn (gen_lasx_xvhaddw_w_h (t3, t2, t2));
> +  emit_insn (gen_lasx_xvhaddw_hu_bu (t2, t1, t1));
> +  emit_insn (gen_lasx_xvhaddw_wu_hu (t3, t2, t2));
>     emit_insn (gen_addv8si3 (operands[0], t3, operands[3]));
>     DONE;
>   })
> diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
> index dbdb423011b..5e5e2503636 100644
> --- a/gcc/config/loongarch/lsx.md
> +++ b/gcc/config/loongarch/lsx.md
> @@ -3468,8 +3468,8 @@ (define_expand "usadv16qi"
>     rtx t2 = gen_reg_rtx (V8HImode);
>     rtx t3 = gen_reg_rtx (V4SImode);
>     emit_insn (gen_lsx_vabsd_u_bu (t1, operands[1], operands[2]));
> -  emit_insn (gen_lsx_vhaddw_h_b (t2, t1, t1));
> -  emit_insn (gen_lsx_vhaddw_w_h (t3, t2, t2));
> +  emit_insn (gen_lsx_vhaddw_hu_bu (t2, t1, t1));
> +  emit_insn (gen_lsx_vhaddw_wu_hu (t3, t2, t2));
>     emit_insn (gen_addv4si3 (operands[0], t3, operands[3]));
>     DONE;
>   })
> @@ -3485,8 +3485,8 @@ (define_expand "ssadv16qi"
>     rtx t2 = gen_reg_rtx (V8HImode);
>     rtx t3 = gen_reg_rtx (V4SImode);
>     emit_insn (gen_lsx_vabsd_s_b (t1, operands[1], operands[2]));
> -  emit_insn (gen_lsx_vhaddw_h_b (t2, t1, t1));
> -  emit_insn (gen_lsx_vhaddw_w_h (t3, t2, t2));
> +  emit_insn (gen_lsx_vhaddw_hu_bu (t2, t1, t1));
> +  emit_insn (gen_lsx_vhaddw_wu_hu (t3, t2, t2));
>     emit_insn (gen_addv4si3 (operands[0], t3, operands[3]));
>     DONE;
>   })
  

Patch

diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
index eeac8cd984b..db6871507e2 100644
--- a/gcc/config/loongarch/lasx.md
+++ b/gcc/config/loongarch/lasx.md
@@ -5097,8 +5097,8 @@  (define_expand "usadv32qi"
   rtx t2 = gen_reg_rtx (V16HImode);
   rtx t3 = gen_reg_rtx (V8SImode);
   emit_insn (gen_lasx_xvabsd_u_bu (t1, operands[1], operands[2]));
-  emit_insn (gen_lasx_xvhaddw_h_b (t2, t1, t1));
-  emit_insn (gen_lasx_xvhaddw_w_h (t3, t2, t2));
+  emit_insn (gen_lasx_xvhaddw_hu_bu (t2, t1, t1));
+  emit_insn (gen_lasx_xvhaddw_wu_hu (t3, t2, t2));
   emit_insn (gen_addv8si3 (operands[0], t3, operands[3]));
   DONE;
 })
@@ -5114,8 +5114,8 @@  (define_expand "ssadv32qi"
   rtx t2 = gen_reg_rtx (V16HImode);
   rtx t3 = gen_reg_rtx (V8SImode);
   emit_insn (gen_lasx_xvabsd_s_b (t1, operands[1], operands[2]));
-  emit_insn (gen_lasx_xvhaddw_h_b (t2, t1, t1));
-  emit_insn (gen_lasx_xvhaddw_w_h (t3, t2, t2));
+  emit_insn (gen_lasx_xvhaddw_hu_bu (t2, t1, t1));
+  emit_insn (gen_lasx_xvhaddw_wu_hu (t3, t2, t2));
   emit_insn (gen_addv8si3 (operands[0], t3, operands[3]));
   DONE;
 })
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
index dbdb423011b..5e5e2503636 100644
--- a/gcc/config/loongarch/lsx.md
+++ b/gcc/config/loongarch/lsx.md
@@ -3468,8 +3468,8 @@  (define_expand "usadv16qi"
   rtx t2 = gen_reg_rtx (V8HImode);
   rtx t3 = gen_reg_rtx (V4SImode);
   emit_insn (gen_lsx_vabsd_u_bu (t1, operands[1], operands[2]));
-  emit_insn (gen_lsx_vhaddw_h_b (t2, t1, t1));
-  emit_insn (gen_lsx_vhaddw_w_h (t3, t2, t2));
+  emit_insn (gen_lsx_vhaddw_hu_bu (t2, t1, t1));
+  emit_insn (gen_lsx_vhaddw_wu_hu (t3, t2, t2));
   emit_insn (gen_addv4si3 (operands[0], t3, operands[3]));
   DONE;
 })
@@ -3485,8 +3485,8 @@  (define_expand "ssadv16qi"
   rtx t2 = gen_reg_rtx (V8HImode);
   rtx t3 = gen_reg_rtx (V4SImode);
   emit_insn (gen_lsx_vabsd_s_b (t1, operands[1], operands[2]));
-  emit_insn (gen_lsx_vhaddw_h_b (t2, t1, t1));
-  emit_insn (gen_lsx_vhaddw_w_h (t3, t2, t2));
+  emit_insn (gen_lsx_vhaddw_hu_bu (t2, t1, t1));
+  emit_insn (gen_lsx_vhaddw_wu_hu (t3, t2, t2));
   emit_insn (gen_addv4si3 (operands[0], t3, operands[3]));
   DONE;
 })