When I attempted to enable the vect_usad_char effective target for LoongArch,
some tests failed because the SAD (sum of absolute differences) patterns
generate bad code. This patch fixes them: an absolute difference is an
unsigned value that can exceed 127 in a byte, so the SAD patterns must use
zero extension instead of sign extension when widening for the reduction.
We are currently fixing the failing vectorization tests; in the future, we
will enable more "vect" tests for LoongArch.
gcc/ChangeLog:
* config/loongarch/lasx.md (usadv32qi, ssadv32qi): Use zero-extending
horizontal add instructions.
* config/loongarch/lsx.md (usadv16qi, ssadv16qi): Ditto.
@@ -5097,8 +5097,8 @@ (define_expand "usadv32qi"
rtx t2 = gen_reg_rtx (V16HImode);
rtx t3 = gen_reg_rtx (V8SImode);
emit_insn (gen_lasx_xvabsd_u_bu (t1, operands[1], operands[2]));
- emit_insn (gen_lasx_xvhaddw_h_b (t2, t1, t1));
- emit_insn (gen_lasx_xvhaddw_w_h (t3, t2, t2));
+ emit_insn (gen_lasx_xvhaddw_hu_bu (t2, t1, t1));
+ emit_insn (gen_lasx_xvhaddw_wu_hu (t3, t2, t2));
emit_insn (gen_addv8si3 (operands[0], t3, operands[3]));
DONE;
})
@@ -5114,8 +5114,8 @@ (define_expand "ssadv32qi"
rtx t2 = gen_reg_rtx (V16HImode);
rtx t3 = gen_reg_rtx (V8SImode);
emit_insn (gen_lasx_xvabsd_s_b (t1, operands[1], operands[2]));
- emit_insn (gen_lasx_xvhaddw_h_b (t2, t1, t1));
- emit_insn (gen_lasx_xvhaddw_w_h (t3, t2, t2));
+ emit_insn (gen_lasx_xvhaddw_hu_bu (t2, t1, t1));
+ emit_insn (gen_lasx_xvhaddw_wu_hu (t3, t2, t2));
emit_insn (gen_addv8si3 (operands[0], t3, operands[3]));
DONE;
})
@@ -3468,8 +3468,8 @@ (define_expand "usadv16qi"
rtx t2 = gen_reg_rtx (V8HImode);
rtx t3 = gen_reg_rtx (V4SImode);
emit_insn (gen_lsx_vabsd_u_bu (t1, operands[1], operands[2]));
- emit_insn (gen_lsx_vhaddw_h_b (t2, t1, t1));
- emit_insn (gen_lsx_vhaddw_w_h (t3, t2, t2));
+ emit_insn (gen_lsx_vhaddw_hu_bu (t2, t1, t1));
+ emit_insn (gen_lsx_vhaddw_wu_hu (t3, t2, t2));
emit_insn (gen_addv4si3 (operands[0], t3, operands[3]));
DONE;
})
@@ -3485,8 +3485,8 @@ (define_expand "ssadv16qi"
rtx t2 = gen_reg_rtx (V8HImode);
rtx t3 = gen_reg_rtx (V4SImode);
emit_insn (gen_lsx_vabsd_s_b (t1, operands[1], operands[2]));
- emit_insn (gen_lsx_vhaddw_h_b (t2, t1, t1));
- emit_insn (gen_lsx_vhaddw_w_h (t3, t2, t2));
+ emit_insn (gen_lsx_vhaddw_hu_bu (t2, t1, t1));
+ emit_insn (gen_lsx_vhaddw_wu_hu (t3, t2, t2));
emit_insn (gen_addv4si3 (operands[0], t3, operands[3]));
DONE;
})