@@ -900,11 +900,11 @@ BDESC (OPTION_MASK_ISA_SSE3, 0, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd"
/* SSSE3 */
BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI)
-BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
+BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI)
-BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
+BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI)
-BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_nothing, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
+BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
BDESC (OPTION_MASK_ISA_SSSE3, 0, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI)
BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
@@ -18433,6 +18433,7 @@ bool
ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
{
gimple *stmt = gsi_stmt (*gsi), *g;
+ gimple_seq stmts = NULL;
tree fndecl = gimple_call_fndecl (stmt);
gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
int n_args = gimple_call_num_args (stmt);
@@ -18555,7 +18556,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
{
loc = gimple_location (stmt);
tree type = TREE_TYPE (arg2);
- gimple_seq stmts = NULL;
if (VECTOR_FLOAT_TYPE_P (type))
{
tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
@@ -18610,7 +18610,6 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
tree zero_vec = build_zero_cst (type);
tree minus_one_vec = build_minus_one_cst (type);
tree cmp_type = truth_type_for (type);
- gimple_seq stmts = NULL;
tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1);
gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
g = gimple_build_assign (gimple_call_lhs (stmt),
@@ -18904,14 +18903,18 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
break;
case IX86_BUILTIN_PABSB:
+ case IX86_BUILTIN_PABSW:
+ case IX86_BUILTIN_PABSD:
+ /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE. */
+ if (!TARGET_64BIT)
+ break;
+ /* FALLTHRU. */
case IX86_BUILTIN_PABSB128:
case IX86_BUILTIN_PABSB256:
case IX86_BUILTIN_PABSB512:
- case IX86_BUILTIN_PABSW:
case IX86_BUILTIN_PABSW128:
case IX86_BUILTIN_PABSW256:
case IX86_BUILTIN_PABSW512:
- case IX86_BUILTIN_PABSD:
case IX86_BUILTIN_PABSD128:
case IX86_BUILTIN_PABSD256:
case IX86_BUILTIN_PABSD512:
@@ -18933,9 +18936,19 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
if (n_args > 1
&& !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
break;
- loc = gimple_location (stmt);
- g = gimple_build_assign (gimple_call_lhs (stmt), ABS_EXPR, arg0);
- gsi_replace (gsi, g, false);
+ {
+ tree utype, ures, vce;
+ utype = unsigned_type_for (TREE_TYPE (arg0));
+ /* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR
+ instead of ABS_EXPR to handle the overflow case (TYPE_MIN). */
+ ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0);
+ gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+ loc = gimple_location (stmt);
+ vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures);
+ g = gimple_build_assign (gimple_call_lhs (stmt),
+ VIEW_CONVERT_EXPR, vce);
+ gsi_replace (gsi, g, false);
+ }
return true;
default:
@@ -1,7 +1,7 @@
#include <immintrin.h>
/* { dg-do compile } */
/* { dg-options "-mavx512bw -O2 -mavx512vl -fdump-tree-optimized" } */
-/* { dg-final { scan-tree-dump-not "builtin_ia32_pabs" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "builtin_ia32_pabs" "optimized" { target { ! ia32 } } } } */
__m64
new file mode 100644
@@ -0,0 +1,22 @@
+#include <immintrin.h>
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx" } */
+/* { dg-final { scan-assembler-times "pabs" 3 } } */
+
+__m64
+absb_64 (__m64 a)
+{
+ return _mm_abs_pi8(a);
+}
+
+__m64
+absw_64 (__m64 a)
+{
+ return _mm_abs_pi16(a);
+}
+
+__m64
+absd_64 (__m64 a)
+{
+ return _mm_abs_pi32(a);
+}
new file mode 100644
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O2" } */
+/* { dg-final { scan-assembler-times "vpblendvb" 2 } } */
+#include <immintrin.h>
+
+__m128i do_stuff_128(__m128i X0, __m128i X1) {
+ __m128i AbsX0 = _mm_abs_epi8(X0);
+ __m128i Result = _mm_blendv_epi8(AbsX0, X1, AbsX0);
+ return Result;
+}
+
+__m256i do_stuff_256(__m256i X0, __m256i X1) {
+ __m256i AbsX0 = _mm256_abs_epi8(X0);
+ __m256i Result = _mm256_blendv_epi8(AbsX0, X1, AbsX0);
+ return Result;
+}