Support Intel CMPccXADD

Message ID 20221103062657.58427-1-haochen.jiang@intel.com
State Not Applicable
Headers
Series Support Intel CMPccXADD |

Checks

Context Check Description
snail/gcc-patch-check fail Git am fail log

Commit Message

Jiang, Haochen Nov. 3, 2022, 6:26 a.m. UTC
  Hi all,

I just revised the patch according to the review. The changes compared to
the previous version are mentioned below.

Ok for trunk?

BRs,
Haochen

gcc/ChangeLog:

	* common/config/i386/cpuinfo.h (get_available_features):
	Detect cmpccxadd.
	* common/config/i386/i386-common.cc
	(OPTION_MASK_ISA2_CMPCCXADD_SET,
	OPTION_MASK_ISA2_CMPCCXADD_UNSET): New.
	(ix86_handle_option): Handle -mcmpccxadd.
	* common/config/i386/i386-cpuinfo.h (enum processor_features):
	Add FEATURE_CMPCCXADD.
	* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
	cmpccxadd.
	* config.gcc: Add cmpccxaddintrin.h.
	* config/i386/cpuid.h (bit_CMPCCXADD): New.
	* config/i386/i386-builtin-types.def:
	Add DEF_FUNCTION_TYPE(INT, PINT, INT, INT, INT)
	and DEF_FUNCTION_TYPE(LONGLONG, PLONGLONG, LONGLONG, LONGLONG, INT).
	* config/i386/i386-builtin.def (BDESC): Add new builtins.
	* config/i386/i386-c.cc (ix86_target_macros_internal): Define
	__CMPCCXADD__.
	* config/i386/i386-expand.cc (ix86_expand_special_args_builtin):
	Add new parameter to indicate constant position.
	Handle INT_FTYPE_PINT_INT_INT_INT
	and LONGLONG_FTYPE_PLONGLONG_LONGLONG_LONGLONG_INT.
	* config/i386/i386-isa.def (CMPCCXADD): Add DEF_PTA(CMPCCXADD).
	* config/i386/i386-options.cc (isa2_opts): Add -mcmpccxadd.
	(ix86_valid_target_attribute_inner_p): Handle cmpccxadd.
	* config/i386/i386.opt: Add option -mcmpccxadd.
	* config/i386/sync.md (cmpccxadd_<mode>): New define insn.
	* config/i386/x86gprintrin.h: Include cmpccxaddintrin.h.
	* doc/extend.texi: Document cmpccxadd.
	* doc/invoke.texi: Document -mcmpccxadd.
	* doc/sourcebuild.texi: Document target cmpccxadd.
	* config/i386/cmpccxaddintrin.h: New file.

gcc/testsuite/ChangeLog:

	* g++.dg/other/i386-2.C: Add -mcmpccxadd.
	* g++.dg/other/i386-3.C: Ditto.
	* gcc.target/i386/avx-1.c: Ditto.
	* gcc.target/i386/funcspec-56.inc: Add new target attribute.
	* gcc.target/i386/sse-13.c: Add -mcmpccxadd.
	* gcc.target/i386/sse-23.c: Ditto.
	* gcc.target/i386/x86gprintrin-1.c: Ditto.
	* gcc.target/i386/x86gprintrin-2.c: Ditto.
	* gcc.target/i386/x86gprintrin-3.c: Ditto.
	* gcc.target/i386/x86gprintrin-4.c: Ditto.
	* gcc.target/i386/x86gprintrin-5.c: Ditto.
	* gcc.target/i386/cmpccxadd-1.c: New test.
	* gcc.target/i386/cmpccxadd-2.c: Ditto.
---
 gcc/common/config/i386/cpuinfo.h              |   2 +
 gcc/common/config/i386/i386-common.cc         |  15 ++
 gcc/common/config/i386/i386-cpuinfo.h         |   1 +
 gcc/common/config/i386/i386-isas.h            |   1 +
 gcc/config.gcc                                |   3 +-
 gcc/config/i386/cmpccxaddintrin.h             |  89 +++++++++++
 gcc/config/i386/cpuid.h                       |   1 +
 gcc/config/i386/i386-builtin-types.def        |   4 +
 gcc/config/i386/i386-builtin.def              |   4 +
 gcc/config/i386/i386-c.cc                     |   2 +
 gcc/config/i386/i386-expand.cc                |  22 ++-
 gcc/config/i386/i386-isa.def                  |   1 +
 gcc/config/i386/i386-options.cc               |   4 +-
 gcc/config/i386/i386.opt                      |   5 +
 gcc/config/i386/sync.md                       |  29 ++++
 gcc/config/i386/x86gprintrin.h                |   2 +
 gcc/doc/extend.texi                           |   5 +
 gcc/doc/invoke.texi                           |  10 +-
 gcc/doc/sourcebuild.texi                      |   3 +
 gcc/testsuite/g++.dg/other/i386-2.C           |   2 +-
 gcc/testsuite/g++.dg/other/i386-3.C           |   2 +-
 gcc/testsuite/gcc.target/i386/avx-1.c         |   4 +
 gcc/testsuite/gcc.target/i386/cmpccxadd-1.c   |  61 ++++++++
 gcc/testsuite/gcc.target/i386/cmpccxadd-2.c   | 138 ++++++++++++++++++
 gcc/testsuite/gcc.target/i386/funcspec-56.inc |   2 +
 gcc/testsuite/gcc.target/i386/sse-13.c        |   6 +-
 gcc/testsuite/gcc.target/i386/sse-23.c        |   6 +-
 .../gcc.target/i386/x86gprintrin-1.c          |   2 +-
 .../gcc.target/i386/x86gprintrin-2.c          |   6 +-
 .../gcc.target/i386/x86gprintrin-3.c          |   2 +-
 .../gcc.target/i386/x86gprintrin-4.c          |   2 +-
 .../gcc.target/i386/x86gprintrin-5.c          |   6 +-
 gcc/testsuite/lib/target-supports.exp         |  10 ++
 33 files changed, 437 insertions(+), 15 deletions(-)
 create mode 100644 gcc/config/i386/cmpccxaddintrin.h
 create mode 100644 gcc/testsuite/gcc.target/i386/cmpccxadd-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/cmpccxadd-2.c

diff --git a/gcc/config/i386/cmpccxaddintrin.h b/gcc/config/i386/cmpccxaddintrin.h
--- /dev/null
+++ b/gcc/config/i386/cmpccxaddintrin.h
+#define __cmpccxadd_epi64(A,B,C,D) \
+  __builtin_ia32_cmpccxadd64 ((long long *) (A), (long long) (B), \
+			      (long long) (C), (_CMPCCX_ENUM) (D))
+#endif

Fixed a type issue here.


Changed both test files to align with the opcode order.
  

Comments

Uros Bizjak Nov. 3, 2022, 7:47 a.m. UTC | #1
On Thu, Nov 3, 2022 at 7:29 AM Haochen Jiang <haochen.jiang@intel.com> wrote:
>
> Hi all,
>
> I just revised the patch according to review. The changes comparing to
> previous version is mentioned below.
>
> Ok for trunk?
>
> BRs,
> Haochen
>
> gcc/ChangeLog:
>
>         * common/config/i386/cpuinfo.h (get_available_features):
>         Detect cmpccxadd.
>         * common/config/i386/i386-common.cc
>         (OPTION_MASK_ISA2_CMPCCXADD_SET,
>         OPTION_MASK_ISA2_CMPCCXADD_UNSET): New.
>         (ix86_handle_option): Handle -mcmpccxadd.
>         * common/config/i386/i386-cpuinfo.h (enum processor_features):
>         Add FEATURE_CMPCCXADD.
>         * common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
>         cmpccxadd.
>         * config.gcc: Add cmpccxaddintrin.h.
>         * config/i386/cpuid.h (bit_CMPCCXADD): New.
>         * config/i386/i386-builtin-types.def:
>         Add DEF_FUNCTION_TYPE(INT, PINT, INT, INT, INT)
>         and DEF_FUNCTION_TYPE(LONGLONG, PLONGLONG, LONGLONG, LONGLONG, INT).
>         * config/i386/i386-builtin.def (BDESC): Add new builtins.
>         * config/i386/i386-c.cc (ix86_target_macros_internal): Define
>         __CMPCCXADD__.
>         * config/i386/i386-expand.cc (ix86_expand_special_args_builtin):
>         Add new parameter to indicate constant position.
>         Handle INT_FTYPE_PINT_INT_INT_INT
>         and LONGLONG_FTYPE_PLONGLONG_LONGLONG_LONGLONG_INT.
>         * config/i386/i386-isa.def (CMPCCXADD): Add DEF_PTA(CMPCCXADD).
>         * config/i386/i386-options.cc (isa2_opts): Add -mcmpccxadd.
>         (ix86_valid_target_attribute_inner_p): Handle cmpccxadd.
>         * config/i386/i386.opt: Add option -mcmpccxadd.
>         * config/i386/sync.md (cmpccxadd_<mode>): New define insn.
>         * config/i386/x86gprintrin.h: Include cmpccxaddintrin.h.
>         * doc/extend.texi: Document cmpccxadd.
>         * doc/invoke.texi: Document -mcmpccxadd.
>         * doc/sourcebuild.texi: Document target cmpccxadd.
>         * config/i386/cmpccxaddintrin.h: New file.
>
> gcc/testsuite/ChangeLog:
>
>         * g++.dg/other/i386-2.C: Add -mcmpccxadd.
>         * g++.dg/other/i386-3.C: Ditto.
>         * gcc.target/i386/avx-1.c: Ditto.
>         * gcc.target/i386/funcspec-56.inc: Add new target attribute.
>         * gcc.target/i386/sse-13.c: Add -mcmpccxadd.
>         * gcc.target/i386/sse-23.c: Ditto.
>         * gcc.target/i386/x86gprintrin-1.c: Ditto.
>         * gcc.target/i386/x86gprintrin-2.c: Ditto.
>         * gcc.target/i386/x86gprintrin-3.c: Ditto.
>         * gcc.target/i386/x86gprintrin-4.c: Ditto.
>         * gcc.target/i386/x86gprintrin-5.c: Ditto.
>         * gcc.target/i386/cmpccxadd-1.c: New test.
>         * gcc.target/i386/cmpccxadd-2.c: Ditto.

LGTM, with a small pattern adjustment, see inline.

Thanks,
Uros.

> ---
>  gcc/common/config/i386/cpuinfo.h              |   2 +
>  gcc/common/config/i386/i386-common.cc         |  15 ++
>  gcc/common/config/i386/i386-cpuinfo.h         |   1 +
>  gcc/common/config/i386/i386-isas.h            |   1 +
>  gcc/config.gcc                                |   3 +-
>  gcc/config/i386/cmpccxaddintrin.h             |  89 +++++++++++
>  gcc/config/i386/cpuid.h                       |   1 +
>  gcc/config/i386/i386-builtin-types.def        |   4 +
>  gcc/config/i386/i386-builtin.def              |   4 +
>  gcc/config/i386/i386-c.cc                     |   2 +
>  gcc/config/i386/i386-expand.cc                |  22 ++-
>  gcc/config/i386/i386-isa.def                  |   1 +
>  gcc/config/i386/i386-options.cc               |   4 +-
>  gcc/config/i386/i386.opt                      |   5 +
>  gcc/config/i386/sync.md                       |  29 ++++
>  gcc/config/i386/x86gprintrin.h                |   2 +
>  gcc/doc/extend.texi                           |   5 +
>  gcc/doc/invoke.texi                           |  10 +-
>  gcc/doc/sourcebuild.texi                      |   3 +
>  gcc/testsuite/g++.dg/other/i386-2.C           |   2 +-
>  gcc/testsuite/g++.dg/other/i386-3.C           |   2 +-
>  gcc/testsuite/gcc.target/i386/avx-1.c         |   4 +
>  gcc/testsuite/gcc.target/i386/cmpccxadd-1.c   |  61 ++++++++
>  gcc/testsuite/gcc.target/i386/cmpccxadd-2.c   | 138 ++++++++++++++++++
>  gcc/testsuite/gcc.target/i386/funcspec-56.inc |   2 +
>  gcc/testsuite/gcc.target/i386/sse-13.c        |   6 +-
>  gcc/testsuite/gcc.target/i386/sse-23.c        |   6 +-
>  .../gcc.target/i386/x86gprintrin-1.c          |   2 +-
>  .../gcc.target/i386/x86gprintrin-2.c          |   6 +-
>  .../gcc.target/i386/x86gprintrin-3.c          |   2 +-
>  .../gcc.target/i386/x86gprintrin-4.c          |   2 +-
>  .../gcc.target/i386/x86gprintrin-5.c          |   6 +-
>  gcc/testsuite/lib/target-supports.exp         |  10 ++
>  33 files changed, 437 insertions(+), 15 deletions(-)
>  create mode 100644 gcc/config/i386/cmpccxaddintrin.h
>  create mode 100644 gcc/testsuite/gcc.target/i386/cmpccxadd-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/cmpccxadd-2.c
>
> diff --git a/gcc/config/i386/cmpccxaddintrin.h b/gcc/config/i386/cmpccxaddintrin.h
> --- /dev/null
> +++ b/gcc/config/i386/cmpccxaddintrin.h
> +#define __cmpccxadd_epi64(A,B,C,D) \
> +  __builtin_ia32_cmpccxadd64 ((long long *) (A), (long long) (B), \
> +                             (long long) (C), (_CMPCCX_ENUM) (D))
> +#endif
>
> Fixed a type issue here.
>
> diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md
> --- a/gcc/config/i386/sync.md
> +++ b/gcc/config/i386/sync.md
> @@ -1061,3 +1064,29 @@
>         (any_logic:SWI (match_dup 0) (match_dup 1)))]
>    ""
>    "lock{%;} %K2<logic>{<imodesuffix>}\t{%1, %0|%0, %1}")
> +
> +;; CMPCCXADD
> +
> +(define_insn "cmpccxadd_<mode>"
> +  [(set (match_operand:SWI48x 0 "register_operand" "=r")
> +       (unspec_volatile:SWI48x
> +         [(match_operand:SWI48x 1 "memory_operand" "+m")
> +          (match_operand:SWI48x 2 "register_operand" "0")
> +          (match_operand:SWI48x 3 "register_operand" "r")
> +          (match_operand:SI 4 "const_0_to_15_operand" "n")]
> +         UNSPECV_CMPCCXADD))
> +   (set (match_dup 1)
> +       (unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD))
> +   (set (reg:CC FLAGS_REG)
> +       (unspec_volatile:CC [(const_int 0)] UNSPECV_CMPCCXADD))]
> +  "TARGET_CMPCCXADD && TARGET_64BIT"

I think flags are not usable here, so just use:

   (clobber (reg:CC FLAGS_REG))]

Uros.

> +{
> +  char buf[128];
> +  const char *ops = "cmp%sxadd\t{%%3, %%0, %%1|%%1, %%0, %%3}";
> +  char const *cc[16] = {"o" ,"no", "b", "nb", "z", "nz", "be", "nbe",
> +                       "s", "ns", "p", "np", "l", "nl", "le", "nle"};
> +
> +  snprintf (buf, sizeof (buf), ops, cc[INTVAL (operands[4])]);
> +  output_asm_insn (buf, operands);
> +  return "";
> +})
>
> Changed the whole pattern like how cmpxchg did.
> Also adjust the cc array order to align with opcode.
>
> diff --git a/gcc/testsuite/gcc.target/i386/cmpccxadd-1.c b/gcc/testsuite/gcc.target/i386/cmpccxadd-1.c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/cmpccxadd-1.c
> @@ -0,0 +1,61 @@
> +/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mcmpccxadd" } */
> +/* { dg-final { scan-assembler-times "cmpoxadd\[ \\t\]" 2 } } */
> +/* { dg-final { scan-assembler-times "cmpnoxadd\[ \\t\]" 2 } } */
> +/* { dg-final { scan-assembler-times "cmpbxadd\[ \\t\]" 2 } } */
> +/* { dg-final { scan-assembler-times "cmpnbxadd\[ \\t\]" 2 } } */
> +/* { dg-final { scan-assembler-times "cmpzxadd\[ \\t\]" 2 } } */
> +/* { dg-final { scan-assembler-times "cmpnzxadd\[ \\t\]" 2 } } */
> +/* { dg-final { scan-assembler-times "cmpbexadd\[ \\t\]" 2 } } */
> +/* { dg-final { scan-assembler-times "cmpnbexadd\[ \\t\]" 2 } } */
> +/* { dg-final { scan-assembler-times "cmpsxadd\[ \\t\]" 2 } } */
> +/* { dg-final { scan-assembler-times "cmpnsxadd\[ \\t\]" 2 } } */
> +/* { dg-final { scan-assembler-times "cmppxadd\[ \\t\]" 2 } } */
> +/* { dg-final { scan-assembler-times "cmpnpxadd\[ \\t\]" 2 } } */
> +/* { dg-final { scan-assembler-times "cmplxadd\[ \\t\]" 2 } } */
> +/* { dg-final { scan-assembler-times "cmpnlxadd\[ \\t\]" 2 } } */
> +/* { dg-final { scan-assembler-times "cmplexadd\[ \\t\]" 2 } } */
> +/* { dg-final { scan-assembler-times "cmpnlexadd\[ \\t\]" 2 } } */
> +#include <x86gprintrin.h>
> +
> +int *a;
> +int b, c;
> +long long *d;
> +long long e, f;
> +
> +void extern
> +cmpccxadd_test(void)
> +{
> +  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_O);
> +  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_O);
> +  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_NO);
> +  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_NO);
> +  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_B);
> +  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_B);
> +  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_NB);
> +  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_NB);
> +  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_Z);
> +  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_Z);
> +  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_NZ);
> +  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_NZ);
> +  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_BE);
> +  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_BE);
> +  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_NBE);
> +  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_NBE);
> +  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_S);
> +  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_S);
> +  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_NS);
> +  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_NS);
> +  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_P);
> +  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_P);
> +  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_NP);
> +  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_NP);
> +  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_L);
> +  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_L);
> +  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_NL);
> +  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_NL);
> +  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_LE);
> +  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_LE);
> +  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_NLE);
> +  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_NLE);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/cmpccxadd-2.c b/gcc/testsuite/gcc.target/i386/cmpccxadd-2.c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/cmpccxadd-2.c
> @@ -0,0 +1,138 @@
> +/* { dg-do run { target { ! ia32 } } } */
> +/* { dg-options "-O2 -mcmpccxadd" } */
> +/* { dg-require-effective-target cmpccxadd } */
> +
> +#include <stdlib.h>
> +#include <x86gprintrin.h>
> +
> +int
> +main()
> +{
> +  if (!__builtin_cpu_supports("cmpccxadd"))
> +    return 0;
> +
> +  int srcdest1[16] = { -2147483648,1,1,1,1,2,1,2,1,2,4,2,1,1,1,2 };
> +  int srcdest2[16] = { 1,1,2,1,1,1,1,1,2,1,1,1,2,1,1,1 };
> +  int src3[16] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 };
> +  int _srcdest1[16], _srcdest2[16], res[16], cond[16];
> +  long long srcdest1_64[16] = { -9223372036854775807LL-1,1,1,1,1,2,1,2,1,2,4,2,1,1,1,2 };
> +  long long srcdest2_64[16] = { 1,1,2,1,1,1,1,1,2,1,1,1,2,1,1,1 };
> +  long long src3_64[16] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 };
> +  long long _srcdest1_64[16], _srcdest2_64[16], res_64[16], cond_64[16];
> +
> +  int tmp2[16];
> +  long long tmp2_64[16];
> +
> +  int cf[16] = { 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0 };
> +  int of[16] = { 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
> +  int sf[16] = { 0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0 };
> +  int zf[16] = { 0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0 };
> +  int af[16] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
> +  int pf[16] = { 0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0 };
> +
> +  for (int i = 0; i < 16; i++)
> +  {
> +    tmp2[i] = srcdest1[i] + src3[i];
> +    tmp2_64[i] = srcdest1_64[i] + src3_64[i];
> +  }
> +
> +  cond[0] = of[0] == 1 ? 1 : 0;
> +  cond[1] = of[1] == 0 ? 1 : 0;
> +  cond[2] = cf[2] == 1 ? 1 : 0;
> +  cond[3] = cf[3] == 0 ? 1 : 0;
> +  cond[4] = zf[4] == 1 ? 1 : 0;
> +  cond[5] = zf[5] == 0 ? 1 : 0;
> +  cond[6] = (cf[6] || zf[6]) == 1 ? 1 : 0;
> +  cond[7] = (cf[7] || zf[7]) == 0 ? 1 : 0;
> +  cond[8] = sf[8] == 1 ? 1 : 0;
> +  cond[9] = sf[9] == 0 ? 1 : 0;
> +  cond[10] = pf[10] == 1 ? 1 : 0;
> +  cond[11] = pf[11] == 0 ? 1 : 0;
> +  cond[12] = ((sf[12] && !of[12]) || (!sf[12] && of[12])) == 1 ? 1 : 0;
> +  cond[13] = ((sf[13] && !of[13]) || (!sf[13] && of[13])) == 0 ? 1 : 0;
> +  cond[14] = (((sf[14] && !of[14]) || (!sf[14] && of[14])) || zf[14]) == 1 ? 1 : 0;
> +  cond[15] = (((sf[15] && !of[15]) || (!sf[15] && of[15])) || zf[15]) == 0 ? 1 : 0;
> +
> +  cond_64[0] = of[0] == 1 ? 1 : 0;
> +  cond_64[1] = of[1] == 0 ? 1 : 0;
> +  cond_64[2] = cf[2] == 1 ? 1 : 0;
> +  cond_64[3] = cf[3] == 0 ? 1 : 0;
> +  cond_64[4] = zf[4] == 1 ? 1 : 0;
> +  cond_64[5] = zf[5] == 0 ? 1 : 0;
> +  cond_64[6] = (cf[6] || zf[6]) == 1 ? 1 : 0;
> +  cond_64[7] = (cf[7] || zf[7]) == 0 ? 1 : 0;
> +  cond_64[8] = sf[8] == 1 ? 1 : 0;
> +  cond_64[9] = sf[9] == 0 ? 1 : 0;
> +  cond_64[10] = pf[10] == 1 ? 1 : 0;
> +  cond_64[11] = pf[11] == 0 ? 1 : 0;
> +  cond_64[12] = ((sf[12] && !of[12]) || (!sf[12] && of[12])) == 1 ? 1 : 0;
> +  cond_64[13] = ((sf[13] && !of[13]) || (!sf[13] && of[13])) == 0 ? 1 : 0;
> +  cond_64[14] = (((sf[14] && !of[14]) || (!sf[14] && of[14])) || zf[14]) == 1 ? 1 : 0;
> +  cond_64[15] = (((sf[15] && !of[15]) || (!sf[15] && of[15])) || zf[15]) == 0 ? 1 : 0;
> +
> +  for (int i = 0; i < 16; i++)
> +  {
> +    if (cond[i] == 1)
> +    {
> +      _srcdest1[i] = tmp2[i];
> +    }
> +    else
> +    {
> +      _srcdest1[i] = srcdest1[i];
> +    }
> +    if (cond_64[i] == 1)
> +    {
> +      _srcdest1_64[i] = tmp2_64[i];
> +    }
> +    else
> +    {
> +      _srcdest1_64[i] = srcdest1_64[i];
> +    }
> +    _srcdest2[i] = srcdest1[i];
> +    _srcdest2_64[i] = srcdest1_64[i];
> +  }
> +
> +  res[0] = __cmpccxadd_epi32 (&srcdest1[0], srcdest2[0], src3[0], _CMPCCX_O);
> +  res[1] = __cmpccxadd_epi32 (&srcdest1[1], srcdest2[1], src3[1], _CMPCCX_NO);
> +  res[2] = __cmpccxadd_epi32 (&srcdest1[2], srcdest2[2], src3[2], _CMPCCX_B);
> +  res[3] = __cmpccxadd_epi32 (&srcdest1[3], srcdest2[3], src3[3], _CMPCCX_NB);
> +  res[4] = __cmpccxadd_epi32 (&srcdest1[4], srcdest2[4], src3[4], _CMPCCX_Z);
> +  res[5] = __cmpccxadd_epi32 (&srcdest1[5], srcdest2[5], src3[5], _CMPCCX_NZ);
> +  res[6] = __cmpccxadd_epi32 (&srcdest1[6], srcdest2[6], src3[6], _CMPCCX_BE);
> +  res[7] = __cmpccxadd_epi32 (&srcdest1[7], srcdest2[7], src3[7], _CMPCCX_NBE);
> +  res[8] = __cmpccxadd_epi32 (&srcdest1[8], srcdest2[8], src3[8], _CMPCCX_S);
> +  res[9] = __cmpccxadd_epi32 (&srcdest1[9], srcdest2[9], src3[9], _CMPCCX_NS);
> +  res[10] = __cmpccxadd_epi32 (&srcdest1[10], srcdest2[10], src3[10], _CMPCCX_P);
> +  res[11] = __cmpccxadd_epi32 (&srcdest1[11], srcdest2[11], src3[11], _CMPCCX_NP);
> +  res[12] = __cmpccxadd_epi32 (&srcdest1[12], srcdest2[12], src3[12], _CMPCCX_L);
> +  res[13] = __cmpccxadd_epi32 (&srcdest1[13], srcdest2[13], src3[13], _CMPCCX_NL);
> +  res[14] = __cmpccxadd_epi32 (&srcdest1[14], srcdest2[14], src3[14], _CMPCCX_LE);
> +  res[15] = __cmpccxadd_epi32 (&srcdest1[15], srcdest2[15], src3[15], _CMPCCX_NLE);
> +
> +  res_64[0] = __cmpccxadd_epi64 (&srcdest1_64[0], srcdest2_64[0], src3_64[0], _CMPCCX_O);
> +  res_64[1] = __cmpccxadd_epi64 (&srcdest1_64[1], srcdest2_64[1], src3_64[1], _CMPCCX_NO);
> +  res_64[2] = __cmpccxadd_epi64 (&srcdest1_64[2], srcdest2_64[2], src3_64[2], _CMPCCX_B);
> +  res_64[3] = __cmpccxadd_epi64 (&srcdest1_64[3], srcdest2_64[3], src3_64[3], _CMPCCX_NB);
> +  res_64[4] = __cmpccxadd_epi64 (&srcdest1_64[4], srcdest2_64[4], src3_64[4], _CMPCCX_Z);
> +  res_64[5] = __cmpccxadd_epi64 (&srcdest1_64[5], srcdest2_64[5], src3_64[5], _CMPCCX_NZ);
> +  res_64[6] = __cmpccxadd_epi64 (&srcdest1_64[6], srcdest2_64[6], src3_64[6], _CMPCCX_BE);
> +  res_64[7] = __cmpccxadd_epi64 (&srcdest1_64[7], srcdest2_64[7], src3_64[7], _CMPCCX_NBE);
> +  res_64[8] = __cmpccxadd_epi64 (&srcdest1_64[8], srcdest2_64[8], src3_64[8], _CMPCCX_S);
> +  res_64[9] = __cmpccxadd_epi64 (&srcdest1_64[9], srcdest2_64[9], src3_64[9], _CMPCCX_NS);
> +  res_64[10] = __cmpccxadd_epi64 (&srcdest1_64[10], srcdest2_64[10], src3_64[10], _CMPCCX_P);
> +  res_64[11] = __cmpccxadd_epi64 (&srcdest1_64[11], srcdest2_64[11], src3_64[11], _CMPCCX_NP);
> +  res_64[12] = __cmpccxadd_epi64 (&srcdest1_64[12], srcdest2_64[12], src3_64[12], _CMPCCX_L);
> +  res_64[13] = __cmpccxadd_epi64 (&srcdest1_64[13], srcdest2_64[13], src3_64[13], _CMPCCX_NL);
> +  res_64[14] = __cmpccxadd_epi64 (&srcdest1_64[14], srcdest2_64[14], src3_64[14], _CMPCCX_LE);
> +  res_64[15] = __cmpccxadd_epi64 (&srcdest1_64[15], srcdest2_64[15], src3_64[15], _CMPCCX_NLE);
> +
> +  for (int i = 0; i < 16; i++)
> +  {
> +    if ((srcdest1[i] != _srcdest1[i]) || (res[i] != _srcdest2[i]))
> +      abort();
> +    if ((srcdest1_64[i] != _srcdest1_64[i]) || (res_64[i] != _srcdest2_64[i]))
> +      abort();
> +  }
> +
> +  return 0;
> +}
>
> Change both test files to align with the order of opcode.
>
> --
> 2.18.1
>
  

Patch

diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md
--- a/gcc/config/i386/sync.md
+++ b/gcc/config/i386/sync.md
@@ -1061,3 +1064,29 @@ 
 	(any_logic:SWI (match_dup 0) (match_dup 1)))]
   ""
   "lock{%;} %K2<logic>{<imodesuffix>}\t{%1, %0|%0, %1}")
+
+;; CMPCCXADD
+
+(define_insn "cmpccxadd_<mode>"
+  [(set (match_operand:SWI48x 0 "register_operand" "=r")
+	(unspec_volatile:SWI48x
+	  [(match_operand:SWI48x 1 "memory_operand" "+m")
+	   (match_operand:SWI48x 2 "register_operand" "0")
+	   (match_operand:SWI48x 3 "register_operand" "r")
+	   (match_operand:SI 4 "const_0_to_15_operand" "n")]
+	  UNSPECV_CMPCCXADD))
+   (set (match_dup 1)
+	(unspec_volatile:SWI48x [(const_int 0)] UNSPECV_CMPCCXADD))
+   (set (reg:CC FLAGS_REG)
+	(unspec_volatile:CC [(const_int 0)] UNSPECV_CMPCCXADD))]
+  "TARGET_CMPCCXADD && TARGET_64BIT"
+{
+  char buf[128];
+  const char *ops = "cmp%sxadd\t{%%3, %%0, %%1|%%1, %%0, %%3}";
+  char const *cc[16] = {"o" ,"no", "b", "nb", "z", "nz", "be", "nbe",
+			"s", "ns", "p", "np", "l", "nl", "le", "nle"};
+
+  snprintf (buf, sizeof (buf), ops, cc[INTVAL (operands[4])]);
+  output_asm_insn (buf, operands);
+  return "";
+})

Changed the whole pattern to follow the way cmpxchg is done.
Also adjusted the cc array order to align with the opcode order.

diff --git a/gcc/testsuite/gcc.target/i386/cmpccxadd-1.c b/gcc/testsuite/gcc.target/i386/cmpccxadd-1.c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/cmpccxadd-1.c
@@ -0,0 +1,61 @@ 
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mcmpccxadd" } */
+/* { dg-final { scan-assembler-times "cmpoxadd\[ \\t\]" 2 } } */
+/* { dg-final { scan-assembler-times "cmpnoxadd\[ \\t\]" 2 } } */
+/* { dg-final { scan-assembler-times "cmpbxadd\[ \\t\]" 2 } } */
+/* { dg-final { scan-assembler-times "cmpnbxadd\[ \\t\]" 2 } } */
+/* { dg-final { scan-assembler-times "cmpzxadd\[ \\t\]" 2 } } */
+/* { dg-final { scan-assembler-times "cmpnzxadd\[ \\t\]" 2 } } */
+/* { dg-final { scan-assembler-times "cmpbexadd\[ \\t\]" 2 } } */
+/* { dg-final { scan-assembler-times "cmpnbexadd\[ \\t\]" 2 } } */
+/* { dg-final { scan-assembler-times "cmpsxadd\[ \\t\]" 2 } } */
+/* { dg-final { scan-assembler-times "cmpnsxadd\[ \\t\]" 2 } } */
+/* { dg-final { scan-assembler-times "cmppxadd\[ \\t\]" 2 } } */
+/* { dg-final { scan-assembler-times "cmpnpxadd\[ \\t\]" 2 } } */
+/* { dg-final { scan-assembler-times "cmplxadd\[ \\t\]" 2 } } */
+/* { dg-final { scan-assembler-times "cmpnlxadd\[ \\t\]" 2 } } */
+/* { dg-final { scan-assembler-times "cmplexadd\[ \\t\]" 2 } } */
+/* { dg-final { scan-assembler-times "cmpnlexadd\[ \\t\]" 2 } } */
+#include <x86gprintrin.h>
+
+int *a;
+int b, c;
+long long *d;
+long long e, f;
+
+void extern
+cmpccxadd_test(void)
+{
+  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_O);
+  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_O);
+  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_NO);
+  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_NO);
+  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_B);
+  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_B);
+  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_NB);
+  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_NB);
+  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_Z);
+  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_Z);
+  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_NZ);
+  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_NZ);
+  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_BE);
+  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_BE);
+  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_NBE);
+  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_NBE);
+  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_S);
+  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_S);
+  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_NS);
+  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_NS);
+  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_P);
+  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_P);
+  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_NP);
+  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_NP);
+  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_L);
+  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_L);
+  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_NL);
+  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_NL);
+  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_LE);
+  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_LE);
+  b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_NLE);
+  e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_NLE);
+}
diff --git a/gcc/testsuite/gcc.target/i386/cmpccxadd-2.c b/gcc/testsuite/gcc.target/i386/cmpccxadd-2.c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/cmpccxadd-2.c
@@ -0,0 +1,138 @@ 
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-O2 -mcmpccxadd" } */
+/* { dg-require-effective-target cmpccxadd } */
+
+#include <stdlib.h>
+#include <x86gprintrin.h>
+
+int
+main()
+{
+  if (!__builtin_cpu_supports("cmpccxadd"))
+    return 0;
+	
+  int srcdest1[16] = { -2147483648,1,1,1,1,2,1,2,1,2,4,2,1,1,1,2 };
+  int srcdest2[16] = { 1,1,2,1,1,1,1,1,2,1,1,1,2,1,1,1 };
+  int src3[16] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 };
+  int _srcdest1[16], _srcdest2[16], res[16], cond[16];
+  long long srcdest1_64[16] = { -9223372036854775807LL-1,1,1,1,1,2,1,2,1,2,4,2,1,1,1,2 };
+  long long srcdest2_64[16] = { 1,1,2,1,1,1,1,1,2,1,1,1,2,1,1,1 };
+  long long src3_64[16] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 };
+  long long _srcdest1_64[16], _srcdest2_64[16], res_64[16], cond_64[16];
+
+  int tmp2[16];
+  long long tmp2_64[16];
+
+  int cf[16] = { 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0 };
+  int of[16] = { 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
+  int sf[16] = { 0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0 };
+  int zf[16] = { 0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0 };
+  int af[16] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
+  int pf[16] = { 0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0 };
+
+  for (int i = 0; i < 16; i++)
+  {
+    tmp2[i] = srcdest1[i] + src3[i];
+    tmp2_64[i] = srcdest1_64[i] + src3_64[i];
+  }
+
+  cond[0] = of[0] == 1 ? 1 : 0;
+  cond[1] = of[1] == 0 ? 1 : 0;
+  cond[2] = cf[2] == 1 ? 1 : 0;
+  cond[3] = cf[3] == 0 ? 1 : 0;
+  cond[4] = zf[4] == 1 ? 1 : 0;
+  cond[5] = zf[5] == 0 ? 1 : 0;
+  cond[6] = (cf[6] || zf[6]) == 1 ? 1 : 0;
+  cond[7] = (cf[7] || zf[7]) == 0 ? 1 : 0;
+  cond[8] = sf[8] == 1 ? 1 : 0;
+  cond[9] = sf[9] == 0 ? 1 : 0;
+  cond[10] = pf[10] == 1 ? 1 : 0;
+  cond[11] = pf[11] == 0 ? 1 : 0;
+  cond[12] = ((sf[12] && !of[12]) || (!sf[12] && of[12])) == 1 ? 1 : 0;
+  cond[13] = ((sf[13] && !of[13]) || (!sf[13] && of[13])) == 0 ? 1 : 0;
+  cond[14] = (((sf[14] && !of[14]) || (!sf[14] && of[14])) || zf[14]) == 1 ? 1 : 0;
+  cond[15] = (((sf[15] && !of[15]) || (!sf[15] && of[15])) || zf[15]) == 0 ? 1 : 0;
+
+  cond_64[0] = of[0] == 1 ? 1 : 0;
+  cond_64[1] = of[1] == 0 ? 1 : 0;
+  cond_64[2] = cf[2] == 1 ? 1 : 0;
+  cond_64[3] = cf[3] == 0 ? 1 : 0;
+  cond_64[4] = zf[4] == 1 ? 1 : 0;
+  cond_64[5] = zf[5] == 0 ? 1 : 0;
+  cond_64[6] = (cf[6] || zf[6]) == 1 ? 1 : 0;
+  cond_64[7] = (cf[7] || zf[7]) == 0 ? 1 : 0;
+  cond_64[8] = sf[8] == 1 ? 1 : 0;
+  cond_64[9] = sf[9] == 0 ? 1 : 0;
+  cond_64[10] = pf[10] == 1 ? 1 : 0;
+  cond_64[11] = pf[11] == 0 ? 1 : 0;
+  cond_64[12] = ((sf[12] && !of[12]) || (!sf[12] && of[12])) == 1 ? 1 : 0;
+  cond_64[13] = ((sf[13] && !of[13]) || (!sf[13] && of[13])) == 0 ? 1 : 0;
+  cond_64[14] = (((sf[14] && !of[14]) || (!sf[14] && of[14])) || zf[14]) == 1 ? 1 : 0;
+  cond_64[15] = (((sf[15] && !of[15]) || (!sf[15] && of[15])) || zf[15]) == 0 ? 1 : 0;
+
+  for (int i = 0; i < 16; i++)
+  {
+    if (cond[i] == 1)
+    {
+      _srcdest1[i] = tmp2[i];
+    }
+    else
+    {
+      _srcdest1[i] = srcdest1[i];
+    }
+    if (cond_64[i] == 1)
+    {
+      _srcdest1_64[i] = tmp2_64[i];
+    }
+    else
+    {
+      _srcdest1_64[i] = srcdest1_64[i];
+    }
+    _srcdest2[i] = srcdest1[i];
+    _srcdest2_64[i] = srcdest1_64[i];
+  }
+
+  res[0] = __cmpccxadd_epi32 (&srcdest1[0], srcdest2[0], src3[0], _CMPCCX_O);
+  res[1] = __cmpccxadd_epi32 (&srcdest1[1], srcdest2[1], src3[1], _CMPCCX_NO);
+  res[2] = __cmpccxadd_epi32 (&srcdest1[2], srcdest2[2], src3[2], _CMPCCX_B);
+  res[3] = __cmpccxadd_epi32 (&srcdest1[3], srcdest2[3], src3[3], _CMPCCX_NB);
+  res[4] = __cmpccxadd_epi32 (&srcdest1[4], srcdest2[4], src3[4], _CMPCCX_Z);
+  res[5] = __cmpccxadd_epi32 (&srcdest1[5], srcdest2[5], src3[5], _CMPCCX_NZ);
+  res[6] = __cmpccxadd_epi32 (&srcdest1[6], srcdest2[6], src3[6], _CMPCCX_BE);
+  res[7] = __cmpccxadd_epi32 (&srcdest1[7], srcdest2[7], src3[7], _CMPCCX_NBE);
+  res[8] = __cmpccxadd_epi32 (&srcdest1[8], srcdest2[8], src3[8], _CMPCCX_S);
+  res[9] = __cmpccxadd_epi32 (&srcdest1[9], srcdest2[9], src3[9], _CMPCCX_NS);
+  res[10] = __cmpccxadd_epi32 (&srcdest1[10], srcdest2[10], src3[10], _CMPCCX_P);
+  res[11] = __cmpccxadd_epi32 (&srcdest1[11], srcdest2[11], src3[11], _CMPCCX_NP);
+  res[12] = __cmpccxadd_epi32 (&srcdest1[12], srcdest2[12], src3[12], _CMPCCX_L);
+  res[13] = __cmpccxadd_epi32 (&srcdest1[13], srcdest2[13], src3[13], _CMPCCX_NL);
+  res[14] = __cmpccxadd_epi32 (&srcdest1[14], srcdest2[14], src3[14], _CMPCCX_LE);
+  res[15] = __cmpccxadd_epi32 (&srcdest1[15], srcdest2[15], src3[15], _CMPCCX_NLE);
+
+  res_64[0] = __cmpccxadd_epi64 (&srcdest1_64[0], srcdest2_64[0], src3_64[0], _CMPCCX_O);
+  res_64[1] = __cmpccxadd_epi64 (&srcdest1_64[1], srcdest2_64[1], src3_64[1], _CMPCCX_NO);
+  res_64[2] = __cmpccxadd_epi64 (&srcdest1_64[2], srcdest2_64[2], src3_64[2], _CMPCCX_B);
+  res_64[3] = __cmpccxadd_epi64 (&srcdest1_64[3], srcdest2_64[3], src3_64[3], _CMPCCX_NB);
+  res_64[4] = __cmpccxadd_epi64 (&srcdest1_64[4], srcdest2_64[4], src3_64[4], _CMPCCX_Z);
+  res_64[5] = __cmpccxadd_epi64 (&srcdest1_64[5], srcdest2_64[5], src3_64[5], _CMPCCX_NZ);
+  res_64[6] = __cmpccxadd_epi64 (&srcdest1_64[6], srcdest2_64[6], src3_64[6], _CMPCCX_BE);
+  res_64[7] = __cmpccxadd_epi64 (&srcdest1_64[7], srcdest2_64[7], src3_64[7], _CMPCCX_NBE);
+  res_64[8] = __cmpccxadd_epi64 (&srcdest1_64[8], srcdest2_64[8], src3_64[8], _CMPCCX_S);
+  res_64[9] = __cmpccxadd_epi64 (&srcdest1_64[9], srcdest2_64[9], src3_64[9], _CMPCCX_NS);
+  res_64[10] = __cmpccxadd_epi64 (&srcdest1_64[10], srcdest2_64[10], src3_64[10], _CMPCCX_P);
+  res_64[11] = __cmpccxadd_epi64 (&srcdest1_64[11], srcdest2_64[11], src3_64[11], _CMPCCX_NP);
+  res_64[12] = __cmpccxadd_epi64 (&srcdest1_64[12], srcdest2_64[12], src3_64[12], _CMPCCX_L);
+  res_64[13] = __cmpccxadd_epi64 (&srcdest1_64[13], srcdest2_64[13], src3_64[13], _CMPCCX_NL);
+  res_64[14] = __cmpccxadd_epi64 (&srcdest1_64[14], srcdest2_64[14], src3_64[14], _CMPCCX_LE);
+  res_64[15] = __cmpccxadd_epi64 (&srcdest1_64[15], srcdest2_64[15], src3_64[15], _CMPCCX_NLE);
+
+  for (int i = 0; i < 16; i++)
+  {
+    if ((srcdest1[i] != _srcdest1[i]) || (res[i] != _srcdest2[i]))
+      abort();
+    if ((srcdest1_64[i] != _srcdest1_64[i]) || (res_64[i] != _srcdest2_64[i]))
+      abort();
+  }
+
+  return 0;
+}