@@ -228,13 +228,22 @@ (define_int_attr apv [(UNSPEC_MMA_XVF64GERPP "xvf64gerpp")
(define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "pmxvi4ger8")])
+(define_int_attr vvi4i4i8_dm [(UNSPEC_MMA_PMXVI4GER8 "pmdmxvi4ger8")])
+;; Each <name>_dm attribute gives the "pmdm" spelling for the unspecs of <name>.
(define_int_attr avvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8PP "pmxvi4ger8pp")])
+(define_int_attr avvi4i4i8_dm [(UNSPEC_MMA_PMXVI4GER8PP "pmdmxvi4ger8pp")])
+;; NOTE(review): confirm mma_<avvi4i4i8> was switched to <avvi4i4i8_dm>.
(define_int_attr vvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2 "pmxvi16ger2")
(UNSPEC_MMA_PMXVI16GER2S "pmxvi16ger2s")
(UNSPEC_MMA_PMXVF16GER2 "pmxvf16ger2")
(UNSPEC_MMA_PMXVBF16GER2 "pmxvbf16ger2")])
+(define_int_attr vvi4i4i2_dm [(UNSPEC_MMA_PMXVI16GER2 "pmdmxvi16ger2")
+ (UNSPEC_MMA_PMXVI16GER2S "pmdmxvi16ger2s")
+ (UNSPEC_MMA_PMXVF16GER2 "pmdmxvf16ger2")
+ (UNSPEC_MMA_PMXVBF16GER2 "pmdmxvbf16ger2")])
+
(define_int_attr avvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2PP "pmxvi16ger2pp")
(UNSPEC_MMA_PMXVI16GER2SPP "pmxvi16ger2spp")
(UNSPEC_MMA_PMXVF16GER2PP "pmxvf16ger2pp")
@@ -246,25 +255,54 @@ (define_int_attr avvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2PP "pmxvi16ger2pp")
(UNSPEC_MMA_PMXVBF16GER2NP "pmxvbf16ger2np")
(UNSPEC_MMA_PMXVBF16GER2NN "pmxvbf16ger2nn")])
+(define_int_attr avvi4i4i2_dm [(UNSPEC_MMA_PMXVI16GER2PP "pmdmxvi16ger2pp")
+ (UNSPEC_MMA_PMXVI16GER2SPP "pmdmxvi16ger2spp")
+ (UNSPEC_MMA_PMXVF16GER2PP "pmdmxvf16ger2pp")
+ (UNSPEC_MMA_PMXVF16GER2PN "pmdmxvf16ger2pn")
+ (UNSPEC_MMA_PMXVF16GER2NP "pmdmxvf16ger2np")
+ (UNSPEC_MMA_PMXVF16GER2NN "pmdmxvf16ger2nn")
+ (UNSPEC_MMA_PMXVBF16GER2PP "pmdmxvbf16ger2pp")
+ (UNSPEC_MMA_PMXVBF16GER2PN "pmdmxvbf16ger2pn")
+ (UNSPEC_MMA_PMXVBF16GER2NP "pmdmxvbf16ger2np")
+ (UNSPEC_MMA_PMXVBF16GER2NN "pmdmxvbf16ger2nn")])
+;; In every *_dm attribute the mnemonic is the base one with "pm" turned into "pmdm".
(define_int_attr vvi4i4 [(UNSPEC_MMA_PMXVF32GER "pmxvf32ger")])
+(define_int_attr vvi4i4_dm [(UNSPEC_MMA_PMXVF32GER "pmdmxvf32ger")])
+
(define_int_attr avvi4i4 [(UNSPEC_MMA_PMXVF32GERPP "pmxvf32gerpp")
(UNSPEC_MMA_PMXVF32GERPN "pmxvf32gerpn")
(UNSPEC_MMA_PMXVF32GERNP "pmxvf32gernp")
(UNSPEC_MMA_PMXVF32GERNN "pmxvf32gernn")])
+(define_int_attr avvi4i4_dm [(UNSPEC_MMA_PMXVF32GERPP "pmdmxvf32gerpp")
+ (UNSPEC_MMA_PMXVF32GERPN "pmdmxvf32gerpn")
+ (UNSPEC_MMA_PMXVF32GERNP "pmdmxvf32gernp")
+ (UNSPEC_MMA_PMXVF32GERNN "pmdmxvf32gernn")])
+
(define_int_attr pvi4i2 [(UNSPEC_MMA_PMXVF64GER "pmxvf64ger")])
+(define_int_attr pvi4i2_dm [(UNSPEC_MMA_PMXVF64GER "pmdmxvf64ger")])
+
(define_int_attr apvi4i2 [(UNSPEC_MMA_PMXVF64GERPP "pmxvf64gerpp")
(UNSPEC_MMA_PMXVF64GERPN "pmxvf64gerpn")
(UNSPEC_MMA_PMXVF64GERNP "pmxvf64gernp")
(UNSPEC_MMA_PMXVF64GERNN "pmxvf64gernn")])
+(define_int_attr apvi4i2_dm [(UNSPEC_MMA_PMXVF64GERPP "pmdmxvf64gerpp")
+ (UNSPEC_MMA_PMXVF64GERPN "pmdmxvf64gerpn")
+ (UNSPEC_MMA_PMXVF64GERNP "pmdmxvf64gernp")
+ (UNSPEC_MMA_PMXVF64GERNN "pmdmxvf64gernn")])
+
(define_int_attr vvi4i4i4 [(UNSPEC_MMA_PMXVI8GER4 "pmxvi8ger4")])
+(define_int_attr vvi4i4i4_dm [(UNSPEC_MMA_PMXVI8GER4 "pmdmxvi8ger4")])
+
(define_int_attr avvi4i4i4 [(UNSPEC_MMA_PMXVI8GER4PP "pmxvi8ger4pp")
(UNSPEC_MMA_PMXVI8GER4SPP "pmxvi8ger4spp")])
+(define_int_attr avvi4i4i4_dm [(UNSPEC_MMA_PMXVI8GER4PP "pmdmxvi8ger4pp")
+ (UNSPEC_MMA_PMXVI8GER4SPP "pmdmxvi8ger4spp")])
;; Vector pair support. OOmode can only live in VSRs.
(define_expand "movoo"
@@ -666,7 +704,10 @@ (define_insn "mma_<vv>"
(match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")]
MMA_VV))]
"TARGET_MMA"
- "<vv> %A0,%x1,%x2"
+ "@
+ dm<vv> %A0,%x1,%x2
+ <vv> %A0,%x1,%x2
+ <vv> %A0,%x1,%x2"
[(set_attr "type" "mma")
(set_attr "isa" "dm,not_dm,not_dm")])
@@ -687,7 +728,10 @@ (define_insn "mma_<pv>"
(match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")]
MMA_PV))]
"TARGET_MMA"
- "<pv> %A0,%x1,%x2"
+ "@
+ dm<pv> %A0,%x1,%x2
+ <pv> %A0,%x1,%x2
+ <pv> %A0,%x1,%x2"
[(set_attr "type" "mma")
(set_attr "isa" "dm,not_dm,not_dm")])
@@ -698,7 +742,10 @@ (define_insn "mma_<apv>"
(match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")]
MMA_APV))]
"TARGET_MMA"
- "<apv> %A0,%x2,%x3"
+ "@
+ dm<apv> %A0,%x2,%x3
+ <apv> %A0,%x2,%x3
+ <apv> %A0,%x2,%x3"
[(set_attr "type" "mma")
(set_attr "isa" "dm,not_dm,not_dm")])
@@ -711,7 +758,10 @@ (define_insn "mma_<vvi4i4i8>"
(match_operand:SI 5 "u8bit_cint_operand" "n,n,n")]
MMA_VVI4I4I8))]
"TARGET_MMA"
- "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
+ "@
+ <vvi4i4i8_dm> %A0,%x1,%x2,%3,%4,%5
+ <vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
+ <vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
[(set_attr "type" "mma")
(set_attr "prefixed" "yes")
(set_attr "isa" "dm,not_dm,not_dm")])
@@ -740,7 +790,10 @@ (define_insn "mma_<vvi4i4i2>"
(match_operand:SI 5 "const_0_to_3_operand" "n,n,n")]
MMA_VVI4I4I2))]
"TARGET_MMA"
- "<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
+ "@
+ <vvi4i4i2_dm> %A0,%x1,%x2,%3,%4,%5
+ <vvi4i4i2> %A0,%x1,%x2,%3,%4,%5
+ <vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
[(set_attr "type" "mma")
(set_attr "prefixed" "yes")
(set_attr "isa" "dm,not_dm,not_dm")])
@@ -755,7 +808,10 @@ (define_insn "mma_<avvi4i4i2>"
(match_operand:SI 6 "const_0_to_3_operand" "n,n,n")]
MMA_AVVI4I4I2))]
"TARGET_MMA"
- "<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
+ "@
+ <avvi4i4i2_dm> %A0,%x2,%x3,%4,%5,%6
+ <avvi4i4i2> %A0,%x2,%x3,%4,%5,%6
+ <avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
[(set_attr "type" "mma")
(set_attr "prefixed" "yes")
(set_attr "isa" "dm,not_dm,not_dm")])
@@ -768,7 +824,10 @@ (define_insn "mma_<vvi4i4>"
(match_operand:SI 4 "const_0_to_15_operand" "n,n,n")]
MMA_VVI4I4))]
"TARGET_MMA"
- "<vvi4i4> %A0,%x1,%x2,%3,%4"
+ "@
+ <vvi4i4_dm> %A0,%x1,%x2,%3,%4
+ <vvi4i4> %A0,%x1,%x2,%3,%4
+ <vvi4i4> %A0,%x1,%x2,%3,%4"
[(set_attr "type" "mma")
(set_attr "prefixed" "yes")
(set_attr "isa" "dm,not_dm,not_dm")])
@@ -782,7 +841,10 @@ (define_insn "mma_<avvi4i4>"
(match_operand:SI 5 "const_0_to_15_operand" "n,n,n")]
MMA_AVVI4I4))]
"TARGET_MMA"
- "<avvi4i4> %A0,%x2,%x3,%4,%5"
+ "@
+ <avvi4i4_dm> %A0,%x2,%x3,%4,%5
+ <avvi4i4> %A0,%x2,%x3,%4,%5
+ <avvi4i4> %A0,%x2,%x3,%4,%5"
[(set_attr "type" "mma")
(set_attr "prefixed" "yes")
(set_attr "isa" "dm,not_dm,not_dm")])
@@ -795,7 +857,10 @@ (define_insn "mma_<pvi4i2>"
(match_operand:SI 4 "const_0_to_3_operand" "n,n,n")]
MMA_PVI4I2))]
"TARGET_MMA"
- "<pvi4i2> %A0,%x1,%x2,%3,%4"
+ "@
+ <pvi4i2_dm> %A0,%x1,%x2,%3,%4
+ <pvi4i2> %A0,%x1,%x2,%3,%4
+ <pvi4i2> %A0,%x1,%x2,%3,%4"
[(set_attr "type" "mma")
(set_attr "prefixed" "yes")
(set_attr "isa" "dm,not_dm,not_dm")])
@@ -809,7 +874,10 @@ (define_insn "mma_<apvi4i2>"
(match_operand:SI 5 "const_0_to_3_operand" "n,n,n")]
MMA_APVI4I2))]
"TARGET_MMA"
- "<apvi4i2> %A0,%x2,%x3,%4,%5"
+ "@
+ <apvi4i2_dm> %A0,%x2,%x3,%4,%5
+ <apvi4i2> %A0,%x2,%x3,%4,%5
+ <apvi4i2> %A0,%x2,%x3,%4,%5"
[(set_attr "type" "mma")
(set_attr "prefixed" "yes")
(set_attr "isa" "dm,not_dm,not_dm")])
@@ -823,7 +891,10 @@ (define_insn "mma_<vvi4i4i4>"
(match_operand:SI 5 "const_0_to_15_operand" "n,n,n")]
MMA_VVI4I4I4))]
"TARGET_MMA"
- "<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
+ "@
+ <vvi4i4i4_dm> %A0,%x1,%x2,%3,%4,%5
+ <vvi4i4i4> %A0,%x1,%x2,%3,%4,%5
+ <vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
[(set_attr "type" "mma")
(set_attr "prefixed" "yes")
(set_attr "isa" "dm,not_dm,not_dm")])
@@ -838,7 +909,10 @@ (define_insn "mma_<avvi4i4i4>"
(match_operand:SI 6 "const_0_to_15_operand" "n,n,n")]
MMA_AVVI4I4I4))]
"TARGET_MMA"
- "<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
+ "@
+ <avvi4i4i4_dm> %A0,%x2,%x3,%4,%5,%6
+ <avvi4i4i4> %A0,%x2,%x3,%4,%5,%6
+ <avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
[(set_attr "type" "mma")
(set_attr "prefixed" "yes")
(set_attr "isa" "dm,not_dm,not_dm")])
new file mode 100644
@@ -0,0 +1,194 @@
+/* Test derived from mma-double-1.c, modified for dense math. */
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_dense_math_ok } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <altivec.h>
+
+typedef unsigned char vec_t __attribute__ ((vector_size (16)));
+typedef double v4sf_t __attribute__ ((vector_size (16)));
+/* Add the four vectors read out of accumulator ACC to CO[r * ldc + J] for
+   r = 0..3.  Uses the `result', `rowC' and `CO' variables of the caller.  */
+#define SAVE_ACC(ACC, ldc, J) \
+  do { \
+    __builtin_mma_disassemble_acc (result, (ACC)); \
+    rowC = (v4sf_t *) &CO[0 * (ldc) + (J)]; rowC[0] += result[0]; \
+    rowC = (v4sf_t *) &CO[1 * (ldc) + (J)]; rowC[0] += result[1]; \
+    rowC = (v4sf_t *) &CO[2 * (ldc) + (J)]; rowC[0] += result[2]; \
+    rowC = (v4sf_t *) &CO[3 * (ldc) + (J)]; rowC[0] += result[3]; \
+  } while (0)
+/* Accumulate A * B into C, one 16x4 tile of C per inner pass (8 accumulators).  */
+void
+DM (int m, int n, int k, double *A, double *B, double *C)
+{
+ __vector_quad acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7;
+ v4sf_t result[4];
+ v4sf_t *rowC;
+ for (int l = 0; l < n; l += 4) /* four columns of C per pass: C advances by m * 4 */
+ {
+ double *CO;
+ double *AO;
+ AO = A;
+ CO = C;
+ C += m * 4;
+ for (int j = 0; j < m; j += 16) /* 16 elements of each of those columns */
+ {
+ double *BO = B;
+ __builtin_mma_xxsetaccz (&acc0); /* every tile starts from zeroed accumulators */
+ __builtin_mma_xxsetaccz (&acc1);
+ __builtin_mma_xxsetaccz (&acc2);
+ __builtin_mma_xxsetaccz (&acc3);
+ __builtin_mma_xxsetaccz (&acc4);
+ __builtin_mma_xxsetaccz (&acc5);
+ __builtin_mma_xxsetaccz (&acc6);
+ __builtin_mma_xxsetaccz (&acc7);
+ unsigned long i;
+ /* k steps: each one issues an xvf64gerpp update on all eight accumulators.  */
+ for (i = 0; i < k; i++)
+ {
+ vec_t *rowA = (vec_t *) & AO[i * 16];
+ __vector_pair rowB;
+ vec_t *rb = (vec_t *) & BO[i * 4];
+ __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
+ __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
+ __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
+ __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]);
+ __builtin_mma_xvf64gerpp (&acc3, rowB, rowA[3]);
+ __builtin_mma_xvf64gerpp (&acc4, rowB, rowA[4]);
+ __builtin_mma_xvf64gerpp (&acc5, rowB, rowA[5]);
+ __builtin_mma_xvf64gerpp (&acc6, rowB, rowA[6]);
+ __builtin_mma_xvf64gerpp (&acc7, rowB, rowA[7]);
+ }
+ SAVE_ACC (&acc0, m, 0); /* accN is added at element offset 2 * N of CO */
+ SAVE_ACC (&acc2, m, 4);
+ SAVE_ACC (&acc1, m, 2);
+ SAVE_ACC (&acc3, m, 6);
+ SAVE_ACC (&acc4, m, 8);
+ SAVE_ACC (&acc6, m, 12);
+ SAVE_ACC (&acc5, m, 10);
+ SAVE_ACC (&acc7, m, 14);
+ AO += k * 16;
+ BO += k * 4; /* no lasting effect: BO is re-created from B on the next pass */
+ CO += 16;
+ }
+ B += k * 4;
+ }
+}
+
+/* Fill column-major MATRIX: element (i, j) gets (i * 16 + 2 + j) / 0.123.  */
+void
+init (double *matrix, int row, int column)
+{
+  for (int col = 0; col < column; col++)
+    {
+      const int base = col * row;
+      for (int r = 0; r < row; r++)
+        matrix[base + r] = (r * 16 + 2 + col) / 0.123;
+    }
+}
+
+/* Zero the first ROW * COLUMN elements of both MATRIX and MATRIX1.  */
+void
+init0 (double *matrix, double *matrix1, int row, int column)
+{
+  for (int col = 0; col < column; col++)
+    for (int r = 0, at = col * row; r < row; r++, at++)
+      matrix[at] = matrix1[at] = 0;
+}
+
+/* Write a heading with NAME and the dimensions, then the column-major
+   MATRIX one row per output line, then an empty line.  */
+void
+print (const char *name, const double *matrix, int row, int column)
+{
+  printf ("Matrix %s has %d rows and %d columns:\n", name, row, column);
+  for (int r = 0; r < row; r++)
+    {
+      for (int c = 0; c < column; c++)
+        printf ("%f ", matrix[c * row + r]);
+      printf ("\n");
+    }
+  printf ("\n");
+}
+
+/* Run DM for 16..128 rows of A and 4..16 columns of B (the common
+   dimension is always 1) and compare each result with a scalar reference
+   product.  Returns the mismatch count; unless VERBOSE is defined, any
+   mismatch aborts instead.  */
+int
+main (int argc, char *argv[])
+{
+  int ret = 0;
+
+  for (int rowsA = 16; rowsA <= 128; rowsA += 16)
+    for (int colsB = 4; colsB <= 16; colsB += 4)
+      {
+        const int common = 1;
+
+        /* Variable-length arrays sized for this combination.  */
+        double A[rowsA * common];
+        double B[common * colsB];
+        double C[rowsA * colsB];
+        double D[rowsA * colsB];
+
+        /* A and B get a fixed pattern; C and D start out as zero.  */
+        init (A, rowsA, common);
+        init (B, common, colsB);
+        init0 (C, D, rowsA, colsB);
+
+        /* Result under test, accumulated into C.  */
+        DM (rowsA, colsB, common, A, B, C);
+
+        /* Scalar reference product in D, laid out like C.  */
+        for (int col = 0; col < colsB; col++)
+          for (int row = 0; row < rowsA; row++)
+            {
+              const int at = col * rowsA + row;
+              D[at] = 0;
+              for (int kk = 0; kk < common; kk++)
+                D[at] += A[kk * rowsA + row] * B[kk + common * col];
+            }
+
+        /* The comparison does not depend on kk, so a mismatch is reported
+           and counted once for every kk value.  */
+        for (int col = 0; col < colsB; col++)
+          for (int row = 0; row < rowsA; row++)
+            for (int kk = 0; kk < common; kk++)
+              {
+                const int at = col * rowsA + row;
+                if (D[at] == C[at])
+                  continue;
+                printf ("Error %d,%d,%d\n",col,row,kk);
+                ret++;
+              }
+
+        /* RET is cumulative: after the first failure the matrices of every
+           later combination are printed too.  */
+        if (ret != 0)
+          {
+            print ("A", A, rowsA, common);
+            print ("B", B, common, colsB);
+            print ("C", C, rowsA, colsB);
+            print ("D", D, rowsA, colsB);
+          }
+      }
+
+#ifdef VERBOSE
+  if (ret == 0)
+    printf ("DM double test success: 0 DM errors\n");
+  else
+    printf ("DM double test fail: %d errors\n",ret);
+#else
+  if (ret != 0)
+    abort();
+#endif
+
+  return ret;
+}
+
+/* { dg-final { scan-assembler {\mdmsetdmrz\M} } } */
+/* { dg-final { scan-assembler {\mdmxvf64gerpp\M} } } */
+/* { dg-final { scan-assembler {\mdmxxextfdmr512\M} } } */
+
@@ -6938,6 +6938,25 @@ proc check_effective_target_power10_ok { } {
}
}
+# Return 1 if this is a PowerPC target where -mcpu=future enables the dense
+# math operations: __PPC_DMR__ is defined and the "wD" constraint is accepted.
+proc check_effective_target_powerpc_dense_math_ok { } {
+ return [check_no_compiler_messages_nocache powerpc_dense_math_ok assembly {
+ __vector_quad vq;
+ void test (void)
+ {
+ #ifndef __PPC_DMR__
+ #error "target does not have dense math support."
+ #else
+ /* The "wD" operand must be accepted.  NOTE(review): tests scan for dmsetdmrz; confirm dmsetaccz.  */
+ __vector_quad dmr;
+ __asm__ ("dmsetaccz %A0" : "=wD" (dmr));
+ vq = dmr;
+ #endif
+ }
+ } "-mcpu=future"]
+}
+
# Return 1 if this is a PowerPC target supporting -mfloat128 via either
# software emulation on power7/power8 systems or hardware support on power9.