Repost [PATCH 5/6] PowerPC: Switch to dense math names for all MMA operations.

Message ID ZZiTiojbYNzVvJEV@cowardly-lion.the-meissners.org
State Unresolved
Headers
Series Repost [PATCH 5/6] PowerPC: Switch to dense math names for all MMA operations. |

Checks

Context Check Description
snail/gcc-patch-check warning Git am fail log

Commit Message

Michael Meissner Jan. 5, 2024, 11:40 p.m. UTC
  This patch changes the assembler instruction names for MMA instructions from
the original name used in power10 to the new name when used with the dense math
system.  I.e. xvf64gerpp becomes dmxvf64gerpp.  The assembler will emit the
same bits for either spelling.

The patches have been tested on both little and big endian systems.  Can I check
it into the master branch?

2024-01-05   Michael Meissner  <meissner@linux.ibm.com>

gcc/

	* config/rs6000/mma.md (vvi4i4i8_dm): New int attribute.
	(avvi4i4i8_dm): Likewise.
	(vvi4i4i2_dm): Likewise.
	(avvi4i4i2_dm): Likewise.
	(vvi4i4_dm): Likewise.
	(avvi4i4_dm): Likewise.
	(pvi4i2_dm): Likewise.
	(apvi4i2_dm): Likewise.
	(vvi4i4i4_dm): Likewise.
	(avvi4i4i4_dm): Likewise.
	(mma_<vv>): Add support for running on DMF systems, generating the dense
	math instruction and using the dense math accumulators.
	(mma_<avv>): Likewise.
	(mma_<pv>): Likewise.
	(mma_<apv>): Likewise.
	(mma_<vvi4i4i8>): Likewise.
	(mma_<avvi4i4i8>): Likewise.
	(mma_<vvi4i4i2>): Likewise.
	(mma_<avvi4i4i2>): Likewise.
	(mma_<vvi4i4>): Likewise.
	(mma_<avvi4i4): Likewise.
	(mma_<pvi4i2>): Likewise.
	(mma_<apvi4i2): Likewise.
	(mma_<vvi4i4i4>): Likewise.
	(mma_<avvi4i4i4>): Likewise.

gcc/testsuite/

	* gcc.target/powerpc/dm-double-test.c: New test.
	* lib/target-supports.exp (check_effective_target_ppc_dmr_ok): New
	target test.
---
 gcc/config/rs6000/mma.md                      |  98 +++++++--
 .../gcc.target/powerpc/dm-double-test.c       | 194 ++++++++++++++++++
 gcc/testsuite/lib/target-supports.exp         |  19 ++
 3 files changed, 299 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/dm-double-test.c
  

Comments

Michael Meissner Jan. 19, 2024, 6:48 p.m. UTC | #1
Ping

| Date: Fri, 5 Jan 2024 18:40:58 -0500
| From: Michael Meissner <meissner@linux.ibm.com>
| Subject: Repost [PATCH 5/6] PowerPC: Switch to dense math names for all MMA operations.
| Message-ID: <ZZiTiojbYNzVvJEV@cowardly-lion.the-meissners.org>

https://gcc.gnu.org/pipermail/gcc-patches/2024-January/641965.html
  
Kewen.Lin Feb. 4, 2024, 5:47 a.m. UTC | #2
Hi Mike,

on 2024/1/6 07:40, Michael Meissner wrote:
> This patch changes the assembler instruction names for MMA instructions from
> the original name used in power10 to the new name when used with the dense math
> system.  I.e. xvf64gerpp becomes dmxvf64gerpp.  The assembler will emit the
> same bits for either spelling.
> 
> The patches have been tested on both little and big endian systems.  Can I check
> it into the master branch?
> 
> 2024-01-05   Michael Meissner  <meissner@linux.ibm.com>
> 
> gcc/
> 
> 	* config/rs6000/mma.md (vvi4i4i8_dm): New int attribute.
> 	(avvi4i4i8_dm): Likewise.
> 	(vvi4i4i2_dm): Likewise.
> 	(avvi4i4i2_dm): Likewise.
> 	(vvi4i4_dm): Likewise.
> 	(avvi4i4_dm): Likewise.
> 	(pvi4i2_dm): Likewise.
> 	(apvi4i2_dm): Likewise.
> 	(vvi4i4i4_dm): Likewise.
> 	(avvi4i4i4_dm): Likewise.
> 	(mma_<vv>): Add support for running on DMF systems, generating the dense
> 	math instruction and using the dense math accumulators.
> 	(mma_<avv>): Likewise.
> 	(mma_<pv>): Likewise.
> 	(mma_<apv>): Likewise.
> 	(mma_<vvi4i4i8>): Likewise.
> 	(mma_<avvi4i4i8>): Likewise.
> 	(mma_<vvi4i4i2>): Likewise.
> 	(mma_<avvi4i4i2>): Likewise.
> 	(mma_<vvi4i4>): Likewise.
> 	(mma_<avvi4i4): Likewise.
> 	(mma_<pvi4i2>): Likewise.
> 	(mma_<apvi4i2): Likewise.
> 	(mma_<vvi4i4i4>): Likewise.
> 	(mma_<avvi4i4i4>): Likewise.
> 
> gcc/testsuite/
> 
> 	* gcc.target/powerpc/dm-double-test.c: New test.
> 	* lib/target-supports.exp (check_effective_target_ppc_dmr_ok): New
> 	target test.
> ---
>  gcc/config/rs6000/mma.md                      |  98 +++++++--
>  .../gcc.target/powerpc/dm-double-test.c       | 194 ++++++++++++++++++
>  gcc/testsuite/lib/target-supports.exp         |  19 ++
>  3 files changed, 299 insertions(+), 12 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/dm-double-test.c
> 
> diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
> index 525a85146ff..f06e6bbb184 100644
> --- a/gcc/config/rs6000/mma.md
> +++ b/gcc/config/rs6000/mma.md
> @@ -227,13 +227,22 @@ (define_int_attr apv		[(UNSPEC_MMA_XVF64GERPP		"xvf64gerpp")
>  
>  (define_int_attr vvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8		"pmxvi4ger8")])
>  
> +(define_int_attr vvi4i4i8_dm	[(UNSPEC_MMA_PMXVI4GER8		"pmdmxvi4ger8")])

Can we update vvi4i4i8 to

(define_int_attr vvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8		"xvi4ger8")])

by avoiding to introduce vvi4i4i8_dm, then its use places would be like:

-  "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
+  "@
+   pmdm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
+   pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"

and 

- define_insn "mma_<vvi4i4i8>"
+ define_insn "mma_pm<vvi4i4i8>"

(or updating its use in corresponding bif expander field)

?  

This comment is also applied for the other iterators changes.

> +
>  (define_int_attr avvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8PP	"pmxvi4ger8pp")])
>  
> +(define_int_attr avvi4i4i8_dm	[(UNSPEC_MMA_PMXVI4GER8PP	"pmdmxvi4ger8pp")])
> +
>  (define_int_attr vvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2	"pmxvi16ger2")
>  				 (UNSPEC_MMA_PMXVI16GER2S	"pmxvi16ger2s")
>  				 (UNSPEC_MMA_PMXVF16GER2	"pmxvf16ger2")
>  				 (UNSPEC_MMA_PMXVBF16GER2	"pmxvbf16ger2")])
>  
> +(define_int_attr vvi4i4i2_dm	[(UNSPEC_MMA_PMXVI16GER2	"pmdmxvi16ger2")
> +				 (UNSPEC_MMA_PMXVI16GER2S	"pmdmxvi16ger2s")
> +				 (UNSPEC_MMA_PMXVF16GER2	"pmdmxvf16ger2")
> +				 (UNSPEC_MMA_PMXVBF16GER2	"pmdmxvbf16ger2")])
> +
>  (define_int_attr avvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2PP	"pmxvi16ger2pp")
>  				 (UNSPEC_MMA_PMXVI16GER2SPP	"pmxvi16ger2spp")
>  				 (UNSPEC_MMA_PMXVF16GER2PP	"pmxvf16ger2pp")
> @@ -245,25 +254,54 @@ (define_int_attr avvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2PP	"pmxvi16ger2pp")
>  				 (UNSPEC_MMA_PMXVBF16GER2NP	"pmxvbf16ger2np")
>  				 (UNSPEC_MMA_PMXVBF16GER2NN	"pmxvbf16ger2nn")])
>  
> +(define_int_attr avvi4i4i2_dm	[(UNSPEC_MMA_PMXVI16GER2PP	"pmdmxvi16ger2pp")
> +				 (UNSPEC_MMA_PMXVI16GER2SPP	"pmdmxvi16ger2spp")
> +				 (UNSPEC_MMA_PMXVF16GER2PP	"pmdmxvf16ger2pp")
> +				 (UNSPEC_MMA_PMXVF16GER2PN	"pmdmxvf16ger2pn")
> +				 (UNSPEC_MMA_PMXVF16GER2NP	"pmdmxvf16ger2np")
> +				 (UNSPEC_MMA_PMXVF16GER2NN	"pmdmxvf16ger2nn")
> +				 (UNSPEC_MMA_PMXVBF16GER2PP	"pmdmxvbf16ger2pp")
> +				 (UNSPEC_MMA_PMXVBF16GER2PN	"pmdmxvbf16ger2pn")
> +				 (UNSPEC_MMA_PMXVBF16GER2NP	"pmdmxvbf16ger2np")
> +				 (UNSPEC_MMA_PMXVBF16GER2NN	"pmdmxvbf16ger2nn")])
> +
>  (define_int_attr vvi4i4		[(UNSPEC_MMA_PMXVF32GER		"pmxvf32ger")])
>  
> +(define_int_attr vvi4i4_dm	[(UNSPEC_MMA_PMXVF32GER		"pmdmxvf32ger")])
> +
>  (define_int_attr avvi4i4	[(UNSPEC_MMA_PMXVF32GERPP	"pmxvf32gerpp")
>  				 (UNSPEC_MMA_PMXVF32GERPN	"pmxvf32gerpn")
>  				 (UNSPEC_MMA_PMXVF32GERNP	"pmxvf32gernp")
>  				 (UNSPEC_MMA_PMXVF32GERNN	"pmxvf32gernn")])
>  
> +(define_int_attr avvi4i4_dm	[(UNSPEC_MMA_PMXVF32GERPP	"pmdmxvf32gerpp")
> +				 (UNSPEC_MMA_PMXVF32GERPN	"pmdmxvf32gerpn")
> +				 (UNSPEC_MMA_PMXVF32GERNP	"pmdmxvf32gernp")
> +				 (UNSPEC_MMA_PMXVF32GERNN	"pmdmxvf32gernn")])
> +
>  (define_int_attr pvi4i2		[(UNSPEC_MMA_PMXVF64GER		"pmxvf64ger")])
>  
> +(define_int_attr pvi4i2_dm	[(UNSPEC_MMA_PMXVF64GER		"pmdmxvf64ger")])
> +
>  (define_int_attr apvi4i2	[(UNSPEC_MMA_PMXVF64GERPP	"pmxvf64gerpp")
>  				 (UNSPEC_MMA_PMXVF64GERPN	"pmxvf64gerpn")
>  				 (UNSPEC_MMA_PMXVF64GERNP	"pmxvf64gernp")
>  				 (UNSPEC_MMA_PMXVF64GERNN	"pmxvf64gernn")])
>  
> +(define_int_attr apvi4i2_dm	[(UNSPEC_MMA_PMXVF64GERPP	"pmdmxvf64gerpp")
> +				 (UNSPEC_MMA_PMXVF64GERPN	"pmdmxvf64gerpn")
> +				 (UNSPEC_MMA_PMXVF64GERNP	"pmdmxvf64gernp")
> +				 (UNSPEC_MMA_PMXVF64GERNN	"pmdmxvf64gernn")])
> +
>  (define_int_attr vvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4		"pmxvi8ger4")])
>  
> +(define_int_attr vvi4i4i4_dm	[(UNSPEC_MMA_PMXVI8GER4		"pmdmxvi8ger4")])
> +
>  (define_int_attr avvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4PP	"pmxvi8ger4pp")
>  				 (UNSPEC_MMA_PMXVI8GER4SPP	"pmxvi8ger4spp")])
>  
> +(define_int_attr avvi4i4i4_dm	[(UNSPEC_MMA_PMXVI8GER4PP	"pmdmxvi8ger4pp")
> +				 (UNSPEC_MMA_PMXVI8GER4SPP	"pmdmxvi8ger4spp")])
>  
>  ;; Vector pair support.  OOmode can only live in VSRs.
>  (define_expand "movoo"
> @@ -629,7 +667,10 @@ (define_insn "mma_<vv>"
>  		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")]
>  		    MMA_VV))]
>    "TARGET_MMA"
> -  "<vv> %A0,%x1,%x2"
> +  "@
> +   dm<vv> %A0,%x1,%x2
> +   <vv> %A0,%x1,%x2
> +   <vv> %A0,%x1,%x2"
>    [(set_attr "type" "mma")
>     (set_attr "isa" "dm,not_dm,not_dm")])
>  
> @@ -650,7 +691,10 @@ (define_insn "mma_<pv>"
>  		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")]
>  		    MMA_PV))]
>    "TARGET_MMA"
> -  "<pv> %A0,%x1,%x2"
> +  "@
> +   dm<pv> %A0,%x1,%x2
> +   <pv> %A0,%x1,%x2
> +   <pv> %A0,%x1,%x2"
>    [(set_attr "type" "mma")
>     (set_attr "isa" "dm,not_dm,not_dm")])
>  
> @@ -661,7 +705,10 @@ (define_insn "mma_<apv>"
>  		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")]
>  		    MMA_APV))]
>    "TARGET_MMA"
> -  "<apv> %A0,%x2,%x3"
> +  "@
> +   dm<apv> %A0,%x2,%x3
> +   <apv> %A0,%x2,%x3
> +   <apv> %A0,%x2,%x3"
>    [(set_attr "type" "mma")
>     (set_attr "isa" "dm,not_dm,not_dm")])
>  
> @@ -674,7 +721,10 @@ (define_insn "mma_<vvi4i4i8>"
>  		    (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")]
>  		    MMA_VVI4I4I8))]
>    "TARGET_MMA"
> -  "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
> +  "@
> +   dm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5

typo?  I think you meant <vvi4i4i8_dm>, but it doesn't matter any more with the
above suggestion.

> +   <vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
> +   <vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
>    [(set_attr "type" "mma")
>     (set_attr "prefixed" "yes")
>     (set_attr "isa" "dm,not_dm,not_dm")])
> @@ -703,7 +753,10 @@ (define_insn "mma_<vvi4i4i2>"
>  		    (match_operand:SI 5 "const_0_to_3_operand" "n,n,n")]
>  		    MMA_VVI4I4I2))]
>    "TARGET_MMA"
> -  "<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
> +  "@
> +   <vvi4i4i2_dm> %A0,%x1,%x2,%3,%4,%5
> +   <vvi4i4i2> %A0,%x1,%x2,%3,%4,%5
> +   <vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
>    [(set_attr "type" "mma")
>     (set_attr "prefixed" "yes")
>     (set_attr "isa" "dm,not_dm,not_dm")])
> @@ -718,7 +771,10 @@ (define_insn "mma_<avvi4i4i2>"
>  		    (match_operand:SI 6 "const_0_to_3_operand" "n,n,n")]
>  		    MMA_AVVI4I4I2))]
>    "TARGET_MMA"
> -  "<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
> +  "@
> +   <avvi4i4i2_dm> %A0,%x2,%x3,%4,%5,%6
> +   <avvi4i4i2> %A0,%x2,%x3,%4,%5,%6
> +   <avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
>    [(set_attr "type" "mma")
>     (set_attr "prefixed" "yes")
>     (set_attr "isa" "dm,not_dm,not_dm")])
> @@ -731,7 +787,10 @@ (define_insn "mma_<vvi4i4>"
>  		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")]
>  		    MMA_VVI4I4))]
>    "TARGET_MMA"
> -  "<vvi4i4> %A0,%x1,%x2,%3,%4"
> +  "@
> +   <vvi4i4_dm> %A0,%x1,%x2,%3,%4
> +   <vvi4i4> %A0,%x1,%x2,%3,%4
> +   <vvi4i4> %A0,%x1,%x2,%3,%4"
>    [(set_attr "type" "mma")
>     (set_attr "prefixed" "yes")
>     (set_attr "isa" "dm,not_dm,not_dm")])
> @@ -745,7 +804,10 @@ (define_insn "mma_<avvi4i4>"
>  		    (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")]
>  		    MMA_AVVI4I4))]
>    "TARGET_MMA"
> -  "<avvi4i4> %A0,%x2,%x3,%4,%5"
> +  "@
> +   <avvi4i4_dm> %A0,%x2,%x3,%4,%5
> +   <avvi4i4> %A0,%x2,%x3,%4,%5
> +   <avvi4i4> %A0,%x2,%x3,%4,%5"
>    [(set_attr "type" "mma")
>     (set_attr "prefixed" "yes")
>     (set_attr "isa" "dm,not_dm,not_dm")])
> @@ -758,7 +820,10 @@ (define_insn "mma_<pvi4i2>"
>  		    (match_operand:SI 4 "const_0_to_3_operand" "n,n,n")]
>  		    MMA_PVI4I2))]
>    "TARGET_MMA"
> -  "<pvi4i2> %A0,%x1,%x2,%3,%4"
> +  "@
> +   <pvi4i2_dm> %A0,%x1,%x2,%3,%4
> +   <pvi4i2> %A0,%x1,%x2,%3,%4
> +   <pvi4i2> %A0,%x1,%x2,%3,%4"
>    [(set_attr "type" "mma")
>     (set_attr "prefixed" "yes")
>     (set_attr "isa" "dm,not_dm,not_dm")])
> @@ -772,7 +837,10 @@ (define_insn "mma_<apvi4i2>"
>  		    (match_operand:SI 5 "const_0_to_3_operand" "n,n,n")]
>  		    MMA_APVI4I2))]
>    "TARGET_MMA"
> -  "<apvi4i2> %A0,%x2,%x3,%4,%5"
> +  "@
> +   <apvi4i2_dm> %A0,%x2,%x3,%4,%5
> +   <apvi4i2> %A0,%x2,%x3,%4,%5
> +   <apvi4i2> %A0,%x2,%x3,%4,%5"
>    [(set_attr "type" "mma")
>     (set_attr "prefixed" "yes")
>     (set_attr "isa" "dm,not_dm,not_dm")])
> @@ -786,7 +854,10 @@ (define_insn "mma_<vvi4i4i4>"
>  		    (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")]
>  		    MMA_VVI4I4I4))]
>    "TARGET_MMA"
> -  "<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
> +  "@
> +   <vvi4i4i4_dm> %A0,%x1,%x2,%3,%4,%5
> +   <vvi4i4i4> %A0,%x1,%x2,%3,%4,%5
> +   <vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
>    [(set_attr "type" "mma")
>     (set_attr "prefixed" "yes")
>     (set_attr "isa" "dm,not_dm,not_dm")])
> @@ -801,7 +872,10 @@ (define_insn "mma_<avvi4i4i4>"
>  		    (match_operand:SI 6 "const_0_to_15_operand" "n,n,n")]
>  		    MMA_AVVI4I4I4))]
>    "TARGET_MMA"
> -  "<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
> +  "@
> +   <avvi4i4i4_dm> %A0,%x2,%x3,%4,%5,%6
> +   <avvi4i4i4> %A0,%x2,%x3,%4,%5,%6
> +   <avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
>    [(set_attr "type" "mma")
>     (set_attr "prefixed" "yes")
>     (set_attr "isa" "dm,not_dm,not_dm")])
> diff --git a/gcc/testsuite/gcc.target/powerpc/dm-double-test.c b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c
> new file mode 100644
> index 00000000000..66c19779585
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c
> @@ -0,0 +1,194 @@
> +/* Test derived from mma-double-1.c, modified for dense math.  */
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_dense_math_ok } */
> +/* { dg-options "-mdejagnu-cpu=future -O2" } */
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <altivec.h>
> +
> +typedef unsigned char vec_t __attribute__ ((vector_size (16)));
> +typedef double v4sf_t __attribute__ ((vector_size (16)));
> +#define SAVE_ACC(ACC, ldc, J)  \
> +	  __builtin_mma_disassemble_acc (result, ACC); \
> +	  rowC = (v4sf_t *) &CO[0*ldc+J]; \
> +          rowC[0] += result[0]; \
> +          rowC = (v4sf_t *) &CO[1*ldc+J]; \
> +          rowC[0] += result[1]; \
> +          rowC = (v4sf_t *) &CO[2*ldc+J]; \
> +          rowC[0] += result[2]; \
> +          rowC = (v4sf_t *) &CO[3*ldc+J]; \
> +	  rowC[0] += result[3];
> +
> +void
> +DM (int m, int n, int k, double *A, double *B, double *C)
> +{
> +  __vector_quad acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7;
> +  v4sf_t result[4];
> +  v4sf_t *rowC;
> +  for (int l = 0; l < n; l += 4)
> +    {
> +      double *CO;
> +      double *AO;
> +      AO = A;
> +      CO = C;
> +      C += m * 4;
> +      for (int j = 0; j < m; j += 16)
> +	{
> +	  double *BO = B;
> +	  __builtin_mma_xxsetaccz (&acc0);
> +	  __builtin_mma_xxsetaccz (&acc1);
> +	  __builtin_mma_xxsetaccz (&acc2);
> +	  __builtin_mma_xxsetaccz (&acc3);
> +	  __builtin_mma_xxsetaccz (&acc4);
> +	  __builtin_mma_xxsetaccz (&acc5);
> +	  __builtin_mma_xxsetaccz (&acc6);
> +	  __builtin_mma_xxsetaccz (&acc7);
> +	  unsigned long i;
> +
> +	  for (i = 0; i < k; i++)
> +	    {
> +	      vec_t *rowA = (vec_t *) & AO[i * 16];
> +	      __vector_pair rowB;
> +	      vec_t *rb = (vec_t *) & BO[i * 4];
> +	      __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
> +	      __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
> +	      __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
> +	      __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]);
> +	      __builtin_mma_xvf64gerpp (&acc3, rowB, rowA[3]);
> +	      __builtin_mma_xvf64gerpp (&acc4, rowB, rowA[4]);
> +	      __builtin_mma_xvf64gerpp (&acc5, rowB, rowA[5]);
> +	      __builtin_mma_xvf64gerpp (&acc6, rowB, rowA[6]);
> +	      __builtin_mma_xvf64gerpp (&acc7, rowB, rowA[7]);
> +	    }
> +	  SAVE_ACC (&acc0, m, 0);
> +	  SAVE_ACC (&acc2, m, 4);
> +	  SAVE_ACC (&acc1, m, 2);
> +	  SAVE_ACC (&acc3, m, 6);
> +	  SAVE_ACC (&acc4, m, 8);
> +	  SAVE_ACC (&acc6, m, 12);
> +	  SAVE_ACC (&acc5, m, 10);
> +	  SAVE_ACC (&acc7, m, 14);
> +	  AO += k * 16;
> +	  BO += k * 4;
> +	  CO += 16;
> +	}
> +      B += k * 4;
> +    }
> +}
> +
> +void
> +init (double *matrix, int row, int column)
> +{
> +  for (int j = 0; j < column; j++)
> +    {
> +      for (int i = 0; i < row; i++)
> +	{
> +	  matrix[j * row + i] = (i * 16 + 2 + j) / 0.123;
> +	}
> +    }
> +}
> +
> +void
> +init0 (double *matrix, double *matrix1, int row, int column)
> +{
> +  for (int j = 0; j < column; j++)
> +    for (int i = 0; i < row; i++)
> +      matrix[j * row + i] = matrix1[j * row + i] = 0;
> +}
> +
> +
> +void
> +print (const char *name, const double *matrix, int row, int column)
> +{
> +  printf ("Matrix %s has %d rows and %d columns:\n", name, row, column);
> +  for (int i = 0; i < row; i++)
> +    {
> +      for (int j = 0; j < column; j++)
> +	{
> +	  printf ("%f ", matrix[j * row + i]);
> +	}
> +      printf ("\n");
> +    }
> +  printf ("\n");
> +}
> +
> +int
> +main (int argc, char *argv[])
> +{
> +  int rowsA, colsB, common;
> +  int i, j, k;
> +  int ret = 0;
> +
> +  for (int t = 16; t <= 128; t += 16)
> +    {
> +      for (int t1 = 4; t1 <= 16; t1 += 4)
> +	{
> +	  rowsA = t;
> +	  colsB = t1;
> +	  common = 1;
> +	  /* printf ("Running test for rows = %d,cols = %d\n", t, t1); */
> +	  double A[rowsA * common];
> +	  double B[common * colsB];
> +	  double C[rowsA * colsB];
> +	  double D[rowsA * colsB];
> +
> +
> +	  init (A, rowsA, common);
> +	  init (B, common, colsB);
> +	  init0 (C, D, rowsA, colsB);
> +	  DM (rowsA, colsB, common, A, B, C);
> +
> +	  for (i = 0; i < colsB; i++)
> +	    {
> +	      for (j = 0; j < rowsA; j++)
> +		{
> +		  D[i * rowsA + j] = 0;
> +		  for (k = 0; k < common; k++)
> +		    {
> +		      D[i * rowsA + j] +=
> +			A[k * rowsA + j] * B[k + common * i];
> +		    }
> +		}
> +	    }
> +	  for (i = 0; i < colsB; i++)
> +	    {
> +	      for (j = 0; j < rowsA; j++)
> +		{
> +		  for (k = 0; k < common; k++)
> +		    {
> +		      if (D[i * rowsA + j] != C[i * rowsA + j])
> +			{
> +			  printf ("Error %d,%d,%d\n",i,j,k);
> +			  ret++;
> +			}
> +		    }
> +		}
> +	    }
> +	  if (ret)
> +	    {
> +	      print ("A", A, rowsA, common);
> +	      print ("B", B, common, colsB);
> +	      print ("C", C, rowsA, colsB);
> +	      print ("D", D, rowsA, colsB);
> +	    }
> +	}
> +    }
> +  
> +#ifdef VERBOSE
> +  if (ret)
> +    printf ("DM double test fail: %d errors\n",ret);
> +  else
> +    printf ("DM double test success: 0 DM errors\n");
> +#else
> +  if (ret)
> +    abort();
> +#endif
> +      
> +  return ret;
> +}
> +
> +/* { dg-final { scan-assembler {\mdmsetdmrz\M}      } } */
> +/* { dg-final { scan-assembler {\mdmxvf64gerpp\M}   } } */
> +/* { dg-final { scan-assembler {\mdmxxextfdmr512\M} } } */
> +
> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
> index 1b4a3fb18df..2dec3682a2f 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -7101,6 +7101,25 @@ proc check_effective_target_power10_ok { } {
>      }
>  }
>  
> +# Return 1 if this is a PowerPC target supporting -mcpu=future or -mdense-math

s/ or -mdense-math//

The others look good to me, thanks!

BR,
Kewen

> +# which enables the dense math operations.
> +proc check_effective_target_powerpc_dense_math_ok { } {
> +	return [check_no_compiler_messages_nocache powerpc_dense_math_ok assembly {
> +		__vector_quad vq;
> +		void test (void)
> +		{
> +		#ifndef __PPC_DMR__
> +		#error "target does not have dense math support."
> +		#else
> +		/* Make sure we have dense math support.  */
> +		  __vector_quad dmr;
> +		  __asm__ ("dmsetaccz %A0" : "=wD" (dmr));
> +		  vq = dmr;
> +		#endif
> +		}
> +	} "-mcpu=future"]
> +}
> +
>  # Return 1 if this is a PowerPC target supporting -mfloat128 via either
>  # software emulation on power7/power8 systems or hardware support on power9.
>
  
Michael Meissner Feb. 7, 2024, 8:01 p.m. UTC | #3
On Sun, Feb 04, 2024 at 01:47:12PM +0800, Kewen.Lin wrote:
> > diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
> > index 525a85146ff..f06e6bbb184 100644
> > --- a/gcc/config/rs6000/mma.md
> > +++ b/gcc/config/rs6000/mma.md
> > @@ -227,13 +227,22 @@ (define_int_attr apv		[(UNSPEC_MMA_XVF64GERPP		"xvf64gerpp")
> >  
> >  (define_int_attr vvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8		"pmxvi4ger8")])
> >  
> > +(define_int_attr vvi4i4i8_dm	[(UNSPEC_MMA_PMXVI4GER8		"pmdmxvi4ger8")])
> 
> Can we update vvi4i4i8 to
> 
> (define_int_attr vvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8		"xvi4ger8")])
> 
> by avoiding to introduce vvi4i4i8_dm, then its use places would be like:
> 
> -  "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
> +  "@
> +   pmdm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
> +   pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
> +   pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
> 
> and 
> 
> - define_insn "mma_<vvi4i4i8>"
> + define_insn "mma_pm<vvi4i4i8>"
> 
> (or updating its use in corresponding bif expander field)

Yes I can do that.
  

Patch

diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index 525a85146ff..f06e6bbb184 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -227,13 +227,22 @@  (define_int_attr apv		[(UNSPEC_MMA_XVF64GERPP		"xvf64gerpp")
 
 (define_int_attr vvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8		"pmxvi4ger8")])
 
+(define_int_attr vvi4i4i8_dm	[(UNSPEC_MMA_PMXVI4GER8		"pmdmxvi4ger8")])
+
 (define_int_attr avvi4i4i8	[(UNSPEC_MMA_PMXVI4GER8PP	"pmxvi4ger8pp")])
 
+(define_int_attr avvi4i4i8_dm	[(UNSPEC_MMA_PMXVI4GER8PP	"pmdmxvi4ger8pp")])
+
 (define_int_attr vvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2	"pmxvi16ger2")
 				 (UNSPEC_MMA_PMXVI16GER2S	"pmxvi16ger2s")
 				 (UNSPEC_MMA_PMXVF16GER2	"pmxvf16ger2")
 				 (UNSPEC_MMA_PMXVBF16GER2	"pmxvbf16ger2")])
 
+(define_int_attr vvi4i4i2_dm	[(UNSPEC_MMA_PMXVI16GER2	"pmdmxvi16ger2")
+				 (UNSPEC_MMA_PMXVI16GER2S	"pmdmxvi16ger2s")
+				 (UNSPEC_MMA_PMXVF16GER2	"pmdmxvf16ger2")
+				 (UNSPEC_MMA_PMXVBF16GER2	"pmdmxvbf16ger2")])
+
 (define_int_attr avvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2PP	"pmxvi16ger2pp")
 				 (UNSPEC_MMA_PMXVI16GER2SPP	"pmxvi16ger2spp")
 				 (UNSPEC_MMA_PMXVF16GER2PP	"pmxvf16ger2pp")
@@ -245,25 +254,54 @@  (define_int_attr avvi4i4i2	[(UNSPEC_MMA_PMXVI16GER2PP	"pmxvi16ger2pp")
 				 (UNSPEC_MMA_PMXVBF16GER2NP	"pmxvbf16ger2np")
 				 (UNSPEC_MMA_PMXVBF16GER2NN	"pmxvbf16ger2nn")])
 
+(define_int_attr avvi4i4i2_dm	[(UNSPEC_MMA_PMXVI16GER2PP	"pmdmxvi16ger2pp")
+				 (UNSPEC_MMA_PMXVI16GER2SPP	"pmdmxvi16ger2spp")
+				 (UNSPEC_MMA_PMXVF16GER2PP	"pmdmxvf16ger2pp")
+				 (UNSPEC_MMA_PMXVF16GER2PN	"pmdmxvf16ger2pn")
+				 (UNSPEC_MMA_PMXVF16GER2NP	"pmdmxvf16ger2np")
+				 (UNSPEC_MMA_PMXVF16GER2NN	"pmdmxvf16ger2nn")
+				 (UNSPEC_MMA_PMXVBF16GER2PP	"pmdmxvbf16ger2pp")
+				 (UNSPEC_MMA_PMXVBF16GER2PN	"pmdmxvbf16ger2pn")
+				 (UNSPEC_MMA_PMXVBF16GER2NP	"pmdmxvbf16ger2np")
+				 (UNSPEC_MMA_PMXVBF16GER2NN	"pmdmxvbf16ger2nn")])
+
 (define_int_attr vvi4i4		[(UNSPEC_MMA_PMXVF32GER		"pmxvf32ger")])
 
+(define_int_attr vvi4i4_dm	[(UNSPEC_MMA_PMXVF32GER		"pmdmxvf32ger")])
+
 (define_int_attr avvi4i4	[(UNSPEC_MMA_PMXVF32GERPP	"pmxvf32gerpp")
 				 (UNSPEC_MMA_PMXVF32GERPN	"pmxvf32gerpn")
 				 (UNSPEC_MMA_PMXVF32GERNP	"pmxvf32gernp")
 				 (UNSPEC_MMA_PMXVF32GERNN	"pmxvf32gernn")])
 
+(define_int_attr avvi4i4_dm	[(UNSPEC_MMA_PMXVF32GERPP	"pmdmxvf32gerpp")
+				 (UNSPEC_MMA_PMXVF32GERPN	"pmdmxvf32gerpn")
+				 (UNSPEC_MMA_PMXVF32GERNP	"pmdmxvf32gernp")
+				 (UNSPEC_MMA_PMXVF32GERNN	"pmdmxvf32gernn")])
+
 (define_int_attr pvi4i2		[(UNSPEC_MMA_PMXVF64GER		"pmxvf64ger")])
 
+(define_int_attr pvi4i2_dm	[(UNSPEC_MMA_PMXVF64GER		"pmdmxvf64ger")])
+
 (define_int_attr apvi4i2	[(UNSPEC_MMA_PMXVF64GERPP	"pmxvf64gerpp")
 				 (UNSPEC_MMA_PMXVF64GERPN	"pmxvf64gerpn")
 				 (UNSPEC_MMA_PMXVF64GERNP	"pmxvf64gernp")
 				 (UNSPEC_MMA_PMXVF64GERNN	"pmxvf64gernn")])
 
+(define_int_attr apvi4i2_dm	[(UNSPEC_MMA_PMXVF64GERPP	"pmdmxvf64gerpp")
+				 (UNSPEC_MMA_PMXVF64GERPN	"pmdmxvf64gerpn")
+				 (UNSPEC_MMA_PMXVF64GERNP	"pmdmxvf64gernp")
+				 (UNSPEC_MMA_PMXVF64GERNN	"pmdmxvf64gernn")])
+
 (define_int_attr vvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4		"pmxvi8ger4")])
 
+(define_int_attr vvi4i4i4_dm	[(UNSPEC_MMA_PMXVI8GER4		"pmdmxvi8ger4")])
+
 (define_int_attr avvi4i4i4	[(UNSPEC_MMA_PMXVI8GER4PP	"pmxvi8ger4pp")
 				 (UNSPEC_MMA_PMXVI8GER4SPP	"pmxvi8ger4spp")])
 
+(define_int_attr avvi4i4i4_dm	[(UNSPEC_MMA_PMXVI8GER4PP	"pmdmxvi8ger4pp")
+				 (UNSPEC_MMA_PMXVI8GER4SPP	"pmdmxvi8ger4spp")])
 
 ;; Vector pair support.  OOmode can only live in VSRs.
 (define_expand "movoo"
@@ -629,7 +667,10 @@  (define_insn "mma_<vv>"
 		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")]
 		    MMA_VV))]
   "TARGET_MMA"
-  "<vv> %A0,%x1,%x2"
+  "@
+   dm<vv> %A0,%x1,%x2
+   <vv> %A0,%x1,%x2
+   <vv> %A0,%x1,%x2"
   [(set_attr "type" "mma")
    (set_attr "isa" "dm,not_dm,not_dm")])
 
@@ -650,7 +691,10 @@  (define_insn "mma_<pv>"
 		    (match_operand:V16QI 2 "vsx_register_operand" "wa,v,?wa")]
 		    MMA_PV))]
   "TARGET_MMA"
-  "<pv> %A0,%x1,%x2"
+  "@
+   dm<pv> %A0,%x1,%x2
+   <pv> %A0,%x1,%x2
+   <pv> %A0,%x1,%x2"
   [(set_attr "type" "mma")
    (set_attr "isa" "dm,not_dm,not_dm")])
 
@@ -661,7 +705,10 @@  (define_insn "mma_<apv>"
 		    (match_operand:V16QI 3 "vsx_register_operand" "wa,v,?wa")]
 		    MMA_APV))]
   "TARGET_MMA"
-  "<apv> %A0,%x2,%x3"
+  "@
+   dm<apv> %A0,%x2,%x3
+   <apv> %A0,%x2,%x3
+   <apv> %A0,%x2,%x3"
   [(set_attr "type" "mma")
    (set_attr "isa" "dm,not_dm,not_dm")])
 
@@ -674,7 +721,10 @@  (define_insn "mma_<vvi4i4i8>"
 		    (match_operand:SI 5 "u8bit_cint_operand" "n,n,n")]
 		    MMA_VVI4I4I8))]
   "TARGET_MMA"
-  "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
+  "@
+   dm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
+   <vvi4i4i8> %A0,%x1,%x2,%3,%4,%5
+   <vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")
    (set_attr "isa" "dm,not_dm,not_dm")])
@@ -703,7 +753,10 @@  (define_insn "mma_<vvi4i4i2>"
 		    (match_operand:SI 5 "const_0_to_3_operand" "n,n,n")]
 		    MMA_VVI4I4I2))]
   "TARGET_MMA"
-  "<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
+  "@
+   <vvi4i4i2_dm> %A0,%x1,%x2,%3,%4,%5
+   <vvi4i4i2> %A0,%x1,%x2,%3,%4,%5
+   <vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")
    (set_attr "isa" "dm,not_dm,not_dm")])
@@ -718,7 +771,10 @@  (define_insn "mma_<avvi4i4i2>"
 		    (match_operand:SI 6 "const_0_to_3_operand" "n,n,n")]
 		    MMA_AVVI4I4I2))]
   "TARGET_MMA"
-  "<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
+  "@
+   <avvi4i4i2_dm> %A0,%x2,%x3,%4,%5,%6
+   <avvi4i4i2> %A0,%x2,%x3,%4,%5,%6
+   <avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")
    (set_attr "isa" "dm,not_dm,not_dm")])
@@ -731,7 +787,10 @@  (define_insn "mma_<vvi4i4>"
 		    (match_operand:SI 4 "const_0_to_15_operand" "n,n,n")]
 		    MMA_VVI4I4))]
   "TARGET_MMA"
-  "<vvi4i4> %A0,%x1,%x2,%3,%4"
+  "@
+   <vvi4i4_dm> %A0,%x1,%x2,%3,%4
+   <vvi4i4> %A0,%x1,%x2,%3,%4
+   <vvi4i4> %A0,%x1,%x2,%3,%4"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")
    (set_attr "isa" "dm,not_dm,not_dm")])
@@ -745,7 +804,10 @@  (define_insn "mma_<avvi4i4>"
 		    (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")]
 		    MMA_AVVI4I4))]
   "TARGET_MMA"
-  "<avvi4i4> %A0,%x2,%x3,%4,%5"
+  "@
+   <avvi4i4_dm> %A0,%x2,%x3,%4,%5
+   <avvi4i4> %A0,%x2,%x3,%4,%5
+   <avvi4i4> %A0,%x2,%x3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")
    (set_attr "isa" "dm,not_dm,not_dm")])
@@ -758,7 +820,10 @@  (define_insn "mma_<pvi4i2>"
 		    (match_operand:SI 4 "const_0_to_3_operand" "n,n,n")]
 		    MMA_PVI4I2))]
   "TARGET_MMA"
-  "<pvi4i2> %A0,%x1,%x2,%3,%4"
+  "@
+   <pvi4i2_dm> %A0,%x1,%x2,%3,%4
+   <pvi4i2> %A0,%x1,%x2,%3,%4
+   <pvi4i2> %A0,%x1,%x2,%3,%4"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")
    (set_attr "isa" "dm,not_dm,not_dm")])
@@ -772,7 +837,10 @@  (define_insn "mma_<apvi4i2>"
 		    (match_operand:SI 5 "const_0_to_3_operand" "n,n,n")]
 		    MMA_APVI4I2))]
   "TARGET_MMA"
-  "<apvi4i2> %A0,%x2,%x3,%4,%5"
+  "@
+   <apvi4i2_dm> %A0,%x2,%x3,%4,%5
+   <apvi4i2> %A0,%x2,%x3,%4,%5
+   <apvi4i2> %A0,%x2,%x3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")
    (set_attr "isa" "dm,not_dm,not_dm")])
@@ -786,7 +854,10 @@  (define_insn "mma_<vvi4i4i4>"
 		    (match_operand:SI 5 "const_0_to_15_operand" "n,n,n")]
 		    MMA_VVI4I4I4))]
   "TARGET_MMA"
-  "<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
+  "@
+   <vvi4i4i4_dm> %A0,%x1,%x2,%3,%4,%5
+   <vvi4i4i4> %A0,%x1,%x2,%3,%4,%5
+   <vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")
    (set_attr "isa" "dm,not_dm,not_dm")])
@@ -801,7 +872,10 @@  (define_insn "mma_<avvi4i4i4>"
 		    (match_operand:SI 6 "const_0_to_15_operand" "n,n,n")]
 		    MMA_AVVI4I4I4))]
   "TARGET_MMA"
-  "<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
+  "@
+   <avvi4i4i4_dm> %A0,%x2,%x3,%4,%5,%6
+   <avvi4i4i4> %A0,%x2,%x3,%4,%5,%6
+   <avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
   [(set_attr "type" "mma")
    (set_attr "prefixed" "yes")
    (set_attr "isa" "dm,not_dm,not_dm")])
diff --git a/gcc/testsuite/gcc.target/powerpc/dm-double-test.c b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c
new file mode 100644
index 00000000000..66c19779585
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dm-double-test.c
@@ -0,0 +1,194 @@ 
+/* Test derived from mma-double-1.c, modified for dense math.  */
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_dense_math_ok } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <altivec.h>
+
+typedef unsigned char vec_t __attribute__ ((vector_size (16)));
+typedef double v4sf_t __attribute__ ((vector_size (16)));
+#define SAVE_ACC(ACC, ldc, J)  \
+	  __builtin_mma_disassemble_acc (result, ACC); \
+	  rowC = (v4sf_t *) &CO[0*ldc+J]; \
+          rowC[0] += result[0]; \
+          rowC = (v4sf_t *) &CO[1*ldc+J]; \
+          rowC[0] += result[1]; \
+          rowC = (v4sf_t *) &CO[2*ldc+J]; \
+          rowC[0] += result[2]; \
+          rowC = (v4sf_t *) &CO[3*ldc+J]; \
+	  rowC[0] += result[3];
+
+void
+DM (int m, int n, int k, double *A, double *B, double *C)
+{
+  __vector_quad acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7;
+  v4sf_t result[4];
+  v4sf_t *rowC;
+  for (int l = 0; l < n; l += 4)
+    {
+      double *CO;
+      double *AO;
+      AO = A;
+      CO = C;
+      C += m * 4;
+      for (int j = 0; j < m; j += 16)
+	{
+	  double *BO = B;
+	  __builtin_mma_xxsetaccz (&acc0);
+	  __builtin_mma_xxsetaccz (&acc1);
+	  __builtin_mma_xxsetaccz (&acc2);
+	  __builtin_mma_xxsetaccz (&acc3);
+	  __builtin_mma_xxsetaccz (&acc4);
+	  __builtin_mma_xxsetaccz (&acc5);
+	  __builtin_mma_xxsetaccz (&acc6);
+	  __builtin_mma_xxsetaccz (&acc7);
+	  unsigned long i;
+
+	  for (i = 0; i < k; i++)
+	    {
+	      vec_t *rowA = (vec_t *) & AO[i * 16];
+	      __vector_pair rowB;
+	      vec_t *rb = (vec_t *) & BO[i * 4];
+	      __builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
+	      __builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
+	      __builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
+	      __builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]);
+	      __builtin_mma_xvf64gerpp (&acc3, rowB, rowA[3]);
+	      __builtin_mma_xvf64gerpp (&acc4, rowB, rowA[4]);
+	      __builtin_mma_xvf64gerpp (&acc5, rowB, rowA[5]);
+	      __builtin_mma_xvf64gerpp (&acc6, rowB, rowA[6]);
+	      __builtin_mma_xvf64gerpp (&acc7, rowB, rowA[7]);
+	    }
+	  SAVE_ACC (&acc0, m, 0);
+	  SAVE_ACC (&acc2, m, 4);
+	  SAVE_ACC (&acc1, m, 2);
+	  SAVE_ACC (&acc3, m, 6);
+	  SAVE_ACC (&acc4, m, 8);
+	  SAVE_ACC (&acc6, m, 12);
+	  SAVE_ACC (&acc5, m, 10);
+	  SAVE_ACC (&acc7, m, 14);
+	  AO += k * 16;
+	  BO += k * 4;
+	  CO += 16;
+	}
+      B += k * 4;
+    }
+}
+
+void
+init (double *matrix, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+    {
+      for (int i = 0; i < row; i++)
+	{
+	  matrix[j * row + i] = (i * 16 + 2 + j) / 0.123;
+	}
+    }
+}
+
+void
+init0 (double *matrix, double *matrix1, int row, int column)
+{
+  for (int j = 0; j < column; j++)
+    for (int i = 0; i < row; i++)
+      matrix[j * row + i] = matrix1[j * row + i] = 0;
+}
+
+
+void
+print (const char *name, const double *matrix, int row, int column)
+{
+  printf ("Matrix %s has %d rows and %d columns:\n", name, row, column);
+  for (int i = 0; i < row; i++)
+    {
+      for (int j = 0; j < column; j++)
+	{
+	  printf ("%f ", matrix[j * row + i]);
+	}
+      printf ("\n");
+    }
+  printf ("\n");
+}
+
+int
+main (int argc, char *argv[])
+{
+  int rowsA, colsB, common;
+  int i, j, k;
+  int ret = 0;
+
+  for (int t = 16; t <= 128; t += 16)
+    {
+      for (int t1 = 4; t1 <= 16; t1 += 4)
+	{
+	  rowsA = t;
+	  colsB = t1;
+	  common = 1;
+	  /* printf ("Running test for rows = %d,cols = %d\n", t, t1); */
+	  double A[rowsA * common];
+	  double B[common * colsB];
+	  double C[rowsA * colsB];
+	  double D[rowsA * colsB];
+
+
+	  init (A, rowsA, common);
+	  init (B, common, colsB);
+	  init0 (C, D, rowsA, colsB);
+	  DM (rowsA, colsB, common, A, B, C);
+
+	  for (i = 0; i < colsB; i++)
+	    {
+	      for (j = 0; j < rowsA; j++)
+		{
+		  D[i * rowsA + j] = 0;
+		  for (k = 0; k < common; k++)
+		    {
+		      D[i * rowsA + j] +=
+			A[k * rowsA + j] * B[k + common * i];
+		    }
+		}
+	    }
+	  for (i = 0; i < colsB; i++)
+	    {
+	      for (j = 0; j < rowsA; j++)
+		{
+		  for (k = 0; k < common; k++)
+		    {
+		      if (D[i * rowsA + j] != C[i * rowsA + j])
+			{
+			  printf ("Error %d,%d,%d\n",i,j,k);
+			  ret++;
+			}
+		    }
+		}
+	    }
+	  if (ret)
+	    {
+	      print ("A", A, rowsA, common);
+	      print ("B", B, common, colsB);
+	      print ("C", C, rowsA, colsB);
+	      print ("D", D, rowsA, colsB);
+	    }
+	}
+    }
+  
+#ifdef VERBOSE
+  if (ret)
+    printf ("DM double test fail: %d errors\n",ret);
+  else
+    printf ("DM double test success: 0 DM errors\n");
+#else
+  if (ret)
+    abort();
+#endif
+      
+  return ret;
+}
+
+/* { dg-final { scan-assembler {\mdmsetdmrz\M}      } } */
+/* { dg-final { scan-assembler {\mdmxvf64gerpp\M}   } } */
+/* { dg-final { scan-assembler {\mdmxxextfdmr512\M} } } */
+
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 1b4a3fb18df..2dec3682a2f 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -7101,6 +7101,25 @@  proc check_effective_target_power10_ok { } {
     }
 }
 
+# Return 1 if this is a PowerPC target supporting -mcpu=future or -mdense-math
+# which enables the dense math operations.
+proc check_effective_target_powerpc_dense_math_ok { } {
+	return [check_no_compiler_messages_nocache powerpc_dense_math_ok assembly {
+		__vector_quad vq;
+		void test (void)
+		{
+		#ifndef __PPC_DMR__
+		#error "target does not have dense math support."
+		#else
+		/* Make sure we have dense math support.  */
+		  __vector_quad dmr;
+		  __asm__ ("dmsetaccz %A0" : "=wD" (dmr));
+		  vq = dmr;
+		#endif
+		}
+	} "-mcpu=future"]
+}
+
 # Return 1 if this is a PowerPC target supporting -mfloat128 via either
 # software emulation on power7/power8 systems or hardware support on power9.