[V2,rs6000] Disable generation of scalar modulo instructions

Message ID 36819cc8-c948-1426-2429-7fe4f6b67c94@linux.ibm.com
State Corrupt patch
Headers
Series [V2,rs6000] Disable generation of scalar modulo instructions |

Checks

Context Check Description
snail/gcc-patch-check warning Git am fail log

Commit Message

Pat Haugen April 18, 2023, 12:22 p.m. UTC
  Updated from prior patch to also disable for int128.


Disable generation of scalar modulo instructions.

It was recently discovered that the scalar modulo instructions can suffer
noticeable performance issues for certain input values. This patch disables
their generation since the equivalent div/mul/sub sequence does not suffer
the same problem.

Bootstrapped and regression tested on powerpc64/powerpc64le.
Ok for master and backports after burn in?

-Pat


2023-04-18  Pat Haugen  <pthaugen@linux.ibm.com>

gcc/
	* config/rs6000/rs6000.h (RS6000_DISABLE_SCALAR_MODULO): New.
	* config/rs6000/rs6000.md (mod<mode>3, *mod<mode>3): Disable.
	(define_expand umod<mode>3): New.
	(define_insn umod<mode>3): Rename to *umod<mode>3 and disable.
	(umodti3, modti3): Disable.

gcc/testsuite/
	* gcc.target/powerpc/clone1.c: Add xfails.
	* gcc.target/powerpc/clone3.c: Likewise.
	* gcc.target/powerpc/mod-1.c: Likewise.
	* gcc.target/powerpc/mod-2.c: Likewise.
	* gcc.target/powerpc/p10-vdivq-vmodq.c: Likewise.
  

Comments

Pat Haugen May 4, 2023, 8:25 p.m. UTC | #1
Ping.

On 4/18/23 7:22 AM, Pat Haugen via Gcc-patches wrote:
> Updated from prior patch to also disable for int128.
> 
> 
> Disable generation of scalar modulo instructions.
> 
> It was recently discovered that the scalar modulo instructions can suffer
> noticeable performance issues for certain input values. This patch disables
> their generation since the equivalent div/mul/sub sequence does not suffer
> the same problem.
> 
> Bootstrapped and regression tested on powerpc64/powerpc64le.
> Ok for master and backports after burn in?
> 
> -Pat
> 
> 
> 2023-04-18  Pat Haugen  <pthaugen@linux.ibm.com>
> 
> gcc/
>      * config/rs6000/rs6000.h (RS6000_DISABLE_SCALAR_MODULO): New.
>      * config/rs6000/rs6000.md (mod<mode>3, *mod<mode>3): Disable.
>      (define_expand umod<mode>3): New.
>      (define_insn umod<mode>3): Rename to *umod<mode>3 and disable.
>      (umodti3, modti3): Disable.
> 
> gcc/testsuite/
>      * gcc.target/powerpc/clone1.c: Add xfails.
>      * gcc.target/powerpc/clone3.c: Likewise.
>      * gcc.target/powerpc/mod-1.c: Likewise.
>      * gcc.target/powerpc/mod-2.c: Likewise.
>      * gcc.target/powerpc/p10-vdivq-vmodq.c: Likewise.
> 
> 
> diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
> index 3503614efbd..1cf0a0013c0 100644
> --- a/gcc/config/rs6000/rs6000.h
> +++ b/gcc/config/rs6000/rs6000.h
> @@ -2492,3 +2492,9 @@ while (0)
>          rs6000_asm_output_opcode (STREAM);                \
>       }                                    \
>     while (0)
> +
> +/* Disable generation of scalar modulo instructions due to performance 
> issues
> +   with certain input values. This can be removed in the future when the
> +   issues have been resolved.  */
> +#define RS6000_DISABLE_SCALAR_MODULO 1
> +
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 44f7dd509cb..4f397bc9179 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -3421,6 +3421,17 @@ (define_expand "mod<mode>3"
>       FAIL;
> 
>         operands[2] = force_reg (<MODE>mode, operands[2]);
> +
> +      if (RS6000_DISABLE_SCALAR_MODULO)
> +    {
> +      temp1 = gen_reg_rtx (<MODE>mode);
> +      temp2 = gen_reg_rtx (<MODE>mode);
> +
> +      emit_insn (gen_div<mode>3 (temp1, operands[1], operands[2]));
> +      emit_insn (gen_mul<mode>3 (temp2, temp1, operands[2]));
> +      emit_insn (gen_sub<mode>3 (operands[0], operands[1], temp2));
> +      DONE;
> +    }
>       }
>     else
>       {
> @@ -3440,17 +3451,42 @@ (define_insn "*mod<mode>3"
>     [(set (match_operand:GPR 0 "gpc_reg_operand" "=&r,r")
>           (mod:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r")
>            (match_operand:GPR 2 "gpc_reg_operand" "r,r")))]
> -  "TARGET_MODULO"
> +  "TARGET_MODULO && !RS6000_DISABLE_SCALAR_MODULO"
>     "mods<wd> %0,%1,%2"
>     [(set_attr "type" "div")
>      (set_attr "size" "<bits>")])
> 
> +;; This define_expand can be removed when RS6000_DISABLE_SCALAR_MODULO is
> +;; removed.
> +(define_expand "umod<mode>3"
> +  [(set (match_operand:GPR 0 "gpc_reg_operand")
> +    (umod:GPR (match_operand:GPR 1 "gpc_reg_operand")
> +          (match_operand:GPR 2 "gpc_reg_operand")))]
> +  ""
> +{
> +  rtx temp1;
> +  rtx temp2;
> +
> +  if (!TARGET_MODULO)
> +    FAIL;
> 
> -(define_insn "umod<mode>3"
> +  if (RS6000_DISABLE_SCALAR_MODULO)
> +    {
> +      temp1 = gen_reg_rtx (<MODE>mode);
> +      temp2 = gen_reg_rtx (<MODE>mode);
> +
> +      emit_insn (gen_udiv<mode>3 (temp1, operands[1], operands[2]));
> +      emit_insn (gen_mul<mode>3 (temp2, temp1, operands[2]));
> +      emit_insn (gen_sub<mode>3 (operands[0], operands[1], temp2));
> +      DONE;
> +    }
> +})
> +
> +(define_insn "*umod<mode>3"
>     [(set (match_operand:GPR 0 "gpc_reg_operand" "=&r,r")
>           (umod:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r")
>             (match_operand:GPR 2 "gpc_reg_operand" "r,r")))]
> -  "TARGET_MODULO"
> +  "TARGET_MODULO && !RS6000_DISABLE_SCALAR_MODULO"
>     "modu<wd> %0,%1,%2"
>     [(set_attr "type" "div")
>      (set_attr "size" "<bits>")])
> @@ -3507,7 +3543,7 @@ (define_insn "umodti3"
>     [(set (match_operand:TI 0 "altivec_register_operand" "=v")
>       (umod:TI (match_operand:TI 1 "altivec_register_operand" "v")
>            (match_operand:TI 2 "altivec_register_operand" "v")))]
> -  "TARGET_POWER10 && TARGET_POWERPC64"
> +  "TARGET_POWER10 && TARGET_POWERPC64 && !RS6000_DISABLE_SCALAR_MODULO"
>     "vmoduq %0,%1,%2"
>     [(set_attr "type" "vecdiv")
>      (set_attr "size" "128")])
> @@ -3516,7 +3552,7 @@ (define_insn "modti3"
>     [(set (match_operand:TI 0 "altivec_register_operand" "=v")
>       (mod:TI (match_operand:TI 1 "altivec_register_operand" "v")
>           (match_operand:TI 2 "altivec_register_operand" "v")))]
> -  "TARGET_POWER10 && TARGET_POWERPC64"
> +  "TARGET_POWER10 && TARGET_POWERPC64 && !RS6000_DISABLE_SCALAR_MODULO"
>     "vmodsq %0,%1,%2"
>     [(set_attr "type" "vecdiv")
>      (set_attr "size" "128")])
> diff --git a/gcc/testsuite/gcc.target/powerpc/clone1.c 
> b/gcc/testsuite/gcc.target/powerpc/clone1.c
> index c69fd2aa1b8..74323ca0e8c 100644
> --- a/gcc/testsuite/gcc.target/powerpc/clone1.c
> +++ b/gcc/testsuite/gcc.target/powerpc/clone1.c
> @@ -21,6 +21,7 @@ long mod_func_or (long a, long b, long c)
>     return mod_func (a, b) | c;
>   }
> 
> -/* { dg-final { scan-assembler-times {\mdivd\M}  1 } } */
> -/* { dg-final { scan-assembler-times {\mmulld\M} 1 } } */
> -/* { dg-final { scan-assembler-times {\mmodsd\M} 1 } } */
> +/* { Fail due to RS6000_DISABLE_SCALAR_MODULO. */
> +/* { dg-final { scan-assembler-times {\mdivd\M}  1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times {\mmulld\M} 1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times {\mmodsd\M} 1 { xfail *-*-* } } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/clone3.c 
> b/gcc/testsuite/gcc.target/powerpc/clone3.c
> index 911b88b781d..d3eb4dd2378 100644
> --- a/gcc/testsuite/gcc.target/powerpc/clone3.c
> +++ b/gcc/testsuite/gcc.target/powerpc/clone3.c
> @@ -27,7 +27,8 @@ long mod_func_or (long a, long b, long c)
>     return mod_func (a, b) | c;
>   }
> 
> -/* { dg-final { scan-assembler-times {\mdivd\M}  1 } } */
> -/* { dg-final { scan-assembler-times {\mmulld\M} 1 } } */
> -/* { dg-final { scan-assembler-times {\mmodsd\M} 2 } } */
> +/* { Fail due to RS6000_DISABLE_SCALAR_MODULO. */
> +/* { dg-final { scan-assembler-times {\mdivd\M}  1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times {\mmulld\M} 1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times {\mmodsd\M} 2 { xfail *-*-* } } } */
>   /* { dg-final { scan-assembler-times {\mpld\M}   1 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/mod-1.c 
> b/gcc/testsuite/gcc.target/powerpc/mod-1.c
> index 861ba670af4..74af98f5bc3 100644
> --- a/gcc/testsuite/gcc.target/powerpc/mod-1.c
> +++ b/gcc/testsuite/gcc.target/powerpc/mod-1.c
> @@ -7,13 +7,14 @@ long lsmod (long a, long b) { return a%b; }
>   unsigned int iumod (unsigned int a, unsigned int b) { return a%b; }
>   unsigned long lumod (unsigned long a, unsigned long b) { return a%b; }
> 
> -/* { dg-final { scan-assembler-times "modsw " 1 } } */
> -/* { dg-final { scan-assembler-times "modsd " 1 } } */
> -/* { dg-final { scan-assembler-times "moduw " 1 } } */
> -/* { dg-final { scan-assembler-times "modud " 1 } } */
> -/* { dg-final { scan-assembler-not   "mullw "   } } */
> -/* { dg-final { scan-assembler-not   "mulld "   } } */
> -/* { dg-final { scan-assembler-not   "divw "    } } */
> -/* { dg-final { scan-assembler-not   "divd "    } } */
> -/* { dg-final { scan-assembler-not   "divwu "   } } */
> -/* { dg-final { scan-assembler-not   "divdu "   } } */
> +/* { Fail due to RS6000_DISABLE_SCALAR_MODULO. */
> +/* { dg-final { scan-assembler-times "modsw " 1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times "modsd " 1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times "moduw " 1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times "modud " 1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not   "mullw "   { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not   "mulld "   { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not   "divw "    { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not   "divd "    { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not   "divwu "   { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not   "divdu "   { xfail *-*-* } } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/mod-2.c 
> b/gcc/testsuite/gcc.target/powerpc/mod-2.c
> index 441ec5878f1..896e2e35260 100644
> --- a/gcc/testsuite/gcc.target/powerpc/mod-2.c
> +++ b/gcc/testsuite/gcc.target/powerpc/mod-2.c
> @@ -5,8 +5,9 @@
>   int ismod (int a, int b) { return a%b; }
>   unsigned int iumod (unsigned int a, unsigned int b) { return a%b; }
> 
> -/* { dg-final { scan-assembler-times "modsw " 1 } } */
> -/* { dg-final { scan-assembler-times "moduw " 1 } } */
> -/* { dg-final { scan-assembler-not   "mullw "   } } */
> -/* { dg-final { scan-assembler-not   "divw "    } } */
> -/* { dg-final { scan-assembler-not   "divwu "   } } */
> +/* { Fail due to RS6000_DISABLE_SCALAR_MODULO. */
> +/* { dg-final { scan-assembler-times "modsw " 1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times "moduw " 1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not   "mullw "   { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not   "divw "    { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not   "divwu "   { xfail *-*-* } } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/p10-vdivq-vmodq.c 
> b/gcc/testsuite/gcc.target/powerpc/p10-vdivq-vmodq.c
> index 84685e5ff43..148998c8c9d 100644
> --- a/gcc/testsuite/gcc.target/powerpc/p10-vdivq-vmodq.c
> +++ b/gcc/testsuite/gcc.target/powerpc/p10-vdivq-vmodq.c
> @@ -23,5 +23,6 @@ __int128 s_mod(__int128 a, __int128 b)
> 
>   /* { dg-final { scan-assembler {\mvdivsq\M} } } */
>   /* { dg-final { scan-assembler {\mvdivuq\M} } } */
> -/* { dg-final { scan-assembler {\mvmodsq\M} } } */
> -/* { dg-final { scan-assembler {\mvmoduq\M} } } */
> +/* { Fail due to RS6000_DISABLE_SCALAR_MODULO. */
> +/* { dg-final { scan-assembler {\mvmodsq\M} { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler {\mvmoduq\M} { xfail *-*-* } } } */
  
Pat Haugen May 18, 2023, 5:57 p.m. UTC | #2
Ping.

On 4/18/23 7:22 AM, Pat Haugen via Gcc-patches wrote:
> Updated from prior patch to also disable for int128.
> 
> 
> Disable generation of scalar modulo instructions.
> 
> It was recently discovered that the scalar modulo instructions can suffer
> noticeable performance issues for certain input values. This patch disables
> their generation since the equivalent div/mul/sub sequence does not suffer
> the same problem.
> 
> Bootstrapped and regression tested on powerpc64/powerpc64le.
> Ok for master and backports after burn in?
> 
> -Pat
> 
> 
> 2023-04-18  Pat Haugen  <pthaugen@linux.ibm.com>
> 
> gcc/
>      * config/rs6000/rs6000.h (RS6000_DISABLE_SCALAR_MODULO): New.
>      * config/rs6000/rs6000.md (mod<mode>3, *mod<mode>3): Disable.
>      (define_expand umod<mode>3): New.
>      (define_insn umod<mode>3): Rename to *umod<mode>3 and disable.
>      (umodti3, modti3): Disable.
> 
> gcc/testsuite/
>      * gcc.target/powerpc/clone1.c: Add xfails.
>      * gcc.target/powerpc/clone3.c: Likewise.
>      * gcc.target/powerpc/mod-1.c: Likewise.
>      * gcc.target/powerpc/mod-2.c: Likewise.
>      * gcc.target/powerpc/p10-vdivq-vmodq.c: Likewise.
> 
> 
> diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
> index 3503614efbd..1cf0a0013c0 100644
> --- a/gcc/config/rs6000/rs6000.h
> +++ b/gcc/config/rs6000/rs6000.h
> @@ -2492,3 +2492,9 @@ while (0)
>          rs6000_asm_output_opcode (STREAM);                \
>       }                                    \
>     while (0)
> +
> +/* Disable generation of scalar modulo instructions due to performance 
> issues
> +   with certain input values. This can be removed in the future when the
> +   issues have been resolved.  */
> +#define RS6000_DISABLE_SCALAR_MODULO 1
> +
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 44f7dd509cb..4f397bc9179 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -3421,6 +3421,17 @@ (define_expand "mod<mode>3"
>       FAIL;
> 
>         operands[2] = force_reg (<MODE>mode, operands[2]);
> +
> +      if (RS6000_DISABLE_SCALAR_MODULO)
> +    {
> +      temp1 = gen_reg_rtx (<MODE>mode);
> +      temp2 = gen_reg_rtx (<MODE>mode);
> +
> +      emit_insn (gen_div<mode>3 (temp1, operands[1], operands[2]));
> +      emit_insn (gen_mul<mode>3 (temp2, temp1, operands[2]));
> +      emit_insn (gen_sub<mode>3 (operands[0], operands[1], temp2));
> +      DONE;
> +    }
>       }
>     else
>       {
> @@ -3440,17 +3451,42 @@ (define_insn "*mod<mode>3"
>     [(set (match_operand:GPR 0 "gpc_reg_operand" "=&r,r")
>           (mod:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r")
>            (match_operand:GPR 2 "gpc_reg_operand" "r,r")))]
> -  "TARGET_MODULO"
> +  "TARGET_MODULO && !RS6000_DISABLE_SCALAR_MODULO"
>     "mods<wd> %0,%1,%2"
>     [(set_attr "type" "div")
>      (set_attr "size" "<bits>")])
> 
> +;; This define_expand can be removed when RS6000_DISABLE_SCALAR_MODULO is
> +;; removed.
> +(define_expand "umod<mode>3"
> +  [(set (match_operand:GPR 0 "gpc_reg_operand")
> +    (umod:GPR (match_operand:GPR 1 "gpc_reg_operand")
> +          (match_operand:GPR 2 "gpc_reg_operand")))]
> +  ""
> +{
> +  rtx temp1;
> +  rtx temp2;
> +
> +  if (!TARGET_MODULO)
> +    FAIL;
> 
> -(define_insn "umod<mode>3"
> +  if (RS6000_DISABLE_SCALAR_MODULO)
> +    {
> +      temp1 = gen_reg_rtx (<MODE>mode);
> +      temp2 = gen_reg_rtx (<MODE>mode);
> +
> +      emit_insn (gen_udiv<mode>3 (temp1, operands[1], operands[2]));
> +      emit_insn (gen_mul<mode>3 (temp2, temp1, operands[2]));
> +      emit_insn (gen_sub<mode>3 (operands[0], operands[1], temp2));
> +      DONE;
> +    }
> +})
> +
> +(define_insn "*umod<mode>3"
>     [(set (match_operand:GPR 0 "gpc_reg_operand" "=&r,r")
>           (umod:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r")
>             (match_operand:GPR 2 "gpc_reg_operand" "r,r")))]
> -  "TARGET_MODULO"
> +  "TARGET_MODULO && !RS6000_DISABLE_SCALAR_MODULO"
>     "modu<wd> %0,%1,%2"
>     [(set_attr "type" "div")
>      (set_attr "size" "<bits>")])
> @@ -3507,7 +3543,7 @@ (define_insn "umodti3"
>     [(set (match_operand:TI 0 "altivec_register_operand" "=v")
>       (umod:TI (match_operand:TI 1 "altivec_register_operand" "v")
>            (match_operand:TI 2 "altivec_register_operand" "v")))]
> -  "TARGET_POWER10 && TARGET_POWERPC64"
> +  "TARGET_POWER10 && TARGET_POWERPC64 && !RS6000_DISABLE_SCALAR_MODULO"
>     "vmoduq %0,%1,%2"
>     [(set_attr "type" "vecdiv")
>      (set_attr "size" "128")])
> @@ -3516,7 +3552,7 @@ (define_insn "modti3"
>     [(set (match_operand:TI 0 "altivec_register_operand" "=v")
>       (mod:TI (match_operand:TI 1 "altivec_register_operand" "v")
>           (match_operand:TI 2 "altivec_register_operand" "v")))]
> -  "TARGET_POWER10 && TARGET_POWERPC64"
> +  "TARGET_POWER10 && TARGET_POWERPC64 && !RS6000_DISABLE_SCALAR_MODULO"
>     "vmodsq %0,%1,%2"
>     [(set_attr "type" "vecdiv")
>      (set_attr "size" "128")])
> diff --git a/gcc/testsuite/gcc.target/powerpc/clone1.c 
> b/gcc/testsuite/gcc.target/powerpc/clone1.c
> index c69fd2aa1b8..74323ca0e8c 100644
> --- a/gcc/testsuite/gcc.target/powerpc/clone1.c
> +++ b/gcc/testsuite/gcc.target/powerpc/clone1.c
> @@ -21,6 +21,7 @@ long mod_func_or (long a, long b, long c)
>     return mod_func (a, b) | c;
>   }
> 
> -/* { dg-final { scan-assembler-times {\mdivd\M}  1 } } */
> -/* { dg-final { scan-assembler-times {\mmulld\M} 1 } } */
> -/* { dg-final { scan-assembler-times {\mmodsd\M} 1 } } */
> +/* { Fail due to RS6000_DISABLE_SCALAR_MODULO. */
> +/* { dg-final { scan-assembler-times {\mdivd\M}  1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times {\mmulld\M} 1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times {\mmodsd\M} 1 { xfail *-*-* } } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/clone3.c 
> b/gcc/testsuite/gcc.target/powerpc/clone3.c
> index 911b88b781d..d3eb4dd2378 100644
> --- a/gcc/testsuite/gcc.target/powerpc/clone3.c
> +++ b/gcc/testsuite/gcc.target/powerpc/clone3.c
> @@ -27,7 +27,8 @@ long mod_func_or (long a, long b, long c)
>     return mod_func (a, b) | c;
>   }
> 
> -/* { dg-final { scan-assembler-times {\mdivd\M}  1 } } */
> -/* { dg-final { scan-assembler-times {\mmulld\M} 1 } } */
> -/* { dg-final { scan-assembler-times {\mmodsd\M} 2 } } */
> +/* { Fail due to RS6000_DISABLE_SCALAR_MODULO. */
> +/* { dg-final { scan-assembler-times {\mdivd\M}  1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times {\mmulld\M} 1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times {\mmodsd\M} 2 { xfail *-*-* } } } */
>   /* { dg-final { scan-assembler-times {\mpld\M}   1 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/mod-1.c 
> b/gcc/testsuite/gcc.target/powerpc/mod-1.c
> index 861ba670af4..74af98f5bc3 100644
> --- a/gcc/testsuite/gcc.target/powerpc/mod-1.c
> +++ b/gcc/testsuite/gcc.target/powerpc/mod-1.c
> @@ -7,13 +7,14 @@ long lsmod (long a, long b) { return a%b; }
>   unsigned int iumod (unsigned int a, unsigned int b) { return a%b; }
>   unsigned long lumod (unsigned long a, unsigned long b) { return a%b; }
> 
> -/* { dg-final { scan-assembler-times "modsw " 1 } } */
> -/* { dg-final { scan-assembler-times "modsd " 1 } } */
> -/* { dg-final { scan-assembler-times "moduw " 1 } } */
> -/* { dg-final { scan-assembler-times "modud " 1 } } */
> -/* { dg-final { scan-assembler-not   "mullw "   } } */
> -/* { dg-final { scan-assembler-not   "mulld "   } } */
> -/* { dg-final { scan-assembler-not   "divw "    } } */
> -/* { dg-final { scan-assembler-not   "divd "    } } */
> -/* { dg-final { scan-assembler-not   "divwu "   } } */
> -/* { dg-final { scan-assembler-not   "divdu "   } } */
> +/* { Fail due to RS6000_DISABLE_SCALAR_MODULO. */
> +/* { dg-final { scan-assembler-times "modsw " 1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times "modsd " 1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times "moduw " 1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times "modud " 1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not   "mullw "   { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not   "mulld "   { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not   "divw "    { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not   "divd "    { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not   "divwu "   { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not   "divdu "   { xfail *-*-* } } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/mod-2.c 
> b/gcc/testsuite/gcc.target/powerpc/mod-2.c
> index 441ec5878f1..896e2e35260 100644
> --- a/gcc/testsuite/gcc.target/powerpc/mod-2.c
> +++ b/gcc/testsuite/gcc.target/powerpc/mod-2.c
> @@ -5,8 +5,9 @@
>   int ismod (int a, int b) { return a%b; }
>   unsigned int iumod (unsigned int a, unsigned int b) { return a%b; }
> 
> -/* { dg-final { scan-assembler-times "modsw " 1 } } */
> -/* { dg-final { scan-assembler-times "moduw " 1 } } */
> -/* { dg-final { scan-assembler-not   "mullw "   } } */
> -/* { dg-final { scan-assembler-not   "divw "    } } */
> -/* { dg-final { scan-assembler-not   "divwu "   } } */
> +/* { Fail due to RS6000_DISABLE_SCALAR_MODULO. */
> +/* { dg-final { scan-assembler-times "modsw " 1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times "moduw " 1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not   "mullw "   { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not   "divw "    { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not   "divwu "   { xfail *-*-* } } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/p10-vdivq-vmodq.c 
> b/gcc/testsuite/gcc.target/powerpc/p10-vdivq-vmodq.c
> index 84685e5ff43..148998c8c9d 100644
> --- a/gcc/testsuite/gcc.target/powerpc/p10-vdivq-vmodq.c
> +++ b/gcc/testsuite/gcc.target/powerpc/p10-vdivq-vmodq.c
> @@ -23,5 +23,6 @@ __int128 s_mod(__int128 a, __int128 b)
> 
>   /* { dg-final { scan-assembler {\mvdivsq\M} } } */
>   /* { dg-final { scan-assembler {\mvdivuq\M} } } */
> -/* { dg-final { scan-assembler {\mvmodsq\M} } } */
> -/* { dg-final { scan-assembler {\mvmoduq\M} } } */
> +/* { Fail due to RS6000_DISABLE_SCALAR_MODULO. */
> +/* { dg-final { scan-assembler {\mvmodsq\M} { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler {\mvmoduq\M} { xfail *-*-* } } } */
  
Pat Haugen June 2, 2023, 3:13 p.m. UTC | #3
Ping ^3

On 4/18/23 7:22 AM, Pat Haugen via Gcc-patches wrote:
> Updated from prior patch to also disable for int128.
> 
> 
> Disable generation of scalar modulo instructions.
> 
> It was recently discovered that the scalar modulo instructions can suffer
> noticeable performance issues for certain input values. This patch disables
> their generation since the equivalent div/mul/sub sequence does not suffer
> the same problem.
> 
> Bootstrapped and regression tested on powerpc64/powerpc64le.
> Ok for master and backports after burn in?
> 
> -Pat
> 
> 
> 2023-04-18  Pat Haugen  <pthaugen@linux.ibm.com>
> 
> gcc/
>      * config/rs6000/rs6000.h (RS6000_DISABLE_SCALAR_MODULO): New.
>      * config/rs6000/rs6000.md (mod<mode>3, *mod<mode>3): Disable.
>      (define_expand umod<mode>3): New.
>      (define_insn umod<mode>3): Rename to *umod<mode>3 and disable.
>      (umodti3, modti3): Disable.
> 
> gcc/testsuite/
>      * gcc.target/powerpc/clone1.c: Add xfails.
>      * gcc.target/powerpc/clone3.c: Likewise.
>      * gcc.target/powerpc/mod-1.c: Likewise.
>      * gcc.target/powerpc/mod-2.c: Likewise.
>      * gcc.target/powerpc/p10-vdivq-vmodq.c: Likewise.
> 
> 
> diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
> index 3503614efbd..1cf0a0013c0 100644
> --- a/gcc/config/rs6000/rs6000.h
> +++ b/gcc/config/rs6000/rs6000.h
> @@ -2492,3 +2492,9 @@ while (0)
>          rs6000_asm_output_opcode (STREAM);                \
>       }                                    \
>     while (0)
> +
> +/* Disable generation of scalar modulo instructions due to performance 
> issues
> +   with certain input values. This can be removed in the future when the
> +   issues have been resolved.  */
> +#define RS6000_DISABLE_SCALAR_MODULO 1
> +
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 44f7dd509cb..4f397bc9179 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -3421,6 +3421,17 @@ (define_expand "mod<mode>3"
>       FAIL;
> 
>         operands[2] = force_reg (<MODE>mode, operands[2]);
> +
> +      if (RS6000_DISABLE_SCALAR_MODULO)
> +    {
> +      temp1 = gen_reg_rtx (<MODE>mode);
> +      temp2 = gen_reg_rtx (<MODE>mode);
> +
> +      emit_insn (gen_div<mode>3 (temp1, operands[1], operands[2]));
> +      emit_insn (gen_mul<mode>3 (temp2, temp1, operands[2]));
> +      emit_insn (gen_sub<mode>3 (operands[0], operands[1], temp2));
> +      DONE;
> +    }
>       }
>     else
>       {
> @@ -3440,17 +3451,42 @@ (define_insn "*mod<mode>3"
>     [(set (match_operand:GPR 0 "gpc_reg_operand" "=&r,r")
>           (mod:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r")
>            (match_operand:GPR 2 "gpc_reg_operand" "r,r")))]
> -  "TARGET_MODULO"
> +  "TARGET_MODULO && !RS6000_DISABLE_SCALAR_MODULO"
>     "mods<wd> %0,%1,%2"
>     [(set_attr "type" "div")
>      (set_attr "size" "<bits>")])
> 
> +;; This define_expand can be removed when RS6000_DISABLE_SCALAR_MODULO is
> +;; removed.
> +(define_expand "umod<mode>3"
> +  [(set (match_operand:GPR 0 "gpc_reg_operand")
> +    (umod:GPR (match_operand:GPR 1 "gpc_reg_operand")
> +          (match_operand:GPR 2 "gpc_reg_operand")))]
> +  ""
> +{
> +  rtx temp1;
> +  rtx temp2;
> +
> +  if (!TARGET_MODULO)
> +    FAIL;
> 
> -(define_insn "umod<mode>3"
> +  if (RS6000_DISABLE_SCALAR_MODULO)
> +    {
> +      temp1 = gen_reg_rtx (<MODE>mode);
> +      temp2 = gen_reg_rtx (<MODE>mode);
> +
> +      emit_insn (gen_udiv<mode>3 (temp1, operands[1], operands[2]));
> +      emit_insn (gen_mul<mode>3 (temp2, temp1, operands[2]));
> +      emit_insn (gen_sub<mode>3 (operands[0], operands[1], temp2));
> +      DONE;
> +    }
> +})
> +
> +(define_insn "*umod<mode>3"
>     [(set (match_operand:GPR 0 "gpc_reg_operand" "=&r,r")
>           (umod:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r")
>             (match_operand:GPR 2 "gpc_reg_operand" "r,r")))]
> -  "TARGET_MODULO"
> +  "TARGET_MODULO && !RS6000_DISABLE_SCALAR_MODULO"
>     "modu<wd> %0,%1,%2"
>     [(set_attr "type" "div")
>      (set_attr "size" "<bits>")])
> @@ -3507,7 +3543,7 @@ (define_insn "umodti3"
>     [(set (match_operand:TI 0 "altivec_register_operand" "=v")
>       (umod:TI (match_operand:TI 1 "altivec_register_operand" "v")
>            (match_operand:TI 2 "altivec_register_operand" "v")))]
> -  "TARGET_POWER10 && TARGET_POWERPC64"
> +  "TARGET_POWER10 && TARGET_POWERPC64 && !RS6000_DISABLE_SCALAR_MODULO"
>     "vmoduq %0,%1,%2"
>     [(set_attr "type" "vecdiv")
>      (set_attr "size" "128")])
> @@ -3516,7 +3552,7 @@ (define_insn "modti3"
>     [(set (match_operand:TI 0 "altivec_register_operand" "=v")
>       (mod:TI (match_operand:TI 1 "altivec_register_operand" "v")
>           (match_operand:TI 2 "altivec_register_operand" "v")))]
> -  "TARGET_POWER10 && TARGET_POWERPC64"
> +  "TARGET_POWER10 && TARGET_POWERPC64 && !RS6000_DISABLE_SCALAR_MODULO"
>     "vmodsq %0,%1,%2"
>     [(set_attr "type" "vecdiv")
>      (set_attr "size" "128")])
> diff --git a/gcc/testsuite/gcc.target/powerpc/clone1.c 
> b/gcc/testsuite/gcc.target/powerpc/clone1.c
> index c69fd2aa1b8..74323ca0e8c 100644
> --- a/gcc/testsuite/gcc.target/powerpc/clone1.c
> +++ b/gcc/testsuite/gcc.target/powerpc/clone1.c
> @@ -21,6 +21,7 @@ long mod_func_or (long a, long b, long c)
>     return mod_func (a, b) | c;
>   }
> 
> -/* { dg-final { scan-assembler-times {\mdivd\M}  1 } } */
> -/* { dg-final { scan-assembler-times {\mmulld\M} 1 } } */
> -/* { dg-final { scan-assembler-times {\mmodsd\M} 1 } } */
> +/* { Fail due to RS6000_DISABLE_SCALAR_MODULO. */
> +/* { dg-final { scan-assembler-times {\mdivd\M}  1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times {\mmulld\M} 1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times {\mmodsd\M} 1 { xfail *-*-* } } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/clone3.c 
> b/gcc/testsuite/gcc.target/powerpc/clone3.c
> index 911b88b781d..d3eb4dd2378 100644
> --- a/gcc/testsuite/gcc.target/powerpc/clone3.c
> +++ b/gcc/testsuite/gcc.target/powerpc/clone3.c
> @@ -27,7 +27,8 @@ long mod_func_or (long a, long b, long c)
>     return mod_func (a, b) | c;
>   }
> 
> -/* { dg-final { scan-assembler-times {\mdivd\M}  1 } } */
> -/* { dg-final { scan-assembler-times {\mmulld\M} 1 } } */
> -/* { dg-final { scan-assembler-times {\mmodsd\M} 2 } } */
> +/* { Fail due to RS6000_DISABLE_SCALAR_MODULO. */
> +/* { dg-final { scan-assembler-times {\mdivd\M}  1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times {\mmulld\M} 1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times {\mmodsd\M} 2 { xfail *-*-* } } } */
>   /* { dg-final { scan-assembler-times {\mpld\M}   1 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/mod-1.c 
> b/gcc/testsuite/gcc.target/powerpc/mod-1.c
> index 861ba670af4..74af98f5bc3 100644
> --- a/gcc/testsuite/gcc.target/powerpc/mod-1.c
> +++ b/gcc/testsuite/gcc.target/powerpc/mod-1.c
> @@ -7,13 +7,14 @@ long lsmod (long a, long b) { return a%b; }
>   unsigned int iumod (unsigned int a, unsigned int b) { return a%b; }
>   unsigned long lumod (unsigned long a, unsigned long b) { return a%b; }
> 
> -/* { dg-final { scan-assembler-times "modsw " 1 } } */
> -/* { dg-final { scan-assembler-times "modsd " 1 } } */
> -/* { dg-final { scan-assembler-times "moduw " 1 } } */
> -/* { dg-final { scan-assembler-times "modud " 1 } } */
> -/* { dg-final { scan-assembler-not   "mullw "   } } */
> -/* { dg-final { scan-assembler-not   "mulld "   } } */
> -/* { dg-final { scan-assembler-not   "divw "    } } */
> -/* { dg-final { scan-assembler-not   "divd "    } } */
> -/* { dg-final { scan-assembler-not   "divwu "   } } */
> -/* { dg-final { scan-assembler-not   "divdu "   } } */
> +/* { Fail due to RS6000_DISABLE_SCALAR_MODULO. */
> +/* { dg-final { scan-assembler-times "modsw " 1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times "modsd " 1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times "moduw " 1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times "modud " 1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not   "mullw "   { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not   "mulld "   { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not   "divw "    { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not   "divd "    { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not   "divwu "   { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not   "divdu "   { xfail *-*-* } } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/mod-2.c 
> b/gcc/testsuite/gcc.target/powerpc/mod-2.c
> index 441ec5878f1..896e2e35260 100644
> --- a/gcc/testsuite/gcc.target/powerpc/mod-2.c
> +++ b/gcc/testsuite/gcc.target/powerpc/mod-2.c
> @@ -5,8 +5,9 @@
>   int ismod (int a, int b) { return a%b; }
>   unsigned int iumod (unsigned int a, unsigned int b) { return a%b; }
> 
> -/* { dg-final { scan-assembler-times "modsw " 1 } } */
> -/* { dg-final { scan-assembler-times "moduw " 1 } } */
> -/* { dg-final { scan-assembler-not   "mullw "   } } */
> -/* { dg-final { scan-assembler-not   "divw "    } } */
> -/* { dg-final { scan-assembler-not   "divwu "   } } */
> +/* { Fail due to RS6000_DISABLE_SCALAR_MODULO. */
> +/* { dg-final { scan-assembler-times "modsw " 1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times "moduw " 1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not   "mullw "   { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not   "divw "    { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not   "divwu "   { xfail *-*-* } } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/p10-vdivq-vmodq.c 
> b/gcc/testsuite/gcc.target/powerpc/p10-vdivq-vmodq.c
> index 84685e5ff43..148998c8c9d 100644
> --- a/gcc/testsuite/gcc.target/powerpc/p10-vdivq-vmodq.c
> +++ b/gcc/testsuite/gcc.target/powerpc/p10-vdivq-vmodq.c
> @@ -23,5 +23,6 @@ __int128 s_mod(__int128 a, __int128 b)
> 
>   /* { dg-final { scan-assembler {\mvdivsq\M} } } */
>   /* { dg-final { scan-assembler {\mvdivuq\M} } } */
> -/* { dg-final { scan-assembler {\mvmodsq\M} } } */
> -/* { dg-final { scan-assembler {\mvmoduq\M} } } */
> +/* { Fail due to RS6000_DISABLE_SCALAR_MODULO. */
> +/* { dg-final { scan-assembler {\mvmodsq\M} { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler {\mvmoduq\M} { xfail *-*-* } } } */
  
Kewen.Lin June 5, 2023, 6:10 a.m. UTC | #4
Hi Pat,

Thanks for fixing this and sorry for the late review!

on 2023/4/18 20:22, Pat Haugen wrote:
> Updated from prior patch to also disable for int128.
> 
> 
> Disable generation of scalar modulo instructions.
> 
> It was recently discovered that the scalar modulo instructions can suffer
> noticeable performance issues for certain input values. This patch disables
> their generation since the equivalent div/mul/sub sequence does not suffer
> the same problem.
> 
> Bootstrapped and regression tested on powerpc64/powerpc64le.
> Ok for master and backports after burn in?
> 
> -Pat
> 
> 
> 2023-04-18  Pat Haugen  <pthaugen@linux.ibm.com>
> 
> gcc/
>     * config/rs6000/rs6000.h (RS6000_DISABLE_SCALAR_MODULO): New.
>     * config/rs6000/rs6000.md (mod<mode>3, *mod<mode>3): Disable.
>     (define_expand umod<mode>3): New.
>     (define_insn umod<mode>3): Rename to *umod<mode>3 and disable.
>     (umodti3, modti3): Disable.

I noticed that there is one place in rs6000_rtx_costs only checking
TARGET_MODULO for if counting extra cost for umod/mod, I guess we
should update it as well (for scalar int modes)?

      /* Add in shift and subtract for MOD unless we have a mod instruction. */
      if (!TARGET_MODULO && (code == MOD || code == UMOD))
	*total += COSTS_N_INSNS (2);

> 
> gcc/testsuite/
>     * gcc.target/powerpc/clone1.c: Add xfails.
>     * gcc.target/powerpc/clone3.c: Likewise.
>     * gcc.target/powerpc/mod-1.c: Likewise.
>     * gcc.target/powerpc/mod-2.c: Likewise.
>     * gcc.target/powerpc/p10-vdivq-vmodq.c: Likewise.
> 
> 
> diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
> index 3503614efbd..1cf0a0013c0 100644
> --- a/gcc/config/rs6000/rs6000.h
> +++ b/gcc/config/rs6000/rs6000.h
> @@ -2492,3 +2492,9 @@ while (0)
>         rs6000_asm_output_opcode (STREAM);                \
>      }                                    \
>    while (0)

The diff file seemed to expand tab with some spaces unexpectedly, such as the
above lines.

> +
> +/* Disable generation of scalar modulo instructions due to performance issues
> +   with certain input values. This can be removed in the future when the

nit:                           ~~ two spaces instead of one.

> +   issues have been resolved.  */
> +#define RS6000_DISABLE_SCALAR_MODULO 1
> +
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 44f7dd509cb..4f397bc9179 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -3421,6 +3421,17 @@ (define_expand "mod<mode>3"
>      FAIL;
> 
>        operands[2] = force_reg (<MODE>mode, operands[2]);
> +
> +      if (RS6000_DISABLE_SCALAR_MODULO)
> +    {
> +      temp1 = gen_reg_rtx (<MODE>mode);
> +      temp2 = gen_reg_rtx (<MODE>mode);
> +
> +      emit_insn (gen_div<mode>3 (temp1, operands[1], operands[2]));
> +      emit_insn (gen_mul<mode>3 (temp2, temp1, operands[2]));
> +      emit_insn (gen_sub<mode>3 (operands[0], operands[1], temp2));
> +      DONE;
> +    }

nit: wrong indent.

>      }
>    else
>      {
> @@ -3440,17 +3451,42 @@ (define_insn "*mod<mode>3"
>    [(set (match_operand:GPR 0 "gpc_reg_operand" "=&r,r")
>          (mod:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r")
>           (match_operand:GPR 2 "gpc_reg_operand" "r,r")))]
> -  "TARGET_MODULO"
> +  "TARGET_MODULO && !RS6000_DISABLE_SCALAR_MODULO"
>    "mods<wd> %0,%1,%2"
>    [(set_attr "type" "div")
>     (set_attr "size" "<bits>")])
> 
> +;; This define_expand can be removed when RS6000_DISABLE_SCALAR_MODULO is
> +;; removed.
> +(define_expand "umod<mode>3"
> +  [(set (match_operand:GPR 0 "gpc_reg_operand")
> +    (umod:GPR (match_operand:GPR 1 "gpc_reg_operand")
> +          (match_operand:GPR 2 "gpc_reg_operand")))]
> +  ""
> +{
> +  rtx temp1;
> +  rtx temp2;
> +
> +  if (!TARGET_MODULO)
> +    FAIL;
> 
> -(define_insn "umod<mode>3"
> +  if (RS6000_DISABLE_SCALAR_MODULO)
> +    {
> +      temp1 = gen_reg_rtx (<MODE>mode);
> +      temp2 = gen_reg_rtx (<MODE>mode);
> +
> +      emit_insn (gen_udiv<mode>3 (temp1, operands[1], operands[2]));
> +      emit_insn (gen_mul<mode>3 (temp2, temp1, operands[2]));
> +      emit_insn (gen_sub<mode>3 (operands[0], operands[1], temp2));
> +      DONE;
> +    }
> +})
> +
> +(define_insn "*umod<mode>3"
>    [(set (match_operand:GPR 0 "gpc_reg_operand" "=&r,r")
>          (umod:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r")
>            (match_operand:GPR 2 "gpc_reg_operand" "r,r")))]
> -  "TARGET_MODULO"
> +  "TARGET_MODULO && !RS6000_DISABLE_SCALAR_MODULO"
>    "modu<wd> %0,%1,%2"
>    [(set_attr "type" "div")
>     (set_attr "size" "<bits>")])
> @@ -3507,7 +3543,7 @@ (define_insn "umodti3"
>    [(set (match_operand:TI 0 "altivec_register_operand" "=v")
>      (umod:TI (match_operand:TI 1 "altivec_register_operand" "v")
>           (match_operand:TI 2 "altivec_register_operand" "v")))]
> -  "TARGET_POWER10 && TARGET_POWERPC64"
> +  "TARGET_POWER10 && TARGET_POWERPC64 && !RS6000_DISABLE_SCALAR_MODULO"
>    "vmoduq %0,%1,%2"
>    [(set_attr "type" "vecdiv")
>     (set_attr "size" "128")])
> @@ -3516,7 +3552,7 @@ (define_insn "modti3"
>    [(set (match_operand:TI 0 "altivec_register_operand" "=v")
>      (mod:TI (match_operand:TI 1 "altivec_register_operand" "v")
>          (match_operand:TI 2 "altivec_register_operand" "v")))]
> -  "TARGET_POWER10 && TARGET_POWERPC64"
> +  "TARGET_POWER10 && TARGET_POWERPC64 && !RS6000_DISABLE_SCALAR_MODULO"
>    "vmodsq %0,%1,%2"
>    [(set_attr "type" "vecdiv")
>     (set_attr "size" "128")])
> diff --git a/gcc/testsuite/gcc.target/powerpc/clone1.c b/gcc/testsuite/gcc.target/powerpc/clone1.c
> index c69fd2aa1b8..74323ca0e8c 100644
> --- a/gcc/testsuite/gcc.target/powerpc/clone1.c
> +++ b/gcc/testsuite/gcc.target/powerpc/clone1.c
> @@ -21,6 +21,7 @@ long mod_func_or (long a, long b, long c)
>    return mod_func (a, b) | c;
>  }
> 
> -/* { dg-final { scan-assembler-times {\mdivd\M}  1 } } */
> -/* { dg-final { scan-assembler-times {\mmulld\M} 1 } } */
> -/* { dg-final { scan-assembler-times {\mmodsd\M} 1 } } */
> +/* { Fail due to RS6000_DISABLE_SCALAR_MODULO. */
> +/* { dg-final { scan-assembler-times {\mdivd\M}  1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times {\mmulld\M} 1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times {\mmodsd\M} 1 { xfail *-*-* } } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/clone3.c b/gcc/testsuite/gcc.target/powerpc/clone3.c
> index 911b88b781d..d3eb4dd2378 100644
> --- a/gcc/testsuite/gcc.target/powerpc/clone3.c
> +++ b/gcc/testsuite/gcc.target/powerpc/clone3.c
> @@ -27,7 +27,8 @@ long mod_func_or (long a, long b, long c)
>    return mod_func (a, b) | c;
>  }
> 
> -/* { dg-final { scan-assembler-times {\mdivd\M}  1 } } */
> -/* { dg-final { scan-assembler-times {\mmulld\M} 1 } } */
> -/* { dg-final { scan-assembler-times {\mmodsd\M} 2 } } */
> +/* { Fail due to RS6000_DISABLE_SCALAR_MODULO. */
> +/* { dg-final { scan-assembler-times {\mdivd\M}  1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times {\mmulld\M} 1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times {\mmodsd\M} 2 { xfail *-*-* } } } */
>  /* { dg-final { scan-assembler-times {\mpld\M}   1 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/mod-1.c b/gcc/testsuite/gcc.target/powerpc/mod-1.c
> index 861ba670af4..74af98f5bc3 100644
> --- a/gcc/testsuite/gcc.target/powerpc/mod-1.c
> +++ b/gcc/testsuite/gcc.target/powerpc/mod-1.c
> @@ -7,13 +7,14 @@ long lsmod (long a, long b) { return a%b; }
>  unsigned int iumod (unsigned int a, unsigned int b) { return a%b; }
>  unsigned long lumod (unsigned long a, unsigned long b) { return a%b; }
> 
> -/* { dg-final { scan-assembler-times "modsw " 1 } } */
> -/* { dg-final { scan-assembler-times "modsd " 1 } } */
> -/* { dg-final { scan-assembler-times "moduw " 1 } } */
> -/* { dg-final { scan-assembler-times "modud " 1 } } */
> -/* { dg-final { scan-assembler-not   "mullw "   } } */
> -/* { dg-final { scan-assembler-not   "mulld "   } } */
> -/* { dg-final { scan-assembler-not   "divw "    } } */
> -/* { dg-final { scan-assembler-not   "divd "    } } */
> -/* { dg-final { scan-assembler-not   "divwu "   } } */
> -/* { dg-final { scan-assembler-not   "divdu "   } } */
> +/* { Fail due to RS6000_DISABLE_SCALAR_MODULO. */
> +/* { dg-final { scan-assembler-times "modsw " 1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times "modsd " 1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times "moduw " 1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times "modud " 1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not   "mullw "   { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not   "mulld "   { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not   "divw "    { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not   "divd "    { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not   "divwu "   { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not   "divdu "   { xfail *-*-* } } } */

The existing patterns haven't made use of \m and \M already, but since you
touched these lines, if you don't mind, could you can also update them
with \m and \M?

Thanks in advance!

> diff --git a/gcc/testsuite/gcc.target/powerpc/mod-2.c b/gcc/testsuite/gcc.target/powerpc/mod-2.c
> index 441ec5878f1..896e2e35260 100644
> --- a/gcc/testsuite/gcc.target/powerpc/mod-2.c
> +++ b/gcc/testsuite/gcc.target/powerpc/mod-2.c
> @@ -5,8 +5,9 @@
>  int ismod (int a, int b) { return a%b; }
>  unsigned int iumod (unsigned int a, unsigned int b) { return a%b; }
> 
> -/* { dg-final { scan-assembler-times "modsw " 1 } } */
> -/* { dg-final { scan-assembler-times "moduw " 1 } } */
> -/* { dg-final { scan-assembler-not   "mullw "   } } */
> -/* { dg-final { scan-assembler-not   "divw "    } } */
> -/* { dg-final { scan-assembler-not   "divwu "   } } */
> +/* { Fail due to RS6000_DISABLE_SCALAR_MODULO. */
> +/* { dg-final { scan-assembler-times "modsw " 1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-times "moduw " 1 { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not   "mullw "   { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not   "divw "    { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not   "divwu "   { xfail *-*-* } } } */

Ditto.

BR,
Kewen

> diff --git a/gcc/testsuite/gcc.target/powerpc/p10-vdivq-vmodq.c b/gcc/testsuite/gcc.target/powerpc/p10-vdivq-vmodq.c
> index 84685e5ff43..148998c8c9d 100644
> --- a/gcc/testsuite/gcc.target/powerpc/p10-vdivq-vmodq.c
> +++ b/gcc/testsuite/gcc.target/powerpc/p10-vdivq-vmodq.c
> @@ -23,5 +23,6 @@ __int128 s_mod(__int128 a, __int128 b)
> 
>  /* { dg-final { scan-assembler {\mvdivsq\M} } } */
>  /* { dg-final { scan-assembler {\mvdivuq\M} } } */
> -/* { dg-final { scan-assembler {\mvmodsq\M} } } */
> -/* { dg-final { scan-assembler {\mvmoduq\M} } } */
> +/* { Fail due to RS6000_DISABLE_SCALAR_MODULO. */
> +/* { dg-final { scan-assembler {\mvmodsq\M} { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler {\mvmoduq\M} { xfail *-*-* } } } */
  

Patch

diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 3503614efbd..1cf0a0013c0 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -2492,3 +2492,9 @@  while (0)
         rs6000_asm_output_opcode (STREAM);				\
      }									\
    while (0)
+
+/* Disable generation of scalar modulo instructions due to performance 
issues
+   with certain input values. This can be removed in the future when the
+   issues have been resolved.  */
+#define RS6000_DISABLE_SCALAR_MODULO 1
+
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 44f7dd509cb..4f397bc9179 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -3421,6 +3421,17 @@  (define_expand "mod<mode>3"
  	FAIL;

        operands[2] = force_reg (<MODE>mode, operands[2]);
+
+      if (RS6000_DISABLE_SCALAR_MODULO)
+	{
+	  temp1 = gen_reg_rtx (<MODE>mode);
+	  temp2 = gen_reg_rtx (<MODE>mode);
+
+	  emit_insn (gen_div<mode>3 (temp1, operands[1], operands[2]));
+	  emit_insn (gen_mul<mode>3 (temp2, temp1, operands[2]));
+	  emit_insn (gen_sub<mode>3 (operands[0], operands[1], temp2));
+	  DONE;
+	}
      }
    else
      {
@@ -3440,17 +3451,42 @@  (define_insn "*mod<mode>3"
    [(set (match_operand:GPR 0 "gpc_reg_operand" "=&r,r")
          (mod:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r")
  		 (match_operand:GPR 2 "gpc_reg_operand" "r,r")))]
-  "TARGET_MODULO"
+  "TARGET_MODULO && !RS6000_DISABLE_SCALAR_MODULO"
    "mods<wd> %0,%1,%2"
    [(set_attr "type" "div")
     (set_attr "size" "<bits>")])

+;; This define_expand can be removed when RS6000_DISABLE_SCALAR_MODULO is
+;; removed.
+(define_expand "umod<mode>3"
+  [(set (match_operand:GPR 0 "gpc_reg_operand")
+	(umod:GPR (match_operand:GPR 1 "gpc_reg_operand")
+		  (match_operand:GPR 2 "gpc_reg_operand")))]
+  ""
+{
+  rtx temp1;
+  rtx temp2;
+
+  if (!TARGET_MODULO)
+	FAIL;

-(define_insn "umod<mode>3"
+  if (RS6000_DISABLE_SCALAR_MODULO)
+    {
+      temp1 = gen_reg_rtx (<MODE>mode);
+      temp2 = gen_reg_rtx (<MODE>mode);
+
+      emit_insn (gen_udiv<mode>3 (temp1, operands[1], operands[2]));
+      emit_insn (gen_mul<mode>3 (temp2, temp1, operands[2]));
+      emit_insn (gen_sub<mode>3 (operands[0], operands[1], temp2));
+      DONE;
+    }
+})
+
+(define_insn "*umod<mode>3"
    [(set (match_operand:GPR 0 "gpc_reg_operand" "=&r,r")
          (umod:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r")
  		  (match_operand:GPR 2 "gpc_reg_operand" "r,r")))]
-  "TARGET_MODULO"
+  "TARGET_MODULO && !RS6000_DISABLE_SCALAR_MODULO"
    "modu<wd> %0,%1,%2"
    [(set_attr "type" "div")
     (set_attr "size" "<bits>")])
@@ -3507,7 +3543,7 @@  (define_insn "umodti3"
    [(set (match_operand:TI 0 "altivec_register_operand" "=v")
  	(umod:TI (match_operand:TI 1 "altivec_register_operand" "v")
  		 (match_operand:TI 2 "altivec_register_operand" "v")))]
-  "TARGET_POWER10 && TARGET_POWERPC64"
+  "TARGET_POWER10 && TARGET_POWERPC64 && !RS6000_DISABLE_SCALAR_MODULO"
    "vmoduq %0,%1,%2"
    [(set_attr "type" "vecdiv")
     (set_attr "size" "128")])
@@ -3516,7 +3552,7 @@  (define_insn "modti3"
    [(set (match_operand:TI 0 "altivec_register_operand" "=v")
  	(mod:TI (match_operand:TI 1 "altivec_register_operand" "v")
  		(match_operand:TI 2 "altivec_register_operand" "v")))]
-  "TARGET_POWER10 && TARGET_POWERPC64"
+  "TARGET_POWER10 && TARGET_POWERPC64 && !RS6000_DISABLE_SCALAR_MODULO"
    "vmodsq %0,%1,%2"
    [(set_attr "type" "vecdiv")
     (set_attr "size" "128")])
diff --git a/gcc/testsuite/gcc.target/powerpc/clone1.c 
b/gcc/testsuite/gcc.target/powerpc/clone1.c
index c69fd2aa1b8..74323ca0e8c 100644
--- a/gcc/testsuite/gcc.target/powerpc/clone1.c
+++ b/gcc/testsuite/gcc.target/powerpc/clone1.c
@@ -21,6 +21,7 @@  long mod_func_or (long a, long b, long c)
    return mod_func (a, b) | c;
  }

-/* { dg-final { scan-assembler-times {\mdivd\M}  1 } } */
-/* { dg-final { scan-assembler-times {\mmulld\M} 1 } } */
-/* { dg-final { scan-assembler-times {\mmodsd\M} 1 } } */
+/* { Fail due to RS6000_DISABLE_SCALAR_MODULO. */
+/* { dg-final { scan-assembler-times {\mdivd\M}  1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\mmulld\M} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\mmodsd\M} 1 { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/clone3.c 
b/gcc/testsuite/gcc.target/powerpc/clone3.c
index 911b88b781d..d3eb4dd2378 100644
--- a/gcc/testsuite/gcc.target/powerpc/clone3.c
+++ b/gcc/testsuite/gcc.target/powerpc/clone3.c
@@ -27,7 +27,8 @@  long mod_func_or (long a, long b, long c)
    return mod_func (a, b) | c;
  }

-/* { dg-final { scan-assembler-times {\mdivd\M}  1 } } */
-/* { dg-final { scan-assembler-times {\mmulld\M} 1 } } */
-/* { dg-final { scan-assembler-times {\mmodsd\M} 2 } } */
+/* { Fail due to RS6000_DISABLE_SCALAR_MODULO. */
+/* { dg-final { scan-assembler-times {\mdivd\M}  1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\mmulld\M} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {\mmodsd\M} 2 { xfail *-*-* } } } */
  /* { dg-final { scan-assembler-times {\mpld\M}   1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/mod-1.c 
b/gcc/testsuite/gcc.target/powerpc/mod-1.c
index 861ba670af4..74af98f5bc3 100644
--- a/gcc/testsuite/gcc.target/powerpc/mod-1.c
+++ b/gcc/testsuite/gcc.target/powerpc/mod-1.c
@@ -7,13 +7,14 @@  long lsmod (long a, long b) { return a%b; }
  unsigned int iumod (unsigned int a, unsigned int b) { return a%b; }
  unsigned long lumod (unsigned long a, unsigned long b) { return a%b; }

-/* { dg-final { scan-assembler-times "modsw " 1 } } */
-/* { dg-final { scan-assembler-times "modsd " 1 } } */
-/* { dg-final { scan-assembler-times "moduw " 1 } } */
-/* { dg-final { scan-assembler-times "modud " 1 } } */
-/* { dg-final { scan-assembler-not   "mullw "   } } */
-/* { dg-final { scan-assembler-not   "mulld "   } } */
-/* { dg-final { scan-assembler-not   "divw "    } } */
-/* { dg-final { scan-assembler-not   "divd "    } } */
-/* { dg-final { scan-assembler-not   "divwu "   } } */
-/* { dg-final { scan-assembler-not   "divdu "   } } */
+/* { Fail due to RS6000_DISABLE_SCALAR_MODULO. */
+/* { dg-final { scan-assembler-times "modsw " 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "modsd " 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "moduw " 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "modud " 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not   "mullw "   { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not   "mulld "   { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not   "divw "    { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not   "divd "    { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not   "divwu "   { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not   "divdu "   { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/mod-2.c 
b/gcc/testsuite/gcc.target/powerpc/mod-2.c
index 441ec5878f1..896e2e35260 100644
--- a/gcc/testsuite/gcc.target/powerpc/mod-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/mod-2.c
@@ -5,8 +5,9 @@ 
  int ismod (int a, int b) { return a%b; }
  unsigned int iumod (unsigned int a, unsigned int b) { return a%b; }

-/* { dg-final { scan-assembler-times "modsw " 1 } } */
-/* { dg-final { scan-assembler-times "moduw " 1 } } */
-/* { dg-final { scan-assembler-not   "mullw "   } } */
-/* { dg-final { scan-assembler-not   "divw "    } } */
-/* { dg-final { scan-assembler-not   "divwu "   } } */
+/* { Fail due to RS6000_DISABLE_SCALAR_MODULO. */
+/* { dg-final { scan-assembler-times "modsw " 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "moduw " 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not   "mullw "   { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not   "divw "    { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-not   "divwu "   { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/p10-vdivq-vmodq.c 
b/gcc/testsuite/gcc.target/powerpc/p10-vdivq-vmodq.c
index 84685e5ff43..148998c8c9d 100644
--- a/gcc/testsuite/gcc.target/powerpc/p10-vdivq-vmodq.c
+++ b/gcc/testsuite/gcc.target/powerpc/p10-vdivq-vmodq.c
@@ -23,5 +23,6 @@  __int128 s_mod(__int128 a, __int128 b)

  /* { dg-final { scan-assembler {\mvdivsq\M} } } */
  /* { dg-final { scan-assembler {\mvdivuq\M} } } */
-/* { dg-final { scan-assembler {\mvmodsq\M} } } */
-/* { dg-final { scan-assembler {\mvmoduq\M} } } */
+/* { Fail due to RS6000_DISABLE_SCALAR_MODULO. */
+/* { dg-final { scan-assembler {\mvmodsq\M} { xfail *-*-* } } } */
+/* { dg-final { scan-assembler {\mvmoduq\M} { xfail *-*-* } } } */