[COMMITTED] i386: Account for the memory read in V*QImode multiplication sequences

Message ID CAFULd4ZKo5_-H4OBXGB=7353=tg0Mr8rYaWFbJ5HAKSFZdP30g@mail.gmail.com
State Unresolved
Headers
Series [COMMITTED] i386: Account for the memory read in V*QImode multiplication sequences |

Checks

Context | Check | Description
snail/gcc-patch-check | warning | Git am fail log

Commit Message

Uros Bizjak May 22, 2023, 2:36 p.m. UTC
  Add the cost of a memory read to the cost of V*QImode vector multiplication sequences.

gcc/ChangeLog:

    * config/i386/i386.cc (ix86_multiplication_cost): Add
    the cost of a memory read to the cost of V?QImode sequences.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Uros.
  

Patch

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 6a4b3326219..a36e625342d 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -20463,27 +20463,42 @@  ix86_multiplication_cost (const struct processor_costs *cost,
       {
       case V4QImode:
       case V8QImode:
-	/* Partial V*QImode is emulated with 4-5 insns.  */
-	if ((TARGET_AVX512BW && TARGET_AVX512VL) || TARGET_XOP)
+	/* Partial V*QImode is emulated with 4-6 insns.  */
+	if (TARGET_AVX512BW && TARGET_AVX512VL)
 	  return ix86_vec_cost (mode, cost->mulss + cost->sse_op * 3);
+	else if (TARGET_AVX2)
+	  return ix86_vec_cost (mode, cost->mulss + cost->sse_op * 5);
+	else if (TARGET_XOP)
+	  return (ix86_vec_cost (mode, cost->mulss + cost->sse_op * 3)
+		  + cost->sse_load[2]);
 	else
-	  return ix86_vec_cost (mode, cost->mulss + cost->sse_op * 4);
+	  return (ix86_vec_cost (mode, cost->mulss + cost->sse_op * 4)
+		  + cost->sse_load[2]);
 
       case V16QImode:
 	/* V*QImode is emulated with 4-11 insns.  */
 	if (TARGET_AVX512BW && TARGET_AVX512VL)
 	  return ix86_vec_cost (mode, cost->mulss + cost->sse_op * 3);
+	else if (TARGET_AVX2)
+	  return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 8);
 	else if (TARGET_XOP)
-	  return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5);
-	/* FALLTHRU */
+	  return (ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5)
+		  + cost->sse_load[2]);
+	else
+	  return (ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 7)
+		  + cost->sse_load[2]);
+
       case V32QImode:
-	if (TARGET_AVX512BW && mode == V32QImode)
+	if (TARGET_AVX512BW)
 	  return ix86_vec_cost (mode, cost->mulss + cost->sse_op * 3);
 	else
-	  return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 7);
+	  return (ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 7)
+		  + cost->sse_load[3] * 2);
 
       case V64QImode:
-	return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 9);
+	return (ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 9)
+		+ cost->sse_load[3] * 2
+		+ cost->sse_load[4] * 2);
 
       case V4SImode:
 	/* pmulld is used in this case. No emulation is needed.  */