[2/2,v2] arm: move the switch tables for Arm to the RO data section

Message ID db79cb37-db68-671d-57f0-49e70ccb11e9@arm.com
State Unresolved
Headers
Series None |

Checks

Context Check Description
snail/gcc-patch-check warning Git am fail log

Commit Message

Richard Ball Oct. 27, 2023, 2:55 p.m. UTC
  v2: Formatting and nits fixed.

Follow up patch to arm: Use deltas for Arm switch tables
This patch moves the switch tables for Arm from the .text section
into the .rodata section.

gcc/ChangeLog:

	* config/arm/aout.h: Change to use the Lrtx label.
	* config/arm/arm.h (CASE_VECTOR_PC_RELATIVE): Remove arm targets
        from (!target_pure_code) condition.
        (ADDR_VEC_ALIGN): Add align for tables in rodata section.
	* config/arm/arm.cc (arm_output_casesi): Alter the function to include
        .Lrtx label and remove adr instructions.
	* config/arm/arm.md
        (arm_casesi_internal): Use force_reg to generate ldr instructions that
        would otherwise be out of range, and change rtl to accommodate force reg.
        Additionally remove unnecessary register temp.
        (casesi): Remove pure code check for Arm.
	* config/arm/elf.h (JUMP_TABLES_IN_TEXT_SECTION): Remove arm
        targets from JUMP_TABLES_IN_TEXT_SECTION definition.

gcc/testsuite/ChangeLog:

	* gcc.target/arm/arm-switchstatement.c: Alter the tests to
        change adr instruction to ldr.
  

Comments

Richard Earnshaw Oct. 30, 2023, 10:44 a.m. UTC | #1
On 27/10/2023 15:55, Richard Ball wrote:
> v2: Formatting and nits fixed.
> 
> Follow up patch to arm: Use deltas for Arm switch tables
> This patch moves the switch tables for Arm from the .text section
> into the .rodata section.
> 
> gcc/ChangeLog:
> 
> 	* config/arm/aout.h: Change to use the Lrtx label.
> 	* config/arm/arm.h (CASE_VECTOR_PC_RELATIVE): Remove arm targets
>          from (!target_pure_code) condition.
>          (ADDR_VEC_ALIGN): Add align for tables in rodata section.
> 	* config/arm/arm.cc (arm_output_casesi): Alter the function to include
>          .Lrtx label and remove adr instructions.
> 	* config/arm/arm.md
>          (arm_casesi_internal): Use force_reg to generate ldr instructions that
>          would otherwise be out of range, and change rtl to accommodate force reg.
>          Additionally remove unnecessary register temp.
>          (casesi): Remove pure code check for Arm.
> 	* config/arm/elf.h (JUMP_TABLES_IN_TEXT_SECTION): Remove arm
>          targets from JUMP_TABLES_IN_TEXT_SECTION definition.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/arm/arm-switchstatement.c: Alter the tests to
>          change adr instruction to ldr.

OK.

Reviewed-by: rearnsha@arm.com

R.
  

Patch

diff --git a/gcc/config/arm/aout.h b/gcc/config/arm/aout.h
index 6a4c8da5f6d5a1695518f42830b9d045888eeed6..49896bb962081a5ee4b5328029813c681c489a9e 100644
--- a/gcc/config/arm/aout.h
+++ b/gcc/config/arm/aout.h
@@ -187,16 +187,16 @@ 
 	  switch (GET_MODE (body))					\
 	    {								\
 	    case E_QImode:						\
-	      asm_fprintf (STREAM, "\t.byte\t(%LL%d-%LL%d-4)/4\n",	\
+	      asm_fprintf (STREAM, "\t.byte\t(%LL%d-%LLrtx%d-4)/4\n",	\
 			   VALUE, REL);					\
 	      break;							\
 	    case E_HImode:						\
-	      asm_fprintf (STREAM, "\t.2byte\t(%LL%d-%LL%d-4)/4\n",	\
+	      asm_fprintf (STREAM, "\t.2byte\t(%LL%d-%LLrtx%d-4)/4\n",	\
 			   VALUE, REL);					\
 	      break;							\
 	    case E_SImode:						\
 	      if (flag_pic)						\
-		asm_fprintf (STREAM, "\t.word\t%LL%d-%LL%d-4\n",	\
+		asm_fprintf (STREAM, "\t.word\t%LL%d-%LLrtx%d-4\n",	\
 			     VALUE, REL);				\
 	      else							\
 		asm_fprintf (STREAM, "\t.word\t%LL%d\n", VALUE);	\
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 3063e3489094f04ecf03a52952c185d4a75da645..a9c2752c0ea5ecd4597ded254e9426753ac0a098 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -2092,10 +2092,11 @@  enum arm_auto_incmodes
    for the index in the tablejump instruction.  */
 #define CASE_VECTOR_MODE Pmode
 
-#define CASE_VECTOR_PC_RELATIVE ((TARGET_ARM || TARGET_THUMB2		\
-				  || (TARGET_THUMB1			\
-				      && (optimize_size || flag_pic)))	\
-				 && (!target_pure_code))
+#define CASE_VECTOR_PC_RELATIVE						\
+   (TARGET_ARM								\
+    || (!target_pure_code						\
+	&& (TARGET_THUMB2						\
+	    || (TARGET_THUMB1 && (optimize_size || flag_pic)))))
 
 
 #define CASE_VECTOR_SHORTEN_MODE(min, max, body)			\
@@ -2301,8 +2302,14 @@  extern int making_const_table;
 	asm_fprintf (STREAM, "\tpop {%r}\n", REGNO);	\
     } while (0)
 
-#define ADDR_VEC_ALIGN(JUMPTABLE)	\
-  ((TARGET_THUMB && GET_MODE (PATTERN (JUMPTABLE)) == SImode) ? 2 : 0)
+/* If the switch table is in the code segment, additional alignment is
+   needed for Thumb SImode tables.  Otherwise, tables in RO data have
+   natural alignment.  */
+#define ADDR_VEC_ALIGN(TABLE)						\
+  (JUMP_TABLES_IN_TEXT_SECTION						\
+   ? ((TARGET_THUMB && GET_MODE (PATTERN (TABLE)) == SImode) ? 2 : 0)	\
+   : (exact_log2 (GET_MODE_ALIGNMENT (GET_MODE (PATTERN (TABLE)))	\
+		  / BITS_PER_UNIT)))
 
 /* Alignment for case labels comes from ADDR_VEC_ALIGN; avoid the
    default alignment from elfos.h.  */
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index 4e5e6997ed555372683e01b2aff5c25265f4e50c..620ef7bfb2f3af9b8de576359a6157190c439aad 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -30469,44 +30469,55 @@  arm_output_iwmmxt_tinsr (rtx *operands)
 const char *
 arm_output_casesi (rtx *operands)
 {
+  char label[100];
   rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
-
   gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
-
   output_asm_insn ("cmp\t%0, %1", operands);
   output_asm_insn ("bhi\t%l3", operands);
+  ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
   switch (GET_MODE (diff_vec))
     {
     case E_QImode:
-      output_asm_insn ("adr\t%4, %l2", operands);
       if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
-	output_asm_insn ("ldrb\t%4, [%4, %0]", operands);
-     else
-	output_asm_insn ("ldrsb\t%4, [%4, %0]", operands);
-      return "add\t%|pc, %|pc, %4, lsl #2";
-
+	output_asm_insn ("ldrb\t%4, [%5, %0]", operands);
+      else
+	output_asm_insn ("ldrsb\t%4, [%5, %0]", operands);
+      output_asm_insn ("add\t%|pc, %|pc, %4, lsl #2", operands);
+      break;
     case E_HImode:
-      output_asm_insn ("adr\t%4, %l2", operands);
-      output_asm_insn ("add\t%4, %4, %0", operands);
-      if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
-	output_asm_insn ("ldrh\t%4, [%4, %0]", operands);
-     else
-	output_asm_insn ("ldrsh\t%4, [%4, %0]", operands);
-      return "add\t%|pc, %|pc, %4, lsl #2";
-
+      if (REGNO (operands[4]) != REGNO (operands[5]))
+	{
+	  output_asm_insn ("add\t%4, %0, %0", operands);
+	  if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
+	    output_asm_insn ("ldrh\t%4, [%5, %4]", operands);
+	  else
+	    output_asm_insn ("ldrsh\t%4, [%5, %4]", operands);
+	}
+      else
+	{
+	  output_asm_insn ("add\t%4, %5, %0", operands);
+	  if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
+	    output_asm_insn ("ldrh\t%4, [%4, %0]", operands);
+	  else
+	    output_asm_insn ("ldrsh\t%4, [%4, %0]", operands);
+	}
+      output_asm_insn ("add\t%|pc, %|pc, %4, lsl #2", operands);
+      break;
     case E_SImode:
       if (flag_pic)
 	{
-	  output_asm_insn ("adr\t%4, %l2", operands);
-	  output_asm_insn ("ldr\t%4, [%4, %0, lsl #2]", operands);
-	  return "add\t%|pc, %|pc, %4";
+	  output_asm_insn ("ldr\t%4, [%5, %0, lsl #2]", operands);
+	  output_asm_insn ("add\t%|pc, %|pc, %4", operands);
 	}
-      output_asm_insn ("adr\t%4, %l2", operands);
-      return "ldr\t%|pc, [%4, %0, lsl #2]";
-
+      else
+	output_asm_insn ("ldr\t%|pc, [%5, %0, lsl #2]", operands);
+      break;
     default:
       gcc_unreachable ();
     }
+    assemble_label (asm_out_file, label);
+    output_asm_insn ("nop", operands);
+  return "";
 }
 
 /* Output a Thumb-1 casesi dispatch sequence.  */
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 0b2eb4bce92bb7e8b1ca0c5a04b1a52e9c16b64a..07eaf06cdeace750fe1c7d399deb833ef5fc2b66 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -9514,7 +9514,9 @@ 
    (match_operand:SI 2 "const_int_operand")	; total range
    (match_operand:SI 3 "" "")			; table label
    (match_operand:SI 4 "" "")]			; Out of range label
-  "(TARGET_32BIT || optimize_size || flag_pic) && !target_pure_code"
+   "(TARGET_ARM
+     || (!target_pure_code
+         && (TARGET_THUMB2 || optimize_size || flag_pic)))"
   "
   {
     enum insn_code code;
@@ -9557,14 +9559,14 @@ 
 		(label_ref:SI (match_operand 3 ""))))
 	      (clobber (reg:CC CC_REGNUM))
 	      (clobber (match_scratch:SI 5))
-	      (clobber (match_scratch:SI 6))
 	      (use (label_ref:SI (match_operand 2 "")))])]
   "TARGET_ARM"
 {
-  operands[4] = gen_rtx_MULT (SImode, operands[0], GEN_INT (4));
-  operands[4] = gen_rtx_PLUS (SImode, operands[4],
-			      gen_rtx_LABEL_REF (SImode, operands[2]));
-  operands[4] = gen_rtx_MEM (SImode, operands[4]);
+  rtx vec_table_ref = force_reg (SImode, gen_rtx_LABEL_REF (SImode, operands[2]));
+  rtx tmp = gen_rtx_MULT (SImode, operands[0], GEN_INT (4));
+  tmp = gen_rtx_PLUS (SImode, tmp,
+			      vec_table_ref);
+  operands[4] = gen_rtx_MEM (SImode, tmp);
   MEM_READONLY_P (operands[4]) = 1;
   MEM_NOTRAP_P (operands[4]) = 1;
 })
@@ -9575,12 +9577,11 @@ 
 		(leu (match_operand:SI 0 "s_register_operand" "r")
 		     (match_operand:SI 1 "arm_rhs_operand" "rI"))
 		(mem:SI (plus:SI (mult:SI (match_dup 0) (const_int 4))
-				 (label_ref:SI (match_operand 2 "" ""))))
+				 (match_operand:SI 5 "s_register_operand" "r")))
 		(label_ref:SI (match_operand 3 "" ""))))
 	      (clobber (reg:CC CC_REGNUM))
-	      (clobber (match_scratch:SI 4 "=&r"))
-	      (clobber (match_scratch:SI 5 "=r"))
-	      (use (label_ref:SI (match_dup 2)))])]
+	      (clobber (match_scratch:SI 4 "=r"))
+	      (use (label_ref:SI (match_operand 2 "")))])]
   "TARGET_ARM"
   {
     return arm_output_casesi (operands);
diff --git a/gcc/config/arm/elf.h b/gcc/config/arm/elf.h
index 5766cb4a9888512889334c48ccfbe46e9980f800..f766828989604bf64c57c6cbdb28dd9c622063f6 100644
--- a/gcc/config/arm/elf.h
+++ b/gcc/config/arm/elf.h
@@ -91,11 +91,21 @@ 
 /* Define this macro if jump tables (for `tablejump' insns) should be
    output in the text section, along with the assembler instructions.
    Otherwise, the readonly data section is used.  */
-/* We put ARM and Thumb-2 jump tables in the text section, because it makes
-   the code more efficient, but for Thumb-1 it's better to put them out of
-   band unless we are generating compressed tables.  */
+/* The choice of placement for jump tables is nuanced.  For cores with
+   Harvard caches (pretty much all cases these days), there is a
+   benefit of maintaing a separation between I- and D-cache candidates
+   and that favors having jump tables in the RO data section.  This
+   makes the dispatch sequence slightly longer as we need to load the
+   address of the jump table first, but we often save elsewhere as the
+   content of the code section becomes better packed and we need
+   fewer long-range branch operations.  We also require the dispatch
+   table to be in the RO section when compiling for pure code.  We
+   currently place jump tables in the RO-data section for Arm or
+   whenever pure code is required; we also do it for Thumb-1 when
+   using an ADDR_VEC.  The remaining cases put the jump table in the
+   text section, but we should revisit this choice.  */
 #define JUMP_TABLES_IN_TEXT_SECTION					\
-   ((TARGET_32BIT || (TARGET_THUMB && (optimize_size || flag_pic))) \
+   ((TARGET_THUMB2 || (TARGET_THUMB && (optimize_size || flag_pic))) \
     && !target_pure_code)
 
 #ifndef LINK_SPEC
diff --git a/gcc/testsuite/gcc.target/arm/arm-switchstatement.c b/gcc/testsuite/gcc.target/arm/arm-switchstatement.c
index a7aa9d45c7634db6c8192072019e497db969b681..803731c479c66e9e684cae532a6071335c042f0e 100644
--- a/gcc/testsuite/gcc.target/arm/arm-switchstatement.c
+++ b/gcc/testsuite/gcc.target/arm/arm-switchstatement.c
@@ -53,9 +53,10 @@  inline void SIFunction (const char* flag)
 /*
 **QImode_test:
 **	...
-**	adr	(r[0-9]+), .L[0-9]+
-**	ldrb	\1, \[\1, r[0-9]+\]
-**	add	pc, pc, \1, lsl #2
+**	ldr	(r[0-9]+), .L[0-9]+
+**	...
+**	ldrb	(r[0-9]+), \[\1, r[0-9]+\]
+**	add	pc, pc, \2, lsl #2
 **	...
 */
 __attribute__ ((noinline)) __attribute__ ((noclone)) const char* QImode_test(enum z x)
@@ -77,10 +78,11 @@  __attribute__ ((noinline)) __attribute__ ((noclone)) const char* QImode_test(enu
 /*
 **HImode_test:
 **	...
-**	adr	(r[0-9]+), .L[0-9]+
-**	add	\1, \1, (r[0-9]+)
-**	ldrh	\1, \[\1, \2\]
-**	add	pc, pc, \1, lsl #2
+**	ldr	(r[0-9]+), .L[0-9]+
+**	...
+**	add	r[0-9]+, r[0-9]+, r[0-9]+
+**	ldrh	(r[0-9]+), \[r[0-9]+, r[0-9]+]
+**	add	pc, pc, \2, lsl #2
 **	...
 */
 __attribute__ ((noinline)) __attribute__ ((noclone)) const char* HImode_test(enum z x)
@@ -102,7 +104,8 @@  __attribute__ ((noinline)) __attribute__ ((noclone)) const char* HImode_test(enu
 /*
 **SImode_test:
 **	...
-**	adr	(r[0-9]+), .L[0-9]+
+**	ldr	(r[0-9]+), .L[0-9]+
+**	...
 **	ldr	pc, \[\1, r[0-9]+, lsl #2\]
 **	...
 */
@@ -125,10 +128,11 @@  __attribute__ ((noinline)) __attribute__ ((noclone)) const char* SImode_test(enu
 /*
 **backwards_branch_test:
 **	...
-**	adr	(r[0-9]+), .L[0-9]+
-**	add	\1, \1, (r[0-9]+)
-**	ldrsh	\1, \[\1, \2\]
-**	add	pc, pc, \1, lsl #2
+**	ldr	(r[0-9]+), .L[0-9]+
+**	...
+**	add	r[0-9]+, r[0-9]+, r[0-9]+
+**	ldrsh	(r[0-9]+), \[r[0-9]+, r[0-9]+]
+**	add	pc, pc, \2, lsl #2
 **	...
 */
 __attribute__ ((noinline)) __attribute__ ((noclone)) const char* backwards_branch_test(enum z x, int flag)