[1/6] x86: last-insn recording should be per-section

Message ID 94626e2b-0e73-4267-9c80-cb25e1dbab9b@suse.com
State Unresolved
Headers
Series correct and further utilize x86'es last-insn tracking |

Checks

Context Check Description
snail/binutils-gdb-check warning Git am fail log

Commit Message

Jan Beulich Nov. 24, 2023, 9:03 a.m. UTC
  Otherwise intermediate section switches result in inconsistent behavior.
Note, however, that intermediate sub-section switches will continue to
result in inconsistent or even inappropriate behavior.

While there also add recording of state to s_insn().
---
i386_cons_align() qualifying the recording on SEC_CODE seems suspicious,
too: md_assemble() isn't constrained to emitting instructions to only
text sections.

For sub-sections the best I can think of that we could do is warn when
now_subseg is nonzero wherever we already warn based on last_insn state
(and similarly bail rather than inserting any code). That would have
not really intended effects in subsequent patches, though: We'd then
generally suppress optimizations outside of subseg 0, and we'd also
prefix plain old NOPs there (in i386_generate_nops()).

For .insn I'm not entirely decided: We might also assume that only
proper insns are constructed this way, and hence record last_insn_other
in s_insn().
  

Comments

Jan Beulich Nov. 24, 2023, 1:29 p.m. UTC | #1
On 24.11.2023 10:03, Jan Beulich wrote:
> Otherwise intermediate section switches result in inconsistent behavior.
> Note, however, that intermediate sub-section switches will continue to
> result in inconsistent or even inappropriate behavior.
> 
> While there also add recording of state to s_insn().
> ---
> i386_cons_align() qualifying the recording on SEC_CODE seems suspicious,
> too: md_assemble() isn't constrained to emitting instructions to only
> text sections.
> 
> For sub-sections the best I can think of that we could do is warn when
> now_subseg is nonzero wherever we already warn based on last_insn state
> (and similarly bail rather than inserting any code). That would have
> not really intended effects in subsequent patches, though: We'd then
> generally suppress optimizations outside of subseg 0, and we'd also
> prefix plain old NOPs there (in i386_generate_nops()).

With some prereq tweaking of (at least) obj-elf.c, perhaps we could leverage
md_elf_section_change_hook() to deal with subsections. The prereq tweaking
would be needed as from my initial investigations I'm getting the impression
that not all section switches actually properly invoke that hook.

Jan
  

Patch

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -178,7 +178,7 @@  static int check_word_reg (void);
 static int finalize_imm (void);
 static int process_operands (void);
 static const reg_entry *build_modrm_byte (void);
-static void output_insn (void);
+static void output_insn (const struct last_insn *);
 static void output_imm (fragS *, offsetT);
 static void output_disp (fragS *, offsetT);
 #ifndef I386COFF
@@ -677,21 +677,6 @@  static enum lfence_before_ret_kind
   }
 lfence_before_ret;
 
-/* Types of previous instruction is .byte or prefix.  */
-static struct
-  {
-    segT seg;
-    const char *file;
-    const char *name;
-    unsigned int line;
-    enum last_insn_kind
-      {
-	last_insn_other = 0,
-	last_insn_directive,
-	last_insn_prefix
-      } kind;
-  } last_insn;
-
 /* 1 if the assembler should generate relax relocations.  */
 
 static int generate_relax_relocations
@@ -4971,7 +4956,7 @@  insert_lfence_after (void)
 /* Output lfence, 0xfaee8, before instruction.  */
 
 static void
-insert_lfence_before (void)
+insert_lfence_before (const struct last_insn *last_insn)
 {
   char *p;
 
@@ -5007,12 +4992,11 @@  insert_lfence_before (void)
       else
 	return;
 
-      if (last_insn.kind != last_insn_other
-	  && last_insn.seg == now_seg)
+      if (last_insn->kind != last_insn_other)
 	{
-	  as_warn_where (last_insn.file, last_insn.line,
+	  as_warn_where (last_insn->file, last_insn->line,
 			 _("`%s` skips -mlfence-before-indirect-branch on `%s`"),
-			 last_insn.name, insn_name (&i.tm));
+			 last_insn->name, insn_name (&i.tm));
 	  return;
 	}
 
@@ -5027,12 +5011,11 @@  insert_lfence_before (void)
   if (lfence_before_ret != lfence_before_ret_none
       && (i.tm.base_opcode | 1) == 0xc3)
     {
-      if (last_insn.kind != last_insn_other
-	  && last_insn.seg == now_seg)
+      if (last_insn->kind != last_insn_other)
 	{
-	  as_warn_where (last_insn.file, last_insn.line,
+	  as_warn_where (last_insn->file, last_insn->line,
 			 _("`%s` skips -mlfence-before-ret on `%s`"),
-			 last_insn.name, insn_name (&i.tm));
+			 last_insn->name, insn_name (&i.tm));
 	  return;
 	}
 
@@ -5129,6 +5112,8 @@  md_assemble (char *line)
   const char *end, *pass1_mnem = NULL;
   enum i386_error pass1_err = 0;
   const insn_template *t;
+  struct last_insn *last_insn
+    = &seg_info(now_seg)->tc_segment_info_data.last_insn;
 
   /* Initialize globals.  */
   current_templates.end = current_templates.start = NULL;
@@ -5682,23 +5667,21 @@  md_assemble (char *line)
   if (i.rex != 0)
     add_prefix (REX_OPCODE | i.rex);
 
-  insert_lfence_before ();
+  insert_lfence_before (last_insn);
 
   /* We are ready to output the insn.  */
-  output_insn ();
+  output_insn (last_insn);
 
   insert_lfence_after ();
 
-  last_insn.seg = now_seg;
-
   if (i.tm.opcode_modifier.isprefix)
     {
-      last_insn.kind = last_insn_prefix;
-      last_insn.name = insn_name (&i.tm);
-      last_insn.file = as_where (&last_insn.line);
+      last_insn->kind = last_insn_prefix;
+      last_insn->name = insn_name (&i.tm);
+      last_insn->file = as_where (&last_insn->line);
     }
   else
-    last_insn.kind = last_insn_other;
+    last_insn->kind = last_insn_other;
 }
 
 /* The Q suffix is generally valid only in 64-bit mode, with very few
@@ -9667,7 +9650,8 @@  maybe_fused_with_jcc_p (enum mf_cmp_kind
 /* Return 1 if a FUSED_JCC_PADDING frag should be generated.  */
 
 static int
-add_fused_jcc_padding_frag_p (enum mf_cmp_kind* mf_cmp_p)
+add_fused_jcc_padding_frag_p (enum mf_cmp_kind *mf_cmp_p,
+			      const struct last_insn *last_insn)
 {
   /* NB: Don't work with COND_JUMP86 without i386.  */
   if (!align_branch_power
@@ -9678,13 +9662,12 @@  add_fused_jcc_padding_frag_p (enum mf_cm
 
   if (maybe_fused_with_jcc_p (mf_cmp_p))
     {
-      if (last_insn.kind == last_insn_other
-	  || last_insn.seg != now_seg)
+      if (last_insn->kind == last_insn_other)
 	return 1;
       if (flag_debug)
-	as_warn_where (last_insn.file, last_insn.line,
+	as_warn_where (last_insn->file, last_insn->line,
 		       _("`%s` skips -malign-branch-boundary on `%s`"),
-		       last_insn.name, insn_name (&i.tm));
+		       last_insn->name, insn_name (&i.tm));
     }
 
   return 0;
@@ -9693,7 +9676,7 @@  add_fused_jcc_padding_frag_p (enum mf_cm
 /* Return 1 if a BRANCH_PREFIX frag should be generated.  */
 
 static int
-add_branch_prefix_frag_p (void)
+add_branch_prefix_frag_p (const struct last_insn *last_insn)
 {
   /* NB: Don't work with COND_JUMP86 without i386.  Don't add prefix
      to PadLock instructions since they include prefixes in opcode.  */
@@ -9709,14 +9692,13 @@  add_branch_prefix_frag_p (void)
   if (!i.operands || i.tm.opcode_modifier.isprefix)
     return 0;
 
-  if (last_insn.kind == last_insn_other
-      || last_insn.seg != now_seg)
+  if (last_insn->kind == last_insn_other)
     return 1;
 
   if (flag_debug)
-    as_warn_where (last_insn.file, last_insn.line,
+    as_warn_where (last_insn->file, last_insn->line,
 		   _("`%s` skips -malign-branch-boundary on `%s`"),
-		   last_insn.name, insn_name (&i.tm));
+		   last_insn->name, insn_name (&i.tm));
 
   return 0;
 }
@@ -9725,7 +9707,8 @@  add_branch_prefix_frag_p (void)
 
 static int
 add_branch_padding_frag_p (enum align_branch_kind *branch_p,
-			   enum mf_jcc_kind *mf_jcc_p)
+			   enum mf_jcc_kind *mf_jcc_p,
+			   const struct last_insn *last_insn)
 {
   int add_padding;
 
@@ -9799,13 +9782,12 @@  add_branch_padding_frag_p (enum align_br
     }
 
   if (add_padding
-      && last_insn.kind != last_insn_other
-      && last_insn.seg == now_seg)
+      && last_insn->kind != last_insn_other)
     {
       if (flag_debug)
-	as_warn_where (last_insn.file, last_insn.line,
+	as_warn_where (last_insn->file, last_insn->line,
 		       _("`%s` skips -malign-branch-boundary on `%s`"),
-		       last_insn.name, insn_name (&i.tm));
+		       last_insn->name, insn_name (&i.tm));
       return 0;
     }
 
@@ -9813,7 +9795,7 @@  add_branch_padding_frag_p (enum align_br
 }
 
 static void
-output_insn (void)
+output_insn (const struct last_insn *last_insn)
 {
   fragS *insn_start_frag;
   offsetT insn_start_off;
@@ -9943,7 +9925,7 @@  output_insn (void)
   insn_start_frag = frag_now;
   insn_start_off = frag_now_fix ();
 
-  if (add_branch_padding_frag_p (&branch, &mf_jcc))
+  if (add_branch_padding_frag_p (&branch, &mf_jcc, last_insn))
     {
       char *p;
       /* Branch can be 8 bytes.  Leave some room for prefixes.  */
@@ -10029,7 +10011,7 @@  output_insn (void)
       if (branch)
 	/* Skip if this is a branch.  */
 	;
-      else if (add_fused_jcc_padding_frag_p (&mf_cmp))
+      else if (add_fused_jcc_padding_frag_p (&mf_cmp, last_insn))
 	{
 	  /* Make room for padding.  */
 	  frag_grow (MAX_FUSED_JCC_PADDING_SIZE);
@@ -10045,7 +10027,7 @@  output_insn (void)
 	  fragP->tc_frag_data.branch_type = align_branch_fused;
 	  fragP->tc_frag_data.max_bytes = MAX_FUSED_JCC_PADDING_SIZE;
 	}
-      else if (add_branch_prefix_frag_p ())
+      else if (add_branch_prefix_frag_p (last_insn))
 	{
 	  unsigned int max_prefix_size = align_branch_prefix_size;
 
@@ -10948,6 +10930,7 @@  s_insn (int dummy ATTRIBUTE_UNUSED)
   unsigned int j;
   valueT val;
   bool vex = false, xop = false, evex = false;
+  struct last_insn *last_insn;
 
   init_globals ();
 
@@ -11701,7 +11684,11 @@  s_insn (int dummy ATTRIBUTE_UNUSED)
   else if (i.rex != 0)
     add_prefix (REX_OPCODE | i.rex);
 
-  output_insn ();
+  last_insn = &seg_info(now_seg)->tc_segment_info_data.last_insn;
+  output_insn (last_insn);
+  last_insn->kind = last_insn_directive;
+  last_insn->name = ".insn directive";
+  last_insn->file = as_where (&last_insn->line);
 
  done:
   *saved_ilp = saved_char;
@@ -15496,13 +15483,15 @@  s_bss (int ignore ATTRIBUTE_UNUSED)
 void
 i386_cons_align (int ignore ATTRIBUTE_UNUSED)
 {
-  if (last_insn.kind != last_insn_directive
+  struct last_insn *last_insn
+    = &seg_info(now_seg)->tc_segment_info_data.last_insn;
+
+  if (last_insn->kind != last_insn_directive
       && (bfd_section_flags (now_seg) & SEC_CODE))
     {
-      last_insn.seg = now_seg;
-      last_insn.kind = last_insn_directive;
-      last_insn.name = "constant directive";
-      last_insn.file = as_where (&last_insn.line);
+      last_insn->kind = last_insn_directive;
+      last_insn->name = "constant directive";
+      last_insn->file = as_where (&last_insn->line);
       if (lfence_before_ret != lfence_before_ret_none)
 	{
 	  if (lfence_before_indirect_branch != lfence_branch_none)
--- a/gas/config/tc-i386.h
+++ b/gas/config/tc-i386.h
@@ -281,6 +281,23 @@  extern enum i386_flag_code {
 	CODE_64BIT
 } i386_flag_code;
 
+struct i386_segment_info {
+  /* Type of previous "instruction", e.g. .byte or stand-alone prefix.  */
+  struct last_insn {
+    const char *file;
+    const char *name;
+    unsigned int line;
+    enum last_insn_kind
+      {
+	last_insn_other = 0,
+	last_insn_directive,
+	last_insn_prefix
+      } kind;
+  } last_insn;
+};
+
+#define TC_SEGMENT_INFO_TYPE struct i386_segment_info
+
 struct i386_tc_frag_data
 {
   union
--- /dev/null
+++ b/gas/testsuite/gas/i386/lfence-section.d
@@ -0,0 +1,19 @@ 
+#as: -mlfence-before-indirect-branch=all
+#warning_output: lfence-section.e
+#objdump: -dw
+#name: -mlfence-before-indirect-branch=all w/ section switches
+
+.*: +file format .*
+
+
+Disassembly of section .text:
+
+0+ <_start>:
+ +[a-f0-9]+:	f3 ff d0             	repz call \*%eax
+ +[a-f0-9]+:	f3 c3                	repz ret
+ +[a-f0-9]+:	cc                   	int3
+ +[a-f0-9]+:	cc                   	int3
+ +[a-f0-9]+:	cc                   	int3
+
+Disassembly of section \.text2:
+#pass
--- /dev/null
+++ b/gas/testsuite/gas/i386/lfence-section.e
@@ -0,0 +1,3 @@ 
+.*: Assembler messages:
+.*:3: Warning: `rep` skips -mlfence-before-indirect-branch on `call`
+.*:11: Warning: `rep` skips -mlfence-before-ret on `ret`
--- /dev/null
+++ b/gas/testsuite/gas/i386/lfence-section.s
@@ -0,0 +1,19 @@ 
+	.text
+_start:
+	rep
+
+	.section .text2, "ax"
+aux1:
+	nop
+
+	.text
+	call *%eax
+	rep
+
+	.section .text2, "ax"
+aux2:
+	nop
+
+	.text
+	ret
+	.p2align 2, 0xcc
--- a/gas/testsuite/gas/i386/i386.exp
+++ b/gas/testsuite/gas/i386/i386.exp
@@ -642,6 +642,17 @@  if [gas_32_check] then {
     run_dump_test "lfence-ret-c"
     run_dump_test "lfence-ret-d"
     run_dump_test "lfence-byte"
+    # This test uses the .section directive, which only ELF and COFF/PE support.
+    if {[is_elf_format]
+	|| [istarget "*-*-vxworks*"]
+	|| [istarget "*-*-coff*"]
+	|| [istarget "*-*-pe*"]
+	|| [istarget "*-*-cygwin*"]
+	|| [istarget "*-*-mingw*"]
+    } then {
+	run_dump_test "lfence-section"
+    }
+
     run_dump_test "branch"
 
     # These tests require support for 8 and 16 bit relocs,