x86: drop identifier_chars[]

Message ID 312cb612-378a-be36-6f6c-62df7313975d@suse.com
State Accepted
Headers
Series x86: drop identifier_chars[] |

Checks

Context Check Description
snail/binutils-gdb-check success Github commit url

Commit Message

Jan Beulich March 10, 2023, 10:11 a.m. UTC
  It tries to resemble what's underlying is_part_of_name(), but doesn't
quite achieve that: '$' for example is unconditionally marked as part of
symbol names, but was included as identifier char for Intel syntax only.
Note that i386_att_operand() checks for the immediate prefix first, so
the wider coverage by starts_memory_operand() has no real effect
there, but it does matter for something like

	mov	%fs:$dollar, %eax

which previously wasn't accepted (but which clearly is a memory
reference - there's no point in forcing people to parenthesize the
symbol name). Similarly including '%' as an identifier char for Intel syntax
had no real significance to the rest of the assembler. If '%' was to be
valid in (unquoted) symbol names, LEX_PCT would need to be defined.

Note further that this also addresses the latent issue of a sub-target
defining LEX_AT or LEX_QM to zero: That would make '@' and/or '?' not a
valid part of symbol names, but they would still have been included in what
is_identifier_char() considers a valid part of a name. (There's a minor
related issue which is actually being eliminated: te-interix.h allows
'@' only in the middle of symbol names, yet starts_memory_operand()
specifically looks at the first character of [possibly] a symbol name.)

In parse_real_register() there's no point also checking is_name_ender()
as at this point no character is marked solely LEX_END_NAME by any sub-
target. Checking is_name_beginner() is also pointless as the hash lookup
will fail anyway for a zero-length name.

While touching the check in parse_real_register() also drop the
"allow_naked_reg" part of the condition: This has only led to
inconsistent error messages.
  

Patch

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -531,14 +531,12 @@  const char FLT_CHARS[] = "fFdDxXhHbB";
 static char mnemonic_chars[256];
 static char register_chars[256];
 static char operand_chars[256];
-static char identifier_chars[256];
 
 /* Lexical macros.  */
 #define is_mnemonic_char(x) (mnemonic_chars[(unsigned char) x])
 #define is_operand_char(x) (operand_chars[(unsigned char) x])
 #define is_register_char(x) (register_chars[(unsigned char) x])
 #define is_space_char(x) ((x) == ' ')
-#define is_identifier_char(x) (identifier_chars[(unsigned char) x])
 
 /* All non-digit non-letter characters that may occur in an operand.  */
 static char operand_special_chars[] = "%$-+(,)*._~/<>|&^!:[@]";
@@ -2611,8 +2609,6 @@  set_intel_syntax (int syntax_flag)
 
   expr_set_rank (O_full_ptr, syntax_flag ? 10 : 0);
 
-  identifier_chars['%'] = intel_syntax && allow_naked_reg ? '%' : 0;
-  identifier_chars['$'] = intel_syntax ? '$' : 0;
   register_prefix = allow_naked_reg ? "" : "%";
 }
 
@@ -3076,27 +3072,16 @@  md_begin (void)
 	  operand_chars[c] = c;
 #endif
 
-	if (ISALPHA (c) || ISDIGIT (c))
-	  identifier_chars[c] = c;
-	else if (c >= 128)
-	  {
-	    identifier_chars[c] = c;
-	    operand_chars[c] = c;
-	  }
+	if (c >= 128)
+	  operand_chars[c] = c;
       }
 
-#ifdef LEX_AT
-    identifier_chars['@'] = '@';
-#endif
 #ifdef LEX_QM
-    identifier_chars['?'] = '?';
     operand_chars['?'] = '?';
 #endif
     mnemonic_chars['_'] = '_';
     mnemonic_chars['-'] = '-';
     mnemonic_chars['.'] = '.';
-    identifier_chars['_'] = '_';
-    identifier_chars['.'] = '.';
 
     for (p = operand_special_chars; *p != '\0'; p++)
       operand_chars[(unsigned char) *p] = *p;
@@ -11579,7 +11564,7 @@  RC_SAE_immediate (const char *imm_start)
 static INLINE bool starts_memory_operand (char c)
 {
   return ISDIGIT (c)
-	 || is_identifier_char (c)
+	 || is_name_beginner (c)
 	 || strchr ("([\"+-!~", c);
 }
 
@@ -13069,10 +13054,7 @@  parse_real_register (char *reg_string, c
       s++;
     }
 
-  /* For naked regs, make sure that we are not dealing with an identifier.
-     This prevents confusing an identifier like `eax_var' with register
-     `eax'.  */
-  if (allow_naked_reg && identifier_chars[(unsigned char) *s])
+  if (is_part_of_name (*s))
     return (const reg_entry *) NULL;
 
   *end_op = s;