[v2,08/11] RISC-V: Split match/print steps on disassembler

Message ID c30cdce491780153a7fe89b79ac4b5f1a03f6e5c.1669610611.git.research_trasio@irq.a4lg.com
State Accepted
Headers
Series RISC-V: Requirements for disassembler optimizations batch 1 |

Checks

Context Check Description
snail/binutils-gdb-check success Github commit url

Commit Message

Tsukasa OI Nov. 28, 2022, 4:43 a.m. UTC
  From: Tsukasa OI <research_trasio@irq.a4lg.com>

For further optimization and more disassembler features, we may need to
change the core RISC-V instruction matching.  For this purpose, it is
inconvenient to have "match" and "print" steps in the same loop.

This commit rewrites riscv_disassemble_insn function so that we store
matched_op for matching RISC-V opcode and then print it (if not NULL).

Although it looks a bit inefficient, it also lowers the indent of opcode
matching loop to clarify the opcode matching changes on the next
optimization commit.

Unfortunately, this commit alone will impose some performance penalty (<5%
on most cases but sometimes about 15% worse) but it can be easily paid back
by other optimizations.

opcodes/ChangeLog:

	* riscv-dis.c (riscv_disassemble_insn): Split instruction
	handling to two separate steps - opcode matching and printing.
---
 opcodes/riscv-dis.c | 151 +++++++++++++++++++++++---------------------
 1 file changed, 79 insertions(+), 72 deletions(-)
  

Patch

diff --git a/opcodes/riscv-dis.c b/opcodes/riscv-dis.c
index f5dc1907c6ec..c74827ed19df 100644
--- a/opcodes/riscv-dis.c
+++ b/opcodes/riscv-dis.c
@@ -649,7 +649,7 @@  riscv_disassemble_insn (bfd_vma memaddr,
 			const bfd_byte *packet,
 			disassemble_info *info)
 {
-  const struct riscv_opcode *op;
+  const struct riscv_opcode *op, *matched_op;
   static bool init = false;
   static const struct riscv_opcode *riscv_hash[OP_MASK_OP + 1];
   struct riscv_private_data *pd;
@@ -705,85 +705,92 @@  riscv_disassemble_insn (bfd_vma memaddr,
   info->target = 0;
   info->target2 = 0;
 
+  matched_op = NULL;
   op = riscv_hash[OP_HASH_IDX (word)];
-  if (op != NULL)
+
+  /* If XLEN is not known, get its value from the ELF class.  */
+  if (info->mach == bfd_mach_riscv64)
+    xlen = 64;
+  else if (info->mach == bfd_mach_riscv32)
+    xlen = 32;
+  else if (info->section != NULL)
     {
-      /* If XLEN is not known, get its value from the ELF class.  */
-      if (info->mach == bfd_mach_riscv64)
-	xlen = 64;
-      else if (info->mach == bfd_mach_riscv32)
-	xlen = 32;
-      else if (info->section != NULL)
-	{
-	  Elf_Internal_Ehdr *ehdr = elf_elfheader (info->section->owner);
-	  xlen = ehdr->e_ident[EI_CLASS] == ELFCLASS64 ? 64 : 32;
-	}
+      Elf_Internal_Ehdr *ehdr = elf_elfheader (info->section->owner);
+      xlen = ehdr->e_ident[EI_CLASS] == ELFCLASS64 ? 64 : 32;
+    }
 
-      /* If arch has the Zfinx extension, replace FPR with GPR.  */
-      if (riscv_subset_supports (&riscv_rps_dis, "zfinx"))
-	riscv_fpr_names = riscv_gpr_names;
-      else
-	riscv_fpr_names = riscv_gpr_names == riscv_gpr_names_abi ?
-			  riscv_fpr_names_abi : riscv_fpr_names_numeric;
+  /* If arch has the Zfinx extension, replace FPR with GPR.  */
+  if (riscv_subset_supports (&riscv_rps_dis, "zfinx"))
+    riscv_fpr_names = riscv_gpr_names;
+  else
+    riscv_fpr_names = riscv_gpr_names == riscv_gpr_names_abi
+			  ? riscv_fpr_names_abi
+			  : riscv_fpr_names_numeric;
 
-      for (; op->name; op++)
-	{
-	  /* Does the opcode match?  */
-	  if (! (op->match_func) (op, word))
-	    continue;
-	  /* Is this a pseudo-instruction and may we print it as such?  */
-	  if (no_aliases && (op->pinfo & INSN_ALIAS))
-	    continue;
-	  /* Is this instruction restricted to a certain value of XLEN?  */
-	  if ((op->xlen_requirement != 0) && (op->xlen_requirement != xlen))
-	    continue;
-	  /* Is this instruction supported by the current architecture?  */
-	  if (!riscv_multi_subset_supports (&riscv_rps_dis, op->insn_class))
-	    continue;
-
-	  /* It's a match.  */
-	  (*info->fprintf_styled_func) (info->stream, dis_style_mnemonic,
-					"%s", op->name);
-	  print_insn_args (op->args, word, memaddr, info);
-
-	  /* Try to disassemble multi-instruction addressing sequences.  */
-	  if (pd->to_print_addr)
-	    {
-	      info->target = pd->print_addr;
-	      (*info->fprintf_styled_func)
-		(info->stream, dis_style_comment_start, " # ");
-	      (*info->print_address_func) (info->target, info);
-	      pd->to_print_addr = false;
-	    }
+  for (; op && op->name; op++)
+    {
+      /* Does the opcode match?  */
+      if (!(op->match_func) (op, word))
+	continue;
+      /* Is this a pseudo-instruction and may we print it as such?  */
+      if (no_aliases && (op->pinfo & INSN_ALIAS))
+	continue;
+      /* Is this instruction restricted to a certain value of XLEN?  */
+      if ((op->xlen_requirement != 0) && (op->xlen_requirement != xlen))
+	continue;
+      /* Is this instruction supported by the current architecture?  */
+      if (!riscv_multi_subset_supports (&riscv_rps_dis, op->insn_class))
+	continue;
 
-	  /* Finish filling out insn_info fields.  */
-	  switch (op->pinfo & INSN_TYPE)
-	    {
-	    case INSN_BRANCH:
-	      info->insn_type = dis_branch;
-	      break;
-	    case INSN_CONDBRANCH:
-	      info->insn_type = dis_condbranch;
-	      break;
-	    case INSN_JSR:
-	      info->insn_type = dis_jsr;
-	      break;
-	    case INSN_DREF:
-	      info->insn_type = dis_dref;
-	      break;
-	    default:
-	      break;
-	    }
+      matched_op = op;
+      break;
+    }
 
-	  if (op->pinfo & INSN_DATA_SIZE)
-	    {
-	      int size = ((op->pinfo & INSN_DATA_SIZE)
-			  >> INSN_DATA_SIZE_SHIFT);
-	      info->data_size = 1 << (size - 1);
-	    }
+  if (matched_op != NULL)
+    {
+      /* There is a match.  */
+      op = matched_op;
+
+      (*info->fprintf_styled_func) (info->stream, dis_style_mnemonic,
+				    "%s", op->name);
+      print_insn_args (op->args, word, memaddr, info);
 
-	  return insnlen;
+      /* Try to disassemble multi-instruction addressing sequences.  */
+      if (pd->to_print_addr)
+	{
+	  info->target = pd->print_addr;
+	  (*info->fprintf_styled_func) (info->stream, dis_style_comment_start,
+					" # ");
+	  (*info->print_address_func) (info->target, info);
+	  pd->to_print_addr = false;
 	}
+
+      /* Finish filling out insn_info fields.  */
+      switch (op->pinfo & INSN_TYPE)
+	{
+	case INSN_BRANCH:
+	  info->insn_type = dis_branch;
+	  break;
+	case INSN_CONDBRANCH:
+	  info->insn_type = dis_condbranch;
+	  break;
+	case INSN_JSR:
+	  info->insn_type = dis_jsr;
+	  break;
+	case INSN_DREF:
+	  info->insn_type = dis_dref;
+	  break;
+	default:
+	  break;
+	}
+
+      if (op->pinfo & INSN_DATA_SIZE)
+	{
+	  int size = ((op->pinfo & INSN_DATA_SIZE) >> INSN_DATA_SIZE_SHIFT);
+	  info->data_size = 1 << (size - 1);
+	}
+
+      return insnlen;
     }
 
   /* We did not find a match, so just print the instruction bits.  */