RISC-V: Avoid updating state until symbol is found

Message ID 20231122013511.46088-1-patrick@rivosinc.com
State Accepted
Headers
Series RISC-V: Avoid updating state until symbol is found |

Checks

Context Check Description
snail/binutils-gdb-check success Github commit url

Commit Message

Patrick O'Neill Nov. 22, 2023, 1:35 a.m. UTC
  Currently objdump gets and updates the map state once per symbol. Updating the
state (partiularly riscv_parse_subset) is expensive and grows quadratically
since we iterate over all symbols. By deferring this until once we've found the
symbol of interest, we can reduce the time to dump a 4k insn file of .norvc and
.rvc insns from ~47 seconds to ~0.13 seconds.

opcodes/ChangeLog:

	* riscv-dis.c (riscv_get_map_state): Remove state updating logic.
	(riscv_update_map_state): Add state updating logic to seperate function.
	(riscv_search_mapping_symbol): Use new riscv_update_map_state.
	(riscv_data_length): Ditto.

Signed-off-by: Patrick O'Neill <patrick@rivosinc.com>
---
Somewhat related to:
https://sourceware.org/git/gitweb.cgi?p=binutils-gdb.git;h=c2f60ac565f1d369fde98146a16f1d3ef79e1000
Sequences of compressed/uncompressed insns have different isa strings,
so the cache in that patch is not triggered. Thankfully we can just
reduce the number of calls to fix this issue rather than create a new
cache.
---
 opcodes/riscv-dis.c | 47 ++++++++++++++++++++++++++++++++++++---------
 1 file changed, 38 insertions(+), 9 deletions(-)

--
2.34.1
  

Comments

Nelson Chu Nov. 28, 2023, 1:39 a.m. UTC | #1
On Wed, Nov 22, 2023 at 9:35 AM Patrick O'Neill <patrick@rivosinc.com>
wrote:

> Currently objdump gets and updates the map state once per symbol. Updating
> the
> state (partiularly riscv_parse_subset) is expensive and grows quadratically
> since we iterate over all symbols. By deferring this until once we've
> found the
> symbol of interest, we can reduce the time to dump a 4k insn file of
> .norvc and
> .rvc insns from ~47 seconds to ~0.13 seconds.
>
> opcodes/ChangeLog:
>
>         * riscv-dis.c (riscv_get_map_state): Remove state updating logic.
>         (riscv_update_map_state): Add state updating logic to seperate
> function.
>         (riscv_search_mapping_symbol): Use new riscv_update_map_state.
>         (riscv_data_length): Ditto.
>
> Signed-off-by: Patrick O'Neill <patrick@rivosinc.com>
> ---
> Somewhat related to:
>
> https://sourceware.org/git/gitweb.cgi?p=binutils-gdb.git;h=c2f60ac565f1d369fde98146a16f1d3ef79e1000
> Sequences of compressed/uncompressed insns have different isa strings,
> so the cache in that patch is not triggered. Thankfully we can just
> reduce the number of calls to fix this issue rather than create a new
> cache.
> ---
>  opcodes/riscv-dis.c | 47 ++++++++++++++++++++++++++++++++++++---------
>  1 file changed, 38 insertions(+), 9 deletions(-)
>
> diff --git a/opcodes/riscv-dis.c b/opcodes/riscv-dis.c
> index ca328b4c997..4ada1487732 100644
> --- a/opcodes/riscv-dis.c
> +++ b/opcodes/riscv-dis.c
> @@ -860,20 +860,20 @@ riscv_disassemble_insn (bfd_vma memaddr,
>    return insnlen;
>  }
>
> -/* Return true if we find the suitable mapping symbol,
> -   and also update the STATE.  Otherwise, return false.  */
> +/* If we find the suitable mapping symbol update the STATE.
> +   Otherwise, do nothing.  */
>
> -static bool
> -riscv_get_map_state (int n,
> -                    enum riscv_seg_mstate *state,
> -                    struct disassemble_info *info)
> +static void
> +riscv_update_map_state (int n,
> +                       enum riscv_seg_mstate *state,
> +                       struct disassemble_info *info)
>  {
>    const char *name;
>
>    /* If the symbol is in a different section, ignore it.  */
>    if (info->section != NULL
>        && info->section != info->symtab[n]->section)
> -    return false;
> +    return;
>
>    name = bfd_asymbol_name(info->symtab[n]);
>    if (strcmp (name, "$x") == 0)
> @@ -900,10 +900,27 @@ riscv_get_map_state (int n,
>        else
>         riscv_parse_subset (&riscv_rps_dis, name + 2);
>      }
> -  else
> +}
> +
> +/* Return true if we find the suitable mapping symbol.
> +   Otherwise, return false.  */
> +
> +static bool
> +riscv_get_map_state (int n,
> +                    enum riscv_seg_mstate *state,

+                    struct disassemble_info *info)
> +{
> +  const char *name;
> +
> +  /* If the symbol is in a different section, ignore it.  */
> +  if (info->section != NULL
> +      && info->section != info->symtab[n]->section)
>      return false;
>
> -  return true;
> +  name = bfd_asymbol_name(info->symtab[n]);
> +  return (strcmp (name, "$x") == 0
> +         || strcmp (name, "$d") == 0
> +         || strncmp (name, "$xrv", 4) == 0);
>

riscv_elf_is_mapping_symbols?


>  }
>

option 1: remove the unused input *state here, and then maybe change the
function name to riscv_valid_mapping_symbols? or any other names without
"state".
option 2: keep the old riscv_get_map_state, keep updating map_state there,
but don't call riscv_release_subset_list and riscv_parse_subset for
MAP_INSN until riscv_update_map_state (so that we can changed the name to
riscv_update_arch_from_map_state?).


>  /* Check the sorted symbol table (sorted by the symbol value), find the
> @@ -972,6 +989,11 @@ riscv_search_mapping_symbol (bfd_vma memaddr,
>         }
>      }
>
> +  if (found)
> +    riscv_update_map_state (symbol, &mstate, info);
> +  else
> +    riscv_update_map_state (info->symtab_size - 1, &mstate, info);
>

Do we still need to call riscv_update_map_state even if we cannot find any
suitable mapping symbols?


> +
>    /* We can not find the suitable mapping symbol above.  Therefore, we
>       look forwards and try to find it again, but don't go past the start
>       of the section.  Otherwise a data section without mapping symbols
> @@ -996,6 +1018,10 @@ riscv_search_mapping_symbol (bfd_vma memaddr,
>               break;
>             }
>         }
> +      if (found)
> +       riscv_update_map_state (symbol, &mstate, info);
> +      else
> +       riscv_update_map_state (0, &mstate, info);
>

Likewise.


>      }
>
>    if (found)
>

Can we call "riscv_update_map_state (symbol, ...), or
riscv_update_arch_from_map_state" once here?


> @@ -1060,9 +1086,12 @@ riscv_data_length (bfd_vma memaddr,
>               if (addr - memaddr < length)
>                 length = addr - memaddr;
>               found = true;
> +          riscv_update_map_state (n, &m, info);

              break;
>             }
>         }
> +      if (!found)
> +       riscv_update_map_state (0, &m, info);
>

We should already know the map_state is MAP_DATA, and then call into here,
so don't need to update the map_state again?

Thanks
Nelson
  

Patch

diff --git a/opcodes/riscv-dis.c b/opcodes/riscv-dis.c
index ca328b4c997..4ada1487732 100644
--- a/opcodes/riscv-dis.c
+++ b/opcodes/riscv-dis.c
@@ -860,20 +860,20 @@  riscv_disassemble_insn (bfd_vma memaddr,
   return insnlen;
 }

-/* Return true if we find the suitable mapping symbol,
-   and also update the STATE.  Otherwise, return false.  */
+/* If we find the suitable mapping symbol update the STATE.
+   Otherwise, do nothing.  */

-static bool
-riscv_get_map_state (int n,
-		     enum riscv_seg_mstate *state,
-		     struct disassemble_info *info)
+static void
+riscv_update_map_state (int n,
+			enum riscv_seg_mstate *state,
+			struct disassemble_info *info)
 {
   const char *name;

   /* If the symbol is in a different section, ignore it.  */
   if (info->section != NULL
       && info->section != info->symtab[n]->section)
-    return false;
+    return;

   name = bfd_asymbol_name(info->symtab[n]);
   if (strcmp (name, "$x") == 0)
@@ -900,10 +900,27 @@  riscv_get_map_state (int n,
       else
 	riscv_parse_subset (&riscv_rps_dis, name + 2);
     }
-  else
+}
+
+/* Return true if we find the suitable mapping symbol.
+   Otherwise, return false.  */
+
+static bool
+riscv_get_map_state (int n,
+		     enum riscv_seg_mstate *state,
+		     struct disassemble_info *info)
+{
+  const char *name;
+
+  /* If the symbol is in a different section, ignore it.  */
+  if (info->section != NULL
+      && info->section != info->symtab[n]->section)
     return false;

-  return true;
+  name = bfd_asymbol_name(info->symtab[n]);
+  return (strcmp (name, "$x") == 0
+	  || strcmp (name, "$d") == 0
+	  || strncmp (name, "$xrv", 4) == 0);
 }

 /* Check the sorted symbol table (sorted by the symbol value), find the
@@ -972,6 +989,11 @@  riscv_search_mapping_symbol (bfd_vma memaddr,
 	}
     }

+  if (found)
+    riscv_update_map_state (symbol, &mstate, info);
+  else
+    riscv_update_map_state (info->symtab_size - 1, &mstate, info);
+
   /* We can not find the suitable mapping symbol above.  Therefore, we
      look forwards and try to find it again, but don't go past the start
      of the section.  Otherwise a data section without mapping symbols
@@ -996,6 +1018,10 @@  riscv_search_mapping_symbol (bfd_vma memaddr,
 	      break;
 	    }
 	}
+      if (found)
+	riscv_update_map_state (symbol, &mstate, info);
+      else
+	riscv_update_map_state (0, &mstate, info);
     }

   if (found)
@@ -1060,9 +1086,12 @@  riscv_data_length (bfd_vma memaddr,
 	      if (addr - memaddr < length)
 		length = addr - memaddr;
 	      found = true;
+	      riscv_update_map_state (n, &m, info);
 	      break;
 	    }
 	}
+      if (!found)
+	riscv_update_map_state (0, &m, info);
     }
   if (!found)
     {