[v1] LoongArch: Add support for <b ".L1"> and <beq, $t0, $t1, ".L1">

Message ID 20231210094132.3236040-1-mengqinggang@loongson.cn
State Accepted
Headers
Series [v1] LoongArch: Add support for <b ".L1"> and <beq, $t0, $t1, ".L1"> |

Checks

Context Check Description
snail/binutils-gdb-check success Github commit url

Commit Message

mengqinggang Dec. 10, 2023, 9:41 a.m. UTC
  Support symbol names enclosed in double quotation marks.
---
 .../gas/loongarch/double_quotation_marks.d          | 13 +++++++++++++
 .../gas/loongarch/double_quotation_marks.s          |  2 ++
 opcodes/loongarch-coder.c                           |  7 +++++++
 3 files changed, 22 insertions(+)
 create mode 100644 gas/testsuite/gas/loongarch/double_quotation_marks.d
 create mode 100644 gas/testsuite/gas/loongarch/double_quotation_marks.s
  

Comments

Alan Modra Dec. 14, 2023, 1:36 a.m. UTC | #1
On Sun, Dec 10, 2023 at 05:41:32PM +0800, mengqinggang wrote:
> Support symbol names enclosed in double quotation marks.

> --- a/opcodes/loongarch-coder.c
> +++ b/opcodes/loongarch-coder.c
> @@ -264,6 +264,13 @@ loongarch_split_args_by_comma (char *args, const char *arg_strs[])
>  	else
>  	  *args = '\0', arg_strs[num++] = args + 1;
>        }
> +
> +  if (*(args-1) == '"')
> +    {
> +      *(args-1) = '\0';
> +      arg_strs[num-1] = arg_strs[num-1] + 1;
> +    }
> +
>    arg_strs[num] = NULL;
>    return num;
>  }

This results in buffer overflows running the testsuite.

/build/gas-san/loongarch32-elf/gas/testsuite/../../binutils/objdump  -dr tmpdir/64_pcrel.o > tmpdir/dump.out
Executing on host: sh -c {/build/gas-san/loongarch32-elf/gas/testsuite/../../binutils/objdump  -dr tmpdir/64_pcrel.o > tmpdir/dump.out 2>dump.tmp}  /dev/null  (timeout = 300)
spawn [open ...]
exited abnormally with 1, output:=================================================================
==3602547==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x60200000006f at pc 0x5600d00b56c0 bp 0x7ffe37855240 sp 0x7ffe37855230
READ of size 1 at 0x60200000006f thread T0
    #0 0x5600d00b56bf in loongarch_split_args_by_comma /home/alan/src/binutils-gdb/opcodes/loongarch-coder.c:268
  
Alan Modra Dec. 24, 2023, 11:56 p.m. UTC | #2
On Thu, Dec 14, 2023 at 12:06:49PM +1030, Alan Modra wrote:
> On Sun, Dec 10, 2023 at 05:41:32PM +0800, mengqinggang wrote:
> > Support symbol names enclosed in double quotation marks.
> 
> > --- a/opcodes/loongarch-coder.c
> > +++ b/opcodes/loongarch-coder.c
> > @@ -264,6 +264,13 @@ loongarch_split_args_by_comma (char *args, const char *arg_strs[])
> >  	else
> >  	  *args = '\0', arg_strs[num++] = args + 1;
> >        }
> > +
> > +  if (*(args-1) == '"')
> > +    {
> > +      *(args-1) = '\0';
> > +      arg_strs[num-1] = arg_strs[num-1] + 1;
> > +    }
> > +
> >    arg_strs[num] = NULL;
> >    return num;
> >  }
> 
> This results in buffer overflows running the testsuite.

This fixes the buffer overflow added in commit 22b78fad28, and a few
other problems.  Committed.

	* loongarch-coder.c (loongarch_split_args_by_comma): Don't
	overflow buffer when args == "".  Don't remove unbalanced
	quotes.  Don't trim last arg if max number of args exceeded.

diff --git a/opcodes/loongarch-coder.c b/opcodes/loongarch-coder.c
index 672a468b3f4..b68352769ca 100644
--- a/opcodes/loongarch-coder.c
+++ b/opcodes/loongarch-coder.c
@@ -255,22 +255,24 @@ loongarch_split_args_by_comma (char *args, const char *arg_strs[])
   size_t num = 0;
 
   if (*args)
-    arg_strs[num++] = args;
-  for (; *args; args++)
-    if (*args == ',')
-      {
-	if (MAX_ARG_NUM_PLUS_2 - 1 == num)
-	  break;
-	else
-	  *args = '\0', arg_strs[num++] = args + 1;
-      }
-
-  if (*(args-1) == '"')
     {
-      *(args-1) = '\0';
-      arg_strs[num-1] = arg_strs[num-1] + 1;
-    }
+      arg_strs[num++] = args;
+      for (; *args; args++)
+	if (*args == ',')
+	  {
+	    if (MAX_ARG_NUM_PLUS_2 - 1 == num)
+	      goto out;
+	    *args = '\0';
+	    arg_strs[num++] = args + 1;
+	  }
 
+      if (*(args - 1) == '"' && *arg_strs[num - 1] == '"')
+	{
+	  *(args - 1) = '\0';
+	  arg_strs[num - 1] += 1;
+	}
+    }
+ out:
   arg_strs[num] = NULL;
   return num;
 }
  
Alan Modra Dec. 25, 2023, 12:01 a.m. UTC | #3
This adds some extra features
1) All args have double-quotes removed, not just the last one, and
2) Commas now are allowed as part of a double-quote string.

I am not sure this change in behaviour is wanted (ie. it's not an
obvious bug fix to me), so I'll leave it to the loongarch maintainers
to decide whether this patch goes in.

	* loongarch-coder.c (loongarch_split_args_by_comma): Support
	quotes in more than just last arg.  Commas inside quotes are
	not arg delimiters.

diff --git a/opcodes/loongarch-coder.c b/opcodes/loongarch-coder.c
index b68352769ca..1bff3b005f9 100644
--- a/opcodes/loongarch-coder.c
+++ b/opcodes/loongarch-coder.c
@@ -18,6 +18,7 @@
    along with this program; see the file COPYING3.  If not,
    see <http://www.gnu.org/licenses/>.  */
 #include "sysdep.h"
+#include <stdbool.h>
 #include "opcode/loongarch.h"
 
 int
@@ -256,12 +257,20 @@ loongarch_split_args_by_comma (char *args, const char *arg_strs[])
 
   if (*args)
     {
+      bool inquote = false;
       arg_strs[num++] = args;
       for (; *args; args++)
-	if (*args == ',')
+	if (*args == '"')
+	  inquote = !inquote;
+	else if (*args == ',' && !inquote)
 	  {
 	    if (MAX_ARG_NUM_PLUS_2 - 1 == num)
 	      goto out;
+	    if (*(args - 1) == '"' && *arg_strs[num] == '"')
+	      {
+		*(args - 1) = '\0';
+		arg_strs[num] += 1;
+	      }
 	    *args = '\0';
 	    arg_strs[num++] = args + 1;
 	  }
  
mengqinggang Dec. 28, 2023, 6:33 a.m. UTC | #4
To consistent with other architectures, we don't need to support quoted 
registers.
The second feature is a good idea and it can be merged.

Thank you very much.


在 2023/12/25 上午8:01, Alan Modra 写道:
> This adds some extra features
> 1) All args have double-quotes removed, not just the last one, and
> 2) Commas now are allowed as part of a double-quote string.
>
> I am not sure this change in behaviour is wanted (ie. it's not an
> obvious bug fix to me), so I'll leave it to the loongarch maintainers
> to decide whether this patch goes in.
>
> 	* loongarch-coder.c (loongarch_split_args_by_comma): Support
> 	quotes in more than just last arg.  Commas inside quotes are
> 	not arg delimiters.
>
> diff --git a/opcodes/loongarch-coder.c b/opcodes/loongarch-coder.c
> index b68352769ca..1bff3b005f9 100644
> --- a/opcodes/loongarch-coder.c
> +++ b/opcodes/loongarch-coder.c
> @@ -18,6 +18,7 @@
>      along with this program; see the file COPYING3.  If not,
>      see <http://www.gnu.org/licenses/>.  */
>   #include "sysdep.h"
> +#include <stdbool.h>
>   #include "opcode/loongarch.h"
>   
>   int
> @@ -256,12 +257,20 @@ loongarch_split_args_by_comma (char *args, const char *arg_strs[])
>   
>     if (*args)
>       {
> +      bool inquote = false;
>         arg_strs[num++] = args;
>         for (; *args; args++)
> -	if (*args == ',')
> +	if (*args == '"')
> +	  inquote = !inquote;
> +	else if (*args == ',' && !inquote)
>   	  {
>   	    if (MAX_ARG_NUM_PLUS_2 - 1 == num)
>   	      goto out;
> +	    if (*(args - 1) == '"' && *arg_strs[num] == '"')
> +	      {
> +		*(args - 1) = '\0';
> +		arg_strs[num] += 1;
> +	      }
>   	    *args = '\0';
>   	    arg_strs[num++] = args + 1;
>   	  }
>
  

Patch

diff --git a/gas/testsuite/gas/loongarch/double_quotation_marks.d b/gas/testsuite/gas/loongarch/double_quotation_marks.d
new file mode 100644
index 00000000000..a42534b94a4
--- /dev/null
+++ b/gas/testsuite/gas/loongarch/double_quotation_marks.d
@@ -0,0 +1,13 @@ 
+#as:
+#objdump: -dr
+
+.*:[    ]+file format .*
+
+
+Disassembly of section .text:
+
+.* <.text>:
+[ 	]+0:[ 	]+50000000[ 	]+b[ 	]+0[ 	]+# 0x0
+[ 	]+0: R_LARCH_B26[ 	]+.L1
+[ 	]+4:[ 	]+5800018d[ 	]+beq[ 	]+\$t0, \$t1, 0[ 	]+# 0x4
+[ 	]+4: R_LARCH_B16[ 	]+.L1
diff --git a/gas/testsuite/gas/loongarch/double_quotation_marks.s b/gas/testsuite/gas/loongarch/double_quotation_marks.s
new file mode 100644
index 00000000000..f8b63074150
--- /dev/null
+++ b/gas/testsuite/gas/loongarch/double_quotation_marks.s
@@ -0,0 +1,2 @@ 
+b ".L1"
+beq $r12, $r13, ".L1"
diff --git a/opcodes/loongarch-coder.c b/opcodes/loongarch-coder.c
index a68ae1c3106..672a468b3f4 100644
--- a/opcodes/loongarch-coder.c
+++ b/opcodes/loongarch-coder.c
@@ -264,6 +264,13 @@  loongarch_split_args_by_comma (char *args, const char *arg_strs[])
 	else
 	  *args = '\0', arg_strs[num++] = args + 1;
       }
+
+  if (*(args-1) == '"')
+    {
+      *(args-1) = '\0';
+      arg_strs[num-1] = arg_strs[num-1] + 1;
+    }
+
   arg_strs[num] = NULL;
   return num;
 }