[v3,4/7] x86-64: further re-work insn/suffix recognition to also cover MOVSL

Message ID e4e4b80b-794c-7485-1997-685adab8fb27@suse.com
State New, archived
Headers
Series x86: suffix handling changes |

Commit Message

Jan Beulich Oct. 5, 2022, 7:24 a.m. UTC
  PR gas/29524
In order to make MOVSL{,Q} behave similarly to MOVSB{W,L,Q} and
MOVSW{L,Q} we need to defer parse_insn()'s emitting of errors unrelated
to prefix parsing. Utilize i.error just like match_template() does.
---
v3: Re-base over changes to earlier patches (incl use of Pass2).
  

Comments

H.J. Lu Oct. 11, 2022, 5:44 p.m. UTC | #1
On Wed, Oct 5, 2022 at 12:24 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> PR gas/29524
> In order to make MOVSL{,Q} behave similarly to MOVSB{W,L,Q} and
> MOVSW{L,Q} we need to defer parse_insn()'s emitting of errors unrelated
> to prefix parsing. Utilize i.error just like match_template() does.

Since movs{b,w,l,q} are string instructions, integer sign extensions
require a suffix to specify the destination size.  This is different from other
integer instructions.  Since only the new assembler allows the implicit suffix,
it won't be easy to use.  We should improve error messages, but allowing
new syntax doesn't help much.

> ---
> v3: Re-base over changes to earlier patches (incl use of Pass2).
>
> --- a/gas/config/tc-i386.c
> +++ b/gas/config/tc-i386.c
> @@ -236,6 +236,8 @@ enum i386_error
>      unsupported_with_intel_mnemonic,
>      unsupported_syntax,
>      unsupported,
> +    unsupported_on_arch,
> +    unsupported_64bit,
>      invalid_sib_address,
>      invalid_vsib_address,
>      invalid_vector_register_set,
> @@ -4849,6 +4851,15 @@ md_assemble (char *line)
>      {
>        if (pass1_mnem != NULL)
>         goto match_error;
> +      if (i.error != no_error)
> +       {
> +         gas_assert (current_templates != NULL);
> +         if (current_templates->start->opcode_modifier.pass2 && !i.suffix)
> +           goto no_match;
> +         /* No point in trying a 2nd pass - it'll only find the same suffix
> +            again.  */
> +         goto match_error;
> +       }
>        return;
>      }
>    if (current_templates->start->opcode_modifier.pass2)
> @@ -4948,12 +4959,21 @@ md_assemble (char *line)
>         {
>           line = copy;
>           copy = NULL;
> +  no_match:
>           pass1_err = i.error;
>           pass1_mnem = current_templates->start->name;
>           goto retry;
>         }
> -      free (copy);
> +
> +      /* If a non-/only-64bit template (group) was found in pass 1, and if
> +        _some_ template (group) was found in pass 2, squash pass 1's
> +        error.  */
> +      if (pass1_err == unsupported_64bit)
> +       pass1_mnem = NULL;
> +
>    match_error:
> +      free (copy);
> +
>        switch (pass1_mnem ? pass1_err : i.error)
>         {
>         default:
> @@ -4986,6 +5006,17 @@ md_assemble (char *line)
>           as_bad (_("unsupported instruction `%s'"),
>                   pass1_mnem ? pass1_mnem : current_templates->start->name);
>           return;
> +       case unsupported_on_arch:
> +         as_bad (_("`%s' is not supported on `%s%s'"),
> +                 pass1_mnem ? pass1_mnem : current_templates->start->name,
> +                 cpu_arch_name ? cpu_arch_name : default_arch,
> +                 cpu_sub_arch_name ? cpu_sub_arch_name : "");
> +         return;
> +       case unsupported_64bit:
> +         as_bad (_("`%s' is %s supported in 64-bit mode"),
> +                 pass1_mnem ? pass1_mnem : current_templates->start->name,
> +                 flag_code == CODE_64BIT ? _("not") : _("only"));
> +         return;
>         case invalid_sib_address:
>           err_msg = _("invalid SIB address");
>           break;
> @@ -5601,16 +5632,13 @@ parse_insn (const char *line, char *mnem
>         return l;
>      }
>
> -  if (!(supported & CPU_FLAGS_64BIT_MATCH))
> -    as_bad (flag_code == CODE_64BIT
> -           ? _("`%s' is not supported in 64-bit mode")
> -           : _("`%s' is only supported in 64-bit mode"),
> -           current_templates->start->name);
> -  else
> -    as_bad (_("`%s' is not supported on `%s%s'"),
> -           current_templates->start->name,
> -           cpu_arch_name ? cpu_arch_name : default_arch,
> -           cpu_sub_arch_name ? cpu_sub_arch_name : "");
> +  if (pass1)
> +    {
> +      if (supported & CPU_FLAGS_64BIT_MATCH)
> +        i.error = unsupported_on_arch;
> +      else
> +        i.error = unsupported_64bit;
> +    }
>
>    return NULL;
>  }
> --- a/gas/testsuite/gas/i386/movs.s
> +++ b/gas/testsuite/gas/i386/movs.s
> @@ -30,4 +30,10 @@ movs:
>  .ifdef x86_64
>         movswq  %ax,%rax
>         movswq  (%rax),%rax
> +
> +       movsl   %eax,%rax
> +       movsl   (%rax),%rax
> +
> +       movslq  %eax,%rax
> +       movslq  (%rax),%rax
>  .endif
> --- a/gas/testsuite/gas/i386/movx64.l
> +++ b/gas/testsuite/gas/i386/movx64.l
> @@ -241,6 +241,46 @@
>  [      ]*[1-9][0-9]*[  ]+movswq        %eax, %rcx
>  [      ]*[1-9][0-9]*[  ]+movswq        %rax, %rcx
>  [      ]*[1-9][0-9]*[  ]*
> +[      ]*[1-9][0-9]*[  ]+movsl %al, %cl
> +[      ]*[1-9][0-9]*[  ]+movsl %ax, %cl
> +[      ]*[1-9][0-9]*[  ]+movsl %eax, %cl
> +[      ]*[1-9][0-9]*[  ]+movsl %rax, %cl
> +[      ]*[1-9][0-9]*[  ]*
> +[      ]*[1-9][0-9]*[  ]+movsl %al, %cx
> +[      ]*[1-9][0-9]*[  ]+movsl %ax, %cx
> +[      ]*[1-9][0-9]*[  ]+movsl %eax, %cx
> +[      ]*[1-9][0-9]*[  ]+movsl %rax, %cx
> +[      ]*[1-9][0-9]*[  ]*
> +[      ]*[1-9][0-9]*[  ]+movsl %al, %ecx
> +[      ]*[1-9][0-9]*[  ]+movsl %ax, %ecx
> +[      ]*[1-9][0-9]*[  ]+movsl %eax, %ecx
> +[      ]*[1-9][0-9]*[  ]+movsl %rax, %ecx
> +[      ]*[1-9][0-9]*[  ]*
> +[      ]*[1-9][0-9]*[  ]+movsl %al, %rcx
> +[      ]*[1-9][0-9]*[  ]+movsl %ax, %rcx
> +[      ]*[1-9][0-9]* \?\?\?\? 4863C8[  ]+movsl %eax, %rcx
> +[      ]*[1-9][0-9]*[  ]+movsl %rax, %rcx
> +[      ]*[1-9][0-9]*[  ]*
> +[      ]*[1-9][0-9]*[  ]+movslq        %al, %cl
> +[      ]*[1-9][0-9]*[  ]+movslq        %ax, %cl
> +[      ]*[1-9][0-9]*[  ]+movslq        %eax, %cl
> +[      ]*[1-9][0-9]*[  ]+movslq        %rax, %cl
> +[      ]*[1-9][0-9]*[  ]*
> +[      ]*[1-9][0-9]*[  ]+movslq        %al, %cx
> +[      ]*[1-9][0-9]*[  ]+movslq        %ax, %cx
> +[      ]*[1-9][0-9]*[  ]+movslq        %eax, %cx
> +[      ]*[1-9][0-9]*[  ]+movslq        %rax, %cx
> +[      ]*[1-9][0-9]*[  ]*
> +[      ]*[1-9][0-9]*[  ]+movslq        %al, %ecx
> +[      ]*[1-9][0-9]*[  ]+movslq        %ax, %ecx
> +[      ]*[1-9][0-9]*[  ]+movslq        %eax, %ecx
> +[      ]*[1-9][0-9]*[  ]+movslq        %rax, %ecx
> +[      ]*[1-9][0-9]*[  ]*
> +[      ]*[1-9][0-9]*[  ]+movslq        %al, %rcx
> +[      ]*[1-9][0-9]*[  ]+movslq        %ax, %rcx
> +[      ]*[1-9][0-9]* \?\?\?\? 4863C8[  ]+movslq        %eax, %rcx
> +[      ]*[1-9][0-9]*[  ]+movslq        %rax, %rcx
> +[      ]*[1-9][0-9]*[  ]*
>  [      ]*[1-9][0-9]*[  ]+movzx:
>  [      ]*[1-9][0-9]*[  ]+movzx %al, %cl
>  [      ]*[1-9][0-9]*[  ]+movzx %ax, %cl
> --- a/gas/testsuite/gas/i386/movx64.s
> +++ b/gas/testsuite/gas/i386/movx64.s
> @@ -241,6 +241,46 @@ movsx:
>         movswq  %eax, %rcx
>         movswq  %rax, %rcx
>
> +       movsl   %al, %cl
> +       movsl   %ax, %cl
> +       movsl   %eax, %cl
> +       movsl   %rax, %cl
> +
> +       movsl   %al, %cx
> +       movsl   %ax, %cx
> +       movsl   %eax, %cx
> +       movsl   %rax, %cx
> +
> +       movsl   %al, %ecx
> +       movsl   %ax, %ecx
> +       movsl   %eax, %ecx
> +       movsl   %rax, %ecx
> +
> +       movsl   %al, %rcx
> +       movsl   %ax, %rcx
> +       movsl   %eax, %rcx
> +       movsl   %rax, %rcx
> +
> +       movslq  %al, %cl
> +       movslq  %ax, %cl
> +       movslq  %eax, %cl
> +       movslq  %rax, %cl
> +
> +       movslq  %al, %cx
> +       movslq  %ax, %cx
> +       movslq  %eax, %cx
> +       movslq  %rax, %cx
> +
> +       movslq  %al, %ecx
> +       movslq  %ax, %ecx
> +       movslq  %eax, %ecx
> +       movslq  %rax, %ecx
> +
> +       movslq  %al, %rcx
> +       movslq  %ax, %rcx
> +       movslq  %eax, %rcx
> +       movslq  %rax, %rcx
> +
>  movzx:
>         movzx   %al, %cl
>         movzx   %ax, %cl
> --- a/opcodes/i386-opc.tbl
> +++ b/opcodes/i386-opc.tbl
> @@ -164,9 +164,7 @@ movbe, 0x0f38f0, None, CpuMovbe, D|Modrm
>  // Move with sign extend.
>  movsb, 0xfbe, None, Cpu386, Modrm|No_bSuf|No_sSuf|No_ldSuf|Pass2, { Reg8|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
>  movsw, 0xfbf, None, Cpu386, Modrm|No_bSuf|No_wSuf|No_sSuf|No_ldSuf|Pass2, { Reg16|Unspecified|BaseIndex, Reg32|Reg64 }
> -// "movslq" must not be converted into "movsl" to avoid conflict with the
> -// "movsl" string move instruction.
> -movslq, 0x63, None, Cpu64, Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Size64, { Reg32|Dword|Unspecified|BaseIndex, Reg64 }
> +movsl, 0x63, None, Cpu64, Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_ldSuf|Pass2, { Reg32|Unspecified|BaseIndex, Reg64 }
>  movsx, 0xfbe, None, Cpu386, W|Modrm|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Reg8|Reg16|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
>  movsx, 0x63, None, Cpu64, Modrm|No_bSuf|No_wSuf|No_sSuf|No_qSuf|No_ldSuf, { Reg32|Unspecified|BaseIndex, Reg32|Reg64 }
>  movsxd, 0x63, None, Cpu64, Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Reg32|Unspecified|BaseIndex, Reg32|Reg64 }
>
  
Jan Beulich Oct. 12, 2022, 7:08 a.m. UTC | #2
On 11.10.2022 19:44, H.J. Lu wrote:
> On Wed, Oct 5, 2022 at 12:24 AM Jan Beulich <jbeulich@suse.com> wrote:
>>
>> PR gas/29524
>> In order to make MOVSL{,Q} behave similarly to MOVSB{W,L,Q} and
>> MOVSW{L,Q} we need to defer parse_insn()'s emitting of errors unrelated
>> to prefix parsing. Utilize i.error just like match_template() does.
> 
> Since movs{b,w,l,q} are string instructions, integer sign extensions
> require a suffix to specify the destination size.  This is different from other
> integer instructions.  Since only the new assembler allows the implicit suffix,
> it won't be easy to use.  We should improve error messages, but allowing
> new syntax doesn't help much.

It is an earlier change making most of this consistent with MOVZ*; it is
only logical to extend this to the long-to-quad sign-extending insn. As
with any fixes to prior misbehavior - of course one needs to play by the
rules of the older assembler for a number of years. But projects raise
their baselines, and hence at some point projects with an "avoid suffixes
if possible" policy could switch.

Jan
  
H.J. Lu Oct. 12, 2022, 5:10 p.m. UTC | #3
On Wed, Oct 12, 2022 at 12:08 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 11.10.2022 19:44, H.J. Lu wrote:
> > On Wed, Oct 5, 2022 at 12:24 AM Jan Beulich <jbeulich@suse.com> wrote:
> >>
> >> PR gas/29524
> >> In order to make MOVSL{,Q} behave similarly to MOVSB{W,L,Q} and
> >> MOVSW{L,Q} we need to defer parse_insn()'s emitting of errors unrelated
> >> to prefix parsing. Utilize i.error just like match_template() does.
> >
> > Since movs{b,w,l,q} are string instructions, integer sign extensions
> > require a suffix to specify the destination size.  This is different from other
> > integer instructions.  Since only the new assembler allows the implicit suffix,
> > it won't be easy to use.  We should improve error messages, but allowing
> > new syntax doesn't help much.
>
> It is an earlier change making most of this consistent with MOVZ*; it is

MOVZ is different.  There are no MOVZ string instructions.  MOVS has
different meanings in ISA.   MOVS difference from MOVZ in assembly
syntax should be expected.

> only logical to extend this to the long-to-quad sign-extending insn. As
> with any fixes to prior misbehavior - of course one needs to play by the
> rules of the older assembler for a number of years. But projects raise
> their baselines, and hence at some point projects with an "avoid suffixes
> if possible" policy could switch.
>
> Jan
  
Jan Beulich Oct. 13, 2022, 6:08 a.m. UTC | #4
On 12.10.2022 19:10, H.J. Lu wrote:
> On Wed, Oct 12, 2022 at 12:08 AM Jan Beulich <jbeulich@suse.com> wrote:
>>
>> On 11.10.2022 19:44, H.J. Lu wrote:
>>> On Wed, Oct 5, 2022 at 12:24 AM Jan Beulich <jbeulich@suse.com> wrote:
>>>>
>>>> PR gas/29524
>>>> In order to make MOVSL{,Q} behave similarly to MOVSB{W,L,Q} and
>>>> MOVSW{L,Q} we need to defer parse_insn()'s emitting of errors unrelated
>>>> to prefix parsing. Utilize i.error just like match_template() does.
>>>
>>> Since movs{b,w,l,q} are string instructions, integer sign extensions
>>> require a suffix to specify the destination size.  This is different from other
>>> integer instructions.  Since only the new assembler allows the implicit suffix,
>>> it won't be easy to use.  We should improve error messages, but allowing
>>> new syntax doesn't help much.
>>
>> It is an earlier change making most of this consistent with MOVZ*; it is
> 
> MOVZ is different.  There are no MOVZ string instructions.  MOVS has
> different meanings in ISA.   MOVS difference from MOVZ in assembly
> syntax should be expected.

You've said so before, yes, but I continue to disagree. And as we can see
from the series things can be made work consistently (and imo nothing else
should have been done right from the beginning).

Jan
  
H.J. Lu Oct. 13, 2022, 5 p.m. UTC | #5
On Wed, Oct 12, 2022 at 11:08 PM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 12.10.2022 19:10, H.J. Lu wrote:
> > On Wed, Oct 12, 2022 at 12:08 AM Jan Beulich <jbeulich@suse.com> wrote:
> >>
> >> On 11.10.2022 19:44, H.J. Lu wrote:
> >>> On Wed, Oct 5, 2022 at 12:24 AM Jan Beulich <jbeulich@suse.com> wrote:
> >>>>
> >>>> PR gas/29524
> >>>> In order to make MOVSL{,Q} behave similarly to MOVSB{W,L,Q} and
> >>>> MOVSW{L,Q} we need to defer parse_insn()'s emitting of errors unrelated
> >>>> to prefix parsing. Utilize i.error just like match_template() does.
> >>>
> >>> Since movs{b,w,l,q} are string instructions, integer sign extensions
> >>> require a suffix to specify the destination size.  This is different from other
> >>> integer instructions.  Since only the new assembler allows the implicit suffix,
> >>> it won't be easy to use.  We should improve error messages, but allowing
> >>> new syntax doesn't help much.
> >>
> >> It is an earlier change making most of this consistent with MOVZ*; it is
> >
> > MOVZ is different.  There are no MOVZ string instructions.  MOVS has
> > different meanings in ISA.   MOVS difference from MOVZ in assembly
> > syntax should be expected.
>
> You've said so before, yes, but I continue to disagree. And as we can see
> from the series things can be made work consistently (and imo nothing else
> should have been done right from the beginning).
>

There are inconsistencies in ISA.  AT&T syntax makes things more complex.
People should either deal with it or leave it to compilers.   I don't think we
should make assembler more complex.
  
Jan Beulich Oct. 14, 2022, 7:03 a.m. UTC | #6
On 13.10.2022 19:00, H.J. Lu wrote:
> On Wed, Oct 12, 2022 at 11:08 PM Jan Beulich <jbeulich@suse.com> wrote:
>>
>> On 12.10.2022 19:10, H.J. Lu wrote:
>>> On Wed, Oct 12, 2022 at 12:08 AM Jan Beulich <jbeulich@suse.com> wrote:
>>>>
>>>> On 11.10.2022 19:44, H.J. Lu wrote:
>>>>> On Wed, Oct 5, 2022 at 12:24 AM Jan Beulich <jbeulich@suse.com> wrote:
>>>>>>
>>>>>> PR gas/29524
>>>>>> In order to make MOVSL{,Q} behave similarly to MOVSB{W,L,Q} and
>>>>>> MOVSW{L,Q} we need to defer parse_insn()'s emitting of errors unrelated
>>>>>> to prefix parsing. Utilize i.error just like match_template() does.
>>>>>
>>>>> Since movs{b,w,l,q} are string instructions, integer sign extensions
>>>>> require a suffix to specify the destination size.  This is different from other
>>>>> integer instructions.  Since only the new assembler allows the implicit suffix,
>>>>> it won't be easy to use.  We should improve error messages, but allowing
>>>>> new syntax doesn't help much.
>>>>
>>>> It is an earlier change making most of this consistent with MOVZ*; it is
>>>
>>> MOVZ is different.  There are no MOVZ string instructions.  MOVS has
>>> different meanings in ISA.   MOVS difference from MOVZ in assembly
>>> syntax should be expected.
>>
>> You've said so before, yes, but I continue to disagree. And as we can see
>> from the series things can be made work consistently (and imo nothing else
>> should have been done right from the beginning).
>>
> 
> There are inconsistencies in ISA.

Sure. But we shouldn't add further ones in the assembler.

>  AT&T syntax makes things more complex.
> People should either deal with it or leave it to compilers.   I don't think we
> should make assembler more complex.

The complexity added here isn't all that bad. You've added far more
complexity in the past for things which arguably shouldn't even be
dealt with by the assembler (I'm thinking of -malign-branch* and
-mlfence-* first of all).

Jan
  
H.J. Lu Oct. 14, 2022, 5:07 p.m. UTC | #7
On Fri, Oct 14, 2022 at 12:03 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 13.10.2022 19:00, H.J. Lu wrote:
> > On Wed, Oct 12, 2022 at 11:08 PM Jan Beulich <jbeulich@suse.com> wrote:
> >>
> >> On 12.10.2022 19:10, H.J. Lu wrote:
> >>> On Wed, Oct 12, 2022 at 12:08 AM Jan Beulich <jbeulich@suse.com> wrote:
> >>>>
> >>>> On 11.10.2022 19:44, H.J. Lu wrote:
> >>>>> On Wed, Oct 5, 2022 at 12:24 AM Jan Beulich <jbeulich@suse.com> wrote:
> >>>>>>
> >>>>>> PR gas/29524
> >>>>>> In order to make MOVSL{,Q} behave similarly to MOVSB{W,L,Q} and
> >>>>>> MOVSW{L,Q} we need to defer parse_insn()'s emitting of errors unrelated
> >>>>>> to prefix parsing. Utilize i.error just like match_template() does.
> >>>>>
> >>>>> Since movs{b,w,l,q} are string instructions, integer sign extensions
> >>>>> require a suffix to specify the destination size.  This is different from other
> >>>>> integer instructions.  Since only the new assembler allows the implicit suffix,
> >>>>> it won't be easy to use.  We should improve error messages, but allowing
> >>>>> new syntax doesn't help much.
> >>>>
> >>>> It is an earlier change making most of this consistent with MOVZ*; it is
> >>>
> >>> MOVZ is different.  There are no MOVZ string instructions.  MOVS has
> >>> different meanings in ISA.   MOVS difference from MOVZ in assembly
> >>> syntax should be expected.
> >>
> >> You've said so before, yes, but I continue to disagree. And as we can see
> >> from the series things can be made work consistently (and imo nothing else
> >> should have been done right from the beginning).
> >>
> >
> > There are inconsistencies in ISA.
>
> Sure. But we shouldn't add further ones in the assembler.

Assembler just follows ISA.  Programmers should learn to
deal with it or use a compiler.

> >  AT&T syntax makes things more complex.
> > People should either deal with it or leave it to compilers.   I don't think we
> > should make assembler more complex.
>
> The complexity added here isn't all that bad. You've added far more
> complexity in the past for things which arguably shouldn't even be
> dealt with by the assembler (I'm thinking of -malign-branch* and
> -mlfence-* first of all).
>
> Jan
  
Jan Beulich Oct. 17, 2022, 7:02 a.m. UTC | #8
On 14.10.2022 19:07, H.J. Lu wrote:
> On Fri, Oct 14, 2022 at 12:03 AM Jan Beulich <jbeulich@suse.com> wrote:
>>
>> On 13.10.2022 19:00, H.J. Lu wrote:
>>> On Wed, Oct 12, 2022 at 11:08 PM Jan Beulich <jbeulich@suse.com> wrote:
>>>>
>>>> On 12.10.2022 19:10, H.J. Lu wrote:
>>>>> On Wed, Oct 12, 2022 at 12:08 AM Jan Beulich <jbeulich@suse.com> wrote:
>>>>>>
>>>>>> On 11.10.2022 19:44, H.J. Lu wrote:
>>>>>>> On Wed, Oct 5, 2022 at 12:24 AM Jan Beulich <jbeulich@suse.com> wrote:
>>>>>>>>
>>>>>>>> PR gas/29524
>>>>>>>> In order to make MOVSL{,Q} behave similarly to MOVSB{W,L,Q} and
>>>>>>>> MOVSW{L,Q} we need to defer parse_insn()'s emitting of errors unrelated
>>>>>>>> to prefix parsing. Utilize i.error just like match_template() does.
>>>>>>>
>>>>>>> Since movs{b,w,l,q} are string instructions, integer sign extensions
>>>>>>> require a suffix to specify the destination size.  This is different from other
>>>>>>> integer instructions.  Since only the new assembler allows the implicit suffix,
>>>>>>> it won't be easy to use.  We should improve error messages, but allowing
>>>>>>> new syntax doesn't help much.
>>>>>>
>>>>>> It is an earlier change making most of this consistent with MOVZ*; it is
>>>>>
>>>>> MOVZ is different.  There are no MOVZ string instructions.  MOVS has
>>>>> different meanings in ISA.   MOVS difference from MOVZ in assembly
>>>>> syntax should be expected.
>>>>
>>>> You've said so before, yes, but I continue to disagree. And as we can see
>>>> from the series things can be made work consistently (and imo nothing else
>>>> should have been done right from the beginning).
>>>>
>>>
>>> There are inconsistencies in ISA.
>>
>> Sure. But we shouldn't add further ones in the assembler.
> 
> Assembler just follows ISA.  Programmers should learn to
> deal with it or use a compiler.

This is entirely non-constructive. Assembler writers should get things into
usable (read: consistent) shape. Plus what ISA are you talking about here?
We're talking of mnemonics which aren't spelled out in any ISA document
anyway. The only halfway official AT&T doc I'm aware of doesn't provide
room for omitting size suffixes [1]. Yet that's a fundamental feature of gas,
and elsewhere (recently: CMPccXADD) you're even suggesting to force people
to omit suffixes (plus you've previously objected to the disassembler to
consistently emit them in suffix-always mode).

That same document was also only updated to cover 64-bit code in a half-
hearted way, so can't necessarily be used for 64-bit only insns (it doesn't
list any form of MOVSXD at all afaics, for example). Where not explicitly
mentioned, their intended handling can only be inferred by using analogies.
Nor do we support some of the odd (quirky I would say) mnemonics that are
listed there, like xchglA or movabsbA (which is even wrongly described as
moving an immediate value into the register).

Bottom line: May I please ask that you take another (constructive) look at
the v4 submission?

Jan

[1] Really it does, by saying "long" is then implied, which has never been
the behavior of gas when a register saying otherwise is also in use.
  
H.J. Lu Oct. 17, 2022, 10:36 p.m. UTC | #9
On Mon, Oct 17, 2022 at 12:02 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 14.10.2022 19:07, H.J. Lu wrote:
> > On Fri, Oct 14, 2022 at 12:03 AM Jan Beulich <jbeulich@suse.com> wrote:
> >>
> >> On 13.10.2022 19:00, H.J. Lu wrote:
> >>> On Wed, Oct 12, 2022 at 11:08 PM Jan Beulich <jbeulich@suse.com> wrote:
> >>>>
> >>>> On 12.10.2022 19:10, H.J. Lu wrote:
> >>>>> On Wed, Oct 12, 2022 at 12:08 AM Jan Beulich <jbeulich@suse.com> wrote:
> >>>>>>
> >>>>>> On 11.10.2022 19:44, H.J. Lu wrote:
> >>>>>>> On Wed, Oct 5, 2022 at 12:24 AM Jan Beulich <jbeulich@suse.com> wrote:
> >>>>>>>>
> >>>>>>>> PR gas/29524
> >>>>>>>> In order to make MOVSL{,Q} behave similarly to MOVSB{W,L,Q} and
> >>>>>>>> MOVSW{L,Q} we need to defer parse_insn()'s emitting of errors unrelated
> >>>>>>>> to prefix parsing. Utilize i.error just like match_template() does.
> >>>>>>>
> >>>>>>> Since movs{b,w,l,q} are string instructions, integer sign extensions
> >>>>>>> require a suffix to specify the destination size.  This is different from other
> >>>>>>> integer instructions.  Since only the new assembler allows the implicit suffix,
> >>>>>>> it won't be easy to use.  We should improve error messages, but allowing
> >>>>>>> new syntax doesn't help much.
> >>>>>>
> >>>>>> It is an earlier change making most of this consistent with MOVZ*; it is
> >>>>>
> >>>>> MOVZ is different.  There are no MOVZ string instructions.  MOVS has
> >>>>> different meanings in ISA.   MOVS difference from MOVZ in assembly
> >>>>> syntax should be expected.
> >>>>
> >>>> You've said so before, yes, but I continue to disagree. And as we can see
> >>>> from the series things can be made work consistently (and imo nothing else
> >>>> should have been done right from the beginning).
> >>>>
> >>>
> >>> There are inconsistencies in ISA.
> >>
> >> Sure. But we shouldn't add further ones in the assembler.
> >
> > Assembler just follows ISA.  Programmers should learn to
> > deal with it or use a compiler.
>
> This is entirely non-constructive. Assembler writers should get things into
> usable (read: consistent) shape. Plus what ISA are you talking about here?

GNU assembler has been this way for a long time and the current GNU
assembler will still be in use for the next few years.  Assembler writers
should know about all these.

> We're talking of mnemonics which aren't spelled out in any ISA document
> anyway. The only halfway official AT&T doc I'm aware of doesn't provide
> room for omitting size suffixes [1]. Yet that's a fundamental feature of gas,
> and elsewhere (recently: CMPccXADD) you're even suggesting to force people
> to omit suffixes (plus you've previously objected to the disassembler to
> consistently emit them in suffix-always mode).
>
> That same document was also only updated to cover 64-bit code in a half-
> hearted way, so can't necessarily be used for 64-bit only insns (it doesn't
> list any form of MOVSXD at all afaics, for example). Where not explicitly
> mentioned, their intended handling can only be inferred by using analogies.
> Nor do we support some of the odd (quirky I would say) mnemonics that are
> listed there, like xchglA or movabsbA (which is even wrongly described as
> moving an immediate value into the register).
>
> Bottom line: May I please ask that you take another (constructive) look at
> the v4 submission?
>
> Jan
>
> [1] Really it does, by saying "long" is then implied, which has never been
> the behavior of gas when a register saying otherwise is also in use.
  
Jan Beulich Oct. 18, 2022, 6:31 a.m. UTC | #10
On 18.10.2022 00:36, H.J. Lu wrote:
> On Mon, Oct 17, 2022 at 12:02 AM Jan Beulich <jbeulich@suse.com> wrote:
>>
>> On 14.10.2022 19:07, H.J. Lu wrote:
>>> On Fri, Oct 14, 2022 at 12:03 AM Jan Beulich <jbeulich@suse.com> wrote:
>>>>
>>>> On 13.10.2022 19:00, H.J. Lu wrote:
>>>>> On Wed, Oct 12, 2022 at 11:08 PM Jan Beulich <jbeulich@suse.com> wrote:
>>>>>>
>>>>>> On 12.10.2022 19:10, H.J. Lu wrote:
>>>>>>> On Wed, Oct 12, 2022 at 12:08 AM Jan Beulich <jbeulich@suse.com> wrote:
>>>>>>>>
>>>>>>>> On 11.10.2022 19:44, H.J. Lu wrote:
>>>>>>>>> On Wed, Oct 5, 2022 at 12:24 AM Jan Beulich <jbeulich@suse.com> wrote:
>>>>>>>>>>
>>>>>>>>>> PR gas/29524
>>>>>>>>>> In order to make MOVSL{,Q} behave similarly to MOVSB{W,L,Q} and
>>>>>>>>>> MOVSW{L,Q} we need to defer parse_insn()'s emitting of errors unrelated
>>>>>>>>>> to prefix parsing. Utilize i.error just like match_template() does.
>>>>>>>>>
>>>>>>>>> Since movs{b,w,l,q} are string instructions, integer sign extensions
>>>>>>>>> require a suffix to specify the destination size.  This is different from other
>>>>>>>>> integer instructions.  Since only the new assembler allows the implicit suffix,
>>>>>>>>> it won't be easy to use.  We should improve error messages, but allowing
>>>>>>>>> new syntax doesn't help much.
>>>>>>>>
>>>>>>>> It is an earlier change making most of this consistent with MOVZ*; it is
>>>>>>>
>>>>>>> MOVZ is different.  There are no MOVZ string instructions.  MOVS has
>>>>>>> different meanings in ISA.   MOVS difference from MOVZ in assembly
>>>>>>> syntax should be expected.
>>>>>>
>>>>>> You've said so before, yes, but I continue to disagree. And as we can see
>>>>>> from the series things can be made work consistently (and imo nothing else
>>>>>> should have been done right from the beginning).
>>>>>>
>>>>>
>>>>> There are inconsistencies in ISA.
>>>>
>>>> Sure. But we shouldn't add further ones in the assembler.
>>>
>>> Assembler just follows ISA.  Programmers should learn to
>>> deal with it or use a compiler.
>>
>> This is entirely non-constructive. Assembler writers should get things into
>> usable (read: consistent) shape. Plus what ISA are you talking about here?
> 
> GNU assembler has been this way for a long time and the current GNU
> assembler will still be in use for the next few years.  Assembler writers
> should know about all these.

Hmm, so after all not any ISA to follow? Plus do you suggest there's
only people having written x86 assembler code for many years? And
there's no people who would prefer to get their code into more
consistent shape, but who are limited by assembler shortcomings?

In any event, ...

>> We're talking of mnemonics which aren't spelled out in any ISA document
>> anyway. The only halfway official AT&T doc I'm aware of doesn't provide
>> room for omitting size suffixes [1]. Yet that's a fundamental feature of gas,
>> and elsewhere (recently: CMPccXADD) you're even suggesting to force people
>> to omit suffixes (plus you've previously objected to the disassembler to
>> consistently emit them in suffix-always mode).
>>
>> That same document was also only updated to cover 64-bit code in a half-
>> hearted way, so can't necessarily be used for 64-bit only insns (it doesn't
>> list any form of MOVSXD at all afaics, for example). Where not explicitly
>> mentioned, their intended handling can only be inferred by using analogies.
>> Nor do we support some of the odd (quirky I would say) mnemonics that are
>> listed there, like xchglA or movabsbA (which is even wrongly described as
>> moving an immediate value into the register).
>>
>> Bottom line: May I please ask that you take another (constructive) look at
>> the v4 submission?

... this request continues to stand.

Jan
  
H.J. Lu Oct. 18, 2022, 9:48 p.m. UTC | #11
On Mon, Oct 17, 2022 at 11:31 PM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 18.10.2022 00:36, H.J. Lu wrote:
> > On Mon, Oct 17, 2022 at 12:02 AM Jan Beulich <jbeulich@suse.com> wrote:
> >>
> >> On 14.10.2022 19:07, H.J. Lu wrote:
> >>> On Fri, Oct 14, 2022 at 12:03 AM Jan Beulich <jbeulich@suse.com> wrote:
> >>>>
> >>>> On 13.10.2022 19:00, H.J. Lu wrote:
> >>>>> On Wed, Oct 12, 2022 at 11:08 PM Jan Beulich <jbeulich@suse.com> wrote:
> >>>>>>
> >>>>>> On 12.10.2022 19:10, H.J. Lu wrote:
> >>>>>>> On Wed, Oct 12, 2022 at 12:08 AM Jan Beulich <jbeulich@suse.com> wrote:
> >>>>>>>>
> >>>>>>>> On 11.10.2022 19:44, H.J. Lu wrote:
> >>>>>>>>> On Wed, Oct 5, 2022 at 12:24 AM Jan Beulich <jbeulich@suse.com> wrote:
> >>>>>>>>>>
> >>>>>>>>>> PR gas/29524
> >>>>>>>>>> In order to make MOVSL{,Q} behave similarly to MOVSB{W,L,Q} and
> >>>>>>>>>> MOVSW{L,Q} we need to defer parse_insn()'s emitting of errors unrelated
> >>>>>>>>>> to prefix parsing. Utilize i.error just like match_template() does.
> >>>>>>>>>
> >>>>>>>>> Since movs{b,w,l,q} are string instructions, integer sign extensions
> >>>>>>>>> require a suffix to specify the destination size.  This is different from other
> >>>>>>>>> integer instructions.  Since only the new assembler allows the implicit suffix,
> >>>>>>>>> it won't be easy to use.  We should improve error messages, but allowing
> >>>>>>>>> new syntax doesn't help much.
> >>>>>>>>
> >>>>>>>> It is an earlier change making most of this consistent with MOVZ*; it is
> >>>>>>>
> >>>>>>> MOVZ is different.  There are no MOVZ string instructions.  MOVS has
> >>>>>>> different meanings in ISA.   MOVS difference from MOVZ in assembly
> >>>>>>> syntax should be expected.
> >>>>>>
> >>>>>> You've said so before, yes, but I continue to disagree. And as we can see
> >>>>>> from the series things can be made work consistently (and imo nothing else
> >>>>>> should have been done right from the beginning).
> >>>>>>
> >>>>>
> >>>>> There are inconsistencies in ISA.
> >>>>
> >>>> Sure. But we shouldn't add further ones in the assembler.
> >>>
> >>> Assembler just follows ISA.  Programmers should learn to
> >>> deal with it or use a compiler.
> >>
> >> This is entirely non-constructive. Assembler writers should get things into
> >> usable (read: consistent) shape. Plus what ISA are you talking about here?
> >
> > GNU assembler has been this way for a long time and the current GNU
> > assembler will still be in use for the next few years.  Assembler writers
> > should know about all these.
>
> Hmm, so after all not any ISA to follow? Plus do you suggest there's
> only people having written x86 assembler code for many years? And
> there's no people who would prefer to get their code into more
> consistent shape, but who are limited by assembler shortcomings?

I prefer consistency with existing assemblers and ISA specs.
For new instructions, suffixes should be used only when there is
an ambiguity.  For existing instructions, to support existing assembly
codes, suffixes may be optional even when there is an ambiguity.
But for integer MOVS instructions, suffixes have always been required.
I don't think we should change them now.  We can improve documentation
if needed.

> In any event, ...
>
> >> We're talking of mnemonics which aren't spelled out in any ISA document
> >> anyway. The only halfway official AT&T doc I'm aware of doesn't provide
> >> room for omitting size suffixes [1]. Yet that's a fundamental feature of gas,
> >> and elsewhere (recently: CMPccXADD) you're even suggesting to force people
> >> to omit suffixes (plus you've previously objected to the disassembler to
> >> consistently emit them in suffix-always mode).
> >>
> >> That same document was also only updated to cover 64-bit code in a half-
> >> hearted way, so can't necessarily be used for 64-bit only insns (it doesn't
> >> list any form of MOVSXD at all afaics, for example). Where not explicitly
> >> mentioned, their intended handling can only be inferred by using analogies.
> >> Nor do we support some of the odd (quirky I would say) mnemonics that are
> >> listed there, like xchglA or movabsbA (which is even wrongly described as
> >> moving an immediate value into the register).
> >>
> >> Bottom line: May I please ask that you take another (constructive) look at
> >> the v4 submission?
>
> ... this request continues to stand.

I think we should improve diagnostics and documentation, not add new
syntaxes to existing instructions.
  
Jan Beulich Oct. 19, 2022, 6:08 a.m. UTC | #12
On 18.10.2022 23:48, H.J. Lu wrote:
> On Mon, Oct 17, 2022 at 11:31 PM Jan Beulich <jbeulich@suse.com> wrote:
>>
>> On 18.10.2022 00:36, H.J. Lu wrote:
>>> On Mon, Oct 17, 2022 at 12:02 AM Jan Beulich <jbeulich@suse.com> wrote:
>>>>
>>>> On 14.10.2022 19:07, H.J. Lu wrote:
>>>>> On Fri, Oct 14, 2022 at 12:03 AM Jan Beulich <jbeulich@suse.com> wrote:
>>>>>>
>>>>>> On 13.10.2022 19:00, H.J. Lu wrote:
>>>>>>> On Wed, Oct 12, 2022 at 11:08 PM Jan Beulich <jbeulich@suse.com> wrote:
>>>>>>>>
>>>>>>>> On 12.10.2022 19:10, H.J. Lu wrote:
>>>>>>>>> On Wed, Oct 12, 2022 at 12:08 AM Jan Beulich <jbeulich@suse.com> wrote:
>>>>>>>>>>
>>>>>>>>>> On 11.10.2022 19:44, H.J. Lu wrote:
>>>>>>>>>>> On Wed, Oct 5, 2022 at 12:24 AM Jan Beulich <jbeulich@suse.com> wrote:
>>>>>>>>>>>>
>>>>>>>>>>>> PR gas/29524
>>>>>>>>>>>> In order to make MOVSL{,Q} behave similarly to MOVSB{W,L,Q} and
>>>>>>>>>>>> MOVSW{L,Q} we need to defer parse_insn()'s emitting of errors unrelated
>>>>>>>>>>>> to prefix parsing. Utilize i.error just like match_template() does.
>>>>>>>>>>>
>>>>>>>>>>> Since movs{b,w,l,q} are string instructions, integer sign extensions
>>>>>>>>>>> require a suffix to specify the destination size.  This is different from other
>>>>>>>>>>> integer instructions.  Since only the new assembler allows the implicit suffix,
>>>>>>>>>>> it won't be easy to use.  We should improve error messages, but allowing
>>>>>>>>>>> new syntax doesn't help much.
>>>>>>>>>>
>>>>>>>>>> It is an earlier change making most of this consistent with MOVZ*; it is
>>>>>>>>>
>>>>>>>>> MOVZ is different.  There are no MOVZ string instructions.  MOVS has
>>>>>>>>> different meanings in ISA.   MOVS difference from MOVZ in assembly
>>>>>>>>> syntax should be expected.
>>>>>>>>
>>>>>>>> You've said so before, yes, but I continue to disagree. And as we can see
>>>>>>>> from the series things can be made work consistently (and imo nothing else
>>>>>>>> should have been done right from the beginning).
>>>>>>>>
>>>>>>>
>>>>>>> There are inconsistencies in ISA.
>>>>>>
>>>>>> Sure. But we shouldn't add further ones in the assembler.
>>>>>
>>>>> Assembler just follows ISA.  Programmers should learn to
>>>>> deal with it or use a compiler.
>>>>
>>>> This is entirely non-constructive. Assembler writers should get things into
>>>> usable (read: consistent) shape. Plus what ISA are you talking about here?
>>>
>>> GNU assembler has been this way for a long time and the current GNU
>>> assembler will still be in use for the next few years.  Assembler writers
>>> should know about all these.
>>
>> Hmm, so after all not any ISA to follow? Plus do you suggest there's
>> only people having written x86 assembler code for many years? And
>> there's no people who would prefer to get their code into more
>> consistent shape, but who are limited by assembler shortcomings?
> 
> I prefer consistency with existing assemblers and ISA specs.
> For new instructions, suffixes should be used only when there is
> an ambiguity.  For existing instructions, to support existing assembly
> codes, suffixes may be optional even when there is an ambiguity.
> But for integer MOVS instructions, suffixes have always been required.
> I don't think we should change them now.  We can improve documentation
> if needed.
> 
>> In any event, ...
>>
>>>> We're talking of mnemonics which aren't spelled out in any ISA document
>>>> anyway. The only halfway official AT&T doc I'm aware of doesn't provide
>>>> room for omitting size suffixes [1]. Yet that's a fundamental feature of gas,
>>>> and elsewhere (recently: CMPccXADD) you're even suggesting to force people
>>>> to omit suffixes (plus you've previously objected to the disassembler to
>>>> consistently emit them in suffix-always mode).
>>>>
>>>> That same document was also only updated to cover 64-bit code in a half-
>>>> hearted way, so can't necessarily be used for 64-bit only insns (it doesn't
>>>> list any form of MOVSXD at all afaics, for example). Where not explicitly
>>>> mentioned, their intended handling can only be inferred by using analogies.
>>>> Nor do we support some of the odd (quirky I would say) mnemonics that are
>>>> listed there, like xchglA or movabsbA (which is even wrongly described as
>>>> moving an immediate value into the register).
>>>>
>>>> Bottom line: May I please ask that you take another (constructive) look at
>>>> the v4 submission?
>>
>> ... this request continues to stand.
> 
> I think we should improve diagnostics and documentation, not add new
> syntaxes to existing instructions.

I continue to disagree, but to make at least some forward progress: Which
parts of this series can I expect an okay for (patches 5-7 already have
one, but can't go in ahead)? I would try to re-order the series then to
put the controversial patch(es) at the end, such that at least parts can
go in and I can make further progress with other work. But there's no
promise this re-ordering actually is going to work out if it's more than
just this one patch which you continue to object to.

Jan
  
H.J. Lu Oct. 19, 2022, 9:46 p.m. UTC | #13
On Tue, Oct 18, 2022 at 11:08 PM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 18.10.2022 23:48, H.J. Lu wrote:
> > On Mon, Oct 17, 2022 at 11:31 PM Jan Beulich <jbeulich@suse.com> wrote:
> >>
> >> On 18.10.2022 00:36, H.J. Lu wrote:
> >>> On Mon, Oct 17, 2022 at 12:02 AM Jan Beulich <jbeulich@suse.com> wrote:
> >>>>
> >>>> On 14.10.2022 19:07, H.J. Lu wrote:
> >>>>> On Fri, Oct 14, 2022 at 12:03 AM Jan Beulich <jbeulich@suse.com> wrote:
> >>>>>>
> >>>>>> On 13.10.2022 19:00, H.J. Lu wrote:
> >>>>>>> On Wed, Oct 12, 2022 at 11:08 PM Jan Beulich <jbeulich@suse.com> wrote:
> >>>>>>>>
> >>>>>>>> On 12.10.2022 19:10, H.J. Lu wrote:
> >>>>>>>>> On Wed, Oct 12, 2022 at 12:08 AM Jan Beulich <jbeulich@suse.com> wrote:
> >>>>>>>>>>
> >>>>>>>>>> On 11.10.2022 19:44, H.J. Lu wrote:
> >>>>>>>>>>> On Wed, Oct 5, 2022 at 12:24 AM Jan Beulich <jbeulich@suse.com> wrote:
> >>>>>>>>>>>>
> >>>>>>>>>>>> PR gas/29524
> >>>>>>>>>>>> In order to make MOVSL{,Q} behave similarly to MOVSB{W,L,Q} and
> >>>>>>>>>>>> MOVSW{L,Q} we need to defer parse_insn()'s emitting of errors unrelated
> >>>>>>>>>>>> to prefix parsing. Utilize i.error just like match_template() does.
> >>>>>>>>>>>
> >>>>>>>>>>> Since movs{b,w,l,q} are string instructions, integer sign extensions
> >>>>>>>>>>> require a suffix to specify the destination size.  This is different from other
> >>>>>>>>>>> integer instructions.  Since only the new assembler allows the implicit suffix,
> >>>>>>>>>>> it won't be easy to use.  We should improve error messages, but allowing
> >>>>>>>>>>> new syntax doesn't help much.
> >>>>>>>>>>
> >>>>>>>>>> It is an earlier change making most of this consistent with MOVZ*; it is
> >>>>>>>>>
> >>>>>>>>> MOVZ is different.  There are no MOVZ string instructions.  MOVS has
> >>>>>>>>> different meanings in ISA.   MOVS difference from MOVZ in assembly
> >>>>>>>>> syntax should be expected.
> >>>>>>>>
> >>>>>>>> You've said so before, yes, but I continue to disagree. And as we can see
> >>>>>>>> from the series things can be made work consistently (and imo nothing else
> >>>>>>>> should have been done right from the beginning).
> >>>>>>>>
> >>>>>>>
> >>>>>>> There are inconsistencies in ISA.
> >>>>>>
> >>>>>> Sure. But we shouldn't add further ones in the assembler.
> >>>>>
> >>>>> Assembler just follows ISA.  Programmers should learn to
> >>>>> deal with it or use a compiler.
> >>>>
> >>>> This is entirely non-constructive. Assembler writers should get things into
> >>>> usable (read: consistent) shape. Plus what ISA are you talking about here?
> >>>
> >>> GNU assembler has been this way for a long time and the current GNU
> >>> assembler will still be in use for the next few years.  Assembler writers
> >>> should know about all these.
> >>
> >> Hmm, so after all not any ISA to follow? Plus do you suggest there's
> >> only people having written x86 assembler code for many years? And
> >> there's no people who would prefer to get their code into more
> >> consistent shape, but who are limited by assembler shortcomings?
> >
> > I prefer consistency with existing assemblers and ISA specs.
> > For new instructions, suffixes should be used only when there is
> > an ambiguity.  For existing instructions, to support existing assembly
> > codes, suffixes may be optional even when there is an ambiguity.
> > But for integer MOVS instructions, suffixes have always been required.
> > I don't think we should change them now.  We can improve documentation
> > if needed.
> >
> >> In any event, ...
> >>
> >>>> We're talking of mnemonics which aren't spelled out in any ISA document
> >>>> anyway. The only halfway official AT&T doc I'm aware of doesn't provide
> >>>> room for omitting size suffixes [1]. Yet that's a fundamental feature of gas,
> >>>> and elsewhere (recently: CMPccXADD) you're even suggesting to force people
> >>>> to omit suffixes (plus you've previously objected to the disassembler to
> >>>> consistently emit them in suffix-always mode).
> >>>>
> >>>> That same document was also only updated to cover 64-bit code in a half-
> >>>> hearted way, so can't necessarily be used for 64-bit only insns (it doesn't
> >>>> list any form of MOVSXD at all afaics, for example). Where not explicitly
> >>>> mentioned, their intended handling can only be inferred by using analogies.
> >>>> Nor do we support some of the odd (quirky I would say) mnemonics that are
> >>>> listed there, like xchglA or movabsbA (which is even wrongly described as
> >>>> moving an immediate value into the register).
> >>>>
> >>>> Bottom line: May I please ask that you take another (constructive) look at
> >>>> the v4 submission?
> >>
> >> ... this request continues to stand.
> >
> > I think we should improve diagnostics and documentation, not add new
> > syntaxes to existing instructions.
>
> I continue to disagree, but to make at least some forward progress: Which
> parts of this series can I expect an okay for (patches 5-7 already have
> one, but can't go in ahead)? I would try to re-order the series then to
> put the controversial patch(es) at the end, such that at least parts can
> go in and I can make further progress with other work. But there's no
> promise this re-ordering actually is going to work out if it's more than
> just this one patch which you continue to object to.
>

Please submit a new patch set without MOVS changes.

Thanks.
  
Jan Beulich Oct. 20, 2022, 10:12 a.m. UTC | #14
On 19.10.2022 23:46, H.J. Lu wrote:
> On Tue, Oct 18, 2022 at 11:08 PM Jan Beulich <jbeulich@suse.com> wrote:
>>
>> On 18.10.2022 23:48, H.J. Lu wrote:
>>> On Mon, Oct 17, 2022 at 11:31 PM Jan Beulich <jbeulich@suse.com> wrote:
>>>>
>>>> On 18.10.2022 00:36, H.J. Lu wrote:
>>>>> On Mon, Oct 17, 2022 at 12:02 AM Jan Beulich <jbeulich@suse.com> wrote:
>>>>>>
>>>>>> On 14.10.2022 19:07, H.J. Lu wrote:
>>>>>>> On Fri, Oct 14, 2022 at 12:03 AM Jan Beulich <jbeulich@suse.com> wrote:
>>>>>>>>
>>>>>>>> On 13.10.2022 19:00, H.J. Lu wrote:
>>>>>>>>> On Wed, Oct 12, 2022 at 11:08 PM Jan Beulich <jbeulich@suse.com> wrote:
>>>>>>>>>>
>>>>>>>>>> On 12.10.2022 19:10, H.J. Lu wrote:
>>>>>>>>>>> On Wed, Oct 12, 2022 at 12:08 AM Jan Beulich <jbeulich@suse.com> wrote:
>>>>>>>>>>>>
>>>>>>>>>>>> On 11.10.2022 19:44, H.J. Lu wrote:
>>>>>>>>>>>>> On Wed, Oct 5, 2022 at 12:24 AM Jan Beulich <jbeulich@suse.com> wrote:
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> PR gas/29524
>>>>>>>>>>>>>> In order to make MOVSL{,Q} behave similarly to MOVSB{W,L,Q} and
>>>>>>>>>>>>>> MOVSW{L,Q} we need to defer parse_insn()'s emitting of errors unrelated
>>>>>>>>>>>>>> to prefix parsing. Utilize i.error just like match_template() does.
>>>>>>>>>>>>>
>>>>>>>>>>>>> Since movs{b,w,l,q} are string instructions, integer sign extensions
>>>>>>>>>>>>> require a suffix to specify the destination size.  This is different from other
>>>>>>>>>>>>> integer instructions.  Since only the new assembler allows the implicit suffix,
>>>>>>>>>>>>> it won't be easy to use.  We should improve error messages, but allowing
>>>>>>>>>>>>> new syntax doesn't help much.
>>>>>>>>>>>>
>>>>>>>>>>>> It is an earlier change making most of this consistent with MOVZ*; it is
>>>>>>>>>>>
>>>>>>>>>>> MOVZ is different.  There are no MOVZ string instructions.  MOVS has
>>>>>>>>>>> different meanings in ISA.   MOVS difference from MOVZ in assembly
>>>>>>>>>>> syntax should be expected.
>>>>>>>>>>
>>>>>>>>>> You've said so before, yes, but I continue to disagree. And as we can see
>>>>>>>>>> from the series things can be made work consistently (and imo nothing else
>>>>>>>>>> should have been done right from the beginning).
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>> There are inconsistencies in ISA.
>>>>>>>>
>>>>>>>> Sure. But we shouldn't add further ones in the assembler.
>>>>>>>
>>>>>>> Assembler just follows ISA.  Programmers should learn to
>>>>>>> deal with it or use a compiler.
>>>>>>
>>>>>> This is entirely non-constructive. Assembler writers should get things into
>>>>>> usable (read: consistent) shape. Plus what ISA are you talking about here?
>>>>>
>>>>> GNU assembler has been this way for a long time and the current GNU
>>>>> assembler will still be in use for the next few years.  Assembler writers
>>>>> should know about all these.
>>>>
>>>> Hmm, so after all not any ISA to follow? Plus do you suggest there's
>>>> only people having written x86 assembler code for many years? And
>>>> there's no people who would prefer to get their code into more
>>>> consistent shape, but who are limited by assembler shortcomings?
>>>
>>> I prefer consistency with existing assemblers and ISA specs.
>>> For new instructions, suffixes should be used only when there is
>>> an ambiguity.  For existing instructions, to support existing assembly
>>> codes, suffixes may be optional even when there is an ambiguity.
>>> But for integer MOVS instructions, suffixes have always been required.
>>> I don't think we should change them now.  We can improve documentation
>>> if needed.
>>>
>>>> In any event, ...
>>>>
>>>>>> We're talking of mnemonics which aren't spelled out in any ISA document
>>>>>> anyway. The only halfway official AT&T doc I'm aware of doesn't provide
>>>>>> room for omitting size suffixes [1]. Yet that's a fundamental feature of gas,
>>>>>> and elsewhere (recently: CMPccXADD) you're even suggesting to force people
>>>>>> to omit suffixes (plus you've previously objected to the disassembler to
>>>>>> consistently emit them in suffix-always mode).
>>>>>>
>>>>>> That same document was also only updated to cover 64-bit code in a half-
>>>>>> hearted way, so can't necessarily be used for 64-bit only insns (it doesn't
>>>>>> list any form of MOVSXD at all afaics, for example). Where not explicitly
>>>>>> mentioned, their intended handling can only be inferred by using analogies.
>>>>>> Nor do we support some of the odd (quirky I would say) mnemonics that are
>>>>>> listed there, like xchglA or movabsbA (which is even wrongly described as
>>>>>> moving an immediate value into the register).
>>>>>>
>>>>>> Bottom line: May I please ask that you take another (constructive) look at
>>>>>> the v4 submission?
>>>>
>>>> ... this request continues to stand.
>>>
>>> I think we should improve diagnostics and documentation, not add new
>>> syntaxes to existing instructions.
>>
>> I continue to disagree, but to make at least some forward progress: Which
>> parts of this series can I expect an okay for (patches 5-7 already have
>> one, but can't go in ahead)? I would try to re-order the series then to
>> put the controversial patch(es) at the end, such that at least parts can
>> go in and I can make further progress with other work. But there's no
>> promise this re-ordering actually is going to work out if it's more than
>> just this one patch which you continue to object to.
>>
> 
> Please submit a new patch set without MOVS changes.

As said - I'll put those in a separate patch at the end of the series. Not
the least because you'll be a little disappointed: The changes to tc-i386.c
simply need to move to another patch then. Hence there's not going to be
much left to make move-with-sign-extend consistent in that final patch
(most of it is then testsuite adjustments/additions).

Jan
  

Patch

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -236,6 +236,8 @@  enum i386_error
     unsupported_with_intel_mnemonic,
     unsupported_syntax,
     unsupported,
+    unsupported_on_arch,
+    unsupported_64bit,
     invalid_sib_address,
     invalid_vsib_address,
     invalid_vector_register_set,
@@ -4849,6 +4851,15 @@  md_assemble (char *line)
     {
       if (pass1_mnem != NULL)
 	goto match_error;
+      if (i.error != no_error)
+	{
+	  gas_assert (current_templates != NULL);
+	  if (current_templates->start->opcode_modifier.pass2 && !i.suffix)
+	    goto no_match;
+	  /* No point in trying a 2nd pass - it'll only find the same suffix
+	     again.  */
+	  goto match_error;
+	}
       return;
     }
   if (current_templates->start->opcode_modifier.pass2)
@@ -4948,12 +4959,21 @@  md_assemble (char *line)
 	{
 	  line = copy;
 	  copy = NULL;
+  no_match:
 	  pass1_err = i.error;
 	  pass1_mnem = current_templates->start->name;
 	  goto retry;
 	}
-      free (copy);
+
+      /* If a non-/only-64bit template (group) was found in pass 1, and if
+	 _some_ template (group) was found in pass 2, squash pass 1's
+	 error.  */
+      if (pass1_err == unsupported_64bit)
+	pass1_mnem = NULL;
+
   match_error:
+      free (copy);
+
       switch (pass1_mnem ? pass1_err : i.error)
 	{
 	default:
@@ -4986,6 +5006,17 @@  md_assemble (char *line)
 	  as_bad (_("unsupported instruction `%s'"),
 		  pass1_mnem ? pass1_mnem : current_templates->start->name);
 	  return;
+	case unsupported_on_arch:
+	  as_bad (_("`%s' is not supported on `%s%s'"),
+		  pass1_mnem ? pass1_mnem : current_templates->start->name,
+		  cpu_arch_name ? cpu_arch_name : default_arch,
+		  cpu_sub_arch_name ? cpu_sub_arch_name : "");
+	  return;
+	case unsupported_64bit:
+	  as_bad (_("`%s' is %s supported in 64-bit mode"),
+		  pass1_mnem ? pass1_mnem : current_templates->start->name,
+		  flag_code == CODE_64BIT ? _("not") : _("only"));
+	  return;
 	case invalid_sib_address:
 	  err_msg = _("invalid SIB address");
 	  break;
@@ -5601,16 +5632,13 @@  parse_insn (const char *line, char *mnem
 	return l;
     }
 
-  if (!(supported & CPU_FLAGS_64BIT_MATCH))
-    as_bad (flag_code == CODE_64BIT
-	    ? _("`%s' is not supported in 64-bit mode")
-	    : _("`%s' is only supported in 64-bit mode"),
-	    current_templates->start->name);
-  else
-    as_bad (_("`%s' is not supported on `%s%s'"),
-	    current_templates->start->name,
-	    cpu_arch_name ? cpu_arch_name : default_arch,
-	    cpu_sub_arch_name ? cpu_sub_arch_name : "");
+  if (pass1)
+    {
+      if (supported & CPU_FLAGS_64BIT_MATCH)
+        i.error = unsupported_on_arch;
+      else
+        i.error = unsupported_64bit;
+    }
 
   return NULL;
 }
--- a/gas/testsuite/gas/i386/movs.s
+++ b/gas/testsuite/gas/i386/movs.s
@@ -30,4 +30,10 @@  movs:
 .ifdef x86_64
 	movswq	%ax,%rax
 	movswq	(%rax),%rax
+
+	movsl	%eax,%rax
+	movsl	(%rax),%rax
+
+	movslq	%eax,%rax
+	movslq	(%rax),%rax
 .endif
--- a/gas/testsuite/gas/i386/movx64.l
+++ b/gas/testsuite/gas/i386/movx64.l
@@ -241,6 +241,46 @@ 
 [ 	]*[1-9][0-9]*[ 	]+movswq	%eax, %rcx
 [ 	]*[1-9][0-9]*[ 	]+movswq	%rax, %rcx
 [ 	]*[1-9][0-9]*[ 	]*
+[ 	]*[1-9][0-9]*[ 	]+movsl	%al, %cl
+[ 	]*[1-9][0-9]*[ 	]+movsl	%ax, %cl
+[ 	]*[1-9][0-9]*[ 	]+movsl	%eax, %cl
+[ 	]*[1-9][0-9]*[ 	]+movsl	%rax, %cl
+[ 	]*[1-9][0-9]*[ 	]*
+[ 	]*[1-9][0-9]*[ 	]+movsl	%al, %cx
+[ 	]*[1-9][0-9]*[ 	]+movsl	%ax, %cx
+[ 	]*[1-9][0-9]*[ 	]+movsl	%eax, %cx
+[ 	]*[1-9][0-9]*[ 	]+movsl	%rax, %cx
+[ 	]*[1-9][0-9]*[ 	]*
+[ 	]*[1-9][0-9]*[ 	]+movsl	%al, %ecx
+[ 	]*[1-9][0-9]*[ 	]+movsl	%ax, %ecx
+[ 	]*[1-9][0-9]*[ 	]+movsl	%eax, %ecx
+[ 	]*[1-9][0-9]*[ 	]+movsl	%rax, %ecx
+[ 	]*[1-9][0-9]*[ 	]*
+[ 	]*[1-9][0-9]*[ 	]+movsl	%al, %rcx
+[ 	]*[1-9][0-9]*[ 	]+movsl	%ax, %rcx
+[ 	]*[1-9][0-9]* \?\?\?\? 4863C8[ 	]+movsl	%eax, %rcx
+[ 	]*[1-9][0-9]*[ 	]+movsl	%rax, %rcx
+[ 	]*[1-9][0-9]*[ 	]*
+[ 	]*[1-9][0-9]*[ 	]+movslq	%al, %cl
+[ 	]*[1-9][0-9]*[ 	]+movslq	%ax, %cl
+[ 	]*[1-9][0-9]*[ 	]+movslq	%eax, %cl
+[ 	]*[1-9][0-9]*[ 	]+movslq	%rax, %cl
+[ 	]*[1-9][0-9]*[ 	]*
+[ 	]*[1-9][0-9]*[ 	]+movslq	%al, %cx
+[ 	]*[1-9][0-9]*[ 	]+movslq	%ax, %cx
+[ 	]*[1-9][0-9]*[ 	]+movslq	%eax, %cx
+[ 	]*[1-9][0-9]*[ 	]+movslq	%rax, %cx
+[ 	]*[1-9][0-9]*[ 	]*
+[ 	]*[1-9][0-9]*[ 	]+movslq	%al, %ecx
+[ 	]*[1-9][0-9]*[ 	]+movslq	%ax, %ecx
+[ 	]*[1-9][0-9]*[ 	]+movslq	%eax, %ecx
+[ 	]*[1-9][0-9]*[ 	]+movslq	%rax, %ecx
+[ 	]*[1-9][0-9]*[ 	]*
+[ 	]*[1-9][0-9]*[ 	]+movslq	%al, %rcx
+[ 	]*[1-9][0-9]*[ 	]+movslq	%ax, %rcx
+[ 	]*[1-9][0-9]* \?\?\?\? 4863C8[ 	]+movslq	%eax, %rcx
+[ 	]*[1-9][0-9]*[ 	]+movslq	%rax, %rcx
+[ 	]*[1-9][0-9]*[ 	]*
 [ 	]*[1-9][0-9]*[ 	]+movzx:
 [ 	]*[1-9][0-9]*[ 	]+movzx	%al, %cl
 [ 	]*[1-9][0-9]*[ 	]+movzx	%ax, %cl
--- a/gas/testsuite/gas/i386/movx64.s
+++ b/gas/testsuite/gas/i386/movx64.s
@@ -241,6 +241,46 @@  movsx:
 	movswq	%eax, %rcx
 	movswq	%rax, %rcx
 
+	movsl	%al, %cl
+	movsl	%ax, %cl
+	movsl	%eax, %cl
+	movsl	%rax, %cl
+
+	movsl	%al, %cx
+	movsl	%ax, %cx
+	movsl	%eax, %cx
+	movsl	%rax, %cx
+
+	movsl	%al, %ecx
+	movsl	%ax, %ecx
+	movsl	%eax, %ecx
+	movsl	%rax, %ecx
+
+	movsl	%al, %rcx
+	movsl	%ax, %rcx
+	movsl	%eax, %rcx
+	movsl	%rax, %rcx
+
+	movslq	%al, %cl
+	movslq	%ax, %cl
+	movslq	%eax, %cl
+	movslq	%rax, %cl
+
+	movslq	%al, %cx
+	movslq	%ax, %cx
+	movslq	%eax, %cx
+	movslq	%rax, %cx
+
+	movslq	%al, %ecx
+	movslq	%ax, %ecx
+	movslq	%eax, %ecx
+	movslq	%rax, %ecx
+
+	movslq	%al, %rcx
+	movslq	%ax, %rcx
+	movslq	%eax, %rcx
+	movslq	%rax, %rcx
+
 movzx:
 	movzx	%al, %cl
 	movzx	%ax, %cl
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -164,9 +164,7 @@  movbe, 0x0f38f0, None, CpuMovbe, D|Modrm
 // Move with sign extend.
 movsb, 0xfbe, None, Cpu386, Modrm|No_bSuf|No_sSuf|No_ldSuf|Pass2, { Reg8|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
 movsw, 0xfbf, None, Cpu386, Modrm|No_bSuf|No_wSuf|No_sSuf|No_ldSuf|Pass2, { Reg16|Unspecified|BaseIndex, Reg32|Reg64 }
-// "movslq" must not be converted into "movsl" to avoid conflict with the
-// "movsl" string move instruction.
-movslq, 0x63, None, Cpu64, Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf|Size64, { Reg32|Dword|Unspecified|BaseIndex, Reg64 }
+movsl, 0x63, None, Cpu64, Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_ldSuf|Pass2, { Reg32|Unspecified|BaseIndex, Reg64 }
 movsx, 0xfbe, None, Cpu386, W|Modrm|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Reg8|Reg16|Unspecified|BaseIndex, Reg16|Reg32|Reg64 }
 movsx, 0x63, None, Cpu64, Modrm|No_bSuf|No_wSuf|No_sSuf|No_qSuf|No_ldSuf, { Reg32|Unspecified|BaseIndex, Reg32|Reg64 }
 movsxd, 0x63, None, Cpu64, Modrm|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Reg32|Unspecified|BaseIndex, Reg32|Reg64 }