[1/2] x86: CPU-qualify {disp16} / {disp32}

Message ID 56e9550a-91d3-6849-0f3c-ad0559371de1@suse.com
State Unresolved
Headers
Series x86: pseudo-prefix handling |

Checks

Context Check Description
snail/binutils-gdb-check warning Git am fail log

Commit Message

Jan Beulich Nov. 6, 2023, 2:22 p.m. UTC
  {disp16} is invalid to use in 64-bit mode, while {disp32} is invalid to
use on pre-386 CPUs. The latter, also affecting other (real) prefixes,
further requires that like for insns we fully check the CPU flags; till
now only Cpu64/CpuNo64 were taken into consideration.
---
While this is consistent with i386_index_check() diagnosing wrong
{dispN} use as an error, that and the change here aren't consistent with
documentation saying "prefer", suggesting such prefixes - like {rex},
albeit even there not fully consistent, seeing the error md_assemble()
generates when used with VEX/XOP/EVEX encoded insns - are ignored when
impossible to fulfill. Otoh the change here is consistent with {rex}
being refused (rather than ignored) outside of 64-bit mode.
  

Comments

Cui, Lili Nov. 7, 2023, 8:40 a.m. UTC | #1
> Subject: [PATCH 1/2] x86: CPU-qualify {disp16} / {disp32}
> 
> {disp16} is invalid to use in 64-bit mode, while {disp32} is invalid to use on
> pre-386 CPUs. The latter, also affecting other (real) prefixes, further requires
> that like for insns we fully check the CPU flags; till now only Cpu64/CpuNo64
> were taken into consideration.
> ---
> While this is consistent with i386_index_check() diagnosing wrong {dispN} use
> as an error, that and the change here aren't consistent with documentation
> saying "prefer", suggesting such prefixes - like {rex}, albeit even there not fully
> consistent, seeing the error md_assemble() generates when used with
> VEX/XOP/EVEX encoded insns - are ignored when impossible to fulfill. Otoh
> the change here is consistent with {rex} being refused (rather than ignored)
> outside of 64-bit mode.
> 
> --- a/gas/config/tc-i386.c
> +++ b/gas/config/tc-i386.c
> @@ -5781,7 +5781,8 @@ parse_insn (const char *line, char *mnem
>  	  && current_templates
>  	  && current_templates->start->opcode_modifier.isprefix)
>  	{
> -	  if (!cpu_flags_check_cpu64 (current_templates->start))
> +	  supported = cpu_flags_match (current_templates->start);
> +	  if (!(supported & CPU_FLAGS_64BIT_MATCH))
>  	    {
>  	      as_bad ((flag_code != CODE_64BIT
>  		       ? _("`%s' is only supported in 64-bit mode") @@ -5789,6
> +5790,14 @@ parse_insn (const char *line, char *mnem
>  		      insn_name (current_templates->start));
>  	      return NULL;
>  	    }
> +	  if (supported != CPU_FLAGS_PERFECT_MATCH)
> +	    {
> +	      as_bad (_("`%s' is not supported on `%s%s'"),
> +		      insn_name (current_templates->start),
> +		      cpu_arch_name ? cpu_arch_name : default_arch,
> +		      cpu_sub_arch_name ? cpu_sub_arch_name : "");
> +	      return NULL;
> +	    }
>  	  /* If we are in 16-bit mode, do not allow addr16 or data16.
>  	     Similarly, in 32-bit mode, do not allow addr32 or data32.  */
>  	  if ((current_templates->start->opcode_modifier.size == SIZE16
> --- a/gas/testsuite/gas/i386/prefix32.l
> +++ b/gas/testsuite/gas/i386/prefix32.l
> @@ -10,6 +10,13 @@
>  .*:20: Error: data size .* `vaddps'
>  .*:21: Error: data size .* `vaddpd'
>  .*:25: Error: same type of prefix .*
> +.*:31: Error: `xacquire' is not supported on `i386'
> +.*:32: Error: `notrack' is not supported on `i386'
> +.*:33: Error: `bnd' is not supported on `i386'
> +.*:38: Error: `gs' is not supported on `i286'
> +.*:39: Error: `data32' is not supported on `i286'
> +.*:40: Error: `addr32' is not supported on `i286'
> +.*:41: Error: .*disp32.* is not supported on `i286'
>  GAS LISTING .*
>  #...
>  [ 	]*1[ 	]+\.text
> @@ -40,4 +47,18 @@ GAS LISTING .*
>  [ 	]*26[ 	]+\?\?\?\? 3E8B4500[ 	]+ds mov
> 	%ss:\(%ebp\), %eax
>  [ 	]*27[ 	]+\?\?\?\? 3E8B4500[ 	]+ds mov
> 	%ds:\(%ebp\), %eax
>  [ 	]*28[ 	]*
> +[ 	]*[0-9]+[ 	]+\.L386:
> +[ 	]*[0-9]+[ 	]+\.arch i386
> +[ 	]*[0-9]+[ 	]+xacquire lock add \[esi\], eax
> +[ 	]*[0-9]+[ 	]+notrack call eax
> +[ 	]*[0-9]+[ 	]+bnd call eax
> +[ 	]*[0-9]+[ 	]*
> +[ 	]*[0-9]+[ 	]+\.L286:
> +[ 	]*[0-9]+[ 	]+\.code16
> +[ 	]*[0-9]+[ 	]+\.arch i286
> +[ 	]*[0-9]+[ 	]+gs inc word ptr \[si\]
> +[ 	]*[0-9]+[ 	]+data32 nop
> +[ 	]*[0-9]+[ 	]+addr32 nop
> +[ 	]*[0-9]+[ 	]+\{disp32\} nop
> +[ 	]*[0-9]+[ 	]*
>  #pass
> --- a/gas/testsuite/gas/i386/prefix32.s
> +++ b/gas/testsuite/gas/i386/prefix32.s
> @@ -26,4 +26,18 @@ prefix:
>  	ds mov		%ss:(%ebp), %eax
>  	ds mov		%ds:(%ebp), %eax
> 
> +.L386:
> +	.arch i386
> +	xacquire lock add [esi], eax
> +	notrack call eax
> +	bnd call eax
> +
> +.L286:
> +	.code16
> +	.arch i286
> +	gs inc word ptr [si]
> +	data32 nop
> +	addr32 nop
> +	{disp32} nop
> +
>  	.p2align	4,0
> --- a/gas/testsuite/gas/i386/prefix64.l
> +++ b/gas/testsuite/gas/i386/prefix64.l
> @@ -3,12 +3,13 @@
>  .*:7: Error: invalid .* `addss' after `repne'
>  .*:8: Error: invalid .* `vaddss' after `repe'
>  .*:9: Error: invalid .* `vaddss' after `repne'
> -.*:14: Error: same type of prefix .*
> -.*:15: Error: same type of prefix .*
> -.*:18: Error: data size .* `addps'
> -.*:19: Error: data size .* `addpd'
> -.*:20: Error: data size .* `vaddps'
> -.*:21: Error: data size .* `vaddpd'
> +.*:11: Error: .*disp16.* is not supported .*
> +.*:16: Error: same type of prefix .*
> +.*:17: Error: same type of prefix .*
> +.*:20: Error: data size .* `addps'
> +.*:21: Error: data size .* `addpd'
> +.*:22: Error: data size .* `vaddps'
> +.*:23: Error: data size .* `vaddpd'
>  GAS LISTING .*
>  #...
>  [ 	]*1[ 	]+\.text
> @@ -21,16 +22,18 @@ GAS LISTING .*
>  [ 	]*8[ 	]+repe vaddss	%xmm0, %xmm0, %xmm0
>  [ 	]*9[ 	]+repne vaddss	%xmm0, %xmm0, %xmm0
>  [ 	]*10[ 	]*
> -[ 	]*11[ 	]+\.Lrep_ret:
> -[ 	]*12[ 	]+\?\?\?\? F2C3[ 	]+bnd ret
> -[ 	]*13[ 	]+\?\?\?\? F3C3[ 	]+rep ret
> -[ 	]*14[ 	]+bnd rep ret
> -[ 	]*15[ 	]+rep bnd ret
> -[ 	]*16[ 	]*
> -[ 	]*17[ 	]+\.Ldata16:
> -[ 	]*18[ 	]+data16 addps	%xmm0, %xmm0
> -[ 	]*19[ 	]+data16 addpd	%xmm0, %xmm0
> -[ 	]*20[ 	]+data16 vaddps	%xmm0, %xmm0, %xmm0
> -[ 	]*21[ 	]+data16 vaddpd	%xmm0, %xmm0, %xmm0
> -[ 	]*22[ 	]*
> +[ 	]*[0-9]+[ 	]+\{disp16\} nop
> +[ 	]*[0-9]+[ 	]*
> +[ 	]*[0-9]+[ 	]+\.Lrep_ret:
> +[ 	]*[0-9]+[ 	]+\?\?\?\? F2C3[ 	]+bnd ret
> +[ 	]*[0-9]+[ 	]+\?\?\?\? F3C3[ 	]+rep ret
> +[ 	]*[0-9]+[ 	]+bnd rep ret
> +[ 	]*[0-9]+[ 	]+rep bnd ret
> +[ 	]*[0-9]+[ 	]*
> +[ 	]*[0-9]+[ 	]+\.Ldata16:
> +[ 	]*[0-9]+[ 	]+data16 addps	%xmm0, %xmm0
> +[ 	]*[0-9]+[ 	]+data16 addpd	%xmm0, %xmm0
> +[ 	]*[0-9]+[ 	]+data16 vaddps	%xmm0, %xmm0, %xmm0
> +[ 	]*[0-9]+[ 	]+data16 vaddpd	%xmm0, %xmm0, %xmm0
> +[ 	]*[0-9]+[ 	]*
>  #pass
> --- a/gas/testsuite/gas/i386/prefix64.s
> +++ b/gas/testsuite/gas/i386/prefix64.s
> @@ -8,6 +8,8 @@ prefix:
>  	repe vaddss	%xmm0, %xmm0, %xmm0
>  	repne vaddss	%xmm0, %xmm0, %xmm0
> 
> +	{disp16} nop
> +
>  .Lrep_ret:
>  	bnd ret
>  	rep ret
> --- a/opcodes/i386-opc.tbl
> +++ b/opcodes/i386-opc.tbl
> @@ -890,7 +890,7 @@ rex.wrxb, 0x4f, x64, NoSuf|IsPrefix, {}
> 
>  // Pseudo prefixes (base_opcode == PSEUDO_PREFIX)
> 
> -<pseudopfx:ident:cpu, disp8:Disp8:0, disp16:Disp16:0, disp32:Disp32:0, +
> +<pseudopfx:ident:cpu, disp8:Disp8:0, disp16:Disp16:No64,
> +disp32:Disp32:i386, +
>                        load:Load:0, store:Store:0, +
>                        vex:VEX:0, vex2:VEX:0, vex3:VEX3:0, evex:EVEX:0, +
>                        rex:REX:x64, nooptimize:NoOptimize:0>
LGTM

Lili.
  
Cui, Lili Nov. 7, 2023, 3:06 p.m. UTC | #2
> Subject: [PATCH 1/2] x86: CPU-qualify {disp16} / {disp32}
> 
> {disp16} is invalid to use in 64-bit mode, while {disp32} is invalid to use on
> pre-386 CPUs. The latter, also affecting other (real) prefixes, further requires
> that like for insns we fully check the CPU flags; till now only Cpu64/CpuNo64
> were taken into consideration.
> ---
> While this is consistent with i386_index_check() diagnosing wrong {dispN} use
> as an error, that and the change here aren't consistent with documentation
> saying "prefer", suggesting such prefixes - like {rex}, albeit even there not fully
> consistent, seeing the error md_assemble() generates when used with
> VEX/XOP/EVEX encoded insns - are ignored when impossible to fulfill. Otoh
> the change here is consistent with {rex} being refused (rather than ignored)
> outside of 64-bit mode.
> 
> >>>         {rex} vmovaps %xmm7,%xmm2
> >>>         {rex} vmovaps %xmm17,%xmm2
> >>>         {rex} rorx $7,%eax,%ebx
> >>> +       {rex2} vmovaps %xmm7,%xmm2
> >>
> >> Right, but please see my "optional vs required" comment in the
> >> pseudo- prefix related patch I did send earlier today. I question the
> >> correctness of the {rex} related check here, which would then extend to the
> {rex2} one as well.
> >>
> >
> > A REX byte that is immediately followed by a legacy prefix byte (LOCK,
> > REPE, REPNE, OSIZE override, ASIZE override, or segment overrides) or
> another REX byte is ignored and behaves as if it does not exist (except for
> contributing to the instruction length), but in this case I think it's correct.
> 
> I'm afraid I can't relate this to the aspect I raised above. Perhaps better to
> discuss in the context of the patch that I sent (and that I mentioned above;
> "x86: CPU-qualify {disp16} / {disp32}"). You did reply to the patch, but you
> didn't reply to the more detailed description of the issue (which I did refer to
> above).
> 

I think the vex instruction is not preceded by these legacy prefix bytes (LOCK, REPE, REPNE, OSIZE override, ASIZE override, or segment override), so the vex instruction should not ignore the rex bytes. 

Lili.
> --- a/gas/config/tc-i386.c
> +++ b/gas/config/tc-i386.c
> @@ -5781,7 +5781,8 @@ parse_insn (const char *line, char *mnem
>  	  && current_templates
>  	  && current_templates->start->opcode_modifier.isprefix)
>  	{
> -	  if (!cpu_flags_check_cpu64 (current_templates->start))
> +	  supported = cpu_flags_match (current_templates->start);
> +	  if (!(supported & CPU_FLAGS_64BIT_MATCH))
>  	    {
>  	      as_bad ((flag_code != CODE_64BIT
>  		       ? _("`%s' is only supported in 64-bit mode") @@ -5789,6
> +5790,14 @@ parse_insn (const char *line, char *mnem
>  		      insn_name (current_templates->start));
>  	      return NULL;
>  	    }
> +	  if (supported != CPU_FLAGS_PERFECT_MATCH)
> +	    {
> +	      as_bad (_("`%s' is not supported on `%s%s'"),
> +		      insn_name (current_templates->start),
> +		      cpu_arch_name ? cpu_arch_name : default_arch,
> +		      cpu_sub_arch_name ? cpu_sub_arch_name : "");
> +	      return NULL;
> +	    }
>  	  /* If we are in 16-bit mode, do not allow addr16 or data16.
>  	     Similarly, in 32-bit mode, do not allow addr32 or data32.  */
>  	  if ((current_templates->start->opcode_modifier.size == SIZE16
> --- a/gas/testsuite/gas/i386/prefix32.l
> +++ b/gas/testsuite/gas/i386/prefix32.l
> @@ -10,6 +10,13 @@
>  .*:20: Error: data size .* `vaddps'
>  .*:21: Error: data size .* `vaddpd'
>  .*:25: Error: same type of prefix .*
> +.*:31: Error: `xacquire' is not supported on `i386'
> +.*:32: Error: `notrack' is not supported on `i386'
> +.*:33: Error: `bnd' is not supported on `i386'
> +.*:38: Error: `gs' is not supported on `i286'
> +.*:39: Error: `data32' is not supported on `i286'
> +.*:40: Error: `addr32' is not supported on `i286'
> +.*:41: Error: .*disp32.* is not supported on `i286'
>  GAS LISTING .*
>  #...
>  [ 	]*1[ 	]+\.text
> @@ -40,4 +47,18 @@ GAS LISTING .*
>  [ 	]*26[ 	]+\?\?\?\? 3E8B4500[ 	]+ds mov
> 	%ss:\(%ebp\), %eax
>  [ 	]*27[ 	]+\?\?\?\? 3E8B4500[ 	]+ds mov
> 	%ds:\(%ebp\), %eax
>  [ 	]*28[ 	]*
> +[ 	]*[0-9]+[ 	]+\.L386:
> +[ 	]*[0-9]+[ 	]+\.arch i386
> +[ 	]*[0-9]+[ 	]+xacquire lock add \[esi\], eax
> +[ 	]*[0-9]+[ 	]+notrack call eax
> +[ 	]*[0-9]+[ 	]+bnd call eax
> +[ 	]*[0-9]+[ 	]*
> +[ 	]*[0-9]+[ 	]+\.L286:
> +[ 	]*[0-9]+[ 	]+\.code16
> +[ 	]*[0-9]+[ 	]+\.arch i286
> +[ 	]*[0-9]+[ 	]+gs inc word ptr \[si\]
> +[ 	]*[0-9]+[ 	]+data32 nop
> +[ 	]*[0-9]+[ 	]+addr32 nop
> +[ 	]*[0-9]+[ 	]+\{disp32\} nop
> +[ 	]*[0-9]+[ 	]*
>  #pass
> --- a/gas/testsuite/gas/i386/prefix32.s
> +++ b/gas/testsuite/gas/i386/prefix32.s
> @@ -26,4 +26,18 @@ prefix:
>  	ds mov		%ss:(%ebp), %eax
>  	ds mov		%ds:(%ebp), %eax
> 
> +.L386:
> +	.arch i386
> +	xacquire lock add [esi], eax
> +	notrack call eax
> +	bnd call eax
> +
> +.L286:
> +	.code16
> +	.arch i286
> +	gs inc word ptr [si]
> +	data32 nop
> +	addr32 nop
> +	{disp32} nop
> +
>  	.p2align	4,0
> --- a/gas/testsuite/gas/i386/prefix64.l
> +++ b/gas/testsuite/gas/i386/prefix64.l
> @@ -3,12 +3,13 @@
>  .*:7: Error: invalid .* `addss' after `repne'
>  .*:8: Error: invalid .* `vaddss' after `repe'
>  .*:9: Error: invalid .* `vaddss' after `repne'
> -.*:14: Error: same type of prefix .*
> -.*:15: Error: same type of prefix .*
> -.*:18: Error: data size .* `addps'
> -.*:19: Error: data size .* `addpd'
> -.*:20: Error: data size .* `vaddps'
> -.*:21: Error: data size .* `vaddpd'
> +.*:11: Error: .*disp16.* is not supported .*
> +.*:16: Error: same type of prefix .*
> +.*:17: Error: same type of prefix .*
> +.*:20: Error: data size .* `addps'
> +.*:21: Error: data size .* `addpd'
> +.*:22: Error: data size .* `vaddps'
> +.*:23: Error: data size .* `vaddpd'
>  GAS LISTING .*
>  #...
>  [ 	]*1[ 	]+\.text
> @@ -21,16 +22,18 @@ GAS LISTING .*
>  [ 	]*8[ 	]+repe vaddss	%xmm0, %xmm0, %xmm0
>  [ 	]*9[ 	]+repne vaddss	%xmm0, %xmm0, %xmm0
>  [ 	]*10[ 	]*
> -[ 	]*11[ 	]+\.Lrep_ret:
> -[ 	]*12[ 	]+\?\?\?\? F2C3[ 	]+bnd ret
> -[ 	]*13[ 	]+\?\?\?\? F3C3[ 	]+rep ret
> -[ 	]*14[ 	]+bnd rep ret
> -[ 	]*15[ 	]+rep bnd ret
> -[ 	]*16[ 	]*
> -[ 	]*17[ 	]+\.Ldata16:
> -[ 	]*18[ 	]+data16 addps	%xmm0, %xmm0
> -[ 	]*19[ 	]+data16 addpd	%xmm0, %xmm0
> -[ 	]*20[ 	]+data16 vaddps	%xmm0, %xmm0, %xmm0
> -[ 	]*21[ 	]+data16 vaddpd	%xmm0, %xmm0, %xmm0
> -[ 	]*22[ 	]*
> +[ 	]*[0-9]+[ 	]+\{disp16\} nop
> +[ 	]*[0-9]+[ 	]*
> +[ 	]*[0-9]+[ 	]+\.Lrep_ret:
> +[ 	]*[0-9]+[ 	]+\?\?\?\? F2C3[ 	]+bnd ret
> +[ 	]*[0-9]+[ 	]+\?\?\?\? F3C3[ 	]+rep ret
> +[ 	]*[0-9]+[ 	]+bnd rep ret
> +[ 	]*[0-9]+[ 	]+rep bnd ret
> +[ 	]*[0-9]+[ 	]*
> +[ 	]*[0-9]+[ 	]+\.Ldata16:
> +[ 	]*[0-9]+[ 	]+data16 addps	%xmm0, %xmm0
> +[ 	]*[0-9]+[ 	]+data16 addpd	%xmm0, %xmm0
> +[ 	]*[0-9]+[ 	]+data16 vaddps	%xmm0, %xmm0, %xmm0
> +[ 	]*[0-9]+[ 	]+data16 vaddpd	%xmm0, %xmm0, %xmm0
> +[ 	]*[0-9]+[ 	]*
>  #pass
> --- a/gas/testsuite/gas/i386/prefix64.s
> +++ b/gas/testsuite/gas/i386/prefix64.s
> @@ -8,6 +8,8 @@ prefix:
>  	repe vaddss	%xmm0, %xmm0, %xmm0
>  	repne vaddss	%xmm0, %xmm0, %xmm0
> 
> +	{disp16} nop
> +
>  .Lrep_ret:
>  	bnd ret
>  	rep ret
> --- a/opcodes/i386-opc.tbl
> +++ b/opcodes/i386-opc.tbl
> @@ -890,7 +890,7 @@ rex.wrxb, 0x4f, x64, NoSuf|IsPrefix, {}
> 
>  // Pseudo prefixes (base_opcode == PSEUDO_PREFIX)
> 
> -<pseudopfx:ident:cpu, disp8:Disp8:0, disp16:Disp16:0, disp32:Disp32:0, +
> +<pseudopfx:ident:cpu, disp8:Disp8:0, disp16:Disp16:No64,
> +disp32:Disp32:i386, +
>                        load:Load:0, store:Store:0, +
>                        vex:VEX:0, vex2:VEX:0, vex3:VEX3:0, evex:EVEX:0, +
>                        rex:REX:x64, nooptimize:NoOptimize:0>
  
Jan Beulich Nov. 7, 2023, 3:14 p.m. UTC | #3
On 07.11.2023 16:06, Cui, Lili wrote:
>> Subject: [PATCH 1/2] x86: CPU-qualify {disp16} / {disp32}
>>
>> {disp16} is invalid to use in 64-bit mode, while {disp32} is invalid to use on
>> pre-386 CPUs. The latter, also affecting other (real) prefixes, further requires
>> that like for insns we fully check the CPU flags; till now only Cpu64/CpuNo64
>> were taken into consideration.
>> ---
>> While this is consistent with i386_index_check() diagnosing wrong {dispN} use
>> as an error, that and the change here aren't consistent with documentation
>> saying "prefer", suggesting such prefixes - like {rex}, albeit even there not fully
>> consistent, seeing the error md_assemble() generates when used with
>> VEX/XOP/EVEX encoded insns - are ignored when impossible to fulfill. Otoh
>> the change here is consistent with {rex} being refused (rather than ignored)
>> outside of 64-bit mode.
>>
>>>>>         {rex} vmovaps %xmm7,%xmm2
>>>>>         {rex} vmovaps %xmm17,%xmm2
>>>>>         {rex} rorx $7,%eax,%ebx
>>>>> +       {rex2} vmovaps %xmm7,%xmm2
>>>>
>>>> Right, but please see my "optional vs required" comment in the
>>>> pseudo- prefix related patch I did send earlier today. I question the
>>>> correctness of the {rex} related check here, which would then extend to the
>> {rex2} one as well.
>>>>
>>>
>>> A REX byte that is immediately followed by a legacy prefix byte (LOCK,
>>> REPE, REPNE, OSIZE override, ASIZE override, or segment overrides) or
>> another REX byte is ignored and behaves as if it does not exist (except for
>> contributing to the instruction length), but in this case I think it's correct.
>>
>> I'm afraid I can't relate this to the aspect I raised above. Perhaps better to
>> discuss in the context of the patch that I sent (and that I mentioned above;
>> "x86: CPU-qualify {disp16} / {disp32}"). You did reply to the patch, but you
>> didn't reply to the more detailed description of the issue (which I did refer to
>> above).
>>
> 
> I think the vex instruction is not preceded by these legacy prefix bytes (LOCK, REPE, REPNE, OSIZE override, ASIZE override, or segment override), so the vex instruction should not ignore the rex bytes. 

But you realize the difference between "rex" and "{rex}" as a prefix?
What you describe looks to talk about the former, whereas I've been
talking about the latter.

Jan
  
Cui, Lili Nov. 7, 2023, 3:50 p.m. UTC | #4
> Subject: Re: [PATCH 1/2] x86: CPU-qualify {disp16} / {disp32}
> 
> On 07.11.2023 16:06, Cui, Lili wrote:
> >> Subject: [PATCH 1/2] x86: CPU-qualify {disp16} / {disp32}
> >>
> >> {disp16} is invalid to use in 64-bit mode, while {disp32} is invalid
> >> to use on
> >> pre-386 CPUs. The latter, also affecting other (real) prefixes,
> >> further requires that like for insns we fully check the CPU flags;
> >> till now only Cpu64/CpuNo64 were taken into consideration.
> >> ---
> >> While this is consistent with i386_index_check() diagnosing wrong
> >> {dispN} use as an error, that and the change here aren't consistent
> >> with documentation saying "prefer", suggesting such prefixes - like
> >> {rex}, albeit even there not fully consistent, seeing the error
> >> md_assemble() generates when used with VEX/XOP/EVEX encoded insns -
> >> are ignored when impossible to fulfill. Otoh the change here is
> >> consistent with {rex} being refused (rather than ignored) outside of 64-bit
> mode.
> >>
> >>>>>         {rex} vmovaps %xmm7,%xmm2
> >>>>>         {rex} vmovaps %xmm17,%xmm2
> >>>>>         {rex} rorx $7,%eax,%ebx
> >>>>> +       {rex2} vmovaps %xmm7,%xmm2
> >>>>
> >>>> Right, but please see my "optional vs required" comment in the
> >>>> pseudo- prefix related patch I did send earlier today. I question
> >>>> the correctness of the {rex} related check here, which would then
> >>>> extend to the
> >> {rex2} one as well.
> >>>>
> >>>
> >>> A REX byte that is immediately followed by a legacy prefix byte
> >>> (LOCK, REPE, REPNE, OSIZE override, ASIZE override, or segment
> >>> overrides) or
> >> another REX byte is ignored and behaves as if it does not exist
> >> (except for contributing to the instruction length), but in this case I think
> it's correct.
> >>
> >> I'm afraid I can't relate this to the aspect I raised above. Perhaps
> >> better to discuss in the context of the patch that I sent (and that I
> >> mentioned above;
> >> "x86: CPU-qualify {disp16} / {disp32}"). You did reply to the patch,
> >> but you didn't reply to the more detailed description of the issue
> >> (which I did refer to above).
> >>
> >
> > I think the vex instruction is not preceded by these legacy prefix bytes
> (LOCK, REPE, REPNE, OSIZE override, ASIZE override, or segment override), so
> the vex instruction should not ignore the rex bytes.
> 
> But you realize the difference between "rex" and "{rex}" as a prefix?
> What you describe looks to talk about the former, whereas I've been talking
> about the latter.
> 
Oh, got it, I mixed them up, thanks for pointing it out. 
Suppose the customer wants to insert a byte {rex} in front of VEX insn to achieve a certain byte alignment. Would it be better for us to report an error? Or just ignore it. I don't know if this really exists.

Lili.
  
Jan Beulich Nov. 7, 2023, 4 p.m. UTC | #5
On 07.11.2023 16:50, Cui, Lili wrote:
>> Subject: Re: [PATCH 1/2] x86: CPU-qualify {disp16} / {disp32}
>>
>> On 07.11.2023 16:06, Cui, Lili wrote:
>>>> Subject: [PATCH 1/2] x86: CPU-qualify {disp16} / {disp32}
>>>>
>>>> {disp16} is invalid to use in 64-bit mode, while {disp32} is invalid
>>>> to use on
>>>> pre-386 CPUs. The latter, also affecting other (real) prefixes,
>>>> further requires that like for insns we fully check the CPU flags;
>>>> till now only Cpu64/CpuNo64 were taken into consideration.
>>>> ---
>>>> While this is consistent with i386_index_check() diagnosing wrong
>>>> {dispN} use as an error, that and the change here aren't consistent
>>>> with documentation saying "prefer", suggesting such prefixes - like
>>>> {rex}, albeit even there not fully consistent, seeing the error
>>>> md_assemble() generates when used with VEX/XOP/EVEX encoded insns -
>>>> are ignored when impossible to fulfill. Otoh the change here is
>>>> consistent with {rex} being refused (rather than ignored) outside of 64-bit
>> mode.
>>>>
>>>>>>>         {rex} vmovaps %xmm7,%xmm2
>>>>>>>         {rex} vmovaps %xmm17,%xmm2
>>>>>>>         {rex} rorx $7,%eax,%ebx
>>>>>>> +       {rex2} vmovaps %xmm7,%xmm2
>>>>>>
>>>>>> Right, but please see my "optional vs required" comment in the
>>>>>> pseudo- prefix related patch I did send earlier today. I question
>>>>>> the correctness of the {rex} related check here, which would then
>>>>>> extend to the
>>>> {rex2} one as well.
>>>>>>
>>>>>
>>>>> A REX byte that is immediately followed by a legacy prefix byte
>>>>> (LOCK, REPE, REPNE, OSIZE override, ASIZE override, or segment
>>>>> overrides) or
>>>> another REX byte is ignored and behaves as if it does not exist
>>>> (except for contributing to the instruction length), but in this case I think
>> it's correct.
>>>>
>>>> I'm afraid I can't relate this to the aspect I raised above. Perhaps
>>>> better to discuss in the context of the patch that I sent (and that I
>>>> mentioned above;
>>>> "x86: CPU-qualify {disp16} / {disp32}"). You did reply to the patch,
>>>> but you didn't reply to the more detailed description of the issue
>>>> (which I did refer to above).
>>>>
>>>
>>> I think the vex instruction is not preceded by these legacy prefix bytes
>> (LOCK, REPE, REPNE, OSIZE override, ASIZE override, or segment override), so
>> the vex instruction should not ignore the rex bytes.
>>
>> But you realize the difference between "rex" and "{rex}" as a prefix?
>> What you describe looks to talk about the former, whereas I've been talking
>> about the latter.
>>
> Oh, got it, I mixed them up, thanks for pointing it out. 
> Suppose the customer wants to insert a byte {rex} in front of VEX insn to achieve a certain byte alignment. Would it be better for us to report an error? Or just ignore it. I don't know if this really exists.

{rex} won't guarantee to insert anything, as per its documentation. If one
strictly wants a REX prefix, "rex" needs to be used.

Jan
  
Cui, Lili Nov. 7, 2023, 4:08 p.m. UTC | #6
> Subject: Re: [PATCH 1/2] x86: CPU-qualify {disp16} / {disp32}
> 
> On 07.11.2023 16:50, Cui, Lili wrote:
> >> Subject: Re: [PATCH 1/2] x86: CPU-qualify {disp16} / {disp32}
> >>
> >> On 07.11.2023 16:06, Cui, Lili wrote:
> >>>> Subject: [PATCH 1/2] x86: CPU-qualify {disp16} / {disp32}
> >>>>
> >>>> {disp16} is invalid to use in 64-bit mode, while {disp32} is
> >>>> invalid to use on
> >>>> pre-386 CPUs. The latter, also affecting other (real) prefixes,
> >>>> further requires that like for insns we fully check the CPU flags;
> >>>> till now only Cpu64/CpuNo64 were taken into consideration.
> >>>> ---
> >>>> While this is consistent with i386_index_check() diagnosing wrong
> >>>> {dispN} use as an error, that and the change here aren't consistent
> >>>> with documentation saying "prefer", suggesting such prefixes - like
> >>>> {rex}, albeit even there not fully consistent, seeing the error
> >>>> md_assemble() generates when used with VEX/XOP/EVEX encoded insns
> -
> >>>> are ignored when impossible to fulfill. Otoh the change here is
> >>>> consistent with {rex} being refused (rather than ignored) outside
> >>>> of 64-bit
> >> mode.
> >>>>
> >>>>>>>         {rex} vmovaps %xmm7,%xmm2
> >>>>>>>         {rex} vmovaps %xmm17,%xmm2
> >>>>>>>         {rex} rorx $7,%eax,%ebx
> >>>>>>> +       {rex2} vmovaps %xmm7,%xmm2
> >>>>>>
> >>>>>> Right, but please see my "optional vs required" comment in the
> >>>>>> pseudo- prefix related patch I did send earlier today. I question
> >>>>>> the correctness of the {rex} related check here, which would then
> >>>>>> extend to the
> >>>> {rex2} one as well.
> >>>>>>
> >>>>>
> >>>>> A REX byte that is immediately followed by a legacy prefix byte
> >>>>> (LOCK, REPE, REPNE, OSIZE override, ASIZE override, or segment
> >>>>> overrides) or
> >>>> another REX byte is ignored and behaves as if it does not exist
> >>>> (except for contributing to the instruction length), but in this
> >>>> case I think
> >> it's correct.
> >>>>
> >>>> I'm afraid I can't relate this to the aspect I raised above.
> >>>> Perhaps better to discuss in the context of the patch that I sent
> >>>> (and that I mentioned above;
> >>>> "x86: CPU-qualify {disp16} / {disp32}"). You did reply to the
> >>>> patch, but you didn't reply to the more detailed description of the
> >>>> issue (which I did refer to above).
> >>>>
> >>>
> >>> I think the vex instruction is not preceded by these legacy prefix
> >>> bytes
> >> (LOCK, REPE, REPNE, OSIZE override, ASIZE override, or segment
> >> override), so the vex instruction should not ignore the rex bytes.
> >>
> >> But you realize the difference between "rex" and "{rex}" as a prefix?
> >> What you describe looks to talk about the former, whereas I've been
> >> talking about the latter.
> >>
> > Oh, got it, I mixed them up, thanks for pointing it out.
> > Suppose the customer wants to insert a byte {rex} in front of VEX insn to
> achieve a certain byte alignment. Would it be better for us to report an error?
> Or just ignore it. I don't know if this really exists.
> 
> {rex} won't guarantee to insert anything, as per its documentation. If one
> strictly wants a REX prefix, "rex" needs to be used.
> 
Okay, I have no concern now. This issue may indeed require HJ to confirm.

Lili.
  

Patch

--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -5781,7 +5781,8 @@  parse_insn (const char *line, char *mnem
 	  && current_templates
 	  && current_templates->start->opcode_modifier.isprefix)
 	{
-	  if (!cpu_flags_check_cpu64 (current_templates->start))
+	  supported = cpu_flags_match (current_templates->start);
+	  if (!(supported & CPU_FLAGS_64BIT_MATCH))
 	    {
 	      as_bad ((flag_code != CODE_64BIT
 		       ? _("`%s' is only supported in 64-bit mode")
@@ -5789,6 +5790,14 @@  parse_insn (const char *line, char *mnem
 		      insn_name (current_templates->start));
 	      return NULL;
 	    }
+	  if (supported != CPU_FLAGS_PERFECT_MATCH)
+	    {
+	      as_bad (_("`%s' is not supported on `%s%s'"),
+		      insn_name (current_templates->start),
+		      cpu_arch_name ? cpu_arch_name : default_arch,
+		      cpu_sub_arch_name ? cpu_sub_arch_name : "");
+	      return NULL;
+	    }
 	  /* If we are in 16-bit mode, do not allow addr16 or data16.
 	     Similarly, in 32-bit mode, do not allow addr32 or data32.  */
 	  if ((current_templates->start->opcode_modifier.size == SIZE16
--- a/gas/testsuite/gas/i386/prefix32.l
+++ b/gas/testsuite/gas/i386/prefix32.l
@@ -10,6 +10,13 @@ 
 .*:20: Error: data size .* `vaddps'
 .*:21: Error: data size .* `vaddpd'
 .*:25: Error: same type of prefix .*
+.*:31: Error: `xacquire' is not supported on `i386'
+.*:32: Error: `notrack' is not supported on `i386'
+.*:33: Error: `bnd' is not supported on `i386'
+.*:38: Error: `gs' is not supported on `i286'
+.*:39: Error: `data32' is not supported on `i286'
+.*:40: Error: `addr32' is not supported on `i286'
+.*:41: Error: .*disp32.* is not supported on `i286'
 GAS LISTING .*
 #...
 [ 	]*1[ 	]+\.text
@@ -40,4 +47,18 @@  GAS LISTING .*
 [ 	]*26[ 	]+\?\?\?\? 3E8B4500[ 	]+ds mov		%ss:\(%ebp\), %eax
 [ 	]*27[ 	]+\?\?\?\? 3E8B4500[ 	]+ds mov		%ds:\(%ebp\), %eax
 [ 	]*28[ 	]*
+[ 	]*[0-9]+[ 	]+\.L386:
+[ 	]*[0-9]+[ 	]+\.arch i386
+[ 	]*[0-9]+[ 	]+xacquire lock add \[esi\], eax
+[ 	]*[0-9]+[ 	]+notrack call eax
+[ 	]*[0-9]+[ 	]+bnd call eax
+[ 	]*[0-9]+[ 	]*
+[ 	]*[0-9]+[ 	]+\.L286:
+[ 	]*[0-9]+[ 	]+\.code16
+[ 	]*[0-9]+[ 	]+\.arch i286
+[ 	]*[0-9]+[ 	]+gs inc word ptr \[si\]
+[ 	]*[0-9]+[ 	]+data32 nop
+[ 	]*[0-9]+[ 	]+addr32 nop
+[ 	]*[0-9]+[ 	]+\{disp32\} nop
+[ 	]*[0-9]+[ 	]*
 #pass
--- a/gas/testsuite/gas/i386/prefix32.s
+++ b/gas/testsuite/gas/i386/prefix32.s
@@ -26,4 +26,18 @@  prefix:
 	ds mov		%ss:(%ebp), %eax
 	ds mov		%ds:(%ebp), %eax
 
+.L386:
+	.arch i386
+	xacquire lock add [esi], eax
+	notrack call eax
+	bnd call eax
+
+.L286:
+	.code16
+	.arch i286
+	gs inc word ptr [si]
+	data32 nop
+	addr32 nop
+	{disp32} nop
+
 	.p2align	4,0
--- a/gas/testsuite/gas/i386/prefix64.l
+++ b/gas/testsuite/gas/i386/prefix64.l
@@ -3,12 +3,13 @@ 
 .*:7: Error: invalid .* `addss' after `repne'
 .*:8: Error: invalid .* `vaddss' after `repe'
 .*:9: Error: invalid .* `vaddss' after `repne'
-.*:14: Error: same type of prefix .*
-.*:15: Error: same type of prefix .*
-.*:18: Error: data size .* `addps'
-.*:19: Error: data size .* `addpd'
-.*:20: Error: data size .* `vaddps'
-.*:21: Error: data size .* `vaddpd'
+.*:11: Error: .*disp16.* is not supported .*
+.*:16: Error: same type of prefix .*
+.*:17: Error: same type of prefix .*
+.*:20: Error: data size .* `addps'
+.*:21: Error: data size .* `addpd'
+.*:22: Error: data size .* `vaddps'
+.*:23: Error: data size .* `vaddpd'
 GAS LISTING .*
 #...
 [ 	]*1[ 	]+\.text
@@ -21,16 +22,18 @@  GAS LISTING .*
 [ 	]*8[ 	]+repe vaddss	%xmm0, %xmm0, %xmm0
 [ 	]*9[ 	]+repne vaddss	%xmm0, %xmm0, %xmm0
 [ 	]*10[ 	]*
-[ 	]*11[ 	]+\.Lrep_ret:
-[ 	]*12[ 	]+\?\?\?\? F2C3[ 	]+bnd ret
-[ 	]*13[ 	]+\?\?\?\? F3C3[ 	]+rep ret
-[ 	]*14[ 	]+bnd rep ret
-[ 	]*15[ 	]+rep bnd ret
-[ 	]*16[ 	]*
-[ 	]*17[ 	]+\.Ldata16:
-[ 	]*18[ 	]+data16 addps	%xmm0, %xmm0
-[ 	]*19[ 	]+data16 addpd	%xmm0, %xmm0
-[ 	]*20[ 	]+data16 vaddps	%xmm0, %xmm0, %xmm0
-[ 	]*21[ 	]+data16 vaddpd	%xmm0, %xmm0, %xmm0
-[ 	]*22[ 	]*
+[ 	]*[0-9]+[ 	]+\{disp16\} nop
+[ 	]*[0-9]+[ 	]*
+[ 	]*[0-9]+[ 	]+\.Lrep_ret:
+[ 	]*[0-9]+[ 	]+\?\?\?\? F2C3[ 	]+bnd ret
+[ 	]*[0-9]+[ 	]+\?\?\?\? F3C3[ 	]+rep ret
+[ 	]*[0-9]+[ 	]+bnd rep ret
+[ 	]*[0-9]+[ 	]+rep bnd ret
+[ 	]*[0-9]+[ 	]*
+[ 	]*[0-9]+[ 	]+\.Ldata16:
+[ 	]*[0-9]+[ 	]+data16 addps	%xmm0, %xmm0
+[ 	]*[0-9]+[ 	]+data16 addpd	%xmm0, %xmm0
+[ 	]*[0-9]+[ 	]+data16 vaddps	%xmm0, %xmm0, %xmm0
+[ 	]*[0-9]+[ 	]+data16 vaddpd	%xmm0, %xmm0, %xmm0
+[ 	]*[0-9]+[ 	]*
 #pass
--- a/gas/testsuite/gas/i386/prefix64.s
+++ b/gas/testsuite/gas/i386/prefix64.s
@@ -8,6 +8,8 @@  prefix:
 	repe vaddss	%xmm0, %xmm0, %xmm0
 	repne vaddss	%xmm0, %xmm0, %xmm0
 
+	{disp16} nop
+
 .Lrep_ret:
 	bnd ret
 	rep ret
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -890,7 +890,7 @@  rex.wrxb, 0x4f, x64, NoSuf|IsPrefix, {}
 
 // Pseudo prefixes (base_opcode == PSEUDO_PREFIX)
 
-<pseudopfx:ident:cpu, disp8:Disp8:0, disp16:Disp16:0, disp32:Disp32:0, +
+<pseudopfx:ident:cpu, disp8:Disp8:0, disp16:Disp16:No64, disp32:Disp32:i386, +
                       load:Load:0, store:Store:0, +
                       vex:VEX:0, vex2:VEX:0, vex3:VEX3:0, evex:EVEX:0, +
                       rex:REX:x64, nooptimize:NoOptimize:0>