[v3,6/7] x86-64: allow HLE store of accumulator to absolute 32-bit address
Commit Message
In commit 1212781b35c9 ("ix86: allow HLE store of accumulator to
absolute address") I was wrong to exclude 64-bit code. Dropping the
flag_code check also leads to better diagnostics in 64-bit code ("MOV",
after all, isn't invalid with "XRELEASE").
While there, also limit the number of further checks done: the operand
type checks that were there were effectively redundant with other ones
anyway, and it is quite fine for "xrelease mov <disp>, %eax" to also
look for the next MOV template (which in fact again improves
diagnostics).
---
v2: New.
Comments
On Wed, Oct 5, 2022 at 12:25 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> In commit 1212781b35c9 ("ix86: allow HLE store of accumulator to
> absolute address") I was wrong to exclude 64-bit code. Dropping the
> check also leads to better diagnostics in 64-bit code ("MOV", after
> all, isn't invalid with "XRELEASE").
>
> While there also limit the amount of further checks done: The operand
> type checks that were there were effectively redundant with other ones
> anyway, plus it's quite fine to also have "xrelease mov <disp>, %eax"
> look for the next MOV template (in fact again also improving
> diagnostics).
> ---
> v2: New.
>
> --- a/gas/config/tc-i386.c
> +++ b/gas/config/tc-i386.c
> @@ -6819,12 +6819,9 @@ match_template (char mnem_suffix)
> continue;
> /* xrelease mov %eax, <disp> is another special case. It must not
> match the accumulator-only encoding of mov. */
> - if (flag_code != CODE_64BIT
> - && i.hle_prefix
> + if (i.hle_prefix
> && t->base_opcode == 0xa0
> - && t->opcode_modifier.opcodespace == SPACE_BASE
> - && i.types[0].bitfield.instance == Accum
> - && (i.flags[1] & Operand_Mem))
> + && t->opcode_modifier.opcodespace == SPACE_BASE)
> continue;
> /* Fall through. */
>
> --- a/gas/testsuite/gas/i386/x86-64-hle-intel.d
> +++ b/gas/testsuite/gas/i386/x86-64-hle-intel.d
> @@ -425,6 +425,8 @@ Disassembly of section .text:
> [ ]*[a-f0-9]+: f0 f2 20 01 lock xacquire and BYTE PTR \[rcx\],al
> [ ]*[a-f0-9]+: f0 f3 20 01 lock xrelease and BYTE PTR \[rcx\],al
> [ ]*[a-f0-9]+: f3 88 01 xrelease mov BYTE PTR \[rcx\],al
> +[ ]*[a-f0-9]+: f3 88 04 25 78 56 34 12 xrelease mov BYTE PTR (ds:)?0x12345678,al
> +[ ]*[a-f0-9]+: 67 f3 88 04 25 21 43 65 87 xrelease mov BYTE PTR \[eiz\*1\+0x87654321\],al
> [ ]*[a-f0-9]+: f2 f0 08 01 xacquire lock or BYTE PTR \[rcx\],al
> [ ]*[a-f0-9]+: f2 f0 08 01 xacquire lock or BYTE PTR \[rcx\],al
> [ ]*[a-f0-9]+: f3 f0 08 01 xrelease lock or BYTE PTR \[rcx\],al
> @@ -476,6 +478,8 @@ Disassembly of section .text:
> [ ]*[a-f0-9]+: f0 f2 66 21 01 lock xacquire and WORD PTR \[rcx\],ax
> [ ]*[a-f0-9]+: f0 f3 66 21 01 lock xrelease and WORD PTR \[rcx\],ax
> [ ]*[a-f0-9]+: 66 f3 89 01 xrelease mov WORD PTR \[rcx\],ax
> +[ ]*[a-f0-9]+: 66 f3 89 04 25 78 56 34 12 xrelease mov WORD PTR (ds:)?0x12345678,ax
> +[ ]*[a-f0-9]+: 67 66 f3 89 04 25 21 43 65 87 xrelease mov WORD PTR \[eiz\*1\+0x87654321\],ax
> [ ]*[a-f0-9]+: 66 f2 f0 09 01 xacquire lock or WORD PTR \[rcx\],ax
> [ ]*[a-f0-9]+: 66 f2 f0 09 01 xacquire lock or WORD PTR \[rcx\],ax
> [ ]*[a-f0-9]+: 66 f3 f0 09 01 xrelease lock or WORD PTR \[rcx\],ax
> @@ -527,6 +531,8 @@ Disassembly of section .text:
> [ ]*[a-f0-9]+: f0 f2 21 01 lock xacquire and DWORD PTR \[rcx\],eax
> [ ]*[a-f0-9]+: f0 f3 21 01 lock xrelease and DWORD PTR \[rcx\],eax
> [ ]*[a-f0-9]+: f3 89 01 xrelease mov DWORD PTR \[rcx\],eax
> +[ ]*[a-f0-9]+: f3 89 04 25 78 56 34 12 xrelease mov DWORD PTR (ds:)?0x12345678,eax
> +[ ]*[a-f0-9]+: 67 f3 89 04 25 21 43 65 87 xrelease mov DWORD PTR \[eiz\*1\+0x87654321\],eax
> [ ]*[a-f0-9]+: f2 f0 09 01 xacquire lock or DWORD PTR \[rcx\],eax
> [ ]*[a-f0-9]+: f2 f0 09 01 xacquire lock or DWORD PTR \[rcx\],eax
> [ ]*[a-f0-9]+: f3 f0 09 01 xrelease lock or DWORD PTR \[rcx\],eax
> @@ -578,6 +584,8 @@ Disassembly of section .text:
> [ ]*[a-f0-9]+: f0 f2 48 21 01 lock xacquire and QWORD PTR \[rcx\],rax
> [ ]*[a-f0-9]+: f0 f3 48 21 01 lock xrelease and QWORD PTR \[rcx\],rax
> [ ]*[a-f0-9]+: f3 48 89 01 xrelease mov QWORD PTR \[rcx\],rax
> +[ ]*[a-f0-9]+: f3 48 89 04 25 78 56 34 12 xrelease mov QWORD PTR (ds:)?0x12345678,rax
> +[ ]*[a-f0-9]+: 67 f3 48 89 04 25 21 43 65 87 xrelease mov QWORD PTR \[eiz\*1\+0x87654321\],rax
> [ ]*[a-f0-9]+: f2 f0 48 09 01 xacquire lock or QWORD PTR \[rcx\],rax
> [ ]*[a-f0-9]+: f2 f0 48 09 01 xacquire lock or QWORD PTR \[rcx\],rax
> [ ]*[a-f0-9]+: f3 f0 48 09 01 xrelease lock or QWORD PTR \[rcx\],rax
> --- a/gas/testsuite/gas/i386/x86-64-hle.d
> +++ b/gas/testsuite/gas/i386/x86-64-hle.d
> @@ -424,6 +424,8 @@ Disassembly of section .text:
> [ ]*[a-f0-9]+: f0 f2 20 01 lock xacquire and %al,\(%rcx\)
> [ ]*[a-f0-9]+: f0 f3 20 01 lock xrelease and %al,\(%rcx\)
> [ ]*[a-f0-9]+: f3 88 01 xrelease mov %al,\(%rcx\)
> +[ ]*[a-f0-9]+: f3 88 04 25 78 56 34 12 xrelease mov %al,0x12345678
> +[ ]*[a-f0-9]+: 67 f3 88 04 25 21 43 65 87 xrelease mov %al,0x87654321\(,%eiz,1\)
> [ ]*[a-f0-9]+: f2 f0 08 01 xacquire lock or %al,\(%rcx\)
> [ ]*[a-f0-9]+: f2 f0 08 01 xacquire lock or %al,\(%rcx\)
> [ ]*[a-f0-9]+: f3 f0 08 01 xrelease lock or %al,\(%rcx\)
> @@ -475,6 +477,8 @@ Disassembly of section .text:
> [ ]*[a-f0-9]+: f0 f2 66 21 01 lock xacquire and %ax,\(%rcx\)
> [ ]*[a-f0-9]+: f0 f3 66 21 01 lock xrelease and %ax,\(%rcx\)
> [ ]*[a-f0-9]+: 66 f3 89 01 xrelease mov %ax,\(%rcx\)
> +[ ]*[a-f0-9]+: 66 f3 89 04 25 78 56 34 12 xrelease mov %ax,0x12345678
> +[ ]*[a-f0-9]+: 67 66 f3 89 04 25 21 43 65 87 xrelease mov %ax,0x87654321\(,%eiz,1\)
> [ ]*[a-f0-9]+: 66 f2 f0 09 01 xacquire lock or %ax,\(%rcx\)
> [ ]*[a-f0-9]+: 66 f2 f0 09 01 xacquire lock or %ax,\(%rcx\)
> [ ]*[a-f0-9]+: 66 f3 f0 09 01 xrelease lock or %ax,\(%rcx\)
> @@ -526,6 +530,8 @@ Disassembly of section .text:
> [ ]*[a-f0-9]+: f0 f2 21 01 lock xacquire and %eax,\(%rcx\)
> [ ]*[a-f0-9]+: f0 f3 21 01 lock xrelease and %eax,\(%rcx\)
> [ ]*[a-f0-9]+: f3 89 01 xrelease mov %eax,\(%rcx\)
> +[ ]*[a-f0-9]+: f3 89 04 25 78 56 34 12 xrelease mov %eax,0x12345678
> +[ ]*[a-f0-9]+: 67 f3 89 04 25 21 43 65 87 xrelease mov %eax,0x87654321\(,%eiz,1\)
> [ ]*[a-f0-9]+: f2 f0 09 01 xacquire lock or %eax,\(%rcx\)
> [ ]*[a-f0-9]+: f2 f0 09 01 xacquire lock or %eax,\(%rcx\)
> [ ]*[a-f0-9]+: f3 f0 09 01 xrelease lock or %eax,\(%rcx\)
> @@ -577,6 +583,8 @@ Disassembly of section .text:
> [ ]*[a-f0-9]+: f0 f2 48 21 01 lock xacquire and %rax,\(%rcx\)
> [ ]*[a-f0-9]+: f0 f3 48 21 01 lock xrelease and %rax,\(%rcx\)
> [ ]*[a-f0-9]+: f3 48 89 01 xrelease mov %rax,\(%rcx\)
> +[ ]*[a-f0-9]+: f3 48 89 04 25 78 56 34 12 xrelease mov %rax,0x12345678
> +[ ]*[a-f0-9]+: 67 f3 48 89 04 25 21 43 65 87 xrelease mov %rax,0x87654321\(,%eiz,1\)
> [ ]*[a-f0-9]+: f2 f0 48 09 01 xacquire lock or %rax,\(%rcx\)
> [ ]*[a-f0-9]+: f2 f0 48 09 01 xacquire lock or %rax,\(%rcx\)
> [ ]*[a-f0-9]+: f3 f0 48 09 01 xrelease lock or %rax,\(%rcx\)
> --- a/gas/testsuite/gas/i386/x86-64-hle.s
> +++ b/gas/testsuite/gas/i386/x86-64-hle.s
> @@ -442,6 +442,8 @@ _start:
> .byte 0xf0; .byte 0xf2; andb %al,(%rcx)
> .byte 0xf0; .byte 0xf3; andb %al,(%rcx)
> xrelease movb %al,(%rcx)
> + xrelease movb %al,0x12345678
> + xrelease addr32 movb %al,0x87654321
> xacquire lock orb %al,(%rcx)
> lock xacquire orb %al,(%rcx)
> xrelease lock orb %al,(%rcx)
> @@ -496,6 +498,8 @@ _start:
> .byte 0xf0; .byte 0xf2; andw %ax,(%rcx)
> .byte 0xf0; .byte 0xf3; andw %ax,(%rcx)
> xrelease movw %ax,(%rcx)
> + xrelease movw %ax,0x12345678
> + xrelease addr32 movw %ax,0x87654321
> xacquire lock orw %ax,(%rcx)
> lock xacquire orw %ax,(%rcx)
> xrelease lock orw %ax,(%rcx)
> @@ -550,6 +554,8 @@ _start:
> .byte 0xf0; .byte 0xf2; andl %eax,(%rcx)
> .byte 0xf0; .byte 0xf3; andl %eax,(%rcx)
> xrelease movl %eax,(%rcx)
> + xrelease movl %eax,0x12345678
> + xrelease addr32 movl %eax,0x87654321
> xacquire lock orl %eax,(%rcx)
> lock xacquire orl %eax,(%rcx)
> xrelease lock orl %eax,(%rcx)
> @@ -604,6 +610,8 @@ _start:
> .byte 0xf0; .byte 0xf2; andq %rax,(%rcx)
> .byte 0xf0; .byte 0xf3; andq %rax,(%rcx)
> xrelease movq %rax,(%rcx)
> + xrelease movq %rax,0x12345678
> + xrelease addr32 movq %rax,0x87654321
> xacquire lock orq %rax,(%rcx)
> lock xacquire orq %rax,(%rcx)
> xrelease lock orq %rax,(%rcx)
>
OK.
Thanks.
@@ -6819,12 +6819,9 @@ match_template (char mnem_suffix)
continue;
/* xrelease mov %eax, <disp> is another special case. It must not
match the accumulator-only encoding of mov. */
- if (flag_code != CODE_64BIT
- && i.hle_prefix
+ if (i.hle_prefix
&& t->base_opcode == 0xa0
- && t->opcode_modifier.opcodespace == SPACE_BASE
- && i.types[0].bitfield.instance == Accum
- && (i.flags[1] & Operand_Mem))
+ && t->opcode_modifier.opcodespace == SPACE_BASE)
continue;
/* Fall through. */
@@ -425,6 +425,8 @@ Disassembly of section .text:
[ ]*[a-f0-9]+: f0 f2 20 01 lock xacquire and BYTE PTR \[rcx\],al
[ ]*[a-f0-9]+: f0 f3 20 01 lock xrelease and BYTE PTR \[rcx\],al
[ ]*[a-f0-9]+: f3 88 01 xrelease mov BYTE PTR \[rcx\],al
+[ ]*[a-f0-9]+: f3 88 04 25 78 56 34 12 xrelease mov BYTE PTR (ds:)?0x12345678,al
+[ ]*[a-f0-9]+: 67 f3 88 04 25 21 43 65 87 xrelease mov BYTE PTR \[eiz\*1\+0x87654321\],al
[ ]*[a-f0-9]+: f2 f0 08 01 xacquire lock or BYTE PTR \[rcx\],al
[ ]*[a-f0-9]+: f2 f0 08 01 xacquire lock or BYTE PTR \[rcx\],al
[ ]*[a-f0-9]+: f3 f0 08 01 xrelease lock or BYTE PTR \[rcx\],al
@@ -476,6 +478,8 @@ Disassembly of section .text:
[ ]*[a-f0-9]+: f0 f2 66 21 01 lock xacquire and WORD PTR \[rcx\],ax
[ ]*[a-f0-9]+: f0 f3 66 21 01 lock xrelease and WORD PTR \[rcx\],ax
[ ]*[a-f0-9]+: 66 f3 89 01 xrelease mov WORD PTR \[rcx\],ax
+[ ]*[a-f0-9]+: 66 f3 89 04 25 78 56 34 12 xrelease mov WORD PTR (ds:)?0x12345678,ax
+[ ]*[a-f0-9]+: 67 66 f3 89 04 25 21 43 65 87 xrelease mov WORD PTR \[eiz\*1\+0x87654321\],ax
[ ]*[a-f0-9]+: 66 f2 f0 09 01 xacquire lock or WORD PTR \[rcx\],ax
[ ]*[a-f0-9]+: 66 f2 f0 09 01 xacquire lock or WORD PTR \[rcx\],ax
[ ]*[a-f0-9]+: 66 f3 f0 09 01 xrelease lock or WORD PTR \[rcx\],ax
@@ -527,6 +531,8 @@ Disassembly of section .text:
[ ]*[a-f0-9]+: f0 f2 21 01 lock xacquire and DWORD PTR \[rcx\],eax
[ ]*[a-f0-9]+: f0 f3 21 01 lock xrelease and DWORD PTR \[rcx\],eax
[ ]*[a-f0-9]+: f3 89 01 xrelease mov DWORD PTR \[rcx\],eax
+[ ]*[a-f0-9]+: f3 89 04 25 78 56 34 12 xrelease mov DWORD PTR (ds:)?0x12345678,eax
+[ ]*[a-f0-9]+: 67 f3 89 04 25 21 43 65 87 xrelease mov DWORD PTR \[eiz\*1\+0x87654321\],eax
[ ]*[a-f0-9]+: f2 f0 09 01 xacquire lock or DWORD PTR \[rcx\],eax
[ ]*[a-f0-9]+: f2 f0 09 01 xacquire lock or DWORD PTR \[rcx\],eax
[ ]*[a-f0-9]+: f3 f0 09 01 xrelease lock or DWORD PTR \[rcx\],eax
@@ -578,6 +584,8 @@ Disassembly of section .text:
[ ]*[a-f0-9]+: f0 f2 48 21 01 lock xacquire and QWORD PTR \[rcx\],rax
[ ]*[a-f0-9]+: f0 f3 48 21 01 lock xrelease and QWORD PTR \[rcx\],rax
[ ]*[a-f0-9]+: f3 48 89 01 xrelease mov QWORD PTR \[rcx\],rax
+[ ]*[a-f0-9]+: f3 48 89 04 25 78 56 34 12 xrelease mov QWORD PTR (ds:)?0x12345678,rax
+[ ]*[a-f0-9]+: 67 f3 48 89 04 25 21 43 65 87 xrelease mov QWORD PTR \[eiz\*1\+0x87654321\],rax
[ ]*[a-f0-9]+: f2 f0 48 09 01 xacquire lock or QWORD PTR \[rcx\],rax
[ ]*[a-f0-9]+: f2 f0 48 09 01 xacquire lock or QWORD PTR \[rcx\],rax
[ ]*[a-f0-9]+: f3 f0 48 09 01 xrelease lock or QWORD PTR \[rcx\],rax
@@ -424,6 +424,8 @@ Disassembly of section .text:
[ ]*[a-f0-9]+: f0 f2 20 01 lock xacquire and %al,\(%rcx\)
[ ]*[a-f0-9]+: f0 f3 20 01 lock xrelease and %al,\(%rcx\)
[ ]*[a-f0-9]+: f3 88 01 xrelease mov %al,\(%rcx\)
+[ ]*[a-f0-9]+: f3 88 04 25 78 56 34 12 xrelease mov %al,0x12345678
+[ ]*[a-f0-9]+: 67 f3 88 04 25 21 43 65 87 xrelease mov %al,0x87654321\(,%eiz,1\)
[ ]*[a-f0-9]+: f2 f0 08 01 xacquire lock or %al,\(%rcx\)
[ ]*[a-f0-9]+: f2 f0 08 01 xacquire lock or %al,\(%rcx\)
[ ]*[a-f0-9]+: f3 f0 08 01 xrelease lock or %al,\(%rcx\)
@@ -475,6 +477,8 @@ Disassembly of section .text:
[ ]*[a-f0-9]+: f0 f2 66 21 01 lock xacquire and %ax,\(%rcx\)
[ ]*[a-f0-9]+: f0 f3 66 21 01 lock xrelease and %ax,\(%rcx\)
[ ]*[a-f0-9]+: 66 f3 89 01 xrelease mov %ax,\(%rcx\)
+[ ]*[a-f0-9]+: 66 f3 89 04 25 78 56 34 12 xrelease mov %ax,0x12345678
+[ ]*[a-f0-9]+: 67 66 f3 89 04 25 21 43 65 87 xrelease mov %ax,0x87654321\(,%eiz,1\)
[ ]*[a-f0-9]+: 66 f2 f0 09 01 xacquire lock or %ax,\(%rcx\)
[ ]*[a-f0-9]+: 66 f2 f0 09 01 xacquire lock or %ax,\(%rcx\)
[ ]*[a-f0-9]+: 66 f3 f0 09 01 xrelease lock or %ax,\(%rcx\)
@@ -526,6 +530,8 @@ Disassembly of section .text:
[ ]*[a-f0-9]+: f0 f2 21 01 lock xacquire and %eax,\(%rcx\)
[ ]*[a-f0-9]+: f0 f3 21 01 lock xrelease and %eax,\(%rcx\)
[ ]*[a-f0-9]+: f3 89 01 xrelease mov %eax,\(%rcx\)
+[ ]*[a-f0-9]+: f3 89 04 25 78 56 34 12 xrelease mov %eax,0x12345678
+[ ]*[a-f0-9]+: 67 f3 89 04 25 21 43 65 87 xrelease mov %eax,0x87654321\(,%eiz,1\)
[ ]*[a-f0-9]+: f2 f0 09 01 xacquire lock or %eax,\(%rcx\)
[ ]*[a-f0-9]+: f2 f0 09 01 xacquire lock or %eax,\(%rcx\)
[ ]*[a-f0-9]+: f3 f0 09 01 xrelease lock or %eax,\(%rcx\)
@@ -577,6 +583,8 @@ Disassembly of section .text:
[ ]*[a-f0-9]+: f0 f2 48 21 01 lock xacquire and %rax,\(%rcx\)
[ ]*[a-f0-9]+: f0 f3 48 21 01 lock xrelease and %rax,\(%rcx\)
[ ]*[a-f0-9]+: f3 48 89 01 xrelease mov %rax,\(%rcx\)
+[ ]*[a-f0-9]+: f3 48 89 04 25 78 56 34 12 xrelease mov %rax,0x12345678
+[ ]*[a-f0-9]+: 67 f3 48 89 04 25 21 43 65 87 xrelease mov %rax,0x87654321\(,%eiz,1\)
[ ]*[a-f0-9]+: f2 f0 48 09 01 xacquire lock or %rax,\(%rcx\)
[ ]*[a-f0-9]+: f2 f0 48 09 01 xacquire lock or %rax,\(%rcx\)
[ ]*[a-f0-9]+: f3 f0 48 09 01 xrelease lock or %rax,\(%rcx\)
@@ -442,6 +442,8 @@ _start:
.byte 0xf0; .byte 0xf2; andb %al,(%rcx)
.byte 0xf0; .byte 0xf3; andb %al,(%rcx)
xrelease movb %al,(%rcx)
+ xrelease movb %al,0x12345678
+ xrelease addr32 movb %al,0x87654321
xacquire lock orb %al,(%rcx)
lock xacquire orb %al,(%rcx)
xrelease lock orb %al,(%rcx)
@@ -496,6 +498,8 @@ _start:
.byte 0xf0; .byte 0xf2; andw %ax,(%rcx)
.byte 0xf0; .byte 0xf3; andw %ax,(%rcx)
xrelease movw %ax,(%rcx)
+ xrelease movw %ax,0x12345678
+ xrelease addr32 movw %ax,0x87654321
xacquire lock orw %ax,(%rcx)
lock xacquire orw %ax,(%rcx)
xrelease lock orw %ax,(%rcx)
@@ -550,6 +554,8 @@ _start:
.byte 0xf0; .byte 0xf2; andl %eax,(%rcx)
.byte 0xf0; .byte 0xf3; andl %eax,(%rcx)
xrelease movl %eax,(%rcx)
+ xrelease movl %eax,0x12345678
+ xrelease addr32 movl %eax,0x87654321
xacquire lock orl %eax,(%rcx)
lock xacquire orl %eax,(%rcx)
xrelease lock orl %eax,(%rcx)
@@ -604,6 +610,8 @@ _start:
.byte 0xf0; .byte 0xf2; andq %rax,(%rcx)
.byte 0xf0; .byte 0xf3; andq %rax,(%rcx)
xrelease movq %rax,(%rcx)
+ xrelease movq %rax,0x12345678
+ xrelease addr32 movq %rax,0x87654321
xacquire lock orq %rax,(%rcx)
lock xacquire orq %rax,(%rcx)
xrelease lock orq %rax,(%rcx)