On 27.07.2023 09:15, Haochen Jiang wrote:
> @@ -1845,7 +1846,9 @@ cpu_flags_match (const insn_template *t)
> i386_cpu_flags cpu = cpu_arch_flags;
>
> /* AVX512VL is no standalone feature - match it and then strip it. */
> - if (x.bitfield.cpuavx512vl && !cpu.bitfield.cpuavx512vl)
> + if (x.bitfield.cpuavx512vl
> + && !cpu.bitfield.cpuavx512vl
> + && !cpu.bitfield.cpuavx10_1)
> return match;
> x.bitfield.cpuavx512vl = 0;
I _think_ the code change is correct, but the comment needs updating
(then also clarifying what the intention here is).
> @@ -6382,7 +6386,10 @@ check_VecOperands (const insn_template *t)
> cpu = cpu_flags_and (t->cpu_flags, avx512);
> if (!cpu_flags_all_zero (&cpu)
> && !t->cpu_flags.bitfield.cpuavx512vl
> - && !cpu_arch_flags.bitfield.cpuavx512vl)
> + && !cpu_arch_flags.bitfield.cpuavx512vl
> + && (!t->cpu_flags.bitfield.cpuavx10_1
> + || (t->cpu_flags.bitfield.cpuavx10_1
> + && !cpu_arch_flags.bitfield.cpuavx10_1)))
This first of all can be simplified to
if (!cpu_flags_all_zero (&cpu)
&& !t->cpu_flags.bitfield.cpuavx512vl
&& !cpu_arch_flags.bitfield.cpuavx512vl
&& (!t->cpu_flags.bitfield.cpuavx10_1
|| !cpu_arch_flags.bitfield.cpuavx10_1))
which doesn't look quite right. But of course the two features also
aren't symmetric, so I may well be wrong. First of all the remark at
the very bottom of this mail needs resolving, though. Also for ...
> @@ -13794,7 +13801,8 @@ static bool check_register (const reg_entry *r)
> if (r->reg_type.bitfield.class == RegMMX && !cpu_arch_flags.bitfield.cpummx)
> return false;
>
> - if (!cpu_arch_flags.bitfield.cpuavx512f)
> + if (!cpu_arch_flags.bitfield.cpuavx512f
> + && !cpu_arch_flags.bitfield.cpuavx10_1)
> {
> if (r->reg_type.bitfield.zmmword
> || r->reg_type.bitfield.class == RegMask)
> @@ -13826,7 +13834,8 @@ static bool check_register (const reg_entry *r)
> mode, and require EVEX encoding. */
> if (r->reg_flags & RegVRex)
> {
> - if (!cpu_arch_flags.bitfield.cpuavx512f
> + if ((!cpu_arch_flags.bitfield.cpuavx512f
> + && !cpu_arch_flags.bitfield.cpuavx10_1)
> || flag_code != CODE_64BIT)
> return false;
... the changes to make here.
> --- a/gas/testsuite/gas/i386/i386.exp
> +++ b/gas/testsuite/gas/i386/i386.exp
> @@ -506,6 +506,7 @@ if [gas_32_check] then {
> run_dump_test "sm4"
> run_dump_test "sm4-intel"
> run_list_test "pbndkb-inval"
> + run_list_test "avx10_1-inval"
> run_list_test "sg"
> run_dump_test "clzero"
> run_dump_test "invlpgb"
Only an inval test? I'm inclined to say you either want both here, or
leave it to just the 64-bit testing.
> --- /dev/null
> +++ b/gas/testsuite/gas/i386/x86-64-avx10_1.s
> @@ -0,0 +1,97 @@
> +# Check AVX10.1 instructions
> +
> + .text
> +_start:
> + .arch .noavx512f
This implies ...
> + kaddd %k1, %k2, %k3
> + kaddb %k1, %k2, %k3
> + kaddw %k1, %k2, %k3
> + kaddq %k1, %k2, %k3
> + kmovb (%ecx), %k5
> + kmovb %k5, -123456(%esp,%esi,8)
> + kmovd -123456(%esp,%esi,8), %k5
> + kmovd %ebp, %k5
> + kmovw %k5, (%ecx)
> + kmovw %k5, %ebp
> + vaddpd %xmm4, %xmm5, %xmm6{%k7}
> + vaddpd (%ecx), %xmm5, %xmm6{%k7}
> + vaddpd (%eax){1to2}, %xmm5, %xmm6{%k7}
> + vaddpd 2048(%edx), %xmm5, %xmm6{%k7}
> + vaddpd -2064(%edx), %xmm5, %xmm6{%k7}
> + vaddpd 1024(%edx){1to2}, %xmm5, %xmm6{%k7}
> + vaddpd -1032(%edx){1to2}, %xmm5, %xmm6{%k7}
> + vaddpd %zmm4, %zmm5, %zmm6{%k7}{z}
> + vaddpd -123456(%esp,%esi,8), %ymm5, %ymm6{%k7}
> + vaddpd 8192(%edx), %zmm5, %zmm6{%k7}
> + vaddpd -4096(%edx), %ymm5, %ymm6{%k7}
> + vaddpd 1016(%edx){1to4}, %ymm5, %ymm6{%k7}
> + vaddpd -2048(%edx){1to8}, %zmm5, %zmm6{%k7}
> + vgf2p8affineqb $0xab, %xmm4, %xmm5, %xmm6{%k7}
> + vgf2p8affineqb $123, -123456(%esp,%esi,8), %ymm5, %ymm6{%k7}
> + vgf2p8affineqb $123, 1016(%edx){1to4}, %ymm5, %ymm6{%k7}
> + vgf2p8affineinvqb $123, 2032(%edx), %xmm5, %xmm6{%k7}
> + vgf2p8affineinvqb $0xab, %ymm4, %ymm5, %ymm6{%k7}{z}
> + vgf2p8mulb %zmm4, %zmm5, %zmm6{%k7}
> + vgf2p8mulb -123456(%esp,%esi,8), %xmm5, %xmm6{%k7}
> + vgf2p8mulb 8192(%edx), %zmm5, %zmm6{%k7}
> + vaesenc %ymm24, %ymm26, %ymm22
> + vaesdec -123456(%esp,%esi,8), %xmm15, %xmm16
> + vaesenclast %xmm24, %xmm26, %xmm27
> + vaesdeclast 4064(%edx), %ymm25, %ymm26
> + vaesdec %zmm24, %zmm26, %zmm22
> + vaesdeclast (%ecx), %zmm26, %zmm27
> + vpclmulqdq $0xab, %xmm22, %xmm22, %xmm23
> + vpclmulqdq $123, 2032(%edx), %xmm22, %xmm23
> + vpclmulqdq $123, -123456(%esp,%esi,8), %ymm16, %ymm14
> + vpclmulhqhqdq %xmm22, %xmm23, %xmm24
> + vpclmullqhqdq %xmm14, %xmm15, %xmm16
> + vpclmulhqlqdq %ymm22, %ymm23, %ymm24
> + vpclmullqlqdq %zmm14, %zmm15, %zmm16
> +
> + .arch .noavx512vl
... this, so for the test to be useful I think the two parts of the
test need to be swapped.
> + kaddd %k1, %k2, %k3
> + kaddb %k1, %k2, %k3
> + kaddw %k1, %k2, %k3
> + kaddq %k1, %k2, %k3
> + kmovb (%ecx), %k5
> + kmovb %k5, -123456(%esp,%esi,8)
> + kmovd -123456(%esp,%esi,8), %k5
> + kmovd %ebp, %k5
> + kmovw %k5, (%ecx)
> + kmovw %k5, %ebp
There's also little point in having these twice. Having them once in
the more restricted case (noavx512f) ought to suffice.
> --- a/gas/testsuite/gas/i386/xmmhi32.s
> +++ b/gas/testsuite/gas/i386/xmmhi32.s
> @@ -26,6 +26,7 @@ xmm:
> vmovdqa ymm24, ymm0
>
> .arch .noavx512f
> + .arch .noavx10.1
> vaddps xmm0, xmm1, xmm8
> vaddps xmm0, xmm1, xmm16
> vaddps xmm0, xmm1, xmm24
This (and similar) addition(s) point out another issue: People may be
using .noavx512{f,vl} to make sure they'll know if they wrongly use
certain insns. That protection becomes void with the additions as
you presently make them. This also relates to the first comment below
on i386-gen.c.
> --- a/opcodes/i386-gen.c
> +++ b/opcodes/i386-gen.c
> @@ -168,6 +168,8 @@ static const dependency isa_dependencies[] =
> "AVX2" },
> { "FRED",
> "LKGS" },
> + { "AVX10_1",
> + "AVX2" },
This can't be quite right (as in: is insufficient): There's no
restriction to the low 16 XMM/YMM registers in AVX10.1, so some of
AVX512 is also a prereq.
To also address the earlier comment, maybe we need an artificial (i.e.
not user selectable) feature underlying both AVX10 and AVX512? (But I
haven't properly thought this through, so there may be issues with
such an approach as well.)
> @@ -1217,7 +1220,7 @@ static void
> output_i386_opcode (FILE *table, const char *name, char *str,
> char *last, int lineno)
> {
> - unsigned int i, length, prefix = 0, space = 0;
> + unsigned int i, j, length, prefix = 0, space = 0, k = 0;
> char *base_opcode, *extension_opcode, *end, *ident;
> char *cpu_flags, *opcode_modifier, *operand_types [MAX_OPERANDS];
> unsigned long long opcode;
> @@ -1315,6 +1318,20 @@ output_i386_opcode (FILE *table, const char *name, char *str,
> ident = mkident (name);
> fprintf (table, " { MN_%s, 0x%0*llx%s, %u,",
> ident, 2 * (int)length, opcode, end, i);
> +
> + j = strlen(ident);
> + /* All AVX512F based instructions are usable for AVX10.1 except
> + AVX512PF/ER/4FMAPS/4VNNIW/VP2INTERSECT. */
> + if (strstr (cpu_flags, "AVX512")
> + && !strstr (cpu_flags, "AVX512PF")
> + && !strstr (cpu_flags, "AVX512ER")
> + && !strstr (cpu_flags, "4FMAPS")
> + && !strstr (cpu_flags, "4VNNIW")
> + && !strstr (cpu_flags, "VP2INTERSECT"))
> + {
> + cpu_flags = concat (cpu_flags, "|AVX10_1", NULL);
> + k = 1;
> + }
> free (ident);
Unless you know for sure that there aren't going to be further AVX512
sub-features, this looks pretty fragile.
The doc also lists AVX10.1/256 as a possible mode (see e.g. table 1-3),
which isn't reflected throughout the patch at all.
Jan
@@ -1,5 +1,7 @@
-*- text -*-
+* Add support for Intel AVX10.1 instructions.
+
* Add support for Intel PBNDKB instructions.
* Add support for Intel SM4 instructions.
@@ -1156,6 +1156,7 @@ static const arch_entry cpu_arch[] =
SUBARCH (sm3, SM3, ANY_SM3, false),
SUBARCH (sm4, SM4, ANY_SM4, false),
SUBARCH (pbndkb, PBNDKB, PBNDKB, false),
+ SUBARCH (avx10.1, AVX10_1, ANY_AVX10_1, false),
};
#undef SUBARCH
@@ -1845,7 +1846,9 @@ cpu_flags_match (const insn_template *t)
i386_cpu_flags cpu = cpu_arch_flags;
/* AVX512VL is no standalone feature - match it and then strip it. */
- if (x.bitfield.cpuavx512vl && !cpu.bitfield.cpuavx512vl)
+ if (x.bitfield.cpuavx512vl
+ && !cpu.bitfield.cpuavx512vl
+ && !cpu.bitfield.cpuavx10_1)
return match;
x.bitfield.cpuavx512vl = 0;
@@ -1871,8 +1874,9 @@ cpu_flags_match (const insn_template *t)
}
else if (x.bitfield.cpuavx512f)
{
- /* We need to check a few extra flags with AVX512F. */
- if (cpu.bitfield.cpuavx512f
+ /* We need to check a few extra flags with AVX512F
+ or AVX10.1. */
+ if ((cpu.bitfield.cpuavx512f || cpu.bitfield.cpuavx10_1)
&& (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
&& (!x.bitfield.cpuvaes || cpu.bitfield.cpuvaes)
&& (!x.bitfield.cpuvpclmulqdq || cpu.bitfield.cpuvpclmulqdq))
@@ -6382,7 +6386,10 @@ check_VecOperands (const insn_template *t)
cpu = cpu_flags_and (t->cpu_flags, avx512);
if (!cpu_flags_all_zero (&cpu)
&& !t->cpu_flags.bitfield.cpuavx512vl
- && !cpu_arch_flags.bitfield.cpuavx512vl)
+ && !cpu_arch_flags.bitfield.cpuavx512vl
+ && (!t->cpu_flags.bitfield.cpuavx10_1
+ || (t->cpu_flags.bitfield.cpuavx10_1
+ && !cpu_arch_flags.bitfield.cpuavx10_1)))
{
for (op = 0; op < t->operands; ++op)
{
@@ -13794,7 +13801,8 @@ static bool check_register (const reg_entry *r)
if (r->reg_type.bitfield.class == RegMMX && !cpu_arch_flags.bitfield.cpummx)
return false;
- if (!cpu_arch_flags.bitfield.cpuavx512f)
+ if (!cpu_arch_flags.bitfield.cpuavx512f
+ && !cpu_arch_flags.bitfield.cpuavx10_1)
{
if (r->reg_type.bitfield.zmmword
|| r->reg_type.bitfield.class == RegMask)
@@ -13826,7 +13834,8 @@ static bool check_register (const reg_entry *r)
mode, and require EVEX encoding. */
if (r->reg_flags & RegVRex)
{
- if (!cpu_arch_flags.bitfield.cpuavx512f
+ if ((!cpu_arch_flags.bitfield.cpuavx512f
+ && !cpu_arch_flags.bitfield.cpuavx10_1)
|| flag_code != CODE_64BIT)
return false;
@@ -212,6 +212,7 @@ accept various extension mnemonics. For example,
@code{sm3},
@code{sm4},
@code{pbndkb},
+@code{avx10.1},
@code{amx_int8},
@code{amx_bf16},
@code{amx_fp16},
@@ -1642,7 +1643,7 @@ supported on the CPU specified. The choices for @var{cpu_type} are:
@item @samp{.cmpccxadd} @tab @samp{.wrmsrns} @tab @samp{.msrlist}
@item @samp{.avx_ne_convert} @tab @samp{.rao_int} @tab @samp{.fred} @tab @samp{.lkgs}
@item @samp{.avx_vnni_int16} @tab @samp{.sha512} @tab @samp{.sm3} @tab @samp{.sm4}
-@item @samp{.pbndkb}
+@item @samp{.pbndkb} @tab @samp{.avx10.1}
@item @samp{.wbnoinvd} @tab @samp{.pconfig} @tab @samp{.waitpkg} @tab @samp{.cldemote}
@item @samp{.shstk} @tab @samp{.gfni} @tab @samp{.vaes} @tab @samp{.vpclmulqdq}
@item @samp{.movdiri} @tab @samp{.movdir64b} @tab @samp{.enqcmd} @tab @samp{.tsxldtrk}
@@ -1,3 +1,3 @@
.* Assembler messages:
-.*:6: Error: unsupported .* `vpmadd52huq'
-.*:7: Error: operand .* `vpmadd52huq'
+.*:7: Error: unsupported .* `vpmadd52huq'
+.*:8: Error: operand .* `vpmadd52huq'
@@ -2,6 +2,7 @@
.text
.arch .noavx512ifma
+ .arch .noavx10.1
_start:
vpmadd52huq %xmm2, %xmm4, %xmm2{%k6}
vpmadd52huq %zmm2, %zmm4, %zmm2
@@ -17,6 +17,7 @@ _start:
test_insn vpmadd52luq
.arch .noavx512vl
+ .arch .noavx10.1
vpmadd52huq %zmm0, %zmm0, %zmm0
vpmadd52huq %ymm0, %ymm0, %ymm0
@@ -24,12 +25,14 @@ _start:
.arch default
.arch .noavx512ifma
+ .arch .noavx10.1
vpmadd52huq %ymm0, %ymm0, %ymm0
vpmadd52huq %xmm0, %xmm0, %xmm0
.arch default
.arch .noavx512f
+ .arch .noavx10.1
vpmadd52huq %ymm0, %ymm0, %ymm0
vpmadd52huq %xmm0, %xmm0, %xmm0
@@ -1,3 +1,3 @@
.* Assembler messages:
-.*:6: Error: unsupported .* `vpdpbusd'
-.*:7: Error: operand .* `vpdpbusd'
+.*:7: Error: unsupported .* `vpdpbusd'
+.*:8: Error: operand .* `vpdpbusd'
@@ -2,6 +2,7 @@
.text
.arch .noavx512_vnni
+ .arch .noavx10.1
_start:
vpdpbusd %xmm2, %xmm4, %xmm2{%k6}
vpdpbusd %zmm2, %zmm4, %zmm2
@@ -17,6 +17,7 @@ _start:
test_insn vpdpwssds
.arch .noavx512vl
+ .arch .noavx10.1
vpdpbusd %zmm0, %zmm0, %zmm0
vpdpbusd %ymm0, %ymm0, %ymm0
@@ -24,12 +25,14 @@ _start:
.arch default
.arch .noavx512_vnni
+ .arch .noavx10.1
vpdpbusd %ymm0, %ymm0, %ymm0
vpdpbusd %xmm0, %xmm0, %xmm0
.arch default
.arch .noavx512f
+ .arch .noavx10.1
vpdpbusd %ymm0, %ymm0, %ymm0
vpdpbusd %xmm0, %xmm0, %xmm0
new file mode 100644
@@ -0,0 +1,6 @@
+.* Assembler messages:
+.*:6: Error: `vp2intersectq' is not supported on `i386.noavx512f'
+.*:7: Error: `vgatherpf0dpd' is not supported on `i386.noavx512f'
+.*:8: Error: `vrcp28ss' is not supported on `i386.noavx512f'
+.*:9: Error: `vp4dpwssd' is not supported on `i386.noavx512f'
+.*:10: Error: `v4fnmaddss' is not supported on `i386.noavx512f'
new file mode 100644
@@ -0,0 +1,10 @@
+# Check invalid AVX10.1 instructions
+
+ .text
+ .arch .noavx512f
+__start:
+ vp2intersectq %xmm1, %xmm2, %k3
+ vgatherpf0dpd 123(%ebp,%ymm7,8){%k1}
+ vrcp28ss %xmm4, %xmm5, %xmm6{%k7}
+ vp4dpwssd (%ecx), %zmm4, %zmm1
+ v4fnmaddss (%ecx), %xmm4, %xmm1
@@ -506,6 +506,7 @@ if [gas_32_check] then {
run_dump_test "sm4"
run_dump_test "sm4-intel"
run_list_test "pbndkb-inval"
+ run_list_test "avx10_1-inval"
run_list_test "sg"
run_dump_test "clzero"
run_dump_test "invlpgb"
@@ -1,44 +1,44 @@
.*: Assembler messages:
-.*:8: Error: .*operand size mismatch.*
-.*:9: Error: .*unsupported masking.*
+.*:9: Error: .*operand size mismatch.*
.*:10: Error: .*unsupported masking.*
-.*:25: Error: .*not supported.*
+.*:11: Error: .*unsupported masking.*
.*:26: Error: .*not supported.*
.*:27: Error: .*not supported.*
-.*:11: Error: .*not supported.*
+.*:28: Error: .*not supported.*
.*:12: Error: .*not supported.*
.*:13: Error: .*not supported.*
.*:14: Error: .*not supported.*
.*:15: Error: .*not supported.*
.*:16: Error: .*not supported.*
.*:17: Error: .*not supported.*
-.*:21: Error: .*operand.*mismatch.*
-.*:22: Error: .*unsupported masking.*
+.*:18: Error: .*not supported.*
+.*:22: Error: .*operand.*mismatch.*
.*:23: Error: .*unsupported masking.*
-.*:24: Error: .*not supported.*
+.*:24: Error: .*unsupported masking.*
.*:25: Error: .*not supported.*
.*:26: Error: .*not supported.*
.*:27: Error: .*not supported.*
-.*:8: Error: .*bad register name.*
-.*:9: Error: .*unknown vector operation.*
+.*:28: Error: .*not supported.*
+.*:9: Error: .*bad register name.*
.*:10: Error: .*unknown vector operation.*
-.*:11: Error: .*not supported.*
+.*:11: Error: .*unknown vector operation.*
.*:12: Error: .*not supported.*
.*:13: Error: .*not supported.*
.*:14: Error: .*not supported.*
.*:15: Error: .*not supported.*
.*:16: Error: .*not supported.*
.*:17: Error: .*not supported.*
-.*:18: Error: .*bad register name.*
-.*:19: Error: .*unknown vector operation.*
+.*:18: Error: .*not supported.*
+.*:19: Error: .*bad register name.*
.*:20: Error: .*unknown vector operation.*
-.*:21: Error: .*bad register name.*
-.*:22: Error: .*unknown vector operation.*
+.*:21: Error: .*unknown vector operation.*
+.*:22: Error: .*bad register name.*
.*:23: Error: .*unknown vector operation.*
-.*:24: Error: .*not supported.*
+.*:24: Error: .*unknown vector operation.*
.*:25: Error: .*not supported.*
.*:26: Error: .*not supported.*
.*:27: Error: .*not supported.*
+.*:28: Error: .*not supported.*
#...
[ ]*[0-9]+[ ]+\# Test \.arch \.noavx512XX
[ ]*[0-9]+[ ]+\.text
@@ -49,6 +49,7 @@
#...
[ ]*[0-9]+[ ]+> \.arch default
[ ]*[0-9]+[ ]+> \.arch default
+[ ]*[0-9]+[ ]+> \.arch \.noavx10.1
[ ]*[0-9]+[ ]+\?\?\?\? 62F27D4F > vpabsb %zmm5,%zmm6\{%k7\}
[ ]*[0-9]+[ ]+1CF5
[ ]*[0-9]+[ ]+\?\?\?\? 62F27D0F > vpabsb %xmm5,%xmm6\{%k7\}
@@ -93,6 +94,7 @@
#...
[ ]*[0-9]+[ ]+> \.arch default
[ ]*[0-9]+[ ]+> \.arch \.noavx512bw
+[ ]*[0-9]+[ ]+> \.arch \.noavx10.1
[ ]*[0-9]+[ ]+> vpabsb %zmm5,%zmm6\{%k7\}
[ ]*[0-9]+[ ]+> vpabsb %xmm5,%xmm6\{%k7\}
[ ]*[0-9]+[ ]+> vpabsb %ymm5,%ymm6\{%k7\}
@@ -131,6 +133,7 @@
#...
[ ]*[0-9]+[ ]+> \.arch default
[ ]*[0-9]+[ ]+> \.arch \.noavx512cd
+[ ]*[0-9]+[ ]+> \.arch \.noavx10.1
[ ]*[0-9]+[ ]+\?\?\?\? 62F27D4F > vpabsb %zmm5,%zmm6\{%k7\}
[ ]*[0-9]+[ ]+1CF5
[ ]*[0-9]+[ ]+\?\?\?\? 62F27D0F > vpabsb %xmm5,%xmm6\{%k7\}
@@ -172,6 +175,7 @@
#...
[ ]*[0-9]+[ ]+> \.arch default
[ ]*[0-9]+[ ]+> \.arch \.noavx512dq
+[ ]*[0-9]+[ ]+> \.arch \.noavx10.1
[ ]*[0-9]+[ ]+\?\?\?\? 62F27D4F > vpabsb %zmm5,%zmm6\{%k7\}
[ ]*[0-9]+[ ]+1CF5
[ ]*[0-9]+[ ]+\?\?\?\? 62F27D0F > vpabsb %xmm5,%xmm6\{%k7\}
@@ -213,6 +217,7 @@
#...
[ ]*[0-9]+[ ]+> \.arch default
[ ]*[0-9]+[ ]+> \.arch \.noavx512er
+[ ]*[0-9]+[ ]+> \.arch \.noavx10.1
[ ]*[0-9]+[ ]+\?\?\?\? 62F27D4F > vpabsb %zmm5,%zmm6\{%k7\}
[ ]*[0-9]+[ ]+1CF5
[ ]*[0-9]+[ ]+\?\?\?\? 62F27D0F > vpabsb %xmm5,%xmm6\{%k7\}
@@ -256,6 +261,7 @@
#...
[ ]*[0-9]+[ ]+> \.arch default
[ ]*[0-9]+[ ]+> \.arch \.noavx512ifma
+[ ]*[0-9]+[ ]+> \.arch \.noavx10.1
[ ]*[0-9]+[ ]+\?\?\?\? 62F27D4F > vpabsb %zmm5,%zmm6\{%k7\}
[ ]*[0-9]+[ ]+1CF5
[ ]*[0-9]+[ ]+\?\?\?\? 62F27D0F > vpabsb %xmm5,%xmm6\{%k7\}
@@ -297,6 +303,7 @@
#...
[ ]*[0-9]+[ ]+> \.arch default
[ ]*[0-9]+[ ]+> \.arch \.noavx512pf
+[ ]*[0-9]+[ ]+> \.arch \.noavx10.1
[ ]*[0-9]+[ ]+\?\?\?\? 62F27D4F > vpabsb %zmm5,%zmm6\{%k7\}
[ ]*[0-9]+[ ]+1CF5
[ ]*[0-9]+[ ]+\?\?\?\? 62F27D0F > vpabsb %xmm5,%xmm6\{%k7\}
@@ -339,6 +346,7 @@
#...
[ ]*[0-9]+[ ]+> \.arch default
[ ]*[0-9]+[ ]+> \.arch \.noavx512vbmi
+[ ]*[0-9]+[ ]+> \.arch \.noavx10.1
[ ]*[0-9]+[ ]+\?\?\?\? 62F27D4F > vpabsb %zmm5,%zmm6\{%k7\}
[ ]*[0-9]+[ ]+1CF5
[ ]*[0-9]+[ ]+\?\?\?\? 62F27D0F > vpabsb %xmm5,%xmm6\{%k7\}
@@ -380,6 +388,7 @@
#...
[ ]*[0-9]+[ ]+> \.arch default
[ ]*[0-9]+[ ]+> \.arch \.noavx512f
+[ ]*[0-9]+[ ]+> \.arch \.noavx10.1
[ ]*[0-9]+[ ]+> vpabsb %zmm5,%zmm6\{%k7\}
[ ]*[0-9]+[ ]+> vpabsb %xmm5,%xmm6\{%k7\}
[ ]*[0-9]+[ ]+> vpabsb %ymm5,%ymm6\{%k7\}
@@ -5,6 +5,7 @@
.arch default
.arch \isa
+ .arch .noavx10.1
vpabsb %zmm5, %zmm6{%k7} # AVX512BW
vpabsb %xmm5, %xmm6{%k7} # AVX512BW + AVX512VL
vpabsb %ymm5, %ymm6{%k7} # AVX512BW + AVX512VL
@@ -1,106 +1,107 @@
.*: Assembler messages:
-.*:26: Error: .*unsupported masking.*
.*:27: Error: .*unsupported masking.*
-.*:29: Error: .*unsupported instruction.*
+.*:28: Error: .*unsupported masking.*
.*:30: Error: .*unsupported instruction.*
-.*:32: Error: .*unsupported instruction.*
+.*:31: Error: .*unsupported instruction.*
.*:33: Error: .*unsupported instruction.*
-.*:36: Error: .*unsupported masking.*
+.*:34: Error: .*unsupported instruction.*
.*:37: Error: .*unsupported masking.*
-.*:39: Error: .*unsupported instruction.*
+.*:38: Error: .*unsupported masking.*
.*:40: Error: .*unsupported instruction.*
-.*:43: Error: .*unsupported instruction.*
+.*:41: Error: .*unsupported instruction.*
.*:44: Error: .*unsupported instruction.*
+.*:45: Error: .*unsupported instruction.*
GAS LISTING .*
#...
[ ]*1[ ]+\# Test \.arch \.noavx512vl
[ ]*2[ ]+\.text
-[ ]*3[ ]+\?\?\?\? 62F27D4F vpabsb %zmm5, %zmm6\{%k7\} \# AVX512BW
-[ ]*3[ ]+1CF5
-[ ]*4[ ]+\?\?\?\? 62F27D0F vpabsb %xmm5, %xmm6\{%k7\} \# AVX512BW \+ AVX512VL
+[ ]*3[ ]+\.arch \.noavx10.1
+[ ]*4[ ]+\?\?\?\? 62F27D4F vpabsb %zmm5, %zmm6\{%k7\} \# AVX512BW
[ ]*4[ ]+1CF5
-[ ]*5[ ]+\?\?\?\? 62F27D2F vpabsb %ymm5, %ymm6\{%k7\} \# AVX512BW \+ AVX512VL
+[ ]*5[ ]+\?\?\?\? 62F27D0F vpabsb %xmm5, %xmm6\{%k7\} \# AVX512BW \+ AVX512VL
[ ]*5[ ]+1CF5
-[ ]*6[ ]+\?\?\?\? 62F27D48 vpconflictd %zmm5, %zmm6 \# AVX412CD
-[ ]*6[ ]+C4F5
-[ ]*7[ ]+\?\?\?\? 62F27D08 vpconflictd %xmm5, %xmm6 \# AVX412CD \+ AVX512VL
+[ ]*6[ ]+\?\?\?\? 62F27D2F vpabsb %ymm5, %ymm6\{%k7\} \# AVX512BW \+ AVX512VL
+[ ]*6[ ]+1CF5
+[ ]*7[ ]+\?\?\?\? 62F27D48 vpconflictd %zmm5, %zmm6 \# AVX412CD
[ ]*7[ ]+C4F5
-[ ]*8[ ]+\?\?\?\? 62F27D28 vpconflictd %ymm5, %ymm6 \# AVX412CD \+ AVX512VL
+[ ]*8[ ]+\?\?\?\? 62F27D08 vpconflictd %xmm5, %xmm6 \# AVX412CD \+ AVX512VL
[ ]*8[ ]+C4F5
-[ ]*9[ ]+\?\?\?\? 62F1FD4F vcvtpd2qq \(%ecx\), %zmm6\{%k7\} \# AVX512DQ
-[ ]*9[ ]+7B31
-[ ]*10[ ]+\?\?\?\? 62F1FD0F vcvtpd2qq \(%ecx\), %xmm6\{%k7\} \# AVX512DQ \+ AVX512VL
+[ ]*9[ ]+\?\?\?\? 62F27D28 vpconflictd %ymm5, %ymm6 \# AVX412CD \+ AVX512VL
+[ ]*9[ ]+C4F5
+[ ]*10[ ]+\?\?\?\? 62F1FD4F vcvtpd2qq \(%ecx\), %zmm6\{%k7\} \# AVX512DQ
[ ]*10[ ]+7B31
-[ ]*11[ ]+\?\?\?\? 62F1FD2F vcvtpd2qq \(%ecx\), %ymm6\{%k7\} \# AVX512DQ \+ AVX512VL
+[ ]*11[ ]+\?\?\?\? 62F1FD0F vcvtpd2qq \(%ecx\), %xmm6\{%k7\} \# AVX512DQ \+ AVX512VL
[ ]*11[ ]+7B31
-[ ]*12[ ]+\?\?\?\? 62F27D4F vexp2ps %zmm5, %zmm6\{%k7\} \# AVX512ER
-[ ]*12[ ]+C8F5
-[ ]*13[ ]+\?\?\?\? 62F1D54F vaddpd %zmm4, %zmm5, %zmm6\{%k7\} \# AVX512F
-[ ]*13[ ]+58F4
-[ ]*14[ ]+\?\?\?\? 62F1D50F vaddpd %xmm4, %xmm5, %xmm6\{%k7\} \# AVX512F \+ AVX512VL
+[ ]*12[ ]+\?\?\?\? 62F1FD2F vcvtpd2qq \(%ecx\), %ymm6\{%k7\} \# AVX512DQ \+ AVX512VL
+[ ]*12[ ]+7B31
+[ ]*13[ ]+\?\?\?\? 62F27D4F vexp2ps %zmm5, %zmm6\{%k7\} \# AVX512ER
+[ ]*13[ ]+C8F5
+[ ]*14[ ]+\?\?\?\? 62F1D54F vaddpd %zmm4, %zmm5, %zmm6\{%k7\} \# AVX512F
[ ]*14[ ]+58F4
-[ ]*15[ ]+\?\?\?\? 62F1D52F vaddpd %ymm4, %ymm5, %ymm6\{%k7\} \# AVX512F \+ AVX512VL
+[ ]*15[ ]+\?\?\?\? 62F1D50F vaddpd %xmm4, %xmm5, %xmm6\{%k7\} \# AVX512F \+ AVX512VL
[ ]*15[ ]+58F4
-[ ]*16[ ]+\?\?\?\? 62F2D54F vpmadd52luq %zmm4, %zmm5, %zmm6\{%k7\} \# AVX512IFMA
-[ ]*16[ ]+B4F4
-[ ]*17[ ]+\?\?\?\? 62F2D50F vpmadd52luq %xmm4, %xmm5, %xmm6\{%k7\} \# AVX512IFMA \+ AVX512VL
+[ ]*16[ ]+\?\?\?\? 62F1D52F vaddpd %ymm4, %ymm5, %ymm6\{%k7\} \# AVX512F \+ AVX512VL
+[ ]*16[ ]+58F4
+[ ]*17[ ]+\?\?\?\? 62F2D54F vpmadd52luq %zmm4, %zmm5, %zmm6\{%k7\} \# AVX512IFMA
[ ]*17[ ]+B4F4
-[ ]*18[ ]+\?\?\?\? 62F2D52F vpmadd52luq %ymm4, %ymm5, %ymm6\{%k7\} \# AVX512IFMA \+ AVX512VL
+[ ]*18[ ]+\?\?\?\? 62F2D50F vpmadd52luq %xmm4, %xmm5, %xmm6\{%k7\} \# AVX512IFMA \+ AVX512VL
[ ]*18[ ]+B4F4
-[ ]*19[ ]+\?\?\?\? 62F2FD49 vgatherpf0dpd 23\(%ebp,%ymm7,8\)\{%k1\} \# AVX512PF
-[ ]*19[ ]+C68CFD17
-[ ]*19[ ]+000000
-[ ]*20[ ]+\?\?\?\? 62F2554F vpermb %zmm4, %zmm5, %zmm6\{%k7\} \# AVX512VBMI
-[ ]*20[ ]+8DF4
-[ ]*21[ ]+\?\?\?\? 62F2550F vpermb %xmm4, %xmm5, %xmm6\{%k7\} \# AVX512VBMI \+ AVX512VL
+[ ]*19[ ]+\?\?\?\? 62F2D52F vpmadd52luq %ymm4, %ymm5, %ymm6\{%k7\} \# AVX512IFMA \+ AVX512VL
+[ ]*19[ ]+B4F4
+[ ]*20[ ]+\?\?\?\? 62F2FD49 vgatherpf0dpd 23\(%ebp,%ymm7,8\)\{%k1\} \# AVX512PF
+[ ]*20[ ]+C68CFD17
+[ ]*20[ ]+000000
+[ ]*21[ ]+\?\?\?\? 62F2554F vpermb %zmm4, %zmm5, %zmm6\{%k7\} \# AVX512VBMI
[ ]*21[ ]+8DF4
-[ ]*22[ ]+\?\?\?\? 62F2552F vpermb %ymm4, %ymm5, %ymm6\{%k7\} \# AVX512VBMI \+ AVX512VL
+[ ]*22[ ]+\?\?\?\? 62F2550F vpermb %xmm4, %xmm5, %xmm6\{%k7\} \# AVX512VBMI \+ AVX512VL
[ ]*22[ ]+8DF4
-[ ]*23[ ]+
-[ ]*24[ ]+\.arch \.noavx512vl
-[ ]*25[ ]+\?\?\?\? 62F27D4F vpabsb %zmm5, %zmm6\{%k7\} \# AVX512BW
-[ ]*25[ ]+1CF5
-[ ]*26[ ]+vpabsb %xmm5, %xmm6\{%k7\} \# AVX512BW \+ AVX512VL
-[ ]*27[ ]+vpabsb %ymm5, %ymm6\{%k7\} \# AVX512BW \+ AVX512VL
-[ ]*28[ ]+\?\?\?\? 62F27D48 vpconflictd %zmm5, %zmm6 \# AVX412CD
-[ ]*28[ ]+C4F5
-[ ]*29[ ]+vpconflictd %xmm5, %xmm6 \# AVX412CD \+ AVX512VL
-[ ]*30[ ]+vpconflictd %ymm5, %ymm6 \# AVX412CD \+ AVX512VL
-[ ]*31[ ]+\?\?\?\? 62F1FD4F vcvtpd2qq \(%ecx\), %zmm6\{%k7\} \# AVX512DQ
-[ ]*31[ ]+7B31
-[ ]*32[ ]+vcvtpd2qq \(%ecx\), %xmm6\{%k7\} \# AVX512DQ \+ AVX512VL
-[ ]*33[ ]+vcvtpd2qq \(%ecx\), %ymm6\{%k7\} \# AVX512DQ \+ AVX512VL
+[ ]*23[ ]+\?\?\?\? 62F2552F vpermb %ymm4, %ymm5, %ymm6\{%k7\} \# AVX512VBMI \+ AVX512VL
+[ ]*23[ ]+8DF4
+[ ]*24[ ]+
+[ ]*25[ ]+\.arch \.noavx512vl
+[ ]*26[ ]+\?\?\?\? 62F27D4F vpabsb %zmm5, %zmm6\{%k7\} \# AVX512BW
+[ ]*26[ ]+1CF5
+[ ]*27[ ]+vpabsb %xmm5, %xmm6\{%k7\} \# AVX512BW \+ AVX512VL
+[ ]*28[ ]+vpabsb %ymm5, %ymm6\{%k7\} \# AVX512BW \+ AVX512VL
+[ ]*29[ ]+\?\?\?\? 62F27D48 vpconflictd %zmm5, %zmm6 \# AVX412CD
+[ ]*29[ ]+C4F5
+[ ]*30[ ]+vpconflictd %xmm5, %xmm6 \# AVX412CD \+ AVX512VL
+[ ]*31[ ]+vpconflictd %ymm5, %ymm6 \# AVX412CD \+ AVX512VL
+[ ]*32[ ]+\?\?\?\? 62F1FD4F vcvtpd2qq \(%ecx\), %zmm6\{%k7\} \# AVX512DQ
+[ ]*32[ ]+7B31
+[ ]*33[ ]+vcvtpd2qq \(%ecx\), %xmm6\{%k7\} \# AVX512DQ \+ AVX512VL
GAS LISTING .*
-[ ]*34[ ]+\?\?\?\? 62F27D4F vexp2ps %zmm5, %zmm6\{%k7\} \# AVX512ER
-[ ]*34[ ]+C8F5
-[ ]*35[ ]+\?\?\?\? 62F1D54F vaddpd %zmm4, %zmm5, %zmm6\{%k7\} \# AVX512F
-[ ]*35[ ]+58F4
-[ ]*36[ ]+vaddpd %xmm4, %xmm5, %xmm6\{%k7\} \# AVX512F \+ AVX512VL
-[ ]*37[ ]+vaddpd %ymm4, %ymm5, %ymm6\{%k7\} \# AVX512F \+ AVX512VL
-[ ]*38[ ]+\?\?\?\? 62F2D54F vpmadd52luq %zmm4, %zmm5, %zmm6\{%k7\} \# AVX512IFMA
-[ ]*38[ ]+B4F4
-[ ]*39[ ]+vpmadd52luq %xmm4, %xmm5, %xmm6\{%k7\} \# AVX512IFMA \+ AVX512VL
-[ ]*40[ ]+vpmadd52luq %ymm4, %ymm5, %ymm6\{%k7\} \# AVX512IFMA \+ AVX512VL
-[ ]*41[ ]+\?\?\?\? 62F2FD49 vgatherpf0dpd 23\(%ebp,%ymm7,8\)\{%k1\} \# AVX512PF
-[ ]*41[ ]+C68CFD17
-[ ]*41[ ]+000000
-[ ]*42[ ]+\?\?\?\? 62F2554F vpermb %zmm4, %zmm5, %zmm6\{%k7\} \# AVX512VBMI
-[ ]*42[ ]+8DF4
-[ ]*43[ ]+vpermb %xmm4, %xmm5, %xmm6\{%k7\} \# AVX512VBMI \+ AVX512VL
-[ ]*44[ ]+vpermb %ymm4, %ymm5, %ymm6\{%k7\} \# AVX512VBMI \+ AVX512VL
-[ ]*45[ ]+
-[ ]*46[ ]+\?\?\?\? C4E2791C vpabsb %xmm5, %xmm6
-[ ]*46[ ]+F5
-[ ]*47[ ]+\?\?\?\? C4E27D1C vpabsb %ymm5, %ymm6
+[ ]*34[ ]+vcvtpd2qq \(%ecx\), %ymm6\{%k7\} \# AVX512DQ \+ AVX512VL
+[ ]*35[ ]+\?\?\?\? 62F27D4F vexp2ps %zmm5, %zmm6\{%k7\} \# AVX512ER
+[ ]*35[ ]+C8F5
+[ ]*36[ ]+\?\?\?\? 62F1D54F vaddpd %zmm4, %zmm5, %zmm6\{%k7\} \# AVX512F
+[ ]*36[ ]+58F4
+[ ]*37[ ]+vaddpd %xmm4, %xmm5, %xmm6\{%k7\} \# AVX512F \+ AVX512VL
+[ ]*38[ ]+vaddpd %ymm4, %ymm5, %ymm6\{%k7\} \# AVX512F \+ AVX512VL
+[ ]*39[ ]+\?\?\?\? 62F2D54F vpmadd52luq %zmm4, %zmm5, %zmm6\{%k7\} \# AVX512IFMA
+[ ]*39[ ]+B4F4
+[ ]*40[ ]+vpmadd52luq %xmm4, %xmm5, %xmm6\{%k7\} \# AVX512IFMA \+ AVX512VL
+[ ]*41[ ]+vpmadd52luq %ymm4, %ymm5, %ymm6\{%k7\} \# AVX512IFMA \+ AVX512VL
+[ ]*42[ ]+\?\?\?\? 62F2FD49 vgatherpf0dpd 23\(%ebp,%ymm7,8\)\{%k1\} \# AVX512PF
+[ ]*42[ ]+C68CFD17
+[ ]*42[ ]+000000
+[ ]*43[ ]+\?\?\?\? 62F2554F vpermb %zmm4, %zmm5, %zmm6\{%k7\} \# AVX512VBMI
+[ ]*43[ ]+8DF4
+[ ]*44[ ]+vpermb %xmm4, %xmm5, %xmm6\{%k7\} \# AVX512VBMI \+ AVX512VL
+[ ]*45[ ]+vpermb %ymm4, %ymm5, %ymm6\{%k7\} \# AVX512VBMI \+ AVX512VL
+[ ]*46[ ]+
+[ ]*47[ ]+\?\?\?\? C4E2791C vpabsb %xmm5, %xmm6
[ ]*47[ ]+F5
-[ ]*48[ ]+\?\?\?\? C5D158F4 vaddpd %xmm4, %xmm5, %xmm6
-[ ]*49[ ]+\?\?\?\? C5D558F4 vaddpd %ymm4, %ymm5, %ymm6
-[ ]*50[ ]+\?\?\?\? 660F381C pabsb %xmm5, %xmm6
-[ ]*50[ ]+F5
-[ ]*51[ ]+\?\?\?\? 660F58F4 addpd %xmm4, %xmm6
-[ ]*52[ ]+
+[ ]*48[ ]+\?\?\?\? C4E27D1C vpabsb %ymm5, %ymm6
+[ ]*48[ ]+F5
+[ ]*49[ ]+\?\?\?\? C5D158F4 vaddpd %xmm4, %xmm5, %xmm6
+[ ]*50[ ]+\?\?\?\? C5D558F4 vaddpd %ymm4, %ymm5, %ymm6
+[ ]*51[ ]+\?\?\?\? 660F381C pabsb %xmm5, %xmm6
+[ ]*51[ ]+F5
+[ ]*52[ ]+\?\?\?\? 660F58F4 addpd %xmm4, %xmm6
+[ ]*53[ ]+
[ ]*[1-9][0-9]*[ ]+\.intel_syntax noprefix
[ ]*[1-9][0-9]*[ ]+\?\?\?\? 62F3FD48 vfpclasspd k0, \[eax], 0
[ ]*[1-9][0-9]*[ ]+660000
@@ -1,5 +1,6 @@
# Test .arch .noavx512vl
.text
+ .arch .noavx10.1
vpabsb %zmm5, %zmm6{%k7} # AVX512BW
vpabsb %xmm5, %xmm6{%k7} # AVX512BW + AVX512VL
vpabsb %ymm5, %ymm6{%k7} # AVX512BW + AVX512VL
@@ -1,4 +1,4 @@
.* Assembler messages:
-.*:6: Error: unsupported .* `vpmadd52huq'
.*:7: Error: unsupported .* `vpmadd52huq'
-.*:8: Error: operand .* `vpmadd52huq'
+.*:8: Error: unsupported .* `vpmadd52huq'
+.*:9: Error: operand .* `vpmadd52huq'
@@ -2,6 +2,7 @@
.text
.arch .noavx512ifma
+ .arch .noavx10.1
_start:
vpmadd52huq %xmm2, %xmm4, %xmm2{%k6}
vpmadd52huq %xmm22, %xmm4, %xmm2{%k1}
@@ -1,4 +1,4 @@
.* Assembler messages:
-.*:6: Error: unsupported .* `vpdpbusds'
.*:7: Error: unsupported .* `vpdpbusds'
-.*:8: Error: operand .* `vpdpbusds'
+.*:8: Error: unsupported .* `vpdpbusds'
+.*:9: Error: operand .* `vpdpbusds'
@@ -2,6 +2,7 @@
.text
.arch .noavx512_vnni
+ .arch .noavx10.1
_start:
vpdpbusds %xmm2, %xmm4, %xmm2{%k6}
vpdpbusds %xmm22, %xmm4, %xmm2{%k1}
new file mode 100644
@@ -0,0 +1,6 @@
+.* Assembler messages:
+.*:6: Error: `vp2intersectq' is not supported on `x86_64.noavx512f'
+.*:7: Error: `vgatherpf0dpd' is not supported on `x86_64.noavx512f'
+.*:8: Error: `vrcp28ss' is not supported on `x86_64.noavx512f'
+.*:9: Error: `vp4dpwssd' is not supported on `x86_64.noavx512f'
+.*:10: Error: `v4fnmaddss' is not supported on `x86_64.noavx512f'
new file mode 100644
@@ -0,0 +1,10 @@
+# Check invalid AVX10.1 instructions
+
+ .text
+ .arch .noavx512f
+__start:
+ vp2intersectq %xmm1, %xmm2, %k3
+ vgatherpf0dpd 123(%ebp,%ymm7,8){%k1}
+ vrcp28ss %xmm4, %xmm5, %xmm6{%k7}
+ vp4dpwssd (%ecx), %zmm4, %zmm1
+ v4fnmaddss (%ecx), %xmm4, %xmm1
new file mode 100644
@@ -0,0 +1,97 @@
+#objdump: -dw
+#name: x86_64 AVX10.1 instructions
+#source: x86-64-avx10_1.s
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*c4 e1 ed 4a d9\s+kaddd %k1,%k2,%k3
+\s*[a-f0-9]+:\s*c5 ed 4a d9\s+kaddb %k1,%k2,%k3
+\s*[a-f0-9]+:\s*c5 ec 4a d9\s+kaddw %k1,%k2,%k3
+\s*[a-f0-9]+:\s*c4 e1 ec 4a d9\s+kaddq %k1,%k2,%k3
+\s*[a-f0-9]+:\s*67 c5 f9 90 29\s+kmovb \(%ecx\),%k5
+\s*[a-f0-9]+:\s*67 c5 f9 91 ac f4 c0 1d fe ff\s+kmovb %k5,-0x1e240\(%esp,%esi,8\)
+\s*[a-f0-9]+:\s*67 c4 e1 f9 90 ac f4 c0 1d fe ff\s+kmovd -0x1e240\(%esp,%esi,8\),%k5
+\s*[a-f0-9]+:\s*c5 fb 92 ed\s+kmovd %ebp,%k5
+\s*[a-f0-9]+:\s*67 c5 f8 91 29\s+kmovw %k5,\(%ecx\)
+\s*[a-f0-9]+:\s*c5 f8 93 ed\s+kmovw %k5,%ebp
+\s*[a-f0-9]+:\s*62 f1 d5 0f 58 f4\s+vaddpd %xmm4,%xmm5,%xmm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f1 d5 0f 58 31\s+vaddpd \(%ecx\),%xmm5,%xmm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f1 d5 1f 58 30\s+vaddpd \(%eax\)\{1to2\},%xmm5,%xmm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f1 d5 0f 58 b2 00 08 00 00\s+vaddpd 0x800\(%edx\),%xmm5,%xmm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f1 d5 0f 58 b2 f0 f7 ff ff\s+vaddpd -0x810\(%edx\),%xmm5,%xmm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f1 d5 1f 58 b2 00 04 00 00\s+vaddpd 0x400\(%edx\)\{1to2\},%xmm5,%xmm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f1 d5 1f 58 b2 f8 fb ff ff\s+vaddpd -0x408\(%edx\)\{1to2\},%xmm5,%xmm6\{%k7\}
+\s*[a-f0-9]+:\s*62 f1 d5 cf 58 f4\s+vaddpd %zmm4,%zmm5,%zmm6\{%k7\}\{z\}
+\s*[a-f0-9]+:\s*67 62 f1 d5 2f 58 b4 f4 c0 1d fe ff\s+vaddpd -0x1e240\(%esp,%esi,8\),%ymm5,%ymm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f1 d5 4f 58 b2 00 20 00 00\s+vaddpd 0x2000\(%edx\),%zmm5,%zmm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f1 d5 2f 58 72 80\s+vaddpd -0x1000\(%edx\),%ymm5,%ymm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f1 d5 3f 58 72 7f\s+vaddpd 0x3f8\(%edx\)\{1to4\},%ymm5,%ymm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f1 d5 5f 58 b2 00 f8 ff ff\s+vaddpd -0x800\(%edx\)\{1to8\},%zmm5,%zmm6\{%k7\}
+\s*[a-f0-9]+:\s*62 f3 d5 0f ce f4 ab\s+vgf2p8affineqb \$0xab,%xmm4,%xmm5,%xmm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f3 d5 2f ce b4 f4 c0 1d fe ff 7b\s+vgf2p8affineqb \$0x7b,-0x1e240\(%esp,%esi,8\),%ymm5,%ymm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f3 d5 3f ce 72 7f 7b\s+vgf2p8affineqb \$0x7b,0x3f8\(%edx\)\{1to4\},%ymm5,%ymm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f3 d5 0f cf 72 7f 7b\s+vgf2p8affineinvqb \$0x7b,0x7f0\(%edx\),%xmm5,%xmm6\{%k7\}
+\s*[a-f0-9]+:\s*62 f3 d5 af cf f4 ab\s+vgf2p8affineinvqb \$0xab,%ymm4,%ymm5,%ymm6\{%k7\}\{z\}
+\s*[a-f0-9]+:\s*62 f2 55 4f cf f4\s+vgf2p8mulb %zmm4,%zmm5,%zmm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f2 55 0f cf b4 f4 c0 1d fe ff\s+vgf2p8mulb -0x1e240\(%esp,%esi,8\),%xmm5,%xmm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f2 55 4f cf b2 00 20 00 00\s+vgf2p8mulb 0x2000\(%edx\),%zmm5,%zmm6\{%k7\}
+\s*[a-f0-9]+:\s*62 82 2d 20 dc f0\s+vaesenc %ymm24,%ymm26,%ymm22
+\s*[a-f0-9]+:\s*67 62 e2 05 08 de 84 f4 c0 1d fe ff\s+vaesdec -0x1e240\(%esp,%esi,8\),%xmm15,%xmm16
+\s*[a-f0-9]+:\s*62 02 2d 00 dd d8\s+vaesenclast %xmm24,%xmm26,%xmm27
+\s*[a-f0-9]+:\s*67 62 62 35 20 df 52 7f\s+vaesdeclast 0xfe0\(%edx\),%ymm25,%ymm26
+\s*[a-f0-9]+:\s*62 82 2d 40 de f0\s+vaesdec %zmm24,%zmm26,%zmm22
+\s*[a-f0-9]+:\s*67 62 62 2d 40 df 19\s+vaesdeclast \(%ecx\),%zmm26,%zmm27
+\s*[a-f0-9]+:\s*62 a3 4d 00 44 fe ab\s+vpclmulqdq \$0xab,%xmm22,%xmm22,%xmm23
+\s*[a-f0-9]+:\s*67 62 e3 4d 00 44 7a 7f 7b\s+vpclmulqdq \$0x7b,0x7f0\(%edx\),%xmm22,%xmm23
+\s*[a-f0-9]+:\s*67 62 73 7d 20 44 b4 f4 c0 1d fe ff 7b\s+vpclmulqdq \$0x7b,-0x1e240\(%esp,%esi,8\),%ymm16,%ymm14
+\s*[a-f0-9]+:\s*62 23 45 00 44 c6 11\s+vpclmulhqhqdq %xmm22,%xmm23,%xmm24
+\s*[a-f0-9]+:\s*62 c3 05 08 44 c6 10\s+vpclmullqhqdq %xmm14,%xmm15,%xmm16
+\s*[a-f0-9]+:\s*62 23 45 20 44 c6 01\s+vpclmulhqlqdq %ymm22,%ymm23,%ymm24
+\s*[a-f0-9]+:\s*62 c3 05 48 44 c6 00\s+vpclmullqlqdq %zmm14,%zmm15,%zmm16
+\s*[a-f0-9]+:\s*c4 e1 ed 4a d9\s+kaddd %k1,%k2,%k3
+\s*[a-f0-9]+:\s*c5 ed 4a d9\s+kaddb %k1,%k2,%k3
+\s*[a-f0-9]+:\s*c5 ec 4a d9\s+kaddw %k1,%k2,%k3
+\s*[a-f0-9]+:\s*c4 e1 ec 4a d9\s+kaddq %k1,%k2,%k3
+\s*[a-f0-9]+:\s*67 c5 f9 90 29\s+kmovb \(%ecx\),%k5
+\s*[a-f0-9]+:\s*67 c5 f9 91 ac f4 c0 1d fe ff\s+kmovb %k5,-0x1e240\(%esp,%esi,8\)
+\s*[a-f0-9]+:\s*67 c4 e1 f9 90 ac f4 c0 1d fe ff\s+kmovd -0x1e240\(%esp,%esi,8\),%k5
+\s*[a-f0-9]+:\s*c5 fb 92 ed\s+kmovd %ebp,%k5
+\s*[a-f0-9]+:\s*67 c5 f8 91 29\s+kmovw %k5,\(%ecx\)
+\s*[a-f0-9]+:\s*c5 f8 93 ed\s+kmovw %k5,%ebp
+\s*[a-f0-9]+:\s*62 f1 d5 0f 58 f4\s+vaddpd %xmm4,%xmm5,%xmm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f1 d5 0f 58 31\s+vaddpd \(%ecx\),%xmm5,%xmm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f1 d5 1f 58 30\s+vaddpd \(%eax\)\{1to2\},%xmm5,%xmm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f1 d5 0f 58 b2 00 08 00 00\s+vaddpd 0x800\(%edx\),%xmm5,%xmm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f1 d5 0f 58 b2 f0 f7 ff ff\s+vaddpd -0x810\(%edx\),%xmm5,%xmm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f1 d5 1f 58 b2 00 04 00 00\s+vaddpd 0x400\(%edx\)\{1to2\},%xmm5,%xmm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f1 d5 1f 58 b2 f8 fb ff ff\s+vaddpd -0x408\(%edx\)\{1to2\},%xmm5,%xmm6\{%k7\}
+\s*[a-f0-9]+:\s*62 f1 d5 af 58 f4\s+vaddpd %ymm4,%ymm5,%ymm6\{%k7\}\{z\}
+\s*[a-f0-9]+:\s*67 62 f1 d5 2f 58 b4 f4 c0 1d fe ff\s+vaddpd -0x1e240\(%esp,%esi,8\),%ymm5,%ymm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f1 d5 2f 58 72 7f\s+vaddpd 0xfe0\(%edx\),%ymm5,%ymm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f1 d5 2f 58 72 80\s+vaddpd -0x1000\(%edx\),%ymm5,%ymm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f1 d5 3f 58 72 7f\s+vaddpd 0x3f8\(%edx\)\{1to4\},%ymm5,%ymm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f1 d5 3f 58 72 80\s+vaddpd -0x400\(%edx\)\{1to4\},%ymm5,%ymm6\{%k7\}
+\s*[a-f0-9]+:\s*62 f3 d5 0f ce f4 ab\s+vgf2p8affineqb \$0xab,%xmm4,%xmm5,%xmm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f3 d5 2f ce b4 f4 c0 1d fe ff 7b\s+vgf2p8affineqb \$0x7b,-0x1e240\(%esp,%esi,8\),%ymm5,%ymm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f3 d5 3f ce 72 7f 7b\s+vgf2p8affineqb \$0x7b,0x3f8\(%edx\)\{1to4\},%ymm5,%ymm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f3 d5 0f cf 72 7f 7b\s+vgf2p8affineinvqb \$0x7b,0x7f0\(%edx\),%xmm5,%xmm6\{%k7\}
+\s*[a-f0-9]+:\s*62 f3 d5 af cf f4 ab\s+vgf2p8affineinvqb \$0xab,%ymm4,%ymm5,%ymm6\{%k7\}\{z\}
+\s*[a-f0-9]+:\s*62 f2 55 0f cf f4\s+vgf2p8mulb %xmm4,%xmm5,%xmm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f2 55 0f cf b4 f4 c0 1d fe ff\s+vgf2p8mulb -0x1e240\(%esp,%esi,8\),%xmm5,%xmm6\{%k7\}
+\s*[a-f0-9]+:\s*67 62 f2 55 2f cf 72 7f\s+vgf2p8mulb 0xfe0\(%edx\),%ymm5,%ymm6\{%k7\}
+\s*[a-f0-9]+:\s*62 82 2d 20 dc f0\s+vaesenc %ymm24,%ymm26,%ymm22
+\s*[a-f0-9]+:\s*67 62 e2 05 08 de 84 f4 c0 1d fe ff\s+vaesdec -0x1e240\(%esp,%esi,8\),%xmm15,%xmm16
+\s*[a-f0-9]+:\s*62 02 2d 00 dd d8\s+vaesenclast %xmm24,%xmm26,%xmm27
+\s*[a-f0-9]+:\s*67 62 62 35 20 df 52 7f\s+vaesdeclast 0xfe0\(%edx\),%ymm25,%ymm26
+\s*[a-f0-9]+:\s*62 82 2d 00 de f0\s+vaesdec %xmm24,%xmm26,%xmm22
+\s*[a-f0-9]+:\s*67 62 62 2d 00 df 19\s+vaesdeclast \(%ecx\),%xmm26,%xmm27
+\s*[a-f0-9]+:\s*62 a3 4d 00 44 fe ab\s+vpclmulqdq \$0xab,%xmm22,%xmm22,%xmm23
+\s*[a-f0-9]+:\s*67 62 e3 4d 00 44 7a 7f 7b\s+vpclmulqdq \$0x7b,0x7f0\(%edx\),%xmm22,%xmm23
+\s*[a-f0-9]+:\s*67 62 73 7d 20 44 b4 f4 c0 1d fe ff 7b\s+vpclmulqdq \$0x7b,-0x1e240\(%esp,%esi,8\),%ymm16,%ymm14
+\s*[a-f0-9]+:\s*62 23 45 00 44 c6 11\s+vpclmulhqhqdq %xmm22,%xmm23,%xmm24
+\s*[a-f0-9]+:\s*62 c3 05 08 44 c6 10\s+vpclmullqhqdq %xmm14,%xmm15,%xmm16
+\s*[a-f0-9]+:\s*62 23 45 20 44 c6 01\s+vpclmulhqlqdq %ymm22,%ymm23,%ymm24
+\s*[a-f0-9]+:\s*62 c3 05 28 44 c6 00\s+vpclmullqlqdq %ymm14,%ymm15,%ymm16
new file mode 100644
@@ -0,0 +1,97 @@
+# Check AVX10.1 instructions
+
+ .text
+_start:
+ .arch .noavx512f
+
+ kaddd %k1, %k2, %k3
+ kaddb %k1, %k2, %k3
+ kaddw %k1, %k2, %k3
+ kaddq %k1, %k2, %k3
+ kmovb (%ecx), %k5
+ kmovb %k5, -123456(%esp,%esi,8)
+ kmovd -123456(%esp,%esi,8), %k5
+ kmovd %ebp, %k5
+ kmovw %k5, (%ecx)
+ kmovw %k5, %ebp
+ vaddpd %xmm4, %xmm5, %xmm6{%k7}
+ vaddpd (%ecx), %xmm5, %xmm6{%k7}
+ vaddpd (%eax){1to2}, %xmm5, %xmm6{%k7}
+ vaddpd 2048(%edx), %xmm5, %xmm6{%k7}
+ vaddpd -2064(%edx), %xmm5, %xmm6{%k7}
+ vaddpd 1024(%edx){1to2}, %xmm5, %xmm6{%k7}
+ vaddpd -1032(%edx){1to2}, %xmm5, %xmm6{%k7}
+ vaddpd %zmm4, %zmm5, %zmm6{%k7}{z}
+ vaddpd -123456(%esp,%esi,8), %ymm5, %ymm6{%k7}
+ vaddpd 8192(%edx), %zmm5, %zmm6{%k7}
+ vaddpd -4096(%edx), %ymm5, %ymm6{%k7}
+ vaddpd 1016(%edx){1to4}, %ymm5, %ymm6{%k7}
+ vaddpd -2048(%edx){1to8}, %zmm5, %zmm6{%k7}
+ vgf2p8affineqb $0xab, %xmm4, %xmm5, %xmm6{%k7}
+ vgf2p8affineqb $123, -123456(%esp,%esi,8), %ymm5, %ymm6{%k7}
+ vgf2p8affineqb $123, 1016(%edx){1to4}, %ymm5, %ymm6{%k7}
+ vgf2p8affineinvqb $123, 2032(%edx), %xmm5, %xmm6{%k7}
+ vgf2p8affineinvqb $0xab, %ymm4, %ymm5, %ymm6{%k7}{z}
+ vgf2p8mulb %zmm4, %zmm5, %zmm6{%k7}
+ vgf2p8mulb -123456(%esp,%esi,8), %xmm5, %xmm6{%k7}
+ vgf2p8mulb 8192(%edx), %zmm5, %zmm6{%k7}
+ vaesenc %ymm24, %ymm26, %ymm22
+ vaesdec -123456(%esp,%esi,8), %xmm15, %xmm16
+ vaesenclast %xmm24, %xmm26, %xmm27
+ vaesdeclast 4064(%edx), %ymm25, %ymm26
+ vaesdec %zmm24, %zmm26, %zmm22
+ vaesdeclast (%ecx), %zmm26, %zmm27
+ vpclmulqdq $0xab, %xmm22, %xmm22, %xmm23
+ vpclmulqdq $123, 2032(%edx), %xmm22, %xmm23
+ vpclmulqdq $123, -123456(%esp,%esi,8), %ymm16, %ymm14
+ vpclmulhqhqdq %xmm22, %xmm23, %xmm24
+ vpclmullqhqdq %xmm14, %xmm15, %xmm16
+ vpclmulhqlqdq %ymm22, %ymm23, %ymm24
+ vpclmullqlqdq %zmm14, %zmm15, %zmm16
+
+ .arch .noavx512vl
+
+ kaddd %k1, %k2, %k3
+ kaddb %k1, %k2, %k3
+ kaddw %k1, %k2, %k3
+ kaddq %k1, %k2, %k3
+ kmovb (%ecx), %k5
+ kmovb %k5, -123456(%esp,%esi,8)
+ kmovd -123456(%esp,%esi,8), %k5
+ kmovd %ebp, %k5
+ kmovw %k5, (%ecx)
+ kmovw %k5, %ebp
+ vaddpd %xmm4, %xmm5, %xmm6{%k7}
+ vaddpd (%ecx), %xmm5, %xmm6{%k7}
+ vaddpd (%eax){1to2}, %xmm5, %xmm6{%k7}
+ vaddpd 2048(%edx), %xmm5, %xmm6{%k7}
+ vaddpd -2064(%edx), %xmm5, %xmm6{%k7}
+ vaddpd 1024(%edx){1to2}, %xmm5, %xmm6{%k7}
+ vaddpd -1032(%edx){1to2}, %xmm5, %xmm6{%k7}
+ vaddpd %ymm4, %ymm5, %ymm6{%k7}{z}
+ vaddpd -123456(%esp,%esi,8), %ymm5, %ymm6{%k7}
+ vaddpd 4064(%edx), %ymm5, %ymm6{%k7}
+ vaddpd -4096(%edx), %ymm5, %ymm6{%k7}
+ vaddpd 1016(%edx){1to4}, %ymm5, %ymm6{%k7}
+ vaddpd -1024(%edx){1to4}, %ymm5, %ymm6{%k7}
+ vgf2p8affineqb $0xab, %xmm4, %xmm5, %xmm6{%k7}
+ vgf2p8affineqb $123, -123456(%esp,%esi,8), %ymm5, %ymm6{%k7}
+ vgf2p8affineqb $123, 1016(%edx){1to4}, %ymm5, %ymm6{%k7}
+ vgf2p8affineinvqb $123, 2032(%edx), %xmm5, %xmm6{%k7}
+ vgf2p8affineinvqb $0xab, %ymm4, %ymm5, %ymm6{%k7}{z}
+ vgf2p8mulb %xmm4, %xmm5, %xmm6{%k7}
+ vgf2p8mulb -123456(%esp,%esi,8), %xmm5, %xmm6{%k7}
+ vgf2p8mulb 4064(%edx), %ymm5, %ymm6{%k7}
+ vaesenc %ymm24, %ymm26, %ymm22
+ vaesdec -123456(%esp,%esi,8), %xmm15, %xmm16
+ vaesenclast %xmm24, %xmm26, %xmm27
+ vaesdeclast 4064(%edx), %ymm25, %ymm26
+ vaesdec %xmm24, %xmm26, %xmm22
+ vaesdeclast (%ecx), %xmm26, %xmm27
+ vpclmulqdq $0xab, %xmm22, %xmm22, %xmm23
+ vpclmulqdq $123, 2032(%edx), %xmm22, %xmm23
+ vpclmulqdq $123, -123456(%esp,%esi,8), %ymm16, %ymm14
+ vpclmulhqhqdq %xmm22, %xmm23, %xmm24
+ vpclmullqhqdq %xmm14, %xmm15, %xmm16
+ vpclmulhqlqdq %ymm22, %ymm23, %ymm24
+ vpclmullqlqdq %ymm14, %ymm15, %ymm16
@@ -449,6 +449,8 @@ run_dump_test "x86-64-sm4"
run_dump_test "x86-64-sm4-intel"
run_dump_test "x86-64-pbndkb"
run_dump_test "x86-64-pbndkb-intel"
+run_dump_test "x86-64-avx10_1"
+run_list_test "x86-64-avx10_1-inval"
run_dump_test "x86-64-clzero"
run_dump_test "x86-64-mwaitx-bdver4"
run_list_test "x86-64-mwaitx-reg"
@@ -26,6 +26,7 @@ xmm:
vmovdqa ymm24, ymm0
.arch .noavx512f
+ .arch .noavx10.1
vaddps xmm0, xmm1, xmm8
vaddps xmm0, xmm1, xmm16
vaddps xmm0, xmm1, xmm24
@@ -168,6 +168,8 @@ static const dependency isa_dependencies[] =
"AVX2" },
{ "FRED",
"LKGS" },
+ { "AVX10_1",
+ "AVX2" },
{ "AVX512F",
"AVX2" },
{ "AVX512CD",
@@ -378,6 +380,7 @@ static bitfield cpu_flags[] =
BITFIELD (RAO_INT),
BITFIELD (FRED),
BITFIELD (LKGS),
+ BITFIELD (AVX10_1),
BITFIELD (MWAITX),
BITFIELD (CLZERO),
BITFIELD (OSPKE),
@@ -1217,7 +1220,7 @@ static void
output_i386_opcode (FILE *table, const char *name, char *str,
char *last, int lineno)
{
- unsigned int i, length, prefix = 0, space = 0;
+ unsigned int i, j, length, prefix = 0, space = 0, k = 0;
char *base_opcode, *extension_opcode, *end, *ident;
char *cpu_flags, *opcode_modifier, *operand_types [MAX_OPERANDS];
unsigned long long opcode;
@@ -1315,6 +1318,20 @@ output_i386_opcode (FILE *table, const char *name, char *str,
ident = mkident (name);
fprintf (table, " { MN_%s, 0x%0*llx%s, %u,",
ident, 2 * (int)length, opcode, end, i);
+
+ j = strlen(ident);
+ /* Templates whose CPU flags mention AVX512 are also usable with AVX10.1,
+ except those for AVX512PF/ER/4FMAPS/4VNNIW/VP2INTERSECT.  */
+ if (strstr (cpu_flags, "AVX512")
+ && !strstr (cpu_flags, "AVX512PF")
+ && !strstr (cpu_flags, "AVX512ER")
+ && !strstr (cpu_flags, "4FMAPS")
+ && !strstr (cpu_flags, "4VNNIW")
+ && !strstr (cpu_flags, "VP2INTERSECT"))
+ {
+ cpu_flags = concat (cpu_flags, "|AVX10_1", NULL);
+ k = 1;
+ }
free (ident);
process_i386_opcode_modifier (table, opcode_modifier, space, prefix,
@@ -1322,6 +1339,9 @@ output_i386_opcode (FILE *table, const char *name, char *str,
process_i386_cpu_flag (table, cpu_flags, NULL, ",", " ", lineno, CpuMax);
+ if (k)
+ free (cpu_flags);
+
fprintf (table, " { ");
for (i = 0; i < ARRAY_SIZE (operand_types); i++)
@@ -241,6 +241,8 @@ enum
CpuFRED,
/* lkgs instruction required */
CpuLKGS,
+ /* Intel AVX10.1 Instructions support required. */
+ CpuAVX10_1,
/* mwaitx instruction required */
CpuMWAITX,
/* Clzero instruction required */
@@ -444,6 +446,7 @@ typedef union i386_cpu_flags
unsigned int cpurao_int:1;
unsigned int cpufred:1;
unsigned int cpulkgs:1;
+ unsigned int cpuavx10_1:1;
unsigned int cpumwaitx:1;
unsigned int cpuclzero:1;
unsigned int cpuospke:1;