On 03.04.2023 09:11, Haochen Jiang wrote:
> @@ -1183,6 +1184,8 @@ if [gas_64_check] then {
> run_dump_test "x86-64-avx-ne-convert-intel"
> run_dump_test "x86-64-raoint"
> run_dump_test "x86-64-raoint-intel"
> + run_dump_test "x86-64-amx-complex"
> + run_dump_test "x86-64-amx-complex-intel"
> run_dump_test "x86-64-clzero"
> run_dump_test "x86-64-mwaitx-bdver4"
> run_list_test "x86-64-mwaitx-reg"
There are constraints on operand combinations, like for tdp*, which want
testing here as well (both the assembler and disassembler sides) imo.
> @@ -4119,6 +4124,13 @@ static const struct dis386 prefix_table[][4] = {
> { VEX_W_TABLE (VEX_W_0F385E_X86_64_P_3) },
> },
>
> + /* PREFIX_VEX_0F386C_X86_64_W_0_M_1_L_0 */
> + {
> + { "tcmmrlfp16ps", { TMM, EXtmm, VexTmm }, 0 },
> + { Bad_Opcode },
> + { "tcmmimfp16ps", { TMM, EXtmm, VexTmm }, 0 },
> + },
You could avoid going through vex_w_table[] by making use of %XS here.
(I guess I'll make a similar change for tdp*16ps, but - to avoid
causing conflicts - perhaps only once yours went in.)
> --- a/opcodes/i386-opc.h
> +++ b/opcodes/i386-opc.h
> @@ -248,6 +248,8 @@ enum
> CpuAMX_BF16,
> /* AMX-FP16 instructions required */
> CpuAMX_FP16,
> + /* Intel AMX-COMPLEX Instructions support required. */
> + CpuAMX_COMPLEX,
> /* AMX-TILE instructions required */
> CpuAMX_TILE,
> /* GFNI instructions required */
In line with adjacent comments, please omit "Intel" and "support" from
the comment, and don't start "instructions" with a capital letter. Plus
while the full stop is in line with general comment style, looking at
adjacent comments here it probably also wants omitting.
> --- a/opcodes/i386-opc.tbl
> +++ b/opcodes/i386-opc.tbl
> @@ -3163,6 +3163,13 @@ tilezero, 0xf249, AMX_TILE|x64, Modrm|Vex128|Space0F38|VexW0|NoSuf, { RegTMM }
>
> // AMX instructions end.
>
> +// AMX-COMPLEX instructions.
> +
> +tcmmimfp16ps, 0x666c, AMX_COMPLEX|x64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
> +tcmmrlfp16ps, 0x6c, AMX_COMPLEX|x64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
> +
> +// AMX-COMPLEX instructions end.
I think these would better not have their own comment-bounded group, but
go inside the "AMX instructions" section (which already covers all AMX-*).
Jan
> On 03.04.2023 09:11, Haochen Jiang wrote:
> > @@ -1183,6 +1184,8 @@ if [gas_64_check] then {
> > run_dump_test "x86-64-avx-ne-convert-intel"
> > run_dump_test "x86-64-raoint"
> > run_dump_test "x86-64-raoint-intel"
> > + run_dump_test "x86-64-amx-complex"
> > + run_dump_test "x86-64-amx-complex-intel"
> > run_dump_test "x86-64-clzero"
> > run_dump_test "x86-64-mwaitx-bdver4"
> > run_list_test "x86-64-mwaitx-reg"
>
> There are constraints on operand combinations, like for tdp*, which want
> testing here as well (both the assembler and disassembler sides) imo.
I just saw those test cases; I will add equivalent ones in the v2 patch, just as was done for tdp*.
Thanks for the reminder.
>
> > @@ -4119,6 +4124,13 @@ static const struct dis386 prefix_table[][4] = {
> > { VEX_W_TABLE (VEX_W_0F385E_X86_64_P_3) },
> > },
> >
> > + /* PREFIX_VEX_0F386C_X86_64_W_0_M_1_L_0 */ {
> > + { "tcmmrlfp16ps", { TMM, EXtmm, VexTmm }, 0 },
> > + { Bad_Opcode },
> > + { "tcmmimfp16ps", { TMM, EXtmm, VexTmm }, 0 }, },
>
> You could avoid going through vex_w_table[] by making use of %XS here.
> (I guess I'll make a similar change for tdp*16ps, but - to avoid causing conflicts
> - perhaps only once yours went in.)
I will leave this to you; using %XS does eliminate the pass through vex_w_table[].
>
> > --- a/opcodes/i386-opc.h
> > +++ b/opcodes/i386-opc.h
> > @@ -248,6 +248,8 @@ enum
> > CpuAMX_BF16,
> > /* AMX-FP16 instructions required */
> > CpuAMX_FP16,
> > + /* Intel AMX-COMPLEX Instructions support required. */
> > + CpuAMX_COMPLEX,
> > /* AMX-TILE instructions required */
> > CpuAMX_TILE,
> > /* GFNI instructions required */
>
> In line with adjacent comments, please omit "Intel" and "support" from the
> comment, and don't start "instructions" with a capital letter. Plus while the
> full stop is in line with general comment style, looking at adjacent comments
> here it probably also wants omitting.
OK, I will do that in the v2 patch.
>
> > --- a/opcodes/i386-opc.tbl
> > +++ b/opcodes/i386-opc.tbl
> > @@ -3163,6 +3163,13 @@ tilezero, 0xf249, AMX_TILE|x64,
> > Modrm|Vex128|Space0F38|VexW0|NoSuf, { RegTMM }
> >
> > // AMX instructions end.
> >
> > +// AMX-COMPLEX instructions.
> > +
> > +tcmmimfp16ps, 0x666c, AMX_COMPLEX|x64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
> > +tcmmrlfp16ps, 0x6c, AMX_COMPLEX|x64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
> > +
> > +// AMX-COMPLEX instructions end.
>
> I think these would better not have their own comment-bounded group, but
> go inside the "AMX instructions" section (which already covers all AMX-*).
I will put them in alphabetical order in the v2 patch, which means before tdp*.
I really appreciate your review, and I will send the v2 patch soon.
Haochen
>
> Jan
@@ -1,5 +1,7 @@
-*- text -*-
+* Add support for Intel AMX-COMPLEX instructions.
+
* Add SME2 support to the AArch64 port.
* A new .insn directive is recognized by x86 gas.
@@ -1113,6 +1113,7 @@ static const arch_entry cpu_arch[] =
SUBARCH (amx_int8, AMX_INT8, ANY_AMX_INT8, false),
SUBARCH (amx_bf16, AMX_BF16, ANY_AMX_BF16, false),
SUBARCH (amx_fp16, AMX_FP16, ANY_AMX_FP16, false),
+ SUBARCH (amx_complex, AMX_COMPLEX, ANY_AMX_COMPLEX, false),
SUBARCH (amx_tile, AMX_TILE, ANY_AMX_TILE, false),
SUBARCH (movdiri, MOVDIRI, MOVDIRI, false),
SUBARCH (movdir64b, MOVDIR64B, MOVDIR64B, false),
@@ -208,6 +208,7 @@ accept various extension mnemonics. For example,
@code{amx_int8},
@code{amx_bf16},
@code{amx_fp16},
+@code{amx_complex},
@code{amx_tile},
@code{vmx},
@code{vmfunc},
@@ -1636,7 +1637,8 @@ supported on the CPU specified. The choices for @var{cpu_type} are:
@item @samp{.wbnoinvd} @tab @samp{.pconfig} @tab @samp{.waitpkg} @tab @samp{.cldemote}
@item @samp{.shstk} @tab @samp{.gfni} @tab @samp{.vaes} @tab @samp{.vpclmulqdq}
@item @samp{.movdiri} @tab @samp{.movdir64b} @tab @samp{.enqcmd} @tab @samp{.tsxldtrk}
-@item @samp{.amx_int8} @tab @samp{.amx_bf16} @tab @samp{.amx_fp16} @tab @samp{.amx_tile}
+@item @samp{.amx_int8} @tab @samp{.amx_bf16} @tab @samp{.amx_fp16}
+@item @samp{.amx_complex} @tab @samp{.amx_tile}
@item @samp{.kl} @tab @samp{.widekl} @tab @samp{.uintr} @tab @samp{.hreset}
@item @samp{.3dnow} @tab @samp{.3dnowa} @tab @samp{.sse4a} @tab @samp{.sse5}
@item @samp{.syscall} @tab @samp{.rdtscp} @tab @samp{.svme}
new file mode 100644
@@ -0,0 +1,3 @@
+.* Assembler messages:
+.*:6: Error: `tcmmimfp16ps' is only supported in 64-bit mode
+.*:7: Error: `tcmmrlfp16ps' is only supported in 64-bit mode
new file mode 100644
@@ -0,0 +1,7 @@
+# Check Illegal AMX-COMPLEX instructions
+
+ .allow_index_reg
+ .text
+_start:
+ tcmmimfp16ps %tmm1, %tmm2, %tmm3
+ tcmmrlfp16ps %tmm1, %tmm2, %tmm3
@@ -493,6 +493,7 @@ if [gas_32_check] then {
run_dump_test "avx-ne-convert-intel"
run_dump_test "raoint"
run_dump_test "raoint-intel"
+ run_list_test "amx-complex-inval"
run_list_test "sg"
run_dump_test "clzero"
run_dump_test "invlpgb"
@@ -1183,6 +1184,8 @@ if [gas_64_check] then {
run_dump_test "x86-64-avx-ne-convert-intel"
run_dump_test "x86-64-raoint"
run_dump_test "x86-64-raoint-intel"
+ run_dump_test "x86-64-amx-complex"
+ run_dump_test "x86-64-amx-complex-intel"
run_dump_test "x86-64-clzero"
run_dump_test "x86-64-mwaitx-bdver4"
run_list_test "x86-64-mwaitx-reg"
new file mode 100644
@@ -0,0 +1,18 @@
+#as:
+#objdump: -dw -Mintel
+#name: x86_64 AMX-COMPLEX insns (Intel disassembly)
+#source: x86-64-amx-complex.s
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*c4 e2 59 6c f5\s+tcmmimfp16ps tmm6,tmm5,tmm4
+\s*[a-f0-9]+:\s*c4 e2 71 6c da\s+tcmmimfp16ps tmm3,tmm2,tmm1
+\s*[a-f0-9]+:\s*c4 e2 58 6c f5\s+tcmmrlfp16ps tmm6,tmm5,tmm4
+\s*[a-f0-9]+:\s*c4 e2 70 6c da\s+tcmmrlfp16ps tmm3,tmm2,tmm1
+\s*[a-f0-9]+:\s*c4 e2 59 6c f5\s+tcmmimfp16ps tmm6,tmm5,tmm4
+\s*[a-f0-9]+:\s*c4 e2 71 6c da\s+tcmmimfp16ps tmm3,tmm2,tmm1
+\s*[a-f0-9]+:\s*c4 e2 58 6c f5\s+tcmmrlfp16ps tmm6,tmm5,tmm4
+\s*[a-f0-9]+:\s*c4 e2 70 6c da\s+tcmmrlfp16ps tmm3,tmm2,tmm1
new file mode 100644
@@ -0,0 +1,15 @@
+#as:
+#objdump: -dw
+#name: x86_64 AMX-COMPLEX insns
+#source: x86-64-amx-complex.s
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*c4 e2 59 6c f5\s+tcmmimfp16ps %tmm4,%tmm5,%tmm6
+\s*[a-f0-9]+:\s*c4 e2 71 6c da\s+tcmmimfp16ps %tmm1,%tmm2,%tmm3
+\s*[a-f0-9]+:\s*c4 e2 58 6c f5\s+tcmmrlfp16ps %tmm4,%tmm5,%tmm6
+\s*[a-f0-9]+:\s*c4 e2 70 6c da\s+tcmmrlfp16ps %tmm1,%tmm2,%tmm3
+#pass
new file mode 100644
@@ -0,0 +1,15 @@
+# Check 64bit AMX-COMPLEX instructions
+
+ .allow_index_reg
+ .text
+_start:
+ tcmmimfp16ps %tmm4, %tmm5, %tmm6 #AMX-COMPLEX
+ tcmmimfp16ps %tmm1, %tmm2, %tmm3 #AMX-COMPLEX
+ tcmmrlfp16ps %tmm4, %tmm5, %tmm6 #AMX-COMPLEX
+ tcmmrlfp16ps %tmm1, %tmm2, %tmm3 #AMX-COMPLEX
+
+.intel_syntax noprefix
+ tcmmimfp16ps tmm6, tmm5, tmm4 #AMX-COMPLEX
+ tcmmimfp16ps tmm3, tmm2, tmm1 #AMX-COMPLEX
+ tcmmrlfp16ps tmm6, tmm5, tmm4 #AMX-COMPLEX
+ tcmmrlfp16ps tmm3, tmm2, tmm1 #AMX-COMPLEX
@@ -943,6 +943,7 @@ enum
MOD_VEX_0F385E_X86_64_P_1_W_0,
MOD_VEX_0F385E_X86_64_P_2_W_0,
MOD_VEX_0F385E_X86_64_P_3_W_0,
+ MOD_VEX_0F386C_X86_64_W_0,
MOD_VEX_0F388C,
MOD_VEX_0F388E,
MOD_VEX_0F3A30_L_0,
@@ -1145,6 +1146,7 @@ enum
PREFIX_VEX_0F3851_W_0,
PREFIX_VEX_0F385C_X86_64,
PREFIX_VEX_0F385E_X86_64,
+ PREFIX_VEX_0F386C_X86_64_W_0_M_1_L_0,
PREFIX_VEX_0F3872,
PREFIX_VEX_0F38B0_W_0,
PREFIX_VEX_0F38B1_W_0,
@@ -1298,6 +1300,7 @@ enum
X86_64_VEX_0F384B,
X86_64_VEX_0F385C,
X86_64_VEX_0F385E,
+ X86_64_VEX_0F386C,
X86_64_VEX_0F38E0,
X86_64_VEX_0F38E1,
X86_64_VEX_0F38E2,
@@ -1398,6 +1401,7 @@ enum
VEX_LEN_0F385E_X86_64_P_1_W_0_M_0,
VEX_LEN_0F385E_X86_64_P_2_W_0_M_0,
VEX_LEN_0F385E_X86_64_P_3_W_0_M_0,
+ VEX_LEN_0F386C_X86_64_W_0_M_1,
VEX_LEN_0F38DB,
VEX_LEN_0F38F2,
VEX_LEN_0F38F3,
@@ -1565,6 +1569,7 @@ enum
VEX_W_0F385E_X86_64_P_1,
VEX_W_0F385E_X86_64_P_2,
VEX_W_0F385E_X86_64_P_3,
+ VEX_W_0F386C_X86_64,
VEX_W_0F3872_P_1,
VEX_W_0F3878,
VEX_W_0F3879,
@@ -4119,6 +4124,13 @@ static const struct dis386 prefix_table[][4] = {
{ VEX_W_TABLE (VEX_W_0F385E_X86_64_P_3) },
},
+ /* PREFIX_VEX_0F386C_X86_64_W_0_M_1_L_0 */
+ {
+ { "tcmmrlfp16ps", { TMM, EXtmm, VexTmm }, 0 },
+ { Bad_Opcode },
+ { "tcmmimfp16ps", { TMM, EXtmm, VexTmm }, 0 },
+ },
+
/* PREFIX_VEX_0F3872 */
{
{ Bad_Opcode },
@@ -4486,6 +4498,12 @@ static const struct dis386 x86_64_table[][2] = {
{ PREFIX_TABLE (PREFIX_VEX_0F385E_X86_64) },
},
+ /* X86_64_VEX_0F386C */
+ {
+ { Bad_Opcode },
+ { VEX_W_TABLE (VEX_W_0F386C_X86_64) },
+ },
+
/* X86_64_VEX_0F38E0 */
{
{ Bad_Opcode },
@@ -6461,7 +6479,7 @@ static const struct dis386 vex_table[][256] = {
{ Bad_Opcode },
{ Bad_Opcode },
{ Bad_Opcode },
- { Bad_Opcode },
+ { X86_64_TABLE (X86_64_VEX_0F386C) },
{ Bad_Opcode },
{ Bad_Opcode },
{ Bad_Opcode },
@@ -7181,6 +7199,11 @@ static const struct dis386 vex_len_table[][2] = {
{ "tdpbssd", {TMM, EXtmm, VexTmm }, 0 },
},
+ /* VEX_LEN_0F386C_X86_64_W_0_M_1 */
+ {
+ { PREFIX_TABLE (PREFIX_VEX_0F386C_X86_64_W_0_M_1_L_0) },
+ },
+
/* VEX_LEN_0F38DB */
{
{ "vaesimc", { XM, EXx }, PREFIX_DATA },
@@ -7849,6 +7872,10 @@ static const struct dis386 vex_w_table[][2] = {
/* VEX_W_0F385E_X86_64_P_3 */
{ MOD_TABLE (MOD_VEX_0F385E_X86_64_P_3_W_0) },
},
+ {
+ /* VEX_W_0F386C_X86_64 */
+ { MOD_TABLE (MOD_VEX_0F386C_X86_64_W_0) },
+ },
{
/* VEX_W_0F3872_P_1 */
{ "%XVvcvtneps2bf16%XY", { XMM, EXx }, 0 },
@@ -8696,6 +8723,11 @@ static const struct dis386 mod_table[][2] = {
{ Bad_Opcode },
{ VEX_LEN_TABLE (VEX_LEN_0F385E_X86_64_P_3_W_0_M_0) },
},
+ {
+ /* MOD_VEX_0F386C_X86_64_W_0 */
+ { Bad_Opcode },
+ { VEX_LEN_TABLE (VEX_LEN_0F386C_X86_64_W_0_M_1) },
+ },
{
/* MOD_VEX_0F388C */
{ "vpmaskmov%DQ", { XM, Vex, Mx }, PREFIX_DATA },
@@ -240,6 +240,8 @@ static const dependency isa_dependencies[] =
"AMX_TILE" },
{ "AMX_FP16",
"AMX_TILE" },
+ { "AMX_COMPLEX",
+ "AMX_TILE" },
{ "KL",
"SSE2" },
{ "WIDEKL",
@@ -378,6 +380,7 @@ static bitfield cpu_flags[] =
BITFIELD (AMX_INT8),
BITFIELD (AMX_BF16),
BITFIELD (AMX_FP16),
+ BITFIELD (AMX_COMPLEX),
BITFIELD (AMX_TILE),
BITFIELD (MOVDIRI),
BITFIELD (MOVDIR64B),
@@ -248,6 +248,8 @@ enum
CpuAMX_BF16,
/* AMX-FP16 instructions required */
CpuAMX_FP16,
+ /* Intel AMX-COMPLEX Instructions support required. */
+ CpuAMX_COMPLEX,
/* AMX-TILE instructions required */
CpuAMX_TILE,
/* GFNI instructions required */
@@ -432,6 +434,7 @@ typedef union i386_cpu_flags
unsigned int cpuamx_int8:1;
unsigned int cpuamx_bf16:1;
unsigned int cpuamx_fp16:1;
+ unsigned int cpuamx_complex:1;
unsigned int cpuamx_tile:1;
unsigned int cpugfni:1;
unsigned int cpuvaes:1;
@@ -3163,6 +3163,13 @@ tilezero, 0xf249, AMX_TILE|x64, Modrm|Vex128|Space0F38|VexW0|NoSuf, { RegTMM }
// AMX instructions end.
+// AMX-COMPLEX instructions.
+
+tcmmimfp16ps, 0x666c, AMX_COMPLEX|x64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
+tcmmrlfp16ps, 0x6c, AMX_COMPLEX|x64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
+
+// AMX-COMPLEX instructions end.
+
// KEYLOCKER instructions.
loadiwkey, 0xf30f38dc, KL, Load|Modrm|NoSuf, { RegXMM, RegXMM }