[09/10] Support Intel AMX-FP16
Checks
Commit Message
From: "Cui,Lili" <lili.cui@intel.com>
gas/
* NEWS: Add support for Intel AMX-FP16 instruction.
* config/tc-i386.c: Add amx_fp16.
* doc/c-i386.texi: Document .amx_fp16, noamx_fp16.
* testsuite/gas/i386/i386.exp: Add AMX-FP16 tests.
* testsuite/gas/i386/x86-64-amx-fp16-intel.d: New test.
* testsuite/gas/i386/x86-64-amx-fp16.d: Likewise.
* testsuite/gas/i386/x86-64-amx-fp16.s: Likewise.
* testsuite/gas/i386/x86-64-amx-fp16-bad.d: Likewise.
* testsuite/gas/i386/x86-64-amx-fp16-bad.s: Likewise.
opcodes/
* i386-dis.c (MOD_VEX_0F385C_X86_64_P_3_W_0): New.
(VEX_LEN_0F385C_X86_64_P_3_W_0_M_0): Likewise.
(VEX_W_0F385C_X86_64_P_3): Likewise.
(prefix_table): Add VEX_W_0F385C_X86_64_P_3.
(vex_len_table): Add VEX_LEN_0F385C_X86_64_P_3_W_0_M_0.
(vex_w_table): Add VEX_W_0F385C_X86_64_P_3.
(mod_table): Add MOD_VEX_0F385C_X86_64_P_3_W_0.
* i386-gen.c (cpu_flag_init): Add CPU_AMX_FP16_FLAGS and
CPU_ANY_AMX_FP16_FLAGS.
(CPU_ANY_AMX_TILE_FLAGS): Add CpuAMX_FP16.
(cpu_flags): Add CpuAMX_FP16.
* i386-opc.h (enum): Add CpuAMX_FP16.
(i386_cpu_flags): Add cpuamx_fp16.
* i386-opc.tbl: Add Intel AMX-FP16 instruction.
* i386-init.h: Regenerate.
* i386-tbl.h: Likewise.
---
gas/NEWS | 2 +
gas/config/tc-i386.c | 1 +
gas/doc/c-i386.texi | 4 +-
gas/testsuite/gas/i386/i386.exp | 3 +
gas/testsuite/gas/i386/x86-64-amx-fp16-bad.d | 19 +
gas/testsuite/gas/i386/x86-64-amx-fp16-bad.s | 35 +
.../gas/i386/x86-64-amx-fp16-intel.d | 13 +
gas/testsuite/gas/i386/x86-64-amx-fp16.d | 13 +
gas/testsuite/gas/i386/x86-64-amx-fp16.s | 9 +
opcodes/i386-dis.c | 18 +
opcodes/i386-gen.c | 7 +-
opcodes/i386-init.h | 520 +-
opcodes/i386-opc.h | 3 +
opcodes/i386-opc.tbl | 6 +
opcodes/i386-tbl.h | 7837 +++++++++--------
15 files changed, 4327 insertions(+), 4163 deletions(-)
create mode 100644 gas/testsuite/gas/i386/x86-64-amx-fp16-bad.d
create mode 100644 gas/testsuite/gas/i386/x86-64-amx-fp16-bad.s
create mode 100644 gas/testsuite/gas/i386/x86-64-amx-fp16-intel.d
create mode 100644 gas/testsuite/gas/i386/x86-64-amx-fp16.d
create mode 100644 gas/testsuite/gas/i386/x86-64-amx-fp16.s
Comments
On 14.10.2022 11:12, Haochen Jiang wrote:
> --- a/opcodes/i386-dis.c
> +++ b/opcodes/i386-dis.c
> @@ -933,6 +933,7 @@ enum
> MOD_VEX_0F384B_X86_64_P_3_W_0,
> MOD_VEX_0F385A,
> MOD_VEX_0F385C_X86_64_P_1_W_0,
> + MOD_VEX_0F385C_X86_64_P_3_W_0,
> MOD_VEX_0F385E_X86_64_P_0_W_0,
> MOD_VEX_0F385E_X86_64_P_1_W_0,
> MOD_VEX_0F385E_X86_64_P_2_W_0,
> @@ -1399,6 +1400,7 @@ enum
> VEX_LEN_0F384B_X86_64_P_3_W_0_M_0,
> VEX_LEN_0F385A_M_0,
> VEX_LEN_0F385C_X86_64_P_1_W_0_M_0,
> + VEX_LEN_0F385C_X86_64_P_3_W_0_M_0,
> VEX_LEN_0F385E_X86_64_P_0_W_0_M_0,
> VEX_LEN_0F385E_X86_64_P_1_W_0_M_0,
> VEX_LEN_0F385E_X86_64_P_2_W_0_M_0,
> @@ -1565,6 +1567,7 @@ enum
> VEX_W_0F3859,
> VEX_W_0F385A_M_0_L_0,
> VEX_W_0F385C_X86_64_P_1,
> + VEX_W_0F385C_X86_64_P_3,
> VEX_W_0F385E_X86_64_P_0,
> VEX_W_0F385E_X86_64_P_1,
> VEX_W_0F385E_X86_64_P_2,
> @@ -4088,6 +4091,7 @@ static const struct dis386 prefix_table[][4] = {
> { Bad_Opcode },
> { VEX_W_TABLE (VEX_W_0F385C_X86_64_P_1) },
> { Bad_Opcode },
> + { VEX_W_TABLE (VEX_W_0F385C_X86_64_P_3) },
> },
>
> /* PREFIX_VEX_0F385E_X86_64 */
> @@ -7120,6 +7124,11 @@ static const struct dis386 vex_len_table[][2] = {
> { "tdpbf16ps", { TMM, EXtmm, VexTmm }, 0 },
> },
>
> + /* VEX_LEN_0F385C_X86_64_P_3_W_0_M_0 */
> + {
> + { "tdpfp16ps", { TMM, EXtmm, VexTmm }, 0 },
> + },
> +
> /* VEX_LEN_0F385E_X86_64_P_0_W_0_M_0 */
> {
> { "tdpbuud", {TMM, EXtmm, VexTmm }, 0 },
> @@ -7788,6 +7797,10 @@ static const struct dis386 vex_w_table[][2] = {
> /* VEX_W_0F385C_X86_64_P_1 */
> { MOD_TABLE (MOD_VEX_0F385C_X86_64_P_1_W_0) },
> },
> + {
> + /* VEX_W_0F385C_X86_64_P_3 */
> + { MOD_TABLE (MOD_VEX_0F385C_X86_64_P_3_W_0) },
> + },
> {
> /* VEX_W_0F385E_X86_64_P_0 */
> { MOD_TABLE (MOD_VEX_0F385E_X86_64_P_0_W_0) },
> @@ -8610,6 +8623,11 @@ static const struct dis386 mod_table[][2] = {
> { Bad_Opcode },
> { VEX_LEN_TABLE (VEX_LEN_0F385C_X86_64_P_1_W_0_M_0) },
> },
> + {
> + /* MOD_VEX_0F385C_X86_64_P_3_W_0 */
> + { Bad_Opcode },
> + { VEX_LEN_TABLE (VEX_LEN_0F385C_X86_64_P_3_W_0_M_0) },
> + },
> {
> /* MOD_VEX_0F385E_X86_64_P_0_W_0 */
> { Bad_Opcode },
> diff --git a/opcodes/i386-gen.c b/opcodes/i386-gen.c
> index eac229e54d..d10b462548 100644
> --- a/opcodes/i386-gen.c
> +++ b/opcodes/i386-gen.c
> @@ -259,6 +259,8 @@ static initializer cpu_flag_init[] =
> "CpuWRMSRNS" },
> { "CPU_MSRLIST_FLAGS",
> "CpuMSRLIST" },
> + { "CPU_AMX_FP16_FLAGS",
> + "CpuAMX_FP16" },
> { "CPU_IAMCU_FLAGS",
> "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuIAMCU" },
> { "CPU_ADX_FLAGS",
Can you please insert next to the other similar AMX entries? Seeing the flaw
here, I'll be making a patch to address the lack of CPU_AMX_TILE_FLAGS in
the similar pre-existing entries. When you move the insertion, it'll be
easier to keep things in sync.
> @@ -426,7 +428,7 @@ static initializer cpu_flag_init[] =
> { "CPU_ANY_AMX_BF16_FLAGS",
> "CpuAMX_BF16" },
> { "CPU_ANY_AMX_TILE_FLAGS",
> - "CpuAMX_TILE|CpuAMX_INT8|CpuAMX_BF16" },
> + "CpuAMX_TILE|CpuAMX_INT8|CpuAMX_BF16|CpuAMX_FP16" },
> { "CPU_ANY_AVX_VNNI_FLAGS",
> "CpuAVX_VNNI" },
> { "CPU_ANY_MOVDIRI_FLAGS",
> @@ -467,6 +469,8 @@ static initializer cpu_flag_init[] =
> "CpuWRMSRNS" },
> { "CPU_ANY_MSRLIST_FLAGS",
> "CpuMSRLIST" },
> + { "CPU_ANY_AMX_FP16_FLAGS",
> + "CpuAMX_FP16" },
> };
Same here then.
> --- a/opcodes/i386-opc.h
> +++ b/opcodes/i386-opc.h
> @@ -223,6 +223,8 @@ enum
> CpuWRMSRNS,
> /* Intel MSRLIST Instructions support required. */
> CpuMSRLIST,
> + /* AMX-FP16 instructions required */
> + CpuAMX_FP16,
This (and the related stuff) may also benefit from grouping with the other
AMX ones.
> --- a/opcodes/i386-opc.tbl
> +++ b/opcodes/i386-opc.tbl
> @@ -3339,3 +3339,9 @@ rdmsrlist, 0xf20f01c6, None, CpuMSRLIST|Cpu64, No_bSuf|No_wSuf|No_lSuf|No_sSuf|N
> wrmsrlist, 0xf30f01c6, None, CpuMSRLIST|Cpu64, No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, {}
>
> // MSRLIST instructions end.
> +
> +// AMX-FP16 instructions.
> +
> +tdpfp16ps, 0xf25c, None, CpuAMX_FP16|Cpu64, Modrm|Vex128|Space0F38|VexVVVV=1|VexW0|SwapSources|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegTMM, RegTMM, RegTMM }
As before - plain VexVVVV preferably (without the =1), irrespective of the
already present AMX entries still using the less preferred form.
> +// AMX-FP16 instructions end.
Nit (again): Perhaps better use singular?
And as above - perhaps put next to the other AMX entries? Note how they
are all in a single group, despite it being 3 separate feature bits. So
I guess you will want to insert exactly one line below tdpbf16ps. That
way the similarity between both is also going to be easiest to see,
check, and maintain.
Jan
> > + { "CPU_AMX_FP16_FLAGS",
> > + "CpuAMX_FP16" },
> > { "CPU_IAMCU_FLAGS",
> > "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuIAMCU" },
> > { "CPU_ADX_FLAGS",
>
> Can you please insert next to the other similar AMX entries? Seeing the flaw
> here, I'll be making a patch to address the lack of CPU_AMX_TILE_FLAGS in
> the similar pre-existing entries. When you move the insertion, it'll be easier
> to keep things in sync.
Done.
> > - "CpuAMX_TILE|CpuAMX_INT8|CpuAMX_BF16" },
> > + "CpuAMX_TILE|CpuAMX_INT8|CpuAMX_BF16|CpuAMX_FP16" },
> > { "CPU_ANY_MSRLIST_FLAGS",
> > "CpuMSRLIST" },
> > + { "CPU_ANY_AMX_FP16_FLAGS",
> > + "CpuAMX_FP16" },
> > };
>
> Same here then.
Done.
> > CpuWRMSRNS,
> > /* Intel MSRLIST Instructions support required. */
> > CpuMSRLIST,
> > + /* AMX-FP16 instructions required */
> > + CpuAMX_FP16,
>
> This (and the related stuff) may also benefit from grouping with the other
> AMX ones.
Done.
> > +tdpfp16ps, 0xf25c, None, CpuAMX_FP16|Cpu64, Modrm|Vex128|Space0F38|VexVVVV=1|VexW0|SwapSources|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegTMM, RegTMM, RegTMM }
>
> As before - plain VexVVVV preferably (without the =1), irrespective of the
> already present AMX entries still using the less preferred form.
Done.
> Nit (again): Perhaps better use singular?
>
> And as above - perhaps put next to the other AMX entries? Note how they
> are all in a single group, despite it being 3 separate feature bits. So I guess
> you will want to insert exactly one line below tdpbf16ps. That way the
> similarity between both is also going to be easiest to see, check, and maintain.
> Jan
Done, thanks!
Subject: [PATCH] Support Intel AMX-FP16
gas/
* NEWS: Add support for Intel AMX-FP16 instruction.
* config/tc-i386.c: Add amx_fp16.
* doc/c-i386.texi: Document .amx_fp16, noamx_fp16.
* testsuite/gas/i386/i386.exp: Add AMX-FP16 tests.
* testsuite/gas/i386/x86-64-amx-fp16-intel.d: New test.
* testsuite/gas/i386/x86-64-amx-fp16.d: Likewise.
* testsuite/gas/i386/x86-64-amx-fp16.s: Likewise.
* testsuite/gas/i386/x86-64-amx-fp16-bad.d: Likewise.
* testsuite/gas/i386/x86-64-amx-fp16-bad.s: Likewise.
opcodes/
* i386-dis.c (MOD_VEX_0F385C_X86_64_P_3_W_0): New.
(VEX_LEN_0F385C_X86_64_P_3_W_0_M_0): Likewise.
(VEX_W_0F385C_X86_64_P_3): Likewise.
(prefix_table): Add VEX_W_0F385C_X86_64_P_3.
(vex_len_table): Add VEX_LEN_0F385C_X86_64_P_3_W_0_M_0.
(vex_w_table): Add VEX_W_0F385C_X86_64_P_3.
(mod_table): Add MOD_VEX_0F385C_X86_64_P_3_W_0.
* i386-gen.c (cpu_flag_init): Add CPU_AMX_FP16_FLAGS and
CPU_ANY_AMX_FP16_FLAGS.
(CPU_ANY_AMX_TILE_FLAGS): Add CpuAMX_FP16.
(cpu_flags): Add CpuAMX_FP16.
* i386-opc.h (enum): Add CpuAMX_FP16.
(i386_cpu_flags): Add cpuamx_fp16.
* i386-opc.tbl: Add Intel AMX-FP16 instruction.
* i386-init.h: Regenerate.
* i386-tbl.h: Likewise.
---
gas/NEWS | 2 ++
gas/config/tc-i386.c | 1 +
gas/doc/c-i386.texi | 4 ++-
gas/testsuite/gas/i386/i386.exp | 3 ++
gas/testsuite/gas/i386/x86-64-amx-fp16-bad.d | 19 ++++++++++
gas/testsuite/gas/i386/x86-64-amx-fp16-bad.s | 35 +++++++++++++++++++
.../gas/i386/x86-64-amx-fp16-intel.d | 13 +++++++
gas/testsuite/gas/i386/x86-64-amx-fp16.d | 13 +++++++
gas/testsuite/gas/i386/x86-64-amx-fp16.s | 9 +++++
opcodes/i386-dis.c | 18 ++++++++++
opcodes/i386-gen.c | 7 +++-
opcodes/i386-opc.h | 3 ++
opcodes/i386-opc.tbl | 1 +
13 files changed, 126 insertions(+), 2 deletions(-)
create mode 100644 gas/testsuite/gas/i386/x86-64-amx-fp16-bad.d
create mode 100644 gas/testsuite/gas/i386/x86-64-amx-fp16-bad.s
create mode 100644 gas/testsuite/gas/i386/x86-64-amx-fp16-intel.d
create mode 100644 gas/testsuite/gas/i386/x86-64-amx-fp16.d
create mode 100644 gas/testsuite/gas/i386/x86-64-amx-fp16.s
diff --git a/gas/NEWS b/gas/NEWS
index 3246e7e825..961449545d 100644
--- a/gas/NEWS
+++ b/gas/NEWS
@@ -1,5 +1,7 @@
-*- text -*-
+* Add support for Intel AMX-FP16 instructions.
+
* Add support for Intel MSRLIST instructions.
* Add support for Intel WRMSRNS instructions.
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index 981fd9f73d..73aa2c66aa 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -1101,6 +1101,7 @@ static const arch_entry cpu_arch[] =
SUBARCH (raoint, RAOINT, ANY_RAOINT, false),
SUBARCH (wrmsrns, WRMSRNS, ANY_WRMSRNS, false),
SUBARCH (msrlist, MSRLIST, ANY_MSRLIST, false),
+ SUBARCH (amx_fp16, AMX_FP16, ANY_AMX_FP16, false),
};
#undef SUBARCH
diff --git a/gas/doc/c-i386.texi b/gas/doc/c-i386.texi
index 1bf953ef73..dc7e281e6d 100644
--- a/gas/doc/c-i386.texi
+++ b/gas/doc/c-i386.texi
@@ -220,6 +220,7 @@ accept various extension mnemonics. For example,
@code{raoint},
@code{wrmsrns},
@code{msrlist},
+@code{amx_fp16},
@code{noavx512f},
@code{noavx512cd},
@code{noavx512er},
@@ -247,6 +248,7 @@ accept various extension mnemonics. For example,
@code{noraoint},
@code{nowrmsrns},
@code{nomsrlist},
+@code{noamx_fp16},
@code{noenqcmd},
@code{noserialize},
@code{notsxldtrk},
@@ -1549,7 +1551,7 @@ supported on the CPU specified. The choices for @var{cpu_type} are:
@item @samp{.clwb} @tab @samp{.rdpid} @tab @samp{.ptwrite} @tab @samp{.ibt}
@item @samp{.avx_ifma} @tab @samp{.avx_vnni_int8} @tab @samp{.avx_ne_convert}
@item @samp{.cmpccxadd} @tab @samp{.raoint} @tab @samp{.wrmsrns}
-@item @samp{.msrlist}
+@item @samp{.msrlist} @tab @samp{.amx_fp16}
@item @samp{.wbnoinvd} @tab @samp{.pconfig} @tab @samp{.waitpkg} @tab @samp{.cldemote}
@item @samp{.shstk} @tab @samp{.gfni} @tab @samp{.vaes} @tab @samp{.vpclmulqdq}
@item @samp{.movdiri} @tab @samp{.movdir64b} @tab @samp{.enqcmd} @tab @samp{.tsxldtrk}
diff --git a/gas/testsuite/gas/i386/i386.exp b/gas/testsuite/gas/i386/i386.exp
index 5da64b4076..9f5fa7f612 100644
--- a/gas/testsuite/gas/i386/i386.exp
+++ b/gas/testsuite/gas/i386/i386.exp
@@ -1173,6 +1173,9 @@ if [gas_64_check] then {
run_dump_test "x86-64-wrmsrns-intel"
run_dump_test "x86-64-msrlist"
run_dump_test "x86-64-msrlist-intel"
+ run_dump_test "x86-64-amx-fp16"
+ run_dump_test "x86-64-amx-fp16-intel"
+ run_dump_test "x86-64-amx-fp16-bad"
run_dump_test "x86-64-clzero"
run_dump_test "x86-64-mwaitx-bdver4"
run_list_test "x86-64-mwaitx-reg"
diff --git a/gas/testsuite/gas/i386/x86-64-amx-fp16-bad.d b/gas/testsuite/gas/i386/x86-64-amx-fp16-bad.d
new file mode 100644
index 0000000000..a53ebf486d
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-fp16-bad.d
@@ -0,0 +1,19 @@
+#as:
+#objdump: -drw
+#name: x86_64 Illegal AMX-FP16 insns
+#source: x86-64-amx-fp16-bad.s
+
+.*: +file format .*
+
+
+Disassembly of section \.text:
+
+0+ <\.text>:
+[ ]*[a-f0-9]+:[ ]*c4 e2 d3 5c[ ]*\(bad\)[ ]*
+[ ]*[a-f0-9]+:[ ]*dc 90 90 90 90 90[ ]*fcoml.*
+[ ]*[a-f0-9]+:[ ]*c4 e2 57 5c[ ]*\(bad\)[ ]*
+[ ]*[a-f0-9]+:[ ]*dc 90 90 90 90 90[ ]*fcoml.*
+[ ]*[a-f0-9]+:[ ]*c4 62 53 5c dc[ ]*tdpfp16ps %tmm5,%tmm4,\(bad\)
+[ ]*[a-f0-9]+:[ ]*c4 c2 53 5c dc[ ]*tdpfp16ps %tmm5,\(bad\),%tmm3
+[ ]*[a-f0-9]+:[ ]*c4 e2 33 5c dc[ ]*tdpfp16ps \(bad\),%tmm4,%tmm3
+#pass
diff --git a/gas/testsuite/gas/i386/x86-64-amx-fp16-bad.s b/gas/testsuite/gas/i386/x86-64-amx-fp16-bad.s
new file mode 100644
index 0000000000..da5be1086e
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-fp16-bad.s
@@ -0,0 +1,35 @@
+# Check Illegal 64bit AMX-FP16 instructions
+
+.text
+ #tdpfp16ps %tmm5,%tmm4,%tmm3 set VEX.W = 1 (illegal value).
+ .byte 0xc4
+ .byte 0xe2
+ .byte 0xd3
+ .byte 0x5c
+ .byte 0xdc
+ .fill 0x05, 0x01, 0x90
+ #tdpfp16ps %tmm5,%tmm4,%tmm3 set VEX.L = 1 (illegal value).
+ .byte 0xc4
+ .byte 0xe2
+ .byte 0x57
+ .byte 0x5c
+ .byte 0xdc
+ .fill 0x05, 0x01, 0x90
+ #tdpfp16ps %tmm5,%tmm4,%tmm3 set VEX.R = 0 (illegal value).
+ .byte 0xc4
+ .byte 0x62
+ .byte 0x53
+ .byte 0x5c
+ .byte 0xdc
+ #tdpfp16ps %tmm5,%tmm4,%tmm3 set VEX.B = 0 (illegal value).
+ .byte 0xc4
+ .byte 0xc2
+ .byte 0x53
+ .byte 0x5c
+ .byte 0xdc
+ #tdpfp16ps %tmm5,%tmm4,%tmm3 set VEX.VVVV = 0110 (illegal value).
+ .byte 0xc4
+ .byte 0xe2
+ .byte 0x33
+ .byte 0x5c
+ .byte 0xdc
diff --git a/gas/testsuite/gas/i386/x86-64-amx-fp16-intel.d b/gas/testsuite/gas/i386/x86-64-amx-fp16-intel.d
new file mode 100644
index 0000000000..497898b760
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-fp16-intel.d
@@ -0,0 +1,13 @@
+#as:
+#objdump: -d -Mintel
+#name: x86_64 AMX-FP16 insns (Intel disassembly)
+#source: x86-64-amx-fp16.s
+
+.*: +file format .*
+
+
+Disassembly of section \.text:
+
+0+ <_start>:
+[ ]*[a-f0-9]+:[ ]*c4 e2 53 5c dc[ ]*tdpfp16ps tmm3,tmm4,tmm5
+[ ]*[a-f0-9]+:[ ]*c4 e2 53 5c dc[ ]*tdpfp16ps tmm3,tmm4,tmm5
diff --git a/gas/testsuite/gas/i386/x86-64-amx-fp16.d b/gas/testsuite/gas/i386/x86-64-amx-fp16.d
new file mode 100644
index 0000000000..7d3af95a4d
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-fp16.d
@@ -0,0 +1,13 @@
+#as:
+#objdump: -dw
+#name: x86_64 AMX-FP16 insns
+#source: x86-64-amx-fp16.s
+
+.*: +file format .*
+
+
+Disassembly of section \.text:
+
+0+ <_start>:
+[ ]*[a-f0-9]+:[ ]*c4 e2 53 5c dc[ ]*tdpfp16ps %tmm5,%tmm4,%tmm3
+[ ]*[a-f0-9]+:[ ]*c4 e2 53 5c dc[ ]*tdpfp16ps %tmm5,%tmm4,%tmm3
diff --git a/gas/testsuite/gas/i386/x86-64-amx-fp16.s b/gas/testsuite/gas/i386/x86-64-amx-fp16.s
new file mode 100644
index 0000000000..5a007904ed
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-fp16.s
@@ -0,0 +1,9 @@
+# Check 64bit AMX-FP16 instructions
+
+ .allow_index_reg
+ .text
+_start:
+ tdpfp16ps %tmm5, %tmm4, %tmm3
+
+.intel_syntax noprefix
+ tdpfp16ps tmm3, tmm4, tmm5
diff --git a/opcodes/i386-dis.c b/opcodes/i386-dis.c
index 0601bee877..be25d3f612 100644
--- a/opcodes/i386-dis.c
+++ b/opcodes/i386-dis.c
@@ -933,6 +933,7 @@ enum
MOD_VEX_0F384B_X86_64_P_3_W_0,
MOD_VEX_0F385A,
MOD_VEX_0F385C_X86_64_P_1_W_0,
+ MOD_VEX_0F385C_X86_64_P_3_W_0,
MOD_VEX_0F385E_X86_64_P_0_W_0,
MOD_VEX_0F385E_X86_64_P_1_W_0,
MOD_VEX_0F385E_X86_64_P_2_W_0,
@@ -1399,6 +1400,7 @@ enum
VEX_LEN_0F384B_X86_64_P_3_W_0_M_0,
VEX_LEN_0F385A_M_0,
VEX_LEN_0F385C_X86_64_P_1_W_0_M_0,
+ VEX_LEN_0F385C_X86_64_P_3_W_0_M_0,
VEX_LEN_0F385E_X86_64_P_0_W_0_M_0,
VEX_LEN_0F385E_X86_64_P_1_W_0_M_0,
VEX_LEN_0F385E_X86_64_P_2_W_0_M_0,
@@ -1565,6 +1567,7 @@ enum
VEX_W_0F3859,
VEX_W_0F385A_M_0_L_0,
VEX_W_0F385C_X86_64_P_1,
+ VEX_W_0F385C_X86_64_P_3,
VEX_W_0F385E_X86_64_P_0,
VEX_W_0F385E_X86_64_P_1,
VEX_W_0F385E_X86_64_P_2,
@@ -4088,6 +4091,7 @@ static const struct dis386 prefix_table[][4] = {
{ Bad_Opcode },
{ VEX_W_TABLE (VEX_W_0F385C_X86_64_P_1) },
{ Bad_Opcode },
+ { VEX_W_TABLE (VEX_W_0F385C_X86_64_P_3) },
},
/* PREFIX_VEX_0F385E_X86_64 */
@@ -7120,6 +7124,11 @@ static const struct dis386 vex_len_table[][2] = {
{ "tdpbf16ps", { TMM, EXtmm, VexTmm }, 0 },
},
+ /* VEX_LEN_0F385C_X86_64_P_3_W_0_M_0 */
+ {
+ { "tdpfp16ps", { TMM, EXtmm, VexTmm }, 0 },
+ },
+
/* VEX_LEN_0F385E_X86_64_P_0_W_0_M_0 */
{
{ "tdpbuud", {TMM, EXtmm, VexTmm }, 0 },
@@ -7788,6 +7797,10 @@ static const struct dis386 vex_w_table[][2] = {
/* VEX_W_0F385C_X86_64_P_1 */
{ MOD_TABLE (MOD_VEX_0F385C_X86_64_P_1_W_0) },
},
+ {
+ /* VEX_W_0F385C_X86_64_P_3 */
+ { MOD_TABLE (MOD_VEX_0F385C_X86_64_P_3_W_0) },
+ },
{
/* VEX_W_0F385E_X86_64_P_0 */
{ MOD_TABLE (MOD_VEX_0F385E_X86_64_P_0_W_0) },
@@ -8610,6 +8623,11 @@ static const struct dis386 mod_table[][2] = {
{ Bad_Opcode },
{ VEX_LEN_TABLE (VEX_LEN_0F385C_X86_64_P_1_W_0_M_0) },
},
+ {
+ /* MOD_VEX_0F385C_X86_64_P_3_W_0 */
+ { Bad_Opcode },
+ { VEX_LEN_TABLE (VEX_LEN_0F385C_X86_64_P_3_W_0_M_0) },
+ },
{
/* MOD_VEX_0F385E_X86_64_P_0_W_0 */
{ Bad_Opcode },
diff --git a/opcodes/i386-gen.c b/opcodes/i386-gen.c
index eac229e54d..75db98ef0f 100644
--- a/opcodes/i386-gen.c
+++ b/opcodes/i386-gen.c
@@ -319,6 +319,8 @@ static initializer cpu_flag_init[] =
"CpuAMX_INT8" },
{ "CPU_AMX_BF16_FLAGS",
"CpuAMX_BF16" },
+ { "CPU_AMX_FP16_FLAGS",
+ "CpuAMX_TILE|CpuAMX_FP16" },
{ "CPU_AMX_TILE_FLAGS",
"CpuAMX_TILE" },
{ "CPU_MOVDIRI_FLAGS",
@@ -425,8 +427,10 @@ static initializer cpu_flag_init[] =
"CpuAMX_INT8" },
{ "CPU_ANY_AMX_BF16_FLAGS",
"CpuAMX_BF16" },
+ { "CPU_ANY_AMX_FP16_FLAGS",
+ "CpuAMX_FP16" },
{ "CPU_ANY_AMX_TILE_FLAGS",
- "CpuAMX_TILE|CpuAMX_INT8|CpuAMX_BF16" },
+ "CpuAMX_TILE|CpuAMX_INT8|CpuAMX_BF16|CpuAMX_FP16" },
{ "CPU_ANY_AVX_VNNI_FLAGS",
"CpuAVX_VNNI" },
{ "CPU_ANY_MOVDIRI_FLAGS",
@@ -692,6 +696,7 @@ static bitfield cpu_flags[] =
BITFIELD (CpuCLDEMOTE),
BITFIELD (CpuAMX_INT8),
BITFIELD (CpuAMX_BF16),
+ BITFIELD (CpuAMX_FP16),
BITFIELD (CpuAMX_TILE),
BITFIELD (CpuMOVDIRI),
BITFIELD (CpuMOVDIR64B),
diff --git a/opcodes/i386-opc.h b/opcodes/i386-opc.h
index 75c23aaec6..b548769d75 100644
--- a/opcodes/i386-opc.h
+++ b/opcodes/i386-opc.h
@@ -240,6 +240,8 @@ enum
CpuAMX_INT8,
/* AMX-BF16 instructions required */
CpuAMX_BF16,
+ /* AMX-FP16 instructions required */
+ CpuAMX_FP16,
/* AMX-TILE instructions required */
CpuAMX_TILE,
/* GFNI instructions required */
@@ -418,6 +420,7 @@ typedef union i386_cpu_flags
unsigned int cpushstk:1;
unsigned int cpuamx_int8:1;
unsigned int cpuamx_bf16:1;
+ unsigned int cpuamx_fp16:1;
unsigned int cpuamx_tile:1;
unsigned int cpugfni:1;
unsigned int cpuvaes:1;
diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl
index 0dd7106a1c..a2767008b8 100644
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -3113,6 +3113,7 @@ ldtilecfg, 0x49, None, CpuAMX_TILE|Cpu64, Modrm|Vex128|Space0F38|VexW0|No_bSuf|N
sttilecfg, 0x6649, None, CpuAMX_TILE|Cpu64, Modrm|Vex128|Space0F38|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Unspecified|BaseIndex }
tdpbf16ps, 0xf35c, None, CpuAMX_BF16|Cpu64, Modrm|Vex128|Space0F38|VexVVVV=1|VexW0|SwapSources|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegTMM, RegTMM, RegTMM }
+tdpfp16ps, 0xf25c, None, CpuAMX_FP16|Cpu64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegTMM, RegTMM, RegTMM }
tdpbssd, 0xf25e, None, CpuAMX_INT8|Cpu64, Modrm|Vex128|Space0F38|VexVVVV=1|VexW0|SwapSources|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegTMM, RegTMM, RegTMM }
tdpbuud, 0x5e, None, CpuAMX_INT8|Cpu64, Modrm|Vex128|Space0F38|VexVVVV=1|VexW0|SwapSources|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegTMM, RegTMM, RegTMM }
tdpbusd, 0x665e, None, CpuAMX_INT8|Cpu64, Modrm|Vex128|Space0F38|VexVVVV=1|VexW0|SwapSources|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegTMM, RegTMM, RegTMM }
--
2.17.1
Thanks,
Lili.
On 18.10.2022 11:01, Cui, Lili wrote:
>>> + { "CPU_AMX_FP16_FLAGS",
>>> + "CpuAMX_FP16" },
>>> { "CPU_IAMCU_FLAGS",
>>> "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuIAMCU" },
>>> { "CPU_ADX_FLAGS",
>>
>> Can you please insert next to the other similar AMX entries? Seeing the flaw
>> here, I'll be making a patch to address the lack of CPU_AMX_TILE_FLAGS in
>> the similar pre-existing entries. When you move the insertion, it'll be easier
>> to keep things in sync.
> Done.
>
>>> - "CpuAMX_TILE|CpuAMX_INT8|CpuAMX_BF16" },
>>> + "CpuAMX_TILE|CpuAMX_INT8|CpuAMX_BF16|CpuAMX_FP16" },
>>> { "CPU_ANY_MSRLIST_FLAGS",
>>> "CpuMSRLIST" },
>>> + { "CPU_ANY_AMX_FP16_FLAGS",
>>> + "CpuAMX_FP16" },
>>> };
>>
>> Same here then.
> Done.
I guess my comment here was a little misleading (I'm sorry for that), in
that in addition I was expecting you to consider the comment regarding
the need for the *_ANY_* constants that I did give for several of the
patches in this series. I think the question applies here as well: Are
there dependent features known to appear? If not, there's no need for the
extra constant.
> --- a/gas/config/tc-i386.c
> +++ b/gas/config/tc-i386.c
> @@ -1101,6 +1101,7 @@ static const arch_entry cpu_arch[] =
> SUBARCH (raoint, RAOINT, ANY_RAOINT, false),
> SUBARCH (wrmsrns, WRMSRNS, ANY_WRMSRNS, false),
> SUBARCH (msrlist, MSRLIST, ANY_MSRLIST, false),
> + SUBARCH (amx_fp16, AMX_FP16, ANY_AMX_FP16, false),
> };
This might also benefit from moving up, but I'm not going to insist.
> --- a/gas/doc/c-i386.texi
> +++ b/gas/doc/c-i386.texi
> @@ -220,6 +220,7 @@ accept various extension mnemonics. For example,
> @code{raoint},
> @code{wrmsrns},
> @code{msrlist},
> +@code{amx_fp16},
> @code{noavx512f},
> @code{noavx512cd},
> @code{noavx512er},
> @@ -247,6 +248,7 @@ accept various extension mnemonics. For example,
> @code{noraoint},
> @code{nowrmsrns},
> @code{nomsrlist},
> +@code{noamx_fp16},
> @code{noenqcmd},
> @code{noserialize},
> @code{notsxldtrk},
There are no @code{no...} entries here anymore, as of earlier today. Hence
no new ones should appear (and you need to re-base in any event).
Jan
> -----Original Message-----
> From: Jan Beulich <jbeulich@suse.com>
> Sent: Tuesday, October 18, 2022 5:24 PM
> To: Cui, Lili <lili.cui@intel.com>
> Cc: hjl.tools@gmail.com; binutils@sourceware.org; Jiang, Haochen
> <haochen.jiang@intel.com>
> Subject: Re: [PATCH 09/10] Support Intel AMX-FP16
>
> On 18.10.2022 11:01, Cui, Lili wrote:
> >>> + { "CPU_AMX_FP16_FLAGS",
> >>> + "CpuAMX_FP16" },
> >>> { "CPU_IAMCU_FLAGS",
> >>> "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuIAMCU" },
> >>> { "CPU_ADX_FLAGS",
> >>
> >> Can you please insert next to the other similar AMX entries? Seeing the flaw
> >> here, I'll be making a patch to address the lack of CPU_AMX_TILE_FLAGS in
> >> the similar pre-existing entries. When you move the insertion, it'll be easier
> >> to keep things in sync.
> > Done.
> >
> >>> - "CpuAMX_TILE|CpuAMX_INT8|CpuAMX_BF16" },
> >>> + "CpuAMX_TILE|CpuAMX_INT8|CpuAMX_BF16|CpuAMX_FP16" },
> >>> { "CPU_ANY_MSRLIST_FLAGS",
> >>> "CpuMSRLIST" },
> >>> + { "CPU_ANY_AMX_FP16_FLAGS",
> >>> + "CpuAMX_FP16" },
> >>> };
> >>
> >> Same here then.
> > Done.
>
> I guess my comment here was a little misleading (I'm sorry for that), in
> that in addition I was expecting you to consider the comment regarding
> the need for the *_ANY_* constants that I did give for several of the
> patches in this series. I think the question applies here as well: Are
> there dependent features known to appear? If not, there's no need for the
> extra constant.
>
> > --- a/gas/config/tc-i386.c
> > +++ b/gas/config/tc-i386.c
> > @@ -1101,6 +1101,7 @@ static const arch_entry cpu_arch[] =
> > SUBARCH (raoint, RAOINT, ANY_RAOINT, false),
> > SUBARCH (wrmsrns, WRMSRNS, ANY_WRMSRNS, false),
> > SUBARCH (msrlist, MSRLIST, ANY_MSRLIST, false),
> > + SUBARCH (amx_fp16, AMX_FP16, ANY_AMX_FP16, false),
> > };
>
> This might also benefit from moving up, but I'm not going to insist.
>
> > --- a/gas/doc/c-i386.texi
> > +++ b/gas/doc/c-i386.texi
> > @@ -220,6 +220,7 @@ accept various extension mnemonics. For example,
> > @code{raoint},
> > @code{wrmsrns},
> > @code{msrlist},
> > +@code{amx_fp16},
> > @code{noavx512f},
> > @code{noavx512cd},
> > @code{noavx512er},
> > @@ -247,6 +248,7 @@ accept various extension mnemonics. For example,
> > @code{noraoint},
> > @code{nowrmsrns},
> > @code{nomsrlist},
> > +@code{noamx_fp16},
> > @code{noenqcmd},
> > @code{noserialize},
> > @code{notsxldtrk},
>
> There are no @code{no...} entries here anymore, as of earlier today. Hence
> no new ones should appear (and you need to re-base in any event).
I will do a final rebase of all the patches at the end to solve the texi problem.
Since several people are involved in this series of patches, I am keeping
the branch frozen at Friday's main trunk (the day we released all the patches),
which helps keep the branch stable while everyone makes their changes. But if a patch
needs a main trunk fix other than the texi one, I will remind them and do the rebase
before we send out the patch. I suppose there have been patches related to table
folding and AMX prereq.
BRs,
Haochen
>
> Jan
> >>> - "CpuAMX_TILE|CpuAMX_INT8|CpuAMX_BF16" },
> >>> + "CpuAMX_TILE|CpuAMX_INT8|CpuAMX_BF16|CpuAMX_FP16" },
> >>> { "CPU_ANY_MSRLIST_FLAGS",
> >>> "CpuMSRLIST" },
> >>> + { "CPU_ANY_AMX_FP16_FLAGS",
> >>> + "CpuAMX_FP16" },
> >>> };
> >>
> >> Same here then.
> > Done.
>
> I guess my comment here was a little misleading (I'm sorry for that), in that in
> addition I was expecting you to consider the comment regarding the need for
> the *_ANY_* constants that I did give for several of the patches in this series.
> I think the question applies here as well: Are there dependent features
> known to appear? If not, there's no need for the extra constant.
>
Got it. There are no known dependent features at the moment, and I will pay attention to this for future ISAs.
> > --- a/gas/config/tc-i386.c
> > +++ b/gas/config/tc-i386.c
> > @@ -1101,6 +1101,7 @@ static const arch_entry cpu_arch[] =
> > SUBARCH (raoint, RAOINT, ANY_RAOINT, false),
> > SUBARCH (wrmsrns, WRMSRNS, ANY_WRMSRNS, false),
> > SUBARCH (msrlist, MSRLIST, ANY_MSRLIST, false),
> > + SUBARCH (amx_fp16, AMX_FP16, ANY_AMX_FP16, false),
> > };
>
> This might also benefit from moving up, but I'm not going to insist.
Done, I missed this place.
>
> There are no @code{no...} entries here anymore, as of earlier today. Hence
> no new ones should appear (and you need to re-base in any event).
Rebased, thanks Jan.
gas/
* NEWS: Add support for Intel AMX-FP16 instruction.
* config/tc-i386.c: Add amx_fp16.
* doc/c-i386.texi: Document .amx_fp16.
* testsuite/gas/i386/i386.exp: Add AMX-FP16 tests.
* testsuite/gas/i386/x86-64-amx-fp16-intel.d: New test.
* testsuite/gas/i386/x86-64-amx-fp16.d: Likewise.
* testsuite/gas/i386/x86-64-amx-fp16.s: Likewise.
* testsuite/gas/i386/x86-64-amx-fp16-bad.d: Likewise.
* testsuite/gas/i386/x86-64-amx-fp16-bad.s: Likewise.
opcodes/
* i386-dis.c (MOD_VEX_0F385C_X86_64_P_3_W_0): New.
(VEX_LEN_0F385C_X86_64_P_3_W_0_M_0): Likewise.
(VEX_W_0F385C_X86_64_P_3): Likewise.
(prefix_table): Add VEX_W_0F385C_X86_64_P_3.
(vex_len_table): Add VEX_LEN_0F385C_X86_64_P_3_W_0_M_0.
(vex_w_table): Add VEX_W_0F385C_X86_64_P_3.
(mod_table): Add MOD_VEX_0F385C_X86_64_P_3_W_0.
* i386-gen.c (cpu_flag_init): Add CPU_AMX_FP16_FLAGS and
CPU_ANY_AMX_FP16_FLAGS.
(CPU_ANY_AMX_TILE_FLAGS): Add CpuAMX_FP16.
(cpu_flags): Add CpuAMX_FP16.
* i386-opc.h (enum): Add CpuAMX_FP16.
(i386_cpu_flags): Add cpuamx_fp16.
* i386-opc.tbl: Add Intel AMX-FP16 instruction.
* i386-init.h: Regenerate.
* i386-tbl.h: Likewise.
---
gas/NEWS | 2 ++
gas/config/tc-i386.c | 1 +
gas/doc/c-i386.texi | 3 +-
gas/testsuite/gas/i386/i386.exp | 3 ++
gas/testsuite/gas/i386/x86-64-amx-fp16-bad.d | 19 ++++++++++
gas/testsuite/gas/i386/x86-64-amx-fp16-bad.s | 35 +++++++++++++++++++
.../gas/i386/x86-64-amx-fp16-intel.d | 13 +++++++
gas/testsuite/gas/i386/x86-64-amx-fp16.d | 13 +++++++
gas/testsuite/gas/i386/x86-64-amx-fp16.s | 9 +++++
opcodes/i386-dis.c | 18 ++++++++++
opcodes/i386-gen.c | 7 +++-
opcodes/i386-opc.h | 3 ++
opcodes/i386-opc.tbl | 1 +
13 files changed, 125 insertions(+), 2 deletions(-)
create mode 100644 gas/testsuite/gas/i386/x86-64-amx-fp16-bad.d
create mode 100644 gas/testsuite/gas/i386/x86-64-amx-fp16-bad.s
create mode 100644 gas/testsuite/gas/i386/x86-64-amx-fp16-intel.d
create mode 100644 gas/testsuite/gas/i386/x86-64-amx-fp16.d
create mode 100644 gas/testsuite/gas/i386/x86-64-amx-fp16.s
diff --git a/gas/NEWS b/gas/NEWS
index 3246e7e825..961449545d 100644
--- a/gas/NEWS
+++ b/gas/NEWS
@@ -1,5 +1,7 @@
-*- text -*-
+* Add support for Intel AMX-FP16 instructions.
+
* Add support for Intel MSRLIST instructions.
* Add support for Intel WRMSRNS instructions.
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index c9432e4188..906e9db9ad 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -1076,6 +1076,7 @@ static const arch_entry cpu_arch[] =
SUBARCH (cldemote, CLDEMOTE, CLDEMOTE, false),
SUBARCH (amx_int8, AMX_INT8, ANY_AMX_INT8, false),
SUBARCH (amx_bf16, AMX_BF16, ANY_AMX_BF16, false),
+ SUBARCH (amx_fp16, AMX_FP16, ANY_AMX_FP16, false),
SUBARCH (amx_tile, AMX_TILE, ANY_AMX_TILE, false),
SUBARCH (movdiri, MOVDIRI, ANY_MOVDIRI, false),
SUBARCH (movdir64b, MOVDIR64B, ANY_MOVDIR64B, false),
diff --git a/gas/doc/c-i386.texi b/gas/doc/c-i386.texi
index 49582b29a6..b739d5f32e 100644
--- a/gas/doc/c-i386.texi
+++ b/gas/doc/c-i386.texi
@@ -203,6 +203,7 @@ accept various extension mnemonics. For example,
@code{msrlist},
@code{amx_int8},
@code{amx_bf16},
+@code{amx_fp16},
@code{amx_tile},
@code{vmx},
@code{vmfunc},
@@ -1499,7 +1500,7 @@ supported on the CPU specified. The choices for @var{cpu_type} are:
@item @samp{.wbnoinvd} @tab @samp{.pconfig} @tab @samp{.waitpkg} @tab @samp{.cldemote}
@item @samp{.shstk} @tab @samp{.gfni} @tab @samp{.vaes} @tab @samp{.vpclmulqdq}
@item @samp{.movdiri} @tab @samp{.movdir64b} @tab @samp{.enqcmd} @tab @samp{.tsxldtrk}
-@item @samp{.amx_int8} @tab @samp{.amx_bf16} @tab @samp{.amx_tile}
+@item @samp{.amx_int8} @tab @samp{.amx_bf16} @tab @samp{.amx_fp16} @tab @samp{.amx_tile}
@item @samp{.kl} @tab @samp{.widekl} @tab @samp{.uintr} @tab @samp{.hreset}
@item @samp{.3dnow} @tab @samp{.3dnowa} @tab @samp{.sse4a} @tab @samp{.sse5}
@item @samp{.syscall} @tab @samp{.rdtscp} @tab @samp{.svme}
diff --git a/gas/testsuite/gas/i386/i386.exp b/gas/testsuite/gas/i386/i386.exp
index 5da64b4076..9f5fa7f612 100644
--- a/gas/testsuite/gas/i386/i386.exp
+++ b/gas/testsuite/gas/i386/i386.exp
@@ -1173,6 +1173,9 @@ if [gas_64_check] then {
run_dump_test "x86-64-wrmsrns-intel"
run_dump_test "x86-64-msrlist"
run_dump_test "x86-64-msrlist-intel"
+ run_dump_test "x86-64-amx-fp16"
+ run_dump_test "x86-64-amx-fp16-intel"
+ run_dump_test "x86-64-amx-fp16-bad"
run_dump_test "x86-64-clzero"
run_dump_test "x86-64-mwaitx-bdver4"
run_list_test "x86-64-mwaitx-reg"
diff --git a/gas/testsuite/gas/i386/x86-64-amx-fp16-bad.d b/gas/testsuite/gas/i386/x86-64-amx-fp16-bad.d
new file mode 100644
index 0000000000..a53ebf486d
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-fp16-bad.d
@@ -0,0 +1,19 @@
+#as:
+#objdump: -drw
+#name: x86_64 Illegal AMX-FP16 insns
+#source: x86-64-amx-fp16-bad.s
+
+.*: +file format .*
+
+
+Disassembly of section \.text:
+
+0+ <\.text>:
+[ ]*[a-f0-9]+:[ ]*c4 e2 d3 5c[ ]*\(bad\)[ ]*
+[ ]*[a-f0-9]+:[ ]*dc 90 90 90 90 90[ ]*fcoml.*
+[ ]*[a-f0-9]+:[ ]*c4 e2 57 5c[ ]*\(bad\)[ ]*
+[ ]*[a-f0-9]+:[ ]*dc 90 90 90 90 90[ ]*fcoml.*
+[ ]*[a-f0-9]+:[ ]*c4 62 53 5c dc[ ]*tdpfp16ps %tmm5,%tmm4,\(bad\)
+[ ]*[a-f0-9]+:[ ]*c4 c2 53 5c dc[ ]*tdpfp16ps %tmm5,\(bad\),%tmm3
+[ ]*[a-f0-9]+:[ ]*c4 e2 33 5c dc[ ]*tdpfp16ps \(bad\),%tmm4,%tmm3
+#pass
diff --git a/gas/testsuite/gas/i386/x86-64-amx-fp16-bad.s b/gas/testsuite/gas/i386/x86-64-amx-fp16-bad.s
new file mode 100644
index 0000000000..da5be1086e
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-fp16-bad.s
@@ -0,0 +1,35 @@
+# Check Illegal 64bit AMX-FP16 instructions
+
+.text
+ #tdpfp16ps %tmm5,%tmm4,%tmm3 set VEX.W = 1 (illegal value).
+ .byte 0xc4
+ .byte 0xe2
+ .byte 0xd3
+ .byte 0x5c
+ .byte 0xdc
+ .fill 0x05, 0x01, 0x90
+ #tdpfp16ps %tmm5,%tmm4,%tmm3 set VEX.L = 1 (illegal value).
+ .byte 0xc4
+ .byte 0xe2
+ .byte 0x57
+ .byte 0x5c
+ .byte 0xdc
+ .fill 0x05, 0x01, 0x90
+ #tdpfp16ps %tmm5,%tmm4,%tmm3 set VEX.R = 0 (illegal value).
+ .byte 0xc4
+ .byte 0x62
+ .byte 0x53
+ .byte 0x5c
+ .byte 0xdc
+ #tdpfp16ps %tmm5,%tmm4,%tmm3 set VEX.B = 0 (illegal value).
+ .byte 0xc4
+ .byte 0xc2
+ .byte 0x53
+ .byte 0x5c
+ .byte 0xdc
+ #tdpfp16ps %tmm5,%tmm4,%tmm3 set VEX.VVVV = 0110 (illegal value).
+ .byte 0xc4
+ .byte 0xe2
+ .byte 0x33
+ .byte 0x5c
+ .byte 0xdc
diff --git a/gas/testsuite/gas/i386/x86-64-amx-fp16-intel.d b/gas/testsuite/gas/i386/x86-64-amx-fp16-intel.d
new file mode 100644
index 0000000000..497898b760
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-fp16-intel.d
@@ -0,0 +1,13 @@
+#as:
+#objdump: -d -Mintel
+#name: x86_64 AMX-FP16 insns (Intel disassembly)
+#source: x86-64-amx-fp16.s
+
+.*: +file format .*
+
+
+Disassembly of section \.text:
+
+0+ <_start>:
+[ ]*[a-f0-9]+:[ ]*c4 e2 53 5c dc[ ]*tdpfp16ps tmm3,tmm4,tmm5
+[ ]*[a-f0-9]+:[ ]*c4 e2 53 5c dc[ ]*tdpfp16ps tmm3,tmm4,tmm5
diff --git a/gas/testsuite/gas/i386/x86-64-amx-fp16.d b/gas/testsuite/gas/i386/x86-64-amx-fp16.d
new file mode 100644
index 0000000000..7d3af95a4d
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-fp16.d
@@ -0,0 +1,13 @@
+#as:
+#objdump: -dw
+#name: x86_64 AMX-FP16 insns
+#source: x86-64-amx-fp16.s
+
+.*: +file format .*
+
+
+Disassembly of section \.text:
+
+0+ <_start>:
+[ ]*[a-f0-9]+:[ ]*c4 e2 53 5c dc[ ]*tdpfp16ps %tmm5,%tmm4,%tmm3
+[ ]*[a-f0-9]+:[ ]*c4 e2 53 5c dc[ ]*tdpfp16ps %tmm5,%tmm4,%tmm3
diff --git a/gas/testsuite/gas/i386/x86-64-amx-fp16.s b/gas/testsuite/gas/i386/x86-64-amx-fp16.s
new file mode 100644
index 0000000000..5a007904ed
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-fp16.s
@@ -0,0 +1,9 @@
+# Check 64bit AMX-FP16 instructions
+
+ .allow_index_reg
+ .text
+_start:
+ tdpfp16ps %tmm5, %tmm4, %tmm3
+
+.intel_syntax noprefix
+ tdpfp16ps tmm3, tmm4, tmm5
diff --git a/opcodes/i386-dis.c b/opcodes/i386-dis.c
index 0aa41bd5fb..60712c7c5b 100644
--- a/opcodes/i386-dis.c
+++ b/opcodes/i386-dis.c
@@ -933,6 +933,7 @@ enum
MOD_VEX_0F384B_X86_64_P_3_W_0,
MOD_VEX_0F385A,
MOD_VEX_0F385C_X86_64_P_1_W_0,
+ MOD_VEX_0F385C_X86_64_P_3_W_0,
MOD_VEX_0F385E_X86_64_P_0_W_0,
MOD_VEX_0F385E_X86_64_P_1_W_0,
MOD_VEX_0F385E_X86_64_P_2_W_0,
@@ -1399,6 +1400,7 @@ enum
VEX_LEN_0F384B_X86_64_P_3_W_0_M_0,
VEX_LEN_0F385A_M_0,
VEX_LEN_0F385C_X86_64_P_1_W_0_M_0,
+ VEX_LEN_0F385C_X86_64_P_3_W_0_M_0,
VEX_LEN_0F385E_X86_64_P_0_W_0_M_0,
VEX_LEN_0F385E_X86_64_P_1_W_0_M_0,
VEX_LEN_0F385E_X86_64_P_2_W_0_M_0,
@@ -1565,6 +1567,7 @@ enum
VEX_W_0F3859,
VEX_W_0F385A_M_0_L_0,
VEX_W_0F385C_X86_64_P_1,
+ VEX_W_0F385C_X86_64_P_3,
VEX_W_0F385E_X86_64_P_0,
VEX_W_0F385E_X86_64_P_1,
VEX_W_0F385E_X86_64_P_2,
@@ -4088,6 +4091,7 @@ static const struct dis386 prefix_table[][4] = {
{ Bad_Opcode },
{ VEX_W_TABLE (VEX_W_0F385C_X86_64_P_1) },
{ Bad_Opcode },
+ { VEX_W_TABLE (VEX_W_0F385C_X86_64_P_3) },
},
/* PREFIX_VEX_0F385E_X86_64 */
@@ -7120,6 +7124,11 @@ static const struct dis386 vex_len_table[][2] = {
{ "tdpbf16ps", { TMM, EXtmm, VexTmm }, 0 },
},
+ /* VEX_LEN_0F385C_X86_64_P_3_W_0_M_0 */
+ {
+ { "tdpfp16ps", { TMM, EXtmm, VexTmm }, 0 },
+ },
+
/* VEX_LEN_0F385E_X86_64_P_0_W_0_M_0 */
{
{ "tdpbuud", {TMM, EXtmm, VexTmm }, 0 },
@@ -7788,6 +7797,10 @@ static const struct dis386 vex_w_table[][2] = {
/* VEX_W_0F385C_X86_64_P_1 */
{ MOD_TABLE (MOD_VEX_0F385C_X86_64_P_1_W_0) },
},
+ {
+ /* VEX_W_0F385C_X86_64_P_3 */
+ { MOD_TABLE (MOD_VEX_0F385C_X86_64_P_3_W_0) },
+ },
{
/* VEX_W_0F385E_X86_64_P_0 */
{ MOD_TABLE (MOD_VEX_0F385E_X86_64_P_0_W_0) },
@@ -8610,6 +8623,11 @@ static const struct dis386 mod_table[][2] = {
{ Bad_Opcode },
{ VEX_LEN_TABLE (VEX_LEN_0F385C_X86_64_P_1_W_0_M_0) },
},
+ {
+ /* MOD_VEX_0F385C_X86_64_P_3_W_0 */
+ { Bad_Opcode },
+ { VEX_LEN_TABLE (VEX_LEN_0F385C_X86_64_P_3_W_0_M_0) },
+ },
{
/* MOD_VEX_0F385E_X86_64_P_0_W_0 */
{ Bad_Opcode },
diff --git a/opcodes/i386-gen.c b/opcodes/i386-gen.c
index 435d67711f..86383ba793 100644
--- a/opcodes/i386-gen.c
+++ b/opcodes/i386-gen.c
@@ -319,6 +319,8 @@ static initializer cpu_flag_init[] =
"CPU_AMX_TILE_FLAGS|CpuAMX_INT8" },
{ "CPU_AMX_BF16_FLAGS",
"CPU_AMX_TILE_FLAGS|CpuAMX_BF16" },
+ { "CPU_AMX_FP16_FLAGS",
+ "CPU_AMX_TILE_FLAGS|CpuAMX_FP16" },
{ "CPU_AMX_TILE_FLAGS",
"CpuAMX_TILE" },
{ "CPU_MOVDIRI_FLAGS",
@@ -425,8 +427,10 @@ static initializer cpu_flag_init[] =
"CpuAMX_INT8" },
{ "CPU_ANY_AMX_BF16_FLAGS",
"CpuAMX_BF16" },
+ { "CPU_ANY_AMX_FP16_FLAGS",
+ "CpuAMX_FP16" },
{ "CPU_ANY_AMX_TILE_FLAGS",
- "CpuAMX_TILE|CpuAMX_INT8|CpuAMX_BF16" },
+ "CpuAMX_TILE|CpuAMX_INT8|CpuAMX_BF16|CpuAMX_FP16" },
{ "CPU_ANY_AVX_VNNI_FLAGS",
"CpuAVX_VNNI" },
{ "CPU_ANY_MOVDIRI_FLAGS",
@@ -692,6 +696,7 @@ static bitfield cpu_flags[] =
BITFIELD (CpuCLDEMOTE),
BITFIELD (CpuAMX_INT8),
BITFIELD (CpuAMX_BF16),
+ BITFIELD (CpuAMX_FP16),
BITFIELD (CpuAMX_TILE),
BITFIELD (CpuMOVDIRI),
BITFIELD (CpuMOVDIR64B),
diff --git a/opcodes/i386-opc.h b/opcodes/i386-opc.h
index 75c23aaec6..b548769d75 100644
--- a/opcodes/i386-opc.h
+++ b/opcodes/i386-opc.h
@@ -240,6 +240,8 @@ enum
CpuAMX_INT8,
/* AMX-BF16 instructions required */
CpuAMX_BF16,
+ /* AMX-FP16 instructions required */
+ CpuAMX_FP16,
/* AMX-TILE instructions required */
CpuAMX_TILE,
/* GFNI instructions required */
@@ -418,6 +420,7 @@ typedef union i386_cpu_flags
unsigned int cpushstk:1;
unsigned int cpuamx_int8:1;
unsigned int cpuamx_bf16:1;
+ unsigned int cpuamx_fp16:1;
unsigned int cpuamx_tile:1;
unsigned int cpugfni:1;
unsigned int cpuvaes:1;
diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl
index 42d6423942..6057664193 100644
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -3113,6 +3113,7 @@ ldtilecfg, 0x49, None, CpuAMX_TILE|Cpu64, Modrm|Vex128|Space0F38|VexW0|No_bSuf|N
sttilecfg, 0x6649, None, CpuAMX_TILE|Cpu64, Modrm|Vex128|Space0F38|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Unspecified|BaseIndex }
tdpbf16ps, 0xf35c, None, CpuAMX_BF16|Cpu64, Modrm|Vex128|Space0F38|VexVVVV=1|VexW0|SwapSources|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegTMM, RegTMM, RegTMM }
+tdpfp16ps, 0xf25c, None, CpuAMX_FP16|Cpu64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegTMM, RegTMM, RegTMM }
tdpbssd, 0xf25e, None, CpuAMX_INT8|Cpu64, Modrm|Vex128|Space0F38|VexVVVV=1|VexW0|SwapSources|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegTMM, RegTMM, RegTMM }
tdpbuud, 0x5e, None, CpuAMX_INT8|Cpu64, Modrm|Vex128|Space0F38|VexVVVV=1|VexW0|SwapSources|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegTMM, RegTMM, RegTMM }
tdpbusd, 0x665e, None, CpuAMX_INT8|Cpu64, Modrm|Vex128|Space0F38|VexVVVV=1|VexW0|SwapSources|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegTMM, RegTMM, RegTMM }
--
2.17.1
Thanks,
Lili.
On 19.10.2022 12:33, Cui, Lili wrote:
>>>>> - "CpuAMX_TILE|CpuAMX_INT8|CpuAMX_BF16" },
>>>>> + "CpuAMX_TILE|CpuAMX_INT8|CpuAMX_BF16|CpuAMX_FP16" },
>>>>> { "CPU_ANY_MSRLIST_FLAGS",
>>>>> "CpuMSRLIST" },
>>>>> + { "CPU_ANY_AMX_FP16_FLAGS",
>>>>> + "CpuAMX_FP16" },
>>>>> };
>>>>
>>>> Same here then.
>>> Done.
>>
>> I guess my comment here was a little misleading (I'm sorry for that), in that in
>> addition I was expecting you to consider the comment regarding the need for
>> the *_ANY_* constants that I did give for several of the patches in this series.
>> I think the question applies here as well: Are there dependent features
>> known to appear? If not, there's no need for the extra constant.
>>
> Get you, there are no known dependencies now, and I will pay attention on it in future ISAs.
Despite this reply, ...
> --- a/gas/config/tc-i386.c
> +++ b/gas/config/tc-i386.c
> @@ -1076,6 +1076,7 @@ static const arch_entry cpu_arch[] =
> SUBARCH (cldemote, CLDEMOTE, CLDEMOTE, false),
> SUBARCH (amx_int8, AMX_INT8, ANY_AMX_INT8, false),
> SUBARCH (amx_bf16, AMX_BF16, ANY_AMX_BF16, false),
> + SUBARCH (amx_fp16, AMX_FP16, ANY_AMX_FP16, false),
... you still use ANY_... here and ...
> --- a/opcodes/i386-gen.c
> +++ b/opcodes/i386-gen.c
> @@ -319,6 +319,8 @@ static initializer cpu_flag_init[] =
> "CPU_AMX_TILE_FLAGS|CpuAMX_INT8" },
> { "CPU_AMX_BF16_FLAGS",
> "CPU_AMX_TILE_FLAGS|CpuAMX_BF16" },
> + { "CPU_AMX_FP16_FLAGS",
> + "CPU_AMX_TILE_FLAGS|CpuAMX_FP16" },
> { "CPU_AMX_TILE_FLAGS",
> "CpuAMX_TILE" },
> { "CPU_MOVDIRI_FLAGS",
> @@ -425,8 +427,10 @@ static initializer cpu_flag_init[] =
> "CpuAMX_INT8" },
> { "CPU_ANY_AMX_BF16_FLAGS",
> "CpuAMX_BF16" },
> + { "CPU_ANY_AMX_FP16_FLAGS",
> + "CpuAMX_FP16" },
... you request it to be constructed here.
Jan
> -----Original Message-----
> From: Cui, Lili <lili.cui@intel.com>
> Sent: Wednesday, October 19, 2022 6:34 PM
> To: Beulich, Jan <JBeulich@suse.com>
> Cc: hjl.tools@gmail.com; binutils@sourceware.org; Jiang, Haochen
> <haochen.jiang@intel.com>
> Subject: RE: [PATCH 09/10] Support Intel AMX-FP16
>
> > >>> - "CpuAMX_TILE|CpuAMX_INT8|CpuAMX_BF16" },
> > >>> + "CpuAMX_TILE|CpuAMX_INT8|CpuAMX_BF16|CpuAMX_FP16" },
> > >>> { "CPU_ANY_MSRLIST_FLAGS",
> > >>> "CpuMSRLIST" },
> > >>> + { "CPU_ANY_AMX_FP16_FLAGS",
> > >>> + "CpuAMX_FP16" },
> > >>> };
> > >>
> > >> Same here then.
> > > Done.
> >
> > I guess my comment here was a little misleading (I'm sorry for that),
> > in that in addition I was expecting you to consider the comment
> > regarding the need for the *_ANY_* constants that I did give for several of the
> patches in this series.
> > I think the question applies here as well: Are there dependent
> > features known to appear? If not, there's no need for the extra constant.
> >
> Get you, there are no known dependencies now, and I will pay attention on it in
> future ISAs.
But I suppose that for AMX-FP16 the likelihood of a future dependency is higher than
for features like CMPCCXADD or MSRLIST, since the FP16 type is not that rare.
I would prefer to keep the ANY there. BTW, I have just revised all of the patches according to
the review and will send them out very soon.
BRs,
Haochen
>
> > > --- a/gas/config/tc-i386.c
> > > +++ b/gas/config/tc-i386.c
> > > @@ -1101,6 +1101,7 @@ static const arch_entry cpu_arch[] =
> > > SUBARCH (raoint, RAOINT, ANY_RAOINT, false),
> > > SUBARCH (wrmsrns, WRMSRNS, ANY_WRMSRNS, false),
> > > SUBARCH (msrlist, MSRLIST, ANY_MSRLIST, false),
> > > + SUBARCH (amx_fp16, AMX_FP16, ANY_AMX_FP16, false),
> > > };
> >
> > This might also benefit from moving up, but I'm not going to insist.
> Done, I missed this place.
>
> >
> > There are no @code{no...} entries here anymore, as of earlier today.
> > Hence no new ones should appear (and you need to re-base in any event).
> Rebased, thanks Jan.
>
>
> gas/
>
> * NEWS: Add support for Intel AMX-FP16 instruction.
> * config/tc-i386.c: Add amx_fp16.
> * doc/c-i386.texi: Document .amx_fp16.
> * testsuite/gas/i386/i386.exp: Add AMX-FP16 tests.
> * testsuite/gas/i386/x86-64-amx-fp16-intel.d: New test.
> * testsuite/gas/i386/x86-64-amx-fp16.d: Likewise.
> * testsuite/gas/i386/x86-64-amx-fp16.s: Likewise.
> * testsuite/gas/i386/x86-64-amx-fp16-bad.d: Likewise.
> * testsuite/gas/i386/x86-64-amx-fp16-bad.s: Likewise.
>
> opcodes/
>
> * i386-dis.c (MOD_VEX_0F385C_X86_64_P_3_W_0): New.
> (VEX_LEN_0F385C_X86_64_P_3_W_0_M_0): Likewise.
> (VEX_W_0F385C_X86_64_P_3): Likewise.
> (prefix_table): Add VEX_W_0F385C_X86_64_P_3.
> (vex_len_table): Add VEX_LEN_0F385C_X86_64_P_3_W_0_M_0.
> (vex_w_table): Add VEX_W_0F385C_X86_64_P_3.
> (mod_table): Add MOD_VEX_0F385C_X86_64_P_3_W_0.
> * i386-gen.c (cpu_flag_init): Add AMX-FP16_FLAGS and
> CPU_ANY_AMX-FP16_FLAGS.
> (CPU_ANY_AMX_TILE_FLAGS): Add CpuAMX_FP16.
> (cpu_flags): Add CpuAMX-FP16.
> * i386-opc.h (enum): Add CpuAMX-FP16.
> (i386_cpu_flags): Add cpuamx_fp16.
> * i386-opc.tbl: Add Intel AMX-FP16 instruction.
> * i386-init.h: Regenerate.
> * i386-tbl.h: Likewise.
> ---
> gas/NEWS | 2 ++
> gas/config/tc-i386.c | 1 +
> gas/doc/c-i386.texi | 3 +-
> gas/testsuite/gas/i386/i386.exp | 3 ++
> gas/testsuite/gas/i386/x86-64-amx-fp16-bad.d | 19 ++++++++++
> gas/testsuite/gas/i386/x86-64-amx-fp16-bad.s | 35 +++++++++++++++++++
> .../gas/i386/x86-64-amx-fp16-intel.d | 13 +++++++
> gas/testsuite/gas/i386/x86-64-amx-fp16.d | 13 +++++++
> gas/testsuite/gas/i386/x86-64-amx-fp16.s | 9 +++++
> opcodes/i386-dis.c | 18 ++++++++++
> opcodes/i386-gen.c | 7 +++-
> opcodes/i386-opc.h | 3 ++
> opcodes/i386-opc.tbl | 1 +
> 13 files changed, 125 insertions(+), 2 deletions(-) create mode 100644
> gas/testsuite/gas/i386/x86-64-amx-fp16-bad.d
> create mode 100644 gas/testsuite/gas/i386/x86-64-amx-fp16-bad.s
> create mode 100644 gas/testsuite/gas/i386/x86-64-amx-fp16-intel.d
> create mode 100644 gas/testsuite/gas/i386/x86-64-amx-fp16.d
> create mode 100644 gas/testsuite/gas/i386/x86-64-amx-fp16.s
>
> diff --git a/gas/NEWS b/gas/NEWS
> index 3246e7e825..961449545d 100644
> --- a/gas/NEWS
> +++ b/gas/NEWS
> @@ -1,5 +1,7 @@
> -*- text -*-
>
> +* Add support for Intel AMX-FP16 instructions.
> +
> * Add support for Intel MSRLIST instructions.
>
> * Add support for Intel WRMSRNS instructions.
> diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index
> c9432e4188..906e9db9ad 100644
> --- a/gas/config/tc-i386.c
> +++ b/gas/config/tc-i386.c
> @@ -1076,6 +1076,7 @@ static const arch_entry cpu_arch[] =
> SUBARCH (cldemote, CLDEMOTE, CLDEMOTE, false),
> SUBARCH (amx_int8, AMX_INT8, ANY_AMX_INT8, false),
> SUBARCH (amx_bf16, AMX_BF16, ANY_AMX_BF16, false),
> + SUBARCH (amx_fp16, AMX_FP16, ANY_AMX_FP16, false),
> SUBARCH (amx_tile, AMX_TILE, ANY_AMX_TILE, false),
> SUBARCH (movdiri, MOVDIRI, ANY_MOVDIRI, false),
> SUBARCH (movdir64b, MOVDIR64B, ANY_MOVDIR64B, false), diff --git
> a/gas/doc/c-i386.texi b/gas/doc/c-i386.texi index 49582b29a6..b739d5f32e
> 100644
> --- a/gas/doc/c-i386.texi
> +++ b/gas/doc/c-i386.texi
> @@ -203,6 +203,7 @@ accept various extension mnemonics. For example,
> @code{msrlist}, @code{amx_int8}, @code{amx_bf16},
> +@code{amx_fp16},
> @code{amx_tile},
> @code{vmx},
> @code{vmfunc},
> @@ -1499,7 +1500,7 @@ supported on the CPU specified. The choices for
> @var{cpu_type} are:
> @item @samp{.wbnoinvd} @tab @samp{.pconfig} @tab @samp{.waitpkg}
> @tab @samp{.cldemote} @item @samp{.shstk} @tab @samp{.gfni} @tab
> @samp{.vaes} @tab @samp{.vpclmulqdq} @item @samp{.movdiri} @tab
> @samp{.movdir64b} @tab @samp{.enqcmd} @tab @samp{.tsxldtrk} -@item
> @samp{.amx_int8} @tab @samp{.amx_bf16} @tab @samp{.amx_tile}
> +@item @samp{.amx_int8} @tab @samp{.amx_bf16} @tab @samp{.amx_fp16}
> @tab
> +@samp{.amx_tile}
> @item @samp{.kl} @tab @samp{.widekl} @tab @samp{.uintr} @tab
> @samp{.hreset} @item @samp{.3dnow} @tab @samp{.3dnowa} @tab
> @samp{.sse4a} @tab @samp{.sse5} @item @samp{.syscall} @tab
> @samp{.rdtscp} @tab @samp{.svme} diff --git
> a/gas/testsuite/gas/i386/i386.exp b/gas/testsuite/gas/i386/i386.exp index
> 5da64b4076..9f5fa7f612 100644
> --- a/gas/testsuite/gas/i386/i386.exp
> +++ b/gas/testsuite/gas/i386/i386.exp
> @@ -1173,6 +1173,9 @@ if [gas_64_check] then {
> run_dump_test "x86-64-wrmsrns-intel"
> run_dump_test "x86-64-msrlist"
> run_dump_test "x86-64-msrlist-intel"
> + run_dump_test "x86-64-amx-fp16"
> + run_dump_test "x86-64-amx-fp16-intel"
> + run_dump_test "x86-64-amx-fp16-bad"
> run_dump_test "x86-64-clzero"
> run_dump_test "x86-64-mwaitx-bdver4"
> run_list_test "x86-64-mwaitx-reg"
> diff --git a/gas/testsuite/gas/i386/x86-64-amx-fp16-bad.d
> b/gas/testsuite/gas/i386/x86-64-amx-fp16-bad.d
> new file mode 100644
> index 0000000000..a53ebf486d
> --- /dev/null
> +++ b/gas/testsuite/gas/i386/x86-64-amx-fp16-bad.d
> @@ -0,0 +1,19 @@
> +#as:
> +#objdump: -drw
> +#name: x86_64 Illegal AMX-FP16 insns
> +#source: x86-64-amx-fp16-bad.s
> +
> +.*: +file format .*
> +
> +
> +Disassembly of section \.text:
> +
> +0+ <\.text>:
> +[ ]*[a-f0-9]+:[ ]*c4 e2 d3 5c[ ]*\(bad\)[ ]*
> +[ ]*[a-f0-9]+:[ ]*dc 90 90 90 90 90[ ]*fcoml.*
> +[ ]*[a-f0-9]+:[ ]*c4 e2 57 5c[ ]*\(bad\)[ ]*
> +[ ]*[a-f0-9]+:[ ]*dc 90 90 90 90 90[ ]*fcoml.*
> +[ ]*[a-f0-9]+:[ ]*c4 62 53 5c dc[
> ]*tdpfp16ps %tmm5,%tmm4,\(bad\)
> +[ ]*[a-f0-9]+:[ ]*c4 c2 53 5c dc[
> ]*tdpfp16ps %tmm5,\(bad\),%tmm3
> +[ ]*[a-f0-9]+:[ ]*c4 e2 33 5c dc[ ]*tdpfp16ps
> \(bad\),%tmm4,%tmm3
> +#pass
> diff --git a/gas/testsuite/gas/i386/x86-64-amx-fp16-bad.s
> b/gas/testsuite/gas/i386/x86-64-amx-fp16-bad.s
> new file mode 100644
> index 0000000000..da5be1086e
> --- /dev/null
> +++ b/gas/testsuite/gas/i386/x86-64-amx-fp16-bad.s
> @@ -0,0 +1,35 @@
> +# Check Illegal 64bit AMX-FP16 instructions
> +
> +.text
> + #tdpfp16ps %tmm5,%tmm4,%tmm3 set VEX.W = 1 (illegal value).
> + .byte 0xc4
> + .byte 0xe2
> + .byte 0xd3
> + .byte 0x5c
> + .byte 0xdc
> + .fill 0x05, 0x01, 0x90
> + #tdpfp16ps %tmm5,%tmm4,%tmm3 set VEX.L = 1 (illegal value).
> + .byte 0xc4
> + .byte 0xe2
> + .byte 0x57
> + .byte 0x5c
> + .byte 0xdc
> + .fill 0x05, 0x01, 0x90
> + #tdpfp16ps %tmm5,%tmm4,%tmm3 set VEX.R = 0 (illegal value).
> + .byte 0xc4
> + .byte 0x62
> + .byte 0x53
> + .byte 0x5c
> + .byte 0xdc
> + #tdpbf16ps %tmm5,%tmm4,%tmm3 set VEX.B = 0 (illegal value).
> + .byte 0xc4
> + .byte 0xc2
> + .byte 0x53
> + .byte 0x5c
> + .byte 0xdc
> + #tdpbf16ps %tmm5,%tmm4,%tmm3 set VEX.VVVV = 0110 (illegal value).
> + .byte 0xc4
> + .byte 0xe2
> + .byte 0x33
> + .byte 0x5c
> + .byte 0xdc
> diff --git a/gas/testsuite/gas/i386/x86-64-amx-fp16-intel.d
> b/gas/testsuite/gas/i386/x86-64-amx-fp16-intel.d
> new file mode 100644
> index 0000000000..497898b760
> --- /dev/null
> +++ b/gas/testsuite/gas/i386/x86-64-amx-fp16-intel.d
> @@ -0,0 +1,13 @@
> +#as:
> +#objdump: -d -Mintel
> +#name: x86_64 AMX-FP16 insns (Intel disassembly)
> +#source: x86-64-amx-fp16.s
> +
> +.*: +file format .*
> +
> +
> +Disassembly of section \.text:
> +
> +0+ <_start>:
> +[ ]*[a-f0-9]+:[ ]*c4 e2 53 5c dc[ ]*tdpfp16ps
> tmm3,tmm4,tmm5
> +[ ]*[a-f0-9]+:[ ]*c4 e2 53 5c dc[ ]*tdpfp16ps
> tmm3,tmm4,tmm5
> diff --git a/gas/testsuite/gas/i386/x86-64-amx-fp16.d
> b/gas/testsuite/gas/i386/x86-64-amx-fp16.d
> new file mode 100644
> index 0000000000..7d3af95a4d
> --- /dev/null
> +++ b/gas/testsuite/gas/i386/x86-64-amx-fp16.d
> @@ -0,0 +1,13 @@
> +#as:
> +#objdump: -dw
> +#name: x86_64 AMX-FP16 insns
> +#source: x86-64-amx-fp16.s
> +
> +.*: +file format .*
> +
> +
> +Disassembly of section \.text:
> +
> +0+ <_start>:
> +[ ]*[a-f0-9]+:[ ]*c4 e2 53 5c dc[
> ]*tdpfp16ps %tmm5,%tmm4,%tmm3
> +[ ]*[a-f0-9]+:[ ]*c4 e2 53 5c dc[
> ]*tdpfp16ps %tmm5,%tmm4,%tmm3
> diff --git a/gas/testsuite/gas/i386/x86-64-amx-fp16.s
> b/gas/testsuite/gas/i386/x86-64-amx-fp16.s
> new file mode 100644
> index 0000000000..5a007904ed
> --- /dev/null
> +++ b/gas/testsuite/gas/i386/x86-64-amx-fp16.s
> @@ -0,0 +1,9 @@
> +# Check 64bit AMX-FP16 instructions
> +
> + .allow_index_reg
> + .text
> +_start:
> + tdpfp16ps %tmm5, %tmm4, %tmm3
> +
> +.intel_syntax noprefix
> + tdpfp16ps tmm3, tmm4, tmm5
> diff --git a/opcodes/i386-dis.c b/opcodes/i386-dis.c index
> 0aa41bd5fb..60712c7c5b 100644
> --- a/opcodes/i386-dis.c
> +++ b/opcodes/i386-dis.c
> @@ -933,6 +933,7 @@ enum
> MOD_VEX_0F384B_X86_64_P_3_W_0,
> MOD_VEX_0F385A,
> MOD_VEX_0F385C_X86_64_P_1_W_0,
> + MOD_VEX_0F385C_X86_64_P_3_W_0,
> MOD_VEX_0F385E_X86_64_P_0_W_0,
> MOD_VEX_0F385E_X86_64_P_1_W_0,
> MOD_VEX_0F385E_X86_64_P_2_W_0,
> @@ -1399,6 +1400,7 @@ enum
> VEX_LEN_0F384B_X86_64_P_3_W_0_M_0,
> VEX_LEN_0F385A_M_0,
> VEX_LEN_0F385C_X86_64_P_1_W_0_M_0,
> + VEX_LEN_0F385C_X86_64_P_3_W_0_M_0,
> VEX_LEN_0F385E_X86_64_P_0_W_0_M_0,
> VEX_LEN_0F385E_X86_64_P_1_W_0_M_0,
> VEX_LEN_0F385E_X86_64_P_2_W_0_M_0,
> @@ -1565,6 +1567,7 @@ enum
> VEX_W_0F3859,
> VEX_W_0F385A_M_0_L_0,
> VEX_W_0F385C_X86_64_P_1,
> + VEX_W_0F385C_X86_64_P_3,
> VEX_W_0F385E_X86_64_P_0,
> VEX_W_0F385E_X86_64_P_1,
> VEX_W_0F385E_X86_64_P_2,
> @@ -4088,6 +4091,7 @@ static const struct dis386 prefix_table[][4] = {
> { Bad_Opcode },
> { VEX_W_TABLE (VEX_W_0F385C_X86_64_P_1) },
> { Bad_Opcode },
> + { VEX_W_TABLE (VEX_W_0F385C_X86_64_P_3) },
> },
>
> /* PREFIX_VEX_0F385E_X86_64 */
> @@ -7120,6 +7124,11 @@ static const struct dis386 vex_len_table[][2] = {
> { "tdpbf16ps", { TMM, EXtmm, VexTmm }, 0 },
> },
>
> + /* VEX_LEN_0F385C_X86_64_P_3_W_0_M_0 */ {
> + { "tdpfp16ps", { TMM, EXtmm, VexTmm }, 0 }, },
> +
> /* VEX_LEN_0F385E_X86_64_P_0_W_0_M_0 */
> {
> { "tdpbuud", {TMM, EXtmm, VexTmm }, 0 }, @@ -7788,6 +7797,10 @@ static
> const struct dis386 vex_w_table[][2] = {
> /* VEX_W_0F385C_X86_64_P_1 */
> { MOD_TABLE (MOD_VEX_0F385C_X86_64_P_1_W_0) },
> },
> + {
> + /* VEX_W_0F385C_X86_64_P_3 */
> + { MOD_TABLE (MOD_VEX_0F385C_X86_64_P_3_W_0) }, },
> {
> /* VEX_W_0F385E_X86_64_P_0 */
> { MOD_TABLE (MOD_VEX_0F385E_X86_64_P_0_W_0) }, @@ -8610,6
> +8623,11 @@ static const struct dis386 mod_table[][2] = {
> { Bad_Opcode },
> { VEX_LEN_TABLE (VEX_LEN_0F385C_X86_64_P_1_W_0_M_0) },
> },
> + {
> + /* MOD_VEX_0F385C_X86_64_P_3_W_0 */
> + { Bad_Opcode },
> + { VEX_LEN_TABLE (VEX_LEN_0F385C_X86_64_P_3_W_0_M_0) }, },
> {
> /* MOD_VEX_0F385E_X86_64_P_0_W_0 */
> { Bad_Opcode },
> diff --git a/opcodes/i386-gen.c b/opcodes/i386-gen.c index
> 435d67711f..86383ba793 100644
> --- a/opcodes/i386-gen.c
> +++ b/opcodes/i386-gen.c
> @@ -319,6 +319,8 @@ static initializer cpu_flag_init[] =
> "CPU_AMX_TILE_FLAGS|CpuAMX_INT8" },
> { "CPU_AMX_BF16_FLAGS",
> "CPU_AMX_TILE_FLAGS|CpuAMX_BF16" },
> + { "CPU_AMX_FP16_FLAGS",
> + "CPU_AMX_TILE_FLAGS|CpuAMX_FP16" },
> { "CPU_AMX_TILE_FLAGS",
> "CpuAMX_TILE" },
> { "CPU_MOVDIRI_FLAGS",
> @@ -425,8 +427,10 @@ static initializer cpu_flag_init[] =
> "CpuAMX_INT8" },
> { "CPU_ANY_AMX_BF16_FLAGS",
> "CpuAMX_BF16" },
> + { "CPU_ANY_AMX_FP16_FLAGS",
> + "CpuAMX_FP16" },
> { "CPU_ANY_AMX_TILE_FLAGS",
> - "CpuAMX_TILE|CpuAMX_INT8|CpuAMX_BF16" },
> + "CpuAMX_TILE|CpuAMX_INT8|CpuAMX_BF16|CpuAMX_FP16" },
> { "CPU_ANY_AVX_VNNI_FLAGS",
> "CpuAVX_VNNI" },
> { "CPU_ANY_MOVDIRI_FLAGS",
> @@ -692,6 +696,7 @@ static bitfield cpu_flags[] =
> BITFIELD (CpuCLDEMOTE),
> BITFIELD (CpuAMX_INT8),
> BITFIELD (CpuAMX_BF16),
> + BITFIELD (CpuAMX_FP16),
> BITFIELD (CpuAMX_TILE),
> BITFIELD (CpuMOVDIRI),
> BITFIELD (CpuMOVDIR64B),
> diff --git a/opcodes/i386-opc.h b/opcodes/i386-opc.h index
> 75c23aaec6..b548769d75 100644
> --- a/opcodes/i386-opc.h
> +++ b/opcodes/i386-opc.h
> @@ -240,6 +240,8 @@ enum
> CpuAMX_INT8,
> /* AMX-BF16 instructions required */
> CpuAMX_BF16,
> + /* AMX-FP16 instructions required */
> + CpuAMX_FP16,
> /* AMX-TILE instructions required */
> CpuAMX_TILE,
> /* GFNI instructions required */
> @@ -418,6 +420,7 @@ typedef union i386_cpu_flags
> unsigned int cpushstk:1;
> unsigned int cpuamx_int8:1;
> unsigned int cpuamx_bf16:1;
> + unsigned int cpuamx_fp16:1;
> unsigned int cpuamx_tile:1;
> unsigned int cpugfni:1;
> unsigned int cpuvaes:1;
> diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl index
> 42d6423942..6057664193 100644
> --- a/opcodes/i386-opc.tbl
> +++ b/opcodes/i386-opc.tbl
> @@ -3113,6 +3113,7 @@ ldtilecfg, 0x49, None, CpuAMX_TILE|Cpu64,
> Modrm|Vex128|Space0F38|VexW0|No_bSuf|N
> sttilecfg, 0x6649, None, CpuAMX_TILE|Cpu64,
> Modrm|Vex128|Space0F38|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_
> qSuf|No_ldSuf, { Unspecified|BaseIndex }
>
> tdpbf16ps, 0xf35c, None, CpuAMX_BF16|Cpu64,
> Modrm|Vex128|Space0F38|VexVVVV=1|VexW0|SwapSources|No_bSuf|No_w
> Suf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegTMM, RegTMM, RegTMM }
> +tdpfp16ps, 0xf25c, None, CpuAMX_FP16|Cpu64,
> +Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|No_bSuf|No_wS
> uf|No_lSu
> +f|No_sSuf|No_qSuf|No_ldSuf, { RegTMM, RegTMM, RegTMM }
> tdpbssd, 0xf25e, None, CpuAMX_INT8|Cpu64,
> Modrm|Vex128|Space0F38|VexVVVV=1|VexW0|SwapSources|No_bSuf|No_w
> Suf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegTMM, RegTMM, RegTMM }
> tdpbuud, 0x5e, None, CpuAMX_INT8|Cpu64,
> Modrm|Vex128|Space0F38|VexVVVV=1|VexW0|SwapSources|No_bSuf|No_w
> Suf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegTMM, RegTMM, RegTMM }
> tdpbusd, 0x665e, None, CpuAMX_INT8|Cpu64,
> Modrm|Vex128|Space0F38|VexVVVV=1|VexW0|SwapSources|No_bSuf|No_w
> Suf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegTMM, RegTMM, RegTMM }
> --
> 2.17.1
>
> Thanks,
> Lili.
> >> I guess my comment here was a little misleading (I'm sorry for that),
> >> in that in addition I was expecting you to consider the comment
> >> regarding the need for the *_ANY_* constants that I did give for several of
> the patches in this series.
> >> I think the question applies here as well: Are there dependent
> >> features known to appear? If not, there's no need for the extra constant.
> >>
> > Get you, there are no known dependencies now, and I will pay attention on
> it in future ISAs.
>
> Despite this reply, ...
>
> > --- a/gas/config/tc-i386.c
> > +++ b/gas/config/tc-i386.c
> > @@ -1076,6 +1076,7 @@ static const arch_entry cpu_arch[] =
> > SUBARCH (cldemote, CLDEMOTE, CLDEMOTE, false),
> > SUBARCH (amx_int8, AMX_INT8, ANY_AMX_INT8, false),
> > SUBARCH (amx_bf16, AMX_BF16, ANY_AMX_BF16, false),
> > + SUBARCH (amx_fp16, AMX_FP16, ANY_AMX_FP16, false),
>
> ... you still use ANY_... here and ...
>
> > --- a/opcodes/i386-gen.c
> > +++ b/opcodes/i386-gen.c
> > @@ -319,6 +319,8 @@ static initializer cpu_flag_init[] =
> > "CPU_AMX_TILE_FLAGS|CpuAMX_INT8" },
> > { "CPU_AMX_BF16_FLAGS",
> > "CPU_AMX_TILE_FLAGS|CpuAMX_BF16" },
> > + { "CPU_AMX_FP16_FLAGS",
> > + "CPU_AMX_TILE_FLAGS|CpuAMX_FP16" },
> > { "CPU_AMX_TILE_FLAGS",
> > "CpuAMX_TILE" },
> > { "CPU_MOVDIRI_FLAGS",
> > @@ -425,8 +427,10 @@ static initializer cpu_flag_init[] =
> > "CpuAMX_INT8" },
> > { "CPU_ANY_AMX_BF16_FLAGS",
> > "CpuAMX_BF16" },
> > + { "CPU_ANY_AMX_FP16_FLAGS",
> > + "CpuAMX_FP16" },
>
> ... you request it to be constructed here.
HAHA, you mean if there is no instruction that depends on CpuAMX_FP16, we don’t need to add *ANY* for it in arch_entry cpu_arch[]? Maybe when the dependency happens, we need to change it back.
Lili.
>
> Jan
On 19.10.2022 16:05, Cui, Lili wrote:
>
>>>> I guess my comment here was a little misleading (I'm sorry for that),
>>>> in that in addition I was expecting you to consider the comment
>>>> regarding the need for the *_ANY_* constants that I did give for several of
>> the patches in this series.
>>>> I think the question applies here as well: Are there dependent
>>>> features known to appear? If not, there's no need for the extra constant.
>>>>
>>> Get you, there are no known dependencies now, and I will pay attention on
>> it in future ISAs.
>>
>> Despite this reply, ...
>>
>>> --- a/gas/config/tc-i386.c
>>> +++ b/gas/config/tc-i386.c
>>> @@ -1076,6 +1076,7 @@ static const arch_entry cpu_arch[] =
>>> SUBARCH (cldemote, CLDEMOTE, CLDEMOTE, false),
>>> SUBARCH (amx_int8, AMX_INT8, ANY_AMX_INT8, false),
>>> SUBARCH (amx_bf16, AMX_BF16, ANY_AMX_BF16, false),
>>> + SUBARCH (amx_fp16, AMX_FP16, ANY_AMX_FP16, false),
>>
>> ... you still use ANY_... here and ...
>>
>>> --- a/opcodes/i386-gen.c
>>> +++ b/opcodes/i386-gen.c
>>> @@ -319,6 +319,8 @@ static initializer cpu_flag_init[] =
>>> "CPU_AMX_TILE_FLAGS|CpuAMX_INT8" },
>>> { "CPU_AMX_BF16_FLAGS",
>>> "CPU_AMX_TILE_FLAGS|CpuAMX_BF16" },
>>> + { "CPU_AMX_FP16_FLAGS",
>>> + "CPU_AMX_TILE_FLAGS|CpuAMX_FP16" },
>>> { "CPU_AMX_TILE_FLAGS",
>>> "CpuAMX_TILE" },
>>> { "CPU_MOVDIRI_FLAGS",
>>> @@ -425,8 +427,10 @@ static initializer cpu_flag_init[] =
>>> "CpuAMX_INT8" },
>>> { "CPU_ANY_AMX_BF16_FLAGS",
>>> "CpuAMX_BF16" },
>>> + { "CPU_ANY_AMX_FP16_FLAGS",
>>> + "CpuAMX_FP16" },
>>
>> ... you request it to be constructed here.
>
> HAHA, you mean if there is no instruction that depends on CpuAMX_FP16, we don’t need to add *ANY* for it in arch_entry cpu_arch[]? Maybe when the dependency happens, we need to change it back.
Right, that's why I did ask whether there are dependent features known to
appear. If you go look you'll find that we don't have ANY_* for a fair
part (if not a majority) of the features.
Jan
On 19.10.2022 16:01, Jiang, Haochen wrote:
>> -----Original Message-----
>> From: Cui, Lili <lili.cui@intel.com>
>> Sent: Wednesday, October 19, 2022 6:34 PM
>>
>>>>>> - "CpuAMX_TILE|CpuAMX_INT8|CpuAMX_BF16" },
>>>>>> + "CpuAMX_TILE|CpuAMX_INT8|CpuAMX_BF16|CpuAMX_FP16" },
>>>>>> { "CPU_ANY_MSRLIST_FLAGS",
>>>>>> "CpuMSRLIST" },
>>>>>> + { "CPU_ANY_AMX_FP16_FLAGS",
>>>>>> + "CpuAMX_FP16" },
>>>>>> };
>>>>>
>>>>> Same here then.
>>>> Done.
>>>
>>> I guess my comment here was a little misleading (I'm sorry for that),
>>> in that in addition I was expecting you to consider the comment
>>> regarding the need for the *_ANY_* constants that I did give for several of the
>> patches in this series.
>>> I think the question applies here as well: Are there dependent
>>> features known to appear? If not, there's no need for the extra constant.
>>>
>> Get you, there are no known dependencies now, and I will pay attention on it in
>> future ISAs.
>
> But I suppose for AMX-FP16, the possibility of potential dependency is bigger than
> some like CMPCCXADD, MSRLIST. Since FP16 type is not that rare.
If you go look you'll find that we don't have ANY_* for a fair part (if
not a majority) of the features. Plus AMX-FP16 as a feature name suggests
more generality than there really is - the feature is about a specific
multiplication insn, not (like e.g. AVX512-FP16) about the introduction of
a set of insns on a basic data type (where the set of insns could of
course widen down the road, and where the further ISAs would depend on the
more basic operations on that data type).
Jan
> >>> --- a/opcodes/i386-gen.c
> >>> +++ b/opcodes/i386-gen.c
> >>> @@ -319,6 +319,8 @@ static initializer cpu_flag_init[] =
> >>> "CPU_AMX_TILE_FLAGS|CpuAMX_INT8" },
> >>> { "CPU_AMX_BF16_FLAGS",
> >>> "CPU_AMX_TILE_FLAGS|CpuAMX_BF16" },
> >>> + { "CPU_AMX_FP16_FLAGS",
> >>> + "CPU_AMX_TILE_FLAGS|CpuAMX_FP16" },
> >>> { "CPU_AMX_TILE_FLAGS",
> >>> "CpuAMX_TILE" },
> >>> { "CPU_MOVDIRI_FLAGS",
> >>> @@ -425,8 +427,10 @@ static initializer cpu_flag_init[] =
> >>> "CpuAMX_INT8" },
> >>> { "CPU_ANY_AMX_BF16_FLAGS",
> >>> "CpuAMX_BF16" },
> >>> + { "CPU_ANY_AMX_FP16_FLAGS",
> >>> + "CpuAMX_FP16" },
> >>
> >> ... you request it to be constructed here.
> >
> > HAHA, you mean if there is no instruction that depends on CpuAMX_FP16,
> we don’t need to add *ANY* for it in arch_entry cpu_arch[]? Maybe when
> the dependency happens, we need to change it back.
>
> Right, that's why I did ask whether there are dependent features known to
> appear. If you go look you'll find that we don't have ANY_* for a fair part (if
> not a majority) of the features).
I removed ANY_* for AMX_FP16. If there are instructions that depend on amx_fp16 in the future, we will add ANY_* for it. Thanks Jan.
gas/
* NEWS: Add support for Intel AMX-FP16 instruction.
* config/tc-i386.c: Add amx_fp16.
* doc/c-i386.texi: Document .amx_fp16.
* testsuite/gas/i386/i386.exp: Add AMX-FP16 tests.
* testsuite/gas/i386/x86-64-amx-fp16-intel.d: New test.
* testsuite/gas/i386/x86-64-amx-fp16.d: Likewise.
* testsuite/gas/i386/x86-64-amx-fp16.s: Likewise.
* testsuite/gas/i386/x86-64-amx-fp16-bad.d: Likewise.
* testsuite/gas/i386/x86-64-amx-fp16-bad.s: Likewise.
opcodes/
* i386-dis.c (MOD_VEX_0F385C_X86_64_P_3_W_0): New.
(VEX_LEN_0F385C_X86_64_P_3_W_0_M_0): Likewise.
(VEX_W_0F385C_X86_64_P_3): Likewise.
(prefix_table): Add VEX_W_0F385C_X86_64_P_3.
(vex_len_table): Add VEX_LEN_0F385C_X86_64_P_3_W_0_M_0.
(vex_w_table): Add VEX_W_0F385C_X86_64_P_3.
(mod_table): Add MOD_VEX_0F385C_X86_64_P_3_W_0.
* i386-gen.c (cpu_flag_init): Add CPU_AMX_FP16_FLAGS and
CPU_ANY_AMX_FP16_FLAGS.
(CPU_ANY_AMX_TILE_FLAGS): Add CpuAMX_FP16.
(cpu_flags): Add CpuAMX_FP16.
* i386-opc.h (enum): Add CpuAMX_FP16.
(i386_cpu_flags): Add cpuamx_fp16.
* i386-opc.tbl: Add Intel AMX-FP16 instruction.
* i386-init.h: Regenerate.
* i386-tbl.h: Likewise.
---
gas/NEWS | 2 ++
gas/config/tc-i386.c | 1 +
gas/doc/c-i386.texi | 3 +-
gas/testsuite/gas/i386/i386.exp | 3 ++
gas/testsuite/gas/i386/x86-64-amx-fp16-bad.d | 19 ++++++++++
gas/testsuite/gas/i386/x86-64-amx-fp16-bad.s | 35 +++++++++++++++++++
.../gas/i386/x86-64-amx-fp16-intel.d | 13 +++++++
gas/testsuite/gas/i386/x86-64-amx-fp16.d | 13 +++++++
gas/testsuite/gas/i386/x86-64-amx-fp16.s | 9 +++++
opcodes/i386-dis.c | 18 ++++++++++
opcodes/i386-gen.c | 7 +++-
opcodes/i386-opc.h | 3 ++
opcodes/i386-opc.tbl | 1 +
13 files changed, 125 insertions(+), 2 deletions(-)
create mode 100644 gas/testsuite/gas/i386/x86-64-amx-fp16-bad.d
create mode 100644 gas/testsuite/gas/i386/x86-64-amx-fp16-bad.s
create mode 100644 gas/testsuite/gas/i386/x86-64-amx-fp16-intel.d
create mode 100644 gas/testsuite/gas/i386/x86-64-amx-fp16.d
create mode 100644 gas/testsuite/gas/i386/x86-64-amx-fp16.s
diff --git a/gas/NEWS b/gas/NEWS
index 3246e7e825..961449545d 100644
--- a/gas/NEWS
+++ b/gas/NEWS
@@ -1,5 +1,7 @@
-*- text -*-
+* Add support for Intel AMX-FP16 instructions.
+
* Add support for Intel MSRLIST instructions.
* Add support for Intel WRMSRNS instructions.
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index c9432e4188..12ed33ff0a 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -1076,6 +1076,7 @@ static const arch_entry cpu_arch[] =
SUBARCH (cldemote, CLDEMOTE, CLDEMOTE, false),
SUBARCH (amx_int8, AMX_INT8, ANY_AMX_INT8, false),
SUBARCH (amx_bf16, AMX_BF16, ANY_AMX_BF16, false),
+ SUBARCH (amx_fp16, AMX_FP16, AMX_FP16, false),
SUBARCH (amx_tile, AMX_TILE, ANY_AMX_TILE, false),
SUBARCH (movdiri, MOVDIRI, ANY_MOVDIRI, false),
SUBARCH (movdir64b, MOVDIR64B, ANY_MOVDIR64B, false),
diff --git a/gas/doc/c-i386.texi b/gas/doc/c-i386.texi
index 49582b29a6..b739d5f32e 100644
--- a/gas/doc/c-i386.texi
+++ b/gas/doc/c-i386.texi
@@ -203,6 +203,7 @@ accept various extension mnemonics. For example,
@code{msrlist},
@code{amx_int8},
@code{amx_bf16},
+@code{amx_fp16},
@code{amx_tile},
@code{vmx},
@code{vmfunc},
@@ -1499,7 +1500,7 @@ supported on the CPU specified. The choices for @var{cpu_type} are:
@item @samp{.wbnoinvd} @tab @samp{.pconfig} @tab @samp{.waitpkg} @tab @samp{.cldemote}
@item @samp{.shstk} @tab @samp{.gfni} @tab @samp{.vaes} @tab @samp{.vpclmulqdq}
@item @samp{.movdiri} @tab @samp{.movdir64b} @tab @samp{.enqcmd} @tab @samp{.tsxldtrk}
-@item @samp{.amx_int8} @tab @samp{.amx_bf16} @tab @samp{.amx_tile}
+@item @samp{.amx_int8} @tab @samp{.amx_bf16} @tab @samp{.amx_fp16} @tab @samp{.amx_tile}
@item @samp{.kl} @tab @samp{.widekl} @tab @samp{.uintr} @tab @samp{.hreset}
@item @samp{.3dnow} @tab @samp{.3dnowa} @tab @samp{.sse4a} @tab @samp{.sse5}
@item @samp{.syscall} @tab @samp{.rdtscp} @tab @samp{.svme}
diff --git a/gas/testsuite/gas/i386/i386.exp b/gas/testsuite/gas/i386/i386.exp
index 5da64b4076..9f5fa7f612 100644
--- a/gas/testsuite/gas/i386/i386.exp
+++ b/gas/testsuite/gas/i386/i386.exp
@@ -1173,6 +1173,9 @@ if [gas_64_check] then {
run_dump_test "x86-64-wrmsrns-intel"
run_dump_test "x86-64-msrlist"
run_dump_test "x86-64-msrlist-intel"
+ run_dump_test "x86-64-amx-fp16"
+ run_dump_test "x86-64-amx-fp16-intel"
+ run_dump_test "x86-64-amx-fp16-bad"
run_dump_test "x86-64-clzero"
run_dump_test "x86-64-mwaitx-bdver4"
run_list_test "x86-64-mwaitx-reg"
diff --git a/gas/testsuite/gas/i386/x86-64-amx-fp16-bad.d b/gas/testsuite/gas/i386/x86-64-amx-fp16-bad.d
new file mode 100644
index 0000000000..a53ebf486d
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-fp16-bad.d
@@ -0,0 +1,19 @@
+#as:
+#objdump: -drw
+#name: x86_64 Illegal AMX-FP16 insns
+#source: x86-64-amx-fp16-bad.s
+
+.*: +file format .*
+
+
+Disassembly of section \.text:
+
+0+ <\.text>:
+[ ]*[a-f0-9]+:[ ]*c4 e2 d3 5c[ ]*\(bad\)[ ]*
+[ ]*[a-f0-9]+:[ ]*dc 90 90 90 90 90[ ]*fcoml.*
+[ ]*[a-f0-9]+:[ ]*c4 e2 57 5c[ ]*\(bad\)[ ]*
+[ ]*[a-f0-9]+:[ ]*dc 90 90 90 90 90[ ]*fcoml.*
+[ ]*[a-f0-9]+:[ ]*c4 62 53 5c dc[ ]*tdpfp16ps %tmm5,%tmm4,\(bad\)
+[ ]*[a-f0-9]+:[ ]*c4 c2 53 5c dc[ ]*tdpfp16ps %tmm5,\(bad\),%tmm3
+[ ]*[a-f0-9]+:[ ]*c4 e2 33 5c dc[ ]*tdpfp16ps \(bad\),%tmm4,%tmm3
+#pass
diff --git a/gas/testsuite/gas/i386/x86-64-amx-fp16-bad.s b/gas/testsuite/gas/i386/x86-64-amx-fp16-bad.s
new file mode 100644
index 0000000000..da5be1086e
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-fp16-bad.s
@@ -0,0 +1,35 @@
+# Check Illegal 64bit AMX-FP16 instructions
+
+.text
+ #tdpfp16ps %tmm5,%tmm4,%tmm3 set VEX.W = 1 (illegal value).
+ .byte 0xc4
+ .byte 0xe2
+ .byte 0xd3
+ .byte 0x5c
+ .byte 0xdc
+ .fill 0x05, 0x01, 0x90
+ #tdpfp16ps %tmm5,%tmm4,%tmm3 set VEX.L = 1 (illegal value).
+ .byte 0xc4
+ .byte 0xe2
+ .byte 0x57
+ .byte 0x5c
+ .byte 0xdc
+ .fill 0x05, 0x01, 0x90
+ #tdpfp16ps %tmm5,%tmm4,%tmm3 set VEX.R = 0 (illegal value).
+ .byte 0xc4
+ .byte 0x62
+ .byte 0x53
+ .byte 0x5c
+ .byte 0xdc
+ #tdpfp16ps %tmm5,%tmm4,%tmm3 set VEX.B = 0 (illegal value).
+ .byte 0xc4
+ .byte 0xc2
+ .byte 0x53
+ .byte 0x5c
+ .byte 0xdc
+ #tdpfp16ps %tmm5,%tmm4,%tmm3 set VEX.VVVV = 0110 (illegal value).
+ .byte 0xc4
+ .byte 0xe2
+ .byte 0x33
+ .byte 0x5c
+ .byte 0xdc
diff --git a/gas/testsuite/gas/i386/x86-64-amx-fp16-intel.d b/gas/testsuite/gas/i386/x86-64-amx-fp16-intel.d
new file mode 100644
index 0000000000..497898b760
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-fp16-intel.d
@@ -0,0 +1,13 @@
+#as:
+#objdump: -d -Mintel
+#name: x86_64 AMX-FP16 insns (Intel disassembly)
+#source: x86-64-amx-fp16.s
+
+.*: +file format .*
+
+
+Disassembly of section \.text:
+
+0+ <_start>:
+[ ]*[a-f0-9]+:[ ]*c4 e2 53 5c dc[ ]*tdpfp16ps tmm3,tmm4,tmm5
+[ ]*[a-f0-9]+:[ ]*c4 e2 53 5c dc[ ]*tdpfp16ps tmm3,tmm4,tmm5
diff --git a/gas/testsuite/gas/i386/x86-64-amx-fp16.d b/gas/testsuite/gas/i386/x86-64-amx-fp16.d
new file mode 100644
index 0000000000..7d3af95a4d
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-fp16.d
@@ -0,0 +1,13 @@
+#as:
+#objdump: -dw
+#name: x86_64 AMX-FP16 insns
+#source: x86-64-amx-fp16.s
+
+.*: +file format .*
+
+
+Disassembly of section \.text:
+
+0+ <_start>:
+[ ]*[a-f0-9]+:[ ]*c4 e2 53 5c dc[ ]*tdpfp16ps %tmm5,%tmm4,%tmm3
+[ ]*[a-f0-9]+:[ ]*c4 e2 53 5c dc[ ]*tdpfp16ps %tmm5,%tmm4,%tmm3
diff --git a/gas/testsuite/gas/i386/x86-64-amx-fp16.s b/gas/testsuite/gas/i386/x86-64-amx-fp16.s
new file mode 100644
index 0000000000..5a007904ed
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-fp16.s
@@ -0,0 +1,9 @@
+# Check 64bit AMX-FP16 instructions
+
+ .allow_index_reg
+ .text
+_start:
+ tdpfp16ps %tmm5, %tmm4, %tmm3
+
+.intel_syntax noprefix
+ tdpfp16ps tmm3, tmm4, tmm5
diff --git a/opcodes/i386-dis.c b/opcodes/i386-dis.c
index 0aa41bd5fb..60712c7c5b 100644
--- a/opcodes/i386-dis.c
+++ b/opcodes/i386-dis.c
@@ -933,6 +933,7 @@ enum
MOD_VEX_0F384B_X86_64_P_3_W_0,
MOD_VEX_0F385A,
MOD_VEX_0F385C_X86_64_P_1_W_0,
+ MOD_VEX_0F385C_X86_64_P_3_W_0,
MOD_VEX_0F385E_X86_64_P_0_W_0,
MOD_VEX_0F385E_X86_64_P_1_W_0,
MOD_VEX_0F385E_X86_64_P_2_W_0,
@@ -1399,6 +1400,7 @@ enum
VEX_LEN_0F384B_X86_64_P_3_W_0_M_0,
VEX_LEN_0F385A_M_0,
VEX_LEN_0F385C_X86_64_P_1_W_0_M_0,
+ VEX_LEN_0F385C_X86_64_P_3_W_0_M_0,
VEX_LEN_0F385E_X86_64_P_0_W_0_M_0,
VEX_LEN_0F385E_X86_64_P_1_W_0_M_0,
VEX_LEN_0F385E_X86_64_P_2_W_0_M_0,
@@ -1565,6 +1567,7 @@ enum
VEX_W_0F3859,
VEX_W_0F385A_M_0_L_0,
VEX_W_0F385C_X86_64_P_1,
+ VEX_W_0F385C_X86_64_P_3,
VEX_W_0F385E_X86_64_P_0,
VEX_W_0F385E_X86_64_P_1,
VEX_W_0F385E_X86_64_P_2,
@@ -4088,6 +4091,7 @@ static const struct dis386 prefix_table[][4] = {
{ Bad_Opcode },
{ VEX_W_TABLE (VEX_W_0F385C_X86_64_P_1) },
{ Bad_Opcode },
+ { VEX_W_TABLE (VEX_W_0F385C_X86_64_P_3) },
},
/* PREFIX_VEX_0F385E_X86_64 */
@@ -7120,6 +7124,11 @@ static const struct dis386 vex_len_table[][2] = {
{ "tdpbf16ps", { TMM, EXtmm, VexTmm }, 0 },
},
+ /* VEX_LEN_0F385C_X86_64_P_3_W_0_M_0 */
+ {
+ { "tdpfp16ps", { TMM, EXtmm, VexTmm }, 0 },
+ },
+
/* VEX_LEN_0F385E_X86_64_P_0_W_0_M_0 */
{
{ "tdpbuud", {TMM, EXtmm, VexTmm }, 0 },
@@ -7788,6 +7797,10 @@ static const struct dis386 vex_w_table[][2] = {
/* VEX_W_0F385C_X86_64_P_1 */
{ MOD_TABLE (MOD_VEX_0F385C_X86_64_P_1_W_0) },
},
+ {
+ /* VEX_W_0F385C_X86_64_P_3 */
+ { MOD_TABLE (MOD_VEX_0F385C_X86_64_P_3_W_0) },
+ },
{
/* VEX_W_0F385E_X86_64_P_0 */
{ MOD_TABLE (MOD_VEX_0F385E_X86_64_P_0_W_0) },
@@ -8610,6 +8623,11 @@ static const struct dis386 mod_table[][2] = {
{ Bad_Opcode },
{ VEX_LEN_TABLE (VEX_LEN_0F385C_X86_64_P_1_W_0_M_0) },
},
+ {
+ /* MOD_VEX_0F385C_X86_64_P_3_W_0 */
+ { Bad_Opcode },
+ { VEX_LEN_TABLE (VEX_LEN_0F385C_X86_64_P_3_W_0_M_0) },
+ },
{
/* MOD_VEX_0F385E_X86_64_P_0_W_0 */
{ Bad_Opcode },
diff --git a/opcodes/i386-gen.c b/opcodes/i386-gen.c
index 435d67711f..86383ba793 100644
--- a/opcodes/i386-gen.c
+++ b/opcodes/i386-gen.c
@@ -319,6 +319,8 @@ static initializer cpu_flag_init[] =
"CPU_AMX_TILE_FLAGS|CpuAMX_INT8" },
{ "CPU_AMX_BF16_FLAGS",
"CPU_AMX_TILE_FLAGS|CpuAMX_BF16" },
+ { "CPU_AMX_FP16_FLAGS",
+ "CPU_AMX_TILE_FLAGS|CpuAMX_FP16" },
{ "CPU_AMX_TILE_FLAGS",
"CpuAMX_TILE" },
{ "CPU_MOVDIRI_FLAGS",
@@ -425,8 +427,10 @@ static initializer cpu_flag_init[] =
"CpuAMX_INT8" },
{ "CPU_ANY_AMX_BF16_FLAGS",
"CpuAMX_BF16" },
+ { "CPU_ANY_AMX_FP16_FLAGS",
+ "CpuAMX_FP16" },
{ "CPU_ANY_AMX_TILE_FLAGS",
- "CpuAMX_TILE|CpuAMX_INT8|CpuAMX_BF16" },
+ "CpuAMX_TILE|CpuAMX_INT8|CpuAMX_BF16|CpuAMX_FP16" },
{ "CPU_ANY_AVX_VNNI_FLAGS",
"CpuAVX_VNNI" },
{ "CPU_ANY_MOVDIRI_FLAGS",
@@ -692,6 +696,7 @@ static bitfield cpu_flags[] =
BITFIELD (CpuCLDEMOTE),
BITFIELD (CpuAMX_INT8),
BITFIELD (CpuAMX_BF16),
+ BITFIELD (CpuAMX_FP16),
BITFIELD (CpuAMX_TILE),
BITFIELD (CpuMOVDIRI),
BITFIELD (CpuMOVDIR64B),
diff --git a/opcodes/i386-opc.h b/opcodes/i386-opc.h
index 75c23aaec6..b548769d75 100644
--- a/opcodes/i386-opc.h
+++ b/opcodes/i386-opc.h
@@ -240,6 +240,8 @@ enum
CpuAMX_INT8,
/* AMX-BF16 instructions required */
CpuAMX_BF16,
+ /* AMX-FP16 instructions required */
+ CpuAMX_FP16,
/* AMX-TILE instructions required */
CpuAMX_TILE,
/* GFNI instructions required */
@@ -418,6 +420,7 @@ typedef union i386_cpu_flags
unsigned int cpushstk:1;
unsigned int cpuamx_int8:1;
unsigned int cpuamx_bf16:1;
+ unsigned int cpuamx_fp16:1;
unsigned int cpuamx_tile:1;
unsigned int cpugfni:1;
unsigned int cpuvaes:1;
diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl
index 42d6423942..6057664193 100644
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -3113,6 +3113,7 @@ ldtilecfg, 0x49, None, CpuAMX_TILE|Cpu64, Modrm|Vex128|Space0F38|VexW0|No_bSuf|N
sttilecfg, 0x6649, None, CpuAMX_TILE|Cpu64, Modrm|Vex128|Space0F38|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Unspecified|BaseIndex }
tdpbf16ps, 0xf35c, None, CpuAMX_BF16|Cpu64, Modrm|Vex128|Space0F38|VexVVVV=1|VexW0|SwapSources|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegTMM, RegTMM, RegTMM }
+tdpfp16ps, 0xf25c, None, CpuAMX_FP16|Cpu64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegTMM, RegTMM, RegTMM }
tdpbssd, 0xf25e, None, CpuAMX_INT8|Cpu64, Modrm|Vex128|Space0F38|VexVVVV=1|VexW0|SwapSources|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegTMM, RegTMM, RegTMM }
tdpbuud, 0x5e, None, CpuAMX_INT8|Cpu64, Modrm|Vex128|Space0F38|VexVVVV=1|VexW0|SwapSources|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegTMM, RegTMM, RegTMM }
tdpbusd, 0x665e, None, CpuAMX_INT8|Cpu64, Modrm|Vex128|Space0F38|VexVVVV=1|VexW0|SwapSources|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegTMM, RegTMM, RegTMM }
--
2.17.1
Regards,
Lili.
> -----Original Message-----
> From: Jan Beulich <jbeulich@suse.com>
> Sent: Wednesday, October 19, 2022 10:13 PM
> To: Jiang, Haochen <haochen.jiang@intel.com>
> Cc: hjl.tools@gmail.com; binutils@sourceware.org; Cui, Lili <lili.cui@intel.com>
> Subject: Re: [PATCH 09/10] Support Intel AMX-FP16
>
> On 19.10.2022 16:01, Jiang, Haochen wrote:
> >> -----Original Message-----
> >> From: Cui, Lili <lili.cui@intel.com>
> >> Sent: Wednesday, October 19, 2022 6:34 PM
> >>
> >>>>>> - "CpuAMX_TILE|CpuAMX_INT8|CpuAMX_BF16" },
> >>>>>> + "CpuAMX_TILE|CpuAMX_INT8|CpuAMX_BF16|CpuAMX_FP16" },
> >>>>>> { "CPU_ANY_MSRLIST_FLAGS",
> >>>>>> "CpuMSRLIST" },
> >>>>>> + { "CPU_ANY_AMX_FP16_FLAGS",
> >>>>>> + "CpuAMX_FP16" },
> >>>>>> };
> >>>>>
> >>>>> Same here then.
> >>>> Done.
> >>>
> >>> I guess my comment here was a little misleading (I'm sorry for
> >>> that), in that in addition I was expecting you to consider the
> >>> comment regarding the need for the *_ANY_* constants that I did give
> >>> for several of the
> >> patches in this series.
> >>> I think the question applies here as well: Are there dependent
> >>> features known to appear? If not, there's no need for the extra constant.
> >>>
> >> Got it; there are no known dependencies now, and I will pay
> >> attention to this in future ISAs.
> >
> > But I suppose for AMX-FP16, the possibility of potential dependency is
> > bigger than some like CMPCCXADD, MSRLIST. Since FP16 type is not that rare.
>
> If you go look you'll find that we don't have ANY_* for a fair part (if not a
> majority) of the features. Plus AMX-FP16 as a feature name suggests more
> generality than there really is - the feature is about a specific multiplication insn,
> not (like e.g. AVX512-FP16) about the introduction of a set of insns on a basic
> data type (where the set of insns could of course widen down the road, and
> where the further ISAs would depend on the more basic operations on that data
> type).
>
I see your point; it is probably more reasonable to assume that nothing will
depend on that ISA.
Haochen
> Jan
On 19.10.2022 16:41, Cui, Lili wrote:
>>>>> --- a/opcodes/i386-gen.c
>>>>> +++ b/opcodes/i386-gen.c
>>>>> @@ -319,6 +319,8 @@ static initializer cpu_flag_init[] =
>>>>> "CPU_AMX_TILE_FLAGS|CpuAMX_INT8" },
>>>>> { "CPU_AMX_BF16_FLAGS",
>>>>> "CPU_AMX_TILE_FLAGS|CpuAMX_BF16" },
>>>>> + { "CPU_AMX_FP16_FLAGS",
>>>>> + "CPU_AMX_TILE_FLAGS|CpuAMX_FP16" },
>>>>> { "CPU_AMX_TILE_FLAGS",
>>>>> "CpuAMX_TILE" },
>>>>> { "CPU_MOVDIRI_FLAGS",
>>>>> @@ -425,8 +427,10 @@ static initializer cpu_flag_init[] =
>>>>> "CpuAMX_INT8" },
>>>>> { "CPU_ANY_AMX_BF16_FLAGS",
>>>>> "CpuAMX_BF16" },
>>>>> + { "CPU_ANY_AMX_FP16_FLAGS",
>>>>> + "CpuAMX_FP16" },
>>>>
>>>> ... you request it to be constructed here.
>>>
>>> HAHA, you mean if there is no instruction that depends on CpuAMX_FP16,
>> we don’t need to add *ANY* for it in arch_entry cpu_arch[]? Maybe when
>> the dependency happens, we need to change it back.
>>
>> Right, that's why I did ask whether there are dependent features known to
>> appear. If you go look you'll find that we don't have ANY_* for a fair part (if
>> not a majority) of the features.
>
> I removed ANY_* for AMX_FP16,
You did remove its use, but not ...
> --- a/opcodes/i386-gen.c
> +++ b/opcodes/i386-gen.c
> @@ -319,6 +319,8 @@ static initializer cpu_flag_init[] =
> "CPU_AMX_TILE_FLAGS|CpuAMX_INT8" },
> { "CPU_AMX_BF16_FLAGS",
> "CPU_AMX_TILE_FLAGS|CpuAMX_BF16" },
> + { "CPU_AMX_FP16_FLAGS",
> + "CPU_AMX_TILE_FLAGS|CpuAMX_FP16" },
> { "CPU_AMX_TILE_FLAGS",
> "CpuAMX_TILE" },
> { "CPU_MOVDIRI_FLAGS",
> @@ -425,8 +427,10 @@ static initializer cpu_flag_init[] =
> "CpuAMX_INT8" },
> { "CPU_ANY_AMX_BF16_FLAGS",
> "CpuAMX_BF16" },
> + { "CPU_ANY_AMX_FP16_FLAGS",
> + "CpuAMX_FP16" },
... it being created. This is the only remaining issue I see.
Jan
> > @@ -425,8 +427,10 @@ static initializer cpu_flag_init[] =
> > "CpuAMX_INT8" },
> > { "CPU_ANY_AMX_BF16_FLAGS",
> > "CpuAMX_BF16" },
> > + { "CPU_ANY_AMX_FP16_FLAGS",
> > + "CpuAMX_FP16" },
>
> ... it being created. This is the only remaining issue I see.
Done, thanks for your time and comments.
gas/
* NEWS: Add support for Intel AMX-FP16 instruction.
* config/tc-i386.c: Add amx_fp16.
* doc/c-i386.texi: Document .amx_fp16.
* testsuite/gas/i386/i386.exp: Add AMX-FP16 tests.
* testsuite/gas/i386/x86-64-amx-fp16-intel.d: New test.
* testsuite/gas/i386/x86-64-amx-fp16.d: Likewise.
* testsuite/gas/i386/x86-64-amx-fp16.s: Likewise.
* testsuite/gas/i386/x86-64-amx-fp16-bad.d: Likewise.
* testsuite/gas/i386/x86-64-amx-fp16-bad.s: Likewise.
opcodes/
* i386-dis.c (MOD_VEX_0F385C_X86_64_P_3_W_0): New.
(VEX_LEN_0F385C_X86_64_P_3_W_0_M_0): Likewise.
(VEX_W_0F385C_X86_64_P_3): Likewise.
(prefix_table): Add VEX_W_0F385C_X86_64_P_3.
(vex_len_table): Add VEX_LEN_0F385C_X86_64_P_3_W_0_M_0.
(vex_w_table): Add VEX_W_0F385C_X86_64_P_3.
(mod_table): Add MOD_VEX_0F385C_X86_64_P_3_W_0.
* i386-gen.c (cpu_flag_init): Add CPU_AMX_FP16_FLAGS.
(CPU_ANY_AMX_TILE_FLAGS): Add CpuAMX_FP16.
(cpu_flags): Add CpuAMX_FP16.
* i386-opc.h (enum): Add CpuAMX_FP16.
(i386_cpu_flags): Add cpuamx_fp16.
* i386-opc.tbl: Add Intel AMX-FP16 instruction.
* i386-init.h: Regenerate.
* i386-tbl.h: Likewise.
---
gas/NEWS | 2 ++
gas/config/tc-i386.c | 1 +
gas/doc/c-i386.texi | 3 +-
gas/testsuite/gas/i386/i386.exp | 3 ++
gas/testsuite/gas/i386/x86-64-amx-fp16-bad.d | 19 ++++++++++
gas/testsuite/gas/i386/x86-64-amx-fp16-bad.s | 35 +++++++++++++++++++
.../gas/i386/x86-64-amx-fp16-intel.d | 13 +++++++
gas/testsuite/gas/i386/x86-64-amx-fp16.d | 13 +++++++
gas/testsuite/gas/i386/x86-64-amx-fp16.s | 9 +++++
opcodes/i386-dis.c | 18 ++++++++++
opcodes/i386-gen.c | 5 ++-
opcodes/i386-opc.h | 3 ++
opcodes/i386-opc.tbl | 1 +
13 files changed, 123 insertions(+), 2 deletions(-)
create mode 100644 gas/testsuite/gas/i386/x86-64-amx-fp16-bad.d
create mode 100644 gas/testsuite/gas/i386/x86-64-amx-fp16-bad.s
create mode 100644 gas/testsuite/gas/i386/x86-64-amx-fp16-intel.d
create mode 100644 gas/testsuite/gas/i386/x86-64-amx-fp16.d
create mode 100644 gas/testsuite/gas/i386/x86-64-amx-fp16.s
diff --git a/gas/NEWS b/gas/NEWS
index 3246e7e825..961449545d 100644
--- a/gas/NEWS
+++ b/gas/NEWS
@@ -1,5 +1,7 @@
-*- text -*-
+* Add support for Intel AMX-FP16 instructions.
+
* Add support for Intel MSRLIST instructions.
* Add support for Intel WRMSRNS instructions.
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index c9432e4188..12ed33ff0a 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -1076,6 +1076,7 @@ static const arch_entry cpu_arch[] =
SUBARCH (cldemote, CLDEMOTE, CLDEMOTE, false),
SUBARCH (amx_int8, AMX_INT8, ANY_AMX_INT8, false),
SUBARCH (amx_bf16, AMX_BF16, ANY_AMX_BF16, false),
+ SUBARCH (amx_fp16, AMX_FP16, AMX_FP16, false),
SUBARCH (amx_tile, AMX_TILE, ANY_AMX_TILE, false),
SUBARCH (movdiri, MOVDIRI, ANY_MOVDIRI, false),
SUBARCH (movdir64b, MOVDIR64B, ANY_MOVDIR64B, false),
diff --git a/gas/doc/c-i386.texi b/gas/doc/c-i386.texi
index 49582b29a6..b739d5f32e 100644
--- a/gas/doc/c-i386.texi
+++ b/gas/doc/c-i386.texi
@@ -203,6 +203,7 @@ accept various extension mnemonics. For example,
@code{msrlist},
@code{amx_int8},
@code{amx_bf16},
+@code{amx_fp16},
@code{amx_tile},
@code{vmx},
@code{vmfunc},
@@ -1499,7 +1500,7 @@ supported on the CPU specified. The choices for @var{cpu_type} are:
@item @samp{.wbnoinvd} @tab @samp{.pconfig} @tab @samp{.waitpkg} @tab @samp{.cldemote}
@item @samp{.shstk} @tab @samp{.gfni} @tab @samp{.vaes} @tab @samp{.vpclmulqdq}
@item @samp{.movdiri} @tab @samp{.movdir64b} @tab @samp{.enqcmd} @tab @samp{.tsxldtrk}
-@item @samp{.amx_int8} @tab @samp{.amx_bf16} @tab @samp{.amx_tile}
+@item @samp{.amx_int8} @tab @samp{.amx_bf16} @tab @samp{.amx_fp16} @tab @samp{.amx_tile}
@item @samp{.kl} @tab @samp{.widekl} @tab @samp{.uintr} @tab @samp{.hreset}
@item @samp{.3dnow} @tab @samp{.3dnowa} @tab @samp{.sse4a} @tab @samp{.sse5}
@item @samp{.syscall} @tab @samp{.rdtscp} @tab @samp{.svme}
diff --git a/gas/testsuite/gas/i386/i386.exp b/gas/testsuite/gas/i386/i386.exp
index 5da64b4076..9f5fa7f612 100644
--- a/gas/testsuite/gas/i386/i386.exp
+++ b/gas/testsuite/gas/i386/i386.exp
@@ -1173,6 +1173,9 @@ if [gas_64_check] then {
run_dump_test "x86-64-wrmsrns-intel"
run_dump_test "x86-64-msrlist"
run_dump_test "x86-64-msrlist-intel"
+ run_dump_test "x86-64-amx-fp16"
+ run_dump_test "x86-64-amx-fp16-intel"
+ run_dump_test "x86-64-amx-fp16-bad"
run_dump_test "x86-64-clzero"
run_dump_test "x86-64-mwaitx-bdver4"
run_list_test "x86-64-mwaitx-reg"
diff --git a/gas/testsuite/gas/i386/x86-64-amx-fp16-bad.d b/gas/testsuite/gas/i386/x86-64-amx-fp16-bad.d
new file mode 100644
index 0000000000..a53ebf486d
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-fp16-bad.d
@@ -0,0 +1,19 @@
+#as:
+#objdump: -drw
+#name: x86_64 Illegal AMX-FP16 insns
+#source: x86-64-amx-fp16-bad.s
+
+.*: +file format .*
+
+
+Disassembly of section \.text:
+
+0+ <\.text>:
+[ ]*[a-f0-9]+:[ ]*c4 e2 d3 5c[ ]*\(bad\)[ ]*
+[ ]*[a-f0-9]+:[ ]*dc 90 90 90 90 90[ ]*fcoml.*
+[ ]*[a-f0-9]+:[ ]*c4 e2 57 5c[ ]*\(bad\)[ ]*
+[ ]*[a-f0-9]+:[ ]*dc 90 90 90 90 90[ ]*fcoml.*
+[ ]*[a-f0-9]+:[ ]*c4 62 53 5c dc[ ]*tdpfp16ps %tmm5,%tmm4,\(bad\)
+[ ]*[a-f0-9]+:[ ]*c4 c2 53 5c dc[ ]*tdpfp16ps %tmm5,\(bad\),%tmm3
+[ ]*[a-f0-9]+:[ ]*c4 e2 33 5c dc[ ]*tdpfp16ps \(bad\),%tmm4,%tmm3
+#pass
diff --git a/gas/testsuite/gas/i386/x86-64-amx-fp16-bad.s b/gas/testsuite/gas/i386/x86-64-amx-fp16-bad.s
new file mode 100644
index 0000000000..da5be1086e
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-fp16-bad.s
@@ -0,0 +1,35 @@
+# Check Illegal 64bit AMX-FP16 instructions
+
+.text
+ #tdpfp16ps %tmm5,%tmm4,%tmm3 set VEX.W = 1 (illegal value).
+ .byte 0xc4
+ .byte 0xe2
+ .byte 0xd3
+ .byte 0x5c
+ .byte 0xdc
+ .fill 0x05, 0x01, 0x90
+ #tdpfp16ps %tmm5,%tmm4,%tmm3 set VEX.L = 1 (illegal value).
+ .byte 0xc4
+ .byte 0xe2
+ .byte 0x57
+ .byte 0x5c
+ .byte 0xdc
+ .fill 0x05, 0x01, 0x90
+ #tdpfp16ps %tmm5,%tmm4,%tmm3 set VEX.R = 0 (illegal value).
+ .byte 0xc4
+ .byte 0x62
+ .byte 0x53
+ .byte 0x5c
+ .byte 0xdc
+ #tdpfp16ps %tmm5,%tmm4,%tmm3 set VEX.B = 0 (illegal value).
+ .byte 0xc4
+ .byte 0xc2
+ .byte 0x53
+ .byte 0x5c
+ .byte 0xdc
+ #tdpfp16ps %tmm5,%tmm4,%tmm3 set VEX.VVVV = 0110 (illegal value).
+ .byte 0xc4
+ .byte 0xe2
+ .byte 0x33
+ .byte 0x5c
+ .byte 0xdc
diff --git a/gas/testsuite/gas/i386/x86-64-amx-fp16-intel.d b/gas/testsuite/gas/i386/x86-64-amx-fp16-intel.d
new file mode 100644
index 0000000000..497898b760
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-fp16-intel.d
@@ -0,0 +1,13 @@
+#as:
+#objdump: -d -Mintel
+#name: x86_64 AMX-FP16 insns (Intel disassembly)
+#source: x86-64-amx-fp16.s
+
+.*: +file format .*
+
+
+Disassembly of section \.text:
+
+0+ <_start>:
+[ ]*[a-f0-9]+:[ ]*c4 e2 53 5c dc[ ]*tdpfp16ps tmm3,tmm4,tmm5
+[ ]*[a-f0-9]+:[ ]*c4 e2 53 5c dc[ ]*tdpfp16ps tmm3,tmm4,tmm5
diff --git a/gas/testsuite/gas/i386/x86-64-amx-fp16.d b/gas/testsuite/gas/i386/x86-64-amx-fp16.d
new file mode 100644
index 0000000000..7d3af95a4d
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-fp16.d
@@ -0,0 +1,13 @@
+#as:
+#objdump: -dw
+#name: x86_64 AMX-FP16 insns
+#source: x86-64-amx-fp16.s
+
+.*: +file format .*
+
+
+Disassembly of section \.text:
+
+0+ <_start>:
+[ ]*[a-f0-9]+:[ ]*c4 e2 53 5c dc[ ]*tdpfp16ps %tmm5,%tmm4,%tmm3
+[ ]*[a-f0-9]+:[ ]*c4 e2 53 5c dc[ ]*tdpfp16ps %tmm5,%tmm4,%tmm3
diff --git a/gas/testsuite/gas/i386/x86-64-amx-fp16.s b/gas/testsuite/gas/i386/x86-64-amx-fp16.s
new file mode 100644
index 0000000000..5a007904ed
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-amx-fp16.s
@@ -0,0 +1,9 @@
+# Check 64bit AMX-FP16 instructions
+
+ .allow_index_reg
+ .text
+_start:
+ tdpfp16ps %tmm5, %tmm4, %tmm3
+
+.intel_syntax noprefix
+ tdpfp16ps tmm3, tmm4, tmm5
diff --git a/opcodes/i386-dis.c b/opcodes/i386-dis.c
index 0aa41bd5fb..60712c7c5b 100644
--- a/opcodes/i386-dis.c
+++ b/opcodes/i386-dis.c
@@ -933,6 +933,7 @@ enum
MOD_VEX_0F384B_X86_64_P_3_W_0,
MOD_VEX_0F385A,
MOD_VEX_0F385C_X86_64_P_1_W_0,
+ MOD_VEX_0F385C_X86_64_P_3_W_0,
MOD_VEX_0F385E_X86_64_P_0_W_0,
MOD_VEX_0F385E_X86_64_P_1_W_0,
MOD_VEX_0F385E_X86_64_P_2_W_0,
@@ -1399,6 +1400,7 @@ enum
VEX_LEN_0F384B_X86_64_P_3_W_0_M_0,
VEX_LEN_0F385A_M_0,
VEX_LEN_0F385C_X86_64_P_1_W_0_M_0,
+ VEX_LEN_0F385C_X86_64_P_3_W_0_M_0,
VEX_LEN_0F385E_X86_64_P_0_W_0_M_0,
VEX_LEN_0F385E_X86_64_P_1_W_0_M_0,
VEX_LEN_0F385E_X86_64_P_2_W_0_M_0,
@@ -1565,6 +1567,7 @@ enum
VEX_W_0F3859,
VEX_W_0F385A_M_0_L_0,
VEX_W_0F385C_X86_64_P_1,
+ VEX_W_0F385C_X86_64_P_3,
VEX_W_0F385E_X86_64_P_0,
VEX_W_0F385E_X86_64_P_1,
VEX_W_0F385E_X86_64_P_2,
@@ -4088,6 +4091,7 @@ static const struct dis386 prefix_table[][4] = {
{ Bad_Opcode },
{ VEX_W_TABLE (VEX_W_0F385C_X86_64_P_1) },
{ Bad_Opcode },
+ { VEX_W_TABLE (VEX_W_0F385C_X86_64_P_3) },
},
/* PREFIX_VEX_0F385E_X86_64 */
@@ -7120,6 +7124,11 @@ static const struct dis386 vex_len_table[][2] = {
{ "tdpbf16ps", { TMM, EXtmm, VexTmm }, 0 },
},
+ /* VEX_LEN_0F385C_X86_64_P_3_W_0_M_0 */
+ {
+ { "tdpfp16ps", { TMM, EXtmm, VexTmm }, 0 },
+ },
+
/* VEX_LEN_0F385E_X86_64_P_0_W_0_M_0 */
{
{ "tdpbuud", {TMM, EXtmm, VexTmm }, 0 },
@@ -7788,6 +7797,10 @@ static const struct dis386 vex_w_table[][2] = {
/* VEX_W_0F385C_X86_64_P_1 */
{ MOD_TABLE (MOD_VEX_0F385C_X86_64_P_1_W_0) },
},
+ {
+ /* VEX_W_0F385C_X86_64_P_3 */
+ { MOD_TABLE (MOD_VEX_0F385C_X86_64_P_3_W_0) },
+ },
{
/* VEX_W_0F385E_X86_64_P_0 */
{ MOD_TABLE (MOD_VEX_0F385E_X86_64_P_0_W_0) },
@@ -8610,6 +8623,11 @@ static const struct dis386 mod_table[][2] = {
{ Bad_Opcode },
{ VEX_LEN_TABLE (VEX_LEN_0F385C_X86_64_P_1_W_0_M_0) },
},
+ {
+ /* MOD_VEX_0F385C_X86_64_P_3_W_0 */
+ { Bad_Opcode },
+ { VEX_LEN_TABLE (VEX_LEN_0F385C_X86_64_P_3_W_0_M_0) },
+ },
{
/* MOD_VEX_0F385E_X86_64_P_0_W_0 */
{ Bad_Opcode },
diff --git a/opcodes/i386-gen.c b/opcodes/i386-gen.c
index 435d67711f..55ca3b6855 100644
--- a/opcodes/i386-gen.c
+++ b/opcodes/i386-gen.c
@@ -319,6 +319,8 @@ static initializer cpu_flag_init[] =
"CPU_AMX_TILE_FLAGS|CpuAMX_INT8" },
{ "CPU_AMX_BF16_FLAGS",
"CPU_AMX_TILE_FLAGS|CpuAMX_BF16" },
+ { "CPU_AMX_FP16_FLAGS",
+ "CPU_AMX_TILE_FLAGS|CpuAMX_FP16" },
{ "CPU_AMX_TILE_FLAGS",
"CpuAMX_TILE" },
{ "CPU_MOVDIRI_FLAGS",
@@ -426,7 +428,7 @@ static initializer cpu_flag_init[] =
{ "CPU_ANY_AMX_BF16_FLAGS",
"CpuAMX_BF16" },
{ "CPU_ANY_AMX_TILE_FLAGS",
- "CpuAMX_TILE|CpuAMX_INT8|CpuAMX_BF16" },
+ "CpuAMX_TILE|CpuAMX_INT8|CpuAMX_BF16|CpuAMX_FP16" },
{ "CPU_ANY_AVX_VNNI_FLAGS",
"CpuAVX_VNNI" },
{ "CPU_ANY_MOVDIRI_FLAGS",
@@ -692,6 +694,7 @@ static bitfield cpu_flags[] =
BITFIELD (CpuCLDEMOTE),
BITFIELD (CpuAMX_INT8),
BITFIELD (CpuAMX_BF16),
+ BITFIELD (CpuAMX_FP16),
BITFIELD (CpuAMX_TILE),
BITFIELD (CpuMOVDIRI),
BITFIELD (CpuMOVDIR64B),
diff --git a/opcodes/i386-opc.h b/opcodes/i386-opc.h
index 75c23aaec6..b548769d75 100644
--- a/opcodes/i386-opc.h
+++ b/opcodes/i386-opc.h
@@ -240,6 +240,8 @@ enum
CpuAMX_INT8,
/* AMX-BF16 instructions required */
CpuAMX_BF16,
+ /* AMX-FP16 instructions required */
+ CpuAMX_FP16,
/* AMX-TILE instructions required */
CpuAMX_TILE,
/* GFNI instructions required */
@@ -418,6 +420,7 @@ typedef union i386_cpu_flags
unsigned int cpushstk:1;
unsigned int cpuamx_int8:1;
unsigned int cpuamx_bf16:1;
+ unsigned int cpuamx_fp16:1;
unsigned int cpuamx_tile:1;
unsigned int cpugfni:1;
unsigned int cpuvaes:1;
diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl
index 42d6423942..6057664193 100644
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -3113,6 +3113,7 @@ ldtilecfg, 0x49, None, CpuAMX_TILE|Cpu64, Modrm|Vex128|Space0F38|VexW0|No_bSuf|N
sttilecfg, 0x6649, None, CpuAMX_TILE|Cpu64, Modrm|Vex128|Space0F38|VexW0|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { Unspecified|BaseIndex }
tdpbf16ps, 0xf35c, None, CpuAMX_BF16|Cpu64, Modrm|Vex128|Space0F38|VexVVVV=1|VexW0|SwapSources|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegTMM, RegTMM, RegTMM }
+tdpfp16ps, 0xf25c, None, CpuAMX_FP16|Cpu64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegTMM, RegTMM, RegTMM }
tdpbssd, 0xf25e, None, CpuAMX_INT8|Cpu64, Modrm|Vex128|Space0F38|VexVVVV=1|VexW0|SwapSources|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegTMM, RegTMM, RegTMM }
tdpbuud, 0x5e, None, CpuAMX_INT8|Cpu64, Modrm|Vex128|Space0F38|VexVVVV=1|VexW0|SwapSources|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegTMM, RegTMM, RegTMM }
tdpbusd, 0x665e, None, CpuAMX_INT8|Cpu64, Modrm|Vex128|Space0F38|VexVVVV=1|VexW0|SwapSources|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegTMM, RegTMM, RegTMM }
--
2.17.1
Regards,
Lili.
On 18.10.2022 11:23, Jan Beulich wrote:
> On 18.10.2022 11:01, Cui, Lili wrote:
>>>> + { "CPU_AMX_FP16_FLAGS",
>>>> + "CpuAMX_FP16" },
>>>> { "CPU_IAMCU_FLAGS",
>>>> "Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuIAMCU" },
>>>> { "CPU_ADX_FLAGS",
>>>
>>> Can you please insert next to the other similar AMX entries? Seeing the flaw
>>> here, I'll be making a patch to address the lack of CPU_AMX_TILE_FLAGS in
>>> the similar pre-existing entries. When you move the insertion, it'll be easier
>>> to keep things in sync.
>> Done.
>>
>>>> - "CpuAMX_TILE|CpuAMX_INT8|CpuAMX_BF16" },
>>>> + "CpuAMX_TILE|CpuAMX_INT8|CpuAMX_BF16|CpuAMX_FP16" },
>>>> { "CPU_ANY_MSRLIST_FLAGS",
>>>> "CpuMSRLIST" },
>>>> + { "CPU_ANY_AMX_FP16_FLAGS",
>>>> + "CpuAMX_FP16" },
>>>> };
>>>
>>> Same here then.
>> Done.
>
> I guess my comment here was a little misleading (I'm sorry for that), in
> that in addition I was expecting you to consider the comment regarding
> the need for the *_ANY_* constants that I did give for several of the
> patches in this series. I think the question applies here as well: Are
> there dependent features known to appear? If not, there's no need for the
> extra constant.
I have to apologize and take back the _ANY_-related comment here (it
applies in the other places I've explicitly given it for this series).
It was only yesterday (in the course of automating the dependency
recording as much as possible) that I realized that there's a two way
requirement for re-using the non-ANY form in the initializer of the
3rd field of cpu_flags[] entries: There must not be dependents _and_
there must also not be dependencies. Otherwise disabling a feature
(via command line option or directive) would also disable the
feature's prereq ones. With my work to (mostly) automate this, I
guess there's no strong need to patch this separately - there are
many more issues that will need taking care of once the base logic is
in place.
Jan
> >>>> + { "CPU_ANY_AMX_FP16_FLAGS",
> >>>> + "CpuAMX_FP16" },
> >>>> };
> >>>
> >>> Same here then.
> >> Done.
> >
> > I guess my comment here was a little misleading (I'm sorry for that),
> > in that in addition I was expecting you to consider the comment
> > regarding the need for the *_ANY_* constants that I did give for
> > several of the patches in this series. I think the question applies
> > here as well: Are there dependent features known to appear? If not,
> > there's no need for the extra constant.
>
> I have to apologize and take back the _ANY_-related comment here (it
> applies in the other places I've explicitly given it for this series).
> It was only yesterday (in the course of automating the dependency recording
> as much as possible) that I realized that there's a two way requirement for
> re-using the non-ANY form in the initializer of the 3rd field of cpu_flags[]
> entries: There must not be dependents _and_ there must also not be
> dependencies. Otherwise disabling a feature (via command line option or
> directive) would also disable the feature's prereq ones. With my work to
> (mostly) automate this, I guess there's no strong need to patch this
> separately - there are many more issues that will need taking care of once
> the base logic is in place.
>
Np. Do we need to change them back?
Lili.
> Jan
@@ -1,5 +1,7 @@
-*- text -*-
+* Add support for Intel AMX-FP16 instructions.
+
* Add support for Intel MSRLIST instructions.
* Add support for Intel WRMSRNS instructions.
@@ -1101,6 +1101,7 @@ static const arch_entry cpu_arch[] =
SUBARCH (raoint, RAOINT, ANY_RAOINT, false),
SUBARCH (wrmsrns, WRMSRNS, ANY_WRMSRNS, false),
SUBARCH (msrlist, MSRLIST, ANY_MSRLIST, false),
+ SUBARCH (amx_fp16, AMX_FP16, ANY_AMX_FP16, false),
};
#undef SUBARCH
@@ -220,6 +220,7 @@ accept various extension mnemonics. For example,
@code{raoint},
@code{wrmsrns},
@code{msrlist},
+@code{amx_fp16},
@code{noavx512f},
@code{noavx512cd},
@code{noavx512er},
@@ -247,6 +248,7 @@ accept various extension mnemonics. For example,
@code{noraoint},
@code{nowrmsrns},
@code{nomsrlist},
+@code{noamx_fp16},
@code{noenqcmd},
@code{noserialize},
@code{notsxldtrk},
@@ -1549,7 +1551,7 @@ supported on the CPU specified. The choices for @var{cpu_type} are:
@item @samp{.clwb} @tab @samp{.rdpid} @tab @samp{.ptwrite} @tab @samp{.ibt}
@item @samp{.avx_ifma} @tab @samp{.avx_vnni_int8} @tab @samp{.avx_ne_convert}
@item @samp{.cmpccxadd} @tab @samp{.raoint} @tab @samp{.wrmsrns}
-@item @samp{.msrlist}
+@item @samp{.msrlist} @tab @samp{.amx_fp16}
@item @samp{.wbnoinvd} @tab @samp{.pconfig} @tab @samp{.waitpkg} @tab @samp{.cldemote}
@item @samp{.shstk} @tab @samp{.gfni} @tab @samp{.vaes} @tab @samp{.vpclmulqdq}
@item @samp{.movdiri} @tab @samp{.movdir64b} @tab @samp{.enqcmd} @tab @samp{.tsxldtrk}
@@ -1173,6 +1173,9 @@ if [gas_64_check] then {
run_dump_test "x86-64-wrmsrns-intel"
run_dump_test "x86-64-msrlist"
run_dump_test "x86-64-msrlist-intel"
+ run_dump_test "x86-64-amx-fp16"
+ run_dump_test "x86-64-amx-fp16-intel"
+ run_dump_test "x86-64-amx-fp16-bad"
run_dump_test "x86-64-clzero"
run_dump_test "x86-64-mwaitx-bdver4"
run_list_test "x86-64-mwaitx-reg"
new file mode 100644
@@ -0,0 +1,19 @@
+#as:
+#objdump: -drw
+#name: x86_64 Illegal AMX-FP16 insns
+#source: x86-64-amx-fp16-bad.s
+
+.*: +file format .*
+
+
+Disassembly of section \.text:
+
+0+ <\.text>:
+[ ]*[a-f0-9]+:[ ]*c4 e2 d3 5c[ ]*\(bad\)[ ]*
+[ ]*[a-f0-9]+:[ ]*dc 90 90 90 90 90[ ]*fcoml.*
+[ ]*[a-f0-9]+:[ ]*c4 e2 57 5c[ ]*\(bad\)[ ]*
+[ ]*[a-f0-9]+:[ ]*dc 90 90 90 90 90[ ]*fcoml.*
+[ ]*[a-f0-9]+:[ ]*c4 62 53 5c dc[ ]*tdpfp16ps %tmm5,%tmm4,\(bad\)
+[ ]*[a-f0-9]+:[ ]*c4 c2 53 5c dc[ ]*tdpfp16ps %tmm5,\(bad\),%tmm3
+[ ]*[a-f0-9]+:[ ]*c4 e2 33 5c dc[ ]*tdpfp16ps \(bad\),%tmm4,%tmm3
+#pass
new file mode 100644
@@ -0,0 +1,35 @@
+# Check Illegal 64bit AMX-FP16 instructions
+
+.text
+ #tdpfp16ps %tmm5,%tmm4,%tmm3 set VEX.W = 1 (illegal value).
+ .byte 0xc4
+ .byte 0xe2
+ .byte 0xd3
+ .byte 0x5c
+ .byte 0xdc
+ .fill 0x05, 0x01, 0x90
+ #tdpfp16ps %tmm5,%tmm4,%tmm3 set VEX.L = 1 (illegal value).
+ .byte 0xc4
+ .byte 0xe2
+ .byte 0x57
+ .byte 0x5c
+ .byte 0xdc
+ .fill 0x05, 0x01, 0x90
+ #tdpfp16ps %tmm5,%tmm4,%tmm3 set VEX.R = 0 (illegal value).
+ .byte 0xc4
+ .byte 0x62
+ .byte 0x53
+ .byte 0x5c
+ .byte 0xdc
+ #tdpfp16ps %tmm5,%tmm4,%tmm3 set VEX.B = 0 (illegal value).
+ .byte 0xc4
+ .byte 0xc2
+ .byte 0x53
+ .byte 0x5c
+ .byte 0xdc
+ #tdpfp16ps %tmm5,%tmm4,%tmm3 set VEX.VVVV = 0110 (illegal value).
+ .byte 0xc4
+ .byte 0xe2
+ .byte 0x33
+ .byte 0x5c
+ .byte 0xdc
new file mode 100644
@@ -0,0 +1,13 @@
+#as:
+#objdump: -d -Mintel
+#name: x86_64 AMX-FP16 insns (Intel disassembly)
+#source: x86-64-amx-fp16.s
+
+.*: +file format .*
+
+
+Disassembly of section \.text:
+
+0+ <_start>:
+[ ]*[a-f0-9]+:[ ]*c4 e2 53 5c dc[ ]*tdpfp16ps tmm3,tmm4,tmm5
+[ ]*[a-f0-9]+:[ ]*c4 e2 53 5c dc[ ]*tdpfp16ps tmm3,tmm4,tmm5
new file mode 100644
@@ -0,0 +1,13 @@
+#as:
+#objdump: -dw
+#name: x86_64 AMX-FP16 insns
+#source: x86-64-amx-fp16.s
+
+.*: +file format .*
+
+
+Disassembly of section \.text:
+
+0+ <_start>:
+[ ]*[a-f0-9]+:[ ]*c4 e2 53 5c dc[ ]*tdpfp16ps %tmm5,%tmm4,%tmm3
+[ ]*[a-f0-9]+:[ ]*c4 e2 53 5c dc[ ]*tdpfp16ps %tmm5,%tmm4,%tmm3
new file mode 100644
@@ -0,0 +1,9 @@
+# Check 64bit AMX-FP16 instructions
+
+ .allow_index_reg
+ .text
+_start:
+ tdpfp16ps %tmm5, %tmm4, %tmm3
+
+.intel_syntax noprefix
+ tdpfp16ps tmm3, tmm4, tmm5
@@ -933,6 +933,7 @@ enum
MOD_VEX_0F384B_X86_64_P_3_W_0,
MOD_VEX_0F385A,
MOD_VEX_0F385C_X86_64_P_1_W_0,
+ MOD_VEX_0F385C_X86_64_P_3_W_0,
MOD_VEX_0F385E_X86_64_P_0_W_0,
MOD_VEX_0F385E_X86_64_P_1_W_0,
MOD_VEX_0F385E_X86_64_P_2_W_0,
@@ -1399,6 +1400,7 @@ enum
VEX_LEN_0F384B_X86_64_P_3_W_0_M_0,
VEX_LEN_0F385A_M_0,
VEX_LEN_0F385C_X86_64_P_1_W_0_M_0,
+ VEX_LEN_0F385C_X86_64_P_3_W_0_M_0,
VEX_LEN_0F385E_X86_64_P_0_W_0_M_0,
VEX_LEN_0F385E_X86_64_P_1_W_0_M_0,
VEX_LEN_0F385E_X86_64_P_2_W_0_M_0,
@@ -1565,6 +1567,7 @@ enum
VEX_W_0F3859,
VEX_W_0F385A_M_0_L_0,
VEX_W_0F385C_X86_64_P_1,
+ VEX_W_0F385C_X86_64_P_3,
VEX_W_0F385E_X86_64_P_0,
VEX_W_0F385E_X86_64_P_1,
VEX_W_0F385E_X86_64_P_2,
@@ -4088,6 +4091,7 @@ static const struct dis386 prefix_table[][4] = {
{ Bad_Opcode },
{ VEX_W_TABLE (VEX_W_0F385C_X86_64_P_1) },
{ Bad_Opcode },
+ { VEX_W_TABLE (VEX_W_0F385C_X86_64_P_3) },
},
/* PREFIX_VEX_0F385E_X86_64 */
@@ -7120,6 +7124,11 @@ static const struct dis386 vex_len_table[][2] = {
{ "tdpbf16ps", { TMM, EXtmm, VexTmm }, 0 },
},
+ /* VEX_LEN_0F385C_X86_64_P_3_W_0_M_0 */
+ {
+ { "tdpfp16ps", { TMM, EXtmm, VexTmm }, 0 },
+ },
+
/* VEX_LEN_0F385E_X86_64_P_0_W_0_M_0 */
{
{ "tdpbuud", {TMM, EXtmm, VexTmm }, 0 },
@@ -7788,6 +7797,10 @@ static const struct dis386 vex_w_table[][2] = {
/* VEX_W_0F385C_X86_64_P_1 */
{ MOD_TABLE (MOD_VEX_0F385C_X86_64_P_1_W_0) },
},
+ {
+ /* VEX_W_0F385C_X86_64_P_3 */
+ { MOD_TABLE (MOD_VEX_0F385C_X86_64_P_3_W_0) },
+ },
{
/* VEX_W_0F385E_X86_64_P_0 */
{ MOD_TABLE (MOD_VEX_0F385E_X86_64_P_0_W_0) },
@@ -8610,6 +8623,11 @@ static const struct dis386 mod_table[][2] = {
{ Bad_Opcode },
{ VEX_LEN_TABLE (VEX_LEN_0F385C_X86_64_P_1_W_0_M_0) },
},
+ {
+ /* MOD_VEX_0F385C_X86_64_P_3_W_0 */
+ { Bad_Opcode },
+ { VEX_LEN_TABLE (VEX_LEN_0F385C_X86_64_P_3_W_0_M_0) },
+ },
{
/* MOD_VEX_0F385E_X86_64_P_0_W_0 */
{ Bad_Opcode },
@@ -259,6 +259,8 @@ static initializer cpu_flag_init[] =
"CpuWRMSRNS" },
{ "CPU_MSRLIST_FLAGS",
"CpuMSRLIST" },
+ { "CPU_AMX_FP16_FLAGS",
+ "CpuAMX_FP16" },
{ "CPU_IAMCU_FLAGS",
"Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuIAMCU" },
{ "CPU_ADX_FLAGS",
@@ -426,7 +428,7 @@ static initializer cpu_flag_init[] =
{ "CPU_ANY_AMX_BF16_FLAGS",
"CpuAMX_BF16" },
{ "CPU_ANY_AMX_TILE_FLAGS",
- "CpuAMX_TILE|CpuAMX_INT8|CpuAMX_BF16" },
+ "CpuAMX_TILE|CpuAMX_INT8|CpuAMX_BF16|CpuAMX_FP16" },
{ "CPU_ANY_AVX_VNNI_FLAGS",
"CpuAVX_VNNI" },
{ "CPU_ANY_MOVDIRI_FLAGS",
@@ -467,6 +469,8 @@ static initializer cpu_flag_init[] =
"CpuWRMSRNS" },
{ "CPU_ANY_MSRLIST_FLAGS",
"CpuMSRLIST" },
+ { "CPU_ANY_AMX_FP16_FLAGS",
+ "CpuAMX_FP16" },
};
static initializer operand_type_init[] =
@@ -675,6 +679,7 @@ static bitfield cpu_flags[] =
BITFIELD (CpuRAOINT),
BITFIELD (CpuWRMSRNS),
BITFIELD (CpuMSRLIST),
+ BITFIELD (CpuAMX_FP16),
BITFIELD (CpuMWAITX),
BITFIELD (CpuCLZERO),
BITFIELD (CpuOSPKE),
@@ -223,6 +223,8 @@ enum
CpuWRMSRNS,
/* Intel MSRLIST Instructions support required. */
CpuMSRLIST,
+ /* AMX-FP16 instructions required */
+ CpuAMX_FP16,
/* mwaitx instruction required */
CpuMWAITX,
/* Clzero instruction required */
@@ -409,6 +411,7 @@ typedef union i386_cpu_flags
unsigned int cpuraoint:1;
unsigned int cpuwrmsrns:1;
unsigned int cpumsrlist:1;
+ unsigned int cpuamx_fp16:1;
unsigned int cpumwaitx:1;
unsigned int cpuclzero:1;
unsigned int cpuospke:1;
@@ -3339,3 +3339,9 @@ rdmsrlist, 0xf20f01c6, None, CpuMSRLIST|Cpu64, No_bSuf|No_wSuf|No_lSuf|No_sSuf|N
wrmsrlist, 0xf30f01c6, None, CpuMSRLIST|Cpu64, No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, {}
// MSRLIST instructions end.
+
+// AMX-FP16 instructions.
+
+tdpfp16ps, 0xf25c, None, CpuAMX_FP16|Cpu64, Modrm|Vex128|Space0F38|VexVVVV=1|VexW0|SwapSources|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegTMM, RegTMM, RegTMM }
+
+// AMX-FP16 instructions end.