On 18.07.2023 09:54, Haochen Jiang wrote:
> --- a/gas/testsuite/gas/i386/disassem.s
> +++ b/gas/testsuite/gas/i386/disassem.s
> @@ -168,6 +168,12 @@
> .byte 0xC4, 0xE1, 0xF9, 0x93, 0x6F
> .insn VEX.L0.66.0f.W1 0x93, (%edi), %k7
> .byte 0xc4, 0xe2, 0x1, 0x1c, 0x41, 0x37
> + .insn VEX.L1.F2.0f38.W0 0xCC, (%ecx), %ymm1
> +.fill 0x5, 0x1, 0x90
> + .insn VEX.L1.F2.0f38.W0 0xCD, (%ecx), %ymm1
> +.fill 0x5, 0x1, 0x90
> + .insn VEX.L1.F2.0f38.W0 0xCB, (%ecx), %ymm2, %ymm1
> +.fill 0x5, 0x1, 0x90
In new additions here (and to similar files) please can you avoid
- .fill / .byte and the like whenever possible,
- unindented directives?
The latter is purely style, I know, but strictly speaking directives
should never start in the first column. Present gas, presumably for
historical reasons, simply is overly forgiving in this regard.
To deal with the former, more careful selection of operands is all
it takes. With how the disassembler presently works, what you want
is that the nominal ModR/M byte disassembles as a single-byte opcode.
That's very easy to achieve: Opcodes 40-5f (50-5f for 64-bit) are all
single-byte, i.e. you won't need much more than ModR/M.mod = 1, i.e.
the Disp8 encoding form with a displacement that then also
disassembles as a single-byte opcode.
Alternatively (and perhaps even better) you can arrange for ModR/M
bytes of 69, 6a, 6b, or 70-7f, with a suitable displacement byte
(any will do afaict for 6a and 70-7f, while 69 and 6b would require
the top two bits to be set).
> --- /dev/null
> +++ b/gas/testsuite/gas/i386/sha512-inval.l
> @@ -0,0 +1,4 @@
> +.* Assembler messages:
> +.*:6: Error: operand size mismatch for `vsha512msg1'
> +.*:7: Error: operand size mismatch for `vsha512msg2'
> +.*:8: Error: operand size mismatch for `vsha512rnds2'
Just as a remark, no action expected from your side: This of course
isn't the correct error message to be emitted here. It should be
"type", not "size". You _may_ want to replace "size" by ".*" to
allow for a future assembler adjustment without the need to touch
this testcase again.
> --- /dev/null
> +++ b/gas/testsuite/gas/i386/sha512.d
> @@ -0,0 +1,16 @@
> +#as:
What purpose does this line (present in several of the tests) have?
> --- /dev/null
> +++ b/gas/testsuite/gas/i386/sha512.s
> @@ -0,0 +1,13 @@
> +# Check 32bit SHA512 instructions
> +
> + .allow_index_reg
This doesn't look to be needed either.
> + .text
> +_start:
> + vsha512msg1 %xmm5, %ymm6 #SHA512
> + vsha512msg2 %ymm5, %ymm6 #SHA512
> + vsha512rnds2 %xmm4, %ymm5, %ymm6 #SHA512
> +
> +.intel_syntax noprefix
See remark above about indentation of directives.
> --- /dev/null
> +++ b/gas/testsuite/gas/i386/x86-64-sha512.s
> @@ -0,0 +1,13 @@
> +# Check 64bit SHA512 instructions
> +
> + .allow_index_reg
> + .text
> +_start:
> + vsha512msg1 %xmm5, %ymm6 #SHA512
> + vsha512msg2 %ymm5, %ymm6 #SHA512
> + vsha512rnds2 %xmm4, %ymm5, %ymm6 #SHA512
> +
> +.intel_syntax noprefix
> + vsha512msg1 ymm6, xmm5 #SHA512
> + vsha512msg2 ymm6, ymm5 #SHA512
> + vsha512rnds2 ymm6, ymm5, xmm4 #SHA512
Maybe worthwhile to use higher register numbers as well, e.g.
_start:
vsha512msg1 %xmm14, %ymm5 #SHA512
vsha512msg2 %ymm4, %ymm15 #SHA512
vsha512rnds2 %xmm6, %ymm5, %ymm14 #SHA512
.intel_syntax noprefix
vsha512msg1 ymm14, xmm5 #SHA512
vsha512msg2 ymm6, ymm15 #SHA512
vsha512rnds2 ymm6, ymm5, xmm14 #SHA512
?
Jan
@@ -1,5 +1,7 @@
-*- text -*-
+* Add support for Intel SHA512 instructions.
+
* Add support for Intel AVX-VNNI-INT16 instructions.
Changes in 2.41:
@@ -1152,6 +1152,7 @@ static const arch_entry cpu_arch[] =
SUBARCH (fred, FRED, ANY_FRED, false),
SUBARCH (lkgs, LKGS, ANY_LKGS, false),
SUBARCH (avx_vnni_int16, AVX_VNNI_INT16, ANY_AVX_VNNI_INT16, false),
+ SUBARCH (sha512, SHA512, ANY_SHA512, false),
};
#undef SUBARCH
@@ -208,6 +208,7 @@ accept various extension mnemonics. For example,
@code{fred},
@code{lkgs},
@code{avx_vnni_int16},
+@code{sha512},
@code{amx_int8},
@code{amx_bf16},
@code{amx_fp16},
@@ -1637,7 +1638,7 @@ supported on the CPU specified. The choices for @var{cpu_type} are:
@item @samp{.prefetchi} @tab @samp{.avx_ifma} @tab @samp{.avx_vnni_int8}
@item @samp{.cmpccxadd} @tab @samp{.wrmsrns} @tab @samp{.msrlist}
@item @samp{.avx_ne_convert} @tab @samp{.rao_int} @tab @samp{.fred} @tab @samp{.lkgs}
-@item @samp{.avx_vnni_int16}
+@item @samp{.avx_vnni_int16} @tab @samp{.sha512}
@item @samp{.wbnoinvd} @tab @samp{.pconfig} @tab @samp{.waitpkg} @tab @samp{.cldemote}
@item @samp{.shstk} @tab @samp{.gfni} @tab @samp{.vaes} @tab @samp{.vpclmulqdq}
@item @samp{.movdiri} @tab @samp{.movdir64b} @tab @samp{.enqcmd} @tab @samp{.tsxldtrk}
@@ -345,6 +345,12 @@ Disassembly of section \.text:
[ ]*[a-f0-9]+:[ ]*c4 e2 01 1c[ ]*\(bad\)
[ ]*[a-f0-9]+:[ ]*41[ ]*inc[ ]*%ecx
[ ]*[a-f0-9]+:[ ]*37[ ]*aaa
+[ ]*[a-f0-9]+:[ ]*c4 e2 7f cc[ ]+vsha512msg1[ ]*\(bad\),.*
+[ ]*[a-f0-9]+:[ ]*09 90 90 90 90 90[ ]+or.*
+[ ]*[a-f0-9]+:[ ]*c4 e2 7f cd[ ]+vsha512msg2[ ]*\(bad\),.*
+[ ]*[a-f0-9]+:[ ]*09 90 90 90 90 90[ ]+or.*
+[ ]*[a-f0-9]+:[ ]*c4 e2 6f cb[ ]+vsha512rnds2[ ]*\(bad\),.*
+[ ]*[a-f0-9]+:[ ]*09 90 90 90 90 90[ ]+or.*
[ ]*[a-f0-9]+:[ ]*62 f2 ad 08 1c[ ]*\(bad\)
[ ]*[a-f0-9]+:[ ]*01 01[ ]*add[ ]*%eax,\(%ecx\)
[ ]*[a-f0-9]+:[ ]*62 f3 7d 28 1b[ ]*\(bad\)
@@ -168,6 +168,12 @@
.byte 0xC4, 0xE1, 0xF9, 0x93, 0x6F
.insn VEX.L0.66.0f.W1 0x93, (%edi), %k7
.byte 0xc4, 0xe2, 0x1, 0x1c, 0x41, 0x37
+ .insn VEX.L1.F2.0f38.W0 0xCC, (%ecx), %ymm1
+.fill 0x5, 0x1, 0x90
+ .insn VEX.L1.F2.0f38.W0 0xCD, (%ecx), %ymm1
+.fill 0x5, 0x1, 0x90
+ .insn VEX.L1.F2.0f38.W0 0xCB, (%ecx), %ymm2, %ymm1
+.fill 0x5, 0x1, 0x90
.byte 0x62, 0xf2, 0xad, 0x08, 0x1c, 0x01
.byte 0x1
.insn EVEX.66.0f3a.W0 0x1b, $0x25, %ymm0, %xmm1
@@ -498,6 +498,9 @@ if [gas_32_check] then {
run_list_test "amx-complex-inval"
run_dump_test "avx-vnni-int16"
run_dump_test "avx-vnni-int16-intel"
+ run_dump_test "sha512"
+ run_dump_test "sha512-intel"
+ run_list_test "sha512-inval"
run_list_test "sg"
run_dump_test "clzero"
run_dump_test "invlpgb"
new file mode 100644
@@ -0,0 +1,16 @@
+#as:
+#objdump: -dw -Mintel
+#name: i386 SHA512 insns (Intel disassembly)
+#source: sha512.s
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*c4 e2 7f cc f5\s+vsha512msg1 ymm6,xmm5
+\s*[a-f0-9]+:\s*c4 e2 7f cd f5\s+vsha512msg2 ymm6,ymm5
+\s*[a-f0-9]+:\s*c4 e2 57 cb f4\s+vsha512rnds2 ymm6,ymm5,xmm4
+\s*[a-f0-9]+:\s*c4 e2 7f cc f5\s+vsha512msg1 ymm6,xmm5
+\s*[a-f0-9]+:\s*c4 e2 7f cd f5\s+vsha512msg2 ymm6,ymm5
+\s*[a-f0-9]+:\s*c4 e2 57 cb f4\s+vsha512rnds2 ymm6,ymm5,xmm4
new file mode 100644
@@ -0,0 +1,4 @@
+.* Assembler messages:
+.*:6: Error: operand size mismatch for `vsha512msg1'
+.*:7: Error: operand size mismatch for `vsha512msg2'
+.*:8: Error: operand size mismatch for `vsha512rnds2'
new file mode 100644
@@ -0,0 +1,8 @@
+# Check Illegal SHA512 instructions
+
+ .allow_index_reg
+ .text
+_start:
+ vsha512msg1 (%ecx), %ymm6
+ vsha512msg2 (%ecx), %ymm6
+ vsha512rnds2 (%ecx), %ymm5, %ymm6
new file mode 100644
@@ -0,0 +1,16 @@
+#as:
+#objdump: -dw
+#name: i386 SHA512 insns
+#source: sha512.s
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*c4 e2 7f cc f5\s+vsha512msg1 %xmm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 7f cd f5\s+vsha512msg2 %ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 57 cb f4\s+vsha512rnds2 %xmm4,%ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 7f cc f5\s+vsha512msg1 %xmm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 7f cd f5\s+vsha512msg2 %ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 57 cb f4\s+vsha512rnds2 %xmm4,%ymm5,%ymm6
new file mode 100644
@@ -0,0 +1,13 @@
+# Check 32bit SHA512 instructions
+
+ .allow_index_reg
+ .text
+_start:
+ vsha512msg1 %xmm5, %ymm6 #SHA512
+ vsha512msg2 %ymm5, %ymm6 #SHA512
+ vsha512rnds2 %xmm4, %ymm5, %ymm6 #SHA512
+
+.intel_syntax noprefix
+ vsha512msg1 ymm6, xmm5 #SHA512
+ vsha512msg2 ymm6, ymm5 #SHA512
+ vsha512rnds2 ymm6, ymm5, xmm4 #SHA512
new file mode 100644
@@ -0,0 +1,16 @@
+#as:
+#objdump: -dw -Mintel
+#name: x86_64 SHA512 insns (Intel disassembly)
+#source: x86-64-sha512.s
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*c4 e2 7f cc f5\s+vsha512msg1 ymm6,xmm5
+\s*[a-f0-9]+:\s*c4 e2 7f cd f5\s+vsha512msg2 ymm6,ymm5
+\s*[a-f0-9]+:\s*c4 e2 57 cb f4\s+vsha512rnds2 ymm6,ymm5,xmm4
+\s*[a-f0-9]+:\s*c4 e2 7f cc f5\s+vsha512msg1 ymm6,xmm5
+\s*[a-f0-9]+:\s*c4 e2 7f cd f5\s+vsha512msg2 ymm6,ymm5
+\s*[a-f0-9]+:\s*c4 e2 57 cb f4\s+vsha512rnds2 ymm6,ymm5,xmm4
new file mode 100644
@@ -0,0 +1,4 @@
+.* Assembler messages:
+.*:6: Error: operand size mismatch for `vsha512msg1'
+.*:7: Error: operand size mismatch for `vsha512msg2'
+.*:8: Error: operand size mismatch for `vsha512rnds2'
new file mode 100644
@@ -0,0 +1,8 @@
+# Check Illegal SHA512 instructions
+
+ .allow_index_reg
+ .text
+_start:
+ vsha512msg1 (%ecx), %ymm6
+ vsha512msg2 (%ecx), %ymm6
+ vsha512rnds2 (%ecx), %ymm5, %ymm6
new file mode 100644
@@ -0,0 +1,16 @@
+#as:
+#objdump: -dw
+#name: x86_64 SHA512 insns
+#source: x86-64-sha512.s
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*c4 e2 7f cc f5\s+vsha512msg1 %xmm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 7f cd f5\s+vsha512msg2 %ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 57 cb f4\s+vsha512rnds2 %xmm4,%ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 7f cc f5\s+vsha512msg1 %xmm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 7f cd f5\s+vsha512msg2 %ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 57 cb f4\s+vsha512rnds2 %xmm4,%ymm5,%ymm6
new file mode 100644
@@ -0,0 +1,13 @@
+# Check 64bit SHA512 instructions
+
+ .allow_index_reg
+ .text
+_start:
+ vsha512msg1 %xmm5, %ymm6 #SHA512
+ vsha512msg2 %ymm5, %ymm6 #SHA512
+ vsha512rnds2 %xmm4, %ymm5, %ymm6 #SHA512
+
+.intel_syntax noprefix
+ vsha512msg1 ymm6, xmm5 #SHA512
+ vsha512msg2 ymm6, ymm5 #SHA512
+ vsha512rnds2 ymm6, ymm5, xmm4 #SHA512
@@ -440,6 +440,9 @@ run_dump_test "x86-64-lkgs"
run_list_test "x86-64-lkgs-inval"
run_dump_test "x86-64-avx-vnni-int16"
run_dump_test "x86-64-avx-vnni-int16-intel"
+run_dump_test "x86-64-sha512"
+run_dump_test "x86-64-sha512-intel"
+run_list_test "x86-64-sha512-inval"
run_dump_test "x86-64-clzero"
run_dump_test "x86-64-mwaitx-bdver4"
run_list_test "x86-64-mwaitx-reg"
@@ -530,6 +530,8 @@ fetch_error (const instr_info *ins)
#define Nq { OP_R, q_mode }
#define Ux { OP_R, x_mode }
#define Uxmm { OP_R, xmm_mode }
+#define Rxmmq { OP_R, xmmq_mode }
+#define Rymm { OP_R, ymm_mode }
#define Rtmm { OP_R, tmm_mode }
#define EMCq { OP_EMC, q_mode }
#define MXC { OP_MXC, 0 }
@@ -1064,6 +1066,9 @@ enum
PREFIX_VEX_0F38B1_W_0,
PREFIX_VEX_0F38D2_W_0,
PREFIX_VEX_0F38D3_W_0,
+ PREFIX_VEX_0F38CB,
+ PREFIX_VEX_0F38CC,
+ PREFIX_VEX_0F38CD,
PREFIX_VEX_0F38F5_L_0,
PREFIX_VEX_0F38F6_L_0,
PREFIX_VEX_0F38F7_L_0,
@@ -1306,6 +1311,9 @@ enum
VEX_LEN_0F385C_X86_64,
VEX_LEN_0F385E_X86_64,
VEX_LEN_0F386C_X86_64,
+ VEX_LEN_0F38CB_P_3_W_0,
+ VEX_LEN_0F38CC_P_3_W_0,
+ VEX_LEN_0F38CD_P_3_W_0,
VEX_LEN_0F38DB,
VEX_LEN_0F38F2,
VEX_LEN_0F38F3,
@@ -1473,6 +1481,9 @@ enum
VEX_W_0F38B1,
VEX_W_0F38B4,
VEX_W_0F38B5,
+ VEX_W_0F38CB_P_3,
+ VEX_W_0F38CC_P_3,
+ VEX_W_0F38CD_P_3,
VEX_W_0F38CF,
VEX_W_0F38D2,
VEX_W_0F38D3,
@@ -3928,6 +3939,30 @@ static const struct dis386 prefix_table[][4] = {
{ "vpdpwusds", { XM, Vex, EXx }, 0 },
},
+ /* PREFIX_VEX_0F38CB */
+ {
+ { Bad_Opcode },
+ { Bad_Opcode },
+ { Bad_Opcode },
+ { VEX_W_TABLE (VEX_W_0F38CB_P_3) },
+ },
+
+ /* PREFIX_VEX_0F38CC */
+ {
+ { Bad_Opcode },
+ { Bad_Opcode },
+ { Bad_Opcode },
+ { VEX_W_TABLE (VEX_W_0F38CC_P_3) },
+ },
+
+ /* PREFIX_VEX_0F38CD */
+ {
+ { Bad_Opcode },
+ { Bad_Opcode },
+ { Bad_Opcode },
+ { VEX_W_TABLE (VEX_W_0F38CD_P_3) },
+ },
+
/* PREFIX_VEX_0F38F5_L_0 */
{
{ "bzhiS", { Gdq, Edq, VexGdq }, 0 },
@@ -6380,9 +6415,9 @@ static const struct dis386 vex_table[][256] = {
{ Bad_Opcode },
{ Bad_Opcode },
{ Bad_Opcode },
- { Bad_Opcode },
- { Bad_Opcode },
- { Bad_Opcode },
+ { PREFIX_TABLE (PREFIX_VEX_0F38CB) },
+ { PREFIX_TABLE (PREFIX_VEX_0F38CC) },
+ { PREFIX_TABLE (PREFIX_VEX_0F38CD) },
{ Bad_Opcode },
{ VEX_W_TABLE (VEX_W_0F38CF) },
/* d0 */
@@ -6944,6 +6979,24 @@ static const struct dis386 vex_len_table[][2] = {
{ VEX_W_TABLE (VEX_W_0F386C_X86_64_L_0) },
},
+ /* VEX_LEN_0F38CB_P_3_W_0 */
+ {
+ { Bad_Opcode },
+ { "vsha512rnds2", { XM, Vex, Rxmmq }, 0 },
+ },
+
+ /* VEX_LEN_0F38CC_P_3_W_0 */
+ {
+ { Bad_Opcode },
+ { "vsha512msg1", { XM, Rxmmq }, 0 },
+ },
+
+ /* VEX_LEN_0F38CD_P_3_W_0 */
+ {
+ { Bad_Opcode },
+ { "vsha512msg2", { XM, Rymm }, 0 },
+ },
+
/* VEX_LEN_0F38DB */
{
{ "vaesimc", { XM, EXx }, PREFIX_DATA },
@@ -7614,6 +7667,18 @@ static const struct dis386 vex_w_table[][2] = {
{ Bad_Opcode },
{ "%XVvpmadd52huq", { XM, Vex, EXx }, PREFIX_DATA },
},
+ {
+ /* VEX_W_0F38CB_P_3 */
+ { VEX_LEN_TABLE (VEX_LEN_0F38CB_P_3_W_0) },
+ },
+ {
+ /* VEX_W_0F38CC_P_3 */
+ { VEX_LEN_TABLE (VEX_LEN_0F38CC_P_3_W_0) },
+ },
+ {
+ /* VEX_W_0F38CD_P_3 */
+ { VEX_LEN_TABLE (VEX_LEN_0F38CD_P_3_W_0) },
+ },
{
/* VEX_W_0F38CF */
{ "%XEvgf2p8mulb", { XM, Vex, EXx }, PREFIX_DATA },
@@ -8055,6 +8120,14 @@ static const struct dis386 mod_table[][2] = {
{ PREFIX_TABLE (PREFIX_VEX_0F3849_X86_64_L_0_W_0_M_0) },
{ PREFIX_TABLE (PREFIX_VEX_0F3849_X86_64_L_0_W_0_M_1) },
},
+ {
+ /* MOD_VEX_0F38CB_P_3_W_0_L_1 */
+ { Bad_Opcode },
+ },
+ {
+ /* MOD_VEX_0F38CC_P_3_W_0_L_1 */
+ { Bad_Opcode },
+ },
#include "i386-dis-evex-mod.h"
};
@@ -214,6 +214,8 @@ static const dependency isa_dependencies[] =
"XSAVE" },
{ "SHA",
"SSE2" },
+ { "SHA512",
+ "AVX2" },
{ "XSAVES",
"XSAVEC" },
{ "XSAVEC",
@@ -369,6 +371,7 @@ static bitfield cpu_flags[] =
BITFIELD (RAO_INT),
BITFIELD (FRED),
BITFIELD (LKGS),
+ BITFIELD (SHA512),
BITFIELD (MWAITX),
BITFIELD (CLZERO),
BITFIELD (OSPKE),
@@ -175,6 +175,8 @@ enum
CpuSMAP,
/* SHA instructions required. */
CpuSHA,
+ /* SHA512 instructions required. */
+ CpuSHA512,
/* CLFLUSHOPT instruction required */
CpuClflushOpt,
/* XSAVES/XRSTORS instruction required */
@@ -403,6 +405,7 @@ typedef union i386_cpu_flags
unsigned int cpuprfchw:1;
unsigned int cpusmap:1;
unsigned int cpusha:1;
+ unsigned int cpusha512:1;
unsigned int cpuclflushopt:1;
unsigned int cpuxsaves:1;
unsigned int cpuxsavec:1;
@@ -2043,6 +2043,14 @@ sha256rnds2, 0xf38cb, SHA, Modrm|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
sha256msg1, 0xf38cc, SHA, Modrm|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
sha256msg2, 0xf38cd, SHA, Modrm|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
+// SHA512 instructions.
+
+vsha512rnds2, 0xf2cb, SHA512, Modrm|Vex256|Space0F38|VexVVVV|VexW0|NoSuf, { RegXMM, RegYMM, RegYMM }
+vsha512msg1, 0xf2cc, SHA512, Modrm|Vex256|Space0F38|VexW0|NoSuf, { RegXMM, RegYMM }
+vsha512msg2, 0xf2cd, SHA512, Modrm|Vex256|Space0F38|VexW0|NoSuf, { RegYMM, RegYMM }
+
+// SHA512 instructions end.
+
// VPCLMULQDQ instructions
vpclmulqdq, 0x6644, VPCLMULQDQ, Modrm|Vex256|Space0F3A|VexWIG|VexVVVV|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }