[v2] Support Intel SHA512

Message ID 20230718075412.1304548-1-haochen.jiang@intel.com
State Unresolved
Headers
Series [v2] Support Intel SHA512 |

Checks

Context Check Description
snail/binutils-gdb-check warning Git am fail log

Commit Message

Jiang, Haochen July 18, 2023, 7:54 a.m. UTC
  Hi all,

This is the v2 patch for SHA512 with the following changes compared to
the initial patch:

1. Added invalid test in disassem.[ds] and [x86-64-]sha512-inval.[ls].

2. Changed the implied dependency of SHA512 from AVX to AVX2.

3. Moved the SHA512 entry next to SHA. Put Modrm first in the table.

4. Used Rxmmq instead of going through the mod table. Also renamed Uymm to Rymm.

Thx,
Haochen

gas/ChangeLog:

	* NEWS: Support Intel SHA512.
	* config/tc-i386.c: Add sha512.
	* doc/c-i386.texi: Document .sha512.
	* testsuite/gas/i386/disassem.d: Add SHA512 tests.
	* testsuite/gas/i386/disassem.s: Ditto.
	* testsuite/gas/i386/i386.exp: Run SHA512 tests.
	* testsuite/gas/i386/x86-64.exp: Ditto.
	* testsuite/gas/i386/sha512-intel.d: New test.
	* testsuite/gas/i386/sha512-inval.l: Ditto.
	* testsuite/gas/i386/sha512-inval.s: Ditto.
	* testsuite/gas/i386/sha512.d: Ditto.
	* testsuite/gas/i386/sha512.s: Ditto.
	* testsuite/gas/i386/x86-64-sha512-intel.d: Ditto.
	* testsuite/gas/i386/x86-64-sha512-inval.l: Ditto.
	* testsuite/gas/i386/x86-64-sha512-inval.s: Ditto.
	* testsuite/gas/i386/x86-64-sha512.d: Ditto.
	* testsuite/gas/i386/x86-64-sha512.s: Ditto.

opcodes/ChangeLog:

	* i386-dis.c (Rxmmq): New.
	(Rymm): Ditto.
	(MOD_VEX_0F38CB_P_3_W_0_L_1): Ditto.
	(MOD_VEX_0F38CC_P_3_W_0_L_1): Ditto.
	(PREFIX_VEX_0F38CB): Ditto.
	(PREFIX_VEX_0F38CC): Ditto.
	(PREFIX_VEX_0F38CD): Ditto.
	(VEX_LEN_0F38CB_P_3_W_0): Ditto.
	(VEX_LEN_0F38CC_P_3_W_0): Ditto.
	(VEX_LEN_0F38CD_P_3_W_0): Ditto.
	(VEX_W_0F38CB_P_3): Ditto.
	(VEX_W_0F38CC_P_3): Ditto.
	(VEX_W_0F38CD_P_3): Ditto.
	(mod_table): Add MOD_VEX_0F38CB_P_3_W_0_L_1, MOD_VEX_0F38CC_P_3_W_0_L_1.
	(prefix_table): Add PREFIX_VEX_0F38CB, PREFIX_VEX_0F38CC,
	PREFIX_VEX_0F38CD.
	(vex_len_table): Add VEX_LEN_0F38CB_P_3_W_0,
	VEX_LEN_0F38CC_P_3_W_0, VEX_LEN_0F38CD_P_3_W_0.
	(vex_w_table): Add VEX_W_0F38CB_P_3, VEX_W_0F38CC_P_3, VEX_W_0F38CD_P_3.
	* i386-gen.c (isa_dependencies): Add SHA512.
	(cpu_flags): Ditto.
	* i386-init.h: Regenerated.
	* i386-mnem.h: Ditto.
	* i386-opc.h (CpuSHA512): New.
	(i386_cpu_flags): Add cpusha512.
	* i386-opc.tbl: Add SHA512 instructions.
	* i386-tbl.h: Regenerated.
---
 gas/NEWS                                     |    2 +
 gas/config/tc-i386.c                         |    1 +
 gas/doc/c-i386.texi                          |    3 +-
 gas/testsuite/gas/i386/disassem.d            |    6 +
 gas/testsuite/gas/i386/disassem.s            |    6 +
 gas/testsuite/gas/i386/i386.exp              |    3 +
 gas/testsuite/gas/i386/sha512-intel.d        |   16 +
 gas/testsuite/gas/i386/sha512-inval.l        |    4 +
 gas/testsuite/gas/i386/sha512-inval.s        |    8 +
 gas/testsuite/gas/i386/sha512.d              |   16 +
 gas/testsuite/gas/i386/sha512.s              |   13 +
 gas/testsuite/gas/i386/x86-64-sha512-intel.d |   16 +
 gas/testsuite/gas/i386/x86-64-sha512-inval.l |    4 +
 gas/testsuite/gas/i386/x86-64-sha512-inval.s |    8 +
 gas/testsuite/gas/i386/x86-64-sha512.d       |   16 +
 gas/testsuite/gas/i386/x86-64-sha512.s       |   13 +
 gas/testsuite/gas/i386/x86-64.exp            |    3 +
 opcodes/i386-dis.c                           |   79 +-
 opcodes/i386-gen.c                           |    3 +
 opcodes/i386-init.h                          |  776 +-
 opcodes/i386-mnem.h                          | 3949 ++++----
 opcodes/i386-opc.h                           |    3 +
 opcodes/i386-opc.tbl                         |    8 +
 opcodes/i386-tbl.h                           | 9447 +++++++++---------
 24 files changed, 7351 insertions(+), 7052 deletions(-)
 create mode 100644 gas/testsuite/gas/i386/sha512-intel.d
 create mode 100644 gas/testsuite/gas/i386/sha512-inval.l
 create mode 100644 gas/testsuite/gas/i386/sha512-inval.s
 create mode 100644 gas/testsuite/gas/i386/sha512.d
 create mode 100644 gas/testsuite/gas/i386/sha512.s
 create mode 100644 gas/testsuite/gas/i386/x86-64-sha512-intel.d
 create mode 100644 gas/testsuite/gas/i386/x86-64-sha512-inval.l
 create mode 100644 gas/testsuite/gas/i386/x86-64-sha512-inval.s
 create mode 100644 gas/testsuite/gas/i386/x86-64-sha512.d
 create mode 100644 gas/testsuite/gas/i386/x86-64-sha512.s
  

Comments

Frager, Neal via Binutils July 18, 2023, 7:59 a.m. UTC | #1
> diff --git a/opcodes/i386-gen.c b/opcodes/i386-gen.c index
> 6ad7d6951db..70843eb251f 100644
> --- a/opcodes/i386-gen.c
> +++ b/opcodes/i386-gen.c
> @@ -214,6 +214,8 @@ static const dependency isa_dependencies[] =
>      "XSAVE" },
>    { "SHA",
>      "SSE2" },
> +  { "SHA512",
> +    "AVX2" },
>    { "XSAVES",
>      "XSAVEC" },
>    { "XSAVEC",
> @@ -369,6 +371,7 @@ static bitfield cpu_flags[] =
>    BITFIELD (RAO_INT),
>    BITFIELD (FRED),
>    BITFIELD (LKGS),
> +  BITFIELD (SHA512),

Oops, I sent an outdated patch here; it should also have been moved next to SHA.

You can see that in the SM3/SM4 patches.

Thx,
Haochen

>    BITFIELD (MWAITX),
>    BITFIELD (CLZERO),
>    BITFIELD (OSPKE),
  
Jan Beulich July 18, 2023, 8:51 a.m. UTC | #2
On 18.07.2023 09:54, Haochen Jiang wrote:
> --- a/gas/testsuite/gas/i386/disassem.s
> +++ b/gas/testsuite/gas/i386/disassem.s
> @@ -168,6 +168,12 @@
>  .byte 0xC4, 0xE1, 0xF9, 0x93, 0x6F
>  	.insn VEX.L0.66.0f.W1 0x93, (%edi), %k7
>  .byte 0xc4, 0xe2, 0x1, 0x1c, 0x41, 0x37
> +	.insn VEX.L1.F2.0f38.W0 0xCC, (%ecx), %ymm1
> +.fill 0x5, 0x1, 0x90
> +	.insn VEX.L1.F2.0f38.W0 0xCD, (%ecx), %ymm1
> +.fill 0x5, 0x1, 0x90
> +	.insn VEX.L1.F2.0f38.W0 0xCB, (%ecx), %ymm2, %ymm1
> +.fill 0x5, 0x1, 0x90

In new additions here (and to similar files) please can you avoid
- .fill / .byte and alike whenever possible,
- unindented directives?
The latter is purely style, I know, but strictly speaking directives
should never start in the first column. Present gas, presumably for
historical reasons, simply is overly forgiving in this regard.

To deal with the former, more careful selection of operands is all
it takes. With how the disassembler presently works, what you want
is that the nominal ModR/M byte disassembles as a single-byte opcode.
That's very easy to achieve: Opcodes 40-5f (50-5f for 64-bit) are all
single-byte, i.e. you won't need much more than ModR/M.mod = 1, i.e.
the Disp8 encoding form with a displacement that then also
disassembles as a single-byte opcode.

Alternatively (and perhaps even better) you can arrange for ModR/M
bytes of 69, 6a, 6b, or 70-7f, with a suitable displacement byte
(any will do afaict for 6a and 70-7f, while 69 and 6b would require
the top two bits to be set).

> --- /dev/null
> +++ b/gas/testsuite/gas/i386/sha512-inval.l
> @@ -0,0 +1,4 @@
> +.* Assembler messages:
> +.*:6: Error: operand size mismatch for `vsha512msg1'
> +.*:7: Error: operand size mismatch for `vsha512msg2'
> +.*:8: Error: operand size mismatch for `vsha512rnds2'

Just as a remark, no action expected from your side: This of course
isn't the correct error message to be emitted here. It should be
"type", not "size". You _may_ want to replace "size" by ".*" to
allow for a future assembler adjustment without the need to touch
this testcase again.

> --- /dev/null
> +++ b/gas/testsuite/gas/i386/sha512.d
> @@ -0,0 +1,16 @@
> +#as:

What purpose does this line (present in several of the tests) have?

> --- /dev/null
> +++ b/gas/testsuite/gas/i386/sha512.s
> @@ -0,0 +1,13 @@
> +# Check 32bit SHA512 instructions
> +
> +	.allow_index_reg

This doesn't look to be needed either.

> +	.text
> +_start:
> +	vsha512msg1	%xmm5, %ymm6	 #SHA512
> +	vsha512msg2	%ymm5, %ymm6	 #SHA512
> +	vsha512rnds2	%xmm4, %ymm5, %ymm6	 #SHA512
> +
> +.intel_syntax noprefix

See remark above about indentation of directives.

> --- /dev/null
> +++ b/gas/testsuite/gas/i386/x86-64-sha512.s
> @@ -0,0 +1,13 @@
> +# Check 64bit SHA512 instructions
> +
> +	.allow_index_reg
> +	.text
> +_start:
> +	vsha512msg1	%xmm5, %ymm6	 #SHA512
> +	vsha512msg2	%ymm5, %ymm6	 #SHA512
> +	vsha512rnds2	%xmm4, %ymm5, %ymm6	 #SHA512
> +
> +.intel_syntax noprefix
> +	vsha512msg1	ymm6, xmm5	 #SHA512
> +	vsha512msg2	ymm6, ymm5	 #SHA512
> +	vsha512rnds2	ymm6, ymm5, xmm4	 #SHA512

Maybe worthwhile to use higher register numbers as well, e.g.

_start:
	vsha512msg1	%xmm14, %ymm5	 #SHA512
	vsha512msg2	%ymm4, %ymm15	 #SHA512
	vsha512rnds2	%xmm6, %ymm5, %ymm14	 #SHA512

	.intel_syntax noprefix
	vsha512msg1	ymm14, xmm5	 #SHA512
	vsha512msg2	ymm6, ymm15	 #SHA512
	vsha512rnds2	ymm6, ymm5, xmm14	 #SHA512

?

Jan
  
Frager, Neal via Binutils July 20, 2023, 8:32 a.m. UTC | #3
> > --- /dev/null
> > +++ b/gas/testsuite/gas/i386/sha512.d
> > @@ -0,0 +1,16 @@
> > +#as:
> 
> What purpose does this line (present in several of the tests) have?
> 
> > --- /dev/null
> > +++ b/gas/testsuite/gas/i386/sha512.s
> > @@ -0,0 +1,13 @@
> > +# Check 32bit SHA512 instructions
> > +
> > +	.allow_index_reg
> 
> This doesn't look to be needed either.
> 

We use a script to generate the testcases, so these two lines are there to
fit all circumstances, since the script does not know what a new ISA will
need. (The former is for extra options to as; the latter is for index
registers.)

We could omit them, but one thing I need to mention is that there are
also some redundant things in all the existing testcases. If we want to
eliminate all of them, some may need careful manual work. I am
wondering whether it is worth the time to change all of them. Therefore, I
propose not omitting them, to stay aligned with the existing testcases,
since it is not wrong.

All the other points mentioned in the review have been fixed in my v3
patch, which will be sent out later.

Thx,
Haochen
  
Jan Beulich July 20, 2023, 10:37 a.m. UTC | #4
On 20.07.2023 10:32, Jiang, Haochen wrote:
>>> --- /dev/null
>>> +++ b/gas/testsuite/gas/i386/sha512.d
>>> @@ -0,0 +1,16 @@
>>> +#as:
>>
>> What purpose does this line (present in several of the tests) have?
>>
>>> --- /dev/null
>>> +++ b/gas/testsuite/gas/i386/sha512.s
>>> @@ -0,0 +1,13 @@
>>> +# Check 32bit SHA512 instructions
>>> +
>>> +	.allow_index_reg
>>
>> This doesn't look to be needed either.
>>
> 
> We use script to generate the testcases so these two are to fit all
> circumstances since actually script does not know what will happen
> for a new ISA. (The former is for some extra option in as the latter is
> for index reg.)
> 
> We could omit that but one thing I need to mention is that there are
> also some redundant things in all the existing testcases. If we want to
> eliminate all of them, some may need careful manual work. I am
> wondering if that is time-worthy to change all of them. Therefore, I
> propose not to omit that to keep align with all the testcases since it
> is not wrong. 

I guess H.J. was more permissive in what he allowed in. I'm concerned
about pieces in testcases which aren't relevant: It easily raises questions
of why things are there. I'd be happy to - over time - clean up that
aspect as well in the testsuite, just like I've been cleaning up other
oddities. I'd prefer if new testcases contained just what is needed in
there for the test to fulfill its purpose.

> All the other mentioned in the review has been fixed in my v3 patch
> which will be sent out later.

Thanks; looks like it wasn't marked as being v3.

Jan
  

Patch

diff --git a/gas/NEWS b/gas/NEWS
index 5e9ed5ab4bc..fe2c055fa7f 100644
--- a/gas/NEWS
+++ b/gas/NEWS
@@ -1,5 +1,7 @@ 
 -*- text -*-
 
+* Add support for Intel SHA512 instructions.
+
 * Add support for Intel AVX-VNNI-INT16 instructions.
 
 Changes in 2.41:
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index 0d3d7560efe..836640d9123 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -1152,6 +1152,7 @@  static const arch_entry cpu_arch[] =
   SUBARCH (fred, FRED, ANY_FRED, false),
   SUBARCH (lkgs, LKGS, ANY_LKGS, false),
   SUBARCH (avx_vnni_int16, AVX_VNNI_INT16, ANY_AVX_VNNI_INT16, false),
+  SUBARCH (sha512, SHA512, ANY_SHA512, false),
 };
 
 #undef SUBARCH
diff --git a/gas/doc/c-i386.texi b/gas/doc/c-i386.texi
index 40ba942d9cb..21fb71e54ab 100644
--- a/gas/doc/c-i386.texi
+++ b/gas/doc/c-i386.texi
@@ -208,6 +208,7 @@  accept various extension mnemonics.  For example,
 @code{fred},
 @code{lkgs},
 @code{avx_vnni_int16},
+@code{sha512},
 @code{amx_int8},
 @code{amx_bf16},
 @code{amx_fp16},
@@ -1637,7 +1638,7 @@  supported on the CPU specified.  The choices for @var{cpu_type} are:
 @item @samp{.prefetchi} @tab @samp{.avx_ifma} @tab @samp{.avx_vnni_int8}
 @item @samp{.cmpccxadd} @tab @samp{.wrmsrns} @tab @samp{.msrlist}
 @item @samp{.avx_ne_convert} @tab @samp{.rao_int} @tab @samp{.fred} @tab @samp{.lkgs}
-@item @samp{.avx_vnni_int16}
+@item @samp{.avx_vnni_int16} @tab @samp{.sha512}
 @item @samp{.wbnoinvd} @tab @samp{.pconfig} @tab @samp{.waitpkg} @tab @samp{.cldemote}
 @item @samp{.shstk} @tab @samp{.gfni} @tab @samp{.vaes} @tab @samp{.vpclmulqdq}
 @item @samp{.movdiri} @tab @samp{.movdir64b} @tab @samp{.enqcmd} @tab @samp{.tsxldtrk}
diff --git a/gas/testsuite/gas/i386/disassem.d b/gas/testsuite/gas/i386/disassem.d
index 8ee0a664e0b..f528d8ab169 100644
--- a/gas/testsuite/gas/i386/disassem.d
+++ b/gas/testsuite/gas/i386/disassem.d
@@ -345,6 +345,12 @@  Disassembly of section \.text:
 [ 	]*[a-f0-9]+:[ 	]*c4 e2 01 1c[ 	]*\(bad\)
 [ 	]*[a-f0-9]+:[ 	]*41[ 	]*inc[ 	]*%ecx
 [ 	]*[a-f0-9]+:[ 	]*37[ 	]*aaa
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 7f cc[ 	]+vsha512msg1[ 	]*\(bad\),.*
+[ 	]*[a-f0-9]+:[ 	]*09 90 90 90 90 90[ 	]+or.*
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 7f cd[ 	]+vsha512msg2[ 	]*\(bad\),.*
+[ 	]*[a-f0-9]+:[ 	]*09 90 90 90 90 90[ 	]+or.*
+[ 	]*[a-f0-9]+:[ 	]*c4 e2 6f cb[ 	]+vsha512rnds2[ 	]*\(bad\),.*
+[ 	]*[a-f0-9]+:[ 	]*09 90 90 90 90 90[ 	]+or.*
 [ 	]*[a-f0-9]+:[ 	]*62 f2 ad 08 1c[ 	]*\(bad\)
 [ 	]*[a-f0-9]+:[ 	]*01 01[ 	]*add[ 	]*%eax,\(%ecx\)
 [ 	]*[a-f0-9]+:[ 	]*62 f3 7d 28 1b[ 	]*\(bad\)
diff --git a/gas/testsuite/gas/i386/disassem.s b/gas/testsuite/gas/i386/disassem.s
index c74a9353933..eeeb38974dd 100644
--- a/gas/testsuite/gas/i386/disassem.s
+++ b/gas/testsuite/gas/i386/disassem.s
@@ -168,6 +168,12 @@ 
 .byte 0xC4, 0xE1, 0xF9, 0x93, 0x6F
 	.insn VEX.L0.66.0f.W1 0x93, (%edi), %k7
 .byte 0xc4, 0xe2, 0x1, 0x1c, 0x41, 0x37
+	.insn VEX.L1.F2.0f38.W0 0xCC, (%ecx), %ymm1
+.fill 0x5, 0x1, 0x90
+	.insn VEX.L1.F2.0f38.W0 0xCD, (%ecx), %ymm1
+.fill 0x5, 0x1, 0x90
+	.insn VEX.L1.F2.0f38.W0 0xCB, (%ecx), %ymm2, %ymm1
+.fill 0x5, 0x1, 0x90
 .byte 0x62, 0xf2, 0xad, 0x08, 0x1c, 0x01
 .byte 0x1
 	.insn EVEX.66.0f3a.W0 0x1b, $0x25, %ymm0, %xmm1
diff --git a/gas/testsuite/gas/i386/i386.exp b/gas/testsuite/gas/i386/i386.exp
index b69c692cd16..1208d5372d7 100644
--- a/gas/testsuite/gas/i386/i386.exp
+++ b/gas/testsuite/gas/i386/i386.exp
@@ -498,6 +498,9 @@  if [gas_32_check] then {
     run_list_test "amx-complex-inval"
     run_dump_test "avx-vnni-int16"
     run_dump_test "avx-vnni-int16-intel"
+    run_dump_test "sha512"
+    run_dump_test "sha512-intel"
+    run_list_test "sha512-inval"
     run_list_test "sg"
     run_dump_test "clzero"
     run_dump_test "invlpgb"
diff --git a/gas/testsuite/gas/i386/sha512-intel.d b/gas/testsuite/gas/i386/sha512-intel.d
new file mode 100644
index 00000000000..c1cc85b9f26
--- /dev/null
+++ b/gas/testsuite/gas/i386/sha512-intel.d
@@ -0,0 +1,16 @@ 
+#as:
+#objdump: -dw -Mintel
+#name: i386 SHA512 insns (Intel disassembly)
+#source: sha512.s
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*c4 e2 7f cc f5\s+vsha512msg1 ymm6,xmm5
+\s*[a-f0-9]+:\s*c4 e2 7f cd f5\s+vsha512msg2 ymm6,ymm5
+\s*[a-f0-9]+:\s*c4 e2 57 cb f4\s+vsha512rnds2 ymm6,ymm5,xmm4
+\s*[a-f0-9]+:\s*c4 e2 7f cc f5\s+vsha512msg1 ymm6,xmm5
+\s*[a-f0-9]+:\s*c4 e2 7f cd f5\s+vsha512msg2 ymm6,ymm5
+\s*[a-f0-9]+:\s*c4 e2 57 cb f4\s+vsha512rnds2 ymm6,ymm5,xmm4
diff --git a/gas/testsuite/gas/i386/sha512-inval.l b/gas/testsuite/gas/i386/sha512-inval.l
new file mode 100644
index 00000000000..6d9455fd741
--- /dev/null
+++ b/gas/testsuite/gas/i386/sha512-inval.l
@@ -0,0 +1,4 @@ 
+.* Assembler messages:
+.*:6: Error: operand size mismatch for `vsha512msg1'
+.*:7: Error: operand size mismatch for `vsha512msg2'
+.*:8: Error: operand size mismatch for `vsha512rnds2'
diff --git a/gas/testsuite/gas/i386/sha512-inval.s b/gas/testsuite/gas/i386/sha512-inval.s
new file mode 100644
index 00000000000..d3ae819c563
--- /dev/null
+++ b/gas/testsuite/gas/i386/sha512-inval.s
@@ -0,0 +1,8 @@ 
+# Check Illegal SHA512 instructions
+
+	.allow_index_reg
+	.text
+_start:
+	vsha512msg1	(%ecx), %ymm6
+	vsha512msg2	(%ecx), %ymm6
+	vsha512rnds2	(%ecx), %ymm5, %ymm6
diff --git a/gas/testsuite/gas/i386/sha512.d b/gas/testsuite/gas/i386/sha512.d
new file mode 100644
index 00000000000..b90019954ea
--- /dev/null
+++ b/gas/testsuite/gas/i386/sha512.d
@@ -0,0 +1,16 @@ 
+#as:
+#objdump: -dw
+#name: i386 SHA512 insns
+#source: sha512.s
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*c4 e2 7f cc f5\s+vsha512msg1 %xmm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 7f cd f5\s+vsha512msg2 %ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 57 cb f4\s+vsha512rnds2 %xmm4,%ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 7f cc f5\s+vsha512msg1 %xmm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 7f cd f5\s+vsha512msg2 %ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 57 cb f4\s+vsha512rnds2 %xmm4,%ymm5,%ymm6
diff --git a/gas/testsuite/gas/i386/sha512.s b/gas/testsuite/gas/i386/sha512.s
new file mode 100644
index 00000000000..e238c272970
--- /dev/null
+++ b/gas/testsuite/gas/i386/sha512.s
@@ -0,0 +1,13 @@ 
+# Check 32bit SHA512 instructions
+
+	.allow_index_reg
+	.text
+_start:
+	vsha512msg1	%xmm5, %ymm6	 #SHA512
+	vsha512msg2	%ymm5, %ymm6	 #SHA512
+	vsha512rnds2	%xmm4, %ymm5, %ymm6	 #SHA512
+
+.intel_syntax noprefix
+	vsha512msg1	ymm6, xmm5	 #SHA512
+	vsha512msg2	ymm6, ymm5	 #SHA512
+	vsha512rnds2	ymm6, ymm5, xmm4	 #SHA512
diff --git a/gas/testsuite/gas/i386/x86-64-sha512-intel.d b/gas/testsuite/gas/i386/x86-64-sha512-intel.d
new file mode 100644
index 00000000000..e644168e311
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-sha512-intel.d
@@ -0,0 +1,16 @@ 
+#as:
+#objdump: -dw -Mintel
+#name: x86_64 SHA512 insns (Intel disassembly)
+#source: x86-64-sha512.s
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*c4 e2 7f cc f5\s+vsha512msg1 ymm6,xmm5
+\s*[a-f0-9]+:\s*c4 e2 7f cd f5\s+vsha512msg2 ymm6,ymm5
+\s*[a-f0-9]+:\s*c4 e2 57 cb f4\s+vsha512rnds2 ymm6,ymm5,xmm4
+\s*[a-f0-9]+:\s*c4 e2 7f cc f5\s+vsha512msg1 ymm6,xmm5
+\s*[a-f0-9]+:\s*c4 e2 7f cd f5\s+vsha512msg2 ymm6,ymm5
+\s*[a-f0-9]+:\s*c4 e2 57 cb f4\s+vsha512rnds2 ymm6,ymm5,xmm4
diff --git a/gas/testsuite/gas/i386/x86-64-sha512-inval.l b/gas/testsuite/gas/i386/x86-64-sha512-inval.l
new file mode 100644
index 00000000000..6d9455fd741
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-sha512-inval.l
@@ -0,0 +1,4 @@ 
+.* Assembler messages:
+.*:6: Error: operand size mismatch for `vsha512msg1'
+.*:7: Error: operand size mismatch for `vsha512msg2'
+.*:8: Error: operand size mismatch for `vsha512rnds2'
diff --git a/gas/testsuite/gas/i386/x86-64-sha512-inval.s b/gas/testsuite/gas/i386/x86-64-sha512-inval.s
new file mode 100644
index 00000000000..d3ae819c563
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-sha512-inval.s
@@ -0,0 +1,8 @@ 
+# Check Illegal SHA512 instructions
+
+	.allow_index_reg
+	.text
+_start:
+	vsha512msg1	(%ecx), %ymm6
+	vsha512msg2	(%ecx), %ymm6
+	vsha512rnds2	(%ecx), %ymm5, %ymm6
diff --git a/gas/testsuite/gas/i386/x86-64-sha512.d b/gas/testsuite/gas/i386/x86-64-sha512.d
new file mode 100644
index 00000000000..fcb8ae61fee
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-sha512.d
@@ -0,0 +1,16 @@ 
+#as:
+#objdump: -dw
+#name: x86_64 SHA512 insns
+#source: x86-64-sha512.s
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*c4 e2 7f cc f5\s+vsha512msg1 %xmm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 7f cd f5\s+vsha512msg2 %ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 57 cb f4\s+vsha512rnds2 %xmm4,%ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 7f cc f5\s+vsha512msg1 %xmm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 7f cd f5\s+vsha512msg2 %ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 57 cb f4\s+vsha512rnds2 %xmm4,%ymm5,%ymm6
diff --git a/gas/testsuite/gas/i386/x86-64-sha512.s b/gas/testsuite/gas/i386/x86-64-sha512.s
new file mode 100644
index 00000000000..5eaadb3bade
--- /dev/null
+++ b/gas/testsuite/gas/i386/x86-64-sha512.s
@@ -0,0 +1,13 @@ 
+# Check 64bit SHA512 instructions
+
+	.allow_index_reg
+	.text
+_start:
+	vsha512msg1	%xmm5, %ymm6	 #SHA512
+	vsha512msg2	%ymm5, %ymm6	 #SHA512
+	vsha512rnds2	%xmm4, %ymm5, %ymm6	 #SHA512
+
+.intel_syntax noprefix
+	vsha512msg1	ymm6, xmm5	 #SHA512
+	vsha512msg2	ymm6, ymm5	 #SHA512
+	vsha512rnds2	ymm6, ymm5, xmm4	 #SHA512
diff --git a/gas/testsuite/gas/i386/x86-64.exp b/gas/testsuite/gas/i386/x86-64.exp
index 0f2903c6185..c6ec9be3d43 100644
--- a/gas/testsuite/gas/i386/x86-64.exp
+++ b/gas/testsuite/gas/i386/x86-64.exp
@@ -440,6 +440,9 @@  run_dump_test "x86-64-lkgs"
 run_list_test "x86-64-lkgs-inval"
 run_dump_test "x86-64-avx-vnni-int16"
 run_dump_test "x86-64-avx-vnni-int16-intel"
+run_dump_test "x86-64-sha512"
+run_dump_test "x86-64-sha512-intel"
+run_list_test "x86-64-sha512-inval"
 run_dump_test "x86-64-clzero"
 run_dump_test "x86-64-mwaitx-bdver4"
 run_list_test "x86-64-mwaitx-reg"
diff --git a/opcodes/i386-dis.c b/opcodes/i386-dis.c
index 36a839d1652..0043b62f324 100644
--- a/opcodes/i386-dis.c
+++ b/opcodes/i386-dis.c
@@ -530,6 +530,8 @@  fetch_error (const instr_info *ins)
 #define Nq { OP_R, q_mode }
 #define Ux { OP_R, x_mode }
 #define Uxmm { OP_R, xmm_mode }
+#define Rxmmq { OP_R, xmmq_mode }
+#define Rymm { OP_R, ymm_mode }
 #define Rtmm { OP_R, tmm_mode }
 #define EMCq { OP_EMC, q_mode }
 #define MXC { OP_MXC, 0 }
@@ -1064,6 +1066,9 @@  enum
   PREFIX_VEX_0F38B1_W_0,
   PREFIX_VEX_0F38D2_W_0,
   PREFIX_VEX_0F38D3_W_0,
+  PREFIX_VEX_0F38CB,
+  PREFIX_VEX_0F38CC,
+  PREFIX_VEX_0F38CD,
   PREFIX_VEX_0F38F5_L_0,
   PREFIX_VEX_0F38F6_L_0,
   PREFIX_VEX_0F38F7_L_0,
@@ -1306,6 +1311,9 @@  enum
   VEX_LEN_0F385C_X86_64,
   VEX_LEN_0F385E_X86_64,
   VEX_LEN_0F386C_X86_64,
+  VEX_LEN_0F38CB_P_3_W_0,
+  VEX_LEN_0F38CC_P_3_W_0,
+  VEX_LEN_0F38CD_P_3_W_0,
   VEX_LEN_0F38DB,
   VEX_LEN_0F38F2,
   VEX_LEN_0F38F3,
@@ -1473,6 +1481,9 @@  enum
   VEX_W_0F38B1,
   VEX_W_0F38B4,
   VEX_W_0F38B5,
+  VEX_W_0F38CB_P_3,
+  VEX_W_0F38CC_P_3,
+  VEX_W_0F38CD_P_3,
   VEX_W_0F38CF,
   VEX_W_0F38D2,
   VEX_W_0F38D3,
@@ -3928,6 +3939,30 @@  static const struct dis386 prefix_table[][4] = {
     { "vpdpwusds",	{ XM, Vex, EXx }, 0 },
   },
 
+  /* PREFIX_VEX_0F38CB */
+  {
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { VEX_W_TABLE (VEX_W_0F38CB_P_3) },
+  },
+
+  /* PREFIX_VEX_0F38CC */
+  {
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { VEX_W_TABLE (VEX_W_0F38CC_P_3) },
+  },
+
+  /* PREFIX_VEX_0F38CD */
+  {
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { Bad_Opcode },
+    { VEX_W_TABLE (VEX_W_0F38CD_P_3) },
+  },
+
   /* PREFIX_VEX_0F38F5_L_0 */
   {
     { "bzhiS",		{ Gdq, Edq, VexGdq }, 0 },
@@ -6380,9 +6415,9 @@  static const struct dis386 vex_table[][256] = {
     { Bad_Opcode },
     { Bad_Opcode },
     { Bad_Opcode },
-    { Bad_Opcode },
-    { Bad_Opcode },
-    { Bad_Opcode },
+    { PREFIX_TABLE (PREFIX_VEX_0F38CB) },
+    { PREFIX_TABLE (PREFIX_VEX_0F38CC) },
+    { PREFIX_TABLE (PREFIX_VEX_0F38CD) },
     { Bad_Opcode },
     { VEX_W_TABLE (VEX_W_0F38CF) },
     /* d0 */
@@ -6944,6 +6979,24 @@  static const struct dis386 vex_len_table[][2] = {
     { VEX_W_TABLE (VEX_W_0F386C_X86_64_L_0) },
   },
 
+  /* VEX_LEN_0F38CB_P_3_W_0 */
+  {
+    { Bad_Opcode },
+    { "vsha512rnds2", { XM, Vex, Rxmmq }, 0 },
+  },
+
+  /* VEX_LEN_0F38CC_P_3_W_0 */
+  {
+    { Bad_Opcode },
+    { "vsha512msg1", { XM, Rxmmq }, 0 },
+  },
+
+  /* VEX_LEN_0F38CD_P_3_W_0 */
+  {
+    { Bad_Opcode },
+    { "vsha512msg2", { XM, Rymm }, 0 },
+  },
+
   /* VEX_LEN_0F38DB */
   {
     { "vaesimc",	{ XM, EXx }, PREFIX_DATA },
@@ -7614,6 +7667,18 @@  static const struct dis386 vex_w_table[][2] = {
     { Bad_Opcode },
     { "%XVvpmadd52huq",	{ XM, Vex, EXx }, PREFIX_DATA },
   },
+  {
+    /* VEX_W_0F38CB_P_3 */
+    { VEX_LEN_TABLE (VEX_LEN_0F38CB_P_3_W_0) },
+  },
+  {
+    /* VEX_W_0F38CC_P_3 */
+    { VEX_LEN_TABLE (VEX_LEN_0F38CC_P_3_W_0) },
+  },
+  {
+    /* VEX_W_0F38CD_P_3 */
+    { VEX_LEN_TABLE (VEX_LEN_0F38CD_P_3_W_0) },
+  },
   {
     /* VEX_W_0F38CF */
     { "%XEvgf2p8mulb", { XM, Vex, EXx }, PREFIX_DATA },
@@ -8055,6 +8120,14 @@  static const struct dis386 mod_table[][2] = {
     { PREFIX_TABLE (PREFIX_VEX_0F3849_X86_64_L_0_W_0_M_0) },
     { PREFIX_TABLE (PREFIX_VEX_0F3849_X86_64_L_0_W_0_M_1) },
   },
+  {
+    /* MOD_VEX_0F38CB_P_3_W_0_L_1 */
+    { Bad_Opcode },
+  },
+  {
+    /* MOD_VEX_0F38CC_P_3_W_0_L_1 */
+    { Bad_Opcode },
+  },
 
 #include "i386-dis-evex-mod.h"
 };
diff --git a/opcodes/i386-gen.c b/opcodes/i386-gen.c
index 6ad7d6951db..70843eb251f 100644
--- a/opcodes/i386-gen.c
+++ b/opcodes/i386-gen.c
@@ -214,6 +214,8 @@  static const dependency isa_dependencies[] =
     "XSAVE" },
   { "SHA",
     "SSE2" },
+  { "SHA512",
+    "AVX2" },
   { "XSAVES",
     "XSAVEC" },
   { "XSAVEC",
@@ -369,6 +371,7 @@  static bitfield cpu_flags[] =
   BITFIELD (RAO_INT),
   BITFIELD (FRED),
   BITFIELD (LKGS),
+  BITFIELD (SHA512),
   BITFIELD (MWAITX),
   BITFIELD (CLZERO),
   BITFIELD (OSPKE),
diff --git a/opcodes/i386-opc.h b/opcodes/i386-opc.h
index f9a68b4c513..b3359e47aa6 100644
--- a/opcodes/i386-opc.h
+++ b/opcodes/i386-opc.h
@@ -175,6 +175,8 @@  enum
   CpuSMAP,
   /* SHA instructions required.  */
   CpuSHA,
+  /* SHA512 instructions required.  */
+  CpuSHA512,
   /* CLFLUSHOPT instruction required */
   CpuClflushOpt,
   /* XSAVES/XRSTORS instruction required */
@@ -403,6 +405,7 @@  typedef union i386_cpu_flags
       unsigned int cpuprfchw:1;
       unsigned int cpusmap:1;
       unsigned int cpusha:1;
+      unsigned int cpusha512:1;
       unsigned int cpuclflushopt:1;
       unsigned int cpuxsaves:1;
       unsigned int cpuxsavec:1;
diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl
index f62e5280982..c9a5730f90a 100644
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -2043,6 +2043,14 @@  sha256rnds2, 0xf38cb, SHA, Modrm|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
 sha256msg1, 0xf38cc, SHA, Modrm|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
 sha256msg2, 0xf38cd, SHA, Modrm|NoSuf, { RegXMM|Unspecified|BaseIndex, RegXMM }
 
+// SHA512 instructions.
+
+vsha512rnds2, 0xf2cb, SHA512, Modrm|Vex256|Space0F38|VexVVVV|VexW0|NoSuf, { RegXMM, RegYMM, RegYMM }
+vsha512msg1, 0xf2cc, SHA512, Modrm|Vex256|Space0F38|VexW0|NoSuf, { RegXMM, RegYMM }
+vsha512msg2, 0xf2cd, SHA512, Modrm|Vex256|Space0F38|VexW0|NoSuf, { RegYMM, RegYMM }
+
+// SHA512 instructions end.
+
 // VPCLMULQDQ instructions
 
 vpclmulqdq, 0x6644, VPCLMULQDQ, Modrm|Vex256|Space0F3A|VexWIG|VexVVVV|NoSuf, { Imm8|Imm8S, Unspecified|BaseIndex|RegYMM, RegYMM, RegYMM }