@@ -1,5 +1,7 @@
-*- text -*-
+* Add support for Intel AVX-VNNI-INT8 instructions.
+
* Add support for Intel AVX-IFMA instructions.
* gas now supports --compress-debug-sections=zstd to compress
@@ -1095,6 +1095,7 @@ static const arch_entry cpu_arch[] =
SUBARCH (hreset, HRESET, ANY_HRESET, false),
SUBARCH (avx512_fp16, AVX512_FP16, ANY_AVX512_FP16, false),
SUBARCH (avx_ifma, AVX_IFMA, ANY_AVX_IFMA, false),
+ SUBARCH (avx_vnni_int8, AVX_VNNI_INT8, ANY_AVX_VNNI_INT8, false),
};
#undef SUBARCH
@@ -214,6 +214,7 @@ accept various extension mnemonics. For example,
@code{avx_vnni},
@code{avx512_fp16},
@code{avx_ifma},
+@code{avx_vnni_int8},
@code{noavx512f},
@code{noavx512cd},
@code{noavx512er},
@@ -235,6 +236,7 @@ accept various extension mnemonics. For example,
@code{noavx_vnni},
@code{noavx512_fp16},
@code{noavx_ifma},
+@code{noavx_vnni_int8},
@code{noenqcmd},
@code{noserialize},
@code{notsxldtrk},
@@ -1535,7 +1537,7 @@ supported on the CPU specified. The choices for @var{cpu_type} are:
@item @samp{.avx512_bitalg} @tab @samp{.avx512_bf16} @tab @samp{.avx512_vp2intersect}
@item @samp{.tdx} @tab @samp{.avx_vnni} @tab @samp{.avx512_fp16}
@item @samp{.clwb} @tab @samp{.rdpid} @tab @samp{.ptwrite} @tab @samp{.ibt}
-@item @samp{.avx_ifma}
+@item @samp{.avx_ifma} @tab @samp{.avx_vnni_int8}
@item @samp{.wbnoinvd} @tab @samp{.pconfig} @tab @samp{.waitpkg} @tab @samp{.cldemote}
@item @samp{.shstk} @tab @samp{.gfni} @tab @samp{.vaes} @tab @samp{.vpclmulqdq}
@item @samp{.movdiri} @tab @samp{.movdir64b} @tab @samp{.enqcmd} @tab @samp{.tsxldtrk}
new file mode 100644
@@ -0,0 +1,71 @@
+#as:
+#objdump: -dw -Mintel
+#name: i386 AVX-VNNI-INT8 insns (Intel disassembly)
+#source: avx-vnni-int8.s
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*c4 e2 57 50 f4\s+vpdpbssd ymm6,ymm5,ymm4
+\s*[a-f0-9]+:\s*c4 e2 53 50 f4\s+vpdpbssd xmm6,xmm5,xmm4
+\s*[a-f0-9]+:\s*c4 e2 57 50 b4 f4 00 00 00 10\s+vpdpbssd ymm6,ymm5,YMMWORD PTR \[esp\+esi\*8\+0x10000000\]
+\s*[a-f0-9]+:\s*c4 e2 57 50 31\s+vpdpbssd ymm6,ymm5,YMMWORD PTR \[ecx\]
+\s*[a-f0-9]+:\s*c4 e2 57 50 b1 e0 0f 00 00\s+vpdpbssd ymm6,ymm5,YMMWORD PTR \[ecx\+0xfe0\]
+\s*[a-f0-9]+:\s*c4 e2 57 50 b2 00 f0 ff ff\s+vpdpbssd ymm6,ymm5,YMMWORD PTR \[edx-0x1000\]
+\s*[a-f0-9]+:\s*c4 e2 53 50 b4 f4 00 00 00 10\s+vpdpbssd xmm6,xmm5,XMMWORD PTR \[esp\+esi\*8\+0x10000000\]
+\s*[a-f0-9]+:\s*c4 e2 53 50 31\s+vpdpbssd xmm6,xmm5,XMMWORD PTR \[ecx\]
+\s*[a-f0-9]+:\s*c4 e2 53 50 b1 f0 07 00 00\s+vpdpbssd xmm6,xmm5,XMMWORD PTR \[ecx\+0x7f0\]
+\s*[a-f0-9]+:\s*c4 e2 53 50 b2 00 f8 ff ff\s+vpdpbssd xmm6,xmm5,XMMWORD PTR \[edx-0x800\]
+\s*[a-f0-9]+:\s*c4 e2 57 51 f4\s+vpdpbssds ymm6,ymm5,ymm4
+\s*[a-f0-9]+:\s*c4 e2 53 51 f4\s+vpdpbssds xmm6,xmm5,xmm4
+\s*[a-f0-9]+:\s*c4 e2 57 51 b4 f4 00 00 00 10\s+vpdpbssds ymm6,ymm5,YMMWORD PTR \[esp\+esi\*8\+0x10000000\]
+\s*[a-f0-9]+:\s*c4 e2 57 51 31\s+vpdpbssds ymm6,ymm5,YMMWORD PTR \[ecx\]
+\s*[a-f0-9]+:\s*c4 e2 57 51 b1 e0 0f 00 00\s+vpdpbssds ymm6,ymm5,YMMWORD PTR \[ecx\+0xfe0\]
+\s*[a-f0-9]+:\s*c4 e2 57 51 b2 00 f0 ff ff\s+vpdpbssds ymm6,ymm5,YMMWORD PTR \[edx-0x1000\]
+\s*[a-f0-9]+:\s*c4 e2 53 51 b4 f4 00 00 00 10\s+vpdpbssds xmm6,xmm5,XMMWORD PTR \[esp\+esi\*8\+0x10000000\]
+\s*[a-f0-9]+:\s*c4 e2 53 51 31\s+vpdpbssds xmm6,xmm5,XMMWORD PTR \[ecx\]
+\s*[a-f0-9]+:\s*c4 e2 53 51 b1 f0 07 00 00\s+vpdpbssds xmm6,xmm5,XMMWORD PTR \[ecx\+0x7f0\]
+\s*[a-f0-9]+:\s*c4 e2 53 51 b2 00 f8 ff ff\s+vpdpbssds xmm6,xmm5,XMMWORD PTR \[edx-0x800\]
+\s*[a-f0-9]+:\s*c4 e2 56 50 f4\s+vpdpbsud ymm6,ymm5,ymm4
+\s*[a-f0-9]+:\s*c4 e2 52 50 f4\s+vpdpbsud xmm6,xmm5,xmm4
+\s*[a-f0-9]+:\s*c4 e2 56 50 b4 f4 00 00 00 10\s+vpdpbsud ymm6,ymm5,YMMWORD PTR \[esp\+esi\*8\+0x10000000\]
+\s*[a-f0-9]+:\s*c4 e2 56 50 31\s+vpdpbsud ymm6,ymm5,YMMWORD PTR \[ecx\]
+\s*[a-f0-9]+:\s*c4 e2 56 50 b1 e0 0f 00 00\s+vpdpbsud ymm6,ymm5,YMMWORD PTR \[ecx\+0xfe0\]
+\s*[a-f0-9]+:\s*c4 e2 56 50 b2 00 f0 ff ff\s+vpdpbsud ymm6,ymm5,YMMWORD PTR \[edx-0x1000\]
+\s*[a-f0-9]+:\s*c4 e2 52 50 b4 f4 00 00 00 10\s+vpdpbsud xmm6,xmm5,XMMWORD PTR \[esp\+esi\*8\+0x10000000\]
+\s*[a-f0-9]+:\s*c4 e2 52 50 31\s+vpdpbsud xmm6,xmm5,XMMWORD PTR \[ecx\]
+\s*[a-f0-9]+:\s*c4 e2 52 50 b1 f0 07 00 00\s+vpdpbsud xmm6,xmm5,XMMWORD PTR \[ecx\+0x7f0\]
+\s*[a-f0-9]+:\s*c4 e2 52 50 b2 00 f8 ff ff\s+vpdpbsud xmm6,xmm5,XMMWORD PTR \[edx-0x800\]
+\s*[a-f0-9]+:\s*c4 e2 56 51 f4\s+vpdpbsuds ymm6,ymm5,ymm4
+\s*[a-f0-9]+:\s*c4 e2 52 51 f4\s+vpdpbsuds xmm6,xmm5,xmm4
+\s*[a-f0-9]+:\s*c4 e2 56 51 b4 f4 00 00 00 10\s+vpdpbsuds ymm6,ymm5,YMMWORD PTR \[esp\+esi\*8\+0x10000000\]
+\s*[a-f0-9]+:\s*c4 e2 56 51 31\s+vpdpbsuds ymm6,ymm5,YMMWORD PTR \[ecx\]
+\s*[a-f0-9]+:\s*c4 e2 56 51 b1 e0 0f 00 00\s+vpdpbsuds ymm6,ymm5,YMMWORD PTR \[ecx\+0xfe0\]
+\s*[a-f0-9]+:\s*c4 e2 56 51 b2 00 f0 ff ff\s+vpdpbsuds ymm6,ymm5,YMMWORD PTR \[edx-0x1000\]
+\s*[a-f0-9]+:\s*c4 e2 52 51 b4 f4 00 00 00 10\s+vpdpbsuds xmm6,xmm5,XMMWORD PTR \[esp\+esi\*8\+0x10000000\]
+\s*[a-f0-9]+:\s*c4 e2 52 51 31\s+vpdpbsuds xmm6,xmm5,XMMWORD PTR \[ecx\]
+\s*[a-f0-9]+:\s*c4 e2 52 51 b1 f0 07 00 00\s+vpdpbsuds xmm6,xmm5,XMMWORD PTR \[ecx\+0x7f0\]
+\s*[a-f0-9]+:\s*c4 e2 52 51 b2 00 f8 ff ff\s+vpdpbsuds xmm6,xmm5,XMMWORD PTR \[edx-0x800\]
+\s*[a-f0-9]+:\s*c4 e2 54 50 f4\s+vpdpbuud ymm6,ymm5,ymm4
+\s*[a-f0-9]+:\s*c4 e2 50 50 f4\s+vpdpbuud xmm6,xmm5,xmm4
+\s*[a-f0-9]+:\s*c4 e2 54 50 b4 f4 00 00 00 10\s+vpdpbuud ymm6,ymm5,YMMWORD PTR \[esp\+esi\*8\+0x10000000\]
+\s*[a-f0-9]+:\s*c4 e2 54 50 31\s+vpdpbuud ymm6,ymm5,YMMWORD PTR \[ecx\]
+\s*[a-f0-9]+:\s*c4 e2 54 50 b1 e0 0f 00 00\s+vpdpbuud ymm6,ymm5,YMMWORD PTR \[ecx\+0xfe0\]
+\s*[a-f0-9]+:\s*c4 e2 54 50 b2 00 f0 ff ff\s+vpdpbuud ymm6,ymm5,YMMWORD PTR \[edx-0x1000\]
+\s*[a-f0-9]+:\s*c4 e2 50 50 b4 f4 00 00 00 10\s+vpdpbuud xmm6,xmm5,XMMWORD PTR \[esp\+esi\*8\+0x10000000\]
+\s*[a-f0-9]+:\s*c4 e2 50 50 31\s+vpdpbuud xmm6,xmm5,XMMWORD PTR \[ecx\]
+\s*[a-f0-9]+:\s*c4 e2 50 50 b1 f0 07 00 00\s+vpdpbuud xmm6,xmm5,XMMWORD PTR \[ecx\+0x7f0\]
+\s*[a-f0-9]+:\s*c4 e2 50 50 b2 00 f8 ff ff\s+vpdpbuud xmm6,xmm5,XMMWORD PTR \[edx-0x800\]
+\s*[a-f0-9]+:\s*c4 e2 54 51 f4\s+vpdpbuuds ymm6,ymm5,ymm4
+\s*[a-f0-9]+:\s*c4 e2 50 51 f4\s+vpdpbuuds xmm6,xmm5,xmm4
+\s*[a-f0-9]+:\s*c4 e2 54 51 b4 f4 00 00 00 10\s+vpdpbuuds ymm6,ymm5,YMMWORD PTR \[esp\+esi\*8\+0x10000000\]
+\s*[a-f0-9]+:\s*c4 e2 54 51 31\s+vpdpbuuds ymm6,ymm5,YMMWORD PTR \[ecx\]
+\s*[a-f0-9]+:\s*c4 e2 54 51 b1 e0 0f 00 00\s+vpdpbuuds ymm6,ymm5,YMMWORD PTR \[ecx\+0xfe0\]
+\s*[a-f0-9]+:\s*c4 e2 54 51 b2 00 f0 ff ff\s+vpdpbuuds ymm6,ymm5,YMMWORD PTR \[edx-0x1000\]
+\s*[a-f0-9]+:\s*c4 e2 50 51 b4 f4 00 00 00 10\s+vpdpbuuds xmm6,xmm5,XMMWORD PTR \[esp\+esi\*8\+0x10000000\]
+\s*[a-f0-9]+:\s*c4 e2 50 51 31\s+vpdpbuuds xmm6,xmm5,XMMWORD PTR \[ecx\]
+\s*[a-f0-9]+:\s*c4 e2 50 51 b1 f0 07 00 00\s+vpdpbuuds xmm6,xmm5,XMMWORD PTR \[ecx\+0x7f0\]
+\s*[a-f0-9]+:\s*c4 e2 50 51 b2 00 f8 ff ff\s+vpdpbuuds xmm6,xmm5,XMMWORD PTR \[edx-0x800\]
+#pass
new file mode 100644
@@ -0,0 +1,71 @@
+#as:
+#objdump: -dw
+#name: i386 AVX-VNNI-INT8 insns
+#source: avx-vnni-int8.s
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*c4 e2 57 50 f4\s+vpdpbssd %ymm4,%ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 53 50 f4\s+vpdpbssd %xmm4,%xmm5,%xmm6
+\s*[a-f0-9]+:\s*c4 e2 57 50 b4 f4 00 00 00 10\s+vpdpbssd 0x10000000\(%esp,%esi,8\),%ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 57 50 31\s+vpdpbssd \(%ecx\),%ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 57 50 b1 e0 0f 00 00\s+vpdpbssd 0xfe0\(%ecx\),%ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 57 50 b2 00 f0 ff ff\s+vpdpbssd -0x1000\(%edx\),%ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 53 50 b4 f4 00 00 00 10\s+vpdpbssd 0x10000000\(%esp,%esi,8\),%xmm5,%xmm6
+\s*[a-f0-9]+:\s*c4 e2 53 50 31\s+vpdpbssd \(%ecx\),%xmm5,%xmm6
+\s*[a-f0-9]+:\s*c4 e2 53 50 b1 f0 07 00 00\s+vpdpbssd 0x7f0\(%ecx\),%xmm5,%xmm6
+\s*[a-f0-9]+:\s*c4 e2 53 50 b2 00 f8 ff ff\s+vpdpbssd -0x800\(%edx\),%xmm5,%xmm6
+\s*[a-f0-9]+:\s*c4 e2 57 51 f4\s+vpdpbssds %ymm4,%ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 53 51 f4\s+vpdpbssds %xmm4,%xmm5,%xmm6
+\s*[a-f0-9]+:\s*c4 e2 57 51 b4 f4 00 00 00 10\s+vpdpbssds 0x10000000\(%esp,%esi,8\),%ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 57 51 31\s+vpdpbssds \(%ecx\),%ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 57 51 b1 e0 0f 00 00\s+vpdpbssds 0xfe0\(%ecx\),%ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 57 51 b2 00 f0 ff ff\s+vpdpbssds -0x1000\(%edx\),%ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 53 51 b4 f4 00 00 00 10\s+vpdpbssds 0x10000000\(%esp,%esi,8\),%xmm5,%xmm6
+\s*[a-f0-9]+:\s*c4 e2 53 51 31\s+vpdpbssds \(%ecx\),%xmm5,%xmm6
+\s*[a-f0-9]+:\s*c4 e2 53 51 b1 f0 07 00 00\s+vpdpbssds 0x7f0\(%ecx\),%xmm5,%xmm6
+\s*[a-f0-9]+:\s*c4 e2 53 51 b2 00 f8 ff ff\s+vpdpbssds -0x800\(%edx\),%xmm5,%xmm6
+\s*[a-f0-9]+:\s*c4 e2 56 50 f4\s+vpdpbsud %ymm4,%ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 52 50 f4\s+vpdpbsud %xmm4,%xmm5,%xmm6
+\s*[a-f0-9]+:\s*c4 e2 56 50 b4 f4 00 00 00 10\s+vpdpbsud 0x10000000\(%esp,%esi,8\),%ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 56 50 31\s+vpdpbsud \(%ecx\),%ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 56 50 b1 e0 0f 00 00\s+vpdpbsud 0xfe0\(%ecx\),%ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 56 50 b2 00 f0 ff ff\s+vpdpbsud -0x1000\(%edx\),%ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 52 50 b4 f4 00 00 00 10\s+vpdpbsud 0x10000000\(%esp,%esi,8\),%xmm5,%xmm6
+\s*[a-f0-9]+:\s*c4 e2 52 50 31\s+vpdpbsud \(%ecx\),%xmm5,%xmm6
+\s*[a-f0-9]+:\s*c4 e2 52 50 b1 f0 07 00 00\s+vpdpbsud 0x7f0\(%ecx\),%xmm5,%xmm6
+\s*[a-f0-9]+:\s*c4 e2 52 50 b2 00 f8 ff ff\s+vpdpbsud -0x800\(%edx\),%xmm5,%xmm6
+\s*[a-f0-9]+:\s*c4 e2 56 51 f4\s+vpdpbsuds %ymm4,%ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 52 51 f4\s+vpdpbsuds %xmm4,%xmm5,%xmm6
+\s*[a-f0-9]+:\s*c4 e2 56 51 b4 f4 00 00 00 10\s+vpdpbsuds 0x10000000\(%esp,%esi,8\),%ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 56 51 31\s+vpdpbsuds \(%ecx\),%ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 56 51 b1 e0 0f 00 00\s+vpdpbsuds 0xfe0\(%ecx\),%ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 56 51 b2 00 f0 ff ff\s+vpdpbsuds -0x1000\(%edx\),%ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 52 51 b4 f4 00 00 00 10\s+vpdpbsuds 0x10000000\(%esp,%esi,8\),%xmm5,%xmm6
+\s*[a-f0-9]+:\s*c4 e2 52 51 31\s+vpdpbsuds \(%ecx\),%xmm5,%xmm6
+\s*[a-f0-9]+:\s*c4 e2 52 51 b1 f0 07 00 00\s+vpdpbsuds 0x7f0\(%ecx\),%xmm5,%xmm6
+\s*[a-f0-9]+:\s*c4 e2 52 51 b2 00 f8 ff ff\s+vpdpbsuds -0x800\(%edx\),%xmm5,%xmm6
+\s*[a-f0-9]+:\s*c4 e2 54 50 f4\s+vpdpbuud %ymm4,%ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 50 50 f4\s+vpdpbuud %xmm4,%xmm5,%xmm6
+\s*[a-f0-9]+:\s*c4 e2 54 50 b4 f4 00 00 00 10\s+vpdpbuud 0x10000000\(%esp,%esi,8\),%ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 54 50 31\s+vpdpbuud \(%ecx\),%ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 54 50 b1 e0 0f 00 00\s+vpdpbuud 0xfe0\(%ecx\),%ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 54 50 b2 00 f0 ff ff\s+vpdpbuud -0x1000\(%edx\),%ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 50 50 b4 f4 00 00 00 10\s+vpdpbuud 0x10000000\(%esp,%esi,8\),%xmm5,%xmm6
+\s*[a-f0-9]+:\s*c4 e2 50 50 31\s+vpdpbuud \(%ecx\),%xmm5,%xmm6
+\s*[a-f0-9]+:\s*c4 e2 50 50 b1 f0 07 00 00\s+vpdpbuud 0x7f0\(%ecx\),%xmm5,%xmm6
+\s*[a-f0-9]+:\s*c4 e2 50 50 b2 00 f8 ff ff\s+vpdpbuud -0x800\(%edx\),%xmm5,%xmm6
+\s*[a-f0-9]+:\s*c4 e2 54 51 f4\s+vpdpbuuds %ymm4,%ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 50 51 f4\s+vpdpbuuds %xmm4,%xmm5,%xmm6
+\s*[a-f0-9]+:\s*c4 e2 54 51 b4 f4 00 00 00 10\s+vpdpbuuds 0x10000000\(%esp,%esi,8\),%ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 54 51 31\s+vpdpbuuds \(%ecx\),%ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 54 51 b1 e0 0f 00 00\s+vpdpbuuds 0xfe0\(%ecx\),%ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 54 51 b2 00 f0 ff ff\s+vpdpbuuds -0x1000\(%edx\),%ymm5,%ymm6
+\s*[a-f0-9]+:\s*c4 e2 50 51 b4 f4 00 00 00 10\s+vpdpbuuds 0x10000000\(%esp,%esi,8\),%xmm5,%xmm6
+\s*[a-f0-9]+:\s*c4 e2 50 51 31\s+vpdpbuuds \(%ecx\),%xmm5,%xmm6
+\s*[a-f0-9]+:\s*c4 e2 50 51 b1 f0 07 00 00\s+vpdpbuuds 0x7f0\(%ecx\),%xmm5,%xmm6
+\s*[a-f0-9]+:\s*c4 e2 50 51 b2 00 f8 ff ff\s+vpdpbuuds -0x800\(%edx\),%xmm5,%xmm6
+#pass
new file mode 100644
@@ -0,0 +1,127 @@
+# Check 32-bit AVX-VNNI-INT8 instructions (AT&T syntax, then Intel syntax)
+
+ .allow_index_reg
+ .text
+_start:
+ vpdpbssd %ymm4, %ymm5, %ymm6 #AVX-VNNI-INT8
+ vpdpbssd %xmm4, %xmm5, %xmm6 #AVX-VNNI-INT8
+ vpdpbssd 0x10000000(%esp, %esi, 8), %ymm5, %ymm6 #AVX-VNNI-INT8
+ vpdpbssd (%ecx), %ymm5, %ymm6 #AVX-VNNI-INT8
+ vpdpbssd 4064(%ecx), %ymm5, %ymm6 #AVX-VNNI-INT8 Disp32(e00f0000)
+ vpdpbssd -4096(%edx), %ymm5, %ymm6 #AVX-VNNI-INT8 Disp32(00f0ffff)
+ vpdpbssd 0x10000000(%esp, %esi, 8), %xmm5, %xmm6 #AVX-VNNI-INT8
+ vpdpbssd (%ecx), %xmm5, %xmm6 #AVX-VNNI-INT8
+ vpdpbssd 2032(%ecx), %xmm5, %xmm6 #AVX-VNNI-INT8 Disp32(f0070000)
+ vpdpbssd -2048(%edx), %xmm5, %xmm6 #AVX-VNNI-INT8 Disp32(00f8ffff)
+ vpdpbssds %ymm4, %ymm5, %ymm6 #AVX-VNNI-INT8
+ vpdpbssds %xmm4, %xmm5, %xmm6 #AVX-VNNI-INT8
+ vpdpbssds 0x10000000(%esp, %esi, 8), %ymm5, %ymm6 #AVX-VNNI-INT8
+ vpdpbssds (%ecx), %ymm5, %ymm6 #AVX-VNNI-INT8
+ vpdpbssds 4064(%ecx), %ymm5, %ymm6 #AVX-VNNI-INT8 Disp32(e00f0000)
+ vpdpbssds -4096(%edx), %ymm5, %ymm6 #AVX-VNNI-INT8 Disp32(00f0ffff)
+ vpdpbssds 0x10000000(%esp, %esi, 8), %xmm5, %xmm6 #AVX-VNNI-INT8
+ vpdpbssds (%ecx), %xmm5, %xmm6 #AVX-VNNI-INT8
+ vpdpbssds 2032(%ecx), %xmm5, %xmm6 #AVX-VNNI-INT8 Disp32(f0070000)
+ vpdpbssds -2048(%edx), %xmm5, %xmm6 #AVX-VNNI-INT8 Disp32(00f8ffff)
+ vpdpbsud %ymm4, %ymm5, %ymm6 #AVX-VNNI-INT8
+ vpdpbsud %xmm4, %xmm5, %xmm6 #AVX-VNNI-INT8
+ vpdpbsud 0x10000000(%esp, %esi, 8), %ymm5, %ymm6 #AVX-VNNI-INT8
+ vpdpbsud (%ecx), %ymm5, %ymm6 #AVX-VNNI-INT8
+ vpdpbsud 4064(%ecx), %ymm5, %ymm6 #AVX-VNNI-INT8 Disp32(e00f0000)
+ vpdpbsud -4096(%edx), %ymm5, %ymm6 #AVX-VNNI-INT8 Disp32(00f0ffff)
+ vpdpbsud 0x10000000(%esp, %esi, 8), %xmm5, %xmm6 #AVX-VNNI-INT8
+ vpdpbsud (%ecx), %xmm5, %xmm6 #AVX-VNNI-INT8
+ vpdpbsud 2032(%ecx), %xmm5, %xmm6 #AVX-VNNI-INT8 Disp32(f0070000)
+ vpdpbsud -2048(%edx), %xmm5, %xmm6 #AVX-VNNI-INT8 Disp32(00f8ffff)
+ vpdpbsuds %ymm4, %ymm5, %ymm6 #AVX-VNNI-INT8
+ vpdpbsuds %xmm4, %xmm5, %xmm6 #AVX-VNNI-INT8
+ vpdpbsuds 0x10000000(%esp, %esi, 8), %ymm5, %ymm6 #AVX-VNNI-INT8
+ vpdpbsuds (%ecx), %ymm5, %ymm6 #AVX-VNNI-INT8
+ vpdpbsuds 4064(%ecx), %ymm5, %ymm6 #AVX-VNNI-INT8 Disp32(e00f0000)
+ vpdpbsuds -4096(%edx), %ymm5, %ymm6 #AVX-VNNI-INT8 Disp32(00f0ffff)
+ vpdpbsuds 0x10000000(%esp, %esi, 8), %xmm5, %xmm6 #AVX-VNNI-INT8
+ vpdpbsuds (%ecx), %xmm5, %xmm6 #AVX-VNNI-INT8
+ vpdpbsuds 2032(%ecx), %xmm5, %xmm6 #AVX-VNNI-INT8 Disp32(f0070000)
+ vpdpbsuds -2048(%edx), %xmm5, %xmm6 #AVX-VNNI-INT8 Disp32(00f8ffff)
+ vpdpbuud %ymm4, %ymm5, %ymm6 #AVX-VNNI-INT8
+ vpdpbuud %xmm4, %xmm5, %xmm6 #AVX-VNNI-INT8
+ vpdpbuud 0x10000000(%esp, %esi, 8), %ymm5, %ymm6 #AVX-VNNI-INT8
+ vpdpbuud (%ecx), %ymm5, %ymm6 #AVX-VNNI-INT8
+ vpdpbuud 4064(%ecx), %ymm5, %ymm6 #AVX-VNNI-INT8 Disp32(e00f0000)
+ vpdpbuud -4096(%edx), %ymm5, %ymm6 #AVX-VNNI-INT8 Disp32(00f0ffff)
+ vpdpbuud 0x10000000(%esp, %esi, 8), %xmm5, %xmm6 #AVX-VNNI-INT8
+ vpdpbuud (%ecx), %xmm5, %xmm6 #AVX-VNNI-INT8
+ vpdpbuud 2032(%ecx), %xmm5, %xmm6 #AVX-VNNI-INT8 Disp32(f0070000)
+ vpdpbuud -2048(%edx), %xmm5, %xmm6 #AVX-VNNI-INT8 Disp32(00f8ffff)
+ vpdpbuuds %ymm4, %ymm5, %ymm6 #AVX-VNNI-INT8
+ vpdpbuuds %xmm4, %xmm5, %xmm6 #AVX-VNNI-INT8
+ vpdpbuuds 0x10000000(%esp, %esi, 8), %ymm5, %ymm6 #AVX-VNNI-INT8
+ vpdpbuuds (%ecx), %ymm5, %ymm6 #AVX-VNNI-INT8
+ vpdpbuuds 4064(%ecx), %ymm5, %ymm6 #AVX-VNNI-INT8 Disp32(e00f0000)
+ vpdpbuuds -4096(%edx), %ymm5, %ymm6 #AVX-VNNI-INT8 Disp32(00f0ffff)
+ vpdpbuuds 0x10000000(%esp, %esi, 8), %xmm5, %xmm6 #AVX-VNNI-INT8
+ vpdpbuuds (%ecx), %xmm5, %xmm6 #AVX-VNNI-INT8
+ vpdpbuuds 2032(%ecx), %xmm5, %xmm6 #AVX-VNNI-INT8 Disp32(f0070000)
+ vpdpbuuds -2048(%edx), %xmm5, %xmm6 #AVX-VNNI-INT8 Disp32(00f8ffff)
+
+.intel_syntax noprefix
+ vpdpbssd ymm6, ymm5, ymm4 #AVX-VNNI-INT8
+ vpdpbssd xmm6, xmm5, xmm4 #AVX-VNNI-INT8
+ vpdpbssd ymm6, ymm5, YMMWORD PTR [esp+esi*8+0x10000000] #AVX-VNNI-INT8
+ vpdpbssd ymm6, ymm5, YMMWORD PTR [ecx] #AVX-VNNI-INT8
+ vpdpbssd ymm6, ymm5, YMMWORD PTR [ecx+4064] #AVX-VNNI-INT8 Disp32(e00f0000)
+ vpdpbssd ymm6, ymm5, YMMWORD PTR [edx-4096] #AVX-VNNI-INT8 Disp32(00f0ffff)
+ vpdpbssd xmm6, xmm5, XMMWORD PTR [esp+esi*8+0x10000000] #AVX-VNNI-INT8
+ vpdpbssd xmm6, xmm5, XMMWORD PTR [ecx] #AVX-VNNI-INT8
+ vpdpbssd xmm6, xmm5, XMMWORD PTR [ecx+2032] #AVX-VNNI-INT8 Disp32(f0070000)
+ vpdpbssd xmm6, xmm5, XMMWORD PTR [edx-2048] #AVX-VNNI-INT8 Disp32(00f8ffff)
+ vpdpbssds ymm6, ymm5, ymm4 #AVX-VNNI-INT8
+ vpdpbssds xmm6, xmm5, xmm4 #AVX-VNNI-INT8
+ vpdpbssds ymm6, ymm5, YMMWORD PTR [esp+esi*8+0x10000000] #AVX-VNNI-INT8
+ vpdpbssds ymm6, ymm5, YMMWORD PTR [ecx] #AVX-VNNI-INT8
+ vpdpbssds ymm6, ymm5, YMMWORD PTR [ecx+4064] #AVX-VNNI-INT8 Disp32(e00f0000)
+ vpdpbssds ymm6, ymm5, YMMWORD PTR [edx-4096] #AVX-VNNI-INT8 Disp32(00f0ffff)
+ vpdpbssds xmm6, xmm5, XMMWORD PTR [esp+esi*8+0x10000000] #AVX-VNNI-INT8
+ vpdpbssds xmm6, xmm5, XMMWORD PTR [ecx] #AVX-VNNI-INT8
+ vpdpbssds xmm6, xmm5, XMMWORD PTR [ecx+2032] #AVX-VNNI-INT8 Disp32(f0070000)
+ vpdpbssds xmm6, xmm5, XMMWORD PTR [edx-2048] #AVX-VNNI-INT8 Disp32(00f8ffff)
+ vpdpbsud ymm6, ymm5, ymm4 #AVX-VNNI-INT8
+ vpdpbsud xmm6, xmm5, xmm4 #AVX-VNNI-INT8
+ vpdpbsud ymm6, ymm5, YMMWORD PTR [esp+esi*8+0x10000000] #AVX-VNNI-INT8
+ vpdpbsud ymm6, ymm5, YMMWORD PTR [ecx] #AVX-VNNI-INT8
+ vpdpbsud ymm6, ymm5, YMMWORD PTR [ecx+4064] #AVX-VNNI-INT8 Disp32(e00f0000)
+ vpdpbsud ymm6, ymm5, YMMWORD PTR [edx-4096] #AVX-VNNI-INT8 Disp32(00f0ffff)
+ vpdpbsud xmm6, xmm5, XMMWORD PTR [esp+esi*8+0x10000000] #AVX-VNNI-INT8
+ vpdpbsud xmm6, xmm5, XMMWORD PTR [ecx] #AVX-VNNI-INT8
+ vpdpbsud xmm6, xmm5, XMMWORD PTR [ecx+2032] #AVX-VNNI-INT8 Disp32(f0070000)
+ vpdpbsud xmm6, xmm5, XMMWORD PTR [edx-2048] #AVX-VNNI-INT8 Disp32(00f8ffff)
+ vpdpbsuds ymm6, ymm5, ymm4 #AVX-VNNI-INT8
+ vpdpbsuds xmm6, xmm5, xmm4 #AVX-VNNI-INT8
+ vpdpbsuds ymm6, ymm5, YMMWORD PTR [esp+esi*8+0x10000000] #AVX-VNNI-INT8
+ vpdpbsuds ymm6, ymm5, YMMWORD PTR [ecx] #AVX-VNNI-INT8
+ vpdpbsuds ymm6, ymm5, YMMWORD PTR [ecx+4064] #AVX-VNNI-INT8 Disp32(e00f0000)
+ vpdpbsuds ymm6, ymm5, YMMWORD PTR [edx-4096] #AVX-VNNI-INT8 Disp32(00f0ffff)
+ vpdpbsuds xmm6, xmm5, XMMWORD PTR [esp+esi*8+0x10000000] #AVX-VNNI-INT8
+ vpdpbsuds xmm6, xmm5, XMMWORD PTR [ecx] #AVX-VNNI-INT8
+ vpdpbsuds xmm6, xmm5, XMMWORD PTR [ecx+2032] #AVX-VNNI-INT8 Disp32(f0070000)
+ vpdpbsuds xmm6, xmm5, XMMWORD PTR [edx-2048] #AVX-VNNI-INT8 Disp32(00f8ffff)
+ vpdpbuud ymm6, ymm5, ymm4 #AVX-VNNI-INT8
+ vpdpbuud xmm6, xmm5, xmm4 #AVX-VNNI-INT8
+ vpdpbuud ymm6, ymm5, YMMWORD PTR [esp+esi*8+0x10000000] #AVX-VNNI-INT8
+ vpdpbuud ymm6, ymm5, YMMWORD PTR [ecx] #AVX-VNNI-INT8
+ vpdpbuud ymm6, ymm5, YMMWORD PTR [ecx+4064] #AVX-VNNI-INT8 Disp32(e00f0000)
+ vpdpbuud ymm6, ymm5, YMMWORD PTR [edx-4096] #AVX-VNNI-INT8 Disp32(00f0ffff)
+ vpdpbuud xmm6, xmm5, XMMWORD PTR [esp+esi*8+0x10000000] #AVX-VNNI-INT8
+ vpdpbuud xmm6, xmm5, XMMWORD PTR [ecx] #AVX-VNNI-INT8
+ vpdpbuud xmm6, xmm5, XMMWORD PTR [ecx+2032] #AVX-VNNI-INT8 Disp32(f0070000)
+ vpdpbuud xmm6, xmm5, XMMWORD PTR [edx-2048] #AVX-VNNI-INT8 Disp32(00f8ffff)
+ vpdpbuuds ymm6, ymm5, ymm4 #AVX-VNNI-INT8
+ vpdpbuuds xmm6, xmm5, xmm4 #AVX-VNNI-INT8
+ vpdpbuuds ymm6, ymm5, YMMWORD PTR [esp+esi*8+0x10000000] #AVX-VNNI-INT8
+ vpdpbuuds ymm6, ymm5, YMMWORD PTR [ecx] #AVX-VNNI-INT8
+ vpdpbuuds ymm6, ymm5, YMMWORD PTR [ecx+4064] #AVX-VNNI-INT8 Disp32(e00f0000)
+ vpdpbuuds ymm6, ymm5, YMMWORD PTR [edx-4096] #AVX-VNNI-INT8 Disp32(00f0ffff)
+ vpdpbuuds xmm6, xmm5, XMMWORD PTR [esp+esi*8+0x10000000] #AVX-VNNI-INT8
+ vpdpbuuds xmm6, xmm5, XMMWORD PTR [ecx] #AVX-VNNI-INT8
+ vpdpbuuds xmm6, xmm5, XMMWORD PTR [ecx+2032] #AVX-VNNI-INT8 Disp32(f0070000)
+ vpdpbuuds xmm6, xmm5, XMMWORD PTR [edx-2048] #AVX-VNNI-INT8 Disp32(00f8ffff)
@@ -481,6 +481,8 @@ if [gas_32_check] then {
run_dump_test "avx-ifma"
run_dump_test "avx-ifma-intel"
run_list_test "avx-ifma-inval"
+ run_dump_test "avx-vnni-int8"
+ run_dump_test "avx-vnni-int8-intel"
run_list_test "sg"
run_dump_test "clzero"
run_dump_test "invlpgb"
@@ -1151,6 +1153,8 @@ if [gas_64_check] then {
run_dump_test "x86-64-avx-ifma"
run_dump_test "x86-64-avx-ifma-intel"
run_list_test "x86-64-avx-ifma-inval"
+ run_dump_test "x86-64-avx-vnni-int8"
+ run_dump_test "x86-64-avx-vnni-int8-intel"
run_dump_test "x86-64-clzero"
run_dump_test "x86-64-mwaitx-bdver4"
run_list_test "x86-64-mwaitx-reg"
new file mode 100644
@@ -0,0 +1,71 @@
+#as:
+#objdump: -dw -Mintel
+#name: x86_64 AVX-VNNI-INT8 insns (Intel disassembly)
+#source: x86-64-avx-vnni-int8.s
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*c4 42 37 50 d0\s+vpdpbssd ymm10,ymm9,ymm8
+\s*[a-f0-9]+:\s*c4 42 33 50 d0\s+vpdpbssd xmm10,xmm9,xmm8
+\s*[a-f0-9]+:\s*c4 22 37 50 94 f5 00 00 00 10\s+vpdpbssd ymm10,ymm9,YMMWORD PTR \[rbp\+r14\*8\+0x10000000\]
+\s*[a-f0-9]+:\s*c4 42 37 50 11\s+vpdpbssd ymm10,ymm9,YMMWORD PTR \[r9\]
+\s*[a-f0-9]+:\s*c4 62 37 50 91 e0 0f 00 00\s+vpdpbssd ymm10,ymm9,YMMWORD PTR \[rcx\+0xfe0\]
+\s*[a-f0-9]+:\s*c4 62 37 50 92 00 f0 ff ff\s+vpdpbssd ymm10,ymm9,YMMWORD PTR \[rdx-0x1000\]
+\s*[a-f0-9]+:\s*c4 22 33 50 94 f5 00 00 00 10\s+vpdpbssd xmm10,xmm9,XMMWORD PTR \[rbp\+r14\*8\+0x10000000\]
+\s*[a-f0-9]+:\s*c4 42 33 50 11\s+vpdpbssd xmm10,xmm9,XMMWORD PTR \[r9\]
+\s*[a-f0-9]+:\s*c4 62 33 50 91 f0 07 00 00\s+vpdpbssd xmm10,xmm9,XMMWORD PTR \[rcx\+0x7f0\]
+\s*[a-f0-9]+:\s*c4 62 33 50 92 00 f8 ff ff\s+vpdpbssd xmm10,xmm9,XMMWORD PTR \[rdx-0x800\]
+\s*[a-f0-9]+:\s*c4 42 37 51 d0\s+vpdpbssds ymm10,ymm9,ymm8
+\s*[a-f0-9]+:\s*c4 42 33 51 d0\s+vpdpbssds xmm10,xmm9,xmm8
+\s*[a-f0-9]+:\s*c4 22 37 51 94 f5 00 00 00 10\s+vpdpbssds ymm10,ymm9,YMMWORD PTR \[rbp\+r14\*8\+0x10000000\]
+\s*[a-f0-9]+:\s*c4 42 37 51 11\s+vpdpbssds ymm10,ymm9,YMMWORD PTR \[r9\]
+\s*[a-f0-9]+:\s*c4 62 37 51 91 e0 0f 00 00\s+vpdpbssds ymm10,ymm9,YMMWORD PTR \[rcx\+0xfe0\]
+\s*[a-f0-9]+:\s*c4 62 37 51 92 00 f0 ff ff\s+vpdpbssds ymm10,ymm9,YMMWORD PTR \[rdx-0x1000\]
+\s*[a-f0-9]+:\s*c4 22 33 51 94 f5 00 00 00 10\s+vpdpbssds xmm10,xmm9,XMMWORD PTR \[rbp\+r14\*8\+0x10000000\]
+\s*[a-f0-9]+:\s*c4 42 33 51 11\s+vpdpbssds xmm10,xmm9,XMMWORD PTR \[r9\]
+\s*[a-f0-9]+:\s*c4 62 33 51 91 f0 07 00 00\s+vpdpbssds xmm10,xmm9,XMMWORD PTR \[rcx\+0x7f0\]
+\s*[a-f0-9]+:\s*c4 62 33 51 92 00 f8 ff ff\s+vpdpbssds xmm10,xmm9,XMMWORD PTR \[rdx-0x800\]
+\s*[a-f0-9]+:\s*c4 42 36 50 d0\s+vpdpbsud ymm10,ymm9,ymm8
+\s*[a-f0-9]+:\s*c4 42 32 50 d0\s+vpdpbsud xmm10,xmm9,xmm8
+\s*[a-f0-9]+:\s*c4 22 36 50 94 f5 00 00 00 10\s+vpdpbsud ymm10,ymm9,YMMWORD PTR \[rbp\+r14\*8\+0x10000000\]
+\s*[a-f0-9]+:\s*c4 42 36 50 11\s+vpdpbsud ymm10,ymm9,YMMWORD PTR \[r9\]
+\s*[a-f0-9]+:\s*c4 62 36 50 91 e0 0f 00 00\s+vpdpbsud ymm10,ymm9,YMMWORD PTR \[rcx\+0xfe0\]
+\s*[a-f0-9]+:\s*c4 62 36 50 92 00 f0 ff ff\s+vpdpbsud ymm10,ymm9,YMMWORD PTR \[rdx-0x1000\]
+\s*[a-f0-9]+:\s*c4 22 32 50 94 f5 00 00 00 10\s+vpdpbsud xmm10,xmm9,XMMWORD PTR \[rbp\+r14\*8\+0x10000000\]
+\s*[a-f0-9]+:\s*c4 42 32 50 11\s+vpdpbsud xmm10,xmm9,XMMWORD PTR \[r9\]
+\s*[a-f0-9]+:\s*c4 62 32 50 91 f0 07 00 00\s+vpdpbsud xmm10,xmm9,XMMWORD PTR \[rcx\+0x7f0\]
+\s*[a-f0-9]+:\s*c4 62 32 50 92 00 f8 ff ff\s+vpdpbsud xmm10,xmm9,XMMWORD PTR \[rdx-0x800\]
+\s*[a-f0-9]+:\s*c4 42 36 51 d0\s+vpdpbsuds ymm10,ymm9,ymm8
+\s*[a-f0-9]+:\s*c4 42 32 51 d0\s+vpdpbsuds xmm10,xmm9,xmm8
+\s*[a-f0-9]+:\s*c4 22 36 51 94 f5 00 00 00 10\s+vpdpbsuds ymm10,ymm9,YMMWORD PTR \[rbp\+r14\*8\+0x10000000\]
+\s*[a-f0-9]+:\s*c4 42 36 51 11\s+vpdpbsuds ymm10,ymm9,YMMWORD PTR \[r9\]
+\s*[a-f0-9]+:\s*c4 62 36 51 91 e0 0f 00 00\s+vpdpbsuds ymm10,ymm9,YMMWORD PTR \[rcx\+0xfe0\]
+\s*[a-f0-9]+:\s*c4 62 36 51 92 00 f0 ff ff\s+vpdpbsuds ymm10,ymm9,YMMWORD PTR \[rdx-0x1000\]
+\s*[a-f0-9]+:\s*c4 22 32 51 94 f5 00 00 00 10\s+vpdpbsuds xmm10,xmm9,XMMWORD PTR \[rbp\+r14\*8\+0x10000000\]
+\s*[a-f0-9]+:\s*c4 42 32 51 11\s+vpdpbsuds xmm10,xmm9,XMMWORD PTR \[r9\]
+\s*[a-f0-9]+:\s*c4 62 32 51 91 f0 07 00 00\s+vpdpbsuds xmm10,xmm9,XMMWORD PTR \[rcx\+0x7f0\]
+\s*[a-f0-9]+:\s*c4 62 32 51 92 00 f8 ff ff\s+vpdpbsuds xmm10,xmm9,XMMWORD PTR \[rdx-0x800\]
+\s*[a-f0-9]+:\s*c4 42 34 50 d0\s+vpdpbuud ymm10,ymm9,ymm8
+\s*[a-f0-9]+:\s*c4 42 30 50 d0\s+vpdpbuud xmm10,xmm9,xmm8
+\s*[a-f0-9]+:\s*c4 22 34 50 94 f5 00 00 00 10\s+vpdpbuud ymm10,ymm9,YMMWORD PTR \[rbp\+r14\*8\+0x10000000\]
+\s*[a-f0-9]+:\s*c4 42 34 50 11\s+vpdpbuud ymm10,ymm9,YMMWORD PTR \[r9\]
+\s*[a-f0-9]+:\s*c4 62 34 50 91 e0 0f 00 00\s+vpdpbuud ymm10,ymm9,YMMWORD PTR \[rcx\+0xfe0\]
+\s*[a-f0-9]+:\s*c4 62 34 50 92 00 f0 ff ff\s+vpdpbuud ymm10,ymm9,YMMWORD PTR \[rdx-0x1000\]
+\s*[a-f0-9]+:\s*c4 22 30 50 94 f5 00 00 00 10\s+vpdpbuud xmm10,xmm9,XMMWORD PTR \[rbp\+r14\*8\+0x10000000\]
+\s*[a-f0-9]+:\s*c4 42 30 50 11\s+vpdpbuud xmm10,xmm9,XMMWORD PTR \[r9\]
+\s*[a-f0-9]+:\s*c4 62 30 50 91 f0 07 00 00\s+vpdpbuud xmm10,xmm9,XMMWORD PTR \[rcx\+0x7f0\]
+\s*[a-f0-9]+:\s*c4 62 30 50 92 00 f8 ff ff\s+vpdpbuud xmm10,xmm9,XMMWORD PTR \[rdx-0x800\]
+\s*[a-f0-9]+:\s*c4 42 34 51 d0\s+vpdpbuuds ymm10,ymm9,ymm8
+\s*[a-f0-9]+:\s*c4 42 30 51 d0\s+vpdpbuuds xmm10,xmm9,xmm8
+\s*[a-f0-9]+:\s*c4 22 34 51 94 f5 00 00 00 10\s+vpdpbuuds ymm10,ymm9,YMMWORD PTR \[rbp\+r14\*8\+0x10000000\]
+\s*[a-f0-9]+:\s*c4 42 34 51 11\s+vpdpbuuds ymm10,ymm9,YMMWORD PTR \[r9\]
+\s*[a-f0-9]+:\s*c4 62 34 51 91 e0 0f 00 00\s+vpdpbuuds ymm10,ymm9,YMMWORD PTR \[rcx\+0xfe0\]
+\s*[a-f0-9]+:\s*c4 62 34 51 92 00 f0 ff ff\s+vpdpbuuds ymm10,ymm9,YMMWORD PTR \[rdx-0x1000\]
+\s*[a-f0-9]+:\s*c4 22 30 51 94 f5 00 00 00 10\s+vpdpbuuds xmm10,xmm9,XMMWORD PTR \[rbp\+r14\*8\+0x10000000\]
+\s*[a-f0-9]+:\s*c4 42 30 51 11\s+vpdpbuuds xmm10,xmm9,XMMWORD PTR \[r9\]
+\s*[a-f0-9]+:\s*c4 62 30 51 91 f0 07 00 00\s+vpdpbuuds xmm10,xmm9,XMMWORD PTR \[rcx\+0x7f0\]
+\s*[a-f0-9]+:\s*c4 62 30 51 92 00 f8 ff ff\s+vpdpbuuds xmm10,xmm9,XMMWORD PTR \[rdx-0x800\]
+#pass
new file mode 100644
@@ -0,0 +1,71 @@
+#as:
+#objdump: -dw
+#name: x86_64 AVX-VNNI-INT8 insns
+#source: x86-64-avx-vnni-int8.s
+
+.*: +file format .*
+
+Disassembly of section \.text:
+
+0+ <_start>:
+\s*[a-f0-9]+:\s*c4 42 37 50 d0\s+vpdpbssd %ymm8,%ymm9,%ymm10
+\s*[a-f0-9]+:\s*c4 42 33 50 d0\s+vpdpbssd %xmm8,%xmm9,%xmm10
+\s*[a-f0-9]+:\s*c4 22 37 50 94 f5 00 00 00 10\s+vpdpbssd 0x10000000\(%rbp,%r14,8\),%ymm9,%ymm10
+\s*[a-f0-9]+:\s*c4 42 37 50 11\s+vpdpbssd \(%r9\),%ymm9,%ymm10
+\s*[a-f0-9]+:\s*c4 62 37 50 91 e0 0f 00 00\s+vpdpbssd 0xfe0\(%rcx\),%ymm9,%ymm10
+\s*[a-f0-9]+:\s*c4 62 37 50 92 00 f0 ff ff\s+vpdpbssd -0x1000\(%rdx\),%ymm9,%ymm10
+\s*[a-f0-9]+:\s*c4 22 33 50 94 f5 00 00 00 10\s+vpdpbssd 0x10000000\(%rbp,%r14,8\),%xmm9,%xmm10
+\s*[a-f0-9]+:\s*c4 42 33 50 11\s+vpdpbssd \(%r9\),%xmm9,%xmm10
+\s*[a-f0-9]+:\s*c4 62 33 50 91 f0 07 00 00\s+vpdpbssd 0x7f0\(%rcx\),%xmm9,%xmm10
+\s*[a-f0-9]+:\s*c4 62 33 50 92 00 f8 ff ff\s+vpdpbssd -0x800\(%rdx\),%xmm9,%xmm10
+\s*[a-f0-9]+:\s*c4 42 37 51 d0\s+vpdpbssds %ymm8,%ymm9,%ymm10
+\s*[a-f0-9]+:\s*c4 42 33 51 d0\s+vpdpbssds %xmm8,%xmm9,%xmm10
+\s*[a-f0-9]+:\s*c4 22 37 51 94 f5 00 00 00 10\s+vpdpbssds 0x10000000\(%rbp,%r14,8\),%ymm9,%ymm10
+\s*[a-f0-9]+:\s*c4 42 37 51 11\s+vpdpbssds \(%r9\),%ymm9,%ymm10
+\s*[a-f0-9]+:\s*c4 62 37 51 91 e0 0f 00 00\s+vpdpbssds 0xfe0\(%rcx\),%ymm9,%ymm10
+\s*[a-f0-9]+:\s*c4 62 37 51 92 00 f0 ff ff\s+vpdpbssds -0x1000\(%rdx\),%ymm9,%ymm10
+\s*[a-f0-9]+:\s*c4 22 33 51 94 f5 00 00 00 10\s+vpdpbssds 0x10000000\(%rbp,%r14,8\),%xmm9,%xmm10
+\s*[a-f0-9]+:\s*c4 42 33 51 11\s+vpdpbssds \(%r9\),%xmm9,%xmm10
+\s*[a-f0-9]+:\s*c4 62 33 51 91 f0 07 00 00\s+vpdpbssds 0x7f0\(%rcx\),%xmm9,%xmm10
+\s*[a-f0-9]+:\s*c4 62 33 51 92 00 f8 ff ff\s+vpdpbssds -0x800\(%rdx\),%xmm9,%xmm10
+\s*[a-f0-9]+:\s*c4 42 36 50 d0\s+vpdpbsud %ymm8,%ymm9,%ymm10
+\s*[a-f0-9]+:\s*c4 42 32 50 d0\s+vpdpbsud %xmm8,%xmm9,%xmm10
+\s*[a-f0-9]+:\s*c4 22 36 50 94 f5 00 00 00 10\s+vpdpbsud 0x10000000\(%rbp,%r14,8\),%ymm9,%ymm10
+\s*[a-f0-9]+:\s*c4 42 36 50 11\s+vpdpbsud \(%r9\),%ymm9,%ymm10
+\s*[a-f0-9]+:\s*c4 62 36 50 91 e0 0f 00 00\s+vpdpbsud 0xfe0\(%rcx\),%ymm9,%ymm10
+\s*[a-f0-9]+:\s*c4 62 36 50 92 00 f0 ff ff\s+vpdpbsud -0x1000\(%rdx\),%ymm9,%ymm10
+\s*[a-f0-9]+:\s*c4 22 32 50 94 f5 00 00 00 10\s+vpdpbsud 0x10000000\(%rbp,%r14,8\),%xmm9,%xmm10
+\s*[a-f0-9]+:\s*c4 42 32 50 11\s+vpdpbsud \(%r9\),%xmm9,%xmm10
+\s*[a-f0-9]+:\s*c4 62 32 50 91 f0 07 00 00\s+vpdpbsud 0x7f0\(%rcx\),%xmm9,%xmm10
+\s*[a-f0-9]+:\s*c4 62 32 50 92 00 f8 ff ff\s+vpdpbsud -0x800\(%rdx\),%xmm9,%xmm10
+\s*[a-f0-9]+:\s*c4 42 36 51 d0\s+vpdpbsuds %ymm8,%ymm9,%ymm10
+\s*[a-f0-9]+:\s*c4 42 32 51 d0\s+vpdpbsuds %xmm8,%xmm9,%xmm10
+\s*[a-f0-9]+:\s*c4 22 36 51 94 f5 00 00 00 10\s+vpdpbsuds 0x10000000\(%rbp,%r14,8\),%ymm9,%ymm10
+\s*[a-f0-9]+:\s*c4 42 36 51 11\s+vpdpbsuds \(%r9\),%ymm9,%ymm10
+\s*[a-f0-9]+:\s*c4 62 36 51 91 e0 0f 00 00\s+vpdpbsuds 0xfe0\(%rcx\),%ymm9,%ymm10
+\s*[a-f0-9]+:\s*c4 62 36 51 92 00 f0 ff ff\s+vpdpbsuds -0x1000\(%rdx\),%ymm9,%ymm10
+\s*[a-f0-9]+:\s*c4 22 32 51 94 f5 00 00 00 10\s+vpdpbsuds 0x10000000\(%rbp,%r14,8\),%xmm9,%xmm10
+\s*[a-f0-9]+:\s*c4 42 32 51 11\s+vpdpbsuds \(%r9\),%xmm9,%xmm10
+\s*[a-f0-9]+:\s*c4 62 32 51 91 f0 07 00 00\s+vpdpbsuds 0x7f0\(%rcx\),%xmm9,%xmm10
+\s*[a-f0-9]+:\s*c4 62 32 51 92 00 f8 ff ff\s+vpdpbsuds -0x800\(%rdx\),%xmm9,%xmm10
+\s*[a-f0-9]+:\s*c4 42 34 50 d0\s+vpdpbuud %ymm8,%ymm9,%ymm10
+\s*[a-f0-9]+:\s*c4 42 30 50 d0\s+vpdpbuud %xmm8,%xmm9,%xmm10
+\s*[a-f0-9]+:\s*c4 22 34 50 94 f5 00 00 00 10\s+vpdpbuud 0x10000000\(%rbp,%r14,8\),%ymm9,%ymm10
+\s*[a-f0-9]+:\s*c4 42 34 50 11\s+vpdpbuud \(%r9\),%ymm9,%ymm10
+\s*[a-f0-9]+:\s*c4 62 34 50 91 e0 0f 00 00\s+vpdpbuud 0xfe0\(%rcx\),%ymm9,%ymm10
+\s*[a-f0-9]+:\s*c4 62 34 50 92 00 f0 ff ff\s+vpdpbuud -0x1000\(%rdx\),%ymm9,%ymm10
+\s*[a-f0-9]+:\s*c4 22 30 50 94 f5 00 00 00 10\s+vpdpbuud 0x10000000\(%rbp,%r14,8\),%xmm9,%xmm10
+\s*[a-f0-9]+:\s*c4 42 30 50 11\s+vpdpbuud \(%r9\),%xmm9,%xmm10
+\s*[a-f0-9]+:\s*c4 62 30 50 91 f0 07 00 00\s+vpdpbuud 0x7f0\(%rcx\),%xmm9,%xmm10
+\s*[a-f0-9]+:\s*c4 62 30 50 92 00 f8 ff ff\s+vpdpbuud -0x800\(%rdx\),%xmm9,%xmm10
+\s*[a-f0-9]+:\s*c4 42 34 51 d0\s+vpdpbuuds %ymm8,%ymm9,%ymm10
+\s*[a-f0-9]+:\s*c4 42 30 51 d0\s+vpdpbuuds %xmm8,%xmm9,%xmm10
+\s*[a-f0-9]+:\s*c4 22 34 51 94 f5 00 00 00 10\s+vpdpbuuds 0x10000000\(%rbp,%r14,8\),%ymm9,%ymm10
+\s*[a-f0-9]+:\s*c4 42 34 51 11\s+vpdpbuuds \(%r9\),%ymm9,%ymm10
+\s*[a-f0-9]+:\s*c4 62 34 51 91 e0 0f 00 00\s+vpdpbuuds 0xfe0\(%rcx\),%ymm9,%ymm10
+\s*[a-f0-9]+:\s*c4 62 34 51 92 00 f0 ff ff\s+vpdpbuuds -0x1000\(%rdx\),%ymm9,%ymm10
+\s*[a-f0-9]+:\s*c4 22 30 51 94 f5 00 00 00 10\s+vpdpbuuds 0x10000000\(%rbp,%r14,8\),%xmm9,%xmm10
+\s*[a-f0-9]+:\s*c4 42 30 51 11\s+vpdpbuuds \(%r9\),%xmm9,%xmm10
+\s*[a-f0-9]+:\s*c4 62 30 51 91 f0 07 00 00\s+vpdpbuuds 0x7f0\(%rcx\),%xmm9,%xmm10
+\s*[a-f0-9]+:\s*c4 62 30 51 92 00 f8 ff ff\s+vpdpbuuds -0x800\(%rdx\),%xmm9,%xmm10
+#pass
new file mode 100644
@@ -0,0 +1,127 @@
+# Check 64bit AVX-VNNI-INT8 instructions: vpdpb{ss,su,uu}d[s] with register and memory sources, in AT&T and then Intel syntax.
+# A trailing Disp32(...) note lists the displacement bytes in the order they are expected in the encoding.
+ .allow_index_reg
+ .text
+_start:
+ vpdpbssd %ymm8, %ymm9, %ymm10 #AVX-VNNI-INT8
+ vpdpbssd %xmm8, %xmm9, %xmm10 #AVX-VNNI-INT8
+ vpdpbssd 0x10000000(%rbp, %r14, 8), %ymm9, %ymm10 #AVX-VNNI-INT8
+ vpdpbssd (%r9), %ymm9, %ymm10 #AVX-VNNI-INT8
+ vpdpbssd 4064(%rcx), %ymm9, %ymm10 #AVX-VNNI-INT8 Disp32(e00f0000)
+ vpdpbssd -4096(%rdx), %ymm9, %ymm10 #AVX-VNNI-INT8 Disp32(00f0ffff)
+ vpdpbssd 0x10000000(%rbp, %r14, 8), %xmm9, %xmm10 #AVX-VNNI-INT8
+ vpdpbssd (%r9), %xmm9, %xmm10 #AVX-VNNI-INT8
+ vpdpbssd 2032(%rcx), %xmm9, %xmm10 #AVX-VNNI-INT8 Disp32(f0070000)
+ vpdpbssd -2048(%rdx), %xmm9, %xmm10 #AVX-VNNI-INT8 Disp32(00f8ffff)
+ vpdpbssds %ymm8, %ymm9, %ymm10 #AVX-VNNI-INT8
+ vpdpbssds %xmm8, %xmm9, %xmm10 #AVX-VNNI-INT8
+ vpdpbssds 0x10000000(%rbp, %r14, 8), %ymm9, %ymm10 #AVX-VNNI-INT8
+ vpdpbssds (%r9), %ymm9, %ymm10 #AVX-VNNI-INT8
+ vpdpbssds 4064(%rcx), %ymm9, %ymm10 #AVX-VNNI-INT8 Disp32(e00f0000)
+ vpdpbssds -4096(%rdx), %ymm9, %ymm10 #AVX-VNNI-INT8 Disp32(00f0ffff)
+ vpdpbssds 0x10000000(%rbp, %r14, 8), %xmm9, %xmm10 #AVX-VNNI-INT8
+ vpdpbssds (%r9), %xmm9, %xmm10 #AVX-VNNI-INT8
+ vpdpbssds 2032(%rcx), %xmm9, %xmm10 #AVX-VNNI-INT8 Disp32(f0070000)
+ vpdpbssds -2048(%rdx), %xmm9, %xmm10 #AVX-VNNI-INT8 Disp32(00f8ffff)
+ vpdpbsud %ymm8, %ymm9, %ymm10 #AVX-VNNI-INT8
+ vpdpbsud %xmm8, %xmm9, %xmm10 #AVX-VNNI-INT8
+ vpdpbsud 0x10000000(%rbp, %r14, 8), %ymm9, %ymm10 #AVX-VNNI-INT8
+ vpdpbsud (%r9), %ymm9, %ymm10 #AVX-VNNI-INT8
+ vpdpbsud 4064(%rcx), %ymm9, %ymm10 #AVX-VNNI-INT8 Disp32(e00f0000)
+ vpdpbsud -4096(%rdx), %ymm9, %ymm10 #AVX-VNNI-INT8 Disp32(00f0ffff)
+ vpdpbsud 0x10000000(%rbp, %r14, 8), %xmm9, %xmm10 #AVX-VNNI-INT8
+ vpdpbsud (%r9), %xmm9, %xmm10 #AVX-VNNI-INT8
+ vpdpbsud 2032(%rcx), %xmm9, %xmm10 #AVX-VNNI-INT8 Disp32(f0070000)
+ vpdpbsud -2048(%rdx), %xmm9, %xmm10 #AVX-VNNI-INT8 Disp32(00f8ffff)
+ vpdpbsuds %ymm8, %ymm9, %ymm10 #AVX-VNNI-INT8
+ vpdpbsuds %xmm8, %xmm9, %xmm10 #AVX-VNNI-INT8
+ vpdpbsuds 0x10000000(%rbp, %r14, 8), %ymm9, %ymm10 #AVX-VNNI-INT8
+ vpdpbsuds (%r9), %ymm9, %ymm10 #AVX-VNNI-INT8
+ vpdpbsuds 4064(%rcx), %ymm9, %ymm10 #AVX-VNNI-INT8 Disp32(e00f0000)
+ vpdpbsuds -4096(%rdx), %ymm9, %ymm10 #AVX-VNNI-INT8 Disp32(00f0ffff)
+ vpdpbsuds 0x10000000(%rbp, %r14, 8), %xmm9, %xmm10 #AVX-VNNI-INT8
+ vpdpbsuds (%r9), %xmm9, %xmm10 #AVX-VNNI-INT8
+ vpdpbsuds 2032(%rcx), %xmm9, %xmm10 #AVX-VNNI-INT8 Disp32(f0070000)
+ vpdpbsuds -2048(%rdx), %xmm9, %xmm10 #AVX-VNNI-INT8 Disp32(00f8ffff)
+ vpdpbuud %ymm8, %ymm9, %ymm10 #AVX-VNNI-INT8
+ vpdpbuud %xmm8, %xmm9, %xmm10 #AVX-VNNI-INT8
+ vpdpbuud 0x10000000(%rbp, %r14, 8), %ymm9, %ymm10 #AVX-VNNI-INT8
+ vpdpbuud (%r9), %ymm9, %ymm10 #AVX-VNNI-INT8
+ vpdpbuud 4064(%rcx), %ymm9, %ymm10 #AVX-VNNI-INT8 Disp32(e00f0000)
+ vpdpbuud -4096(%rdx), %ymm9, %ymm10 #AVX-VNNI-INT8 Disp32(00f0ffff)
+ vpdpbuud 0x10000000(%rbp, %r14, 8), %xmm9, %xmm10 #AVX-VNNI-INT8
+ vpdpbuud (%r9), %xmm9, %xmm10 #AVX-VNNI-INT8
+ vpdpbuud 2032(%rcx), %xmm9, %xmm10 #AVX-VNNI-INT8 Disp32(f0070000)
+ vpdpbuud -2048(%rdx), %xmm9, %xmm10 #AVX-VNNI-INT8 Disp32(00f8ffff)
+ vpdpbuuds %ymm8, %ymm9, %ymm10 #AVX-VNNI-INT8
+ vpdpbuuds %xmm8, %xmm9, %xmm10 #AVX-VNNI-INT8
+ vpdpbuuds 0x10000000(%rbp, %r14, 8), %ymm9, %ymm10 #AVX-VNNI-INT8
+ vpdpbuuds (%r9), %ymm9, %ymm10 #AVX-VNNI-INT8
+ vpdpbuuds 4064(%rcx), %ymm9, %ymm10 #AVX-VNNI-INT8 Disp32(e00f0000)
+ vpdpbuuds -4096(%rdx), %ymm9, %ymm10 #AVX-VNNI-INT8 Disp32(00f0ffff)
+ vpdpbuuds 0x10000000(%rbp, %r14, 8), %xmm9, %xmm10 #AVX-VNNI-INT8
+ vpdpbuuds (%r9), %xmm9, %xmm10 #AVX-VNNI-INT8
+ vpdpbuuds 2032(%rcx), %xmm9, %xmm10 #AVX-VNNI-INT8 Disp32(f0070000)
+ vpdpbuuds -2048(%rdx), %xmm9, %xmm10 #AVX-VNNI-INT8 Disp32(00f8ffff)
+# The same instruction and operand combinations again, written in Intel syntax.
+.intel_syntax noprefix
+ vpdpbssd ymm10, ymm9, ymm8 #AVX-VNNI-INT8
+ vpdpbssd xmm10, xmm9, xmm8 #AVX-VNNI-INT8
+ vpdpbssd ymm10, ymm9, YMMWORD PTR [rbp+r14*8+0x10000000] #AVX-VNNI-INT8
+ vpdpbssd ymm10, ymm9, YMMWORD PTR [r9] #AVX-VNNI-INT8
+ vpdpbssd ymm10, ymm9, YMMWORD PTR [rcx+4064] #AVX-VNNI-INT8 Disp32(e00f0000)
+ vpdpbssd ymm10, ymm9, YMMWORD PTR [rdx-4096] #AVX-VNNI-INT8 Disp32(00f0ffff)
+ vpdpbssd xmm10, xmm9, XMMWORD PTR [rbp+r14*8+0x10000000] #AVX-VNNI-INT8
+ vpdpbssd xmm10, xmm9, XMMWORD PTR [r9] #AVX-VNNI-INT8
+ vpdpbssd xmm10, xmm9, XMMWORD PTR [rcx+2032] #AVX-VNNI-INT8 Disp32(f0070000)
+ vpdpbssd xmm10, xmm9, XMMWORD PTR [rdx-2048] #AVX-VNNI-INT8 Disp32(00f8ffff)
+ vpdpbssds ymm10, ymm9, ymm8 #AVX-VNNI-INT8
+ vpdpbssds xmm10, xmm9, xmm8 #AVX-VNNI-INT8
+ vpdpbssds ymm10, ymm9, YMMWORD PTR [rbp+r14*8+0x10000000] #AVX-VNNI-INT8
+ vpdpbssds ymm10, ymm9, YMMWORD PTR [r9] #AVX-VNNI-INT8
+ vpdpbssds ymm10, ymm9, YMMWORD PTR [rcx+4064] #AVX-VNNI-INT8 Disp32(e00f0000)
+ vpdpbssds ymm10, ymm9, YMMWORD PTR [rdx-4096] #AVX-VNNI-INT8 Disp32(00f0ffff)
+ vpdpbssds xmm10, xmm9, XMMWORD PTR [rbp+r14*8+0x10000000] #AVX-VNNI-INT8
+ vpdpbssds xmm10, xmm9, XMMWORD PTR [r9] #AVX-VNNI-INT8
+ vpdpbssds xmm10, xmm9, XMMWORD PTR [rcx+2032] #AVX-VNNI-INT8 Disp32(f0070000)
+ vpdpbssds xmm10, xmm9, XMMWORD PTR [rdx-2048] #AVX-VNNI-INT8 Disp32(00f8ffff)
+ vpdpbsud ymm10, ymm9, ymm8 #AVX-VNNI-INT8
+ vpdpbsud xmm10, xmm9, xmm8 #AVX-VNNI-INT8
+ vpdpbsud ymm10, ymm9, YMMWORD PTR [rbp+r14*8+0x10000000] #AVX-VNNI-INT8
+ vpdpbsud ymm10, ymm9, YMMWORD PTR [r9] #AVX-VNNI-INT8
+ vpdpbsud ymm10, ymm9, YMMWORD PTR [rcx+4064] #AVX-VNNI-INT8 Disp32(e00f0000)
+ vpdpbsud ymm10, ymm9, YMMWORD PTR [rdx-4096] #AVX-VNNI-INT8 Disp32(00f0ffff)
+ vpdpbsud xmm10, xmm9, XMMWORD PTR [rbp+r14*8+0x10000000] #AVX-VNNI-INT8
+ vpdpbsud xmm10, xmm9, XMMWORD PTR [r9] #AVX-VNNI-INT8
+ vpdpbsud xmm10, xmm9, XMMWORD PTR [rcx+2032] #AVX-VNNI-INT8 Disp32(f0070000)
+ vpdpbsud xmm10, xmm9, XMMWORD PTR [rdx-2048] #AVX-VNNI-INT8 Disp32(00f8ffff)
+ vpdpbsuds ymm10, ymm9, ymm8 #AVX-VNNI-INT8
+ vpdpbsuds xmm10, xmm9, xmm8 #AVX-VNNI-INT8
+ vpdpbsuds ymm10, ymm9, YMMWORD PTR [rbp+r14*8+0x10000000] #AVX-VNNI-INT8
+ vpdpbsuds ymm10, ymm9, YMMWORD PTR [r9] #AVX-VNNI-INT8
+ vpdpbsuds ymm10, ymm9, YMMWORD PTR [rcx+4064] #AVX-VNNI-INT8 Disp32(e00f0000)
+ vpdpbsuds ymm10, ymm9, YMMWORD PTR [rdx-4096] #AVX-VNNI-INT8 Disp32(00f0ffff)
+ vpdpbsuds xmm10, xmm9, XMMWORD PTR [rbp+r14*8+0x10000000] #AVX-VNNI-INT8
+ vpdpbsuds xmm10, xmm9, XMMWORD PTR [r9] #AVX-VNNI-INT8
+ vpdpbsuds xmm10, xmm9, XMMWORD PTR [rcx+2032] #AVX-VNNI-INT8 Disp32(f0070000)
+ vpdpbsuds xmm10, xmm9, XMMWORD PTR [rdx-2048] #AVX-VNNI-INT8 Disp32(00f8ffff)
+ vpdpbuud ymm10, ymm9, ymm8 #AVX-VNNI-INT8
+ vpdpbuud xmm10, xmm9, xmm8 #AVX-VNNI-INT8
+ vpdpbuud ymm10, ymm9, YMMWORD PTR [rbp+r14*8+0x10000000] #AVX-VNNI-INT8
+ vpdpbuud ymm10, ymm9, YMMWORD PTR [r9] #AVX-VNNI-INT8
+ vpdpbuud ymm10, ymm9, YMMWORD PTR [rcx+4064] #AVX-VNNI-INT8 Disp32(e00f0000)
+ vpdpbuud ymm10, ymm9, YMMWORD PTR [rdx-4096] #AVX-VNNI-INT8 Disp32(00f0ffff)
+ vpdpbuud xmm10, xmm9, XMMWORD PTR [rbp+r14*8+0x10000000] #AVX-VNNI-INT8
+ vpdpbuud xmm10, xmm9, XMMWORD PTR [r9] #AVX-VNNI-INT8
+ vpdpbuud xmm10, xmm9, XMMWORD PTR [rcx+2032] #AVX-VNNI-INT8 Disp32(f0070000)
+ vpdpbuud xmm10, xmm9, XMMWORD PTR [rdx-2048] #AVX-VNNI-INT8 Disp32(00f8ffff)
+ vpdpbuuds ymm10, ymm9, ymm8 #AVX-VNNI-INT8
+ vpdpbuuds xmm10, xmm9, xmm8 #AVX-VNNI-INT8
+ vpdpbuuds ymm10, ymm9, YMMWORD PTR [rbp+r14*8+0x10000000] #AVX-VNNI-INT8
+ vpdpbuuds ymm10, ymm9, YMMWORD PTR [r9] #AVX-VNNI-INT8
+ vpdpbuuds ymm10, ymm9, YMMWORD PTR [rcx+4064] #AVX-VNNI-INT8 Disp32(e00f0000)
+ vpdpbuuds ymm10, ymm9, YMMWORD PTR [rdx-4096] #AVX-VNNI-INT8 Disp32(00f0ffff)
+ vpdpbuuds xmm10, xmm9, XMMWORD PTR [rbp+r14*8+0x10000000] #AVX-VNNI-INT8
+ vpdpbuuds xmm10, xmm9, XMMWORD PTR [r9] #AVX-VNNI-INT8
+ vpdpbuuds xmm10, xmm9, XMMWORD PTR [rcx+2032] #AVX-VNNI-INT8 Disp32(f0070000)
+ vpdpbuuds xmm10, xmm9, XMMWORD PTR [rdx-2048] #AVX-VNNI-INT8 Disp32(00f8ffff)
@@ -1128,6 +1128,8 @@ enum
PREFIX_VEX_0FF0,
PREFIX_VEX_0F3849_X86_64,
PREFIX_VEX_0F384B_X86_64,
+ PREFIX_VEX_0F3850_W_0,
+ PREFIX_VEX_0F3851_W_0,
PREFIX_VEX_0F385C_X86_64,
PREFIX_VEX_0F385E_X86_64,
PREFIX_VEX_0F38F5_L_0,
@@ -4004,6 +4006,21 @@ static const struct dis386 prefix_table[][4] = {
{ VEX_W_TABLE (VEX_W_0F384B_X86_64_P_3) },
},
+ /* PREFIX_VEX_0F3850_W_0 */
+ {
+ { "vpdpbuud", { XM, Vex, EXx }, 0 },
+ { "vpdpbsud", { XM, Vex, EXx }, 0 },
+ { "%XV vpdpbusd", { XM, Vex, EXx }, 0 },
+ { "vpdpbssd", { XM, Vex, EXx }, 0 },
+ },
+
+ /* PREFIX_VEX_0F3851_W_0 */
+ {
+ { "vpdpbuuds", { XM, Vex, EXx }, 0 },
+ { "vpdpbsuds", { XM, Vex, EXx }, 0 },
+ { "%XV vpdpbusds", { XM, Vex, EXx }, 0 },
+ { "vpdpbssds", { XM, Vex, EXx }, 0 },
+ },
/* PREFIX_VEX_0F385C_X86_64 */
{
{ Bad_Opcode },
@@ -7547,11 +7564,11 @@ static const struct dis386 vex_w_table[][2] = {
},
{
/* VEX_W_0F3850 */
- { "%XV vpdpbusd", { XM, Vex, EXx }, 0 },
+ { PREFIX_TABLE (PREFIX_VEX_0F3850_W_0) },
},
{
- /* VEX_W_0F3851 */
- { "%XV vpdpbusds", { XM, Vex, EXx }, 0 },
+ /* VEX_W_0F3851 */
+ { PREFIX_TABLE (PREFIX_VEX_0F3851_W_0) },
},
{
/* VEX_W_0F3852 */
@@ -247,6 +247,8 @@ static initializer cpu_flag_init[] =
"CPU_AVX512BW_FLAGS|CpuAVX512_FP16" },
{ "CPU_AVX_IFMA_FLAGS",
"CPU_AVX2_FLAGS|CpuAVX_IFMA" },
+ { "CPU_AVX_VNNI_INT8_FLAGS",
+ "CPU_AVX2_FLAGS|CpuAVX_VNNI_INT8" },
{ "CPU_IAMCU_FLAGS",
"Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|CpuIAMCU" },
{ "CPU_ADX_FLAGS",
@@ -443,6 +445,8 @@ static initializer cpu_flag_init[] =
"CpuAVX512_FP16" },
{ "CPU_ANY_AVX_IFMA_FLAGS",
"CpuAVX_IFMA" },
+ { "CPU_ANY_AVX_VNNI_INT8_FLAGS",
+ "CpuAVX_VNNI_INT8" },
};
static initializer operand_type_init[] =
@@ -645,6 +649,7 @@ static bitfield cpu_flags[] =
BITFIELD (CpuAVX_VNNI),
BITFIELD (CpuAVX512_FP16),
BITFIELD (CpuAVX_IFMA),
+ BITFIELD (CpuAVX_VNNI_INT8),
BITFIELD (CpuMWAITX),
BITFIELD (CpuCLZERO),
BITFIELD (CpuOSPKE),
@@ -211,6 +211,8 @@ enum
CpuAVX512_FP16,
/* Intel AVX IFMA Instructions support required. */
CpuAVX_IFMA,
+ /* Intel AVX VNNI-INT8 Instructions support required. */
+ CpuAVX_VNNI_INT8,
/* mwaitx instruction required */
CpuMWAITX,
/* Clzero instruction required */
@@ -391,6 +393,7 @@ typedef union i386_cpu_flags
unsigned int cpuavx_vnni:1;
unsigned int cpuavx512_fp16:1;
unsigned int cpuavx_ifma:1;
+ unsigned int cpuavx_vnni_int8:1;
unsigned int cpumwaitx:1;
unsigned int cpuclzero:1;
unsigned int cpuospke:1;
@@ -3270,3 +3270,14 @@ vpmadd52huq, 0x66B5, None, CpuAVX_IFMA, Modrm|Vex|PseudoVexPrefix|Space0F38|VexV
vpmadd52luq, 0x66B4, None, CpuAVX_IFMA, Modrm|Vex|PseudoVexPrefix|Space0F38|VexVVVV=1|VexW1|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
// AVX_IFMA instructions end.
+
+// AVX_VNNI_INT8 instructions.
+
+vpdpbuud, 0x50, None, CpuAVX_VNNI_INT8, Modrm|Vex|Space0F38|VexVVVV|VexW0|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
+vpdpbuuds, 0x51, None, CpuAVX_VNNI_INT8, Modrm|Vex|Space0F38|VexVVVV|VexW0|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
+vpdpbssd, 0xf250, None, CpuAVX_VNNI_INT8, Modrm|Vex|Space0F38|VexVVVV|VexW0|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
+vpdpbssds, 0xf251, None, CpuAVX_VNNI_INT8, Modrm|Vex|Space0F38|VexVVVV|VexW0|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
+vpdpbsud, 0xf350, None, CpuAVX_VNNI_INT8, Modrm|Vex|Space0F38|VexVVVV|VexW0|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
+vpdpbsuds, 0xf351, None, CpuAVX_VNNI_INT8, Modrm|Vex|Space0F38|VexVVVV|VexW0|CheckRegSize|No_bSuf|No_wSuf|No_lSuf|No_sSuf|No_qSuf|No_ldSuf, { RegXMM|RegYMM|Unspecified|BaseIndex, RegXMM|RegYMM, RegXMM|RegYMM }
+
+// AVX_VNNI_INT8 instructions end.