[4/4] x86: MONITOR/MWAIT are not SSE3 insns
Checks
Commit Message
These have their own CPUID bit and hence they should also have their own
separate control.
Comments
On Fri, Feb 10, 2023 at 12:51 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> These have their own CPUID bit and hence they should also have their own
> separate control.
>
> --- a/gas/config/tc-i386.c
> +++ b/gas/config/tc-i386.c
> @@ -1027,6 +1027,7 @@ static const arch_entry cpu_arch[] =
> SUBARCH (avx512dq, AVX512DQ, ANY_AVX512DQ, false),
> SUBARCH (avx512bw, AVX512BW, ANY_AVX512BW, false),
> SUBARCH (avx512vl, AVX512VL, ANY_AVX512VL, false),
> + SUBARCH (monitor, MONITOR, MONITOR, false),
> SUBARCH (vmx, VMX, ANY_VMX, false),
> SUBARCH (vmfunc, VMFUNC, ANY_VMFUNC, false),
> SUBARCH (smx, SMX, SMX, false),
> --- a/gas/doc/c-i386.texi
> +++ b/gas/doc/c-i386.texi
> @@ -152,6 +152,7 @@ accept various extension mnemonics. For
> @code{avx},
> @code{avx2},
> @code{lahf_sahf},
> +@code{monitor},
> @code{adx},
> @code{rdseed},
> @code{prfchw},
> @@ -1487,7 +1488,7 @@ supported on the CPU specified. The cho
> @item @samp{.aes} @tab @samp{.pclmul} @tab @samp{.fma} @tab @samp{.fsgsbase}
> @item @samp{.rdrnd} @tab @samp{.f16c} @tab @samp{.avx2} @tab @samp{.bmi2}
> @item @samp{.lzcnt} @tab @samp{.popcnt} @tab @samp{.invpcid} @tab @samp{.vmfunc}
> -@item @samp{.hle} @tab @samp{.rtm} @tab @samp{.tsx}
> +@item @samp{.monitor} @tab @samp{.hle} @tab @samp{.rtm} @tab @samp{.tsx}
> @item @samp{.lahf_sahf} @tab @samp{.adx} @tab @samp{.rdseed} @tab @samp{.prfchw}
> @item @samp{.smap} @tab @samp{.mpx} @tab @samp{.sha} @tab @samp{.prefetchwt1}
> @item @samp{.clflushopt} @tab @samp{.xsavec} @tab @samp{.xsaves} @tab @samp{.se1}
> --- a/gas/testsuite/gas/i386/arch-10.d
> +++ b/gas/testsuite/gas/i386/arch-10.d
> @@ -1,4 +1,4 @@
> -#as: -march=i686+mmx+avx+vmx+smx+xsave+xsaveopt+aes+pclmul+fma+movbe+ept+clflush+nop+syscall+rdtscp+3dnowa+sse4a+svme+abm+padlock+bmi+tbm
> +#as: -march=i686+mmx+avx+vmx+smx+xsave+xsaveopt+aes+pclmul+fma+movbe+ept+clflush+nop+syscall+monitor+rdtscp+3dnowa+sse4a+svme+abm+padlock+bmi+tbm
> #objdump: -dw
> #name: i386 arch 10
>
> @@ -38,4 +38,5 @@ Disassembly of section .text:
> [ ]*[a-f0-9]+: 0f 1f 00 nopl \(%eax\)
> [ ]*[a-f0-9]+: c4 e2 60 f3 c9 blsr %ecx,%ebx
> [ ]*[a-f0-9]+: 8f e9 60 01 c9 blcfill %ecx,%ebx
> +[ ]*[a-f0-9]+: 0f 01 c8 monitor( .*)
> #pass
> --- a/gas/testsuite/gas/i386/arch-10.s
> +++ b/gas/testsuite/gas/i386/arch-10.s
> @@ -62,3 +62,5 @@ nopl (%eax)
> blsr %ecx,%ebx
> # TBM
> blcfill %ecx,%ebx
> +# MONITOR
> +monitor
> --- a/gas/testsuite/gas/i386/arch-10-1.l
> +++ b/gas/testsuite/gas/i386/arch-10-1.l
> @@ -30,6 +30,7 @@
> .*:60: Error: .*
> .*:62: Error: .*
> .*:64: Error: .*
> +.*:66: Error: .*
> GAS LISTING .*
>
>
> @@ -101,3 +102,5 @@ GAS LISTING .*
> [ ]*62[ ]+blsr %ecx,%ebx
> [ ]*63[ ]+\# TBM
> [ ]*64[ ]+blcfill %ecx,%ebx
> +[ ]*65[ ]+\# MONITOR
> +[ ]*66[ ]+monitor
> --- a/gas/testsuite/gas/i386/arch-10-2.l
> +++ b/gas/testsuite/gas/i386/arch-10-2.l
> @@ -29,6 +29,7 @@
> .*:60: Error: .*
> .*:62: Error: .*
> .*:64: Error: .*
> +.*:66: Error: .*
> GAS LISTING .*
>
>
> @@ -100,3 +101,5 @@ GAS LISTING .*
> [ ]*62[ ]+blsr %ecx,%ebx
> [ ]*63[ ]+\# TBM
> [ ]*64[ ]+blcfill %ecx,%ebx
> +[ ]*65[ ]+\# MONITOR
> +[ ]*66[ ]+monitor
> --- a/gas/testsuite/gas/i386/arch-10-3.l
> +++ b/gas/testsuite/gas/i386/arch-10-3.l
> @@ -22,6 +22,7 @@
> .*:60: Error: .*
> .*:62: Error: .*
> .*:64: Error: .*
> +.*:66: Error: .*
> GAS LISTING .*
>
>
> @@ -96,3 +97,5 @@ GAS LISTING .*
> [ ]*62[ ]+blsr %ecx,%ebx
> [ ]*63[ ]+\# TBM
> [ ]*64[ ]+blcfill %ecx,%ebx
> +[ ]*65[ ]+\# MONITOR
> +[ ]*66[ ]+monitor
> --- a/gas/testsuite/gas/i386/arch-10-4.l
> +++ b/gas/testsuite/gas/i386/arch-10-4.l
> @@ -20,6 +20,7 @@
> .*:60: Error: .*
> .*:62: Error: .*
> .*:64: Error: .*
> +.*:66: Error: .*
> GAS LISTING .*
>
>
> @@ -94,3 +95,5 @@ GAS LISTING .*
> [ ]*62[ ]+blsr %ecx,%ebx
> [ ]*63[ ]+\# TBM
> [ ]*64[ ]+blcfill %ecx,%ebx
> +[ ]*65[ ]+\# MONITOR
> +[ ]*66[ ]+monitor
> --- /dev/null
> +++ b/gas/testsuite/gas/i386/arch-10-6.l
> @@ -0,0 +1,99 @@
> +.*: Assembler messages:
> +.*:4: Error: .*
> +.*:6: Error: .*
> +.*:8: Error: .*
> +.*:10: Error: .*
> +.*:12: Error: .*
> +.*:14: Error: .*
> +.*:16: Error: .*
> +.*:18: Error: .*
> +.*:20: Error: .*
> +.*:22: Error: .*
> +.*:24: Error: .*
> +.*:26: Error: .*
> +.*:28: Error: .*
> +.*:30: Error: .*
> +.*:32: Error: .*
> +.*:34: Error: .*
> +.*:36: Error: .*
> +.*:38: Error: .*
> +.*:40: Error: .*
> +.*:42: Error: .*
> +.*:44: Error: .*
> +.*:46: Error: .*
> +.*:48: Error: .*
> +.*:50: Error: .*
> +.*:52: Error: .*
> +.*:54: Error: .*
> +.*:56: Error: .*
> +.*:58: Error: .*
> +.*:60: Error: .*
> +.*:62: Error: .*
> +.*:64: Error: .*
> +[ ]*1[ ]+\.include "arch-10\.s"
> +[ ]*1[ ]+\# Test -march=
> +[ ]*2[ ]+\.text
> +[ ]*3[ ]+\# cmov feature *
> +[ ]*4[ ]+cmove %eax,%ebx
> +[ ]*5[ ]+\# clflush
> +[ ]*6[ ]+clflush \(%eax\)
> +[ ]*7[ ]+\# SYSCALL
> +[ ]*8[ ]+syscall
> +[ ]*9[ ]+\# MMX
> +[ ]*10[ ]+paddb %mm4,%mm3
> +[ ]*11[ ]+\# SSE
> +[ ]*12[ ]+addss %xmm4,%xmm3
> +[ ]*13[ ]+\# SSE2
> +[ ]*14[ ]+addsd %xmm4,%xmm3
> +[ ]*15[ ]+\# SSE3
> +[ ]*16[ ]+addsubpd %xmm4,%xmm3
> +[ ]*17[ ]+\# SSSE3
> +[ ]*18[ ]+phaddw %xmm4,%xmm3
> +[ ]*19[ ]+\# SSE4\.1
> +[ ]*20[ ]+phminposuw %xmm1,%xmm3
> +[ ]*21[ ]+\# SSE4\.2
> +[ ]*22[ ]+crc32 %ecx,%ebx
> +[ ]*23[ ]+\# AVX
> +[ ]*24[ ]+vzeroall
> +[ ]*25[ ]+\# VMX
> +[ ]*26[ ]+vmxoff
> +[ ]*27[ ]+\# SMX
> +[ ]*28[ ]+getsec
> +[ ]*29[ ]+\# Xsave
> +[ ]*30[ ]+xgetbv
> +[ ]*31[ ]+\# Xsaveopt
> +[ ]*32[ ]+xsaveopt \(%ecx\)
> +[ ]*33[ ]+\# AES
> +[ ]*34[ ]+aesenc \(%ecx\),%xmm0
> +[ ]*35[ ]+\# PCLMUL
> +[ ]*36[ ]+pclmulqdq \$8,%xmm1,%xmm0
> +[ ]*37[ ]+\# AES \+ AVX
> +[ ]*38[ ]+vaesenc \(%ecx\),%xmm0,%xmm2
> +[ ]*39[ ]+\# PCLMUL \+ AVX
> +[ ]*40[ ]+vpclmulqdq \$8,%xmm4,%xmm6,%xmm2
> +[ ]*41[ ]+\# FMA
> +[ ]*42[ ]+vfmadd132pd %xmm4,%xmm6,%xmm2
> +[ ]*43[ ]+\# MOVBE
> +[ ]*44[ ]+movbe \(%ecx\),%ebx
> +[ ]*45[ ]+\# EPT
> +[ ]*46[ ]+invept \(%ecx\),%ebx
> +[ ]*47[ ]+\# RDTSCP
> +[ ]*48[ ]+rdtscp
> +[ ]*49[ ]+\# 3DNow or PRFCHW
> +[ ]*50[ ]+prefetchw 0x1000\(,%esi,2\)
> +[ ]*51[ ]+\# SSE4a
> +[ ]*52[ ]+insertq %xmm2,%xmm1
> +[ ]*53[ ]+\# SVME
> +[ ]*54[ ]+vmload
> +[ ]*55[ ]+\# ABM/LZCNT
> +[ ]*56[ ]+lzcnt %ecx,%ebx
> +[ ]*57[ ]+\# PadLock
> +[ ]*58[ ]+xstorerng
> +[ ]*59[ ]+\# nop
> +[ ]*60[ ]+nopl \(%eax\)
> +[ ]*61[ ]+\# BMI
> +[ ]*62[ ]+blsr %ecx,%ebx
> +[ ]*63[ ]+\# TBM
> +[ ]*64[ ]+blcfill %ecx,%ebx
> +[ ]*65[ ]+\# MONITOR
> +[ ]*66[ ]+\?\?\?\? 0F01C8 monitor
> --- /dev/null
> +++ b/gas/testsuite/gas/i386/arch-10-6.s
> @@ -0,0 +1 @@
> +.include "arch-10.s"
> --- a/gas/testsuite/gas/i386/arch-10-lzcnt.d
> +++ b/gas/testsuite/gas/i386/arch-10-lzcnt.d
> @@ -1,5 +1,5 @@
> #source: arch-10.s
> -#as: -march=i686+nop+avx+vmx+smx+xsave+xsaveopt+aes+pclmul+fma+movbe+ept+clflush+syscall+rdtscp+3dnowa+sse4a+svme+lzcnt+padlock+bmi+tbm
> +#as: -march=i686+nop+avx+vmx+smx+xsave+xsaveopt+aes+pclmul+fma+movbe+ept+clflush+syscall+monitor+rdtscp+3dnowa+sse4a+svme+lzcnt+padlock+bmi+tbm
> #objdump: -dw
> #name: i386 arch 10 (lzcnt)
> #dump: arch-10.d
> --- a/gas/testsuite/gas/i386/arch-10-prefetchw.d
> +++ b/gas/testsuite/gas/i386/arch-10-prefetchw.d
> @@ -1,5 +1,5 @@
> #source: arch-10.s
> -#as: -march=i686+mmx+nop+avx+vmx+smx+xsave+xsaveopt+aes+pclmul+fma+movbe+ept+clflush+syscall+rdtscp+sse4a+svme+lzcnt+padlock+bmi+tbm+prfchw
> +#as: -march=i686+mmx+nop+avx+vmx+smx+xsave+xsaveopt+aes+pclmul+fma+movbe+ept+clflush+syscall+monitor+rdtscp+sse4a+svme+lzcnt+padlock+bmi+tbm+prfchw
> #objdump: -dw
> #name: i386 arch 10 (prefetchw)
> #dump: arch-10.d
> --- a/gas/testsuite/gas/i386/i386.exp
> +++ b/gas/testsuite/gas/i386/i386.exp
> @@ -206,6 +206,7 @@ if [gas_32_check] then {
> run_list_test "arch-10-3" "-march=i686+mmx+sse4.2 -I${srcdir}/$subdir -al"
> run_list_test "arch-10-4" "-march=i686+mmx+sse4+vmx+smx -I${srcdir}/$subdir -al"
> run_list_test "arch-10-5" "-march=generic32+i686 -al"
> + run_list_test "arch-10-6" "-march=generic32+monitor -I${srcdir}/$subdir -aln"
> run_dump_test "arch-11"
> run_dump_test "arch-12"
> run_dump_test "arch-13"
> --- a/gas/testsuite/gas/i386/nosse-3.l
> +++ b/gas/testsuite/gas/i386/nosse-3.l
> @@ -5,3 +5,4 @@ GAS LISTING .*
> [ ]*1[ ]+\# Test -march=\+nosse
> [ ]*2[ ]+\.text
> [ ]*3[ ]+lfence
> +[ ]*4[ ]+\?\?\?\? 0F01C8 monitor
> --- a/gas/testsuite/gas/i386/nosse-3.s
> +++ b/gas/testsuite/gas/i386/nosse-3.s
> @@ -1,3 +1,4 @@
> # Test -march=+nosse
> .text
> lfence
> + monitor
> --- a/gas/testsuite/gas/i386/nosse-4.l
> +++ b/gas/testsuite/gas/i386/nosse-4.l
> @@ -2,6 +2,7 @@
> .*:6: Error: .*generic.*
> .*:9: Error: .*\.sse.*
> .*:12: Error: .*\.sse2.*
> +.*:14: Error: .*\.sse3.*
> .*:15: Error: .*\.sse3.*
> .*:18: Error: .*\.ssse3.*
> .*:21: Error: .*\.sse4\.1.*
> @@ -9,10 +10,9 @@
> .*:32: Error: .*\.nosse4\.2.*
> .*:35: Error: .*\.nosse4\.1.*
> .*:38: Error: .*\.nossse3.*
> -.*:43: Error: .*\.nosse3.*
> -.*:45: Error: .*\.nommx.*
> -.*:47: Error: .*\.nosse2.*
> -.*:50: Error: .*\.nosse.*
> +.*:43: Error: .*\.nommx.*
> +.*:45: Error: .*\.nosse2.*
> +.*:48: Error: .*\.nosse.*
> GAS LISTING .*
> #...
> [ ]*1[ ]+\# Test \.arch \[\.sseX|\.nosseX\]
> @@ -28,7 +28,7 @@ GAS LISTING .*
> [ ]*11[ ]+\?\?\?\? 0FAEE8 lfence
> [ ]*12[ ]+mwait
> [ ]*13[ ]+\.arch \.sse3
> -[ ]*14[ ]+\?\?\?\? 0F01C9 mwait
> +[ ]*14[ ]+mwait
> [ ]*15[ ]+pabsd %xmm0, %xmm0
> [ ]*16[ ]+\.arch \.ssse3
> [ ]*17[ ]+\?\?\?\? 660F381E pabsd %xmm0, %xmm0
> @@ -60,21 +60,15 @@ GAS LISTING .*
> [ ]*36[ ]+C0
> [ ]*37[ ]+\.arch \.nossse3
> [ ]*38[ ]+pabsd %xmm0, %xmm0
> -[ ]*39[ ]+\?\?\?\? 0F01C9 mwait
> -[ ]*40[ ]+\?\?\?\? 0F77 emms
> -[ ]*41[ ]+\.arch \.nommx
> -[ ]*42[ ]+\.arch \.nosse3
> -[ ]*43[ ]+mwait
> -[ ]*44[ ]+\?\?\?\? 0FAEE8 lfence
> -[ ]*45[ ]+emms
> -[ ]*46[ ]+\.arch \.nosse2
> -[ ]*47[ ]+lfence
> -[ ]*48[ ]+\?\?\?\? 0F58C0 addps %xmm0, %xmm0
> -[ ]*49[ ]+\.arch \.nosse
> -[ ]*50[ ]+addps %xmm0, %xmm0
> - GAS LISTING .*
> -
> -
> -[ ]*51[ ]+\?\?\?\? 8DB42600 \.p2align 4
> -[ ]*51[ ]+000000
> +[ ]*39[ ]+\?\?\?\? 0F77 emms
> +[ ]*40[ ]+\.arch \.nommx
> +[ ]*41[ ]+\.arch \.nosse3
> +[ ]*42[ ]+\?\?\?\? 0FAEE8 lfence
> +[ ]*43[ ]+emms
> +[ ]*44[ ]+\.arch \.nosse2
> +[ ]*45[ ]+lfence
> +[ ]*46[ ]+\?\?\?\? 0F58C0 addps %xmm0, %xmm0
> +[ ]*47[ ]+\.arch \.nosse
> +[ ]*48[ ]+addps %xmm0, %xmm0
> +[ ]*49[ ]+\?\?\?\? .* \.p2align 4
> #pass
> --- a/gas/testsuite/gas/i386/nosse-4.s
> +++ b/gas/testsuite/gas/i386/nosse-4.s
> @@ -36,11 +36,9 @@
> pabsd %xmm0, %xmm0
> .arch .nossse3
> pabsd %xmm0, %xmm0
> - mwait
> emms
> .arch .nommx
> .arch .nosse3
> - mwait
> lfence
> emms
> .arch .nosse2
> --- a/opcodes/i386-gen.c
> +++ b/opcodes/i386-gen.c
> @@ -61,9 +61,9 @@ static const dependency isa_dependencies
> { "P4",
> "P3|Clflush|SSE2" },
> { "NOCONA",
> - "GENERIC64|FISTTP|SSE3|CX16" },
> + "GENERIC64|FISTTP|SSE3|MONITOR|CX16" },
> { "CORE",
> - "P4|FISTTP|SSE3|CX16" },
> + "P4|FISTTP|SSE3|MONITOR|CX16" },
> { "CORE2",
> "NOCONA|SSSE3" },
> { "COREI7",
> @@ -77,9 +77,9 @@ static const dependency isa_dependencies
> { "K8",
> "ATHLON|Rdtscp|SSE2|LM" },
> { "AMDFAM10",
> - "K8|FISTTP|SSE4A|ABM" },
> + "K8|FISTTP|SSE4A|ABM|MONITOR" },
> { "BDVER1",
> - "GENERIC64|FISTTP|Rdtscp|CX16|LAHF_SAHF|XOP|ABM|LWP|SVME|AES|PCLMUL|PRFCHW" },
> + "GENERIC64|FISTTP|Rdtscp|MONITOR|CX16|LAHF_SAHF|XOP|ABM|LWP|SVME|AES|PCLMUL|PRFCHW" },
> { "BDVER2",
> "BDVER1|FMA|BMI|TBM|F16C" },
> { "BDVER3",
> @@ -87,7 +87,7 @@ static const dependency isa_dependencies
> { "BDVER4",
> "BDVER3|AVX2|Movbe|BMI2|RdRnd|MWAITX" },
> { "ZNVER1",
> - "GENERIC64|FISTTP|Rdtscp|CX16|LAHF_SAHF|AVX2|SSE4A|ABM|SVME|AES|PCLMUL|PRFCHW|FMA|BMI|F16C|Xsaveopt|FSGSBase|Movbe|BMI2|RdRnd|ADX|RdSeed|SMAP|SHA|XSAVEC|XSAVES|ClflushOpt|CLZERO|MWAITX" },
> + "GENERIC64|FISTTP|Rdtscp|MONITOR|CX16|LAHF_SAHF|AVX2|SSE4A|ABM|SVME|AES|PCLMUL|PRFCHW|FMA|BMI|F16C|Xsaveopt|FSGSBase|Movbe|BMI2|RdRnd|ADX|RdSeed|SMAP|SHA|XSAVEC|XSAVES|ClflushOpt|CLZERO|MWAITX" },
> { "ZNVER2",
> "ZNVER1|CLWB|RDPID|RDPRU|MCOMMIT|WBNOINVD" },
> { "ZNVER3",
> @@ -95,7 +95,7 @@ static const dependency isa_dependencies
> { "ZNVER4",
> "ZNVER3|AVX512F|AVX512DQ|AVX512IFMA|AVX512CD|AVX512BW|AVX512VL|AVX512_BF16|AVX512VBMI|AVX512_VBMI2|AVX512_VNNI|AVX512_BITALG|AVX512_VPOPCNTDQ|GFNI|RMPQUERY" },
> { "BTVER1",
> - "GENERIC64|FISTTP|CX16|LAHF_SAHF|Rdtscp|SSSE3|SSE4A|ABM|PRFCHW|Clflush|FISTTP|SVME" },
> + "GENERIC64|FISTTP|MONITOR|CX16|LAHF_SAHF|Rdtscp|SSSE3|SSE4A|ABM|PRFCHW|Clflush|FISTTP|SVME" },
> { "BTVER2",
> "BTVER1|AVX|BMI|F16C|AES|PCLMUL|Movbe|Xsaveopt|PRFCHW" },
> { "286",
> @@ -322,6 +322,7 @@ static bitfield cpu_flags[] =
> BITFIELD (BMI2),
> BITFIELD (LZCNT),
> BITFIELD (POPCNT),
> + BITFIELD (MONITOR),
> BITFIELD (HLE),
> BITFIELD (RTM),
> BITFIELD (INVPCID),
> --- a/opcodes/i386-opc.h
> +++ b/opcodes/i386-opc.h
> @@ -88,6 +88,8 @@ enum
> CpuLZCNT,
> /* POPCNT support required */
> CpuPOPCNT,
> + /* MONITOR support required */
> + CpuMONITOR,
> /* SSE4.1 support required */
> CpuSSE4_1,
> /* SSE4.2 support required */
> @@ -350,6 +352,7 @@ typedef union i386_cpu_flags
> unsigned int cpusse4a:1;
> unsigned int cpulzcnt:1;
> unsigned int cpupopcnt:1;
> + unsigned int cpumonitor:1;
> unsigned int cpusse4_1:1;
> unsigned int cpusse4_2:1;
> unsigned int cpuavx:1;
> --- a/opcodes/i386-opc.tbl
> +++ b/opcodes/i386-opc.tbl
> @@ -1270,17 +1270,17 @@ cmpxchg16b, 0xfc7/1, CX16|x64, Modrm|NoS
>
> // MONITOR instructions.
>
> -monitor, 0xf01c8, SSE3, NoSuf, {}
> +monitor, 0xf01c8, MONITOR, NoSuf, {}
> // monitor is very special. CX and DX are always 32 bits. The
> // address size override prefix can be used to overrride the AX size in
> // all modes.
> -monitor, 0xf01c8, SSE3, AddrPrefixOpReg|NoSuf, { Acc|Word|Dword|Qword, RegC|Dword, RegD|Dword }
> +monitor, 0xf01c8, MONITOR, AddrPrefixOpReg|NoSuf, { Acc|Word|Dword|Qword, RegC|Dword, RegD|Dword }
> // The 64-bit form exists only for compatibility with older gas.
> -monitor, 0xf01c8, SSE3|x64, AddrPrefixOpReg|NoSuf, { Acc|Dword|Qword, RegC|Qword, RegD|Qword }
> -mwait, 0xf01c9, SSE3, NoSuf, {}
> +monitor, 0xf01c8, MONITOR|x64, AddrPrefixOpReg|NoSuf, { Acc|Dword|Qword, RegC|Qword, RegD|Qword }
> +mwait, 0xf01c9, MONITOR, NoSuf, {}
> // mwait is very special. AX and CX are always 32 bits.
> // The 64-bit form exists only for compatibility with older gas.
> -mwait, 0xf01c9, SSE3, CheckOperandSize|IgnoreSize|NoSuf|NoRex64, { Acc|Dword|Qword, RegC|Dword|Qword }
> +mwait, 0xf01c9, MONITOR, CheckOperandSize|IgnoreSize|NoSuf|NoRex64, { Acc|Dword|Qword, RegC|Dword|Qword }
>
> // VMX instructions.
>
>
Since they used to be in SSE3, should they be also allowed with SSE3?
On 10.02.2023 18:02, H.J. Lu wrote:
> Since they used to be in SSE3, should they be also allowed with SSE3?
In order to answer the question, can you please clarify what you mean
by "used to be in SSE3"? I'm not aware of a spec ever marking them as
SSE3 insns. And not allowing their use in e.g. generic32+sse3 is one
of the purposes of this change, because that was simply wrong.
Jan
On Mon, Feb 13, 2023 at 12:08 AM Jan Beulich <jbeulich@suse.com> wrote:
>
> On 10.02.2023 18:02, H.J. Lu wrote:
> > Since they used to be in SSE3, should they be also allowed with SSE3?
>
> In order to answer the question, can you please clarify what you mean
> by "used to be in SSE3"? I'm not aware of a spec ever marking them as
> SSE3 insns. And not allowing their use in e.g. generic32+sse3 is one
> of the purposes of this change, because that was simply wrong.
>
I checked the 2005 Intel SDM, which already has a separate CPUID bit for MONITOR/MWAIT.
So there is no need to allow them with SSE3.
Thanks.
@@ -1027,6 +1027,7 @@ static const arch_entry cpu_arch[] =
SUBARCH (avx512dq, AVX512DQ, ANY_AVX512DQ, false),
SUBARCH (avx512bw, AVX512BW, ANY_AVX512BW, false),
SUBARCH (avx512vl, AVX512VL, ANY_AVX512VL, false),
+ SUBARCH (monitor, MONITOR, MONITOR, false),
SUBARCH (vmx, VMX, ANY_VMX, false),
SUBARCH (vmfunc, VMFUNC, ANY_VMFUNC, false),
SUBARCH (smx, SMX, SMX, false),
@@ -152,6 +152,7 @@ accept various extension mnemonics. For
@code{avx},
@code{avx2},
@code{lahf_sahf},
+@code{monitor},
@code{adx},
@code{rdseed},
@code{prfchw},
@@ -1487,7 +1488,7 @@ supported on the CPU specified. The cho
@item @samp{.aes} @tab @samp{.pclmul} @tab @samp{.fma} @tab @samp{.fsgsbase}
@item @samp{.rdrnd} @tab @samp{.f16c} @tab @samp{.avx2} @tab @samp{.bmi2}
@item @samp{.lzcnt} @tab @samp{.popcnt} @tab @samp{.invpcid} @tab @samp{.vmfunc}
-@item @samp{.hle} @tab @samp{.rtm} @tab @samp{.tsx}
+@item @samp{.monitor} @tab @samp{.hle} @tab @samp{.rtm} @tab @samp{.tsx}
@item @samp{.lahf_sahf} @tab @samp{.adx} @tab @samp{.rdseed} @tab @samp{.prfchw}
@item @samp{.smap} @tab @samp{.mpx} @tab @samp{.sha} @tab @samp{.prefetchwt1}
@item @samp{.clflushopt} @tab @samp{.xsavec} @tab @samp{.xsaves} @tab @samp{.se1}
@@ -1,4 +1,4 @@
-#as: -march=i686+mmx+avx+vmx+smx+xsave+xsaveopt+aes+pclmul+fma+movbe+ept+clflush+nop+syscall+rdtscp+3dnowa+sse4a+svme+abm+padlock+bmi+tbm
+#as: -march=i686+mmx+avx+vmx+smx+xsave+xsaveopt+aes+pclmul+fma+movbe+ept+clflush+nop+syscall+monitor+rdtscp+3dnowa+sse4a+svme+abm+padlock+bmi+tbm
#objdump: -dw
#name: i386 arch 10
@@ -38,4 +38,5 @@ Disassembly of section .text:
[ ]*[a-f0-9]+: 0f 1f 00 nopl \(%eax\)
[ ]*[a-f0-9]+: c4 e2 60 f3 c9 blsr %ecx,%ebx
[ ]*[a-f0-9]+: 8f e9 60 01 c9 blcfill %ecx,%ebx
+[ ]*[a-f0-9]+: 0f 01 c8 monitor( .*)
#pass
@@ -62,3 +62,5 @@ nopl (%eax)
blsr %ecx,%ebx
# TBM
blcfill %ecx,%ebx
+# MONITOR
+monitor
@@ -30,6 +30,7 @@
.*:60: Error: .*
.*:62: Error: .*
.*:64: Error: .*
+.*:66: Error: .*
GAS LISTING .*
@@ -101,3 +102,5 @@ GAS LISTING .*
[ ]*62[ ]+blsr %ecx,%ebx
[ ]*63[ ]+\# TBM
[ ]*64[ ]+blcfill %ecx,%ebx
+[ ]*65[ ]+\# MONITOR
+[ ]*66[ ]+monitor
@@ -29,6 +29,7 @@
.*:60: Error: .*
.*:62: Error: .*
.*:64: Error: .*
+.*:66: Error: .*
GAS LISTING .*
@@ -100,3 +101,5 @@ GAS LISTING .*
[ ]*62[ ]+blsr %ecx,%ebx
[ ]*63[ ]+\# TBM
[ ]*64[ ]+blcfill %ecx,%ebx
+[ ]*65[ ]+\# MONITOR
+[ ]*66[ ]+monitor
@@ -22,6 +22,7 @@
.*:60: Error: .*
.*:62: Error: .*
.*:64: Error: .*
+.*:66: Error: .*
GAS LISTING .*
@@ -96,3 +97,5 @@ GAS LISTING .*
[ ]*62[ ]+blsr %ecx,%ebx
[ ]*63[ ]+\# TBM
[ ]*64[ ]+blcfill %ecx,%ebx
+[ ]*65[ ]+\# MONITOR
+[ ]*66[ ]+monitor
@@ -20,6 +20,7 @@
.*:60: Error: .*
.*:62: Error: .*
.*:64: Error: .*
+.*:66: Error: .*
GAS LISTING .*
@@ -94,3 +95,5 @@ GAS LISTING .*
[ ]*62[ ]+blsr %ecx,%ebx
[ ]*63[ ]+\# TBM
[ ]*64[ ]+blcfill %ecx,%ebx
+[ ]*65[ ]+\# MONITOR
+[ ]*66[ ]+monitor
@@ -0,0 +1,99 @@
+.*: Assembler messages:
+.*:4: Error: .*
+.*:6: Error: .*
+.*:8: Error: .*
+.*:10: Error: .*
+.*:12: Error: .*
+.*:14: Error: .*
+.*:16: Error: .*
+.*:18: Error: .*
+.*:20: Error: .*
+.*:22: Error: .*
+.*:24: Error: .*
+.*:26: Error: .*
+.*:28: Error: .*
+.*:30: Error: .*
+.*:32: Error: .*
+.*:34: Error: .*
+.*:36: Error: .*
+.*:38: Error: .*
+.*:40: Error: .*
+.*:42: Error: .*
+.*:44: Error: .*
+.*:46: Error: .*
+.*:48: Error: .*
+.*:50: Error: .*
+.*:52: Error: .*
+.*:54: Error: .*
+.*:56: Error: .*
+.*:58: Error: .*
+.*:60: Error: .*
+.*:62: Error: .*
+.*:64: Error: .*
+[ ]*1[ ]+\.include "arch-10\.s"
+[ ]*1[ ]+\# Test -march=
+[ ]*2[ ]+\.text
+[ ]*3[ ]+\# cmov feature *
+[ ]*4[ ]+cmove %eax,%ebx
+[ ]*5[ ]+\# clflush
+[ ]*6[ ]+clflush \(%eax\)
+[ ]*7[ ]+\# SYSCALL
+[ ]*8[ ]+syscall
+[ ]*9[ ]+\# MMX
+[ ]*10[ ]+paddb %mm4,%mm3
+[ ]*11[ ]+\# SSE
+[ ]*12[ ]+addss %xmm4,%xmm3
+[ ]*13[ ]+\# SSE2
+[ ]*14[ ]+addsd %xmm4,%xmm3
+[ ]*15[ ]+\# SSE3
+[ ]*16[ ]+addsubpd %xmm4,%xmm3
+[ ]*17[ ]+\# SSSE3
+[ ]*18[ ]+phaddw %xmm4,%xmm3
+[ ]*19[ ]+\# SSE4\.1
+[ ]*20[ ]+phminposuw %xmm1,%xmm3
+[ ]*21[ ]+\# SSE4\.2
+[ ]*22[ ]+crc32 %ecx,%ebx
+[ ]*23[ ]+\# AVX
+[ ]*24[ ]+vzeroall
+[ ]*25[ ]+\# VMX
+[ ]*26[ ]+vmxoff
+[ ]*27[ ]+\# SMX
+[ ]*28[ ]+getsec
+[ ]*29[ ]+\# Xsave
+[ ]*30[ ]+xgetbv
+[ ]*31[ ]+\# Xsaveopt
+[ ]*32[ ]+xsaveopt \(%ecx\)
+[ ]*33[ ]+\# AES
+[ ]*34[ ]+aesenc \(%ecx\),%xmm0
+[ ]*35[ ]+\# PCLMUL
+[ ]*36[ ]+pclmulqdq \$8,%xmm1,%xmm0
+[ ]*37[ ]+\# AES \+ AVX
+[ ]*38[ ]+vaesenc \(%ecx\),%xmm0,%xmm2
+[ ]*39[ ]+\# PCLMUL \+ AVX
+[ ]*40[ ]+vpclmulqdq \$8,%xmm4,%xmm6,%xmm2
+[ ]*41[ ]+\# FMA
+[ ]*42[ ]+vfmadd132pd %xmm4,%xmm6,%xmm2
+[ ]*43[ ]+\# MOVBE
+[ ]*44[ ]+movbe \(%ecx\),%ebx
+[ ]*45[ ]+\# EPT
+[ ]*46[ ]+invept \(%ecx\),%ebx
+[ ]*47[ ]+\# RDTSCP
+[ ]*48[ ]+rdtscp
+[ ]*49[ ]+\# 3DNow or PRFCHW
+[ ]*50[ ]+prefetchw 0x1000\(,%esi,2\)
+[ ]*51[ ]+\# SSE4a
+[ ]*52[ ]+insertq %xmm2,%xmm1
+[ ]*53[ ]+\# SVME
+[ ]*54[ ]+vmload
+[ ]*55[ ]+\# ABM/LZCNT
+[ ]*56[ ]+lzcnt %ecx,%ebx
+[ ]*57[ ]+\# PadLock
+[ ]*58[ ]+xstorerng
+[ ]*59[ ]+\# nop
+[ ]*60[ ]+nopl \(%eax\)
+[ ]*61[ ]+\# BMI
+[ ]*62[ ]+blsr %ecx,%ebx
+[ ]*63[ ]+\# TBM
+[ ]*64[ ]+blcfill %ecx,%ebx
+[ ]*65[ ]+\# MONITOR
+[ ]*66[ ]+\?\?\?\? 0F01C8 monitor
@@ -0,0 +1 @@
+.include "arch-10.s"
@@ -1,5 +1,5 @@
#source: arch-10.s
-#as: -march=i686+nop+avx+vmx+smx+xsave+xsaveopt+aes+pclmul+fma+movbe+ept+clflush+syscall+rdtscp+3dnowa+sse4a+svme+lzcnt+padlock+bmi+tbm
+#as: -march=i686+nop+avx+vmx+smx+xsave+xsaveopt+aes+pclmul+fma+movbe+ept+clflush+syscall+monitor+rdtscp+3dnowa+sse4a+svme+lzcnt+padlock+bmi+tbm
#objdump: -dw
#name: i386 arch 10 (lzcnt)
#dump: arch-10.d
@@ -1,5 +1,5 @@
#source: arch-10.s
-#as: -march=i686+mmx+nop+avx+vmx+smx+xsave+xsaveopt+aes+pclmul+fma+movbe+ept+clflush+syscall+rdtscp+sse4a+svme+lzcnt+padlock+bmi+tbm+prfchw
+#as: -march=i686+mmx+nop+avx+vmx+smx+xsave+xsaveopt+aes+pclmul+fma+movbe+ept+clflush+syscall+monitor+rdtscp+sse4a+svme+lzcnt+padlock+bmi+tbm+prfchw
#objdump: -dw
#name: i386 arch 10 (prefetchw)
#dump: arch-10.d
@@ -206,6 +206,7 @@ if [gas_32_check] then {
run_list_test "arch-10-3" "-march=i686+mmx+sse4.2 -I${srcdir}/$subdir -al"
run_list_test "arch-10-4" "-march=i686+mmx+sse4+vmx+smx -I${srcdir}/$subdir -al"
run_list_test "arch-10-5" "-march=generic32+i686 -al"
+ run_list_test "arch-10-6" "-march=generic32+monitor -I${srcdir}/$subdir -aln"
run_dump_test "arch-11"
run_dump_test "arch-12"
run_dump_test "arch-13"
@@ -5,3 +5,4 @@ GAS LISTING .*
[ ]*1[ ]+\# Test -march=\+nosse
[ ]*2[ ]+\.text
[ ]*3[ ]+lfence
+[ ]*4[ ]+\?\?\?\? 0F01C8 monitor
@@ -1,3 +1,4 @@
# Test -march=+nosse
.text
lfence
+ monitor
@@ -2,6 +2,7 @@
.*:6: Error: .*generic.*
.*:9: Error: .*\.sse.*
.*:12: Error: .*\.sse2.*
+.*:14: Error: .*\.sse3.*
.*:15: Error: .*\.sse3.*
.*:18: Error: .*\.ssse3.*
.*:21: Error: .*\.sse4\.1.*
@@ -9,10 +10,9 @@
.*:32: Error: .*\.nosse4\.2.*
.*:35: Error: .*\.nosse4\.1.*
.*:38: Error: .*\.nossse3.*
-.*:43: Error: .*\.nosse3.*
-.*:45: Error: .*\.nommx.*
-.*:47: Error: .*\.nosse2.*
-.*:50: Error: .*\.nosse.*
+.*:43: Error: .*\.nommx.*
+.*:45: Error: .*\.nosse2.*
+.*:48: Error: .*\.nosse.*
GAS LISTING .*
#...
[ ]*1[ ]+\# Test \.arch \[\.sseX|\.nosseX\]
@@ -28,7 +28,7 @@ GAS LISTING .*
[ ]*11[ ]+\?\?\?\? 0FAEE8 lfence
[ ]*12[ ]+mwait
[ ]*13[ ]+\.arch \.sse3
-[ ]*14[ ]+\?\?\?\? 0F01C9 mwait
+[ ]*14[ ]+mwait
[ ]*15[ ]+pabsd %xmm0, %xmm0
[ ]*16[ ]+\.arch \.ssse3
[ ]*17[ ]+\?\?\?\? 660F381E pabsd %xmm0, %xmm0
@@ -60,21 +60,15 @@ GAS LISTING .*
[ ]*36[ ]+C0
[ ]*37[ ]+\.arch \.nossse3
[ ]*38[ ]+pabsd %xmm0, %xmm0
-[ ]*39[ ]+\?\?\?\? 0F01C9 mwait
-[ ]*40[ ]+\?\?\?\? 0F77 emms
-[ ]*41[ ]+\.arch \.nommx
-[ ]*42[ ]+\.arch \.nosse3
-[ ]*43[ ]+mwait
-[ ]*44[ ]+\?\?\?\? 0FAEE8 lfence
-[ ]*45[ ]+emms
-[ ]*46[ ]+\.arch \.nosse2
-[ ]*47[ ]+lfence
-[ ]*48[ ]+\?\?\?\? 0F58C0 addps %xmm0, %xmm0
-[ ]*49[ ]+\.arch \.nosse
-[ ]*50[ ]+addps %xmm0, %xmm0
-GAS LISTING .*
-
-
-[ ]*51[ ]+\?\?\?\? 8DB42600 \.p2align 4
-[ ]*51[ ]+000000
+[ ]*39[ ]+\?\?\?\? 0F77 emms
+[ ]*40[ ]+\.arch \.nommx
+[ ]*41[ ]+\.arch \.nosse3
+[ ]*42[ ]+\?\?\?\? 0FAEE8 lfence
+[ ]*43[ ]+emms
+[ ]*44[ ]+\.arch \.nosse2
+[ ]*45[ ]+lfence
+[ ]*46[ ]+\?\?\?\? 0F58C0 addps %xmm0, %xmm0
+[ ]*47[ ]+\.arch \.nosse
+[ ]*48[ ]+addps %xmm0, %xmm0
+[ ]*49[ ]+\?\?\?\? .* \.p2align 4
#pass
@@ -36,11 +36,9 @@
pabsd %xmm0, %xmm0
.arch .nossse3
pabsd %xmm0, %xmm0
- mwait
emms
.arch .nommx
.arch .nosse3
- mwait
lfence
emms
.arch .nosse2
@@ -61,9 +61,9 @@ static const dependency isa_dependencies
{ "P4",
"P3|Clflush|SSE2" },
{ "NOCONA",
- "GENERIC64|FISTTP|SSE3|CX16" },
+ "GENERIC64|FISTTP|SSE3|MONITOR|CX16" },
{ "CORE",
- "P4|FISTTP|SSE3|CX16" },
+ "P4|FISTTP|SSE3|MONITOR|CX16" },
{ "CORE2",
"NOCONA|SSSE3" },
{ "COREI7",
@@ -77,9 +77,9 @@ static const dependency isa_dependencies
{ "K8",
"ATHLON|Rdtscp|SSE2|LM" },
{ "AMDFAM10",
- "K8|FISTTP|SSE4A|ABM" },
+ "K8|FISTTP|SSE4A|ABM|MONITOR" },
{ "BDVER1",
- "GENERIC64|FISTTP|Rdtscp|CX16|LAHF_SAHF|XOP|ABM|LWP|SVME|AES|PCLMUL|PRFCHW" },
+ "GENERIC64|FISTTP|Rdtscp|MONITOR|CX16|LAHF_SAHF|XOP|ABM|LWP|SVME|AES|PCLMUL|PRFCHW" },
{ "BDVER2",
"BDVER1|FMA|BMI|TBM|F16C" },
{ "BDVER3",
@@ -87,7 +87,7 @@ static const dependency isa_dependencies
{ "BDVER4",
"BDVER3|AVX2|Movbe|BMI2|RdRnd|MWAITX" },
{ "ZNVER1",
- "GENERIC64|FISTTP|Rdtscp|CX16|LAHF_SAHF|AVX2|SSE4A|ABM|SVME|AES|PCLMUL|PRFCHW|FMA|BMI|F16C|Xsaveopt|FSGSBase|Movbe|BMI2|RdRnd|ADX|RdSeed|SMAP|SHA|XSAVEC|XSAVES|ClflushOpt|CLZERO|MWAITX" },
+ "GENERIC64|FISTTP|Rdtscp|MONITOR|CX16|LAHF_SAHF|AVX2|SSE4A|ABM|SVME|AES|PCLMUL|PRFCHW|FMA|BMI|F16C|Xsaveopt|FSGSBase|Movbe|BMI2|RdRnd|ADX|RdSeed|SMAP|SHA|XSAVEC|XSAVES|ClflushOpt|CLZERO|MWAITX" },
{ "ZNVER2",
"ZNVER1|CLWB|RDPID|RDPRU|MCOMMIT|WBNOINVD" },
{ "ZNVER3",
@@ -95,7 +95,7 @@ static const dependency isa_dependencies
{ "ZNVER4",
"ZNVER3|AVX512F|AVX512DQ|AVX512IFMA|AVX512CD|AVX512BW|AVX512VL|AVX512_BF16|AVX512VBMI|AVX512_VBMI2|AVX512_VNNI|AVX512_BITALG|AVX512_VPOPCNTDQ|GFNI|RMPQUERY" },
{ "BTVER1",
- "GENERIC64|FISTTP|CX16|LAHF_SAHF|Rdtscp|SSSE3|SSE4A|ABM|PRFCHW|Clflush|FISTTP|SVME" },
+ "GENERIC64|FISTTP|MONITOR|CX16|LAHF_SAHF|Rdtscp|SSSE3|SSE4A|ABM|PRFCHW|Clflush|FISTTP|SVME" },
{ "BTVER2",
"BTVER1|AVX|BMI|F16C|AES|PCLMUL|Movbe|Xsaveopt|PRFCHW" },
{ "286",
@@ -322,6 +322,7 @@ static bitfield cpu_flags[] =
BITFIELD (BMI2),
BITFIELD (LZCNT),
BITFIELD (POPCNT),
+ BITFIELD (MONITOR),
BITFIELD (HLE),
BITFIELD (RTM),
BITFIELD (INVPCID),
@@ -88,6 +88,8 @@ enum
CpuLZCNT,
/* POPCNT support required */
CpuPOPCNT,
+ /* MONITOR support required */
+ CpuMONITOR,
/* SSE4.1 support required */
CpuSSE4_1,
/* SSE4.2 support required */
@@ -350,6 +352,7 @@ typedef union i386_cpu_flags
unsigned int cpusse4a:1;
unsigned int cpulzcnt:1;
unsigned int cpupopcnt:1;
+ unsigned int cpumonitor:1;
unsigned int cpusse4_1:1;
unsigned int cpusse4_2:1;
unsigned int cpuavx:1;
@@ -1270,17 +1270,17 @@ cmpxchg16b, 0xfc7/1, CX16|x64, Modrm|NoS
// MONITOR instructions.
-monitor, 0xf01c8, SSE3, NoSuf, {}
+monitor, 0xf01c8, MONITOR, NoSuf, {}
// monitor is very special. CX and DX are always 32 bits. The
// address size override prefix can be used to overrride the AX size in
// all modes.
-monitor, 0xf01c8, SSE3, AddrPrefixOpReg|NoSuf, { Acc|Word|Dword|Qword, RegC|Dword, RegD|Dword }
+monitor, 0xf01c8, MONITOR, AddrPrefixOpReg|NoSuf, { Acc|Word|Dword|Qword, RegC|Dword, RegD|Dword }
// The 64-bit form exists only for compatibility with older gas.
-monitor, 0xf01c8, SSE3|x64, AddrPrefixOpReg|NoSuf, { Acc|Dword|Qword, RegC|Qword, RegD|Qword }
-mwait, 0xf01c9, SSE3, NoSuf, {}
+monitor, 0xf01c8, MONITOR|x64, AddrPrefixOpReg|NoSuf, { Acc|Dword|Qword, RegC|Qword, RegD|Qword }
+mwait, 0xf01c9, MONITOR, NoSuf, {}
// mwait is very special. AX and CX are always 32 bits.
// The 64-bit form exists only for compatibility with older gas.
-mwait, 0xf01c9, SSE3, CheckOperandSize|IgnoreSize|NoSuf|NoRex64, { Acc|Dword|Qword, RegC|Dword|Qword }
+mwait, 0xf01c9, MONITOR, CheckOperandSize|IgnoreSize|NoSuf|NoRex64, { Acc|Dword|Qword, RegC|Dword|Qword }
// VMX instructions.