[4/6] x86: KVM: Enable AVX-VNNI-INT8 CPUID and expose it to guest
Commit Message
AVX-VNNI-INT8 is a new set of instructions in the latest Intel platform
Sierra Forest. It multiplies the individual bytes of two signed or
unsigned source operands, then adds and accumulates the results into the
destination dword element size operand. These instructions allow the
platform to have superior AI capabilities.
The bit definition:
CPUID.(EAX=7,ECX=1):EDX[bit 4]
This patch enables this CPUID in the kernel feature bits and exposes it to
the guest OS. Since the CPUID involves a bit of EDX (EAX=7,ECX=1) which has
not been enumerated yet, this patch adds CPUID_7_1_EDX to CPUID subleaves.
At the same time, word 20 is newly defined in CPU features for CPUID level
0x00000007:1 (EDX).
Signed-off-by: Jiaxi Chen <jiaxi.chen@linux.intel.com>
---
arch/x86/include/asm/cpufeature.h | 7 +++++--
arch/x86/include/asm/cpufeatures.h | 5 ++++-
arch/x86/include/asm/disabled-features.h | 3 ++-
arch/x86/include/asm/required-features.h | 3 ++-
arch/x86/kernel/cpu/common.c | 1 +
arch/x86/kvm/cpuid.c | 5 ++++-
arch/x86/kvm/reverse_cpuid.h | 1 +
7 files changed, 19 insertions(+), 6 deletions(-)
Comments
On Wed, Oct 19, 2022 at 04:47:32PM +0800, Jiaxi Chen wrote:
> AVX-VNNI-INT8 is a new set of instructions in the latest Intel platform
> Sierra Forest. It multiplies the individual bytes of two unsigned or
> unsigned source operands, then add and accumulate the results into the
> destination dword element size operand. This instruction allows for the
> platform to have superior AI capabilities.
>
> The bit definition:
> CPUID.(EAX=7,ECX=1):EDX[bit 4]
>
> This patch enables this CPUID in the kernel feature bits and expose it to
> guest OS. Since the CPUID involves a bit of EDX (EAX=7,ECX=1) which has not
> been enumerated yet, this patch adds CPUID_7_1_EDX to CPUID subleaves. At
> the same time, word 20 is newly-defined in CPU features for CPUID level
For all your commit messages:
Avoid having "This patch" or "This commit" in the commit message. It is
tautologically useless.
Also, do
$ git grep 'This patch' Documentation/process
for more details.
For this particular one, use scattered.c instead of adding a new leaf.
Thx.
On Wed, Oct 19, 2022, Borislav Petkov wrote:
> On Wed, Oct 19, 2022 at 04:47:32PM +0800, Jiaxi Chen wrote:
> > AVX-VNNI-INT8 is a new set of instructions in the latest Intel platform
> > Sierra Forest. It multiplies the individual bytes of two unsigned or
> > unsigned source operands, then add and accumulate the results into the
> > destination dword element size operand. This instruction allows for the
> > platform to have superior AI capabilities.
> >
> > The bit definition:
> > CPUID.(EAX=7,ECX=1):EDX[bit 4]
>
> For this particular one, use scattered.c instead of adding a new leaf.
Unless the kernel wants to use X86_FEATURE_AVX_VNNI_INT8, which seems unlikely,
there's no need to create a scattered entry. This can be handled in KVM by adding
a KVM-only leaf entry (which will be needed no matter what), plus a #define for
X86_FEATURE_AVX_VNNI_INT8 to direct it to the KVM entry.
E.g.
diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
index a19d473d0184..25e7bfc61607 100644
--- a/arch/x86/kvm/reverse_cpuid.h
+++ b/arch/x86/kvm/reverse_cpuid.h
@@ -13,6 +13,7 @@
*/
enum kvm_only_cpuid_leafs {
CPUID_12_EAX = NCAPINTS,
+ CPUID_7_1_EDX,
NR_KVM_CPU_CAPS,
NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
@@ -24,6 +25,16 @@ enum kvm_only_cpuid_leafs {
#define KVM_X86_FEATURE_SGX1 KVM_X86_FEATURE(CPUID_12_EAX, 0)
#define KVM_X86_FEATURE_SGX2 KVM_X86_FEATURE(CPUID_12_EAX, 1)
+#define KVM_X86_FEATURE_AVX_VNNI_INT8 KVM_X86_FEATURE(CPUID_7_1_EDX, 4)
+
+/*
+ * Alias X86_FEATURE_* to the KVM variant for features in KVM-only leafs that
+ * aren't scattered by cpufeatures.h so that X86_FEATURE_* can be used in KVM,
+ * e.g. to query guest CPUID. As a bonus, no translation is needed for these
+ * features in __feature_translate().
+ */
+#define X86_FEATURE_AVX_VNNI_INT8 KVM_X86_FEATURE_AVX_VNNI_INT8
+
struct cpuid_reg {
u32 function;
u32 index;
@@ -48,6 +59,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
[CPUID_7_1_EAX] = { 7, 1, CPUID_EAX},
[CPUID_12_EAX] = {0x00000012, 0, CPUID_EAX},
[CPUID_8000_001F_EAX] = {0x8000001f, 0, CPUID_EAX},
+ [CPUID_7_1_EDX] = { 7, 1, CPUID_EDX},
};
/*
On Wed, Oct 19, 2022, Sean Christopherson wrote:
> On Wed, Oct 19, 2022, Borislav Petkov wrote:
> > On Wed, Oct 19, 2022 at 04:47:32PM +0800, Jiaxi Chen wrote:
> > > AVX-VNNI-INT8 is a new set of instructions in the latest Intel platform
> > > Sierra Forest. It multiplies the individual bytes of two unsigned or
> > > unsigned source operands, then add and accumulate the results into the
> > > destination dword element size operand. This instruction allows for the
> > > platform to have superior AI capabilities.
> > >
> > > The bit definition:
> > > CPUID.(EAX=7,ECX=1):EDX[bit 4]
> >
> > For this particular one, use scattered.c instead of adding a new leaf.
>
> Unless the kernel wants to use X86_FEATURE_AVX_VNNI_INT8, which seems unlikely,
> there's no need to create a scattered entry. This can be handled in KVM by adding
> a KVM-only leaf entry (which will be needed no matter what), plus a #define for
> X86_FEATURE_AVX_VNNI_INT8 to direct it to the KVM entry.
>
> E.g.
>
> diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
> index a19d473d0184..25e7bfc61607 100644
> --- a/arch/x86/kvm/reverse_cpuid.h
> +++ b/arch/x86/kvm/reverse_cpuid.h
> @@ -13,6 +13,7 @@
> */
> enum kvm_only_cpuid_leafs {
> CPUID_12_EAX = NCAPINTS,
> + CPUID_7_1_EDX,
> NR_KVM_CPU_CAPS,
>
> NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
> @@ -24,6 +25,16 @@ enum kvm_only_cpuid_leafs {
> #define KVM_X86_FEATURE_SGX1 KVM_X86_FEATURE(CPUID_12_EAX, 0)
> #define KVM_X86_FEATURE_SGX2 KVM_X86_FEATURE(CPUID_12_EAX, 1)
>
> +#define KVM_X86_FEATURE_AVX_VNNI_INT8 KVM_X86_FEATURE(CPUID_7_1_EDX, 4)
> +
> +/*
> + * Alias X86_FEATURE_* to the KVM variant for features in KVM-only leafs that
> + * aren't scattered by cpufeatures.h so that X86_FEATURE_* can be used in KVM,
> + * e.g. to query guest CPUID. As a bonus, no translation is needed for these
> + * features in __feature_translate().
> + */
> +#define X86_FEATURE_AVX_VNNI_INT8 KVM_X86_FEATURE_AVX_VNNI_INT8
Actually, there's no need for KVM_X86_FEATURE_AVX_VNNI_INT8 in this case, just
#define X86_FEATURE_AVX_VNNI_INT8 directly. The KVM_ prefixed macro exists purely
to redirect the non-KVM_ version, but that's unnecessary in this case.
diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
index a19d473d0184..38adafb03490 100644
--- a/arch/x86/kvm/reverse_cpuid.h
+++ b/arch/x86/kvm/reverse_cpuid.h
@@ -13,6 +13,7 @@
*/
enum kvm_only_cpuid_leafs {
CPUID_12_EAX = NCAPINTS,
+ CPUID_7_1_EDX,
NR_KVM_CPU_CAPS,
NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
@@ -24,6 +25,13 @@ enum kvm_only_cpuid_leafs {
#define KVM_X86_FEATURE_SGX1 KVM_X86_FEATURE(CPUID_12_EAX, 0)
#define KVM_X86_FEATURE_SGX2 KVM_X86_FEATURE(CPUID_12_EAX, 1)
+/*
+ * Omit the KVM_ prefix for features in KVM-only leafs that aren't scattered by
+ * cpufeatures.h so that X86_FEATURE_* can be used in KVM, e.g. to query guest
+ * CPUID. As a bonus, no handling in __feature_translate() is needed.
+ */
+#define X86_FEATURE_AVX_VNNI_INT8 KVM_X86_FEATURE(CPUID_7_1_EDX, 4)
+
struct cpuid_reg {
u32 function;
u32 index;
@@ -48,6 +56,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
[CPUID_7_1_EAX] = { 7, 1, CPUID_EAX},
[CPUID_12_EAX] = {0x00000012, 0, CPUID_EAX},
[CPUID_8000_001F_EAX] = {0x8000001f, 0, CPUID_EAX},
+ [CPUID_7_1_EDX] = { 7, 1, CPUID_EDX},
};
在 2022/10/19 16:52, Borislav Petkov 写道:
> On Wed, Oct 19, 2022 at 04:47:32PM +0800, Jiaxi Chen wrote:
>> AVX-VNNI-INT8 is a new set of instructions in the latest Intel platform
>> Sierra Forest. It multiplies the individual bytes of two unsigned or
>> unsigned source operands, then add and accumulate the results into the
>> destination dword element size operand. This instruction allows for the
>> platform to have superior AI capabilities.
>>
>> The bit definition:
>> CPUID.(EAX=7,ECX=1):EDX[bit 4]
>>
>> This patch enables this CPUID in the kernel feature bits and expose it to
>> guest OS. Since the CPUID involves a bit of EDX (EAX=7,ECX=1) which has not
>> been enumerated yet, this patch adds CPUID_7_1_EDX to CPUID subleaves. At
>> the same time, word 20 is newly-defined in CPU features for CPUID level
>
> For all your commit messages:
>
> Avoid having "This patch" or "This commit" in the commit message. It is
> tautologically useless.
Will follow this in the future. Thanks.
>
> Also, do
>
> $ git grep 'This patch' Documentation/process
>
> for more details.
>
> For this particular one, use scattered.c instead of adding a new leaf.
>
> Thx.
>
On 10/19/2022 11:09 PM, Sean Christopherson wrote:
> On Wed, Oct 19, 2022, Sean Christopherson wrote:
>> On Wed, Oct 19, 2022, Borislav Petkov wrote:
>>> On Wed, Oct 19, 2022 at 04:47:32PM +0800, Jiaxi Chen wrote:
>>>> AVX-VNNI-INT8 is a new set of instructions in the latest Intel platform
>>>> Sierra Forest. It multiplies the individual bytes of two unsigned or
>>>> unsigned source operands, then add and accumulate the results into the
>>>> destination dword element size operand. This instruction allows for the
>>>> platform to have superior AI capabilities.
>>>>
>>>> The bit definition:
>>>> CPUID.(EAX=7,ECX=1):EDX[bit 4]
>>>
>>> For this particular one, use scattered.c instead of adding a new leaf.
>>
>> Unless the kernel wants to use X86_FEATURE_AVX_VNNI_INT8, which seems unlikely,
>> there's no need to create a scattered entry. This can be handled in KVM by adding
>> a KVM-only leaf entry (which will be needed no matter what), plus a #define for
>> X86_FEATURE_AVX_VNNI_INT8 to direct it to the KVM entry.
>>
>> E.g.
>>
>> diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
>> index a19d473d0184..25e7bfc61607 100644
>> --- a/arch/x86/kvm/reverse_cpuid.h
>> +++ b/arch/x86/kvm/reverse_cpuid.h
>> @@ -13,6 +13,7 @@
>> */
>> enum kvm_only_cpuid_leafs {
>> CPUID_12_EAX = NCAPINTS,
>> + CPUID_7_1_EDX,
>> NR_KVM_CPU_CAPS,
>>
>> NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
>> @@ -24,6 +25,16 @@ enum kvm_only_cpuid_leafs {
>> #define KVM_X86_FEATURE_SGX1 KVM_X86_FEATURE(CPUID_12_EAX, 0)
>> #define KVM_X86_FEATURE_SGX2 KVM_X86_FEATURE(CPUID_12_EAX, 1)
>>
>> +#define KVM_X86_FEATURE_AVX_VNNI_INT8 KVM_X86_FEATURE(CPUID_7_1_EDX, 4)
>> +
>> +/*
>> + * Alias X86_FEATURE_* to the KVM variant for features in KVM-only leafs that
>> + * aren't scattered by cpufeatures.h so that X86_FEATURE_* can be used in KVM,
>> + * e.g. to query guest CPUID. As a bonus, no translation is needed for these
>> + * features in __feature_translate().
>> + */
>> +#define X86_FEATURE_AVX_VNNI_INT8 KVM_X86_FEATURE_AVX_VNNI_INT8
>
> Actually, there's no need for KVM_X86_FEATURE_AVX_VNNI_INT8 in this case, just
> #define X86_FEATURE_AVX_VNNI_INT8 directly. The KVM_ prefixed macro exists purely
> to redirect the non-KVM_ version, but that's unnecessary in this case.
>
> diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
> index a19d473d0184..38adafb03490 100644
> --- a/arch/x86/kvm/reverse_cpuid.h
> +++ b/arch/x86/kvm/reverse_cpuid.h
> @@ -13,6 +13,7 @@
> */
> enum kvm_only_cpuid_leafs {
> CPUID_12_EAX = NCAPINTS,
> + CPUID_7_1_EDX,
> NR_KVM_CPU_CAPS,
>
> NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
> @@ -24,6 +25,13 @@ enum kvm_only_cpuid_leafs {
> #define KVM_X86_FEATURE_SGX1 KVM_X86_FEATURE(CPUID_12_EAX, 0)
> #define KVM_X86_FEATURE_SGX2 KVM_X86_FEATURE(CPUID_12_EAX, 1)
>
> +/*
> + * Omit the KVM_ prefix for features in KVM-only leafs that aren't scattered by
> + * cpufeatures.h so that X86_FEATURE_* can be used in KVM,* e.g. to query guest
> + * CPUID. As a bonus, no handling in __feature_translate() is needed.
> + */
> +#define X86_FEATURE_AVX_VNNI_INT8 KVM_X86_FEATURE(CPUID_7_1_EDX, 4)
> +
> struct cpuid_reg {
> u32 function;
> u32 index;
> @@ -48,6 +56,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
> [CPUID_7_1_EAX] = { 7, 1, CPUID_EAX},
> [CPUID_12_EAX] = {0x00000012, 0, CPUID_EAX},
> [CPUID_8000_001F_EAX] = {0x8000001f, 0, CPUID_EAX},
> + [CPUID_7_1_EDX] = { 7, 1, CPUID_EDX},
> };
Using a KVM-only leaf is better for EDX in this case. Will follow this suggestion in v2.
Thanks Sean very much.
@@ -32,6 +32,7 @@ enum cpuid_leafs
CPUID_8000_0007_EBX,
CPUID_7_EDX,
CPUID_8000_001F_EAX,
+ CPUID_7_1_EDX,
};
#define X86_CAP_FMT_NUM "%d:%d"
@@ -94,8 +95,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 20, feature_bit) || \
REQUIRED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 20))
+ BUILD_BUG_ON_ZERO(NCAPINTS != 21))
#define DISABLED_MASK_BIT_SET(feature_bit) \
( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \
@@ -118,8 +120,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 20, feature_bit) || \
DISABLED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 20))
+ BUILD_BUG_ON_ZERO(NCAPINTS != 21))
#define cpu_has(c, bit) \
(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
@@ -13,7 +13,7 @@
/*
* Defines x86 CPU feature bits
*/
-#define NCAPINTS 20 /* N 32-bit words worth of info */
+#define NCAPINTS 21 /* N 32-bit words worth of info */
#define NBUGINTS 1 /* N 32-bit bug flags */
/*
@@ -423,6 +423,9 @@
#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */
#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */
+/* Intel-defined CPU features, CPUID level 0x00000007:1 (EDX), word 20 */
+#define X86_FEATURE_AVX_VNNI_INT8 (20*32+ 4) /* Support for VPDPB[SU,UU,SS]D[,S] */
+
/*
* BUG word(s)
*/
@@ -111,6 +111,7 @@
#define DISABLED_MASK17 0
#define DISABLED_MASK18 0
#define DISABLED_MASK19 0
-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 20)
+#define DISABLED_MASK20 0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
#endif /* _ASM_X86_DISABLED_FEATURES_H */
@@ -98,6 +98,7 @@
#define REQUIRED_MASK17 0
#define REQUIRED_MASK18 0
#define REQUIRED_MASK19 0
-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 20)
+#define REQUIRED_MASK20 0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
#endif /* _ASM_X86_REQUIRED_FEATURES_H */
@@ -1031,6 +1031,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
if (eax >= 1) {
cpuid_count(0x00000007, 1, &eax, &ebx, &ecx, &edx);
c->x86_capability[CPUID_7_1_EAX] = eax;
+ c->x86_capability[CPUID_7_1_EDX] = edx;
}
}
@@ -660,6 +660,9 @@ void kvm_set_cpu_caps(void)
F(AVX_VNNI) | F(AVX512_BF16) | F(CMPCCXADD) | F(AMX_FP16) |
F(AVX_IFMA));
+ kvm_cpu_cap_mask(CPUID_7_1_EDX,
+ F(AVX_VNNI_INT8));
+
kvm_cpu_cap_mask(CPUID_D_1_EAX,
F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES) | f_xfd
);
@@ -913,9 +916,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
goto out;
cpuid_entry_override(entry, CPUID_7_1_EAX);
+ cpuid_entry_override(entry, CPUID_7_1_EDX);
entry->ebx = 0;
entry->ecx = 0;
- entry->edx = 0;
}
break;
case 0xa: { /* Architectural Performance Monitoring */
@@ -48,6 +48,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
[CPUID_7_1_EAX] = { 7, 1, CPUID_EAX},
[CPUID_12_EAX] = {0x00000012, 0, CPUID_EAX},
[CPUID_8000_001F_EAX] = {0x8000001f, 0, CPUID_EAX},
+ [CPUID_7_1_EDX] = { 7, 1, CPUID_EDX},
};
/*