From: Zhuocheng Ding <zhuocheng.ding@intel.com>
The ACPI (Thermal Monitor and Software Controlled Clock Facilities)
feature is a dependency of thermal interrupt processing, and is
therefore required for handling HFI notifications, which are
delivered as thermal interrupts.
To allow the VM to handle thermal interrupts, we need to emulate the
ACPI feature in KVM:
1. Emulate MSR_IA32_THERM_CONTROL (an alias of IA32_CLOCK_MODULATION),
MSR_IA32_THERM_INTERRUPT and MSR_IA32_THERM_STATUS with dummy values.
According to the SDM [1], the ACPI flag means:
"The ACPI flag (bit 22) of the CPUID feature flags indicates the
presence of the IA32_THERM_STATUS, IA32_THERM_INTERRUPT,
IA32_CLOCK_MODULATION MSRs, and the xAPIC thermal LVT entry."
Therefore, it is enough for KVM to use dummy values to emulate
RDMSR/WRMSR on these MSRs.
2. Add the thermal interrupt injection interface.
This interface completes the ACPI emulation. Although no thermal
interrupt is actually injected into the guest yet, the subsequent
HFI/ITD emulation will inject thermal interrupts into the guest once
the corresponding conditions are met.
3. Additionally, expose the CPUID bit of the ACPI feature to the VM,
which allows the guest to enable thermal interrupt handling (see the
sketch below).
[1]: SDM, vol. 3B, section 15.8.4.1, Detection of Software Controlled
Clock Modulation Extension.
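For illustration, the guest-side flow that points 1-3 enable looks
roughly like the sketch below. This is not code from this patch: the
vector and the 50% duty cycle are arbitrary example choices, and the
accessors and constants (cpuid_edx(), apic_write(), wrmsrl(),
APIC_LVTTHMR, THERMAL_APIC_VECTOR, THERM_INT_HIGH_ENABLE) are the
usual Linux guest-side definitions.

  if (cpuid_edx(1) & BIT(22)) {		/* 3. CPUID.01H:EDX[22], ACPI */
	/* 2. Program and unmask the xAPIC thermal LVT entry. */
	apic_write(APIC_LVTTHMR, THERMAL_APIC_VECTOR | APIC_DM_FIXED);
	/* 1. Enable the high-temperature interrupt source... */
	wrmsrl(MSR_IA32_THERM_INTERRUPT, THERM_INT_HIGH_ENABLE);
	/* ...and request 50% on-demand clock modulation:
	 * bit 4 = enable, bits 3:1 = duty cycle (4 * 12.5% = 50%).
	 */
	wrmsrl(MSR_IA32_THERM_CONTROL, BIT(4) | (4 << 1));
  }

Point 3 makes the CPUID check pass, point 1 makes the WRMSRs succeed
instead of raising #GP, and point 2 lets a thermal interrupt injected
by the later HFI/ITD emulation reach the vector programmed in the LVT
entry.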
Tested-by: Yanting Jiang <yanting.jiang@intel.com>
Signed-off-by: Zhuocheng Ding <zhuocheng.ding@intel.com>
Co-developed-by: Zhao Liu <zhao1.liu@intel.com>
Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
---
arch/x86/kvm/cpuid.c | 2 +-
arch/x86/kvm/irq.h | 1 +
arch/x86/kvm/lapic.c | 9 ++++
arch/x86/kvm/svm/svm.c | 3 ++
arch/x86/kvm/vmx/vmx.c | 94 ++++++++++++++++++++++++++++++++++++++++++
arch/x86/kvm/vmx/vmx.h | 3 ++
arch/x86/kvm/x86.c | 3 ++
7 files changed, 114 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -623,7 +623,7 @@ void kvm_set_cpu_caps(void)
F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLUSH) |
- 0 /* Reserved, DS, ACPI */ | F(MMX) |
+ 0 /* Reserved, DS */ | F(ACPI) | F(MMX) |
F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
0 /* HTT, TM, Reserved, PBE */
);
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -99,6 +99,7 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
+void kvm_apic_therm_deliver(struct kvm_vcpu *vcpu);
void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu);
void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2783,6 +2783,15 @@ void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
kvm_apic_local_deliver(apic, APIC_LVT0);
}
+void kvm_apic_therm_deliver(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic = vcpu->arch.apic;
+
+ if (apic)
+ kvm_apic_local_deliver(apic, APIC_LVTTHMR);
+}
+EXPORT_SYMBOL_GPL(kvm_apic_therm_deliver);
+
static const struct kvm_io_device_ops apic_mmio_ops = {
.read = apic_mmio_read,
.write = apic_mmio_write,
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4288,6 +4288,9 @@ static bool svm_has_emulated_msr(struct kvm *kvm, u32 index)
switch (index) {
case MSR_IA32_MCG_EXT_CTL:
case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
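+ /* These thermal MSRs are emulated only by VMX, not by SVM. */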
+ case MSR_IA32_THERM_CONTROL:
+ case MSR_IA32_THERM_INTERRUPT:
+ case MSR_IA32_THERM_STATUS:
return false;
case MSR_IA32_SMBASE:
if (!IS_ENABLED(CONFIG_KVM_SMM))
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -157,6 +157,32 @@ module_param(allow_smaller_maxphyaddr, bool, S_IRUGO);
RTIT_STATUS_ERROR | RTIT_STATUS_STOPPED | \
RTIT_STATUS_BYTECNT))
+/*
+ * TM2 (CPUID.01H:ECX[8]), DTHERM (CPUID.06H:EAX[0]), PLN (CPUID.06H:EAX[4]),
+ * and HWP (CPUID.06H:EAX[7]) are not emulated in kvm.
+ */
+#define MSR_IA32_THERM_STATUS_RO_MASK (THERM_STATUS_PROCHOT | \
+ THERM_STATUS_PROCHOT_FORCEPR_EVENT | THERM_STATUS_CRITICAL_TEMP)
+#define MSR_IA32_THERM_STATUS_RWC0_MASK (THERM_STATUS_PROCHOT_LOG | \
+ THERM_STATUS_PROCHOT_FORCEPR_LOG | THERM_STATUS_CRITICAL_TEMP_LOG)
+/* MSR_IA32_THERM_STATUS unavailable bits mask: unsupported and reserved bits. */
+#define MSR_IA32_THERM_STATUS_UNAVAIL_MASK (~(MSR_IA32_THERM_STATUS_RO_MASK | \
+ MSR_IA32_THERM_STATUS_RWC0_MASK))
+
+/* ECMD (CPUID.06H:EAX[5]) is not emulated in kvm. */
+#define MSR_IA32_THERM_CONTROL_AVAIL_MASK (THERM_ON_DEM_CLO_MOD_ENABLE | \
+ THERM_ON_DEM_CLO_MOD_DUTY_CYC_MASK)
+
+/*
+ * MSR_IA32_THERM_INTERRUPT available bits mask.
+ * PLN (CPUID.06H:EAX[4]) and HFN (CPUID.06H:EAX[24]) are not emulated in kvm.
+ */
+#define MSR_IA32_THERM_INTERRUPT_AVAIL_MASK (THERM_INT_HIGH_ENABLE | \
+ THERM_INT_LOW_ENABLE | THERM_INT_PROCHOT_ENABLE | \
+ THERM_INT_FORCEPR_ENABLE | THERM_INT_CRITICAL_TEM_ENABLE | \
+ THERM_MASK_THRESHOLD0 | THERM_INT_THRESHOLD0_ENABLE | \
+ THERM_MASK_THRESHOLD1 | THERM_INT_THRESHOLD1_ENABLE)
+
/*
* List of MSRs that can be directly passed to the guest.
* In addition to these x2apic and PT MSRs are handled specially.
@@ -1470,6 +1496,19 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
}
}
+static void vmx_inject_therm_interrupt(struct kvm_vcpu *vcpu)
+{
+ /*
+ * Per the SDM, the ACPI flag also indicates the presence of the
+ * xAPIC thermal LVT entry.
+ */
+ if (!guest_cpuid_has(vcpu, X86_FEATURE_ACPI))
+ return;
+
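+ /*
+ * Delivery requires the in-kernel local APIC; with a userspace
+ * irqchip the thermal interrupt is silently dropped here.
+ */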
+ if (irqchip_in_kernel(vcpu->kvm))
+ kvm_apic_therm_deliver(vcpu);
+}
+
/*
* Switches to specified vcpu, until a matching vcpu_put(), but assumes
* vcpu mutex is already taken.
@@ -2109,6 +2148,24 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_DEBUGCTLMSR:
msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
break;
+ case MSR_IA32_THERM_CONTROL:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_ACPI))
+ return 1;
+ msr_info->data = vmx->msr_ia32_therm_control;
+ break;
+ case MSR_IA32_THERM_INTERRUPT:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_ACPI))
+ return 1;
+ msr_info->data = vmx->msr_ia32_therm_interrupt;
+ break;
+ case MSR_IA32_THERM_STATUS:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_ACPI))
+ return 1;
+ msr_info->data = vmx->msr_ia32_therm_status;
+ break;
default:
find_uret_msr:
msr = vmx_find_uret_msr(vmx, msr_info->index);
@@ -2452,6 +2509,40 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
}
ret = kvm_set_msr_common(vcpu, msr_info);
break;
+ case MSR_IA32_THERM_CONTROL:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_ACPI))
+ return 1;
+ if (!msr_info->host_initiated &&
+ data & ~MSR_IA32_THERM_CONTROL_AVAIL_MASK)
+ return 1;
+ vmx->msr_ia32_therm_control = data;
+ break;
+ case MSR_IA32_THERM_INTERRUPT:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_ACPI))
+ return 1;
+ if (!msr_info->host_initiated &&
+ data & ~MSR_IA32_THERM_INTERRUPT_AVAIL_MASK)
+ return 1;
+ vmx->msr_ia32_therm_interrupt = data;
+ break;
+ case MSR_IA32_THERM_STATUS:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_ACPI))
+ return 1;
+ /* Writes to unsupported or reserved bits generate #GP. */
+ if (!msr_info->host_initiated &&
+ data & MSR_IA32_THERM_STATUS_UNAVAIL_MASK)
+ return 1;
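+ /*
+ * Log bits are write-0-to-clear: writing 0 clears them, writing
+ * 1 leaves them unchanged. RO bits always keep their current
+ * value regardless of the data written by the guest.
+ */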
+ if (!msr_info->host_initiated) {
+ data = vmx_set_msr_rwc0_bits(data, vmx->msr_ia32_therm_status,
+ MSR_IA32_THERM_STATUS_RWC0_MASK);
+ data = vmx_set_msr_ro_bits(data, vmx->msr_ia32_therm_status,
+ MSR_IA32_THERM_STATUS_RO_MASK);
+ }
+ vmx->msr_ia32_therm_status = data;
+ break;
default:
find_uret_msr:
@@ -4870,6 +4961,9 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vmx->spec_ctrl = 0;
vmx->msr_ia32_umwait_control = 0;
+ vmx->msr_ia32_therm_control = 0;
+ vmx->msr_ia32_therm_interrupt = 0;
+ vmx->msr_ia32_therm_status = 0;
vmx->hv_deadline_tsc = -1;
kvm_set_cr8(vcpu, 0);
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -282,6 +282,9 @@ struct vcpu_vmx {
u64 spec_ctrl;
u32 msr_ia32_umwait_control;
+ u64 msr_ia32_therm_control;
+ u64 msr_ia32_therm_interrupt;
+ u64 msr_ia32_therm_status;
/*
* loaded_vmcs points to the VMCS currently used in this vcpu. For a
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1545,6 +1545,9 @@ static const u32 emulated_msrs_all[] = {
MSR_AMD64_TSC_RATIO,
MSR_IA32_POWER_CTL,
MSR_IA32_UCODE_REV,
+ MSR_IA32_THERM_CONTROL,
+ MSR_IA32_THERM_INTERRUPT,
+ MSR_IA32_THERM_STATUS,
/*
* KVM always supports the "true" VMX control MSRs, even if the host