[7/7] KVM: VMX: Handle NMI VM-Exits in noinstr region
Commit Message
Move VMX's handling of NMI VM-Exits into vmx_vcpu_enter_exit() so that
the NMI is handled prior to leaving the safety of noinstr. Handling the
NMI after leaving noinstr exposes the kernel to potential ordering
problems as an instrumentation-induced fault, e.g. #DB, #BP, #PF, etc.
will unblock NMIs when IRETing back to the faulting instruction.
Reported-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
arch/x86/kvm/vmx/vmcs.h | 4 ++--
arch/x86/kvm/vmx/vmenter.S | 8 ++++----
arch/x86/kvm/vmx/vmx.c | 34 +++++++++++++++++++++-------------
arch/x86/kvm/x86.h | 6 +++---
4 files changed, 30 insertions(+), 22 deletions(-)
Comments
On Tue, Dec 13, 2022 at 06:09:12AM +0000, Sean Christopherson wrote:
> @@ -7119,6 +7118,18 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
>
> vmx_enable_fb_clear(vmx);
>
> + if (unlikely(vmx->fail))
> + vmx->exit_reason.full = 0xdead;
> + else
> + vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
> +
> + if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
> + is_nmi(vmx_get_intr_info(vcpu))) {
> + kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
> + vmx_do_nmi_irqoff();
> + kvm_after_interrupt(vcpu);
> + }
> +
> guest_state_exit_irqoff();
> }
I think we're going to have to sprinkle __always_inline on the
kvm_{before,after}_interrupt() things (or I missed the earlier patches
doing this already), sometimes compilers are just weird.
On Thu, Jan 19, 2023, Peter Zijlstra wrote:
> On Tue, Dec 13, 2022 at 06:09:12AM +0000, Sean Christopherson wrote:
>
> > @@ -7119,6 +7118,18 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
> >
> > vmx_enable_fb_clear(vmx);
> >
> > + if (unlikely(vmx->fail))
> > + vmx->exit_reason.full = 0xdead;
> > + else
> > + vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
> > +
> > + if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
> > + is_nmi(vmx_get_intr_info(vcpu))) {
> > + kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
> > + vmx_do_nmi_irqoff();
> > + kvm_after_interrupt(vcpu);
> > + }
> > +
> > guest_state_exit_irqoff();
> > }
>
> I think we're going to have to sprinkle __always_inline on the
> kvm_{before,after}_interrupt() things (or I missed the earlier patches
> doing this already), sometimes compilers are just weird.
It's in this patch, just lurking at the bottom.
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 9de72586f406..44d1827f0a30 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -382,13 +382,13 @@ enum kvm_intr_type {
> KVM_HANDLING_NMI,
> };
>
> -static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu,
> - enum kvm_intr_type intr)
> +static __always_inline void kvm_before_interrupt(struct kvm_vcpu *vcpu,
> + enum kvm_intr_type intr)
> {
> WRITE_ONCE(vcpu->arch.handling_intr_from_guest, (u8)intr);
> }
>
> -static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
> +static __always_inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
> {
> WRITE_ONCE(vcpu->arch.handling_intr_from_guest, 0);
> }
On Thu, Jan 19, 2023 at 03:39:55PM +0000, Sean Christopherson wrote:
> It's in this patch, just lurking at the bottom.
Ha!, so much for reading ..
@@ -75,7 +75,7 @@ struct loaded_vmcs {
struct vmcs_controls_shadow controls_shadow;
};
-static inline bool is_intr_type(u32 intr_info, u32 type)
+static __always_inline bool is_intr_type(u32 intr_info, u32 type)
{
const u32 mask = INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK;
@@ -146,7 +146,7 @@ static inline bool is_icebp(u32 intr_info)
return is_intr_type(intr_info, INTR_TYPE_PRIV_SW_EXCEPTION);
}
-static inline bool is_nmi(u32 intr_info)
+static __always_inline bool is_nmi(u32 intr_info)
{
return is_intr_type(intr_info, INTR_TYPE_NMI_INTR);
}
@@ -299,6 +299,10 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
SYM_FUNC_END(__vmx_vcpu_run)
+SYM_FUNC_START(vmx_do_nmi_irqoff)
+ VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
+SYM_FUNC_END(vmx_do_nmi_irqoff)
+
.section .text, "ax"
@@ -353,10 +357,6 @@ SYM_FUNC_START(vmread_error_trampoline)
SYM_FUNC_END(vmread_error_trampoline)
#endif
-SYM_FUNC_START(vmx_do_nmi_irqoff)
- VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
-SYM_FUNC_END(vmx_do_nmi_irqoff)
-
SYM_FUNC_START(vmx_do_interrupt_irqoff)
VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
SYM_FUNC_END(vmx_do_interrupt_irqoff)
@@ -5095,8 +5095,13 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
vect_info = vmx->idt_vectoring_info;
intr_info = vmx_get_intr_info(vcpu);
+ /*
+ * Machine checks are handled by handle_exception_irqoff(), or by
+ * vmx_vcpu_run() if a #MC occurs on VM-Entry. NMIs are handled by
+ * vmx_vcpu_enter_exit().
+ */
if (is_machine_check(intr_info) || is_nmi(intr_info))
- return 1; /* handled by handle_exception_nmi_irqoff() */
+ return 1;
/*
* Queue the exception here instead of in handle_nm_fault_irqoff().
@@ -6809,7 +6814,7 @@ static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
rdmsrl(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err);
}
-static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
+static void handle_exception_irqoff(struct vcpu_vmx *vmx)
{
u32 intr_info = vmx_get_intr_info(&vmx->vcpu);
@@ -6822,12 +6827,6 @@ static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
/* Handle machine checks before interrupts are enabled */
else if (is_machine_check(intr_info))
kvm_machine_check();
- /* We need to handle NMIs before interrupts are enabled */
- else if (is_nmi(intr_info)) {
- kvm_before_interrupt(&vmx->vcpu, KVM_HANDLING_NMI);
- vmx_do_nmi_irqoff();
- kvm_after_interrupt(&vmx->vcpu);
- }
}
static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
@@ -6857,7 +6856,7 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
if (vmx->exit_reason.basic == EXIT_REASON_EXTERNAL_INTERRUPT)
handle_external_interrupt_irqoff(vcpu);
else if (vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI)
- handle_exception_nmi_irqoff(vmx);
+ handle_exception_irqoff(vmx);
}
/*
@@ -7119,6 +7118,18 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
vmx_enable_fb_clear(vmx);
+ if (unlikely(vmx->fail))
+ vmx->exit_reason.full = 0xdead;
+ else
+ vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
+
+ if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
+ is_nmi(vmx_get_intr_info(vcpu))) {
+ kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
+ vmx_do_nmi_irqoff();
+ kvm_after_interrupt(vcpu);
+ }
+
guest_state_exit_irqoff();
}
@@ -7260,12 +7271,9 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx->idt_vectoring_info = 0;
- if (unlikely(vmx->fail)) {
- vmx->exit_reason.full = 0xdead;
+ if (unlikely(vmx->fail))
return EXIT_FASTPATH_NONE;
- }
- vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
if (unlikely((u16)vmx->exit_reason.basic == EXIT_REASON_MCE_DURING_VMENTRY))
kvm_machine_check();
@@ -382,13 +382,13 @@ enum kvm_intr_type {
KVM_HANDLING_NMI,
};
-static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu,
- enum kvm_intr_type intr)
+static __always_inline void kvm_before_interrupt(struct kvm_vcpu *vcpu,
+ enum kvm_intr_type intr)
{
WRITE_ONCE(vcpu->arch.handling_intr_from_guest, (u8)intr);
}
-static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
+static __always_inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
{
WRITE_ONCE(vcpu->arch.handling_intr_from_guest, 0);
}