[v2,7/8] KVM: SVM: move MSR_IA32_SPEC_CTRL save/restore to assembly
Commit Message
Restoration of the host IA32_SPEC_CTRL value is probably too late
with respect to the return thunk training sequence.
With respect to the user/kernel boundary, AMD says, "If software chooses
to toggle STIBP (e.g., set STIBP on kernel entry, and clear it on kernel
exit), software should set STIBP to 1 before executing the return thunk
training sequence." I assume the same requirements apply to the guest/host
boundary. The return thunk training sequence is in vmenter.S, quite close
to the VM-exit. On hosts without V_SPEC_CTRL, however, the host's
IA32_SPEC_CTRL value is not restored until much later.
To avoid this, move the restoration of host SPEC_CTRL to assembly and,
for consistency, move the restoration of the guest SPEC_CTRL as well.
This is not particularly difficult, apart from some care to cover both
32- and 64-bit, and to share code between SEV-ES and normal vmentry.
Cc: stable@vger.kernel.org
Fixes: a149180fbcf3 ("x86: Add magic AMD return-thunk")
Suggested-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
arch/x86/kernel/cpu/bugs.c | 13 +----
arch/x86/kvm/kvm-asm-offsets.c | 1 +
arch/x86/kvm/svm/svm.c | 38 +++++-------
arch/x86/kvm/svm/svm.h | 4 +-
arch/x86/kvm/svm/vmenter.S | 102 ++++++++++++++++++++++++++++++++-
5 files changed, 121 insertions(+), 37 deletions(-)
Comments
On Tue, Nov 08, 2022, Paolo Bonzini wrote:
> diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S
> index 0a4272faf80f..a02eef724379 100644
> --- a/arch/x86/kvm/svm/vmenter.S
> +++ b/arch/x86/kvm/svm/vmenter.S
> @@ -32,10 +32,70 @@
>
> .section .noinstr.text, "ax"
>
> +.macro RESTORE_GUEST_SPEC_CTRL
> + /* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */
> + ALTERNATIVE_2 "", \
> + "jmp 800f", X86_FEATURE_MSR_SPEC_CTRL, \
> + "", X86_FEATURE_V_SPEC_CTRL
> +801:
> +.endm
> +
> +.macro RESTORE_HOST_SPEC_CTRL
> + /* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */
> + ALTERNATIVE_2 "", \
> + "jmp 900f", X86_FEATURE_MSR_SPEC_CTRL, \
> + "", X86_FEATURE_V_SPEC_CTRL
> +901:
> +.endm
> +
> +.macro RESTORE_SPEC_CTRL_BODY
Can we split these into separate macros? It's a bit more typing, but it's not
immediately obvious that these are two independent chunks (I was expecting a JMP
from the 800 section into the 900 section).
E.g. RESTORE_GUEST_SPEC_CTRL_BODY and RESTORE_HOST_SPEC_CTRL_BODY
> +800:
Ugh, the multiple users makes it somewhat ugly, but rather than arbitrary numbers,
what about using named labels to make it easier to understand the branches?
E.g.
---
arch/x86/kvm/svm/vmenter.S | 43 +++++++++++++++++++++-----------------
1 file changed, 24 insertions(+), 19 deletions(-)
diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S
index a02eef724379..23fd7353f0d0 100644
--- a/arch/x86/kvm/svm/vmenter.S
+++ b/arch/x86/kvm/svm/vmenter.S
@@ -32,24 +32,24 @@
.section .noinstr.text, "ax"
-.macro RESTORE_GUEST_SPEC_CTRL
+.macro RESTORE_GUEST_SPEC_CTRL name
/* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */
ALTERNATIVE_2 "", \
- "jmp 800f", X86_FEATURE_MSR_SPEC_CTRL, \
+ "jmp .Lrestore_guest_spec_ctrl\name", X86_FEATURE_MSR_SPEC_CTRL, \
"", X86_FEATURE_V_SPEC_CTRL
-801:
+.Lpost_restore_guest_spec_ctrl\name:
.endm
-.macro RESTORE_HOST_SPEC_CTRL
+.macro RESTORE_HOST_SPEC_CTRL name
/* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */
ALTERNATIVE_2 "", \
- "jmp 900f", X86_FEATURE_MSR_SPEC_CTRL, \
+ "jmp .Lrestore_host_spec_ctrl\name", X86_FEATURE_MSR_SPEC_CTRL, \
"", X86_FEATURE_V_SPEC_CTRL
-901:
+.Lpost_restore_host_spec_ctrl\name:
.endm
-.macro RESTORE_SPEC_CTRL_BODY
-800:
+.macro RESTORE_GUEST_SPEC_CTRL_BODY name
+.Lrestore_guest_spec_ctrl\name:
/*
* SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the
* host's, write the MSR. This is kept out-of-line so that the common
@@ -61,13 +61,16 @@
*/
movl SVM_spec_ctrl(%_ASM_DI), %eax
cmp PER_CPU_VAR(x86_spec_ctrl_current), %eax
- je 801b
+ je .Lpost_restore_guest_spec_ctrl\name
+
mov $MSR_IA32_SPEC_CTRL, %ecx
xor %edx, %edx
wrmsr
- jmp 801b
+ jmp .Lpost_restore_guest_spec_ctrl\name
+.endm
-900:
+.macro RESTORE_HOST_SPEC_CTRL_BODY name
+.Lrestore_host_spec_ctrl\name:
/* Same for after vmexit. */
mov $MSR_IA32_SPEC_CTRL, %ecx
@@ -76,18 +79,18 @@
* if it was not intercepted during guest execution.
*/
cmpb $0, (%_ASM_SP)
- jnz 998f
+ jnz .Lskip_save_guest_spec_ctrl\name
rdmsr
movl %eax, SVM_spec_ctrl(%_ASM_DI)
-998:
+.Lskip_save_guest_spec_ctrl\name:
/* Now restore the host value of the MSR if different from the guest's. */
movl PER_CPU_VAR(x86_spec_ctrl_current), %eax
cmp SVM_spec_ctrl(%_ASM_DI), %eax
- je 901b
+ je .Lpost_restore_host_spec_ctrl\name
xor %edx, %edx
wrmsr
- jmp 901b
+ jmp .Lpost_restore_host_spec_ctrl\name
.endm
@@ -259,7 +262,8 @@ SYM_FUNC_START(__svm_vcpu_run)
pop %_ASM_BP
RET
- RESTORE_SPEC_CTRL_BODY
+ RESTORE_GUEST_SPEC_CTRL_BODY
+ RESTORE_HOST_SPEC_CTRL_BODY
10: cmpb $0, kvm_rebooting
jne 2b
@@ -310,7 +314,7 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
mov %_ASM_ARG1, %_ASM_DI
.endif
- RESTORE_GUEST_SPEC_CTRL
+ RESTORE_GUEST_SPEC_CTRL sev_es
/* Get svm->current_vmcb->pa into RAX. */
mov SVM_current_vmcb(%_ASM_DI), %_ASM_AX
@@ -331,7 +335,7 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
#endif
- RESTORE_HOST_SPEC_CTRL
+ RESTORE_HOST_SPEC_CTRL sev_es
/*
* Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
@@ -359,7 +363,8 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
pop %_ASM_BP
RET
- RESTORE_SPEC_CTRL_BODY
+ RESTORE_GUEST_SPEC_CTRL_BODY sev_es
+ RESTORE_HOST_SPEC_CTRL_BODY sev_es
3: cmpb $0, kvm_rebooting
jne 2b
base-commit: 0b242ada175d97a556866c48c80310860f634579
On 11/9/22 02:14, Sean Christopherson wrote:
>>
>> +.macro RESTORE_SPEC_CTRL_BODY
>
> Can we split these into separate macros? It's a bit more typing, but it's not
> immediately obvious that these are two independent chunks (I was expecting a JMP
> from the 800 section into the 900 section).
>
> E.g. RESTORE_GUEST_SPEC_CTRL_BODY and RESTORE_HOST_SPEC_CTRL_BODY
Sure, I had it like that in an earlier version. I didn't see much
benefit but it is indeed a bit more readable if you order the macros like
.macro RESTORE_GUEST_SPEC_CTRL
.macro RESTORE_GUEST_SPEC_CTRL_BODY
.macro RESTORE_HOST_SPEC_CTRL
.macro RESTORE_HOST_SPEC_CTRL_BODY
>> +800:
>
> Ugh, the multiple users makes it somewhat ugly, but rather than arbitrary numbers,
> what about using named labels to make it easier to understand the branches?
I think it's okay if we separate the macros.
Paolo
@@ -196,22 +196,15 @@ void __init check_bugs(void)
}
/*
- * NOTE: This function is *only* called for SVM. VMX spec_ctrl handling is
- * done in vmenter.S.
+ * NOTE: This function is *only* called for SVM, since Intel uses
+ * MSR_IA32_SPEC_CTRL for SSBD.
*/
void
x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
{
- u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current();
+ u64 guestval, hostval;
struct thread_info *ti = current_thread_info();
- if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) {
- if (hostval != guestval) {
- msrval = setguest ? guestval : hostval;
- wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
- }
- }
-
/*
* If SSBD is not handled in MSR_SPEC_CTRL on AMD, update
* MSR_AMD64_L2_CFG or MSR_VIRT_SPEC_CTRL if supported.
@@ -16,6 +16,7 @@ static void __used common(void)
BLANK();
OFFSET(SVM_vcpu_arch_regs, vcpu_svm, vcpu.arch.regs);
OFFSET(SVM_current_vmcb, vcpu_svm, current_vmcb);
+ OFFSET(SVM_spec_ctrl, vcpu_svm, spec_ctrl);
OFFSET(SVM_vmcb01, vcpu_svm, vmcb01);
OFFSET(KVM_VMCB_pa, kvm_vmcb_info, pa);
}
@@ -730,6 +730,15 @@ static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
u32 offset;
u32 *msrpm;
+ /*
+ * For non-nested case:
+ * If the L01 MSR bitmap does not intercept the MSR, then we need to
+ * save it.
+ *
+ * For nested case:
+ * If the L02 MSR bitmap does not intercept the MSR, then we need to
+ * save it.
+ */
msrpm = is_guest_mode(vcpu) ? to_svm(vcpu)->nested.msrpm:
to_svm(vcpu)->msrpm;
@@ -3911,18 +3920,19 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
return EXIT_FASTPATH_NONE;
}
-static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
+static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_intercepted)
{
struct vcpu_svm *svm = to_svm(vcpu);
guest_state_enter_irqoff();
if (sev_es_guest(vcpu->kvm)) {
- __svm_sev_es_vcpu_run(svm);
+ __svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted);
} else {
struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
- __svm_vcpu_run(svm, __sme_page_pa(sd->save_area));
+ __svm_vcpu_run(svm, __sme_page_pa(sd->save_area),
+ spec_ctrl_intercepted);
}
guest_state_exit_irqoff();
@@ -3931,6 +3941,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ bool spec_ctrl_intercepted = msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL);
trace_kvm_entry(vcpu);
@@ -3989,26 +4000,7 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL))
x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
- svm_vcpu_enter_exit(vcpu);
-
- /*
- * We do not use IBRS in the kernel. If this vCPU has used the
- * SPEC_CTRL MSR it may have left it on; save the value and
- * turn it off. This is much more efficient than blindly adding
- * it to the atomic save/restore list. Especially as the former
- * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
- *
- * For non-nested case:
- * If the L01 MSR bitmap does not intercept the MSR, then we need to
- * save it.
- *
- * For nested case:
- * If the L02 MSR bitmap does not intercept the MSR, then we need to
- * save it.
- */
- if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL) &&
- unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
- svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
+ svm_vcpu_enter_exit(vcpu, spec_ctrl_intercepted);
if (!sev_es_guest(vcpu->kvm))
reload_tss(vcpu);
@@ -683,7 +683,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm);
/* vmenter.S */
-void __svm_sev_es_vcpu_run(struct vcpu_svm *svm);
-void __svm_vcpu_run(struct vcpu_svm *svm, unsigned long hsave_pa);
+void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted);
+void __svm_vcpu_run(struct vcpu_svm *svm, unsigned long hsave_pa, bool spec_ctrl_intercepted);
#endif
@@ -32,10 +32,70 @@
.section .noinstr.text, "ax"
+.macro RESTORE_GUEST_SPEC_CTRL
+ /* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */
+ ALTERNATIVE_2 "", \
+ "jmp 800f", X86_FEATURE_MSR_SPEC_CTRL, \
+ "", X86_FEATURE_V_SPEC_CTRL
+801:
+.endm
+
+.macro RESTORE_HOST_SPEC_CTRL
+ /* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */
+ ALTERNATIVE_2 "", \
+ "jmp 900f", X86_FEATURE_MSR_SPEC_CTRL, \
+ "", X86_FEATURE_V_SPEC_CTRL
+901:
+.endm
+
+.macro RESTORE_SPEC_CTRL_BODY
+800:
+ /*
+ * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the
+ * host's, write the MSR. This is kept out-of-line so that the common
+ * case does not have to jump.
+ *
+ * IMPORTANT: To avoid RSB underflow attacks and any other nastiness,
+ * there must not be any returns or indirect branches between this code
+ * and vmentry.
+ */
+ movl SVM_spec_ctrl(%_ASM_DI), %eax
+ cmp PER_CPU_VAR(x86_spec_ctrl_current), %eax
+ je 801b
+ mov $MSR_IA32_SPEC_CTRL, %ecx
+ xor %edx, %edx
+ wrmsr
+ jmp 801b
+
+900:
+ /* Same for after vmexit. */
+ mov $MSR_IA32_SPEC_CTRL, %ecx
+
+ /*
+ * Load the value that the guest had written into MSR_IA32_SPEC_CTRL,
+ * if it was not intercepted during guest execution.
+ */
+ cmpb $0, (%_ASM_SP)
+ jnz 998f
+ rdmsr
+ movl %eax, SVM_spec_ctrl(%_ASM_DI)
+998:
+
+ /* Now restore the host value of the MSR if different from the guest's. */
+ movl PER_CPU_VAR(x86_spec_ctrl_current), %eax
+ cmp SVM_spec_ctrl(%_ASM_DI), %eax
+ je 901b
+ xor %edx, %edx
+ wrmsr
+ jmp 901b
+.endm
+
+
/**
* __svm_vcpu_run - Run a vCPU via a transition to SVM guest mode
* @svm: struct vcpu_svm *
* @hsave_pa: unsigned long
+ * @spec_ctrl_intercepted: bool
*/
SYM_FUNC_START(__svm_vcpu_run)
push %_ASM_BP
@@ -50,7 +110,12 @@ SYM_FUNC_START(__svm_vcpu_run)
#endif
push %_ASM_BX
- /* @hsave_pa is needed last after vmexit, save it first. */
+ /*
+ * Both @spec_ctrl_intercepted and @hsave_pa are used only after vmexit.
+ * @spec_ctrl_intercepted is needed later and accessed directly from
+ * the stack in RESTORE_HOST_SPEC_CTRL, so save it first.
+ */
+ push %_ASM_ARG3
push %_ASM_ARG2
/* Save @svm. */
@@ -61,6 +126,8 @@ SYM_FUNC_START(__svm_vcpu_run)
mov %_ASM_ARG1, %_ASM_DI
.endif
+ RESTORE_GUEST_SPEC_CTRL
+
/*
* Use a single vmcb (vmcb01 because it's always valid) for
* context switching guest state via VMLOAD/VMSAVE, that way
@@ -138,6 +205,8 @@ SYM_FUNC_START(__svm_vcpu_run)
FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
#endif
+ RESTORE_HOST_SPEC_CTRL
+
/*
* Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
* untrained as soon as we exit the VM and are back to the
@@ -173,6 +242,9 @@ SYM_FUNC_START(__svm_vcpu_run)
xor %r15d, %r15d
#endif
+ /* "Pop" @spec_ctrl_intercepted. */
+ pop %_ASM_BX
+
pop %_ASM_BX
#ifdef CONFIG_X86_64
@@ -187,6 +259,8 @@ SYM_FUNC_START(__svm_vcpu_run)
pop %_ASM_BP
RET
+ RESTORE_SPEC_CTRL_BODY
+
10: cmpb $0, kvm_rebooting
jne 2b
ud2
@@ -210,6 +284,7 @@ SYM_FUNC_END(__svm_vcpu_run)
/**
* __svm_sev_es_vcpu_run - Run a SEV-ES vCPU via a transition to SVM guest mode
* @svm: struct vcpu_svm *
+ * @spec_ctrl_intercepted: bool
*/
SYM_FUNC_START(__svm_sev_es_vcpu_run)
push %_ASM_BP
@@ -224,8 +299,21 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
#endif
push %_ASM_BX
+ /* Save @spec_ctrl_intercepted for RESTORE_HOST_SPEC_CTRL. */
+ push %_ASM_ARG2
+
+ /* Save @svm. */
+ push %_ASM_ARG1
+
+.ifnc _ASM_ARG1, _ASM_DI
+ /* Move @svm to RDI for RESTORE_GUEST_SPEC_CTRL. */
+ mov %_ASM_ARG1, %_ASM_DI
+.endif
+
+ RESTORE_GUEST_SPEC_CTRL
+
/* Get svm->current_vmcb->pa into RAX. */
- mov SVM_current_vmcb(%_ASM_ARG1), %_ASM_AX
+ mov SVM_current_vmcb(%_ASM_DI), %_ASM_AX
mov KVM_VMCB_pa(%_ASM_AX), %_ASM_AX
/* Enter guest mode */
@@ -235,11 +323,16 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
2: cli
+ /* Pop @svm to RDI, guest registers have been saved already. */
+ pop %_ASM_DI
+
#ifdef CONFIG_RETPOLINE
/* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
#endif
+ RESTORE_HOST_SPEC_CTRL
+
/*
* Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
* untrained as soon as we exit the VM and are back to the
@@ -249,6 +342,9 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
*/
UNTRAIN_RET
+ /* "Pop" @spec_ctrl_intercepted. */
+ pop %_ASM_BX
+
pop %_ASM_BX
#ifdef CONFIG_X86_64
@@ -263,6 +359,8 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
pop %_ASM_BP
RET
+ RESTORE_SPEC_CTRL_BODY
+
3: cmpb $0, kvm_rebooting
jne 2b
ud2