[6/7] KVM: SVM: move MSR_IA32_SPEC_CTRL save/restore to assembly
Commit Message
Restoration of the host IA32_SPEC_CTRL value is probably too late
with respect to the return thunk training sequence.
With respect to the user/kernel boundary, AMD says, "If software chooses
to toggle STIBP (e.g., set STIBP on kernel entry, and clear it on kernel
exit), software should set STIBP to 1 before executing the return thunk
training sequence." I assume the same requirements apply to the guest/host
boundary. The return thunk training sequence is in vmenter.S, quite close
to the VM-exit. On hosts without V_SPEC_CTRL, however, the host's
IA32_SPEC_CTRL value is not restored until much later.
To avoid this, move the restoration of host SPEC_CTRL to assembly and,
for consistency, move the restoration of the guest SPEC_CTRL as well.
This is not particularly difficult, apart from some care to cover both
32- and 64-bit, and to share code between SEV-ES and normal vmentry.
Suggested-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
arch/x86/kernel/asm-offsets.c | 1 +
arch/x86/kernel/cpu/bugs.c | 13 ++---
arch/x86/kvm/svm/svm.c | 34 +++++--------
arch/x86/kvm/svm/svm.h | 4 +-
arch/x86/kvm/svm/vmenter.S | 93 +++++++++++++++++++++++++++++++++--
5 files changed, 109 insertions(+), 36 deletions(-)
Comments
Hi Paolo,
I love your patch! Yet something to improve:
[auto build test ERROR on kvm/queue]
[also build test ERROR on mst-vhost/linux-next linus/master v6.1-rc2]
[cannot apply to tip/x86/core next-20221028]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Paolo-Bonzini/KVM-SVM-move-MSR_IA32_SPEC_CTRL-save-restore-to-assembly/20221029-071449
base: https://git.kernel.org/pub/scm/virt/kvm/kvm.git queue
patch link: https://lore.kernel.org/r/20221028230723.3254250-7-pbonzini%40redhat.com
patch subject: [PATCH 6/7] KVM: SVM: move MSR_IA32_SPEC_CTRL save/restore to assembly
config: x86_64-allyesconfig
compiler: gcc-11 (Debian 11.3.0-8) 11.3.0
reproduce (this is a W=1 build):
# https://github.com/intel-lab-lkp/linux/commit/63e749bdae3bd901e8710a6b15ed42d0c08853b6
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Paolo-Bonzini/KVM-SVM-move-MSR_IA32_SPEC_CTRL-save-restore-to-assembly/20221029-071449
git checkout 63e749bdae3bd901e8710a6b15ed42d0c08853b6
# save the config file
mkdir build_dir && cp config build_dir/.config
make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash
If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
All error/warnings (new ones prefixed by >>):
vmlinux.o: warning: objtool: kasan_report+0x12: call to stackleak_track_stack() with UACCESS enabled
>> vmlinux.o: warning: objtool: svm_vcpu_enter_exit+0x1e: call to msr_write_intercepted.constprop.0() leaves .noinstr.text section
vmlinux.o: warning: objtool: check_stackleak_irqoff+0x309: call to _printk() leaves .noinstr.text section
--
ld: kernel image bigger than KERNEL_IMAGE_SIZE
ld: vmlinux.o: in function `__svm_vcpu_run':
>> (.noinstr.text+0x1f0b): undefined reference to `SVM_spec_ctrl'
ld: (.noinstr.text+0x1f26): undefined reference to `SVM_vcpu_arch_regs'
ld: (.noinstr.text+0x1f2d): undefined reference to `SVM_vcpu_arch_regs'
ld: (.noinstr.text+0x1f34): undefined reference to `SVM_vcpu_arch_regs'
ld: (.noinstr.text+0x1f3b): undefined reference to `SVM_vcpu_arch_regs'
ld: (.noinstr.text+0x1f42): undefined reference to `SVM_vcpu_arch_regs'
ld: vmlinux.o:(.noinstr.text+0x1f49): more undefined references to `SVM_vcpu_arch_regs' follow
ld: vmlinux.o: in function `__svm_vcpu_run':
(.noinstr.text+0x2023): undefined reference to `SVM_spec_ctrl'
>> ld: (.noinstr.text+0x2031): undefined reference to `SVM_spec_ctrl'
ld: vmlinux.o: in function `__svm_sev_es_vcpu_run':
(.noinstr.text+0x20ab): undefined reference to `SVM_spec_ctrl'
ld: (.noinstr.text+0x20fc): undefined reference to `SVM_spec_ctrl'
ld: (.noinstr.text+0x210a): undefined reference to `SVM_spec_ctrl'
On Fri, Oct 28, 2022 at 07:07:22PM -0400, Paolo Bonzini wrote:
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -3918,10 +3918,21 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
> struct vcpu_svm *svm = to_svm(vcpu);
> unsigned long vmcb_pa = svm->current_vmcb->pa;
>
> + /*
> + * For non-nested case:
> + * If the L01 MSR bitmap does not intercept the MSR, then we need to
> + * save it.
> + *
> + * For nested case:
> + * If the L02 MSR bitmap does not intercept the MSR, then we need to
> + * save it.
> + */
> + bool spec_ctrl_intercepted = msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL);
This triggers a warning:
vmlinux.o: warning: objtool: svm_vcpu_enter_exit+0x3d: call to svm_msrpm_offset() leaves .noinstr.text section
svm_vcpu_enter_exit() is noinstr, but it's calling
msr_write_intercepted() which is not.
That's why in the VMX code I did the call to msr_write_intercepted() (in
__vmx_vcpu_run_flags) before calling vmx_vcpu_enter_exit().
On 11/2/22 16:28, Josh Poimboeuf wrote:
> On Fri, Oct 28, 2022 at 07:07:22PM -0400, Paolo Bonzini wrote:
>> +++ b/arch/x86/kvm/svm/svm.c
>> @@ -3918,10 +3918,21 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
>> struct vcpu_svm *svm = to_svm(vcpu);
>> unsigned long vmcb_pa = svm->current_vmcb->pa;
>>
>> + /*
>> + * For non-nested case:
>> + * If the L01 MSR bitmap does not intercept the MSR, then we need to
>> + * save it.
>> + *
>> + * For nested case:
>> + * If the L02 MSR bitmap does not intercept the MSR, then we need to
>> + * save it.
>> + */
>> + bool spec_ctrl_intercepted = msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL);
>
> This triggers a warning:
>
> vmlinux.o: warning: objtool: svm_vcpu_enter_exit+0x3d: call to svm_msrpm_offset() leaves .noinstr.text section
>
> svm_vcpu_enter_exit() is noinstr, but it's calling
> msr_write_intercepted() which is not.
I suspect I didn't see it because it's inlined here, but it has to be
fixed indeed.
> That's why in the VMX code I did the call to msr_write_intercepted() (in
> __vmx_vcpu_run_flags) before calling vmx_vcpu_enter_exit().
Yes, it's easy to do the same and do it in svm_vcpu_run().
Paolo
@@ -113,6 +113,7 @@ static void __used common(void)
if (IS_ENABLED(CONFIG_KVM_AMD)) {
BLANK();
OFFSET(SVM_vcpu_arch_regs, vcpu_svm, vcpu.arch.regs);
+ OFFSET(SVM_spec_ctrl, vcpu_svm, spec_ctrl);
}
if (IS_ENABLED(CONFIG_KVM_INTEL)) {
@@ -196,22 +196,15 @@ void __init check_bugs(void)
}
/*
- * NOTE: This function is *only* called for SVM. VMX spec_ctrl handling is
- * done in vmenter.S.
+ * NOTE: This function is *only* called for SVM, since Intel uses
+ * MSR_IA32_SPEC_CTRL for SSBD.
*/
void
x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
{
- u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current();
+ u64 guestval, hostval;
struct thread_info *ti = current_thread_info();
- if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) {
- if (hostval != guestval) {
- msrval = setguest ? guestval : hostval;
- wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
- }
- }
-
/*
* If SSBD is not handled in MSR_SPEC_CTRL on AMD, update
* MSR_AMD64_L2_CFG or MSR_VIRT_SPEC_CTRL if supported.
@@ -3918,10 +3918,21 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
unsigned long vmcb_pa = svm->current_vmcb->pa;
+ /*
+ * For non-nested case:
+ * If the L01 MSR bitmap does not intercept the MSR, then we need to
+ * save it.
+ *
+ * For nested case:
+ * If the L02 MSR bitmap does not intercept the MSR, then we need to
+ * save it.
+ */
+ bool spec_ctrl_intercepted = msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL);
+
guest_state_enter_irqoff();
if (sev_es_guest(vcpu->kvm)) {
- __svm_sev_es_vcpu_run(vmcb_pa);
+ __svm_sev_es_vcpu_run(vmcb_pa, svm, spec_ctrl_intercepted);
} else {
struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
@@ -3932,7 +3943,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu)
* vmcb02 when switching vmcbs for nested virtualization.
*/
vmload(svm->vmcb01.pa);
- __svm_vcpu_run(vmcb_pa, svm);
+ __svm_vcpu_run(vmcb_pa, svm, spec_ctrl_intercepted);
vmsave(svm->vmcb01.pa);
vmload(__sme_page_pa(sd->save_area));
@@ -4004,25 +4015,6 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
svm_vcpu_enter_exit(vcpu);
- /*
- * We do not use IBRS in the kernel. If this vCPU has used the
- * SPEC_CTRL MSR it may have left it on; save the value and
- * turn it off. This is much more efficient than blindly adding
- * it to the atomic save/restore list. Especially as the former
- * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
- *
- * For non-nested case:
- * If the L01 MSR bitmap does not intercept the MSR, then we need to
- * save it.
- *
- * For nested case:
- * If the L02 MSR bitmap does not intercept the MSR, then we need to
- * save it.
- */
- if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL) &&
- unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
- svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
-
if (!sev_es_guest(vcpu->kvm))
reload_tss(vcpu);
@@ -483,7 +483,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm);
/* vmenter.S */
-void __svm_sev_es_vcpu_run(unsigned long vmcb_pa);
-void __svm_vcpu_run(unsigned long vmcb_pa, struct vcpu_svm *svm);
+void __svm_sev_es_vcpu_run(unsigned long vmcb_pa, struct vcpu_svm *svm, bool spec_ctrl_intercepted);
+void __svm_vcpu_run(unsigned long vmcb_pa, struct vcpu_svm *svm, bool spec_ctrl_intercepted);
#endif
@@ -29,10 +29,65 @@
.section .noinstr.text, "ax"
+.macro RESTORE_GUEST_SPEC_CTRL
+ /* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */
+ ALTERNATIVE_2 "jmp 999f", \
+ "", X86_FEATURE_MSR_SPEC_CTRL, \
+ "jmp 999f", X86_FEATURE_V_SPEC_CTRL
+
+ /*
+ * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the
+ * host's, write the MSR.
+ *
+ * IMPORTANT: To avoid RSB underflow attacks and any other nastiness,
+ * there must not be any returns or indirect branches between this code
+ * and vmentry.
+ */
+ movl SVM_spec_ctrl(%_ASM_DI), %eax
+ cmp PER_CPU_VAR(x86_spec_ctrl_current), %eax
+ je 999f
+ mov $MSR_IA32_SPEC_CTRL, %ecx
+ xor %edx, %edx
+ wrmsr
+999:
+
+.endm
+
+.macro RESTORE_HOST_SPEC_CTRL
+ /* No need to do anything if SPEC_CTRL is unset or V_SPEC_CTRL is set */
+ ALTERNATIVE_2 "jmp 999f", \
+ "", X86_FEATURE_MSR_SPEC_CTRL, \
+ "jmp 999f", X86_FEATURE_V_SPEC_CTRL
+
+ mov $MSR_IA32_SPEC_CTRL, %ecx
+
+ /*
+ * Load the value that the guest had written into MSR_IA32_SPEC_CTRL,
+ * if it was not intercepted during guest execution.
+ */
+ cmpb $0, (%_ASM_SP)
+ jnz 998f
+ rdmsr
+ movl %eax, SVM_spec_ctrl(%_ASM_DI)
+998:
+
+ /* Now restore the host value of the MSR if different from the guest's. */
+ movl PER_CPU_VAR(x86_spec_ctrl_current), %eax
+ cmp SVM_spec_ctrl(%_ASM_DI), %eax
+ je 999f
+ xor %edx, %edx
+ wrmsr
+999:
+
+.endm
+
+
+
/**
* __svm_vcpu_run - Run a vCPU via a transition to SVM guest mode
* @vmcb_pa: unsigned long
* @svm: struct vcpu_svm *
+ * @spec_ctrl_intercepted: bool
*/
SYM_FUNC_START(__svm_vcpu_run)
push %_ASM_BP
@@ -47,6 +102,9 @@ SYM_FUNC_START(__svm_vcpu_run)
#endif
push %_ASM_BX
+ /* Save @spec_ctrl_intercepted. */
+ push %_ASM_ARG3
+
/* Save @svm. */
push %_ASM_ARG2
@@ -56,6 +114,7 @@ SYM_FUNC_START(__svm_vcpu_run)
/* Move @svm to RDI. */
mov %_ASM_ARG2, %_ASM_DI
+ RESTORE_GUEST_SPEC_CTRL
/* "POP" @vmcb to RAX. */
pop %_ASM_AX
@@ -90,7 +149,7 @@ SYM_FUNC_START(__svm_vcpu_run)
FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
#endif
- /* "POP" @svm to RAX. */
+ /* "POP" @svm to RAX while it's the only available register. */
pop %_ASM_AX
/* Save all guest registers. */
@@ -111,6 +170,9 @@ SYM_FUNC_START(__svm_vcpu_run)
mov %r15, VCPU_R15(%_ASM_AX)
#endif
+ mov %_ASM_AX, %_ASM_DI
+ RESTORE_HOST_SPEC_CTRL
+
/*
* Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
* untrained as soon as we exit the VM and are back to the
@@ -146,6 +208,9 @@ SYM_FUNC_START(__svm_vcpu_run)
xor %r15d, %r15d
#endif
+ /* "Pop" @spec_ctrl_intercepted. */
+ pop %_ASM_BX
+
pop %_ASM_BX
#ifdef CONFIG_X86_64
@@ -171,6 +236,8 @@ SYM_FUNC_END(__svm_vcpu_run)
/**
* __svm_sev_es_vcpu_run - Run a SEV-ES vCPU via a transition to SVM guest mode
* @vmcb_pa: unsigned long
+ * @svm: struct vcpu_svm *
+ * @spec_ctrl_intercepted: bool
*/
SYM_FUNC_START(__svm_sev_es_vcpu_run)
push %_ASM_BP
@@ -185,8 +252,22 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
#endif
push %_ASM_BX
- /* Move @vmcb to RAX. */
- mov %_ASM_ARG1, %_ASM_AX
+ /* Save @spec_ctrl_intercepted. */
+ push %_ASM_ARG3
+
+ /* Save @svm. */
+ push %_ASM_ARG2
+
+ /* Save @vmcb. */
+ push %_ASM_ARG1
+
+ /* Move @svm to RDI for RESTORE_GUEST_SPEC_CTRL. */
+ mov %_ASM_ARG2, %_ASM_DI
+
+ RESTORE_GUEST_SPEC_CTRL
+
+ /* Pop @vmcb to RAX. */
+ pop %_ASM_AX
/* Enter guest mode */
sti
@@ -200,6 +281,9 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
#endif
+ pop %_ASM_DI
+ RESTORE_HOST_SPEC_CTRL
+
/*
* Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
* untrained as soon as we exit the VM and are back to the
@@ -209,6 +293,9 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
*/
UNTRAIN_RET
+ /* "Pop" @spec_ctrl_intercepted. */
+ pop %_ASM_BX
+
pop %_ASM_BX
#ifdef CONFIG_X86_64