[v4,16/20] KVM:x86: Optimize CET supervisor SSP save/reload
Commit Message
Make PL{0,1,2}_SSP write-intercepted to detect whether the
guest is using these MSRs. Disable interception of the MSRs
once they're written with non-zero values. KVM saves/reloads
the MSRs only if they're used by the guest.
Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/vmx/vmx.c | 34 +++++++++++++++++++++++++++++----
arch/x86/kvm/x86.c | 15 ++++++++++-----
3 files changed, 41 insertions(+), 9 deletions(-)
Comments
On Thu, Jul 20, 2023 at 11:03:48PM -0400, Yang Weijiang wrote:
> /*
> * Writes msr value into the appropriate "register".
> * Returns 0 on success, non-0 otherwise.
>@@ -2427,7 +2439,16 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> #define CET_LEG_BITMAP_BASE(data) ((data) >> 12)
> #define CET_EXCLUSIVE_BITS (CET_SUPPRESS | CET_WAIT_ENDBR)
> case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
>- return kvm_set_msr_common(vcpu, msr_info);
>+ if (kvm_set_msr_common(vcpu, msr_info))
>+ return 1;
>+ /*
>+ * Write to the base SSP MSRs should happen ahead of toggling
>+ * of IA32_S_CET.SH_STK_EN bit.
Is this a requirement from SDM? And how is this related to the change below?
Note that PLx_SSP MSRs are linear addresses of shadow stacks for different CPLs.
I may think using the page at 0 (assuming 0 is the reset value of PLx SSP) is
allowed in architecture although probably no kernel will do so.
I don't understand why this comment is needed. I suggest dropping it.
>+ */
>+ if (msr_index != MSR_IA32_PL3_SSP && data) {
>+ vmx_disable_write_intercept_sss_msr(vcpu);
>+ wrmsrl(msr_index, data);
>+ }
On 7/27/2023 11:27 AM, Chao Gao wrote:
> On Thu, Jul 20, 2023 at 11:03:48PM -0400, Yang Weijiang wrote:
>> /*
>> * Writes msr value into the appropriate "register".
>> * Returns 0 on success, non-0 otherwise.
>> @@ -2427,7 +2439,16 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>> #define CET_LEG_BITMAP_BASE(data) ((data) >> 12)
>> #define CET_EXCLUSIVE_BITS (CET_SUPPRESS | CET_WAIT_ENDBR)
>> case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
>> - return kvm_set_msr_common(vcpu, msr_info);
>> + if (kvm_set_msr_common(vcpu, msr_info))
>> + return 1;
>> + /*
>> + * Write to the base SSP MSRs should happen ahead of toggling
>> + * of IA32_S_CET.SH_STK_EN bit.
> Is this a requirement from SDM? And how is this related to the change below?
No. On second thought, using the supervisor SSPs doesn't necessarily
mean supervisor SHSTK is being enabled, e.g., they may be used as some
HW registers. I'll remove it.
>
> Note that PLx_SSP MSRs are linear addresses of shadow stacks for different CPLs.
> I may think using the page at 0 (assuming 0 is the reset value of PLx SSP) is
> allowed in architecture although probably no kernel will do so.
>
> I don't understand why this comment is needed. I suggest dropping it.
will drop it, thanks!
>
>> + */
>> + if (msr_index != MSR_IA32_PL3_SSP && data) {
>> + vmx_disable_write_intercept_sss_msr(vcpu);
>> + wrmsrl(msr_index, data);
>> + }
@@ -748,6 +748,7 @@ struct kvm_vcpu_arch {
bool tpr_access_reporting;
bool xsaves_enabled;
bool xfd_no_write_intercept;
+ bool cet_sss_active;
u64 ia32_xss;
u64 microcode_version;
u64 arch_capabilities;
@@ -2155,6 +2155,18 @@ static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated
return debugctl;
}
+static void vmx_disable_write_intercept_sss_msr(struct kvm_vcpu *vcpu)
+{
+ if (guest_can_use(vcpu, X86_FEATURE_SHSTK)) {
+ vmx_set_intercept_for_msr(vcpu, MSR_IA32_PL0_SSP,
+ MSR_TYPE_RW, false);
+ vmx_set_intercept_for_msr(vcpu, MSR_IA32_PL1_SSP,
+ MSR_TYPE_RW, false);
+ vmx_set_intercept_for_msr(vcpu, MSR_IA32_PL2_SSP,
+ MSR_TYPE_RW, false);
+ }
+}
+
/*
* Writes msr value into the appropriate "register".
* Returns 0 on success, non-0 otherwise.
@@ -2427,7 +2439,16 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
#define CET_LEG_BITMAP_BASE(data) ((data) >> 12)
#define CET_EXCLUSIVE_BITS (CET_SUPPRESS | CET_WAIT_ENDBR)
case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
- return kvm_set_msr_common(vcpu, msr_info);
+ if (kvm_set_msr_common(vcpu, msr_info))
+ return 1;
+ /*
+ * Write to the base SSP MSRs should happen ahead of toggling
+ * of IA32_S_CET.SH_STK_EN bit.
+ */
+ if (msr_index != MSR_IA32_PL3_SSP && data) {
+ vmx_disable_write_intercept_sss_msr(vcpu);
+ wrmsrl(msr_index, data);
+ }
break;
case MSR_IA32_U_CET:
case MSR_IA32_S_CET:
@@ -7774,12 +7795,17 @@ static void vmx_update_intercept_for_cet_msr(struct kvm_vcpu *vcpu)
MSR_TYPE_RW, false);
vmx_set_intercept_for_msr(vcpu, MSR_IA32_S_CET,
MSR_TYPE_RW, false);
+ /*
+ * Supervisor shadow stack MSRs are intercepted until
+ * they're written by guest, this is designed to
+ * optimize the save/restore overhead.
+ */
vmx_set_intercept_for_msr(vcpu, MSR_IA32_PL0_SSP,
- MSR_TYPE_RW, false);
+ MSR_TYPE_R, false);
vmx_set_intercept_for_msr(vcpu, MSR_IA32_PL1_SSP,
- MSR_TYPE_RW, false);
+ MSR_TYPE_R, false);
vmx_set_intercept_for_msr(vcpu, MSR_IA32_PL2_SSP,
- MSR_TYPE_RW, false);
+ MSR_TYPE_R, false);
vmx_set_intercept_for_msr(vcpu, MSR_IA32_PL3_SSP,
MSR_TYPE_RW, false);
vmx_set_intercept_for_msr(vcpu, MSR_IA32_INT_SSP_TAB,
@@ -4051,6 +4051,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (msr == MSR_IA32_PL0_SSP || msr == MSR_IA32_PL1_SSP ||
msr == MSR_IA32_PL2_SSP) {
vcpu->arch.cet_s_ssp[msr - MSR_IA32_PL0_SSP] = data;
+ if (!vcpu->arch.cet_sss_active && data)
+ vcpu->arch.cet_sss_active = true;
} else if (msr == MSR_IA32_PL3_SSP) {
kvm_set_xsave_msr(msr_info);
}
@@ -11252,7 +11254,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
kvm_sigset_activate(vcpu);
kvm_run->flags = 0;
kvm_load_guest_fpu(vcpu);
- kvm_reload_cet_supervisor_ssp(vcpu);
+ if (vcpu->arch.cet_sss_active)
+ kvm_reload_cet_supervisor_ssp(vcpu);
kvm_vcpu_srcu_read_lock(vcpu);
if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
@@ -11341,7 +11344,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
r = vcpu_run(vcpu);
out:
- kvm_save_cet_supervisor_ssp(vcpu);
+ if (vcpu->arch.cet_sss_active)
+ kvm_save_cet_supervisor_ssp(vcpu);
kvm_put_guest_fpu(vcpu);
if (kvm_run->kvm_valid_regs)
store_regs(vcpu);
@@ -12430,15 +12434,16 @@ void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
pmu->need_cleanup = true;
kvm_make_request(KVM_REQ_PMU, vcpu);
}
-
- kvm_reload_cet_supervisor_ssp(vcpu);
+ if (vcpu->arch.cet_sss_active)
+ kvm_reload_cet_supervisor_ssp(vcpu);
static_call(kvm_x86_sched_in)(vcpu, cpu);
}
void kvm_arch_sched_out(struct kvm_vcpu *vcpu, int cpu)
{
- kvm_save_cet_supervisor_ssp(vcpu);
+ if (vcpu->arch.cet_sss_active)
+ kvm_save_cet_supervisor_ssp(vcpu);
}
void kvm_arch_free_vm(struct kvm *kvm)