[v10,104/108] KVM: TDX: Silently ignore INIT/SIPI

Message ID a888bb4d30de2e57b0eb5e61189349c86cab1a70.1667110240.git.isaku.yamahata@intel.com
State New
Headers
Series KVM TDX basic feature support |

Commit Message

Isaku Yamahata Oct. 30, 2022, 6:23 a.m. UTC
  From: Isaku Yamahata <isaku.yamahata@intel.com>

The TDX module API doesn't provide API for VMM to inject INIT IPI and SIPI.
Instead it defines the different protocols to boot application processors.
Ignore INIT and SIPI events for the TDX guest.

There are two options. 1) (silently) ignore INIT/SIPI request or 2) return
error to guest TDs somehow.  Given that TDX guest is paravirtualized to
boot AP, the option 1 is chosen for simplicity.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
---
 arch/x86/include/asm/kvm-x86-ops.h |  1 +
 arch/x86/include/asm/kvm_host.h    |  2 ++
 arch/x86/kvm/lapic.c               | 19 ++++++++++++-------
 arch/x86/kvm/svm/svm.c             |  1 +
 arch/x86/kvm/vmx/main.c            | 22 +++++++++++++++++++++-
 5 files changed, 37 insertions(+), 8 deletions(-)
  

Comments

Binbin Wu Nov. 23, 2022, 3:17 p.m. UTC | #1
On 10/30/2022 2:23 PM, isaku.yamahata@intel.com wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
>
> The TDX module API doesn't provide API for VMM to inject INIT IPI and SIPI.
> Instead it defines the different protocols to boot application processors.
> Ignore INIT and SIPI events for the TDX guest.
>
> There are two options. 1) (silently) ignore INIT/SIPI request or 2) return
> error to guest TDs somehow.  Given that TDX guest is paravirtualized to
> boot AP, the option 1 is chosen for simplicity.
>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> ---
>   arch/x86/include/asm/kvm-x86-ops.h |  1 +
>   arch/x86/include/asm/kvm_host.h    |  2 ++
>   arch/x86/kvm/lapic.c               | 19 ++++++++++++-------
>   arch/x86/kvm/svm/svm.c             |  1 +
>   arch/x86/kvm/vmx/main.c            | 22 +++++++++++++++++++++-
>   5 files changed, 37 insertions(+), 8 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
> index 17c3828d42a3..4e9b96480716 100644
> --- a/arch/x86/include/asm/kvm-x86-ops.h
> +++ b/arch/x86/include/asm/kvm-x86-ops.h
> @@ -140,6 +140,7 @@ KVM_X86_OP_OPTIONAL(migrate_timers)
>   KVM_X86_OP(msr_filter_changed)
>   KVM_X86_OP(complete_emulated_msr)
>   KVM_X86_OP(vcpu_deliver_sipi_vector)
> +KVM_X86_OP(vcpu_deliver_init)
>   KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
>   KVM_X86_OP(check_processor_compatibility)
>   
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 094fff5414e1..df67ca7b23d3 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1706,6 +1706,7 @@ struct kvm_x86_ops {
>   	int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err);
>   
>   	void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);
> +	void (*vcpu_deliver_init)(struct kvm_vcpu *vcpu);
>   
>   	/*
>   	 * Returns vCPU specific APICv inhibit reasons
> @@ -1914,6 +1915,7 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
>   void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
>   int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg);
>   void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
> +void kvm_vcpu_deliver_init(struct kvm_vcpu *vcpu);
>   
>   int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
>   		    int reason, bool has_error_code, u32 error_code);
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 7a1d612bd138..7393d858ed72 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -3035,6 +3035,16 @@ int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
>   	return 0;
>   }
>   
> +void kvm_vcpu_deliver_init(struct kvm_vcpu *vcpu)
> +{
> +	kvm_vcpu_reset(vcpu, true);
> +	if (kvm_vcpu_is_bsp(vcpu))
> +		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
> +	else
> +		vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
> +}
> +EXPORT_SYMBOL_GPL(kvm_vcpu_deliver_init);
> +
>   int kvm_apic_accept_events(struct kvm_vcpu *vcpu)
>   {
>   	struct kvm_lapic *apic = vcpu->arch.apic;
> @@ -3066,13 +3076,8 @@ int kvm_apic_accept_events(struct kvm_vcpu *vcpu)
>   		return 0;
>   	}
>   
> -	if (test_and_clear_bit(KVM_APIC_INIT, &apic->pending_events)) {
> -		kvm_vcpu_reset(vcpu, true);
> -		if (kvm_vcpu_is_bsp(apic->vcpu))
> -			vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
> -		else
> -			vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
> -	}
> +	if (test_and_clear_bit(KVM_APIC_INIT, &apic->pending_events))
> +		static_call(kvm_x86_vcpu_deliver_init)(vcpu);
>   	if (test_and_clear_bit(KVM_APIC_SIPI, &apic->pending_events)) {
>   		if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
>   			/* evaluate pending_events before reading the vector */
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 2bcf2e1a5271..5d56b0f1f595 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -4857,6 +4857,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
>   	.complete_emulated_msr = svm_complete_emulated_msr,
>   
>   	.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
> +	.vcpu_deliver_init = kvm_vcpu_deliver_init,
>   	.vcpu_get_apicv_inhibit_reasons = avic_vcpu_get_apicv_inhibit_reasons,
>   };
>   
> diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
> index 4acba8d8cb27..d776d5d169d0 100644
> --- a/arch/x86/kvm/vmx/main.c
> +++ b/arch/x86/kvm/vmx/main.c
> @@ -286,6 +286,25 @@ static void vt_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
>   	vmx_deliver_interrupt(apic, delivery_mode, trig_mode, vector);
>   }
>   
> +static void vt_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
> +{
> +	if (is_td_vcpu(vcpu))
> +		return;
> +
> +	kvm_vcpu_deliver_sipi_vector(vcpu, vector);
> +}
> +
> +static void vt_vcpu_deliver_init(struct kvm_vcpu *vcpu)
> +{
> +	if (is_td_vcpu(vcpu)) {
> +		/* TDX doesn't support INIT.  Ignore INIT event */
> +		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
> +		return;
> +	}
> +
> +	kvm_vcpu_deliver_init(vcpu);
> +}
> +

Is it better to add WARN_ON_ONCE in the above two functions for TD case?


>   static void vt_flush_tlb_all(struct kvm_vcpu *vcpu)
>   {
>   	if (is_td_vcpu(vcpu))
> @@ -627,7 +646,8 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
>   	.msr_filter_changed = vmx_msr_filter_changed,
>   	.complete_emulated_msr = kvm_complete_insn_gp,
>   
> -	.vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
> +	.vcpu_deliver_sipi_vector = vt_vcpu_deliver_sipi_vector,
> +	.vcpu_deliver_init = vt_vcpu_deliver_init,
>   
>   	.dev_mem_enc_ioctl = tdx_dev_ioctl,
>   	.mem_enc_ioctl = vt_mem_enc_ioctl,
  
Isaku Yamahata Dec. 16, 2022, 3:50 a.m. UTC | #2
On Wed, Nov 23, 2022 at 11:17:44PM +0800,
Binbin Wu <binbin.wu@linux.intel.com> wrote:

> > diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
> > index 4acba8d8cb27..d776d5d169d0 100644
> > --- a/arch/x86/kvm/vmx/main.c
> > +++ b/arch/x86/kvm/vmx/main.c
> > @@ -286,6 +286,25 @@ static void vt_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
> >   	vmx_deliver_interrupt(apic, delivery_mode, trig_mode, vector);
> >   }
> > +static void vt_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
> > +{
> > +	if (is_td_vcpu(vcpu))
> > +		return;
> > +
> > +	kvm_vcpu_deliver_sipi_vector(vcpu, vector);
> > +}
> > +
> > +static void vt_vcpu_deliver_init(struct kvm_vcpu *vcpu)
> > +{
> > +	if (is_td_vcpu(vcpu)) {
> > +		/* TDX doesn't support INIT.  Ignore INIT event */
> > +		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
> > +		return;
> > +	}
> > +
> > +	kvm_vcpu_deliver_init(vcpu);
> > +}
> > +
> 
> Is it better to add WARN_ON_ONCE in the above two functions for TD case?

No because KVM_SET_VCPU_EVENTS ioctl can trigger those callback.
  
Sean Christopherson Dec. 16, 2022, 3:49 p.m. UTC | #3
On Sat, Oct 29, 2022, isaku.yamahata@intel.com wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
> 
> The TDX module API doesn't provide API for VMM to inject INIT IPI and SIPI.
> Instead it defines the different protocols to boot application processors.
> Ignore INIT and SIPI events for the TDX guest.
> 
> There are two options. 1) (silently) ignore INIT/SIPI request or 2) return
> error to guest TDs somehow.  Given that TDX guest is paravirtualized to
> boot AP, the option 1 is chosen for simplicity.
> 
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> ---

...

> diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
> index 4acba8d8cb27..d776d5d169d0 100644
> --- a/arch/x86/kvm/vmx/main.c
> +++ b/arch/x86/kvm/vmx/main.c
> @@ -286,6 +286,25 @@ static void vt_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
>  	vmx_deliver_interrupt(apic, delivery_mode, trig_mode, vector);
>  }
>  
> +static void vt_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
> +{
> +	if (is_td_vcpu(vcpu))
> +		return;
> +
> +	kvm_vcpu_deliver_sipi_vector(vcpu, vector);
> +}
> +
> +static void vt_vcpu_deliver_init(struct kvm_vcpu *vcpu)
> +{
> +	if (is_td_vcpu(vcpu)) {
> +		/* TDX doesn't support INIT.  Ignore INIT event */
> +		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
> +		return;
> +	}
> +
> +	kvm_vcpu_deliver_init(vcpu);
> +}
> +
>  static void vt_flush_tlb_all(struct kvm_vcpu *vcpu)
>  {
>  	if (is_td_vcpu(vcpu))
> @@ -627,7 +646,8 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
>  	.msr_filter_changed = vmx_msr_filter_changed,
>  	.complete_emulated_msr = kvm_complete_insn_gp,
>  
> -	.vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
> +	.vcpu_deliver_sipi_vector = vt_vcpu_deliver_sipi_vector,
> +	.vcpu_deliver_init = vt_vcpu_deliver_init,

A simpler, and arguably more correct, appraoch would be to hook .apic_init_signal_blocked()
and have that return true for TDX.  Waiting until delivery means the vCPU will get
spurious wake events, e.g. KVM will wake the vCPU to service the INIT, but then
ignore the INIT.  Of course, sending the bogus INIT/SIPI in the first place
is a guest bug.

That would also prevent userspace from putting the vCPU into INIT/SIPI via
KVM_SET_MP_STATE.

Ideally, KVM would never mark INIT or SIPI pending in the first place, though I'm
not sure that's worth the effort.
  

Patch

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 17c3828d42a3..4e9b96480716 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -140,6 +140,7 @@  KVM_X86_OP_OPTIONAL(migrate_timers)
 KVM_X86_OP(msr_filter_changed)
 KVM_X86_OP(complete_emulated_msr)
 KVM_X86_OP(vcpu_deliver_sipi_vector)
+KVM_X86_OP(vcpu_deliver_init)
 KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
 KVM_X86_OP(check_processor_compatibility)
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 094fff5414e1..df67ca7b23d3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1706,6 +1706,7 @@  struct kvm_x86_ops {
 	int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err);
 
 	void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);
+	void (*vcpu_deliver_init)(struct kvm_vcpu *vcpu);
 
 	/*
 	 * Returns vCPU specific APICv inhibit reasons
@@ -1914,6 +1915,7 @@  int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
 void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
 int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg);
 void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
+void kvm_vcpu_deliver_init(struct kvm_vcpu *vcpu);
 
 int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
 		    int reason, bool has_error_code, u32 error_code);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 7a1d612bd138..7393d858ed72 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -3035,6 +3035,16 @@  int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
 	return 0;
 }
 
+void kvm_vcpu_deliver_init(struct kvm_vcpu *vcpu)
+{
+	kvm_vcpu_reset(vcpu, true);
+	if (kvm_vcpu_is_bsp(vcpu))
+		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+	else
+		vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_deliver_init);
+
 int kvm_apic_accept_events(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
@@ -3066,13 +3076,8 @@  int kvm_apic_accept_events(struct kvm_vcpu *vcpu)
 		return 0;
 	}
 
-	if (test_and_clear_bit(KVM_APIC_INIT, &apic->pending_events)) {
-		kvm_vcpu_reset(vcpu, true);
-		if (kvm_vcpu_is_bsp(apic->vcpu))
-			vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-		else
-			vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
-	}
+	if (test_and_clear_bit(KVM_APIC_INIT, &apic->pending_events))
+		static_call(kvm_x86_vcpu_deliver_init)(vcpu);
 	if (test_and_clear_bit(KVM_APIC_SIPI, &apic->pending_events)) {
 		if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
 			/* evaluate pending_events before reading the vector */
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 2bcf2e1a5271..5d56b0f1f595 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4857,6 +4857,7 @@  static struct kvm_x86_ops svm_x86_ops __initdata = {
 	.complete_emulated_msr = svm_complete_emulated_msr,
 
 	.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
+	.vcpu_deliver_init = kvm_vcpu_deliver_init,
 	.vcpu_get_apicv_inhibit_reasons = avic_vcpu_get_apicv_inhibit_reasons,
 };
 
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index 4acba8d8cb27..d776d5d169d0 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -286,6 +286,25 @@  static void vt_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
 	vmx_deliver_interrupt(apic, delivery_mode, trig_mode, vector);
 }
 
+static void vt_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
+{
+	if (is_td_vcpu(vcpu))
+		return;
+
+	kvm_vcpu_deliver_sipi_vector(vcpu, vector);
+}
+
+static void vt_vcpu_deliver_init(struct kvm_vcpu *vcpu)
+{
+	if (is_td_vcpu(vcpu)) {
+		/* TDX doesn't support INIT.  Ignore INIT event */
+		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+		return;
+	}
+
+	kvm_vcpu_deliver_init(vcpu);
+}
+
 static void vt_flush_tlb_all(struct kvm_vcpu *vcpu)
 {
 	if (is_td_vcpu(vcpu))
@@ -627,7 +646,8 @@  struct kvm_x86_ops vt_x86_ops __initdata = {
 	.msr_filter_changed = vmx_msr_filter_changed,
 	.complete_emulated_msr = kvm_complete_insn_gp,
 
-	.vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
+	.vcpu_deliver_sipi_vector = vt_vcpu_deliver_sipi_vector,
+	.vcpu_deliver_init = vt_vcpu_deliver_init,
 
 	.dev_mem_enc_ioctl = tdx_dev_ioctl,
 	.mem_enc_ioctl = vt_mem_enc_ioctl,