[1/2] KVM: x86: Move "KVM no-APIC vCPU" key management into local APIC code

Message ID 20240209222047.394389-2-seanjc@google.com
State New
Headers
Series KVM: x86: kvm_has_noapic_vcpu fix/cleanup |

Commit Message

Sean Christopherson Feb. 9, 2024, 10:20 p.m. UTC
Move incrementing and decrementing of kvm_has_noapic_vcpu into
kvm_create_lapic() and kvm_free_lapic() respectively to fix a benign bug
where KVM fails to decrement the count if vCPU creation ultimately
fails, e.g. due to a memory allocation failing.

Note, the bug is benign as kvm_has_noapic_vcpu is used purely to optimize
lapic_in_kernel() checks, and that optimization is quite dubious.  That,
and practically speaking no setup that cares at all about performance runs
with a userspace local APIC.

Reported-by: Li RongQing <lirongqing@baidu.com>
Cc: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/lapic.c | 27 ++++++++++++++++++++++++++-
 arch/x86/kvm/x86.c   | 29 +++--------------------------
 2 files changed, 29 insertions(+), 27 deletions(-)
  

Comments

Xu Yilun Feb. 15, 2024, 12:58 p.m. UTC | #1
On Fri, Feb 09, 2024 at 02:20:46PM -0800, Sean Christopherson wrote:
> Move incrementing and decrementing of kvm_has_noapic_vcpu into
> kvm_create_lapic() and kvm_free_lapic() respectively to fix a benign bug
> bug where KVM fails to decrement the count if vCPU creation ultimately
  ^

Remove the duplicate word "bug"; otherwise LGTM.

Reviewed-by: Xu Yilun <yilun.xu@linux.intel.com>

> fails, e.g. due to a memory allocation failing.
> 
> Note, the bug is benign as kvm_has_noapic_vcpu is used purely to optimize
> lapic_in_kernel() checks, and that optimization is quite dubious.  That,
> and practically speaking no setup that cares at all about performance runs
> with a userspace local APIC.
> 
> Reported-by: Li RongQing <lirongqing@baidu.com>
> Cc: Maxim Levitsky <mlevitsk@redhat.com>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
>  arch/x86/kvm/lapic.c | 27 ++++++++++++++++++++++++++-
>  arch/x86/kvm/x86.c   | 29 +++--------------------------
>  2 files changed, 29 insertions(+), 27 deletions(-)
> 
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 3242f3da2457..681f6d82d015 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -124,6 +124,9 @@ static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
>  	return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
>  }
>  
> +__read_mostly DEFINE_STATIC_KEY_FALSE(kvm_has_noapic_vcpu);
> +EXPORT_SYMBOL_GPL(kvm_has_noapic_vcpu);
> +
>  __read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_hw_disabled, HZ);
>  __read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_sw_disabled, HZ);
>  
> @@ -2466,8 +2469,10 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
>  {
>  	struct kvm_lapic *apic = vcpu->arch.apic;
>  
> -	if (!vcpu->arch.apic)
> +	if (!vcpu->arch.apic) {
> +		static_branch_dec(&kvm_has_noapic_vcpu);
>  		return;
> +	}
>  
>  	hrtimer_cancel(&apic->lapic_timer.timer);
>  
> @@ -2809,6 +2814,11 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
>  
>  	ASSERT(vcpu != NULL);
>  
> +	if (!irqchip_in_kernel(vcpu->kvm)) {
> +		static_branch_inc(&kvm_has_noapic_vcpu);
> +		return 0;
> +	}
> +
>  	apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT);
>  	if (!apic)
>  		goto nomem;
> @@ -2844,6 +2854,21 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
>  	static_branch_inc(&apic_sw_disabled.key); /* sw disabled at reset */
>  	kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
>  
> +	/*
> +	 * Defer evaluating inhibits until the vCPU is first run, as this vCPU
> +	 * will not get notified of any changes until this vCPU is visible to
> +	 * other vCPUs (marked online and added to the set of vCPUs).
> +	 *
> +	 * Opportunistically mark APICv active as VMX in particularly is highly
> +	 * unlikely to have inhibits.  Ignore the current per-VM APICv state so
> +	 * that vCPU creation is guaranteed to run with a deterministic value,
> +	 * the request will ensure the vCPU gets the correct state before VM-Entry.
> +	 */
> +	if (enable_apicv) {
> +		apic->apicv_active = true;
> +		kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
> +	}
> +
>  	return 0;
>  nomem_free_apic:
>  	kfree(apic);
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index b66c45e7f6f8..59119157bd20 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -12053,27 +12053,9 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
>  	if (r < 0)
>  		return r;
>  
> -	if (irqchip_in_kernel(vcpu->kvm)) {
> -		r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
> -		if (r < 0)
> -			goto fail_mmu_destroy;
> -
> -		/*
> -		 * Defer evaluating inhibits until the vCPU is first run, as
> -		 * this vCPU will not get notified of any changes until this
> -		 * vCPU is visible to other vCPUs (marked online and added to
> -		 * the set of vCPUs).  Opportunistically mark APICv active as
> -		 * VMX in particularly is highly unlikely to have inhibits.
> -		 * Ignore the current per-VM APICv state so that vCPU creation
> -		 * is guaranteed to run with a deterministic value, the request
> -		 * will ensure the vCPU gets the correct state before VM-Entry.
> -		 */
> -		if (enable_apicv) {
> -			vcpu->arch.apic->apicv_active = true;
> -			kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
> -		}
> -	} else
> -		static_branch_inc(&kvm_has_noapic_vcpu);
> +	r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
> +	if (r < 0)
> +		goto fail_mmu_destroy;
>  
>  	r = -ENOMEM;
>  
> @@ -12194,8 +12176,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
>  	srcu_read_unlock(&vcpu->kvm->srcu, idx);
>  	free_page((unsigned long)vcpu->arch.pio_data);
>  	kvfree(vcpu->arch.cpuid_entries);
> -	if (!lapic_in_kernel(vcpu))
> -		static_branch_dec(&kvm_has_noapic_vcpu);
>  }
>  
>  void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
> @@ -12472,9 +12452,6 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
>  	return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0;
>  }
>  
> -__read_mostly DEFINE_STATIC_KEY_FALSE(kvm_has_noapic_vcpu);
> -EXPORT_SYMBOL_GPL(kvm_has_noapic_vcpu);
> -
>  void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
>  {
>  	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> -- 
> 2.43.0.687.g38aa6559b0-goog
> 
>
  

Patch

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 3242f3da2457..681f6d82d015 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -124,6 +124,9 @@  static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
 	return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
 }
 
+__read_mostly DEFINE_STATIC_KEY_FALSE(kvm_has_noapic_vcpu);
+EXPORT_SYMBOL_GPL(kvm_has_noapic_vcpu);
+
 __read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_hw_disabled, HZ);
 __read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_sw_disabled, HZ);
 
@@ -2466,8 +2469,10 @@  void kvm_free_lapic(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	if (!vcpu->arch.apic)
+	if (!vcpu->arch.apic) {
+		static_branch_dec(&kvm_has_noapic_vcpu);
 		return;
+	}
 
 	hrtimer_cancel(&apic->lapic_timer.timer);
 
@@ -2809,6 +2814,11 @@  int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
 
 	ASSERT(vcpu != NULL);
 
+	if (!irqchip_in_kernel(vcpu->kvm)) {
+		static_branch_inc(&kvm_has_noapic_vcpu);
+		return 0;
+	}
+
 	apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT);
 	if (!apic)
 		goto nomem;
@@ -2844,6 +2854,21 @@  int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
 	static_branch_inc(&apic_sw_disabled.key); /* sw disabled at reset */
 	kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
 
+	/*
+	 * Defer evaluating inhibits until the vCPU is first run, as this vCPU
+	 * will not get notified of any changes until this vCPU is visible to
+	 * other vCPUs (marked online and added to the set of vCPUs).
+	 *
+	 * Opportunistically mark APICv active as VMX in particularly is highly
+	 * unlikely to have inhibits.  Ignore the current per-VM APICv state so
+	 * that vCPU creation is guaranteed to run with a deterministic value,
+	 * the request will ensure the vCPU gets the correct state before VM-Entry.
+	 */
+	if (enable_apicv) {
+		apic->apicv_active = true;
+		kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
+	}
+
 	return 0;
 nomem_free_apic:
 	kfree(apic);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b66c45e7f6f8..59119157bd20 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -12053,27 +12053,9 @@  int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 	if (r < 0)
 		return r;
 
-	if (irqchip_in_kernel(vcpu->kvm)) {
-		r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
-		if (r < 0)
-			goto fail_mmu_destroy;
-
-		/*
-		 * Defer evaluating inhibits until the vCPU is first run, as
-		 * this vCPU will not get notified of any changes until this
-		 * vCPU is visible to other vCPUs (marked online and added to
-		 * the set of vCPUs).  Opportunistically mark APICv active as
-		 * VMX in particularly is highly unlikely to have inhibits.
-		 * Ignore the current per-VM APICv state so that vCPU creation
-		 * is guaranteed to run with a deterministic value, the request
-		 * will ensure the vCPU gets the correct state before VM-Entry.
-		 */
-		if (enable_apicv) {
-			vcpu->arch.apic->apicv_active = true;
-			kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
-		}
-	} else
-		static_branch_inc(&kvm_has_noapic_vcpu);
+	r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
+	if (r < 0)
+		goto fail_mmu_destroy;
 
 	r = -ENOMEM;
 
@@ -12194,8 +12176,6 @@  void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	free_page((unsigned long)vcpu->arch.pio_data);
 	kvfree(vcpu->arch.cpuid_entries);
-	if (!lapic_in_kernel(vcpu))
-		static_branch_dec(&kvm_has_noapic_vcpu);
 }
 
 void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
@@ -12472,9 +12452,6 @@  bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
 	return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0;
 }
 
-__read_mostly DEFINE_STATIC_KEY_FALSE(kvm_has_noapic_vcpu);
-EXPORT_SYMBOL_GPL(kvm_has_noapic_vcpu);
-
 void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);