[1/2] KVM: x86: Move "KVM no-APIC vCPU" key management into local APIC code
Commit Message
Move incrementing and decrementing of kvm_has_noapic_vcpu into
kvm_create_lapic() and kvm_free_lapic() respectively to fix a benign bug
where KVM fails to decrement the count if vCPU creation ultimately
fails, e.g. due to a memory allocation failing.
Note, the bug is benign as kvm_has_noapic_vcpu is used purely to optimize
lapic_in_kernel() checks, and that optimization is quite dubious. That,
and practically speaking no setup that cares at all about performance runs
with a userspace local APIC.
Reported-by: Li RongQing <lirongqing@baidu.com>
Cc: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
arch/x86/kvm/lapic.c | 27 ++++++++++++++++++++++++++-
arch/x86/kvm/x86.c | 29 +++--------------------------
2 files changed, 29 insertions(+), 27 deletions(-)
Comments
On Fri, Feb 09, 2024 at 02:20:46PM -0800, Sean Christopherson wrote:
> Move incrementing and decrementing of kvm_has_noapic_vcpu into
> kvm_create_lapic() and kvm_free_lapic() respectively to fix a benign bug
> bug where KVM fails to decrement the count if vCPU creation ultimately
^
remove the duplicate word, others LGTM.
Reviewed-by: Xu Yilun <yilun.xu@linux.intel.com>
> fails, e.g. due to a memory allocation failing.
>
> Note, the bug is benign as kvm_has_noapic_vcpu is used purely to optimize
> lapic_in_kernel() checks, and that optimization is quite dubious. That,
> and practically speaking no setup that cares at all about performance runs
> with a userspace local APIC.
>
> Reported-by: Li RongQing <lirongqing@baidu.com>
> Cc: Maxim Levitsky <mlevitsk@redhat.com>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
> arch/x86/kvm/lapic.c | 27 ++++++++++++++++++++++++++-
> arch/x86/kvm/x86.c | 29 +++--------------------------
> 2 files changed, 29 insertions(+), 27 deletions(-)
>
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 3242f3da2457..681f6d82d015 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -124,6 +124,9 @@ static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
> return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
> }
>
> +__read_mostly DEFINE_STATIC_KEY_FALSE(kvm_has_noapic_vcpu);
> +EXPORT_SYMBOL_GPL(kvm_has_noapic_vcpu);
> +
> __read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_hw_disabled, HZ);
> __read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_sw_disabled, HZ);
>
> @@ -2466,8 +2469,10 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
> {
> struct kvm_lapic *apic = vcpu->arch.apic;
>
> - if (!vcpu->arch.apic)
> + if (!vcpu->arch.apic) {
> + static_branch_dec(&kvm_has_noapic_vcpu);
> return;
> + }
>
> hrtimer_cancel(&apic->lapic_timer.timer);
>
> @@ -2809,6 +2814,11 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
>
> ASSERT(vcpu != NULL);
>
> + if (!irqchip_in_kernel(vcpu->kvm)) {
> + static_branch_inc(&kvm_has_noapic_vcpu);
> + return 0;
> + }
> +
> apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT);
> if (!apic)
> goto nomem;
> @@ -2844,6 +2854,21 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
> static_branch_inc(&apic_sw_disabled.key); /* sw disabled at reset */
> kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
>
> + /*
> + * Defer evaluating inhibits until the vCPU is first run, as this vCPU
> + * will not get notified of any changes until this vCPU is visible to
> + * other vCPUs (marked online and added to the set of vCPUs).
> + *
> + * Opportunistically mark APICv active as VMX in particularly is highly
> + * unlikely to have inhibits. Ignore the current per-VM APICv state so
> + * that vCPU creation is guaranteed to run with a deterministic value,
> + * the request will ensure the vCPU gets the correct state before VM-Entry.
> + */
> + if (enable_apicv) {
> + apic->apicv_active = true;
> + kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
> + }
> +
> return 0;
> nomem_free_apic:
> kfree(apic);
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index b66c45e7f6f8..59119157bd20 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -12053,27 +12053,9 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
> if (r < 0)
> return r;
>
> - if (irqchip_in_kernel(vcpu->kvm)) {
> - r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
> - if (r < 0)
> - goto fail_mmu_destroy;
> -
> - /*
> - * Defer evaluating inhibits until the vCPU is first run, as
> - * this vCPU will not get notified of any changes until this
> - * vCPU is visible to other vCPUs (marked online and added to
> - * the set of vCPUs). Opportunistically mark APICv active as
> - * VMX in particularly is highly unlikely to have inhibits.
> - * Ignore the current per-VM APICv state so that vCPU creation
> - * is guaranteed to run with a deterministic value, the request
> - * will ensure the vCPU gets the correct state before VM-Entry.
> - */
> - if (enable_apicv) {
> - vcpu->arch.apic->apicv_active = true;
> - kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
> - }
> - } else
> - static_branch_inc(&kvm_has_noapic_vcpu);
> + r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
> + if (r < 0)
> + goto fail_mmu_destroy;
>
> r = -ENOMEM;
>
> @@ -12194,8 +12176,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
> srcu_read_unlock(&vcpu->kvm->srcu, idx);
> free_page((unsigned long)vcpu->arch.pio_data);
> kvfree(vcpu->arch.cpuid_entries);
> - if (!lapic_in_kernel(vcpu))
> - static_branch_dec(&kvm_has_noapic_vcpu);
> }
>
> void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
> @@ -12472,9 +12452,6 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
> return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0;
> }
>
> -__read_mostly DEFINE_STATIC_KEY_FALSE(kvm_has_noapic_vcpu);
> -EXPORT_SYMBOL_GPL(kvm_has_noapic_vcpu);
> -
> void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
> {
> struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
> --
> 2.43.0.687.g38aa6559b0-goog
>
>
@@ -124,6 +124,9 @@ static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}
+__read_mostly DEFINE_STATIC_KEY_FALSE(kvm_has_noapic_vcpu);
+EXPORT_SYMBOL_GPL(kvm_has_noapic_vcpu);
+
__read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_hw_disabled, HZ);
__read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_sw_disabled, HZ);
@@ -2466,8 +2469,10 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
- if (!vcpu->arch.apic)
+ if (!vcpu->arch.apic) {
+ static_branch_dec(&kvm_has_noapic_vcpu);
return;
+ }
hrtimer_cancel(&apic->lapic_timer.timer);
@@ -2809,6 +2814,11 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
ASSERT(vcpu != NULL);
+ if (!irqchip_in_kernel(vcpu->kvm)) {
+ static_branch_inc(&kvm_has_noapic_vcpu);
+ return 0;
+ }
+
apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT);
if (!apic)
goto nomem;
@@ -2844,6 +2854,21 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
static_branch_inc(&apic_sw_disabled.key); /* sw disabled at reset */
kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
+ /*
+ * Defer evaluating inhibits until the vCPU is first run, as this vCPU
+ * will not get notified of any changes until this vCPU is visible to
+ * other vCPUs (marked online and added to the set of vCPUs).
+ *
+ * Opportunistically mark APICv active as VMX in particularly is highly
+ * unlikely to have inhibits. Ignore the current per-VM APICv state so
+ * that vCPU creation is guaranteed to run with a deterministic value,
+ * the request will ensure the vCPU gets the correct state before VM-Entry.
+ */
+ if (enable_apicv) {
+ apic->apicv_active = true;
+ kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
+ }
+
return 0;
nomem_free_apic:
kfree(apic);
@@ -12053,27 +12053,9 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
if (r < 0)
return r;
- if (irqchip_in_kernel(vcpu->kvm)) {
- r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
- if (r < 0)
- goto fail_mmu_destroy;
-
- /*
- * Defer evaluating inhibits until the vCPU is first run, as
- * this vCPU will not get notified of any changes until this
- * vCPU is visible to other vCPUs (marked online and added to
- * the set of vCPUs). Opportunistically mark APICv active as
- * VMX in particularly is highly unlikely to have inhibits.
- * Ignore the current per-VM APICv state so that vCPU creation
- * is guaranteed to run with a deterministic value, the request
- * will ensure the vCPU gets the correct state before VM-Entry.
- */
- if (enable_apicv) {
- vcpu->arch.apic->apicv_active = true;
- kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
- }
- } else
- static_branch_inc(&kvm_has_noapic_vcpu);
+ r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
+ if (r < 0)
+ goto fail_mmu_destroy;
r = -ENOMEM;
@@ -12194,8 +12176,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
srcu_read_unlock(&vcpu->kvm->srcu, idx);
free_page((unsigned long)vcpu->arch.pio_data);
kvfree(vcpu->arch.cpuid_entries);
- if (!lapic_in_kernel(vcpu))
- static_branch_dec(&kvm_has_noapic_vcpu);
}
void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
@@ -12472,9 +12452,6 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0;
}
-__read_mostly DEFINE_STATIC_KEY_FALSE(kvm_has_noapic_vcpu);
-EXPORT_SYMBOL_GPL(kvm_has_noapic_vcpu);
-
void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);