[14/21] KVM: x86/mmu: pass error code back to MMU when async pf is ready

Message ID 20240227232100.478238-15-pbonzini@redhat.com
State New
Series TDX/SNP part 1 of n, for 6.9

Commit Message

Paolo Bonzini Feb. 27, 2024, 11:20 p.m. UTC
Right now the error code is not used when an async page fault is completed.
This is not a problem in the current code, but it is untidy.  For protected
VMs we need to check that the page attributes match the current state of the
page.  Async page faults can only occur on shared pages (because
private pages go through kvm_faultin_pfn_private() instead of
__gfn_to_pfn_memslot()), but it is risky to rely on the polarity of
PFERR_GUEST_ENC_MASK and the high 32 bits of the error code being zero.
So, for clarity and future-proofing of the code, pipe the error code
from kvm_arch_setup_async_pf() to kvm_arch_async_page_ready() via the
architecture-specific async page fault data.

Extracted from a patch by Isaku Yamahata.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/mmu/mmu.c          | 14 +++++++-------
 2 files changed, 8 insertions(+), 7 deletions(-)
  

Comments

Sean Christopherson Feb. 28, 2024, 2:03 a.m. UTC | #1
On Tue, Feb 27, 2024, Paolo Bonzini wrote:
> Right now the error code is not used when an async page fault is completed.
> This is not a problem in the current code, but it is untidy.  For protected
> VMs we need to check that the page attributes match the current state of the
> page.  Async page faults can only occur on shared pages (because
> private pages go through kvm_faultin_pfn_private() instead of
> __gfn_to_pfn_memslot()), but it is risky to rely on the polarity of
> PFERR_GUEST_ENC_MASK and the high 32 bits of the error code being zero.
> So, for clarity and future-proofing of the code, pipe the error code
> from kvm_arch_setup_async_pf() to kvm_arch_async_page_ready() via the
> architecture-specific async page fault data.
> 
> Extracted from a patch by Isaku Yamahata.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  1 +
>  arch/x86/kvm/mmu/mmu.c          | 14 +++++++-------
>  2 files changed, 8 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index a4514c2ef0ec..24e30ca2ca8f 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1839,6 +1839,7 @@ struct kvm_arch_async_pf {
>  	gfn_t gfn;
>  	unsigned long cr3;
>  	bool direct_map;
> +	u64 error_code;
>  };
>  
>  extern u32 __read_mostly kvm_nr_uret_msrs;
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index f58ca6cb789a..c9890e5b6e4c 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -4260,18 +4260,18 @@ static u32 alloc_apf_token(struct kvm_vcpu *vcpu)
>  	return (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
>  }
>  
> -static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
> -				    gfn_t gfn)
> +static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu,
> +				    struct kvm_page_fault *fault)
>  {
>  	struct kvm_arch_async_pf arch;
>  
>  	arch.token = alloc_apf_token(vcpu);
> -	arch.gfn = gfn;
> +	arch.gfn = fault->gfn;
>  	arch.direct_map = vcpu->arch.mmu->root_role.direct;
>  	arch.cr3 = kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu);
>  
> -	return kvm_setup_async_pf(vcpu, cr2_or_gpa,
> -				  kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
> +	return kvm_setup_async_pf(vcpu, fault->addr,
> +				  kvm_vcpu_gfn_to_hva(vcpu, fault->gfn), &arch);
>  }
>  
>  void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
> @@ -4290,7 +4290,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
>  	      work->arch.cr3 != kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu))
>  		return;
>  
> -	kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true, NULL);
> +	kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, work->arch.error_code, true, NULL);

This is silly.  If we're going to bother plumbing in the error code, then we
should use it to do sanity checks.  Things have gone off the rails if we end up with
an async #PF on private memory.

>  }
>  
>  static inline u8 kvm_max_level_for_order(int order)
> @@ -4395,7 +4395,7 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
>  			trace_kvm_async_pf_repeated_fault(fault->addr, fault->gfn);
>  			kvm_make_request(KVM_REQ_APF_HALT, vcpu);
>  			return RET_PF_RETRY;
> -		} else if (kvm_arch_setup_async_pf(vcpu, fault->addr, fault->gfn)) {
> +		} else if (kvm_arch_setup_async_pf(vcpu, fault)) {
>  			return RET_PF_RETRY;
>  		}
>  	}
> -- 
> 2.39.0
> 
>
  
Paolo Bonzini Feb. 28, 2024, 1:13 p.m. UTC | #2
On Wed, Feb 28, 2024 at 3:03 AM Sean Christopherson <seanjc@google.com> wrote:
>
> On Tue, Feb 27, 2024, Paolo Bonzini wrote:
> > Right now the error code is not used when an async page fault is completed.
> > This is not a problem in the current code, but it is untidy.  For protected
> > VMs we need to check that the page attributes match the current state of the
> > page.  Async page faults can only occur on shared pages (because
> > private pages go through kvm_faultin_pfn_private() instead of
> > __gfn_to_pfn_memslot()), but it is risky to rely on the polarity of
> > PFERR_GUEST_ENC_MASK and the high 32 bits of the error code being zero.
> > So, for clarity and future-proofing of the code, pipe the error code
> > from kvm_arch_setup_async_pf() to kvm_arch_async_page_ready() via the
> > architecture-specific async page fault data.
> >
> > Extracted from a patch by Isaku Yamahata.
> >
> > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> > ---
> >  arch/x86/include/asm/kvm_host.h |  1 +
> >  arch/x86/kvm/mmu/mmu.c          | 14 +++++++-------
> >  2 files changed, 8 insertions(+), 7 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index a4514c2ef0ec..24e30ca2ca8f 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -1839,6 +1839,7 @@ struct kvm_arch_async_pf {
> >       gfn_t gfn;
> >       unsigned long cr3;
> >       bool direct_map;
> > +     u64 error_code;
> >  };
> >
> >  extern u32 __read_mostly kvm_nr_uret_msrs;
> > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > index f58ca6cb789a..c9890e5b6e4c 100644
> > --- a/arch/x86/kvm/mmu/mmu.c
> > +++ b/arch/x86/kvm/mmu/mmu.c
> > @@ -4260,18 +4260,18 @@ static u32 alloc_apf_token(struct kvm_vcpu *vcpu)
> >       return (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
> >  }
> >
> > -static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
> > -                                 gfn_t gfn)
> > +static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu,
> > +                                 struct kvm_page_fault *fault)
> >  {
> >       struct kvm_arch_async_pf arch;
> >
> >       arch.token = alloc_apf_token(vcpu);
> > -     arch.gfn = gfn;
> > +     arch.gfn = fault->gfn;
> >       arch.direct_map = vcpu->arch.mmu->root_role.direct;
> >       arch.cr3 = kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu);
> >
> > -     return kvm_setup_async_pf(vcpu, cr2_or_gpa,
> > -                               kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
> > +     return kvm_setup_async_pf(vcpu, fault->addr,
> > +                               kvm_vcpu_gfn_to_hva(vcpu, fault->gfn), &arch);
> >  }
> >
> >  void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
> > @@ -4290,7 +4290,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
> >             work->arch.cr3 != kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu))
> >               return;
> >
> > -     kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true, NULL);
> > +     kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, work->arch.error_code, true, NULL);
>
> This is silly.  If we're going to bother plumbing in the error code, then we
> should use it to do sanity checks.  Things have gone off the rails if we end up with
> an async #PF on private memory.

Sure, I split this part out not just because it makes sense to do so,
but also because it's not strictly necessary. I'll add the check and
tweak the changelog.
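
Something along these lines, perhaps (an untested sketch, just to illustrate
the kind of check I have in mind; it assumes PFERR_GUEST_ENC_MASK is still
the bit that marks a private access in the stashed error code):

void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
{
	...
	/*
	 * Async #PF is only set up for faults on shared memory, so the
	 * error code saved at setup time must never have the private
	 * (encrypted) bit set.  Bail, loudly, if it somehow does.
	 */
	if (WARN_ON_ONCE(work->arch.error_code & PFERR_GUEST_ENC_MASK))
		return;

	kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, work->arch.error_code,
			      true, NULL);
}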

Paolo

>
> >  }
> >
> >  static inline u8 kvm_max_level_for_order(int order)
> > @@ -4395,7 +4395,7 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
> >                       trace_kvm_async_pf_repeated_fault(fault->addr, fault->gfn);
> >                       kvm_make_request(KVM_REQ_APF_HALT, vcpu);
> >                       return RET_PF_RETRY;
> > -             } else if (kvm_arch_setup_async_pf(vcpu, fault->addr, fault->gfn)) {
> > +             } else if (kvm_arch_setup_async_pf(vcpu, fault)) {
> >                       return RET_PF_RETRY;
> >               }
> >       }
> > --
> > 2.39.0
> >
> >
>
  

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a4514c2ef0ec..24e30ca2ca8f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1839,6 +1839,7 @@ struct kvm_arch_async_pf {
 	gfn_t gfn;
 	unsigned long cr3;
 	bool direct_map;
+	u64 error_code;
 };
 
 extern u32 __read_mostly kvm_nr_uret_msrs;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index f58ca6cb789a..c9890e5b6e4c 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4260,18 +4260,18 @@ static u32 alloc_apf_token(struct kvm_vcpu *vcpu)
 	return (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id;
 }
 
-static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
-				    gfn_t gfn)
+static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu,
+				    struct kvm_page_fault *fault)
 {
 	struct kvm_arch_async_pf arch;
 
 	arch.token = alloc_apf_token(vcpu);
-	arch.gfn = gfn;
+	arch.gfn = fault->gfn;
 	arch.direct_map = vcpu->arch.mmu->root_role.direct;
 	arch.cr3 = kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu);
 
-	return kvm_setup_async_pf(vcpu, cr2_or_gpa,
-				  kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
+	return kvm_setup_async_pf(vcpu, fault->addr,
+				  kvm_vcpu_gfn_to_hva(vcpu, fault->gfn), &arch);
 }
 
 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
@@ -4290,7 +4290,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
 	      work->arch.cr3 != kvm_mmu_get_guest_pgd(vcpu, vcpu->arch.mmu))
 		return;
 
-	kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, 0, true, NULL);
+	kvm_mmu_do_page_fault(vcpu, work->cr2_or_gpa, work->arch.error_code, true, NULL);
 }
 
 static inline u8 kvm_max_level_for_order(int order)
@@ -4395,7 +4395,7 @@ static int __kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
 			trace_kvm_async_pf_repeated_fault(fault->addr, fault->gfn);
 			kvm_make_request(KVM_REQ_APF_HALT, vcpu);
 			return RET_PF_RETRY;
-		} else if (kvm_arch_setup_async_pf(vcpu, fault->addr, fault->gfn)) {
+		} else if (kvm_arch_setup_async_pf(vcpu, fault)) {
 			return RET_PF_RETRY;
 		}
 	}