[v10,035/108] KVM: x86/mmu: Track shadow MMIO value on a per-VM basis

Message ID f1c9996fa4f4bc71b3feee8407d247aeabb8968e.1667110240.git.isaku.yamahata@intel.com
State New
Headers
Series KVM TDX basic feature support

Commit Message

Isaku Yamahata Oct. 30, 2022, 6:22 a.m. UTC
  From: Isaku Yamahata <isaku.yamahata@intel.com>

TDX will use a different shadow PTE entry value for MMIO from VMX.  Add
members to kvm_arch and track value for MMIO per-VM instead of global
variables.  By using the per-VM EPT entry value for MMIO, the existing VMX
logic is kept working.  To untangle the logic to initialize
shadow_mmio_access_mask, introduce a separate setter function.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/mmu.h              |  1 +
 arch/x86/kvm/mmu/mmu.c          |  7 ++++---
 arch/x86/kvm/mmu/spte.c         | 11 +++++++++--
 arch/x86/kvm/mmu/spte.h         |  4 ++--
 arch/x86/kvm/mmu/tdp_mmu.c      |  6 +++---
 6 files changed, 21 insertions(+), 10 deletions(-)
  

Comments

Kai Huang Nov. 9, 2022, 12:27 p.m. UTC | #1
On Sat, 2022-10-29 at 23:22 -0700, isaku.yamahata@intel.com wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
> 
> TDX will use a different shadow PTE entry value for MMIO from VMX.  Add
> members to kvm_arch and track value for MMIO per-VM instead of global
> variables.  By using the per-VM EPT entry value for MMIO, the existing VMX
> logic is kept working.  To untangle the logic to initialize
> shadow_mmio_access_mask, introduce a separate setter function.
> 

It's weird to mention "shadow_mmio_access_mask" here (and this doesn't stand
anymore anyway as we have changed to only make mmio_value per-vm).  Just say
something like "introduce a setter function to set mmio_value for the given
guest so that TDX guest can override later".

Also, it seems worth mentioning that the same mmio_mask is used for both TDX guests
and VMX guests, so we can still use the global shadow_mmio_mask.

[...]

>  void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask)
>  {
>  	BUG_ON((u64)(unsigned)access_mask != access_mask);
> +
>  	WARN_ON(mmio_value & shadow_nonpresent_or_rsvd_lower_gfn_mask);

Looks like an unnecessary change.
  
Yan Zhao Nov. 22, 2022, 2:10 a.m. UTC | #2
On Sat, Oct 29, 2022 at 11:22:36PM -0700, isaku.yamahata@intel.com wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
> 
> TDX will use a different shadow PTE entry value for MMIO from VMX.  Add
> members to kvm_arch and track value for MMIO per-VM instead of global
> variables.  By using the per-VM EPT entry value for MMIO, the existing VMX
> logic is kept working.  To untangle the logic to initialize
> shadow_mmio_access_mask, introduce a separate setter function.
> 
> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  2 ++
>  arch/x86/kvm/mmu.h              |  1 +
>  arch/x86/kvm/mmu/mmu.c          |  7 ++++---
>  arch/x86/kvm/mmu/spte.c         | 11 +++++++++--
>  arch/x86/kvm/mmu/spte.h         |  4 ++--
>  arch/x86/kvm/mmu/tdp_mmu.c      |  6 +++---
>  6 files changed, 21 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 3374ec0d6d90..a1c801ca61d3 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1171,6 +1171,8 @@ struct kvm_arch {
>  	 */
>  	spinlock_t mmu_unsync_pages_lock;
>  
> +	u64 shadow_mmio_value;
> +
>  	struct list_head assigned_dev_head;
>  	struct iommu_domain *iommu_domain;
>  	bool iommu_noncoherent;
> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
> index a45f7a96b821..50d240d52697 100644
> --- a/arch/x86/kvm/mmu.h
> +++ b/arch/x86/kvm/mmu.h
> @@ -101,6 +101,7 @@ static inline u8 kvm_get_shadow_phys_bits(void)
>  }
>  
>  void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask);
> +void kvm_mmu_set_mmio_spte_value(struct kvm *kvm, u64 mmio_value);
>  void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask);
>  void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only);
>  
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index e7e11f51f8b4..0d3fa29ccccc 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -2421,7 +2421,7 @@ static int mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
>  				return kvm_mmu_prepare_zap_page(kvm, child,
>  								invalid_list);
>  		}
> -	} else if (is_mmio_spte(pte)) {
> +	} else if (is_mmio_spte(kvm, pte)) {
>  		mmu_spte_clear_no_track(spte);
>  	}
>  	return 0;
> @@ -4081,7 +4081,7 @@ static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
>  	if (WARN_ON(reserved))
>  		return -EINVAL;
>  
> -	if (is_mmio_spte(spte)) {
> +	if (is_mmio_spte(vcpu->kvm, spte)) {
>  		gfn_t gfn = get_mmio_spte_gfn(spte);
>  		unsigned int access = get_mmio_spte_access(spte);
>  
> @@ -4578,7 +4578,7 @@ static unsigned long get_cr3(struct kvm_vcpu *vcpu)
>  static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
>  			   unsigned int access)
>  {
> -	if (unlikely(is_mmio_spte(*sptep))) {
> +	if (unlikely(is_mmio_spte(vcpu->kvm, *sptep))) {
>  		if (gfn != get_mmio_spte_gfn(*sptep)) {
>  			mmu_spte_clear_no_track(sptep);
>  			return true;
> @@ -6061,6 +6061,7 @@ int kvm_mmu_init_vm(struct kvm *kvm)
>  	struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
>  	int r;
>  
> +	kvm->arch.shadow_mmio_value = shadow_mmio_value;
>  	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
>  	INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
>  	INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages);
> diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
> index 5d5c06d4fd89..8f468ee2b985 100644
> --- a/arch/x86/kvm/mmu/spte.c
> +++ b/arch/x86/kvm/mmu/spte.c
> @@ -74,10 +74,10 @@ u64 make_mmio_spte(struct kvm_vcpu *vcpu, u64 gfn, unsigned int access)
>  	u64 spte = generation_mmio_spte_mask(gen);
>  	u64 gpa = gfn << PAGE_SHIFT;
>  
> -	WARN_ON_ONCE(!shadow_mmio_value);
> +	WARN_ON_ONCE(!vcpu->kvm->arch.shadow_mmio_value);
>  
>  	access &= shadow_mmio_access_mask;
> -	spte |= shadow_mmio_value | access;
> +	spte |= vcpu->kvm->arch.shadow_mmio_value | access;
>  	spte |= gpa | shadow_nonpresent_or_rsvd_mask;
>  	spte |= (gpa & shadow_nonpresent_or_rsvd_mask)
>  		<< SHADOW_NONPRESENT_OR_RSVD_MASK_LEN;
> @@ -352,6 +352,7 @@ u64 mark_spte_for_access_track(u64 spte)
>  void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask)
>  {
>  	BUG_ON((u64)(unsigned)access_mask != access_mask);
> +
>  	WARN_ON(mmio_value & shadow_nonpresent_or_rsvd_lower_gfn_mask);
>  
>  	/*
> @@ -401,6 +402,12 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask)
>  }
>  EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
>  
> +void kvm_mmu_set_mmio_spte_value(struct kvm *kvm, u64 mmio_value)
> +{
> +	kvm->arch.shadow_mmio_value = mmio_value;
> +}
Also make enable_mmio_caching to be a per-VM value?
As if the shadow_mmio_value is 0, mmio_caching needs to be disabled.

> +EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_value);
> +
>  void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask)
>  {
>  	/* shadow_me_value must be a subset of shadow_me_mask */
> diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
> index 7e0f79e8f45b..82f0d5c08b77 100644
> --- a/arch/x86/kvm/mmu/spte.h
> +++ b/arch/x86/kvm/mmu/spte.h
> @@ -241,9 +241,9 @@ static inline int spte_index(u64 *sptep)
>   */
>  extern u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask;
>  
> -static inline bool is_mmio_spte(u64 spte)
> +static inline bool is_mmio_spte(struct kvm *kvm, u64 spte)
>  {
> -	return (spte & shadow_mmio_mask) == shadow_mmio_value &&
> +	return (spte & shadow_mmio_mask) == kvm->arch.shadow_mmio_value &&
>  	       likely(enable_mmio_caching);
As above, also turn enable_mmio_caching to be per-vm ?

>  }
>  
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> index 1eee9c159958..e07f14351d14 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.c
> +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> @@ -580,8 +580,8 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
>  		 * impact the guest since both the former and current SPTEs
>  		 * are nonpresent.
>  		 */
> -		if (WARN_ON(!is_mmio_spte(old_spte) &&
> -			    !is_mmio_spte(new_spte) &&
> +		if (WARN_ON(!is_mmio_spte(kvm, old_spte) &&
> +			    !is_mmio_spte(kvm, new_spte) &&
>  			    !is_removed_spte(new_spte)))
>  			pr_err("Unexpected SPTE change! Nonpresent SPTEs\n"
>  			       "should not be replaced with another,\n"
> @@ -1105,7 +1105,7 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
>  	}
>  
>  	/* If a MMIO SPTE is installed, the MMIO will need to be emulated. */
> -	if (unlikely(is_mmio_spte(new_spte))) {
> +	if (unlikely(is_mmio_spte(vcpu->kvm, new_spte))) {
>  		vcpu->stat.pf_mmio_spte_created++;
>  		trace_mark_mmio_spte(rcu_dereference(iter->sptep), iter->gfn,
>  				     new_spte);
> -- 
> 2.25.1
>
  
Yan Zhao Nov. 25, 2022, 12:12 a.m. UTC | #3
On Fri, Nov 25, 2022 at 08:13:48AM +0800, Huang, Kai wrote:
> On Tue, 2022-11-22 at 10:10 +0800, Yan Zhao wrote:
> > Also make enable_mmio_caching to be a per-VM value?
> > As if the shadow_mmio_value is 0, mmio_caching needs to be disabled.
> 
> If I recall correctly, Sean said we can disable TDX guests if mmio_caching is
> disabled (we also will need to change to allow enable_mmio_caching to still be
> true when mmio_value is 0).
> 
> SEV_ES has similar logic:
> 
> void __init sev_hardware_setup(void)
> {
> 
> 	...
> 
>         /*
>          * SEV-ES requires MMIO caching as KVM doesn't have access to the guest
>          * instruction stream, i.e. can't emulate in response to a #NPF and
>          * instead relies on #NPF(RSVD) being reflected into the guest as #VC
>          * (the guest can then do a #VMGEXIT to request MMIO emulation).
>          */
>         if (!enable_mmio_caching)
>                 goto out;
> 

Would enabling mmio caching in per-VM basis be better?
  
Kai Huang Nov. 25, 2022, 12:13 a.m. UTC | #4
On Tue, 2022-11-22 at 10:10 +0800, Yan Zhao wrote:
> Also make enable_mmio_caching to be a per-VM value?
> As if the shadow_mmio_value is 0, mmio_caching needs to be disabled.

If I recall correctly, Sean said we can disable TDX guests if mmio_caching is
disabled (we also will need to change to allow enable_mmio_caching to still be
true when mmio_value is 0).

SEV_ES has similar logic:

void __init sev_hardware_setup(void)
{

	...

        /*
         * SEV-ES requires MMIO caching as KVM doesn't have access to the guest
         * instruction stream, i.e. can't emulate in response to a #NPF and
         * instead relies on #NPF(RSVD) being reflected into the guest as #VC
         * (the guest can then do a #VMGEXIT to request MMIO emulation).
         */
        if (!enable_mmio_caching)
                goto out;
  
Yan Zhao Nov. 25, 2022, 12:37 a.m. UTC | #5
On Fri, Nov 25, 2022 at 08:45:01AM +0800, Huang, Kai wrote:
> On Fri, 2022-11-25 at 08:12 +0800, Yan Zhao wrote:
> > On Fri, Nov 25, 2022 at 08:13:48AM +0800, Huang, Kai wrote:
> > > On Tue, 2022-11-22 at 10:10 +0800, Yan Zhao wrote:
> > > > Also make enable_mmio_caching to be a per-VM value?
> > > > As if the shadow_mmio_value is 0, mmio_caching needs to be disabled.
> > > 
> > > If I recall correctly, Sean said we can disable TDX guests if mmio_caching is
> > > disabled (we also will need to change to allow enable_mmio_caching to still be
> > > true when mmio_value is 0).
> > > 
> > > SEV_ES has similar logic:
> > > 
> > > void __init sev_hardware_setup(void)
> > > {
> > > 
> > > 	...
> > > 
> > >         /*
> > >          * SEV-ES requires MMIO caching as KVM doesn't have access to the guest
> > >          * instruction stream, i.e. can't emulate in response to a #NPF and
> > >          * instead relies on #NPF(RSVD) being reflected into the guest as #VC
> > >          * (the guest can then do a #VMGEXIT to request MMIO emulation).
> > >          */
> > >         if (!enable_mmio_caching)
> > >                 goto out;
> > > 
> > 
> > Would enabling mmio caching in per-VM basis be better?
> > 
> 
> We need Paolo/Sean to decide.
> 
> The thing is TDX guests always require mmio_caching being enabled.  For VMX
> guests, normally we will always enable mmio_caching too.  So I think per-VM
> basis mmio_caching is not that useful.
With per-VM basis enabling, I think we can get rid of the kvm_gfn_shared_mask(kvm)
in below code and also in handle_abnormal_pfn()

static inline bool is_mmio_spte(struct kvm *kvm, u64 spte)
{
        return (spte & shadow_mmio_mask) == kvm->arch.shadow_mmio_value &&
               likely(enable_mmio_caching || kvm_gfn_shared_mask(kvm));
}

Thanks
Yan
  
Kai Huang Nov. 25, 2022, 12:45 a.m. UTC | #6
On Fri, 2022-11-25 at 08:12 +0800, Yan Zhao wrote:
> On Fri, Nov 25, 2022 at 08:13:48AM +0800, Huang, Kai wrote:
> > On Tue, 2022-11-22 at 10:10 +0800, Yan Zhao wrote:
> > > Also make enable_mmio_caching to be a per-VM value?
> > > As if the shadow_mmio_value is 0, mmio_caching needs to be disabled.
> > 
> > If I recall correctly, Sean said we can disable TDX guests if mmio_caching is
> > disabled (we also will need to change to allow enable_mmio_caching to still be
> > true when mmio_value is 0).
> > 
> > SEV_ES has similar logic:
> > 
> > void __init sev_hardware_setup(void)
> > {
> > 
> > 	...
> > 
> >         /*
> >          * SEV-ES requires MMIO caching as KVM doesn't have access to the guest
> >          * instruction stream, i.e. can't emulate in response to a #NPF and
> >          * instead relies on #NPF(RSVD) being reflected into the guest as #VC
> >          * (the guest can then do a #VMGEXIT to request MMIO emulation).
> >          */
> >         if (!enable_mmio_caching)
> >                 goto out;
> > 
> 
> Would enabling mmio caching in per-VM basis be better?
> 

We need Paolo/Sean to decide.

The thing is TDX guests always require mmio_caching being enabled.  For VMX
guests, normally we will always enable mmio_caching too.  So I think per-VM
basis mmio_caching is not that useful.
  
Yan Zhao Nov. 25, 2022, 1:04 a.m. UTC | #7
On Fri, Nov 25, 2022 at 09:07:09AM +0800, Huang, Kai wrote:
> On Fri, 2022-11-25 at 08:37 +0800, Yan Zhao wrote:
> > On Fri, Nov 25, 2022 at 08:45:01AM +0800, Huang, Kai wrote:
> > > On Fri, 2022-11-25 at 08:12 +0800, Yan Zhao wrote:
> > > > On Fri, Nov 25, 2022 at 08:13:48AM +0800, Huang, Kai wrote:
> > > > > On Tue, 2022-11-22 at 10:10 +0800, Yan Zhao wrote:
> > > > > > Also make enable_mmio_caching to be a per-VM value?
> > > > > > As if the shadow_mmio_value is 0, mmio_caching needs to be disabled.
> > > > > 
> > > > > If I recall correctly, Sean said we can disable TDX guests if mmio_caching is
> > > > > disabled (we also will need to change to allow enable_mmio_caching to still be
> > > > > true when mmio_value is 0).
> > > > > 
> > > > > SEV_ES has similar logic:
> > > > > 
> > > > > void __init sev_hardware_setup(void)
> > > > > {
> > > > > 
> > > > > 	...
> > > > > 
> > > > >         /*
> > > > >          * SEV-ES requires MMIO caching as KVM doesn't have access to the guest
> > > > >          * instruction stream, i.e. can't emulate in response to a #NPF and
> > > > >          * instead relies on #NPF(RSVD) being reflected into the guest as #VC
> > > > >          * (the guest can then do a #VMGEXIT to request MMIO emulation).
> > > > >          */
> > > > >         if (!enable_mmio_caching)
> > > > >                 goto out;
> > > > > 
> > > > 
> > > > Would enabling mmio caching in per-VM basis be better?
> > > > 
> > > 
> > > We need Paolo/Sean to decide.
> > > 
> > > The thing is TDX guests always require mmio_caching being enabled.  For VMX
> > > guests, normally we will always enable mmio_caching too.  So I think per-VM
> > > basis mmio_caching is not that useful.
> > With per-VM basis enabling, I think we can get rid of the kvm_gfn_shared_mask(kvm)
> > in below code and also in handle_abnormal_pfn()
> > 
> > static inline bool is_mmio_spte(struct kvm *kvm, u64 spte)
> > {
> >         return (spte & shadow_mmio_mask) == kvm->arch.shadow_mmio_value &&
> >                likely(enable_mmio_caching || kvm_gfn_shared_mask(kvm));
> > }
> > 
> 
> It needs to go anyway regardless per-VM mmio_caching or not, as explained we
> need to change to allow enable_mmio_caching to be true even mmio_value is 0.

Or it's better to check enable_mmio_caching is true in
kvm_mmu_set_mmio_spte_value() as below. 

void kvm_mmu_set_mmio_spte_value(struct kvm *kvm, u64 mmio_value)
{
        WARN_ON(!enable_mmio_caching);
        kvm->arch.shadow_mmio_value = mmio_value;
}
  
Kai Huang Nov. 25, 2022, 1:07 a.m. UTC | #8
On Fri, 2022-11-25 at 08:37 +0800, Yan Zhao wrote:
> On Fri, Nov 25, 2022 at 08:45:01AM +0800, Huang, Kai wrote:
> > On Fri, 2022-11-25 at 08:12 +0800, Yan Zhao wrote:
> > > On Fri, Nov 25, 2022 at 08:13:48AM +0800, Huang, Kai wrote:
> > > > On Tue, 2022-11-22 at 10:10 +0800, Yan Zhao wrote:
> > > > > Also make enable_mmio_caching to be a per-VM value?
> > > > > As if the shadow_mmio_value is 0, mmio_caching needs to be disabled.
> > > > 
> > > > If I recall correctly, Sean said we can disable TDX guests if mmio_caching is
> > > > disabled (we also will need to change to allow enable_mmio_caching to still be
> > > > true when mmio_value is 0).
> > > > 
> > > > SEV_ES has similar logic:
> > > > 
> > > > void __init sev_hardware_setup(void)
> > > > {
> > > > 
> > > > 	...
> > > > 
> > > >         /*
> > > >          * SEV-ES requires MMIO caching as KVM doesn't have access to the guest
> > > >          * instruction stream, i.e. can't emulate in response to a #NPF and
> > > >          * instead relies on #NPF(RSVD) being reflected into the guest as #VC
> > > >          * (the guest can then do a #VMGEXIT to request MMIO emulation).
> > > >          */
> > > >         if (!enable_mmio_caching)
> > > >                 goto out;
> > > > 
> > > 
> > > Would enabling mmio caching in per-VM basis be better?
> > > 
> > 
> > We need Paolo/Sean to decide.
> > 
> > The thing is TDX guests always require mmio_caching being enabled.  For VMX
> > guests, normally we will always enable mmio_caching too.  So I think per-VM
> > basis mmio_caching is not that useful.
> With per-VM basis enabling, I think we can get rid of the kvm_gfn_shared_mask(kvm)
> in below code and also in handle_abnormal_pfn()
> 
> static inline bool is_mmio_spte(struct kvm *kvm, u64 spte)
> {
>         return (spte & shadow_mmio_mask) == kvm->arch.shadow_mmio_value &&
>                likely(enable_mmio_caching || kvm_gfn_shared_mask(kvm));
> }
> 

It needs to go anyway regardless per-VM mmio_caching or not, as explained we
need to change to allow enable_mmio_caching to be true even mmio_value is 0.
  
Sean Christopherson Nov. 28, 2022, 11:49 p.m. UTC | #9
On Fri, Nov 25, 2022, Yan Zhao wrote:
> On Fri, Nov 25, 2022 at 09:07:09AM +0800, Huang, Kai wrote:
> > On Fri, 2022-11-25 at 08:37 +0800, Yan Zhao wrote:
> > > On Fri, Nov 25, 2022 at 08:45:01AM +0800, Huang, Kai wrote:
> > > > On Fri, 2022-11-25 at 08:12 +0800, Yan Zhao wrote:
> > > > > On Fri, Nov 25, 2022 at 08:13:48AM +0800, Huang, Kai wrote:
> > > > > > On Tue, 2022-11-22 at 10:10 +0800, Yan Zhao wrote:
> > > > > > > Also make enable_mmio_caching to be a per-VM value?
> > > > > > > As if the shadow_mmio_value is 0, mmio_caching needs to be disabled.
> > > > > > 
> > > > > > If I recall correctly, Sean said we can disable TDX guests if mmio_caching is
> > > > > > disabled (we also will need to change to allow enable_mmio_caching to still be
> > > > > > true when mmio_value is 0).
> > > > > > 
> > > > > > SEV_ES has similar logic:
> > > > > > 
> > > > > > void __init sev_hardware_setup(void)
> > > > > > {
> > > > > > 
> > > > > > 	...
> > > > > > 
> > > > > >         /*
> > > > > >          * SEV-ES requires MMIO caching as KVM doesn't have access to the guest
> > > > > >          * instruction stream, i.e. can't emulate in response to a #NPF and
> > > > > >          * instead relies on #NPF(RSVD) being reflected into the guest as #VC
> > > > > >          * (the guest can then do a #VMGEXIT to request MMIO emulation).
> > > > > >          */
> > > > > >         if (!enable_mmio_caching)
> > > > > >                 goto out;
> > > > > > 
> > > > > 
> > > > > Would enabling mmio caching in per-VM basis be better?
> > > > > 
> > > > 
> > > > We need Paolo/Sean to decide.
> > > > 
> > > > The thing is TDX guests always require mmio_caching being enabled.  For VMX
> > > > guests, normally we will always enable mmio_caching too.  So I think per-VM
> > > > basis mmio_caching is not that useful.
> > > With per-VM basis enabling, I think we can get rid of the kvm_gfn_shared_mask(kvm)
> > > in below code and also in handle_abnormal_pfn()
> > > 
> > > static inline bool is_mmio_spte(struct kvm *kvm, u64 spte)
> > > {
> > >         return (spte & shadow_mmio_mask) == kvm->arch.shadow_mmio_value &&
> > >                likely(enable_mmio_caching || kvm_gfn_shared_mask(kvm));
> > > }
> > > 
> > 
> > It needs to go anyway regardless per-VM mmio_caching or not, as explained we
> > need to change to allow enable_mmio_caching to be true even mmio_value is 0.

Yes, the kvm_gfn_shared_mask() in is_mmio_spte() should not exist.

> Or it's better to check enable_mmio_caching is true in
> kvm_mmu_set_mmio_spte_value() as below. 
> 
> void kvm_mmu_set_mmio_spte_value(struct kvm *kvm, u64 mmio_value)
> {
>         WARN_ON(!enable_mmio_caching);
>         kvm->arch.shadow_mmio_value = mmio_value;

Eh, if you're going to bother with that assert, capture the more subtle aspects
as well, e.g. that EPT is enabled and that shadow_mmio_mask has been set to the
expected value.  I would also ditch the helper and just open code this in TDX
code, I doubt there's a use case outside of TDX+VMX coexistence.
  

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3374ec0d6d90..a1c801ca61d3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1171,6 +1171,8 @@  struct kvm_arch {
 	 */
 	spinlock_t mmu_unsync_pages_lock;
 
+	u64 shadow_mmio_value;
+
 	struct list_head assigned_dev_head;
 	struct iommu_domain *iommu_domain;
 	bool iommu_noncoherent;
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index a45f7a96b821..50d240d52697 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -101,6 +101,7 @@  static inline u8 kvm_get_shadow_phys_bits(void)
 }
 
 void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask);
+void kvm_mmu_set_mmio_spte_value(struct kvm *kvm, u64 mmio_value);
 void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask);
 void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only);
 
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index e7e11f51f8b4..0d3fa29ccccc 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2421,7 +2421,7 @@  static int mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
 				return kvm_mmu_prepare_zap_page(kvm, child,
 								invalid_list);
 		}
-	} else if (is_mmio_spte(pte)) {
+	} else if (is_mmio_spte(kvm, pte)) {
 		mmu_spte_clear_no_track(spte);
 	}
 	return 0;
@@ -4081,7 +4081,7 @@  static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
 	if (WARN_ON(reserved))
 		return -EINVAL;
 
-	if (is_mmio_spte(spte)) {
+	if (is_mmio_spte(vcpu->kvm, spte)) {
 		gfn_t gfn = get_mmio_spte_gfn(spte);
 		unsigned int access = get_mmio_spte_access(spte);
 
@@ -4578,7 +4578,7 @@  static unsigned long get_cr3(struct kvm_vcpu *vcpu)
 static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
 			   unsigned int access)
 {
-	if (unlikely(is_mmio_spte(*sptep))) {
+	if (unlikely(is_mmio_spte(vcpu->kvm, *sptep))) {
 		if (gfn != get_mmio_spte_gfn(*sptep)) {
 			mmu_spte_clear_no_track(sptep);
 			return true;
@@ -6061,6 +6061,7 @@  int kvm_mmu_init_vm(struct kvm *kvm)
 	struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
 	int r;
 
+	kvm->arch.shadow_mmio_value = shadow_mmio_value;
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
 	INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
 	INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages);
diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
index 5d5c06d4fd89..8f468ee2b985 100644
--- a/arch/x86/kvm/mmu/spte.c
+++ b/arch/x86/kvm/mmu/spte.c
@@ -74,10 +74,10 @@  u64 make_mmio_spte(struct kvm_vcpu *vcpu, u64 gfn, unsigned int access)
 	u64 spte = generation_mmio_spte_mask(gen);
 	u64 gpa = gfn << PAGE_SHIFT;
 
-	WARN_ON_ONCE(!shadow_mmio_value);
+	WARN_ON_ONCE(!vcpu->kvm->arch.shadow_mmio_value);
 
 	access &= shadow_mmio_access_mask;
-	spte |= shadow_mmio_value | access;
+	spte |= vcpu->kvm->arch.shadow_mmio_value | access;
 	spte |= gpa | shadow_nonpresent_or_rsvd_mask;
 	spte |= (gpa & shadow_nonpresent_or_rsvd_mask)
 		<< SHADOW_NONPRESENT_OR_RSVD_MASK_LEN;
@@ -352,6 +352,7 @@  u64 mark_spte_for_access_track(u64 spte)
 void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask)
 {
 	BUG_ON((u64)(unsigned)access_mask != access_mask);
+
 	WARN_ON(mmio_value & shadow_nonpresent_or_rsvd_lower_gfn_mask);
 
 	/*
@@ -401,6 +402,12 @@  void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask)
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
 
+void kvm_mmu_set_mmio_spte_value(struct kvm *kvm, u64 mmio_value)
+{
+	kvm->arch.shadow_mmio_value = mmio_value;
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_value);
+
 void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask)
 {
 	/* shadow_me_value must be a subset of shadow_me_mask */
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 7e0f79e8f45b..82f0d5c08b77 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -241,9 +241,9 @@  static inline int spte_index(u64 *sptep)
  */
 extern u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask;
 
-static inline bool is_mmio_spte(u64 spte)
+static inline bool is_mmio_spte(struct kvm *kvm, u64 spte)
 {
-	return (spte & shadow_mmio_mask) == shadow_mmio_value &&
+	return (spte & shadow_mmio_mask) == kvm->arch.shadow_mmio_value &&
 	       likely(enable_mmio_caching);
 }
 
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 1eee9c159958..e07f14351d14 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -580,8 +580,8 @@  static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
 		 * impact the guest since both the former and current SPTEs
 		 * are nonpresent.
 		 */
-		if (WARN_ON(!is_mmio_spte(old_spte) &&
-			    !is_mmio_spte(new_spte) &&
+		if (WARN_ON(!is_mmio_spte(kvm, old_spte) &&
+			    !is_mmio_spte(kvm, new_spte) &&
 			    !is_removed_spte(new_spte)))
 			pr_err("Unexpected SPTE change! Nonpresent SPTEs\n"
 			       "should not be replaced with another,\n"
@@ -1105,7 +1105,7 @@  static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
 	}
 
 	/* If a MMIO SPTE is installed, the MMIO will need to be emulated. */
-	if (unlikely(is_mmio_spte(new_spte))) {
+	if (unlikely(is_mmio_spte(vcpu->kvm, new_spte))) {
 		vcpu->stat.pf_mmio_spte_created++;
 		trace_mark_mmio_spte(rcu_dereference(iter->sptep), iter->gfn,
 				     new_spte);