[v10,062/108] KVM: x86/tdp_mmu: implement MapGPA hypercall for TDX

Message ID 73ef2bdcdf8ec88bbec9d3780484cecda7a21e6f.1667110240.git.isaku.yamahata@intel.com
State: New
Series: KVM TDX basic feature support

Commit Message

Isaku Yamahata Oct. 30, 2022, 6:23 a.m. UTC
  From: Isaku Yamahata <isaku.yamahata@intel.com>

The TDX Guest-Hypervisor Communication Interface (GHCI) specification
defines the MapGPA hypercall, which a guest TD uses to request that the host
VMM map a given GPA range as private or shared.

The request means that the guest TD will use the GPA range as shared (or
private) and will no longer use it as private (or shared).  The VMM should
enforce that usage; it doesn't have to actually map the GPA range when
handling the hypercall.

- Zap the aliased region.
  If a shared (or private) GPA is requested, zap the private (or shared)
  alias (modulo the shared bit).
- Record that the requested GPA is shared (or private) in kvm.mem_attr_array.
- Don't map the GPA here.  The GPA is mapped on the next EPT violation.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
---
 arch/x86/kvm/mmu.h         |  5 ++++
 arch/x86/kvm/mmu/mmu.c     | 60 ++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/mmu/tdp_mmu.c | 35 ++++++++++++++++++++++
 arch/x86/kvm/mmu/tdp_mmu.h |  3 ++
 4 files changed, 103 insertions(+)
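For illustration, a minimal sketch of how a hypercall exit handler might drive
kvm_mmu_map_gpa() and the -EAGAIN resume contract described above.  The handler
itself is hypothetical (the real one is in patch 101/108 of this series); in
particular, reporting the resume GPA via RCX is only a placeholder:

static int tdx_map_gpa_sketch(struct kvm_vcpu *vcpu, gpa_t gpa, u64 size,
			      bool map_private)
{
	gfn_t start = gpa_to_gfn(gpa);
	gfn_t end = gpa_to_gfn(gpa + size);
	int ret;

	/*
	 * Convert [start, end) to the requested attribute.  The aliased
	 * mapping is zapped; nothing is mapped here.
	 */
	ret = kvm_mmu_map_gpa(vcpu, &start, end, map_private);
	if (ret == -EAGAIN) {
		/*
		 * start was advanced to the first unconverted GFN (with the
		 * shared bit set or cleared as appropriate), so the request
		 * can be resumed, e.g. by asking the guest to retry from it.
		 */
		kvm_rcx_write(vcpu, gfn_to_gpa(start));	/* placeholder */
		return 1;	/* re-enter the guest */
	}

	return ret;
}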
  

Comments

Binbin Wu Nov. 9, 2022, 3:05 p.m. UTC | #1
On 10/30/2022 2:23 PM, isaku.yamahata@intel.com wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
>
> The TDX Guest-Hypervisor communication interface(GHCI) specification
> defines MapGPA hypercall for guest TD to request the host VMM to map given
> GPA range as private or shared.
>
> It means the guest TD uses the GPA as shared (or private).  The GPA
> won't be used as private (or shared).  VMM should enforce GPA usage. VMM
> doesn't have to map the GPA on the hypercall request.
>
> - Zap the aliased region.
>    If shared (or private) GPA is requested, zap private (or shared) GPA
>    (modulo shared bit).
> - Record the request GPA is shared (or private) by kvm.mem_attr_array.
> - Don't map GPA. The GPA is mapped on the next EPT violation.
>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> ---
>   arch/x86/kvm/mmu.h         |  5 ++++
>   arch/x86/kvm/mmu/mmu.c     | 60 ++++++++++++++++++++++++++++++++++++++
>   arch/x86/kvm/mmu/tdp_mmu.c | 35 ++++++++++++++++++++++
>   arch/x86/kvm/mmu/tdp_mmu.h |  3 ++
>   4 files changed, 103 insertions(+)
>
> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
> index e2a0dfbee56d..e1641fa5a862 100644
> --- a/arch/x86/kvm/mmu.h
> +++ b/arch/x86/kvm/mmu.h
> @@ -219,6 +219,11 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
>   
>   int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
>   
> +int __kvm_mmu_map_gpa(struct kvm *kvm, gfn_t *startp, gfn_t end,
> +		      bool map_private);
> +int kvm_mmu_map_gpa(struct kvm_vcpu *vcpu, gfn_t *startp, gfn_t end,
> +		    bool map_private);
> +
>   int kvm_mmu_post_init_vm(struct kvm *kvm);
>   void kvm_mmu_pre_destroy_vm(struct kvm *kvm);
>   
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 168c84c99de3..37b378bf60df 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -6778,6 +6778,66 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
>   	}
>   }
>   
> +int __kvm_mmu_map_gpa(struct kvm *kvm, gfn_t *startp, gfn_t end,
> +		      bool map_private)
> +{
> +	gfn_t start = *startp;
> +	int attr;
> +	int ret;
> +
> +	if (!kvm_gfn_shared_mask(kvm))
> +		return -EOPNOTSUPP;
> +
> +	attr = map_private ? KVM_MEM_ATTR_PRIVATE : KVM_MEM_ATTR_SHARED;
> +	start = start & ~kvm_gfn_shared_mask(kvm);
> +	end = end & ~kvm_gfn_shared_mask(kvm);
> +
> +	/*
> +	 * To make the following kvm_vm_set_mem_attr() success within spinlock
> +	 * without memory allocation.
> +	 */
> +	ret = kvm_vm_reserve_mem_attr(kvm, start, end);
> +	if (ret)
> +		return ret;
> +
> +	write_lock(&kvm->mmu_lock);
> +	if (is_tdp_mmu_enabled(kvm)) {

How about moving the condition test to the beginning of the function to 
make the code simpler?
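For concreteness, the suggested early check could look roughly like this (a
sketch of the idea only, not the actual respin; the logic is otherwise as
posted):

int __kvm_mmu_map_gpa(struct kvm *kvm, gfn_t *startp, gfn_t end,
		      bool map_private)
{
	gfn_t start = *startp;
	gfn_t s;
	int attr;
	int ret;

	/* Check everything up front, before reserving attributes or locking. */
	if (!kvm_gfn_shared_mask(kvm) || !is_tdp_mmu_enabled(kvm))
		return -EOPNOTSUPP;

	attr = map_private ? KVM_MEM_ATTR_PRIVATE : KVM_MEM_ATTR_SHARED;
	start &= ~kvm_gfn_shared_mask(kvm);
	end &= ~kvm_gfn_shared_mask(kvm);

	/* Reserve so kvm_vm_set_mem_attr() below cannot fail under the lock. */
	ret = kvm_vm_reserve_mem_attr(kvm, start, end);
	if (ret)
		return ret;

	write_lock(&kvm->mmu_lock);
	/* The TDP MMU is known to be enabled here; no else branch is needed. */
	s = start;
	ret = kvm_tdp_mmu_map_gpa(kvm, &s, end, map_private);
	if (!ret) {
		KVM_BUG_ON(kvm_vm_set_mem_attr(kvm, attr, start, end), kvm);
	} else if (ret == -EAGAIN) {
		KVM_BUG_ON(kvm_vm_set_mem_attr(kvm, attr, start, s), kvm);
		start = s;
	}
	write_unlock(&kvm->mmu_lock);

	if (ret == -EAGAIN)
		*startp = map_private ? kvm_gfn_private(kvm, start) :
					kvm_gfn_shared(kvm, start);
	return ret;
}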


> +		gfn_t s = start;
> +
> +		ret = kvm_tdp_mmu_map_gpa(kvm, &s, end, map_private);
> +		if (!ret) {
> +			KVM_BUG_ON(kvm_vm_set_mem_attr(kvm, attr, start, end), kvm);
> +		} else if (ret == -EAGAIN) {
> +			KVM_BUG_ON(kvm_vm_set_mem_attr(kvm, attr, start, s), kvm);
> +			start = s;
> +		}
> +	} else {
> +		ret = -EOPNOTSUPP;
> +	}
> +	write_unlock(&kvm->mmu_lock);
> +
> +	if (ret == -EAGAIN) {
> +		if (map_private)
> +			*startp = kvm_gfn_private(kvm, start);
> +		else
> +			*startp = kvm_gfn_shared(kvm, start);
> +	}
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(__kvm_mmu_map_gpa);
> +
> +int kvm_mmu_map_gpa(struct kvm_vcpu *vcpu, gfn_t *startp, gfn_t end,
> +		    bool map_private)
> +{
> +	struct kvm_mmu *mmu = vcpu->arch.mmu;
> +
> +	if (!VALID_PAGE(mmu->root.hpa) || !VALID_PAGE(mmu->private_root_hpa))
> +		return -EINVAL;
> +
> +	return __kvm_mmu_map_gpa(vcpu->kvm, startp, end, map_private);
> +}
> +EXPORT_SYMBOL_GPL(kvm_mmu_map_gpa);
> +
>   static unsigned long
>   mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
>   {
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> index 4b207ce83ffe..d3bab382ceaa 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.c
> +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> @@ -2156,6 +2156,41 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
>   	return spte_set;
>   }
>   
> +int kvm_tdp_mmu_map_gpa(struct kvm *kvm,
> +			gfn_t *startp, gfn_t end, bool map_private)
> +{
> +	struct kvm_mmu_page *root;
> +	gfn_t start = *startp;
> +	bool flush = false;
> +	int i;
> +
> +	lockdep_assert_held_write(&kvm->mmu_lock);
> +	KVM_BUG_ON(start & kvm_gfn_shared_mask(kvm), kvm);
> +	KVM_BUG_ON(end & kvm_gfn_shared_mask(kvm), kvm);
> +
> +	kvm_mmu_invalidate_begin(kvm, start, end);
> +	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
> +		for_each_tdp_mmu_root_yield_safe(kvm, root, i) {
> +			if (is_private_sp(root) == map_private)
> +				continue;
> +
> +			/*
> +			 * TODO: If necessary, return to the caller with -EAGAIN
> +			 * instead of yield-and-resume within
> +			 * tdp_mmu_zap_leafs().
> +			 */
> +			flush = tdp_mmu_zap_leafs(kvm, root, start, end,
> +						  /*can_yield=*/true, flush,
> +						  /*zap_private=*/is_private_sp(root));
> +		}
> +	}
> +	if (flush)
> +		kvm_flush_remote_tlbs_with_address(kvm, start, end - start);
> +	kvm_mmu_invalidate_end(kvm, start, end);
> +
> +	return 0;
> +}
> +
>   /*
>    * Return the level of the lowest level SPTE added to sptes.
>    * That SPTE may be non-present.
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
> index 695175c921a5..cb13bc1c3679 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.h
> +++ b/arch/x86/kvm/mmu/tdp_mmu.h
> @@ -51,6 +51,9 @@ void kvm_tdp_mmu_try_split_huge_pages(struct kvm *kvm,
>   				      gfn_t start, gfn_t end,
>   				      int target_level, bool shared);
>   
> +int kvm_tdp_mmu_map_gpa(struct kvm *kvm,
> +			gfn_t *startp, gfn_t end, bool map_private);

Suggest adding a description of the function to avoid confusion, since the
function name is quite generic but its usage is highly TDX-specific.
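Something along these lines, for example (suggested wording only, to be
adjusted to whatever name is finally chosen):

/*
 * Zap the leaf SPTEs of the aliased (private vs. shared) mapping for
 * [*startp, end) so that the range can be converted, as requested by the
 * TDX MapGPA hypercall.  Nothing is mapped here; the mapping with the
 * requested attribute is established on the next EPT violation.  Must be
 * called with mmu_lock held for write.
 */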


> +
>   static inline void kvm_tdp_mmu_walk_lockless_begin(void)
>   {
>   	rcu_read_lock();
  
Vishal Annapurve Dec. 9, 2022, 12:01 a.m. UTC | #2
> +int __kvm_mmu_map_gpa(struct kvm *kvm, gfn_t *startp, gfn_t end,
> +		      bool map_private)
> +{
> +	gfn_t start = *startp;
> +	int attr;
> +	int ret;
> +
> +	if (!kvm_gfn_shared_mask(kvm))
> +		return -EOPNOTSUPP;
> +
> +	attr = map_private ? KVM_MEM_ATTR_PRIVATE : KVM_MEM_ATTR_SHARED;
> +	start = start & ~kvm_gfn_shared_mask(kvm);
> +	end = end & ~kvm_gfn_shared_mask(kvm);
> +
> +	/*
> +	 * To make the following kvm_vm_set_mem_attr() success within spinlock
> +	 * without memory allocation.
> +	 */
> +	ret = kvm_vm_reserve_mem_attr(kvm, start, end);
> +	if (ret)
> +		return ret;
> +
> +	write_lock(&kvm->mmu_lock);
> +	if (is_tdp_mmu_enabled(kvm)) {
> +		gfn_t s = start;
> +
> +		ret = kvm_tdp_mmu_map_gpa(kvm, &s, end, map_private);
> +		if (!ret) {
> +			KVM_BUG_ON(kvm_vm_set_mem_attr(kvm, attr, start, end), kvm);

This will result in no exits to userspace during memory conversion requests from
guests. As a result, userspace will not be able to explicitly back/unback
shared/private memory during conversions, leading to double allocation of memory.

Is this the intended behavior for memory conversion with TDX VMs, as per the
earlier discussion?

> +		} else if (ret == -EAGAIN) {
> +			KVM_BUG_ON(kvm_vm_set_mem_attr(kvm, attr, start, s), kvm);
> +			start = s;
> +		}
> +	} else {
> +		ret = -EOPNOTSUPP;
> +	}
> +	write_unlock(&kvm->mmu_lock);
> +
> +	if (ret == -EAGAIN) {
> +		if (map_private)
  
Isaku Yamahata Dec. 16, 2022, 12:18 a.m. UTC | #3
On Wed, Nov 09, 2022 at 11:05:46PM +0800,
Binbin Wu <binbin.wu@linux.intel.com> wrote:

> 
> On 10/30/2022 2:23 PM, isaku.yamahata@intel.com wrote:
> > From: Isaku Yamahata <isaku.yamahata@intel.com>
> > 
> > The TDX Guest-Hypervisor communication interface(GHCI) specification
> > defines MapGPA hypercall for guest TD to request the host VMM to map given
> > GPA range as private or shared.
> > 
> > It means the guest TD uses the GPA as shared (or private).  The GPA
> > won't be used as private (or shared).  VMM should enforce GPA usage. VMM
> > doesn't have to map the GPA on the hypercall request.
> > 
> > - Zap the aliased region.
> >    If shared (or private) GPA is requested, zap private (or shared) GPA
> >    (modulo shared bit).
> > - Record the request GPA is shared (or private) by kvm.mem_attr_array.
> > - Don't map GPA. The GPA is mapped on the next EPT violation.
> > 
> > Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> > ---
> >   arch/x86/kvm/mmu.h         |  5 ++++
> >   arch/x86/kvm/mmu/mmu.c     | 60 ++++++++++++++++++++++++++++++++++++++
> >   arch/x86/kvm/mmu/tdp_mmu.c | 35 ++++++++++++++++++++++
> >   arch/x86/kvm/mmu/tdp_mmu.h |  3 ++
> >   4 files changed, 103 insertions(+)
> > 
> > diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
> > index e2a0dfbee56d..e1641fa5a862 100644
> > --- a/arch/x86/kvm/mmu.h
> > +++ b/arch/x86/kvm/mmu.h
> > @@ -219,6 +219,11 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
> >   int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
> > +int __kvm_mmu_map_gpa(struct kvm *kvm, gfn_t *startp, gfn_t end,
> > +		      bool map_private);
> > +int kvm_mmu_map_gpa(struct kvm_vcpu *vcpu, gfn_t *startp, gfn_t end,
> > +		    bool map_private);
> > +
> >   int kvm_mmu_post_init_vm(struct kvm *kvm);
> >   void kvm_mmu_pre_destroy_vm(struct kvm *kvm);
> > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > index 168c84c99de3..37b378bf60df 100644
> > --- a/arch/x86/kvm/mmu/mmu.c
> > +++ b/arch/x86/kvm/mmu/mmu.c
> > @@ -6778,6 +6778,66 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
> >   	}
> >   }
> > +int __kvm_mmu_map_gpa(struct kvm *kvm, gfn_t *startp, gfn_t end,
> > +		      bool map_private)
> > +{
> > +	gfn_t start = *startp;
> > +	int attr;
> > +	int ret;
> > +
> > +	if (!kvm_gfn_shared_mask(kvm))
> > +		return -EOPNOTSUPP;
> > +
> > +	attr = map_private ? KVM_MEM_ATTR_PRIVATE : KVM_MEM_ATTR_SHARED;
> > +	start = start & ~kvm_gfn_shared_mask(kvm);
> > +	end = end & ~kvm_gfn_shared_mask(kvm);
> > +
> > +	/*
> > +	 * To make the following kvm_vm_set_mem_attr() success within spinlock
> > +	 * without memory allocation.
> > +	 */
> > +	ret = kvm_vm_reserve_mem_attr(kvm, start, end);
> > +	if (ret)
> > +		return ret;
> > +
> > +	write_lock(&kvm->mmu_lock);
> > +	if (is_tdp_mmu_enabled(kvm)) {
> 
> How about moving the condition test to the beginning of the function to make
> the code simpler?

Ok.


> > +		gfn_t s = start;
> > +
> > +		ret = kvm_tdp_mmu_map_gpa(kvm, &s, end, map_private);
> > +		if (!ret) {
> > +			KVM_BUG_ON(kvm_vm_set_mem_attr(kvm, attr, start, end), kvm);
> > +		} else if (ret == -EAGAIN) {
> > +			KVM_BUG_ON(kvm_vm_set_mem_attr(kvm, attr, start, s), kvm);
> > +			start = s;
> > +		}
> > +	} else {
> > +		ret = -EOPNOTSUPP;
> > +	}
> > +	write_unlock(&kvm->mmu_lock);
> > +
> > +	if (ret == -EAGAIN) {
> > +		if (map_private)
> > +			*startp = kvm_gfn_private(kvm, start);
> > +		else
> > +			*startp = kvm_gfn_shared(kvm, start);
> > +	}
> > +	return ret;
> > +}
> > +EXPORT_SYMBOL_GPL(__kvm_mmu_map_gpa);
> > +
> > +int kvm_mmu_map_gpa(struct kvm_vcpu *vcpu, gfn_t *startp, gfn_t end,
> > +		    bool map_private)
> > +{
> > +	struct kvm_mmu *mmu = vcpu->arch.mmu;
> > +
> > +	if (!VALID_PAGE(mmu->root.hpa) || !VALID_PAGE(mmu->private_root_hpa))
> > +		return -EINVAL;
> > +
> > +	return __kvm_mmu_map_gpa(vcpu->kvm, startp, end, map_private);
> > +}
> > +EXPORT_SYMBOL_GPL(kvm_mmu_map_gpa);
> > +
> >   static unsigned long
> >   mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
> >   {
> > diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> > index 4b207ce83ffe..d3bab382ceaa 100644
> > --- a/arch/x86/kvm/mmu/tdp_mmu.c
> > +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> > @@ -2156,6 +2156,41 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
> >   	return spte_set;
> >   }
> > +int kvm_tdp_mmu_map_gpa(struct kvm *kvm,
> > +			gfn_t *startp, gfn_t end, bool map_private)
> > +{
> > +	struct kvm_mmu_page *root;
> > +	gfn_t start = *startp;
> > +	bool flush = false;
> > +	int i;
> > +
> > +	lockdep_assert_held_write(&kvm->mmu_lock);
> > +	KVM_BUG_ON(start & kvm_gfn_shared_mask(kvm), kvm);
> > +	KVM_BUG_ON(end & kvm_gfn_shared_mask(kvm), kvm);
> > +
> > +	kvm_mmu_invalidate_begin(kvm, start, end);
> > +	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
> > +		for_each_tdp_mmu_root_yield_safe(kvm, root, i) {
> > +			if (is_private_sp(root) == map_private)
> > +				continue;
> > +
> > +			/*
> > +			 * TODO: If necessary, return to the caller with -EAGAIN
> > +			 * instead of yield-and-resume within
> > +			 * tdp_mmu_zap_leafs().
> > +			 */
> > +			flush = tdp_mmu_zap_leafs(kvm, root, start, end,
> > +						  /*can_yield=*/true, flush,
> > +						  /*zap_private=*/is_private_sp(root));
> > +		}
> > +	}
> > +	if (flush)
> > +		kvm_flush_remote_tlbs_with_address(kvm, start, end - start);
> > +	kvm_mmu_invalidate_end(kvm, start, end);
> > +
> > +	return 0;
> > +}
> > +
> >   /*
> >    * Return the level of the lowest level SPTE added to sptes.
> >    * That SPTE may be non-present.
> > diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
> > index 695175c921a5..cb13bc1c3679 100644
> > --- a/arch/x86/kvm/mmu/tdp_mmu.h
> > +++ b/arch/x86/kvm/mmu/tdp_mmu.h
> > @@ -51,6 +51,9 @@ void kvm_tdp_mmu_try_split_huge_pages(struct kvm *kvm,
> >   				      gfn_t start, gfn_t end,
> >   				      int target_level, bool shared);
> > +int kvm_tdp_mmu_map_gpa(struct kvm *kvm,
> > +			gfn_t *startp, gfn_t end, bool map_private);
> 
> Suggest adding a description of the function to avoid confusion, since the
> function name is quite generic but its usage is highly TDX-specific.

Agreed that the name is too generic. I'll rename it, e.g. to kvm_tdp_mmu_map_private().

I can also think of convert_private() or convert_gpa(); any other suggestions?
  
Isaku Yamahata Dec. 16, 2022, 12:31 a.m. UTC | #4
On Fri, Dec 09, 2022 at 12:01:30AM +0000,
Vishal Annapurve <vannapurve@google.com> wrote:

> > +int __kvm_mmu_map_gpa(struct kvm *kvm, gfn_t *startp, gfn_t end,
> > +		      bool map_private)
> > +{
> > +	gfn_t start = *startp;
> > +	int attr;
> > +	int ret;
> > +
> > +	if (!kvm_gfn_shared_mask(kvm))
> > +		return -EOPNOTSUPP;
> > +
> > +	attr = map_private ? KVM_MEM_ATTR_PRIVATE : KVM_MEM_ATTR_SHARED;
> > +	start = start & ~kvm_gfn_shared_mask(kvm);
> > +	end = end & ~kvm_gfn_shared_mask(kvm);
> > +
> > +	/*
> > +	 * To make the following kvm_vm_set_mem_attr() success within spinlock
> > +	 * without memory allocation.
> > +	 */
> > +	ret = kvm_vm_reserve_mem_attr(kvm, start, end);
> > +	if (ret)
> > +		return ret;
> > +
> > +	write_lock(&kvm->mmu_lock);
> > +	if (is_tdp_mmu_enabled(kvm)) {
> > +		gfn_t s = start;
> > +
> > +		ret = kvm_tdp_mmu_map_gpa(kvm, &s, end, map_private);
> > +		if (!ret) {
> > +			KVM_BUG_ON(kvm_vm_set_mem_attr(kvm, attr, start, end), kvm);
> 
> This will result in no exits to userspace during memory conversion requests from
> guests. And as a result, userspace will not be able to explicitly back/unback
> shared/private memory during conversions leading to double allocation of memory.
> 
> Is this an intended behavior for memory conversion with TDX VMs as per earlier
> discussion?

It was because the UPM ioctl interface was not stable yet. With v10 it seems
mostly stable, so I'll change "[PATCH v10 101/108] KVM: TDX: Handle TDX PV
map_gpa hypercall" to exit to user space.
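For reference, a rough sketch of what exiting to user space for the conversion
could look like, assuming it reuses the existing KVM_EXIT_HYPERCALL /
KVM_HC_MAP_GPA_RANGE userspace exit; whether the respin takes exactly this
shape is an assumption, not something stated in this thread.  gpa, size,
map_private and tdx_complete_map_gpa are placeholders from the hypothetical
handler context:

	/*
	 * Sketch only: punt the MapGPA request to userspace so it can
	 * convert the backing memory before the range is refaulted.
	 */
	vcpu->run->exit_reason = KVM_EXIT_HYPERCALL;
	vcpu->run->hypercall.nr = KVM_HC_MAP_GPA_RANGE;
	/* Strip the shared bit so userspace sees the canonical GPA. */
	vcpu->run->hypercall.args[0] =
		gpa & ~gfn_to_gpa(kvm_gfn_shared_mask(vcpu->kvm));
	vcpu->run->hypercall.args[1] = size >> PAGE_SHIFT;
	vcpu->run->hypercall.args[2] = map_private ? KVM_MAP_GPA_RANGE_ENCRYPTED :
						     KVM_MAP_GPA_RANGE_DECRYPTED;
	/* Hypothetical completion callback that sets the TDVMCALL return code. */
	vcpu->arch.complete_userspace_io = tdx_complete_map_gpa;
	return 0;	/* return to userspace */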
  

Patch

diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index e2a0dfbee56d..e1641fa5a862 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -219,6 +219,11 @@  void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
 
 int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu);
 
+int __kvm_mmu_map_gpa(struct kvm *kvm, gfn_t *startp, gfn_t end,
+		      bool map_private);
+int kvm_mmu_map_gpa(struct kvm_vcpu *vcpu, gfn_t *startp, gfn_t end,
+		    bool map_private);
+
 int kvm_mmu_post_init_vm(struct kvm *kvm);
 void kvm_mmu_pre_destroy_vm(struct kvm *kvm);
 
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 168c84c99de3..37b378bf60df 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -6778,6 +6778,66 @@  void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
 	}
 }
 
+int __kvm_mmu_map_gpa(struct kvm *kvm, gfn_t *startp, gfn_t end,
+		      bool map_private)
+{
+	gfn_t start = *startp;
+	int attr;
+	int ret;
+
+	if (!kvm_gfn_shared_mask(kvm))
+		return -EOPNOTSUPP;
+
+	attr = map_private ? KVM_MEM_ATTR_PRIVATE : KVM_MEM_ATTR_SHARED;
+	start = start & ~kvm_gfn_shared_mask(kvm);
+	end = end & ~kvm_gfn_shared_mask(kvm);
+
+	/*
+	 * To make the following kvm_vm_set_mem_attr() success within spinlock
+	 * without memory allocation.
+	 */
+	ret = kvm_vm_reserve_mem_attr(kvm, start, end);
+	if (ret)
+		return ret;
+
+	write_lock(&kvm->mmu_lock);
+	if (is_tdp_mmu_enabled(kvm)) {
+		gfn_t s = start;
+
+		ret = kvm_tdp_mmu_map_gpa(kvm, &s, end, map_private);
+		if (!ret) {
+			KVM_BUG_ON(kvm_vm_set_mem_attr(kvm, attr, start, end), kvm);
+		} else if (ret == -EAGAIN) {
+			KVM_BUG_ON(kvm_vm_set_mem_attr(kvm, attr, start, s), kvm);
+			start = s;
+		}
+	} else {
+		ret = -EOPNOTSUPP;
+	}
+	write_unlock(&kvm->mmu_lock);
+
+	if (ret == -EAGAIN) {
+		if (map_private)
+			*startp = kvm_gfn_private(kvm, start);
+		else
+			*startp = kvm_gfn_shared(kvm, start);
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(__kvm_mmu_map_gpa);
+
+int kvm_mmu_map_gpa(struct kvm_vcpu *vcpu, gfn_t *startp, gfn_t end,
+		    bool map_private)
+{
+	struct kvm_mmu *mmu = vcpu->arch.mmu;
+
+	if (!VALID_PAGE(mmu->root.hpa) || !VALID_PAGE(mmu->private_root_hpa))
+		return -EINVAL;
+
+	return __kvm_mmu_map_gpa(vcpu->kvm, startp, end, map_private);
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_map_gpa);
+
 static unsigned long
 mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 {
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 4b207ce83ffe..d3bab382ceaa 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -2156,6 +2156,41 @@  bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
 	return spte_set;
 }
 
+int kvm_tdp_mmu_map_gpa(struct kvm *kvm,
+			gfn_t *startp, gfn_t end, bool map_private)
+{
+	struct kvm_mmu_page *root;
+	gfn_t start = *startp;
+	bool flush = false;
+	int i;
+
+	lockdep_assert_held_write(&kvm->mmu_lock);
+	KVM_BUG_ON(start & kvm_gfn_shared_mask(kvm), kvm);
+	KVM_BUG_ON(end & kvm_gfn_shared_mask(kvm), kvm);
+
+	kvm_mmu_invalidate_begin(kvm, start, end);
+	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+		for_each_tdp_mmu_root_yield_safe(kvm, root, i) {
+			if (is_private_sp(root) == map_private)
+				continue;
+
+			/*
+			 * TODO: If necessary, return to the caller with -EAGAIN
+			 * instead of yield-and-resume within
+			 * tdp_mmu_zap_leafs().
+			 */
+			flush = tdp_mmu_zap_leafs(kvm, root, start, end,
+						  /*can_yield=*/true, flush,
+						  /*zap_private=*/is_private_sp(root));
+		}
+	}
+	if (flush)
+		kvm_flush_remote_tlbs_with_address(kvm, start, end - start);
+	kvm_mmu_invalidate_end(kvm, start, end);
+
+	return 0;
+}
+
 /*
  * Return the level of the lowest level SPTE added to sptes.
  * That SPTE may be non-present.
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index 695175c921a5..cb13bc1c3679 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -51,6 +51,9 @@  void kvm_tdp_mmu_try_split_huge_pages(struct kvm *kvm,
 				      gfn_t start, gfn_t end,
 				      int target_level, bool shared);
 
+int kvm_tdp_mmu_map_gpa(struct kvm *kvm,
+			gfn_t *startp, gfn_t end, bool map_private);
+
 static inline void kvm_tdp_mmu_walk_lockless_begin(void)
 {
 	rcu_read_lock();