[RFC,4/5] KVM: TDX: Implement moving private pages between 2 TDs

Message ID 20230407201921.2703758-5-sagis@google.com
State New
Series Add TDX intra host migration support

Commit Message

Sagi Shahar April 7, 2023, 8:19 p.m. UTC
Add functionality for moving the private EPT table from one TD to a
new one.

The new kvm_tdp_mmu_move_private_pages_from() moves the root of the
private EPT table and overwrites the root of the destination.

Signed-off-by: Sagi Shahar <sagis@google.com>
---
 arch/x86/kvm/mmu.h         |  2 +
 arch/x86/kvm/mmu/mmu.c     | 60 +++++++++++++++++++++++++++++
 arch/x86/kvm/mmu/tdp_mmu.c | 77 +++++++++++++++++++++++++++++++++++---
 arch/x86/kvm/mmu/tdp_mmu.h |  3 ++
 4 files changed, 137 insertions(+), 5 deletions(-)
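
For context, kvm_mmu_move_private_pages_from() is expected to be invoked
once per destination vCPU by the migration path wired up elsewhere in
this series. A minimal caller sketch, assuming a 1:1 vCPU mapping between
source and destination (the helper below is hypothetical and not part of
this patch):

	/*
	 * Hypothetical caller: hand each destination vCPU the source's
	 * private EPT root. Only the first call actually moves the root;
	 * later vCPUs pick up the already-moved root via the INVALID_PAGE
	 * fallback inside kvm_mmu_move_private_pages_from().
	 */
	static int move_private_mmu(struct kvm *dst_kvm, struct kvm *src_kvm)
	{
		struct kvm_vcpu *dst_vcpu;
		unsigned long i;
		int r;

		kvm_for_each_vcpu(i, dst_vcpu, dst_kvm) {
			struct kvm_vcpu *src_vcpu = kvm_get_vcpu(src_kvm, i);

			if (!src_vcpu)
				return -EINVAL;

			r = kvm_mmu_move_private_pages_from(dst_vcpu, src_vcpu);
			if (r)
				return r;
		}
		return 0;
	}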
  

Comments

Isaku Yamahata June 2, 2023, 7 a.m. UTC | #1
On Fri, Apr 07, 2023 at 08:19:20PM +0000,
Sagi Shahar <sagis@google.com> wrote:

> diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> index 327dee4f6170e..685528fdc0ad6 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.c
> +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> @@ -296,6 +296,23 @@ static void tdp_mmu_init_sp(struct kvm_mmu_page *sp, tdp_ptep_t sptep,
>  	trace_kvm_mmu_get_page(sp, true);
>  }
>  
> +static struct kvm_mmu_page *
> +kvm_tdp_mmu_get_vcpu_root_no_alloc(struct kvm_vcpu *vcpu, union kvm_mmu_page_role role)
> +{
> +	struct kvm *kvm = vcpu->kvm;
> +	struct kvm_mmu_page *root;
> +
> +	lockdep_assert_held_read(&kvm->mmu_lock);

Because kvm_tdp_mmu_get_vcpu_root() holds the write lock,
this should be lockdep_assert_held(&kvm->mmu_lock).
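
For clarity, a sketch of the suggested change:

-	lockdep_assert_held_read(&kvm->mmu_lock);
+	lockdep_assert_held(&kvm->mmu_lock);

lockdep_assert_held() passes whether the rwlock is held for read or for
write, whereas the _read variant splats when the caller holds the lock
for write.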

Thanks,

> +
> +	for_each_tdp_mmu_root(kvm, root, kvm_mmu_role_as_id(role)) {
> +		if (root->role.word == role.word &&
> +		    kvm_tdp_mmu_get_root(root))
> +			return root;
> +	}
> +
> +	return NULL;
> +}
> +
>  static struct kvm_mmu_page *kvm_tdp_mmu_get_vcpu_root(struct kvm_vcpu *vcpu,
>  						      bool private)
>  {
> @@ -311,11 +328,9 @@ static struct kvm_mmu_page *kvm_tdp_mmu_get_vcpu_root(struct kvm_vcpu *vcpu,
>  	 */
>  	if (private)
>  		kvm_mmu_page_role_set_private(&role);
> -	for_each_tdp_mmu_root(kvm, root, kvm_mmu_role_as_id(role)) {
> -		if (root->role.word == role.word &&
> -		    kvm_tdp_mmu_get_root(root))
> -			goto out;
> -	}
> +	root = kvm_tdp_mmu_get_vcpu_root_no_alloc(vcpu, role);
> +	if (root)
> +		goto out;
>  
>  	root = tdp_mmu_alloc_sp(vcpu, role);
>  	tdp_mmu_init_sp(root, NULL, 0);
> @@ -330,6 +345,58 @@ static struct kvm_mmu_page *kvm_tdp_mmu_get_vcpu_root(struct kvm_vcpu *vcpu,
>  	return root;
>  }
>  
> +hpa_t kvm_tdp_mmu_move_private_pages_from(struct kvm_vcpu *vcpu,
> +					  struct kvm_vcpu *src_vcpu)
> +{
> +	union kvm_mmu_page_role role = vcpu->arch.mmu->root_role;
> +	struct kvm *kvm = vcpu->kvm;
> +	struct kvm *src_kvm = src_vcpu->kvm;
> +	struct kvm_mmu_page *private_root = NULL;
> +	struct kvm_mmu_page *root;
> +	s64 num_private_pages, old;
> +
> +	lockdep_assert_held_write(&vcpu->kvm->mmu_lock);
> +	lockdep_assert_held_write(&src_vcpu->kvm->mmu_lock);
> +
> +	/* Find the private root of the source. */
> +	kvm_mmu_page_role_set_private(&role);
> +	for_each_tdp_mmu_root(src_kvm, root, kvm_mmu_role_as_id(role)) {
> +		if (root->role.word == role.word) {
> +			private_root = root;
> +			break;
> +		}
> +	}
> +	if (!private_root)
> +		return INVALID_PAGE;
> +
> +	/* Remove the private root from the src kvm and add it to dst kvm. */
> +	list_del_rcu(&private_root->link);
> +	list_add_rcu(&private_root->link, &kvm->arch.tdp_mmu_roots);
> +
> +	num_private_pages = atomic64_read(&src_kvm->arch.tdp_private_mmu_pages);
> +	old = atomic64_cmpxchg(&kvm->arch.tdp_private_mmu_pages, 0,
> +			       num_private_pages);
> +	/* The destination VM should have no private pages at this point. */
> +	WARN_ON(old);
> +	atomic64_set(&src_kvm->arch.tdp_private_mmu_pages, 0);
> +
> +	return __pa(private_root->spt);
> +}
> +
> +hpa_t kvm_tdp_mmu_get_vcpu_root_hpa_no_alloc(struct kvm_vcpu *vcpu, bool private)
> +{
> +	struct kvm_mmu_page *root;
> +	union kvm_mmu_page_role role = vcpu->arch.mmu->root_role;
> +
> +	if (private)
> +		kvm_mmu_page_role_set_private(&role);
> +	root = kvm_tdp_mmu_get_vcpu_root_no_alloc(vcpu, role);
> +	if (!root)
> +		return INVALID_PAGE;
> +
> +	return __pa(root->spt);
> +}
> +
>  hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu, bool private)
>  {
>  	return __pa(kvm_tdp_mmu_get_vcpu_root(vcpu, private)->spt);
  

Patch

diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index d10b08eeaefee..09bae7fe18a12 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -120,6 +120,8 @@  void kvm_mmu_unload(struct kvm_vcpu *vcpu);
 void kvm_mmu_free_obsolete_roots(struct kvm_vcpu *vcpu);
 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
 void kvm_mmu_sync_prev_roots(struct kvm_vcpu *vcpu);
+int kvm_mmu_move_private_pages_from(struct kvm_vcpu *vcpu,
+				    struct kvm_vcpu *src_vcpu);
 
 static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
 {
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index a35f2e7f9bc70..1acc9338323da 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3789,6 +3789,66 @@  static int mmu_first_shadow_root_alloc(struct kvm *kvm)
 	return r;
 }
 
+int kvm_mmu_move_private_pages_from(struct kvm_vcpu *vcpu,
+				    struct kvm_vcpu *src_vcpu)
+{
+	struct kvm_mmu *mmu = vcpu->arch.mmu;
+	struct kvm_mmu *src_mmu = src_vcpu->arch.mmu;
+	gfn_t gfn_shared = kvm_gfn_shared_mask(vcpu->kvm);
+	hpa_t private_root_hpa, shared_root_hpa;
+	int r = -EINVAL;
+
+	/* Hold locks for both src and dst. Always take the src lock first. */
+	write_lock(&src_vcpu->kvm->mmu_lock);
+	write_lock(&vcpu->kvm->mmu_lock);
+
+	if (!gfn_shared)
+		goto out_unlock;
+
+	WARN_ON_ONCE(!is_tdp_mmu_active(vcpu));
+	WARN_ON_ONCE(!is_tdp_mmu_active(src_vcpu));
+
+	r = mmu_topup_memory_caches(vcpu, !vcpu->arch.mmu->root_role.direct);
+	if (r)
+		goto out_unlock;
+
+	/*
+	 * The private root is moved from the src to the dst and is marked as
+	 * invalid in the src.
+	 */
+	private_root_hpa = kvm_tdp_mmu_move_private_pages_from(vcpu, src_vcpu);
+	if (private_root_hpa == INVALID_PAGE) {
+		/*
+		 * This likely means that the private root was already moved by
+		 * another vCPU.
+		 */
+		private_root_hpa = kvm_tdp_mmu_get_vcpu_root_hpa_no_alloc(vcpu, true);
+		if (private_root_hpa == INVALID_PAGE) {
+			r = -EINVAL;
+			goto out_unlock;
+		}
+	}
+
+	mmu->private_root_hpa = private_root_hpa;
+	src_mmu->private_root_hpa = INVALID_PAGE;
+
+	/*
+	 * The shared root is allocated normally and is not moved from the src.
+	 */
+	shared_root_hpa = kvm_tdp_mmu_get_vcpu_root_hpa(vcpu, false);
+	mmu->root.hpa = shared_root_hpa;
+
+	kvm_mmu_load_pgd(vcpu);
+	static_call(kvm_x86_flush_tlb_current)(vcpu);
+
+out_unlock:
+	write_unlock(&vcpu->kvm->mmu_lock);
+	write_unlock(&src_vcpu->kvm->mmu_lock);
+
+	return r;
+}
+EXPORT_SYMBOL(kvm_mmu_move_private_pages_from);
+
 static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu *mmu = vcpu->arch.mmu;
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 327dee4f6170e..685528fdc0ad6 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -296,6 +296,23 @@  static void tdp_mmu_init_sp(struct kvm_mmu_page *sp, tdp_ptep_t sptep,
 	trace_kvm_mmu_get_page(sp, true);
 }
 
+static struct kvm_mmu_page *
+kvm_tdp_mmu_get_vcpu_root_no_alloc(struct kvm_vcpu *vcpu, union kvm_mmu_page_role role)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_mmu_page *root;
+
+	lockdep_assert_held_read(&kvm->mmu_lock);
+
+	for_each_tdp_mmu_root(kvm, root, kvm_mmu_role_as_id(role)) {
+		if (root->role.word == role.word &&
+		    kvm_tdp_mmu_get_root(root))
+			return root;
+	}
+
+	return NULL;
+}
+
 static struct kvm_mmu_page *kvm_tdp_mmu_get_vcpu_root(struct kvm_vcpu *vcpu,
 						      bool private)
 {
@@ -311,11 +328,9 @@  static struct kvm_mmu_page *kvm_tdp_mmu_get_vcpu_root(struct kvm_vcpu *vcpu,
 	 */
 	if (private)
 		kvm_mmu_page_role_set_private(&role);
-	for_each_tdp_mmu_root(kvm, root, kvm_mmu_role_as_id(role)) {
-		if (root->role.word == role.word &&
-		    kvm_tdp_mmu_get_root(root))
-			goto out;
-	}
+	root = kvm_tdp_mmu_get_vcpu_root_no_alloc(vcpu, role);
+	if (root)
+		goto out;
 
 	root = tdp_mmu_alloc_sp(vcpu, role);
 	tdp_mmu_init_sp(root, NULL, 0);
@@ -330,6 +345,58 @@  static struct kvm_mmu_page *kvm_tdp_mmu_get_vcpu_root(struct kvm_vcpu *vcpu,
 	return root;
 }
 
+hpa_t kvm_tdp_mmu_move_private_pages_from(struct kvm_vcpu *vcpu,
+					  struct kvm_vcpu *src_vcpu)
+{
+	union kvm_mmu_page_role role = vcpu->arch.mmu->root_role;
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm *src_kvm = src_vcpu->kvm;
+	struct kvm_mmu_page *private_root = NULL;
+	struct kvm_mmu_page *root;
+	s64 num_private_pages, old;
+
+	lockdep_assert_held_write(&vcpu->kvm->mmu_lock);
+	lockdep_assert_held_write(&src_vcpu->kvm->mmu_lock);
+
+	/* Find the private root of the source. */
+	kvm_mmu_page_role_set_private(&role);
+	for_each_tdp_mmu_root(src_kvm, root, kvm_mmu_role_as_id(role)) {
+		if (root->role.word == role.word) {
+			private_root = root;
+			break;
+		}
+	}
+	if (!private_root)
+		return INVALID_PAGE;
+
+	/* Remove the private root from the src kvm and add it to dst kvm. */
+	list_del_rcu(&private_root->link);
+	list_add_rcu(&private_root->link, &kvm->arch.tdp_mmu_roots);
+
+	num_private_pages = atomic64_read(&src_kvm->arch.tdp_private_mmu_pages);
+	old = atomic64_cmpxchg(&kvm->arch.tdp_private_mmu_pages, 0,
+			       num_private_pages);
+	/* The destination VM should have no private pages at this point. */
+	WARN_ON(old);
+	atomic64_set(&src_kvm->arch.tdp_private_mmu_pages, 0);
+
+	return __pa(private_root->spt);
+}
+
+hpa_t kvm_tdp_mmu_get_vcpu_root_hpa_no_alloc(struct kvm_vcpu *vcpu, bool private)
+{
+	struct kvm_mmu_page *root;
+	union kvm_mmu_page_role role = vcpu->arch.mmu->root_role;
+
+	if (private)
+		kvm_mmu_page_role_set_private(&role);
+	root = kvm_tdp_mmu_get_vcpu_root_no_alloc(vcpu, role);
+	if (!root)
+		return INVALID_PAGE;
+
+	return __pa(root->spt);
+}
+
 hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu, bool private)
 {
 	return __pa(kvm_tdp_mmu_get_vcpu_root(vcpu, private)->spt);
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index 3ae3c3b8642ac..0e9d38432673d 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -11,6 +11,9 @@  int kvm_mmu_init_tdp_mmu(struct kvm *kvm);
 void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm);
 
 hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu, bool private);
+hpa_t kvm_tdp_mmu_get_vcpu_root_hpa_no_alloc(struct kvm_vcpu *vcpu, bool private);
+hpa_t kvm_tdp_mmu_move_private_pages_from(struct kvm_vcpu *vcpu,
+					  struct kvm_vcpu *src_vcpu);
 
 __must_check static inline bool kvm_tdp_mmu_get_root(struct kvm_mmu_page *root)
 {