[RFC,4/5] KVM: TDX: Implement moving private pages between 2 TDs
Commit Message
Add functionality for moving the private EPT table from one TD to a
new one.
This function moves the root of the private EPT table from the source
TD and overwrites the root of the destination TD.
Signed-off-by: Sagi Shahar <sagis@google.com>
---
arch/x86/kvm/mmu.h | 2 +
arch/x86/kvm/mmu/mmu.c | 60 +++++++++++++++++++++++++++++
arch/x86/kvm/mmu/tdp_mmu.c | 77 +++++++++++++++++++++++++++++++++++---
arch/x86/kvm/mmu/tdp_mmu.h | 3 ++
4 files changed, 137 insertions(+), 5 deletions(-)
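For context, a rough sketch of how the destination side might invoke the
new helper; this caller (tdx_td_move_private_ept()) is not part of the
patch and is shown only for illustration:

	/*
	 * Illustrative only: hand the private EPT tree of the source TD's
	 * vCPU over to the destination TD's vCPU.
	 *
	 * kvm_mmu_move_private_pages_from() takes both mmu_locks itself
	 * (source first, then destination), moves the private root, and
	 * reloads the destination PGD, so the caller only checks the
	 * return value.
	 */
	static int tdx_td_move_private_ept(struct kvm_vcpu *dst_vcpu,
					   struct kvm_vcpu *src_vcpu)
	{
		return kvm_mmu_move_private_pages_from(dst_vcpu, src_vcpu);
	}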
Comments
On Fri, Apr 07, 2023 at 08:19:20PM +0000,
Sagi Shahar <sagis@google.com> wrote:
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> index 327dee4f6170e..685528fdc0ad6 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.c
> +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> @@ -296,6 +296,23 @@ static void tdp_mmu_init_sp(struct kvm_mmu_page *sp, tdp_ptep_t sptep,
> trace_kvm_mmu_get_page(sp, true);
> }
>
> +static struct kvm_mmu_page *
> +kvm_tdp_mmu_get_vcpu_root_no_alloc(struct kvm_vcpu *vcpu, union kvm_mmu_page_role role)
> +{
> + struct kvm *kvm = vcpu->kvm;
> + struct kvm_mmu_page *root;
> +
> + lockdep_assert_held_read(&kvm->mmu_lock);
Because kvm_tdp_mmu_get_vcpu_root() holds the write lock, this should
be lockdep_assert_held(&kvm->mmu_lock).
Thanks,
> +
> + for_each_tdp_mmu_root(kvm, root, kvm_mmu_role_as_id(role)) {
> + if (root->role.word == role.word &&
> + kvm_tdp_mmu_get_root(root))
> + return root;
> + }
> +
> + return NULL;
> +}
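A minimal sketch of the relaxed assertion suggested above (untested; it
assumes the helper may be reached with mmu_lock held for either read or
write):

	static struct kvm_mmu_page *
	kvm_tdp_mmu_get_vcpu_root_no_alloc(struct kvm_vcpu *vcpu,
					   union kvm_mmu_page_role role)
	{
		struct kvm *kvm = vcpu->kvm;
		struct kvm_mmu_page *root;

		/* Callers hold mmu_lock for read or for write. */
		lockdep_assert_held(&kvm->mmu_lock);

		for_each_tdp_mmu_root(kvm, root, kvm_mmu_role_as_id(role)) {
			if (root->role.word == role.word &&
			    kvm_tdp_mmu_get_root(root))
				return root;
		}

		return NULL;
	}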
> +
> static struct kvm_mmu_page *kvm_tdp_mmu_get_vcpu_root(struct kvm_vcpu *vcpu,
> bool private)
> {
> @@ -311,11 +328,9 @@ static struct kvm_mmu_page *kvm_tdp_mmu_get_vcpu_root(struct kvm_vcpu *vcpu,
> */
> if (private)
> kvm_mmu_page_role_set_private(&role);
> - for_each_tdp_mmu_root(kvm, root, kvm_mmu_role_as_id(role)) {
> - if (root->role.word == role.word &&
> - kvm_tdp_mmu_get_root(root))
> - goto out;
> - }
> + root = kvm_tdp_mmu_get_vcpu_root_no_alloc(vcpu, role);
> + if (!!root)
> + goto out;
>
> root = tdp_mmu_alloc_sp(vcpu, role);
> tdp_mmu_init_sp(root, NULL, 0);
> @@ -330,6 +345,58 @@ static struct kvm_mmu_page *kvm_tdp_mmu_get_vcpu_root(struct kvm_vcpu *vcpu,
> return root;
> }
>
> +hpa_t kvm_tdp_mmu_move_private_pages_from(struct kvm_vcpu *vcpu,
> + struct kvm_vcpu *src_vcpu)
> +{
> + union kvm_mmu_page_role role = vcpu->arch.mmu->root_role;
> + struct kvm *kvm = vcpu->kvm;
> + struct kvm *src_kvm = src_vcpu->kvm;
> + struct kvm_mmu_page *private_root = NULL;
> + struct kvm_mmu_page *root;
> + s64 num_private_pages, old;
> +
> + lockdep_assert_held_write(&vcpu->kvm->mmu_lock);
> + lockdep_assert_held_write(&src_vcpu->kvm->mmu_lock);
> +
> + /* Find the private root of the source. */
> + kvm_mmu_page_role_set_private(&role);
> + for_each_tdp_mmu_root(src_kvm, root, kvm_mmu_role_as_id(role)) {
> + if (root->role.word == role.word) {
> + private_root = root;
> + break;
> + }
> + }
> + if (!private_root)
> + return INVALID_PAGE;
> +
> + /* Remove the private root from the src kvm and add it to dst kvm. */
> + list_del_rcu(&private_root->link);
> + list_add_rcu(&private_root->link, &kvm->arch.tdp_mmu_roots);
> +
> + num_private_pages = atomic64_read(&src_kvm->arch.tdp_private_mmu_pages);
> + old = atomic64_cmpxchg(&kvm->arch.tdp_private_mmu_pages, 0,
> + num_private_pages);
> + /* The destination VM should have no private pages at this point. */
> + WARN_ON(old);
> + atomic64_set(&src_kvm->arch.tdp_private_mmu_pages, 0);
> +
> + return __pa(private_root->spt);
> +}
> +
> +hpa_t kvm_tdp_mmu_get_vcpu_root_hpa_no_alloc(struct kvm_vcpu *vcpu, bool private)
> +{
> + struct kvm_mmu_page *root;
> + union kvm_mmu_page_role role = vcpu->arch.mmu->root_role;
> +
> + if (private)
> + kvm_mmu_page_role_set_private(&role);
> + root = kvm_tdp_mmu_get_vcpu_root_no_alloc(vcpu, role);
> + if (!root)
> + return INVALID_PAGE;
> +
> + return __pa(root->spt);
> +}
> +
> hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu, bool private)
> {
> return __pa(kvm_tdp_mmu_get_vcpu_root(vcpu, private)->spt);
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -120,6 +120,8 @@ void kvm_mmu_unload(struct kvm_vcpu *vcpu);
void kvm_mmu_free_obsolete_roots(struct kvm_vcpu *vcpu);
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
void kvm_mmu_sync_prev_roots(struct kvm_vcpu *vcpu);
+int kvm_mmu_move_private_pages_from(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu *src_vcpu);
static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
{
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3789,6 +3789,66 @@ static int mmu_first_shadow_root_alloc(struct kvm *kvm)
return r;
}
+int kvm_mmu_move_private_pages_from(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu *src_vcpu)
+{
+ struct kvm_mmu *mmu = vcpu->arch.mmu;
+ struct kvm_mmu *src_mmu = src_vcpu->arch.mmu;
+ gfn_t gfn_shared = kvm_gfn_shared_mask(vcpu->kvm);
+ hpa_t private_root_hpa, shared_root_hpa;
+ int r = -EINVAL;
+
+ // Hold locks for both src and dst. Always take the src lock first.
+ write_lock(&src_vcpu->kvm->mmu_lock);
+ write_lock(&vcpu->kvm->mmu_lock);
+
+ if (!gfn_shared)
+ goto out_unlock;
+
+ WARN_ON_ONCE(!is_tdp_mmu_active(vcpu));
+ WARN_ON_ONCE(!is_tdp_mmu_active(src_vcpu));
+
+ r = mmu_topup_memory_caches(vcpu, !vcpu->arch.mmu->root_role.direct);
+ if (r)
+ goto out_unlock;
+
+ /*
+ * The private root is moved from the src to the dst and is marked as
+ * invalid in the src.
+ */
+ private_root_hpa = kvm_tdp_mmu_move_private_pages_from(vcpu, src_vcpu);
+ if (private_root_hpa == INVALID_PAGE) {
+ /*
+ * This likely means that the private root was already moved by
+ * another vCPU.
+ */
+ private_root_hpa = kvm_tdp_mmu_get_vcpu_root_hpa_no_alloc(vcpu, true);
+ if (private_root_hpa == INVALID_PAGE) {
+ r = -EINVAL;
+ goto out_unlock;
+ }
+ }
+
+ mmu->private_root_hpa = private_root_hpa;
+ src_mmu->private_root_hpa = INVALID_PAGE;
+
+ /*
+ * The shared root is allocated normally and is not moved from the src.
+ */
+ shared_root_hpa = kvm_tdp_mmu_get_vcpu_root_hpa(vcpu, false);
+ mmu->root.hpa = shared_root_hpa;
+
+ kvm_mmu_load_pgd(vcpu);
+ static_call(kvm_x86_flush_tlb_current)(vcpu);
+
+out_unlock:
+ write_unlock(&vcpu->kvm->mmu_lock);
+ write_unlock(&src_vcpu->kvm->mmu_lock);
+
+ return r;
+}
+EXPORT_SYMBOL(kvm_mmu_move_private_pages_from);
+
static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
{
struct kvm_mmu *mmu = vcpu->arch.mmu;
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 327dee4f6170e..685528fdc0ad6 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -296,6 +296,23 @@ static void tdp_mmu_init_sp(struct kvm_mmu_page *sp, tdp_ptep_t sptep,
trace_kvm_mmu_get_page(sp, true);
}
+static struct kvm_mmu_page *
+kvm_tdp_mmu_get_vcpu_root_no_alloc(struct kvm_vcpu *vcpu, union kvm_mmu_page_role role)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_mmu_page *root;
+
+ lockdep_assert_held_read(&kvm->mmu_lock);
+
+ for_each_tdp_mmu_root(kvm, root, kvm_mmu_role_as_id(role)) {
+ if (root->role.word == role.word &&
+ kvm_tdp_mmu_get_root(root))
+ return root;
+ }
+
+ return NULL;
+}
+
static struct kvm_mmu_page *kvm_tdp_mmu_get_vcpu_root(struct kvm_vcpu *vcpu,
bool private)
{
@@ -311,11 +328,9 @@ static struct kvm_mmu_page *kvm_tdp_mmu_get_vcpu_root(struct kvm_vcpu *vcpu,
*/
if (private)
kvm_mmu_page_role_set_private(&role);
- for_each_tdp_mmu_root(kvm, root, kvm_mmu_role_as_id(role)) {
- if (root->role.word == role.word &&
- kvm_tdp_mmu_get_root(root))
- goto out;
- }
+ root = kvm_tdp_mmu_get_vcpu_root_no_alloc(vcpu, role);
+ if (!!root)
+ goto out;
root = tdp_mmu_alloc_sp(vcpu, role);
tdp_mmu_init_sp(root, NULL, 0);
@@ -330,6 +345,58 @@ static struct kvm_mmu_page *kvm_tdp_mmu_get_vcpu_root(struct kvm_vcpu *vcpu,
return root;
}
+hpa_t kvm_tdp_mmu_move_private_pages_from(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu *src_vcpu)
+{
+ union kvm_mmu_page_role role = vcpu->arch.mmu->root_role;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm *src_kvm = src_vcpu->kvm;
+ struct kvm_mmu_page *private_root = NULL;
+ struct kvm_mmu_page *root;
+ s64 num_private_pages, old;
+
+ lockdep_assert_held_write(&vcpu->kvm->mmu_lock);
+ lockdep_assert_held_write(&src_vcpu->kvm->mmu_lock);
+
+ /* Find the private root of the source. */
+ kvm_mmu_page_role_set_private(&role);
+ for_each_tdp_mmu_root(src_kvm, root, kvm_mmu_role_as_id(role)) {
+ if (root->role.word == role.word) {
+ private_root = root;
+ break;
+ }
+ }
+ if (!private_root)
+ return INVALID_PAGE;
+
+ /* Remove the private root from the src kvm and add it to dst kvm. */
+ list_del_rcu(&private_root->link);
+ list_add_rcu(&private_root->link, &kvm->arch.tdp_mmu_roots);
+
+ num_private_pages = atomic64_read(&src_kvm->arch.tdp_private_mmu_pages);
+ old = atomic64_cmpxchg(&kvm->arch.tdp_private_mmu_pages, 0,
+ num_private_pages);
+ /* The destination VM should have no private pages at this point. */
+ WARN_ON(old);
+ atomic64_set(&src_kvm->arch.tdp_private_mmu_pages, 0);
+
+ return __pa(private_root->spt);
+}
+
+hpa_t kvm_tdp_mmu_get_vcpu_root_hpa_no_alloc(struct kvm_vcpu *vcpu, bool private)
+{
+ struct kvm_mmu_page *root;
+ union kvm_mmu_page_role role = vcpu->arch.mmu->root_role;
+
+ if (private)
+ kvm_mmu_page_role_set_private(&role);
+ root = kvm_tdp_mmu_get_vcpu_root_no_alloc(vcpu, role);
+ if (!root)
+ return INVALID_PAGE;
+
+ return __pa(root->spt);
+}
+
hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu, bool private)
{
return __pa(kvm_tdp_mmu_get_vcpu_root(vcpu, private)->spt);
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -11,6 +11,9 @@ int kvm_mmu_init_tdp_mmu(struct kvm *kvm);
void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm);
hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu, bool private);
+hpa_t kvm_tdp_mmu_get_vcpu_root_hpa_no_alloc(struct kvm_vcpu *vcpu, bool private);
+hpa_t kvm_tdp_mmu_move_private_pages_from(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu *src_vcpu);
__must_check static inline bool kvm_tdp_mmu_get_root(struct kvm_mmu_page *root)
{