Hi Sebastian,
kernel test robot noticed the following build warnings:
[auto build test WARNING on arm64/for-next/core]
[also build test WARNING on kvmarm/next linus/master v6.6-rc3 next-20230928]
[If your patch is applied to the wrong git tree, kindly drop us a note.
When submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Sebastian-Ene/KVM-arm64-Add-snap-shooting-the-host-stage-2-pagetables/20230927-192734
base: https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git for-next/core
patch link: https://lore.kernel.org/r/20230927112517.2631674-2-sebastianene%40google.com
patch subject: [PATCH 01/11] KVM: arm64: Add snap shooting the host stage-2 pagetables
config: arm64-allyesconfig (https://download.01.org/0day-ci/archive/20230929/202309290210.m6jkiTXx-lkp@intel.com/config)
compiler: aarch64-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20230929/202309290210.m6jkiTXx-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202309290210.m6jkiTXx-lkp@intel.com/
All warnings (new ones prefixed by >>):
arch/arm64/kvm/hyp/nvhe/mem_protect.c: In function '__pkvm_host_stage2_prepare_copy':
>> arch/arm64/kvm/hyp/nvhe/mem_protect.c:335:13: warning: variable 'nr_pages' set but not used [-Wunused-but-set-variable]
335 | u64 nr_pages;
| ^~~~~~~~
vim +/nr_pages +335 arch/arm64/kvm/hyp/nvhe/mem_protect.c
326
327 int __pkvm_host_stage2_prepare_copy(struct kvm_pgtable_snapshot *snapshot)
328 {
329 size_t required_pgd_len;
330 struct kvm_pgtable_mm_ops mm_ops = {0};
331 struct kvm_pgtable *to_pgt, *from_pgt = &host_mmu.pgt;
332 struct kvm_hyp_memcache *memcache = &snapshot->mc;
333 int ret;
334 void *pgd;
> 335 u64 nr_pages;
336
337 required_pgd_len = kvm_pgtable_stage2_pgd_size(host_mmu.arch.vtcr);
338 if (snapshot->pgd_len < required_pgd_len)
339 return -ENOMEM;
340
341 to_pgt = &snapshot->pgtable;
342 nr_pages = snapshot->pgd_len / PAGE_SIZE;
343 pgd = kern_hyp_va(snapshot->pgd_hva);
344
345 hyp_spin_lock(&snapshot_pool_lock);
346 hyp_pool_init(&snapshot_pool, hyp_virt_to_pfn(pgd),
347 required_pgd_len / PAGE_SIZE, 0);
348
349 mm_ops.zalloc_pages_exact = snapshot_zalloc_pages_exact;
350 mm_ops.zalloc_page = snapshot_zalloc_page;
351 mm_ops.free_pages_exact = snapshot_s2_free_pages_exact;
352 mm_ops.get_page = snapshot_get_page;
353 mm_ops.phys_to_virt = hyp_phys_to_virt;
354 mm_ops.virt_to_phys = hyp_virt_to_phys;
355 mm_ops.page_count = hyp_page_count;
356
357 to_pgt->ia_bits = from_pgt->ia_bits;
358 to_pgt->start_level = from_pgt->start_level;
359 to_pgt->flags = from_pgt->flags;
360 to_pgt->mm_ops = &mm_ops;
361
362 host_lock_component();
363 ret = kvm_pgtable_stage2_copy(to_pgt, from_pgt, memcache);
364 host_unlock_component();
365
366 hyp_spin_unlock(&snapshot_pool_lock);
367
368 return ret;
369 }
370 #endif /* CONFIG_NVHE_EL2_DEBUG */
371
@@ -81,6 +81,7 @@ enum __kvm_host_smccc_func {
__KVM_HOST_SMCCC_FUNC___pkvm_init_vm,
__KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu,
__KVM_HOST_SMCCC_FUNC___pkvm_teardown_vm,
+ __KVM_HOST_SMCCC_FUNC___pkvm_copy_host_stage2,
};
#define DECLARE_KVM_VHE_SYM(sym) extern char sym[]
@@ -10,6 +10,7 @@
#include <linux/bits.h>
#include <linux/kvm_host.h>
#include <linux/types.h>
+#include <asm/kvm_host.h>
#define KVM_PGTABLE_MAX_LEVELS 4U
@@ -351,6 +352,21 @@ struct kvm_pgtable {
kvm_pgtable_force_pte_cb_t force_pte_cb;
};
+/**
+ * struct kvm_pgtable_snapshot - Snapshot page-table wrapper.
+ * @pgtable: The page-table configuration.
+ * @mc: Memcache used for pagetable pages allocation.
+ * @pgd_hva: Host virtual address of a physically contiguous buffer
+ * used for storing the PGD.
+ * @pgd_len: The size of the physically contiguous buffer in bytes.
+ */
+struct kvm_pgtable_snapshot {
+ struct kvm_pgtable pgtable;
+ struct kvm_hyp_memcache mc;
+ void *pgd_hva;
+ size_t pgd_len;
+};
+
/**
* kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table.
* @pgt: Uninitialised page-table structure to initialise.
@@ -756,4 +772,24 @@ enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte);
*/
void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
phys_addr_t addr, size_t size);
+
+#ifdef CONFIG_NVHE_EL2_DEBUG
+/**
+ * kvm_pgtable_stage2_copy() - Snapshot the pagetable
+ *
+ * @to_pgt: Destination pagetable
+ * @from_pgt: Source pagetable. The caller must lock the pagetables first
+ * @mc: The memcache where we allocate the destination pagetables from
+ *
+ * Return: -ENOMEM if the destination PGD cannot be allocated, otherwise the
+ * value returned by kvm_pgtable_walk() on the copy. When
+ * CONFIG_NVHE_EL2_DEBUG is not set, the inline stub always returns -EPERM.
+ */
+int kvm_pgtable_stage2_copy(struct kvm_pgtable *to_pgt,
+ const struct kvm_pgtable *from_pgt,
+ void *mc);
+#else
+static inline int kvm_pgtable_stage2_copy(struct kvm_pgtable *to_pgt,
+ const struct kvm_pgtable *from_pgt,
+ void *mc)
+{
+ return -EPERM;
+}
+#endif /* CONFIG_NVHE_EL2_DEBUG */
#endif /* __ARM64_KVM_PGTABLE_H__ */
@@ -69,6 +69,7 @@ int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages);
int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages);
int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages);
int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages);
+int __pkvm_host_stage2_prepare_copy(struct kvm_pgtable_snapshot *snapshot);
bool addr_is_memory(phys_addr_t phys);
int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot);
@@ -314,6 +314,23 @@ static void handle___pkvm_teardown_vm(struct kvm_cpu_context *host_ctxt)
cpu_reg(host_ctxt, 1) = __pkvm_teardown_vm(handle);
}
+/*
+ * Hypercall handler for __pkvm_copy_host_stage2.
+ *
+ * Register 1 of @host_ctxt supplies the struct kvm_pgtable_snapshot pointer
+ * (translated with kern_hyp_va() before use) and receives the return code.
+ * Without CONFIG_NVHE_EL2_DEBUG the call is rejected with -EPERM.
+ */
+static void handle___pkvm_copy_host_stage2(struct kvm_cpu_context *host_ctxt)
+{
+#ifdef CONFIG_NVHE_EL2_DEBUG
+	DECLARE_REG(struct kvm_pgtable_snapshot *, snapshot, host_ctxt, 1);
+	int rc;
+
+	snapshot = kern_hyp_va(snapshot);
+	rc = __pkvm_host_stage2_prepare_copy(snapshot);
+
+	/* On success, rewrite the copied PGD pointer through __hyp_pa(). */
+	if (rc == 0)
+		snapshot->pgtable.pgd =
+			(kvm_pteref_t)__hyp_pa(snapshot->pgtable.pgd);
+
+	cpu_reg(host_ctxt, 1) = rc;
+#else
+	cpu_reg(host_ctxt, 1) = -EPERM;
+#endif
+}
+
typedef void (*hcall_t)(struct kvm_cpu_context *);
#define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x
@@ -348,6 +365,7 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__pkvm_init_vm),
HANDLE_FUNC(__pkvm_init_vcpu),
HANDLE_FUNC(__pkvm_teardown_vm),
+ HANDLE_FUNC(__pkvm_copy_host_stage2),
};
static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
@@ -266,6 +266,109 @@ int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
return 0;
}
+#ifdef CONFIG_NVHE_EL2_DEBUG
+/* Page pool used by the snapshot_* allocators; re-initialised for every copy. */
+static struct hyp_pool snapshot_pool = {0};
+/* Held by __pkvm_host_stage2_prepare_copy() across pool init and the copy. */
+static DEFINE_HYP_SPINLOCK(snapshot_pool_lock);
+
+/*
+ * mm_ops zalloc_pages_exact callback: allocate the snapshot PGD from
+ * snapshot_pool.
+ *
+ * @size is expected to be a power-of-two multiple of PAGE_SIZE. Returns the
+ * address of the allocation, or NULL when the pool cannot satisfy it.
+ */
+static void *snapshot_zalloc_pages_exact(size_t size)
+{
+	void *addr = hyp_alloc_pages(&snapshot_pool, get_order(size));
+
+	/* A failed allocation has no page metadata to split. */
+	if (!addr)
+		return NULL;
+
+	hyp_split_page(hyp_virt_to_page(addr));
+
+	/*
+	 * The size of concatenated PGDs is always a power of two of PAGE_SIZE,
+	 * so there should be no need to free any of the tail pages to make the
+	 * allocation exact.
+	 */
+	WARN_ON(size != (PAGE_SIZE << get_order(size)));
+
+	return addr;
+}
+
+/* mm_ops get_page callback: forwards @addr to hyp_get_page() on snapshot_pool. */
+static void snapshot_get_page(void *addr)
+{
+ hyp_get_page(&snapshot_pool, addr);
+}
+
+/*
+ * mm_ops zalloc_page callback for the snapshot page-table.
+ *
+ * snapshot_pool is tried first; if it has nothing left, a page is popped
+ * from the memcache @mc instead. A memcache page is zeroed here and its
+ * struct hyp_page is reset with a refcount of 1. Returns NULL when neither
+ * source yields a page.
+ */
+static void *snapshot_zalloc_page(void *mc)
+{
+	struct hyp_page *page;
+	void *va = hyp_alloc_pages(&snapshot_pool, 0);
+
+	if (!va) {
+		va = pop_hyp_memcache(mc, hyp_phys_to_virt);
+		if (va) {
+			memset(va, 0, PAGE_SIZE);
+			page = hyp_virt_to_page(va);
+			memset(page, 0, sizeof(*page));
+			page->refcount = 1;
+		}
+	}
+
+	return va;
+}
+
+/*
+ * mm_ops free_pages_exact callback: drop one reference on each page of the
+ * 2^get_order(@size) pages starting at @addr. The result of the
+ * dec-and-test is not acted upon here.
+ */
+static void snapshot_s2_free_pages_exact(void *addr, unsigned long size)
+{
+	u8 order = get_order(size);
+	unsigned int remaining = 1 << order;
+	void *va = addr;
+
+	while (remaining--) {
+		struct hyp_page *page = hyp_virt_to_page(va);
+
+		hyp_page_ref_dec_and_test(page);
+		va += PAGE_SIZE;
+	}
+}
+
+/*
+ * Copy the host stage-2 page-table into the buffers described by @snapshot.
+ *
+ * The buffer at snapshot->pgd_hva is handed to snapshot_pool; table pages
+ * are taken from that pool first and then from snapshot->mc (see
+ * snapshot_zalloc_page()).
+ *
+ * Return: -ENOMEM if snapshot->pgd_len is smaller than the PGD size
+ * required by the host VTCR, otherwise the result of
+ * kvm_pgtable_stage2_copy().
+ */
+int __pkvm_host_stage2_prepare_copy(struct kvm_pgtable_snapshot *snapshot)
+{
+	size_t required_pgd_len;
+	struct kvm_pgtable_mm_ops mm_ops = {0};
+	struct kvm_pgtable *to_pgt, *from_pgt = &host_mmu.pgt;
+	struct kvm_hyp_memcache *memcache = &snapshot->mc;
+	int ret;
+	void *pgd;
+
+	required_pgd_len = kvm_pgtable_stage2_pgd_size(host_mmu.arch.vtcr);
+	if (snapshot->pgd_len < required_pgd_len)
+		return -ENOMEM;
+
+	to_pgt = &snapshot->pgtable;
+	pgd = kern_hyp_va(snapshot->pgd_hva);
+
+	/* snapshot_pool is a single global, so serialise its users. */
+	hyp_spin_lock(&snapshot_pool_lock);
+	hyp_pool_init(&snapshot_pool, hyp_virt_to_pfn(pgd),
+		      required_pgd_len / PAGE_SIZE, 0);
+
+	mm_ops.zalloc_pages_exact = snapshot_zalloc_pages_exact;
+	mm_ops.zalloc_page = snapshot_zalloc_page;
+	mm_ops.free_pages_exact = snapshot_s2_free_pages_exact;
+	mm_ops.get_page = snapshot_get_page;
+	mm_ops.phys_to_virt = hyp_phys_to_virt;
+	mm_ops.virt_to_phys = hyp_virt_to_phys;
+	mm_ops.page_count = hyp_page_count;
+
+	to_pgt->ia_bits = from_pgt->ia_bits;
+	to_pgt->start_level = from_pgt->start_level;
+	to_pgt->flags = from_pgt->flags;
+	/*
+	 * NOTE(review): mm_ops lives on this function's stack, so
+	 * to_pgt->mm_ops dangles once we return - confirm that no later
+	 * user of the snapshot dereferences it.
+	 */
+	to_pgt->mm_ops = &mm_ops;
+
+	/* The source page-table must not change while it is being copied. */
+	host_lock_component();
+	ret = kvm_pgtable_stage2_copy(to_pgt, from_pgt, memcache);
+	host_unlock_component();
+
+	hyp_spin_unlock(&snapshot_pool_lock);
+
+	return ret;
+}
+#endif /* CONFIG_NVHE_EL2_DEBUG */
+
void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc)
{
void *addr;
@@ -1598,3 +1598,59 @@ void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *p
WARN_ON(mm_ops->page_count(pgtable) != 1);
mm_ops->put_page(pgtable);
}
+
+#ifdef CONFIG_NVHE_EL2_DEBUG
+/*
+ * Walker callback used by kvm_pgtable_stage2_copy().
+ *
+ * Entries that stage2_pte_is_counted() rejects are left alone. A table
+ * entry gets a freshly allocated page (from ctx->arg via mm_ops->zalloc_page)
+ * holding a copy of the table it pointed to, and the entry is rewritten to
+ * reference that copy. Any other counted entry is written back unchanged.
+ *
+ * Return: 0 on success, -ENOMEM if no page could be allocated for a table.
+ */
+static int stage2_copy_walker(const struct kvm_pgtable_visit_ctx *ctx,
+			      enum kvm_pgtable_walk_flags visit)
+{
+	struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
+	void *dst, *src;
+
+	if (!stage2_pte_is_counted(ctx->old))
+		return 0;
+
+	if (!kvm_pte_table(ctx->old, ctx->level)) {
+		*ctx->ptep = ctx->old;
+		return 0;
+	}
+
+	dst = mm_ops->zalloc_page(ctx->arg);
+	if (!dst)
+		return -ENOMEM;
+
+	src = kvm_pte_follow(ctx->old, mm_ops);
+	memcpy(dst, src, PAGE_SIZE);
+	*ctx->ptep = kvm_init_table_pte(dst, mm_ops);
+
+	return 0;
+}
+
+/*
+ * Build a copy of @from_pgt in @to_pgt. The destination PGD comes from
+ * to_pgt->mm_ops->zalloc_pages_exact(); lower-level tables are allocated by
+ * stage2_copy_walker(), which receives @mc as its argument.
+ */
+int kvm_pgtable_stage2_copy(struct kvm_pgtable *to_pgt,
+ const struct kvm_pgtable *from_pgt,
+ void *mc)
+{
+ int ret;
+ size_t pgd_sz;
+ struct kvm_pgtable_mm_ops *mm_ops = to_pgt->mm_ops;
+ struct kvm_pgtable_walker walker = {
+ .cb = stage2_copy_walker,
+ .flags = KVM_PGTABLE_WALK_LEAF |
+ KVM_PGTABLE_WALK_TABLE_PRE,
+ .arg = mc
+ };
+
+ pgd_sz = kvm_pgd_pages(to_pgt->ia_bits, to_pgt->start_level) *
+ PAGE_SIZE;
+ to_pgt->pgd = (kvm_pteref_t)mm_ops->zalloc_pages_exact(pgd_sz);
+ if (!to_pgt->pgd)
+ return -ENOMEM;
+
+ /*
+ * Start from a verbatim copy of the source PGD; the walk below then
+ * replaces each table entry with one pointing at a private copy.
+ */
+ memcpy(to_pgt->pgd, from_pgt->pgd, pgd_sz);
+
+ ret = kvm_pgtable_walk(to_pgt, 0, BIT(to_pgt->ia_bits), &walker);
+ /*
+ * NOTE(review): the PGD is passed to free_pages_exact() even when the
+ * walk succeeded, yet to_pgt->pgd is still read by the hypercall
+ * handler afterwards - confirm this is only meant to drop the
+ * allocation reference rather than release the pages.
+ */
+ mm_ops->free_pages_exact(to_pgt->pgd, pgd_sz);
+
+ return ret;
+}
+#endif /* CONFIG_NVHE_EL2_DEBUG */