@@ -78,6 +78,11 @@ struct protected_vm {
int8_t protected_bit;
};
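+/* One tracked guest page table page, identified by its physical address. */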
+struct pgt_page {
+ vm_paddr_t paddr;
+ struct list_head list;
+};
+
struct kvm_vm {
int mode;
unsigned long type;
@@ -108,6 +113,10 @@ struct kvm_vm {
/* VM protection enabled: SEV, etc*/
bool protected;
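+	/* Page table page tracking; see vm_set_pgt_alloc_tracking(). */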
+ struct list_head pgt_pages;
+ bool track_pgt_pages;
+ uint32_t num_pgt_pages;
+ vm_vaddr_t pgt_vaddr_start;
/* Cache of information for binary stats interface */
int stats_fd;
@@ -196,6 +205,25 @@ struct vm_guest_mode_params {
unsigned int page_size;
unsigned int page_shift;
};
+
+/*
+ * Structure shared with the guest, describing:
+ * - The guest virtual address (pgt_vaddr_start) at which the
+ *   num_pgt_pages physical page table pages tracked in the paddrs
+ *   array are mapped
+ * - The guest page size
+ * - The encryption and shared PTE masks for protected VMs
+ *
+ * Guest code can use this information to walk its own page tables and
+ * read/modify page table attributes.
+ */
+struct guest_pgt_info {
+ uint64_t num_pgt_pages;
+ uint64_t pgt_vaddr_start;
+ uint64_t page_size;
+ uint64_t enc_mask;
+ uint64_t shared_mask;
+ uint64_t paddrs[];
+};
+
extern const struct vm_guest_mode_params vm_guest_mode_params[];
int open_path_or_exit(const char *path, int flags);
@@ -411,6 +439,48 @@ void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id);
vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
+
+/*
+ * Translate the physical address of a page table page to a guest virtual
+ * address. Meant to be called from guest code.
+ *
+ * Input Args:
+ *   gpgt_info - Pointer to the guest_pgt_info structure describing the
+ *               guest virtual address mappings of the guest physical
+ *               page table pages.
+ *   pgt_pa - Physical address of the guest page table page to translate.
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Pointer to the page table page, or NULL if the physical address is
+ *   not tracked by the given guest_pgt_info structure.
+ */
+void *guest_code_get_pgt_vaddr(struct guest_pgt_info *gpgt_info, uint64_t pgt_pa);
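+
+/*
+ * Illustrative guest-side usage (a sketch, not part of this patch):
+ * resolve the root page table from CR3 and assert it is tracked. Assumes
+ * gpgt_info has been shared with the guest via sync_vm_gpgt_info() and
+ * that get_cr3() returns the page table root, as on x86:
+ *
+ *   uint64_t *root = guest_code_get_pgt_vaddr(gpgt_info, get_cr3());
+ *
+ *   GUEST_ASSERT(root != NULL);
+ */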
+
+/*
+ * 1) Map the tracked page table physical pages into the guest virtual
+ *    address space.
+ * 2) Allocate and populate a page, shared with the guest, containing the
+ *    guest_pgt_info structure.
+ *
+ * Note:
+ * 1) vm_set_pgt_alloc_tracking() must be called beforehand to start
+ *    tracking page table page allocations.
+ * 2) Invoke this function only after all needed page table pages have
+ *    been mapped into the VM via virt_pg_map(), as it stops further
+ *    tracking.
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vaddr_min - Minimum guest virtual address at which to map the guest
+ *               page table pages and guest_pgt_info structure page(s).
+ *
+ * Output Args: None
+ *
+ * Return: None
+ */
+void vm_map_page_table(struct kvm_vm *vm, vm_vaddr_t vaddr_min);
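+
+/*
+ * Illustrative flow (a sketch; VM creation details vary by test):
+ *
+ *   vm = vm_create(nr_vcpus);
+ *   vm_set_pgt_alloc_tracking(vm);
+ *   ... allocate guest memory and install mappings via virt_pg_map() ...
+ *   vm_map_page_table(vm, KVM_UTIL_MIN_VADDR);
+ */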
+
vm_vaddr_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages);
vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm);
@@ -673,10 +743,28 @@ void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing);
const char *exit_reason_str(unsigned int exit_reason);
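+
+/*
+ * Share the guest virtual address of the guest_pgt_info structure with
+ * guest code. The default implementation is a weak no-op; architectures
+ * that support guest-side page table walks override it.
+ */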
+void sync_vm_gpgt_info(struct kvm_vm *vm, vm_vaddr_t pgt_info);
+
vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
uint32_t memslot);
vm_paddr_t _vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
vm_paddr_t paddr_min, uint32_t memslot, bool protected);
+
+/*
+ * Enable tracking of physical guest page table pages for the given VM.
+ * Call this right after VM creation, before any pages are mapped into
+ * the VM via the vm_alloc_*() / vm_vaddr_alloc*() functions, so that
+ * every page table page is tracked.
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *
+ * Output Args: None
+ *
+ * Return: None
+ */
+void vm_set_pgt_alloc_tracking(struct kvm_vm *vm);
+
vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm);
static inline vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
@@ -202,6 +202,7 @@ struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages)
TEST_ASSERT(vm != NULL, "Insufficient Memory");
INIT_LIST_HEAD(&vm->vcpus);
+ INIT_LIST_HEAD(&vm->pgt_pages);
vm->regions.gpa_tree = RB_ROOT;
vm->regions.hva_tree = RB_ROOT;
hash_init(vm->regions.slot_hash);
@@ -695,6 +696,7 @@ void kvm_vm_free(struct kvm_vm *vmp)
{
int ctr;
struct hlist_node *node;
+ struct pgt_page *entry, *nentry;
struct userspace_mem_region *region;
if (vmp == NULL)
@@ -710,6 +712,9 @@ void kvm_vm_free(struct kvm_vm *vmp)
hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node)
__vm_mem_region_delete(vmp, region, false);
+ list_for_each_entry_safe(entry, nentry, &vmp->pgt_pages, list)
+ free(entry);
+
/* Free sparsebit arrays. */
sparsebit_free(&vmp->vpages_valid);
sparsebit_free(&vmp->vpages_mapped);
@@ -1330,6 +1335,72 @@ vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
return pgidx_start * vm->page_size;
}
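+
+/*
+ * Weak no-op default, for architectures that do not publish the
+ * guest_pgt_info pointer to guest code; x86 provides an override.
+ */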
+void __weak sync_vm_gpgt_info(struct kvm_vm *vm, vm_vaddr_t pgt_info)
+{
+}
+
+void *guest_code_get_pgt_vaddr(struct guest_pgt_info *gpgt_info,
+ uint64_t pgt_pa)
+{
+ uint64_t num_pgt_pages = gpgt_info->num_pgt_pages;
+ uint64_t pgt_vaddr_start = gpgt_info->pgt_vaddr_start;
+ uint64_t page_size = gpgt_info->page_size;
+
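+	/* Linear scan; the number of tracked page table pages is small. */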
+	for (uint64_t i = 0; i < num_pgt_pages; i++) {
+ if (gpgt_info->paddrs[i] == pgt_pa)
+ return (void *)(pgt_vaddr_start + i * page_size);
+ }
+ return NULL;
+}
+
+static void vm_setup_pgt_info_buf(struct kvm_vm *vm, vm_vaddr_t vaddr_min)
+{
+ struct pgt_page *pgt_page_entry;
+ struct guest_pgt_info *gpgt_info;
+	uint64_t info_size = sizeof(*gpgt_info) + (sizeof(uint64_t) * vm->num_pgt_pages);
+	uint64_t buf_size = align_up(info_size, vm->page_size);
+	vm_vaddr_t buf_start = vm_vaddr_alloc(vm, buf_size, vaddr_min);
+ uint32_t i = 0;
+
+ gpgt_info = (struct guest_pgt_info *)addr_gva2hva(vm, buf_start);
+ gpgt_info->num_pgt_pages = vm->num_pgt_pages;
+ gpgt_info->pgt_vaddr_start = vm->pgt_vaddr_start;
+ gpgt_info->page_size = vm->page_size;
+ if (vm->protected) {
+ gpgt_info->enc_mask = vm->arch.c_bit;
+ gpgt_info->shared_mask = vm->arch.s_bit;
+ }
+ list_for_each_entry(pgt_page_entry, &vm->pgt_pages, list) {
+ gpgt_info->paddrs[i] = pgt_page_entry->paddr;
+ i++;
+ }
+	TEST_ASSERT(i == vm->num_pgt_pages, "Tracked page table page count mismatch");
+ sync_vm_gpgt_info(vm, buf_start);
+}
+
+void vm_map_page_table(struct kvm_vm *vm, vm_vaddr_t vaddr_min)
+{
+	struct pgt_page *pgt_page_entry;
+	vm_vaddr_t vaddr_start, vaddr;
+
+	/*
+	 * Stop tracking further page table pages; mapping the page tables
+	 * may itself require allocating new page table pages.
+	 */
+	vm->track_pgt_pages = false;
+	vaddr_start = vm_vaddr_unused_gap(vm, vm->num_pgt_pages * vm->page_size,
+					  vaddr_min);
+	vaddr = vaddr_start;
+ list_for_each_entry(pgt_page_entry, &vm->pgt_pages, list) {
+		/* Map the page table page, stripping the encryption bit from its address. */
+ virt_pg_map(vm, vaddr, pgt_page_entry->paddr & ~vm->arch.c_bit);
+ sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);
+ vaddr += vm->page_size;
+ }
+ vm->pgt_vaddr_start = vaddr_start;
+
+ vm_setup_pgt_info_buf(vm, vaddr_min);
+}
+
/*
* VM Virtual Address Allocate Shared/Encrypted
*
@@ -1981,9 +2052,24 @@ vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
/* Arbitrary minimum physical address used for virtual translation tables. */
#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
+void vm_set_pgt_alloc_tracking(struct kvm_vm *vm)
+{
+ vm->track_pgt_pages = true;
+}
+
vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm)
{
- return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
+ struct pgt_page *pgt;
+ vm_paddr_t paddr = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
+
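+	/*
+	 * When tracking is enabled, record the new page table page. Store
+	 * the address with the encryption bit set, matching how the guest's
+	 * page table entries reference page table pages (c_bit is zero for
+	 * unprotected VMs).
+	 */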
+ if (vm->track_pgt_pages) {
+ pgt = calloc(1, sizeof(*pgt));
+ TEST_ASSERT(pgt != NULL, "Insufficient memory");
+ pgt->paddr = (paddr | vm->arch.c_bit);
+ list_add(&pgt->list, &vm->pgt_pages);
+ vm->num_pgt_pages++;
+ }
+ return paddr;
}
/*
@@ -19,6 +19,7 @@
#define MAX_NR_CPUID_ENTRIES 100
vm_vaddr_t exception_handlers;
+static struct guest_pgt_info *gpgt_info;
static bool is_cpu_vendor_intel;
static bool is_cpu_vendor_amd;
@@ -241,6 +242,46 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
__virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K);
}
+static uint64_t *guest_code_get_pte(uint64_t vaddr)
+{
+	uint16_t index[4];
+	uint64_t *pml4e, *pdpe, *pde, *pte;
+	uint64_t pgt_paddr = get_cr3();
+	uint64_t page_size;
+
+	GUEST_ASSERT(gpgt_info != NULL);
+	page_size = gpgt_info->page_size;
+
+ index[0] = (vaddr >> 12) & 0x1ffu;
+ index[1] = (vaddr >> 21) & 0x1ffu;
+ index[2] = (vaddr >> 30) & 0x1ffu;
+ index[3] = (vaddr >> 39) & 0x1ffu;
+
+ pml4e = guest_code_get_pgt_vaddr(gpgt_info, pgt_paddr);
+ GUEST_ASSERT(pml4e && (pml4e[index[3]] & PTE_PRESENT_MASK));
+
+ pgt_paddr = (PTE_GET_PFN(pml4e[index[3]]) * page_size);
+ pdpe = guest_code_get_pgt_vaddr(gpgt_info, pgt_paddr);
+ GUEST_ASSERT(pdpe && (pdpe[index[2]] & PTE_PRESENT_MASK) &&
+ !(pdpe[index[2]] & PTE_LARGE_MASK));
+
+ pgt_paddr = (PTE_GET_PFN(pdpe[index[2]]) * page_size);
+ pde = guest_code_get_pgt_vaddr(gpgt_info, pgt_paddr);
+ GUEST_ASSERT(pde && (pde[index[1]] & PTE_PRESENT_MASK) &&
+ !(pde[index[1]] & PTE_LARGE_MASK));
+
+ pgt_paddr = (PTE_GET_PFN(pde[index[1]]) * page_size);
+ pte = guest_code_get_pgt_vaddr(gpgt_info, pgt_paddr);
+ GUEST_ASSERT(pte && (pte[index[0]] & PTE_PRESENT_MASK));
+
+	return &pte[index[0]];
+}
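+
+/*
+ * Illustrative guest-side use (a sketch, not part of this patch): convert
+ * a 4K mapping to shared by rewriting its PTE bits in place. A real
+ * conversion also needs the appropriate hypercall and TLB maintenance,
+ * omitted here:
+ *
+ *   uint64_t *pte = guest_code_get_pte(vaddr);
+ *
+ *   *pte &= ~gpgt_info->enc_mask;
+ *   *pte |= gpgt_info->shared_mask;
+ */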
+
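+/*
+ * x86 override of the weak helper: record the guest virtual address of
+ * the shared guest_pgt_info structure and propagate the pointer to the
+ * guest.
+ */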
+void sync_vm_gpgt_info(struct kvm_vm *vm, vm_vaddr_t pgt_info)
+{
+ gpgt_info = (struct guest_pgt_info *)pgt_info;
+ sync_global_to_guest(vm, gpgt_info);
+}
+
void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
uint64_t nr_bytes, int level)
{