@@ -33,7 +33,10 @@ typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
struct userspace_mem_region {
- struct kvm_userspace_memory_region region;
+ union {
+ struct kvm_userspace_memory_region region;
+ struct kvm_userspace_memory_region_ext region_ext;
+ };
struct sparsebit *unused_phy_pages;
struct sparsebit *protected_phy_pages;
int fd;
@@ -214,7 +217,7 @@ static inline bool kvm_has_cap(long cap)
#define kvm_do_ioctl(fd, cmd, arg) \
({ \
- static_assert(!_IOC_SIZE(cmd) || sizeof(*arg) == _IOC_SIZE(cmd), ""); \
+ static_assert(!_IOC_SIZE(cmd) || sizeof(*arg) >= _IOC_SIZE(cmd), ""); /* *arg may be larger than _IOC_SIZE(cmd), never smaller */ \
ioctl(fd, cmd, arg); \
})
@@ -94,6 +94,7 @@ enum vm_mem_backing_src_type {
VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB,
VM_MEM_SRC_ANONYMOUS_HUGETLB_2GB,
VM_MEM_SRC_ANONYMOUS_HUGETLB_16GB,
+ VM_MEM_SRC_ANONYMOUS_AND_RESTRICTED_MEMFD,
VM_MEM_SRC_SHMEM,
VM_MEM_SRC_SHARED_HUGETLB,
NUM_SRC_TYPES,
@@ -104,6 +105,7 @@ enum vm_mem_backing_src_type {
struct vm_mem_backing_src_alias {
const char *name;
uint32_t flag;
+ bool need_restricted_memfd; /* region add also creates a restricted memfd and sets KVM_MEM_PRIVATE */
};
#define MIN_RUN_DELAY_NS 200000UL
@@ -15,7 +15,6 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
-#include <linux/kernel.h>
#define KVM_UTIL_MIN_PFN 2
@@ -799,6 +798,27 @@ void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
errno, strerror(errno));
}
+/**
+ * Create a restricted memfd with the memfd_restricted syscall, resize it to
+ * @memory_region_size bytes, fallocate() that whole range and return the fd.
+ *
+ * Every call's result is checked against -1 with TEST_ASSERT().
+ */
+static int initialize_restricted_memfd(uint64_t memory_region_size)
+{
+ int ret;
+ int mfd = -1;
+
+ mfd = syscall(__NR_memfd_restricted, 0);
+ TEST_ASSERT(mfd != -1, "Failed to create private memfd");
+ ret = ftruncate(mfd, memory_region_size);
+ TEST_ASSERT(ret != -1, "Failed to resize memfd %d to %lx", mfd, memory_region_size);
+ ret = fallocate(mfd, 0, 0, memory_region_size); /* mode 0, offset 0, full length */
+ TEST_ASSERT(ret != -1, "Failed to allocate %lx bytes in memfd %d", memory_region_size, mfd);
+
+ return mfd;
+}
+
/*
* VM Userspace Memory Region Add
*
@@ -830,6 +850,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
struct userspace_mem_region *region;
size_t backing_src_pagesz = get_backing_src_pagesz(src_type);
size_t alignment;
+ int restricted_memfd = -1;
TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages,
"Number of guest pages is not compatible with the host. "
@@ -927,14 +948,24 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
/* As needed perform madvise */
if ((src_type == VM_MEM_SRC_ANONYMOUS ||
- src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) {
- ret = madvise(region->host_mem, npages * vm->page_size,
- src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
+ src_type == VM_MEM_SRC_ANONYMOUS_THP ||
+ src_type == VM_MEM_SRC_ANONYMOUS_AND_RESTRICTED_MEMFD) && thp_configured()) {
+ int advice = src_type == VM_MEM_SRC_ANONYMOUS_THP
+ ? MADV_HUGEPAGE
+ : MADV_NOHUGEPAGE;
+ ret = madvise(region->host_mem, npages * vm->page_size, advice);
TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s",
region->host_mem, npages * vm->page_size,
vm_mem_backing_src_alias(src_type)->name);
}
+ if (vm_mem_backing_src_alias(src_type)->need_restricted_memfd) {
+ restricted_memfd = initialize_restricted_memfd(npages * vm->page_size);
+ TEST_ASSERT(restricted_memfd != -1,
+ "Failed to create restricted memfd");
+ flags |= KVM_MEM_PRIVATE;
+ }
+
region->unused_phy_pages = sparsebit_alloc();
region->protected_phy_pages = sparsebit_alloc();
sparsebit_set_num(region->unused_phy_pages,
@@ -944,13 +975,16 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
region->region.guest_phys_addr = guest_paddr;
region->region.memory_size = npages * vm->page_size;
region->region.userspace_addr = (uintptr_t) region->host_mem;
- ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region);
+ region->region_ext.restricted_fd = restricted_memfd;
+ region->region_ext.restricted_offset = 0;
+ ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, &region->region_ext);
TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
" rc: %i errno: %i\n"
" slot: %u flags: 0x%x\n"
- " guest_phys_addr: 0x%lx size: 0x%lx",
+ " guest_phys_addr: 0x%lx size: 0x%lx restricted_fd: %d",
ret, errno, slot, flags,
- guest_paddr, (uint64_t) region->region.memory_size);
+ guest_paddr, (uint64_t) region->region.memory_size,
+ restricted_memfd);
/* Add to quick lookup data structures */
vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region);
@@ -8,6 +8,7 @@
#include <assert.h>
#include <ctype.h>
#include <limits.h>
+#include <stdbool.h>
#include <stdlib.h>
#include <time.h>
#include <sys/stat.h>
@@ -254,6 +255,11 @@ const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i)
*/
.flag = MAP_SHARED,
},
+ [VM_MEM_SRC_ANONYMOUS_AND_RESTRICTED_MEMFD] = {
+ .name = "anonymous_and_restricted_memfd",
+ .flag = ANON_FLAGS,
+ .need_restricted_memfd = true,
+ },
};
_Static_assert(ARRAY_SIZE(aliases) == NUM_SRC_TYPES,
"Missing new backing src types?");
@@ -272,6 +278,7 @@ size_t get_backing_src_pagesz(uint32_t i)
switch (i) {
case VM_MEM_SRC_ANONYMOUS:
case VM_MEM_SRC_SHMEM:
+ case VM_MEM_SRC_ANONYMOUS_AND_RESTRICTED_MEMFD:
return getpagesize();
case VM_MEM_SRC_ANONYMOUS_THP:
return get_trans_hugepagesz();