@@ -1151,7 +1151,18 @@ struct kvm_arch {
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
struct list_head active_mmu_pages;
struct list_head zapped_obsolete_pages;
- struct list_head lpage_disallowed_mmu_pages;
+ /*
+ * A list of kvm_mmu_page structs that, if zapped, could possibly be
+ * replaced by an NX huge page. A shadow page is on this list if its
+ * existence disallows an NX huge page (nx_huge_page_disallowed is set)
+ * and there are no other conditions that prevent a huge page, e.g.
+ * the backing host page is huge, dirtly logging is not enabled for its
+ * memslot, etc... Note, zapping shadow pages on this list doesn't
+ * guarantee an NX huge page will be created in its stead, e.g. if the
+ * guest attempts to execute from the region then KVM obviously can't
+ * create an NX huge page (without hanging the guest).
+ */
+ struct list_head possible_nx_huge_pages;
struct kvm_page_track_notifier_node mmu_sp_tracker;
struct kvm_page_track_notifier_head track_notifier_head;
/*
@@ -1267,7 +1278,7 @@ struct kvm_arch {
bool sgx_provisioning_allowed;
struct kvm_pmu_event_filter __rcu *pmu_event_filter;
- struct task_struct *nx_lpage_recovery_thread;
+ struct task_struct *nx_huge_page_recovery_thread;
#ifdef CONFIG_X86_64
/*
@@ -1312,8 +1323,8 @@ struct kvm_arch {
* - tdp_mmu_roots (above)
* - tdp_mmu_pages (above)
* - the link field of kvm_mmu_page structs used by the TDP MMU
- * - lpage_disallowed_mmu_pages
- * - the lpage_disallowed_link field of kvm_mmu_page structs used
+ * - possible_nx_huge_pages;
+ * - the possible_nx_huge_page_link field of kvm_mmu_page structs used
* by the TDP MMU
* It is acceptable, but not necessary, to acquire this lock when
* the thread holds the MMU lock in write mode.
@@ -802,10 +802,10 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
kvm_flush_remote_tlbs_with_address(kvm, gfn, 1);
}
-void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
bool nx_huge_page_possible)
{
- sp->lpage_disallowed = true;
+ sp->nx_huge_page_disallowed = true;
/*
* If it's possible to replace the shadow page with an NX huge page,
@@ -815,12 +815,13 @@ void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp,
* on the list if KVM is reusing an existing shadow page, i.e. if KVM
* links a shadow page at multiple points.
*/
- if (!nx_huge_page_possible || !list_empty(&sp->lpage_disallowed_link))
+ if (!nx_huge_page_possible ||
+ !list_empty(&sp->possible_nx_huge_page_link))
return;
++kvm->stat.nx_lpage_splits;
- list_add_tail(&sp->lpage_disallowed_link,
- &kvm->arch.lpage_disallowed_mmu_pages);
+ list_add_tail(&sp->possible_nx_huge_page_link,
+ &kvm->arch.possible_nx_huge_pages);
}
static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
@@ -840,15 +841,15 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
kvm_mmu_gfn_allow_lpage(slot, gfn);
}
-void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
- sp->lpage_disallowed = false;
+ sp->nx_huge_page_disallowed = false;
- if (list_empty(&sp->lpage_disallowed_link))
+ if (list_empty(&sp->possible_nx_huge_page_link))
return;
--kvm->stat.nx_lpage_splits;
- list_del_init(&sp->lpage_disallowed_link);
+ list_del_init(&sp->possible_nx_huge_page_link);
}
static struct kvm_memory_slot *
@@ -2143,7 +2144,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_shadow_page(struct kvm *kvm,
set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
- INIT_LIST_HEAD(&sp->lpage_disallowed_link);
+ INIT_LIST_HEAD(&sp->possible_nx_huge_page_link);
/*
* active_mmu_pages must be a FIFO list, as kvm_zap_obsolete_pages()
@@ -2502,8 +2503,8 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
zapped_root = !is_obsolete_sp(kvm, sp);
}
- if (sp->lpage_disallowed)
- unaccount_huge_nx_page(kvm, sp);
+ if (sp->nx_huge_page_disallowed)
+ unaccount_nx_huge_page(kvm, sp);
sp->role.invalid = 1;
@@ -3143,7 +3144,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
link_shadow_page(vcpu, it.sptep, sp);
if (fault->is_tdp && fault->huge_page_disallowed)
- account_huge_nx_page(vcpu->kvm, sp,
+ account_nx_huge_page(vcpu->kvm, sp,
fault->req_level >= it.level);
}
@@ -5987,7 +5988,7 @@ int kvm_mmu_init_vm(struct kvm *kvm)
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
- INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages);
+ INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages);
spin_lock_init(&kvm->arch.mmu_unsync_pages_lock);
r = kvm_mmu_init_tdp_mmu(kvm);
@@ -6672,7 +6673,7 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp)
kvm_mmu_zap_all_fast(kvm);
mutex_unlock(&kvm->slots_lock);
- wake_up_process(kvm->arch.nx_lpage_recovery_thread);
+ wake_up_process(kvm->arch.nx_huge_page_recovery_thread);
}
mutex_unlock(&kvm_lock);
}
@@ -6804,7 +6805,7 @@ static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel
mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list)
- wake_up_process(kvm->arch.nx_lpage_recovery_thread);
+ wake_up_process(kvm->arch.nx_huge_page_recovery_thread);
mutex_unlock(&kvm_lock);
}
@@ -6812,7 +6813,7 @@ static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel
return err;
}
-static void kvm_recover_nx_lpages(struct kvm *kvm)
+static void kvm_recover_nx_huge_pages(struct kvm *kvm)
{
unsigned long nx_lpage_splits = kvm->stat.nx_lpage_splits;
int rcu_idx;
@@ -6835,23 +6836,25 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
to_zap = ratio ? DIV_ROUND_UP(nx_lpage_splits, ratio) : 0;
for ( ; to_zap; --to_zap) {
- if (list_empty(&kvm->arch.lpage_disallowed_mmu_pages))
+ if (list_empty(&kvm->arch.possible_nx_huge_pages))
break;
/*
* We use a separate list instead of just using active_mmu_pages
- * because the number of lpage_disallowed pages is expected to
- * be relatively small compared to the total.
+ * because the number of shadow pages that be replaced with an
+ * NX huge page is expected to be relatively small compared to
+ * the total number of shadow pages. And because the TDP MMU
+ * doesn't use active_mmu_pages.
*/
- sp = list_first_entry(&kvm->arch.lpage_disallowed_mmu_pages,
+ sp = list_first_entry(&kvm->arch.possible_nx_huge_pages,
struct kvm_mmu_page,
- lpage_disallowed_link);
- WARN_ON_ONCE(!sp->lpage_disallowed);
+ possible_nx_huge_page_link);
+ WARN_ON_ONCE(!sp->nx_huge_page_disallowed);
if (is_tdp_mmu_page(sp)) {
flush |= kvm_tdp_mmu_zap_sp(kvm, sp);
} else {
kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
- WARN_ON_ONCE(sp->lpage_disallowed);
+ WARN_ON_ONCE(sp->nx_huge_page_disallowed);
}
if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
@@ -6872,7 +6875,7 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
srcu_read_unlock(&kvm->srcu, rcu_idx);
}
-static long get_nx_lpage_recovery_timeout(u64 start_time)
+static long get_nx_huge_page_recovery_timeout(u64 start_time)
{
bool enabled;
uint period;
@@ -6883,19 +6886,19 @@ static long get_nx_lpage_recovery_timeout(u64 start_time)
: MAX_SCHEDULE_TIMEOUT;
}
-static int kvm_nx_lpage_recovery_worker(struct kvm *kvm, uintptr_t data)
+static int kvm_nx_huge_page_recovery_worker(struct kvm *kvm, uintptr_t data)
{
u64 start_time;
long remaining_time;
while (true) {
start_time = get_jiffies_64();
- remaining_time = get_nx_lpage_recovery_timeout(start_time);
+ remaining_time = get_nx_huge_page_recovery_timeout(start_time);
set_current_state(TASK_INTERRUPTIBLE);
while (!kthread_should_stop() && remaining_time > 0) {
schedule_timeout(remaining_time);
- remaining_time = get_nx_lpage_recovery_timeout(start_time);
+ remaining_time = get_nx_huge_page_recovery_timeout(start_time);
set_current_state(TASK_INTERRUPTIBLE);
}
@@ -6904,7 +6907,7 @@ static int kvm_nx_lpage_recovery_worker(struct kvm *kvm, uintptr_t data)
if (kthread_should_stop())
return 0;
- kvm_recover_nx_lpages(kvm);
+ kvm_recover_nx_huge_pages(kvm);
}
}
@@ -6912,17 +6915,17 @@ int kvm_mmu_post_init_vm(struct kvm *kvm)
{
int err;
- err = kvm_vm_create_worker_thread(kvm, kvm_nx_lpage_recovery_worker, 0,
+ err = kvm_vm_create_worker_thread(kvm, kvm_nx_huge_page_recovery_worker, 0,
"kvm-nx-lpage-recovery",
- &kvm->arch.nx_lpage_recovery_thread);
+ &kvm->arch.nx_huge_page_recovery_thread);
if (!err)
- kthread_unpark(kvm->arch.nx_lpage_recovery_thread);
+ kthread_unpark(kvm->arch.nx_huge_page_recovery_thread);
return err;
}
void kvm_mmu_pre_destroy_vm(struct kvm *kvm)
{
- if (kvm->arch.nx_lpage_recovery_thread)
- kthread_stop(kvm->arch.nx_lpage_recovery_thread);
+ if (kvm->arch.nx_huge_page_recovery_thread)
+ kthread_stop(kvm->arch.nx_huge_page_recovery_thread);
}
@@ -57,7 +57,13 @@ struct kvm_mmu_page {
bool tdp_mmu_page;
bool unsync;
u8 mmu_valid_gen;
- bool lpage_disallowed; /* Can't be replaced by an equiv large page */
+
+ /*
+ * The shadow page can't be replaced by an equivalent huge page
+ * because it is being used to map an executable page in the guest
+ * and the NX huge page mitigation is enabled.
+ */
+ bool nx_huge_page_disallowed;
/*
* The following two entries are used to key the shadow page in the
@@ -102,12 +108,12 @@ struct kvm_mmu_page {
/*
* Tracks shadow pages that, if zapped, would allow KVM to create an NX
- * huge page. A shadow page will have lpage_disallowed set but not be
- * on the list if a huge page is disallowed for other reasons, e.g.
- * because KVM is shadowing a PTE at the same gfn, the memslot isn't
- * properly aligned, etc...
+ * huge page. A shadow page will have nx_huge_page_disallowed set but
+ * not be on the list if a huge page is disallowed for other reasons,
+ * e.g. because KVM is shadowing a PTE at the same gfn, the memslot
+ * isn't properly aligned, etc...
*/
- struct list_head lpage_disallowed_link;
+ struct list_head possible_nx_huge_page_link;
#ifdef CONFIG_X86_32
/*
* Used out of the mmu-lock to avoid reading spte values while an
@@ -322,8 +328,8 @@ void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_
void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
-void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
bool nx_huge_page_possible);
-void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp);
+void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp);
#endif /* __KVM_X86_MMU_INTERNAL_H */
@@ -714,7 +714,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
link_shadow_page(vcpu, it.sptep, sp);
if (fault->huge_page_disallowed)
- account_huge_nx_page(vcpu->kvm, sp,
+ account_nx_huge_page(vcpu->kvm, sp,
fault->req_level >= it.level);
}
@@ -284,7 +284,7 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp(struct kvm_vcpu *vcpu)
static void tdp_mmu_init_sp(struct kvm_mmu_page *sp, tdp_ptep_t sptep,
gfn_t gfn, union kvm_mmu_page_role role)
{
- INIT_LIST_HEAD(&sp->lpage_disallowed_link);
+ INIT_LIST_HEAD(&sp->possible_nx_huge_page_link);
set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
@@ -403,8 +403,8 @@ static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp,
lockdep_assert_held_write(&kvm->mmu_lock);
list_del(&sp->link);
- if (sp->lpage_disallowed)
- unaccount_huge_nx_page(kvm, sp);
+ if (sp->nx_huge_page_disallowed)
+ unaccount_nx_huge_page(kvm, sp);
if (shared)
spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
@@ -1143,7 +1143,7 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter,
spin_lock(&kvm->arch.tdp_mmu_pages_lock);
list_add(&sp->link, &kvm->arch.tdp_mmu_pages);
if (account_nx)
- account_huge_nx_page(kvm, sp, true);
+ account_nx_huge_page(kvm, sp, true);
spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
tdp_account_mmu_page(kvm, sp);