[RFC,07/14] KVM: x86/MMU: Cleanup shrinker interface with Shadow MMU
Commit Message
The MMU shrinker currently only operates on the Shadow MMU, but having
the entire implemenatation in shadow_mmu.c is awkward since much of the
function isn't Shadow MMU specific. There has also been talk of changing the
target of the shrinker to the MMU caches rather than already allocated page
tables. As a result, it makes sense to move some of the implementation back
to mmu.c.
No functional change intended.
Signed-off-by: Ben Gardon <bgardon@google.com>
---
arch/x86/kvm/mmu/mmu.c | 43 ++++++++++++++++++++++++
arch/x86/kvm/mmu/shadow_mmu.c | 62 ++++++++---------------------------
arch/x86/kvm/mmu/shadow_mmu.h | 3 +-
3 files changed, 58 insertions(+), 50 deletions(-)
Comments
On Wed, Dec 21, 2022 at 2:24 PM Ben Gardon <bgardon@google.com> wrote:
>
> The MMU shrinker currently only operates on the Shadow MMU, but having
> the entire implemenatation in shadow_mmu.c is awkward since much of the
> function isn't Shadow MMU specific. There has also been talk of changing the
> target of the shrinker to the MMU caches rather than already allocated page
> tables. As a result, it makes sense to move some of the implementation back
> to mmu.c.
>
My recommendation is to move the whole function back to mmu.c instead
of splitting. As all logic for shrinker will be contained in one place
instead of spreading it over two files. kvm_shadow_mmu_shrink_scan(),
the new function created in this patch, is not used anywhere else,
there are already some functions which are getting exposed from shadow
mmu in this patch, it will be cleaner to just expose the other
functions needed by shrink_scan() and keep the implementation of
shrink_scan same and at one place.
> No functional change intended.
>
> Signed-off-by: Ben Gardon <bgardon@google.com>
> ---
> arch/x86/kvm/mmu/mmu.c | 43 ++++++++++++++++++++++++
> arch/x86/kvm/mmu/shadow_mmu.c | 62 ++++++++---------------------------
> arch/x86/kvm/mmu/shadow_mmu.h | 3 +-
> 3 files changed, 58 insertions(+), 50 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index dd97e346c786..4c45a5b63356 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -3147,6 +3147,49 @@ static unsigned long mmu_shrink_count(struct shrinker *shrink,
> return percpu_counter_read_positive(&kvm_total_used_mmu_pages);
> }
>
> +unsigned long mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
> +{
> + struct kvm *kvm;
> + int nr_to_scan = sc->nr_to_scan;
> + unsigned long freed = 0;
> +
> + mutex_lock(&kvm_lock);
> +
> + list_for_each_entry(kvm, &vm_list, vm_list) {
> + /*
> + * Never scan more than sc->nr_to_scan VM instances.
> + * Will not hit this condition practically since we do not try
> + * to shrink more than one VM and it is very unlikely to see
> + * !n_used_mmu_pages so many times.
> + */
> + if (!nr_to_scan--)
> + break;
> +
> + /*
> + * n_used_mmu_pages is accessed without holding kvm->mmu_lock
> + * here. We may skip a VM instance errorneosly, but we do not
> + * want to shrink a VM that only started to populate its MMU
> + * anyway.
> + */
> + if (!kvm->arch.n_used_mmu_pages &&
> + !kvm_shadow_mmu_has_zapped_obsolete_pages(kvm))
> + continue;
> +
> + freed = kvm_shadow_mmu_shrink_scan(kvm, sc->nr_to_scan);
> +
> + /*
> + * unfair on small ones
> + * per-vm shrinkers cry out
> + * sadness comes quickly
> + */
> + list_move_tail(&kvm->vm_list, &vm_list);
> + break;
> + }
> +
> + mutex_unlock(&kvm_lock);
> + return freed;
> +}
> +
> static struct shrinker mmu_shrinker = {
> .count_objects = mmu_shrink_count,
> .scan_objects = mmu_shrink_scan,
> diff --git a/arch/x86/kvm/mmu/shadow_mmu.c b/arch/x86/kvm/mmu/shadow_mmu.c
> index 090b4788f7de..1259c4a3b140 100644
> --- a/arch/x86/kvm/mmu/shadow_mmu.c
> +++ b/arch/x86/kvm/mmu/shadow_mmu.c
> @@ -3147,7 +3147,7 @@ void kvm_zap_obsolete_pages(struct kvm *kvm)
> kvm_mmu_commit_zap_page(kvm, &kvm->arch.zapped_obsolete_pages);
> }
>
> -static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
> +bool kvm_shadow_mmu_has_zapped_obsolete_pages(struct kvm *kvm)
> {
> return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages));
> }
> @@ -3416,60 +3416,24 @@ void kvm_rmap_zap_collapsible_sptes(struct kvm *kvm,
> kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
> }
>
> -unsigned long mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
> +unsigned long kvm_shadow_mmu_shrink_scan(struct kvm *kvm, int pages_to_free)
> {
> - struct kvm *kvm;
> - int nr_to_scan = sc->nr_to_scan;
> unsigned long freed = 0;
> + int idx;
>
> - mutex_lock(&kvm_lock);
> -
> - list_for_each_entry(kvm, &vm_list, vm_list) {
> - int idx;
> - LIST_HEAD(invalid_list);
> -
> - /*
> - * Never scan more than sc->nr_to_scan VM instances.
> - * Will not hit this condition practically since we do not try
> - * to shrink more than one VM and it is very unlikely to see
> - * !n_used_mmu_pages so many times.
> - */
> - if (!nr_to_scan--)
> - break;
> - /*
> - * n_used_mmu_pages is accessed without holding kvm->mmu_lock
> - * here. We may skip a VM instance errorneosly, but we do not
> - * want to shrink a VM that only started to populate its MMU
> - * anyway.
> - */
> - if (!kvm->arch.n_used_mmu_pages &&
> - !kvm_has_zapped_obsolete_pages(kvm))
> - continue;
> -
> - idx = srcu_read_lock(&kvm->srcu);
> - write_lock(&kvm->mmu_lock);
> -
> - if (kvm_has_zapped_obsolete_pages(kvm)) {
> - kvm_mmu_commit_zap_page(kvm,
> - &kvm->arch.zapped_obsolete_pages);
> - goto unlock;
> - }
> + idx = srcu_read_lock(&kvm->srcu);
> + write_lock(&kvm->mmu_lock);
>
> - freed = kvm_mmu_zap_oldest_mmu_pages(kvm, sc->nr_to_scan);
> + if (kvm_shadow_mmu_has_zapped_obsolete_pages(kvm)) {
> + kvm_mmu_commit_zap_page(kvm, &kvm->arch.zapped_obsolete_pages);
> + goto out;
> + }
>
> -unlock:
> - write_unlock(&kvm->mmu_lock);
> - srcu_read_unlock(&kvm->srcu, idx);
> + freed = kvm_mmu_zap_oldest_mmu_pages(kvm, pages_to_free);
>
> - /*
> - * unfair on small ones
> - * per-vm shrinkers cry out
> - * sadness comes quickly
> - */
> - list_move_tail(&kvm->vm_list, &vm_list);
> - break;
> - }
> +out:
> + write_unlock(&kvm->mmu_lock);
> + srcu_read_unlock(&kvm->srcu, idx);
>
> - mutex_unlock(&kvm_lock);
> return freed;
> }
> diff --git a/arch/x86/kvm/mmu/shadow_mmu.h b/arch/x86/kvm/mmu/shadow_mmu.h
> index 20c65a0ea52c..9952aa1e86cf 100644
> --- a/arch/x86/kvm/mmu/shadow_mmu.h
> +++ b/arch/x86/kvm/mmu/shadow_mmu.h
> @@ -99,7 +99,8 @@ void kvm_shadow_mmu_try_split_huge_pages(struct kvm *kvm,
> void kvm_rmap_zap_collapsible_sptes(struct kvm *kvm,
> const struct kvm_memory_slot *slot);
>
> -unsigned long mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc);
> +bool kvm_shadow_mmu_has_zapped_obsolete_pages(struct kvm *kvm);
> +unsigned long kvm_shadow_mmu_shrink_scan(struct kvm *kvm, int pages_to_free);
>
> /* Exports from paging_tmpl.h */
> gpa_t paging32_gva_to_gpa(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
> --
> 2.39.0.314.g84b9a713c41-goog
>
On Wed, Dec 21, 2022 at 2:24 PM Ben Gardon <bgardon@google.com> wrote:
>
> The MMU shrinker currently only operates on the Shadow MMU, but having
> the entire implemenatation in shadow_mmu.c is awkward since much of the
> function isn't Shadow MMU specific. There has also been talk of changing the
> target of the shrinker to the MMU caches rather than already allocated page
> tables. As a result, it makes sense to move some of the implementation back
> to mmu.c.
>
> No functional change intended.
>
> Signed-off-by: Ben Gardon <bgardon@google.com>
> ---
> arch/x86/kvm/mmu/mmu.c | 43 ++++++++++++++++++++++++
> arch/x86/kvm/mmu/shadow_mmu.c | 62 ++++++++---------------------------
> arch/x86/kvm/mmu/shadow_mmu.h | 3 +-
> 3 files changed, 58 insertions(+), 50 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index dd97e346c786..4c45a5b63356 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -3147,6 +3147,49 @@ static unsigned long mmu_shrink_count(struct shrinker *shrink,
> return percpu_counter_read_positive(&kvm_total_used_mmu_pages);
> }
>
> +unsigned long mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
> +{
> + struct kvm *kvm;
> + int nr_to_scan = sc->nr_to_scan;
> + unsigned long freed = 0;
> +
> + mutex_lock(&kvm_lock);
> +
> + list_for_each_entry(kvm, &vm_list, vm_list) {
> + /*
> + * Never scan more than sc->nr_to_scan VM instances.
> + * Will not hit this condition practically since we do not try
> + * to shrink more than one VM and it is very unlikely to see
> + * !n_used_mmu_pages so many times.
> + */
> + if (!nr_to_scan--)
> + break;
> +
> + /*
> + * n_used_mmu_pages is accessed without holding kvm->mmu_lock
> + * here. We may skip a VM instance errorneosly, but we do not
> + * want to shrink a VM that only started to populate its MMU
> + * anyway.
> + */
> + if (!kvm->arch.n_used_mmu_pages &&
> + !kvm_shadow_mmu_has_zapped_obsolete_pages(kvm))
> + continue;
> +
> + freed = kvm_shadow_mmu_shrink_scan(kvm, sc->nr_to_scan);
> +
> + /*
> + * unfair on small ones
> + * per-vm shrinkers cry out
> + * sadness comes quickly
> + */
> + list_move_tail(&kvm->vm_list, &vm_list);
> + break;
> + }
> +
> + mutex_unlock(&kvm_lock);
> + return freed;
> +}
> +
> static struct shrinker mmu_shrinker = {
> .count_objects = mmu_shrink_count,
> .scan_objects = mmu_shrink_scan,
> diff --git a/arch/x86/kvm/mmu/shadow_mmu.c b/arch/x86/kvm/mmu/shadow_mmu.c
> index 090b4788f7de..1259c4a3b140 100644
> --- a/arch/x86/kvm/mmu/shadow_mmu.c
> +++ b/arch/x86/kvm/mmu/shadow_mmu.c
> @@ -3147,7 +3147,7 @@ void kvm_zap_obsolete_pages(struct kvm *kvm)
> kvm_mmu_commit_zap_page(kvm, &kvm->arch.zapped_obsolete_pages);
> }
>
> -static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
> +bool kvm_shadow_mmu_has_zapped_obsolete_pages(struct kvm *kvm)
Function renaming and removing static should be two separate commits.
> {
> return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages));
> }
> @@ -3416,60 +3416,24 @@ void kvm_rmap_zap_collapsible_sptes(struct kvm *kvm,
> kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
> }
>
> -unsigned long mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
> +unsigned long kvm_shadow_mmu_shrink_scan(struct kvm *kvm, int pages_to_free)
> {
> - struct kvm *kvm;
> - int nr_to_scan = sc->nr_to_scan;
> unsigned long freed = 0;
> + int idx;
>
> - mutex_lock(&kvm_lock);
> -
> - list_for_each_entry(kvm, &vm_list, vm_list) {
> - int idx;
> - LIST_HEAD(invalid_list);
> -
> - /*
> - * Never scan more than sc->nr_to_scan VM instances.
> - * Will not hit this condition practically since we do not try
> - * to shrink more than one VM and it is very unlikely to see
> - * !n_used_mmu_pages so many times.
> - */
> - if (!nr_to_scan--)
> - break;
> - /*
> - * n_used_mmu_pages is accessed without holding kvm->mmu_lock
> - * here. We may skip a VM instance errorneosly, but we do not
> - * want to shrink a VM that only started to populate its MMU
> - * anyway.
> - */
> - if (!kvm->arch.n_used_mmu_pages &&
> - !kvm_has_zapped_obsolete_pages(kvm))
> - continue;
> -
> - idx = srcu_read_lock(&kvm->srcu);
> - write_lock(&kvm->mmu_lock);
> -
> - if (kvm_has_zapped_obsolete_pages(kvm)) {
> - kvm_mmu_commit_zap_page(kvm,
> - &kvm->arch.zapped_obsolete_pages);
> - goto unlock;
> - }
> + idx = srcu_read_lock(&kvm->srcu);
> + write_lock(&kvm->mmu_lock);
>
> - freed = kvm_mmu_zap_oldest_mmu_pages(kvm, sc->nr_to_scan);
> + if (kvm_shadow_mmu_has_zapped_obsolete_pages(kvm)) {
> + kvm_mmu_commit_zap_page(kvm, &kvm->arch.zapped_obsolete_pages);
> + goto out;
> + }
>
> -unlock:
> - write_unlock(&kvm->mmu_lock);
> - srcu_read_unlock(&kvm->srcu, idx);
> + freed = kvm_mmu_zap_oldest_mmu_pages(kvm, pages_to_free);
>
> - /*
> - * unfair on small ones
> - * per-vm shrinkers cry out
> - * sadness comes quickly
> - */
> - list_move_tail(&kvm->vm_list, &vm_list);
> - break;
> - }
> +out:
> + write_unlock(&kvm->mmu_lock);
> + srcu_read_unlock(&kvm->srcu, idx);
>
> - mutex_unlock(&kvm_lock);
> return freed;
> }
> diff --git a/arch/x86/kvm/mmu/shadow_mmu.h b/arch/x86/kvm/mmu/shadow_mmu.h
> index 20c65a0ea52c..9952aa1e86cf 100644
> --- a/arch/x86/kvm/mmu/shadow_mmu.h
> +++ b/arch/x86/kvm/mmu/shadow_mmu.h
> @@ -99,7 +99,8 @@ void kvm_shadow_mmu_try_split_huge_pages(struct kvm *kvm,
> void kvm_rmap_zap_collapsible_sptes(struct kvm *kvm,
> const struct kvm_memory_slot *slot);
>
> -unsigned long mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc);
> +bool kvm_shadow_mmu_has_zapped_obsolete_pages(struct kvm *kvm);
> +unsigned long kvm_shadow_mmu_shrink_scan(struct kvm *kvm, int pages_to_free);
>
> /* Exports from paging_tmpl.h */
> gpa_t paging32_gva_to_gpa(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
> --
> 2.39.0.314.g84b9a713c41-goog
>
@@ -3147,6 +3147,49 @@ static unsigned long mmu_shrink_count(struct shrinker *shrink,
return percpu_counter_read_positive(&kvm_total_used_mmu_pages);
}
+unsigned long mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+ struct kvm *kvm;
+ int nr_to_scan = sc->nr_to_scan;
+ unsigned long freed = 0;
+
+ mutex_lock(&kvm_lock);
+
+ list_for_each_entry(kvm, &vm_list, vm_list) {
+ /*
+ * Never scan more than sc->nr_to_scan VM instances.
+ * Will not hit this condition practically since we do not try
+ * to shrink more than one VM and it is very unlikely to see
+ * !n_used_mmu_pages so many times.
+ */
+ if (!nr_to_scan--)
+ break;
+
+ /*
+ * n_used_mmu_pages is accessed without holding kvm->mmu_lock
+ * here. We may skip a VM instance errorneosly, but we do not
+ * want to shrink a VM that only started to populate its MMU
+ * anyway.
+ */
+ if (!kvm->arch.n_used_mmu_pages &&
+ !kvm_shadow_mmu_has_zapped_obsolete_pages(kvm))
+ continue;
+
+ freed = kvm_shadow_mmu_shrink_scan(kvm, sc->nr_to_scan);
+
+ /*
+ * unfair on small ones
+ * per-vm shrinkers cry out
+ * sadness comes quickly
+ */
+ list_move_tail(&kvm->vm_list, &vm_list);
+ break;
+ }
+
+ mutex_unlock(&kvm_lock);
+ return freed;
+}
+
static struct shrinker mmu_shrinker = {
.count_objects = mmu_shrink_count,
.scan_objects = mmu_shrink_scan,
@@ -3147,7 +3147,7 @@ void kvm_zap_obsolete_pages(struct kvm *kvm)
kvm_mmu_commit_zap_page(kvm, &kvm->arch.zapped_obsolete_pages);
}
-static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
+bool kvm_shadow_mmu_has_zapped_obsolete_pages(struct kvm *kvm)
{
return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages));
}
@@ -3416,60 +3416,24 @@ void kvm_rmap_zap_collapsible_sptes(struct kvm *kvm,
kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
}
-unsigned long mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
+unsigned long kvm_shadow_mmu_shrink_scan(struct kvm *kvm, int pages_to_free)
{
- struct kvm *kvm;
- int nr_to_scan = sc->nr_to_scan;
unsigned long freed = 0;
+ int idx;
- mutex_lock(&kvm_lock);
-
- list_for_each_entry(kvm, &vm_list, vm_list) {
- int idx;
- LIST_HEAD(invalid_list);
-
- /*
- * Never scan more than sc->nr_to_scan VM instances.
- * Will not hit this condition practically since we do not try
- * to shrink more than one VM and it is very unlikely to see
- * !n_used_mmu_pages so many times.
- */
- if (!nr_to_scan--)
- break;
- /*
- * n_used_mmu_pages is accessed without holding kvm->mmu_lock
- * here. We may skip a VM instance errorneosly, but we do not
- * want to shrink a VM that only started to populate its MMU
- * anyway.
- */
- if (!kvm->arch.n_used_mmu_pages &&
- !kvm_has_zapped_obsolete_pages(kvm))
- continue;
-
- idx = srcu_read_lock(&kvm->srcu);
- write_lock(&kvm->mmu_lock);
-
- if (kvm_has_zapped_obsolete_pages(kvm)) {
- kvm_mmu_commit_zap_page(kvm,
- &kvm->arch.zapped_obsolete_pages);
- goto unlock;
- }
+ idx = srcu_read_lock(&kvm->srcu);
+ write_lock(&kvm->mmu_lock);
- freed = kvm_mmu_zap_oldest_mmu_pages(kvm, sc->nr_to_scan);
+ if (kvm_shadow_mmu_has_zapped_obsolete_pages(kvm)) {
+ kvm_mmu_commit_zap_page(kvm, &kvm->arch.zapped_obsolete_pages);
+ goto out;
+ }
-unlock:
- write_unlock(&kvm->mmu_lock);
- srcu_read_unlock(&kvm->srcu, idx);
+ freed = kvm_mmu_zap_oldest_mmu_pages(kvm, pages_to_free);
- /*
- * unfair on small ones
- * per-vm shrinkers cry out
- * sadness comes quickly
- */
- list_move_tail(&kvm->vm_list, &vm_list);
- break;
- }
+out:
+ write_unlock(&kvm->mmu_lock);
+ srcu_read_unlock(&kvm->srcu, idx);
- mutex_unlock(&kvm_lock);
return freed;
}
@@ -99,7 +99,8 @@ void kvm_shadow_mmu_try_split_huge_pages(struct kvm *kvm,
void kvm_rmap_zap_collapsible_sptes(struct kvm *kvm,
const struct kvm_memory_slot *slot);
-unsigned long mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc);
+bool kvm_shadow_mmu_has_zapped_obsolete_pages(struct kvm *kvm);
+unsigned long kvm_shadow_mmu_shrink_scan(struct kvm *kvm, int pages_to_free);
/* Exports from paging_tmpl.h */
gpa_t paging32_gva_to_gpa(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,