[v3,8/9] KVM: x86/mmu: Make split_shadow_page_cache NUMA aware

Message ID 20221222023457.1764-9-vipinsh@google.com
State New
Series NUMA aware page table's pages allocation

Commit Message

Vipin Sharma Dec. 22, 2022, 2:34 a.m. UTC
  Make split_shadow_page_cache NUMA aware and allocate page table's pages
during the split based on the underlying physical page's NUMA node.

Signed-off-by: Vipin Sharma <vipinsh@google.com>
---
 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/mmu/mmu.c          | 50 ++++++++++++++++++---------------
 2 files changed, 29 insertions(+), 23 deletions(-)
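
In short, the split path derives the NUMA node from the pfn backing the huge
SPTE and draws the new page table page from the matching per-node cache,
roughly (condensed from the diff below, not a literal hunk):

	nid = kvm_pfn_to_page_table_nid(spte_to_pfn(huge_spte));
	caches.shadow_page_cache = &kvm->arch.split_shadow_page_cache[nid];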
  

Comments

Ben Gardon Dec. 27, 2022, 7:42 p.m. UTC | #1
On Wed, Dec 21, 2022 at 6:35 PM Vipin Sharma <vipinsh@google.com> wrote:
>
> Make split_shadow_page_cache NUMA aware and allocate page table's pages
> during the split based on the underlying physical page's NUMA node.
>
> Signed-off-by: Vipin Sharma <vipinsh@google.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  2 +-
>  arch/x86/kvm/mmu/mmu.c          | 50 ++++++++++++++++++---------------
>  2 files changed, 29 insertions(+), 23 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index b1f319ad6f89..7b3f36ae37a4 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1410,7 +1410,7 @@ struct kvm_arch {
>          *
>          * Protected by kvm->slots_lock.
>          */
> -       struct kvm_mmu_memory_cache split_shadow_page_cache;
> +       struct kvm_mmu_memory_cache split_shadow_page_cache[MAX_NUMNODES];
>         struct kvm_mmu_memory_cache split_page_header_cache;
>
>         /*
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 511c6ef265ee..7454bfc49a51 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -6126,7 +6126,7 @@ static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
>  int kvm_mmu_init_vm(struct kvm *kvm)
>  {
>         struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
> -       int r;
> +       int r, nid;
>
>         INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
>         INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages);
> @@ -6145,8 +6145,9 @@ int kvm_mmu_init_vm(struct kvm *kvm)
>         INIT_KVM_MMU_MEMORY_CACHE(&kvm->arch.split_page_header_cache,
>                                   mmu_page_header_cache, NUMA_NO_NODE);
>
> -       INIT_KVM_MMU_MEMORY_CACHE(&kvm->arch.split_shadow_page_cache,
> -                                 NULL, NUMA_NO_NODE);
> +       for_each_node(nid)

Again, assuming no one sets CONFIG_NODES_SHIFT to a ridiculous value,
it would probably be fine to initialize the entire array here since
that doesn't take any extra memory and we're not in a super hot path.
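
For example, something along these lines (an illustrative sketch of the
suggestion, not the posted code):

	/* Initialize every entry up front, not just the possible nodes. */
	for (nid = 0; nid < MAX_NUMNODES; nid++)
		INIT_KVM_MMU_MEMORY_CACHE(&kvm->arch.split_shadow_page_cache[nid],
					  NULL, NUMA_NO_NODE);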

> +               INIT_KVM_MMU_MEMORY_CACHE(&kvm->arch.split_shadow_page_cache[nid],
> +                                         NULL, NUMA_NO_NODE);
>         spin_lock_init(&kvm->arch.split_shadow_page_cache_lock);
>
>         INIT_KVM_MMU_MEMORY_CACHE(&kvm->arch.split_desc_cache,
> @@ -6157,10 +6158,13 @@ int kvm_mmu_init_vm(struct kvm *kvm)
>
>  static void mmu_free_vm_memory_caches(struct kvm *kvm)
>  {
> +       int nid;
> +
>         kvm_mmu_free_memory_cache(&kvm->arch.split_desc_cache);
>         kvm_mmu_free_memory_cache(&kvm->arch.split_page_header_cache);
> -       mmu_free_sp_memory_cache(&kvm->arch.split_shadow_page_cache,
> -                                &kvm->arch.split_shadow_page_cache_lock);
> +       for_each_node(nid)

Again, could just iterate over the whole array here.

> +               mmu_free_sp_memory_cache(&kvm->arch.split_shadow_page_cache[nid],
> +                                        &kvm->arch.split_shadow_page_cache_lock);
>  }
>
>  void kvm_mmu_uninit_vm(struct kvm *kvm)
> @@ -6269,7 +6273,7 @@ static inline bool need_topup(struct kvm_mmu_memory_cache *cache, int min)
>         return kvm_mmu_memory_cache_nr_free_objects(cache) < min;
>  }
>
> -static bool need_topup_split_caches_or_resched(struct kvm *kvm)
> +static bool need_topup_split_caches_or_resched(struct kvm *kvm, int nid)
>  {
>         if (need_resched() || rwlock_needbreak(&kvm->mmu_lock))
>                 return true;
> @@ -6281,10 +6285,10 @@ static bool need_topup_split_caches_or_resched(struct kvm *kvm)
>          */
>         return need_topup(&kvm->arch.split_desc_cache, SPLIT_DESC_CACHE_MIN_NR_OBJECTS) ||
>                need_topup(&kvm->arch.split_page_header_cache, 1) ||
> -              need_topup(&kvm->arch.split_shadow_page_cache, 1);
> +              need_topup(&kvm->arch.split_shadow_page_cache[nid], 1);
>  }
>
> -static int topup_split_caches(struct kvm *kvm)
> +static int topup_split_caches(struct kvm *kvm, int nid)
>  {
>         /*
>          * Allocating rmap list entries when splitting huge pages for nested
> @@ -6314,18 +6318,21 @@ static int topup_split_caches(struct kvm *kvm)
>         if (r)
>                 return r;
>
> -       return mmu_topup_sp_memory_cache(&kvm->arch.split_shadow_page_cache,
> +       return mmu_topup_sp_memory_cache(&kvm->arch.split_shadow_page_cache[nid],
>                                          &kvm->arch.split_shadow_page_cache_lock,
>                                          1);
>  }
>
> -static struct kvm_mmu_page *shadow_mmu_get_sp_for_split(struct kvm *kvm, u64 *huge_sptep)
> +static struct kvm_mmu_page *shadow_mmu_get_sp_for_split(struct kvm *kvm,
> +                                                       u64 *huge_sptep,
> +                                                       u64 huge_spte)

These can go on the same line.

>  {
>         struct kvm_mmu_page *huge_sp = sptep_to_sp(huge_sptep);
>         struct shadow_page_caches caches = {};
>         union kvm_mmu_page_role role;
>         unsigned int access;
>         gfn_t gfn;
> +       int nid;
>
>         gfn = kvm_mmu_page_get_gfn(huge_sp, spte_index(huge_sptep));
>         access = kvm_mmu_page_get_access(huge_sp, spte_index(huge_sptep));
> @@ -6338,9 +6345,11 @@ static struct kvm_mmu_page *shadow_mmu_get_sp_for_split(struct kvm *kvm, u64 *hu
>          */
>         role = kvm_mmu_child_role(huge_sptep, /*direct=*/true, access);
>
> +       nid = kvm_pfn_to_page_table_nid(spte_to_pfn(huge_spte));
> +
>         /* Direct SPs do not require a shadowed_info_cache. */
>         caches.page_header_cache = &kvm->arch.split_page_header_cache;
> -       caches.shadow_page_cache = &kvm->arch.split_shadow_page_cache;
> +       caches.shadow_page_cache = &kvm->arch.split_shadow_page_cache[nid];
>         caches.shadow_page_cache_lock = &kvm->arch.split_shadow_page_cache_lock;
>
>         /* Safe to pass NULL for vCPU since requesting a direct SP. */
> @@ -6360,7 +6369,7 @@ static void shadow_mmu_split_huge_page(struct kvm *kvm,
>         gfn_t gfn;
>         int index;
>
> -       sp = shadow_mmu_get_sp_for_split(kvm, huge_sptep);
> +       sp = shadow_mmu_get_sp_for_split(kvm, huge_sptep, huge_spte);
>
>         for (index = 0; index < SPTE_ENT_PER_PAGE; index++) {
>                 sptep = &sp->spt[index];
> @@ -6398,7 +6407,7 @@ static int shadow_mmu_try_split_huge_page(struct kvm *kvm,
>                                           u64 *huge_sptep)
>  {
>         struct kvm_mmu_page *huge_sp = sptep_to_sp(huge_sptep);
> -       int level, r = 0;
> +       int level, r = 0, nid;
>         gfn_t gfn;
>         u64 spte;
>
> @@ -6406,13 +6415,14 @@ static int shadow_mmu_try_split_huge_page(struct kvm *kvm,
>         gfn = kvm_mmu_page_get_gfn(huge_sp, spte_index(huge_sptep));
>         level = huge_sp->role.level;
>         spte = *huge_sptep;
> +       nid = kvm_pfn_to_page_table_nid(spte_to_pfn(spte));
>
>         if (kvm_mmu_available_pages(kvm) <= KVM_MIN_FREE_MMU_PAGES) {
>                 r = -ENOSPC;
>                 goto out;
>         }
>
> -       if (need_topup_split_caches_or_resched(kvm)) {
> +       if (need_topup_split_caches_or_resched(kvm, nid)) {
>                 write_unlock(&kvm->mmu_lock);
>                 cond_resched();
>                 /*
> @@ -6420,7 +6430,7 @@ static int shadow_mmu_try_split_huge_page(struct kvm *kvm,
>                  * rmap iterator should be restarted because the MMU lock was
>                  * dropped.
>                  */
> -               r = topup_split_caches(kvm) ?: -EAGAIN;
> +               r = topup_split_caches(kvm, nid) ?: -EAGAIN;
>                 write_lock(&kvm->mmu_lock);
>                 goto out;
>         }
> @@ -6709,17 +6719,15 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
>  }
>
>  static unsigned long mmu_shrink_cache(struct kvm_mmu_memory_cache *cache,
> -                                     int cache_count,
>                                       spinlock_t *cache_lock)
>  {
>         unsigned long freed = 0;
>         int nid;
>
>         spin_lock(cache_lock);
> -       for (nid = 0; nid < cache_count; nid++) {
> -               if (node_online(nid) && cache[nid].nobjs)
> +       for_each_online_node(nid)
> +               if (cache[nid].nobjs)
>                         freed += kvm_mmu_empty_memory_cache(&cache[nid]);
> -       }
>         spin_unlock(cache_lock);
>         return freed;
>  }
> @@ -6741,8 +6749,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
>                         first_kvm = kvm;
>                 list_move_tail(&kvm->vm_list, &vm_list);
>
> -               freed += mmu_shrink_cache(&kvm->arch.split_shadow_page_cache,
> -                                         1,
> +               freed += mmu_shrink_cache(kvm->arch.split_shadow_page_cache,
>                                           &kvm->arch.split_shadow_page_cache_lock);
>
>                 if (freed >= sc->nr_to_scan)
> @@ -6750,7 +6757,6 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
>
>                 kvm_for_each_vcpu(i, vcpu, kvm) {
>                         freed += mmu_shrink_cache(vcpu->arch.mmu_shadow_page_cache,
> -                                                 MAX_NUMNODES,
>                                                   &vcpu->arch.mmu_shadow_page_cache_lock);
>                 }
>
> --
> 2.39.0.314.g84b9a713c41-goog
>
  
Vipin Sharma Dec. 28, 2022, 10:08 p.m. UTC | #2
On Tue, Dec 27, 2022 at 11:43 AM Ben Gardon <bgardon@google.com> wrote:
>
> On Wed, Dec 21, 2022 at 6:35 PM Vipin Sharma <vipinsh@google.com> wrote:
> >
> > Make split_shadow_page_cache NUMA aware and allocate page table's pages
> > during the split based on the underlying physical page's NUMA node.
> >
> > Signed-off-by: Vipin Sharma <vipinsh@google.com>
> > ---
> >  arch/x86/include/asm/kvm_host.h |  2 +-
> >  arch/x86/kvm/mmu/mmu.c          | 50 ++++++++++++++++++---------------
> >  2 files changed, 29 insertions(+), 23 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index b1f319ad6f89..7b3f36ae37a4 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -1410,7 +1410,7 @@ struct kvm_arch {
> >          *
> >          * Protected by kvm->slots_lock.
> >          */
> > -       struct kvm_mmu_memory_cache split_shadow_page_cache;
> > +       struct kvm_mmu_memory_cache split_shadow_page_cache[MAX_NUMNODES];
> >         struct kvm_mmu_memory_cache split_page_header_cache;
> >
> >         /*
> > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > index 511c6ef265ee..7454bfc49a51 100644
> > --- a/arch/x86/kvm/mmu/mmu.c
> > +++ b/arch/x86/kvm/mmu/mmu.c
> > @@ -6126,7 +6126,7 @@ static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
> >  int kvm_mmu_init_vm(struct kvm *kvm)
> >  {
> >         struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
> > -       int r;
> > +       int r, nid;
> >
> >         INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
> >         INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages);
> > @@ -6145,8 +6145,9 @@ int kvm_mmu_init_vm(struct kvm *kvm)
> >         INIT_KVM_MMU_MEMORY_CACHE(&kvm->arch.split_page_header_cache,
> >                                   mmu_page_header_cache, NUMA_NO_NODE);
> >
> > -       INIT_KVM_MMU_MEMORY_CACHE(&kvm->arch.split_shadow_page_cache,
> > -                                 NULL, NUMA_NO_NODE);
> > +       for_each_node(nid)
>
> Again, assuming no one sets CONFIG_NODES_SHIFT to a ridiculous value,
> it would probably be fine to initialize the entire array here since
> that doesn't take any extra memory and we're not in a super hot path.

This goes through the entire array. I think you are confusing it with
for_each_online_node().
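
For reference, the two iterators differ only in which node mask they walk
(roughly, per include/linux/nodemask.h):

	for_each_node(node)        /* walks node_states[N_POSSIBLE] */
	for_each_online_node(node) /* walks node_states[N_ONLINE]   */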

>
> > +               INIT_KVM_MMU_MEMORY_CACHE(&kvm->arch.split_shadow_page_cache[nid],
> > +                                         NULL, NUMA_NO_NODE);
> >         spin_lock_init(&kvm->arch.split_shadow_page_cache_lock);
> >
> >         INIT_KVM_MMU_MEMORY_CACHE(&kvm->arch.split_desc_cache,
> > @@ -6157,10 +6158,13 @@ int kvm_mmu_init_vm(struct kvm *kvm)
> >
> >  static void mmu_free_vm_memory_caches(struct kvm *kvm)
> >  {
> > +       int nid;
> > +
> >         kvm_mmu_free_memory_cache(&kvm->arch.split_desc_cache);
> >         kvm_mmu_free_memory_cache(&kvm->arch.split_page_header_cache);
> > -       mmu_free_sp_memory_cache(&kvm->arch.split_shadow_page_cache,
> > -                                &kvm->arch.split_shadow_page_cache_lock);
> > +       for_each_node(nid)
>
> Again, could just iterate over the whole array here.
>
> > +               mmu_free_sp_memory_cache(&kvm->arch.split_shadow_page_cache[nid],
> > +                                        &kvm->arch.split_shadow_page_cache_lock);
> >  }
> >
> >  void kvm_mmu_uninit_vm(struct kvm *kvm)
> > @@ -6269,7 +6273,7 @@ static inline bool need_topup(struct kvm_mmu_memory_cache *cache, int min)
> >         return kvm_mmu_memory_cache_nr_free_objects(cache) < min;
> >  }
> >
> > -static bool need_topup_split_caches_or_resched(struct kvm *kvm)
> > +static bool need_topup_split_caches_or_resched(struct kvm *kvm, int nid)
> >  {
> >         if (need_resched() || rwlock_needbreak(&kvm->mmu_lock))
> >                 return true;
> > @@ -6281,10 +6285,10 @@ static bool need_topup_split_caches_or_resched(struct kvm *kvm)
> >          */
> >         return need_topup(&kvm->arch.split_desc_cache, SPLIT_DESC_CACHE_MIN_NR_OBJECTS) ||
> >                need_topup(&kvm->arch.split_page_header_cache, 1) ||
> > -              need_topup(&kvm->arch.split_shadow_page_cache, 1);
> > +              need_topup(&kvm->arch.split_shadow_page_cache[nid], 1);
> >  }
> >
> > -static int topup_split_caches(struct kvm *kvm)
> > +static int topup_split_caches(struct kvm *kvm, int nid)
> >  {
> >         /*
> >          * Allocating rmap list entries when splitting huge pages for nested
> > @@ -6314,18 +6318,21 @@ static int topup_split_caches(struct kvm *kvm)
> >         if (r)
> >                 return r;
> >
> > -       return mmu_topup_sp_memory_cache(&kvm->arch.split_shadow_page_cache,
> > +       return mmu_topup_sp_memory_cache(&kvm->arch.split_shadow_page_cache[nid],
> >                                          &kvm->arch.split_shadow_page_cache_lock,
> >                                          1);
> >  }
> >
> > -static struct kvm_mmu_page *shadow_mmu_get_sp_for_split(struct kvm *kvm, u64 *huge_sptep)
> > +static struct kvm_mmu_page *shadow_mmu_get_sp_for_split(struct kvm *kvm,
> > +                                                       u64 *huge_sptep,
> > +                                                       u64 huge_spte)
>
> These can go on the same line.

Git diff is showing it weirdly. They are aligned to "struct kvm *kvm"
and both will be on different lines to keep them in the 80 char limit.


>
> >  {
> >         struct kvm_mmu_page *huge_sp = sptep_to_sp(huge_sptep);
> >         struct shadow_page_caches caches = {};
> >         union kvm_mmu_page_role role;
> >         unsigned int access;
> >         gfn_t gfn;
> > +       int nid;
> >
> >         gfn = kvm_mmu_page_get_gfn(huge_sp, spte_index(huge_sptep));
> >         access = kvm_mmu_page_get_access(huge_sp, spte_index(huge_sptep));
> > @@ -6338,9 +6345,11 @@ static struct kvm_mmu_page *shadow_mmu_get_sp_for_split(struct kvm *kvm, u64 *hu
> >          */
> >         role = kvm_mmu_child_role(huge_sptep, /*direct=*/true, access);
> >
> > +       nid = kvm_pfn_to_page_table_nid(spte_to_pfn(huge_spte));
> > +
> >         /* Direct SPs do not require a shadowed_info_cache. */
> >         caches.page_header_cache = &kvm->arch.split_page_header_cache;
> > -       caches.shadow_page_cache = &kvm->arch.split_shadow_page_cache;
> > +       caches.shadow_page_cache = &kvm->arch.split_shadow_page_cache[nid];
> >         caches.shadow_page_cache_lock = &kvm->arch.split_shadow_page_cache_lock;
> >
> >         /* Safe to pass NULL for vCPU since requesting a direct SP. */
> > @@ -6360,7 +6369,7 @@ static void shadow_mmu_split_huge_page(struct kvm *kvm,
> >         gfn_t gfn;
> >         int index;
> >
> > -       sp = shadow_mmu_get_sp_for_split(kvm, huge_sptep);
> > +       sp = shadow_mmu_get_sp_for_split(kvm, huge_sptep, huge_spte);
> >
> >         for (index = 0; index < SPTE_ENT_PER_PAGE; index++) {
> >                 sptep = &sp->spt[index];
> > @@ -6398,7 +6407,7 @@ static int shadow_mmu_try_split_huge_page(struct kvm *kvm,
> >                                           u64 *huge_sptep)
> >  {
> >         struct kvm_mmu_page *huge_sp = sptep_to_sp(huge_sptep);
> > -       int level, r = 0;
> > +       int level, r = 0, nid;
> >         gfn_t gfn;
> >         u64 spte;
> >
> > @@ -6406,13 +6415,14 @@ static int shadow_mmu_try_split_huge_page(struct kvm *kvm,
> >         gfn = kvm_mmu_page_get_gfn(huge_sp, spte_index(huge_sptep));
> >         level = huge_sp->role.level;
> >         spte = *huge_sptep;
> > +       nid = kvm_pfn_to_page_table_nid(spte_to_pfn(spte));
> >
> >         if (kvm_mmu_available_pages(kvm) <= KVM_MIN_FREE_MMU_PAGES) {
> >                 r = -ENOSPC;
> >                 goto out;
> >         }
> >
> > -       if (need_topup_split_caches_or_resched(kvm)) {
> > +       if (need_topup_split_caches_or_resched(kvm, nid)) {
> >                 write_unlock(&kvm->mmu_lock);
> >                 cond_resched();
> >                 /*
> > @@ -6420,7 +6430,7 @@ static int shadow_mmu_try_split_huge_page(struct kvm *kvm,
> >                  * rmap iterator should be restarted because the MMU lock was
> >                  * dropped.
> >                  */
> > -               r = topup_split_caches(kvm) ?: -EAGAIN;
> > +               r = topup_split_caches(kvm, nid) ?: -EAGAIN;
> >                 write_lock(&kvm->mmu_lock);
> >                 goto out;
> >         }
> > @@ -6709,17 +6719,15 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
> >  }
> >
> >  static unsigned long mmu_shrink_cache(struct kvm_mmu_memory_cache *cache,
> > -                                     int cache_count,
> >                                       spinlock_t *cache_lock)
> >  {
> >         unsigned long freed = 0;
> >         int nid;
> >
> >         spin_lock(cache_lock);
> > -       for (nid = 0; nid < cache_count; nid++) {
> > -               if (node_online(nid) && cache[nid].nobjs)
> > +       for_each_online_node(nid)
> > +               if (cache[nid].nobjs)
> >                         freed += kvm_mmu_empty_memory_cache(&cache[nid]);
> > -       }
> >         spin_unlock(cache_lock);
> >         return freed;
> >  }
> > @@ -6741,8 +6749,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
> >                         first_kvm = kvm;
> >                 list_move_tail(&kvm->vm_list, &vm_list);
> >
> > -               freed += mmu_shrink_cache(&kvm->arch.split_shadow_page_cache,
> > -                                         1,
> > +               freed += mmu_shrink_cache(kvm->arch.split_shadow_page_cache,
> >                                           &kvm->arch.split_shadow_page_cache_lock);
> >
> >                 if (freed >= sc->nr_to_scan)
> > @@ -6750,7 +6757,6 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
> >
> >                 kvm_for_each_vcpu(i, vcpu, kvm) {
> >                         freed += mmu_shrink_cache(vcpu->arch.mmu_shadow_page_cache,
> > -                                                 MAX_NUMNODES,
> >                                                   &vcpu->arch.mmu_shadow_page_cache_lock);
> >                 }
> >
> > --
> > 2.39.0.314.g84b9a713c41-goog
> >
  
David Matlack Dec. 29, 2022, 11:18 p.m. UTC | #3
On Wed, Dec 21, 2022 at 06:34:56PM -0800, Vipin Sharma wrote:
> Make split_shadow_page_cache NUMA aware and allocate page table's pages
> during the split based on the underlying physical page's NUMA node.
> 
> Signed-off-by: Vipin Sharma <vipinsh@google.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  2 +-
>  arch/x86/kvm/mmu/mmu.c          | 50 ++++++++++++++++++---------------
>  2 files changed, 29 insertions(+), 23 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index b1f319ad6f89..7b3f36ae37a4 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1410,7 +1410,7 @@ struct kvm_arch {
>  	 *
>  	 * Protected by kvm->slots_lock.
>  	 */
> -	struct kvm_mmu_memory_cache split_shadow_page_cache;
> +	struct kvm_mmu_memory_cache split_shadow_page_cache[MAX_NUMNODES];
>  	struct kvm_mmu_memory_cache split_page_header_cache;
>  
>  	/*
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 511c6ef265ee..7454bfc49a51 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -6126,7 +6126,7 @@ static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
>  int kvm_mmu_init_vm(struct kvm *kvm)
>  {
>  	struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
> -	int r;
> +	int r, nid;
>  
>  	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
>  	INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages);
> @@ -6145,8 +6145,9 @@ int kvm_mmu_init_vm(struct kvm *kvm)
>  	INIT_KVM_MMU_MEMORY_CACHE(&kvm->arch.split_page_header_cache,
>  				  mmu_page_header_cache, NUMA_NO_NODE);
>  
> -	INIT_KVM_MMU_MEMORY_CACHE(&kvm->arch.split_shadow_page_cache,
> -				  NULL, NUMA_NO_NODE);
> +	for_each_node(nid)
> +		INIT_KVM_MMU_MEMORY_CACHE(&kvm->arch.split_shadow_page_cache[nid],
> +					  NULL, NUMA_NO_NODE);
                                                ^^^^^^^^^^^^
						Should this be nid?
  
Vipin Sharma Jan. 3, 2023, 6:49 p.m. UTC | #4
On Thu, Dec 29, 2022 at 3:18 PM David Matlack <dmatlack@google.com> wrote:
>
> On Wed, Dec 21, 2022 at 06:34:56PM -0800, Vipin Sharma wrote:
> > Make split_shadow_page_cache NUMA aware and allocate page table's pages
> > during the split based on the underlying physical page's NUMA node.
> >
> > Signed-off-by: Vipin Sharma <vipinsh@google.com>
> > ---
> >  arch/x86/include/asm/kvm_host.h |  2 +-
> >  arch/x86/kvm/mmu/mmu.c          | 50 ++++++++++++++++++---------------
> >  2 files changed, 29 insertions(+), 23 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index b1f319ad6f89..7b3f36ae37a4 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -1410,7 +1410,7 @@ struct kvm_arch {
> >        *
> >        * Protected by kvm->slots_lock.
> >        */
> > -     struct kvm_mmu_memory_cache split_shadow_page_cache;
> > +     struct kvm_mmu_memory_cache split_shadow_page_cache[MAX_NUMNODES];
> >       struct kvm_mmu_memory_cache split_page_header_cache;
> >
> >       /*
> > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > index 511c6ef265ee..7454bfc49a51 100644
> > --- a/arch/x86/kvm/mmu/mmu.c
> > +++ b/arch/x86/kvm/mmu/mmu.c
> > @@ -6126,7 +6126,7 @@ static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
> >  int kvm_mmu_init_vm(struct kvm *kvm)
> >  {
> >       struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
> > -     int r;
> > +     int r, nid;
> >
> >       INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
> >       INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages);
> > @@ -6145,8 +6145,9 @@ int kvm_mmu_init_vm(struct kvm *kvm)
> >       INIT_KVM_MMU_MEMORY_CACHE(&kvm->arch.split_page_header_cache,
> >                                 mmu_page_header_cache, NUMA_NO_NODE);
> >
> > -     INIT_KVM_MMU_MEMORY_CACHE(&kvm->arch.split_shadow_page_cache,
> > -                               NULL, NUMA_NO_NODE);
> > +     for_each_node(nid)
> > +             INIT_KVM_MMU_MEMORY_CACHE(&kvm->arch.split_shadow_page_cache[nid],
> > +                                       NULL, NUMA_NO_NODE);
>                                                 ^^^^^^^^^^^^
>                                                 Should this be nid?
Yes, I will fix it in the next version. Thanks
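
Presumably the corrected hunk would then pass the node to the cache init,
i.e. (sketch, not the posted code):

	for_each_node(nid)
		INIT_KVM_MMU_MEMORY_CACHE(&kvm->arch.split_shadow_page_cache[nid],
					  NULL, nid);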
  

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b1f319ad6f89..7b3f36ae37a4 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1410,7 +1410,7 @@  struct kvm_arch {
 	 *
 	 * Protected by kvm->slots_lock.
 	 */
-	struct kvm_mmu_memory_cache split_shadow_page_cache;
+	struct kvm_mmu_memory_cache split_shadow_page_cache[MAX_NUMNODES];
 	struct kvm_mmu_memory_cache split_page_header_cache;
 
 	/*
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 511c6ef265ee..7454bfc49a51 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -6126,7 +6126,7 @@  static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
 int kvm_mmu_init_vm(struct kvm *kvm)
 {
 	struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
-	int r;
+	int r, nid;
 
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
 	INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages);
@@ -6145,8 +6145,9 @@  int kvm_mmu_init_vm(struct kvm *kvm)
 	INIT_KVM_MMU_MEMORY_CACHE(&kvm->arch.split_page_header_cache,
 				  mmu_page_header_cache, NUMA_NO_NODE);
 
-	INIT_KVM_MMU_MEMORY_CACHE(&kvm->arch.split_shadow_page_cache,
-				  NULL, NUMA_NO_NODE);
+	for_each_node(nid)
+		INIT_KVM_MMU_MEMORY_CACHE(&kvm->arch.split_shadow_page_cache[nid],
+					  NULL, NUMA_NO_NODE);
 	spin_lock_init(&kvm->arch.split_shadow_page_cache_lock);
 
 	INIT_KVM_MMU_MEMORY_CACHE(&kvm->arch.split_desc_cache,
@@ -6157,10 +6158,13 @@  int kvm_mmu_init_vm(struct kvm *kvm)
 
 static void mmu_free_vm_memory_caches(struct kvm *kvm)
 {
+	int nid;
+
 	kvm_mmu_free_memory_cache(&kvm->arch.split_desc_cache);
 	kvm_mmu_free_memory_cache(&kvm->arch.split_page_header_cache);
-	mmu_free_sp_memory_cache(&kvm->arch.split_shadow_page_cache,
-				 &kvm->arch.split_shadow_page_cache_lock);
+	for_each_node(nid)
+		mmu_free_sp_memory_cache(&kvm->arch.split_shadow_page_cache[nid],
+					 &kvm->arch.split_shadow_page_cache_lock);
 }
 
 void kvm_mmu_uninit_vm(struct kvm *kvm)
@@ -6269,7 +6273,7 @@  static inline bool need_topup(struct kvm_mmu_memory_cache *cache, int min)
 	return kvm_mmu_memory_cache_nr_free_objects(cache) < min;
 }
 
-static bool need_topup_split_caches_or_resched(struct kvm *kvm)
+static bool need_topup_split_caches_or_resched(struct kvm *kvm, int nid)
 {
 	if (need_resched() || rwlock_needbreak(&kvm->mmu_lock))
 		return true;
@@ -6281,10 +6285,10 @@  static bool need_topup_split_caches_or_resched(struct kvm *kvm)
 	 */
 	return need_topup(&kvm->arch.split_desc_cache, SPLIT_DESC_CACHE_MIN_NR_OBJECTS) ||
 	       need_topup(&kvm->arch.split_page_header_cache, 1) ||
-	       need_topup(&kvm->arch.split_shadow_page_cache, 1);
+	       need_topup(&kvm->arch.split_shadow_page_cache[nid], 1);
 }
 
-static int topup_split_caches(struct kvm *kvm)
+static int topup_split_caches(struct kvm *kvm, int nid)
 {
 	/*
 	 * Allocating rmap list entries when splitting huge pages for nested
@@ -6314,18 +6318,21 @@  static int topup_split_caches(struct kvm *kvm)
 	if (r)
 		return r;
 
-	return mmu_topup_sp_memory_cache(&kvm->arch.split_shadow_page_cache,
+	return mmu_topup_sp_memory_cache(&kvm->arch.split_shadow_page_cache[nid],
 					 &kvm->arch.split_shadow_page_cache_lock,
 					 1);
 }
 
-static struct kvm_mmu_page *shadow_mmu_get_sp_for_split(struct kvm *kvm, u64 *huge_sptep)
+static struct kvm_mmu_page *shadow_mmu_get_sp_for_split(struct kvm *kvm,
+							u64 *huge_sptep,
+							u64 huge_spte)
 {
 	struct kvm_mmu_page *huge_sp = sptep_to_sp(huge_sptep);
 	struct shadow_page_caches caches = {};
 	union kvm_mmu_page_role role;
 	unsigned int access;
 	gfn_t gfn;
+	int nid;
 
 	gfn = kvm_mmu_page_get_gfn(huge_sp, spte_index(huge_sptep));
 	access = kvm_mmu_page_get_access(huge_sp, spte_index(huge_sptep));
@@ -6338,9 +6345,11 @@  static struct kvm_mmu_page *shadow_mmu_get_sp_for_split(struct kvm *kvm, u64 *hu
 	 */
 	role = kvm_mmu_child_role(huge_sptep, /*direct=*/true, access);
 
+	nid = kvm_pfn_to_page_table_nid(spte_to_pfn(huge_spte));
+
 	/* Direct SPs do not require a shadowed_info_cache. */
 	caches.page_header_cache = &kvm->arch.split_page_header_cache;
-	caches.shadow_page_cache = &kvm->arch.split_shadow_page_cache;
+	caches.shadow_page_cache = &kvm->arch.split_shadow_page_cache[nid];
 	caches.shadow_page_cache_lock = &kvm->arch.split_shadow_page_cache_lock;
 
 	/* Safe to pass NULL for vCPU since requesting a direct SP. */
@@ -6360,7 +6369,7 @@  static void shadow_mmu_split_huge_page(struct kvm *kvm,
 	gfn_t gfn;
 	int index;
 
-	sp = shadow_mmu_get_sp_for_split(kvm, huge_sptep);
+	sp = shadow_mmu_get_sp_for_split(kvm, huge_sptep, huge_spte);
 
 	for (index = 0; index < SPTE_ENT_PER_PAGE; index++) {
 		sptep = &sp->spt[index];
@@ -6398,7 +6407,7 @@  static int shadow_mmu_try_split_huge_page(struct kvm *kvm,
 					  u64 *huge_sptep)
 {
 	struct kvm_mmu_page *huge_sp = sptep_to_sp(huge_sptep);
-	int level, r = 0;
+	int level, r = 0, nid;
 	gfn_t gfn;
 	u64 spte;
 
@@ -6406,13 +6415,14 @@  static int shadow_mmu_try_split_huge_page(struct kvm *kvm,
 	gfn = kvm_mmu_page_get_gfn(huge_sp, spte_index(huge_sptep));
 	level = huge_sp->role.level;
 	spte = *huge_sptep;
+	nid = kvm_pfn_to_page_table_nid(spte_to_pfn(spte));
 
 	if (kvm_mmu_available_pages(kvm) <= KVM_MIN_FREE_MMU_PAGES) {
 		r = -ENOSPC;
 		goto out;
 	}
 
-	if (need_topup_split_caches_or_resched(kvm)) {
+	if (need_topup_split_caches_or_resched(kvm, nid)) {
 		write_unlock(&kvm->mmu_lock);
 		cond_resched();
 		/*
@@ -6420,7 +6430,7 @@  static int shadow_mmu_try_split_huge_page(struct kvm *kvm,
 		 * rmap iterator should be restarted because the MMU lock was
 		 * dropped.
 		 */
-		r = topup_split_caches(kvm) ?: -EAGAIN;
+		r = topup_split_caches(kvm, nid) ?: -EAGAIN;
 		write_lock(&kvm->mmu_lock);
 		goto out;
 	}
@@ -6709,17 +6719,15 @@  void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
 }
 
 static unsigned long mmu_shrink_cache(struct kvm_mmu_memory_cache *cache,
-				      int cache_count,
 				      spinlock_t *cache_lock)
 {
 	unsigned long freed = 0;
 	int nid;
 
 	spin_lock(cache_lock);
-	for (nid = 0; nid < cache_count; nid++) {
-		if (node_online(nid) && cache[nid].nobjs)
+	for_each_online_node(nid)
+		if (cache[nid].nobjs)
 			freed += kvm_mmu_empty_memory_cache(&cache[nid]);
-	}
 	spin_unlock(cache_lock);
 	return freed;
 }
@@ -6741,8 +6749,7 @@  mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 			first_kvm = kvm;
 		list_move_tail(&kvm->vm_list, &vm_list);
 
-		freed += mmu_shrink_cache(&kvm->arch.split_shadow_page_cache,
-					  1,
+		freed += mmu_shrink_cache(kvm->arch.split_shadow_page_cache,
 					  &kvm->arch.split_shadow_page_cache_lock);
 
 		if (freed >= sc->nr_to_scan)
@@ -6750,7 +6757,6 @@  mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 
 		kvm_for_each_vcpu(i, vcpu, kvm) {
 			freed += mmu_shrink_cache(vcpu->arch.mmu_shadow_page_cache,
-						  MAX_NUMNODES,
 						  &vcpu->arch.mmu_shadow_page_cache_lock);
 		}