[1/7] KVM: x86/MMU: Move pte_list operations to rmap.c
Commit Message
In the interest of eventually splitting the Shadow MMU out of mmu.c,
start by moving some of the operations for manipulating pte_lists out of
mmu.c and into a new pair of files: rmap.c and rmap.h.
No functional change intended.
Signed-off-by: Ben Gardon <bgardon@google.com>
---
arch/x86/kvm/Makefile | 2 +-
arch/x86/kvm/debugfs.c | 1 +
arch/x86/kvm/mmu/mmu.c | 152 +-------------------------------
arch/x86/kvm/mmu/mmu_internal.h | 1 -
arch/x86/kvm/mmu/rmap.c | 141 +++++++++++++++++++++++++++++
arch/x86/kvm/mmu/rmap.h | 34 +++++++
6 files changed, 179 insertions(+), 152 deletions(-)
create mode 100644 arch/x86/kvm/mmu/rmap.c
create mode 100644 arch/x86/kvm/mmu/rmap.h
Comments
Hi Ben,
Thank you for the patch! Perhaps something to improve:
[auto build test WARNING on kvm/queue]
[also build test WARNING on mst-vhost/linux-next]
[cannot apply to kvm/linux-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Ben-Gardon/KVM-x86-MMU-Factor-rmap-operations-out-of-mmu-c/20221207-013733
base: https://git.kernel.org/pub/scm/virt/kvm/kvm.git queue
patch link: https://lore.kernel.org/r/20221206173601.549281-2-bgardon%40google.com
patch subject: [PATCH 1/7] KVM: x86/MMU: Move pte_list operations to rmap.c
config: x86_64-allyesconfig
compiler: gcc-11 (Debian 11.3.0-8) 11.3.0
reproduce (this is a W=1 build):
# https://github.com/intel-lab-lkp/linux/commit/22e700d90cb6d8c054c5cacffe0be568004918a8
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Ben-Gardon/KVM-x86-MMU-Factor-rmap-operations-out-of-mmu-c/20221207-013733
git checkout 22e700d90cb6d8c054c5cacffe0be568004918a8
# save the config file
mkdir build_dir && cp config build_dir/.config
make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash arch/x86/kvm/
If you fix the issue, kindly add the following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
All warnings (new ones prefixed by >>):
In file included from arch/x86/kvm/debugfs.c:12:
>> arch/x86/kvm/mmu/rmap.h:26:27: warning: 'pte_list_desc_cache' defined but not used [-Wunused-variable]
26 | static struct kmem_cache *pte_list_desc_cache;
| ^~~~~~~~~~~~~~~~~~~
vim +/pte_list_desc_cache +26 arch/x86/kvm/mmu/rmap.h
25
> 26 static struct kmem_cache *pte_list_desc_cache;
27
On Tue, Dec 6, 2022 at 9:36 AM Ben Gardon <bgardon@google.com> wrote:
>
> In the interest of eventually splitting the Shadow MMU out of mmu.c,
> start by moving some of the operations for manipulating pte_lists out of
> mmu.c and into a new pair of files: rmap.c and rmap.h.
>
> No functional change intended.
>
> Signed-off-by: Ben Gardon <bgardon@google.com>
> ---
> arch/x86/kvm/Makefile | 2 +-
> arch/x86/kvm/debugfs.c | 1 +
> arch/x86/kvm/mmu/mmu.c | 152 +-------------------------------
> arch/x86/kvm/mmu/mmu_internal.h | 1 -
> arch/x86/kvm/mmu/rmap.c | 141 +++++++++++++++++++++++++++++
> arch/x86/kvm/mmu/rmap.h | 34 +++++++
> 6 files changed, 179 insertions(+), 152 deletions(-)
> create mode 100644 arch/x86/kvm/mmu/rmap.c
> create mode 100644 arch/x86/kvm/mmu/rmap.h
>
> diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
> index 80e3fe184d17..9f766eebeddf 100644
> --- a/arch/x86/kvm/Makefile
> +++ b/arch/x86/kvm/Makefile
> @@ -12,7 +12,7 @@ include $(srctree)/virt/kvm/Makefile.kvm
> kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \
> i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
> hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o \
> - mmu/spte.o
> + mmu/spte.o mmu/rmap.o
>
> ifdef CONFIG_HYPERV
> kvm-y += kvm_onhyperv.o
> diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c
> index c1390357126a..29f692ecd6f3 100644
> --- a/arch/x86/kvm/debugfs.c
> +++ b/arch/x86/kvm/debugfs.c
> @@ -9,6 +9,7 @@
> #include "lapic.h"
> #include "mmu.h"
> #include "mmu/mmu_internal.h"
> +#include "mmu/rmap.h"
>
> static int vcpu_get_timer_advance_ns(void *data, u64 *val)
> {
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 4736d7849c60..90b3735d6064 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -26,6 +26,7 @@
> #include "kvm_emulate.h"
> #include "cpuid.h"
> #include "spte.h"
> +#include "rmap.h"
>
> #include <linux/kvm_host.h>
> #include <linux/types.h>
> @@ -112,24 +113,6 @@ module_param(dbg, bool, 0644);
>
> #include <trace/events/kvm.h>
>
> -/* make pte_list_desc fit well in cache lines */
> -#define PTE_LIST_EXT 14
> -
> -/*
> - * Slight optimization of cacheline layout, by putting `more' and `spte_count'
> - * at the start; then accessing it will only use one single cacheline for
> - * either full (entries==PTE_LIST_EXT) case or entries<=6.
> - */
> -struct pte_list_desc {
> - struct pte_list_desc *more;
> - /*
> - * Stores number of entries stored in the pte_list_desc. No need to be
> - * u64 but just for easier alignment. When PTE_LIST_EXT, means full.
> - */
> - u64 spte_count;
> - u64 *sptes[PTE_LIST_EXT];
> -};
> -
> struct kvm_shadow_walk_iterator {
> u64 addr;
> hpa_t shadow_addr;
> @@ -155,7 +138,6 @@ struct kvm_shadow_walk_iterator {
> ({ spte = mmu_spte_get_lockless(_walker.sptep); 1; }); \
> __shadow_walk_next(&(_walker), spte))
>
> -static struct kmem_cache *pte_list_desc_cache;
> struct kmem_cache *mmu_page_header_cache;
> static struct percpu_counter kvm_total_used_mmu_pages;
>
> @@ -674,11 +656,6 @@ static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
> kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache);
> }
>
> -static void mmu_free_pte_list_desc(struct pte_list_desc *pte_list_desc)
> -{
> - kmem_cache_free(pte_list_desc_cache, pte_list_desc);
> -}
> -
> static bool sp_has_gptes(struct kvm_mmu_page *sp);
>
> static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
> @@ -878,111 +855,6 @@ gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn,
> return slot;
> }
>
> -/*
> - * About rmap_head encoding:
> - *
> - * If the bit zero of rmap_head->val is clear, then it points to the only spte
> - * in this rmap chain. Otherwise, (rmap_head->val & ~1) points to a struct
> - * pte_list_desc containing more mappings.
> - */
> -
> -/*
> - * Returns the number of pointers in the rmap chain, not counting the new one.
> - */
> -static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
> - struct kvm_rmap_head *rmap_head)
> -{
> - struct pte_list_desc *desc;
> - int count = 0;
> -
> - if (!rmap_head->val) {
> - rmap_printk("%p %llx 0->1\n", spte, *spte);
> - rmap_head->val = (unsigned long)spte;
> - } else if (!(rmap_head->val & 1)) {
> - rmap_printk("%p %llx 1->many\n", spte, *spte);
> - desc = kvm_mmu_memory_cache_alloc(cache);
> - desc->sptes[0] = (u64 *)rmap_head->val;
> - desc->sptes[1] = spte;
> - desc->spte_count = 2;
> - rmap_head->val = (unsigned long)desc | 1;
> - ++count;
> - } else {
> - rmap_printk("%p %llx many->many\n", spte, *spte);
> - desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
> - while (desc->spte_count == PTE_LIST_EXT) {
> - count += PTE_LIST_EXT;
> - if (!desc->more) {
> - desc->more = kvm_mmu_memory_cache_alloc(cache);
> - desc = desc->more;
> - desc->spte_count = 0;
> - break;
> - }
> - desc = desc->more;
> - }
> - count += desc->spte_count;
> - desc->sptes[desc->spte_count++] = spte;
> - }
> - return count;
> -}
> -
> -static void
> -pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
> - struct pte_list_desc *desc, int i,
> - struct pte_list_desc *prev_desc)
> -{
> - int j = desc->spte_count - 1;
> -
> - desc->sptes[i] = desc->sptes[j];
> - desc->sptes[j] = NULL;
> - desc->spte_count--;
> - if (desc->spte_count)
> - return;
> - if (!prev_desc && !desc->more)
> - rmap_head->val = 0;
> - else
> - if (prev_desc)
> - prev_desc->more = desc->more;
> - else
> - rmap_head->val = (unsigned long)desc->more | 1;
> - mmu_free_pte_list_desc(desc);
> -}
> -
> -static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
> -{
> - struct pte_list_desc *desc;
> - struct pte_list_desc *prev_desc;
> - int i;
> -
> - if (!rmap_head->val) {
> - pr_err("%s: %p 0->BUG\n", __func__, spte);
> - BUG();
> - } else if (!(rmap_head->val & 1)) {
> - rmap_printk("%p 1->0\n", spte);
> - if ((u64 *)rmap_head->val != spte) {
> - pr_err("%s: %p 1->BUG\n", __func__, spte);
> - BUG();
> - }
> - rmap_head->val = 0;
> - } else {
> - rmap_printk("%p many->many\n", spte);
> - desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
> - prev_desc = NULL;
> - while (desc) {
> - for (i = 0; i < desc->spte_count; ++i) {
> - if (desc->sptes[i] == spte) {
> - pte_list_desc_remove_entry(rmap_head,
> - desc, i, prev_desc);
> - return;
> - }
> - }
> - prev_desc = desc;
> - desc = desc->more;
> - }
> - pr_err("%s: %p many->many\n", __func__, spte);
> - BUG();
> - }
> -}
> -
> static void kvm_zap_one_rmap_spte(struct kvm *kvm,
> struct kvm_rmap_head *rmap_head, u64 *sptep)
> {
> @@ -1011,7 +883,7 @@ static bool kvm_zap_all_rmap_sptes(struct kvm *kvm,
> for (i = 0; i < desc->spte_count; i++)
> mmu_spte_clear_track_bits(kvm, desc->sptes[i]);
> next = desc->more;
> - mmu_free_pte_list_desc(desc);
> + free_pte_list_desc(desc);
> }
> out:
> /* rmap_head is meaningless now, remember to reset it */
> @@ -1019,26 +891,6 @@ static bool kvm_zap_all_rmap_sptes(struct kvm *kvm,
> return true;
> }
>
> -unsigned int pte_list_count(struct kvm_rmap_head *rmap_head)
> -{
> - struct pte_list_desc *desc;
> - unsigned int count = 0;
> -
> - if (!rmap_head->val)
> - return 0;
> - else if (!(rmap_head->val & 1))
> - return 1;
> -
> - desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
> -
> - while (desc) {
> - count += desc->spte_count;
> - desc = desc->more;
> - }
> -
> - return count;
> -}
> -
> static struct kvm_rmap_head *gfn_to_rmap(gfn_t gfn, int level,
> const struct kvm_memory_slot *slot)
> {
> diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> index dbaf6755c5a7..cd1c8f32269d 100644
> --- a/arch/x86/kvm/mmu/mmu_internal.h
> +++ b/arch/x86/kvm/mmu/mmu_internal.h
> @@ -166,7 +166,6 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
> int min_level);
> void kvm_flush_remote_tlbs_with_address(struct kvm *kvm,
> u64 start_gfn, u64 pages);
> -unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);
>
> extern int nx_huge_pages;
> static inline bool is_nx_huge_page_enabled(struct kvm *kvm)
> diff --git a/arch/x86/kvm/mmu/rmap.c b/arch/x86/kvm/mmu/rmap.c
> new file mode 100644
> index 000000000000..daa99dee0709
> --- /dev/null
> +++ b/arch/x86/kvm/mmu/rmap.c
> @@ -0,0 +1,141 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
A comment here describing what is expected of this file and what code
lives in it would be nice.
> +#include "mmu.h"
> +#include "mmu_internal.h"
> +#include "mmutrace.h"
> +#include "rmap.h"
> +#include "spte.h"
> +
> +#include <asm/cmpxchg.h>
> +#include <trace/events/kvm.h>
> +
> +/*
> + * About rmap_head encoding:
> + *
> + * If the bit zero of rmap_head->val is clear, then it points to the only spte
> + * in this rmap chain. Otherwise, (rmap_head->val & ~1) points to a struct
> + * pte_list_desc containing more mappings.
> + */
> +
> +/*
> + * Returns the number of pointers in the rmap chain, not counting the new one.
> + */
> +int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
> + struct kvm_rmap_head *rmap_head)
> +{
> + struct pte_list_desc *desc;
> + int count = 0;
> +
> + if (!rmap_head->val) {
> + rmap_printk("%p %llx 0->1\n", spte, *spte);
> + rmap_head->val = (unsigned long)spte;
> + } else if (!(rmap_head->val & 1)) {
> + rmap_printk("%p %llx 1->many\n", spte, *spte);
> + desc = kvm_mmu_memory_cache_alloc(cache);
> + desc->sptes[0] = (u64 *)rmap_head->val;
> + desc->sptes[1] = spte;
> + desc->spte_count = 2;
> + rmap_head->val = (unsigned long)desc | 1;
> + ++count;
> + } else {
> + rmap_printk("%p %llx many->many\n", spte, *spte);
> + desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
> + while (desc->spte_count == PTE_LIST_EXT) {
> + count += PTE_LIST_EXT;
> + if (!desc->more) {
> + desc->more = kvm_mmu_memory_cache_alloc(cache);
> + desc = desc->more;
> + desc->spte_count = 0;
> + break;
> + }
> + desc = desc->more;
> + }
> + count += desc->spte_count;
> + desc->sptes[desc->spte_count++] = spte;
> + }
> + return count;
> +}
> +
> +void free_pte_list_desc(struct pte_list_desc *pte_list_desc)
> +{
> + kmem_cache_free(pte_list_desc_cache, pte_list_desc);
> +}
> +
> +static void
> +pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
> + struct pte_list_desc *desc, int i,
> + struct pte_list_desc *prev_desc)
> +{
> + int j = desc->spte_count - 1;
> +
> + desc->sptes[i] = desc->sptes[j];
> + desc->sptes[j] = NULL;
> + desc->spte_count--;
> + if (desc->spte_count)
> + return;
> + if (!prev_desc && !desc->more)
> + rmap_head->val = 0;
> + else
> + if (prev_desc)
> + prev_desc->more = desc->more;
> + else
> + rmap_head->val = (unsigned long)desc->more | 1;
> + free_pte_list_desc(desc);
> +}
> +
> +void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
> +{
> + struct pte_list_desc *desc;
> + struct pte_list_desc *prev_desc;
> + int i;
> +
> + if (!rmap_head->val) {
> + pr_err("%s: %p 0->BUG\n", __func__, spte);
> + BUG();
> + } else if (!(rmap_head->val & 1)) {
> + rmap_printk("%p 1->0\n", spte);
> + if ((u64 *)rmap_head->val != spte) {
> + pr_err("%s: %p 1->BUG\n", __func__, spte);
> + BUG();
> + }
> + rmap_head->val = 0;
> + } else {
> + rmap_printk("%p many->many\n", spte);
> + desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
> + prev_desc = NULL;
> + while (desc) {
> + for (i = 0; i < desc->spte_count; ++i) {
> + if (desc->sptes[i] == spte) {
> + pte_list_desc_remove_entry(rmap_head,
> + desc, i, prev_desc);
> + return;
> + }
> + }
> + prev_desc = desc;
> + desc = desc->more;
> + }
> + pr_err("%s: %p many->many\n", __func__, spte);
> + BUG();
> + }
> +}
> +
> +unsigned int pte_list_count(struct kvm_rmap_head *rmap_head)
> +{
> + struct pte_list_desc *desc;
> + unsigned int count = 0;
> +
> + if (!rmap_head->val)
> + return 0;
> + else if (!(rmap_head->val & 1))
> + return 1;
> +
> + desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
> +
> + while (desc) {
> + count += desc->spte_count;
> + desc = desc->more;
> + }
> +
> + return count;
> +}
> +
> diff --git a/arch/x86/kvm/mmu/rmap.h b/arch/x86/kvm/mmu/rmap.h
> new file mode 100644
> index 000000000000..059765b6e066
> --- /dev/null
> +++ b/arch/x86/kvm/mmu/rmap.h
> @@ -0,0 +1,34 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#ifndef __KVM_X86_MMU_RMAP_H
> +#define __KVM_X86_MMU_RMAP_H
> +
> +#include <linux/kvm_host.h>
> +
> +/* make pte_list_desc fit well in cache lines */
> +#define PTE_LIST_EXT 14
> +
> +/*
> + * Slight optimization of cacheline layout, by putting `more' and `spte_count'
> + * at the start; then accessing it will only use one single cacheline for
> + * either full (entries==PTE_LIST_EXT) case or entries<=6.
> + */
> +struct pte_list_desc {
> + struct pte_list_desc *more;
> + /*
> + * Stores number of entries stored in the pte_list_desc. No need to be
> + * u64 but just for easier alignment. When PTE_LIST_EXT, means full.
> + */
> + u64 spte_count;
> + u64 *sptes[PTE_LIST_EXT];
> +};
> +
> +static struct kmem_cache *pte_list_desc_cache;
Does it make sense to make it non-static and declare it extern here? Also,
could you provide an init function that can be called from mmu.c?
> +
> +int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
> + struct kvm_rmap_head *rmap_head);
> +void free_pte_list_desc(struct pte_list_desc *pte_list_desc);
> +void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head);
> +unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);
> +
Similar to tdp_mmu and the other rmap functions in the next patches in the
series, should the above functions be prefixed with "rmap_"?
> +#endif /* __KVM_X86_MMU_RMAP_H */
> --
> 2.39.0.rc0.267.gcb52ba06e7-goog
>
On Tue, Dec 06, 2022 at 05:35:55PM +0000, Ben Gardon wrote:
> In the interest of eventually splitting the Shadow MMU out of mmu.c,
> start by moving some of the operations for manipulating pte_lists out of
> mmu.c and into a new pair of files: rmap.c and rmap.h.
>
> No functional change intended.
>
> Signed-off-by: Ben Gardon <bgardon@google.com>
> ---
[...]
> diff --git a/arch/x86/kvm/mmu/rmap.h b/arch/x86/kvm/mmu/rmap.h
> new file mode 100644
> index 000000000000..059765b6e066
> --- /dev/null
> +++ b/arch/x86/kvm/mmu/rmap.h
> @@ -0,0 +1,34 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#ifndef __KVM_X86_MMU_RMAP_H
> +#define __KVM_X86_MMU_RMAP_H
> +
> +#include <linux/kvm_host.h>
> +
> +/* make pte_list_desc fit well in cache lines */
> +#define PTE_LIST_EXT 14
> +
> +/*
> + * Slight optimization of cacheline layout, by putting `more' and `spte_count'
> + * at the start; then accessing it will only use one single cacheline for
> + * either full (entries==PTE_LIST_EXT) case or entries<=6.
> + */
> +struct pte_list_desc {
> + struct pte_list_desc *more;
> + /*
> + * Stores number of entries stored in the pte_list_desc. No need to be
> + * u64 but just for easier alignment. When PTE_LIST_EXT, means full.
> + */
> + u64 spte_count;
> + u64 *sptes[PTE_LIST_EXT];
> +};
> +
> +static struct kmem_cache *pte_list_desc_cache;
The definition of pte_list_desc_cache needs to go in a C file since it's
a global variable. Since it now needs to be accessed by more than one C
file, drop the static. Then it can be accessed with extern.
Since most of the code that sets up and deals with pte_list_desc_cache
is still in mmu.c, my vote is to keep the definition there.
i.e.
mmu.c:
struct kmem_cache *pte_list_desc_cache;
rmap.c
extern struct kmem_cache *pte_list_desc_cache;
And no need for anything in rmap.h.
On Fri, Dec 9, 2022 at 2:22 PM David Matlack <dmatlack@google.com> wrote:
>
> On Tue, Dec 06, 2022 at 05:35:55PM +0000, Ben Gardon wrote:
> > In the interest of eventually splitting the Shadow MMU out of mmu.c,
> > start by moving some of the operations for manipulating pte_lists out of
> > mmu.c and into a new pair of files: rmap.c and rmap.h.
> >
> > No functional change intended.
> >
> > Signed-off-by: Ben Gardon <bgardon@google.com>
> > ---
> [...]
> > diff --git a/arch/x86/kvm/mmu/rmap.h b/arch/x86/kvm/mmu/rmap.h
> > new file mode 100644
> > index 000000000000..059765b6e066
> > --- /dev/null
> > +++ b/arch/x86/kvm/mmu/rmap.h
> > @@ -0,0 +1,34 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +
> > +#ifndef __KVM_X86_MMU_RMAP_H
> > +#define __KVM_X86_MMU_RMAP_H
> > +
> > +#include <linux/kvm_host.h>
> > +
> > +/* make pte_list_desc fit well in cache lines */
> > +#define PTE_LIST_EXT 14
> > +
> > +/*
> > + * Slight optimization of cacheline layout, by putting `more' and `spte_count'
> > + * at the start; then accessing it will only use one single cacheline for
> > + * either full (entries==PTE_LIST_EXT) case or entries<=6.
> > + */
> > +struct pte_list_desc {
> > + struct pte_list_desc *more;
> > + /*
> > + * Stores number of entries stored in the pte_list_desc. No need to be
> > + * u64 but just for easier alignment. When PTE_LIST_EXT, means full.
> > + */
> > + u64 spte_count;
> > + u64 *sptes[PTE_LIST_EXT];
> > +};
> > +
> > +static struct kmem_cache *pte_list_desc_cache;
>
> The definition of pte_list_desc_cache needs to go in a C file since it's
> a global variable. Since it now needs to be accessed by more than one C
> file, drop the static. Then it can be accessed with extern.
>
> Since most of the code that sets up and deals with pte_list_desc_cache
> is still in mmu.c, my vote is to keep the definition there.
>
> i.e.
>
> mmu.c:
>
> struct kmem_cache *pte_list_desc_cache;
>
> rmap.c
>
> extern struct kmem_cache *pte_list_desc_cache;
>
> And no need for anything in rmap.h.
Right, good point. I'll fix that in the next edition.
On Wed, Dec 7, 2022 at 2:58 PM Vipin Sharma <vipinsh@google.com> wrote:
>
> On Tue, Dec 6, 2022 at 9:36 AM Ben Gardon <bgardon@google.com> wrote:
> >
> > In the interest of eventually splitting the Shadow MMU out of mmu.c,
> > start by moving some of the operations for manipulating pte_lists out of
> > mmu.c and into a new pair of files: rmap.c and rmap.h.
> >
> > No functional change intended.
> >
> > Signed-off-by: Ben Gardon <bgardon@google.com>
> > ---
> > arch/x86/kvm/Makefile | 2 +-
> > arch/x86/kvm/debugfs.c | 1 +
> > arch/x86/kvm/mmu/mmu.c | 152 +-------------------------------
> > arch/x86/kvm/mmu/mmu_internal.h | 1 -
> > arch/x86/kvm/mmu/rmap.c | 141 +++++++++++++++++++++++++++++
> > arch/x86/kvm/mmu/rmap.h | 34 +++++++
> > 6 files changed, 179 insertions(+), 152 deletions(-)
> > create mode 100644 arch/x86/kvm/mmu/rmap.c
> > create mode 100644 arch/x86/kvm/mmu/rmap.h
> >
> > diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
> > index 80e3fe184d17..9f766eebeddf 100644
> > --- a/arch/x86/kvm/Makefile
> > +++ b/arch/x86/kvm/Makefile
> > @@ -12,7 +12,7 @@ include $(srctree)/virt/kvm/Makefile.kvm
> > kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \
> > i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
> > hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o \
> > - mmu/spte.o
> > + mmu/spte.o mmu/rmap.o
> >
> > ifdef CONFIG_HYPERV
> > kvm-y += kvm_onhyperv.o
> > diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c
> > index c1390357126a..29f692ecd6f3 100644
> > --- a/arch/x86/kvm/debugfs.c
> > +++ b/arch/x86/kvm/debugfs.c
> > @@ -9,6 +9,7 @@
> > #include "lapic.h"
> > #include "mmu.h"
> > #include "mmu/mmu_internal.h"
> > +#include "mmu/rmap.h"
> >
> > static int vcpu_get_timer_advance_ns(void *data, u64 *val)
> > {
> > diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> > index 4736d7849c60..90b3735d6064 100644
> > --- a/arch/x86/kvm/mmu/mmu.c
> > +++ b/arch/x86/kvm/mmu/mmu.c
> > @@ -26,6 +26,7 @@
> > #include "kvm_emulate.h"
> > #include "cpuid.h"
> > #include "spte.h"
> > +#include "rmap.h"
> >
> > #include <linux/kvm_host.h>
> > #include <linux/types.h>
> > @@ -112,24 +113,6 @@ module_param(dbg, bool, 0644);
> >
> > #include <trace/events/kvm.h>
> >
> > -/* make pte_list_desc fit well in cache lines */
> > -#define PTE_LIST_EXT 14
> > -
> > -/*
> > - * Slight optimization of cacheline layout, by putting `more' and `spte_count'
> > - * at the start; then accessing it will only use one single cacheline for
> > - * either full (entries==PTE_LIST_EXT) case or entries<=6.
> > - */
> > -struct pte_list_desc {
> > - struct pte_list_desc *more;
> > - /*
> > - * Stores number of entries stored in the pte_list_desc. No need to be
> > - * u64 but just for easier alignment. When PTE_LIST_EXT, means full.
> > - */
> > - u64 spte_count;
> > - u64 *sptes[PTE_LIST_EXT];
> > -};
> > -
> > struct kvm_shadow_walk_iterator {
> > u64 addr;
> > hpa_t shadow_addr;
> > @@ -155,7 +138,6 @@ struct kvm_shadow_walk_iterator {
> > ({ spte = mmu_spte_get_lockless(_walker.sptep); 1; }); \
> > __shadow_walk_next(&(_walker), spte))
> >
> > -static struct kmem_cache *pte_list_desc_cache;
> > struct kmem_cache *mmu_page_header_cache;
> > static struct percpu_counter kvm_total_used_mmu_pages;
> >
> > @@ -674,11 +656,6 @@ static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
> > kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache);
> > }
> >
> > -static void mmu_free_pte_list_desc(struct pte_list_desc *pte_list_desc)
> > -{
> > - kmem_cache_free(pte_list_desc_cache, pte_list_desc);
> > -}
> > -
> > static bool sp_has_gptes(struct kvm_mmu_page *sp);
> >
> > static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
> > @@ -878,111 +855,6 @@ gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn,
> > return slot;
> > }
> >
> > -/*
> > - * About rmap_head encoding:
> > - *
> > - * If the bit zero of rmap_head->val is clear, then it points to the only spte
> > - * in this rmap chain. Otherwise, (rmap_head->val & ~1) points to a struct
> > - * pte_list_desc containing more mappings.
> > - */
> > -
> > -/*
> > - * Returns the number of pointers in the rmap chain, not counting the new one.
> > - */
> > -static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
> > - struct kvm_rmap_head *rmap_head)
> > -{
> > - struct pte_list_desc *desc;
> > - int count = 0;
> > -
> > - if (!rmap_head->val) {
> > - rmap_printk("%p %llx 0->1\n", spte, *spte);
> > - rmap_head->val = (unsigned long)spte;
> > - } else if (!(rmap_head->val & 1)) {
> > - rmap_printk("%p %llx 1->many\n", spte, *spte);
> > - desc = kvm_mmu_memory_cache_alloc(cache);
> > - desc->sptes[0] = (u64 *)rmap_head->val;
> > - desc->sptes[1] = spte;
> > - desc->spte_count = 2;
> > - rmap_head->val = (unsigned long)desc | 1;
> > - ++count;
> > - } else {
> > - rmap_printk("%p %llx many->many\n", spte, *spte);
> > - desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
> > - while (desc->spte_count == PTE_LIST_EXT) {
> > - count += PTE_LIST_EXT;
> > - if (!desc->more) {
> > - desc->more = kvm_mmu_memory_cache_alloc(cache);
> > - desc = desc->more;
> > - desc->spte_count = 0;
> > - break;
> > - }
> > - desc = desc->more;
> > - }
> > - count += desc->spte_count;
> > - desc->sptes[desc->spte_count++] = spte;
> > - }
> > - return count;
> > -}
> > -
> > -static void
> > -pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
> > - struct pte_list_desc *desc, int i,
> > - struct pte_list_desc *prev_desc)
> > -{
> > - int j = desc->spte_count - 1;
> > -
> > - desc->sptes[i] = desc->sptes[j];
> > - desc->sptes[j] = NULL;
> > - desc->spte_count--;
> > - if (desc->spte_count)
> > - return;
> > - if (!prev_desc && !desc->more)
> > - rmap_head->val = 0;
> > - else
> > - if (prev_desc)
> > - prev_desc->more = desc->more;
> > - else
> > - rmap_head->val = (unsigned long)desc->more | 1;
> > - mmu_free_pte_list_desc(desc);
> > -}
> > -
> > -static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
> > -{
> > - struct pte_list_desc *desc;
> > - struct pte_list_desc *prev_desc;
> > - int i;
> > -
> > - if (!rmap_head->val) {
> > - pr_err("%s: %p 0->BUG\n", __func__, spte);
> > - BUG();
> > - } else if (!(rmap_head->val & 1)) {
> > - rmap_printk("%p 1->0\n", spte);
> > - if ((u64 *)rmap_head->val != spte) {
> > - pr_err("%s: %p 1->BUG\n", __func__, spte);
> > - BUG();
> > - }
> > - rmap_head->val = 0;
> > - } else {
> > - rmap_printk("%p many->many\n", spte);
> > - desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
> > - prev_desc = NULL;
> > - while (desc) {
> > - for (i = 0; i < desc->spte_count; ++i) {
> > - if (desc->sptes[i] == spte) {
> > - pte_list_desc_remove_entry(rmap_head,
> > - desc, i, prev_desc);
> > - return;
> > - }
> > - }
> > - prev_desc = desc;
> > - desc = desc->more;
> > - }
> > - pr_err("%s: %p many->many\n", __func__, spte);
> > - BUG();
> > - }
> > -}
> > -
> > static void kvm_zap_one_rmap_spte(struct kvm *kvm,
> > struct kvm_rmap_head *rmap_head, u64 *sptep)
> > {
> > @@ -1011,7 +883,7 @@ static bool kvm_zap_all_rmap_sptes(struct kvm *kvm,
> > for (i = 0; i < desc->spte_count; i++)
> > mmu_spte_clear_track_bits(kvm, desc->sptes[i]);
> > next = desc->more;
> > - mmu_free_pte_list_desc(desc);
> > + free_pte_list_desc(desc);
> > }
> > out:
> > /* rmap_head is meaningless now, remember to reset it */
> > @@ -1019,26 +891,6 @@ static bool kvm_zap_all_rmap_sptes(struct kvm *kvm,
> > return true;
> > }
> >
> > -unsigned int pte_list_count(struct kvm_rmap_head *rmap_head)
> > -{
> > - struct pte_list_desc *desc;
> > - unsigned int count = 0;
> > -
> > - if (!rmap_head->val)
> > - return 0;
> > - else if (!(rmap_head->val & 1))
> > - return 1;
> > -
> > - desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
> > -
> > - while (desc) {
> > - count += desc->spte_count;
> > - desc = desc->more;
> > - }
> > -
> > - return count;
> > -}
> > -
> > static struct kvm_rmap_head *gfn_to_rmap(gfn_t gfn, int level,
> > const struct kvm_memory_slot *slot)
> > {
> > diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> > index dbaf6755c5a7..cd1c8f32269d 100644
> > --- a/arch/x86/kvm/mmu/mmu_internal.h
> > +++ b/arch/x86/kvm/mmu/mmu_internal.h
> > @@ -166,7 +166,6 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
> > int min_level);
> > void kvm_flush_remote_tlbs_with_address(struct kvm *kvm,
> > u64 start_gfn, u64 pages);
> > -unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);
> >
> > extern int nx_huge_pages;
> > static inline bool is_nx_huge_page_enabled(struct kvm *kvm)
> > diff --git a/arch/x86/kvm/mmu/rmap.c b/arch/x86/kvm/mmu/rmap.c
> > new file mode 100644
> > index 000000000000..daa99dee0709
> > --- /dev/null
> > +++ b/arch/x86/kvm/mmu/rmap.c
> > @@ -0,0 +1,141 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +
>
> A comment would be nice to write expectations from this file and what
> code lives here.
I'll add one.
>
> > +#include "mmu.h"
> > +#include "mmu_internal.h"
> > +#include "mmutrace.h"
> > +#include "rmap.h"
> > +#include "spte.h"
> > +
> > +#include <asm/cmpxchg.h>
> > +#include <trace/events/kvm.h>
> > +
> > +/*
> > + * About rmap_head encoding:
> > + *
> > + * If the bit zero of rmap_head->val is clear, then it points to the only spte
> > + * in this rmap chain. Otherwise, (rmap_head->val & ~1) points to a struct
> > + * pte_list_desc containing more mappings.
> > + */
> > +
> > +/*
> > + * Returns the number of pointers in the rmap chain, not counting the new one.
> > + */
> > +int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
> > + struct kvm_rmap_head *rmap_head)
> > +{
> > + struct pte_list_desc *desc;
> > + int count = 0;
> > +
> > + if (!rmap_head->val) {
> > + rmap_printk("%p %llx 0->1\n", spte, *spte);
> > + rmap_head->val = (unsigned long)spte;
> > + } else if (!(rmap_head->val & 1)) {
> > + rmap_printk("%p %llx 1->many\n", spte, *spte);
> > + desc = kvm_mmu_memory_cache_alloc(cache);
> > + desc->sptes[0] = (u64 *)rmap_head->val;
> > + desc->sptes[1] = spte;
> > + desc->spte_count = 2;
> > + rmap_head->val = (unsigned long)desc | 1;
> > + ++count;
> > + } else {
> > + rmap_printk("%p %llx many->many\n", spte, *spte);
> > + desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
> > + while (desc->spte_count == PTE_LIST_EXT) {
> > + count += PTE_LIST_EXT;
> > + if (!desc->more) {
> > + desc->more = kvm_mmu_memory_cache_alloc(cache);
> > + desc = desc->more;
> > + desc->spte_count = 0;
> > + break;
> > + }
> > + desc = desc->more;
> > + }
> > + count += desc->spte_count;
> > + desc->sptes[desc->spte_count++] = spte;
> > + }
> > + return count;
> > +}
> > +
> > +void free_pte_list_desc(struct pte_list_desc *pte_list_desc)
> > +{
> > + kmem_cache_free(pte_list_desc_cache, pte_list_desc);
> > +}
> > +
> > +static void
> > +pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
> > + struct pte_list_desc *desc, int i,
> > + struct pte_list_desc *prev_desc)
> > +{
> > + int j = desc->spte_count - 1;
> > +
> > + desc->sptes[i] = desc->sptes[j];
> > + desc->sptes[j] = NULL;
> > + desc->spte_count--;
> > + if (desc->spte_count)
> > + return;
> > + if (!prev_desc && !desc->more)
> > + rmap_head->val = 0;
> > + else
> > + if (prev_desc)
> > + prev_desc->more = desc->more;
> > + else
> > + rmap_head->val = (unsigned long)desc->more | 1;
> > + free_pte_list_desc(desc);
> > +}
> > +
> > +void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
> > +{
> > + struct pte_list_desc *desc;
> > + struct pte_list_desc *prev_desc;
> > + int i;
> > +
> > + if (!rmap_head->val) {
> > + pr_err("%s: %p 0->BUG\n", __func__, spte);
> > + BUG();
> > + } else if (!(rmap_head->val & 1)) {
> > + rmap_printk("%p 1->0\n", spte);
> > + if ((u64 *)rmap_head->val != spte) {
> > + pr_err("%s: %p 1->BUG\n", __func__, spte);
> > + BUG();
> > + }
> > + rmap_head->val = 0;
> > + } else {
> > + rmap_printk("%p many->many\n", spte);
> > + desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
> > + prev_desc = NULL;
> > + while (desc) {
> > + for (i = 0; i < desc->spte_count; ++i) {
> > + if (desc->sptes[i] == spte) {
> > + pte_list_desc_remove_entry(rmap_head,
> > + desc, i, prev_desc);
> > + return;
> > + }
> > + }
> > + prev_desc = desc;
> > + desc = desc->more;
> > + }
> > + pr_err("%s: %p many->many\n", __func__, spte);
> > + BUG();
> > + }
> > +}
> > +
> > +unsigned int pte_list_count(struct kvm_rmap_head *rmap_head)
> > +{
> > + struct pte_list_desc *desc;
> > + unsigned int count = 0;
> > +
> > + if (!rmap_head->val)
> > + return 0;
> > + else if (!(rmap_head->val & 1))
> > + return 1;
> > +
> > + desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
> > +
> > + while (desc) {
> > + count += desc->spte_count;
> > + desc = desc->more;
> > + }
> > +
> > + return count;
> > +}
> > +
> > diff --git a/arch/x86/kvm/mmu/rmap.h b/arch/x86/kvm/mmu/rmap.h
> > new file mode 100644
> > index 000000000000..059765b6e066
> > --- /dev/null
> > +++ b/arch/x86/kvm/mmu/rmap.h
> > @@ -0,0 +1,34 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +
> > +#ifndef __KVM_X86_MMU_RMAP_H
> > +#define __KVM_X86_MMU_RMAP_H
> > +
> > +#include <linux/kvm_host.h>
> > +
> > +/* make pte_list_desc fit well in cache lines */
> > +#define PTE_LIST_EXT 14
> > +
> > +/*
> > + * Slight optimization of cacheline layout, by putting `more' and `spte_count'
> > + * at the start; then accessing it will only use one single cacheline for
> > + * either full (entries==PTE_LIST_EXT) case or entries<=6.
> > + */
> > +struct pte_list_desc {
> > + struct pte_list_desc *more;
> > + /*
> > + * Stores number of entries stored in the pte_list_desc. No need to be
> > + * u64 but just for easier alignment. When PTE_LIST_EXT, means full.
> > + */
> > + u64 spte_count;
> > + u64 *sptes[PTE_LIST_EXT];
> > +};
> > +
> > +static struct kmem_cache *pte_list_desc_cache;
>
> Does it make sense to make it non static and extern here. Also, you
> can provide an init function which can be called from mmu.c?
Going to follow David's suggestion and leave it in mmu.c for now.
>
>
> > +
> > +int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
> > + struct kvm_rmap_head *rmap_head);
> > +void free_pte_list_desc(struct pte_list_desc *pte_list_desc);
> > +void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head);
> > +unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);
> > +
>
> Similar to tdp_mmu, and other rmap functions in next patches in the
> series should above functions be prefixed with "rmap_"?
I think I'm going to abandon the idea of having a separate file for
rmap stuff and just have one, larger shadow mmu file with a variety of
names. I'll clean up the naming at the end of the series once
everything is moved over and the set of things being exported from the
shadow_mmu.c file has stabilized.
>
>
> > +#endif /* __KVM_X86_MMU_RMAP_H */
> > --
> > 2.39.0.rc0.267.gcb52ba06e7-goog
> >
Hi Ben,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on kvm/queue]
[also build test ERROR on mst-vhost/linux-next linus/master v6.2-rc1 next-20221226]
[cannot apply to kvm/linux-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Ben-Gardon/KVM-x86-MMU-Factor-rmap-operations-out-of-mmu-c/20221207-013733
base: https://git.kernel.org/pub/scm/virt/kvm/kvm.git queue
patch link: https://lore.kernel.org/r/20221206173601.549281-2-bgardon%40google.com
patch subject: [PATCH 1/7] KVM: x86/MMU: Move pte_list operations to rmap.c
config: i386-randconfig-a004-20220606
compiler: gcc-11 (Debian 11.3.0-8) 11.3.0
reproduce (this is a W=1 build):
# https://github.com/intel-lab-lkp/linux/commit/22e700d90cb6d8c054c5cacffe0be568004918a8
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Ben-Gardon/KVM-x86-MMU-Factor-rmap-operations-out-of-mmu-c/20221207-013733
git checkout 22e700d90cb6d8c054c5cacffe0be568004918a8
# save the config file
mkdir build_dir && cp config build_dir/.config
make W=1 O=build_dir ARCH=i386 olddefconfig
make W=1 O=build_dir ARCH=i386 SHELL=/bin/bash
If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
All errors (new ones prefixed by >>):
In file included from arch/x86/kvm/debugfs.c:12:
>> arch/x86/kvm/mmu/rmap.h:26:27: error: 'pte_list_desc_cache' defined but not used [-Werror=unused-variable]
26 | static struct kmem_cache *pte_list_desc_cache;
| ^~~~~~~~~~~~~~~~~~~
cc1: all warnings being treated as errors
vim +/pte_list_desc_cache +26 arch/x86/kvm/mmu/rmap.h
25
> 26 static struct kmem_cache *pte_list_desc_cache;
27
@@ -12,7 +12,7 @@ include $(srctree)/virt/kvm/Makefile.kvm
kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \
i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o \
- mmu/spte.o
+ mmu/spte.o mmu/rmap.o
ifdef CONFIG_HYPERV
kvm-y += kvm_onhyperv.o
@@ -9,6 +9,7 @@
#include "lapic.h"
#include "mmu.h"
#include "mmu/mmu_internal.h"
+#include "mmu/rmap.h"
static int vcpu_get_timer_advance_ns(void *data, u64 *val)
{
@@ -26,6 +26,7 @@
#include "kvm_emulate.h"
#include "cpuid.h"
#include "spte.h"
+#include "rmap.h"
#include <linux/kvm_host.h>
#include <linux/types.h>
@@ -112,24 +113,6 @@ module_param(dbg, bool, 0644);
#include <trace/events/kvm.h>
-/* make pte_list_desc fit well in cache lines */
-#define PTE_LIST_EXT 14
-
-/*
- * Slight optimization of cacheline layout, by putting `more' and `spte_count'
- * at the start; then accessing it will only use one single cacheline for
- * either full (entries==PTE_LIST_EXT) case or entries<=6.
- */
-struct pte_list_desc {
- struct pte_list_desc *more;
- /*
- * Stores number of entries stored in the pte_list_desc. No need to be
- * u64 but just for easier alignment. When PTE_LIST_EXT, means full.
- */
- u64 spte_count;
- u64 *sptes[PTE_LIST_EXT];
-};
-
struct kvm_shadow_walk_iterator {
u64 addr;
hpa_t shadow_addr;
@@ -155,7 +138,6 @@ struct kvm_shadow_walk_iterator {
({ spte = mmu_spte_get_lockless(_walker.sptep); 1; }); \
__shadow_walk_next(&(_walker), spte))
-static struct kmem_cache *pte_list_desc_cache;
struct kmem_cache *mmu_page_header_cache;
static struct percpu_counter kvm_total_used_mmu_pages;
@@ -674,11 +656,6 @@ static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache);
}
-static void mmu_free_pte_list_desc(struct pte_list_desc *pte_list_desc)
-{
- kmem_cache_free(pte_list_desc_cache, pte_list_desc);
-}
-
static bool sp_has_gptes(struct kvm_mmu_page *sp);
static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
@@ -878,111 +855,6 @@ gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn,
return slot;
}
-/*
- * About rmap_head encoding:
- *
- * If the bit zero of rmap_head->val is clear, then it points to the only spte
- * in this rmap chain. Otherwise, (rmap_head->val & ~1) points to a struct
- * pte_list_desc containing more mappings.
- */
-
-/*
- * Returns the number of pointers in the rmap chain, not counting the new one.
- */
-static int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
- struct kvm_rmap_head *rmap_head)
-{
- struct pte_list_desc *desc;
- int count = 0;
-
- if (!rmap_head->val) {
- rmap_printk("%p %llx 0->1\n", spte, *spte);
- rmap_head->val = (unsigned long)spte;
- } else if (!(rmap_head->val & 1)) {
- rmap_printk("%p %llx 1->many\n", spte, *spte);
- desc = kvm_mmu_memory_cache_alloc(cache);
- desc->sptes[0] = (u64 *)rmap_head->val;
- desc->sptes[1] = spte;
- desc->spte_count = 2;
- rmap_head->val = (unsigned long)desc | 1;
- ++count;
- } else {
- rmap_printk("%p %llx many->many\n", spte, *spte);
- desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
- while (desc->spte_count == PTE_LIST_EXT) {
- count += PTE_LIST_EXT;
- if (!desc->more) {
- desc->more = kvm_mmu_memory_cache_alloc(cache);
- desc = desc->more;
- desc->spte_count = 0;
- break;
- }
- desc = desc->more;
- }
- count += desc->spte_count;
- desc->sptes[desc->spte_count++] = spte;
- }
- return count;
-}
-
-static void
-pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
- struct pte_list_desc *desc, int i,
- struct pte_list_desc *prev_desc)
-{
- int j = desc->spte_count - 1;
-
- desc->sptes[i] = desc->sptes[j];
- desc->sptes[j] = NULL;
- desc->spte_count--;
- if (desc->spte_count)
- return;
- if (!prev_desc && !desc->more)
- rmap_head->val = 0;
- else
- if (prev_desc)
- prev_desc->more = desc->more;
- else
- rmap_head->val = (unsigned long)desc->more | 1;
- mmu_free_pte_list_desc(desc);
-}
-
-static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
-{
- struct pte_list_desc *desc;
- struct pte_list_desc *prev_desc;
- int i;
-
- if (!rmap_head->val) {
- pr_err("%s: %p 0->BUG\n", __func__, spte);
- BUG();
- } else if (!(rmap_head->val & 1)) {
- rmap_printk("%p 1->0\n", spte);
- if ((u64 *)rmap_head->val != spte) {
- pr_err("%s: %p 1->BUG\n", __func__, spte);
- BUG();
- }
- rmap_head->val = 0;
- } else {
- rmap_printk("%p many->many\n", spte);
- desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
- prev_desc = NULL;
- while (desc) {
- for (i = 0; i < desc->spte_count; ++i) {
- if (desc->sptes[i] == spte) {
- pte_list_desc_remove_entry(rmap_head,
- desc, i, prev_desc);
- return;
- }
- }
- prev_desc = desc;
- desc = desc->more;
- }
- pr_err("%s: %p many->many\n", __func__, spte);
- BUG();
- }
-}
-
static void kvm_zap_one_rmap_spte(struct kvm *kvm,
struct kvm_rmap_head *rmap_head, u64 *sptep)
{
@@ -1011,7 +883,7 @@ static bool kvm_zap_all_rmap_sptes(struct kvm *kvm,
for (i = 0; i < desc->spte_count; i++)
mmu_spte_clear_track_bits(kvm, desc->sptes[i]);
next = desc->more;
- mmu_free_pte_list_desc(desc);
+ free_pte_list_desc(desc);
}
out:
/* rmap_head is meaningless now, remember to reset it */
@@ -1019,26 +891,6 @@ static bool kvm_zap_all_rmap_sptes(struct kvm *kvm,
return true;
}
-unsigned int pte_list_count(struct kvm_rmap_head *rmap_head)
-{
- struct pte_list_desc *desc;
- unsigned int count = 0;
-
- if (!rmap_head->val)
- return 0;
- else if (!(rmap_head->val & 1))
- return 1;
-
- desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
-
- while (desc) {
- count += desc->spte_count;
- desc = desc->more;
- }
-
- return count;
-}
-
static struct kvm_rmap_head *gfn_to_rmap(gfn_t gfn, int level,
const struct kvm_memory_slot *slot)
{
@@ -166,7 +166,6 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
int min_level);
void kvm_flush_remote_tlbs_with_address(struct kvm *kvm,
u64 start_gfn, u64 pages);
-unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);
extern int nx_huge_pages;
static inline bool is_nx_huge_page_enabled(struct kvm *kvm)
new file mode 100644
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "mmu.h"
+#include "mmu_internal.h"
+#include "mmutrace.h"
+#include "rmap.h"
+#include "spte.h"
+
+#include <asm/cmpxchg.h>
+#include <trace/events/kvm.h>
+
+/*
+ * About rmap_head encoding:
+ *
+ * If the bit zero of rmap_head->val is clear, then it points to the only spte
+ * in this rmap chain. Otherwise, (rmap_head->val & ~1) points to a struct
+ * pte_list_desc containing more mappings.
+ */
+
+/*
+ * Returns the number of pointers in the rmap chain, not counting the new one.
+ */
+int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
+ struct kvm_rmap_head *rmap_head)
+{
+ struct pte_list_desc *desc;
+ int count = 0;
+
+ if (!rmap_head->val) {
+ rmap_printk("%p %llx 0->1\n", spte, *spte);
+ rmap_head->val = (unsigned long)spte;
+ } else if (!(rmap_head->val & 1)) {
+ rmap_printk("%p %llx 1->many\n", spte, *spte);
+ desc = kvm_mmu_memory_cache_alloc(cache);
+ desc->sptes[0] = (u64 *)rmap_head->val;
+ desc->sptes[1] = spte;
+ desc->spte_count = 2;
+ rmap_head->val = (unsigned long)desc | 1;
+ ++count;
+ } else {
+ rmap_printk("%p %llx many->many\n", spte, *spte);
+ desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
+ while (desc->spte_count == PTE_LIST_EXT) {
+ count += PTE_LIST_EXT;
+ if (!desc->more) {
+ desc->more = kvm_mmu_memory_cache_alloc(cache);
+ desc = desc->more;
+ desc->spte_count = 0;
+ break;
+ }
+ desc = desc->more;
+ }
+ count += desc->spte_count;
+ desc->sptes[desc->spte_count++] = spte;
+ }
+ return count;
+}
+
+void free_pte_list_desc(struct pte_list_desc *pte_list_desc)
+{
+ kmem_cache_free(pte_list_desc_cache, pte_list_desc);
+}
+
+static void
+pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
+ struct pte_list_desc *desc, int i,
+ struct pte_list_desc *prev_desc)
+{
+ int j = desc->spte_count - 1;
+
+ desc->sptes[i] = desc->sptes[j];
+ desc->sptes[j] = NULL;
+ desc->spte_count--;
+ if (desc->spte_count)
+ return;
+ if (!prev_desc && !desc->more)
+ rmap_head->val = 0;
+ else
+ if (prev_desc)
+ prev_desc->more = desc->more;
+ else
+ rmap_head->val = (unsigned long)desc->more | 1;
+ free_pte_list_desc(desc);
+}
+
+void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
+{
+ struct pte_list_desc *desc;
+ struct pte_list_desc *prev_desc;
+ int i;
+
+ if (!rmap_head->val) {
+ pr_err("%s: %p 0->BUG\n", __func__, spte);
+ BUG();
+ } else if (!(rmap_head->val & 1)) {
+ rmap_printk("%p 1->0\n", spte);
+ if ((u64 *)rmap_head->val != spte) {
+ pr_err("%s: %p 1->BUG\n", __func__, spte);
+ BUG();
+ }
+ rmap_head->val = 0;
+ } else {
+ rmap_printk("%p many->many\n", spte);
+ desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
+ prev_desc = NULL;
+ while (desc) {
+ for (i = 0; i < desc->spte_count; ++i) {
+ if (desc->sptes[i] == spte) {
+ pte_list_desc_remove_entry(rmap_head,
+ desc, i, prev_desc);
+ return;
+ }
+ }
+ prev_desc = desc;
+ desc = desc->more;
+ }
+ pr_err("%s: %p many->many\n", __func__, spte);
+ BUG();
+ }
+}
+
+unsigned int pte_list_count(struct kvm_rmap_head *rmap_head)
+{
+ struct pte_list_desc *desc;
+ unsigned int count = 0;
+
+ if (!rmap_head->val)
+ return 0;
+ else if (!(rmap_head->val & 1))
+ return 1;
+
+ desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
+
+ while (desc) {
+ count += desc->spte_count;
+ desc = desc->more;
+ }
+
+ return count;
+}
+
new file mode 100644
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef __KVM_X86_MMU_RMAP_H
+#define __KVM_X86_MMU_RMAP_H
+
+#include <linux/kvm_host.h>
+
+/* make pte_list_desc fit well in cache lines */
+#define PTE_LIST_EXT 14
+
+/*
+ * Slight optimization of cacheline layout, by putting `more' and `spte_count'
+ * at the start; then accessing it will only use one single cacheline for
+ * either full (entries==PTE_LIST_EXT) case or entries<=6.
+ */
+struct pte_list_desc {
+ struct pte_list_desc *more;
+ /*
+ * Stores number of entries stored in the pte_list_desc. No need to be
+ * u64 but just for easier alignment. When PTE_LIST_EXT, means full.
+ */
+ u64 spte_count;
+ u64 *sptes[PTE_LIST_EXT];
+};
+
+static struct kmem_cache *pte_list_desc_cache;
+
+int pte_list_add(struct kvm_mmu_memory_cache *cache, u64 *spte,
+ struct kvm_rmap_head *rmap_head);
+void free_pte_list_desc(struct pte_list_desc *pte_list_desc);
+void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head);
+unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);
+
+#endif /* __KVM_X86_MMU_RMAP_H */