diff mbox series

[2/7] KVM: x86/MMU: Move rmap_iterator to rmap.h

Message ID	20221206173601.549281-3-bgardon@google.com
State	New
Headers	Received-SPF: pass (google.com: domain of linux-kernel-owner@vger.kernel.org designates 2620:137:e000::1:20 as permitted sender) client-ip=2620:137:e000::1:20; Date: Tue, 6 Dec 2022 17:35:56 +0000 In-Reply-To: <20221206173601.549281-1-bgardon@google.com> Mime-Version: 1.0 References: <20221206173601.549281-1-bgardon@google.com> Message-ID: <20221206173601.549281-3-bgardon@google.com> Subject: [PATCH 2/7] KVM: x86/MMU: Move rmap_iterator to rmap.h From: Ben Gardon <bgardon@google.com> To: linux-kernel@vger.kernel.org, kvm@vger.kernel.org Cc: Paolo Bonzini <pbonzini@redhat.com>, Peter Xu <peterx@redhat.com>, Sean Christopherson <seanjc@google.com>, David Matlack <dmatlack@google.com>, Vipin Sharma <vipinsh@google.com>, Ben Gardon <bgardon@google.com> Content-Type: text/plain; charset="UTF-8" Precedence: bulk
Series	KVM: x86/MMU: Factor rmap operations out of mmu.c \| [0/7] KVM: x86/MMU: Factor rmap operations out of mmu.c [1/7] KVM: x86/MMU: Move pte_list operations to rmap.c [2/7] KVM: x86/MMU: Move rmap_iterator to rmap.h [3/7] KVM: x86/MMU: Move gfn_to_rmap() to rmap.c [4/7] KVM: x86/MMU: Move rmap_can_add() and rmap_remove() to rmap.c [5/7] KVM: x86/MMU: Move the rmap walk iterator out of mmu.c [6/7] KVM: x86/MMU: Move rmap zap operations to rmap.c [7/7] KVM: x86/MMU: Move rmap_add() to rmap.c

Commit Message

Ben Gardon Dec. 6, 2022, 5:35 p.m. UTC

  In continuing to factor the rmap out of mmu.c, move the rmap_iterator
and associated functions and macros into rmap.(c|h).

No functional change intended.

Signed-off-by: Ben Gardon <bgardon@google.com>
---
 arch/x86/kvm/mmu/mmu.c  | 76 -----------------------------------------
 arch/x86/kvm/mmu/rmap.c | 61 +++++++++++++++++++++++++++++++++
 arch/x86/kvm/mmu/rmap.h | 18 ++++++++++
 3 files changed, 79 insertions(+), 76 deletions(-)

Comments

David Matlack Dec. 9, 2022, 11:04 p.m. UTC | #1

On Tue, Dec 06, 2022 at 05:35:56PM +0000, Ben Gardon wrote:
> In continuing to factor the rmap out of mmu.c, move the rmap_iterator
> and associated functions and macros into rmap.(c|h).
> 
> No functional change intended.
> 
> Signed-off-by: Ben Gardon <bgardon@google.com>
> ---
>  arch/x86/kvm/mmu/mmu.c  | 76 -----------------------------------------
>  arch/x86/kvm/mmu/rmap.c | 61 +++++++++++++++++++++++++++++++++
>  arch/x86/kvm/mmu/rmap.h | 18 ++++++++++
>  3 files changed, 79 insertions(+), 76 deletions(-)
> 
[...]
> diff --git a/arch/x86/kvm/mmu/rmap.h b/arch/x86/kvm/mmu/rmap.h
> index 059765b6e066..13b265f3a95e 100644
> --- a/arch/x86/kvm/mmu/rmap.h
> +++ b/arch/x86/kvm/mmu/rmap.h
> @@ -31,4 +31,22 @@ void free_pte_list_desc(struct pte_list_desc *pte_list_desc);
>  void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head);
>  unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);
>  
> +/*
> + * Used by the following functions to iterate through the sptes linked by a
> + * rmap.  All fields are private and not assumed to be used outside.
> + */
> +struct rmap_iterator {
> +	/* private fields */
> +	struct pte_list_desc *desc;	/* holds the sptep if not NULL */
> +	int pos;			/* index of the sptep */
> +};
> +
> +u64 *rmap_get_first(struct kvm_rmap_head *rmap_head,
> +		    struct rmap_iterator *iter);
> +u64 *rmap_get_next(struct rmap_iterator *iter);
> +
> +#define for_each_rmap_spte(_rmap_head_, _iter_, _spte_)			\
> +	for (_spte_ = rmap_get_first(_rmap_head_, _iter_);		\
> +	     _spte_; _spte_ = rmap_get_next(_iter_))
> +

I always found these function names and kvm_rmap_head confusing since
they are about iterating through the pte_list_desc data structure. The
rmap (gfn -> list of sptes) is a specific application of the
pte_list_desc structure, but not the only application. There's also
parent_ptes in struct kvm_mmu_page, which is not an rmap, just a plain
old list of ptes.

While you are refactoring this code, what do you think about doing the
following renames?

  struct kvm_rmap_head	-> struct pte_list_head
  struct rmap_iterator	-> struct pte_list_iterator
  rmap_get_first()	-> pte_list_get_first()
  rmap_get_next()	-> pte_list_get_next()
  for_each_rmap_spte()	-> for_each_pte_list_entry()

Then we can reserve the term "rmap" just for the actual rmap
(slot->arch.rmap), and code that deals with sp->parent_ptes will become
a lot more clear IMO (because it will not longer mention rmap).

e.g. We go from this:

  struct rmap_iterator iter;
  u64 *sptep;

  for_each_rmap_spte(&sp->parent_ptes, &iter, sptep) {
     ...
  }

To this:

  struct pte_list_iterator iter;
  u64 *sptep;

  for_each_pte_list_entry(&sp->parent_ptes, &iter, sptep) {
     ...
  }

Ben Gardon Dec. 14, 2022, 12:12 a.m. UTC | #2

On Fri, Dec 9, 2022 at 3:04 PM David Matlack <dmatlack@google.com> wrote:
>
> On Tue, Dec 06, 2022 at 05:35:56PM +0000, Ben Gardon wrote:
> > In continuing to factor the rmap out of mmu.c, move the rmap_iterator
> > and associated functions and macros into rmap.(c|h).
> >
> > No functional change intended.
> >
> > Signed-off-by: Ben Gardon <bgardon@google.com>
> > ---
> >  arch/x86/kvm/mmu/mmu.c  | 76 -----------------------------------------
> >  arch/x86/kvm/mmu/rmap.c | 61 +++++++++++++++++++++++++++++++++
> >  arch/x86/kvm/mmu/rmap.h | 18 ++++++++++
> >  3 files changed, 79 insertions(+), 76 deletions(-)
> >
> [...]
> > diff --git a/arch/x86/kvm/mmu/rmap.h b/arch/x86/kvm/mmu/rmap.h
> > index 059765b6e066..13b265f3a95e 100644
> > --- a/arch/x86/kvm/mmu/rmap.h
> > +++ b/arch/x86/kvm/mmu/rmap.h
> > @@ -31,4 +31,22 @@ void free_pte_list_desc(struct pte_list_desc *pte_list_desc);
> >  void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head);
> >  unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);
> >
> > +/*
> > + * Used by the following functions to iterate through the sptes linked by a
> > + * rmap.  All fields are private and not assumed to be used outside.
> > + */
> > +struct rmap_iterator {
> > +     /* private fields */
> > +     struct pte_list_desc *desc;     /* holds the sptep if not NULL */
> > +     int pos;                        /* index of the sptep */
> > +};
> > +
> > +u64 *rmap_get_first(struct kvm_rmap_head *rmap_head,
> > +                 struct rmap_iterator *iter);
> > +u64 *rmap_get_next(struct rmap_iterator *iter);
> > +
> > +#define for_each_rmap_spte(_rmap_head_, _iter_, _spte_)                      \
> > +     for (_spte_ = rmap_get_first(_rmap_head_, _iter_);              \
> > +          _spte_; _spte_ = rmap_get_next(_iter_))
> > +
>
> I always found these function names and kvm_rmap_head confusing since
> they are about iterating through the pte_list_desc data structure. The
> rmap (gfn -> list of sptes) is a specific application of the
> pte_list_desc structure, but not the only application. There's also
> parent_ptes in struct kvm_mmu_page, which is not an rmap, just a plain
> old list of ptes.
>
> While you are refactoring this code, what do you think about doing the
> following renames?
>
>   struct kvm_rmap_head  -> struct pte_list_head
>   struct rmap_iterator  -> struct pte_list_iterator
>   rmap_get_first()      -> pte_list_get_first()
>   rmap_get_next()       -> pte_list_get_next()
>   for_each_rmap_spte()  -> for_each_pte_list_entry()
>
> Then we can reserve the term "rmap" just for the actual rmap
> (slot->arch.rmap), and code that deals with sp->parent_ptes will become
> a lot more clear IMO (because it will not longer mention rmap).
>
> e.g. We go from this:
>
>   struct rmap_iterator iter;
>   u64 *sptep;
>
>   for_each_rmap_spte(&sp->parent_ptes, &iter, sptep) {
>      ...
>   }
>
> To this:
>
>   struct pte_list_iterator iter;
>   u64 *sptep;
>
>   for_each_pte_list_entry(&sp->parent_ptes, &iter, sptep) {
>      ...
>   }

I like this suggestion, and I do think it'll make things more
readable. It's going to be a huge patch to rename all the instances of
kvm_rmap_head, but it's probably worth it.

Sean Christopherson Dec. 14, 2022, 12:59 a.m. UTC | #3

On Tue, Dec 13, 2022, Ben Gardon wrote:
> On Fri, Dec 9, 2022 at 3:04 PM David Matlack <dmatlack@google.com> wrote:
> >
> > > +/*
> > > + * Used by the following functions to iterate through the sptes linked by a
> > > + * rmap.  All fields are private and not assumed to be used outside.
> > > + */
> > > +struct rmap_iterator {
> > > +     /* private fields */
> > > +     struct pte_list_desc *desc;     /* holds the sptep if not NULL */
> > > +     int pos;                        /* index of the sptep */
> > > +};
> > > +
> > > +u64 *rmap_get_first(struct kvm_rmap_head *rmap_head,
> > > +                 struct rmap_iterator *iter);
> > > +u64 *rmap_get_next(struct rmap_iterator *iter);
> > > +
> > > +#define for_each_rmap_spte(_rmap_head_, _iter_, _spte_)                      \
> > > +     for (_spte_ = rmap_get_first(_rmap_head_, _iter_);              \
> > > +          _spte_; _spte_ = rmap_get_next(_iter_))
> > > +
> >
> > I always found these function names and kvm_rmap_head confusing since

Heh, you definitely aren't the only one.

> > they are about iterating through the pte_list_desc data structure. The
> > rmap (gfn -> list of sptes) is a specific application of the
> > pte_list_desc structure, but not the only application. There's also
> > parent_ptes in struct kvm_mmu_page, which is not an rmap, just a plain
> > old list of ptes.
>
> > While you are refactoring this code, what do you think about doing the
> > following renames?
> >
> >   struct kvm_rmap_head  -> struct pte_list_head
> >   struct rmap_iterator  -> struct pte_list_iterator
> >   rmap_get_first()      -> pte_list_get_first()
> >   rmap_get_next()       -> pte_list_get_next()
> >   for_each_rmap_spte()  -> for_each_pte_list_entry()

I would strongly prefer to keep "spte" in this one regardless of what other naming
changes we do (see below).  Maybe just for_each_spte()?  IMO, "pte_list_entry"
unnecessarily obfuscates that it's a list of SPTEs.

> > Then we can reserve the term "rmap" just for the actual rmap
> > (slot->arch.rmap), and code that deals with sp->parent_ptes will become
> > a lot more clear IMO (because it will not longer mention rmap).
> >
> > e.g. We go from this:
> >
> >   struct rmap_iterator iter;
> >   u64 *sptep;
> >
> >   for_each_rmap_spte(&sp->parent_ptes, &iter, sptep) {
> >      ...
> >   }
> >
> > To this:
> >
> >   struct pte_list_iterator iter;
> >   u64 *sptep;
> >
> >   for_each_pte_list_entry(&sp->parent_ptes, &iter, sptep) {
> >      ...
> >   }
> 
> I like this suggestion, and I do think it'll make things more
> readable. It's going to be a huge patch to rename all the instances of
> kvm_rmap_head, but it's probably worth it.

I generally like this idea too, but tying into my above comment, before jumping
in I think we should figure out what end state we want, i.e. get the bikeshedding
out of the way now to hopefully avoid dragging out a series while various things
get nitpicked.

E.g. if we if we just rename the structs and their macros, then we'll end up with
things like

	static bool slot_rmap_write_protect(struct kvm *kvm,
					    struct pte_list_head *rmap_head,
					    const struct kvm_memory_slot *slot)
	{
		return rmap_write_protect(rmap_head, false);
	}

which isn't terrible, but there's still opportunity for cleanup, e.g.
rmap_write_protect() could easily be sptes_write_protect() or write_protect_sptes().

That will generate a naming conflict of sorts with pte_list_head if we don't also
rename that to spte_list_head.  And I think capturing that it's a list of SPTEs and
not guest PTEs will be helpful in general.

And if we rename pte_list_head, then we might as well commit 100% and use consisnent
nomenclature across the board, e.g. end up with

	static bool sptes_clear_dirty(struct kvm *kvm, struct sptes_list_head *head,
				      const struct kvm_memory_slot *slot)
	{
		u64 *sptep;
		struct spte_list_iterator iter;
		bool flush = false;

		for_each_spte(head, &iter, sptep) {
			if (spte_ad_need_write_protect(*sptep))
				flush |= spte_wrprot_for_clear_dirty(sptep);
			else
				flush |= spte_clear_dirty(sptep);
		}

		return flush;
	}

versus the current

	static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
				       const struct kvm_memory_slot *slot)
	{
		u64 *sptep;
		struct rmap_iterator iter;
		bool flush = false;

		for_each_rmap_spte(rmap_head, &iter, sptep)
			if (spte_ad_need_write_protect(*sptep))
				flush |= spte_wrprot_for_clear_dirty(sptep);
			else
				flush |= spte_clear_dirty(sptep);

		return flush;
	}

Ben Gardon Dec. 14, 2022, 5:53 p.m. UTC | #4

On Tue, Dec 13, 2022 at 4:59 PM Sean Christopherson <seanjc@google.com> wrote:
>
> On Tue, Dec 13, 2022, Ben Gardon wrote:
> > On Fri, Dec 9, 2022 at 3:04 PM David Matlack <dmatlack@google.com> wrote:
> > >
> > > > +/*
> > > > + * Used by the following functions to iterate through the sptes linked by a
> > > > + * rmap.  All fields are private and not assumed to be used outside.
> > > > + */
> > > > +struct rmap_iterator {
> > > > +     /* private fields */
> > > > +     struct pte_list_desc *desc;     /* holds the sptep if not NULL */
> > > > +     int pos;                        /* index of the sptep */
> > > > +};
> > > > +
> > > > +u64 *rmap_get_first(struct kvm_rmap_head *rmap_head,
> > > > +                 struct rmap_iterator *iter);
> > > > +u64 *rmap_get_next(struct rmap_iterator *iter);
> > > > +
> > > > +#define for_each_rmap_spte(_rmap_head_, _iter_, _spte_)                      \
> > > > +     for (_spte_ = rmap_get_first(_rmap_head_, _iter_);              \
> > > > +          _spte_; _spte_ = rmap_get_next(_iter_))
> > > > +
> > >
> > > I always found these function names and kvm_rmap_head confusing since
>
> Heh, you definitely aren't the only one.
>
> > > they are about iterating through the pte_list_desc data structure. The
> > > rmap (gfn -> list of sptes) is a specific application of the
> > > pte_list_desc structure, but not the only application. There's also
> > > parent_ptes in struct kvm_mmu_page, which is not an rmap, just a plain
> > > old list of ptes.
> >
> > > While you are refactoring this code, what do you think about doing the
> > > following renames?
> > >
> > >   struct kvm_rmap_head  -> struct pte_list_head
> > >   struct rmap_iterator  -> struct pte_list_iterator
> > >   rmap_get_first()      -> pte_list_get_first()
> > >   rmap_get_next()       -> pte_list_get_next()
> > >   for_each_rmap_spte()  -> for_each_pte_list_entry()
>
> I would strongly prefer to keep "spte" in this one regardless of what other naming
> changes we do (see below).  Maybe just for_each_spte()?  IMO, "pte_list_entry"
> unnecessarily obfuscates that it's a list of SPTEs.
>
> > > Then we can reserve the term "rmap" just for the actual rmap
> > > (slot->arch.rmap), and code that deals with sp->parent_ptes will become
> > > a lot more clear IMO (because it will not longer mention rmap).
> > >
> > > e.g. We go from this:
> > >
> > >   struct rmap_iterator iter;
> > >   u64 *sptep;
> > >
> > >   for_each_rmap_spte(&sp->parent_ptes, &iter, sptep) {
> > >      ...
> > >   }
> > >
> > > To this:
> > >
> > >   struct pte_list_iterator iter;
> > >   u64 *sptep;
> > >
> > >   for_each_pte_list_entry(&sp->parent_ptes, &iter, sptep) {
> > >      ...
> > >   }
> >
> > I like this suggestion, and I do think it'll make things more
> > readable. It's going to be a huge patch to rename all the instances of
> > kvm_rmap_head, but it's probably worth it.
>
> I generally like this idea too, but tying into my above comment, before jumping
> in I think we should figure out what end state we want, i.e. get the bikeshedding
> out of the way now to hopefully avoid dragging out a series while various things
> get nitpicked.
>
> E.g. if we if we just rename the structs and their macros, then we'll end up with
> things like
>
>         static bool slot_rmap_write_protect(struct kvm *kvm,
>                                             struct pte_list_head *rmap_head,
>                                             const struct kvm_memory_slot *slot)
>         {
>                 return rmap_write_protect(rmap_head, false);
>         }
>
> which isn't terrible, but there's still opportunity for cleanup, e.g.
> rmap_write_protect() could easily be sptes_write_protect() or write_protect_sptes().
>
> That will generate a naming conflict of sorts with pte_list_head if we don't also
> rename that to spte_list_head.  And I think capturing that it's a list of SPTEs and
> not guest PTEs will be helpful in general.
>
> And if we rename pte_list_head, then we might as well commit 100% and use consisnent
> nomenclature across the board, e.g. end up with
>
>         static bool sptes_clear_dirty(struct kvm *kvm, struct sptes_list_head *head,
>                                       const struct kvm_memory_slot *slot)
>         {
>                 u64 *sptep;
>                 struct spte_list_iterator iter;
>                 bool flush = false;
>
>                 for_each_spte(head, &iter, sptep) {
>                         if (spte_ad_need_write_protect(*sptep))
>                                 flush |= spte_wrprot_for_clear_dirty(sptep);
>                         else
>                                 flush |= spte_clear_dirty(sptep);
>                 }
>
>                 return flush;
>         }
>
> versus the current
>
>         static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
>                                        const struct kvm_memory_slot *slot)
>         {
>                 u64 *sptep;
>                 struct rmap_iterator iter;
>                 bool flush = false;
>
>                 for_each_rmap_spte(rmap_head, &iter, sptep)
>                         if (spte_ad_need_write_protect(*sptep))
>                                 flush |= spte_wrprot_for_clear_dirty(sptep);
>                         else
>                                 flush |= spte_clear_dirty(sptep);
>
>                 return flush;
>         }

I'd be happy to see some consistent SPTE-based naming in the Shadow
MMU and more or less get rid of the rmap naming scheme. Once you
change to spte_list_head or whatever, the use of the actual rmap (an
array of spte_list_heads) becomes super narrow.

Given the potential for enormous scope creep on what's already going
to be a long series, I'm inclined to split this work into two parts:
1. Move code from mmu.c to shadow_mmu.c with minimal cleanups /
refactors / renames; just move the code
2. Clean up naming conventions: make the functions exported in
shadow_mmu.h consistent, get rid of the whole rmap naming scheme, etc.

That way git-blame will preserve context around the renames /
refactors which would be obfuscated if we did 2 before 1, and we can
reduce merge conflicts.

Sean Christopherson Dec. 15, 2022, 12:34 a.m. UTC | #5

On Wed, Dec 14, 2022, Ben Gardon wrote:
> On Tue, Dec 13, 2022 at 4:59 PM Sean Christopherson <seanjc@google.com> wrote:
> > And if we rename pte_list_head, then we might as well commit 100% and use consisnent
> > nomenclature across the board, e.g. end up with

...

> I'd be happy to see some consistent SPTE-based naming in the Shadow
> MMU and more or less get rid of the rmap naming scheme. Once you
> change to spte_list_head or whatever, the use of the actual rmap (an
> array of spte_list_heads) becomes super narrow.

Yeah.  And at least for me, the more literal "walk a list of SPTEs" is much
easier for me to wrap my head around than "walk rmaps".

> Given the potential for enormous scope creep on what's already going
> to be a long series, I'm inclined to split this work into two parts:
> 1. Move code from mmu.c to shadow_mmu.c with minimal cleanups /
> refactors / renames; just move the code
> 2. Clean up naming conventions: make the functions exported in
> shadow_mmu.h consistent, get rid of the whole rmap naming scheme, etc.
> 
> That way git-blame will preserve context around the renames /
> refactors which would be obfuscated if we did 2 before 1,

+1

> and we can reduce merge conflicts.

That might be wishful thinking ;-)

One thought for the rename would be to gather all the reviews and feedback, and
then wait to send the final version until shortly before the merge window, i.e.
wait for everything else to land so that only future development gets affected.
That would give Paolo and I a bit of extra motiviation to get the x86 queue
solidified sooner than later too...

diff mbox series

Patch

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 90b3735d6064..c3a7f443a213 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -932,82 +932,6 @@  static void rmap_remove(struct kvm *kvm, u64 *spte)
 	pte_list_remove(spte, rmap_head);
 }
 
-/*
- * Used by the following functions to iterate through the sptes linked by a
- * rmap.  All fields are private and not assumed to be used outside.
- */
-struct rmap_iterator {
-	/* private fields */
-	struct pte_list_desc *desc;	/* holds the sptep if not NULL */
-	int pos;			/* index of the sptep */
-};
-
-/*
- * Iteration must be started by this function.  This should also be used after
- * removing/dropping sptes from the rmap link because in such cases the
- * information in the iterator may not be valid.
- *
- * Returns sptep if found, NULL otherwise.
- */
-static u64 *rmap_get_first(struct kvm_rmap_head *rmap_head,
-			   struct rmap_iterator *iter)
-{
-	u64 *sptep;
-
-	if (!rmap_head->val)
-		return NULL;
-
-	if (!(rmap_head->val & 1)) {
-		iter->desc = NULL;
-		sptep = (u64 *)rmap_head->val;
-		goto out;
-	}
-
-	iter->desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
-	iter->pos = 0;
-	sptep = iter->desc->sptes[iter->pos];
-out:
-	BUG_ON(!is_shadow_present_pte(*sptep));
-	return sptep;
-}
-
-/*
- * Must be used with a valid iterator: e.g. after rmap_get_first().
- *
- * Returns sptep if found, NULL otherwise.
- */
-static u64 *rmap_get_next(struct rmap_iterator *iter)
-{
-	u64 *sptep;
-
-	if (iter->desc) {
-		if (iter->pos < PTE_LIST_EXT - 1) {
-			++iter->pos;
-			sptep = iter->desc->sptes[iter->pos];
-			if (sptep)
-				goto out;
-		}
-
-		iter->desc = iter->desc->more;
-
-		if (iter->desc) {
-			iter->pos = 0;
-			/* desc->sptes[0] cannot be NULL */
-			sptep = iter->desc->sptes[iter->pos];
-			goto out;
-		}
-	}
-
-	return NULL;
-out:
-	BUG_ON(!is_shadow_present_pte(*sptep));
-	return sptep;
-}
-
-#define for_each_rmap_spte(_rmap_head_, _iter_, _spte_)			\
-	for (_spte_ = rmap_get_first(_rmap_head_, _iter_);		\
-	     _spte_; _spte_ = rmap_get_next(_iter_))
-
 static void drop_spte(struct kvm *kvm, u64 *sptep)
 {
 	u64 old_spte = mmu_spte_clear_track_bits(kvm, sptep);
diff --git a/arch/x86/kvm/mmu/rmap.c b/arch/x86/kvm/mmu/rmap.c
index daa99dee0709..c3bad366b627 100644
--- a/arch/x86/kvm/mmu/rmap.c
+++ b/arch/x86/kvm/mmu/rmap.c
@@ -139,3 +139,64 @@  unsigned int pte_list_count(struct kvm_rmap_head *rmap_head)
 	return count;
 }
 
+/*
+ * Iteration must be started by this function.  This should also be used after
+ * removing/dropping sptes from the rmap link because in such cases the
+ * information in the iterator may not be valid.
+ *
+ * Returns sptep if found, NULL otherwise.
+ */
+u64 *rmap_get_first(struct kvm_rmap_head *rmap_head, struct rmap_iterator *iter)
+{
+	u64 *sptep;
+
+	if (!rmap_head->val)
+		return NULL;
+
+	if (!(rmap_head->val & 1)) {
+		iter->desc = NULL;
+		sptep = (u64 *)rmap_head->val;
+		goto out;
+	}
+
+	iter->desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
+	iter->pos = 0;
+	sptep = iter->desc->sptes[iter->pos];
+out:
+	BUG_ON(!is_shadow_present_pte(*sptep));
+	return sptep;
+}
+
+/*
+ * Must be used with a valid iterator: e.g. after rmap_get_first().
+ *
+ * Returns sptep if found, NULL otherwise.
+ */
+u64 *rmap_get_next(struct rmap_iterator *iter)
+{
+	u64 *sptep;
+
+	if (iter->desc) {
+		if (iter->pos < PTE_LIST_EXT - 1) {
+			++iter->pos;
+			sptep = iter->desc->sptes[iter->pos];
+			if (sptep)
+				goto out;
+		}
+
+		iter->desc = iter->desc->more;
+
+		if (iter->desc) {
+			iter->pos = 0;
+			/* desc->sptes[0] cannot be NULL */
+			sptep = iter->desc->sptes[iter->pos];
+			goto out;
+		}
+	}
+
+	return NULL;
+out:
+	BUG_ON(!is_shadow_present_pte(*sptep));
+	return sptep;
+}
+
diff --git a/arch/x86/kvm/mmu/rmap.h b/arch/x86/kvm/mmu/rmap.h
index 059765b6e066..13b265f3a95e 100644
--- a/arch/x86/kvm/mmu/rmap.h
+++ b/arch/x86/kvm/mmu/rmap.h
@@ -31,4 +31,22 @@  void free_pte_list_desc(struct pte_list_desc *pte_list_desc);
 void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head);
 unsigned int pte_list_count(struct kvm_rmap_head *rmap_head);
 
+/*
+ * Used by the following functions to iterate through the sptes linked by a
+ * rmap.  All fields are private and not assumed to be used outside.
+ */
+struct rmap_iterator {
+	/* private fields */
+	struct pte_list_desc *desc;	/* holds the sptep if not NULL */
+	int pos;			/* index of the sptep */
+};
+
+u64 *rmap_get_first(struct kvm_rmap_head *rmap_head,
+		    struct rmap_iterator *iter);
+u64 *rmap_get_next(struct rmap_iterator *iter);
+
+#define for_each_rmap_spte(_rmap_head_, _iter_, _spte_)			\
+	for (_spte_ = rmap_get_first(_rmap_head_, _iter_);		\
+	     _spte_; _spte_ = rmap_get_next(_iter_))
+
 #endif /* __KVM_X86_MMU_RMAP_H */