[v3,4/7] KVM: x86/mmu: Optimize SPTE change for aging gfn range
Commit Message
No need to check all of the conditions in __handle_changed_spte(). Aging
a gfn range implies resetting access bit or marking spte for access
tracking.
Use atomic operation to only reset those bits. This avoids checking many
conditions in __handle_changed_spte() API. Also, clean up code by
removing dead code and API parameters.
Signed-off-by: Vipin Sharma <vipinsh@google.com>
---
arch/x86/kvm/mmu/tdp_mmu.c | 36 +++++++++++++++++++-----------------
1 file changed, 19 insertions(+), 17 deletions(-)
Comments
On Fri, Feb 10, 2023 at 05:46:23PM -0800, Vipin Sharma wrote:
> No need to check all of the conditions in __handle_changed_spte(). Aging
> a gfn range implies resetting access bit or marking spte for access
> tracking.
nit: State what the patch does first.
>
> Use atomic operation to only reset those bits. This avoids checking many
> conditions in __handle_changed_spte() API. Also, clean up code by
> removing dead code and API parameters.
>
> Signed-off-by: Vipin Sharma <vipinsh@google.com>
nits aside,
Reviewed-by: David Matlack <dmatlack@google.com>
> ---
> arch/x86/kvm/mmu/tdp_mmu.c | 36 +++++++++++++++++++-----------------
> 1 file changed, 19 insertions(+), 17 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> index c895560244de..5d6e77554797 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.c
> +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> @@ -758,13 +758,6 @@ static inline void tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
> _tdp_mmu_set_spte(kvm, iter, new_spte, true);
> }
>
> -static inline void tdp_mmu_set_spte_no_acc_track(struct kvm *kvm,
> - struct tdp_iter *iter,
> - u64 new_spte)
> -{
> - _tdp_mmu_set_spte(kvm, iter, new_spte, false);
> -}
> -
> #define tdp_root_for_each_pte(_iter, _root, _start, _end) \
> for_each_tdp_pte(_iter, _root, _start, _end)
>
> @@ -1251,32 +1244,41 @@ static __always_inline bool kvm_tdp_mmu_handle_gfn(struct kvm *kvm,
> /*
> * Mark the SPTEs range of GFNs [start, end) unaccessed and return non-zero
> * if any of the GFNs in the range have been accessed.
> + *
> + * No need to mark corresponding PFN as accessed as this call is coming from
> + * the clear_young() or clear_flush_young() notifier, which uses the return
> + * value to determine if the page has been accessed.
> */
> static bool age_gfn_range(struct kvm *kvm, struct tdp_iter *iter,
> struct kvm_gfn_range *range)
> {
> - u64 new_spte = 0;
> + u64 new_spte;
>
> /* If we have a non-accessed entry we don't need to change the pte. */
> if (!is_accessed_spte(iter->old_spte))
> return false;
>
> - new_spte = iter->old_spte;
> -
> - if (spte_ad_enabled(new_spte)) {
> - new_spte &= ~shadow_accessed_mask;
> + if (spte_ad_enabled(iter->old_spte)) {
> + iter->old_spte = tdp_mmu_clear_spte_bits(iter->sptep,
nit: Extra space before =
On Fri, Feb 10, 2023, Vipin Sharma wrote:
> } else {
> + new_spte = mark_spte_for_access_track(iter->old_spte);
> + iter->old_spte = kvm_tdp_mmu_write_spte(iter->sptep,
> + iter->old_spte, new_spte,
> + iter->level);
> /*
> * Capture the dirty status of the page, so that it doesn't get
> * lost when the SPTE is marked for access tracking.
> */
> - if (is_writable_pte(new_spte))
> - kvm_set_pfn_dirty(spte_to_pfn(new_spte));
> -
> - new_spte = mark_spte_for_access_track(new_spte);
> + if (is_writable_pte(iter->old_spte))
> + kvm_set_pfn_dirty(spte_to_pfn(iter->old_spte));
Moving this block below kvm_tdp_mmu_write_spte() is an unrelated change. Much to
my chagrin, I discovered that past me gave you this code. I still think the change
is correct, but I dropped it for now, mostly because the legacy/shadow MMU has the
same pattern (marks the PFN dirty before setting the SPTE).
I think this might actually be a bug fix, e.g. if the XCHG races with a fast page
fault fix and drops the Writable bit, the CPU could insert writable entry into the
TLB without KVM invoking kvm_set_pfn_dirty(). But I'm not 100% confident that I'm
not missing something, and _if_ there's a bug then mmu_spte_age() needs the same
fix, so for now, I dropped it.
@@ -758,13 +758,6 @@ static inline void tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
_tdp_mmu_set_spte(kvm, iter, new_spte, true);
}
-static inline void tdp_mmu_set_spte_no_acc_track(struct kvm *kvm,
- struct tdp_iter *iter,
- u64 new_spte)
-{
- _tdp_mmu_set_spte(kvm, iter, new_spte, false);
-}
-
#define tdp_root_for_each_pte(_iter, _root, _start, _end) \
for_each_tdp_pte(_iter, _root, _start, _end)
@@ -1251,32 +1244,41 @@ static __always_inline bool kvm_tdp_mmu_handle_gfn(struct kvm *kvm,
/*
* Mark the SPTEs range of GFNs [start, end) unaccessed and return non-zero
* if any of the GFNs in the range have been accessed.
+ *
+ * No need to mark corresponding PFN as accessed as this call is coming from
+ * the clear_young() or clear_flush_young() notifier, which uses the return
+ * value to determine if the page has been accessed.
*/
static bool age_gfn_range(struct kvm *kvm, struct tdp_iter *iter,
struct kvm_gfn_range *range)
{
- u64 new_spte = 0;
+ u64 new_spte;
/* If we have a non-accessed entry we don't need to change the pte. */
if (!is_accessed_spte(iter->old_spte))
return false;
- new_spte = iter->old_spte;
-
- if (spte_ad_enabled(new_spte)) {
- new_spte &= ~shadow_accessed_mask;
+ if (spte_ad_enabled(iter->old_spte)) {
+ iter->old_spte = tdp_mmu_clear_spte_bits(iter->sptep,
+ iter->old_spte,
+ shadow_accessed_mask,
+ iter->level);
+ new_spte = iter->old_spte & ~shadow_accessed_mask;
} else {
+ new_spte = mark_spte_for_access_track(iter->old_spte);
+ iter->old_spte = kvm_tdp_mmu_write_spte(iter->sptep,
+ iter->old_spte, new_spte,
+ iter->level);
/*
* Capture the dirty status of the page, so that it doesn't get
* lost when the SPTE is marked for access tracking.
*/
- if (is_writable_pte(new_spte))
- kvm_set_pfn_dirty(spte_to_pfn(new_spte));
-
- new_spte = mark_spte_for_access_track(new_spte);
+ if (is_writable_pte(iter->old_spte))
+ kvm_set_pfn_dirty(spte_to_pfn(iter->old_spte));
}
- tdp_mmu_set_spte_no_acc_track(kvm, iter, new_spte);
+ trace_kvm_tdp_mmu_spte_changed(iter->as_id, iter->gfn, iter->level,
+ iter->old_spte, new_spte);
return true;
}