[v3,10/10] arm64: ptdump: Add support for guest stage-2 pagetables dumping
Commit Message
Register a debugfs file on guest creation to be able to view their
second translation tables with ptdump. This assumes that the host is in
control of the guest stage-2 and has direct access to the pagetables.
Signed-off-by: Sebastian Ene <sebastianene@google.com>
---
arch/arm64/include/asm/ptdump.h | 7 ++++
arch/arm64/kvm/debug.c | 6 +++
arch/arm64/kvm/mmu.c | 2 +
arch/arm64/mm/ptdump.c | 68 +++++++++++++++++++++++++++++++++
4 files changed, 83 insertions(+)
Comments
On Wed, Nov 15, 2023 at 05:16:40PM +0000, Sebastian Ene wrote:
> +struct ptdump_registered_guest {
> + struct list_head reg_list;
> + struct ptdump_info info;
> + struct kvm_pgtable_snapshot snapshot;
> + rwlock_t *lock;
> +};
Why can't we just store a pointer directly to struct kvm in ::private?
Also, shouldn't you take a reference on struct kvm when the file is
opened to protect against VM teardown?
> +static LIST_HEAD(ptdump_guest_list);
> +static DEFINE_MUTEX(ptdump_list_lock);
What is the list for?
> static phys_addr_t ptdump_host_pa(void *addr)
> {
> return __pa(addr);
> @@ -757,6 +768,63 @@ static void stage2_ptdump_walk(struct seq_file *s, struct ptdump_info *info)
> kvm_pgtable_walk(pgtable, start_ipa, end_ipa, &walker);
> }
>
> +static void guest_stage2_ptdump_walk(struct seq_file *s,
> + struct ptdump_info *info)
> +{
> + struct ptdump_info_file_priv *f_priv =
> + container_of(info, struct ptdump_info_file_priv, info);
> + struct ptdump_registered_guest *guest = info->priv;
> +
> + f_priv->file_priv = &guest->snapshot;
> +
> + read_lock(guest->lock);
> + stage2_ptdump_walk(s, info);
> + read_unlock(guest->lock);
Taking the mmu lock for read allows other table walkers to add new
mappings and adjust the granularity of existing ones. Should this
instead take the mmu lock for write?
> +}
> +
> +int ptdump_register_guest_stage2(struct kvm *kvm)
> +{
> + struct ptdump_registered_guest *guest;
> + struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
> + struct kvm_pgtable *pgt = mmu->pgt;
> +
> + guest = kzalloc(sizeof(struct ptdump_registered_guest), GFP_KERNEL);
You want GFP_KERNEL_ACCOUNT here.
On Wed, Nov 22, 2023 at 11:35:57PM +0000, Oliver Upton wrote:
> On Wed, Nov 15, 2023 at 05:16:40PM +0000, Sebastian Ene wrote:
> > +struct ptdump_registered_guest {
> > + struct list_head reg_list;
> > + struct ptdump_info info;
> > + struct kvm_pgtable_snapshot snapshot;
> > + rwlock_t *lock;
> > +};
>
> Why can't we just store a pointer directly to struct kvm in ::private?
I don't think it will work unless we expect a struct kvm_pgtable
in stage2_ptdump_walk file_priv field. I think it is a good idea and will
simplify things a little bit, dropping kvm_pgtable_snapshot from here.
The current code has some fields that are redundant (the priv pointers)
because I also made this work with protected guests, where we can't
access their pagetables directly.
> Also, shouldn't you take a reference on struct kvm when the file is
> opened to protect against VM teardown?
>
I am not sure about the need; could you please elaborate a bit? On VM
teardown we expect ptdump_unregister_guest_stage2 to be invoked.
> > +static LIST_HEAD(ptdump_guest_list);
> > +static DEFINE_MUTEX(ptdump_list_lock);
>
> What is the list for?
>
I am keeping a list of registered guests with ptdump, and the lock should
protect the list against concurrent VM teardowns.
> > static phys_addr_t ptdump_host_pa(void *addr)
> > {
> > return __pa(addr);
> > @@ -757,6 +768,63 @@ static void stage2_ptdump_walk(struct seq_file *s, struct ptdump_info *info)
> > kvm_pgtable_walk(pgtable, start_ipa, end_ipa, &walker);
> > }
> >
> > +static void guest_stage2_ptdump_walk(struct seq_file *s,
> > + struct ptdump_info *info)
> > +{
> > + struct ptdump_info_file_priv *f_priv =
> > + container_of(info, struct ptdump_info_file_priv, info);
> > + struct ptdump_registered_guest *guest = info->priv;
> > +
> > + f_priv->file_priv = &guest->snapshot;
> > +
> > + read_lock(guest->lock);
> > + stage2_ptdump_walk(s, info);
> > + read_unlock(guest->lock);
>
> Taking the mmu lock for read allows other table walkers to add new
> mappings and adjust the granularity of existing ones. Should this
> instead take the mmu lock for write?
>
Thanks for pointing this out; this is exactly what I was trying to avoid,
so yes, I should use the write mmu lock in this case.
> > +}
> > +
> > +int ptdump_register_guest_stage2(struct kvm *kvm)
> > +{
> > + struct ptdump_registered_guest *guest;
> > + struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
> > + struct kvm_pgtable *pgt = mmu->pgt;
> > +
> > + guest = kzalloc(sizeof(struct ptdump_registered_guest), GFP_KERNEL);
>
> You want GFP_KERNEL_ACCOUNT here.
>
Right, thanks — this is because it is an untrusted allocation triggered from
userspace.
> --
> Thanks,
> Oliver
Thank you,
Seb
@@ -5,6 +5,8 @@
#ifndef __ASM_PTDUMP_H
#define __ASM_PTDUMP_H
+#include <asm/kvm_pgtable.h>
+
#ifdef CONFIG_PTDUMP_CORE
#include <linux/mm_types.h>
@@ -23,6 +25,7 @@ struct ptdump_info {
int (*ptdump_prepare_walk)(void *file_priv);
void (*ptdump_end_walk)(void *file_priv);
size_t mc_len;
+ void *priv;
};
void ptdump_walk(struct seq_file *s, struct ptdump_info *info);
@@ -48,8 +51,12 @@ void ptdump_check_wx(void);
#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
void ptdump_register_host_stage2(void);
+int ptdump_register_guest_stage2(struct kvm *kvm);
+void ptdump_unregister_guest_stage2(struct kvm_pgtable *pgt);
#else
static inline void ptdump_register_host_stage2(void) { }
+static inline int ptdump_register_guest_stage2(struct kvm *kvm) { return 0; }
+static inline void ptdump_unregister_guest_stage2(struct kvm_pgtable *pgt) { }
#endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */
#ifdef CONFIG_DEBUG_WX
@@ -13,6 +13,7 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_emulate.h>
+#include <asm/ptdump.h>
#include "trace.h"
@@ -342,3 +343,8 @@ void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu)
vcpu_clear_flag(vcpu, DEBUG_STATE_SAVE_SPE);
vcpu_clear_flag(vcpu, DEBUG_STATE_SAVE_TRBE);
}
+
+int kvm_arch_create_vm_debugfs(struct kvm *kvm)
+{
+ return ptdump_register_guest_stage2(kvm);
+}
@@ -11,6 +11,7 @@
#include <linux/sched/signal.h>
#include <trace/events/kvm.h>
#include <asm/pgalloc.h>
+#include <asm/ptdump.h>
#include <asm/cacheflush.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_mmu.h>
@@ -1021,6 +1022,7 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
write_unlock(&kvm->mmu_lock);
if (pgt) {
+ ptdump_unregister_guest_stage2(pgt);
kvm_pgtable_stage2_destroy(pgt);
kfree(pgt);
}
@@ -27,6 +27,7 @@
#include <asm/kvm_pkvm.h>
#include <asm/kvm_pgtable.h>
#include <asm/kvm_host.h>
+#include <asm/kvm_mmu.h>
enum address_markers_idx {
@@ -519,6 +520,16 @@ void ptdump_check_wx(void)
#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
static struct ptdump_info stage2_kernel_ptdump_info;
+struct ptdump_registered_guest {
+ struct list_head reg_list;
+ struct ptdump_info info;
+ struct kvm_pgtable_snapshot snapshot;
+ rwlock_t *lock;
+};
+
+static LIST_HEAD(ptdump_guest_list);
+static DEFINE_MUTEX(ptdump_list_lock);
+
static phys_addr_t ptdump_host_pa(void *addr)
{
return __pa(addr);
@@ -757,6 +768,63 @@ static void stage2_ptdump_walk(struct seq_file *s, struct ptdump_info *info)
kvm_pgtable_walk(pgtable, start_ipa, end_ipa, &walker);
}
+static void guest_stage2_ptdump_walk(struct seq_file *s,
+ struct ptdump_info *info)
+{
+ struct ptdump_info_file_priv *f_priv =
+ container_of(info, struct ptdump_info_file_priv, info);
+ struct ptdump_registered_guest *guest = info->priv;
+
+ f_priv->file_priv = &guest->snapshot;
+
+ read_lock(guest->lock);
+ stage2_ptdump_walk(s, info);
+ read_unlock(guest->lock);
+}
+
+int ptdump_register_guest_stage2(struct kvm *kvm)
+{
+ struct ptdump_registered_guest *guest;
+ struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
+ struct kvm_pgtable *pgt = mmu->pgt;
+
+ guest = kzalloc(sizeof(struct ptdump_registered_guest), GFP_KERNEL);
+ if (!guest)
+ return -ENOMEM;
+
+ memcpy(&guest->snapshot.pgtable, pgt, sizeof(struct kvm_pgtable));
+ guest->info = (struct ptdump_info) {
+ .ptdump_walk = guest_stage2_ptdump_walk,
+ };
+
+ guest->info.priv = guest;
+ guest->lock = &kvm->mmu_lock;
+ mutex_lock(&ptdump_list_lock);
+
+ ptdump_debugfs_kvm_register(&guest->info, "stage2_page_tables",
+ kvm->debugfs_dentry);
+
+ list_add(&guest->reg_list, &ptdump_guest_list);
+ mutex_unlock(&ptdump_list_lock);
+
+ return 0;
+}
+
+void ptdump_unregister_guest_stage2(struct kvm_pgtable *pgt)
+{
+ struct ptdump_registered_guest *guest;
+
+ mutex_lock(&ptdump_list_lock);
+ list_for_each_entry(guest, &ptdump_guest_list, reg_list) {
+ if (guest->snapshot.pgtable.pgd == pgt->pgd) {
+ list_del(&guest->reg_list);
+ kfree(guest);
+ break;
+ }
+ }
+ mutex_unlock(&ptdump_list_lock);
+}
+
void ptdump_register_host_stage2(void)
{
if (!is_protected_kvm_enabled())