[v3,10/10] arm64: ptdump: Add support for guest stage-2 pagetables dumping

Message ID 20231115171639.2852644-12-sebastianene@google.com
State New
Headers
Series arm64: ptdump: View the second stage page-tables |

Commit Message

Sebastian Ene Nov. 15, 2023, 5:16 p.m. UTC
  Register a debugfs file on guest creation to be able to view their
second translation tables with ptdump. This assumes that the host is in
control of the guest stage-2 and has direct access to the pagetables.

Signed-off-by: Sebastian Ene <sebastianene@google.com>
---
 arch/arm64/include/asm/ptdump.h |  7 ++++
 arch/arm64/kvm/debug.c          |  6 +++
 arch/arm64/kvm/mmu.c            |  2 +
 arch/arm64/mm/ptdump.c          | 68 +++++++++++++++++++++++++++++++++
 4 files changed, 83 insertions(+)
  

Comments

Oliver Upton Nov. 22, 2023, 11:35 p.m. UTC | #1
On Wed, Nov 15, 2023 at 05:16:40PM +0000, Sebastian Ene wrote:
> +struct ptdump_registered_guest {
> +	struct list_head		reg_list;
> +	struct ptdump_info		info;
> +	struct kvm_pgtable_snapshot	snapshot;
> +	rwlock_t			*lock;
> +};

Why can't we just store a pointer directly to struct kvm in ::private?
Also, shouldn't you take a reference on struct kvm when the file is
opened to protect against VM teardown?

> +static LIST_HEAD(ptdump_guest_list);
> +static DEFINE_MUTEX(ptdump_list_lock);

What is the list for?

>  static phys_addr_t ptdump_host_pa(void *addr)
>  {
>  	return __pa(addr);
> @@ -757,6 +768,63 @@ static void stage2_ptdump_walk(struct seq_file *s, struct ptdump_info *info)
>  	kvm_pgtable_walk(pgtable, start_ipa, end_ipa, &walker);
>  }
>  
> +static void guest_stage2_ptdump_walk(struct seq_file *s,
> +				     struct ptdump_info *info)
> +{
> +	struct ptdump_info_file_priv *f_priv =
> +		container_of(info, struct ptdump_info_file_priv, info);
> +	struct ptdump_registered_guest *guest = info->priv;
> +
> +	f_priv->file_priv = &guest->snapshot;
> +
> +	read_lock(guest->lock);
> +	stage2_ptdump_walk(s, info);
> +	read_unlock(guest->lock);

Taking the mmu lock for read allows other table walkers to add new
mappings and adjust the granularity of existing ones. Should this
instead take the mmu lock for write?

> +}
> +
> +int ptdump_register_guest_stage2(struct kvm *kvm)
> +{
> +	struct ptdump_registered_guest *guest;
> +	struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
> +	struct kvm_pgtable *pgt = mmu->pgt;
> +
> +	guest = kzalloc(sizeof(struct ptdump_registered_guest), GFP_KERNEL);

You want GFP_KERNEL_ACCOUNT here.
  
Sebastian Ene Nov. 23, 2023, 10:58 a.m. UTC | #2
On Wed, Nov 22, 2023 at 11:35:57PM +0000, Oliver Upton wrote:
> On Wed, Nov 15, 2023 at 05:16:40PM +0000, Sebastian Ene wrote:
> > +struct ptdump_registered_guest {
> > +	struct list_head		reg_list;
> > +	struct ptdump_info		info;
> > +	struct kvm_pgtable_snapshot	snapshot;
> > +	rwlock_t			*lock;
> > +};
> 
> Why can't we just store a pointer directly to struct kvm in ::private?

I don't think it will work unless we expect a struct kvm_pgtable
in stage2_ptdump_walk file_priv field. I think it is a good ideea and will
simplify things a little bit dropping kvm_pgtable_snapshot from here.

The current code has some fileds that are reduntant (the priv pointers)
because I also made this to work with protected guests where we can't
access their pagetables directly.

> Also, shouldn't you take a reference on struct kvm when the file is
> opened to protect against VM teardown?
>

I am not sure about the need could you please elaborate a bit ? On VM
teardown we expect ptdump_unregister_guest_stage2 to be invoked.

> > +static LIST_HEAD(ptdump_guest_list);
> > +static DEFINE_MUTEX(ptdump_list_lock);
> 
> What is the list for?
> 

I am keeping a list of registered guests with ptdump and the lock should
protect the list against concurent VM teardowns.

> >  static phys_addr_t ptdump_host_pa(void *addr)
> >  {
> >  	return __pa(addr);
> > @@ -757,6 +768,63 @@ static void stage2_ptdump_walk(struct seq_file *s, struct ptdump_info *info)
> >  	kvm_pgtable_walk(pgtable, start_ipa, end_ipa, &walker);
> >  }
> >  
> > +static void guest_stage2_ptdump_walk(struct seq_file *s,
> > +				     struct ptdump_info *info)
> > +{
> > +	struct ptdump_info_file_priv *f_priv =
> > +		container_of(info, struct ptdump_info_file_priv, info);
> > +	struct ptdump_registered_guest *guest = info->priv;
> > +
> > +	f_priv->file_priv = &guest->snapshot;
> > +
> > +	read_lock(guest->lock);
> > +	stage2_ptdump_walk(s, info);
> > +	read_unlock(guest->lock);
> 
> Taking the mmu lock for read allows other table walkers to add new
> mappings and adjust the granularity of existing ones. Should this
> instead take the mmu lock for write?
>

Thanks for pointing our, this is exactly what I was trying to avoid,
so yes I should use the write mmu lock in this case.

> > +}
> > +
> > +int ptdump_register_guest_stage2(struct kvm *kvm)
> > +{
> > +	struct ptdump_registered_guest *guest;
> > +	struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
> > +	struct kvm_pgtable *pgt = mmu->pgt;
> > +
> > +	guest = kzalloc(sizeof(struct ptdump_registered_guest), GFP_KERNEL);
> 
> You want GFP_KERNEL_ACCOUNT here.
> 

Right, thanks this is because it is an untrusted allocation triggered from
userspace.

> -- 
> Thanks,
> Oliver

Thank you,
Seb
  

Patch

diff --git a/arch/arm64/include/asm/ptdump.h b/arch/arm64/include/asm/ptdump.h
index de5a5a0c0ecf..21b281715d27 100644
--- a/arch/arm64/include/asm/ptdump.h
+++ b/arch/arm64/include/asm/ptdump.h
@@ -5,6 +5,8 @@ 
 #ifndef __ASM_PTDUMP_H
 #define __ASM_PTDUMP_H
 
+#include <asm/kvm_pgtable.h>
+
 #ifdef CONFIG_PTDUMP_CORE
 
 #include <linux/mm_types.h>
@@ -23,6 +25,7 @@  struct ptdump_info {
 	int (*ptdump_prepare_walk)(void *file_priv);
 	void (*ptdump_end_walk)(void *file_priv);
 	size_t				mc_len;
+	void				*priv;
 };
 
 void ptdump_walk(struct seq_file *s, struct ptdump_info *info);
@@ -48,8 +51,12 @@  void ptdump_check_wx(void);
 
 #ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
 void ptdump_register_host_stage2(void);
+int ptdump_register_guest_stage2(struct kvm *kvm);
+void ptdump_unregister_guest_stage2(struct kvm_pgtable *pgt);
 #else
 static inline void ptdump_register_host_stage2(void) { }
+static inline int ptdump_register_guest_stage2(struct kvm *kvm) { return 0; }
+static inline void ptdump_unregister_guest_stage2(struct kvm_pgtable *pgt) { }
 #endif /* CONFIG_PTDUMP_STAGE2_DEBUGFS */
 
 #ifdef CONFIG_DEBUG_WX
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index 8725291cb00a..555d022f8ad9 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -13,6 +13,7 @@ 
 #include <asm/kvm_asm.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_emulate.h>
+#include <asm/ptdump.h>
 
 #include "trace.h"
 
@@ -342,3 +343,8 @@  void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu)
 	vcpu_clear_flag(vcpu, DEBUG_STATE_SAVE_SPE);
 	vcpu_clear_flag(vcpu, DEBUG_STATE_SAVE_TRBE);
 }
+
+int kvm_arch_create_vm_debugfs(struct kvm *kvm)
+{
+	return ptdump_register_guest_stage2(kvm);
+}
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index d87c8fcc4c24..da45050596e6 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -11,6 +11,7 @@ 
 #include <linux/sched/signal.h>
 #include <trace/events/kvm.h>
 #include <asm/pgalloc.h>
+#include <asm/ptdump.h>
 #include <asm/cacheflush.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_mmu.h>
@@ -1021,6 +1022,7 @@  void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
 	write_unlock(&kvm->mmu_lock);
 
 	if (pgt) {
+		ptdump_unregister_guest_stage2(pgt);
 		kvm_pgtable_stage2_destroy(pgt);
 		kfree(pgt);
 	}
diff --git a/arch/arm64/mm/ptdump.c b/arch/arm64/mm/ptdump.c
index ffb87237078b..741764cff105 100644
--- a/arch/arm64/mm/ptdump.c
+++ b/arch/arm64/mm/ptdump.c
@@ -27,6 +27,7 @@ 
 #include <asm/kvm_pkvm.h>
 #include <asm/kvm_pgtable.h>
 #include <asm/kvm_host.h>
+#include <asm/kvm_mmu.h>
 
 
 enum address_markers_idx {
@@ -519,6 +520,16 @@  void ptdump_check_wx(void)
 #ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
 static struct ptdump_info stage2_kernel_ptdump_info;
 
+struct ptdump_registered_guest {
+	struct list_head		reg_list;
+	struct ptdump_info		info;
+	struct kvm_pgtable_snapshot	snapshot;
+	rwlock_t			*lock;
+};
+
+static LIST_HEAD(ptdump_guest_list);
+static DEFINE_MUTEX(ptdump_list_lock);
+
 static phys_addr_t ptdump_host_pa(void *addr)
 {
 	return __pa(addr);
@@ -757,6 +768,63 @@  static void stage2_ptdump_walk(struct seq_file *s, struct ptdump_info *info)
 	kvm_pgtable_walk(pgtable, start_ipa, end_ipa, &walker);
 }
 
+static void guest_stage2_ptdump_walk(struct seq_file *s,
+				     struct ptdump_info *info)
+{
+	struct ptdump_info_file_priv *f_priv =
+		container_of(info, struct ptdump_info_file_priv, info);
+	struct ptdump_registered_guest *guest = info->priv;
+
+	f_priv->file_priv = &guest->snapshot;
+
+	read_lock(guest->lock);
+	stage2_ptdump_walk(s, info);
+	read_unlock(guest->lock);
+}
+
+int ptdump_register_guest_stage2(struct kvm *kvm)
+{
+	struct ptdump_registered_guest *guest;
+	struct kvm_s2_mmu *mmu = &kvm->arch.mmu;
+	struct kvm_pgtable *pgt = mmu->pgt;
+
+	guest = kzalloc(sizeof(struct ptdump_registered_guest), GFP_KERNEL);
+	if (!guest)
+		return -ENOMEM;
+
+	memcpy(&guest->snapshot.pgtable, pgt, sizeof(struct kvm_pgtable));
+	guest->info = (struct ptdump_info) {
+		.ptdump_walk		= guest_stage2_ptdump_walk,
+	};
+
+	guest->info.priv = guest;
+	guest->lock = &kvm->mmu_lock;
+	mutex_lock(&ptdump_list_lock);
+
+	ptdump_debugfs_kvm_register(&guest->info, "stage2_page_tables",
+				    kvm->debugfs_dentry);
+
+	list_add(&guest->reg_list, &ptdump_guest_list);
+	mutex_unlock(&ptdump_list_lock);
+
+	return 0;
+}
+
+void ptdump_unregister_guest_stage2(struct kvm_pgtable *pgt)
+{
+	struct ptdump_registered_guest *guest;
+
+	mutex_lock(&ptdump_list_lock);
+	list_for_each_entry(guest, &ptdump_guest_list, reg_list) {
+		if (guest->snapshot.pgtable.pgd == pgt->pgd) {
+			list_del(&guest->reg_list);
+			kfree(guest);
+			break;
+		}
+	}
+	mutex_unlock(&ptdump_list_lock);
+}
+
 void ptdump_register_host_stage2(void)
 {
 	if (!is_protected_kvm_enabled())