[v1,07/26] x86/fault: Add helper for dumping RMP entries

Message ID 20231230161954.569267-8-michael.roth@amd.com
State New
Headers
Series Add AMD Secure Nested Paging (SEV-SNP) Initialization Support |

Commit Message

Michael Roth Dec. 30, 2023, 4:19 p.m. UTC
  From: Brijesh Singh <brijesh.singh@amd.com>

This information will be useful for debugging things like page faults
due to RMP access violations and RMPUPDATE failures.

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
[mdr: move helper to standalone patch, rework dump logic to reduce
      verbosity]
Signed-off-by: Michael Roth <michael.roth@amd.com>
---
 arch/x86/include/asm/sev.h |  2 +
 arch/x86/virt/svm/sev.c    | 77 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+)
  

Comments

Borislav Petkov Jan. 10, 2024, 11:13 a.m. UTC | #1
On Sat, Dec 30, 2023 at 10:19:35AM -0600, Michael Roth wrote:
> +	while (pfn_current < pfn_end) {
> +		e = __snp_lookup_rmpentry(pfn_current, &level);
> +		if (IS_ERR(e)) {
> +			pfn_current++;
> +			continue;
> +		}
> +
> +		e_data = (u64 *)e;
> +		if (e_data[0] || e_data[1]) {
> +			pr_info("No assigned RMP entry for PFN 0x%llx, but the 2MB region contains populated RMP entries, e.g.: PFN 0x%llx: [high=0x%016llx low=0x%016llx]\n",
> +				pfn, pfn_current, e_data[1], e_data[0]);
> +			return;
> +		}
> +		pfn_current++;
> +	}
> +
> +	pr_info("No populated RMP entries in the 2MB region containing PFN 0x%llx\n",
> +		pfn);
> +}

Ok, I went and reworked this, see below.

Yes, I think it is important - at least in the beginning - to dump the
whole 2M PFN region for debugging purposes. If that output starts
becoming too unwieldy and overflowing terminals or log files, we'd
shorten it or put it behind a debug option or so.

Thx.

---
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index a8cf33b7da71..259a1dd655a7 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -35,16 +35,21 @@
  * Family 19h Model 01h, Rev B1 processor.
  */
 struct rmpentry {
-	u64	assigned	: 1,
-		pagesize	: 1,
-		immutable	: 1,
-		rsvd1		: 9,
-		gpa		: 39,
-		asid		: 10,
-		vmsa		: 1,
-		validated	: 1,
-		rsvd2		: 1;
-	u64 rsvd3;
+	union {
+		struct {
+			u64	assigned	: 1,
+				pagesize	: 1,
+				immutable	: 1,
+				rsvd1		: 9,
+				gpa		: 39,
+				asid		: 10,
+				vmsa		: 1,
+				validated	: 1,
+				rsvd2		: 1;
+		};
+		u64 lo;
+	};
+	u64 hi;
 } __packed;
 
 /*
@@ -272,22 +277,20 @@ EXPORT_SYMBOL_GPL(snp_lookup_rmpentry);
  */
 static void dump_rmpentry(u64 pfn)
 {
-	u64 pfn_current, pfn_end;
+	u64 pfn_i, pfn_end;
 	struct rmpentry *e;
-	u64 *e_data;
 	int level;
 
 	e = __snp_lookup_rmpentry(pfn, &level);
 	if (IS_ERR(e)) {
-		pr_info("Failed to read RMP entry for PFN 0x%llx, error %ld\n",
-			pfn, PTR_ERR(e));
+		pr_err("Error %ld reading RMP entry for PFN 0x%llx\n",
+			PTR_ERR(e), pfn);
 		return;
 	}
 
-	e_data = (u64 *)e;
 	if (e->assigned) {
-		pr_info("RMP entry for PFN 0x%llx: [high=0x%016llx low=0x%016llx]\n",
-			pfn, e_data[1], e_data[0]);
+		pr_info("PFN 0x%llx, RMP entry: [0x%016llx - 0x%016llx]\n",
+			pfn, e->lo, e->hi);
 		return;
 	}
 
@@ -299,27 +302,28 @@ static void dump_rmpentry(u64 pfn)
 	 * certain situations, such as when the PFN is being accessed via a 2MB
 	 * mapping in the host page table.
 	 */
-	pfn_current = ALIGN(pfn, PTRS_PER_PMD);
-	pfn_end = pfn_current + PTRS_PER_PMD;
+	pfn_i = ALIGN(pfn, PTRS_PER_PMD);
+	pfn_end = pfn_i + PTRS_PER_PMD;
 
-	while (pfn_current < pfn_end) {
-		e = __snp_lookup_rmpentry(pfn_current, &level);
+	pr_info("PFN 0x%llx unassigned, dumping the whole 2M PFN region: [0x%llx - 0x%llx]\n",
+		pfn, pfn_i, pfn_end);
+
+	while (pfn_i < pfn_end) {
+		e = __snp_lookup_rmpentry(pfn_i, &level);
 		if (IS_ERR(e)) {
-			pfn_current++;
+			pr_err("Error %ld reading RMP entry for PFN 0x%llx\n",
+				PTR_ERR(e), pfn_i);
+			pfn_i++;
 			continue;
 		}
 
-		e_data = (u64 *)e;
-		if (e_data[0] || e_data[1]) {
-			pr_info("No assigned RMP entry for PFN 0x%llx, but the 2MB region contains populated RMP entries, e.g.: PFN 0x%llx: [high=0x%016llx low=0x%016llx]\n",
-				pfn, pfn_current, e_data[1], e_data[0]);
-			return;
-		}
-		pfn_current++;
-	}
+		if (e->lo || e->hi)
+			pr_info("PFN: 0x%llx, [0x%016llx - 0x%016llx]\n", pfn_i, e->lo, e->hi);
+		else
+			pr_info("PFN: 0x%llx ...\n", pfn_i);
 
-	pr_info("No populated RMP entries in the 2MB region containing PFN 0x%llx\n",
-		pfn);
+		pfn_i++;
+	}
 }
 
 void snp_dump_hva_rmpentry(unsigned long hva)
@@ -339,4 +343,3 @@ void snp_dump_hva_rmpentry(unsigned long hva)
 
 	dump_rmpentry(pte_pfn(*pte));
 }
-EXPORT_SYMBOL_GPL(snp_dump_hva_rmpentry);
  
Tom Lendacky Jan. 10, 2024, 3:10 p.m. UTC | #2
On 12/30/23 10:19, Michael Roth wrote:
> From: Brijesh Singh <brijesh.singh@amd.com>
> 
> This information will be useful for debugging things like page faults
> due to RMP access violations and RMPUPDATE failures.
> 
> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
> Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
> [mdr: move helper to standalone patch, rework dump logic to reduce
>        verbosity]
> Signed-off-by: Michael Roth <michael.roth@amd.com>
> ---
>   arch/x86/include/asm/sev.h |  2 +
>   arch/x86/virt/svm/sev.c    | 77 ++++++++++++++++++++++++++++++++++++++
>   2 files changed, 79 insertions(+)
> 
> diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
> index 01ce61b283a3..2c53e3de0b71 100644
> --- a/arch/x86/include/asm/sev.h
> +++ b/arch/x86/include/asm/sev.h
> @@ -247,9 +247,11 @@ static inline u64 sev_get_status(void) { return 0; }
>   #ifdef CONFIG_KVM_AMD_SEV
>   bool snp_probe_rmptable_info(void);
>   int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level);
> +void snp_dump_hva_rmpentry(unsigned long address);
>   #else
>   static inline bool snp_probe_rmptable_info(void) { return false; }
>   static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; }
> +static inline void snp_dump_hva_rmpentry(unsigned long address) {}
>   #endif
>   
>   #endif
> diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
> index 49fdfbf4e518..7c9ced8911e9 100644
> --- a/arch/x86/virt/svm/sev.c
> +++ b/arch/x86/virt/svm/sev.c
> @@ -266,3 +266,80 @@ int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level)
>   	return 0;
>   }
>   EXPORT_SYMBOL_GPL(snp_lookup_rmpentry);
> +
> +/*
> + * Dump the raw RMP entry for a particular PFN. These bits are documented in the
> + * PPR for a particular CPU model and provide useful information about how a
> + * particular PFN is being utilized by the kernel/firmware at the time certain
> + * unexpected events occur, such as RMP faults.
> + */
> +static void dump_rmpentry(u64 pfn)
> +{
> +	u64 pfn_current, pfn_end;
> +	struct rmpentry *e;
> +	u64 *e_data;
> +	int level;
> +
> +	e = __snp_lookup_rmpentry(pfn, &level);
> +	if (IS_ERR(e)) {
> +		pr_info("Failed to read RMP entry for PFN 0x%llx, error %ld\n",
> +			pfn, PTR_ERR(e));
> +		return;
> +	}
> +
> +	e_data = (u64 *)e;
> +	if (e->assigned) {
> +		pr_info("RMP entry for PFN 0x%llx: [high=0x%016llx low=0x%016llx]\n",
> +			pfn, e_data[1], e_data[0]);
> +		return;
> +	}
> +
> +	/*
> +	 * If the RMP entry for a particular PFN is not in an assigned state,
> +	 * then it is sometimes useful to get an idea of whether or not any RMP
> +	 * entries for other PFNs within the same 2MB region are assigned, since
> +	 * those too can affect the ability to access a particular PFN in
> +	 * certain situations, such as when the PFN is being accessed via a 2MB
> +	 * mapping in the host page table.
> +	 */
> +	pfn_current = ALIGN(pfn, PTRS_PER_PMD);
> +	pfn_end = pfn_current + PTRS_PER_PMD;
> +
> +	while (pfn_current < pfn_end) {
> +		e = __snp_lookup_rmpentry(pfn_current, &level);
> +		if (IS_ERR(e)) {
> +			pfn_current++;
> +			continue;
> +		}
> +
> +		e_data = (u64 *)e;
> +		if (e_data[0] || e_data[1]) {
> +			pr_info("No assigned RMP entry for PFN 0x%llx, but the 2MB region contains populated RMP entries, e.g.: PFN 0x%llx: [high=0x%016llx low=0x%016llx]\n",
> +				pfn, pfn_current, e_data[1], e_data[0]);
> +			return;
> +		}
> +		pfn_current++;
> +	}
> +
> +	pr_info("No populated RMP entries in the 2MB region containing PFN 0x%llx\n",
> +		pfn);
> +}
> +
> +void snp_dump_hva_rmpentry(unsigned long hva)
> +{
> +	unsigned int level;
> +	pgd_t *pgd;
> +	pte_t *pte;
> +
> +	pgd = __va(read_cr3_pa());
> +	pgd += pgd_index(hva);
> +	pte = lookup_address_in_pgd(pgd, hva, &level);
> +
> +	if (!pte) {
> +		pr_info("Can't dump RMP entry for HVA %lx: no PTE/PFN found\n", hva);
> +		return;
> +	}
> +
> +	dump_rmpentry(pte_pfn(*pte));

I already worked with Mike offline when I was running into issues using this
function. The net of that conversation is that the PFN needs to be adjusted
using the address offset if the PTE level indicates a huge page.

Additionally, the loop in dump_rmpentry() needs to use ALIGN_DOWN() in
order to get the PFN of the starting 2MB area.

Thanks,
Tom


> +}
> +EXPORT_SYMBOL_GPL(snp_dump_hva_rmpentry);
  
Tom Lendacky Jan. 10, 2024, 3:20 p.m. UTC | #3
On 1/10/24 05:13, Borislav Petkov wrote:
> On Sat, Dec 30, 2023 at 10:19:35AM -0600, Michael Roth wrote:
>> +	while (pfn_current < pfn_end) {
>> +		e = __snp_lookup_rmpentry(pfn_current, &level);
>> +		if (IS_ERR(e)) {
>> +			pfn_current++;
>> +			continue;
>> +		}
>> +
>> +		e_data = (u64 *)e;
>> +		if (e_data[0] || e_data[1]) {
>> +			pr_info("No assigned RMP entry for PFN 0x%llx, but the 2MB region contains populated RMP entries, e.g.: PFN 0x%llx: [high=0x%016llx low=0x%016llx]\n",
>> +				pfn, pfn_current, e_data[1], e_data[0]);
>> +			return;
>> +		}
>> +		pfn_current++;
>> +	}
>> +
>> +	pr_info("No populated RMP entries in the 2MB region containing PFN 0x%llx\n",
>> +		pfn);
>> +}
> 
> Ok, I went and reworked this, see below.
> 
> Yes, I think it is important - at least in the beginning - to dump the
> whole 2M PFN region for debugging purposes. If that output starts
> becoming too unwieldy and overflowing terminals or log files, we'd
> shorten it or put it behind a debug option or so.
> 
> Thx.
> 
> ---
> diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
> index a8cf33b7da71..259a1dd655a7 100644
> --- a/arch/x86/virt/svm/sev.c
> +++ b/arch/x86/virt/svm/sev.c

> +	pr_info("PFN 0x%llx unassigned, dumping the whole 2M PFN region: [0x%llx - 0x%llx]\n",
> +		pfn, pfn_i, pfn_end);

How about saying "... dumping all non-zero entries in the whole ..."

and then removing the print below that prints the PFN and "..."

> +
> +	while (pfn_i < pfn_end) {
> +		e = __snp_lookup_rmpentry(pfn_i, &level);
>   		if (IS_ERR(e)) {
> -			pfn_current++;
> +			pr_err("Error %ld reading RMP entry for PFN 0x%llx\n",
> +				PTR_ERR(e), pfn_i);
> +			pfn_i++;
>   			continue;
>   		}
>   
> -		e_data = (u64 *)e;
> -		if (e_data[0] || e_data[1]) {
> -			pr_info("No assigned RMP entry for PFN 0x%llx, but the 2MB region contains populated RMP entries, e.g.: PFN 0x%llx: [high=0x%016llx low=0x%016llx]\n",
> -				pfn, pfn_current, e_data[1], e_data[0]);
> -			return;
> -		}
> -		pfn_current++;
> -	}
> +		if (e->lo || e->hi)
> +			pr_info("PFN: 0x%llx, [0x%016llx - 0x%016llx]\n", pfn_i, e->lo, e->hi);
> +		else
> +			pr_info("PFN: 0x%llx ...\n", pfn_i);

Remove this one.

That should cut down on excess output since you are really only concerned 
with non-zero RMP entries when the input PFN RMP entry is not assigned.

Thanks,
Tom

>   
> -	pr_info("No populated RMP entries in the 2MB region containing PFN 0x%llx\n",
> -		pfn);
> +		pfn_i++;
> +	}
>   }
>   
>   void snp_dump_hva_rmpentry(unsigned long hva)
> @@ -339,4 +343,3 @@ void snp_dump_hva_rmpentry(unsigned long hva)
>   
>   	dump_rmpentry(pte_pfn(*pte));
>   }
> -EXPORT_SYMBOL_GPL(snp_dump_hva_rmpentry);
>
  
Tom Lendacky Jan. 10, 2024, 3:51 p.m. UTC | #4
On 1/10/24 09:27, Borislav Petkov wrote:
> On Wed, Jan 10, 2024 at 09:20:44AM -0600, Tom Lendacky wrote:
>> How about saying "... dumping all non-zero entries in the whole ..."
> 
> I'm trying not to have long stories in printk statements :)

Well, it only adds "non-zero".

> 
>> and then removing the print below that prints the PFN and "..."
> 
> Why remove the print? You want to print every non-null RMP entry in the
> 2M range, no?

I'm only suggesting getting rid of the else that prints "..." when the 
entry is all zeroes. Printing the non-zero entries would still occur.

Thanks,
Tom

> 
> And the "..." says that it is a null entry.
>
  
Jarkko Sakkinen Jan. 10, 2024, 8:18 p.m. UTC | #5
On Wed Jan 10, 2024 at 11:59 AM EET, Borislav Petkov wrote:
> On Sat, Dec 30, 2023 at 10:19:35AM -0600, Michael Roth wrote:
> > +void snp_dump_hva_rmpentry(unsigned long hva)
> > +{
> > +	unsigned int level;
> > +	pgd_t *pgd;
> > +	pte_t *pte;
> > +
> > +	pgd = __va(read_cr3_pa());
> > +	pgd += pgd_index(hva);
> > +	pte = lookup_address_in_pgd(pgd, hva, &level);
> > +
> > +	if (!pte) {
> > +		pr_info("Can't dump RMP entry for HVA %lx: no PTE/PFN found\n", hva);
                ~~~~~~~
		Is this the correct log level?

BR, Jarkko
  

Patch

diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 01ce61b283a3..2c53e3de0b71 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -247,9 +247,11 @@  static inline u64 sev_get_status(void) { return 0; }
 #ifdef CONFIG_KVM_AMD_SEV
 bool snp_probe_rmptable_info(void);
 int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level);
+void snp_dump_hva_rmpentry(unsigned long address);
 #else
 static inline bool snp_probe_rmptable_info(void) { return false; }
 static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; }
+static inline void snp_dump_hva_rmpentry(unsigned long address) {}
 #endif
 
 #endif
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index 49fdfbf4e518..7c9ced8911e9 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -266,3 +266,80 @@  int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level)
 	return 0;
 }
 EXPORT_SYMBOL_GPL(snp_lookup_rmpentry);
+
+/*
+ * Dump the raw RMP entry for a particular PFN. These bits are documented in the
+ * PPR for a particular CPU model and provide useful information about how a
+ * particular PFN is being utilized by the kernel/firmware at the time certain
+ * unexpected events occur, such as RMP faults.
+ */
+static void dump_rmpentry(u64 pfn)
+{
+	u64 pfn_current, pfn_end;
+	struct rmpentry *e;
+	u64 *e_data;
+	int level;
+
+	e = __snp_lookup_rmpentry(pfn, &level);
+	if (IS_ERR(e)) {
+		pr_err("Failed to read RMP entry for PFN 0x%llx, error %ld\n",
+			pfn, PTR_ERR(e));
+		return;
+	}
+
+	e_data = (u64 *)e;
+	if (e->assigned) {
+		pr_info("RMP entry for PFN 0x%llx: [high=0x%016llx low=0x%016llx]\n",
+			pfn, e_data[1], e_data[0]);
+		return;
+	}
+
+	/*
+	 * If the RMP entry for a particular PFN is not in an assigned state,
+	 * then it is sometimes useful to get an idea of whether or not any RMP
+	 * entries for other PFNs within the same 2MB region are assigned, since
+	 * those too can affect the ability to access a particular PFN in
+	 * certain situations, such as when the PFN is being accessed via a 2MB
+	 * mapping in the host page table.
+	 *
+	 * Round down to the start of the 2MB region containing @pfn; ALIGN()
+	 * would round up and scan the next region when @pfn is unaligned.
+	 */
+	pfn_current = ALIGN_DOWN(pfn, PTRS_PER_PMD);
+	pfn_end = pfn_current + PTRS_PER_PMD;
+
+	for (; pfn_current < pfn_end; pfn_current++) {
+		e = __snp_lookup_rmpentry(pfn_current, &level);
+		if (IS_ERR(e))
+			continue;
+
+		e_data = (u64 *)e;
+		if (e_data[0] || e_data[1]) {
+			pr_info("No assigned RMP entry for PFN 0x%llx, but the 2MB region contains populated RMP entries, e.g.: PFN 0x%llx: [high=0x%016llx low=0x%016llx]\n",
+				pfn, pfn_current, e_data[1], e_data[0]);
+			return;
+		}
+	}
+
+	pr_info("No populated RMP entries in the 2MB region containing PFN 0x%llx\n",
+		pfn);
+}
+
+/* Dump the RMP entry for the PFN backing @hva in the current page tables. */
+void snp_dump_hva_rmpentry(unsigned long hva)
+{
+	unsigned int level;
+	pgd_t *pgd;
+	pte_t *pte;
+
+	pgd = __va(read_cr3_pa());
+	pgd += pgd_index(hva);
+	pte = lookup_address_in_pgd(pgd, hva, &level);
+	if (!pte) {
+		pr_err("Can't dump RMP entry for HVA %lx: no PTE/PFN found\n", hva);
+		return;
+	}
+	/* Huge-page PTEs hold the base PFN: add hva's 4K-page offset within it. */
+	dump_rmpentry(pte_pfn(*pte) + PFN_DOWN(hva & ~page_level_mask(level)));
+}
+EXPORT_SYMBOL_GPL(snp_dump_hva_rmpentry);