[RFC/RFT,2/4] riscv: Add a runtime detection of invalid TLB entries caching
Commit Message
This mechanism makes it possible to completely bypass the sfence.vma
introduced by the previous commit for uarchs that do not cache invalid TLB entries.
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
---
arch/riscv/mm/init.c | 124 +++++++++++++++++++++++++++++++++++++++++++
1 file changed, 124 insertions(+)
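For context, here is a minimal sketch of how such a flag could be consumed (a hypothetical illustration only; the helper name below is an assumption, not something introduced by this series). The extra fence added by the previous commit would only be issued when the uarch is known to cache invalid entries:

	/*
	 * Hypothetical sketch: skip the extra fence on uarchs that do not
	 * cache invalid TLB entries. local_flush_tlb_page() issues an
	 * sfence.vma for the given address on riscv.
	 */
	extern bool tlb_caching_invalid_entries;

	static inline void fence_spurious_fault(unsigned long addr)
	{
		if (tlb_caching_invalid_entries)
			local_flush_tlb_page(addr);
	}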
Comments
On 07/12/2023 at 16:03, Alexandre Ghiti wrote:
> This mechanism makes it possible to completely bypass the sfence.vma
> introduced by the previous commit for uarchs that do not cache invalid TLB entries.
>
> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> ---
> arch/riscv/mm/init.c | 124 +++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 124 insertions(+)
>
> diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
> index 379403de6c6f..2e854613740c 100644
> --- a/arch/riscv/mm/init.c
> +++ b/arch/riscv/mm/init.c
> @@ -56,6 +56,8 @@ bool pgtable_l5_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KER
> EXPORT_SYMBOL(pgtable_l4_enabled);
> EXPORT_SYMBOL(pgtable_l5_enabled);
>
> +bool tlb_caching_invalid_entries;
> +
> phys_addr_t phys_ram_base __ro_after_init;
> EXPORT_SYMBOL(phys_ram_base);
>
> @@ -750,6 +752,18 @@ static void __init disable_pgtable_l4(void)
> satp_mode = SATP_MODE_39;
> }
>
> +static void __init enable_pgtable_l5(void)
> +{
> + pgtable_l5_enabled = true;
> + satp_mode = SATP_MODE_57;
> +}
> +
> +static void __init enable_pgtable_l4(void)
> +{
> + pgtable_l4_enabled = true;
> + satp_mode = SATP_MODE_48;
> +}
> +
> static int __init print_no4lvl(char *p)
> {
> pr_info("Disabled 4-level and 5-level paging");
> @@ -826,6 +840,112 @@ static __init void set_satp_mode(uintptr_t dtb_pa)
> memset(early_pud, 0, PAGE_SIZE);
> memset(early_pmd, 0, PAGE_SIZE);
> }
> +
> +/* Determine at runtime if the uarch caches invalid TLB entries */
> +static __init void set_tlb_caching_invalid_entries(void)
> +{
> +#define NR_RETRIES_CACHING_INVALID_ENTRIES 50
Looks odd to have macros nested in the middle of a function.
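(A sketch of the suggested cleanup, for illustration only: both helper macros used by this function could be hoisted to file scope, next to the other definitions in init.c.)

	/* Sketch only: retry budget and CSR-name stringification helper
	 * hoisted out of set_tlb_caching_invalid_entries(). */
	#define NR_RETRIES_CACHING_INVALID_ENTRIES	50
	#define ASM_STR(x)				__ASM_STR(x)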
> + uintptr_t set_tlb_caching_invalid_entries_pmd = ((unsigned long)set_tlb_caching_invalid_entries) & PMD_MASK;
> + // TODO the test_addr as defined below could go into another pud...
> + uintptr_t test_addr = set_tlb_caching_invalid_entries_pmd + 2 * PMD_SIZE;
> + pmd_t valid_pmd;
> + u64 satp;
> + int i = 0;
> +
> + /* To ease the page table creation */
> + disable_pgtable_l5();
> + disable_pgtable_l4();
> +
> + /* Establish a mapping for set_tlb_caching_invalid_entries() in sv39 */
> + create_pgd_mapping(early_pg_dir,
> + set_tlb_caching_invalid_entries_pmd,
> + (uintptr_t)early_pmd,
> + PGDIR_SIZE, PAGE_TABLE);
> +
> + /* Handle the case where set_tlb_caching_invalid_entries straddles 2 PMDs */
> + create_pmd_mapping(early_pmd,
> + set_tlb_caching_invalid_entries_pmd,
> + set_tlb_caching_invalid_entries_pmd,
> + PMD_SIZE, PAGE_KERNEL_EXEC);
> + create_pmd_mapping(early_pmd,
> + set_tlb_caching_invalid_entries_pmd + PMD_SIZE,
> + set_tlb_caching_invalid_entries_pmd + PMD_SIZE,
> + PMD_SIZE, PAGE_KERNEL_EXEC);
> +
> + /* Establish an invalid mapping */
> + create_pmd_mapping(early_pmd, test_addr, 0, PMD_SIZE, __pgprot(0));
> +
> + /* Precompute the valid pmd here because the mapping for pfn_pmd() won't exist */
> + valid_pmd = pfn_pmd(PFN_DOWN(set_tlb_caching_invalid_entries_pmd), PAGE_KERNEL);
> +
> + local_flush_tlb_all();
> + satp = PFN_DOWN((uintptr_t)&early_pg_dir) | SATP_MODE_39;
> + csr_write(CSR_SATP, satp);
> +
> + /*
> + * Set stvec to after the trapping access, access this invalid mapping
> + * and legitimately trap
> + */
> + // TODO: Should I save the previous stvec?
> +#define ASM_STR(x) __ASM_STR(x)
Looks odd to have macros nested in the middle of a function.
> + asm volatile(
> + "la a0, 1f \n"
> + "csrw " ASM_STR(CSR_TVEC) ", a0 \n"
> + "ld a0, 0(%0) \n"
> + ".align 2 \n"
> + "1: \n"
> + :
> + : "r" (test_addr)
> + : "a0"
> + );
> +
> + /* Now establish a valid mapping to check if the invalid one is cached */
> + early_pmd[pmd_index(test_addr)] = valid_pmd;
> +
> + /*
> + * Access the valid mapping multiple times: indeed, we can't use
> + * sfence.vma as a barrier to make sure the cpu did not reorder accesses
> + * so we may trap even if the uarch does not cache invalid entries. By
> + * trying a few times, we make sure that those uarchs will see the right
> + * mapping at some point.
> + */
> +
> + i = NR_RETRIES_CACHING_INVALID_ENTRIES;
> +
> +#define ASM_STR(x) __ASM_STR(x)
Duplicate define?
> + asm_volatile_goto(
> + "la a0, 1f \n"
> + "csrw " ASM_STR(CSR_TVEC) ", a0 \n"
> + ".align 2 \n"
> + "1: \n"
> + "addi %0, %0, -1 \n"
> + "blt %0, zero, %l[caching_invalid_entries] \n"
> + "ld a0, 0(%1) \n"
> + :
> + : "r" (i), "r" (test_addr)
> + : "a0"
> + : caching_invalid_entries
> + );
> +
> + csr_write(CSR_SATP, 0ULL);
> + local_flush_tlb_all();
> +
> + /* If we don't trap, the uarch does not cache invalid entries! */
> + tlb_caching_invalid_entries = false;
> + goto clean;
> +
> +caching_invalid_entries:
> + csr_write(CSR_SATP, 0ULL);
> + local_flush_tlb_all();
> +
> + tlb_caching_invalid_entries = true;
> +clean:
> + memset(early_pg_dir, 0, PAGE_SIZE);
> + memset(early_pmd, 0, PAGE_SIZE);
Use clear_page() instead?
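For reference, the suggested replacement would be a one-liner per table, assuming each of early_pg_dir and early_pmd spans exactly one page (which is what the current memset(..., PAGE_SIZE) already implies):

	clear_page(early_pg_dir);
	clear_page(early_pmd);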
> +
> + enable_pgtable_l4();
> + enable_pgtable_l5();
> +}
> #endif
>
> /*
> @@ -1072,6 +1192,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
> #endif
>
> #if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL)
> + set_tlb_caching_invalid_entries();
> set_satp_mode(dtb_pa);
> #endif
>
> @@ -1322,6 +1443,9 @@ static void __init setup_vm_final(void)
> local_flush_tlb_all();
>
> pt_ops_set_late();
> +
> + pr_info("uarch caches invalid entries: %s",
> + tlb_caching_invalid_entries ? "yes" : "no");
> }
> #else
> asmlinkage void __init setup_vm(uintptr_t dtb_pa)
On Thu, Dec 7, 2023 at 4:55 PM Christophe Leroy
<christophe.leroy@csgroup.eu> wrote:
> [...]
I left this patch so that people can easily test this without knowing
what their uarch is actually doing, but it will very likely be dropped
as a new extension has just been proposed for that.
Thanks anyway, I should have been more clear in the patch title,
Alex