[1/4] riscv: tlb: fix __p*d_free_tlb()

Message ID 20231219175046.2496-2-jszhang@kernel.org
State New
Headers
Series riscv: support fast gup |

Commit Message

Jisheng Zhang Dec. 19, 2023, 5:50 p.m. UTC
  If non-leaf PTEs I.E pmd, pud or p4d is modified, a sfence.vma is
a must for safe, imagine if an implementation caches the non-leaf
translation in TLB, although I didn't meet this HW so far, but it's
possible in theory.

Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
---
 arch/riscv/include/asm/pgalloc.h | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)
  

Comments

Alexandre Ghiti Dec. 31, 2023, 6:21 a.m. UTC | #1
Hi Jisheng,

On 19/12/2023 18:50, Jisheng Zhang wrote:
> If non-leaf PTEs I.E pmd, pud or p4d is modified, a sfence.vma is
> a must for safe, imagine if an implementation caches the non-leaf
> translation in TLB, although I didn't meet this HW so far, but it's
> possible in theory.
>
> Signed-off-by: Jisheng Zhang<jszhang@kernel.org>
> ---
>   arch/riscv/include/asm/pgalloc.h | 20 +++++++++++++++++---
>   1 file changed, 17 insertions(+), 3 deletions(-)
>
> diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
> index d169a4f41a2e..a12fb83fa1f5 100644
> --- a/arch/riscv/include/asm/pgalloc.h
> +++ b/arch/riscv/include/asm/pgalloc.h
> @@ -95,7 +95,13 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
>   		__pud_free(mm, pud);
>   }
>   
> -#define __pud_free_tlb(tlb, pud, addr)  pud_free((tlb)->mm, pud)
> +#define __pud_free_tlb(tlb, pud, addr)					\
> +do {									\
> +	if (pgtable_l4_enabled) {					\
> +		pagetable_pud_dtor(virt_to_ptdesc(pud));		\
> +		tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pud));	\


The specification indeed states that an sfence.vma must be emitted after 
a page directory modification. Your change is not enough though since 
eventually tlb_flush() is called and in this function we should add:

if (tlb->freed_tables)
     tlb_flush_mm();

otherwise we are not guaranteed that a "global" sfence.vma is called.

Would you be able to benchmark this change and see the performance impact?

Thanks,

Alex


> +	}								\
> +} while (0)
>   
>   #define p4d_alloc_one p4d_alloc_one
>   static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
> @@ -124,7 +130,11 @@ static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
>   		__p4d_free(mm, p4d);
>   }
>   
> -#define __p4d_free_tlb(tlb, p4d, addr)  p4d_free((tlb)->mm, p4d)
> +#define __p4d_free_tlb(tlb, p4d, addr)					\
> +do {									\
> +	if (pgtable_l5_enabled)						\
> +		tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(p4d));	\
> +} while (0)
>   #endif /* __PAGETABLE_PMD_FOLDED */
>   
>   static inline void sync_kernel_mappings(pgd_t *pgd)
> @@ -149,7 +159,11 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
>   
>   #ifndef __PAGETABLE_PMD_FOLDED
>   
> -#define __pmd_free_tlb(tlb, pmd, addr)  pmd_free((tlb)->mm, pmd)
> +#define __pmd_free_tlb(tlb, pmd, addr)				\
> +do {								\
> +	pagetable_pmd_dtor(virt_to_ptdesc(pmd));		\
> +	tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pmd));	\
> +} while (0)
>   
>   #endif /* __PAGETABLE_PMD_FOLDED */
>
  
Alexandre Ghiti Jan. 4, 2024, 10:55 a.m. UTC | #2
On 19/12/2023 18:50, Jisheng Zhang wrote:
> If non-leaf PTEs I.E pmd, pud or p4d is modified, a sfence.vma is
> a must for safe, imagine if an implementation caches the non-leaf
> translation in TLB, although I didn't meet this HW so far, but it's
> possible in theory.


And since this is a fix, it would be worth trying to add a Fixes tag 
here. Not easy I agree because it fixes several commits (I have 
07037db5d479f,  e8a62cc26ddf5, d10efa21a9374 and c5e9b2c2ae822 if you 
implement tlb_flush() as I suggested).

So I would add the latest commit as the Fixes commit (which would be 
c5e9b2c2ae822), and then I'd send a patch to stable for each commit with 
the right Fixes tag...@Conor: let me know if you have a simpler idea or 
if this is wrong.

Thanks,

Alex


> Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
> ---
>   arch/riscv/include/asm/pgalloc.h | 20 +++++++++++++++++---
>   1 file changed, 17 insertions(+), 3 deletions(-)
>
> diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
> index d169a4f41a2e..a12fb83fa1f5 100644
> --- a/arch/riscv/include/asm/pgalloc.h
> +++ b/arch/riscv/include/asm/pgalloc.h
> @@ -95,7 +95,13 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
>   		__pud_free(mm, pud);
>   }
>   
> -#define __pud_free_tlb(tlb, pud, addr)  pud_free((tlb)->mm, pud)
> +#define __pud_free_tlb(tlb, pud, addr)					\
> +do {									\
> +	if (pgtable_l4_enabled) {					\
> +		pagetable_pud_dtor(virt_to_ptdesc(pud));		\
> +		tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pud));	\
> +	}								\
> +} while (0)
>   
>   #define p4d_alloc_one p4d_alloc_one
>   static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
> @@ -124,7 +130,11 @@ static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
>   		__p4d_free(mm, p4d);
>   }
>   
> -#define __p4d_free_tlb(tlb, p4d, addr)  p4d_free((tlb)->mm, p4d)
> +#define __p4d_free_tlb(tlb, p4d, addr)					\
> +do {									\
> +	if (pgtable_l5_enabled)						\
> +		tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(p4d));	\
> +} while (0)
>   #endif /* __PAGETABLE_PMD_FOLDED */
>   
>   static inline void sync_kernel_mappings(pgd_t *pgd)
> @@ -149,7 +159,11 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
>   
>   #ifndef __PAGETABLE_PMD_FOLDED
>   
> -#define __pmd_free_tlb(tlb, pmd, addr)  pmd_free((tlb)->mm, pmd)
> +#define __pmd_free_tlb(tlb, pmd, addr)				\
> +do {								\
> +	pagetable_pmd_dtor(virt_to_ptdesc(pmd));		\
> +	tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pmd));	\
> +} while (0)
>   
>   #endif /* __PAGETABLE_PMD_FOLDED */
>
  
Palmer Dabbelt Jan. 10, 2024, 2:52 p.m. UTC | #3
On Thu, 04 Jan 2024 02:55:40 PST (-0800), alex@ghiti.fr wrote:
> On 19/12/2023 18:50, Jisheng Zhang wrote:
>> If non-leaf PTEs I.E pmd, pud or p4d is modified, a sfence.vma is
>> a must for safe, imagine if an implementation caches the non-leaf
>> translation in TLB, although I didn't meet this HW so far, but it's
>> possible in theory.
>
>
> And since this is a fix, it would be worth trying to add a Fixes tag
> here. Not easy I agree because it fixes several commits (I have
> 07037db5d479f,  e8a62cc26ddf5, d10efa21a9374 and c5e9b2c2ae822 if you
> implement tlb_flush() as I suggested).
>
> So I would add the latest commit as the Fixes commit (which would be
> c5e9b2c2ae822), and then I'd send a patch to stable for each commit with
> the right Fixes tag...@Conor: let me know if you have a simpler idea or
> if this is wrong.

I just went with

Fixes: c5e9b2c2ae82 ("riscv: Improve tlb_flush()")
Cc: stable@vger.kernel.org

hopefully that's fine.  It's still getting tested, it's batched up with 
some other stuff and I managed to find a bad merge so it might take a 
bit...

>
> Thanks,
>
> Alex
>
>
>> Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
>> ---
>>   arch/riscv/include/asm/pgalloc.h | 20 +++++++++++++++++---
>>   1 file changed, 17 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
>> index d169a4f41a2e..a12fb83fa1f5 100644
>> --- a/arch/riscv/include/asm/pgalloc.h
>> +++ b/arch/riscv/include/asm/pgalloc.h
>> @@ -95,7 +95,13 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud)
>>   		__pud_free(mm, pud);
>>   }
>>
>> -#define __pud_free_tlb(tlb, pud, addr)  pud_free((tlb)->mm, pud)
>> +#define __pud_free_tlb(tlb, pud, addr)					\
>> +do {									\
>> +	if (pgtable_l4_enabled) {					\
>> +		pagetable_pud_dtor(virt_to_ptdesc(pud));		\
>> +		tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pud));	\
>> +	}								\
>> +} while (0)
>>
>>   #define p4d_alloc_one p4d_alloc_one
>>   static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
>> @@ -124,7 +130,11 @@ static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
>>   		__p4d_free(mm, p4d);
>>   }
>>
>> -#define __p4d_free_tlb(tlb, p4d, addr)  p4d_free((tlb)->mm, p4d)
>> +#define __p4d_free_tlb(tlb, p4d, addr)					\
>> +do {									\
>> +	if (pgtable_l5_enabled)						\
>> +		tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(p4d));	\
>> +} while (0)
>>   #endif /* __PAGETABLE_PMD_FOLDED */
>>
>>   static inline void sync_kernel_mappings(pgd_t *pgd)
>> @@ -149,7 +159,11 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
>>
>>   #ifndef __PAGETABLE_PMD_FOLDED
>>
>> -#define __pmd_free_tlb(tlb, pmd, addr)  pmd_free((tlb)->mm, pmd)
>> +#define __pmd_free_tlb(tlb, pmd, addr)				\
>> +do {								\
>> +	pagetable_pmd_dtor(virt_to_ptdesc(pmd));		\
>> +	tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pmd));	\
>> +} while (0)
>>
>>   #endif /* __PAGETABLE_PMD_FOLDED */
>>
  
Alexandre Ghiti Feb. 7, 2024, 7:28 a.m. UTC | #4
Hi Jisheng,

On 31/12/2023 07:21, Alexandre Ghiti wrote:
> Hi Jisheng,
>
> On 19/12/2023 18:50, Jisheng Zhang wrote:
>> If non-leaf PTEs I.E pmd, pud or p4d is modified, a sfence.vma is
>> a must for safe, imagine if an implementation caches the non-leaf
>> translation in TLB, although I didn't meet this HW so far, but it's
>> possible in theory.
>>
>> Signed-off-by: Jisheng Zhang<jszhang@kernel.org>
>> ---
>>   arch/riscv/include/asm/pgalloc.h | 20 +++++++++++++++++---
>>   1 file changed, 17 insertions(+), 3 deletions(-)
>>
>> diff --git a/arch/riscv/include/asm/pgalloc.h 
>> b/arch/riscv/include/asm/pgalloc.h
>> index d169a4f41a2e..a12fb83fa1f5 100644
>> --- a/arch/riscv/include/asm/pgalloc.h
>> +++ b/arch/riscv/include/asm/pgalloc.h
>> @@ -95,7 +95,13 @@ static inline void pud_free(struct mm_struct *mm, 
>> pud_t *pud)
>>           __pud_free(mm, pud);
>>   }
>>   -#define __pud_free_tlb(tlb, pud, addr) pud_free((tlb)->mm, pud)
>> +#define __pud_free_tlb(tlb, pud, addr)                    \
>> +do {                                    \
>> +    if (pgtable_l4_enabled) {                    \
>> +        pagetable_pud_dtor(virt_to_ptdesc(pud));        \
>> +        tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pud)); \
>
>
> The specification indeed states that an sfence.vma must be emitted 
> after a page directory modification. Your change is not enough though 
> since eventually tlb_flush() is called and in this function we should 
> add:
>
> if (tlb->freed_tables)
>     tlb_flush_mm();


I sent a patch for that here 
https://lore.kernel.org/linux-riscv/20240128120405.25876-1-alexghiti@rivosinc.com/

You can add:

Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>

Thanks,

Alex


>
> otherwise we are not guaranteed that a "global" sfence.vma is called.
>
> Would you be able to benchmark this change and see the performance 
> impact?
>
> Thanks,
>
> Alex
>
>
>> +    }                                \
>> +} while (0)
>>     #define p4d_alloc_one p4d_alloc_one
>>   static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned 
>> long addr)
>> @@ -124,7 +130,11 @@ static inline void p4d_free(struct mm_struct 
>> *mm, p4d_t *p4d)
>>           __p4d_free(mm, p4d);
>>   }
>>   -#define __p4d_free_tlb(tlb, p4d, addr) p4d_free((tlb)->mm, p4d)
>> +#define __p4d_free_tlb(tlb, p4d, addr)                    \
>> +do {                                    \
>> +    if (pgtable_l5_enabled)                        \
>> +        tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(p4d)); \
>> +} while (0)
>>   #endif /* __PAGETABLE_PMD_FOLDED */
>>     static inline void sync_kernel_mappings(pgd_t *pgd)
>> @@ -149,7 +159,11 @@ static inline pgd_t *pgd_alloc(struct mm_struct 
>> *mm)
>>     #ifndef __PAGETABLE_PMD_FOLDED
>>   -#define __pmd_free_tlb(tlb, pmd, addr) pmd_free((tlb)->mm, pmd)
>> +#define __pmd_free_tlb(tlb, pmd, addr)                \
>> +do {                                \
>> +    pagetable_pmd_dtor(virt_to_ptdesc(pmd));        \
>> +    tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pmd));    \
>> +} while (0)
>>     #endif /* __PAGETABLE_PMD_FOLDED */
>
  

Patch

diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index d169a4f41a2e..a12fb83fa1f5 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -95,7 +95,13 @@  static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 		__pud_free(mm, pud);
 }
 
-#define __pud_free_tlb(tlb, pud, addr)  pud_free((tlb)->mm, pud)
+#define __pud_free_tlb(tlb, pud, addr)					\
+do {									\
+	if (pgtable_l4_enabled) {					\
+		pagetable_pud_dtor(virt_to_ptdesc(pud));		\
+		tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pud));	\
+	}								\
+} while (0)
 
 #define p4d_alloc_one p4d_alloc_one
 static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
@@ -124,7 +130,11 @@  static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
 		__p4d_free(mm, p4d);
 }
 
-#define __p4d_free_tlb(tlb, p4d, addr)  p4d_free((tlb)->mm, p4d)
+#define __p4d_free_tlb(tlb, p4d, addr)					\
+do {									\
+	if (pgtable_l5_enabled)						\
+		tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(p4d));	\
+} while (0)
 #endif /* __PAGETABLE_PMD_FOLDED */
 
 static inline void sync_kernel_mappings(pgd_t *pgd)
@@ -149,7 +159,11 @@  static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 
 #ifndef __PAGETABLE_PMD_FOLDED
 
-#define __pmd_free_tlb(tlb, pmd, addr)  pmd_free((tlb)->mm, pmd)
+#define __pmd_free_tlb(tlb, pmd, addr)				\
+do {								\
+	pagetable_pmd_dtor(virt_to_ptdesc(pmd));		\
+	tlb_remove_page_ptdesc((tlb), virt_to_ptdesc(pmd));	\
+} while (0)
 
 #endif /* __PAGETABLE_PMD_FOLDED */