[RFC,1/4] arm64/mm: Add SW and HW dirty state helpers

Message ID 20230707053331.510041-2-anshuman.khandual@arm.com
State New
Headers
Series arm64/mm: Clean up pte_dirty() state management |

Commit Message

Anshuman Khandual July 7, 2023, 5:33 a.m. UTC
  This factors out low level SW and HW state changes i.e make and clear into
separate helpers making them explicit improving readability. This also adds
pte_rdonly() helper as well. No functional change is intended.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
---
 arch/arm64/include/asm/pgtable.h | 52 ++++++++++++++++++++++++++------
 1 file changed, 42 insertions(+), 10 deletions(-)
  

Comments

David Hildenbrand July 7, 2023, 12:09 p.m. UTC | #1
On 07.07.23 07:33, Anshuman Khandual wrote:
> This factors out low level SW and HW state changes i.e make and clear into
> separate helpers making them explicit improving readability. This also adds
> pte_rdonly() helper as well. No functional change is intended.
> 
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will@kernel.org>
> Cc: linux-arm-kernel@lists.infradead.org
> Cc: linux-kernel@vger.kernel.org
> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
> ---
>   arch/arm64/include/asm/pgtable.h | 52 ++++++++++++++++++++++++++------
>   1 file changed, 42 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> index 0bd18de9fd97..fb03be697819 100644
> --- a/arch/arm64/include/asm/pgtable.h
> +++ b/arch/arm64/include/asm/pgtable.h
> @@ -103,6 +103,7 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
>   #define pte_young(pte)		(!!(pte_val(pte) & PTE_AF))
>   #define pte_special(pte)	(!!(pte_val(pte) & PTE_SPECIAL))
>   #define pte_write(pte)		(!!(pte_val(pte) & PTE_WRITE))
> +#define pte_rdonly(pte)		(!!(pte_val(pte) & PTE_RDONLY))
>   #define pte_user(pte)		(!!(pte_val(pte) & PTE_USER))
>   #define pte_user_exec(pte)	(!(pte_val(pte) & PTE_UXN))
>   #define pte_cont(pte)		(!!(pte_val(pte) & PTE_CONT))
> @@ -120,7 +121,7 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
>   	(__boundary - 1 < (end) - 1) ? __boundary : (end);			\
>   })
>   
> -#define pte_hw_dirty(pte)	(pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
> +#define pte_hw_dirty(pte)	(pte_write(pte) && !pte_rdonly(pte))
>   #define pte_sw_dirty(pte)	(!!(pte_val(pte) & PTE_DIRTY))
>   #define pte_dirty(pte)		(pte_sw_dirty(pte) || pte_hw_dirty(pte))
>   
> @@ -174,6 +175,39 @@ static inline pmd_t clear_pmd_bit(pmd_t pmd, pgprot_t prot)
>   	return pmd;
>   }
>   
> +static inline pte_t pte_hw_mkdirty(pte_t pte)

I'd have called this "pte_mkhw_dirty", similar to "pte_mksoft_dirty".

> +{
> +	if (pte_write(pte))
> +		pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));
> +
> +	return pte;
> +}
> +
> +static inline pte_t pte_sw_mkdirty(pte_t pte)

pte_mksw_dirty

> +{
> +	return set_pte_bit(pte, __pgprot(PTE_DIRTY));
> +}
> +
> +static inline __always_unused pte_t pte_hw_clr_dirty(pte_t pte)

pte_clear_hw_dirty (again, similar to pte_clear_soft_dirty )

> +{
> +	return set_pte_bit(pte, __pgprot(PTE_RDONLY));
> +}
> +
> +static inline pte_t pte_sw_clr_dirty(pte_t pte)

pte_clear_sw_dirty

> +{
> +	pte = clear_pte_bit(pte, __pgprot(PTE_DIRTY));
> +
> +	/*
> +	 * Clearing the software dirty state requires clearing
> +	 * the PTE_DIRTY bit along with setting the PTE_RDONLY
> +	 * ensuring a page fault on subsequent write access.
> +	 *
> +	 * NOTE: Setting the PTE_RDONLY (as a coincident) also
> +	 * implies clearing the HW dirty state.
> +	 */
> +	return set_pte_bit(pte, __pgprot(PTE_RDONLY));
> +}
> +
>   static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot)
>   {
>   	pmd_val(pmd) |= pgprot_val(prot);
> @@ -189,19 +223,17 @@ static inline pte_t pte_mkwrite(pte_t pte)
>   
>   static inline pte_t pte_mkclean(pte_t pte)
>   {
> -	pte = clear_pte_bit(pte, __pgprot(PTE_DIRTY));
> -	pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
> -
> -	return pte;
> +	/*
> +	 * Subsequent call to pte_hw_clr_dirty() is not required
> +	 * because pte_sw_clr_dirty() in turn does that as well.
> +	 */
> +	return pte_sw_clr_dirty(pte);

Hm, I'm not sure if that simplifies things.

You call pte_sw_clr_dirty() and suddenly your hw dirty bit is clear?

In that case I think the current implementation is clearer: it doesn't 
provide primitives that don't make any sense.

>   }
>   
>   static inline pte_t pte_mkdirty(pte_t pte)
>   {
> -	pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));
> -
> -	if (pte_write(pte))
> -		pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));
> -
> +	pte = pte_sw_mkdirty(pte);
> +	pte = pte_hw_mkdirty(pte);

That looks weird. Especially, pte_hw_mkdirty() only does something if 
pte_write().

Shouldn't pte_hw_mkdirty() bail out if it cannot do anything reasonable 
(IOW, !writable)?

>   	return pte;
>   }
>
  
Anshuman Khandual July 10, 2023, 2:54 a.m. UTC | #2
On 7/7/23 17:39, David Hildenbrand wrote:
> On 07.07.23 07:33, Anshuman Khandual wrote:
>> This factors out low level SW and HW state changes i.e make and clear into
>> separate helpers making them explicit improving readability. This also adds
>> pte_rdonly() helper as well. No functional change is intended.
>>
>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>> Cc: Will Deacon <will@kernel.org>
>> Cc: linux-arm-kernel@lists.infradead.org
>> Cc: linux-kernel@vger.kernel.org
>> Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
>> ---
>>   arch/arm64/include/asm/pgtable.h | 52 ++++++++++++++++++++++++++------
>>   1 file changed, 42 insertions(+), 10 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
>> index 0bd18de9fd97..fb03be697819 100644
>> --- a/arch/arm64/include/asm/pgtable.h
>> +++ b/arch/arm64/include/asm/pgtable.h
>> @@ -103,6 +103,7 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
>>   #define pte_young(pte)        (!!(pte_val(pte) & PTE_AF))
>>   #define pte_special(pte)    (!!(pte_val(pte) & PTE_SPECIAL))
>>   #define pte_write(pte)        (!!(pte_val(pte) & PTE_WRITE))
>> +#define pte_rdonly(pte)        (!!(pte_val(pte) & PTE_RDONLY))
>>   #define pte_user(pte)        (!!(pte_val(pte) & PTE_USER))
>>   #define pte_user_exec(pte)    (!(pte_val(pte) & PTE_UXN))
>>   #define pte_cont(pte)        (!!(pte_val(pte) & PTE_CONT))
>> @@ -120,7 +121,7 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
>>       (__boundary - 1 < (end) - 1) ? __boundary : (end);            \
>>   })
>>   -#define pte_hw_dirty(pte)    (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
>> +#define pte_hw_dirty(pte)    (pte_write(pte) && !pte_rdonly(pte))
>>   #define pte_sw_dirty(pte)    (!!(pte_val(pte) & PTE_DIRTY))
>>   #define pte_dirty(pte)        (pte_sw_dirty(pte) || pte_hw_dirty(pte))
>>   @@ -174,6 +175,39 @@ static inline pmd_t clear_pmd_bit(pmd_t pmd, pgprot_t prot)
>>       return pmd;
>>   }
>>   +static inline pte_t pte_hw_mkdirty(pte_t pte)
> 
> I'd have called this "pte_mkhw_dirty", similar to "pte_mksoft_dirty".
> 
>> +{
>> +    if (pte_write(pte))
>> +        pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));
>> +
>> +    return pte;
>> +}
>> +
>> +static inline pte_t pte_sw_mkdirty(pte_t pte)
> 
> pte_mksw_dirty

Sure, will change them as pte_mkhw_dirty()/pte_mksw_dirty() instead.

> 
>> +{
>> +    return set_pte_bit(pte, __pgprot(PTE_DIRTY));
>> +}
>> +
>> +static inline __always_unused pte_t pte_hw_clr_dirty(pte_t pte)
> 
> pte_clear_hw_dirty (again, similar to pte_clear_soft_dirty )
> 
>> +{
>> +    return set_pte_bit(pte, __pgprot(PTE_RDONLY));
>> +}
>> +
>> +static inline pte_t pte_sw_clr_dirty(pte_t pte)
> 
> pte_clear_sw_dirty

Sure, will change them as pte_clear_hw_dirty()/pte_clear_sw_dirty() instead.

> 
>> +{
>> +    pte = clear_pte_bit(pte, __pgprot(PTE_DIRTY));
>> +
>> +    /*
>> +     * Clearing the software dirty state requires clearing
>> +     * the PTE_DIRTY bit along with setting the PTE_RDONLY
>> +     * ensuring a page fault on subsequent write access.
>> +     *
>> +     * NOTE: Setting the PTE_RDONLY (as a coincident) also
>> +     * implies clearing the HW dirty state.
>> +     */
>> +    return set_pte_bit(pte, __pgprot(PTE_RDONLY));
>> +}
>> +
>>   static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot)
>>   {
>>       pmd_val(pmd) |= pgprot_val(prot);
>> @@ -189,19 +223,17 @@ static inline pte_t pte_mkwrite(pte_t pte)
>>     static inline pte_t pte_mkclean(pte_t pte)
>>   {
>> -    pte = clear_pte_bit(pte, __pgprot(PTE_DIRTY));
>> -    pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
>> -
>> -    return pte;
>> +    /*
>> +     * Subsequent call to pte_hw_clr_dirty() is not required
>> +     * because pte_sw_clr_dirty() in turn does that as well.
>> +     */
>> +    return pte_sw_clr_dirty(pte);
> 
> Hm, I'm not sure if that simplifies things.
> 
> You call pte_sw_clr_dirty() and suddenly your hw dirty bit is clear?

Because clearing HW dirty bit just needs setting PTE_RDONLY bit, which as
a coincidence is also required, after clearing the SW dirty bit to enable
a subsequent write fault. Here pte_sw_clr_dirty() just happen to contain
pte_hw_clr_dirty().

> 
> In that case I think the current implementation is clearer: it doesn't provide primitives that don't make any sense.

It actually does a SW dirty bit clearing which also takes care of HW dirty
bit clearing without saying so explicitly. These new helpers demonstrate
bit clearly what is happening.

> 
>>   }
>>     static inline pte_t pte_mkdirty(pte_t pte)
>>   {
>> -    pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));
>> -
>> -    if (pte_write(pte))
>> -        pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));
>> -
>> +    pte = pte_sw_mkdirty(pte);
>> +    pte = pte_hw_mkdirty(pte);
> 
> That looks weird. Especially, pte_hw_mkdirty() only does something if pte_write().

pte_write() check asserts if DBM is implemented and being used before clearing
PTE_RDONLY making it a HW dirty state. If pte_write() is cleared, either DBM
is not implemented or it's a non-writable entry, either way dirty state cannot
be tracked in HW.

> 
> Shouldn't pte_hw_mkdirty() bail out if it cannot do anything reasonable (IOW, !writable)?

static inline pte_t pte_hw_mkdirty(pte_t pte)
{
	if (pte_write(pte))
		pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));

	return pte;
}

If pte_write() is not positive, it's in !writable state on DBM enabled systems.
Otherwise pte_write() state does not matter, as the bit position does not make
sense on non DBM enabled systems.

> 
>>       return pte;
>>   }
>>   
>
  

Patch

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 0bd18de9fd97..fb03be697819 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -103,6 +103,7 @@  static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
 #define pte_young(pte)		(!!(pte_val(pte) & PTE_AF))
 #define pte_special(pte)	(!!(pte_val(pte) & PTE_SPECIAL))
 #define pte_write(pte)		(!!(pte_val(pte) & PTE_WRITE))
+#define pte_rdonly(pte)		(!!(pte_val(pte) & PTE_RDONLY))
 #define pte_user(pte)		(!!(pte_val(pte) & PTE_USER))
 #define pte_user_exec(pte)	(!(pte_val(pte) & PTE_UXN))
 #define pte_cont(pte)		(!!(pte_val(pte) & PTE_CONT))
@@ -120,7 +121,7 @@  static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
 	(__boundary - 1 < (end) - 1) ? __boundary : (end);			\
 })
 
-#define pte_hw_dirty(pte)	(pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
+#define pte_hw_dirty(pte)	(pte_write(pte) && !pte_rdonly(pte))
 #define pte_sw_dirty(pte)	(!!(pte_val(pte) & PTE_DIRTY))
 #define pte_dirty(pte)		(pte_sw_dirty(pte) || pte_hw_dirty(pte))
 
@@ -174,6 +175,39 @@  static inline pmd_t clear_pmd_bit(pmd_t pmd, pgprot_t prot)
 	return pmd;
 }
 
+static inline pte_t pte_hw_mkdirty(pte_t pte)
+{
+	if (pte_write(pte))
+		pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));
+
+	return pte;
+}
+
+static inline pte_t pte_sw_mkdirty(pte_t pte)
+{
+	return set_pte_bit(pte, __pgprot(PTE_DIRTY));
+}
+
+static inline __always_unused pte_t pte_hw_clr_dirty(pte_t pte)
+{
+	return set_pte_bit(pte, __pgprot(PTE_RDONLY));
+}
+
+static inline pte_t pte_sw_clr_dirty(pte_t pte)
+{
+	pte = clear_pte_bit(pte, __pgprot(PTE_DIRTY));
+
+	/*
+	 * Clearing the software dirty state requires clearing
+	 * the PTE_DIRTY bit along with setting the PTE_RDONLY
+	 * ensuring a page fault on subsequent write access.
+	 *
+	 * NOTE: Setting the PTE_RDONLY (as a coincident) also
+	 * implies clearing the HW dirty state.
+	 */
+	return set_pte_bit(pte, __pgprot(PTE_RDONLY));
+}
+
 static inline pmd_t set_pmd_bit(pmd_t pmd, pgprot_t prot)
 {
 	pmd_val(pmd) |= pgprot_val(prot);
@@ -189,19 +223,17 @@  static inline pte_t pte_mkwrite(pte_t pte)
 
 static inline pte_t pte_mkclean(pte_t pte)
 {
-	pte = clear_pte_bit(pte, __pgprot(PTE_DIRTY));
-	pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
-
-	return pte;
+	/*
+	 * Subsequent call to pte_hw_clr_dirty() is not required
+	 * because pte_sw_clr_dirty() in turn does that as well.
+	 */
+	return pte_sw_clr_dirty(pte);
 }
 
 static inline pte_t pte_mkdirty(pte_t pte)
 {
-	pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));
-
-	if (pte_write(pte))
-		pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY));
-
+	pte = pte_sw_mkdirty(pte);
+	pte = pte_hw_mkdirty(pte);
 	return pte;
 }