parisc/unaligned: Rewrite 64-bit inline assembly of emulate_ldd()

Message ID 20240216073315.3801833-1-linux@roeck-us.net
State New
Headers
Series parisc/unaligned: Rewrite 64-bit inline assembly of emulate_ldd() |

Commit Message

Guenter Roeck Feb. 16, 2024, 7:33 a.m. UTC
  Convert to use real temp variables instead of clobbering processor
registers. This aligns the 64-bit inline assembly code with the 32-bit
assembly code which was rewritten with commit 427c1073a2a1
("parisc/unaligned: Rewrite 32-bit inline assembly of emulate_ldd()").

While at it, fix comment in 32-bit rewrite code. Temporary variables are
now used for both 32-bit and 64-bit code, so move their declarations
to the function header.

No functional change intended.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
Implemented while analyzing a bug. I am not really sure if it is worth
the effort, but I figured that I might as well submit it.

 arch/parisc/kernel/unaligned.c | 29 +++++++++++++----------------
 1 file changed, 13 insertions(+), 16 deletions(-)
  

Comments

Helge Deller Feb. 16, 2024, 1:48 p.m. UTC | #1
On 2/16/24 08:33, Guenter Roeck wrote:
> Convert to use real temp variables instead of clobbering processor
> registers.

Thanks for doing this.
It has been on my todo list for quite some time :-)

> This aligns the 64-bit inline assembly code with the 32-bit
> assembly code which was rewritten with commit 427c1073a2a1
> ("parisc/unaligned: Rewrite 32-bit inline assembly of emulate_ldd()").
>
> While at it, fix comment in 32-bit rewrite code. Temporary variables are
> now used for both 32-bit and 64-bit code, so move their declarations
> to the function header.
>
> No functional change intended.
>
> Signed-off-by: Guenter Roeck <linux@roeck-us.net>
> ---
> Implemented while analyzing a bug. I am not really sure of it is worth
> the effort, but I figured that I might as well submit it.
>
>   arch/parisc/kernel/unaligned.c | 29 +++++++++++++----------------
>   1 file changed, 13 insertions(+), 16 deletions(-)
>
> diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
> index c520e551a165..622c7b549fb8 100644
> --- a/arch/parisc/kernel/unaligned.c
> +++ b/arch/parisc/kernel/unaligned.c
> @@ -169,7 +169,8 @@ static int emulate_ldw(struct pt_regs *regs, int toreg, int flop)
>   static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
>   {
>   	unsigned long saddr = regs->ior;
> -	__u64 val = 0;
> +	unsigned long shift;
> +	__u64 val = 0, temp1;

temp1 is ok to be "long".

>   	ASM_EXCEPTIONTABLE_VAR(ret);
>
>   	DPRINTF("load " RFMT ":" RFMT " to r%d for 8 bytes\n",
> @@ -180,25 +181,22 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
>
>   #ifdef CONFIG_64BIT
>   	__asm__ __volatile__  (
> -"	depd,z	%3,60,3,%%r19\n"		/* r19=(ofs&7)*8 */
> -"	mtsp	%4, %%sr1\n"
> -"	depd	%%r0,63,3,%3\n"
> -"1:	ldd	0(%%sr1,%3),%0\n"
> -"2:	ldd	8(%%sr1,%3),%%r20\n"
> -"	subi	64,%%r19,%%r19\n"
> -"	mtsar	%%r19\n"
> -"	shrpd	%0,%%r20,%%sar,%0\n"
> +"	depd,z	%4,60,3,%2\n"		/* shift=(ofs&7)*8 */
> +"	mtsp	%5, %%sr1\n"
> +"	depd	%%r0,63,3,%4\n"
> +"1:	ldd	0(%%sr1,%4),%0\n"
> +"2:	ldd	8(%%sr1,%4),%3\n"
> +"	subi	64,%2,%2\n"
> +"	mtsar	%2\n"
> +"	shrpd	%0,%3,%%sar,%0\n"
>   "3:	\n"
>   	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1")
>   	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1")
> -	: "=r" (val), "+r" (ret)
> -	: "0" (val), "r" (saddr), "r" (regs->isr)
> -	: "r19", "r20" );
> +	: "+r" (val), "+r" (ret), "=&r" (shift), "=&r" (temp1)
> +	: "r" (saddr), "r" (regs->isr) );

saddr is actually being modified (by the second depd instruction).
That's why I moved it into the output operands and
moved shift and temp1 back by one position, so that the operands
are now in the same order as on the 32-bit path.

I've pushed the modified patch here:
https://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux.git/commit/?h=for-next&id=a6ea53ce77e9dd6e388d673bdd4d80741f97b914

Please double-check!

Thanks!
Helge


>   #else
> -    {
> -	unsigned long shift, temp1;
>   	__asm__ __volatile__  (
> -"	zdep	%2,29,2,%3\n"		/* r19=(ofs&3)*8 */
> +"	zdep	%2,29,2,%3\n"		/* shift=(ofs&3)*8 */
>   "	mtsp	%5, %%sr1\n"
>   "	dep	%%r0,31,2,%2\n"
>   "1:	ldw	0(%%sr1,%2),%0\n"
> @@ -214,7 +212,6 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
>   	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b, "%1")
>   	: "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1)
>   	: "r" (regs->isr) );
> -    }
>   #endif
>
>   	DPRINTF("val = 0x%llx\n", val);
  
Guenter Roeck Feb. 16, 2024, 3:15 p.m. UTC | #2
On 2/16/24 05:48, Helge Deller wrote:
> On 2/16/24 08:33, Guenter Roeck wrote:
>> Convert to use real temp variables instead of clobbering processor
>> registers.
> 
> Thanks for doing this.
> It was on my todo list since quite some time :-)
> 
>> This aligns the 64-bit inline assembly code with the 32-bit
>> assembly code which was rewritten with commit 427c1073a2a1
>> ("parisc/unaligned: Rewrite 32-bit inline assembly of emulate_ldd()").
>>
>> While at it, fix comment in 32-bit rewrite code. Temporary variables are
>> now used for both 32-bit and 64-bit code, so move their declarations
>> to the function header.
>>
>> No functional change intended.
>>
>> Signed-off-by: Guenter Roeck <linux@roeck-us.net>
>> ---
>> Implemented while analyzing a bug. I am not really sure of it is worth
>> the effort, but I figured that I might as well submit it.
>>
>>   arch/parisc/kernel/unaligned.c | 29 +++++++++++++----------------
>>   1 file changed, 13 insertions(+), 16 deletions(-)
>>
>> diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
>> index c520e551a165..622c7b549fb8 100644
>> --- a/arch/parisc/kernel/unaligned.c
>> +++ b/arch/parisc/kernel/unaligned.c
>> @@ -169,7 +169,8 @@ static int emulate_ldw(struct pt_regs *regs, int toreg, int flop)
>>   static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
>>   {
>>       unsigned long saddr = regs->ior;
>> -    __u64 val = 0;
>> +    unsigned long shift;
>> +    __u64 val = 0, temp1;
> 
> temp1 is ok to be "long".
> 
>>       ASM_EXCEPTIONTABLE_VAR(ret);
>>
>>       DPRINTF("load " RFMT ":" RFMT " to r%d for 8 bytes\n",
>> @@ -180,25 +181,22 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
>>
>>   #ifdef CONFIG_64BIT
>>       __asm__ __volatile__  (
>> -"    depd,z    %3,60,3,%%r19\n"        /* r19=(ofs&7)*8 */
>> -"    mtsp    %4, %%sr1\n"
>> -"    depd    %%r0,63,3,%3\n"
>> -"1:    ldd    0(%%sr1,%3),%0\n"
>> -"2:    ldd    8(%%sr1,%3),%%r20\n"
>> -"    subi    64,%%r19,%%r19\n"
>> -"    mtsar    %%r19\n"
>> -"    shrpd    %0,%%r20,%%sar,%0\n"
>> +"    depd,z    %4,60,3,%2\n"        /* shift=(ofs&7)*8 */
>> +"    mtsp    %5, %%sr1\n"
>> +"    depd    %%r0,63,3,%4\n"
>> +"1:    ldd    0(%%sr1,%4),%0\n"
>> +"2:    ldd    8(%%sr1,%4),%3\n"
>> +"    subi    64,%2,%2\n"
>> +"    mtsar    %2\n"
>> +"    shrpd    %0,%3,%%sar,%0\n"
>>   "3:    \n"
>>       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1")
>>       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1")
>> -    : "=r" (val), "+r" (ret)
>> -    : "0" (val), "r" (saddr), "r" (regs->isr)
>> -    : "r19", "r20" );
>> +    : "+r" (val), "+r" (ret), "=&r" (shift), "=&r" (temp1)
>> +    : "r" (saddr), "r" (regs->isr) );
> 
> addr is actually being modified.
> That's why I moved it into the output registers and
> shuffled shift and temp1 one backwards, so that the registers
> are now in the same ordering as on the 32-bit path.
> 
> I've pushed the modified patch here:
> https://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux.git/commit/?h=for-next&id=a6ea53ce77e9dd6e388d673bdd4d80741f97b914
> 
> Please double-check!
> 
Confirmed working.

Thanks,
Guenter

> Thanks!
> Helge
> 
> 
>>   #else
>> -    {
>> -    unsigned long shift, temp1;
>>       __asm__ __volatile__  (
>> -"    zdep    %2,29,2,%3\n"        /* r19=(ofs&3)*8 */
>> +"    zdep    %2,29,2,%3\n"        /* shift=(ofs&3)*8 */
>>   "    mtsp    %5, %%sr1\n"
>>   "    dep    %%r0,31,2,%2\n"
>>   "1:    ldw    0(%%sr1,%2),%0\n"
>> @@ -214,7 +212,6 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
>>       ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b, "%1")
>>       : "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1)
>>       : "r" (regs->isr) );
>> -    }
>>   #endif
>>
>>       DPRINTF("val = 0x%llx\n", val);
>
  
Guenter Roeck Feb. 26, 2024, 7:29 p.m. UTC | #3
Hi Helge,

On Thu, Feb 15, 2024 at 11:33:15PM -0800, Guenter Roeck wrote:
> Convert to use real temp variables instead of clobbering processor
> registers. This aligns the 64-bit inline assembly code with the 32-bit
> assembly code which was rewritten with commit 427c1073a2a1
> ("parisc/unaligned: Rewrite 32-bit inline assembly of emulate_ldd()").
> 
> While at it, fix comment in 32-bit rewrite code. Temporary variables are
> now used for both 32-bit and 64-bit code, so move their declarations
> to the function header.
> 
> No functional change intended.
> 
> Signed-off-by: Guenter Roeck <linux@roeck-us.net>
> ---
> Implemented while analyzing a bug. I am not really sure of it is worth
> the effort, but I figured that I might as well submit it.
> 
>  arch/parisc/kernel/unaligned.c | 29 +++++++++++++----------------
>  1 file changed, 13 insertions(+), 16 deletions(-)
> 
> diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
> index c520e551a165..622c7b549fb8 100644
> --- a/arch/parisc/kernel/unaligned.c
> +++ b/arch/parisc/kernel/unaligned.c
> @@ -169,7 +169,8 @@ static int emulate_ldw(struct pt_regs *regs, int toreg, int flop)
>  static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
>  {
>  	unsigned long saddr = regs->ior;
> -	__u64 val = 0;
> +	unsigned long shift;
> +	__u64 val = 0, temp1;
>  	ASM_EXCEPTIONTABLE_VAR(ret);
>  
>  	DPRINTF("load " RFMT ":" RFMT " to r%d for 8 bytes\n", 
> @@ -180,25 +181,22 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
>  
>  #ifdef CONFIG_64BIT
>  	__asm__ __volatile__  (
> -"	depd,z	%3,60,3,%%r19\n"		/* r19=(ofs&7)*8 */
> -"	mtsp	%4, %%sr1\n"
> -"	depd	%%r0,63,3,%3\n"
> -"1:	ldd	0(%%sr1,%3),%0\n"
> -"2:	ldd	8(%%sr1,%3),%%r20\n"
> -"	subi	64,%%r19,%%r19\n"
> -"	mtsar	%%r19\n"
> -"	shrpd	%0,%%r20,%%sar,%0\n"
> +"	depd,z	%4,60,3,%2\n"		/* shift=(ofs&7)*8 */
> +"	mtsp	%5, %%sr1\n"
> +"	depd	%%r0,63,3,%4\n"
> +"1:	ldd	0(%%sr1,%4),%0\n"
> +"2:	ldd	8(%%sr1,%4),%3\n"
> +"	subi	64,%2,%2\n"
> +"	mtsar	%2\n"
> +"	shrpd	%0,%3,%%sar,%0\n"
>  "3:	\n"
>  	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1")
>  	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1")
> -	: "=r" (val), "+r" (ret)
> -	: "0" (val), "r" (saddr), "r" (regs->isr)
> -	: "r19", "r20" );
> +	: "+r" (val), "+r" (ret), "=&r" (shift), "=&r" (temp1)
> +	: "r" (saddr), "r" (regs->isr) );

It looks like something went wrong when this patch was applied. It is now

+"      depd,z  %4,60,3,%3\n"           /* shift=(ofs&7)*8 */
..
+       : "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1)
+       : "r" (regs->isr) );

meaning saddr is now %2, but the depd,z instruction
still assumes it is %4. Unfortunately this results in a crash
when trying to boot linux-next on parisc64.

The patch below on top of linux-next fixes the problem for me.

Guenter

---
diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
index 31974eddedc9..a8e75e5b884a 100644
--- a/arch/parisc/kernel/unaligned.c
+++ b/arch/parisc/kernel/unaligned.c
@@ -181,7 +181,7 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)

 #ifdef CONFIG_64BIT
        __asm__ __volatile__  (
-"      depd,z  %4,60,3,%3\n"           /* shift=(ofs&7)*8 */
+"      depd,z  %2,60,3,%3\n"           /* shift=(ofs&7)*8 */
 "      mtsp    %5, %%sr1\n"
 "      depd    %%r0,63,3,%2\n"
 "1:    ldd     0(%%sr1,%2),%0\n"
  
Helge Deller Feb. 26, 2024, 8:18 p.m. UTC | #4
On 2/26/24 20:29, Guenter Roeck wrote:
> Hi Helge,
>
> On Thu, Feb 15, 2024 at 11:33:15PM -0800, Guenter Roeck wrote:
>> Convert to use real temp variables instead of clobbering processor
>> registers. This aligns the 64-bit inline assembly code with the 32-bit
>> assembly code which was rewritten with commit 427c1073a2a1
>> ("parisc/unaligned: Rewrite 32-bit inline assembly of emulate_ldd()").
>>
>> While at it, fix comment in 32-bit rewrite code. Temporary variables are
>> now used for both 32-bit and 64-bit code, so move their declarations
>> to the function header.
>>
>> No functional change intended.
>>
>> Signed-off-by: Guenter Roeck <linux@roeck-us.net>
>> ---
>> Implemented while analyzing a bug. I am not really sure of it is worth
>> the effort, but I figured that I might as well submit it.
>>
>>   arch/parisc/kernel/unaligned.c | 29 +++++++++++++----------------
>>   1 file changed, 13 insertions(+), 16 deletions(-)
>>
>> diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
>> index c520e551a165..622c7b549fb8 100644
>> --- a/arch/parisc/kernel/unaligned.c
>> +++ b/arch/parisc/kernel/unaligned.c
>> @@ -169,7 +169,8 @@ static int emulate_ldw(struct pt_regs *regs, int toreg, int flop)
>>   static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
>>   {
>>   	unsigned long saddr = regs->ior;
>> -	__u64 val = 0;
>> +	unsigned long shift;
>> +	__u64 val = 0, temp1;
>>   	ASM_EXCEPTIONTABLE_VAR(ret);
>>
>>   	DPRINTF("load " RFMT ":" RFMT " to r%d for 8 bytes\n",
>> @@ -180,25 +181,22 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
>>
>>   #ifdef CONFIG_64BIT
>>   	__asm__ __volatile__  (
>> -"	depd,z	%3,60,3,%%r19\n"		/* r19=(ofs&7)*8 */
>> -"	mtsp	%4, %%sr1\n"
>> -"	depd	%%r0,63,3,%3\n"
>> -"1:	ldd	0(%%sr1,%3),%0\n"
>> -"2:	ldd	8(%%sr1,%3),%%r20\n"
>> -"	subi	64,%%r19,%%r19\n"
>> -"	mtsar	%%r19\n"
>> -"	shrpd	%0,%%r20,%%sar,%0\n"
>> +"	depd,z	%4,60,3,%2\n"		/* shift=(ofs&7)*8 */
>> +"	mtsp	%5, %%sr1\n"
>> +"	depd	%%r0,63,3,%4\n"
>> +"1:	ldd	0(%%sr1,%4),%0\n"
>> +"2:	ldd	8(%%sr1,%4),%3\n"
>> +"	subi	64,%2,%2\n"
>> +"	mtsar	%2\n"
>> +"	shrpd	%0,%3,%%sar,%0\n"
>>   "3:	\n"
>>   	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1")
>>   	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1")
>> -	: "=r" (val), "+r" (ret)
>> -	: "0" (val), "r" (saddr), "r" (regs->isr)
>> -	: "r19", "r20" );
>> +	: "+r" (val), "+r" (ret), "=&r" (shift), "=&r" (temp1)
>> +	: "r" (saddr), "r" (regs->isr) );
>
> It looks like something went wrong when this patch was applied.

I think this was my fault when I tried to reshuffle the input vars :-(

> It is now
>
> +"      depd,z  %4,60,3,%3\n"           /* shift=(ofs&7)*8 */
> ...
> +       : "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1)
> +       : "r" (regs->isr) );
>
> meaning saddr is now %2, but the depd,z instruction
> still assumes it is %4. Unfortunately this results in a crash
> when trying to boot linux-next on parisc64.
>
> The patch below on top of linux-next fixes the problem for me.

I fixed it up with your hunk below in the parisc for-next branch, so it should be
fixed in linux-next soon.

THANKS!

Helge



> Guenter
>
> ---
> diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
> index 31974eddedc9..a8e75e5b884a 100644
> --- a/arch/parisc/kernel/unaligned.c
> +++ b/arch/parisc/kernel/unaligned.c
> @@ -181,7 +181,7 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
>
>   #ifdef CONFIG_64BIT
>          __asm__ __volatile__  (
> -"      depd,z  %4,60,3,%3\n"           /* shift=(ofs&7)*8 */
> +"      depd,z  %2,60,3,%3\n"           /* shift=(ofs&7)*8 */
>   "      mtsp    %5, %%sr1\n"
>   "      depd    %%r0,63,3,%2\n"
>   "1:    ldd     0(%%sr1,%2),%0\n"
  

Patch

diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
index c520e551a165..622c7b549fb8 100644
--- a/arch/parisc/kernel/unaligned.c
+++ b/arch/parisc/kernel/unaligned.c
@@ -169,7 +169,8 @@  static int emulate_ldw(struct pt_regs *regs, int toreg, int flop)
 static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
 {
 	unsigned long saddr = regs->ior;
-	__u64 val = 0;
+	unsigned long shift;
+	__u64 val = 0, temp1;
 	ASM_EXCEPTIONTABLE_VAR(ret);
 
 	DPRINTF("load " RFMT ":" RFMT " to r%d for 8 bytes\n", 
@@ -180,25 +181,22 @@  static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
 
 #ifdef CONFIG_64BIT
 	__asm__ __volatile__  (
-"	depd,z	%3,60,3,%%r19\n"		/* r19=(ofs&7)*8 */
-"	mtsp	%4, %%sr1\n"
-"	depd	%%r0,63,3,%3\n"
-"1:	ldd	0(%%sr1,%3),%0\n"
-"2:	ldd	8(%%sr1,%3),%%r20\n"
-"	subi	64,%%r19,%%r19\n"
-"	mtsar	%%r19\n"
-"	shrpd	%0,%%r20,%%sar,%0\n"
+"	depd,z	%4,60,3,%2\n"		/* shift=(ofs&7)*8 */
+"	mtsp	%5, %%sr1\n"
+"	depd	%%r0,63,3,%4\n"
+"1:	ldd	0(%%sr1,%4),%0\n"
+"2:	ldd	8(%%sr1,%4),%3\n"
+"	subi	64,%2,%2\n"
+"	mtsar	%2\n"
+"	shrpd	%0,%3,%%sar,%0\n"
 "3:	\n"
 	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1")
 	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1")
-	: "=r" (val), "+r" (ret)
-	: "0" (val), "r" (saddr), "r" (regs->isr)
-	: "r19", "r20" );
+	: "+r" (val), "+r" (ret), "=&r" (shift), "=&r" (temp1)
+	: "r" (saddr), "r" (regs->isr) );
 #else
-    {
-	unsigned long shift, temp1;
 	__asm__ __volatile__  (
-"	zdep	%2,29,2,%3\n"		/* r19=(ofs&3)*8 */
+"	zdep	%2,29,2,%3\n"		/* shift=(ofs&3)*8 */
 "	mtsp	%5, %%sr1\n"
 "	dep	%%r0,31,2,%2\n"
 "1:	ldw	0(%%sr1,%2),%0\n"
@@ -214,7 +212,6 @@  static int emulate_ldd(struct pt_regs *regs, int toreg, int flop)
 	ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b, "%1")
 	: "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1)
 	: "r" (regs->isr) );
-    }
 #endif
 
 	DPRINTF("val = 0x%llx\n", val);