[1/4] LoongArch: Use trampoline for exception handlers and kill la.abs
Commit Message
From: Xi Ruoyao <xry111@xry111.site>
Use a trampoline as the exception handler, which can kill some uses of
la.abs in preparation for the subsequent support of the PIE kernel.
Signed-off-by: Xi Ruoyao <xry111@xry111.site>
Signed-off-by: Youling Tang <tangyouling@loongson.cn>
---
arch/loongarch/include/asm/stackframe.h | 8 +++----
arch/loongarch/include/asm/uaccess.h | 1 -
arch/loongarch/kernel/entry.S | 6 +++---
arch/loongarch/kernel/genex.S | 20 +++++++++---------
arch/loongarch/kernel/head.S | 2 +-
arch/loongarch/kernel/traps.c | 4 +++-
arch/loongarch/mm/tlbex.S | 28 +++++++------------------
7 files changed, 29 insertions(+), 40 deletions(-)
Comments
Hi, Ruoyao and Youling,
I care about the performance when NUMA is enabled. We set CSR.EENTRY
for each possible CPU to point at NUMA-local memory. So, I guess that
keeping more of the handler code in NUMA-local memory gives better
performance. If we just set handler_trampoline as the exception
handler, the performance may be affected.
Thanks,
Jinyang
On 2023-01-09 17:07, Youling Tang wrote:
> From: Xi Ruoyao <xry111@xry111.site>
>
> Use a trampoline as an exception handlers, which can kill some use of
> la.abs in preparation for the subsequent support of the PIE kernel.
>
> Signed-off-by: Xi Ruoyao <xry111@xry111.site>
> Signed-off-by: Youling Tang <tangyouling@loongson.cn>
> ---
> arch/loongarch/include/asm/stackframe.h | 8 +++----
> arch/loongarch/include/asm/uaccess.h | 1 -
> arch/loongarch/kernel/entry.S | 6 +++---
> arch/loongarch/kernel/genex.S | 20 +++++++++---------
> arch/loongarch/kernel/head.S | 2 +-
> arch/loongarch/kernel/traps.c | 4 +++-
> arch/loongarch/mm/tlbex.S | 28 +++++++------------------
> 7 files changed, 29 insertions(+), 40 deletions(-)
>
> diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h
> index 4ca953062b5b..96c94035b5d0 100644
> --- a/arch/loongarch/include/asm/stackframe.h
> +++ b/arch/loongarch/include/asm/stackframe.h
> @@ -76,8 +76,8 @@
> * kernelsp array for it. It stores the current sp in t0 and loads the
> * new value in sp.
> */
> - .macro get_saved_sp docfi=0
> - la.abs t1, kernelsp
> + .macro get_saved_sp docfi=0
> + la.pcrel t1, kernelsp
> #ifdef CONFIG_SMP
> csrrd t0, PERCPU_BASE_KS
> LONG_ADD t1, t1, t0
> @@ -89,8 +89,8 @@
> LONG_L sp, t1, 0
> .endm
>
> - .macro set_saved_sp stackp temp temp2
> - la.abs \temp, kernelsp
> + .macro set_saved_sp stackp temp temp2
> + la.pcrel \temp, kernelsp
> #ifdef CONFIG_SMP
> LONG_ADD \temp, \temp, u0
> #endif
> diff --git a/arch/loongarch/include/asm/uaccess.h b/arch/loongarch/include/asm/uaccess.h
> index 255899d4a7c3..0d22991ae430 100644
> --- a/arch/loongarch/include/asm/uaccess.h
> +++ b/arch/loongarch/include/asm/uaccess.h
> @@ -22,7 +22,6 @@
> extern u64 __ua_limit;
>
> #define __UA_ADDR ".dword"
> -#define __UA_LA "la.abs"
> #define __UA_LIMIT __ua_limit
>
> /*
> diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S
> index d53b631c9022..ca01afdbec3f 100644
> --- a/arch/loongarch/kernel/entry.S
> +++ b/arch/loongarch/kernel/entry.S
> @@ -18,9 +18,9 @@
> .text
> .cfi_sections .debug_frame
> .align 5
> -SYM_FUNC_START(handle_syscall)
> +SYM_FUNC_START(handle_sys)
> csrrd t0, PERCPU_BASE_KS
> - la.abs t1, kernelsp
> + la.pcrel t1, kernelsp
> add.d t1, t1, t0
> move t2, sp
> ld.d sp, t1, 0
> @@ -66,7 +66,7 @@ SYM_FUNC_START(handle_syscall)
> bl do_syscall
>
> RESTORE_ALL_AND_RET
> -SYM_FUNC_END(handle_syscall)
> +SYM_FUNC_END(handle_sys)
>
> SYM_CODE_START(ret_from_fork)
> bl schedule_tail # a0 = struct task_struct *prev
> diff --git a/arch/loongarch/kernel/genex.S b/arch/loongarch/kernel/genex.S
> index 75e5be807a0d..d3df0fa725a2 100644
> --- a/arch/loongarch/kernel/genex.S
> +++ b/arch/loongarch/kernel/genex.S
> @@ -32,9 +32,8 @@ SYM_FUNC_START(__arch_cpu_idle)
> SYM_FUNC_END(__arch_cpu_idle)
>
> SYM_FUNC_START(handle_vint)
> - BACKUP_T0T1
> SAVE_ALL
> - la.abs t1, __arch_cpu_idle
> + la.pcrel t1, __arch_cpu_idle
> LONG_L t0, sp, PT_ERA
> /* 32 byte rollback region */
> ori t0, t0, 0x1f
> @@ -43,8 +42,7 @@ SYM_FUNC_START(handle_vint)
> LONG_S t0, sp, PT_ERA
> 1: move a0, sp
> move a1, sp
> - la.abs t0, do_vint
> - jirl ra, t0, 0
> + bl do_vint
> RESTORE_ALL_AND_RET
> SYM_FUNC_END(handle_vint)
>
> @@ -67,12 +65,10 @@ SYM_FUNC_END(except_vec_cex)
> .macro BUILD_HANDLER exception handler prep
> .align 5
> SYM_FUNC_START(handle_\exception)
> - BACKUP_T0T1
> SAVE_ALL
> build_prep_\prep
> move a0, sp
> - la.abs t0, do_\handler
> - jirl ra, t0, 0
> + bl do_\handler
> RESTORE_ALL_AND_RET
> SYM_FUNC_END(handle_\exception)
> .endm
> @@ -89,7 +85,11 @@ SYM_FUNC_END(except_vec_cex)
> BUILD_HANDLER watch watch none
> BUILD_HANDLER reserved reserved none /* others */
>
> -SYM_FUNC_START(handle_sys)
> - la.abs t0, handle_syscall
> +SYM_FUNC_START(handler_trampoline)
> + csrwr t0, EXCEPTION_KS0
> + csrwr t1, EXCEPTION_KS1
> + pcaddi t0, 0
> + ld.d t0, t0, 16
> jr t0
> -SYM_FUNC_END(handle_sys)
> + nop
> +SYM_FUNC_END(handler_trampoline)
> diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S
> index 57bada6b4e93..aa6181714ec3 100644
> --- a/arch/loongarch/kernel/head.S
> +++ b/arch/loongarch/kernel/head.S
> @@ -117,7 +117,7 @@ SYM_CODE_START(smpboot_entry)
> li.w t0, 0x00 # FPE=0, SXE=0, ASXE=0, BTE=0
> csrwr t0, LOONGARCH_CSR_EUEN
>
> - la.abs t0, cpuboot_data
> + la.pcrel t0, cpuboot_data
> ld.d sp, t0, CPU_BOOT_STACK
> ld.d tp, t0, CPU_BOOT_TINFO
>
> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
> index 7ea62faeeadb..0e8faaca3679 100644
> --- a/arch/loongarch/kernel/traps.c
> +++ b/arch/loongarch/kernel/traps.c
> @@ -61,6 +61,7 @@ extern asmlinkage void handle_lasx(void);
> extern asmlinkage void handle_reserved(void);
> extern asmlinkage void handle_watch(void);
> extern asmlinkage void handle_vint(void);
> +extern asmlinkage void handler_trampoline(void);
>
> static void show_backtrace(struct task_struct *task, const struct pt_regs *regs,
> const char *loglvl, bool user)
> @@ -716,7 +717,8 @@ void per_cpu_trap_init(int cpu)
> /* Install CPU exception handler */
> void set_handler(unsigned long offset, void *addr, unsigned long size)
> {
> - memcpy((void *)(eentry + offset), addr, size);
> + memcpy((void *)(eentry + offset), &handler_trampoline, 24);
> + memcpy((void *)(eentry + offset + 24), &addr, 8);
> local_flush_icache_range(eentry + offset, eentry + offset + size);
> }
>
> diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S
> index 58781c6e4191..cfaacdac518c 100644
> --- a/arch/loongarch/mm/tlbex.S
> +++ b/arch/loongarch/mm/tlbex.S
> @@ -24,8 +24,7 @@
> move a0, sp
> REG_S a2, sp, PT_BVADDR
> li.w a1, \write
> - la.abs t0, do_page_fault
> - jirl ra, t0, 0
> + bl do_page_fault
> RESTORE_ALL_AND_RET
> SYM_FUNC_END(tlb_do_page_fault_\write)
> .endm
> @@ -34,20 +33,16 @@
> tlb_do_page_fault 1
>
> SYM_FUNC_START(handle_tlb_protect)
> - BACKUP_T0T1
> SAVE_ALL
> move a0, sp
> move a1, zero
> csrrd a2, LOONGARCH_CSR_BADV
> REG_S a2, sp, PT_BVADDR
> - la.abs t0, do_page_fault
> - jirl ra, t0, 0
> + bl do_page_fault
> RESTORE_ALL_AND_RET
> SYM_FUNC_END(handle_tlb_protect)
>
> SYM_FUNC_START(handle_tlb_load)
> - csrwr t0, EXCEPTION_KS0
> - csrwr t1, EXCEPTION_KS1
> csrwr ra, EXCEPTION_KS2
>
> /*
> @@ -116,7 +111,7 @@ smp_pgtable_change_load:
>
> #ifdef CONFIG_64BIT
> vmalloc_load:
> - la.abs t1, swapper_pg_dir
> + la.pcrel t1, swapper_pg_dir
> b vmalloc_done_load
> #endif
>
> @@ -187,13 +182,10 @@ tlb_huge_update_load:
> nopage_tlb_load:
> dbar 0
> csrrd ra, EXCEPTION_KS2
> - la.abs t0, tlb_do_page_fault_0
> - jr t0
> + b tlb_do_page_fault_0
> SYM_FUNC_END(handle_tlb_load)
>
> SYM_FUNC_START(handle_tlb_store)
> - csrwr t0, EXCEPTION_KS0
> - csrwr t1, EXCEPTION_KS1
> csrwr ra, EXCEPTION_KS2
>
> /*
> @@ -263,7 +255,7 @@ smp_pgtable_change_store:
>
> #ifdef CONFIG_64BIT
> vmalloc_store:
> - la.abs t1, swapper_pg_dir
> + la.pcrel t1, swapper_pg_dir
> b vmalloc_done_store
> #endif
>
> @@ -336,13 +328,10 @@ tlb_huge_update_store:
> nopage_tlb_store:
> dbar 0
> csrrd ra, EXCEPTION_KS2
> - la.abs t0, tlb_do_page_fault_1
> - jr t0
> + b tlb_do_page_fault_1
> SYM_FUNC_END(handle_tlb_store)
>
> SYM_FUNC_START(handle_tlb_modify)
> - csrwr t0, EXCEPTION_KS0
> - csrwr t1, EXCEPTION_KS1
> csrwr ra, EXCEPTION_KS2
>
> /*
> @@ -411,7 +400,7 @@ smp_pgtable_change_modify:
>
> #ifdef CONFIG_64BIT
> vmalloc_modify:
> - la.abs t1, swapper_pg_dir
> + la.pcrel t1, swapper_pg_dir
> b vmalloc_done_modify
> #endif
>
> @@ -483,8 +472,7 @@ tlb_huge_update_modify:
> nopage_tlb_modify:
> dbar 0
> csrrd ra, EXCEPTION_KS2
> - la.abs t0, tlb_do_page_fault_1
> - jr t0
> + b tlb_do_page_fault_1
> SYM_FUNC_END(handle_tlb_modify)
>
> SYM_FUNC_START(handle_tlb_refill)
On Sat, Jan 14, 2023 at 2:38 PM Jinyang He <hejinyang@loongson.cn> wrote:
>
> Hi, Ruoyao and Youling,
>
> I care about the performance when NUMA enabled. We set CSR.EENTRY
> for each possible cpus where is NUMA-relative. So, I guess the more
> codes in NUMA-relative memory makes more performance. If we just set
> handler_trampoline as exception handler, the performance may be
> influenced.
So can copying both the handlers and handler_trampoline solve the
problem? If that is possible, please do that on top of the latest code
in
https://github.com/loongson/linux/commits/loongarch-next
Huacai
>
>
> Thanks,
>
> Jinyang
>
>
> On 2023-01-09 17:07, Youling Tang wrote:
> > From: Xi Ruoyao <xry111@xry111.site>
> >
> > Use a trampoline as an exception handlers, which can kill some use of
> > la.abs in preparation for the subsequent support of the PIE kernel.
> >
> > Signed-off-by: Xi Ruoyao <xry111@xry111.site>
> > Signed-off-by: Youling Tang <tangyouling@loongson.cn>
> > ---
> > arch/loongarch/include/asm/stackframe.h | 8 +++----
> > arch/loongarch/include/asm/uaccess.h | 1 -
> > arch/loongarch/kernel/entry.S | 6 +++---
> > arch/loongarch/kernel/genex.S | 20 +++++++++---------
> > arch/loongarch/kernel/head.S | 2 +-
> > arch/loongarch/kernel/traps.c | 4 +++-
> > arch/loongarch/mm/tlbex.S | 28 +++++++------------------
> > 7 files changed, 29 insertions(+), 40 deletions(-)
> >
> > diff --git a/arch/loongarch/include/asm/stackframe.h b/arch/loongarch/include/asm/stackframe.h
> > index 4ca953062b5b..96c94035b5d0 100644
> > --- a/arch/loongarch/include/asm/stackframe.h
> > +++ b/arch/loongarch/include/asm/stackframe.h
> > @@ -76,8 +76,8 @@
> > * kernelsp array for it. It stores the current sp in t0 and loads the
> > * new value in sp.
> > */
> > - .macro get_saved_sp docfi=0
> > - la.abs t1, kernelsp
> > + .macro get_saved_sp docfi=0
> > + la.pcrel t1, kernelsp
> > #ifdef CONFIG_SMP
> > csrrd t0, PERCPU_BASE_KS
> > LONG_ADD t1, t1, t0
> > @@ -89,8 +89,8 @@
> > LONG_L sp, t1, 0
> > .endm
> >
> > - .macro set_saved_sp stackp temp temp2
> > - la.abs \temp, kernelsp
> > + .macro set_saved_sp stackp temp temp2
> > + la.pcrel \temp, kernelsp
> > #ifdef CONFIG_SMP
> > LONG_ADD \temp, \temp, u0
> > #endif
> > diff --git a/arch/loongarch/include/asm/uaccess.h b/arch/loongarch/include/asm/uaccess.h
> > index 255899d4a7c3..0d22991ae430 100644
> > --- a/arch/loongarch/include/asm/uaccess.h
> > +++ b/arch/loongarch/include/asm/uaccess.h
> > @@ -22,7 +22,6 @@
> > extern u64 __ua_limit;
> >
> > #define __UA_ADDR ".dword"
> > -#define __UA_LA "la.abs"
> > #define __UA_LIMIT __ua_limit
> >
> > /*
> > diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S
> > index d53b631c9022..ca01afdbec3f 100644
> > --- a/arch/loongarch/kernel/entry.S
> > +++ b/arch/loongarch/kernel/entry.S
> > @@ -18,9 +18,9 @@
> > .text
> > .cfi_sections .debug_frame
> > .align 5
> > -SYM_FUNC_START(handle_syscall)
> > +SYM_FUNC_START(handle_sys)
> > csrrd t0, PERCPU_BASE_KS
> > - la.abs t1, kernelsp
> > + la.pcrel t1, kernelsp
> > add.d t1, t1, t0
> > move t2, sp
> > ld.d sp, t1, 0
> > @@ -66,7 +66,7 @@ SYM_FUNC_START(handle_syscall)
> > bl do_syscall
> >
> > RESTORE_ALL_AND_RET
> > -SYM_FUNC_END(handle_syscall)
> > +SYM_FUNC_END(handle_sys)
> >
> > SYM_CODE_START(ret_from_fork)
> > bl schedule_tail # a0 = struct task_struct *prev
> > diff --git a/arch/loongarch/kernel/genex.S b/arch/loongarch/kernel/genex.S
> > index 75e5be807a0d..d3df0fa725a2 100644
> > --- a/arch/loongarch/kernel/genex.S
> > +++ b/arch/loongarch/kernel/genex.S
> > @@ -32,9 +32,8 @@ SYM_FUNC_START(__arch_cpu_idle)
> > SYM_FUNC_END(__arch_cpu_idle)
> >
> > SYM_FUNC_START(handle_vint)
> > - BACKUP_T0T1
> > SAVE_ALL
> > - la.abs t1, __arch_cpu_idle
> > + la.pcrel t1, __arch_cpu_idle
> > LONG_L t0, sp, PT_ERA
> > /* 32 byte rollback region */
> > ori t0, t0, 0x1f
> > @@ -43,8 +42,7 @@ SYM_FUNC_START(handle_vint)
> > LONG_S t0, sp, PT_ERA
> > 1: move a0, sp
> > move a1, sp
> > - la.abs t0, do_vint
> > - jirl ra, t0, 0
> > + bl do_vint
> > RESTORE_ALL_AND_RET
> > SYM_FUNC_END(handle_vint)
> >
> > @@ -67,12 +65,10 @@ SYM_FUNC_END(except_vec_cex)
> > .macro BUILD_HANDLER exception handler prep
> > .align 5
> > SYM_FUNC_START(handle_\exception)
> > - BACKUP_T0T1
> > SAVE_ALL
> > build_prep_\prep
> > move a0, sp
> > - la.abs t0, do_\handler
> > - jirl ra, t0, 0
> > + bl do_\handler
> > RESTORE_ALL_AND_RET
> > SYM_FUNC_END(handle_\exception)
> > .endm
> > @@ -89,7 +85,11 @@ SYM_FUNC_END(except_vec_cex)
> > BUILD_HANDLER watch watch none
> > BUILD_HANDLER reserved reserved none /* others */
> >
> > -SYM_FUNC_START(handle_sys)
> > - la.abs t0, handle_syscall
> > +SYM_FUNC_START(handler_trampoline)
> > + csrwr t0, EXCEPTION_KS0
> > + csrwr t1, EXCEPTION_KS1
> > + pcaddi t0, 0
> > + ld.d t0, t0, 16
> > jr t0
> > -SYM_FUNC_END(handle_sys)
> > + nop
> > +SYM_FUNC_END(handler_trampoline)
> > diff --git a/arch/loongarch/kernel/head.S b/arch/loongarch/kernel/head.S
> > index 57bada6b4e93..aa6181714ec3 100644
> > --- a/arch/loongarch/kernel/head.S
> > +++ b/arch/loongarch/kernel/head.S
> > @@ -117,7 +117,7 @@ SYM_CODE_START(smpboot_entry)
> > li.w t0, 0x00 # FPE=0, SXE=0, ASXE=0, BTE=0
> > csrwr t0, LOONGARCH_CSR_EUEN
> >
> > - la.abs t0, cpuboot_data
> > + la.pcrel t0, cpuboot_data
> > ld.d sp, t0, CPU_BOOT_STACK
> > ld.d tp, t0, CPU_BOOT_TINFO
> >
> > diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
> > index 7ea62faeeadb..0e8faaca3679 100644
> > --- a/arch/loongarch/kernel/traps.c
> > +++ b/arch/loongarch/kernel/traps.c
> > @@ -61,6 +61,7 @@ extern asmlinkage void handle_lasx(void);
> > extern asmlinkage void handle_reserved(void);
> > extern asmlinkage void handle_watch(void);
> > extern asmlinkage void handle_vint(void);
> > +extern asmlinkage void handler_trampoline(void);
> >
> > static void show_backtrace(struct task_struct *task, const struct pt_regs *regs,
> > const char *loglvl, bool user)
> > @@ -716,7 +717,8 @@ void per_cpu_trap_init(int cpu)
> > /* Install CPU exception handler */
> > void set_handler(unsigned long offset, void *addr, unsigned long size)
> > {
> > - memcpy((void *)(eentry + offset), addr, size);
> > + memcpy((void *)(eentry + offset), &handler_trampoline, 24);
> > + memcpy((void *)(eentry + offset + 24), &addr, 8);
> > local_flush_icache_range(eentry + offset, eentry + offset + size);
> > }
> >
> > diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S
> > index 58781c6e4191..cfaacdac518c 100644
> > --- a/arch/loongarch/mm/tlbex.S
> > +++ b/arch/loongarch/mm/tlbex.S
> > @@ -24,8 +24,7 @@
> > move a0, sp
> > REG_S a2, sp, PT_BVADDR
> > li.w a1, \write
> > - la.abs t0, do_page_fault
> > - jirl ra, t0, 0
> > + bl do_page_fault
> > RESTORE_ALL_AND_RET
> > SYM_FUNC_END(tlb_do_page_fault_\write)
> > .endm
> > @@ -34,20 +33,16 @@
> > tlb_do_page_fault 1
> >
> > SYM_FUNC_START(handle_tlb_protect)
> > - BACKUP_T0T1
> > SAVE_ALL
> > move a0, sp
> > move a1, zero
> > csrrd a2, LOONGARCH_CSR_BADV
> > REG_S a2, sp, PT_BVADDR
> > - la.abs t0, do_page_fault
> > - jirl ra, t0, 0
> > + bl do_page_fault
> > RESTORE_ALL_AND_RET
> > SYM_FUNC_END(handle_tlb_protect)
> >
> > SYM_FUNC_START(handle_tlb_load)
> > - csrwr t0, EXCEPTION_KS0
> > - csrwr t1, EXCEPTION_KS1
> > csrwr ra, EXCEPTION_KS2
> >
> > /*
> > @@ -116,7 +111,7 @@ smp_pgtable_change_load:
> >
> > #ifdef CONFIG_64BIT
> > vmalloc_load:
> > - la.abs t1, swapper_pg_dir
> > + la.pcrel t1, swapper_pg_dir
> > b vmalloc_done_load
> > #endif
> >
> > @@ -187,13 +182,10 @@ tlb_huge_update_load:
> > nopage_tlb_load:
> > dbar 0
> > csrrd ra, EXCEPTION_KS2
> > - la.abs t0, tlb_do_page_fault_0
> > - jr t0
> > + b tlb_do_page_fault_0
> > SYM_FUNC_END(handle_tlb_load)
> >
> > SYM_FUNC_START(handle_tlb_store)
> > - csrwr t0, EXCEPTION_KS0
> > - csrwr t1, EXCEPTION_KS1
> > csrwr ra, EXCEPTION_KS2
> >
> > /*
> > @@ -263,7 +255,7 @@ smp_pgtable_change_store:
> >
> > #ifdef CONFIG_64BIT
> > vmalloc_store:
> > - la.abs t1, swapper_pg_dir
> > + la.pcrel t1, swapper_pg_dir
> > b vmalloc_done_store
> > #endif
> >
> > @@ -336,13 +328,10 @@ tlb_huge_update_store:
> > nopage_tlb_store:
> > dbar 0
> > csrrd ra, EXCEPTION_KS2
> > - la.abs t0, tlb_do_page_fault_1
> > - jr t0
> > + b tlb_do_page_fault_1
> > SYM_FUNC_END(handle_tlb_store)
> >
> > SYM_FUNC_START(handle_tlb_modify)
> > - csrwr t0, EXCEPTION_KS0
> > - csrwr t1, EXCEPTION_KS1
> > csrwr ra, EXCEPTION_KS2
> >
> > /*
> > @@ -411,7 +400,7 @@ smp_pgtable_change_modify:
> >
> > #ifdef CONFIG_64BIT
> > vmalloc_modify:
> > - la.abs t1, swapper_pg_dir
> > + la.pcrel t1, swapper_pg_dir
> > b vmalloc_done_modify
> > #endif
> >
> > @@ -483,8 +472,7 @@ tlb_huge_update_modify:
> > nopage_tlb_modify:
> > dbar 0
> > csrrd ra, EXCEPTION_KS2
> > - la.abs t0, tlb_do_page_fault_1
> > - jr t0
> > + b tlb_do_page_fault_1
> > SYM_FUNC_END(handle_tlb_modify)
> >
> > SYM_FUNC_START(handle_tlb_refill)
>
On Mon, 2023-01-16 at 09:30 +0800, Huacai Chen wrote:
> On Sat, Jan 14, 2023 at 2:38 PM Jinyang He <hejinyang@loongson.cn>
> wrote:
> >
> > Hi, Ruoyao and Youling,
> >
> > I care about the performance when NUMA enabled. We set CSR.EENTRY
> > for each possible cpus where is NUMA-relative. So, I guess the more
> > codes in NUMA-relative memory makes more performance. If we just set
> > handler_trampoline as exception handler, the performance may be
> > influenced.
> So copying both the handlers and handler_trampoline can solve the
> problem? If that is possible, please do that on top of the latest code
> in
> https://github.com/loongson/linux/commits/loongarch-next
Hi folks,
I just wrote the trampoline code as a PoC to show that "a relocatable
kernel can work", and there must be some better way. But I'm too sad to
write any serious code this month, and I don't have access to a
LoongArch NUMA system. So I think it's better to leave the job to you
guys now :).
Best regards
On Mon, Jan 16, 2023 at 1:41 PM Xi Ruoyao <xry111@xry111.site> wrote:
>
> On Mon, 2023-01-16 at 09:30 +0800, Huacai Chen wrote:
> > On Sat, Jan 14, 2023 at 2:38 PM Jinyang He <hejinyang@loongson.cn>
> > wrote:
> > >
> > > Hi, Ruoyao and Youling,
> > >
> > > I care about the performance when NUMA enabled. We set CSR.EENTRY
> > > for each possible cpus where is NUMA-relative. So, I guess the more
> > > codes in NUMA-relative memory makes more performance. If we just set
> > > handler_trampoline as exception handler, the performance may be
> > > influenced.
> > So copying both the handlers and handler_trampoline can solve the
> > problem? If that is possible, please do that on top of the latest code
> > in
> > https://github.com/loongson/linux/commits/loongarch-next
>
> Hi folks,
>
> I just wrote the trampoline code as a PoC to show "relocatable kernel
> can work" and there must be some better way. But I'm too sad to write
> any serious code in this month, and I don't have access to a LoongArch
> NUMA system. So I think it's better to leave the job for you guys now
> :).
Hmm, I hope this series can be merged in 6.3. :)
Huacai
>
> Best regards
>
> --
> Xi Ruoyao <xry111@xry111.site>
> School of Aerospace Science and Technology, Xidian University
@@ -76,8 +76,8 @@
* kernelsp array for it. It stores the current sp in t0 and loads the
* new value in sp.
*/
- .macro get_saved_sp docfi=0
- la.abs t1, kernelsp
+ .macro get_saved_sp docfi=0
+ la.pcrel t1, kernelsp
#ifdef CONFIG_SMP
csrrd t0, PERCPU_BASE_KS
LONG_ADD t1, t1, t0
@@ -89,8 +89,8 @@
LONG_L sp, t1, 0
.endm
- .macro set_saved_sp stackp temp temp2
- la.abs \temp, kernelsp
+ .macro set_saved_sp stackp temp temp2
+ la.pcrel \temp, kernelsp
#ifdef CONFIG_SMP
LONG_ADD \temp, \temp, u0
#endif
@@ -22,7 +22,6 @@
extern u64 __ua_limit;
#define __UA_ADDR ".dword"
-#define __UA_LA "la.abs"
#define __UA_LIMIT __ua_limit
/*
@@ -18,9 +18,9 @@
.text
.cfi_sections .debug_frame
.align 5
-SYM_FUNC_START(handle_syscall)
+SYM_FUNC_START(handle_sys)
csrrd t0, PERCPU_BASE_KS
- la.abs t1, kernelsp
+ la.pcrel t1, kernelsp
add.d t1, t1, t0
move t2, sp
ld.d sp, t1, 0
@@ -66,7 +66,7 @@ SYM_FUNC_START(handle_syscall)
bl do_syscall
RESTORE_ALL_AND_RET
-SYM_FUNC_END(handle_syscall)
+SYM_FUNC_END(handle_sys)
SYM_CODE_START(ret_from_fork)
bl schedule_tail # a0 = struct task_struct *prev
@@ -32,9 +32,8 @@ SYM_FUNC_START(__arch_cpu_idle)
SYM_FUNC_END(__arch_cpu_idle)
SYM_FUNC_START(handle_vint)
- BACKUP_T0T1
SAVE_ALL
- la.abs t1, __arch_cpu_idle
+ la.pcrel t1, __arch_cpu_idle
LONG_L t0, sp, PT_ERA
/* 32 byte rollback region */
ori t0, t0, 0x1f
@@ -43,8 +42,7 @@ SYM_FUNC_START(handle_vint)
LONG_S t0, sp, PT_ERA
1: move a0, sp
move a1, sp
- la.abs t0, do_vint
- jirl ra, t0, 0
+ bl do_vint
RESTORE_ALL_AND_RET
SYM_FUNC_END(handle_vint)
@@ -67,12 +65,10 @@ SYM_FUNC_END(except_vec_cex)
.macro BUILD_HANDLER exception handler prep
.align 5
SYM_FUNC_START(handle_\exception)
- BACKUP_T0T1
SAVE_ALL
build_prep_\prep
move a0, sp
- la.abs t0, do_\handler
- jirl ra, t0, 0
+ bl do_\handler
RESTORE_ALL_AND_RET
SYM_FUNC_END(handle_\exception)
.endm
@@ -89,7 +85,11 @@ SYM_FUNC_END(except_vec_cex)
BUILD_HANDLER watch watch none
BUILD_HANDLER reserved reserved none /* others */
-SYM_FUNC_START(handle_sys)
- la.abs t0, handle_syscall
+SYM_FUNC_START(handler_trampoline)
+ csrwr t0, EXCEPTION_KS0
+ csrwr t1, EXCEPTION_KS1
+ pcaddi t0, 0
+ ld.d t0, t0, 16
jr t0
-SYM_FUNC_END(handle_sys)
+ nop
+SYM_FUNC_END(handler_trampoline)
@@ -117,7 +117,7 @@ SYM_CODE_START(smpboot_entry)
li.w t0, 0x00 # FPE=0, SXE=0, ASXE=0, BTE=0
csrwr t0, LOONGARCH_CSR_EUEN
- la.abs t0, cpuboot_data
+ la.pcrel t0, cpuboot_data
ld.d sp, t0, CPU_BOOT_STACK
ld.d tp, t0, CPU_BOOT_TINFO
@@ -61,6 +61,7 @@ extern asmlinkage void handle_lasx(void);
extern asmlinkage void handle_reserved(void);
extern asmlinkage void handle_watch(void);
extern asmlinkage void handle_vint(void);
+extern asmlinkage void handler_trampoline(void);
static void show_backtrace(struct task_struct *task, const struct pt_regs *regs,
const char *loglvl, bool user)
@@ -716,7 +717,8 @@ void per_cpu_trap_init(int cpu)
/* Install CPU exception handler */
void set_handler(unsigned long offset, void *addr, unsigned long size)
{
- memcpy((void *)(eentry + offset), addr, size);
+ memcpy((void *)(eentry + offset), &handler_trampoline, 24);
+ memcpy((void *)(eentry + offset + 24), &addr, 8);
local_flush_icache_range(eentry + offset, eentry + offset + size);
}
@@ -24,8 +24,7 @@
move a0, sp
REG_S a2, sp, PT_BVADDR
li.w a1, \write
- la.abs t0, do_page_fault
- jirl ra, t0, 0
+ bl do_page_fault
RESTORE_ALL_AND_RET
SYM_FUNC_END(tlb_do_page_fault_\write)
.endm
@@ -34,20 +33,16 @@
tlb_do_page_fault 1
SYM_FUNC_START(handle_tlb_protect)
- BACKUP_T0T1
SAVE_ALL
move a0, sp
move a1, zero
csrrd a2, LOONGARCH_CSR_BADV
REG_S a2, sp, PT_BVADDR
- la.abs t0, do_page_fault
- jirl ra, t0, 0
+ bl do_page_fault
RESTORE_ALL_AND_RET
SYM_FUNC_END(handle_tlb_protect)
SYM_FUNC_START(handle_tlb_load)
- csrwr t0, EXCEPTION_KS0
- csrwr t1, EXCEPTION_KS1
csrwr ra, EXCEPTION_KS2
/*
@@ -116,7 +111,7 @@ smp_pgtable_change_load:
#ifdef CONFIG_64BIT
vmalloc_load:
- la.abs t1, swapper_pg_dir
+ la.pcrel t1, swapper_pg_dir
b vmalloc_done_load
#endif
@@ -187,13 +182,10 @@ tlb_huge_update_load:
nopage_tlb_load:
dbar 0
csrrd ra, EXCEPTION_KS2
- la.abs t0, tlb_do_page_fault_0
- jr t0
+ b tlb_do_page_fault_0
SYM_FUNC_END(handle_tlb_load)
SYM_FUNC_START(handle_tlb_store)
- csrwr t0, EXCEPTION_KS0
- csrwr t1, EXCEPTION_KS1
csrwr ra, EXCEPTION_KS2
/*
@@ -263,7 +255,7 @@ smp_pgtable_change_store:
#ifdef CONFIG_64BIT
vmalloc_store:
- la.abs t1, swapper_pg_dir
+ la.pcrel t1, swapper_pg_dir
b vmalloc_done_store
#endif
@@ -336,13 +328,10 @@ tlb_huge_update_store:
nopage_tlb_store:
dbar 0
csrrd ra, EXCEPTION_KS2
- la.abs t0, tlb_do_page_fault_1
- jr t0
+ b tlb_do_page_fault_1
SYM_FUNC_END(handle_tlb_store)
SYM_FUNC_START(handle_tlb_modify)
- csrwr t0, EXCEPTION_KS0
- csrwr t1, EXCEPTION_KS1
csrwr ra, EXCEPTION_KS2
/*
@@ -411,7 +400,7 @@ smp_pgtable_change_modify:
#ifdef CONFIG_64BIT
vmalloc_modify:
- la.abs t1, swapper_pg_dir
+ la.pcrel t1, swapper_pg_dir
b vmalloc_done_modify
#endif
@@ -483,8 +472,7 @@ tlb_huge_update_modify:
nopage_tlb_modify:
dbar 0
csrrd ra, EXCEPTION_KS2
- la.abs t0, tlb_do_page_fault_1
- jr t0
+ b tlb_do_page_fault_1
SYM_FUNC_END(handle_tlb_modify)
SYM_FUNC_START(handle_tlb_refill)