[v15,27/30] LoongArch: KVM: Implement vcpu world switch

Message ID 20230626084752.1138621-28-zhaotianrui@loongson.cn
State New
Headers
Series Add KVM LoongArch support |

Commit Message

zhaotianrui June 26, 2023, 8:47 a.m. UTC
  Implement LoongArch vcpu world switch, including vcpu enter guest and
vcpu exit from guest, both operations need to save or restore the host
and guest registers.

Reviewed-by: Bibo Mao <maobibo@loongson.cn>
Signed-off-by: Tianrui Zhao <zhaotianrui@loongson.cn>
---
 arch/loongarch/kernel/asm-offsets.c |  32 +++
 arch/loongarch/kvm/switch.S         | 299 ++++++++++++++++++++++++++++
 2 files changed, 331 insertions(+)
 create mode 100644 arch/loongarch/kvm/switch.S
  

Comments

Jinyang He June 28, 2023, 3:42 a.m. UTC | #1
On 2023-06-26 16:47, Tianrui Zhao wrote:

> Implement LoongArch vcpu world switch, including vcpu enter guest and
> vcpu exit from guest, both operations need to save or restore the host
> and guest registers.
>
> Reviewed-by: Bibo Mao <maobibo@loongson.cn>
> Signed-off-by: Tianrui Zhao <zhaotianrui@loongson.cn>
> ---
>   arch/loongarch/kernel/asm-offsets.c |  32 +++
>   arch/loongarch/kvm/switch.S         | 299 ++++++++++++++++++++++++++++
>   2 files changed, 331 insertions(+)
>   create mode 100644 arch/loongarch/kvm/switch.S
>
> diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c
> index 4bdb203fc66e..cb6c5a5afea9 100644
> --- a/arch/loongarch/kernel/asm-offsets.c
> +++ b/arch/loongarch/kernel/asm-offsets.c
> @@ -9,6 +9,7 @@
>   #include <linux/mm.h>
>   #include <linux/kbuild.h>
>   #include <linux/suspend.h>
> +#include <linux/kvm_host.h>
>   #include <asm/cpu-info.h>
>   #include <asm/ptrace.h>
>   #include <asm/processor.h>
> @@ -272,3 +273,34 @@ void output_pbe_defines(void)
>   	BLANK();
>   }
>   #endif
> +
> +static void __used output_kvm_defines(void)
> +{
> +	COMMENT(" KVM/LOONGARCH Specific offsets. ");
> +
> +	OFFSET(VCPU_FCSR0, kvm_vcpu_arch, fpu.fcsr);
> +	OFFSET(VCPU_FCC, kvm_vcpu_arch, fpu.fcc);
> +	BLANK();
> +
> +	OFFSET(KVM_VCPU_ARCH, kvm_vcpu, arch);
> +	OFFSET(KVM_VCPU_KVM, kvm_vcpu, kvm);
> +	OFFSET(KVM_VCPU_RUN, kvm_vcpu, run);
> +	BLANK();
> +
> +	OFFSET(KVM_ARCH_HSP, kvm_vcpu_arch, host_sp);
> +	OFFSET(KVM_ARCH_HTP, kvm_vcpu_arch, host_tp);
> +	OFFSET(KVM_ARCH_HANDLE_EXIT, kvm_vcpu_arch, handle_exit);
> +	OFFSET(KVM_ARCH_HPGD, kvm_vcpu_arch, host_pgd);
> +	OFFSET(KVM_ARCH_GEENTRY, kvm_vcpu_arch, guest_eentry);
> +	OFFSET(KVM_ARCH_GPC, kvm_vcpu_arch, pc);
> +	OFFSET(KVM_ARCH_GGPR, kvm_vcpu_arch, gprs);
> +	OFFSET(KVM_ARCH_HESTAT, kvm_vcpu_arch, host_estat);
> +	OFFSET(KVM_ARCH_HBADV, kvm_vcpu_arch, badv);
> +	OFFSET(KVM_ARCH_HBADI, kvm_vcpu_arch, badi);
> +	OFFSET(KVM_ARCH_HECFG, kvm_vcpu_arch, host_ecfg);
> +	OFFSET(KVM_ARCH_HEENTRY, kvm_vcpu_arch, host_eentry);
> +	OFFSET(KVM_ARCH_HPERCPU, kvm_vcpu_arch, host_percpu);
> +
> +	OFFSET(KVM_GPGD, kvm, arch.gpa_mm.pgd);
> +	BLANK();
> +}
> diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S
> new file mode 100644
> index 000000000000..7e55976c6fec
> --- /dev/null
> +++ b/arch/loongarch/kvm/switch.S
> @@ -0,0 +1,299 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
> + */
> +
> +#include <linux/linkage.h>
> +#include <asm/stackframe.h>
> +#include <asm/asm.h>
> +#include <asm/asmmacro.h>
> +#include <asm/regdef.h>
> +#include <asm/loongarch.h>
> +#include <asm/export.h>
> +
> +#define PT_GPR_OFFSET(x)	(PT_R0 + 8*x)
> +	.text

The .text is redundant, you also marked it later.


> +
> +.macro kvm_save_host_gpr base
> +	.irp n,1,2,3,22,23,24,25,26,27,28,29,30,31
> +	st.d	$r\n, \base, PT_GPR_OFFSET(\n)
> +	.endr
> +.endm
> +
> +.macro kvm_restore_host_gpr base
> +	.irp n,1,2,3,22,23,24,25,26,27,28,29,30,31
> +	ld.d	$r\n, \base, PT_GPR_OFFSET(\n)
> +	.endr
> +.endm
> +
> +/*
> + * prepare switch to guest, save host reg and restore guest reg.
> + * a2: kvm_vcpu_arch, don't touch it until 'ertn'
> + * t0, t1: temp register
> + */
> +.macro kvm_switch_to_guest
> +	/* set host excfg.VS=0, all exceptions share one exception entry */
> +	csrrd		t0, LOONGARCH_CSR_ECFG
> +	bstrins.w	t0, zero, CSR_ECFG_VS_SHIFT_END, CSR_ECFG_VS_SHIFT
> +	csrwr		t0, LOONGARCH_CSR_ECFG
> +
> +	/* Load up the new EENTRY */
> +	ld.d	t0, a2, KVM_ARCH_GEENTRY
> +	csrwr	t0, LOONGARCH_CSR_EENTRY
> +
> +	/* Set Guest ERA */
> +	ld.d	t0, a2, KVM_ARCH_GPC
> +	csrwr	t0, LOONGARCH_CSR_ERA
> +
> +	/* Save host PGDL */
> +	csrrd	t0, LOONGARCH_CSR_PGDL
> +	st.d	t0, a2, KVM_ARCH_HPGD
> +
> +	/* Switch to kvm */
> +	ld.d	t1, a2, KVM_VCPU_KVM - KVM_VCPU_ARCH
> +
> +	/* Load guest PGDL */
> +	lu12i.w t0, KVM_GPGD
> +	srli.w  t0, t0, 12

I think the "lu12i.w+srli.w" can be replaced by "li.w t0, KVM_GPGD" .


> +	ldx.d   t0, t1, t0
> +	csrwr	t0, LOONGARCH_CSR_PGDL
> +
> +	/* Mix GID and RID */
> +	csrrd		t1, LOONGARCH_CSR_GSTAT
> +	bstrpick.w	t1, t1, CSR_GSTAT_GID_SHIFT_END, CSR_GSTAT_GID_SHIFT
> +	csrrd		t0, LOONGARCH_CSR_GTLBC
> +	bstrins.w	t0, t1, CSR_GTLBC_TGID_SHIFT_END, CSR_GTLBC_TGID_SHIFT
> +	csrwr		t0, LOONGARCH_CSR_GTLBC
> +
> +	/*
> +	 * Switch to guest:
> +	 *  GSTAT.PGM = 1, ERRCTL.ISERR = 0, TLBRPRMD.ISTLBR = 0
> +	 *  ertn
> +	 */
> +
> +	/*
> +	 * Enable intr in root mode with future ertn so that host interrupt
> +	 * can be responsed during VM runs
> +	 * guest crmd comes from separate gcsr_CRMD register
> +	 */
> +	ori	t0, zero, CSR_PRMD_PIE
li.w t0, CSR_PRMD_PIE
> +	csrxchg	t0, t0,   LOONGARCH_CSR_PRMD
> +
> +	/* Set PVM bit to setup ertn to guest context */
> +	ori	t0, zero, CSR_GSTAT_PVM
li.w t0, ...
> +	csrxchg	t0, t0,   LOONGARCH_CSR_GSTAT
> +
> +	/* Load Guest gprs */
> +	ld.d	ra,	a2,	(KVM_ARCH_GGPR + 8 * REG_RA)
> +	ld.d	tp,	a2,	(KVM_ARCH_GGPR + 8 * REG_TP)
> +	ld.d	sp,	a2,	(KVM_ARCH_GGPR + 8 * REG_SP)
> +	ld.d	a0,	a2,	(KVM_ARCH_GGPR + 8 * REG_A0)
> +	ld.d	a1,	a2,	(KVM_ARCH_GGPR + 8 * REG_A1)
> +	ld.d	a3,	a2,	(KVM_ARCH_GGPR + 8 * REG_A3)
> +	ld.d	a4,	a2,	(KVM_ARCH_GGPR + 8 * REG_A4)
> +	ld.d	a5,	a2,	(KVM_ARCH_GGPR + 8 * REG_A5)
> +	ld.d	a6,	a2,	(KVM_ARCH_GGPR + 8 * REG_A6)
> +	ld.d	a7,	a2,	(KVM_ARCH_GGPR + 8 * REG_A7)
> +	ld.d	t0,	a2,	(KVM_ARCH_GGPR + 8 * REG_T0)
> +	ld.d	t1,	a2,	(KVM_ARCH_GGPR + 8 * REG_T1)
> +	ld.d	t2,	a2,	(KVM_ARCH_GGPR + 8 * REG_T2)
> +	ld.d	t3,	a2,	(KVM_ARCH_GGPR + 8 * REG_T3)
> +	ld.d	t4,	a2,	(KVM_ARCH_GGPR + 8 * REG_T4)
> +	ld.d	t5,	a2,	(KVM_ARCH_GGPR + 8 * REG_T5)
> +	ld.d	t6,	a2,	(KVM_ARCH_GGPR + 8 * REG_T6)
> +	ld.d	t7,	a2,	(KVM_ARCH_GGPR + 8 * REG_T7)
> +	ld.d	t8,	a2,	(KVM_ARCH_GGPR + 8 * REG_T8)
> +	ld.d	u0,	a2,	(KVM_ARCH_GGPR + 8 * REG_U0)
> +	ld.d	fp,	a2,	(KVM_ARCH_GGPR + 8 * REG_FP)
> +	ld.d	s0,	a2,	(KVM_ARCH_GGPR + 8 * REG_S0)
> +	ld.d	s1,	a2,	(KVM_ARCH_GGPR + 8 * REG_S1)
> +	ld.d	s2,	a2,	(KVM_ARCH_GGPR + 8 * REG_S2)
> +	ld.d	s3,	a2,	(KVM_ARCH_GGPR + 8 * REG_S3)
> +	ld.d	s4,	a2,	(KVM_ARCH_GGPR + 8 * REG_S4)
> +	ld.d	s5,	a2,	(KVM_ARCH_GGPR + 8 * REG_S5)
> +	ld.d	s6,	a2,	(KVM_ARCH_GGPR + 8 * REG_S6)
> +	ld.d	s7,	a2,	(KVM_ARCH_GGPR + 8 * REG_S7)
> +	ld.d	s8,	a2,	(KVM_ARCH_GGPR + 8 * REG_S8)

Why not use .irp which like kvm_save/restore_host_gpr do here?


> +	/* Load KVM_ARCH register */
> +	ld.d	a2,	a2,	(KVM_ARCH_GGPR + 8 * REG_A2)
> +
> +	ertn
> +.endm
> +
> +	/*
> +	 * exception entry for general exception from guest mode
> +	 *  - IRQ is disabled
> +	 *  - kernel privilege in root mode
> +	 *  - page mode keep unchanged from previous prmd in root mode
> +	 *  - Fixme: tlb exception cannot happen since registers relative with TLB
> +	 *  -        is still in guest mode, such as pgd table/vmid registers etc,
> +	 *  -        will fix with hw page walk enabled in future
> +	 * load kvm_vcpu from reserved CSR KVM_VCPU_KS, and save a2 to KVM_TEMP_KS
> +	 */
> +	.section	.text

here.


> +	.cfi_sections	.debug_frame
> +SYM_CODE_START(kvm_vector_entry)
> +	csrwr	a2,   KVM_TEMP_KS
> +	csrrd	a2,   KVM_VCPU_KS
> +	addi.d	a2,   a2, KVM_VCPU_ARCH
> +
> +	/* After save gprs, free to use any gpr */
> +	st.d	ra,	a2,	(KVM_ARCH_GGPR + 8 * REG_RA)
> +	st.d	tp,	a2,	(KVM_ARCH_GGPR + 8 * REG_TP)
> +	st.d	sp,	a2,	(KVM_ARCH_GGPR + 8 * REG_SP)
> +	st.d	a0,	a2,	(KVM_ARCH_GGPR + 8 * REG_A0)
> +	st.d	a1,	a2,	(KVM_ARCH_GGPR + 8 * REG_A1)
> +	st.d	a3,	a2,	(KVM_ARCH_GGPR + 8 * REG_A3)
> +	st.d	a4,	a2,	(KVM_ARCH_GGPR + 8 * REG_A4)
> +	st.d	a5,	a2,	(KVM_ARCH_GGPR + 8 * REG_A5)
> +	st.d	a6,	a2,	(KVM_ARCH_GGPR + 8 * REG_A6)
> +	st.d	a7,	a2,	(KVM_ARCH_GGPR + 8 * REG_A7)
> +	st.d	t0,	a2,	(KVM_ARCH_GGPR + 8 * REG_T0)
> +	st.d	t1,	a2,	(KVM_ARCH_GGPR + 8 * REG_T1)
> +	st.d	t2,	a2,	(KVM_ARCH_GGPR + 8 * REG_T2)
> +	st.d	t3,	a2,	(KVM_ARCH_GGPR + 8 * REG_T3)
> +	st.d	t4,	a2,	(KVM_ARCH_GGPR + 8 * REG_T4)
> +	st.d	t5,	a2,	(KVM_ARCH_GGPR + 8 * REG_T5)
> +	st.d	t6,	a2,	(KVM_ARCH_GGPR + 8 * REG_T6)
> +	st.d	t7,	a2,	(KVM_ARCH_GGPR + 8 * REG_T7)
> +	st.d	t8,	a2,	(KVM_ARCH_GGPR + 8 * REG_T8)
> +	st.d	u0,	a2,	(KVM_ARCH_GGPR + 8 * REG_U0)
> +	st.d	fp,	a2,	(KVM_ARCH_GGPR + 8 * REG_FP)
> +	st.d	s0,	a2,	(KVM_ARCH_GGPR + 8 * REG_S0)
> +	st.d	s1,	a2,	(KVM_ARCH_GGPR + 8 * REG_S1)
> +	st.d	s2,	a2,	(KVM_ARCH_GGPR + 8 * REG_S2)
> +	st.d	s3,	a2,	(KVM_ARCH_GGPR + 8 * REG_S3)
> +	st.d	s4,	a2,	(KVM_ARCH_GGPR + 8 * REG_S4)
> +	st.d	s5,	a2,	(KVM_ARCH_GGPR + 8 * REG_S5)
> +	st.d	s6,	a2,	(KVM_ARCH_GGPR + 8 * REG_S6)
> +	st.d	s7,	a2,	(KVM_ARCH_GGPR + 8 * REG_S7)
> +	st.d	s8,	a2,	(KVM_ARCH_GGPR + 8 * REG_S8)

Why not .irp ... ?


> +	/* Save guest a2 */
> +	csrrd	t0,	KVM_TEMP_KS
> +	st.d	t0,	a2,	(KVM_ARCH_GGPR + 8 * REG_A2)
> +
> +	/* a2: kvm_vcpu_arch, a1 is free to use */
> +	csrrd	s1,   KVM_VCPU_KS
> +	ld.d	s0,   s1, KVM_VCPU_RUN
> +
> +	csrrd	t0,   LOONGARCH_CSR_ESTAT
> +	st.d	t0,   a2, KVM_ARCH_HESTAT
> +	csrrd	t0,   LOONGARCH_CSR_ERA
> +	st.d	t0,   a2, KVM_ARCH_GPC
> +	csrrd	t0,   LOONGARCH_CSR_BADV
> +	st.d	t0,   a2, KVM_ARCH_HBADV
> +	csrrd	t0,   LOONGARCH_CSR_BADI
> +	st.d	t0,   a2, KVM_ARCH_HBADI
> +
> +	/* Restore host excfg.VS */
> +	csrrd	t0, LOONGARCH_CSR_ECFG
> +	ld.d	t1, a2, KVM_ARCH_HECFG
> +	or	t0, t0, t1
> +	csrwr	t0, LOONGARCH_CSR_ECFG
> +
> +	/* Restore host eentry */
> +	ld.d	t0, a2, KVM_ARCH_HEENTRY
> +	csrwr	t0, LOONGARCH_CSR_EENTRY
> +
> +	/* restore host pgd table */
> +	ld.d    t0, a2, KVM_ARCH_HPGD
> +	csrwr   t0, LOONGARCH_CSR_PGDL
> +
> +	/*
> +	 * Disable PGM bit to enter root mode by default with next ertn
> +	 */
> +	ori	t0, zero, CSR_GSTAT_PVM
li.w
> +	csrxchg	zero, t0, LOONGARCH_CSR_GSTAT
> +	/*
> +	 * Clear GTLBC.TGID field
> +	 *       0: for root  tlb update in future tlb instr
> +	 *  others: for guest tlb update like gpa to hpa in future tlb instr
> +	 */
> +	csrrd	t0, LOONGARCH_CSR_GTLBC
> +	bstrins.w	t0, zero, CSR_GTLBC_TGID_SHIFT_END, CSR_GTLBC_TGID_SHIFT
> +	csrwr	t0, LOONGARCH_CSR_GTLBC
> +	ld.d	tp, a2, KVM_ARCH_HTP
> +	ld.d	sp, a2, KVM_ARCH_HSP
> +	/* restore per cpu register */
> +	ld.d	u0, a2, KVM_ARCH_HPERCPU
> +	addi.d	sp, sp, -PT_SIZE
> +
> +	/* Prepare handle exception */
> +	or	a0, s0, zero
move
> +	or	a1, s1, zero
move
> +	ld.d	t8, a2, KVM_ARCH_HANDLE_EXIT
> +	jirl	ra, t8, 0
> +
> +	or	a2, s1, zero
move
> +	addi.d	a2, a2, KVM_VCPU_ARCH
> +
> +	/* resume host when ret <= 0 */
> +	bge	zero, a0, ret_to_host
blez a0, ret_to_host
> +
> +	/*
> +         * return to guest
> +         * save per cpu register again, maybe switched to another cpu
> +         */
> +	st.d	u0, a2, KVM_ARCH_HPERCPU
> +
> +	/* Save kvm_vcpu to kscratch */
> +	csrwr	s1, KVM_VCPU_KS
> +	kvm_switch_to_guest
> +
> +ret_to_host:
> +	ld.d    a2, a2, KVM_ARCH_HSP
> +	addi.d  a2, a2, -PT_SIZE
> +	kvm_restore_host_gpr    a2
> +	jr      ra
> +SYM_CODE_END(kvm_vector_entry)
> +kvm_vector_entry_end:
> +
> +/*
> + * int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu)
> + *
> + * @register_param:
> + *  a0: kvm_run* run
> + *  a1: kvm_vcpu* vcpu
> + */
> +SYM_FUNC_START(kvm_enter_guest)
> +	/* allocate space in stack bottom */
> +	addi.d	a2, sp, -PT_SIZE
> +	/* save host gprs */
> +	kvm_save_host_gpr a2
> +
> +	/* save host crmd,prmd csr to stack */
> +	csrrd	a3, LOONGARCH_CSR_CRMD
> +	st.d	a3, a2, PT_CRMD
> +	csrrd	a3, LOONGARCH_CSR_PRMD
> +	st.d	a3, a2, PT_PRMD
> +
> +	addi.d	a2, a1, KVM_VCPU_ARCH
> +	st.d	sp, a2, KVM_ARCH_HSP
> +	st.d	tp, a2, KVM_ARCH_HTP
> +	/* Save per cpu register */
> +	st.d	u0, a2, KVM_ARCH_HPERCPU
> +
> +	/* Save kvm_vcpu to kscratch */
> +	csrwr	a1, KVM_VCPU_KS
> +	kvm_switch_to_guest
> +SYM_FUNC_END(kvm_enter_guest)
> +kvm_enter_guest_end:
> +
> +	.section ".rodata"
> +SYM_DATA(kvm_vector_size, .quad kvm_vector_entry_end - kvm_vector_entry)
> +SYM_DATA(kvm_enter_guest_size, .quad kvm_enter_guest_end - kvm_enter_guest)

Not restore .text section. Although I'd like to use .pushsection ... .pop

after marked kvm_..._end, you can put them after kvm_restore_fpu, too.


Jinyang


> +
> +
> +SYM_FUNC_START(kvm_save_fpu)
> +	fpu_save_csr	a0 t1
> +	fpu_save_double a0 t1
> +	fpu_save_cc	a0 t1 t2
> +	jr              ra
> +SYM_FUNC_END(kvm_save_fpu)
> +
> +SYM_FUNC_START(kvm_restore_fpu)
> +	fpu_restore_double a0 t1
> +	fpu_restore_csr    a0 t1
> +	fpu_restore_cc	   a0 t1 t2
> +	jr                 ra
> +SYM_FUNC_END(kvm_restore_fpu)
  
zhaotianrui June 28, 2023, 8:34 a.m. UTC | #2
在 2023/6/28 上午11:42, Jinyang He 写道:
> On 2023-06-26 16:47, Tianrui Zhao wrote:
>
>> Implement LoongArch vcpu world switch, including vcpu enter guest and
>> vcpu exit from guest, both operations need to save or restore the host
>> and guest registers.
>>
>> Reviewed-by: Bibo Mao <maobibo@loongson.cn>
>> Signed-off-by: Tianrui Zhao <zhaotianrui@loongson.cn>
>> ---
>>   arch/loongarch/kernel/asm-offsets.c |  32 +++
>>   arch/loongarch/kvm/switch.S         | 299 ++++++++++++++++++++++++++++
>>   2 files changed, 331 insertions(+)
>>   create mode 100644 arch/loongarch/kvm/switch.S
>>
>> diff --git a/arch/loongarch/kernel/asm-offsets.c 
>> b/arch/loongarch/kernel/asm-offsets.c
>> index 4bdb203fc66e..cb6c5a5afea9 100644
>> --- a/arch/loongarch/kernel/asm-offsets.c
>> +++ b/arch/loongarch/kernel/asm-offsets.c
>> @@ -9,6 +9,7 @@
>>   #include <linux/mm.h>
>>   #include <linux/kbuild.h>
>>   #include <linux/suspend.h>
>> +#include <linux/kvm_host.h>
>>   #include <asm/cpu-info.h>
>>   #include <asm/ptrace.h>
>>   #include <asm/processor.h>
>> @@ -272,3 +273,34 @@ void output_pbe_defines(void)
>>       BLANK();
>>   }
>>   #endif
>> +
>> +static void __used output_kvm_defines(void)
>> +{
>> +    COMMENT(" KVM/LOONGARCH Specific offsets. ");
>> +
>> +    OFFSET(VCPU_FCSR0, kvm_vcpu_arch, fpu.fcsr);
>> +    OFFSET(VCPU_FCC, kvm_vcpu_arch, fpu.fcc);
>> +    BLANK();
>> +
>> +    OFFSET(KVM_VCPU_ARCH, kvm_vcpu, arch);
>> +    OFFSET(KVM_VCPU_KVM, kvm_vcpu, kvm);
>> +    OFFSET(KVM_VCPU_RUN, kvm_vcpu, run);
>> +    BLANK();
>> +
>> +    OFFSET(KVM_ARCH_HSP, kvm_vcpu_arch, host_sp);
>> +    OFFSET(KVM_ARCH_HTP, kvm_vcpu_arch, host_tp);
>> +    OFFSET(KVM_ARCH_HANDLE_EXIT, kvm_vcpu_arch, handle_exit);
>> +    OFFSET(KVM_ARCH_HPGD, kvm_vcpu_arch, host_pgd);
>> +    OFFSET(KVM_ARCH_GEENTRY, kvm_vcpu_arch, guest_eentry);
>> +    OFFSET(KVM_ARCH_GPC, kvm_vcpu_arch, pc);
>> +    OFFSET(KVM_ARCH_GGPR, kvm_vcpu_arch, gprs);
>> +    OFFSET(KVM_ARCH_HESTAT, kvm_vcpu_arch, host_estat);
>> +    OFFSET(KVM_ARCH_HBADV, kvm_vcpu_arch, badv);
>> +    OFFSET(KVM_ARCH_HBADI, kvm_vcpu_arch, badi);
>> +    OFFSET(KVM_ARCH_HECFG, kvm_vcpu_arch, host_ecfg);
>> +    OFFSET(KVM_ARCH_HEENTRY, kvm_vcpu_arch, host_eentry);
>> +    OFFSET(KVM_ARCH_HPERCPU, kvm_vcpu_arch, host_percpu);
>> +
>> +    OFFSET(KVM_GPGD, kvm, arch.gpa_mm.pgd);
>> +    BLANK();
>> +}
>> diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S
>> new file mode 100644
>> index 000000000000..7e55976c6fec
>> --- /dev/null
>> +++ b/arch/loongarch/kvm/switch.S
>> @@ -0,0 +1,299 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +/*
>> + * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
>> + */
>> +
>> +#include <linux/linkage.h>
>> +#include <asm/stackframe.h>
>> +#include <asm/asm.h>
>> +#include <asm/asmmacro.h>
>> +#include <asm/regdef.h>
>> +#include <asm/loongarch.h>
>> +#include <asm/export.h>
>> +
>> +#define PT_GPR_OFFSET(x)    (PT_R0 + 8*x)
>> +    .text
>
> The .text is redundant, you also marked it later.
Thanks, I will remove this .text
>
>
>> +
>> +.macro kvm_save_host_gpr base
>> +    .irp n,1,2,3,22,23,24,25,26,27,28,29,30,31
>> +    st.d    $r\n, \base, PT_GPR_OFFSET(\n)
>> +    .endr
>> +.endm
>> +
>> +.macro kvm_restore_host_gpr base
>> +    .irp n,1,2,3,22,23,24,25,26,27,28,29,30,31
>> +    ld.d    $r\n, \base, PT_GPR_OFFSET(\n)
>> +    .endr
>> +.endm
>> +
>> +/*
>> + * prepare switch to guest, save host reg and restore guest reg.
>> + * a2: kvm_vcpu_arch, don't touch it until 'ertn'
>> + * t0, t1: temp register
>> + */
>> +.macro kvm_switch_to_guest
>> +    /* set host excfg.VS=0, all exceptions share one exception entry */
>> +    csrrd        t0, LOONGARCH_CSR_ECFG
>> +    bstrins.w    t0, zero, CSR_ECFG_VS_SHIFT_END, CSR_ECFG_VS_SHIFT
>> +    csrwr        t0, LOONGARCH_CSR_ECFG
>> +
>> +    /* Load up the new EENTRY */
>> +    ld.d    t0, a2, KVM_ARCH_GEENTRY
>> +    csrwr    t0, LOONGARCH_CSR_EENTRY
>> +
>> +    /* Set Guest ERA */
>> +    ld.d    t0, a2, KVM_ARCH_GPC
>> +    csrwr    t0, LOONGARCH_CSR_ERA
>> +
>> +    /* Save host PGDL */
>> +    csrrd    t0, LOONGARCH_CSR_PGDL
>> +    st.d    t0, a2, KVM_ARCH_HPGD
>> +
>> +    /* Switch to kvm */
>> +    ld.d    t1, a2, KVM_VCPU_KVM - KVM_VCPU_ARCH
>> +
>> +    /* Load guest PGDL */
>> +    lu12i.w t0, KVM_GPGD
>> +    srli.w  t0, t0, 12
>
> I think the "lu12i.w+srli.w" can be replaced by "li.w t0, KVM_GPGD" .
Thanks, I will use "li.w t0, KVM_GPGD" to simplify the two instructions.
>
>
>> +    ldx.d   t0, t1, t0
>> +    csrwr    t0, LOONGARCH_CSR_PGDL
>> +
>> +    /* Mix GID and RID */
>> +    csrrd        t1, LOONGARCH_CSR_GSTAT
>> +    bstrpick.w    t1, t1, CSR_GSTAT_GID_SHIFT_END, CSR_GSTAT_GID_SHIFT
>> +    csrrd        t0, LOONGARCH_CSR_GTLBC
>> +    bstrins.w    t0, t1, CSR_GTLBC_TGID_SHIFT_END, CSR_GTLBC_TGID_SHIFT
>> +    csrwr        t0, LOONGARCH_CSR_GTLBC
>> +
>> +    /*
>> +     * Switch to guest:
>> +     *  GSTAT.PGM = 1, ERRCTL.ISERR = 0, TLBRPRMD.ISTLBR = 0
>> +     *  ertn
>> +     */
>> +
>> +    /*
>> +     * Enable intr in root mode with future ertn so that host interrupt
>> +     * can be responsed during VM runs
>> +     * guest crmd comes from separate gcsr_CRMD register
>> +     */
>> +    ori    t0, zero, CSR_PRMD_PIE
> li.w t0, CSR_PRMD_PIE
Thanks for your advice, and I think it need not to replace it with 
"li.w" there, as it has the same meaning with "ori" instruction, and 
"ori" instruction is simple and clear enough. The same as the following 
"move" instructions. What do you think of it.
>> +    csrxchg    t0, t0, LOONGARCH_CSR_PRMD
>> +
>> +    /* Set PVM bit to setup ertn to guest context */
>> +    ori    t0, zero, CSR_GSTAT_PVM
> li.w t0, ...
>> +    csrxchg    t0, t0, LOONGARCH_CSR_GSTAT
>> +
>> +    /* Load Guest gprs */
>> +    ld.d    ra,    a2,    (KVM_ARCH_GGPR + 8 * REG_RA)
>> +    ld.d    tp,    a2,    (KVM_ARCH_GGPR + 8 * REG_TP)
>> +    ld.d    sp,    a2,    (KVM_ARCH_GGPR + 8 * REG_SP)
>> +    ld.d    a0,    a2,    (KVM_ARCH_GGPR + 8 * REG_A0)
>> +    ld.d    a1,    a2,    (KVM_ARCH_GGPR + 8 * REG_A1)
>> +    ld.d    a3,    a2,    (KVM_ARCH_GGPR + 8 * REG_A3)
>> +    ld.d    a4,    a2,    (KVM_ARCH_GGPR + 8 * REG_A4)
>> +    ld.d    a5,    a2,    (KVM_ARCH_GGPR + 8 * REG_A5)
>> +    ld.d    a6,    a2,    (KVM_ARCH_GGPR + 8 * REG_A6)
>> +    ld.d    a7,    a2,    (KVM_ARCH_GGPR + 8 * REG_A7)
>> +    ld.d    t0,    a2,    (KVM_ARCH_GGPR + 8 * REG_T0)
>> +    ld.d    t1,    a2,    (KVM_ARCH_GGPR + 8 * REG_T1)
>> +    ld.d    t2,    a2,    (KVM_ARCH_GGPR + 8 * REG_T2)
>> +    ld.d    t3,    a2,    (KVM_ARCH_GGPR + 8 * REG_T3)
>> +    ld.d    t4,    a2,    (KVM_ARCH_GGPR + 8 * REG_T4)
>> +    ld.d    t5,    a2,    (KVM_ARCH_GGPR + 8 * REG_T5)
>> +    ld.d    t6,    a2,    (KVM_ARCH_GGPR + 8 * REG_T6)
>> +    ld.d    t7,    a2,    (KVM_ARCH_GGPR + 8 * REG_T7)
>> +    ld.d    t8,    a2,    (KVM_ARCH_GGPR + 8 * REG_T8)
>> +    ld.d    u0,    a2,    (KVM_ARCH_GGPR + 8 * REG_U0)
>> +    ld.d    fp,    a2,    (KVM_ARCH_GGPR + 8 * REG_FP)
>> +    ld.d    s0,    a2,    (KVM_ARCH_GGPR + 8 * REG_S0)
>> +    ld.d    s1,    a2,    (KVM_ARCH_GGPR + 8 * REG_S1)
>> +    ld.d    s2,    a2,    (KVM_ARCH_GGPR + 8 * REG_S2)
>> +    ld.d    s3,    a2,    (KVM_ARCH_GGPR + 8 * REG_S3)
>> +    ld.d    s4,    a2,    (KVM_ARCH_GGPR + 8 * REG_S4)
>> +    ld.d    s5,    a2,    (KVM_ARCH_GGPR + 8 * REG_S5)
>> +    ld.d    s6,    a2,    (KVM_ARCH_GGPR + 8 * REG_S6)
>> +    ld.d    s7,    a2,    (KVM_ARCH_GGPR + 8 * REG_S7)
>> +    ld.d    s8,    a2,    (KVM_ARCH_GGPR + 8 * REG_S8)
>
> Why not use .irp which like kvm_save/restore_host_gpr do here?
Thanks, I will add kvm_restore_guest_gprs fucntion and use .irp 
statement to replace these instructions.
>
>
>> +    /* Load KVM_ARCH register */
>> +    ld.d    a2,    a2,    (KVM_ARCH_GGPR + 8 * REG_A2)
>> +
>> +    ertn
>> +.endm
>> +
>> +    /*
>> +     * exception entry for general exception from guest mode
>> +     *  - IRQ is disabled
>> +     *  - kernel privilege in root mode
>> +     *  - page mode keep unchanged from previous prmd in root mode
>> +     *  - Fixme: tlb exception cannot happen since registers 
>> relative with TLB
>> +     *  -        is still in guest mode, such as pgd table/vmid 
>> registers etc,
>> +     *  -        will fix with hw page walk enabled in future
>> +     * load kvm_vcpu from reserved CSR KVM_VCPU_KS, and save a2 to 
>> KVM_TEMP_KS
>> +     */
>> +    .section    .text
>
> here.
>
>
>> +    .cfi_sections    .debug_frame
>> +SYM_CODE_START(kvm_vector_entry)
>> +    csrwr    a2,   KVM_TEMP_KS
>> +    csrrd    a2,   KVM_VCPU_KS
>> +    addi.d    a2,   a2, KVM_VCPU_ARCH
>> +
>> +    /* After save gprs, free to use any gpr */
>> +    st.d    ra,    a2,    (KVM_ARCH_GGPR + 8 * REG_RA)
>> +    st.d    tp,    a2,    (KVM_ARCH_GGPR + 8 * REG_TP)
>> +    st.d    sp,    a2,    (KVM_ARCH_GGPR + 8 * REG_SP)
>> +    st.d    a0,    a2,    (KVM_ARCH_GGPR + 8 * REG_A0)
>> +    st.d    a1,    a2,    (KVM_ARCH_GGPR + 8 * REG_A1)
>> +    st.d    a3,    a2,    (KVM_ARCH_GGPR + 8 * REG_A3)
>> +    st.d    a4,    a2,    (KVM_ARCH_GGPR + 8 * REG_A4)
>> +    st.d    a5,    a2,    (KVM_ARCH_GGPR + 8 * REG_A5)
>> +    st.d    a6,    a2,    (KVM_ARCH_GGPR + 8 * REG_A6)
>> +    st.d    a7,    a2,    (KVM_ARCH_GGPR + 8 * REG_A7)
>> +    st.d    t0,    a2,    (KVM_ARCH_GGPR + 8 * REG_T0)
>> +    st.d    t1,    a2,    (KVM_ARCH_GGPR + 8 * REG_T1)
>> +    st.d    t2,    a2,    (KVM_ARCH_GGPR + 8 * REG_T2)
>> +    st.d    t3,    a2,    (KVM_ARCH_GGPR + 8 * REG_T3)
>> +    st.d    t4,    a2,    (KVM_ARCH_GGPR + 8 * REG_T4)
>> +    st.d    t5,    a2,    (KVM_ARCH_GGPR + 8 * REG_T5)
>> +    st.d    t6,    a2,    (KVM_ARCH_GGPR + 8 * REG_T6)
>> +    st.d    t7,    a2,    (KVM_ARCH_GGPR + 8 * REG_T7)
>> +    st.d    t8,    a2,    (KVM_ARCH_GGPR + 8 * REG_T8)
>> +    st.d    u0,    a2,    (KVM_ARCH_GGPR + 8 * REG_U0)
>> +    st.d    fp,    a2,    (KVM_ARCH_GGPR + 8 * REG_FP)
>> +    st.d    s0,    a2,    (KVM_ARCH_GGPR + 8 * REG_S0)
>> +    st.d    s1,    a2,    (KVM_ARCH_GGPR + 8 * REG_S1)
>> +    st.d    s2,    a2,    (KVM_ARCH_GGPR + 8 * REG_S2)
>> +    st.d    s3,    a2,    (KVM_ARCH_GGPR + 8 * REG_S3)
>> +    st.d    s4,    a2,    (KVM_ARCH_GGPR + 8 * REG_S4)
>> +    st.d    s5,    a2,    (KVM_ARCH_GGPR + 8 * REG_S5)
>> +    st.d    s6,    a2,    (KVM_ARCH_GGPR + 8 * REG_S6)
>> +    st.d    s7,    a2,    (KVM_ARCH_GGPR + 8 * REG_S7)
>> +    st.d    s8,    a2,    (KVM_ARCH_GGPR + 8 * REG_S8)
>
> Why not .irp ... ?
Thanks, I will add kvm_save_guest_gprs fucntion and use .irp statement 
to replace these instructions.
>
>
>> +    /* Save guest a2 */
>> +    csrrd    t0,    KVM_TEMP_KS
>> +    st.d    t0,    a2,    (KVM_ARCH_GGPR + 8 * REG_A2)
>> +
>> +    /* a2: kvm_vcpu_arch, a1 is free to use */
>> +    csrrd    s1,   KVM_VCPU_KS
>> +    ld.d    s0,   s1, KVM_VCPU_RUN
>> +
>> +    csrrd    t0,   LOONGARCH_CSR_ESTAT
>> +    st.d    t0,   a2, KVM_ARCH_HESTAT
>> +    csrrd    t0,   LOONGARCH_CSR_ERA
>> +    st.d    t0,   a2, KVM_ARCH_GPC
>> +    csrrd    t0,   LOONGARCH_CSR_BADV
>> +    st.d    t0,   a2, KVM_ARCH_HBADV
>> +    csrrd    t0,   LOONGARCH_CSR_BADI
>> +    st.d    t0,   a2, KVM_ARCH_HBADI
>> +
>> +    /* Restore host excfg.VS */
>> +    csrrd    t0, LOONGARCH_CSR_ECFG
>> +    ld.d    t1, a2, KVM_ARCH_HECFG
>> +    or    t0, t0, t1
>> +    csrwr    t0, LOONGARCH_CSR_ECFG
>> +
>> +    /* Restore host eentry */
>> +    ld.d    t0, a2, KVM_ARCH_HEENTRY
>> +    csrwr    t0, LOONGARCH_CSR_EENTRY
>> +
>> +    /* restore host pgd table */
>> +    ld.d    t0, a2, KVM_ARCH_HPGD
>> +    csrwr   t0, LOONGARCH_CSR_PGDL
>> +
>> +    /*
>> +     * Disable PGM bit to enter root mode by default with next ertn
>> +     */
>> +    ori    t0, zero, CSR_GSTAT_PVM
> li.w
>> +    csrxchg    zero, t0, LOONGARCH_CSR_GSTAT
>> +    /*
>> +     * Clear GTLBC.TGID field
>> +     *       0: for root  tlb update in future tlb instr
>> +     *  others: for guest tlb update like gpa to hpa in future tlb 
>> instr
>> +     */
>> +    csrrd    t0, LOONGARCH_CSR_GTLBC
>> +    bstrins.w    t0, zero, CSR_GTLBC_TGID_SHIFT_END, 
>> CSR_GTLBC_TGID_SHIFT
>> +    csrwr    t0, LOONGARCH_CSR_GTLBC
>> +    ld.d    tp, a2, KVM_ARCH_HTP
>> +    ld.d    sp, a2, KVM_ARCH_HSP
>> +    /* restore per cpu register */
>> +    ld.d    u0, a2, KVM_ARCH_HPERCPU
>> +    addi.d    sp, sp, -PT_SIZE
>> +
>> +    /* Prepare handle exception */
>> +    or    a0, s0, zero
> move
>> +    or    a1, s1, zero
> move
>> +    ld.d    t8, a2, KVM_ARCH_HANDLE_EXIT
>> +    jirl    ra, t8, 0
>> +
>> +    or    a2, s1, zero
> move
>> +    addi.d    a2, a2, KVM_VCPU_ARCH
>> +
>> +    /* resume host when ret <= 0 */
>> +    bge    zero, a0, ret_to_host
> blez a0, ret_to_host
I think we need not to change it, as they have the same meaning.
>> +
>> +    /*
>> +         * return to guest
>> +         * save per cpu register again, maybe switched to another cpu
>> +         */
>> +    st.d    u0, a2, KVM_ARCH_HPERCPU
>> +
>> +    /* Save kvm_vcpu to kscratch */
>> +    csrwr    s1, KVM_VCPU_KS
>> +    kvm_switch_to_guest
>> +
>> +ret_to_host:
>> +    ld.d    a2, a2, KVM_ARCH_HSP
>> +    addi.d  a2, a2, -PT_SIZE
>> +    kvm_restore_host_gpr    a2
>> +    jr      ra
>> +SYM_CODE_END(kvm_vector_entry)
>> +kvm_vector_entry_end:
>> +
>> +/*
>> + * int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu)
>> + *
>> + * @register_param:
>> + *  a0: kvm_run* run
>> + *  a1: kvm_vcpu* vcpu
>> + */
>> +SYM_FUNC_START(kvm_enter_guest)
>> +    /* allocate space in stack bottom */
>> +    addi.d    a2, sp, -PT_SIZE
>> +    /* save host gprs */
>> +    kvm_save_host_gpr a2
>> +
>> +    /* save host crmd,prmd csr to stack */
>> +    csrrd    a3, LOONGARCH_CSR_CRMD
>> +    st.d    a3, a2, PT_CRMD
>> +    csrrd    a3, LOONGARCH_CSR_PRMD
>> +    st.d    a3, a2, PT_PRMD
>> +
>> +    addi.d    a2, a1, KVM_VCPU_ARCH
>> +    st.d    sp, a2, KVM_ARCH_HSP
>> +    st.d    tp, a2, KVM_ARCH_HTP
>> +    /* Save per cpu register */
>> +    st.d    u0, a2, KVM_ARCH_HPERCPU
>> +
>> +    /* Save kvm_vcpu to kscratch */
>> +    csrwr    a1, KVM_VCPU_KS
>> +    kvm_switch_to_guest
>> +SYM_FUNC_END(kvm_enter_guest)
>> +kvm_enter_guest_end:
>> +
>> +    .section ".rodata"
>> +SYM_DATA(kvm_vector_size, .quad kvm_vector_entry_end - 
>> kvm_vector_entry)
>> +SYM_DATA(kvm_enter_guest_size, .quad kvm_enter_guest_end - 
>> kvm_enter_guest)
>
> Not restore .text section. Although I'd like to use .pushsection ... .pop
>
> after marked kvm_..._end, you can put them after kvm_restore_fpu, too.
>
>
> Jinyang
Thanks, I will put these code after kvm_restore_fpu.

Thanks
Tianrui Zhao
>
>
>> +
>> +
>> +SYM_FUNC_START(kvm_save_fpu)
>> +    fpu_save_csr    a0 t1
>> +    fpu_save_double a0 t1
>> +    fpu_save_cc    a0 t1 t2
>> +    jr              ra
>> +SYM_FUNC_END(kvm_save_fpu)
>> +
>> +SYM_FUNC_START(kvm_restore_fpu)
>> +    fpu_restore_double a0 t1
>> +    fpu_restore_csr    a0 t1
>> +    fpu_restore_cc       a0 t1 t2
>> +    jr                 ra
>> +SYM_FUNC_END(kvm_restore_fpu)
  
WANG Xuerui June 28, 2023, 9:51 a.m. UTC | #3
Hi,

On 2023/6/28 16:34, zhaotianrui wrote:
> 
> 在 2023/6/28 上午11:42, Jinyang He 写道:
>> On 2023-06-26 16:47, Tianrui Zhao wrote:
>>
>>> [snip]
>>
>>> +    ldx.d   t0, t1, t0
>>> +    csrwr    t0, LOONGARCH_CSR_PGDL
>>> +
>>> +    /* Mix GID and RID */
>>> +    csrrd        t1, LOONGARCH_CSR_GSTAT
>>> +    bstrpick.w    t1, t1, CSR_GSTAT_GID_SHIFT_END, CSR_GSTAT_GID_SHIFT
>>> +    csrrd        t0, LOONGARCH_CSR_GTLBC
>>> +    bstrins.w    t0, t1, CSR_GTLBC_TGID_SHIFT_END, CSR_GTLBC_TGID_SHIFT
>>> +    csrwr        t0, LOONGARCH_CSR_GTLBC
>>> +
>>> +    /*
>>> +     * Switch to guest:
>>> +     *  GSTAT.PGM = 1, ERRCTL.ISERR = 0, TLBRPRMD.ISTLBR = 0
>>> +     *  ertn
>>> +     */
>>> +
>>> +    /*
>>> +     * Enable intr in root mode with future ertn so that host interrupt
>>> +     * can be responsed during VM runs
>>> +     * guest crmd comes from separate gcsr_CRMD register
>>> +     */
>>> +    ori    t0, zero, CSR_PRMD_PIE
>> li.w t0, CSR_PRMD_PIE
> Thanks for your advice, and I think it need not to replace it with 
> "li.w" there, as it has the same meaning with "ori" instruction, and 
> "ori" instruction is simple and clear enough. The same as the following 
> "move" instructions. What do you think of it.

Just my 2c: I'd agree that pseudo-instructions should be used wherever 
possible and helping readability.

FYI there were similar usages way before, but all were cleaned up with 
my previous commit 57ce5d3eefac ("LoongArch: Use the "move" 
pseudo-instruction where applicable").

Such usages apparently came from an era when the LoongArch toolchains 
didn't support any pseudo-instruction, and are less intuitive especially 
for someone not familiar with LoongArch assembly. Given that familiarity 
with LoongArch won't be widespread (unlike with e.g. RISC-V that are 
adopted more widely), we should optimize for readability when writing 
code; it's also a best practice in general because code is read way more 
often than written, and people care about semantics not unnecessary 
details like "how are moves like this or that materialized".
  
maobibo June 28, 2023, 10:11 a.m. UTC | #4
在 2023/6/28 17:51, WANG Xuerui 写道:
> Hi,
> 
> On 2023/6/28 16:34, zhaotianrui wrote:
>>
>> 在 2023/6/28 上午11:42, Jinyang He 写道:
>>> On 2023-06-26 16:47, Tianrui Zhao wrote:
>>>
>>>> [snip]
>>>
>>>> +    ldx.d   t0, t1, t0
>>>> +    csrwr    t0, LOONGARCH_CSR_PGDL
>>>> +
>>>> +    /* Mix GID and RID */
>>>> +    csrrd        t1, LOONGARCH_CSR_GSTAT
>>>> +    bstrpick.w    t1, t1, CSR_GSTAT_GID_SHIFT_END, CSR_GSTAT_GID_SHIFT
>>>> +    csrrd        t0, LOONGARCH_CSR_GTLBC
>>>> +    bstrins.w    t0, t1, CSR_GTLBC_TGID_SHIFT_END, CSR_GTLBC_TGID_SHIFT
>>>> +    csrwr        t0, LOONGARCH_CSR_GTLBC
>>>> +
>>>> +    /*
>>>> +     * Switch to guest:
>>>> +     *  GSTAT.PGM = 1, ERRCTL.ISERR = 0, TLBRPRMD.ISTLBR = 0
>>>> +     *  ertn
>>>> +     */
>>>> +
>>>> +    /*
>>>> +     * Enable intr in root mode with future ertn so that host interrupt
>>>> +     * can be responsed during VM runs
>>>> +     * guest crmd comes from separate gcsr_CRMD register
>>>> +     */
>>>> +    ori    t0, zero, CSR_PRMD_PIE
>>> li.w t0, CSR_PRMD_PIE
>> Thanks for your advice, and I think it need not to replace it with "li.w" there, as it has the same meaning with "ori" instruction, and "ori" instruction is simple and clear enough. The same as the following "move" instructions. What do you think of it.
> 
> Just my 2c: I'd agree that pseudo-instructions should be used wherever possible and helping readability.
"lu12i.w+srli.w" can be replaced by "li.w t0, KVM_GPGD" 
we accept the suggestion two instructions should be replaced with pseudo-instruction.

For the instruction "ori    t0, zero, CSR_PRMD_PIE"
what is advantage of this pseudo-instruction
    li.w t0, CSR_PRMD_PIE

is "ori t0, zero, CSR_PRMD_PIE" hard to understand? It is basic arithmetic instr and easy to understand also. To be frank I do not see the advantage of using li.w, also there is no document that pseudo-instruction should be used with high priority.

Regards
Bibo Mao
> 
> FYI there were similar usages way before, but all were cleaned up with my previous commit 57ce5d3eefac ("LoongArch: Use the "move" pseudo-instruction where applicable").
> 
> Such usages apparently came from an era when the LoongArch toolchains didn't support any pseudo-instruction, and are less intuitive especially for someone not familiar with LoongArch assembly. Given that familiarity with LoongArch won't be widespread (unlike with e.g. RISC-V that are adopted more widely), we should optimize for readability when writing code; it's also a best practice in general because code is read way more often than written, and people care about semantics not unnecessary details like "how are moves like this or that materialized".
>
  
WANG Xuerui June 28, 2023, 10:22 a.m. UTC | #5
On 2023/6/28 18:11, bibo mao wrote:
> 
> 
> 在 2023/6/28 17:51, WANG Xuerui 写道:
>> Hi,
>>
>> On 2023/6/28 16:34, zhaotianrui wrote:
>>>
>>> 在 2023/6/28 上午11:42, Jinyang He 写道:
>>>> On 2023-06-26 16:47, Tianrui Zhao wrote:
>>>>
>>>>> [snip]
>>>>
>>>>> +    ldx.d   t0, t1, t0
>>>>> +    csrwr    t0, LOONGARCH_CSR_PGDL
>>>>> +
>>>>> +    /* Mix GID and RID */
>>>>> +    csrrd        t1, LOONGARCH_CSR_GSTAT
>>>>> +    bstrpick.w    t1, t1, CSR_GSTAT_GID_SHIFT_END, CSR_GSTAT_GID_SHIFT
>>>>> +    csrrd        t0, LOONGARCH_CSR_GTLBC
>>>>> +    bstrins.w    t0, t1, CSR_GTLBC_TGID_SHIFT_END, CSR_GTLBC_TGID_SHIFT
>>>>> +    csrwr        t0, LOONGARCH_CSR_GTLBC
>>>>> +
>>>>> +    /*
>>>>> +     * Switch to guest:
>>>>> +     *  GSTAT.PGM = 1, ERRCTL.ISERR = 0, TLBRPRMD.ISTLBR = 0
>>>>> +     *  ertn
>>>>> +     */
>>>>> +
>>>>> +    /*
>>>>> +     * Enable intr in root mode with future ertn so that host interrupt
>>>>> +     * can be responsed during VM runs
>>>>> +     * guest crmd comes from separate gcsr_CRMD register
>>>>> +     */
>>>>> +    ori    t0, zero, CSR_PRMD_PIE
>>>> li.w t0, CSR_PRMD_PIE
>>> Thanks for your advice, and I think it need not to replace it with "li.w" there, as it has the same meaning with "ori" instruction, and "ori" instruction is simple and clear enough. The same as the following "move" instructions. What do you think of it.
>>
>> Just my 2c: I'd agree that pseudo-instructions should be used wherever possible and helping readability.
> "lu12i.w+srli.w" can be replaced by "li.w t0, KVM_GPGD"
> we accept the suggestion two instructions should be replaced with pseudo-instruction.
> 
> For the instruction "ori    t0, zero, CSR_PRMD_PIE"
> what is advantage of this pseudo-instruction
>      li.w t0, CSR_PRMD_PIE
> 
> is "ori t0, zero, CSR_PRMD_PIE" hard to understand? It is basic arithmetic instr and easy to understand also. To be frank I do not see the advantage of using li.w, also there is no document that pseudo-instruction should be used with high priority.

It depends on the reader. Sure the semantics are the same, but with "ori 
xx, zero, xx" someone's always going to wonder "why do 'x = 0 | 
something' when you can simply li.w", because even if it's easy to 
understand it's still an extra level of indirection.

And I've given the historical and general software engineering 
perspective too; it's not something set in stone, but I'd expect general 
software development best practices and minimizing any *possible* reader 
confusion to be acceptable.
  
Xi Ruoyao June 28, 2023, 10:40 a.m. UTC | #6
On Wed, 2023-06-28 at 18:22 +0800, WANG Xuerui wrote:
> > is "ori t0, zero, CSR_PRMD_PIE" hard to understand? It is basic
> > arithmetic instr and easy to understand also. To be frank I do not
> > see the advantage of using li.w, also there is no document that
> > pseudo-instruction should be used with high priority.

> It depends on the reader. Sure the semantics are the same, but with "ori 
> xx, zero, xx" someone's always going to wonder "why do 'x = 0 | 
> something' when you can simply li.w", because even if it's easy to 
> understand it's still an extra level of indirection.
> 
> And I've given the historical and general software engineering 
> perspective too; it's not something set in stone, but I'd expect general 
> software development best practices and minimizing any *possible* reader 
> confusion to be acceptable.

The pseudo *should* be the high priority in general.  If we don't
consider the pseudos high priority, people will start to load immediates
with "their own way", like writing "addi.w $t0, $zero, 42" or even "xori
$t0, $zero, 42" for "42".  These may puzzle the uarch, causing a
performance worse than "ori $t0, $zero, 42".  So the only rationale
things are either:

1. Telling people always use pseudos if possible.

or

2. Telling people "remember to use ori for loading small immediates, not
other instructions".

1 is obviously easier.
  
maobibo June 29, 2023, 12:34 a.m. UTC | #7
在 2023/6/28 18:22, WANG Xuerui 写道:
> On 2023/6/28 18:11, bibo mao wrote:
>>
>>
>> 在 2023/6/28 17:51, WANG Xuerui 写道:
>>> Hi,
>>>
>>> On 2023/6/28 16:34, zhaotianrui wrote:
>>>>
>>>> 在 2023/6/28 上午11:42, Jinyang He 写道:
>>>>> On 2023-06-26 16:47, Tianrui Zhao wrote:
>>>>>
>>>>>> [snip]
>>>>>
>>>>>> +    ldx.d   t0, t1, t0
>>>>>> +    csrwr    t0, LOONGARCH_CSR_PGDL
>>>>>> +
>>>>>> +    /* Mix GID and RID */
>>>>>> +    csrrd        t1, LOONGARCH_CSR_GSTAT
>>>>>> +    bstrpick.w    t1, t1, CSR_GSTAT_GID_SHIFT_END, CSR_GSTAT_GID_SHIFT
>>>>>> +    csrrd        t0, LOONGARCH_CSR_GTLBC
>>>>>> +    bstrins.w    t0, t1, CSR_GTLBC_TGID_SHIFT_END, CSR_GTLBC_TGID_SHIFT
>>>>>> +    csrwr        t0, LOONGARCH_CSR_GTLBC
>>>>>> +
>>>>>> +    /*
>>>>>> +     * Switch to guest:
>>>>>> +     *  GSTAT.PGM = 1, ERRCTL.ISERR = 0, TLBRPRMD.ISTLBR = 0
>>>>>> +     *  ertn
>>>>>> +     */
>>>>>> +
>>>>>> +    /*
>>>>>> +     * Enable intr in root mode with future ertn so that host interrupt
>>>>>> +     * can be responsed during VM runs
>>>>>> +     * guest crmd comes from separate gcsr_CRMD register
>>>>>> +     */
>>>>>> +    ori    t0, zero, CSR_PRMD_PIE
>>>>> li.w t0, CSR_PRMD_PIE
>>>> Thanks for your advice, and I think it need not to replace it with "li.w" there, as it has the same meaning with "ori" instruction, and "ori" instruction is simple and clear enough. The same as the following "move" instructions. What do you think of it.
>>>
>>> Just my 2c: I'd agree that pseudo-instructions should be used wherever possible and helping readability.
>> "lu12i.w+srli.w" can be replaced by "li.w t0, KVM_GPGD"
>> we accept the suggestion two instructions should be replaced with pseudo-instruction.
>>
>> For the instruction "ori    t0, zero, CSR_PRMD_PIE"
>> what is advantage of this pseudo-instruction
>>      li.w t0, CSR_PRMD_PIE
>>
>> is "ori t0, zero, CSR_PRMD_PIE" hard to understand? It is basic arithmetic instr and easy to understand also. To be frank I do not see the advantage of using li.w, also there is no document that pseudo-instruction should be used with high priority.
> 
> It depends on the reader. Sure the semantics are the same, but with "ori xx, zero, xx" someone's always going to wonder "why do 'x = 0 | something' when you can simply li.w", because even if it's easy to understand it's still an extra level of indirection.
pseudo-instruction li.w/li.d is understandable, is there alias li.h/li.b instructions also or li is enough on LoongArch64 system?

ori $rd, $rj, imm12 can be used, however ori $rd, zero, imm12 should not be used by the description.

I guess compiler guys want to remove using zero register in assemble language, so there is pseudo instruction here. is there similar pseudo instr for "xor $rd, ZERO, $rj" and "sub $rd, ZERO, $rj" to remove zero register also?  I do not get the  documentation and consensus information for pseudo instruction usage.

Regards
Bibo Mao

> 
> And I've given the historical and general software engineering perspective too; it's not something set in stone, but I'd expect general software development best practices and minimizing any *possible* reader confusion to be acceptable.
>
  

Patch

diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c
index 4bdb203fc66e..cb6c5a5afea9 100644
--- a/arch/loongarch/kernel/asm-offsets.c
+++ b/arch/loongarch/kernel/asm-offsets.c
@@ -9,6 +9,7 @@ 
 #include <linux/mm.h>
 #include <linux/kbuild.h>
 #include <linux/suspend.h>
+#include <linux/kvm_host.h>
 #include <asm/cpu-info.h>
 #include <asm/ptrace.h>
 #include <asm/processor.h>
@@ -272,3 +273,34 @@  void output_pbe_defines(void)
 	BLANK();
 }
 #endif
+
+static void __used output_kvm_defines(void)
+{
+	COMMENT(" KVM/LOONGARCH Specific offsets. ");
+
+	OFFSET(VCPU_FCSR0, kvm_vcpu_arch, fpu.fcsr);
+	OFFSET(VCPU_FCC, kvm_vcpu_arch, fpu.fcc);
+	BLANK();
+
+	OFFSET(KVM_VCPU_ARCH, kvm_vcpu, arch);
+	OFFSET(KVM_VCPU_KVM, kvm_vcpu, kvm);
+	OFFSET(KVM_VCPU_RUN, kvm_vcpu, run);
+	BLANK();
+
+	OFFSET(KVM_ARCH_HSP, kvm_vcpu_arch, host_sp);
+	OFFSET(KVM_ARCH_HTP, kvm_vcpu_arch, host_tp);
+	OFFSET(KVM_ARCH_HANDLE_EXIT, kvm_vcpu_arch, handle_exit);
+	OFFSET(KVM_ARCH_HPGD, kvm_vcpu_arch, host_pgd);
+	OFFSET(KVM_ARCH_GEENTRY, kvm_vcpu_arch, guest_eentry);
+	OFFSET(KVM_ARCH_GPC, kvm_vcpu_arch, pc);
+	OFFSET(KVM_ARCH_GGPR, kvm_vcpu_arch, gprs);
+	OFFSET(KVM_ARCH_HESTAT, kvm_vcpu_arch, host_estat);
+	OFFSET(KVM_ARCH_HBADV, kvm_vcpu_arch, badv);
+	OFFSET(KVM_ARCH_HBADI, kvm_vcpu_arch, badi);
+	OFFSET(KVM_ARCH_HECFG, kvm_vcpu_arch, host_ecfg);
+	OFFSET(KVM_ARCH_HEENTRY, kvm_vcpu_arch, host_eentry);
+	OFFSET(KVM_ARCH_HPERCPU, kvm_vcpu_arch, host_percpu);
+
+	OFFSET(KVM_GPGD, kvm, arch.gpa_mm.pgd);
+	BLANK();
+}
diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S
new file mode 100644
index 000000000000..7e55976c6fec
--- /dev/null
+++ b/arch/loongarch/kvm/switch.S
@@ -0,0 +1,299 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+
+#include <linux/linkage.h>
+#include <asm/stackframe.h>
+#include <asm/asm.h>
+#include <asm/asmmacro.h>
+#include <asm/regdef.h>
+#include <asm/loongarch.h>
+#include <asm/export.h>
+
+#define PT_GPR_OFFSET(x)	(PT_R0 + 8*x)
+	.text
+
+.macro kvm_save_host_gpr base
+	.irp n,1,2,3,22,23,24,25,26,27,28,29,30,31
+	st.d	$r\n, \base, PT_GPR_OFFSET(\n)
+	.endr
+.endm
+
+.macro kvm_restore_host_gpr base
+	.irp n,1,2,3,22,23,24,25,26,27,28,29,30,31
+	ld.d	$r\n, \base, PT_GPR_OFFSET(\n)
+	.endr
+.endm
+
+/*
+ * prepare switch to guest, save host reg and restore guest reg.
+ * a2: kvm_vcpu_arch, don't touch it until 'ertn'
+ * t0, t1: temp register
+ */
+.macro kvm_switch_to_guest
+	/* set host excfg.VS=0, all exceptions share one exception entry */
+	csrrd		t0, LOONGARCH_CSR_ECFG
+	bstrins.w	t0, zero, CSR_ECFG_VS_SHIFT_END, CSR_ECFG_VS_SHIFT
+	csrwr		t0, LOONGARCH_CSR_ECFG
+
+	/* Load up the new EENTRY */
+	ld.d	t0, a2, KVM_ARCH_GEENTRY
+	csrwr	t0, LOONGARCH_CSR_EENTRY
+
+	/* Set Guest ERA */
+	ld.d	t0, a2, KVM_ARCH_GPC
+	csrwr	t0, LOONGARCH_CSR_ERA
+
+	/* Save host PGDL */
+	csrrd	t0, LOONGARCH_CSR_PGDL
+	st.d	t0, a2, KVM_ARCH_HPGD
+
+	/* Switch to kvm */
+	ld.d	t1, a2, KVM_VCPU_KVM - KVM_VCPU_ARCH
+
+	/* Load guest PGDL */
+	lu12i.w t0, KVM_GPGD
+	srli.w  t0, t0, 12
+	ldx.d   t0, t1, t0
+	csrwr	t0, LOONGARCH_CSR_PGDL
+
+	/* Mix GID and RID */
+	csrrd		t1, LOONGARCH_CSR_GSTAT
+	bstrpick.w	t1, t1, CSR_GSTAT_GID_SHIFT_END, CSR_GSTAT_GID_SHIFT
+	csrrd		t0, LOONGARCH_CSR_GTLBC
+	bstrins.w	t0, t1, CSR_GTLBC_TGID_SHIFT_END, CSR_GTLBC_TGID_SHIFT
+	csrwr		t0, LOONGARCH_CSR_GTLBC
+
+	/*
+	 * Switch to guest:
+	 *  GSTAT.PGM = 1, ERRCTL.ISERR = 0, TLBRPRMD.ISTLBR = 0
+	 *  ertn
+	 */
+
+	/*
+	 * Enable intr in root mode with future ertn so that host interrupt
+	 * can be responsed during VM runs
+	 * guest crmd comes from separate gcsr_CRMD register
+	 */
+	ori	t0, zero, CSR_PRMD_PIE
+	csrxchg	t0, t0,   LOONGARCH_CSR_PRMD
+
+	/* Set PVM bit to setup ertn to guest context */
+	ori	t0, zero, CSR_GSTAT_PVM
+	csrxchg	t0, t0,   LOONGARCH_CSR_GSTAT
+
+	/* Load Guest gprs */
+	ld.d	ra,	a2,	(KVM_ARCH_GGPR + 8 * REG_RA)
+	ld.d	tp,	a2,	(KVM_ARCH_GGPR + 8 * REG_TP)
+	ld.d	sp,	a2,	(KVM_ARCH_GGPR + 8 * REG_SP)
+	ld.d	a0,	a2,	(KVM_ARCH_GGPR + 8 * REG_A0)
+	ld.d	a1,	a2,	(KVM_ARCH_GGPR + 8 * REG_A1)
+	ld.d	a3,	a2,	(KVM_ARCH_GGPR + 8 * REG_A3)
+	ld.d	a4,	a2,	(KVM_ARCH_GGPR + 8 * REG_A4)
+	ld.d	a5,	a2,	(KVM_ARCH_GGPR + 8 * REG_A5)
+	ld.d	a6,	a2,	(KVM_ARCH_GGPR + 8 * REG_A6)
+	ld.d	a7,	a2,	(KVM_ARCH_GGPR + 8 * REG_A7)
+	ld.d	t0,	a2,	(KVM_ARCH_GGPR + 8 * REG_T0)
+	ld.d	t1,	a2,	(KVM_ARCH_GGPR + 8 * REG_T1)
+	ld.d	t2,	a2,	(KVM_ARCH_GGPR + 8 * REG_T2)
+	ld.d	t3,	a2,	(KVM_ARCH_GGPR + 8 * REG_T3)
+	ld.d	t4,	a2,	(KVM_ARCH_GGPR + 8 * REG_T4)
+	ld.d	t5,	a2,	(KVM_ARCH_GGPR + 8 * REG_T5)
+	ld.d	t6,	a2,	(KVM_ARCH_GGPR + 8 * REG_T6)
+	ld.d	t7,	a2,	(KVM_ARCH_GGPR + 8 * REG_T7)
+	ld.d	t8,	a2,	(KVM_ARCH_GGPR + 8 * REG_T8)
+	ld.d	u0,	a2,	(KVM_ARCH_GGPR + 8 * REG_U0)
+	ld.d	fp,	a2,	(KVM_ARCH_GGPR + 8 * REG_FP)
+	ld.d	s0,	a2,	(KVM_ARCH_GGPR + 8 * REG_S0)
+	ld.d	s1,	a2,	(KVM_ARCH_GGPR + 8 * REG_S1)
+	ld.d	s2,	a2,	(KVM_ARCH_GGPR + 8 * REG_S2)
+	ld.d	s3,	a2,	(KVM_ARCH_GGPR + 8 * REG_S3)
+	ld.d	s4,	a2,	(KVM_ARCH_GGPR + 8 * REG_S4)
+	ld.d	s5,	a2,	(KVM_ARCH_GGPR + 8 * REG_S5)
+	ld.d	s6,	a2,	(KVM_ARCH_GGPR + 8 * REG_S6)
+	ld.d	s7,	a2,	(KVM_ARCH_GGPR + 8 * REG_S7)
+	ld.d	s8,	a2,	(KVM_ARCH_GGPR + 8 * REG_S8)
+	/* Load KVM_ARCH register */
+	ld.d	a2,	a2,	(KVM_ARCH_GGPR + 8 * REG_A2)
+
+	ertn
+.endm
+
+	/*
+	 * exception entry for general exception from guest mode
+	 *  - IRQ is disabled
+	 *  - kernel privilege in root mode
+	 *  - page mode keep unchanged from previous prmd in root mode
+	 *  - Fixme: tlb exception cannot happen since registers relative with TLB
+	 *  -        is still in guest mode, such as pgd table/vmid registers etc,
+	 *  -        will fix with hw page walk enabled in future
+	 * load kvm_vcpu from reserved CSR KVM_VCPU_KS, and save a2 to KVM_TEMP_KS
+	 */
+	.section	.text
+	.cfi_sections	.debug_frame
+SYM_CODE_START(kvm_vector_entry)
+	csrwr	a2,   KVM_TEMP_KS
+	csrrd	a2,   KVM_VCPU_KS
+	addi.d	a2,   a2, KVM_VCPU_ARCH
+
+	/* After save gprs, free to use any gpr */
+	st.d	ra,	a2,	(KVM_ARCH_GGPR + 8 * REG_RA)
+	st.d	tp,	a2,	(KVM_ARCH_GGPR + 8 * REG_TP)
+	st.d	sp,	a2,	(KVM_ARCH_GGPR + 8 * REG_SP)
+	st.d	a0,	a2,	(KVM_ARCH_GGPR + 8 * REG_A0)
+	st.d	a1,	a2,	(KVM_ARCH_GGPR + 8 * REG_A1)
+	st.d	a3,	a2,	(KVM_ARCH_GGPR + 8 * REG_A3)
+	st.d	a4,	a2,	(KVM_ARCH_GGPR + 8 * REG_A4)
+	st.d	a5,	a2,	(KVM_ARCH_GGPR + 8 * REG_A5)
+	st.d	a6,	a2,	(KVM_ARCH_GGPR + 8 * REG_A6)
+	st.d	a7,	a2,	(KVM_ARCH_GGPR + 8 * REG_A7)
+	st.d	t0,	a2,	(KVM_ARCH_GGPR + 8 * REG_T0)
+	st.d	t1,	a2,	(KVM_ARCH_GGPR + 8 * REG_T1)
+	st.d	t2,	a2,	(KVM_ARCH_GGPR + 8 * REG_T2)
+	st.d	t3,	a2,	(KVM_ARCH_GGPR + 8 * REG_T3)
+	st.d	t4,	a2,	(KVM_ARCH_GGPR + 8 * REG_T4)
+	st.d	t5,	a2,	(KVM_ARCH_GGPR + 8 * REG_T5)
+	st.d	t6,	a2,	(KVM_ARCH_GGPR + 8 * REG_T6)
+	st.d	t7,	a2,	(KVM_ARCH_GGPR + 8 * REG_T7)
+	st.d	t8,	a2,	(KVM_ARCH_GGPR + 8 * REG_T8)
+	st.d	u0,	a2,	(KVM_ARCH_GGPR + 8 * REG_U0)
+	st.d	fp,	a2,	(KVM_ARCH_GGPR + 8 * REG_FP)
+	st.d	s0,	a2,	(KVM_ARCH_GGPR + 8 * REG_S0)
+	st.d	s1,	a2,	(KVM_ARCH_GGPR + 8 * REG_S1)
+	st.d	s2,	a2,	(KVM_ARCH_GGPR + 8 * REG_S2)
+	st.d	s3,	a2,	(KVM_ARCH_GGPR + 8 * REG_S3)
+	st.d	s4,	a2,	(KVM_ARCH_GGPR + 8 * REG_S4)
+	st.d	s5,	a2,	(KVM_ARCH_GGPR + 8 * REG_S5)
+	st.d	s6,	a2,	(KVM_ARCH_GGPR + 8 * REG_S6)
+	st.d	s7,	a2,	(KVM_ARCH_GGPR + 8 * REG_S7)
+	st.d	s8,	a2,	(KVM_ARCH_GGPR + 8 * REG_S8)
+	/* Save guest a2 */
+	csrrd	t0,	KVM_TEMP_KS
+	st.d	t0,	a2,	(KVM_ARCH_GGPR + 8 * REG_A2)
+
+	/* a2: kvm_vcpu_arch, a1 is free to use */
+	csrrd	s1,   KVM_VCPU_KS
+	ld.d	s0,   s1, KVM_VCPU_RUN
+
+	csrrd	t0,   LOONGARCH_CSR_ESTAT
+	st.d	t0,   a2, KVM_ARCH_HESTAT
+	csrrd	t0,   LOONGARCH_CSR_ERA
+	st.d	t0,   a2, KVM_ARCH_GPC
+	csrrd	t0,   LOONGARCH_CSR_BADV
+	st.d	t0,   a2, KVM_ARCH_HBADV
+	csrrd	t0,   LOONGARCH_CSR_BADI
+	st.d	t0,   a2, KVM_ARCH_HBADI
+
+	/* Restore host excfg.VS */
+	csrrd	t0, LOONGARCH_CSR_ECFG
+	ld.d	t1, a2, KVM_ARCH_HECFG
+	or	t0, t0, t1
+	csrwr	t0, LOONGARCH_CSR_ECFG
+
+	/* Restore host eentry */
+	ld.d	t0, a2, KVM_ARCH_HEENTRY
+	csrwr	t0, LOONGARCH_CSR_EENTRY
+
+	/* restore host pgd table */
+	ld.d    t0, a2, KVM_ARCH_HPGD
+	csrwr   t0, LOONGARCH_CSR_PGDL
+
+	/*
+	 * Disable PGM bit to enter root mode by default with next ertn
+	 */
+	ori	t0, zero, CSR_GSTAT_PVM
+	csrxchg	zero, t0, LOONGARCH_CSR_GSTAT
+	/*
+	 * Clear GTLBC.TGID field
+	 *       0: for root  tlb update in future tlb instr
+	 *  others: for guest tlb update like gpa to hpa in future tlb instr
+	 */
+	csrrd	t0, LOONGARCH_CSR_GTLBC
+	bstrins.w	t0, zero, CSR_GTLBC_TGID_SHIFT_END, CSR_GTLBC_TGID_SHIFT
+	csrwr	t0, LOONGARCH_CSR_GTLBC
+	ld.d	tp, a2, KVM_ARCH_HTP
+	ld.d	sp, a2, KVM_ARCH_HSP
+	/* restore per cpu register */
+	ld.d	u0, a2, KVM_ARCH_HPERCPU
+	addi.d	sp, sp, -PT_SIZE
+
+	/* Prepare handle exception */
+	or	a0, s0, zero
+	or	a1, s1, zero
+	ld.d	t8, a2, KVM_ARCH_HANDLE_EXIT
+	jirl	ra, t8, 0
+
+	or	a2, s1, zero
+	addi.d	a2, a2, KVM_VCPU_ARCH
+
+	/* resume host when ret <= 0 */
+	bge	zero, a0, ret_to_host
+
+	/*
+         * return to guest
+         * save per cpu register again, maybe switched to another cpu
+         */
+	st.d	u0, a2, KVM_ARCH_HPERCPU
+
+	/* Save kvm_vcpu to kscratch */
+	csrwr	s1, KVM_VCPU_KS
+	kvm_switch_to_guest
+
+ret_to_host:
+	ld.d    a2, a2, KVM_ARCH_HSP
+	addi.d  a2, a2, -PT_SIZE
+	kvm_restore_host_gpr    a2
+	jr      ra
+SYM_CODE_END(kvm_vector_entry)
+kvm_vector_entry_end:
+
+/*
+ * int kvm_enter_guest(struct kvm_run *run, struct kvm_vcpu *vcpu)
+ *
+ * @register_param:
+ *  a0: kvm_run* run
+ *  a1: kvm_vcpu* vcpu
+ */
+SYM_FUNC_START(kvm_enter_guest)
+	/* allocate space in stack bottom */
+	addi.d	a2, sp, -PT_SIZE
+	/* save host gprs */
+	kvm_save_host_gpr a2
+
+	/* save host crmd,prmd csr to stack */
+	csrrd	a3, LOONGARCH_CSR_CRMD
+	st.d	a3, a2, PT_CRMD
+	csrrd	a3, LOONGARCH_CSR_PRMD
+	st.d	a3, a2, PT_PRMD
+
+	addi.d	a2, a1, KVM_VCPU_ARCH
+	st.d	sp, a2, KVM_ARCH_HSP
+	st.d	tp, a2, KVM_ARCH_HTP
+	/* Save per cpu register */
+	st.d	u0, a2, KVM_ARCH_HPERCPU
+
+	/* Save kvm_vcpu to kscratch */
+	csrwr	a1, KVM_VCPU_KS
+	kvm_switch_to_guest
+SYM_FUNC_END(kvm_enter_guest)
+kvm_enter_guest_end:
+
+	.section ".rodata"
+SYM_DATA(kvm_vector_size, .quad kvm_vector_entry_end - kvm_vector_entry)
+SYM_DATA(kvm_enter_guest_size, .quad kvm_enter_guest_end - kvm_enter_guest)
+
+
+SYM_FUNC_START(kvm_save_fpu)
+	fpu_save_csr	a0 t1
+	fpu_save_double a0 t1
+	fpu_save_cc	a0 t1 t2
+	jr              ra
+SYM_FUNC_END(kvm_save_fpu)
+
+SYM_FUNC_START(kvm_restore_fpu)
+	fpu_restore_double a0 t1
+	fpu_restore_csr    a0 t1
+	fpu_restore_cc	   a0 t1 t2
+	jr                 ra
+SYM_FUNC_END(kvm_restore_fpu)