[v5,4/9] riscv/kprobe: Add common RVI and RVC instruction decoder code

Message ID 20221224114315.850130-5-chenguokai17@mails.ucas.ac.cn
State New
Headers
Series Add OPTPROBES feature on RISCV |

Commit Message

Xim Dec. 24, 2022, 11:43 a.m. UTC
  From: Liao Chang <liaochang1@huawei.com>

This patch add code that can be used to decode RVI and RVC instructions
in searching one register for 'AUIPC/JALR'. As mentioned in previous
patch, kprobe can't be optimized until one free integer register can be
found out to save the jump target, in order to figure out the register
searching, all instructions starts from the kprobe to the last one of
function needs to decode and test if contains one candidate register.

For all RVI instruction format, the position and length of 'rs1', 'rs2'
,'rd' and 'opcode' part are uniform, but the rule of RVC instruction
format is more complicated, so it address a couple of inline functions
to decode rs1/rs2/rd for RVC.

These instruction decoder suppose to be consistent with the RVC and
RV32/RV64G instruction set list specified in the riscv instruction
reference published at August 25, 2022.

Signed-off-by: Liao Chang <liaochang1@huawei.com>
Co-developed-by: Chen Guokai <chenguokai17@mails.ucas.ac.cn>
Signed-off-by: Chen Guokai <chenguokai17@mails.ucas.ac.cn>
---
 arch/riscv/include/asm/bug.h             |   5 +-
 arch/riscv/kernel/probes/decode-insn.h   | 148 +++++++++++++++++++++++
 arch/riscv/kernel/probes/simulate-insn.h |  41 +++++++
 3 files changed, 193 insertions(+), 1 deletion(-)
  

Comments

Björn Töpel Jan. 2, 2023, 6:03 p.m. UTC | #1
Chen Guokai <chenguokai17@mails.ucas.ac.cn> writes:

> From: Liao Chang <liaochang1@huawei.com>

> diff --git a/arch/riscv/kernel/probes/simulate-insn.h b/arch/riscv/kernel/probes/simulate-insn.h
> index cb6ff7dccb92..74d8c1ba9064 100644
> --- a/arch/riscv/kernel/probes/simulate-insn.h
> +++ b/arch/riscv/kernel/probes/simulate-insn.h
> @@ -37,6 +37,40 @@ __RISCV_INSN_FUNCS(c_jalr,	0xf007, 0x9002);
>  __RISCV_INSN_FUNCS(c_beqz,	0xe003, 0xc001);
>  __RISCV_INSN_FUNCS(c_bnez,	0xe003, 0xe001);
>  __RISCV_INSN_FUNCS(c_ebreak,	0xffff, 0x9002);
> +/* RVC(S) instructions contain rs1 and rs2 */
> +__RISCV_INSN_FUNCS(c_sq,	0xe003, 0xa000);
> +__RISCV_INSN_FUNCS(c_sw,	0xe003, 0xc000);
> +__RISCV_INSN_FUNCS(c_sd,	0xe003, 0xe000);
> +/* RVC(A) instructions contain rs1 and rs2 */
> +__RISCV_INSN_FUNCS(c_sub,	0xfc03, 0x8c01);

Incorrect mask.

> +__RISCV_INSN_FUNCS(c_subw,	0xfc43, 0x9c01);
> +/* RVC(L) instructions contain rs1 */
> +__RISCV_INSN_FUNCS(c_lq,	0xe003, 0x2000);
> +__RISCV_INSN_FUNCS(c_lw,	0xe003, 0x4000);
> +__RISCV_INSN_FUNCS(c_ld,	0xe003, 0x6000);
> +/* RVC(I) instructions contain rs1 */
> +__RISCV_INSN_FUNCS(c_addi,	0xe003, 0x0001);
> +__RISCV_INSN_FUNCS(c_addiw,	0xe003, 0x2001);
> +__RISCV_INSN_FUNCS(c_addi16sp,	0xe183, 0x6101);
> +__RISCV_INSN_FUNCS(c_slli,	0xe003, 0x0002);
> +/* RVC(B) instructions contain rs1 */
> +__RISCV_INSN_FUNCS(c_sri,	0xe803, 0x8001);
> +__RISCV_INSN_FUNCS(c_andi,	0xec03, 0x8801);
> +/* RVC(SS) instructions contain rs2 */
> +__RISCV_INSN_FUNCS(c_sqsp,	0xe003, 0xa002);
> +__RISCV_INSN_FUNCS(c_swsp,	0xe003, 0xc002);
> +__RISCV_INSN_FUNCS(c_sdsp,	0xe003, 0xe002);
> +/* RVC(R) instructions contain rs2 and rd */
> +__RISCV_INSN_FUNCS(c_mv,	0xe003, 0x8002);

Shouldn't the mask be 0xf003?


Björn
  
Liao Chang Jan. 4, 2023, 8:35 a.m. UTC | #2
在 2023/1/3 2:03, Björn Töpel 写道:
> Chen Guokai <chenguokai17@mails.ucas.ac.cn> writes:
> 
>> From: Liao Chang <liaochang1@huawei.com>
> 
>> diff --git a/arch/riscv/kernel/probes/simulate-insn.h b/arch/riscv/kernel/probes/simulate-insn.h
>> index cb6ff7dccb92..74d8c1ba9064 100644
>> --- a/arch/riscv/kernel/probes/simulate-insn.h
>> +++ b/arch/riscv/kernel/probes/simulate-insn.h
>> @@ -37,6 +37,40 @@ __RISCV_INSN_FUNCS(c_jalr,	0xf007, 0x9002);
>>  __RISCV_INSN_FUNCS(c_beqz,	0xe003, 0xc001);
>>  __RISCV_INSN_FUNCS(c_bnez,	0xe003, 0xe001);
>>  __RISCV_INSN_FUNCS(c_ebreak,	0xffff, 0x9002);
>> +/* RVC(S) instructions contain rs1 and rs2 */
>> +__RISCV_INSN_FUNCS(c_sq,	0xe003, 0xa000);
>> +__RISCV_INSN_FUNCS(c_sw,	0xe003, 0xc000);
>> +__RISCV_INSN_FUNCS(c_sd,	0xe003, 0xe000);
>> +/* RVC(A) instructions contain rs1 and rs2 */
>> +__RISCV_INSN_FUNCS(c_sub,	0xfc03, 0x8c01);
> 
> Incorrect mask.

Thanks for checking, i study the opcode of C_SUB [1], the correct mask should be 0xFC63.

      15 14 13 12 | 11 10  9  8 | 7 6 5 4 | 3 2  1 0
c.sub:       1  0  0  0 |  1  1  rs1'/rd' 0 0   rs2'   0 1
mask:           F |           C |       6 |        3
value:          8 |           C |       0 |        1

> 
>> +__RISCV_INSN_FUNCS(c_subw,	0xfc43, 0x9c01);
>> +/* RVC(L) instructions contain rs1 */
>> +__RISCV_INSN_FUNCS(c_lq,	0xe003, 0x2000);
>> +__RISCV_INSN_FUNCS(c_lw,	0xe003, 0x4000);
>> +__RISCV_INSN_FUNCS(c_ld,	0xe003, 0x6000);
>> +/* RVC(I) instructions contain rs1 */
>> +__RISCV_INSN_FUNCS(c_addi,	0xe003, 0x0001);
>> +__RISCV_INSN_FUNCS(c_addiw,	0xe003, 0x2001);
>> +__RISCV_INSN_FUNCS(c_addi16sp,	0xe183, 0x6101);
>> +__RISCV_INSN_FUNCS(c_slli,	0xe003, 0x0002);
>> +/* RVC(B) instructions contain rs1 */
>> +__RISCV_INSN_FUNCS(c_sri,	0xe803, 0x8001);
>> +__RISCV_INSN_FUNCS(c_andi,	0xec03, 0x8801);
>> +/* RVC(SS) instructions contain rs2 */
>> +__RISCV_INSN_FUNCS(c_sqsp,	0xe003, 0xa002);
>> +__RISCV_INSN_FUNCS(c_swsp,	0xe003, 0xc002);
>> +__RISCV_INSN_FUNCS(c_sdsp,	0xe003, 0xe002);
>> +/* RVC(R) instructions contain rs2 and rd */
>> +__RISCV_INSN_FUNCS(c_mv,	0xe003, 0x8002);
> 
> Shouldn't the mask be 0xf003?

Actually, the mask should be 0xf003 indeedly, but it also bring another problem that
it can't tell C.MV and C.JR via the mask and value parts. Look opcodes below:

      15 14 13 12 | 11 10  9  8 | 7 6 5 4 | 3 2  1 0
C.JR:  1  0  0  0 |             rs1           0  1 0
C.MV:  1  0  0  0 |              rd         rs2  1 0

The only differece between C.MV and C.JR is the bits[2~6], these bitfield of C.JR is zero,
the ones of C.MV is rs2 which never be zero. In order to tell C.MV and C.JR correclty, it
is better to adjust the mask of C.JR to be 0xf07f as your patch(riscv, kprobe: Stricter c.jr/c.jalr decoding)

Looking forward to your feedback.

> 
> 
> Björn


[1] https://github.com/riscv/riscv-isa-manual/releases
  
Björn Töpel Jan. 4, 2023, 8:57 a.m. UTC | #3
"liaochang (A)" <liaochang1@huawei.com> writes:

>> Shouldn't the mask be 0xf003?
>
> Actually, the mask should be 0xf003 indeedly, but it also bring another problem that
> it can't tell C.MV and C.JR via the mask and value parts. Look opcodes below:
>
>       15 14 13 12 | 11 10  9  8 | 7 6 5 4 | 3 2  1 0
> C.JR:  1  0  0  0 |             rs1           0  1 0
> C.MV:  1  0  0  0 |              rd         rs2  1 0
>
> The only differece between C.MV and C.JR is the bits[2~6], these bitfield of C.JR is zero,
> the ones of C.MV is rs2 which never be zero. In order to tell C.MV and C.JR correclty, it
> is better to adjust the mask of C.JR to be 0xf07f as your patch(riscv, kprobe: Stricter c.jr/c.jalr decoding)
>
> Looking forward to your feedback.

Yup, that was the reason I submitted the fix! Let's wait for the fix to
be applied, and not include that fix in your feature series.


Björn
  

Patch

diff --git a/arch/riscv/include/asm/bug.h b/arch/riscv/include/asm/bug.h
index 1aaea81fb141..9c33d3b58225 100644
--- a/arch/riscv/include/asm/bug.h
+++ b/arch/riscv/include/asm/bug.h
@@ -19,11 +19,14 @@ 
 #define __BUG_INSN_32	_UL(0x00100073) /* ebreak */
 #define __BUG_INSN_16	_UL(0x9002) /* c.ebreak */
 
+#define RVI_INSN_LEN	4UL
+#define RVC_INSN_LEN	2UL
+
 #define GET_INSN_LENGTH(insn)						\
 ({									\
 	unsigned long __len;						\
 	__len = ((insn & __INSN_LENGTH_MASK) == __INSN_LENGTH_32) ?	\
-		4UL : 2UL;						\
+		RVI_INSN_LEN : RVC_INSN_LEN;				\
 	__len;								\
 })
 
diff --git a/arch/riscv/kernel/probes/decode-insn.h b/arch/riscv/kernel/probes/decode-insn.h
index 42269a7d676d..785b023a62ea 100644
--- a/arch/riscv/kernel/probes/decode-insn.h
+++ b/arch/riscv/kernel/probes/decode-insn.h
@@ -3,6 +3,7 @@ 
 #ifndef _RISCV_KERNEL_KPROBES_DECODE_INSN_H
 #define _RISCV_KERNEL_KPROBES_DECODE_INSN_H
 
+#include <linux/bitops.h>
 #include <asm/sections.h>
 #include <asm/kprobes.h>
 
@@ -15,4 +16,151 @@  enum probe_insn {
 enum probe_insn __kprobes
 riscv_probe_decode_insn(probe_opcode_t *addr, struct arch_probe_insn *asi);
 
+#ifdef CONFIG_KPROBES
+
+static inline u16 rvi_rs1(kprobe_opcode_t opcode)
+{
+	return (u16)((opcode >> 15) & 0x1f);
+}
+
+static inline u16 rvi_rs2(kprobe_opcode_t opcode)
+{
+	return (u16)((opcode >> 20) & 0x1f);
+}
+
+static inline u16 rvi_rd(kprobe_opcode_t opcode)
+{
+	return (u16)((opcode >> 7) & 0x1f);
+}
+
+static inline s32 rvi_branch_imme(kprobe_opcode_t opcode)
+{
+	u32 imme = 0;
+
+	imme |= (((opcode >> 8)  & 0xf)   << 1)  |
+		(((opcode >> 25) & 0x3f)  << 5)  |
+		(((opcode >> 7)  & 0x1)   << 11) |
+		(((opcode >> 31) & 0x1)   << 12);
+
+	return sign_extend32(imme, 13);
+}
+
+static inline s32 rvi_jal_imme(kprobe_opcode_t opcode)
+{
+	u32 imme = 0;
+
+	imme |= (((opcode >> 21) & 0x3ff) << 1)  |
+		(((opcode >> 20) & 0x1)   << 11) |
+		(((opcode >> 12) & 0xff)  << 12) |
+		(((opcode >> 31) & 0x1)   << 20);
+
+	return sign_extend32(imme, 21);
+}
+
+#ifdef CONFIG_RISCV_ISA_C
+static inline u16 rvc_r_rs1(kprobe_opcode_t opcode)
+{
+	return (u16)((opcode >> 2) & 0x1f);
+}
+
+static inline u16 rvc_r_rs2(kprobe_opcode_t opcode)
+{
+	return (u16)((opcode >> 2) & 0x1f);
+}
+
+static inline u16 rvc_r_rd(kprobe_opcode_t opcode)
+{
+	return rvc_r_rs1(opcode);
+}
+
+static inline u16 rvc_i_rs1(kprobe_opcode_t opcode)
+{
+	return (u16)((opcode >> 7) & 0x1f);
+}
+
+static inline u16 rvc_i_rd(kprobe_opcode_t opcode)
+{
+	return rvc_i_rs1(opcode);
+}
+
+static inline u16 rvc_ss_rs2(kprobe_opcode_t opcode)
+{
+	return (u16)((opcode >> 2) & 0x1f);
+}
+
+static inline u16 rvc_l_rd(kprobe_opcode_t opcode)
+{
+	return (u16)((opcode >> 2) & 0x7);
+}
+
+static inline u16 rvc_l_rs(kprobe_opcode_t opcode)
+{
+	return (u16)((opcode >> 7) & 0x7);
+}
+
+static inline u16 rvc_s_rs2(kprobe_opcode_t opcode)
+{
+	return (u16)((opcode >> 2) & 0x7);
+}
+
+static inline u16 rvc_s_rs1(kprobe_opcode_t opcode)
+{
+	return (u16)((opcode >> 7) & 0x7);
+}
+
+static inline u16 rvc_a_rs2(kprobe_opcode_t opcode)
+{
+	return (u16)((opcode >> 2) & 0x7);
+}
+
+static inline u16 rvc_a_rs1(kprobe_opcode_t opcode)
+{
+	return (u16)((opcode >> 7) & 0x7);
+}
+
+static inline u16 rvc_a_rd(kprobe_opcode_t opcode)
+{
+	return rvc_a_rs1(opcode);
+}
+
+static inline u16 rvc_b_rd(kprobe_opcode_t opcode)
+{
+	return (u16)((opcode >> 7) & 0x7);
+}
+
+static inline u16 rvc_b_rs(kprobe_opcode_t opcode)
+{
+	return rvc_b_rd(opcode);
+}
+
+static inline s32 rvc_branch_imme(kprobe_opcode_t opcode)
+{
+	u32 imme = 0;
+
+	imme |= (((opcode >> 3)  & 0x3) << 1) |
+		(((opcode >> 10) & 0x3) << 3) |
+		(((opcode >> 2)  & 0x1) << 5) |
+		(((opcode >> 5)  & 0x3) << 6) |
+		(((opcode >> 12) & 0x1) << 8);
+
+	return sign_extend32(imme, 9);
+}
+
+static inline s32 rvc_jal_imme(kprobe_opcode_t opcode)
+{
+	u32 imme = 0;
+
+	imme |= (((opcode >> 3)  & 0x3) << 1) |
+		(((opcode >> 11) & 0x1) << 4) |
+		(((opcode >> 2)  & 0x1) << 5) |
+		(((opcode >> 7)  & 0x1) << 6) |
+		(((opcode >> 6)  & 0x1) << 7) |
+		(((opcode >> 9)  & 0x3) << 8) |
+		(((opcode >> 8)  & 0x1) << 10) |
+		(((opcode >> 12) & 0x1) << 11);
+
+	return sign_extend32(imme, 12);
+}
+#endif /* CONFIG_KPROBES */
+#endif /* CONFIG_RISCV_ISA_C */
 #endif /* _RISCV_KERNEL_KPROBES_DECODE_INSN_H */
diff --git a/arch/riscv/kernel/probes/simulate-insn.h b/arch/riscv/kernel/probes/simulate-insn.h
index cb6ff7dccb92..74d8c1ba9064 100644
--- a/arch/riscv/kernel/probes/simulate-insn.h
+++ b/arch/riscv/kernel/probes/simulate-insn.h
@@ -37,6 +37,40 @@  __RISCV_INSN_FUNCS(c_jalr,	0xf007, 0x9002);
 __RISCV_INSN_FUNCS(c_beqz,	0xe003, 0xc001);
 __RISCV_INSN_FUNCS(c_bnez,	0xe003, 0xe001);
 __RISCV_INSN_FUNCS(c_ebreak,	0xffff, 0x9002);
+/* RVC(S) instructions contain rs1 and rs2 */
+__RISCV_INSN_FUNCS(c_sq,	0xe003, 0xa000);
+__RISCV_INSN_FUNCS(c_sw,	0xe003, 0xc000);
+__RISCV_INSN_FUNCS(c_sd,	0xe003, 0xe000);
+/* RVC(A) instructions contain rs1 and rs2 */
+__RISCV_INSN_FUNCS(c_sub,	0xfc03, 0x8c01);
+__RISCV_INSN_FUNCS(c_subw,	0xfc43, 0x9c01);
+/* RVC(L) instructions contain rs1 */
+__RISCV_INSN_FUNCS(c_lq,	0xe003, 0x2000);
+__RISCV_INSN_FUNCS(c_lw,	0xe003, 0x4000);
+__RISCV_INSN_FUNCS(c_ld,	0xe003, 0x6000);
+/* RVC(I) instructions contain rs1 */
+__RISCV_INSN_FUNCS(c_addi,	0xe003, 0x0001);
+__RISCV_INSN_FUNCS(c_addiw,	0xe003, 0x2001);
+__RISCV_INSN_FUNCS(c_addi16sp,	0xe183, 0x6101);
+__RISCV_INSN_FUNCS(c_slli,	0xe003, 0x0002);
+/* RVC(B) instructions contain rs1 */
+__RISCV_INSN_FUNCS(c_sri,	0xe803, 0x8001);
+__RISCV_INSN_FUNCS(c_andi,	0xec03, 0x8801);
+/* RVC(SS) instructions contain rs2 */
+__RISCV_INSN_FUNCS(c_sqsp,	0xe003, 0xa002);
+__RISCV_INSN_FUNCS(c_swsp,	0xe003, 0xc002);
+__RISCV_INSN_FUNCS(c_sdsp,	0xe003, 0xe002);
+/* RVC(R) instructions contain rs2 and rd */
+__RISCV_INSN_FUNCS(c_mv,	0xe003, 0x8002);
+/* RVC(I) instructions contain sp and rd */
+__RISCV_INSN_FUNCS(c_lqsp,	0xe003, 0x2002);
+__RISCV_INSN_FUNCS(c_lwsp,	0xe003, 0x4002);
+__RISCV_INSN_FUNCS(c_ldsp,	0xe003, 0x6002);
+/* RVC(CW) instructions contain sp and rd */
+__RISCV_INSN_FUNCS(c_addi4spn,	0xe003, 0x0000);
+/* RVC(I) instructions contain rd */
+__RISCV_INSN_FUNCS(c_li,	0xe003, 0x4001);
+__RISCV_INSN_FUNCS(c_lui,	0xe003, 0x6001);
 
 __RISCV_INSN_FUNCS(auipc,	0x7f, 0x17);
 __RISCV_INSN_FUNCS(branch,	0x7f, 0x63);
@@ -44,4 +78,11 @@  __RISCV_INSN_FUNCS(branch,	0x7f, 0x63);
 __RISCV_INSN_FUNCS(jal,		0x7f, 0x6f);
 __RISCV_INSN_FUNCS(jalr,	0x707f, 0x67);
 
+__RISCV_INSN_FUNCS(arith_rr,	0x77, 0x33);
+__RISCV_INSN_FUNCS(arith_ri,	0x77, 0x13);
+__RISCV_INSN_FUNCS(lui,		0x7f, 0x37);
+__RISCV_INSN_FUNCS(load,	0x7f, 0x03);
+__RISCV_INSN_FUNCS(store,	0x7f, 0x23);
+__RISCV_INSN_FUNCS(amo,		0x7f, 0x2f);
+
 #endif /* _RISCV_KERNEL_PROBES_SIMULATE_INSN_H */