[v6,08/13] riscv/kprobe: Patch AUIPC/JALR pair to optimize kprobe

Message ID 20230127130541.1250865-9-chenguokai17@mails.ucas.ac.cn
State New
Headers
Series Add OPTPROBES feature on RISCV |

Commit Message

Xim Jan. 27, 2023, 1:05 p.m. UTC
  From: Liao Chang <liaochang1@huawei.com>

There is race when replacing EBREAK with AUIPC/JALR pairs under SMP,
so it needs to patch multiple instructions safely, this patch enhances
patch_text_cb() to ensure no race occurs when patching AUIPC/JALR pairs.

Signed-off-by: Liao Chang <liaochang1@huawei.com>
Co-developed-by: Chen Guokai <chenguokai17@mails.ucas.ac.cn>
Signed-off-by: Chen Guokai <chenguokai17@mails.ucas.ac.cn>
---
 arch/riscv/include/asm/patch.h |  1 +
 arch/riscv/kernel/patch.c      | 23 +++++++++---
 arch/riscv/kernel/probes/opt.c | 65 ++++++++++++++++++++++++++++++++--
 3 files changed, 83 insertions(+), 6 deletions(-)
  

Comments

Björn Töpel Feb. 1, 2023, 1:31 p.m. UTC | #1
Chen Guokai <chenguokai17@mails.ucas.ac.cn> writes:

> diff --git a/arch/riscv/include/asm/patch.h b/arch/riscv/include/asm/patch.h
> index 9a7d7346001e..ee31539de65f 100644
> --- a/arch/riscv/include/asm/patch.h
> +++ b/arch/riscv/include/asm/patch.h
> @@ -8,5 +8,6 @@
>  
>  int patch_text_nosync(void *addr, const void *insns, size_t len);
>  int patch_text(void *addr, u32 insn);
> +int patch_text_batch(void *addr, const void *insn, size_t size);
>  
>  #endif /* _ASM_RISCV_PATCH_H */
> diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
> index 765004b60513..ce324b6a6998 100644
> --- a/arch/riscv/kernel/patch.c
> +++ b/arch/riscv/kernel/patch.c
> @@ -15,7 +15,8 @@
>  
>  struct patch_insn {
>  	void *addr;
> -	u32 insn;
> +	const void *insn;
> +	size_t size;
>  	atomic_t cpu_count;
>  };
>  
> @@ -106,8 +107,7 @@ static int patch_text_cb(void *data)
>  
>  	if (atomic_inc_return(&patch->cpu_count) == num_online_cpus()) {
>  		ret =

Nit: Please use the full width. No need for a NL here.

> -		    patch_text_nosync(patch->addr, &patch->insn,
> -					    GET_INSN_LENGTH(patch->insn));
> +		    patch_text_nosync(patch->addr, patch->insn, patch->size);
>  		atomic_inc(&patch->cpu_count);
>  	} else {
>  		while (atomic_read(&patch->cpu_count) <= num_online_cpus())
> @@ -123,7 +123,8 @@ int patch_text(void *addr, u32 insn)
>  {
>  	struct patch_insn patch = {
>  		.addr = addr,
> -		.insn = insn,
> +		.insn = &insn,
> +		.size = GET_INSN_LENGTH(insn),
>  		.cpu_count = ATOMIC_INIT(0),
>  	};
>  
> @@ -131,3 +132,17 @@ int patch_text(void *addr, u32 insn)
>  				       &patch, cpu_online_mask);
>  }
>  NOKPROBE_SYMBOL(patch_text);
> +
> +int patch_text_batch(void *addr, const void *insn, size_t size)
> +{
> +	struct patch_insn patch = {
> +		.addr = addr,
> +		.insn = insn,
> +		.size = size,
> +		.cpu_count = ATOMIC_INIT(0),
> +	};
> +
> +	return stop_machine_cpuslocked(patch_text_cb, &patch, cpu_online_mask);
> +}
> +
> +NOKPROBE_SYMBOL(patch_text_batch);
> diff --git a/arch/riscv/kernel/probes/opt.c b/arch/riscv/kernel/probes/opt.c
> index a47f7d2bf3a6..c52d5bdc748c 100644
> --- a/arch/riscv/kernel/probes/opt.c
> +++ b/arch/riscv/kernel/probes/opt.c
> @@ -8,6 +8,7 @@
>  
>  #define pr_fmt(fmt)	"optprobe: " fmt
>  
> +#include <linux/types.h>
>  #include <linux/kprobes.h>
>  #include <asm/kprobes.h>
>  #include <asm/patch.h>
> @@ -444,11 +445,19 @@ static bool can_optimize(unsigned long paddr, struct optimized_kprobe *op)
>  
>  int arch_prepared_optinsn(struct arch_optimized_insn *optinsn)
>  {
> -	return 0;
> +	return optinsn->length;
>  }
>  
>  int arch_check_optimized_kprobe(struct optimized_kprobe *op)
>  {
> +	unsigned long i;
> +	struct kprobe *p;
> +
> +	for (i = RVC_INSN_LEN; i < op->optinsn.length; i += RVC_INSN_LEN) {
> +		p = get_kprobe(op->kp.addr + i);
> +		if (p && !kprobe_disabled(p))
> +			return -EEXIST;
> +	}
>  	return 0;
>  }
>  
> @@ -509,23 +518,75 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
>  
>  void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
>  {
> +	if (op->optinsn.insn) {
> +		free_optinsn_slot(op->optinsn.insn, 1);
> +		op->optinsn.insn = NULL;
> +		op->optinsn.length = 0;
> +	}
>  }
>  
>  void arch_optimize_kprobes(struct list_head *oplist)
>  {
> +	long offs;
> +	kprobe_opcode_t insn[3];
> +	struct optimized_kprobe *op, *tmp;
> +
> +	list_for_each_entry_safe(op, tmp, oplist, list) {
> +		WARN_ON(kprobe_disabled(&op->kp));
> +
> +		/* Backup instructions which will be replaced by jump address */
> +		memcpy(op->optinsn.copied_insn,
> +		       DETOUR_ADDR(op->optinsn.insn, DETOUR_INSN_OFFSET),
> +		       op->optinsn.length);
> +
> +		/*
> +		 * After patching, it should be:
> +		 * auipc free_register, %hi(detour_buffer)
> +		 * jalr free_register, free_register, %lo(detour_buffer)
> +		 * where free_register will eventually save the return address
> +		 */
> +		offs = (unsigned long)op->optinsn.insn -
> +		       (unsigned long)op->kp.addr;
> +		insn[0] = rv_auipc(op->optinsn.rd, (offs + (1 << 11)) >> 12);
> +		insn[1] = rv_jalr(op->optinsn.rd, op->optinsn.rd, offs & 0xFFF);
> +		/* For 3 RVC + 1 RVI scenario, fill C.NOP for padding */
> +		if (op->optinsn.length > 2 * RVI_INSN_LEN)
> +			insn[2] = rvc_addi(0, 0);
> +
> +		patch_text_batch(op->kp.addr, insn, op->optinsn.length);
> +		if (memcmp(op->kp.addr, insn, op->optinsn.length))
> +			continue;
> +
> +		list_del_init(&op->list);
> +	}
>  }
>  
>  void arch_unoptimize_kprobes(struct list_head *oplist,
>  			     struct list_head *done_list)
>  {
> +	struct optimized_kprobe *op, *tmp;
> +
> +	list_for_each_entry_safe(op, tmp, oplist, list) {
> +		arch_unoptimize_kprobe(op);
> +		list_move(&op->list, done_list);
> +	}
>  }
>  
>  void arch_unoptimize_kprobe(struct optimized_kprobe *op)
>  {
> +	kprobe_opcode_t buf[MAX_COPIED_INSN];
> +
> +	memcpy(buf, op->optinsn.copied_insn, op->optinsn.length);
> +	if (GET_INSN_LENGTH(op->kp.opcode) == RVI_INSN_LEN)
> +		*(u32 *)buf = __BUG_INSN_32;
> +	else
> +		*(u16 *)buf = __BUG_INSN_16;
> +	patch_text_batch(op->kp.addr, buf, op->optinsn.length);
>  }
>  
>  int arch_within_optimized_kprobe(struct optimized_kprobe *op,
>  				 kprobe_opcode_t *addr)
>  {
> -	return 0;
> +	return (op->kp.addr <= addr &&
> +		op->kp.addr + op->optinsn.length > addr);

Nit: Use the whole 100 char line width, please.

With or w/o the nits fixed:

Reviewed-by: Björn Töpel <bjorn@kernel.org>


Björn
  

Patch

diff --git a/arch/riscv/include/asm/patch.h b/arch/riscv/include/asm/patch.h
index 9a7d7346001e..ee31539de65f 100644
--- a/arch/riscv/include/asm/patch.h
+++ b/arch/riscv/include/asm/patch.h
@@ -8,5 +8,6 @@ 
 
 int patch_text_nosync(void *addr, const void *insns, size_t len);
 int patch_text(void *addr, u32 insn);
+int patch_text_batch(void *addr, const void *insn, size_t size);
 
 #endif /* _ASM_RISCV_PATCH_H */
diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
index 765004b60513..ce324b6a6998 100644
--- a/arch/riscv/kernel/patch.c
+++ b/arch/riscv/kernel/patch.c
@@ -15,7 +15,8 @@ 
 
 struct patch_insn {
 	void *addr;
-	u32 insn;
+	const void *insn;
+	size_t size;
 	atomic_t cpu_count;
 };
 
@@ -106,8 +107,7 @@  static int patch_text_cb(void *data)
 
 	if (atomic_inc_return(&patch->cpu_count) == num_online_cpus()) {
 		ret =
-		    patch_text_nosync(patch->addr, &patch->insn,
-					    GET_INSN_LENGTH(patch->insn));
+		    patch_text_nosync(patch->addr, patch->insn, patch->size);
 		atomic_inc(&patch->cpu_count);
 	} else {
 		while (atomic_read(&patch->cpu_count) <= num_online_cpus())
@@ -123,7 +123,8 @@  int patch_text(void *addr, u32 insn)
 {
 	struct patch_insn patch = {
 		.addr = addr,
-		.insn = insn,
+		.insn = &insn,
+		.size = GET_INSN_LENGTH(insn),
 		.cpu_count = ATOMIC_INIT(0),
 	};
 
@@ -131,3 +132,17 @@  int patch_text(void *addr, u32 insn)
 				       &patch, cpu_online_mask);
 }
 NOKPROBE_SYMBOL(patch_text);
+
+int patch_text_batch(void *addr, const void *insn, size_t size)
+{
+	struct patch_insn patch = {
+		.addr = addr,
+		.insn = insn,
+		.size = size,
+		.cpu_count = ATOMIC_INIT(0),
+	};
+
+	return stop_machine_cpuslocked(patch_text_cb, &patch, cpu_online_mask);
+}
+
+NOKPROBE_SYMBOL(patch_text_batch);
diff --git a/arch/riscv/kernel/probes/opt.c b/arch/riscv/kernel/probes/opt.c
index a47f7d2bf3a6..c52d5bdc748c 100644
--- a/arch/riscv/kernel/probes/opt.c
+++ b/arch/riscv/kernel/probes/opt.c
@@ -8,6 +8,7 @@ 
 
 #define pr_fmt(fmt)	"optprobe: " fmt
 
+#include <linux/types.h>
 #include <linux/kprobes.h>
 #include <asm/kprobes.h>
 #include <asm/patch.h>
@@ -444,11 +445,19 @@  static bool can_optimize(unsigned long paddr, struct optimized_kprobe *op)
 
 int arch_prepared_optinsn(struct arch_optimized_insn *optinsn)
 {
-	return 0;
+	return optinsn->length;
 }
 
 int arch_check_optimized_kprobe(struct optimized_kprobe *op)
 {
+	unsigned long i;
+	struct kprobe *p;
+
+	for (i = RVC_INSN_LEN; i < op->optinsn.length; i += RVC_INSN_LEN) {
+		p = get_kprobe(op->kp.addr + i);
+		if (p && !kprobe_disabled(p))
+			return -EEXIST;
+	}
 	return 0;
 }
 
@@ -509,23 +518,75 @@  int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
 
 void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
 {
+	if (op->optinsn.insn) {
+		free_optinsn_slot(op->optinsn.insn, 1);
+		op->optinsn.insn = NULL;
+		op->optinsn.length = 0;
+	}
 }
 
 void arch_optimize_kprobes(struct list_head *oplist)
 {
+	long offs;
+	kprobe_opcode_t insn[3];
+	struct optimized_kprobe *op, *tmp;
+
+	list_for_each_entry_safe(op, tmp, oplist, list) {
+		WARN_ON(kprobe_disabled(&op->kp));
+
+		/* Backup instructions which will be replaced by jump address */
+		memcpy(op->optinsn.copied_insn,
+		       DETOUR_ADDR(op->optinsn.insn, DETOUR_INSN_OFFSET),
+		       op->optinsn.length);
+
+		/*
+		 * After patching, it should be:
+		 * auipc free_register, %hi(detour_buffer)
+		 * jalr free_register, free_register, %lo(detour_buffer)
+		 * where free_register will eventually save the return address
+		 */
+		offs = (unsigned long)op->optinsn.insn -
+		       (unsigned long)op->kp.addr;
+		insn[0] = rv_auipc(op->optinsn.rd, (offs + (1 << 11)) >> 12);
+		insn[1] = rv_jalr(op->optinsn.rd, op->optinsn.rd, offs & 0xFFF);
+		/* For 3 RVC + 1 RVI scenario, fill C.NOP for padding */
+		if (op->optinsn.length > 2 * RVI_INSN_LEN)
+			insn[2] = rvc_addi(0, 0);
+
+		patch_text_batch(op->kp.addr, insn, op->optinsn.length);
+		if (memcmp(op->kp.addr, insn, op->optinsn.length))
+			continue;
+
+		list_del_init(&op->list);
+	}
 }
 
 void arch_unoptimize_kprobes(struct list_head *oplist,
 			     struct list_head *done_list)
 {
+	struct optimized_kprobe *op, *tmp;
+
+	list_for_each_entry_safe(op, tmp, oplist, list) {
+		arch_unoptimize_kprobe(op);
+		list_move(&op->list, done_list);
+	}
 }
 
 void arch_unoptimize_kprobe(struct optimized_kprobe *op)
 {
+	kprobe_opcode_t buf[MAX_COPIED_INSN];
+
+	memcpy(buf, op->optinsn.copied_insn, op->optinsn.length);
+	if (GET_INSN_LENGTH(op->kp.opcode) == RVI_INSN_LEN)
+		*(u32 *)buf = __BUG_INSN_32;
+	else
+		*(u16 *)buf = __BUG_INSN_16;
+	patch_text_batch(op->kp.addr, buf, op->optinsn.length);
 }
 
 int arch_within_optimized_kprobe(struct optimized_kprobe *op,
 				 kprobe_opcode_t *addr)
 {
-	return 0;
+	return (op->kp.addr <= addr &&
+		op->kp.addr + op->optinsn.length > addr);
 }