[v3,2/3] LoongArch: Add ptrace single step support

Message ID 20230215092358.30368-3-zhangqing@loongson.cn
State New
Headers
Series LoongArch: Add hardware breakpoints/watchpoints support |

Commit Message

Qing Zhang Feb. 15, 2023, 9:23 a.m. UTC
  Use the generic ptrace_resume code for PTRACE_SYSCALL, PTRACE_CONT,
PTRACE_KILL and PTRACE_SINGLESTEP. This implies defining
arch_has_single_step in  and implementing the
user_enable_single_step and user_disable_single_step functions.

LongArch has no hardware single-step register. the hardware single-step
function multiplex fetch instruction watchpoint(FWPS) and specifies that
the next instruction must trigger the watch exception by setting the mask bit.

Signed-off-by: Qing Zhang <zhangqing@loongson.cn>
---
 arch/loongarch/include/asm/processor.h |  3 ++
 arch/loongarch/include/asm/ptrace.h    |  2 +
 arch/loongarch/kernel/hw_breakpoint.c  | 35 +++++++++++--
 arch/loongarch/kernel/ptrace.c         | 68 ++++++++++++++++++++++++++
 arch/loongarch/kernel/traps.c          | 20 ++++++--
 5 files changed, 120 insertions(+), 8 deletions(-)
  

Comments

Jinyang He Feb. 16, 2023, 2:07 a.m. UTC | #1
On 2023-02-15 17:23, Qing Zhang wrote:
> Use the generic ptrace_resume code for PTRACE_SYSCALL, PTRACE_CONT,
> PTRACE_KILL and PTRACE_SINGLESTEP. This implies defining
> arch_has_single_step in  and implementing the
> user_enable_single_step and user_disable_single_step functions.
>
> LongArch has no hardware single-step register. the hardware single-step
> function multiplex fetch instruction watchpoint(FWPS) and specifies that
> the next instruction must trigger the watch exception by setting the mask bit.
>
> Signed-off-by: Qing Zhang <zhangqing@loongson.cn>
> ---
>   arch/loongarch/include/asm/processor.h |  3 ++
>   arch/loongarch/include/asm/ptrace.h    |  2 +
>   arch/loongarch/kernel/hw_breakpoint.c  | 35 +++++++++++--
>   arch/loongarch/kernel/ptrace.c         | 68 ++++++++++++++++++++++++++
>   arch/loongarch/kernel/traps.c          | 20 ++++++--
>   5 files changed, 120 insertions(+), 8 deletions(-)
>
> diff --git a/arch/loongarch/include/asm/processor.h b/arch/loongarch/include/asm/processor.h
> index db060c5a976f..3ea0f1910c23 100644
> --- a/arch/loongarch/include/asm/processor.h
> +++ b/arch/loongarch/include/asm/processor.h
> @@ -131,6 +131,9 @@ struct thread_struct {
>   	struct perf_event	*hbp_break[LOONGARCH_MAX_BRP];
>   	struct perf_event	*hbp_watch[LOONGARCH_MAX_WRP];
>   
> +	/* Used by ptrace single_step */
> +	unsigned long single_step;
> +
>   	/*
>   	 * FPU & vector registers, must be at last because
>   	 * they are conditionally copied at fork().
> diff --git a/arch/loongarch/include/asm/ptrace.h b/arch/loongarch/include/asm/ptrace.h
> index 58596c4f8a0f..66a0e6c480a3 100644
> --- a/arch/loongarch/include/asm/ptrace.h
> +++ b/arch/loongarch/include/asm/ptrace.h
> @@ -150,4 +150,6 @@ static inline void user_stack_pointer_set(struct pt_regs *regs,
>   	regs->regs[3] = val;
>   }
>   
> +#define arch_has_single_step()		(1)
> +
>   #endif /* _ASM_PTRACE_H */
> diff --git a/arch/loongarch/kernel/hw_breakpoint.c b/arch/loongarch/kernel/hw_breakpoint.c
> index 6431cd319c32..94967b887d92 100644
> --- a/arch/loongarch/kernel/hw_breakpoint.c
> +++ b/arch/loongarch/kernel/hw_breakpoint.c
> @@ -153,6 +153,22 @@ static int hw_breakpoint_slot_setup(struct perf_event **slots, int max_slots,
>    */
>   void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
>   {
> +	int i;
> +	struct thread_struct *t = &tsk->thread;
> +
> +	for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
> +		if (t->hbp_break[i]) {
> +			unregister_hw_breakpoint(t->hbp_break[i]);
> +			t->hbp_break[i] = NULL;
> +		}
> +	}
> +
> +	for (i = 0; i < LOONGARCH_MAX_WRP; i++) {
> +		if (t->hbp_watch[i]) {
> +			unregister_hw_breakpoint(t->hbp_watch[i]);
> +			t->hbp_watch[i] = NULL;
> +		}
> +	}
>   }
>   
>   void ptrace_hw_copy_thread(struct task_struct *tsk)
> @@ -498,11 +514,20 @@ arch_initcall(arch_hw_breakpoint_init);
>   void hw_breakpoint_thread_switch(struct task_struct *next)
>   {
>   	struct pt_regs *regs = task_pt_regs(next);
> -
> -	/* Update breakpoints */
> -	update_bp_registers(regs, 1, 0);
> -	/* Update watchpoints */
> -	update_bp_registers(regs, 1, 1);
> +	u64 addr, mask;
> +
> +	if (test_bit(TIF_SINGLESTEP, &task_thread_info(next)->flags)) {
> +		addr = read_wb_reg(CSR_CFG_ADDR, 0, 0);
> +		mask = read_wb_reg(CSR_CFG_MASK, 0, 0);
> +		if ((task_pt_regs(next)->csr_era & ~mask) == (addr & ~mask))
> +			csr_write32(0x10000, LOONGARCH_CSR_FWPS);
> +		regs->csr_prmd |= CSR_PRMD_PWE;
> +	} else {
> +		/* Update breakpoints */
> +		update_bp_registers(regs, 1, 0);
> +		/* Update watchpoints */
> +		update_bp_registers(regs, 1, 1);
> +	}
>   }
>   
>   void hw_breakpoint_pmu_read(struct perf_event *bp)
> diff --git a/arch/loongarch/kernel/ptrace.c b/arch/loongarch/kernel/ptrace.c
> index bee4194177fd..52a3ee4366f4 100644
> --- a/arch/loongarch/kernel/ptrace.c
> +++ b/arch/loongarch/kernel/ptrace.c
> @@ -20,6 +20,7 @@
>   #include <linux/context_tracking.h>
>   #include <linux/elf.h>
>   #include <linux/errno.h>
> +#include <linux/hw_breakpoint.h>
>   #include <linux/mm.h>
>   #include <linux/ptrace.h>
>   #include <linux/regset.h>
> @@ -30,6 +31,7 @@
>   #include <linux/stddef.h>
>   #include <linux/seccomp.h>
>   #include <linux/uaccess.h>
> +#include <linux/thread_info.h>
>   
>   #include <asm/byteorder.h>
>   #include <asm/cpu.h>
> @@ -39,6 +41,7 @@
>   #include <asm/page.h>
>   #include <asm/pgtable.h>
>   #include <asm/processor.h>
> +#include <asm/ptrace.h>
>   #include <asm/reg.h>
>   #include <asm/syscall.h>
>   
> @@ -541,3 +544,68 @@ long arch_ptrace(struct task_struct *child, long request,
>   
>   	return ret;
>   }
> +
> +void ptrace_triggered(struct perf_event *bp,
> +		      struct perf_sample_data *data, struct pt_regs *regs)
> +{
> +	struct perf_event_attr attr;
> +
> +	attr = bp->attr;
> +	attr.disabled = true;
> +	modify_user_hw_breakpoint(bp, &attr);
> +}
> +
> +static int set_single_step(struct task_struct *tsk, unsigned long addr)
> +{
> +	struct thread_struct *thread = &tsk->thread;
> +	struct perf_event *bp;
> +	struct perf_event_attr attr;
> +	struct arch_hw_breakpoint *info;
> +
> +	bp = thread->hbp_break[0];
> +	if (!bp) {
> +		ptrace_breakpoint_init(&attr);
> +
> +		attr.bp_addr = addr;
> +		attr.bp_len = HW_BREAKPOINT_LEN_8;
> +		attr.bp_type = HW_BREAKPOINT_X;
> +
> +		bp = register_user_hw_breakpoint(&attr, ptrace_triggered,
> +						 NULL, tsk);
> +		if (IS_ERR(bp))
> +			return PTR_ERR(bp);
> +
> +		thread->hbp_break[0] = bp;
> +	} else {
> +		int err;
> +
> +		attr = bp->attr;
> +		attr.bp_addr = addr;
> +		/* reenable breakpoint */
> +		attr.disabled = false;
> +		err = modify_user_hw_breakpoint(bp, &attr);
> +		if (unlikely(err))
> +			return err;
> +
> +		csr_write64(attr.bp_addr, LOONGARCH_CSR_IB0ADDR);
> +	}
> +	info = counter_arch_bp(bp);
> +	info->mask = 0xffffffffffff;
> +
> +	return 0;
> +}
> +
> +/* ptrace API */
> +void user_enable_single_step(struct task_struct *task)
> +{
> +	struct thread_info *ti = task_thread_info(task);
> +
> +	set_single_step(task, task_pt_regs(task)->csr_era);
> +	task->thread.single_step = task_pt_regs(task)->csr_era;
> +	set_ti_thread_flag(ti, TIF_SINGLESTEP);
> +}
> +
> +void user_disable_single_step(struct task_struct *task)
> +{
> +	clear_tsk_thread_flag(task, TIF_SINGLESTEP);
> +}
> diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
> index 2b133079e0f3..56d7d076153c 100644
> --- a/arch/loongarch/kernel/traps.c
> +++ b/arch/loongarch/kernel/traps.c
> @@ -511,9 +511,23 @@ asmlinkage void noinstr do_watch(struct pt_regs *regs)
>   #ifdef CONFIG_HAVE_HW_BREAKPOINT
>   	irqentry_state_t state = irqentry_enter(regs);
>   
> -	breakpoint_handler(regs);
> -	watchpoint_handler(regs);
> -	force_sig(SIGTRAP);
> +	if (test_tsk_thread_flag(current, TIF_SINGLESTEP)) {
> +		int llbit = (csr_read32(LOONGARCH_CSR_LLBCTL) & 0x1);
> +		unsigned long pc = regs->csr_era;
> +
> +		if (llbit) {

Hi, Qing,


It should be noted here. When the ll-sc combo is encountered, it is
regarded as an single instruction. So donnot clear llbit and reset
CSR.FWPS.Skip until the llsc execution is completed.

> +			csr_write32(0x10000, LOONGARCH_CSR_FWPS);
> +			csr_write32(0x4, LOONGARCH_CSR_LLBCTL);
> +		} else if (pc == current->thread.single_step) {
Note here as well. Because 3A5000 has a strange hardware issue that
certain insns are occasionally not skipped when CSR.FWPS.Skip is set,
such as fld.d/fst.d. Singlestep needs compare whether the csr_era is
equal to the value of singlestep which last time set, as in most case

they should be not equal.


And for this condition expression, some potentially strange insns may
cause bugs. For example, "b 0" or "jr rd" where rd is equal to its PC
will cause cannot stop the singlestep. These insns is so strange that
we did not consider in OW. However, I think we should consider this
case for robustness in upstream.

It is better to use macros avoid direct numbers when csr_write* is invoked.


Thanks,

Jinyang

> +			csr_write32(0x10000, LOONGARCH_CSR_FWPS);
> +		} else {
> +			force_sig(SIGTRAP);
> +		}
> +	} else {
> +		breakpoint_handler(regs);
> +		watchpoint_handler(regs);
> +		force_sig(SIGTRAP);
> +	}
>   
>   	irqentry_exit(regs, state);
>   #endif
  
Youling Tang Feb. 16, 2023, 3:13 a.m. UTC | #2
On 02/16/2023 10:07 AM, Jinyang He wrote:
>
> On 2023-02-15 17:23, Qing Zhang wrote:
>> Use the generic ptrace_resume code for PTRACE_SYSCALL, PTRACE_CONT,
>> PTRACE_KILL and PTRACE_SINGLESTEP. This implies defining
>> arch_has_single_step in  and implementing the
>> user_enable_single_step and user_disable_single_step functions.
>>
>> LongArch has no hardware single-step register. the hardware single-step
>> function multiplex fetch instruction watchpoint(FWPS) and specifies that
>> the next instruction must trigger the watch exception by setting the
>> mask bit.
>>
>> Signed-off-by: Qing Zhang <zhangqing@loongson.cn>
>> ---
>>   arch/loongarch/include/asm/processor.h |  3 ++
>>   arch/loongarch/include/asm/ptrace.h    |  2 +
>>   arch/loongarch/kernel/hw_breakpoint.c  | 35 +++++++++++--
>>   arch/loongarch/kernel/ptrace.c         | 68 ++++++++++++++++++++++++++
>>   arch/loongarch/kernel/traps.c          | 20 ++++++--
>>   5 files changed, 120 insertions(+), 8 deletions(-)
>>
>> diff --git a/arch/loongarch/include/asm/processor.h
>> b/arch/loongarch/include/asm/processor.h
>> index db060c5a976f..3ea0f1910c23 100644
>> --- a/arch/loongarch/include/asm/processor.h
>> +++ b/arch/loongarch/include/asm/processor.h
>> @@ -131,6 +131,9 @@ struct thread_struct {
>>       struct perf_event    *hbp_break[LOONGARCH_MAX_BRP];
>>       struct perf_event    *hbp_watch[LOONGARCH_MAX_WRP];
>>   +    /* Used by ptrace single_step */
>> +    unsigned long single_step;
>> +
>>       /*
>>        * FPU & vector registers, must be at last because
>>        * they are conditionally copied at fork().
>> diff --git a/arch/loongarch/include/asm/ptrace.h
>> b/arch/loongarch/include/asm/ptrace.h
>> index 58596c4f8a0f..66a0e6c480a3 100644
>> --- a/arch/loongarch/include/asm/ptrace.h
>> +++ b/arch/loongarch/include/asm/ptrace.h
>> @@ -150,4 +150,6 @@ static inline void user_stack_pointer_set(struct
>> pt_regs *regs,
>>       regs->regs[3] = val;
>>   }
>>   +#define arch_has_single_step()        (1)
>> +
>>   #endif /* _ASM_PTRACE_H */
>> diff --git a/arch/loongarch/kernel/hw_breakpoint.c
>> b/arch/loongarch/kernel/hw_breakpoint.c
>> index 6431cd319c32..94967b887d92 100644
>> --- a/arch/loongarch/kernel/hw_breakpoint.c
>> +++ b/arch/loongarch/kernel/hw_breakpoint.c
>> @@ -153,6 +153,22 @@ static int hw_breakpoint_slot_setup(struct
>> perf_event **slots, int max_slots,
>>    */
>>   void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
>>   {
>> +    int i;
>> +    struct thread_struct *t = &tsk->thread;
>> +
>> +    for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
>> +        if (t->hbp_break[i]) {
>> +            unregister_hw_breakpoint(t->hbp_break[i]);
>> +            t->hbp_break[i] = NULL;
>> +        }
>> +    }
>> +
>> +    for (i = 0; i < LOONGARCH_MAX_WRP; i++) {
>> +        if (t->hbp_watch[i]) {
>> +            unregister_hw_breakpoint(t->hbp_watch[i]);
>> +            t->hbp_watch[i] = NULL;
>> +        }
>> +    }
>>   }
>>     void ptrace_hw_copy_thread(struct task_struct *tsk)
>> @@ -498,11 +514,20 @@ arch_initcall(arch_hw_breakpoint_init);
>>   void hw_breakpoint_thread_switch(struct task_struct *next)
>>   {
>>       struct pt_regs *regs = task_pt_regs(next);
>> -
>> -    /* Update breakpoints */
>> -    update_bp_registers(regs, 1, 0);
>> -    /* Update watchpoints */
>> -    update_bp_registers(regs, 1, 1);
>> +    u64 addr, mask;
>> +
>> +    if (test_bit(TIF_SINGLESTEP, &task_thread_info(next)->flags)) {
>> +        addr = read_wb_reg(CSR_CFG_ADDR, 0, 0);
>> +        mask = read_wb_reg(CSR_CFG_MASK, 0, 0);
>> +        if ((task_pt_regs(next)->csr_era & ~mask) == (addr & ~mask))
>> +            csr_write32(0x10000, LOONGARCH_CSR_FWPS);
>> +        regs->csr_prmd |= CSR_PRMD_PWE;
>> +    } else {
>> +        /* Update breakpoints */
>> +        update_bp_registers(regs, 1, 0);
>> +        /* Update watchpoints */
>> +        update_bp_registers(regs, 1, 1);
>> +    }
>>   }
>>     void hw_breakpoint_pmu_read(struct perf_event *bp)
>> diff --git a/arch/loongarch/kernel/ptrace.c
>> b/arch/loongarch/kernel/ptrace.c
>> index bee4194177fd..52a3ee4366f4 100644
>> --- a/arch/loongarch/kernel/ptrace.c
>> +++ b/arch/loongarch/kernel/ptrace.c
>> @@ -20,6 +20,7 @@
>>   #include <linux/context_tracking.h>
>>   #include <linux/elf.h>
>>   #include <linux/errno.h>
>> +#include <linux/hw_breakpoint.h>
>>   #include <linux/mm.h>
>>   #include <linux/ptrace.h>
>>   #include <linux/regset.h>
>> @@ -30,6 +31,7 @@
>>   #include <linux/stddef.h>
>>   #include <linux/seccomp.h>
>>   #include <linux/uaccess.h>
>> +#include <linux/thread_info.h>
>>     #include <asm/byteorder.h>
>>   #include <asm/cpu.h>
>> @@ -39,6 +41,7 @@
>>   #include <asm/page.h>
>>   #include <asm/pgtable.h>
>>   #include <asm/processor.h>
>> +#include <asm/ptrace.h>
>>   #include <asm/reg.h>
>>   #include <asm/syscall.h>
>>   @@ -541,3 +544,68 @@ long arch_ptrace(struct task_struct *child,
>> long request,
>>         return ret;
>>   }
>> +
>> +void ptrace_triggered(struct perf_event *bp,
>> +              struct perf_sample_data *data, struct pt_regs *regs)
>> +{
>> +    struct perf_event_attr attr;
>> +
>> +    attr = bp->attr;
>> +    attr.disabled = true;
>> +    modify_user_hw_breakpoint(bp, &attr);
>> +}
>> +
>> +static int set_single_step(struct task_struct *tsk, unsigned long addr)
>> +{
>> +    struct thread_struct *thread = &tsk->thread;
>> +    struct perf_event *bp;
>> +    struct perf_event_attr attr;
>> +    struct arch_hw_breakpoint *info;
>> +
>> +    bp = thread->hbp_break[0];
>> +    if (!bp) {
>> +        ptrace_breakpoint_init(&attr);
>> +
>> +        attr.bp_addr = addr;
>> +        attr.bp_len = HW_BREAKPOINT_LEN_8;
>> +        attr.bp_type = HW_BREAKPOINT_X;
>> +
>> +        bp = register_user_hw_breakpoint(&attr, ptrace_triggered,
>> +                         NULL, tsk);
>> +        if (IS_ERR(bp))
>> +            return PTR_ERR(bp);
>> +
>> +        thread->hbp_break[0] = bp;
>> +    } else {
>> +        int err;
>> +
>> +        attr = bp->attr;
>> +        attr.bp_addr = addr;
>> +        /* reenable breakpoint */
>> +        attr.disabled = false;
>> +        err = modify_user_hw_breakpoint(bp, &attr);
>> +        if (unlikely(err))
>> +            return err;
>> +
>> +        csr_write64(attr.bp_addr, LOONGARCH_CSR_IB0ADDR);
>> +    }
>> +    info = counter_arch_bp(bp);
>> +    info->mask = 0xffffffffffff;
>> +
>> +    return 0;
>> +}
>> +
>> +/* ptrace API */
>> +void user_enable_single_step(struct task_struct *task)
>> +{
>> +    struct thread_info *ti = task_thread_info(task);
>> +
>> +    set_single_step(task, task_pt_regs(task)->csr_era);
>> +    task->thread.single_step = task_pt_regs(task)->csr_era;
>> +    set_ti_thread_flag(ti, TIF_SINGLESTEP);
>> +}
>> +
>> +void user_disable_single_step(struct task_struct *task)
>> +{
>> +    clear_tsk_thread_flag(task, TIF_SINGLESTEP);
>> +}
>> diff --git a/arch/loongarch/kernel/traps.c
>> b/arch/loongarch/kernel/traps.c
>> index 2b133079e0f3..56d7d076153c 100644
>> --- a/arch/loongarch/kernel/traps.c
>> +++ b/arch/loongarch/kernel/traps.c
>> @@ -511,9 +511,23 @@ asmlinkage void noinstr do_watch(struct pt_regs
>> *regs)
>>   #ifdef CONFIG_HAVE_HW_BREAKPOINT
>>       irqentry_state_t state = irqentry_enter(regs);
>>   -    breakpoint_handler(regs);
>> -    watchpoint_handler(regs);
>> -    force_sig(SIGTRAP);
>> +    if (test_tsk_thread_flag(current, TIF_SINGLESTEP)) {
>> +        int llbit = (csr_read32(LOONGARCH_CSR_LLBCTL) & 0x1);
>> +        unsigned long pc = regs->csr_era;
>> +
>> +        if (llbit) {
>
> Hi, Qing,
>
>
> It should be noted here. When the ll-sc combo is encountered, it is
> regarded as an single instruction. So donnot clear llbit and reset
> CSR.FWPS.Skip until the llsc execution is completed.
>
>> +            csr_write32(0x10000, LOONGARCH_CSR_FWPS);
>> +            csr_write32(0x4, LOONGARCH_CSR_LLBCTL);
>> +        } else if (pc == current->thread.single_step) {
> Note here as well. Because 3A5000 has a strange hardware issue that
> certain insns are occasionally not skipped when CSR.FWPS.Skip is set,
> such as fld.d/fst.d. Singlestep needs compare whether the csr_era is
> equal to the value of singlestep which last time set, as in most case
>
> they should be not equal.

BTW, I prefer to separate this special processing from this patch (for
example, add two patchs in this series, add special processing of
instructions such as LL-SC, and FLD.D/FST.D, etc.), and add
corresponding test cases to describe the phenomenon and reason, this
is conducive to everyone's understanding of the code.

>
>
> And for this condition expression, some potentially strange insns may
> cause bugs. For example, "b 0" or "jr rd" where rd is equal to its PC
> will cause cannot stop the singlestep. These insns is so strange that
> we did not consider in OW. However, I think we should consider this
> case for robustness in upstream.
>

I don't know if there will be instructions like "b 0" or "jr rd (rd =
pc)" in the executable file after linking?

Youling.

> It is better to use macros avoid direct numbers when csr_write* is invoked.
>
>
> Thanks,
>
> Jinyang
>
>> +            csr_write32(0x10000, LOONGARCH_CSR_FWPS);
>> +        } else {
>> +            force_sig(SIGTRAP);
>> +        }
>> +    } else {
>> +        breakpoint_handler(regs);
>> +        watchpoint_handler(regs);
>> +        force_sig(SIGTRAP);
>> +    }
>>         irqentry_exit(regs, state);
>>   #endif
>
  
Jinyang He Feb. 16, 2023, 6:44 a.m. UTC | #3
On 2023-02-16 11:13, Youling Tang wrote:
>
>
> On 02/16/2023 10:07 AM, Jinyang He wrote:
>>
>> On 2023-02-15 17:23, Qing Zhang wrote:
>>> Use the generic ptrace_resume code for PTRACE_SYSCALL, PTRACE_CONT,
>>> PTRACE_KILL and PTRACE_SINGLESTEP. This implies defining
>>> arch_has_single_step in  and implementing the
>>> user_enable_single_step and user_disable_single_step functions.
>>>
>>> LongArch has no hardware single-step register. the hardware single-step
>>> function multiplex fetch instruction watchpoint(FWPS) and specifies 
>>> that
>>> the next instruction must trigger the watch exception by setting the
>>> mask bit.
>>>
>>> Signed-off-by: Qing Zhang <zhangqing@loongson.cn>
>>> ---
>>>   arch/loongarch/include/asm/processor.h |  3 ++
>>>   arch/loongarch/include/asm/ptrace.h    |  2 +
>>>   arch/loongarch/kernel/hw_breakpoint.c  | 35 +++++++++++--
>>>   arch/loongarch/kernel/ptrace.c         | 68 
>>> ++++++++++++++++++++++++++
>>>   arch/loongarch/kernel/traps.c          | 20 ++++++--
>>>   5 files changed, 120 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/arch/loongarch/include/asm/processor.h
>>> b/arch/loongarch/include/asm/processor.h
>>> index db060c5a976f..3ea0f1910c23 100644
>>> --- a/arch/loongarch/include/asm/processor.h
>>> +++ b/arch/loongarch/include/asm/processor.h
>>> @@ -131,6 +131,9 @@ struct thread_struct {
>>>       struct perf_event    *hbp_break[LOONGARCH_MAX_BRP];
>>>       struct perf_event    *hbp_watch[LOONGARCH_MAX_WRP];
>>>   +    /* Used by ptrace single_step */
>>> +    unsigned long single_step;
>>> +
>>>       /*
>>>        * FPU & vector registers, must be at last because
>>>        * they are conditionally copied at fork().
>>> diff --git a/arch/loongarch/include/asm/ptrace.h
>>> b/arch/loongarch/include/asm/ptrace.h
>>> index 58596c4f8a0f..66a0e6c480a3 100644
>>> --- a/arch/loongarch/include/asm/ptrace.h
>>> +++ b/arch/loongarch/include/asm/ptrace.h
>>> @@ -150,4 +150,6 @@ static inline void user_stack_pointer_set(struct
>>> pt_regs *regs,
>>>       regs->regs[3] = val;
>>>   }
>>>   +#define arch_has_single_step()        (1)
>>> +
>>>   #endif /* _ASM_PTRACE_H */
>>> diff --git a/arch/loongarch/kernel/hw_breakpoint.c
>>> b/arch/loongarch/kernel/hw_breakpoint.c
>>> index 6431cd319c32..94967b887d92 100644
>>> --- a/arch/loongarch/kernel/hw_breakpoint.c
>>> +++ b/arch/loongarch/kernel/hw_breakpoint.c
>>> @@ -153,6 +153,22 @@ static int hw_breakpoint_slot_setup(struct
>>> perf_event **slots, int max_slots,
>>>    */
>>>   void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
>>>   {
>>> +    int i;
>>> +    struct thread_struct *t = &tsk->thread;
>>> +
>>> +    for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
>>> +        if (t->hbp_break[i]) {
>>> +            unregister_hw_breakpoint(t->hbp_break[i]);
>>> +            t->hbp_break[i] = NULL;
>>> +        }
>>> +    }
>>> +
>>> +    for (i = 0; i < LOONGARCH_MAX_WRP; i++) {
>>> +        if (t->hbp_watch[i]) {
>>> +            unregister_hw_breakpoint(t->hbp_watch[i]);
>>> +            t->hbp_watch[i] = NULL;
>>> +        }
>>> +    }
>>>   }
>>>     void ptrace_hw_copy_thread(struct task_struct *tsk)
>>> @@ -498,11 +514,20 @@ arch_initcall(arch_hw_breakpoint_init);
>>>   void hw_breakpoint_thread_switch(struct task_struct *next)
>>>   {
>>>       struct pt_regs *regs = task_pt_regs(next);
>>> -
>>> -    /* Update breakpoints */
>>> -    update_bp_registers(regs, 1, 0);
>>> -    /* Update watchpoints */
>>> -    update_bp_registers(regs, 1, 1);
>>> +    u64 addr, mask;
>>> +
>>> +    if (test_bit(TIF_SINGLESTEP, &task_thread_info(next)->flags)) {
>>> +        addr = read_wb_reg(CSR_CFG_ADDR, 0, 0);
>>> +        mask = read_wb_reg(CSR_CFG_MASK, 0, 0);
>>> +        if ((task_pt_regs(next)->csr_era & ~mask) == (addr & ~mask))
>>> +            csr_write32(0x10000, LOONGARCH_CSR_FWPS);
>>> +        regs->csr_prmd |= CSR_PRMD_PWE;
>>> +    } else {
>>> +        /* Update breakpoints */
>>> +        update_bp_registers(regs, 1, 0);
>>> +        /* Update watchpoints */
>>> +        update_bp_registers(regs, 1, 1);
>>> +    }
>>>   }
>>>     void hw_breakpoint_pmu_read(struct perf_event *bp)
>>> diff --git a/arch/loongarch/kernel/ptrace.c
>>> b/arch/loongarch/kernel/ptrace.c
>>> index bee4194177fd..52a3ee4366f4 100644
>>> --- a/arch/loongarch/kernel/ptrace.c
>>> +++ b/arch/loongarch/kernel/ptrace.c
>>> @@ -20,6 +20,7 @@
>>>   #include <linux/context_tracking.h>
>>>   #include <linux/elf.h>
>>>   #include <linux/errno.h>
>>> +#include <linux/hw_breakpoint.h>
>>>   #include <linux/mm.h>
>>>   #include <linux/ptrace.h>
>>>   #include <linux/regset.h>
>>> @@ -30,6 +31,7 @@
>>>   #include <linux/stddef.h>
>>>   #include <linux/seccomp.h>
>>>   #include <linux/uaccess.h>
>>> +#include <linux/thread_info.h>
>>>     #include <asm/byteorder.h>
>>>   #include <asm/cpu.h>
>>> @@ -39,6 +41,7 @@
>>>   #include <asm/page.h>
>>>   #include <asm/pgtable.h>
>>>   #include <asm/processor.h>
>>> +#include <asm/ptrace.h>
>>>   #include <asm/reg.h>
>>>   #include <asm/syscall.h>
>>>   @@ -541,3 +544,68 @@ long arch_ptrace(struct task_struct *child,
>>> long request,
>>>         return ret;
>>>   }
>>> +
>>> +void ptrace_triggered(struct perf_event *bp,
>>> +              struct perf_sample_data *data, struct pt_regs *regs)
>>> +{
>>> +    struct perf_event_attr attr;
>>> +
>>> +    attr = bp->attr;
>>> +    attr.disabled = true;
>>> +    modify_user_hw_breakpoint(bp, &attr);
>>> +}
>>> +
>>> +static int set_single_step(struct task_struct *tsk, unsigned long 
>>> addr)
>>> +{
>>> +    struct thread_struct *thread = &tsk->thread;
>>> +    struct perf_event *bp;
>>> +    struct perf_event_attr attr;
>>> +    struct arch_hw_breakpoint *info;
>>> +
>>> +    bp = thread->hbp_break[0];
>>> +    if (!bp) {
>>> +        ptrace_breakpoint_init(&attr);
>>> +
>>> +        attr.bp_addr = addr;
>>> +        attr.bp_len = HW_BREAKPOINT_LEN_8;
>>> +        attr.bp_type = HW_BREAKPOINT_X;
>>> +
>>> +        bp = register_user_hw_breakpoint(&attr, ptrace_triggered,
>>> +                         NULL, tsk);
>>> +        if (IS_ERR(bp))
>>> +            return PTR_ERR(bp);
>>> +
>>> +        thread->hbp_break[0] = bp;
>>> +    } else {
>>> +        int err;
>>> +
>>> +        attr = bp->attr;
>>> +        attr.bp_addr = addr;
>>> +        /* reenable breakpoint */
>>> +        attr.disabled = false;
>>> +        err = modify_user_hw_breakpoint(bp, &attr);
>>> +        if (unlikely(err))
>>> +            return err;
>>> +
>>> +        csr_write64(attr.bp_addr, LOONGARCH_CSR_IB0ADDR);
>>> +    }
>>> +    info = counter_arch_bp(bp);
>>> +    info->mask = 0xffffffffffff;
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +/* ptrace API */
>>> +void user_enable_single_step(struct task_struct *task)
>>> +{
>>> +    struct thread_info *ti = task_thread_info(task);
>>> +
>>> +    set_single_step(task, task_pt_regs(task)->csr_era);
>>> +    task->thread.single_step = task_pt_regs(task)->csr_era;
>>> +    set_ti_thread_flag(ti, TIF_SINGLESTEP);
>>> +}
>>> +
>>> +void user_disable_single_step(struct task_struct *task)
>>> +{
>>> +    clear_tsk_thread_flag(task, TIF_SINGLESTEP);
>>> +}
>>> diff --git a/arch/loongarch/kernel/traps.c
>>> b/arch/loongarch/kernel/traps.c
>>> index 2b133079e0f3..56d7d076153c 100644
>>> --- a/arch/loongarch/kernel/traps.c
>>> +++ b/arch/loongarch/kernel/traps.c
>>> @@ -511,9 +511,23 @@ asmlinkage void noinstr do_watch(struct pt_regs
>>> *regs)
>>>   #ifdef CONFIG_HAVE_HW_BREAKPOINT
>>>       irqentry_state_t state = irqentry_enter(regs);
>>>   -    breakpoint_handler(regs);
>>> -    watchpoint_handler(regs);
>>> -    force_sig(SIGTRAP);
>>> +    if (test_tsk_thread_flag(current, TIF_SINGLESTEP)) {
>>> +        int llbit = (csr_read32(LOONGARCH_CSR_LLBCTL) & 0x1);
>>> +        unsigned long pc = regs->csr_era;
>>> +
>>> +        if (llbit) {
>>
>> Hi, Qing,
>>
>>
>> It should be noted here. When the ll-sc combo is encountered, it is
>> regarded as an single instruction. So donnot clear llbit and reset
>> CSR.FWPS.Skip until the llsc execution is completed.
>>
>>> +            csr_write32(0x10000, LOONGARCH_CSR_FWPS);
>>> +            csr_write32(0x4, LOONGARCH_CSR_LLBCTL);
>>> +        } else if (pc == current->thread.single_step) {
>> Note here as well. Because 3A5000 has a strange hardware issue that
>> certain insns are occasionally not skipped when CSR.FWPS.Skip is set,
>> such as fld.d/fst.d. Singlestep needs compare whether the csr_era is
>> equal to the value of singlestep which last time set, as in most case
>>
>> they should be not equal.
>
> BTW, I prefer to separate this special processing from this patch (for
> example, add two patchs in this series, add special processing of
> instructions such as LL-SC, and FLD.D/FST.D, etc.), and add
> corresponding test cases to describe the phenomenon and reason, this
> is conducive to everyone's understanding of the code.

Agree.


>
>>
>>
>> And for this condition expression, some potentially strange insns may
>> cause bugs. For example, "b 0" or "jr rd" where rd is equal to its PC
>> will cause cannot stop the singlestep. These insns is so strange that
>> we did not consider in OW. However, I think we should consider this
>> case for robustness in upstream.
>>
>
> I don't know if there will be instructions like "b 0" or "jr rd (rd =
> pc)" in the executable file after linking?

So I said that is strange, and it can be fatal if happened. :-)


>
> Youling.
>
>> It is better to use macros avoid direct numbers when csr_write* is 
>> invoked.
>>
>>
>> Thanks,
>>
>> Jinyang
>>
>>> +            csr_write32(0x10000, LOONGARCH_CSR_FWPS);
>>> +        } else {
>>> +            force_sig(SIGTRAP);
>>> +        }
>>> +    } else {
>>> +        breakpoint_handler(regs);
>>> +        watchpoint_handler(regs);
>>> +        force_sig(SIGTRAP);
>>> +    }
>>>         irqentry_exit(regs, state);
>>>   #endif
>>
>
  
WANG Xuerui Feb. 16, 2023, 6:57 a.m. UTC | #4
On 2023/2/16 11:13, Youling Tang wrote:
> 
> 
> On 02/16/2023 10:07 AM, Jinyang He wrote:
>>
>> On 2023-02-15 17:23, Qing Zhang wrote:
>>> Use the generic ptrace_resume code for PTRACE_SYSCALL, PTRACE_CONT,
>>> PTRACE_KILL and PTRACE_SINGLESTEP. This implies defining
>>> arch_has_single_step in  and implementing the
>>> user_enable_single_step and user_disable_single_step functions.
>>>
>>> LongArch has no hardware single-step register. the hardware single-step
>>> function multiplex fetch instruction watchpoint(FWPS) and specifies that
>>> the next instruction must trigger the watch exception by setting the
>>> mask bit.
>>>
>>> Signed-off-by: Qing Zhang <zhangqing@loongson.cn>
>>> ---
>>>   arch/loongarch/include/asm/processor.h |  3 ++
>>>   arch/loongarch/include/asm/ptrace.h    |  2 +
>>>   arch/loongarch/kernel/hw_breakpoint.c  | 35 +++++++++++--
>>>   arch/loongarch/kernel/ptrace.c         | 68 ++++++++++++++++++++++++++
>>>   arch/loongarch/kernel/traps.c          | 20 ++++++--
>>>   5 files changed, 120 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/arch/loongarch/include/asm/processor.h
>>> b/arch/loongarch/include/asm/processor.h
>>> index db060c5a976f..3ea0f1910c23 100644
>>> --- a/arch/loongarch/include/asm/processor.h
>>> +++ b/arch/loongarch/include/asm/processor.h
>>> @@ -131,6 +131,9 @@ struct thread_struct {
>>>       struct perf_event    *hbp_break[LOONGARCH_MAX_BRP];
>>>       struct perf_event    *hbp_watch[LOONGARCH_MAX_WRP];
>>>   +    /* Used by ptrace single_step */
>>> +    unsigned long single_step;
>>> +
>>>       /*
>>>        * FPU & vector registers, must be at last because
>>>        * they are conditionally copied at fork().
>>> diff --git a/arch/loongarch/include/asm/ptrace.h
>>> b/arch/loongarch/include/asm/ptrace.h
>>> index 58596c4f8a0f..66a0e6c480a3 100644
>>> --- a/arch/loongarch/include/asm/ptrace.h
>>> +++ b/arch/loongarch/include/asm/ptrace.h
>>> @@ -150,4 +150,6 @@ static inline void user_stack_pointer_set(struct
>>> pt_regs *regs,
>>>       regs->regs[3] = val;
>>>   }
>>>   +#define arch_has_single_step()        (1)
>>> +
>>>   #endif /* _ASM_PTRACE_H */
>>> diff --git a/arch/loongarch/kernel/hw_breakpoint.c
>>> b/arch/loongarch/kernel/hw_breakpoint.c
>>> index 6431cd319c32..94967b887d92 100644
>>> --- a/arch/loongarch/kernel/hw_breakpoint.c
>>> +++ b/arch/loongarch/kernel/hw_breakpoint.c
>>> @@ -153,6 +153,22 @@ static int hw_breakpoint_slot_setup(struct
>>> perf_event **slots, int max_slots,
>>>    */
>>>   void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
>>>   {
>>> +    int i;
>>> +    struct thread_struct *t = &tsk->thread;
>>> +
>>> +    for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
>>> +        if (t->hbp_break[i]) {
>>> +            unregister_hw_breakpoint(t->hbp_break[i]);
>>> +            t->hbp_break[i] = NULL;
>>> +        }
>>> +    }
>>> +
>>> +    for (i = 0; i < LOONGARCH_MAX_WRP; i++) {
>>> +        if (t->hbp_watch[i]) {
>>> +            unregister_hw_breakpoint(t->hbp_watch[i]);
>>> +            t->hbp_watch[i] = NULL;
>>> +        }
>>> +    }
>>>   }
>>>     void ptrace_hw_copy_thread(struct task_struct *tsk)
>>> @@ -498,11 +514,20 @@ arch_initcall(arch_hw_breakpoint_init);
>>>   void hw_breakpoint_thread_switch(struct task_struct *next)
>>>   {
>>>       struct pt_regs *regs = task_pt_regs(next);
>>> -
>>> -    /* Update breakpoints */
>>> -    update_bp_registers(regs, 1, 0);
>>> -    /* Update watchpoints */
>>> -    update_bp_registers(regs, 1, 1);
>>> +    u64 addr, mask;
>>> +
>>> +    if (test_bit(TIF_SINGLESTEP, &task_thread_info(next)->flags)) {
>>> +        addr = read_wb_reg(CSR_CFG_ADDR, 0, 0);
>>> +        mask = read_wb_reg(CSR_CFG_MASK, 0, 0);
>>> +        if ((task_pt_regs(next)->csr_era & ~mask) == (addr & ~mask))
>>> +            csr_write32(0x10000, LOONGARCH_CSR_FWPS);
>>> +        regs->csr_prmd |= CSR_PRMD_PWE;
>>> +    } else {
>>> +        /* Update breakpoints */
>>> +        update_bp_registers(regs, 1, 0);
>>> +        /* Update watchpoints */
>>> +        update_bp_registers(regs, 1, 1);
>>> +    }
>>>   }
>>>     void hw_breakpoint_pmu_read(struct perf_event *bp)
>>> diff --git a/arch/loongarch/kernel/ptrace.c
>>> b/arch/loongarch/kernel/ptrace.c
>>> index bee4194177fd..52a3ee4366f4 100644
>>> --- a/arch/loongarch/kernel/ptrace.c
>>> +++ b/arch/loongarch/kernel/ptrace.c
>>> @@ -20,6 +20,7 @@
>>>   #include <linux/context_tracking.h>
>>>   #include <linux/elf.h>
>>>   #include <linux/errno.h>
>>> +#include <linux/hw_breakpoint.h>
>>>   #include <linux/mm.h>
>>>   #include <linux/ptrace.h>
>>>   #include <linux/regset.h>
>>> @@ -30,6 +31,7 @@
>>>   #include <linux/stddef.h>
>>>   #include <linux/seccomp.h>
>>>   #include <linux/uaccess.h>
>>> +#include <linux/thread_info.h>
>>>     #include <asm/byteorder.h>
>>>   #include <asm/cpu.h>
>>> @@ -39,6 +41,7 @@
>>>   #include <asm/page.h>
>>>   #include <asm/pgtable.h>
>>>   #include <asm/processor.h>
>>> +#include <asm/ptrace.h>
>>>   #include <asm/reg.h>
>>>   #include <asm/syscall.h>
>>>   @@ -541,3 +544,68 @@ long arch_ptrace(struct task_struct *child,
>>> long request,
>>>         return ret;
>>>   }
>>> +
>>> +void ptrace_triggered(struct perf_event *bp,
>>> +              struct perf_sample_data *data, struct pt_regs *regs)
>>> +{
>>> +    struct perf_event_attr attr;
>>> +
>>> +    attr = bp->attr;
>>> +    attr.disabled = true;
>>> +    modify_user_hw_breakpoint(bp, &attr);
>>> +}
>>> +
>>> +static int set_single_step(struct task_struct *tsk, unsigned long addr)
>>> +{
>>> +    struct thread_struct *thread = &tsk->thread;
>>> +    struct perf_event *bp;
>>> +    struct perf_event_attr attr;
>>> +    struct arch_hw_breakpoint *info;
>>> +
>>> +    bp = thread->hbp_break[0];
>>> +    if (!bp) {
>>> +        ptrace_breakpoint_init(&attr);
>>> +
>>> +        attr.bp_addr = addr;
>>> +        attr.bp_len = HW_BREAKPOINT_LEN_8;
>>> +        attr.bp_type = HW_BREAKPOINT_X;
>>> +
>>> +        bp = register_user_hw_breakpoint(&attr, ptrace_triggered,
>>> +                         NULL, tsk);
>>> +        if (IS_ERR(bp))
>>> +            return PTR_ERR(bp);
>>> +
>>> +        thread->hbp_break[0] = bp;
>>> +    } else {
>>> +        int err;
>>> +
>>> +        attr = bp->attr;
>>> +        attr.bp_addr = addr;
>>> +        /* reenable breakpoint */
>>> +        attr.disabled = false;
>>> +        err = modify_user_hw_breakpoint(bp, &attr);
>>> +        if (unlikely(err))
>>> +            return err;
>>> +
>>> +        csr_write64(attr.bp_addr, LOONGARCH_CSR_IB0ADDR);
>>> +    }
>>> +    info = counter_arch_bp(bp);
>>> +    info->mask = 0xffffffffffff;
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +/* ptrace API */
>>> +void user_enable_single_step(struct task_struct *task)
>>> +{
>>> +    struct thread_info *ti = task_thread_info(task);
>>> +
>>> +    set_single_step(task, task_pt_regs(task)->csr_era);
>>> +    task->thread.single_step = task_pt_regs(task)->csr_era;
>>> +    set_ti_thread_flag(ti, TIF_SINGLESTEP);
>>> +}
>>> +
>>> +void user_disable_single_step(struct task_struct *task)
>>> +{
>>> +    clear_tsk_thread_flag(task, TIF_SINGLESTEP);
>>> +}
>>> diff --git a/arch/loongarch/kernel/traps.c
>>> b/arch/loongarch/kernel/traps.c
>>> index 2b133079e0f3..56d7d076153c 100644
>>> --- a/arch/loongarch/kernel/traps.c
>>> +++ b/arch/loongarch/kernel/traps.c
>>> @@ -511,9 +511,23 @@ asmlinkage void noinstr do_watch(struct pt_regs
>>> *regs)
>>>   #ifdef CONFIG_HAVE_HW_BREAKPOINT
>>>       irqentry_state_t state = irqentry_enter(regs);
>>>   -    breakpoint_handler(regs);
>>> -    watchpoint_handler(regs);
>>> -    force_sig(SIGTRAP);
>>> +    if (test_tsk_thread_flag(current, TIF_SINGLESTEP)) {
>>> +        int llbit = (csr_read32(LOONGARCH_CSR_LLBCTL) & 0x1);
>>> +        unsigned long pc = regs->csr_era;
>>> +
>>> +        if (llbit) {
>>
>> Hi, Qing,
>>
>>
>> It should be noted here. When the ll-sc combo is encountered, it is
>> regarded as an single instruction. So donnot clear llbit and reset
>> CSR.FWPS.Skip until the llsc execution is completed.
>>
>>> +            csr_write32(0x10000, LOONGARCH_CSR_FWPS);
>>> +            csr_write32(0x4, LOONGARCH_CSR_LLBCTL);
>>> +        } else if (pc == current->thread.single_step) {
>> Note here as well. Because 3A5000 has a strange hardware issue that
>> certain insns are occasionally not skipped when CSR.FWPS.Skip is set,
>> such as fld.d/fst.d. Singlestep needs compare whether the csr_era is
>> equal to the value of singlestep which last time set, as in most case
>>
>> they should be not equal.
> 
> BTW, I prefer to separate this special processing from this patch (for
> example, add two patchs in this series, add special processing of
> instructions such as LL-SC, and FLD.D/FST.D, etc.), and add
> corresponding test cases to describe the phenomenon and reason, this
> is conducive to everyone's understanding of the code.
> 
>>
>>
>> And for this condition expression, some potentially strange insns may
>> cause bugs. For example, "b 0" or "jr rd" where rd is equal to its PC
>> will cause cannot stop the singlestep. These insns is so strange that
>> we did not consider in OW. However, I think we should consider this
>> case for robustness in upstream.
>>
> 
> I don't know if there will be instructions like "b 0" or "jr rd (rd =
> pc)" in the executable file after linking?
FWIW `jirl A, A, 0` is apparently ubiquitous in the old world:

$ objdump -d 
~/loongnix-sysroot/usr/lib/loongarch64-linux-gnu/libc_nonshared.a | grep 
-E 'jirl\s+\$(..), \$\1'
   20:   4c000021        jirl            $ra, $ra, 0
   24:   4c000021        jirl            $ra, $ra, 0
    c:   4c000021        jirl            $ra, $ra, 0

One of the occurrences is in __stack_chk_fail_local, guess how popular 
it is among the whole old world...

But fortunately a quick check against my Gentoo sysroot yielded nothing, 
so perhaps the new world as a whole isn't going to be affected. I've 
checked some of the *older* (i.e. before 2022) new world sysroots I have 
at hand and they seemed not affected either.

As for `b 0`, they are kinda already signifying some panic/abort/halt 
condition, at least in a few projects that I've studied/ported.

But IMO the kernel shouldn't get DoS'd even if userland gets stuck in 
things like this, so some kind of workaround should still be necessary.
  
Qing Zhang Feb. 17, 2023, 2:36 a.m. UTC | #5
Hi, forks,
On 2023/2/16 下午2:57, WANG Xuerui wrote:
> On 2023/2/16 11:13, Youling Tang wrote:
>>
>>
>> On 02/16/2023 10:07 AM, Jinyang He wrote:
>>>
>>> On 2023-02-15 17:23, Qing Zhang wrote:
>>>> Use the generic ptrace_resume code for PTRACE_SYSCALL, PTRACE_CONT,
>>>> PTRACE_KILL and PTRACE_SINGLESTEP. This implies defining
>>>> arch_has_single_step in  and implementing the
>>>> user_enable_single_step and user_disable_single_step functions.
>>>>
>>>> LongArch has no hardware single-step register. the hardware single-step
>>>> function multiplex fetch instruction watchpoint(FWPS) and specifies 
>>>> that
>>>> the next instruction must trigger the watch exception by setting the
>>>> mask bit.
>>>>
>>>> Signed-off-by: Qing Zhang <zhangqing@loongson.cn>
>>>> ---
>>>>   arch/loongarch/include/asm/processor.h |  3 ++
>>>>   arch/loongarch/include/asm/ptrace.h    |  2 +
>>>>   arch/loongarch/kernel/hw_breakpoint.c  | 35 +++++++++++--
>>>>   arch/loongarch/kernel/ptrace.c         | 68 
>>>> ++++++++++++++++++++++++++
>>>>   arch/loongarch/kernel/traps.c          | 20 ++++++--
>>>>   5 files changed, 120 insertions(+), 8 deletions(-)
>>>>
>>>> diff --git a/arch/loongarch/include/asm/processor.h
>>>> b/arch/loongarch/include/asm/processor.h
>>>> index db060c5a976f..3ea0f1910c23 100644
>>>> --- a/arch/loongarch/include/asm/processor.h
>>>> +++ b/arch/loongarch/include/asm/processor.h
>>>> @@ -131,6 +131,9 @@ struct thread_struct {
>>>>       struct perf_event    *hbp_break[LOONGARCH_MAX_BRP];
>>>>       struct perf_event    *hbp_watch[LOONGARCH_MAX_WRP];
>>>>   +    /* Used by ptrace single_step */
>>>> +    unsigned long single_step;
>>>> +
>>>>       /*
>>>>        * FPU & vector registers, must be at last because
>>>>        * they are conditionally copied at fork().
>>>> diff --git a/arch/loongarch/include/asm/ptrace.h
>>>> b/arch/loongarch/include/asm/ptrace.h
>>>> index 58596c4f8a0f..66a0e6c480a3 100644
>>>> --- a/arch/loongarch/include/asm/ptrace.h
>>>> +++ b/arch/loongarch/include/asm/ptrace.h
>>>> @@ -150,4 +150,6 @@ static inline void user_stack_pointer_set(struct
>>>> pt_regs *regs,
>>>>       regs->regs[3] = val;
>>>>   }
>>>>   +#define arch_has_single_step()        (1)
>>>> +
>>>>   #endif /* _ASM_PTRACE_H */
>>>> diff --git a/arch/loongarch/kernel/hw_breakpoint.c
>>>> b/arch/loongarch/kernel/hw_breakpoint.c
>>>> index 6431cd319c32..94967b887d92 100644
>>>> --- a/arch/loongarch/kernel/hw_breakpoint.c
>>>> +++ b/arch/loongarch/kernel/hw_breakpoint.c
>>>> @@ -153,6 +153,22 @@ static int hw_breakpoint_slot_setup(struct
>>>> perf_event **slots, int max_slots,
>>>>    */
>>>>   void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
>>>>   {
>>>> +    int i;
>>>> +    struct thread_struct *t = &tsk->thread;
>>>> +
>>>> +    for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
>>>> +        if (t->hbp_break[i]) {
>>>> +            unregister_hw_breakpoint(t->hbp_break[i]);
>>>> +            t->hbp_break[i] = NULL;
>>>> +        }
>>>> +    }
>>>> +
>>>> +    for (i = 0; i < LOONGARCH_MAX_WRP; i++) {
>>>> +        if (t->hbp_watch[i]) {
>>>> +            unregister_hw_breakpoint(t->hbp_watch[i]);
>>>> +            t->hbp_watch[i] = NULL;
>>>> +        }
>>>> +    }
>>>>   }
>>>>     void ptrace_hw_copy_thread(struct task_struct *tsk)
>>>> @@ -498,11 +514,20 @@ arch_initcall(arch_hw_breakpoint_init);
>>>>   void hw_breakpoint_thread_switch(struct task_struct *next)
>>>>   {
>>>>       struct pt_regs *regs = task_pt_regs(next);
>>>> -
>>>> -    /* Update breakpoints */
>>>> -    update_bp_registers(regs, 1, 0);
>>>> -    /* Update watchpoints */
>>>> -    update_bp_registers(regs, 1, 1);
>>>> +    u64 addr, mask;
>>>> +
>>>> +    if (test_bit(TIF_SINGLESTEP, &task_thread_info(next)->flags)) {
>>>> +        addr = read_wb_reg(CSR_CFG_ADDR, 0, 0);
>>>> +        mask = read_wb_reg(CSR_CFG_MASK, 0, 0);
>>>> +        if ((task_pt_regs(next)->csr_era & ~mask) == (addr & ~mask))
>>>> +            csr_write32(0x10000, LOONGARCH_CSR_FWPS);
>>>> +        regs->csr_prmd |= CSR_PRMD_PWE;
>>>> +    } else {
>>>> +        /* Update breakpoints */
>>>> +        update_bp_registers(regs, 1, 0);
>>>> +        /* Update watchpoints */
>>>> +        update_bp_registers(regs, 1, 1);
>>>> +    }
>>>>   }
>>>>     void hw_breakpoint_pmu_read(struct perf_event *bp)
>>>> diff --git a/arch/loongarch/kernel/ptrace.c
>>>> b/arch/loongarch/kernel/ptrace.c
>>>> index bee4194177fd..52a3ee4366f4 100644
>>>> --- a/arch/loongarch/kernel/ptrace.c
>>>> +++ b/arch/loongarch/kernel/ptrace.c
>>>> @@ -20,6 +20,7 @@
>>>>   #include <linux/context_tracking.h>
>>>>   #include <linux/elf.h>
>>>>   #include <linux/errno.h>
>>>> +#include <linux/hw_breakpoint.h>
>>>>   #include <linux/mm.h>
>>>>   #include <linux/ptrace.h>
>>>>   #include <linux/regset.h>
>>>> @@ -30,6 +31,7 @@
>>>>   #include <linux/stddef.h>
>>>>   #include <linux/seccomp.h>
>>>>   #include <linux/uaccess.h>
>>>> +#include <linux/thread_info.h>
>>>>     #include <asm/byteorder.h>
>>>>   #include <asm/cpu.h>
>>>> @@ -39,6 +41,7 @@
>>>>   #include <asm/page.h>
>>>>   #include <asm/pgtable.h>
>>>>   #include <asm/processor.h>
>>>> +#include <asm/ptrace.h>
>>>>   #include <asm/reg.h>
>>>>   #include <asm/syscall.h>
>>>>   @@ -541,3 +544,68 @@ long arch_ptrace(struct task_struct *child,
>>>> long request,
>>>>         return ret;
>>>>   }
>>>> +
>>>> +void ptrace_triggered(struct perf_event *bp,
>>>> +              struct perf_sample_data *data, struct pt_regs *regs)
>>>> +{
>>>> +    struct perf_event_attr attr;
>>>> +
>>>> +    attr = bp->attr;
>>>> +    attr.disabled = true;
>>>> +    modify_user_hw_breakpoint(bp, &attr);
>>>> +}
>>>> +
>>>> +static int set_single_step(struct task_struct *tsk, unsigned long 
>>>> addr)
>>>> +{
>>>> +    struct thread_struct *thread = &tsk->thread;
>>>> +    struct perf_event *bp;
>>>> +    struct perf_event_attr attr;
>>>> +    struct arch_hw_breakpoint *info;
>>>> +
>>>> +    bp = thread->hbp_break[0];
>>>> +    if (!bp) {
>>>> +        ptrace_breakpoint_init(&attr);
>>>> +
>>>> +        attr.bp_addr = addr;
>>>> +        attr.bp_len = HW_BREAKPOINT_LEN_8;
>>>> +        attr.bp_type = HW_BREAKPOINT_X;
>>>> +
>>>> +        bp = register_user_hw_breakpoint(&attr, ptrace_triggered,
>>>> +                         NULL, tsk);
>>>> +        if (IS_ERR(bp))
>>>> +            return PTR_ERR(bp);
>>>> +
>>>> +        thread->hbp_break[0] = bp;
>>>> +    } else {
>>>> +        int err;
>>>> +
>>>> +        attr = bp->attr;
>>>> +        attr.bp_addr = addr;
>>>> +        /* reenable breakpoint */
>>>> +        attr.disabled = false;
>>>> +        err = modify_user_hw_breakpoint(bp, &attr);
>>>> +        if (unlikely(err))
>>>> +            return err;
>>>> +
>>>> +        csr_write64(attr.bp_addr, LOONGARCH_CSR_IB0ADDR);
>>>> +    }
>>>> +    info = counter_arch_bp(bp);
>>>> +    info->mask = 0xffffffffffff;
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +/* ptrace API */
>>>> +void user_enable_single_step(struct task_struct *task)
>>>> +{
>>>> +    struct thread_info *ti = task_thread_info(task);
>>>> +
>>>> +    set_single_step(task, task_pt_regs(task)->csr_era);
>>>> +    task->thread.single_step = task_pt_regs(task)->csr_era;
>>>> +    set_ti_thread_flag(ti, TIF_SINGLESTEP);
>>>> +}
>>>> +
>>>> +void user_disable_single_step(struct task_struct *task)
>>>> +{
>>>> +    clear_tsk_thread_flag(task, TIF_SINGLESTEP);
>>>> +}
>>>> diff --git a/arch/loongarch/kernel/traps.c
>>>> b/arch/loongarch/kernel/traps.c
>>>> index 2b133079e0f3..56d7d076153c 100644
>>>> --- a/arch/loongarch/kernel/traps.c
>>>> +++ b/arch/loongarch/kernel/traps.c
>>>> @@ -511,9 +511,23 @@ asmlinkage void noinstr do_watch(struct pt_regs
>>>> *regs)
>>>>   #ifdef CONFIG_HAVE_HW_BREAKPOINT
>>>>       irqentry_state_t state = irqentry_enter(regs);
>>>>   -    breakpoint_handler(regs);
>>>> -    watchpoint_handler(regs);
>>>> -    force_sig(SIGTRAP);
>>>> +    if (test_tsk_thread_flag(current, TIF_SINGLESTEP)) {
>>>> +        int llbit = (csr_read32(LOONGARCH_CSR_LLBCTL) & 0x1);
>>>> +        unsigned long pc = regs->csr_era;
>>>> +
>>>> +        if (llbit) {
>>>
>>> Hi, Qing,
>>>
>>>
>>> It should be noted here. When the ll-sc combo is encountered, it is
>>> regarded as an single instruction. So donnot clear llbit and reset
>>> CSR.FWPS.Skip until the llsc execution is completed.
>>>
>>>> +            csr_write32(0x10000, LOONGARCH_CSR_FWPS);
>>>> +            csr_write32(0x4, LOONGARCH_CSR_LLBCTL);
>>>> +        } else if (pc == current->thread.single_step) {
>>> Note here as well. Because 3A5000 has a strange hardware issue that
>>> certain insns are occasionally not skipped when CSR.FWPS.Skip is set,
>>> such as fld.d/fst.d. Singlestep needs compare whether the csr_era is
>>> equal to the value of singlestep which last time set, as in most case
>>>
>>> they should be not equal.
>>
>> BTW, I prefer to separate this special processing from this patch (for
>> example, add two patchs in this series, add special processing of
>> instructions such as LL-SC, and FLD.D/FST.D, etc.), and add
>> corresponding test cases to describe the phenomenon and reason, this
>> is conducive to everyone's understanding of the code.
>>
In order to ensure the integrity of the patch function,
it is better to add comments without splitting.
>>>
>>>
>>> And for this condition expression, some potentially strange insns may
>>> cause bugs. For example, "b 0" or "jr rd" where rd is equal to its PC
>>> will cause cannot stop the singlestep. These insns is so strange that
>>> we did not consider in OW. However, I think we should consider this
>>> case for robustness in upstream.
>>>
>>
>> I don't know if there will be instructions like "b 0" or "jr rd (rd =
>> pc)" in the executable file after linking?
> FWIW `jirl A, A, 0` is apparently ubiquitous in the old world:
> 
> $ objdump -d 
> ~/loongnix-sysroot/usr/lib/loongarch64-linux-gnu/libc_nonshared.a | grep 
> -E 'jirl\s+\$(..), \$\1'
>    20:   4c000021        jirl            $ra, $ra, 0
>    24:   4c000021        jirl            $ra, $ra, 0
>     c:   4c000021        jirl            $ra, $ra, 0
> 
> One of the occurrences is in __stack_chk_fail_local, guess how popular 
> it is among the whole old world...
> 
> But fortunately a quick check against my Gentoo sysroot yielded nothing, 
> so perhaps the new world as a whole isn't going to be affected. I've 
> checked some of the *older* (i.e. before 2022) new world sysroots I have 
> at hand and they seemed not affected either.
> 
> As for `b 0`, they are kinda already signifying some panic/abort/halt 
> condition, at least in a few projects that I've studied/ported.
> 
> But IMO the kernel shouldn't get DoS'd even if userland gets stuck in 
> things like this, so some kind of workaround should still be necessary.
> 
Got it, the fix is already in v4.

Thanks,
-Qing
  

Patch

diff --git a/arch/loongarch/include/asm/processor.h b/arch/loongarch/include/asm/processor.h
index db060c5a976f..3ea0f1910c23 100644
--- a/arch/loongarch/include/asm/processor.h
+++ b/arch/loongarch/include/asm/processor.h
@@ -131,6 +131,9 @@  struct thread_struct {
 	struct perf_event	*hbp_break[LOONGARCH_MAX_BRP];
 	struct perf_event	*hbp_watch[LOONGARCH_MAX_WRP];
 
+	/* Used by ptrace single_step */
+	unsigned long single_step;
+
 	/*
 	 * FPU & vector registers, must be at last because
 	 * they are conditionally copied at fork().
diff --git a/arch/loongarch/include/asm/ptrace.h b/arch/loongarch/include/asm/ptrace.h
index 58596c4f8a0f..66a0e6c480a3 100644
--- a/arch/loongarch/include/asm/ptrace.h
+++ b/arch/loongarch/include/asm/ptrace.h
@@ -150,4 +150,6 @@  static inline void user_stack_pointer_set(struct pt_regs *regs,
 	regs->regs[3] = val;
 }
 
+#define arch_has_single_step()		(1)
+
 #endif /* _ASM_PTRACE_H */
diff --git a/arch/loongarch/kernel/hw_breakpoint.c b/arch/loongarch/kernel/hw_breakpoint.c
index 6431cd319c32..94967b887d92 100644
--- a/arch/loongarch/kernel/hw_breakpoint.c
+++ b/arch/loongarch/kernel/hw_breakpoint.c
@@ -153,6 +153,22 @@  static int hw_breakpoint_slot_setup(struct perf_event **slots, int max_slots,
  */
 void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
 {
+	int i;
+	struct thread_struct *t = &tsk->thread;
+
+	for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
+		if (t->hbp_break[i]) {
+			unregister_hw_breakpoint(t->hbp_break[i]);
+			t->hbp_break[i] = NULL;
+		}
+	}
+
+	for (i = 0; i < LOONGARCH_MAX_WRP; i++) {
+		if (t->hbp_watch[i]) {
+			unregister_hw_breakpoint(t->hbp_watch[i]);
+			t->hbp_watch[i] = NULL;
+		}
+	}
 }
 
 void ptrace_hw_copy_thread(struct task_struct *tsk)
@@ -498,11 +514,20 @@  arch_initcall(arch_hw_breakpoint_init);
 void hw_breakpoint_thread_switch(struct task_struct *next)
 {
 	struct pt_regs *regs = task_pt_regs(next);
-
-	/* Update breakpoints */
-	update_bp_registers(regs, 1, 0);
-	/* Update watchpoints */
-	update_bp_registers(regs, 1, 1);
+	u64 addr, mask;
+
+	if (test_bit(TIF_SINGLESTEP, &task_thread_info(next)->flags)) {
+		addr = read_wb_reg(CSR_CFG_ADDR, 0, 0);
+		mask = read_wb_reg(CSR_CFG_MASK, 0, 0);
+		if ((task_pt_regs(next)->csr_era & ~mask) == (addr & ~mask))
+			csr_write32(0x10000, LOONGARCH_CSR_FWPS);
+		regs->csr_prmd |= CSR_PRMD_PWE;
+	} else {
+		/* Update breakpoints */
+		update_bp_registers(regs, 1, 0);
+		/* Update watchpoints */
+		update_bp_registers(regs, 1, 1);
+	}
 }
 
 void hw_breakpoint_pmu_read(struct perf_event *bp)
diff --git a/arch/loongarch/kernel/ptrace.c b/arch/loongarch/kernel/ptrace.c
index bee4194177fd..52a3ee4366f4 100644
--- a/arch/loongarch/kernel/ptrace.c
+++ b/arch/loongarch/kernel/ptrace.c
@@ -20,6 +20,7 @@ 
 #include <linux/context_tracking.h>
 #include <linux/elf.h>
 #include <linux/errno.h>
+#include <linux/hw_breakpoint.h>
 #include <linux/mm.h>
 #include <linux/ptrace.h>
 #include <linux/regset.h>
@@ -30,6 +31,7 @@ 
 #include <linux/stddef.h>
 #include <linux/seccomp.h>
 #include <linux/uaccess.h>
+#include <linux/thread_info.h>
 
 #include <asm/byteorder.h>
 #include <asm/cpu.h>
@@ -39,6 +41,7 @@ 
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
+#include <asm/ptrace.h>
 #include <asm/reg.h>
 #include <asm/syscall.h>
 
@@ -541,3 +544,68 @@  long arch_ptrace(struct task_struct *child, long request,
 
 	return ret;
 }
+
+void ptrace_triggered(struct perf_event *bp,
+		      struct perf_sample_data *data, struct pt_regs *regs)
+{
+	struct perf_event_attr attr;
+
+	attr = bp->attr;
+	attr.disabled = true;
+	modify_user_hw_breakpoint(bp, &attr);
+}
+
+static int set_single_step(struct task_struct *tsk, unsigned long addr)
+{
+	struct thread_struct *thread = &tsk->thread;
+	struct perf_event *bp;
+	struct perf_event_attr attr;
+	struct arch_hw_breakpoint *info;
+
+	bp = thread->hbp_break[0];
+	if (!bp) {
+		ptrace_breakpoint_init(&attr);
+
+		attr.bp_addr = addr;
+		attr.bp_len = HW_BREAKPOINT_LEN_8;
+		attr.bp_type = HW_BREAKPOINT_X;
+
+		bp = register_user_hw_breakpoint(&attr, ptrace_triggered,
+						 NULL, tsk);
+		if (IS_ERR(bp))
+			return PTR_ERR(bp);
+
+		thread->hbp_break[0] = bp;
+	} else {
+		int err;
+
+		attr = bp->attr;
+		attr.bp_addr = addr;
+		/* reenable breakpoint */
+		attr.disabled = false;
+		err = modify_user_hw_breakpoint(bp, &attr);
+		if (unlikely(err))
+			return err;
+
+		csr_write64(attr.bp_addr, LOONGARCH_CSR_IB0ADDR);
+	}
+	info = counter_arch_bp(bp);
+	info->mask = 0xffffffffffff;
+
+	return 0;
+}
+
+/* ptrace API */
+void user_enable_single_step(struct task_struct *task)
+{
+	struct thread_info *ti = task_thread_info(task);
+
+	set_single_step(task, task_pt_regs(task)->csr_era);
+	task->thread.single_step = task_pt_regs(task)->csr_era;
+	set_ti_thread_flag(ti, TIF_SINGLESTEP);
+}
+
+void user_disable_single_step(struct task_struct *task)
+{
+	clear_tsk_thread_flag(task, TIF_SINGLESTEP);
+}
diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
index 2b133079e0f3..56d7d076153c 100644
--- a/arch/loongarch/kernel/traps.c
+++ b/arch/loongarch/kernel/traps.c
@@ -511,9 +511,23 @@  asmlinkage void noinstr do_watch(struct pt_regs *regs)
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 	irqentry_state_t state = irqentry_enter(regs);
 
-	breakpoint_handler(regs);
-	watchpoint_handler(regs);
-	force_sig(SIGTRAP);
+	if (test_tsk_thread_flag(current, TIF_SINGLESTEP)) {
+		int llbit = (csr_read32(LOONGARCH_CSR_LLBCTL) & 0x1);
+		unsigned long pc = regs->csr_era;
+
+		if (llbit) {
+			csr_write32(0x10000, LOONGARCH_CSR_FWPS);
+			csr_write32(0x4, LOONGARCH_CSR_LLBCTL);
+		} else if (pc == current->thread.single_step) {
+			csr_write32(0x10000, LOONGARCH_CSR_FWPS);
+		} else {
+			force_sig(SIGTRAP);
+		}
+	} else {
+		breakpoint_handler(regs);
+		watchpoint_handler(regs);
+		force_sig(SIGTRAP);
+	}
 
 	irqentry_exit(regs, state);
 #endif