RISC-V: Don't use IPIs in flush_icache_all() when patching text

Message ID 20240205042955.833752-1-apatel@ventanamicro.com
State New
Headers
Series RISC-V: Don't use IPIs in flush_icache_all() when patching text |

Commit Message

Anup Patel Feb. 5, 2024, 4:29 a.m. UTC
  If some of the HARTs are parked by stop machine then IPI-based
flushing in flush_icache_all() will hang. This hang is observed
when text patching is invoked by various debug and BPF features.

To avoid this hang, we force use of SBI-based icache flushing
when patching text.

Fixes: 627922843235 ("RISC-V: Use IPIs for remote icache flush when possible")
Reported-by: Bjorn Topel <bjorn@kernel.org>
Closes: https://gist.github.com/bjoto/04a580568378f3b5483af07cd9d22501
Signed-off-by: Anup Patel <apatel@ventanamicro.com>
---
 arch/riscv/include/asm/cacheflush.h | 7 ++++---
 arch/riscv/kernel/hibernate.c       | 2 +-
 arch/riscv/kernel/patch.c           | 4 ++--
 arch/riscv/mm/cacheflush.c          | 7 ++++---
 4 files changed, 11 insertions(+), 9 deletions(-)
  

Comments

Alexandre Ghiti Feb. 5, 2024, 6:22 a.m. UTC | #1
Hi Anup,

On 05/02/2024 05:29, Anup Patel wrote:
> If some of the HARTs are parked by stop machine then IPI-based
> flushing in flush_icache_all() will hang. This hang is observed
> when text patching is invoked by various debug and BPF features.
>
> To avoid this hang, we force use of SBI-based icache flushing
> when patching text.
>
> Fixes: 627922843235 ("RISC-V: Use IPIs for remote icache flush when possible")
> Reported-by: Bjorn Topel <bjorn@kernel.org>
> Closes: https://gist.github.com/bjoto/04a580568378f3b5483af07cd9d22501
> Signed-off-by: Anup Patel <apatel@ventanamicro.com>
> ---
>   arch/riscv/include/asm/cacheflush.h | 7 ++++---
>   arch/riscv/kernel/hibernate.c       | 2 +-
>   arch/riscv/kernel/patch.c           | 4 ++--
>   arch/riscv/mm/cacheflush.c          | 7 ++++---
>   4 files changed, 11 insertions(+), 9 deletions(-)
>
> diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h
> index a129dac4521d..561e079f34af 100644
> --- a/arch/riscv/include/asm/cacheflush.h
> +++ b/arch/riscv/include/asm/cacheflush.h
> @@ -32,7 +32,8 @@ static inline void flush_dcache_page(struct page *page)
>    * RISC-V doesn't have an instruction to flush parts of the instruction cache,
>    * so instead we just flush the whole thing.
>    */
> -#define flush_icache_range(start, end) flush_icache_all()
> +#define flush_icache_range(start, end) flush_icache_all(true)
> +#define flush_icache_patch_range(start, end) flush_icache_all(false)
>   #define flush_icache_user_page(vma, pg, addr, len) \
>   	flush_icache_mm(vma->vm_mm, 0)
>   
> @@ -43,12 +44,12 @@ static inline void flush_dcache_page(struct page *page)
>   
>   #ifndef CONFIG_SMP
>   
> -#define flush_icache_all() local_flush_icache_all()
> +#define flush_icache_all(want_ipi) local_flush_icache_all()
>   #define flush_icache_mm(mm, local) flush_icache_all()
>   
>   #else /* CONFIG_SMP */
>   
> -void flush_icache_all(void);
> +void flush_icache_all(bool want_ipi);
>   void flush_icache_mm(struct mm_struct *mm, bool local);
>   
>   #endif /* CONFIG_SMP */
> diff --git a/arch/riscv/kernel/hibernate.c b/arch/riscv/kernel/hibernate.c
> index 671b686c0158..388f10e187ba 100644
> --- a/arch/riscv/kernel/hibernate.c
> +++ b/arch/riscv/kernel/hibernate.c
> @@ -153,7 +153,7 @@ int swsusp_arch_suspend(void)
>   	} else {
>   		suspend_restore_csrs(hibernate_cpu_context);
>   		flush_tlb_all();
> -		flush_icache_all();
> +		flush_icache_all(true);
>   
>   		/*
>   		 * Tell the hibernation core that we've just restored the memory.
> diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
> index 37e87fdcf6a0..721e144a7847 100644
> --- a/arch/riscv/kernel/patch.c
> +++ b/arch/riscv/kernel/patch.c
> @@ -182,7 +182,7 @@ int patch_text_set_nosync(void *addr, u8 c, size_t len)
>   	ret = patch_insn_set(tp, c, len);
>   
>   	if (!ret)
> -		flush_icache_range((uintptr_t)tp, (uintptr_t)tp + len);
> +		flush_icache_patch_range((uintptr_t)tp, (uintptr_t)tp + len);
>   
>   	return ret;
>   }
> @@ -217,7 +217,7 @@ int patch_text_nosync(void *addr, const void *insns, size_t len)
>   	ret = patch_insn_write(tp, insns, len);
>   
>   	if (!ret)
> -		flush_icache_range((uintptr_t) tp, (uintptr_t) tp + len);
> +		flush_icache_patch_range((uintptr_t) tp, (uintptr_t) tp + len);
>   
>   	return ret;
>   }
> diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
> index 55a34f2020a8..03cd3d4831ef 100644
> --- a/arch/riscv/mm/cacheflush.c
> +++ b/arch/riscv/mm/cacheflush.c
> @@ -17,11 +17,12 @@ static void ipi_remote_fence_i(void *info)
>   	return local_flush_icache_all();
>   }
>   
> -void flush_icache_all(void)
> +void flush_icache_all(bool want_ipi)
>   {
>   	local_flush_icache_all();
>   
> -	if (IS_ENABLED(CONFIG_RISCV_SBI) && !riscv_use_ipi_for_rfence())
> +	if (IS_ENABLED(CONFIG_RISCV_SBI) &&
> +	    (!want_ipi || !riscv_use_ipi_for_rfence()))
>   		sbi_remote_fence_i(NULL);
>   	else
>   		on_each_cpu(ipi_remote_fence_i, NULL, 1);
> @@ -87,7 +88,7 @@ void flush_icache_pte(pte_t pte)
>   	struct folio *folio = page_folio(pte_page(pte));
>   
>   	if (!test_bit(PG_dcache_clean, &folio->flags)) {
> -		flush_icache_all();
> +		flush_icache_all(true);
>   		set_bit(PG_dcache_clean, &folio->flags);
>   	}
>   }


Since patch_text_cb() is run on all cpus, couldn't we completely avoid 
any remote icache flush by slightly changing patch_text_cb() instead as 
follows?

diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
index 37e87fdcf6a0..075c376ed528 100644
--- a/arch/riscv/kernel/patch.c
+++ b/arch/riscv/kernel/patch.c
@@ -232,8 +232,8 @@ static int patch_text_cb(void *data)
         if (atomic_inc_return(&patch->cpu_count) == num_online_cpus()) {
                 for (i = 0; ret == 0 && i < patch->ninsns; i++) {
                         len = GET_INSN_LENGTH(patch->insns[i]);
-                       ret = patch_text_nosync(patch->addr + i * len,
- &patch->insns[i], len);
+                       ret = patch_insn_write((u32 *)(patch->addr + i * 
len),
+ &patch->insns[i], len);
                 }
                 atomic_inc(&patch->cpu_count);
         } else {
@@ -242,6 +242,8 @@ static int patch_text_cb(void *data)
                 smp_mb();
         }

+       local_flush_icache_all();
+
         return ret;
  }
  NOKPROBE_SYMBOL(patch_text_cb);
  
Anup Patel Feb. 5, 2024, 9:55 a.m. UTC | #2
On Mon, Feb 5, 2024 at 11:52 AM Alexandre Ghiti <alex@ghiti.fr> wrote:
>
> Hi Anup,
>
> On 05/02/2024 05:29, Anup Patel wrote:
> > If some of the HARTs are parked by stop machine then IPI-based
> > flushing in flush_icache_all() will hang. This hang is observed
> > when text patching is invoked by various debug and BPF features.
> >
> > To avoid this hang, we force use of SBI-based icache flushing
> > when patching text.
> >
> > Fixes: 627922843235 ("RISC-V: Use IPIs for remote icache flush when possible")
> > Reported-by: Bjorn Topel <bjorn@kernel.org>
> > Closes: https://gist.github.com/bjoto/04a580568378f3b5483af07cd9d22501
> > Signed-off-by: Anup Patel <apatel@ventanamicro.com>
> > ---
> >   arch/riscv/include/asm/cacheflush.h | 7 ++++---
> >   arch/riscv/kernel/hibernate.c       | 2 +-
> >   arch/riscv/kernel/patch.c           | 4 ++--
> >   arch/riscv/mm/cacheflush.c          | 7 ++++---
> >   4 files changed, 11 insertions(+), 9 deletions(-)
> >
> > diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h
> > index a129dac4521d..561e079f34af 100644
> > --- a/arch/riscv/include/asm/cacheflush.h
> > +++ b/arch/riscv/include/asm/cacheflush.h
> > @@ -32,7 +32,8 @@ static inline void flush_dcache_page(struct page *page)
> >    * RISC-V doesn't have an instruction to flush parts of the instruction cache,
> >    * so instead we just flush the whole thing.
> >    */
> > -#define flush_icache_range(start, end) flush_icache_all()
> > +#define flush_icache_range(start, end) flush_icache_all(true)
> > +#define flush_icache_patch_range(start, end) flush_icache_all(false)
> >   #define flush_icache_user_page(vma, pg, addr, len) \
> >       flush_icache_mm(vma->vm_mm, 0)
> >
> > @@ -43,12 +44,12 @@ static inline void flush_dcache_page(struct page *page)
> >
> >   #ifndef CONFIG_SMP
> >
> > -#define flush_icache_all() local_flush_icache_all()
> > +#define flush_icache_all(want_ipi) local_flush_icache_all()
> >   #define flush_icache_mm(mm, local) flush_icache_all()
> >
> >   #else /* CONFIG_SMP */
> >
> > -void flush_icache_all(void);
> > +void flush_icache_all(bool want_ipi);
> >   void flush_icache_mm(struct mm_struct *mm, bool local);
> >
> >   #endif /* CONFIG_SMP */
> > diff --git a/arch/riscv/kernel/hibernate.c b/arch/riscv/kernel/hibernate.c
> > index 671b686c0158..388f10e187ba 100644
> > --- a/arch/riscv/kernel/hibernate.c
> > +++ b/arch/riscv/kernel/hibernate.c
> > @@ -153,7 +153,7 @@ int swsusp_arch_suspend(void)
> >       } else {
> >               suspend_restore_csrs(hibernate_cpu_context);
> >               flush_tlb_all();
> > -             flush_icache_all();
> > +             flush_icache_all(true);
> >
> >               /*
> >                * Tell the hibernation core that we've just restored the memory.
> > diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
> > index 37e87fdcf6a0..721e144a7847 100644
> > --- a/arch/riscv/kernel/patch.c
> > +++ b/arch/riscv/kernel/patch.c
> > @@ -182,7 +182,7 @@ int patch_text_set_nosync(void *addr, u8 c, size_t len)
> >       ret = patch_insn_set(tp, c, len);
> >
> >       if (!ret)
> > -             flush_icache_range((uintptr_t)tp, (uintptr_t)tp + len);
> > +             flush_icache_patch_range((uintptr_t)tp, (uintptr_t)tp + len);
> >
> >       return ret;
> >   }
> > @@ -217,7 +217,7 @@ int patch_text_nosync(void *addr, const void *insns, size_t len)
> >       ret = patch_insn_write(tp, insns, len);
> >
> >       if (!ret)
> > -             flush_icache_range((uintptr_t) tp, (uintptr_t) tp + len);
> > +             flush_icache_patch_range((uintptr_t) tp, (uintptr_t) tp + len);
> >
> >       return ret;
> >   }
> > diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
> > index 55a34f2020a8..03cd3d4831ef 100644
> > --- a/arch/riscv/mm/cacheflush.c
> > +++ b/arch/riscv/mm/cacheflush.c
> > @@ -17,11 +17,12 @@ static void ipi_remote_fence_i(void *info)
> >       return local_flush_icache_all();
> >   }
> >
> > -void flush_icache_all(void)
> > +void flush_icache_all(bool want_ipi)
> >   {
> >       local_flush_icache_all();
> >
> > -     if (IS_ENABLED(CONFIG_RISCV_SBI) && !riscv_use_ipi_for_rfence())
> > +     if (IS_ENABLED(CONFIG_RISCV_SBI) &&
> > +         (!want_ipi || !riscv_use_ipi_for_rfence()))
> >               sbi_remote_fence_i(NULL);
> >       else
> >               on_each_cpu(ipi_remote_fence_i, NULL, 1);
> > @@ -87,7 +88,7 @@ void flush_icache_pte(pte_t pte)
> >       struct folio *folio = page_folio(pte_page(pte));
> >
> >       if (!test_bit(PG_dcache_clean, &folio->flags)) {
> > -             flush_icache_all();
> > +             flush_icache_all(true);
> >               set_bit(PG_dcache_clean, &folio->flags);
> >       }
> >   }
>
>
> Since patch_text_cb() is run on all cpus, couldn't we completely avoid
> any remote icache flush by slightly changing patch_text_cb() instead as
> follows?

Unfortunately patch_text_cb() is not the only user of patch_text_nosync
since patch_text_nosync() and patch_text_set_nosync() are called directly
from other places as well.

We have to update all users of patch_text_nosync() and
patch_text_set_nosync() to move to local icache flushes which
is a much bigger change.

Regards,
Anup

>
> diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
> index 37e87fdcf6a0..075c376ed528 100644
> --- a/arch/riscv/kernel/patch.c
> +++ b/arch/riscv/kernel/patch.c
> @@ -232,8 +232,8 @@ static int patch_text_cb(void *data)
>          if (atomic_inc_return(&patch->cpu_count) == num_online_cpus()) {
>                  for (i = 0; ret == 0 && i < patch->ninsns; i++) {
>                          len = GET_INSN_LENGTH(patch->insns[i]);
> -                       ret = patch_text_nosync(patch->addr + i * len,
> - &patch->insns[i], len);
> +                       ret = patch_insn_write((u32 *)(patch->addr + i *
> len),
> + &patch->insns[i], len);
>                  }
>                  atomic_inc(&patch->cpu_count);
>          } else {
> @@ -242,6 +242,8 @@ static int patch_text_cb(void *data)
>                  smp_mb();
>          }
>
> +       local_flush_icache_all();
> +
>          return ret;
>   }
>   NOKPROBE_SYMBOL(patch_text_cb);
>
>
>
  
Björn Töpel Feb. 5, 2024, 11 a.m. UTC | #3
Anup Patel <apatel@ventanamicro.com> writes:

> On Mon, Feb 5, 2024 at 11:52 AM Alexandre Ghiti <alex@ghiti.fr> wrote:
>>
>> Hi Anup,
>>
>> On 05/02/2024 05:29, Anup Patel wrote:
>> > If some of the HARTs are parked by stop machine then IPI-based
>> > flushing in flush_icache_all() will hang. This hang is observed
>> > when text patching is invoked by various debug and BPF features.
>> >
>> > To avoid this hang, we force use of SBI-based icache flushing
>> > when patching text.
>> >
>> > Fixes: 627922843235 ("RISC-V: Use IPIs for remote icache flush when possible")
>> > Reported-by: Bjorn Topel <bjorn@kernel.org>
>> > Closes: https://gist.github.com/bjoto/04a580568378f3b5483af07cd9d22501
>> > Signed-off-by: Anup Patel <apatel@ventanamicro.com>
>> > ---
>> >   arch/riscv/include/asm/cacheflush.h | 7 ++++---
>> >   arch/riscv/kernel/hibernate.c       | 2 +-
>> >   arch/riscv/kernel/patch.c           | 4 ++--
>> >   arch/riscv/mm/cacheflush.c          | 7 ++++---
>> >   4 files changed, 11 insertions(+), 9 deletions(-)
>> >
>> > diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h
>> > index a129dac4521d..561e079f34af 100644
>> > --- a/arch/riscv/include/asm/cacheflush.h
>> > +++ b/arch/riscv/include/asm/cacheflush.h
>> > @@ -32,7 +32,8 @@ static inline void flush_dcache_page(struct page *page)
>> >    * RISC-V doesn't have an instruction to flush parts of the instruction cache,
>> >    * so instead we just flush the whole thing.
>> >    */
>> > -#define flush_icache_range(start, end) flush_icache_all()
>> > +#define flush_icache_range(start, end) flush_icache_all(true)
>> > +#define flush_icache_patch_range(start, end) flush_icache_all(false)
>> >   #define flush_icache_user_page(vma, pg, addr, len) \
>> >       flush_icache_mm(vma->vm_mm, 0)
>> >
>> > @@ -43,12 +44,12 @@ static inline void flush_dcache_page(struct page *page)
>> >
>> >   #ifndef CONFIG_SMP
>> >
>> > -#define flush_icache_all() local_flush_icache_all()
>> > +#define flush_icache_all(want_ipi) local_flush_icache_all()
>> >   #define flush_icache_mm(mm, local) flush_icache_all()
>> >
>> >   #else /* CONFIG_SMP */
>> >
>> > -void flush_icache_all(void);
>> > +void flush_icache_all(bool want_ipi);
>> >   void flush_icache_mm(struct mm_struct *mm, bool local);
>> >
>> >   #endif /* CONFIG_SMP */
>> > diff --git a/arch/riscv/kernel/hibernate.c b/arch/riscv/kernel/hibernate.c
>> > index 671b686c0158..388f10e187ba 100644
>> > --- a/arch/riscv/kernel/hibernate.c
>> > +++ b/arch/riscv/kernel/hibernate.c
>> > @@ -153,7 +153,7 @@ int swsusp_arch_suspend(void)
>> >       } else {
>> >               suspend_restore_csrs(hibernate_cpu_context);
>> >               flush_tlb_all();
>> > -             flush_icache_all();
>> > +             flush_icache_all(true);
>> >
>> >               /*
>> >                * Tell the hibernation core that we've just restored the memory.
>> > diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
>> > index 37e87fdcf6a0..721e144a7847 100644
>> > --- a/arch/riscv/kernel/patch.c
>> > +++ b/arch/riscv/kernel/patch.c
>> > @@ -182,7 +182,7 @@ int patch_text_set_nosync(void *addr, u8 c, size_t len)
>> >       ret = patch_insn_set(tp, c, len);
>> >
>> >       if (!ret)
>> > -             flush_icache_range((uintptr_t)tp, (uintptr_t)tp + len);
>> > +             flush_icache_patch_range((uintptr_t)tp, (uintptr_t)tp + len);
>> >
>> >       return ret;
>> >   }
>> > @@ -217,7 +217,7 @@ int patch_text_nosync(void *addr, const void *insns, size_t len)
>> >       ret = patch_insn_write(tp, insns, len);
>> >
>> >       if (!ret)
>> > -             flush_icache_range((uintptr_t) tp, (uintptr_t) tp + len);
>> > +             flush_icache_patch_range((uintptr_t) tp, (uintptr_t) tp + len);
>> >
>> >       return ret;
>> >   }
>> > diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
>> > index 55a34f2020a8..03cd3d4831ef 100644
>> > --- a/arch/riscv/mm/cacheflush.c
>> > +++ b/arch/riscv/mm/cacheflush.c
>> > @@ -17,11 +17,12 @@ static void ipi_remote_fence_i(void *info)
>> >       return local_flush_icache_all();
>> >   }
>> >
>> > -void flush_icache_all(void)
>> > +void flush_icache_all(bool want_ipi)
>> >   {
>> >       local_flush_icache_all();
>> >
>> > -     if (IS_ENABLED(CONFIG_RISCV_SBI) && !riscv_use_ipi_for_rfence())
>> > +     if (IS_ENABLED(CONFIG_RISCV_SBI) &&
>> > +         (!want_ipi || !riscv_use_ipi_for_rfence()))
>> >               sbi_remote_fence_i(NULL);
>> >       else
>> >               on_each_cpu(ipi_remote_fence_i, NULL, 1);
>> > @@ -87,7 +88,7 @@ void flush_icache_pte(pte_t pte)
>> >       struct folio *folio = page_folio(pte_page(pte));
>> >
>> >       if (!test_bit(PG_dcache_clean, &folio->flags)) {
>> > -             flush_icache_all();
>> > +             flush_icache_all(true);
>> >               set_bit(PG_dcache_clean, &folio->flags);
>> >       }
>> >   }
>>
>>
>> Since patch_text_cb() is run on all cpus, couldn't we completely avoid
>> any remote icache flush by slightly changing patch_text_cb() instead as
>> follows?
>
> Unfortunately patch_text_cb() is not the only user of patch_text_nosync
> since patch_text_nosync() and patch_text_set_nosync() are called directly
> from other places as well.

Yeah. There is one more stop_machine() text patching user, and that's
ftrace. ftrace is using stop_machine() with the last argument set to
NULL, so only patching on *any* hart. Continuing on Alex' idea would be
to place an IPI flush in ftrace_arch_code_modify_post_process(),
unfortately that's too late since we're already moved on from
stop_machine().

> We have to update all users of patch_text_nosync() and
> patch_text_set_nosync() to move to local icache flushes which
> is a much bigger change.

Only the ftrace stop_machine() user, right? Alex solution is sufficient
for patch_text(). I'm not a super fan of conditionally calling into SBI
and passing around boolean context flags as a workaround... :-( Any
other alternatives?

The obvious fixing text patching not to be completly useless on RISC-V,
but that's an even bigger patch...


Björn
  
Anup Patel Feb. 5, 2024, 11:22 a.m. UTC | #4
On Mon, Feb 5, 2024 at 4:30 PM Björn Töpel <bjorn@kernel.org> wrote:
>
> Anup Patel <apatel@ventanamicro.com> writes:
>
> > On Mon, Feb 5, 2024 at 11:52 AM Alexandre Ghiti <alex@ghiti.fr> wrote:
> >>
> >> Hi Anup,
> >>
> >> On 05/02/2024 05:29, Anup Patel wrote:
> >> > If some of the HARTs are parked by stop machine then IPI-based
> >> > flushing in flush_icache_all() will hang. This hang is observed
> >> > when text patching is invoked by various debug and BPF features.
> >> >
> >> > To avoid this hang, we force use of SBI-based icache flushing
> >> > when patching text.
> >> >
> >> > Fixes: 627922843235 ("RISC-V: Use IPIs for remote icache flush when possible")
> >> > Reported-by: Bjorn Topel <bjorn@kernel.org>
> >> > Closes: https://gist.github.com/bjoto/04a580568378f3b5483af07cd9d22501
> >> > Signed-off-by: Anup Patel <apatel@ventanamicro.com>
> >> > ---
> >> >   arch/riscv/include/asm/cacheflush.h | 7 ++++---
> >> >   arch/riscv/kernel/hibernate.c       | 2 +-
> >> >   arch/riscv/kernel/patch.c           | 4 ++--
> >> >   arch/riscv/mm/cacheflush.c          | 7 ++++---
> >> >   4 files changed, 11 insertions(+), 9 deletions(-)
> >> >
> >> > diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h
> >> > index a129dac4521d..561e079f34af 100644
> >> > --- a/arch/riscv/include/asm/cacheflush.h
> >> > +++ b/arch/riscv/include/asm/cacheflush.h
> >> > @@ -32,7 +32,8 @@ static inline void flush_dcache_page(struct page *page)
> >> >    * RISC-V doesn't have an instruction to flush parts of the instruction cache,
> >> >    * so instead we just flush the whole thing.
> >> >    */
> >> > -#define flush_icache_range(start, end) flush_icache_all()
> >> > +#define flush_icache_range(start, end) flush_icache_all(true)
> >> > +#define flush_icache_patch_range(start, end) flush_icache_all(false)
> >> >   #define flush_icache_user_page(vma, pg, addr, len) \
> >> >       flush_icache_mm(vma->vm_mm, 0)
> >> >
> >> > @@ -43,12 +44,12 @@ static inline void flush_dcache_page(struct page *page)
> >> >
> >> >   #ifndef CONFIG_SMP
> >> >
> >> > -#define flush_icache_all() local_flush_icache_all()
> >> > +#define flush_icache_all(want_ipi) local_flush_icache_all()
> >> >   #define flush_icache_mm(mm, local) flush_icache_all()
> >> >
> >> >   #else /* CONFIG_SMP */
> >> >
> >> > -void flush_icache_all(void);
> >> > +void flush_icache_all(bool want_ipi);
> >> >   void flush_icache_mm(struct mm_struct *mm, bool local);
> >> >
> >> >   #endif /* CONFIG_SMP */
> >> > diff --git a/arch/riscv/kernel/hibernate.c b/arch/riscv/kernel/hibernate.c
> >> > index 671b686c0158..388f10e187ba 100644
> >> > --- a/arch/riscv/kernel/hibernate.c
> >> > +++ b/arch/riscv/kernel/hibernate.c
> >> > @@ -153,7 +153,7 @@ int swsusp_arch_suspend(void)
> >> >       } else {
> >> >               suspend_restore_csrs(hibernate_cpu_context);
> >> >               flush_tlb_all();
> >> > -             flush_icache_all();
> >> > +             flush_icache_all(true);
> >> >
> >> >               /*
> >> >                * Tell the hibernation core that we've just restored the memory.
> >> > diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
> >> > index 37e87fdcf6a0..721e144a7847 100644
> >> > --- a/arch/riscv/kernel/patch.c
> >> > +++ b/arch/riscv/kernel/patch.c
> >> > @@ -182,7 +182,7 @@ int patch_text_set_nosync(void *addr, u8 c, size_t len)
> >> >       ret = patch_insn_set(tp, c, len);
> >> >
> >> >       if (!ret)
> >> > -             flush_icache_range((uintptr_t)tp, (uintptr_t)tp + len);
> >> > +             flush_icache_patch_range((uintptr_t)tp, (uintptr_t)tp + len);
> >> >
> >> >       return ret;
> >> >   }
> >> > @@ -217,7 +217,7 @@ int patch_text_nosync(void *addr, const void *insns, size_t len)
> >> >       ret = patch_insn_write(tp, insns, len);
> >> >
> >> >       if (!ret)
> >> > -             flush_icache_range((uintptr_t) tp, (uintptr_t) tp + len);
> >> > +             flush_icache_patch_range((uintptr_t) tp, (uintptr_t) tp + len);
> >> >
> >> >       return ret;
> >> >   }
> >> > diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
> >> > index 55a34f2020a8..03cd3d4831ef 100644
> >> > --- a/arch/riscv/mm/cacheflush.c
> >> > +++ b/arch/riscv/mm/cacheflush.c
> >> > @@ -17,11 +17,12 @@ static void ipi_remote_fence_i(void *info)
> >> >       return local_flush_icache_all();
> >> >   }
> >> >
> >> > -void flush_icache_all(void)
> >> > +void flush_icache_all(bool want_ipi)
> >> >   {
> >> >       local_flush_icache_all();
> >> >
> >> > -     if (IS_ENABLED(CONFIG_RISCV_SBI) && !riscv_use_ipi_for_rfence())
> >> > +     if (IS_ENABLED(CONFIG_RISCV_SBI) &&
> >> > +         (!want_ipi || !riscv_use_ipi_for_rfence()))
> >> >               sbi_remote_fence_i(NULL);
> >> >       else
> >> >               on_each_cpu(ipi_remote_fence_i, NULL, 1);
> >> > @@ -87,7 +88,7 @@ void flush_icache_pte(pte_t pte)
> >> >       struct folio *folio = page_folio(pte_page(pte));
> >> >
> >> >       if (!test_bit(PG_dcache_clean, &folio->flags)) {
> >> > -             flush_icache_all();
> >> > +             flush_icache_all(true);
> >> >               set_bit(PG_dcache_clean, &folio->flags);
> >> >       }
> >> >   }
> >>
> >>
> >> Since patch_text_cb() is run on all cpus, couldn't we completely avoid
> >> any remote icache flush by slightly changing patch_text_cb() instead as
> >> follows?
> >
> > Unfortunately patch_text_cb() is not the only user of patch_text_nosync
> > since patch_text_nosync() and patch_text_set_nosync() are called directly
> > from other places as well.
>
> Yeah. There is one more stop_machine() text patching user, and that's
> ftrace. ftrace is using stop_machine() with the last argument set to
> NULL, so only patching on *any* hart. Continuing on Alex' idea would be
> to place an IPI flush in ftrace_arch_code_modify_post_process(),
> unfortately that's too late since we're already moved on from
> stop_machine().
>
> > We have to update all users of patch_text_nosync() and
> > patch_text_set_nosync() to move to local icache flushes which
> > is a much bigger change.
>
> Only the ftrace stop_machine() user, right? Alex solution is sufficient
> for patch_text(). I'm not a super fan of conditionally calling into SBI
> and passing around boolean context flags as a workaround... :-( Any
> other alternatives?

I was seeing hang because of patch_text_nosync() getting called from
the BPF path in my debug sessions.

I am certainly not a fan of the approach taken by this patch but this is
the smallest amount of change I could come-up as FIXUP. We should
certainly have a separate patch to do this in a proper way.

>
> The obvious fixing text patching not to be completly useless on RISC-V,
> but that's an even bigger patch...
>

I agree.

Regards,
Anup
  
Björn Töpel Feb. 5, 2024, 11:41 a.m. UTC | #5
Anup Patel <apatel@ventanamicro.com> writes:

> On Mon, Feb 5, 2024 at 4:30 PM Björn Töpel <bjorn@kernel.org> wrote:
>>
>> Anup Patel <apatel@ventanamicro.com> writes:
>>
>> > On Mon, Feb 5, 2024 at 11:52 AM Alexandre Ghiti <alex@ghiti.fr> wrote:
>> >>
>> >> Hi Anup,
>> >>
>> >> On 05/02/2024 05:29, Anup Patel wrote:
>> >> > If some of the HARTs are parked by stop machine then IPI-based
>> >> > flushing in flush_icache_all() will hang. This hang is observed
>> >> > when text patching is invoked by various debug and BPF features.
>> >> >
>> >> > To avoid this hang, we force use of SBI-based icache flushing
>> >> > when patching text.
>> >> >
>> >> > Fixes: 627922843235 ("RISC-V: Use IPIs for remote icache flush when possible")
>> >> > Reported-by: Bjorn Topel <bjorn@kernel.org>
>> >> > Closes: https://gist.github.com/bjoto/04a580568378f3b5483af07cd9d22501
>> >> > Signed-off-by: Anup Patel <apatel@ventanamicro.com>
>> >> > ---
>> >> >   arch/riscv/include/asm/cacheflush.h | 7 ++++---
>> >> >   arch/riscv/kernel/hibernate.c       | 2 +-
>> >> >   arch/riscv/kernel/patch.c           | 4 ++--
>> >> >   arch/riscv/mm/cacheflush.c          | 7 ++++---
>> >> >   4 files changed, 11 insertions(+), 9 deletions(-)
>> >> >
>> >> > diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h
>> >> > index a129dac4521d..561e079f34af 100644
>> >> > --- a/arch/riscv/include/asm/cacheflush.h
>> >> > +++ b/arch/riscv/include/asm/cacheflush.h
>> >> > @@ -32,7 +32,8 @@ static inline void flush_dcache_page(struct page *page)
>> >> >    * RISC-V doesn't have an instruction to flush parts of the instruction cache,
>> >> >    * so instead we just flush the whole thing.
>> >> >    */
>> >> > -#define flush_icache_range(start, end) flush_icache_all()
>> >> > +#define flush_icache_range(start, end) flush_icache_all(true)
>> >> > +#define flush_icache_patch_range(start, end) flush_icache_all(false)
>> >> >   #define flush_icache_user_page(vma, pg, addr, len) \
>> >> >       flush_icache_mm(vma->vm_mm, 0)
>> >> >
>> >> > @@ -43,12 +44,12 @@ static inline void flush_dcache_page(struct page *page)
>> >> >
>> >> >   #ifndef CONFIG_SMP
>> >> >
>> >> > -#define flush_icache_all() local_flush_icache_all()
>> >> > +#define flush_icache_all(want_ipi) local_flush_icache_all()
>> >> >   #define flush_icache_mm(mm, local) flush_icache_all()
>> >> >
>> >> >   #else /* CONFIG_SMP */
>> >> >
>> >> > -void flush_icache_all(void);
>> >> > +void flush_icache_all(bool want_ipi);
>> >> >   void flush_icache_mm(struct mm_struct *mm, bool local);
>> >> >
>> >> >   #endif /* CONFIG_SMP */
>> >> > diff --git a/arch/riscv/kernel/hibernate.c b/arch/riscv/kernel/hibernate.c
>> >> > index 671b686c0158..388f10e187ba 100644
>> >> > --- a/arch/riscv/kernel/hibernate.c
>> >> > +++ b/arch/riscv/kernel/hibernate.c
>> >> > @@ -153,7 +153,7 @@ int swsusp_arch_suspend(void)
>> >> >       } else {
>> >> >               suspend_restore_csrs(hibernate_cpu_context);
>> >> >               flush_tlb_all();
>> >> > -             flush_icache_all();
>> >> > +             flush_icache_all(true);
>> >> >
>> >> >               /*
>> >> >                * Tell the hibernation core that we've just restored the memory.
>> >> > diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
>> >> > index 37e87fdcf6a0..721e144a7847 100644
>> >> > --- a/arch/riscv/kernel/patch.c
>> >> > +++ b/arch/riscv/kernel/patch.c
>> >> > @@ -182,7 +182,7 @@ int patch_text_set_nosync(void *addr, u8 c, size_t len)
>> >> >       ret = patch_insn_set(tp, c, len);
>> >> >
>> >> >       if (!ret)
>> >> > -             flush_icache_range((uintptr_t)tp, (uintptr_t)tp + len);
>> >> > +             flush_icache_patch_range((uintptr_t)tp, (uintptr_t)tp + len);
>> >> >
>> >> >       return ret;
>> >> >   }
>> >> > @@ -217,7 +217,7 @@ int patch_text_nosync(void *addr, const void *insns, size_t len)
>> >> >       ret = patch_insn_write(tp, insns, len);
>> >> >
>> >> >       if (!ret)
>> >> > -             flush_icache_range((uintptr_t) tp, (uintptr_t) tp + len);
>> >> > +             flush_icache_patch_range((uintptr_t) tp, (uintptr_t) tp + len);
>> >> >
>> >> >       return ret;
>> >> >   }
>> >> > diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
>> >> > index 55a34f2020a8..03cd3d4831ef 100644
>> >> > --- a/arch/riscv/mm/cacheflush.c
>> >> > +++ b/arch/riscv/mm/cacheflush.c
>> >> > @@ -17,11 +17,12 @@ static void ipi_remote_fence_i(void *info)
>> >> >       return local_flush_icache_all();
>> >> >   }
>> >> >
>> >> > -void flush_icache_all(void)
>> >> > +void flush_icache_all(bool want_ipi)
>> >> >   {
>> >> >       local_flush_icache_all();
>> >> >
>> >> > -     if (IS_ENABLED(CONFIG_RISCV_SBI) && !riscv_use_ipi_for_rfence())
>> >> > +     if (IS_ENABLED(CONFIG_RISCV_SBI) &&
>> >> > +         (!want_ipi || !riscv_use_ipi_for_rfence()))
>> >> >               sbi_remote_fence_i(NULL);
>> >> >       else
>> >> >               on_each_cpu(ipi_remote_fence_i, NULL, 1);
>> >> > @@ -87,7 +88,7 @@ void flush_icache_pte(pte_t pte)
>> >> >       struct folio *folio = page_folio(pte_page(pte));
>> >> >
>> >> >       if (!test_bit(PG_dcache_clean, &folio->flags)) {
>> >> > -             flush_icache_all();
>> >> > +             flush_icache_all(true);
>> >> >               set_bit(PG_dcache_clean, &folio->flags);
>> >> >       }
>> >> >   }
>> >>
>> >>
>> >> Since patch_text_cb() is run on all cpus, couldn't we completely avoid
>> >> any remote icache flush by slightly changing patch_text_cb() instead as
>> >> follows?
>> >
>> > Unfortunately patch_text_cb() is not the only user of patch_text_nosync
>> > since patch_text_nosync() and patch_text_set_nosync() are called directly
>> > from other places as well.
>>
>> Yeah. There is one more stop_machine() text patching user, and that's
>> ftrace. ftrace is using stop_machine() with the last argument set to
>> NULL, so only patching on *any* hart. Continuing on Alex' idea would be
>> to place an IPI flush in ftrace_arch_code_modify_post_process(),
>> unfortately that's too late since we're already moved on from
>> stop_machine().
>>
>> > We have to update all users of patch_text_nosync() and
>> > patch_text_set_nosync() to move to local icache flushes which
>> > is a much bigger change.
>>
>> Only the ftrace stop_machine() user, right? Alex solution is sufficient
>> for patch_text(). I'm not a super fan of conditionally calling into SBI
>> and passing around boolean context flags as a workaround... :-( Any
>> other alternatives?
>
> I was seeing hang because of patch_text_nosync() getting called from
> the BPF path in my debug sessions.

Yeah, and this is ftrace's stop_machine() path. All ftrace() paths,
and all kprobe paths (patch_text) will have this hang with the IMSIC
series unless the fixup is done. :-(

> I am certainly not a fan of the approach taken by this patch but this is
> the smallest amount of change I could come-up as FIXUP. We should
> certainly have a separate patch to do this in a proper way.

Yup (and thanks for working on it BTW!). Hmm, making it possible for
Charlie's work [1] to attach to cpu_stopper? But maybe that's more
work...

Unless anyone can comeup with a cleaner (non-SBI), working solution, I'd
say go for this one...


Björn
  
Björn Töpel Feb. 5, 2024, 11:42 a.m. UTC | #6
Björn Töpel <bjorn@kernel.org> writes:

> Yup (and thanks for working on it BTW!). Hmm, making it possible for
> Charlie's work [1] to attach to cpu_stopper? But maybe that's more
> work...

Trigger finger...

[1] https://lore.kernel.org/linux-riscv/20240124-fencei-v10-1-a25971f4301d@rivosinc.com/
  
Alexandre Ghiti Feb. 5, 2024, 2:08 p.m. UTC | #7
On 05/02/2024 12:00, Björn Töpel wrote:
> Anup Patel <apatel@ventanamicro.com> writes:
>
>> On Mon, Feb 5, 2024 at 11:52 AM Alexandre Ghiti <alex@ghiti.fr> wrote:
>>> Hi Anup,
>>>
>>> On 05/02/2024 05:29, Anup Patel wrote:
>>>> If some of the HARTs are parked by stop machine then IPI-based
>>>> flushing in flush_icache_all() will hang. This hang is observed
>>>> when text patching is invoked by various debug and BPF features.
>>>>
>>>> To avoid this hang, we force use of SBI-based icache flushing
>>>> when patching text.
>>>>
>>>> Fixes: 627922843235 ("RISC-V: Use IPIs for remote icache flush when possible")
>>>> Reported-by: Bjorn Topel <bjorn@kernel.org>
>>>> Closes: https://gist.github.com/bjoto/04a580568378f3b5483af07cd9d22501
>>>> Signed-off-by: Anup Patel <apatel@ventanamicro.com>
>>>> ---
>>>>    arch/riscv/include/asm/cacheflush.h | 7 ++++---
>>>>    arch/riscv/kernel/hibernate.c       | 2 +-
>>>>    arch/riscv/kernel/patch.c           | 4 ++--
>>>>    arch/riscv/mm/cacheflush.c          | 7 ++++---
>>>>    4 files changed, 11 insertions(+), 9 deletions(-)
>>>>
>>>> diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h
>>>> index a129dac4521d..561e079f34af 100644
>>>> --- a/arch/riscv/include/asm/cacheflush.h
>>>> +++ b/arch/riscv/include/asm/cacheflush.h
>>>> @@ -32,7 +32,8 @@ static inline void flush_dcache_page(struct page *page)
>>>>     * RISC-V doesn't have an instruction to flush parts of the instruction cache,
>>>>     * so instead we just flush the whole thing.
>>>>     */
>>>> -#define flush_icache_range(start, end) flush_icache_all()
>>>> +#define flush_icache_range(start, end) flush_icache_all(true)
>>>> +#define flush_icache_patch_range(start, end) flush_icache_all(false)
>>>>    #define flush_icache_user_page(vma, pg, addr, len) \
>>>>        flush_icache_mm(vma->vm_mm, 0)
>>>>
>>>> @@ -43,12 +44,12 @@ static inline void flush_dcache_page(struct page *page)
>>>>
>>>>    #ifndef CONFIG_SMP
>>>>
>>>> -#define flush_icache_all() local_flush_icache_all()
>>>> +#define flush_icache_all(want_ipi) local_flush_icache_all()
>>>>    #define flush_icache_mm(mm, local) flush_icache_all()
>>>>
>>>>    #else /* CONFIG_SMP */
>>>>
>>>> -void flush_icache_all(void);
>>>> +void flush_icache_all(bool want_ipi);
>>>>    void flush_icache_mm(struct mm_struct *mm, bool local);
>>>>
>>>>    #endif /* CONFIG_SMP */
>>>> diff --git a/arch/riscv/kernel/hibernate.c b/arch/riscv/kernel/hibernate.c
>>>> index 671b686c0158..388f10e187ba 100644
>>>> --- a/arch/riscv/kernel/hibernate.c
>>>> +++ b/arch/riscv/kernel/hibernate.c
>>>> @@ -153,7 +153,7 @@ int swsusp_arch_suspend(void)
>>>>        } else {
>>>>                suspend_restore_csrs(hibernate_cpu_context);
>>>>                flush_tlb_all();
>>>> -             flush_icache_all();
>>>> +             flush_icache_all(true);
>>>>
>>>>                /*
>>>>                 * Tell the hibernation core that we've just restored the memory.
>>>> diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
>>>> index 37e87fdcf6a0..721e144a7847 100644
>>>> --- a/arch/riscv/kernel/patch.c
>>>> +++ b/arch/riscv/kernel/patch.c
>>>> @@ -182,7 +182,7 @@ int patch_text_set_nosync(void *addr, u8 c, size_t len)
>>>>        ret = patch_insn_set(tp, c, len);
>>>>
>>>>        if (!ret)
>>>> -             flush_icache_range((uintptr_t)tp, (uintptr_t)tp + len);
>>>> +             flush_icache_patch_range((uintptr_t)tp, (uintptr_t)tp + len);
>>>>
>>>>        return ret;
>>>>    }
>>>> @@ -217,7 +217,7 @@ int patch_text_nosync(void *addr, const void *insns, size_t len)
>>>>        ret = patch_insn_write(tp, insns, len);
>>>>
>>>>        if (!ret)
>>>> -             flush_icache_range((uintptr_t) tp, (uintptr_t) tp + len);
>>>> +             flush_icache_patch_range((uintptr_t) tp, (uintptr_t) tp + len);
>>>>
>>>>        return ret;
>>>>    }
>>>> diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
>>>> index 55a34f2020a8..03cd3d4831ef 100644
>>>> --- a/arch/riscv/mm/cacheflush.c
>>>> +++ b/arch/riscv/mm/cacheflush.c
>>>> @@ -17,11 +17,12 @@ static void ipi_remote_fence_i(void *info)
>>>>        return local_flush_icache_all();
>>>>    }
>>>>
>>>> -void flush_icache_all(void)
>>>> +void flush_icache_all(bool want_ipi)
>>>>    {
>>>>        local_flush_icache_all();
>>>>
>>>> -     if (IS_ENABLED(CONFIG_RISCV_SBI) && !riscv_use_ipi_for_rfence())
>>>> +     if (IS_ENABLED(CONFIG_RISCV_SBI) &&
>>>> +         (!want_ipi || !riscv_use_ipi_for_rfence()))
>>>>                sbi_remote_fence_i(NULL);
>>>>        else
>>>>                on_each_cpu(ipi_remote_fence_i, NULL, 1);
>>>> @@ -87,7 +88,7 @@ void flush_icache_pte(pte_t pte)
>>>>        struct folio *folio = page_folio(pte_page(pte));
>>>>
>>>>        if (!test_bit(PG_dcache_clean, &folio->flags)) {
>>>> -             flush_icache_all();
>>>> +             flush_icache_all(true);
>>>>                set_bit(PG_dcache_clean, &folio->flags);
>>>>        }
>>>>    }
>>>
>>> Since patch_text_cb() is run on all cpus, couldn't we completely avoid
>>> any remote icache flush by slightly changing patch_text_cb() instead as
>>> follows?
>> Unfortunately patch_text_cb() is not the only user of patch_text_nosync
>> since patch_text_nosync() and patch_text_set_nosync() are called directly
>> from other places as well.
> Yeah. There is one more stop_machine() text patching user, and that's
> ftrace. ftrace is using stop_machine() with the last argument set to
> NULL, so only patching on *any* hart. Continuing on Alex' idea would be
> to place an IPI flush in ftrace_arch_code_modify_post_process(),
> unfortately that's too late since we're already moved on from
> stop_machine().


After discussion with Bjorn, we think the solution would be to 
reimplement arch_ftrace_update_code() with stop_machine(..., 
cpu_online_mask) and use the same barrier as the one in patch_text_cb() 
(csky does just that 
https://elixir.bootlin.com/linux/latest/source/arch/csky/kernel/ftrace.c#L224). 
And then we can apply the same solution as I first proposed: no more 
remote icache flushes, only local ones.

What do you think Anup? I can come up with this patch if you want.


>
>> We have to update all users of patch_text_nosync() and
>> patch_text_set_nosync() to move to local icache flushes which
>> is a much bigger change.
> Only the ftrace stop_machine() user, right? Alex solution is sufficient
> for patch_text(). I'm not a super fan of conditionally calling into SBI
> and passing around boolean context flags as a workaround... :-( Any
> other alternatives?
>
> The obvious fixing text patching not to be completly useless on RISC-V,
> but that's an even bigger patch...
>
>
> Björn
>
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv
  
Anup Patel Feb. 5, 2024, 2:27 p.m. UTC | #8
On Mon, Feb 5, 2024 at 7:38 PM Alexandre Ghiti <alex@ghiti.fr> wrote:
>
>
> On 05/02/2024 12:00, Björn Töpel wrote:
> > Anup Patel <apatel@ventanamicro.com> writes:
> >
> >> On Mon, Feb 5, 2024 at 11:52 AM Alexandre Ghiti <alex@ghiti.fr> wrote:
> >>> Hi Anup,
> >>>
> >>> On 05/02/2024 05:29, Anup Patel wrote:
> >>>> If some of the HARTs are parked by stop machine then IPI-based
> >>>> flushing in flush_icache_all() will hang. This hang is observed
> >>>> when text patching is invoked by various debug and BPF features.
> >>>>
> >>>> To avoid this hang, we force use of SBI-based icache flushing
> >>>> when patching text.
> >>>>
> >>>> Fixes: 627922843235 ("RISC-V: Use IPIs for remote icache flush when possible")
> >>>> Reported-by: Bjorn Topel <bjorn@kernel.org>
> >>>> Closes: https://gist.github.com/bjoto/04a580568378f3b5483af07cd9d22501
> >>>> Signed-off-by: Anup Patel <apatel@ventanamicro.com>
> >>>> ---
> >>>>    arch/riscv/include/asm/cacheflush.h | 7 ++++---
> >>>>    arch/riscv/kernel/hibernate.c       | 2 +-
> >>>>    arch/riscv/kernel/patch.c           | 4 ++--
> >>>>    arch/riscv/mm/cacheflush.c          | 7 ++++---
> >>>>    4 files changed, 11 insertions(+), 9 deletions(-)
> >>>>
> >>>> diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h
> >>>> index a129dac4521d..561e079f34af 100644
> >>>> --- a/arch/riscv/include/asm/cacheflush.h
> >>>> +++ b/arch/riscv/include/asm/cacheflush.h
> >>>> @@ -32,7 +32,8 @@ static inline void flush_dcache_page(struct page *page)
> >>>>     * RISC-V doesn't have an instruction to flush parts of the instruction cache,
> >>>>     * so instead we just flush the whole thing.
> >>>>     */
> >>>> -#define flush_icache_range(start, end) flush_icache_all()
> >>>> +#define flush_icache_range(start, end) flush_icache_all(true)
> >>>> +#define flush_icache_patch_range(start, end) flush_icache_all(false)
> >>>>    #define flush_icache_user_page(vma, pg, addr, len) \
> >>>>        flush_icache_mm(vma->vm_mm, 0)
> >>>>
> >>>> @@ -43,12 +44,12 @@ static inline void flush_dcache_page(struct page *page)
> >>>>
> >>>>    #ifndef CONFIG_SMP
> >>>>
> >>>> -#define flush_icache_all() local_flush_icache_all()
> >>>> +#define flush_icache_all(want_ipi) local_flush_icache_all()
> >>>>    #define flush_icache_mm(mm, local) flush_icache_all()
> >>>>
> >>>>    #else /* CONFIG_SMP */
> >>>>
> >>>> -void flush_icache_all(void);
> >>>> +void flush_icache_all(bool want_ipi);
> >>>>    void flush_icache_mm(struct mm_struct *mm, bool local);
> >>>>
> >>>>    #endif /* CONFIG_SMP */
> >>>> diff --git a/arch/riscv/kernel/hibernate.c b/arch/riscv/kernel/hibernate.c
> >>>> index 671b686c0158..388f10e187ba 100644
> >>>> --- a/arch/riscv/kernel/hibernate.c
> >>>> +++ b/arch/riscv/kernel/hibernate.c
> >>>> @@ -153,7 +153,7 @@ int swsusp_arch_suspend(void)
> >>>>        } else {
> >>>>                suspend_restore_csrs(hibernate_cpu_context);
> >>>>                flush_tlb_all();
> >>>> -             flush_icache_all();
> >>>> +             flush_icache_all(true);
> >>>>
> >>>>                /*
> >>>>                 * Tell the hibernation core that we've just restored the memory.
> >>>> diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
> >>>> index 37e87fdcf6a0..721e144a7847 100644
> >>>> --- a/arch/riscv/kernel/patch.c
> >>>> +++ b/arch/riscv/kernel/patch.c
> >>>> @@ -182,7 +182,7 @@ int patch_text_set_nosync(void *addr, u8 c, size_t len)
> >>>>        ret = patch_insn_set(tp, c, len);
> >>>>
> >>>>        if (!ret)
> >>>> -             flush_icache_range((uintptr_t)tp, (uintptr_t)tp + len);
> >>>> +             flush_icache_patch_range((uintptr_t)tp, (uintptr_t)tp + len);
> >>>>
> >>>>        return ret;
> >>>>    }
> >>>> @@ -217,7 +217,7 @@ int patch_text_nosync(void *addr, const void *insns, size_t len)
> >>>>        ret = patch_insn_write(tp, insns, len);
> >>>>
> >>>>        if (!ret)
> >>>> -             flush_icache_range((uintptr_t) tp, (uintptr_t) tp + len);
> >>>> +             flush_icache_patch_range((uintptr_t) tp, (uintptr_t) tp + len);
> >>>>
> >>>>        return ret;
> >>>>    }
> >>>> diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
> >>>> index 55a34f2020a8..03cd3d4831ef 100644
> >>>> --- a/arch/riscv/mm/cacheflush.c
> >>>> +++ b/arch/riscv/mm/cacheflush.c
> >>>> @@ -17,11 +17,12 @@ static void ipi_remote_fence_i(void *info)
> >>>>        return local_flush_icache_all();
> >>>>    }
> >>>>
> >>>> -void flush_icache_all(void)
> >>>> +void flush_icache_all(bool want_ipi)
> >>>>    {
> >>>>        local_flush_icache_all();
> >>>>
> >>>> -     if (IS_ENABLED(CONFIG_RISCV_SBI) && !riscv_use_ipi_for_rfence())
> >>>> +     if (IS_ENABLED(CONFIG_RISCV_SBI) &&
> >>>> +         (!want_ipi || !riscv_use_ipi_for_rfence()))
> >>>>                sbi_remote_fence_i(NULL);
> >>>>        else
> >>>>                on_each_cpu(ipi_remote_fence_i, NULL, 1);
> >>>> @@ -87,7 +88,7 @@ void flush_icache_pte(pte_t pte)
> >>>>        struct folio *folio = page_folio(pte_page(pte));
> >>>>
> >>>>        if (!test_bit(PG_dcache_clean, &folio->flags)) {
> >>>> -             flush_icache_all();
> >>>> +             flush_icache_all(true);
> >>>>                set_bit(PG_dcache_clean, &folio->flags);
> >>>>        }
> >>>>    }
> >>>
> >>> Since patch_text_cb() is run on all cpus, couldn't we completely avoid
> >>> any remote icache flush by slightly changing patch_text_cb() instead as
> >>> follows?
> >> Unfortunately patch_text_cb() is not the only user of patch_text_nosync
> >> since patch_text_nosync() and patch_text_set_nosync() are called directly
> >> from other places as well.
> > Yeah. There is one more stop_machine() text patching user, and that's
> > ftrace. ftrace is using stop_machine() with the last argument set to
> > NULL, so only patching on *any* hart. Continuing on Alex' idea would be
> > to place an IPI flush in ftrace_arch_code_modify_post_process(),
> > unfortately that's too late since we're already moved on from
> > stop_machine().
>
>
> After discussion with Bjorn, we think the solution would be to
> reimplement arch_ftrace_update_code() with stop_machine(...,
> cpu_online_mask) and use the same barrier as the one in patch_text_cb()
> (csky does just that
> https://elixir.bootlin.com/linux/latest/source/arch/csky/kernel/ftrace.c#L224).
> And then we can apply the same solution as I first proposed: no more
> remote icache flushes, only local ones.
>
> What do you think Anup? I can come up with this patch if you want.

Sounds good to me. You will need a special .config from Bjorn to test
your patch.

>
>
> >
> >> We have to update all users of patch_text_nosync() and
> >> patch_text_set_nosync() to move to local icache flushes which
> >> is a much bigger change.
> > Only the ftrace stop_machine() user, right? Alex solution is sufficient
> > for patch_text(). I'm not a super fan of conditionally calling into SBI
> > and passing around boolean context flags as a workaround... :-( Any
> > other alternatives?
> >
> > The obvious fixing text patching not to be completly useless on RISC-V,
> > but that's an even bigger patch...
> >
> >
> > Björn
> >
> > _______________________________________________
> > linux-riscv mailing list
> > linux-riscv@lists.infradead.org
> > http://lists.infradead.org/mailman/listinfo/linux-riscv

Regards,
Anup
  
Björn Töpel Feb. 5, 2024, 2:35 p.m. UTC | #9
Anup Patel <anup@brainfault.org> writes:

> On Mon, Feb 5, 2024 at 7:38 PM Alexandre Ghiti <alex@ghiti.fr> wrote:
>>
>>
>> On 05/02/2024 12:00, Björn Töpel wrote:
>> > Anup Patel <apatel@ventanamicro.com> writes:
>> >
>> >> On Mon, Feb 5, 2024 at 11:52 AM Alexandre Ghiti <alex@ghiti.fr> wrote:
>> >>> Hi Anup,
>> >>>
>> >>> On 05/02/2024 05:29, Anup Patel wrote:
>> >>>> If some of the HARTs are parked by stop machine then IPI-based
>> >>>> flushing in flush_icache_all() will hang. This hang is observed
>> >>>> when text patching is invoked by various debug and BPF features.
>> >>>>
>> >>>> To avoid this hang, we force use of SBI-based icache flushing
>> >>>> when patching text.
>> >>>>
>> >>>> Fixes: 627922843235 ("RISC-V: Use IPIs for remote icache flush when possible")
>> >>>> Reported-by: Bjorn Topel <bjorn@kernel.org>
>> >>>> Closes: https://gist.github.com/bjoto/04a580568378f3b5483af07cd9d22501
>> >>>> Signed-off-by: Anup Patel <apatel@ventanamicro.com>
>> >>>> ---
>> >>>>    arch/riscv/include/asm/cacheflush.h | 7 ++++---
>> >>>>    arch/riscv/kernel/hibernate.c       | 2 +-
>> >>>>    arch/riscv/kernel/patch.c           | 4 ++--
>> >>>>    arch/riscv/mm/cacheflush.c          | 7 ++++---
>> >>>>    4 files changed, 11 insertions(+), 9 deletions(-)
>> >>>>
>> >>>> diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h
>> >>>> index a129dac4521d..561e079f34af 100644
>> >>>> --- a/arch/riscv/include/asm/cacheflush.h
>> >>>> +++ b/arch/riscv/include/asm/cacheflush.h
>> >>>> @@ -32,7 +32,8 @@ static inline void flush_dcache_page(struct page *page)
>> >>>>     * RISC-V doesn't have an instruction to flush parts of the instruction cache,
>> >>>>     * so instead we just flush the whole thing.
>> >>>>     */
>> >>>> -#define flush_icache_range(start, end) flush_icache_all()
>> >>>> +#define flush_icache_range(start, end) flush_icache_all(true)
>> >>>> +#define flush_icache_patch_range(start, end) flush_icache_all(false)
>> >>>>    #define flush_icache_user_page(vma, pg, addr, len) \
>> >>>>        flush_icache_mm(vma->vm_mm, 0)
>> >>>>
>> >>>> @@ -43,12 +44,12 @@ static inline void flush_dcache_page(struct page *page)
>> >>>>
>> >>>>    #ifndef CONFIG_SMP
>> >>>>
>> >>>> -#define flush_icache_all() local_flush_icache_all()
>> >>>> +#define flush_icache_all(want_ipi) local_flush_icache_all()
>> >>>>    #define flush_icache_mm(mm, local) flush_icache_all()
>> >>>>
>> >>>>    #else /* CONFIG_SMP */
>> >>>>
>> >>>> -void flush_icache_all(void);
>> >>>> +void flush_icache_all(bool want_ipi);
>> >>>>    void flush_icache_mm(struct mm_struct *mm, bool local);
>> >>>>
>> >>>>    #endif /* CONFIG_SMP */
>> >>>> diff --git a/arch/riscv/kernel/hibernate.c b/arch/riscv/kernel/hibernate.c
>> >>>> index 671b686c0158..388f10e187ba 100644
>> >>>> --- a/arch/riscv/kernel/hibernate.c
>> >>>> +++ b/arch/riscv/kernel/hibernate.c
>> >>>> @@ -153,7 +153,7 @@ int swsusp_arch_suspend(void)
>> >>>>        } else {
>> >>>>                suspend_restore_csrs(hibernate_cpu_context);
>> >>>>                flush_tlb_all();
>> >>>> -             flush_icache_all();
>> >>>> +             flush_icache_all(true);
>> >>>>
>> >>>>                /*
>> >>>>                 * Tell the hibernation core that we've just restored the memory.
>> >>>> diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
>> >>>> index 37e87fdcf6a0..721e144a7847 100644
>> >>>> --- a/arch/riscv/kernel/patch.c
>> >>>> +++ b/arch/riscv/kernel/patch.c
>> >>>> @@ -182,7 +182,7 @@ int patch_text_set_nosync(void *addr, u8 c, size_t len)
>> >>>>        ret = patch_insn_set(tp, c, len);
>> >>>>
>> >>>>        if (!ret)
>> >>>> -             flush_icache_range((uintptr_t)tp, (uintptr_t)tp + len);
>> >>>> +             flush_icache_patch_range((uintptr_t)tp, (uintptr_t)tp + len);
>> >>>>
>> >>>>        return ret;
>> >>>>    }
>> >>>> @@ -217,7 +217,7 @@ int patch_text_nosync(void *addr, const void *insns, size_t len)
>> >>>>        ret = patch_insn_write(tp, insns, len);
>> >>>>
>> >>>>        if (!ret)
>> >>>> -             flush_icache_range((uintptr_t) tp, (uintptr_t) tp + len);
>> >>>> +             flush_icache_patch_range((uintptr_t) tp, (uintptr_t) tp + len);
>> >>>>
>> >>>>        return ret;
>> >>>>    }
>> >>>> diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
>> >>>> index 55a34f2020a8..03cd3d4831ef 100644
>> >>>> --- a/arch/riscv/mm/cacheflush.c
>> >>>> +++ b/arch/riscv/mm/cacheflush.c
>> >>>> @@ -17,11 +17,12 @@ static void ipi_remote_fence_i(void *info)
>> >>>>        return local_flush_icache_all();
>> >>>>    }
>> >>>>
>> >>>> -void flush_icache_all(void)
>> >>>> +void flush_icache_all(bool want_ipi)
>> >>>>    {
>> >>>>        local_flush_icache_all();
>> >>>>
>> >>>> -     if (IS_ENABLED(CONFIG_RISCV_SBI) && !riscv_use_ipi_for_rfence())
>> >>>> +     if (IS_ENABLED(CONFIG_RISCV_SBI) &&
>> >>>> +         (!want_ipi || !riscv_use_ipi_for_rfence()))
>> >>>>                sbi_remote_fence_i(NULL);
>> >>>>        else
>> >>>>                on_each_cpu(ipi_remote_fence_i, NULL, 1);
>> >>>> @@ -87,7 +88,7 @@ void flush_icache_pte(pte_t pte)
>> >>>>        struct folio *folio = page_folio(pte_page(pte));
>> >>>>
>> >>>>        if (!test_bit(PG_dcache_clean, &folio->flags)) {
>> >>>> -             flush_icache_all();
>> >>>> +             flush_icache_all(true);
>> >>>>                set_bit(PG_dcache_clean, &folio->flags);
>> >>>>        }
>> >>>>    }
>> >>>
>> >>> Since patch_text_cb() is run on all cpus, couldn't we completely avoid
>> >>> any remote icache flush by slightly changing patch_text_cb() instead as
>> >>> follows?
>> >> Unfortunately patch_text_cb() is not the only user of patch_text_nosync
>> >> since patch_text_nosync() and patch_text_set_nosync() are called directly
>> >> from other places as well.
>> > Yeah. There is one more stop_machine() text patching user, and that's
>> > ftrace. ftrace is using stop_machine() with the last argument set to
>> > NULL, so only patching on *any* hart. Continuing on Alex' idea would be
>> > to place an IPI flush in ftrace_arch_code_modify_post_process(),
>> > unfortately that's too late since we're already moved on from
>> > stop_machine().
>>
>>
>> After discussion with Bjorn, we think the solution would be to
>> reimplement arch_ftrace_update_code() with stop_machine(...,
>> cpu_online_mask) and use the same barrier as the one in patch_text_cb()
>> (csky does just that
>> https://elixir.bootlin.com/linux/latest/source/arch/csky/kernel/ftrace.c#L224).
>> And then we can apply the same solution as I first proposed: no more
>> remote icache flushes, only local ones.
>>
>> What do you think Anup? I can come up with this patch if you want.
>
> Sounds good to me. You will need a special .config from Bjorn to test
> your patch.

I also (apparently ;-)) like this solution!

To clarify; "The special config" does ftrace patching in initcall phase,
but you can reproduce by exercising kprobe/ftrace with the IMSIC IPI
patches.


Cheers,
Björn
  

Patch

diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h
index a129dac4521d..561e079f34af 100644
--- a/arch/riscv/include/asm/cacheflush.h
+++ b/arch/riscv/include/asm/cacheflush.h
@@ -32,7 +32,8 @@  static inline void flush_dcache_page(struct page *page)
  * RISC-V doesn't have an instruction to flush parts of the instruction cache,
  * so instead we just flush the whole thing.
  */
-#define flush_icache_range(start, end) flush_icache_all()
+#define flush_icache_range(start, end) flush_icache_all(true)
+#define flush_icache_patch_range(start, end) flush_icache_all(false)
 #define flush_icache_user_page(vma, pg, addr, len) \
 	flush_icache_mm(vma->vm_mm, 0)
 
@@ -43,12 +44,12 @@  static inline void flush_dcache_page(struct page *page)
 
 #ifndef CONFIG_SMP
 
-#define flush_icache_all() local_flush_icache_all()
+#define flush_icache_all(want_ipi) local_flush_icache_all()
 #define flush_icache_mm(mm, local) flush_icache_all()
 
 #else /* CONFIG_SMP */
 
-void flush_icache_all(void);
+void flush_icache_all(bool want_ipi);
 void flush_icache_mm(struct mm_struct *mm, bool local);
 
 #endif /* CONFIG_SMP */
diff --git a/arch/riscv/kernel/hibernate.c b/arch/riscv/kernel/hibernate.c
index 671b686c0158..388f10e187ba 100644
--- a/arch/riscv/kernel/hibernate.c
+++ b/arch/riscv/kernel/hibernate.c
@@ -153,7 +153,7 @@  int swsusp_arch_suspend(void)
 	} else {
 		suspend_restore_csrs(hibernate_cpu_context);
 		flush_tlb_all();
-		flush_icache_all();
+		flush_icache_all(true);
 
 		/*
 		 * Tell the hibernation core that we've just restored the memory.
diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
index 37e87fdcf6a0..721e144a7847 100644
--- a/arch/riscv/kernel/patch.c
+++ b/arch/riscv/kernel/patch.c
@@ -182,7 +182,7 @@  int patch_text_set_nosync(void *addr, u8 c, size_t len)
 	ret = patch_insn_set(tp, c, len);
 
 	if (!ret)
-		flush_icache_range((uintptr_t)tp, (uintptr_t)tp + len);
+		flush_icache_patch_range((uintptr_t)tp, (uintptr_t)tp + len);
 
 	return ret;
 }
@@ -217,7 +217,7 @@  int patch_text_nosync(void *addr, const void *insns, size_t len)
 	ret = patch_insn_write(tp, insns, len);
 
 	if (!ret)
-		flush_icache_range((uintptr_t) tp, (uintptr_t) tp + len);
+		flush_icache_patch_range((uintptr_t) tp, (uintptr_t) tp + len);
 
 	return ret;
 }
diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
index 55a34f2020a8..03cd3d4831ef 100644
--- a/arch/riscv/mm/cacheflush.c
+++ b/arch/riscv/mm/cacheflush.c
@@ -17,11 +17,12 @@  static void ipi_remote_fence_i(void *info)
 	return local_flush_icache_all();
 }
 
-void flush_icache_all(void)
+void flush_icache_all(bool want_ipi)
 {
 	local_flush_icache_all();
 
-	if (IS_ENABLED(CONFIG_RISCV_SBI) && !riscv_use_ipi_for_rfence())
+	if (IS_ENABLED(CONFIG_RISCV_SBI) &&
+	    (!want_ipi || !riscv_use_ipi_for_rfence()))
 		sbi_remote_fence_i(NULL);
 	else
 		on_each_cpu(ipi_remote_fence_i, NULL, 1);
@@ -87,7 +88,7 @@  void flush_icache_pte(pte_t pte)
 	struct folio *folio = page_folio(pte_page(pte));
 
 	if (!test_bit(PG_dcache_clean, &folio->flags)) {
-		flush_icache_all();
+		flush_icache_all(true);
 		set_bit(PG_dcache_clean, &folio->flags);
 	}
 }