[v4,4/6] RISC-V: hwprobe: Support probing of misaligned access performance
Commit Message
This allows userspace to select various routines to use based on the
performance of misaligned access on the target hardware.
Co-developed-by: Palmer Dabbelt <palmer@rivosinc.com>
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
Signed-off-by: Evan Green <evan@rivosinc.com>
---
Changes in v4:
- Add newlines to CPUPERF_0 documentation (Conor)
- Add UNSUPPORTED value (Conor)
- Switched from DT to alternatives-based probing (Rob)
- Crispen up cpu index type to always be int (Conor)
Changes in v3:
- Have hwprobe_misaligned return int instead of long.
- Constify cpumask pointer in hwprobe_misaligned()
- Fix warnings in _PERF_O list documentation, use :c:macro:.
- Move include cpufeature.h to misaligned patch.
- Fix documentation mismatch for RISCV_HWPROBE_KEY_CPUPERF_0 (Conor)
- Use for_each_possible_cpu() instead of NR_CPUS (Conor)
- Break early in misaligned access iteration (Conor)
- Increase MISALIGNED_MASK from 2 bits to 3 for possible UNSUPPORTED future
value (Conor)
Changes in v2:
- Fixed logic error in if(of_property_read_string...) that caused crash
- Include cpufeature.h in cpufeature.h to avoid undeclared variable
warning.
- Added a _MASK define
- Fix random checkpatch complaints
Documentation/riscv/hwprobe.rst | 21 ++++++++++++++++++++
arch/riscv/errata/thead/errata.c | 9 +++++++++
arch/riscv/include/asm/alternative.h | 5 +++++
arch/riscv/include/asm/cpufeature.h | 2 ++
arch/riscv/include/asm/hwprobe.h | 2 +-
arch/riscv/include/uapi/asm/hwprobe.h | 7 +++++++
arch/riscv/kernel/alternative.c | 19 ++++++++++++++++++
arch/riscv/kernel/cpufeature.c | 3 +++
arch/riscv/kernel/smpboot.c | 1 +
arch/riscv/kernel/sys_riscv.c | 28 +++++++++++++++++++++++++++
10 files changed, 96 insertions(+), 1 deletion(-)
Comments
Hi Evan,
Am Dienstag, 14. März 2023, 19:32:18 CET schrieb Evan Green:
> This allows userspace to select various routines to use based on the
> performance of misaligned access on the target hardware.
I really like this implementation.
Also interesting that T-Head actually has a fast unaligned access.
Maybe that should be part of the commit message (including where
this information comes from).
> Co-developed-by: Palmer Dabbelt <palmer@rivosinc.com>
> Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
> Signed-off-by: Evan Green <evan@rivosinc.com>
>
> ---
>
> Changes in v4:
> - Add newlines to CPUPERF_0 documentation (Conor)
> - Add UNSUPPORTED value (Conor)
> - Switched from DT to alternatives-based probing (Rob)
> - Crispen up cpu index type to always be int (Conor)
>
> Changes in v3:
> - Have hwprobe_misaligned return int instead of long.
> - Constify cpumask pointer in hwprobe_misaligned()
> - Fix warnings in _PERF_O list documentation, use :c:macro:.
> - Move include cpufeature.h to misaligned patch.
> - Fix documentation mismatch for RISCV_HWPROBE_KEY_CPUPERF_0 (Conor)
> - Use for_each_possible_cpu() instead of NR_CPUS (Conor)
> - Break early in misaligned access iteration (Conor)
> - Increase MISALIGNED_MASK from 2 bits to 3 for possible UNSUPPORTED future
> value (Conor)
>
> Changes in v2:
> - Fixed logic error in if(of_property_read_string...) that caused crash
> - Include cpufeature.h in cpufeature.h to avoid undeclared variable
> warning.
> - Added a _MASK define
> - Fix random checkpatch complaints
>
> Documentation/riscv/hwprobe.rst | 21 ++++++++++++++++++++
> arch/riscv/errata/thead/errata.c | 9 +++++++++
> arch/riscv/include/asm/alternative.h | 5 +++++
> arch/riscv/include/asm/cpufeature.h | 2 ++
> arch/riscv/include/asm/hwprobe.h | 2 +-
> arch/riscv/include/uapi/asm/hwprobe.h | 7 +++++++
> arch/riscv/kernel/alternative.c | 19 ++++++++++++++++++
> arch/riscv/kernel/cpufeature.c | 3 +++
> arch/riscv/kernel/smpboot.c | 1 +
> arch/riscv/kernel/sys_riscv.c | 28 +++++++++++++++++++++++++++
> 10 files changed, 96 insertions(+), 1 deletion(-)
>
> diff --git a/Documentation/riscv/hwprobe.rst b/Documentation/riscv/hwprobe.rst
> index 945d44683c40..9f0dd62dcb5d 100644
> --- a/Documentation/riscv/hwprobe.rst
> +++ b/Documentation/riscv/hwprobe.rst
> @@ -63,3 +63,24 @@ The following keys are defined:
>
> * :c:macro:`RISCV_HWPROBE_IMA_C`: The C extension is supported, as defined
> by version 2.2 of the RISC-V ISA manual.
> +
> +* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: A bitmask that contains performance
> + information about the selected set of processors.
> +
> + * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNKNOWN`: The performance of misaligned
> + accesses is unknown.
> +
> + * :c:macro:`RISCV_HWPROBE_MISALIGNED_EMULATED`: Misaligned accesses are
> + emulated via software, either in or below the kernel. These accesses are
> + always extremely slow.
> +
> + * :c:macro:`RISCV_HWPROBE_MISALIGNED_SLOW`: Misaligned accesses are supported
> + in hardware, but are slower than the cooresponding aligned accesses
> + sequences.
> +
> + * :c:macro:`RISCV_HWPROBE_MISALIGNED_FAST`: Misaligned accesses are supported
> + in hardware and are faster than the cooresponding aligned accesses
> + sequences.
> +
> + * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNSUPPORTED`: Misaligned accesses are
> + not supported at all and will generate a misaligned address fault.
> diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
> index fac5742d1c1e..f41a45af5607 100644
> --- a/arch/riscv/errata/thead/errata.c
> +++ b/arch/riscv/errata/thead/errata.c
> @@ -10,7 +10,9 @@
> #include <linux/uaccess.h>
> #include <asm/alternative.h>
> #include <asm/cacheflush.h>
> +#include <asm/cpufeature.h>
> #include <asm/errata_list.h>
> +#include <asm/hwprobe.h>
> #include <asm/patch.h>
> #include <asm/vendorid_list.h>
>
> @@ -108,3 +110,10 @@ void __init_or_module thead_errata_patch_func(struct alt_entry *begin, struct al
> if (stage == RISCV_ALTERNATIVES_EARLY_BOOT)
> local_flush_icache_all();
> }
> +
> +void thead_feature_probe_func(unsigned int cpu, unsigned long archid,
> + unsigned long impid)
> +{
> + if ((archid == 0) && (impid == 0))
> + per_cpu(misaligned_access_speed, cpu) = RISCV_HWPROBE_MISALIGNED_FAST;
When looking at this function I'm wondering if we also want to expose
the active errata somehow (not in this patch of course, just in general)
Heiko
On Tue, Mar 14, 2023 at 11:32:18AM -0700, Evan Green wrote:
> This allows userspace to select various routines to use based on the
> performance of misaligned access on the target hardware.
>
> Co-developed-by: Palmer Dabbelt <palmer@rivosinc.com>
> Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
> Signed-off-by: Evan Green <evan@rivosinc.com>
I think this is fine now, modulo the lack of an explanation in the commit
message for the new thead feature/"errata" that you've added.
With an explanation for that:
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Thanks,
Conor.
> ---
>
> Changes in v4:
> - Add newlines to CPUPERF_0 documentation (Conor)
> - Add UNSUPPORTED value (Conor)
> - Switched from DT to alternatives-based probing (Rob)
> - Crispen up cpu index type to always be int (Conor)
>
> Changes in v3:
> - Have hwprobe_misaligned return int instead of long.
> - Constify cpumask pointer in hwprobe_misaligned()
> - Fix warnings in _PERF_O list documentation, use :c:macro:.
> - Move include cpufeature.h to misaligned patch.
> - Fix documentation mismatch for RISCV_HWPROBE_KEY_CPUPERF_0 (Conor)
> - Use for_each_possible_cpu() instead of NR_CPUS (Conor)
> - Break early in misaligned access iteration (Conor)
> - Increase MISALIGNED_MASK from 2 bits to 3 for possible UNSUPPORTED future
> value (Conor)
>
> Changes in v2:
> - Fixed logic error in if(of_property_read_string...) that caused crash
> - Include cpufeature.h in cpufeature.h to avoid undeclared variable
> warning.
> - Added a _MASK define
> - Fix random checkpatch complaints
>
> Documentation/riscv/hwprobe.rst | 21 ++++++++++++++++++++
> arch/riscv/errata/thead/errata.c | 9 +++++++++
> arch/riscv/include/asm/alternative.h | 5 +++++
> arch/riscv/include/asm/cpufeature.h | 2 ++
> arch/riscv/include/asm/hwprobe.h | 2 +-
> arch/riscv/include/uapi/asm/hwprobe.h | 7 +++++++
> arch/riscv/kernel/alternative.c | 19 ++++++++++++++++++
> arch/riscv/kernel/cpufeature.c | 3 +++
> arch/riscv/kernel/smpboot.c | 1 +
> arch/riscv/kernel/sys_riscv.c | 28 +++++++++++++++++++++++++++
> 10 files changed, 96 insertions(+), 1 deletion(-)
>
> diff --git a/Documentation/riscv/hwprobe.rst b/Documentation/riscv/hwprobe.rst
> index 945d44683c40..9f0dd62dcb5d 100644
> --- a/Documentation/riscv/hwprobe.rst
> +++ b/Documentation/riscv/hwprobe.rst
> @@ -63,3 +63,24 @@ The following keys are defined:
>
> * :c:macro:`RISCV_HWPROBE_IMA_C`: The C extension is supported, as defined
> by version 2.2 of the RISC-V ISA manual.
> +
> +* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: A bitmask that contains performance
> + information about the selected set of processors.
> +
> + * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNKNOWN`: The performance of misaligned
> + accesses is unknown.
> +
> + * :c:macro:`RISCV_HWPROBE_MISALIGNED_EMULATED`: Misaligned accesses are
> + emulated via software, either in or below the kernel. These accesses are
> + always extremely slow.
> +
> + * :c:macro:`RISCV_HWPROBE_MISALIGNED_SLOW`: Misaligned accesses are supported
> + in hardware, but are slower than the cooresponding aligned accesses
> + sequences.
> +
> + * :c:macro:`RISCV_HWPROBE_MISALIGNED_FAST`: Misaligned accesses are supported
> + in hardware and are faster than the cooresponding aligned accesses
> + sequences.
> +
> + * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNSUPPORTED`: Misaligned accesses are
> + not supported at all and will generate a misaligned address fault.
> diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
> index fac5742d1c1e..f41a45af5607 100644
> --- a/arch/riscv/errata/thead/errata.c
> +++ b/arch/riscv/errata/thead/errata.c
> @@ -10,7 +10,9 @@
> #include <linux/uaccess.h>
> #include <asm/alternative.h>
> #include <asm/cacheflush.h>
> +#include <asm/cpufeature.h>
> #include <asm/errata_list.h>
> +#include <asm/hwprobe.h>
> #include <asm/patch.h>
> #include <asm/vendorid_list.h>
>
> @@ -108,3 +110,10 @@ void __init_or_module thead_errata_patch_func(struct alt_entry *begin, struct al
> if (stage == RISCV_ALTERNATIVES_EARLY_BOOT)
> local_flush_icache_all();
> }
> +
> +void thead_feature_probe_func(unsigned int cpu, unsigned long archid,
> + unsigned long impid)
> +{
> + if ((archid == 0) && (impid == 0))
> + per_cpu(misaligned_access_speed, cpu) = RISCV_HWPROBE_MISALIGNED_FAST;
> +}
> diff --git a/arch/riscv/include/asm/alternative.h b/arch/riscv/include/asm/alternative.h
> index 6511dd73e812..7be6d4c6a27d 100644
> --- a/arch/riscv/include/asm/alternative.h
> +++ b/arch/riscv/include/asm/alternative.h
> @@ -23,6 +23,7 @@
> #define RISCV_ALTERNATIVES_MODULE 1 /* alternatives applied during module-init */
> #define RISCV_ALTERNATIVES_EARLY_BOOT 2 /* alternatives applied before mmu start */
>
> +void probe_vendor_features(unsigned int cpu);
> void __init apply_boot_alternatives(void);
> void __init apply_early_boot_alternatives(void);
> void apply_module_alternatives(void *start, size_t length);
> @@ -47,11 +48,15 @@ void thead_errata_patch_func(struct alt_entry *begin, struct alt_entry *end,
> unsigned long archid, unsigned long impid,
> unsigned int stage);
>
> +void thead_feature_probe_func(unsigned int cpu, unsigned long archid,
> + unsigned long impid);
> +
> void riscv_cpufeature_patch_func(struct alt_entry *begin, struct alt_entry *end,
> unsigned int stage);
>
> #else /* CONFIG_RISCV_ALTERNATIVE */
>
> +static inline void probe_vendor_features(unsigned int cpu) { }
> static inline void apply_boot_alternatives(void) { }
> static inline void apply_early_boot_alternatives(void) { }
> static inline void apply_module_alternatives(void *start, size_t length) { }
> diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
> index 66ebaae449c8..808d5403f2ac 100644
> --- a/arch/riscv/include/asm/cpufeature.h
> +++ b/arch/riscv/include/asm/cpufeature.h
> @@ -18,4 +18,6 @@ struct riscv_cpuinfo {
>
> DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo);
>
> +DECLARE_PER_CPU(long, misaligned_access_speed);
> +
> #endif
> diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h
> index 7e52f1e1fe10..4e45e33015bc 100644
> --- a/arch/riscv/include/asm/hwprobe.h
> +++ b/arch/riscv/include/asm/hwprobe.h
> @@ -8,6 +8,6 @@
>
> #include <uapi/asm/hwprobe.h>
>
> -#define RISCV_HWPROBE_MAX_KEY 4
> +#define RISCV_HWPROBE_MAX_KEY 5
>
> #endif
> diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h
> index fc5665411782..2968bb0984b5 100644
> --- a/arch/riscv/include/uapi/asm/hwprobe.h
> +++ b/arch/riscv/include/uapi/asm/hwprobe.h
> @@ -25,6 +25,13 @@ struct riscv_hwprobe {
> #define RISCV_HWPROBE_KEY_IMA_EXT_0 4
> #define RISCV_HWPROBE_IMA_FD (1 << 0)
> #define RISCV_HWPROBE_IMA_C (1 << 1)
> +#define RISCV_HWPROBE_KEY_CPUPERF_0 5
> +#define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0)
> +#define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0)
> +#define RISCV_HWPROBE_MISALIGNED_SLOW (2 << 0)
> +#define RISCV_HWPROBE_MISALIGNED_FAST (3 << 0)
> +#define RISCV_HWPROBE_MISALIGNED_UNSUPPORTED (4 << 0)
> +#define RISCV_HWPROBE_MISALIGNED_MASK (7 << 0)
> /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
>
> #endif
> diff --git a/arch/riscv/kernel/alternative.c b/arch/riscv/kernel/alternative.c
> index a7d26a00beea..522d3d11e0c3 100644
> --- a/arch/riscv/kernel/alternative.c
> +++ b/arch/riscv/kernel/alternative.c
> @@ -23,6 +23,8 @@ struct cpu_manufacturer_info_t {
> void (*patch_func)(struct alt_entry *begin, struct alt_entry *end,
> unsigned long archid, unsigned long impid,
> unsigned int stage);
> + void (*feature_probe_func)(unsigned int cpu, unsigned long archid,
> + unsigned long impid);
> };
>
> static void __init_or_module riscv_fill_cpu_mfr_info(struct cpu_manufacturer_info_t *cpu_mfr_info)
> @@ -37,6 +39,7 @@ static void __init_or_module riscv_fill_cpu_mfr_info(struct cpu_manufacturer_inf
> cpu_mfr_info->imp_id = sbi_get_mimpid();
> #endif
>
> + cpu_mfr_info->feature_probe_func = NULL;
> switch (cpu_mfr_info->vendor_id) {
> #ifdef CONFIG_ERRATA_SIFIVE
> case SIFIVE_VENDOR_ID:
> @@ -46,6 +49,7 @@ static void __init_or_module riscv_fill_cpu_mfr_info(struct cpu_manufacturer_inf
> #ifdef CONFIG_ERRATA_THEAD
> case THEAD_VENDOR_ID:
> cpu_mfr_info->patch_func = thead_errata_patch_func;
> + cpu_mfr_info->feature_probe_func = thead_feature_probe_func;
> break;
> #endif
> default:
> @@ -53,6 +57,20 @@ static void __init_or_module riscv_fill_cpu_mfr_info(struct cpu_manufacturer_inf
> }
> }
>
> +/* Called on each CPU as it starts */
> +void probe_vendor_features(unsigned int cpu)
> +{
> + struct cpu_manufacturer_info_t cpu_mfr_info;
> +
> + riscv_fill_cpu_mfr_info(&cpu_mfr_info);
> + if (!cpu_mfr_info.feature_probe_func)
> + return;
> +
> + cpu_mfr_info.feature_probe_func(cpu,
> + cpu_mfr_info.arch_id,
> + cpu_mfr_info.imp_id);
> +}
> +
> /*
> * This is called very early in the boot process (directly after we run
> * a feature detect on the boot CPU). No need to worry about other CPUs
> @@ -82,6 +100,7 @@ void __init apply_boot_alternatives(void)
> /* If called on non-boot cpu things could go wrong */
> WARN_ON(smp_processor_id() != 0);
>
> + probe_vendor_features(0);
> _apply_alternatives((struct alt_entry *)__alt_start,
> (struct alt_entry *)__alt_end,
> RISCV_ALTERNATIVES_BOOT);
> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> index 93e45560af30..8ccf260e8b02 100644
> --- a/arch/riscv/kernel/cpufeature.c
> +++ b/arch/riscv/kernel/cpufeature.c
> @@ -32,6 +32,9 @@ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
> DEFINE_STATIC_KEY_ARRAY_FALSE(riscv_isa_ext_keys, RISCV_ISA_EXT_KEY_MAX);
> EXPORT_SYMBOL(riscv_isa_ext_keys);
>
> +/* Performance information */
> +DEFINE_PER_CPU(long, misaligned_access_speed);
> +
> /**
> * riscv_isa_extension_base() - Get base extension word
> *
> diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
> index 3373df413c88..1291ab5ba4c3 100644
> --- a/arch/riscv/kernel/smpboot.c
> +++ b/arch/riscv/kernel/smpboot.c
> @@ -167,6 +167,7 @@ asmlinkage __visible void smp_callin(void)
> notify_cpu_starting(curr_cpuid);
> numa_add_cpu(curr_cpuid);
> set_cpu_online(curr_cpuid, 1);
> + probe_vendor_features(curr_cpuid);
>
> /*
> * Remote TLB flushes are ignored while the CPU is offline, so emit
> diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
> index 1c118438b1b3..76d5b468914c 100644
> --- a/arch/riscv/kernel/sys_riscv.c
> +++ b/arch/riscv/kernel/sys_riscv.c
> @@ -7,6 +7,7 @@
>
> #include <linux/syscalls.h>
> #include <asm/cacheflush.h>
> +#include <asm/cpufeature.h>
> #include <asm/hwprobe.h>
> #include <asm/sbi.h>
> #include <asm/switch_to.h>
> @@ -117,6 +118,29 @@ static void hwprobe_arch_id(struct riscv_hwprobe *pair,
> pair->value = id;
> }
>
> +static u64 hwprobe_misaligned(const struct cpumask *cpus)
> +{
> + int cpu;
> + u64 perf = -1ULL;
> +
> + for_each_cpu(cpu, cpus) {
> + int this_perf = per_cpu(misaligned_access_speed, cpu);
> +
> + if (perf == -1ULL)
> + perf = this_perf;
> +
> + if (perf != this_perf) {
> + perf = RISCV_HWPROBE_MISALIGNED_UNKNOWN;
> + break;
> + }
> + }
> +
> + if (perf == -1ULL)
> + return RISCV_HWPROBE_MISALIGNED_UNKNOWN;
> +
> + return perf;
> +}
> +
> static void hwprobe_one_pair(struct riscv_hwprobe *pair,
> const struct cpumask *cpus)
> {
> @@ -146,6 +170,10 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair,
>
> break;
>
> + case RISCV_HWPROBE_KEY_CPUPERF_0:
> + pair->value = hwprobe_misaligned(cpus);
> + break;
> +
> /*
> * For forward compatibility, unknown keys don't fail the whole
> * call, but get their element key set to -1 and value set to 0
> --
> 2.25.1
>
On Fri, Mar 17, 2023 at 3:08 AM Heiko Stübner <heiko@sntech.de> wrote:
>
> Hi Evan,
>
> Am Dienstag, 14. März 2023, 19:32:18 CET schrieb Evan Green:
> > This allows userspace to select various routines to use based on the
> > performance of misaligned access on the target hardware.
>
> I really like this implementation.
>
> Also interesting that T-Head actually has a fast unaligned access.
> Maybe that should be part of the commit message (including were
> this information comes from)
Thanks Heiko (and Conor)! Yep, you both noticed that, I'll add a description.
>
>
> > Co-developed-by: Palmer Dabbelt <palmer@rivosinc.com>
> > Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
> > Signed-off-by: Evan Green <evan@rivosinc.com>
> >
> > ---
> >
> > Changes in v4:
> > - Add newlines to CPUPERF_0 documentation (Conor)
> > - Add UNSUPPORTED value (Conor)
> > - Switched from DT to alternatives-based probing (Rob)
> > - Crispen up cpu index type to always be int (Conor)
> >
> > Changes in v3:
> > - Have hwprobe_misaligned return int instead of long.
> > - Constify cpumask pointer in hwprobe_misaligned()
> > - Fix warnings in _PERF_O list documentation, use :c:macro:.
> > - Move include cpufeature.h to misaligned patch.
> > - Fix documentation mismatch for RISCV_HWPROBE_KEY_CPUPERF_0 (Conor)
> > - Use for_each_possible_cpu() instead of NR_CPUS (Conor)
> > - Break early in misaligned access iteration (Conor)
> > - Increase MISALIGNED_MASK from 2 bits to 3 for possible UNSUPPORTED future
> > value (Conor)
> >
> > Changes in v2:
> > - Fixed logic error in if(of_property_read_string...) that caused crash
> > - Include cpufeature.h in cpufeature.h to avoid undeclared variable
> > warning.
> > - Added a _MASK define
> > - Fix random checkpatch complaints
> >
> > Documentation/riscv/hwprobe.rst | 21 ++++++++++++++++++++
> > arch/riscv/errata/thead/errata.c | 9 +++++++++
> > arch/riscv/include/asm/alternative.h | 5 +++++
> > arch/riscv/include/asm/cpufeature.h | 2 ++
> > arch/riscv/include/asm/hwprobe.h | 2 +-
> > arch/riscv/include/uapi/asm/hwprobe.h | 7 +++++++
> > arch/riscv/kernel/alternative.c | 19 ++++++++++++++++++
> > arch/riscv/kernel/cpufeature.c | 3 +++
> > arch/riscv/kernel/smpboot.c | 1 +
> > arch/riscv/kernel/sys_riscv.c | 28 +++++++++++++++++++++++++++
> > 10 files changed, 96 insertions(+), 1 deletion(-)
> >
> > diff --git a/Documentation/riscv/hwprobe.rst b/Documentation/riscv/hwprobe.rst
> > index 945d44683c40..9f0dd62dcb5d 100644
> > --- a/Documentation/riscv/hwprobe.rst
> > +++ b/Documentation/riscv/hwprobe.rst
> > @@ -63,3 +63,24 @@ The following keys are defined:
> >
> > * :c:macro:`RISCV_HWPROBE_IMA_C`: The C extension is supported, as defined
> > by version 2.2 of the RISC-V ISA manual.
> > +
> > +* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: A bitmask that contains performance
> > + information about the selected set of processors.
> > +
> > + * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNKNOWN`: The performance of misaligned
> > + accesses is unknown.
> > +
> > + * :c:macro:`RISCV_HWPROBE_MISALIGNED_EMULATED`: Misaligned accesses are
> > + emulated via software, either in or below the kernel. These accesses are
> > + always extremely slow.
> > +
> > + * :c:macro:`RISCV_HWPROBE_MISALIGNED_SLOW`: Misaligned accesses are supported
> > + in hardware, but are slower than the cooresponding aligned accesses
> > + sequences.
> > +
> > + * :c:macro:`RISCV_HWPROBE_MISALIGNED_FAST`: Misaligned accesses are supported
> > + in hardware and are faster than the cooresponding aligned accesses
> > + sequences.
> > +
> > + * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNSUPPORTED`: Misaligned accesses are
> > + not supported at all and will generate a misaligned address fault.
> > diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
> > index fac5742d1c1e..f41a45af5607 100644
> > --- a/arch/riscv/errata/thead/errata.c
> > +++ b/arch/riscv/errata/thead/errata.c
> > @@ -10,7 +10,9 @@
> > #include <linux/uaccess.h>
> > #include <asm/alternative.h>
> > #include <asm/cacheflush.h>
> > +#include <asm/cpufeature.h>
> > #include <asm/errata_list.h>
> > +#include <asm/hwprobe.h>
> > #include <asm/patch.h>
> > #include <asm/vendorid_list.h>
> >
> > @@ -108,3 +110,10 @@ void __init_or_module thead_errata_patch_func(struct alt_entry *begin, struct al
> > if (stage == RISCV_ALTERNATIVES_EARLY_BOOT)
> > local_flush_icache_all();
> > }
> > +
> > +void thead_feature_probe_func(unsigned int cpu, unsigned long archid,
> > + unsigned long impid)
> > +{
> > + if ((archid == 0) && (impid == 0))
> > + per_cpu(misaligned_access_speed, cpu) = RISCV_HWPROBE_MISALIGNED_FAST;
>
> When looking at this function I 'm wondering if we also want to expose
> the active erratas somehow (not in this patch of course, just in general)
I suppose as Arnd pointed out in a different thread there's sort of a
tension between this mechanism and /proc/cpuinfo, the traditional spot
for exposing more standard cpu features/errata. Though if we think of
this mechanism as a sort of surrogate for cpuid, then it potentially
does make sense. My gut says it's a judgment call.
-Evan
Am Dienstag, 14. März 2023, 19:32:18 CET schrieb Evan Green:
> This allows userspace to select various routines to use based on the
> performance of misaligned access on the target hardware.
>
> Co-developed-by: Palmer Dabbelt <palmer@rivosinc.com>
> Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
> Signed-off-by: Evan Green <evan@rivosinc.com>
With the mentioned comment about the origin of the thead value
Reviewed-by: Heiko Stuebner <heiko.stuebner@vrull.eu>
@@ -63,3 +63,24 @@ The following keys are defined:
* :c:macro:`RISCV_HWPROBE_IMA_C`: The C extension is supported, as defined
by version 2.2 of the RISC-V ISA manual.
+
+* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: A bitmask that contains performance
+ information about the selected set of processors.
+
+ * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNKNOWN`: The performance of misaligned
+ accesses is unknown.
+
+ * :c:macro:`RISCV_HWPROBE_MISALIGNED_EMULATED`: Misaligned accesses are
+ emulated via software, either in or below the kernel. These accesses are
+ always extremely slow.
+
+ * :c:macro:`RISCV_HWPROBE_MISALIGNED_SLOW`: Misaligned accesses are supported
+ in hardware, but are slower than the corresponding aligned accesses
+ sequences.
+
+ * :c:macro:`RISCV_HWPROBE_MISALIGNED_FAST`: Misaligned accesses are supported
+ in hardware and are faster than the corresponding aligned accesses
+ sequences.
+
+ * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNSUPPORTED`: Misaligned accesses are
+ not supported at all and will generate a misaligned address fault.
@@ -10,7 +10,9 @@
#include <linux/uaccess.h>
#include <asm/alternative.h>
#include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
#include <asm/errata_list.h>
+#include <asm/hwprobe.h>
#include <asm/patch.h>
#include <asm/vendorid_list.h>
@@ -108,3 +110,10 @@ void __init_or_module thead_errata_patch_func(struct alt_entry *begin, struct al
if (stage == RISCV_ALTERNATIVES_EARLY_BOOT)
local_flush_icache_all();
}
+
+void thead_feature_probe_func(unsigned int cpu, unsigned long archid,
+ unsigned long impid)
+{
+ if ((archid == 0) && (impid == 0))
+ per_cpu(misaligned_access_speed, cpu) = RISCV_HWPROBE_MISALIGNED_FAST;
+}
@@ -23,6 +23,7 @@
#define RISCV_ALTERNATIVES_MODULE 1 /* alternatives applied during module-init */
#define RISCV_ALTERNATIVES_EARLY_BOOT 2 /* alternatives applied before mmu start */
+void probe_vendor_features(unsigned int cpu);
void __init apply_boot_alternatives(void);
void __init apply_early_boot_alternatives(void);
void apply_module_alternatives(void *start, size_t length);
@@ -47,11 +48,15 @@ void thead_errata_patch_func(struct alt_entry *begin, struct alt_entry *end,
unsigned long archid, unsigned long impid,
unsigned int stage);
+void thead_feature_probe_func(unsigned int cpu, unsigned long archid,
+ unsigned long impid);
+
void riscv_cpufeature_patch_func(struct alt_entry *begin, struct alt_entry *end,
unsigned int stage);
#else /* CONFIG_RISCV_ALTERNATIVE */
+static inline void probe_vendor_features(unsigned int cpu) { }
static inline void apply_boot_alternatives(void) { }
static inline void apply_early_boot_alternatives(void) { }
static inline void apply_module_alternatives(void *start, size_t length) { }
@@ -18,4 +18,6 @@ struct riscv_cpuinfo {
DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo);
+DECLARE_PER_CPU(long, misaligned_access_speed);
+
#endif
@@ -8,6 +8,6 @@
#include <uapi/asm/hwprobe.h>
-#define RISCV_HWPROBE_MAX_KEY 4
+#define RISCV_HWPROBE_MAX_KEY 5
#endif
@@ -25,6 +25,13 @@ struct riscv_hwprobe {
#define RISCV_HWPROBE_KEY_IMA_EXT_0 4
#define RISCV_HWPROBE_IMA_FD (1 << 0)
#define RISCV_HWPROBE_IMA_C (1 << 1)
+#define RISCV_HWPROBE_KEY_CPUPERF_0 5
+#define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0)
+#define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0)
+#define RISCV_HWPROBE_MISALIGNED_SLOW (2 << 0)
+#define RISCV_HWPROBE_MISALIGNED_FAST (3 << 0)
+#define RISCV_HWPROBE_MISALIGNED_UNSUPPORTED (4 << 0)
+#define RISCV_HWPROBE_MISALIGNED_MASK (7 << 0)
/* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
#endif
@@ -23,6 +23,8 @@ struct cpu_manufacturer_info_t {
void (*patch_func)(struct alt_entry *begin, struct alt_entry *end,
unsigned long archid, unsigned long impid,
unsigned int stage);
+ void (*feature_probe_func)(unsigned int cpu, unsigned long archid,
+ unsigned long impid);
};
static void __init_or_module riscv_fill_cpu_mfr_info(struct cpu_manufacturer_info_t *cpu_mfr_info)
@@ -37,6 +39,7 @@ static void __init_or_module riscv_fill_cpu_mfr_info(struct cpu_manufacturer_inf
cpu_mfr_info->imp_id = sbi_get_mimpid();
#endif
+ cpu_mfr_info->feature_probe_func = NULL;
switch (cpu_mfr_info->vendor_id) {
#ifdef CONFIG_ERRATA_SIFIVE
case SIFIVE_VENDOR_ID:
@@ -46,6 +49,7 @@ static void __init_or_module riscv_fill_cpu_mfr_info(struct cpu_manufacturer_inf
#ifdef CONFIG_ERRATA_THEAD
case THEAD_VENDOR_ID:
cpu_mfr_info->patch_func = thead_errata_patch_func;
+ cpu_mfr_info->feature_probe_func = thead_feature_probe_func;
break;
#endif
default:
@@ -53,6 +57,20 @@ static void __init_or_module riscv_fill_cpu_mfr_info(struct cpu_manufacturer_inf
}
}
+/* Called on each CPU as it starts */
+void probe_vendor_features(unsigned int cpu)
+{
+ struct cpu_manufacturer_info_t cpu_mfr_info;
+
+ riscv_fill_cpu_mfr_info(&cpu_mfr_info);
+ if (!cpu_mfr_info.feature_probe_func)
+ return;
+
+ cpu_mfr_info.feature_probe_func(cpu,
+ cpu_mfr_info.arch_id,
+ cpu_mfr_info.imp_id);
+}
+
/*
* This is called very early in the boot process (directly after we run
* a feature detect on the boot CPU). No need to worry about other CPUs
@@ -82,6 +100,7 @@ void __init apply_boot_alternatives(void)
/* If called on non-boot cpu things could go wrong */
WARN_ON(smp_processor_id() != 0);
+ probe_vendor_features(0);
_apply_alternatives((struct alt_entry *)__alt_start,
(struct alt_entry *)__alt_end,
RISCV_ALTERNATIVES_BOOT);
@@ -32,6 +32,9 @@ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
DEFINE_STATIC_KEY_ARRAY_FALSE(riscv_isa_ext_keys, RISCV_ISA_EXT_KEY_MAX);
EXPORT_SYMBOL(riscv_isa_ext_keys);
+/* Performance information */
+DEFINE_PER_CPU(long, misaligned_access_speed);
+
/**
* riscv_isa_extension_base() - Get base extension word
*
@@ -167,6 +167,7 @@ asmlinkage __visible void smp_callin(void)
notify_cpu_starting(curr_cpuid);
numa_add_cpu(curr_cpuid);
set_cpu_online(curr_cpuid, 1);
+ probe_vendor_features(curr_cpuid);
/*
* Remote TLB flushes are ignored while the CPU is offline, so emit
@@ -7,6 +7,7 @@
#include <linux/syscalls.h>
#include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
#include <asm/hwprobe.h>
#include <asm/sbi.h>
#include <asm/switch_to.h>
@@ -117,6 +118,29 @@ static void hwprobe_arch_id(struct riscv_hwprobe *pair,
pair->value = id;
}
+static u64 hwprobe_misaligned(const struct cpumask *cpus)
+{
+ int cpu;
+ u64 perf = -1ULL;
+
+ for_each_cpu(cpu, cpus) {
+ int this_perf = per_cpu(misaligned_access_speed, cpu);
+
+ if (perf == -1ULL)
+ perf = this_perf;
+
+ if (perf != this_perf) {
+ perf = RISCV_HWPROBE_MISALIGNED_UNKNOWN;
+ break;
+ }
+ }
+
+ if (perf == -1ULL)
+ return RISCV_HWPROBE_MISALIGNED_UNKNOWN;
+
+ return perf;
+}
+
static void hwprobe_one_pair(struct riscv_hwprobe *pair,
const struct cpumask *cpus)
{
@@ -146,6 +170,10 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair,
break;
+ case RISCV_HWPROBE_KEY_CPUPERF_0:
+ pair->value = hwprobe_misaligned(cpus);
+ break;
+
/*
* For forward compatibility, unknown keys don't fail the whole
* call, but get their element key set to -1 and value set to 0