[v2,5/6] RISC-V: hwprobe: Support probing of misaligned access performance

Message ID 20230206201455.1790329-6-evan@rivosinc.com
State New
Headers
Series RISC-V Hardware Probing User Interface |

Commit Message

Evan Green Feb. 6, 2023, 8:14 p.m. UTC
  This allows userspace to select various routines to use based on the
performance of misaligned access on the target hardware.

Co-developed-by: Palmer Dabbelt <palmer@rivosinc.com>
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
Signed-off-by: Evan Green <evan@rivosinc.com>

---

Changes in v2:
 - Fixed logic error in if(of_property_read_string...) that caused crash
 - Include cpufeature.h in cpufeature.h to avoid undeclared variable
   warning.
 - Added a _MASK define
 - Fix random checkpatch complaints

 Documentation/riscv/hwprobe.rst       | 13 +++++++++++
 arch/riscv/include/asm/cpufeature.h   |  2 ++
 arch/riscv/include/asm/hwprobe.h      |  2 +-
 arch/riscv/include/asm/smp.h          |  9 ++++++++
 arch/riscv/include/uapi/asm/hwprobe.h |  6 ++++++
 arch/riscv/kernel/cpufeature.c        | 31 +++++++++++++++++++++++++--
 arch/riscv/kernel/sys_riscv.c         | 23 ++++++++++++++++++++
 7 files changed, 83 insertions(+), 3 deletions(-)
  

Comments

kernel test robot Feb. 7, 2023, 7:02 a.m. UTC | #1
Hi Evan,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on shuah-kselftest/next]
[also build test ERROR on shuah-kselftest/fixes robh/for-next soc/for-next linus/master v6.2-rc7]
[cannot apply to next-20230207]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Evan-Green/RISC-V-Move-struct-riscv_cpuinfo-to-new-header/20230207-041746
base:   https://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git next
patch link:    https://lore.kernel.org/r/20230206201455.1790329-6-evan%40rivosinc.com
patch subject: [PATCH v2 5/6] RISC-V: hwprobe: Support probing of misaligned access performance
config: riscv-randconfig-s052-20230206 (https://download.01.org/0day-ci/archive/20230207/202302071444.L48pajcK-lkp@intel.com/config)
compiler: riscv64-linux-gcc (GCC) 12.1.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # apt-get install sparse
        # sparse version: v0.6.4-39-gce1a6720-dirty
        # https://github.com/intel-lab-lkp/linux/commit/9e77be253d64809a98f31c17abd12761dd9fad8e
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Evan-Green/RISC-V-Move-struct-riscv_cpuinfo-to-new-header/20230207-041746
        git checkout 9e77be253d64809a98f31c17abd12761dd9fad8e
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=riscv olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=riscv SHELL=/bin/bash arch/riscv/kernel/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   arch/riscv/kernel/cpufeature.c: In function 'riscv_fill_hwcap':
>> arch/riscv/kernel/cpufeature.c:258:23: error: implicit declaration of function 'hartid_to_cpuid_map' [-Werror=implicit-function-declaration]
     258 |                 cpu = hartid_to_cpuid_map(hartid);
         |                       ^~~~~~~~~~~~~~~~~~~
   cc1: some warnings being treated as errors


vim +/hartid_to_cpuid_map +258 arch/riscv/kernel/cpufeature.c

    93	
    94	void __init riscv_fill_hwcap(void)
    95	{
    96		struct device_node *node;
    97		const char *isa, *misaligned;
    98		char print_str[NUM_ALPHA_EXTS + 1];
    99		int i, j, rc;
   100		unsigned long isa2hwcap[26] = {0};
   101		unsigned long hartid, cpu;
   102	
   103		isa2hwcap['i' - 'a'] = COMPAT_HWCAP_ISA_I;
   104		isa2hwcap['m' - 'a'] = COMPAT_HWCAP_ISA_M;
   105		isa2hwcap['a' - 'a'] = COMPAT_HWCAP_ISA_A;
   106		isa2hwcap['f' - 'a'] = COMPAT_HWCAP_ISA_F;
   107		isa2hwcap['d' - 'a'] = COMPAT_HWCAP_ISA_D;
   108		isa2hwcap['c' - 'a'] = COMPAT_HWCAP_ISA_C;
   109	
   110		elf_hwcap = 0;
   111	
   112		bitmap_zero(riscv_isa, RISCV_ISA_EXT_MAX);
   113	
   114		for_each_of_cpu_node(node) {
   115			unsigned long this_hwcap = 0;
   116			DECLARE_BITMAP(this_isa, RISCV_ISA_EXT_MAX);
   117			const char *temp;
   118	
   119			rc = riscv_of_processor_hartid(node, &hartid);
   120			if (rc < 0)
   121				continue;
   122	
   123			if (of_property_read_string(node, "riscv,isa", &isa)) {
   124				pr_warn("Unable to find \"riscv,isa\" devicetree entry\n");
   125				continue;
   126			}
   127	
   128			temp = isa;
   129	#if IS_ENABLED(CONFIG_32BIT)
   130			if (!strncmp(isa, "rv32", 4))
   131				isa += 4;
   132	#elif IS_ENABLED(CONFIG_64BIT)
   133			if (!strncmp(isa, "rv64", 4))
   134				isa += 4;
   135	#endif
   136			/* The riscv,isa DT property must start with rv64 or rv32 */
   137			if (temp == isa)
   138				continue;
   139			bitmap_zero(this_isa, RISCV_ISA_EXT_MAX);
   140			for (; *isa; ++isa) {
   141				const char *ext = isa++;
   142				const char *ext_end = isa;
   143				bool ext_long = false, ext_err = false;
   144	
   145				switch (*ext) {
   146				case 's':
   147					/**
   148					 * Workaround for invalid single-letter 's' & 'u'(QEMU).
   149					 * No need to set the bit in riscv_isa as 's' & 'u' are
   150					 * not valid ISA extensions. It works until multi-letter
   151					 * extension starting with "Su" appears.
   152					 */
   153					if (ext[-1] != '_' && ext[1] == 'u') {
   154						++isa;
   155						ext_err = true;
   156						break;
   157					}
   158					fallthrough;
   159				case 'x':
   160				case 'z':
   161					ext_long = true;
   162					/* Multi-letter extension must be delimited */
   163					for (; *isa && *isa != '_'; ++isa)
   164						if (unlikely(!islower(*isa)
   165							     && !isdigit(*isa)))
   166							ext_err = true;
   167					/* Parse backwards */
   168					ext_end = isa;
   169					if (unlikely(ext_err))
   170						break;
   171					if (!isdigit(ext_end[-1]))
   172						break;
   173					/* Skip the minor version */
   174					while (isdigit(*--ext_end))
   175						;
   176					if (ext_end[0] != 'p'
   177					    || !isdigit(ext_end[-1])) {
   178						/* Advance it to offset the pre-decrement */
   179						++ext_end;
   180						break;
   181					}
   182					/* Skip the major version */
   183					while (isdigit(*--ext_end))
   184						;
   185					++ext_end;
   186					break;
   187				default:
   188					if (unlikely(!islower(*ext))) {
   189						ext_err = true;
   190						break;
   191					}
   192					/* Find next extension */
   193					if (!isdigit(*isa))
   194						break;
   195					/* Skip the minor version */
   196					while (isdigit(*++isa))
   197						;
   198					if (*isa != 'p')
   199						break;
   200					if (!isdigit(*++isa)) {
   201						--isa;
   202						break;
   203					}
   204					/* Skip the major version */
   205					while (isdigit(*++isa))
   206						;
   207					break;
   208				}
   209				if (*isa != '_')
   210					--isa;
   211	
   212	#define SET_ISA_EXT_MAP(name, bit)						\
   213				do {							\
   214					if ((ext_end - ext == sizeof(name) - 1) &&	\
   215					     !memcmp(ext, name, sizeof(name) - 1) &&	\
   216					     riscv_isa_extension_check(bit))		\
   217						set_bit(bit, this_isa);			\
   218				} while (false)						\
   219	
   220				if (unlikely(ext_err))
   221					continue;
   222				if (!ext_long) {
   223					int nr = *ext - 'a';
   224	
   225					if (riscv_isa_extension_check(nr)) {
   226						this_hwcap |= isa2hwcap[nr];
   227						set_bit(nr, this_isa);
   228					}
   229				} else {
   230					SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF);
   231					SET_ISA_EXT_MAP("svpbmt", RISCV_ISA_EXT_SVPBMT);
   232					SET_ISA_EXT_MAP("zicbom", RISCV_ISA_EXT_ZICBOM);
   233					SET_ISA_EXT_MAP("zihintpause", RISCV_ISA_EXT_ZIHINTPAUSE);
   234					SET_ISA_EXT_MAP("sstc", RISCV_ISA_EXT_SSTC);
   235					SET_ISA_EXT_MAP("svinval", RISCV_ISA_EXT_SVINVAL);
   236				}
   237	#undef SET_ISA_EXT_MAP
   238			}
   239	
   240			/*
   241			 * All "okay" hart should have same isa. Set HWCAP based on
   242			 * common capabilities of every "okay" hart, in case they don't
   243			 * have.
   244			 */
   245			if (elf_hwcap)
   246				elf_hwcap &= this_hwcap;
   247			else
   248				elf_hwcap = this_hwcap;
   249	
   250			if (bitmap_empty(riscv_isa, RISCV_ISA_EXT_MAX))
   251				bitmap_copy(riscv_isa, this_isa, RISCV_ISA_EXT_MAX);
   252			else
   253				bitmap_and(riscv_isa, riscv_isa, this_isa, RISCV_ISA_EXT_MAX);
   254	
   255			/*
   256			 * Check for the performance of misaligned accesses.
   257			 */
 > 258			cpu = hartid_to_cpuid_map(hartid);
   259			if (cpu < 0)
   260				continue;
   261	
   262			if (!of_property_read_string(node, "riscv,misaligned-access-performance",
   263						     &misaligned)) {
   264				if (strcmp(misaligned, "emulated") == 0)
   265					per_cpu(misaligned_access_speed, cpu) =
   266						RISCV_HWPROBE_MISALIGNED_EMULATED;
   267	
   268				if (strcmp(misaligned, "slow") == 0)
   269					per_cpu(misaligned_access_speed, cpu) =
   270						RISCV_HWPROBE_MISALIGNED_SLOW;
   271	
   272				if (strcmp(misaligned, "fast") == 0)
   273					per_cpu(misaligned_access_speed, cpu) =
   274						RISCV_HWPROBE_MISALIGNED_FAST;
   275			}
   276		}
   277	
   278		/* We don't support systems with F but without D, so mask those out
   279		 * here. */
   280		if ((elf_hwcap & COMPAT_HWCAP_ISA_F) && !(elf_hwcap & COMPAT_HWCAP_ISA_D)) {
   281			pr_info("This kernel does not support systems with F but not D\n");
   282			elf_hwcap &= ~COMPAT_HWCAP_ISA_F;
   283		}
   284	
   285		memset(print_str, 0, sizeof(print_str));
   286		for (i = 0, j = 0; i < NUM_ALPHA_EXTS; i++)
   287			if (riscv_isa[0] & BIT_MASK(i))
   288				print_str[j++] = (char)('a' + i);
   289		pr_info("riscv: base ISA extensions %s\n", print_str);
   290	
   291		memset(print_str, 0, sizeof(print_str));
   292		for (i = 0, j = 0; i < NUM_ALPHA_EXTS; i++)
   293			if (elf_hwcap & BIT_MASK(i))
   294				print_str[j++] = (char)('a' + i);
   295		pr_info("riscv: ELF capabilities %s\n", print_str);
   296	
   297		for_each_set_bit(i, riscv_isa, RISCV_ISA_EXT_MAX) {
   298			j = riscv_isa_ext2key(i);
   299			if (j >= 0)
   300				static_branch_enable(&riscv_isa_ext_keys[j]);
   301		}
   302	}
   303
  
Conor Dooley Feb. 15, 2023, 9:57 p.m. UTC | #2
On Mon, Feb 06, 2023 at 12:14:54PM -0800, Evan Green wrote:
> This allows userspace to select various routines to use based on the
> performance of misaligned access on the target hardware.
> 
> Co-developed-by: Palmer Dabbelt <palmer@rivosinc.com>
> Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
> Signed-off-by: Evan Green <evan@rivosinc.com>
> 
> ---
> 
> Changes in v2:
>  - Fixed logic error in if(of_property_read_string...) that caused crash
>  - Include cpufeature.h in cpufeature.h to avoid undeclared variable
>    warning.
>  - Added a _MASK define
>  - Fix random checkpatch complaints
> 
>  Documentation/riscv/hwprobe.rst       | 13 +++++++++++
>  arch/riscv/include/asm/cpufeature.h   |  2 ++
>  arch/riscv/include/asm/hwprobe.h      |  2 +-
>  arch/riscv/include/asm/smp.h          |  9 ++++++++
>  arch/riscv/include/uapi/asm/hwprobe.h |  6 ++++++
>  arch/riscv/kernel/cpufeature.c        | 31 +++++++++++++++++++++++++--
>  arch/riscv/kernel/sys_riscv.c         | 23 ++++++++++++++++++++
>  7 files changed, 83 insertions(+), 3 deletions(-)
> 
> diff --git a/Documentation/riscv/hwprobe.rst b/Documentation/riscv/hwprobe.rst
> index ce186967861f..0dc75e83e127 100644
> --- a/Documentation/riscv/hwprobe.rst
> +++ b/Documentation/riscv/hwprobe.rst
> @@ -51,3 +51,16 @@ The following keys are defined:
>        not minNum/maxNum") of the RISC-V ISA manual.
>      * :RISCV_HWPROBE_IMA_C:: The C extension is supported, as defined by
>        version 2.2 of the RISC-V ISA manual.
> +* :RISCV_HWPROBE_KEY_PERF_0:: A bitmask that contains performance information

This doesn't match what's defined?

> +  about the selected set of processors.
> +    * :RISCV_HWPROBE_MISALIGNED_UNKNOWN:: The performance of misaligned
> +      accesses is unknown.
> +    * :RISCV_HWPROBE_MISALIGNED_EMULATED:: Misaligned accesses are emulated via
> +      software, either in or below the kernel.  These accesses are always
> +      extremely slow.
> +    * :RISCV_HWPROBE_MISALIGNED_SLOW:: Misaligned accesses are supported in
> +      hardware, but are slower than the cooresponding aligned accesses
> +      sequences.
> +    * :RISCV_HWPROBE_MISALIGNED_FAST:: Misaligned accesses are supported in
> +      hardware and are faster than the cooresponding aligned accesses
> +      sequences.

> diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h
> index 3831b638ecab..6c1759091e44 100644
> --- a/arch/riscv/include/asm/smp.h
> +++ b/arch/riscv/include/asm/smp.h
> @@ -26,6 +26,15 @@ struct riscv_ipi_ops {
>   */
>  extern unsigned long __cpuid_to_hartid_map[NR_CPUS];
>  #define cpuid_to_hartid_map(cpu)    __cpuid_to_hartid_map[cpu]
> +static inline long hartid_to_cpuid_map(unsigned long hartid)
> +{
> +	long i;
> +
> +	for (i = 0; i < NR_CPUS; ++i)

I'm never (or not yet?) sure about these things.
Should this be for_each_possible_cpu()?

> +		if (cpuid_to_hartid_map(i) == hartid)
> +			return i;
> +	return -1;
> +}
>  
>  /* print IPI stats */
>  void show_ipi_stats(struct seq_file *p, int prec);
> diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h
> index ce39d6e74103..5d55e2da2b1f 100644
> --- a/arch/riscv/include/uapi/asm/hwprobe.h
> +++ b/arch/riscv/include/uapi/asm/hwprobe.h
> @@ -25,5 +25,11 @@ struct riscv_hwprobe {
>  #define RISCV_HWPROBE_KEY_IMA_EXT_0	4
>  #define		RISCV_HWPROBE_IMA_FD		(1 << 0)
>  #define		RISCV_HWPROBE_IMA_C		(1 << 1)
> +#define RISCV_HWPROBE_KEY_CPUPERF_0	5
> +#define		RISCV_HWPROBE_MISALIGNED_UNKNOWN	(0 << 0)
> +#define		RISCV_HWPROBE_MISALIGNED_EMULATED	(1 << 0)
> +#define		RISCV_HWPROBE_MISALIGNED_SLOW		(2 << 0)
> +#define		RISCV_HWPROBE_MISALIGNED_FAST		(3 << 0)
> +#define		RISCV_HWPROBE_MISALIGNED_MASK		(3 << 0)

Why is it UNKNOWN rather than UNSUPPORTED?
I thought I saw Palmer saying that there is no requirement to support
misaligned accesses any more.
Plenty of old DTs are going to lack this property so would be UNKNOWN,
and I *assume* that the user of the syscall is gonna conflate the two,
but the rationale interests me.

>  /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
>  #endif
> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> index 93e45560af30..12af6f7a2f53 100644
> --- a/arch/riscv/kernel/cpufeature.c
> +++ b/arch/riscv/kernel/cpufeature.c
> @@ -14,8 +14,10 @@
>  #include <linux/of.h>
>  #include <asm/alternative.h>
>  #include <asm/cacheflush.h>
> +#include <asm/cpufeature.h>
>  #include <asm/errata_list.h>
>  #include <asm/hwcap.h>
> +#include <asm/hwprobe.h>
>  #include <asm/patch.h>
>  #include <asm/pgtable.h>
>  #include <asm/processor.h>
> @@ -32,6 +34,9 @@ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
>  DEFINE_STATIC_KEY_ARRAY_FALSE(riscv_isa_ext_keys, RISCV_ISA_EXT_KEY_MAX);
>  EXPORT_SYMBOL(riscv_isa_ext_keys);
>  
> +/* Performance information */
> +DEFINE_PER_CPU(long, misaligned_access_speed);
> +
>  /**
>   * riscv_isa_extension_base() - Get base extension word
>   *
> @@ -89,11 +94,11 @@ static bool riscv_isa_extension_check(int id)
>  void __init riscv_fill_hwcap(void)
>  {
>  	struct device_node *node;
> -	const char *isa;
> +	const char *isa, *misaligned;
>  	char print_str[NUM_ALPHA_EXTS + 1];
>  	int i, j, rc;
>  	unsigned long isa2hwcap[26] = {0};
> -	unsigned long hartid;
> +	unsigned long hartid, cpu;
>  
>  	isa2hwcap['i' - 'a'] = COMPAT_HWCAP_ISA_I;
>  	isa2hwcap['m' - 'a'] = COMPAT_HWCAP_ISA_M;
> @@ -246,6 +251,28 @@ void __init riscv_fill_hwcap(void)
>  			bitmap_copy(riscv_isa, this_isa, RISCV_ISA_EXT_MAX);
>  		else
>  			bitmap_and(riscv_isa, riscv_isa, this_isa, RISCV_ISA_EXT_MAX);
> +
> +		/*
> +		 * Check for the performance of misaligned accesses.
> +		 */
> +		cpu = hartid_to_cpuid_map(hartid);
> +		if (cpu < 0)
> +			continue;
> +
> +		if (!of_property_read_string(node, "riscv,misaligned-access-performance",
> +					     &misaligned)) {
> +			if (strcmp(misaligned, "emulated") == 0)
> +				per_cpu(misaligned_access_speed, cpu) =
> +					RISCV_HWPROBE_MISALIGNED_EMULATED;
> +
> +			if (strcmp(misaligned, "slow") == 0)
> +				per_cpu(misaligned_access_speed, cpu) =
> +					RISCV_HWPROBE_MISALIGNED_SLOW;
> +
> +			if (strcmp(misaligned, "fast") == 0)
> +				per_cpu(misaligned_access_speed, cpu) =
> +					RISCV_HWPROBE_MISALIGNED_FAST;
> +		}
>  	}
>  
>  	/* We don't support systems with F but without D, so mask those out
> diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
> index 74e0d72c877d..73d937c54f4e 100644
> --- a/arch/riscv/kernel/sys_riscv.c
> +++ b/arch/riscv/kernel/sys_riscv.c
> @@ -133,6 +133,25 @@ static long hwprobe_mid(struct riscv_hwprobe __user *pair, size_t key,
>  	return set_hwprobe(pair, id);
>  }
>  
> +static long hwprobe_misaligned(cpumask_t *cpus)
> +{
> +	long cpu, perf = -1;
> +
> +	for_each_cpu(cpu, cpus) {
> +		long this_perf = per_cpu(misaligned_access_speed, cpu);
> +
> +		if (perf == -1)
> +			perf = this_perf;
> +
> +		if (perf != this_perf)
> +			perf = RISCV_HWPROBE_MISALIGNED_UNKNOWN;

Is there any reason to continue in the loop if this condition is met?

> +	}
> +
> +	if (perf == -1)
> +		return RISCV_HWPROBE_MISALIGNED_UNKNOWN;
> +	return perf;

heh, nitpicking the maintainer's use of whitespace... newline before
return please :)

Cheers,
Conor.

> +}
> +
>  static
>  long do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, long pair_count,
>  		      long cpu_count, unsigned long __user *cpus_user,
> @@ -205,6 +224,10 @@ long do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, long pair_count,
>  			}
>  			break;
>  
> +		case RISCV_HWPROBE_KEY_CPUPERF_0:
> +			ret = set_hwprobe(pairs, hwprobe_misaligned(&cpus));
> +			break;
> +
>  		/*
>  		 * For forward compatibility, unknown keys don't fail the whole
>  		 * call, but get their element key set to -1 and value set to 0
> -- 
> 2.25.1
>
  
Evan Green Feb. 18, 2023, 12:15 a.m. UTC | #3
On Wed, Feb 15, 2023 at 1:57 PM Conor Dooley <conor@kernel.org> wrote:
>
> On Mon, Feb 06, 2023 at 12:14:54PM -0800, Evan Green wrote:
> > This allows userspace to select various routines to use based on the
> > performance of misaligned access on the target hardware.
> >
> > Co-developed-by: Palmer Dabbelt <palmer@rivosinc.com>
> > Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
> > Signed-off-by: Evan Green <evan@rivosinc.com>
> >
> > ---
> >
> > Changes in v2:
> >  - Fixed logic error in if(of_property_read_string...) that caused crash
> >  - Include cpufeature.h in cpufeature.h to avoid undeclared variable
> >    warning.
> >  - Added a _MASK define
> >  - Fix random checkpatch complaints
> >
> >  Documentation/riscv/hwprobe.rst       | 13 +++++++++++
> >  arch/riscv/include/asm/cpufeature.h   |  2 ++
> >  arch/riscv/include/asm/hwprobe.h      |  2 +-
> >  arch/riscv/include/asm/smp.h          |  9 ++++++++
> >  arch/riscv/include/uapi/asm/hwprobe.h |  6 ++++++
> >  arch/riscv/kernel/cpufeature.c        | 31 +++++++++++++++++++++++++--
> >  arch/riscv/kernel/sys_riscv.c         | 23 ++++++++++++++++++++
> >  7 files changed, 83 insertions(+), 3 deletions(-)
> >
> > diff --git a/Documentation/riscv/hwprobe.rst b/Documentation/riscv/hwprobe.rst
> > index ce186967861f..0dc75e83e127 100644
> > --- a/Documentation/riscv/hwprobe.rst
> > +++ b/Documentation/riscv/hwprobe.rst
> > @@ -51,3 +51,16 @@ The following keys are defined:
> >        not minNum/maxNum") of the RISC-V ISA manual.
> >      * :RISCV_HWPROBE_IMA_C:: The C extension is supported, as defined by
> >        version 2.2 of the RISC-V ISA manual.
> > +* :RISCV_HWPROBE_KEY_PERF_0:: A bitmask that contains performance information
>
> This doesn't match what's defined?
>
> > +  about the selected set of processors.
> > +    * :RISCV_HWPROBE_MISALIGNED_UNKNOWN:: The performance of misaligned
> > +      accesses is unknown.
> > +    * :RISCV_HWPROBE_MISALIGNED_EMULATED:: Misaligned accesses are emulated via
> > +      software, either in or below the kernel.  These accesses are always
> > +      extremely slow.
> > +    * :RISCV_HWPROBE_MISALIGNED_SLOW:: Misaligned accesses are supported in
> > +      hardware, but are slower than the cooresponding aligned accesses
> > +      sequences.
> > +    * :RISCV_HWPROBE_MISALIGNED_FAST:: Misaligned accesses are supported in
> > +      hardware and are faster than the cooresponding aligned accesses
> > +      sequences.
>
> > diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h
> > index 3831b638ecab..6c1759091e44 100644
> > --- a/arch/riscv/include/asm/smp.h
> > +++ b/arch/riscv/include/asm/smp.h
> > @@ -26,6 +26,15 @@ struct riscv_ipi_ops {
> >   */
> >  extern unsigned long __cpuid_to_hartid_map[NR_CPUS];
> >  #define cpuid_to_hartid_map(cpu)    __cpuid_to_hartid_map[cpu]
> > +static inline long hartid_to_cpuid_map(unsigned long hartid)
> > +{
> > +     long i;
> > +
> > +     for (i = 0; i < NR_CPUS; ++i)
>
> I'm never (or not yet?) sure about these things.
> Should this be for_each_possible_cpu()?

Me neither. I believe it's the same, as for_each_possible_cpu()
iterates over a CPU mask of all 1s, and the size of struct cpumask is
set by NR_CPUS. Some architectures appear to have an
init_cpu_possible() function to further restrict the set, though riscv
does not. It's probably better to use for_each_possible_cpu() though
in case a call to init_cpu_possible() ever does get added.

>
> > +             if (cpuid_to_hartid_map(i) == hartid)
> > +                     return i;
> > +     return -1;
> > +}
> >
> >  /* print IPI stats */
> >  void show_ipi_stats(struct seq_file *p, int prec);
> > diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h
> > index ce39d6e74103..5d55e2da2b1f 100644
> > --- a/arch/riscv/include/uapi/asm/hwprobe.h
> > +++ b/arch/riscv/include/uapi/asm/hwprobe.h
> > @@ -25,5 +25,11 @@ struct riscv_hwprobe {
> >  #define RISCV_HWPROBE_KEY_IMA_EXT_0  4
> >  #define              RISCV_HWPROBE_IMA_FD            (1 << 0)
> >  #define              RISCV_HWPROBE_IMA_C             (1 << 1)
> > +#define RISCV_HWPROBE_KEY_CPUPERF_0  5
> > +#define              RISCV_HWPROBE_MISALIGNED_UNKNOWN        (0 << 0)
> > +#define              RISCV_HWPROBE_MISALIGNED_EMULATED       (1 << 0)
> > +#define              RISCV_HWPROBE_MISALIGNED_SLOW           (2 << 0)
> > +#define              RISCV_HWPROBE_MISALIGNED_FAST           (3 << 0)
> > +#define              RISCV_HWPROBE_MISALIGNED_MASK           (3 << 0)
>
> Why is it UNKNOWN rather than UNSUPPORTED?
> I thought I saw Palmer saying that there is no requirement to support
> misaligned accesses any more.
> Plenty of old DTs are going to lack this property so would be UNKNOWN,
> and I *assume* that the user of the syscall is gonna conflate the two,
> but the rationale interests me.

Palmer had mentioned on the DT bindings patch that historically it was
required but emulated. So because old binaries assumed it was there,
the default values for DTs without this needs to imply "supported, but
no idea how fast it is".

But you bring up an interesting point: should the bindings and these
defines have a value that indicates no support at all for unaligned
accesses? We could always add the value to the bindings later, but
maybe we should leave space in this field now.

-Evan
  

Patch

diff --git a/Documentation/riscv/hwprobe.rst b/Documentation/riscv/hwprobe.rst
index ce186967861f..0dc75e83e127 100644
--- a/Documentation/riscv/hwprobe.rst
+++ b/Documentation/riscv/hwprobe.rst
@@ -51,3 +51,16 @@  The following keys are defined:
       not minNum/maxNum") of the RISC-V ISA manual.
     * :RISCV_HWPROBE_IMA_C:: The C extension is supported, as defined by
       version 2.2 of the RISC-V ISA manual.
+* :RISCV_HWPROBE_KEY_PERF_0:: A bitmask that contains performance information
+  about the selected set of processors.
+    * :RISCV_HWPROBE_MISALIGNED_UNKNOWN:: The performance of misaligned
+      accesses is unknown.
+    * :RISCV_HWPROBE_MISALIGNED_EMULATED:: Misaligned accesses are emulated via
+      software, either in or below the kernel.  These accesses are always
+      extremely slow.
+    * :RISCV_HWPROBE_MISALIGNED_SLOW:: Misaligned accesses are supported in
+      hardware, but are slower than the cooresponding aligned accesses
+      sequences.
+    * :RISCV_HWPROBE_MISALIGNED_FAST:: Misaligned accesses are supported in
+      hardware and are faster than the cooresponding aligned accesses
+      sequences.
diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
index 66c251d98290..ac51a9e6387a 100644
--- a/arch/riscv/include/asm/cpufeature.h
+++ b/arch/riscv/include/asm/cpufeature.h
@@ -18,4 +18,6 @@  struct riscv_cpuinfo {
 
 DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo);
 
+DECLARE_PER_CPU(long, misaligned_access_speed);
+
 #endif
diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h
index 7e52f1e1fe10..4e45e33015bc 100644
--- a/arch/riscv/include/asm/hwprobe.h
+++ b/arch/riscv/include/asm/hwprobe.h
@@ -8,6 +8,6 @@ 
 
 #include <uapi/asm/hwprobe.h>
 
-#define RISCV_HWPROBE_MAX_KEY 4
+#define RISCV_HWPROBE_MAX_KEY 5
 
 #endif
diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h
index 3831b638ecab..6c1759091e44 100644
--- a/arch/riscv/include/asm/smp.h
+++ b/arch/riscv/include/asm/smp.h
@@ -26,6 +26,15 @@  struct riscv_ipi_ops {
  */
 extern unsigned long __cpuid_to_hartid_map[NR_CPUS];
 #define cpuid_to_hartid_map(cpu)    __cpuid_to_hartid_map[cpu]
+static inline long hartid_to_cpuid_map(unsigned long hartid)
+{
+	long i;
+
+	for (i = 0; i < NR_CPUS; ++i)
+		if (cpuid_to_hartid_map(i) == hartid)
+			return i;
+	return -1;
+}
 
 /* print IPI stats */
 void show_ipi_stats(struct seq_file *p, int prec);
diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h
index ce39d6e74103..5d55e2da2b1f 100644
--- a/arch/riscv/include/uapi/asm/hwprobe.h
+++ b/arch/riscv/include/uapi/asm/hwprobe.h
@@ -25,5 +25,11 @@  struct riscv_hwprobe {
 #define RISCV_HWPROBE_KEY_IMA_EXT_0	4
 #define		RISCV_HWPROBE_IMA_FD		(1 << 0)
 #define		RISCV_HWPROBE_IMA_C		(1 << 1)
+#define RISCV_HWPROBE_KEY_CPUPERF_0	5
+#define		RISCV_HWPROBE_MISALIGNED_UNKNOWN	(0 << 0)
+#define		RISCV_HWPROBE_MISALIGNED_EMULATED	(1 << 0)
+#define		RISCV_HWPROBE_MISALIGNED_SLOW		(2 << 0)
+#define		RISCV_HWPROBE_MISALIGNED_FAST		(3 << 0)
+#define		RISCV_HWPROBE_MISALIGNED_MASK		(3 << 0)
 /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
 #endif
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 93e45560af30..12af6f7a2f53 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -14,8 +14,10 @@ 
 #include <linux/of.h>
 #include <asm/alternative.h>
 #include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
 #include <asm/errata_list.h>
 #include <asm/hwcap.h>
+#include <asm/hwprobe.h>
 #include <asm/patch.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
@@ -32,6 +34,9 @@  static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
 DEFINE_STATIC_KEY_ARRAY_FALSE(riscv_isa_ext_keys, RISCV_ISA_EXT_KEY_MAX);
 EXPORT_SYMBOL(riscv_isa_ext_keys);
 
+/* Performance information */
+DEFINE_PER_CPU(long, misaligned_access_speed);
+
 /**
  * riscv_isa_extension_base() - Get base extension word
  *
@@ -89,11 +94,11 @@  static bool riscv_isa_extension_check(int id)
 void __init riscv_fill_hwcap(void)
 {
 	struct device_node *node;
-	const char *isa;
+	const char *isa, *misaligned;
 	char print_str[NUM_ALPHA_EXTS + 1];
 	int i, j, rc;
 	unsigned long isa2hwcap[26] = {0};
-	unsigned long hartid;
+	unsigned long hartid, cpu;
 
 	isa2hwcap['i' - 'a'] = COMPAT_HWCAP_ISA_I;
 	isa2hwcap['m' - 'a'] = COMPAT_HWCAP_ISA_M;
@@ -246,6 +251,28 @@  void __init riscv_fill_hwcap(void)
 			bitmap_copy(riscv_isa, this_isa, RISCV_ISA_EXT_MAX);
 		else
 			bitmap_and(riscv_isa, riscv_isa, this_isa, RISCV_ISA_EXT_MAX);
+
+		/*
+		 * Check for the performance of misaligned accesses.
+		 */
+		cpu = hartid_to_cpuid_map(hartid);
+		if (cpu < 0)
+			continue;
+
+		if (!of_property_read_string(node, "riscv,misaligned-access-performance",
+					     &misaligned)) {
+			if (strcmp(misaligned, "emulated") == 0)
+				per_cpu(misaligned_access_speed, cpu) =
+					RISCV_HWPROBE_MISALIGNED_EMULATED;
+
+			if (strcmp(misaligned, "slow") == 0)
+				per_cpu(misaligned_access_speed, cpu) =
+					RISCV_HWPROBE_MISALIGNED_SLOW;
+
+			if (strcmp(misaligned, "fast") == 0)
+				per_cpu(misaligned_access_speed, cpu) =
+					RISCV_HWPROBE_MISALIGNED_FAST;
+		}
 	}
 
 	/* We don't support systems with F but without D, so mask those out
diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
index 74e0d72c877d..73d937c54f4e 100644
--- a/arch/riscv/kernel/sys_riscv.c
+++ b/arch/riscv/kernel/sys_riscv.c
@@ -133,6 +133,25 @@  static long hwprobe_mid(struct riscv_hwprobe __user *pair, size_t key,
 	return set_hwprobe(pair, id);
 }
 
+static long hwprobe_misaligned(cpumask_t *cpus)
+{
+	long cpu, perf = -1;
+
+	for_each_cpu(cpu, cpus) {
+		long this_perf = per_cpu(misaligned_access_speed, cpu);
+
+		if (perf == -1)
+			perf = this_perf;
+
+		if (perf != this_perf)
+			perf = RISCV_HWPROBE_MISALIGNED_UNKNOWN;
+	}
+
+	if (perf == -1)
+		return RISCV_HWPROBE_MISALIGNED_UNKNOWN;
+	return perf;
+}
+
 static
 long do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, long pair_count,
 		      long cpu_count, unsigned long __user *cpus_user,
@@ -205,6 +224,10 @@  long do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, long pair_count,
 			}
 			break;
 
+		case RISCV_HWPROBE_KEY_CPUPERF_0:
+			ret = set_hwprobe(pairs, hwprobe_misaligned(&cpus));
+			break;
+
 		/*
 		 * For forward compatibility, unknown keys don't fail the whole
 		 * call, but get their element key set to -1 and value set to 0