[v2,5/6] RISC-V: hwprobe: Support probing of misaligned access performance
Commit Message
This allows userspace to select various routines to use based on the
performance of misaligned access on the target hardware.
Co-developed-by: Palmer Dabbelt <palmer@rivosinc.com>
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
Signed-off-by: Evan Green <evan@rivosinc.com>
---
Changes in v2:
- Fixed logic error in if(of_property_read_string...) that caused crash
- Include cpufeature.h in cpufeature.h to avoid undeclared variable
warning.
- Added a _MASK define
- Fix random checkpatch complaints
Documentation/riscv/hwprobe.rst | 13 +++++++++++
arch/riscv/include/asm/cpufeature.h | 2 ++
arch/riscv/include/asm/hwprobe.h | 2 +-
arch/riscv/include/asm/smp.h | 9 ++++++++
arch/riscv/include/uapi/asm/hwprobe.h | 6 ++++++
arch/riscv/kernel/cpufeature.c | 31 +++++++++++++++++++++++++--
arch/riscv/kernel/sys_riscv.c | 23 ++++++++++++++++++++
7 files changed, 83 insertions(+), 3 deletions(-)
Comments
Hi Evan,
Thank you for the patch! Yet something to improve:
[auto build test ERROR on shuah-kselftest/next]
[also build test ERROR on shuah-kselftest/fixes robh/for-next soc/for-next linus/master v6.2-rc7]
[cannot apply to next-20230207]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Evan-Green/RISC-V-Move-struct-riscv_cpuinfo-to-new-header/20230207-041746
base: https://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git next
patch link: https://lore.kernel.org/r/20230206201455.1790329-6-evan%40rivosinc.com
patch subject: [PATCH v2 5/6] RISC-V: hwprobe: Support probing of misaligned access performance
config: riscv-randconfig-s052-20230206 (https://download.01.org/0day-ci/archive/20230207/202302071444.L48pajcK-lkp@intel.com/config)
compiler: riscv64-linux-gcc (GCC) 12.1.0
reproduce:
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# apt-get install sparse
# sparse version: v0.6.4-39-gce1a6720-dirty
# https://github.com/intel-lab-lkp/linux/commit/9e77be253d64809a98f31c17abd12761dd9fad8e
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Evan-Green/RISC-V-Move-struct-riscv_cpuinfo-to-new-header/20230207-041746
git checkout 9e77be253d64809a98f31c17abd12761dd9fad8e
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=riscv olddefconfig
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' O=build_dir ARCH=riscv SHELL=/bin/bash arch/riscv/kernel/
If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
All errors (new ones prefixed by >>):
arch/riscv/kernel/cpufeature.c: In function 'riscv_fill_hwcap':
>> arch/riscv/kernel/cpufeature.c:258:23: error: implicit declaration of function 'hartid_to_cpuid_map' [-Werror=implicit-function-declaration]
258 | cpu = hartid_to_cpuid_map(hartid);
| ^~~~~~~~~~~~~~~~~~~
cc1: some warnings being treated as errors
vim +/hartid_to_cpuid_map +258 arch/riscv/kernel/cpufeature.c
93
94 void __init riscv_fill_hwcap(void)
95 {
96 struct device_node *node;
97 const char *isa, *misaligned;
98 char print_str[NUM_ALPHA_EXTS + 1];
99 int i, j, rc;
100 unsigned long isa2hwcap[26] = {0};
101 unsigned long hartid, cpu;
102
103 isa2hwcap['i' - 'a'] = COMPAT_HWCAP_ISA_I;
104 isa2hwcap['m' - 'a'] = COMPAT_HWCAP_ISA_M;
105 isa2hwcap['a' - 'a'] = COMPAT_HWCAP_ISA_A;
106 isa2hwcap['f' - 'a'] = COMPAT_HWCAP_ISA_F;
107 isa2hwcap['d' - 'a'] = COMPAT_HWCAP_ISA_D;
108 isa2hwcap['c' - 'a'] = COMPAT_HWCAP_ISA_C;
109
110 elf_hwcap = 0;
111
112 bitmap_zero(riscv_isa, RISCV_ISA_EXT_MAX);
113
114 for_each_of_cpu_node(node) {
115 unsigned long this_hwcap = 0;
116 DECLARE_BITMAP(this_isa, RISCV_ISA_EXT_MAX);
117 const char *temp;
118
119 rc = riscv_of_processor_hartid(node, &hartid);
120 if (rc < 0)
121 continue;
122
123 if (of_property_read_string(node, "riscv,isa", &isa)) {
124 pr_warn("Unable to find \"riscv,isa\" devicetree entry\n");
125 continue;
126 }
127
128 temp = isa;
129 #if IS_ENABLED(CONFIG_32BIT)
130 if (!strncmp(isa, "rv32", 4))
131 isa += 4;
132 #elif IS_ENABLED(CONFIG_64BIT)
133 if (!strncmp(isa, "rv64", 4))
134 isa += 4;
135 #endif
136 /* The riscv,isa DT property must start with rv64 or rv32 */
137 if (temp == isa)
138 continue;
139 bitmap_zero(this_isa, RISCV_ISA_EXT_MAX);
140 for (; *isa; ++isa) {
141 const char *ext = isa++;
142 const char *ext_end = isa;
143 bool ext_long = false, ext_err = false;
144
145 switch (*ext) {
146 case 's':
147 /**
148 * Workaround for invalid single-letter 's' & 'u'(QEMU).
149 * No need to set the bit in riscv_isa as 's' & 'u' are
150 * not valid ISA extensions. It works until multi-letter
151 * extension starting with "Su" appears.
152 */
153 if (ext[-1] != '_' && ext[1] == 'u') {
154 ++isa;
155 ext_err = true;
156 break;
157 }
158 fallthrough;
159 case 'x':
160 case 'z':
161 ext_long = true;
162 /* Multi-letter extension must be delimited */
163 for (; *isa && *isa != '_'; ++isa)
164 if (unlikely(!islower(*isa)
165 && !isdigit(*isa)))
166 ext_err = true;
167 /* Parse backwards */
168 ext_end = isa;
169 if (unlikely(ext_err))
170 break;
171 if (!isdigit(ext_end[-1]))
172 break;
173 /* Skip the minor version */
174 while (isdigit(*--ext_end))
175 ;
176 if (ext_end[0] != 'p'
177 || !isdigit(ext_end[-1])) {
178 /* Advance it to offset the pre-decrement */
179 ++ext_end;
180 break;
181 }
182 /* Skip the major version */
183 while (isdigit(*--ext_end))
184 ;
185 ++ext_end;
186 break;
187 default:
188 if (unlikely(!islower(*ext))) {
189 ext_err = true;
190 break;
191 }
192 /* Find next extension */
193 if (!isdigit(*isa))
194 break;
195 /* Skip the minor version */
196 while (isdigit(*++isa))
197 ;
198 if (*isa != 'p')
199 break;
200 if (!isdigit(*++isa)) {
201 --isa;
202 break;
203 }
204 /* Skip the major version */
205 while (isdigit(*++isa))
206 ;
207 break;
208 }
209 if (*isa != '_')
210 --isa;
211
212 #define SET_ISA_EXT_MAP(name, bit) \
213 do { \
214 if ((ext_end - ext == sizeof(name) - 1) && \
215 !memcmp(ext, name, sizeof(name) - 1) && \
216 riscv_isa_extension_check(bit)) \
217 set_bit(bit, this_isa); \
218 } while (false) \
219
220 if (unlikely(ext_err))
221 continue;
222 if (!ext_long) {
223 int nr = *ext - 'a';
224
225 if (riscv_isa_extension_check(nr)) {
226 this_hwcap |= isa2hwcap[nr];
227 set_bit(nr, this_isa);
228 }
229 } else {
230 SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF);
231 SET_ISA_EXT_MAP("svpbmt", RISCV_ISA_EXT_SVPBMT);
232 SET_ISA_EXT_MAP("zicbom", RISCV_ISA_EXT_ZICBOM);
233 SET_ISA_EXT_MAP("zihintpause", RISCV_ISA_EXT_ZIHINTPAUSE);
234 SET_ISA_EXT_MAP("sstc", RISCV_ISA_EXT_SSTC);
235 SET_ISA_EXT_MAP("svinval", RISCV_ISA_EXT_SVINVAL);
236 }
237 #undef SET_ISA_EXT_MAP
238 }
239
240 /*
241 * All "okay" hart should have same isa. Set HWCAP based on
242 * common capabilities of every "okay" hart, in case they don't
243 * have.
244 */
245 if (elf_hwcap)
246 elf_hwcap &= this_hwcap;
247 else
248 elf_hwcap = this_hwcap;
249
250 if (bitmap_empty(riscv_isa, RISCV_ISA_EXT_MAX))
251 bitmap_copy(riscv_isa, this_isa, RISCV_ISA_EXT_MAX);
252 else
253 bitmap_and(riscv_isa, riscv_isa, this_isa, RISCV_ISA_EXT_MAX);
254
255 /*
256 * Check for the performance of misaligned accesses.
257 */
> 258 cpu = hartid_to_cpuid_map(hartid);
259 if (cpu < 0)
260 continue;
261
262 if (!of_property_read_string(node, "riscv,misaligned-access-performance",
263 &misaligned)) {
264 if (strcmp(misaligned, "emulated") == 0)
265 per_cpu(misaligned_access_speed, cpu) =
266 RISCV_HWPROBE_MISALIGNED_EMULATED;
267
268 if (strcmp(misaligned, "slow") == 0)
269 per_cpu(misaligned_access_speed, cpu) =
270 RISCV_HWPROBE_MISALIGNED_SLOW;
271
272 if (strcmp(misaligned, "fast") == 0)
273 per_cpu(misaligned_access_speed, cpu) =
274 RISCV_HWPROBE_MISALIGNED_FAST;
275 }
276 }
277
278 /* We don't support systems with F but without D, so mask those out
279 * here. */
280 if ((elf_hwcap & COMPAT_HWCAP_ISA_F) && !(elf_hwcap & COMPAT_HWCAP_ISA_D)) {
281 pr_info("This kernel does not support systems with F but not D\n");
282 elf_hwcap &= ~COMPAT_HWCAP_ISA_F;
283 }
284
285 memset(print_str, 0, sizeof(print_str));
286 for (i = 0, j = 0; i < NUM_ALPHA_EXTS; i++)
287 if (riscv_isa[0] & BIT_MASK(i))
288 print_str[j++] = (char)('a' + i);
289 pr_info("riscv: base ISA extensions %s\n", print_str);
290
291 memset(print_str, 0, sizeof(print_str));
292 for (i = 0, j = 0; i < NUM_ALPHA_EXTS; i++)
293 if (elf_hwcap & BIT_MASK(i))
294 print_str[j++] = (char)('a' + i);
295 pr_info("riscv: ELF capabilities %s\n", print_str);
296
297 for_each_set_bit(i, riscv_isa, RISCV_ISA_EXT_MAX) {
298 j = riscv_isa_ext2key(i);
299 if (j >= 0)
300 static_branch_enable(&riscv_isa_ext_keys[j]);
301 }
302 }
303
On Mon, Feb 06, 2023 at 12:14:54PM -0800, Evan Green wrote:
> This allows userspace to select various routines to use based on the
> performance of misaligned access on the target hardware.
>
> Co-developed-by: Palmer Dabbelt <palmer@rivosinc.com>
> Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
> Signed-off-by: Evan Green <evan@rivosinc.com>
>
> ---
>
> Changes in v2:
> - Fixed logic error in if(of_property_read_string...) that caused crash
> - Include cpufeature.h in cpufeature.h to avoid undeclared variable
> warning.
> - Added a _MASK define
> - Fix random checkpatch complaints
>
> Documentation/riscv/hwprobe.rst | 13 +++++++++++
> arch/riscv/include/asm/cpufeature.h | 2 ++
> arch/riscv/include/asm/hwprobe.h | 2 +-
> arch/riscv/include/asm/smp.h | 9 ++++++++
> arch/riscv/include/uapi/asm/hwprobe.h | 6 ++++++
> arch/riscv/kernel/cpufeature.c | 31 +++++++++++++++++++++++++--
> arch/riscv/kernel/sys_riscv.c | 23 ++++++++++++++++++++
> 7 files changed, 83 insertions(+), 3 deletions(-)
>
> diff --git a/Documentation/riscv/hwprobe.rst b/Documentation/riscv/hwprobe.rst
> index ce186967861f..0dc75e83e127 100644
> --- a/Documentation/riscv/hwprobe.rst
> +++ b/Documentation/riscv/hwprobe.rst
> @@ -51,3 +51,16 @@ The following keys are defined:
> not minNum/maxNum") of the RISC-V ISA manual.
> * :RISCV_HWPROBE_IMA_C:: The C extension is supported, as defined by
> version 2.2 of the RISC-V ISA manual.
> +* :RISCV_HWPROBE_KEY_PERF_0:: A bitmask that contains performance information
This doesn't match what's defined?
> + about the selected set of processors.
> + * :RISCV_HWPROBE_MISALIGNED_UNKNOWN:: The performance of misaligned
> + accesses is unknown.
> + * :RISCV_HWPROBE_MISALIGNED_EMULATED:: Misaligned accesses are emulated via
> + software, either in or below the kernel. These accesses are always
> + extremely slow.
> + * :RISCV_HWPROBE_MISALIGNED_SLOW:: Misaligned accesses are supported in
> + hardware, but are slower than the cooresponding aligned accesses
> + sequences.
> + * :RISCV_HWPROBE_MISALIGNED_FAST:: Misaligned accesses are supported in
> + hardware and are faster than the cooresponding aligned accesses
> + sequences.
> diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h
> index 3831b638ecab..6c1759091e44 100644
> --- a/arch/riscv/include/asm/smp.h
> +++ b/arch/riscv/include/asm/smp.h
> @@ -26,6 +26,15 @@ struct riscv_ipi_ops {
> */
> extern unsigned long __cpuid_to_hartid_map[NR_CPUS];
> #define cpuid_to_hartid_map(cpu) __cpuid_to_hartid_map[cpu]
> +static inline long hartid_to_cpuid_map(unsigned long hartid)
> +{
> + long i;
> +
> + for (i = 0; i < NR_CPUS; ++i)
I'm never (or not yet?) sure about these things.
Should this be for_each_possible_cpu()?
> + if (cpuid_to_hartid_map(i) == hartid)
> + return i;
> + return -1;
> +}
>
> /* print IPI stats */
> void show_ipi_stats(struct seq_file *p, int prec);
> diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h
> index ce39d6e74103..5d55e2da2b1f 100644
> --- a/arch/riscv/include/uapi/asm/hwprobe.h
> +++ b/arch/riscv/include/uapi/asm/hwprobe.h
> @@ -25,5 +25,11 @@ struct riscv_hwprobe {
> #define RISCV_HWPROBE_KEY_IMA_EXT_0 4
> #define RISCV_HWPROBE_IMA_FD (1 << 0)
> #define RISCV_HWPROBE_IMA_C (1 << 1)
> +#define RISCV_HWPROBE_KEY_CPUPERF_0 5
> +#define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0)
> +#define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0)
> +#define RISCV_HWPROBE_MISALIGNED_SLOW (2 << 0)
> +#define RISCV_HWPROBE_MISALIGNED_FAST (3 << 0)
> +#define RISCV_HWPROBE_MISALIGNED_MASK (3 << 0)
Why is it UNKNOWN rather than UNSUPPORTED?
I thought I saw Palmer saying that there is no requirement to support
misaligned accesses any more.
Plenty of old DTs are going to lack this property so would be UNKNOWN,
and I *assume* that the user of the syscall is gonna conflate the two,
but the rationale interests me.
> /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
> #endif
> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> index 93e45560af30..12af6f7a2f53 100644
> --- a/arch/riscv/kernel/cpufeature.c
> +++ b/arch/riscv/kernel/cpufeature.c
> @@ -14,8 +14,10 @@
> #include <linux/of.h>
> #include <asm/alternative.h>
> #include <asm/cacheflush.h>
> +#include <asm/cpufeature.h>
> #include <asm/errata_list.h>
> #include <asm/hwcap.h>
> +#include <asm/hwprobe.h>
> #include <asm/patch.h>
> #include <asm/pgtable.h>
> #include <asm/processor.h>
> @@ -32,6 +34,9 @@ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
> DEFINE_STATIC_KEY_ARRAY_FALSE(riscv_isa_ext_keys, RISCV_ISA_EXT_KEY_MAX);
> EXPORT_SYMBOL(riscv_isa_ext_keys);
>
> +/* Performance information */
> +DEFINE_PER_CPU(long, misaligned_access_speed);
> +
> /**
> * riscv_isa_extension_base() - Get base extension word
> *
> @@ -89,11 +94,11 @@ static bool riscv_isa_extension_check(int id)
> void __init riscv_fill_hwcap(void)
> {
> struct device_node *node;
> - const char *isa;
> + const char *isa, *misaligned;
> char print_str[NUM_ALPHA_EXTS + 1];
> int i, j, rc;
> unsigned long isa2hwcap[26] = {0};
> - unsigned long hartid;
> + unsigned long hartid, cpu;
>
> isa2hwcap['i' - 'a'] = COMPAT_HWCAP_ISA_I;
> isa2hwcap['m' - 'a'] = COMPAT_HWCAP_ISA_M;
> @@ -246,6 +251,28 @@ void __init riscv_fill_hwcap(void)
> bitmap_copy(riscv_isa, this_isa, RISCV_ISA_EXT_MAX);
> else
> bitmap_and(riscv_isa, riscv_isa, this_isa, RISCV_ISA_EXT_MAX);
> +
> + /*
> + * Check for the performance of misaligned accesses.
> + */
> + cpu = hartid_to_cpuid_map(hartid);
> + if (cpu < 0)
> + continue;
> +
> + if (!of_property_read_string(node, "riscv,misaligned-access-performance",
> + &misaligned)) {
> + if (strcmp(misaligned, "emulated") == 0)
> + per_cpu(misaligned_access_speed, cpu) =
> + RISCV_HWPROBE_MISALIGNED_EMULATED;
> +
> + if (strcmp(misaligned, "slow") == 0)
> + per_cpu(misaligned_access_speed, cpu) =
> + RISCV_HWPROBE_MISALIGNED_SLOW;
> +
> + if (strcmp(misaligned, "fast") == 0)
> + per_cpu(misaligned_access_speed, cpu) =
> + RISCV_HWPROBE_MISALIGNED_FAST;
> + }
> }
>
> /* We don't support systems with F but without D, so mask those out
> diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
> index 74e0d72c877d..73d937c54f4e 100644
> --- a/arch/riscv/kernel/sys_riscv.c
> +++ b/arch/riscv/kernel/sys_riscv.c
> @@ -133,6 +133,25 @@ static long hwprobe_mid(struct riscv_hwprobe __user *pair, size_t key,
> return set_hwprobe(pair, id);
> }
>
> +static long hwprobe_misaligned(cpumask_t *cpus)
> +{
> + long cpu, perf = -1;
> +
> + for_each_cpu(cpu, cpus) {
> + long this_perf = per_cpu(misaligned_access_speed, cpu);
> +
> + if (perf == -1)
> + perf = this_perf;
> +
> + if (perf != this_perf)
> + perf = RISCV_HWPROBE_MISALIGNED_UNKNOWN;
Is there any reason to continue in the loop if this condition is met?
> + }
> +
> + if (perf == -1)
> + return RISCV_HWPROBE_MISALIGNED_UNKNOWN;
> + return perf;
heh, nitpicking the maintainer's use of whitespace... newline before
return please :)
Cheers,
Conor.
> +}
> +
> static
> long do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, long pair_count,
> long cpu_count, unsigned long __user *cpus_user,
> @@ -205,6 +224,10 @@ long do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, long pair_count,
> }
> break;
>
> + case RISCV_HWPROBE_KEY_CPUPERF_0:
> + ret = set_hwprobe(pairs, hwprobe_misaligned(&cpus));
> + break;
> +
> /*
> * For forward compatibility, unknown keys don't fail the whole
> * call, but get their element key set to -1 and value set to 0
> --
> 2.25.1
>
On Wed, Feb 15, 2023 at 1:57 PM Conor Dooley <conor@kernel.org> wrote:
>
> On Mon, Feb 06, 2023 at 12:14:54PM -0800, Evan Green wrote:
> > This allows userspace to select various routines to use based on the
> > performance of misaligned access on the target hardware.
> >
> > Co-developed-by: Palmer Dabbelt <palmer@rivosinc.com>
> > Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
> > Signed-off-by: Evan Green <evan@rivosinc.com>
> >
> > ---
> >
> > Changes in v2:
> > - Fixed logic error in if(of_property_read_string...) that caused crash
> > - Include cpufeature.h in cpufeature.h to avoid undeclared variable
> > warning.
> > - Added a _MASK define
> > - Fix random checkpatch complaints
> >
> > Documentation/riscv/hwprobe.rst | 13 +++++++++++
> > arch/riscv/include/asm/cpufeature.h | 2 ++
> > arch/riscv/include/asm/hwprobe.h | 2 +-
> > arch/riscv/include/asm/smp.h | 9 ++++++++
> > arch/riscv/include/uapi/asm/hwprobe.h | 6 ++++++
> > arch/riscv/kernel/cpufeature.c | 31 +++++++++++++++++++++++++--
> > arch/riscv/kernel/sys_riscv.c | 23 ++++++++++++++++++++
> > 7 files changed, 83 insertions(+), 3 deletions(-)
> >
> > diff --git a/Documentation/riscv/hwprobe.rst b/Documentation/riscv/hwprobe.rst
> > index ce186967861f..0dc75e83e127 100644
> > --- a/Documentation/riscv/hwprobe.rst
> > +++ b/Documentation/riscv/hwprobe.rst
> > @@ -51,3 +51,16 @@ The following keys are defined:
> > not minNum/maxNum") of the RISC-V ISA manual.
> > * :RISCV_HWPROBE_IMA_C:: The C extension is supported, as defined by
> > version 2.2 of the RISC-V ISA manual.
> > +* :RISCV_HWPROBE_KEY_PERF_0:: A bitmask that contains performance information
>
> This doesn't match what's defined?
>
> > + about the selected set of processors.
> > + * :RISCV_HWPROBE_MISALIGNED_UNKNOWN:: The performance of misaligned
> > + accesses is unknown.
> > + * :RISCV_HWPROBE_MISALIGNED_EMULATED:: Misaligned accesses are emulated via
> > + software, either in or below the kernel. These accesses are always
> > + extremely slow.
> > + * :RISCV_HWPROBE_MISALIGNED_SLOW:: Misaligned accesses are supported in
> > + hardware, but are slower than the cooresponding aligned accesses
> > + sequences.
> > + * :RISCV_HWPROBE_MISALIGNED_FAST:: Misaligned accesses are supported in
> > + hardware and are faster than the cooresponding aligned accesses
> > + sequences.
>
> > diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h
> > index 3831b638ecab..6c1759091e44 100644
> > --- a/arch/riscv/include/asm/smp.h
> > +++ b/arch/riscv/include/asm/smp.h
> > @@ -26,6 +26,15 @@ struct riscv_ipi_ops {
> > */
> > extern unsigned long __cpuid_to_hartid_map[NR_CPUS];
> > #define cpuid_to_hartid_map(cpu) __cpuid_to_hartid_map[cpu]
> > +static inline long hartid_to_cpuid_map(unsigned long hartid)
> > +{
> > + long i;
> > +
> > + for (i = 0; i < NR_CPUS; ++i)
>
> I'm never (or not yet?) sure about these things.
> Should this be for_each_possible_cpu()?
Me neither. I believe it's the same, as for_each_possible_cpu()
iterates over a CPU mask of all 1s, and the size of struct cpumask is
set by NR_CPUS. Some architectures appear to have an
init_cpu_possible() function to further restrict the set, though riscv
does not. It's probably better to use for_each_possible_cpu() though
in case a call to init_cpu_possible() ever does get added.
>
> > + if (cpuid_to_hartid_map(i) == hartid)
> > + return i;
> > + return -1;
> > +}
> >
> > /* print IPI stats */
> > void show_ipi_stats(struct seq_file *p, int prec);
> > diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h
> > index ce39d6e74103..5d55e2da2b1f 100644
> > --- a/arch/riscv/include/uapi/asm/hwprobe.h
> > +++ b/arch/riscv/include/uapi/asm/hwprobe.h
> > @@ -25,5 +25,11 @@ struct riscv_hwprobe {
> > #define RISCV_HWPROBE_KEY_IMA_EXT_0 4
> > #define RISCV_HWPROBE_IMA_FD (1 << 0)
> > #define RISCV_HWPROBE_IMA_C (1 << 1)
> > +#define RISCV_HWPROBE_KEY_CPUPERF_0 5
> > +#define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0)
> > +#define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0)
> > +#define RISCV_HWPROBE_MISALIGNED_SLOW (2 << 0)
> > +#define RISCV_HWPROBE_MISALIGNED_FAST (3 << 0)
> > +#define RISCV_HWPROBE_MISALIGNED_MASK (3 << 0)
>
> Why is it UNKNOWN rather than UNSUPPORTED?
> I thought I saw Palmer saying that there is no requirement to support
> misaligned accesses any more.
> Plenty of old DTs are going to lack this property so would be UNKNOWN,
> and I *assume* that the user of the syscall is gonna conflate the two,
> but the rationale interests me.
Palmer had mentioned on the DT bindings patch that historically it was
required but emulated. So because old binaries assumed it was there,
the default values for DTs without this needs to imply "supported, but
no idea how fast it is".
But you bring up an interesting point: should the bindings and these
defines have a value that indicates no support at all for unaligned
accesses? We could always add the value to the bindings later, but
maybe we should leave space in this field now.
-Evan
@@ -51,3 +51,16 @@ The following keys are defined:
not minNum/maxNum") of the RISC-V ISA manual.
* :RISCV_HWPROBE_IMA_C:: The C extension is supported, as defined by
version 2.2 of the RISC-V ISA manual.
+* :RISCV_HWPROBE_KEY_PERF_0:: A bitmask that contains performance information
+ about the selected set of processors.
+ * :RISCV_HWPROBE_MISALIGNED_UNKNOWN:: The performance of misaligned
+ accesses is unknown.
+ * :RISCV_HWPROBE_MISALIGNED_EMULATED:: Misaligned accesses are emulated via
+ software, either in or below the kernel. These accesses are always
+ extremely slow.
+ * :RISCV_HWPROBE_MISALIGNED_SLOW:: Misaligned accesses are supported in
+ hardware, but are slower than the cooresponding aligned accesses
+ sequences.
+ * :RISCV_HWPROBE_MISALIGNED_FAST:: Misaligned accesses are supported in
+ hardware and are faster than the cooresponding aligned accesses
+ sequences.
@@ -18,4 +18,6 @@ struct riscv_cpuinfo {
DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo);
+DECLARE_PER_CPU(long, misaligned_access_speed);
+
#endif
@@ -8,6 +8,6 @@
#include <uapi/asm/hwprobe.h>
-#define RISCV_HWPROBE_MAX_KEY 4
+#define RISCV_HWPROBE_MAX_KEY 5
#endif
@@ -26,6 +26,15 @@ struct riscv_ipi_ops {
*/
extern unsigned long __cpuid_to_hartid_map[NR_CPUS];
#define cpuid_to_hartid_map(cpu) __cpuid_to_hartid_map[cpu]
+static inline long hartid_to_cpuid_map(unsigned long hartid)
+{
+ long i;
+
+ for (i = 0; i < NR_CPUS; ++i)
+ if (cpuid_to_hartid_map(i) == hartid)
+ return i;
+ return -1;
+}
/* print IPI stats */
void show_ipi_stats(struct seq_file *p, int prec);
@@ -25,5 +25,11 @@ struct riscv_hwprobe {
#define RISCV_HWPROBE_KEY_IMA_EXT_0 4
#define RISCV_HWPROBE_IMA_FD (1 << 0)
#define RISCV_HWPROBE_IMA_C (1 << 1)
+#define RISCV_HWPROBE_KEY_CPUPERF_0 5
+#define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0)
+#define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0)
+#define RISCV_HWPROBE_MISALIGNED_SLOW (2 << 0)
+#define RISCV_HWPROBE_MISALIGNED_FAST (3 << 0)
+#define RISCV_HWPROBE_MISALIGNED_MASK (3 << 0)
/* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
#endif
@@ -14,8 +14,10 @@
#include <linux/of.h>
#include <asm/alternative.h>
#include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
#include <asm/errata_list.h>
#include <asm/hwcap.h>
+#include <asm/hwprobe.h>
#include <asm/patch.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
@@ -32,6 +34,9 @@ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
DEFINE_STATIC_KEY_ARRAY_FALSE(riscv_isa_ext_keys, RISCV_ISA_EXT_KEY_MAX);
EXPORT_SYMBOL(riscv_isa_ext_keys);
+/* Performance information */
+DEFINE_PER_CPU(long, misaligned_access_speed);
+
/**
* riscv_isa_extension_base() - Get base extension word
*
@@ -89,11 +94,11 @@ static bool riscv_isa_extension_check(int id)
void __init riscv_fill_hwcap(void)
{
struct device_node *node;
- const char *isa;
+ const char *isa, *misaligned;
char print_str[NUM_ALPHA_EXTS + 1];
int i, j, rc;
unsigned long isa2hwcap[26] = {0};
- unsigned long hartid;
+ unsigned long hartid, cpu;
isa2hwcap['i' - 'a'] = COMPAT_HWCAP_ISA_I;
isa2hwcap['m' - 'a'] = COMPAT_HWCAP_ISA_M;
@@ -246,6 +251,28 @@ void __init riscv_fill_hwcap(void)
bitmap_copy(riscv_isa, this_isa, RISCV_ISA_EXT_MAX);
else
bitmap_and(riscv_isa, riscv_isa, this_isa, RISCV_ISA_EXT_MAX);
+
+ /*
+ * Check for the performance of misaligned accesses.
+ */
+ cpu = hartid_to_cpuid_map(hartid);
+ if (cpu < 0)
+ continue;
+
+ if (!of_property_read_string(node, "riscv,misaligned-access-performance",
+ &misaligned)) {
+ if (strcmp(misaligned, "emulated") == 0)
+ per_cpu(misaligned_access_speed, cpu) =
+ RISCV_HWPROBE_MISALIGNED_EMULATED;
+
+ if (strcmp(misaligned, "slow") == 0)
+ per_cpu(misaligned_access_speed, cpu) =
+ RISCV_HWPROBE_MISALIGNED_SLOW;
+
+ if (strcmp(misaligned, "fast") == 0)
+ per_cpu(misaligned_access_speed, cpu) =
+ RISCV_HWPROBE_MISALIGNED_FAST;
+ }
}
/* We don't support systems with F but without D, so mask those out
@@ -133,6 +133,25 @@ static long hwprobe_mid(struct riscv_hwprobe __user *pair, size_t key,
return set_hwprobe(pair, id);
}
+static long hwprobe_misaligned(cpumask_t *cpus)
+{
+ long cpu, perf = -1;
+
+ for_each_cpu(cpu, cpus) {
+ long this_perf = per_cpu(misaligned_access_speed, cpu);
+
+ if (perf == -1)
+ perf = this_perf;
+
+ if (perf != this_perf)
+ perf = RISCV_HWPROBE_MISALIGNED_UNKNOWN;
+ }
+
+ if (perf == -1)
+ return RISCV_HWPROBE_MISALIGNED_UNKNOWN;
+ return perf;
+}
+
static
long do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, long pair_count,
long cpu_count, unsigned long __user *cpus_user,
@@ -205,6 +224,10 @@ long do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, long pair_count,
}
break;
+ case RISCV_HWPROBE_KEY_CPUPERF_0:
+ ret = set_hwprobe(pairs, hwprobe_misaligned(&cpus));
+ break;
+
/*
* For forward compatibility, unknown keys don't fail the whole
* call, but get their element key set to -1 and value set to 0