[1/2] riscv: percpu: Add riscv percpu operations

Message ID 20221026104015.565468-2-haiwenyao@uniontech.com
State New
Headers
Series riscv: Rewrite percpu operations and support cmpxchg-local feature |

Commit Message

Wenyao Hai Oct. 26, 2022, 10:40 a.m. UTC
  This patch uses RISC-V AMO (Atomic Memory Operation) instructions to
optimise some this_cpu_and, this_cpu_or and this_cpu_add operations.
It reuses cmpxchg_local() to implement the this_cpu_cmpxchg macros.
It reuses xchg_relaxed() to implement the this_cpu_xchg macros.

Signed-off-by: Wen Yao <haiwenyao@uniontech.com>
---
 arch/riscv/include/asm/percpu.h | 101 ++++++++++++++++++++++++++++++++
 1 file changed, 101 insertions(+)
 create mode 100644 arch/riscv/include/asm/percpu.h
  

Comments

Conor Dooley Oct. 26, 2022, 6:54 p.m. UTC | #1
Hey Wen Yao,
Couple comments for you.

On Wed, Oct 26, 2022 at 06:40:14PM +0800, Wen Yao wrote:
> riscv: percpu:Add riscv percpu operations

Can you please consistently use ": " between parts of the commit
messages? For both this and patch 2/2.

> This patch use riscv AMO(Atomic Memory Operation) instructions to

nit: s/This patch use/Use/ (or better:
"Optimise some ... using RISC-V AMO (Atomic...")

> optimise some this_cpu_and this_cpu_or this_cpu_add operations.
> It reuse cmpxchg_local() to impletment this_cpu_cmpxchg macros.

s/It reuse/Reuse/, and "impletment" is a typo for "implement".

> It reuse xchg_relaxed() to impletment this_cpu_xchg macros.
> 
> Signed-off-by: Wen Yao <haiwenyao@uniontech.com>
> ---
>  arch/riscv/include/asm/percpu.h | 101 ++++++++++++++++++++++++++++++++
>  1 file changed, 101 insertions(+)
>  create mode 100644 arch/riscv/include/asm/percpu.h
> 
> diff --git a/arch/riscv/include/asm/percpu.h b/arch/riscv/include/asm/percpu.h
> new file mode 100644
> index 000000000000..ae796e328442
> --- /dev/null
> +++ b/arch/riscv/include/asm/percpu.h
> @@ -0,0 +1,101 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (C) 2020-2022 Union Tech Software Technology Corporation Limited
> + */
> +#ifndef __ASM_PERCPU_H
> +#define __ASM_PERCPU_H
> +
> +#include <asm/cmpxchg.h>
> +
> +#define PERCPU_OP(op, asm_op, c_op)                                            \
> +	static inline unsigned long __percpu_##op(void *ptr,                   \

Can you please make sure that these \s are actually aligned & swap the
spaces you've used for tabs? The other files that I checked in this
directory all use tabs for \ alignment in macros.

Thanks,
Conor.

> +						  unsigned long val, int size) \
> +	{                                                                      \
> +		unsigned long ret;                                             \
> +		switch (size) {                                                \
> +		case 4:                                                        \
> +			__asm__ __volatile__(                                  \
> +				"amo" #asm_op ".w"                             \
> +				" %[ret], %[val], %[ptr]\n"                   \
> +				: [ret] "=&r"(ret), [ptr] "+A"(*(u32 *)ptr)    \
> +				: [val] "r"(val));                             \
> +			break;                                                 \
> +		case 8:                                                        \
> +			__asm__ __volatile__(                                  \
> +				"amo" #asm_op ".d"                             \
> +				" %[ret], %[val], %[ptr]\n"                   \
> +				: [ret] "=&r"(ret), [ptr] "+A"(*(u64 *)ptr)    \
> +				: [val] "r"(val));                             \
> +			break;                                                 \
> +		default:                                                       \
> +			ret = 0;                                               \
> +			BUILD_BUG();                                           \
> +		}                                                              \
> +										\
> +		return ret c_op val;                                           \
> +	}
> +
> +PERCPU_OP(add, add, +)
> +PERCPU_OP(and, and, &)
> +PERCPU_OP(or, or, |)
> +#undef PERCPU_OP
> +
> +/* this_cpu_xchg */
> +#define _protect_xchg_local(pcp, val)                           \
> +	({                                                      \
> +		typeof(*raw_cpu_ptr(&(pcp))) __ret;             \
> +		preempt_disable_notrace();                      \
> +		__ret = xchg_relaxed(raw_cpu_ptr(&(pcp)), val); \
> +		preempt_enable_notrace();                       \
> +		__ret;                                          \
> +	})
> +
> +/* this_cpu_cmpxchg */
> +#define _protect_cmpxchg_local(pcp, o, n)                         \
> +	({                                                        \
> +		typeof(*raw_cpu_ptr(&(pcp))) __ret;               \
> +		preempt_disable_notrace();                        \
> +		__ret = cmpxchg_local(raw_cpu_ptr(&(pcp)), o, n); \
> +		preempt_enable_notrace();                         \
> +		__ret;                                            \
> +	})
> +
> +#define _pcp_protect(operation, pcp, val)                                     \
> +	({                                                                    \
> +		typeof(pcp) __retval;                                         \
> +		preempt_disable_notrace();                                    \
> +		__retval = (typeof(pcp))operation(raw_cpu_ptr(&(pcp)), (val), \
> +						  sizeof(pcp));               \
> +		preempt_enable_notrace();                                     \
> +		__retval;                                                     \
> +	})
> +
> +#define _percpu_add(pcp, val) _pcp_protect(__percpu_add, pcp, val)
> +
> +#define _percpu_add_return(pcp, val) _percpu_add(pcp, val)
> +
> +#define _percpu_and(pcp, val) _pcp_protect(__percpu_and, pcp, val)
> +
> +#define _percpu_or(pcp, val) _pcp_protect(__percpu_or, pcp, val)
> +
> +#define this_cpu_add_4(pcp, val) _percpu_add(pcp, val)
> +#define this_cpu_add_8(pcp, val) _percpu_add(pcp, val)
> +
> +#define this_cpu_add_return_4(pcp, val) _percpu_add_return(pcp, val)
> +#define this_cpu_add_return_8(pcp, val) _percpu_add_return(pcp, val)
> +
> +#define this_cpu_and_4(pcp, val) _percpu_and(pcp, val)
> +#define this_cpu_and_8(pcp, val) _percpu_and(pcp, val)
> +
> +#define this_cpu_or_4(pcp, val) _percpu_or(pcp, val)
> +#define this_cpu_or_8(pcp, val) _percpu_or(pcp, val)
> +
> +#define this_cpu_xchg_4(pcp, val) _protect_xchg_local(pcp, val)
> +#define this_cpu_xchg_8(pcp, val) _protect_xchg_local(pcp, val)
> +
> +#define this_cpu_cmpxchg_4(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
> +#define this_cpu_cmpxchg_8(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
> +
> +#include <asm-generic/percpu.h>
> +
> +#endif /* __ASM_PERCPU_H */
> -- 
> 2.25.1
>
  
kernel test robot Oct. 27, 2022, 12:14 a.m. UTC | #2
Hi Wen,

Thank you for the patch! However, there is something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v6.1-rc2 next-20221026]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Wen-Yao/riscv-Rewrite-percpu-operations-and-support-cmpxchg-local-feature/20221026-184310
patch link:    https://lore.kernel.org/r/20221026104015.565468-2-haiwenyao%40uniontech.com
patch subject: [PATCH 1/2] riscv: percpu:Add riscv percpu operations
config: riscv-rv32_defconfig
compiler: clang version 16.0.0 (https://github.com/llvm/llvm-project 791a7ae1ba3efd6bca96338e10ffde557ba83920)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install riscv cross compiling tool for clang build
        # apt-get install binutils-riscv-linux-gnu
        # https://github.com/intel-lab-lkp/linux/commit/bb33e199935b65336d45c367d7581682dff63cdb
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Wen-Yao/riscv-Rewrite-percpu-operations-and-support-cmpxchg-local-feature/20221026-184310
        git checkout bb33e199935b65336d45c367d7581682dff63cdb
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=riscv SHELL=/bin/bash

If you fix the issue, kindly add the following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from fs/nfs/file.c:20:
   In file included from include/linux/module.h:13:
   In file included from include/linux/stat.h:19:
   In file included from include/linux/time.h:6:
   In file included from include/linux/math64.h:6:
   In file included from include/linux/math.h:6:
   In file included from ./arch/riscv/include/generated/asm/div64.h:1:
   In file included from include/asm-generic/div64.h:55:
   In file included from include/linux/log2.h:12:
   In file included from include/linux/bitops.h:68:
   In file included from arch/riscv/include/asm/bitops.h:14:
   In file included from include/linux/irqflags.h:17:
>> arch/riscv/include/asm/percpu.h:38:1: error: instruction requires the following: RV64I Base Instruction Set
   PERCPU_OP(add, add, +)
   ^
   arch/riscv/include/asm/percpu.h:25:5: note: expanded from macro 'PERCPU_OP'
                                   "amo" #asm_op ".d"                             \
                                   ^
   <inline asm>:1:2: note: instantiated into assembly here
           amoadd.d a1, s3, 0(a0)
           ^
   In file included from fs/nfs/file.c:20:
   In file included from include/linux/module.h:13:
   In file included from include/linux/stat.h:19:
   In file included from include/linux/time.h:6:
   In file included from include/linux/math64.h:6:
   In file included from include/linux/math.h:6:
   In file included from ./arch/riscv/include/generated/asm/div64.h:1:
   In file included from include/asm-generic/div64.h:55:
   In file included from include/linux/log2.h:12:
   In file included from include/linux/bitops.h:68:
   In file included from arch/riscv/include/asm/bitops.h:14:
   In file included from include/linux/irqflags.h:17:
>> arch/riscv/include/asm/percpu.h:38:1: error: instruction requires the following: RV64I Base Instruction Set
   PERCPU_OP(add, add, +)
   ^
   arch/riscv/include/asm/percpu.h:25:5: note: expanded from macro 'PERCPU_OP'
                                   "amo" #asm_op ".d"                             \
                                   ^
   <inline asm>:1:2: note: instantiated into assembly here
           amoadd.d a1, s4, 0(a0)
           ^
   2 errors generated.
--
   In file included from fs/nfs/direct.c:43:
   In file included from include/linux/sched.h:14:
   In file included from include/linux/pid.h:5:
   In file included from include/linux/rculist.h:11:
   In file included from include/linux/rcupdate.h:26:
   In file included from include/linux/irqflags.h:17:
>> arch/riscv/include/asm/percpu.h:38:1: error: instruction requires the following: RV64I Base Instruction Set
   PERCPU_OP(add, add, +)
   ^
   arch/riscv/include/asm/percpu.h:25:5: note: expanded from macro 'PERCPU_OP'
                                   "amo" #asm_op ".d"                             \
                                   ^
   <inline asm>:1:2: note: instantiated into assembly here
           amoadd.d a1, s1, 0(a0)
           ^
   In file included from fs/nfs/direct.c:43:
   In file included from include/linux/sched.h:14:
   In file included from include/linux/pid.h:5:
   In file included from include/linux/rculist.h:11:
   In file included from include/linux/rcupdate.h:26:
   In file included from include/linux/irqflags.h:17:
>> arch/riscv/include/asm/percpu.h:38:1: error: instruction requires the following: RV64I Base Instruction Set
   PERCPU_OP(add, add, +)
   ^
   arch/riscv/include/asm/percpu.h:25:5: note: expanded from macro 'PERCPU_OP'
                                   "amo" #asm_op ".d"                             \
                                   ^
   <inline asm>:1:2: note: instantiated into assembly here
           amoadd.d a1, s4, 0(a0)
           ^
   2 errors generated.
--
   In file included from fs/nfs/read.c:11:
   In file included from include/linux/time.h:6:
   In file included from include/linux/math64.h:6:
   In file included from include/linux/math.h:6:
   In file included from ./arch/riscv/include/generated/asm/div64.h:1:
   In file included from include/asm-generic/div64.h:55:
   In file included from include/linux/log2.h:12:
   In file included from include/linux/bitops.h:68:
   In file included from arch/riscv/include/asm/bitops.h:14:
   In file included from include/linux/irqflags.h:17:
>> arch/riscv/include/asm/percpu.h:38:1: error: instruction requires the following: RV64I Base Instruction Set
   PERCPU_OP(add, add, +)
   ^
   arch/riscv/include/asm/percpu.h:25:5: note: expanded from macro 'PERCPU_OP'
                                   "amo" #asm_op ".d"                             \
                                   ^
   <inline asm>:1:2: note: instantiated into assembly here
           amoadd.d a2, a0, 0(a1)
           ^
   In file included from fs/nfs/read.c:11:
   In file included from include/linux/time.h:6:
   In file included from include/linux/math64.h:6:
   In file included from include/linux/math.h:6:
   In file included from ./arch/riscv/include/generated/asm/div64.h:1:
   In file included from include/asm-generic/div64.h:55:
   In file included from include/linux/log2.h:12:
   In file included from include/linux/bitops.h:68:
   In file included from arch/riscv/include/asm/bitops.h:14:
   In file included from include/linux/irqflags.h:17:
>> arch/riscv/include/asm/percpu.h:38:1: error: instruction requires the following: RV64I Base Instruction Set
   PERCPU_OP(add, add, +)
   ^
   arch/riscv/include/asm/percpu.h:25:5: note: expanded from macro 'PERCPU_OP'
                                   "amo" #asm_op ".d"                             \
                                   ^
   <inline asm>:1:2: note: instantiated into assembly here
           amoadd.d a2, a0, 0(a1)
           ^
   In file included from fs/nfs/read.c:11:
   In file included from include/linux/time.h:6:
   In file included from include/linux/math64.h:6:
   In file included from include/linux/math.h:6:
   In file included from ./arch/riscv/include/generated/asm/div64.h:1:
   In file included from include/asm-generic/div64.h:55:
   In file included from include/linux/log2.h:12:
   In file included from include/linux/bitops.h:68:
   In file included from arch/riscv/include/asm/bitops.h:14:
   In file included from include/linux/irqflags.h:17:
>> arch/riscv/include/asm/percpu.h:38:1: error: instruction requires the following: RV64I Base Instruction Set
   PERCPU_OP(add, add, +)
   ^
   arch/riscv/include/asm/percpu.h:25:5: note: expanded from macro 'PERCPU_OP'
                                   "amo" #asm_op ".d"                             \
                                   ^
   <inline asm>:1:2: note: instantiated into assembly here
           amoadd.d a2, a1, 0(a0)
           ^
   3 errors generated.
--
   In file included from fs/nfs/write.c:11:
   In file included from include/linux/slab.h:15:
   In file included from include/linux/gfp.h:7:
   In file included from include/linux/mmzone.h:8:
   In file included from include/linux/spinlock.h:56:
   In file included from include/linux/preempt.h:78:
   In file included from ./arch/riscv/include/generated/asm/preempt.h:1:
   In file included from include/asm-generic/preempt.h:5:
   In file included from include/linux/thread_info.h:14:
   In file included from include/linux/restart_block.h:10:
   In file included from include/linux/time64.h:5:
   In file included from include/linux/math64.h:6:
   In file included from include/linux/math.h:6:
   In file included from ./arch/riscv/include/generated/asm/div64.h:1:
   In file included from include/asm-generic/div64.h:55:
   In file included from include/linux/log2.h:12:
   In file included from include/linux/bitops.h:68:
   In file included from arch/riscv/include/asm/bitops.h:14:
   In file included from include/linux/irqflags.h:17:
>> arch/riscv/include/asm/percpu.h:38:1: error: instruction requires the following: RV64I Base Instruction Set
   PERCPU_OP(add, add, +)
   ^
   arch/riscv/include/asm/percpu.h:25:5: note: expanded from macro 'PERCPU_OP'
                                   "amo" #asm_op ".d"                             \
                                   ^
   <inline asm>:1:2: note: instantiated into assembly here
           amoadd.d a2, a1, 0(a0)
           ^
   In file included from fs/nfs/write.c:11:
   In file included from include/linux/slab.h:15:
   In file included from include/linux/gfp.h:7:
   In file included from include/linux/mmzone.h:8:
   In file included from include/linux/spinlock.h:56:
   In file included from include/linux/preempt.h:78:
   In file included from ./arch/riscv/include/generated/asm/preempt.h:1:
   In file included from include/asm-generic/preempt.h:5:
   In file included from include/linux/thread_info.h:14:
   In file included from include/linux/restart_block.h:10:
   In file included from include/linux/time64.h:5:
   In file included from include/linux/math64.h:6:
   In file included from include/linux/math.h:6:
   In file included from ./arch/riscv/include/generated/asm/div64.h:1:
   In file included from include/asm-generic/div64.h:55:
   In file included from include/linux/log2.h:12:
   In file included from include/linux/bitops.h:68:
   In file included from arch/riscv/include/asm/bitops.h:14:
   In file included from include/linux/irqflags.h:17:
>> arch/riscv/include/asm/percpu.h:38:1: error: instruction requires the following: RV64I Base Instruction Set
   PERCPU_OP(add, add, +)
   ^
   arch/riscv/include/asm/percpu.h:25:5: note: expanded from macro 'PERCPU_OP'
                                   "amo" #asm_op ".d"                             \
                                   ^
   <inline asm>:1:2: note: instantiated into assembly here
           amoadd.d a2, a1, 0(a0)
           ^
   2 errors generated.
--
   In file included from fs/ext4/mballoc.c:12:
   In file included from fs/ext4/ext4_jbd2.h:15:
   In file included from include/linux/fs.h:6:
   In file included from include/linux/wait_bit.h:8:
   In file included from include/linux/wait.h:9:
   In file included from include/linux/spinlock.h:56:
   In file included from include/linux/preempt.h:78:
   In file included from ./arch/riscv/include/generated/asm/preempt.h:1:
   In file included from include/asm-generic/preempt.h:5:
   In file included from include/linux/thread_info.h:14:
   In file included from include/linux/restart_block.h:10:
   In file included from include/linux/time64.h:5:
   In file included from include/linux/math64.h:6:
   In file included from include/linux/math.h:6:
   In file included from ./arch/riscv/include/generated/asm/div64.h:1:
   In file included from include/asm-generic/div64.h:55:
   In file included from include/linux/log2.h:12:
   In file included from include/linux/bitops.h:68:
   In file included from arch/riscv/include/asm/bitops.h:14:
   In file included from include/linux/irqflags.h:17:
>> arch/riscv/include/asm/percpu.h:38:1: error: instruction requires the following: RV64I Base Instruction Set
   PERCPU_OP(add, add, +)
   ^
   arch/riscv/include/asm/percpu.h:25:5: note: expanded from macro 'PERCPU_OP'
                                   "amo" #asm_op ".d"                             \
                                   ^
   <inline asm>:1:2: note: instantiated into assembly here
           amoadd.d a2, a1, 0(a0)
           ^
   In file included from fs/ext4/mballoc.c:12:
   In file included from fs/ext4/ext4_jbd2.h:15:
   In file included from include/linux/fs.h:6:
   In file included from include/linux/wait_bit.h:8:
   In file included from include/linux/wait.h:9:
   In file included from include/linux/spinlock.h:56:
   In file included from include/linux/preempt.h:78:
   In file included from ./arch/riscv/include/generated/asm/preempt.h:1:
   In file included from include/asm-generic/preempt.h:5:
   In file included from include/linux/thread_info.h:14:
   In file included from include/linux/restart_block.h:10:
   In file included from include/linux/time64.h:5:
   In file included from include/linux/math64.h:6:
   In file included from include/linux/math.h:6:
   In file included from ./arch/riscv/include/generated/asm/div64.h:1:
   In file included from include/asm-generic/div64.h:55:
   In file included from include/linux/log2.h:12:
   In file included from include/linux/bitops.h:68:
   In file included from arch/riscv/include/asm/bitops.h:14:
   In file included from include/linux/irqflags.h:17:
>> arch/riscv/include/asm/percpu.h:38:1: error: instruction requires the following: RV64I Base Instruction Set
   PERCPU_OP(add, add, +)
   ^
   arch/riscv/include/asm/percpu.h:25:5: note: expanded from macro 'PERCPU_OP'
                                   "amo" #asm_op ".d"                             \
                                   ^
   <inline asm>:1:2: note: instantiated into assembly here
           amoadd.d a4, a3, 0(a2)
           ^
   In file included from fs/ext4/mballoc.c:12:
   In file included from fs/ext4/ext4_jbd2.h:15:
   In file included from include/linux/fs.h:6:
   In file included from include/linux/wait_bit.h:8:
   In file included from include/linux/wait.h:9:
   In file included from include/linux/spinlock.h:56:
   In file included from include/linux/preempt.h:78:
   In file included from ./arch/riscv/include/generated/asm/preempt.h:1:
   In file included from include/asm-generic/preempt.h:5:
   In file included from include/linux/thread_info.h:14:
   In file included from include/linux/restart_block.h:10:
   In file included from include/linux/time64.h:5:
   In file included from include/linux/math64.h:6:
   In file included from include/linux/math.h:6:
   In file included from ./arch/riscv/include/generated/asm/div64.h:1:
   In file included from include/asm-generic/div64.h:55:
   In file included from include/linux/log2.h:12:
   In file included from include/linux/bitops.h:68:
   In file included from arch/riscv/include/asm/bitops.h:14:
   In file included from include/linux/irqflags.h:17:
>> arch/riscv/include/asm/percpu.h:38:1: error: instruction requires the following: RV64I Base Instruction Set
   PERCPU_OP(add, add, +)
   ^
   arch/riscv/include/asm/percpu.h:25:5: note: expanded from macro 'PERCPU_OP'
                                   "amo" #asm_op ".d"                             \
                                   ^
   <inline asm>:1:2: note: instantiated into assembly here
           amoadd.d a1, s9, 0(a0)
           ^
   3 errors generated.
--
   In file included from net/ipv6/ip6_output.c:26:
   In file included from include/linux/kernel.h:22:
   In file included from include/linux/bitops.h:68:
   In file included from arch/riscv/include/asm/bitops.h:14:
   In file included from include/linux/irqflags.h:17:
>> arch/riscv/include/asm/percpu.h:38:1: error: instruction requires the following: RV64I Base Instruction Set
   PERCPU_OP(add, add, +)
   ^
   arch/riscv/include/asm/percpu.h:25:5: note: expanded from macro 'PERCPU_OP'
                                   "amo" #asm_op ".d"                             \
                                   ^
   <inline asm>:1:2: note: instantiated into assembly here
           amoadd.d a4, a3, 0(a2)
           ^
   In file included from net/ipv6/ip6_output.c:26:
   In file included from include/linux/kernel.h:22:
   In file included from include/linux/bitops.h:68:
   In file included from arch/riscv/include/asm/bitops.h:14:
   In file included from include/linux/irqflags.h:17:
>> arch/riscv/include/asm/percpu.h:38:1: error: instruction requires the following: RV64I Base Instruction Set
   PERCPU_OP(add, add, +)
   ^
   arch/riscv/include/asm/percpu.h:25:5: note: expanded from macro 'PERCPU_OP'
                                   "amo" #asm_op ".d"                             \
                                   ^
   <inline asm>:1:2: note: instantiated into assembly here
           amoadd.d a2, a3, 0(a1)
           ^
   In file included from net/ipv6/ip6_output.c:26:
   In file included from include/linux/kernel.h:22:
   In file included from include/linux/bitops.h:68:
   In file included from arch/riscv/include/asm/bitops.h:14:
   In file included from include/linux/irqflags.h:17:
>> arch/riscv/include/asm/percpu.h:38:1: error: instruction requires the following: RV64I Base Instruction Set
   PERCPU_OP(add, add, +)
   ^
   arch/riscv/include/asm/percpu.h:25:5: note: expanded from macro 'PERCPU_OP'
                                   "amo" #asm_op ".d"                             \
                                   ^
   <inline asm>:1:2: note: instantiated into assembly here
           amoadd.d a4, a3, 0(a2)
           ^
   In file included from net/ipv6/ip6_output.c:26:
   In file included from include/linux/kernel.h:22:
   In file included from include/linux/bitops.h:68:
   In file included from arch/riscv/include/asm/bitops.h:14:
   In file included from include/linux/irqflags.h:17:
>> arch/riscv/include/asm/percpu.h:38:1: error: instruction requires the following: RV64I Base Instruction Set
   PERCPU_OP(add, add, +)
   ^
   arch/riscv/include/asm/percpu.h:25:5: note: expanded from macro 'PERCPU_OP'
                                   "amo" #asm_op ".d"                             \
                                   ^
   <inline asm>:1:2: note: instantiated into assembly here
           amoadd.d a1, a2, 0(a0)
           ^
   In file included from net/ipv6/ip6_output.c:26:
   In file included from include/linux/kernel.h:22:
   In file included from include/linux/bitops.h:68:
   In file included from arch/riscv/include/asm/bitops.h:14:
   In file included from include/linux/irqflags.h:17:
>> arch/riscv/include/asm/percpu.h:38:1: error: instruction requires the following: RV64I Base Instruction Set
   PERCPU_OP(add, add, +)
   ^
   arch/riscv/include/asm/percpu.h:25:5: note: expanded from macro 'PERCPU_OP'
                                   "amo" #asm_op ".d"                             \
                                   ^
   <inline asm>:1:2: note: instantiated into assembly here
           amoadd.d a4, a3, 0(a2)
           ^
   In file included from net/ipv6/ip6_output.c:26:
   In file included from include/linux/kernel.h:22:
   In file included from include/linux/bitops.h:68:
   In file included from arch/riscv/include/asm/bitops.h:14:
   In file included from include/linux/irqflags.h:17:
>> arch/riscv/include/asm/percpu.h:38:1: error: instruction requires the following: RV64I Base Instruction Set
   PERCPU_OP(add, add, +)
   ^
   arch/riscv/include/asm/percpu.h:25:5: note: expanded from macro 'PERCPU_OP'
                                   "amo" #asm_op ".d"                             \
                                   ^
   <inline asm>:1:2: note: instantiated into assembly here
           amoadd.d a2, a3, 0(a1)
           ^
   In file included from net/ipv6/ip6_output.c:26:
   In file included from include/linux/kernel.h:22:
   In file included from include/linux/bitops.h:68:
   In file included from arch/riscv/include/asm/bitops.h:14:
   In file included from include/linux/irqflags.h:17:
>> arch/riscv/include/asm/percpu.h:38:1: error: instruction requires the following: RV64I Base Instruction Set
   PERCPU_OP(add, add, +)
   ^
   arch/riscv/include/asm/percpu.h:25:5: note: expanded from macro 'PERCPU_OP'
                                   "amo" #asm_op ".d"                             \
                                   ^
   <inline asm>:1:2: note: instantiated into assembly here
           amoadd.d a4, a3, 0(a2)
           ^
   In file included from net/ipv6/ip6_output.c:26:
   In file included from include/linux/kernel.h:22:
   In file included from include/linux/bitops.h:68:
   In file included from arch/riscv/include/asm/bitops.h:14:
   In file included from include/linux/irqflags.h:17:
>> arch/riscv/include/asm/percpu.h:38:1: error: instruction requires the following: RV64I Base Instruction Set
   PERCPU_OP(add, add, +)
   ^
   arch/riscv/include/asm/percpu.h:25:5: note: expanded from macro 'PERCPU_OP'
                                   "amo" #asm_op ".d"                             \
                                   ^
   <inline asm>:1:2: note: instantiated into assembly here
           amoadd.d a2, a3, 0(a1)
           ^
   In file included from net/ipv6/ip6_output.c:26:
   In file included from include/linux/kernel.h:22:
   In file included from include/linux/bitops.h:68:
   In file included from arch/riscv/include/asm/bitops.h:14:
   In file included from include/linux/irqflags.h:17:
>> arch/riscv/include/asm/percpu.h:38:1: error: instruction requires the following: RV64I Base Instruction Set
   PERCPU_OP(add, add, +)
   ^
   arch/riscv/include/asm/percpu.h:25:5: note: expanded from macro 'PERCPU_OP'
                                   "amo" #asm_op ".d"                             \
                                   ^
   <inline asm>:1:2: note: instantiated into assembly here
           amoadd.d a4, a3, 0(a1)
           ^
   In file included from net/ipv6/ip6_output.c:26:
   In file included from include/linux/kernel.h:22:
   In file included from include/linux/bitops.h:68:
   In file included from arch/riscv/include/asm/bitops.h:14:
   In file included from include/linux/irqflags.h:17:
>> arch/riscv/include/asm/percpu.h:38:1: error: instruction requires the following: RV64I Base Instruction Set
   PERCPU_OP(add, add, +)
   ^
   arch/riscv/include/asm/percpu.h:25:5: note: expanded from macro 'PERCPU_OP'
                                   "amo" #asm_op ".d"                             \
                                   ^
   <inline asm>:1:2: note: instantiated into assembly here
           amoadd.d a1, a2, 0(a0)
           ^
   In file included from net/ipv6/ip6_output.c:26:
   In file included from include/linux/kernel.h:22:
   In file included from include/linux/bitops.h:68:
   In file included from arch/riscv/include/asm/bitops.h:14:
   In file included from include/linux/irqflags.h:17:
>> arch/riscv/include/asm/percpu.h:38:1: error: instruction requires the following: RV64I Base Instruction Set
   PERCPU_OP(add, add, +)
   ^
   arch/riscv/include/asm/percpu.h:25:5: note: expanded from macro 'PERCPU_OP'
                                   "amo" #asm_op ".d"                             \
                                   ^
   <inline asm>:1:2: note: instantiated into assembly here
           amoadd.d a4, a3, 0(a1)
           ^
   In file included from net/ipv6/ip6_output.c:26:
   In file included from include/linux/kernel.h:22:
   In file included from include/linux/bitops.h:68:
   In file included from arch/riscv/include/asm/bitops.h:14:
   In file included from include/linux/irqflags.h:17:
>> arch/riscv/include/asm/percpu.h:38:1: error: instruction requires the following: RV64I Base Instruction Set
   PERCPU_OP(add, add, +)
   ^
   arch/riscv/include/asm/percpu.h:25:5: note: expanded from macro 'PERCPU_OP'
                                   "amo" #asm_op ".d"                             \
                                   ^
   <inline asm>:1:2: note: instantiated into assembly here
           amoadd.d a1, a2, 0(a0)
           ^
   12 errors generated.
..


vim +38 arch/riscv/include/asm/percpu.h

     9	
    10	#define PERCPU_OP(op, asm_op, c_op)                                            \
    11		static inline unsigned long __percpu_##op(void *ptr,                   \
    12							  unsigned long val, int size) \
    13		{                                                                      \
    14			unsigned long ret;                                             \
    15			switch (size) {                                                \
    16			case 4:                                                        \
    17				__asm__ __volatile__(                                  \
    18					"amo" #asm_op ".w"                             \
    19					" %[ret], %[val], %[ptr]\n"                   \
    20					: [ret] "=&r"(ret), [ptr] "+A"(*(u32 *)ptr)    \
    21					: [val] "r"(val));                             \
    22				break;                                                 \
    23			case 8:                                                        \
    24				__asm__ __volatile__(                                  \
    25					"amo" #asm_op ".d"                             \
    26					" %[ret], %[val], %[ptr]\n"                   \
    27					: [ret] "=&r"(ret), [ptr] "+A"(*(u64 *)ptr)    \
    28					: [val] "r"(val));                             \
    29				break;                                                 \
    30			default:                                                       \
    31				ret = 0;                                               \
    32				BUILD_BUG();                                           \
    33			}                                                              \
    34											\
    35			return ret c_op val;                                           \
    36		}
    37	
  > 38	PERCPU_OP(add, add, +)
    39	PERCPU_OP(and, and, &)
    40	PERCPU_OP(or, or, |)
    41	#undef PERCPU_OP
    42
  
kernel test robot Oct. 27, 2022, 1:05 a.m. UTC | #3
Hi Wen,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v6.1-rc2 next-20221026]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Wen-Yao/riscv-Rewrite-percpu-operations-and-support-cmpxchg-local-feature/20221026-184310
patch link:    https://lore.kernel.org/r/20221026104015.565468-2-haiwenyao%40uniontech.com
patch subject: [PATCH 1/2] riscv: percpu:Add riscv percpu operations
config: riscv-rv32_defconfig
compiler: riscv32-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/bb33e199935b65336d45c367d7581682dff63cdb
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Wen-Yao/riscv-Rewrite-percpu-operations-and-support-cmpxchg-local-feature/20221026-184310
        git checkout bb33e199935b65336d45c367d7581682dff63cdb
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=riscv SHELL=/bin/bash

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   arch/riscv/include/asm/percpu.h: Assembler messages:
>> arch/riscv/include/asm/percpu.h:38: Error: unrecognized opcode `amoadd.d a1,a2,0(a3)'
>> arch/riscv/include/asm/percpu.h:38: Error: unrecognized opcode `amoadd.d a2,a3,0(a5)'
>> arch/riscv/include/asm/percpu.h:38: Error: unrecognized opcode `amoadd.d a1,a2,0(a3)'
>> arch/riscv/include/asm/percpu.h:38: Error: unrecognized opcode `amoadd.d a2,a3,0(a5)'
>> arch/riscv/include/asm/percpu.h:38: Error: unrecognized opcode `amoadd.d a1,a2,0(a3)'
>> arch/riscv/include/asm/percpu.h:38: Error: unrecognized opcode `amoadd.d a2,a3,0(a5)'
>> arch/riscv/include/asm/percpu.h:38: Error: unrecognized opcode `amoadd.d a1,a2,0(a3)'
>> arch/riscv/include/asm/percpu.h:38: Error: unrecognized opcode `amoadd.d a2,a3,0(a5)'
>> arch/riscv/include/asm/percpu.h:38: Error: unrecognized opcode `amoadd.d a0,a3,0(a4)'
>> arch/riscv/include/asm/percpu.h:38: Error: unrecognized opcode `amoadd.d a3,a4,0(a5)'
>> arch/riscv/include/asm/percpu.h:38: Error: unrecognized opcode `amoadd.d a0,a2,0(a3)'
>> arch/riscv/include/asm/percpu.h:38: Error: unrecognized opcode `amoadd.d a0,a3,0(a5)'
--
   arch/riscv/include/asm/percpu.h: Assembler messages:
>> arch/riscv/include/asm/percpu.h:38: Error: unrecognized opcode `amoadd.d a1,a2,0(a3)'
>> arch/riscv/include/asm/percpu.h:38: Error: unrecognized opcode `amoadd.d a2,a3,0(a5)'
>> arch/riscv/include/asm/percpu.h:38: Error: unrecognized opcode `amoadd.d a1,a2,0(a3)'
>> arch/riscv/include/asm/percpu.h:38: Error: unrecognized opcode `amoadd.d a2,a3,0(a5)'
--
   arch/riscv/include/asm/percpu.h: Assembler messages:
>> arch/riscv/include/asm/percpu.h:38: Error: unrecognized opcode `amoadd.d a1,a2,0(a3)'
>> arch/riscv/include/asm/percpu.h:38: Error: unrecognized opcode `amoadd.d a2,a3,0(a5)'
>> arch/riscv/include/asm/percpu.h:38: Error: unrecognized opcode `amoadd.d a1,a2,0(a3)'
>> arch/riscv/include/asm/percpu.h:38: Error: unrecognized opcode `amoadd.d a2,a3,0(a5)'
>> arch/riscv/include/asm/percpu.h:38: Error: unrecognized opcode `amoadd.d a1,a2,0(a3)'
>> arch/riscv/include/asm/percpu.h:38: Error: unrecognized opcode `amoadd.d a2,a3,0(a5)'
>> arch/riscv/include/asm/percpu.h:38: Error: unrecognized opcode `amoadd.d a1,a2,0(a3)'
>> arch/riscv/include/asm/percpu.h:38: Error: unrecognized opcode `amoadd.d a2,a3,0(a5)'
--
   arch/riscv/include/asm/percpu.h: Assembler messages:
>> arch/riscv/include/asm/percpu.h:38: Error: unrecognized opcode `amoadd.d a1,a2,0(a5)'
>> arch/riscv/include/asm/percpu.h:38: Error: unrecognized opcode `amoadd.d a2,a5,0(a3)'
>> arch/riscv/include/asm/percpu.h:38: Error: unrecognized opcode `amoadd.d a1,a2,0(a5)'
--
   arch/riscv/include/asm/percpu.h: Assembler messages:
>> arch/riscv/include/asm/percpu.h:38: Error: unrecognized opcode `amoadd.d a3,s6,0(a5)'
>> arch/riscv/include/asm/percpu.h:38: Error: unrecognized opcode `amoadd.d a3,a0,0(a5)'
--
   arch/riscv/include/asm/percpu.h: Assembler messages:
>> arch/riscv/include/asm/percpu.h:38: Error: unrecognized opcode `amoadd.d a3,s2,0(a5)'
>> arch/riscv/include/asm/percpu.h:38: Error: unrecognized opcode `amoadd.d a3,s2,0(a5)'
--
   arch/riscv/include/asm/percpu.h: Assembler messages:
>> arch/riscv/include/asm/percpu.h:38: Error: unrecognized opcode `amoadd.d a3,a1,0(a5)'
>> arch/riscv/include/asm/percpu.h:38: Error: unrecognized opcode `amoadd.d a2,a4,0(a5)'
--
   arch/riscv/include/asm/percpu.h: Assembler messages:
>> arch/riscv/include/asm/percpu.h:38: Error: unrecognized opcode `amoadd.d a3,a1,0(a5)'
>> arch/riscv/include/asm/percpu.h:38: Error: unrecognized opcode `amoadd.d a2,a3,0(a5)'


vim +38 arch/riscv/include/asm/percpu.h

     9	
    10	#define PERCPU_OP(op, asm_op, c_op)                                            \
    11		static inline unsigned long __percpu_##op(void *ptr,                   \
    12							  unsigned long val, int size) \
    13		{                                                                      \
    14			unsigned long ret;                                             \
    15			switch (size) {                                                \
    16			case 4:                                                        \
    17				__asm__ __volatile__(                                  \
    18					"amo" #asm_op ".w"                             \
    19					" %[ret], %[val], %[ptr]\n"                   \
    20					: [ret] "=&r"(ret), [ptr] "+A"(*(u32 *)ptr)    \
    21					: [val] "r"(val));                             \
    22				break;                                                 \
    23			case 8:                                                        \
    24				__asm__ __volatile__(                                  \
    25					"amo" #asm_op ".d"                             \
    26					" %[ret], %[val], %[ptr]\n"                   \
    27					: [ret] "=&r"(ret), [ptr] "+A"(*(u64 *)ptr)    \
    28					: [val] "r"(val));                             \
    29				break;                                                 \
    30			default:                                                       \
    31				ret = 0;                                               \
    32				BUILD_BUG();                                           \
    33			}                                                              \
    34											\
    35			return ret c_op val;                                           \
    36		}
    37	
  > 38	PERCPU_OP(add, add, +)
    39	PERCPU_OP(and, and, &)
    40	PERCPU_OP(or, or, |)
    41	#undef PERCPU_OP
    42
  
Christoph Lameter Oct. 30, 2022, 1:19 p.m. UTC | #4
On Wed, 26 Oct 2022, Wen Yao wrote:

> This patch use riscv AMO(Atomic Memory Operation) instructions to
> optimise some this_cpu_and this_cpu_or this_cpu_add operations.
> It reuse cmpxchg_local() to impletment this_cpu_cmpxchg macros.
> It reuse xchg_relaxed() to impletment this_cpu_xchg macros.

Are you sure that these changes give you any benefit vs disabling preempt
or turning irqs off? I don't know too much about atomics on riscv but it
looks like you are using full atomics. The performance penalty for the use
of those is usually drastic. Often irq/preempt off is better.

Could you run some of the synthetic tests to establish the benefit? For
example, run the synthetic tests for the slub allocator with and without
these patches.


> +			__asm__ __volatile__(                                  \
> +				"amo" #asm_op ".w"                             \

amo = atomic operation?
  

Patch

diff --git a/arch/riscv/include/asm/percpu.h b/arch/riscv/include/asm/percpu.h
new file mode 100644
index 000000000000..ae796e328442
--- /dev/null
+++ b/arch/riscv/include/asm/percpu.h
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2022 Union Tech Software Technology Corporation Limited
+ */
+#ifndef __ASM_PERCPU_H
+#define __ASM_PERCPU_H
+
+#include <asm/cmpxchg.h>
+
+/*
+ * The 64-bit AMOs (amo*.d) are RV64-only, so the 8-byte case is emitted only
+ * for CONFIG_64BIT. Otherwise a size of 8 reaches the default label of the
+ * switch in PERCPU_OP() and is rejected by BUILD_BUG().
+ */
+#ifdef CONFIG_64BIT
+#define __PERCPU_OP_CASE_8(insn, out, addr, operand)                           \
+	case 8:                                                                \
+		__asm__ __volatile__(                                          \
+			insn ".d"                                              \
+			" %[ret], %[val], %[ptr]\n"                            \
+			: [ret] "=&r"(out), [ptr] "+A"(*(u64 *)(addr))         \
+			: [val] "r"(operand));                                 \
+		break;
+#else
+#define __PERCPU_OP_CASE_8(insn, out, addr, operand)
+#endif
+
+/*
+ * PERCPU_OP(op, asm_op, c_op) defines __percpu_<op>(ptr, val, size), which
+ * applies the "amo<asm_op>" instruction with operand val to the 4- or 8-byte
+ * object at ptr. The AMO leaves the previous memory value in ret, so the
+ * function returns "ret c_op val". Any other size is a build error.
+ */
+#define PERCPU_OP(op, asm_op, c_op)                                            \
+	static inline unsigned long __percpu_##op(void *ptr,                   \
+						  unsigned long val, int size) \
+	{                                                                      \
+		unsigned long ret;                                             \
+		switch (size) {                                                \
+		case 4:                                                        \
+			__asm__ __volatile__(                                  \
+				"amo" #asm_op ".w"                             \
+				" %[ret], %[val], %[ptr]\n"                   \
+				: [ret] "=&r"(ret), [ptr] "+A"(*(u32 *)ptr)    \
+				: [val] "r"(val));                             \
+			break;                                                 \
+		__PERCPU_OP_CASE_8("amo" #asm_op, ret, ptr, val)               \
+		default:                                                       \
+			ret = 0;                                               \
+			BUILD_BUG();                                           \
+		}                                                              \
+										\
+		return ret c_op val;                                           \
+	}
+
+PERCPU_OP(add, add, +)
+PERCPU_OP(and, and, &)
+PERCPU_OP(or, or, |)
+#undef PERCPU_OP
+#undef __PERCPU_OP_CASE_8
+
+/*
+ * this_cpu_xchg: exchange this CPU's instance of pcp with val through
+ * xchg_relaxed(), with preemption disabled around the access.
+ */
+#define _protect_xchg_local(pcp, val)                           \
+	({                                                      \
+		typeof(*raw_cpu_ptr(&(pcp))) __ret;             \
+		preempt_disable_notrace();                      \
+		__ret = xchg_relaxed(raw_cpu_ptr(&(pcp)), val); \
+		preempt_enable_notrace();                       \
+		__ret;                                          \
+	})
+
+/*
+ * this_cpu_cmpxchg: compare-and-exchange on this CPU's instance of pcp through
+ * cmpxchg_local(), with preemption disabled around the access.
+ */
+#define _protect_cmpxchg_local(pcp, o, n)                         \
+	({                                                        \
+		typeof(*raw_cpu_ptr(&(pcp))) __ret;               \
+		preempt_disable_notrace();                        \
+		__ret = cmpxchg_local(raw_cpu_ptr(&(pcp)), o, n); \
+		preempt_enable_notrace();                         \
+		__ret;                                            \
+	})
+
+/*
+ * Call operation(ptr, val, sizeof(pcp)) on this CPU's instance of pcp with
+ * preemption disabled, and evaluate to its result cast to typeof(pcp).
+ */
+#define _pcp_protect(operation, pcp, val)                                     \
+	({                                                                    \
+		typeof(pcp) __retval;                                         \
+		preempt_disable_notrace();                                    \
+		__retval = (typeof(pcp))operation(raw_cpu_ptr(&(pcp)), (val), \
+						  sizeof(pcp));               \
+		preempt_enable_notrace();                                     \
+		__retval;                                                     \
+	})
+
+#define _percpu_add(pcp, val) _pcp_protect(__percpu_add, pcp, val)
+
+#define _percpu_add_return(pcp, val) _percpu_add(pcp, val)
+
+#define _percpu_and(pcp, val) _pcp_protect(__percpu_and, pcp, val)
+
+#define _percpu_or(pcp, val) _pcp_protect(__percpu_or, pcp, val)
+
+/* The 4-byte variants are provided unconditionally. */
+#define this_cpu_add_4(pcp, val) _percpu_add(pcp, val)
+#define this_cpu_add_return_4(pcp, val) _percpu_add_return(pcp, val)
+#define this_cpu_and_4(pcp, val) _percpu_and(pcp, val)
+#define this_cpu_or_4(pcp, val) _percpu_or(pcp, val)
+#define this_cpu_xchg_4(pcp, val) _protect_xchg_local(pcp, val)
+#define this_cpu_cmpxchg_4(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
+
+/*
+ * The 8-byte variants need 64-bit atomics, which RV32 lacks. When they are
+ * left undefined, <asm-generic/percpu.h> supplies its generic fallbacks.
+ */
+#ifdef CONFIG_64BIT
+#define this_cpu_add_8(pcp, val) _percpu_add(pcp, val)
+#define this_cpu_add_return_8(pcp, val) _percpu_add_return(pcp, val)
+#define this_cpu_and_8(pcp, val) _percpu_and(pcp, val)
+#define this_cpu_or_8(pcp, val) _percpu_or(pcp, val)
+#define this_cpu_xchg_8(pcp, val) _protect_xchg_local(pcp, val)
+#define this_cpu_cmpxchg_8(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
+#endif /* CONFIG_64BIT */
+
+#include <asm-generic/percpu.h>
+
+#endif /* __ASM_PERCPU_H */