[2/2] riscv: select DCACHE_WORD_ACCESS for efficient unaligned access HW

Message ID 20231202111822.3569-3-jszhang@kernel.org
State New
Series riscv: enable EFFICIENT_UNALIGNED_ACCESS and DCACHE_WORD_ACCESS

Commit Message

Jisheng Zhang Dec. 2, 2023, 11:18 a.m. UTC
  DCACHE_WORD_ACCESS uses the word-at-a-time API for optimised string
comparisons in the vfs layer.

This patch implements support for load_unaligned_zeropad in much the
same way as has been done for arm64.
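
For context, the sketch below shows the kind of word-at-a-time loop that
benefits. waat_strlen() is a hypothetical illustration, not actual kernel
code: it is loosely modelled on how the dcache and strncpy_from_user paths
scan strings a word at a time, and it assumes <asm/word-at-a-time.h> for
has_zero()/prep_zero_mask()/create_zero_mask()/find_zero(). Because every
load goes through load_unaligned_zeropad(), the final load may cross into
an unmapped page and still return the valid low bytes with zeroes above
them instead of faulting.

 static inline size_t waat_strlen(const char *s)
 {
 	const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
 	unsigned long word, bits;
 	size_t len = 0;

 	for (;;) {
 		/* May over-read past the NUL, possibly across a page boundary. */
 		word = load_unaligned_zeropad(s + len);
 		if (has_zero(word, &bits, &constants)) {
 			bits = prep_zero_mask(word, bits, &constants);
 			bits = create_zero_mask(bits);
 			return len + find_zero(bits);
 		}
 		len += sizeof(unsigned long);
 	}
 }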

Here is the test program and the steps to run it:

 $ cat tt.c
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <unistd.h>

 #define ITERATIONS 1000000

 #define PATH "123456781234567812345678123456781"

 int main(void)
 {
         unsigned long i;
         struct stat buf;

         for (i = 0; i < ITERATIONS; i++)
                 stat(PATH, &buf);

         return 0;
 }

 $ gcc -O2 tt.c
 $ touch 123456781234567812345678123456781
 $ time ./a.out

In my testing on T-HEAD C910 platforms, the performance of the above test
improves by about 7.5%.

Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
---
 arch/riscv/Kconfig                      |  1 +
 arch/riscv/include/asm/asm-extable.h    | 15 ++++++++++++
 arch/riscv/include/asm/word-at-a-time.h | 23 ++++++++++++++++++
 arch/riscv/mm/extable.c                 | 31 +++++++++++++++++++++++++
 4 files changed, 70 insertions(+)
  

Comments

Conor Dooley Dec. 3, 2023, 11:53 a.m. UTC | #1
On Sat, Dec 02, 2023 at 07:18:22PM +0800, Jisheng Zhang wrote:
> DCACHE_WORD_ACCESS uses the word-at-a-time API for optimised string
> comparisons in the vfs layer.
> 
> This patch implements support for load_unaligned_zeropad in much the
> same way as has been done for arm64.
> 
> Here is the test program and step:
> 
>  $ cat tt.c
>  #include <sys/types.h>
>  #include <sys/stat.h>
>  #include <unistd.h>
> 
>  #define ITERATIONS 1000000
> 
>  #define PATH "123456781234567812345678123456781"
> 
>  int main(void)
>  {
>          unsigned long i;
>          struct stat buf;
> 
>          for (i = 0; i < ITERATIONS; i++)
>                  stat(PATH, &buf);
> 
>          return 0;
>  }
> 
>  $ gcc -O2 tt.c
>  $ touch 123456781234567812345678123456781
>  $ time ./a.out
> 
> Per my test on T-HEAD C910 platforms, the above test performance is
> improved by about 7.5%.
> 
> Signed-off-by: Jisheng Zhang <jszhang@kernel.org>

Doesn't build on nommu:
arch/riscv/include/asm/word-at-a-time.h:64:9: error: expected ':' or ')' before '_ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD'
arch/riscv/include/asm/word-at-a-time.h:64:45: error: invalid suffix "b" on integer constant
arch/riscv/include/asm/word-at-a-time.h:64:49: error: invalid suffix "b" on integer constant
arch/riscv/include/asm/word-at-a-time.h:64:9: error: expected ':' or ')' before '_ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD'
arch/riscv/include/asm/word-at-a-time.h:64:45: error: invalid suffix "b" on integer constant
arch/riscv/include/asm/word-at-a-time.h:64:49: error: invalid suffix "b" on integer constant
arch/riscv/include/asm/word-at-a-time.h:64:9: error: expected ':' or ')' before '_ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD'
arch/riscv/include/asm/word-at-a-time.h:64:45: error: invalid suffix "b" on integer constant
arch/riscv/include/asm/word-at-a-time.h:64:49: error: invalid suffix "b" on integer constant
arch/riscv/include/asm/word-at-a-time.h:64:9: error: expected ':' or ')' before '_ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD'
arch/riscv/include/asm/word-at-a-time.h:64:45: error: invalid suffix "b" on integer constant
arch/riscv/include/asm/word-at-a-time.h:64:49: error: invalid suffix "b" on integer constant

Cheers,
Conor.
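
The errors are consistent with _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD being
defined only inside the CONFIG_MMU section of asm-extable.h while the new
load_unaligned_zeropad() in word-at-a-time.h is built unconditionally, so
on !MMU builds the macro is never expanded and its name lands in the asm
string as-is. One possible guard, shown as a sketch only (mirroring the
"select DCACHE_WORD_ACCESS if MMU" in Kconfig, not necessarily the fix
that was eventually applied), is to compile the helper for MMU builds
only:

 /* arch/riscv/include/asm/word-at-a-time.h */
 #ifdef CONFIG_MMU
 static inline unsigned long load_unaligned_zeropad(const void *addr)
 {
 	unsigned long ret;

 	/* Load word from unaligned pointer addr */
 	asm(
 	"1:	" REG_L " %0, %2\n"
 	"2:\n"
 	_ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(1b, 2b, %0, %1)
 	: "=&r" (ret)
 	: "r" (addr), "m" (*(unsigned long *)addr));

 	return ret;
 }
 #endif /* CONFIG_MMU */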

> ---
>  arch/riscv/Kconfig                      |  1 +
>  arch/riscv/include/asm/asm-extable.h    | 15 ++++++++++++
>  arch/riscv/include/asm/word-at-a-time.h | 23 ++++++++++++++++++
>  arch/riscv/mm/extable.c                 | 31 +++++++++++++++++++++++++
>  4 files changed, 70 insertions(+)
> 
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index 0a76209e9b02..bb366eb1870e 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -657,6 +657,7 @@ config RISCV_MISALIGNED
>  config RISCV_EFFICIENT_UNALIGNED_ACCESS
>  	bool "Use unaligned access for some functions"
>  	depends on NONPORTABLE
> +	select DCACHE_WORD_ACCESS if MMU
>  	select HAVE_EFFICIENT_UNALIGNED_ACCESS
>  	default n
>  	help
> diff --git a/arch/riscv/include/asm/asm-extable.h b/arch/riscv/include/asm/asm-extable.h
> index 00a96e7a9664..0c8bfd54fc4e 100644
> --- a/arch/riscv/include/asm/asm-extable.h
> +++ b/arch/riscv/include/asm/asm-extable.h
> @@ -6,6 +6,7 @@
>  #define EX_TYPE_FIXUP			1
>  #define EX_TYPE_BPF			2
>  #define EX_TYPE_UACCESS_ERR_ZERO	3
> +#define EX_TYPE_LOAD_UNALIGNED_ZEROPAD	4
>  
>  #ifdef CONFIG_MMU
>  
> @@ -47,6 +48,11 @@
>  #define EX_DATA_REG_ZERO_SHIFT	5
>  #define EX_DATA_REG_ZERO	GENMASK(9, 5)
>  
> +#define EX_DATA_REG_DATA_SHIFT	0
> +#define EX_DATA_REG_DATA	GENMASK(4, 0)
> +#define EX_DATA_REG_ADDR_SHIFT	5
> +#define EX_DATA_REG_ADDR	GENMASK(9, 5)
> +
>  #define EX_DATA_REG(reg, gpr)						\
>  	"((.L__gpr_num_" #gpr ") << " __stringify(EX_DATA_REG_##reg##_SHIFT) ")"
>  
> @@ -62,6 +68,15 @@
>  #define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err)			\
>  	_ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero)
>  
> +#define _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(insn, fixup, data, addr)		\
> +	__DEFINE_ASM_GPR_NUMS							\
> +	__ASM_EXTABLE_RAW(#insn, #fixup,					\
> +			  __stringify(EX_TYPE_LOAD_UNALIGNED_ZEROPAD),		\
> +			  "("							\
> +			    EX_DATA_REG(DATA, data) " | "			\
> +			    EX_DATA_REG(ADDR, addr)				\
> +			  ")")
> +
>  #endif /* __ASSEMBLY__ */
>  
>  #else /* CONFIG_MMU */
> diff --git a/arch/riscv/include/asm/word-at-a-time.h b/arch/riscv/include/asm/word-at-a-time.h
> index 7c086ac6ecd4..5a3865ac3623 100644
> --- a/arch/riscv/include/asm/word-at-a-time.h
> +++ b/arch/riscv/include/asm/word-at-a-time.h
> @@ -9,6 +9,7 @@
>  #define _ASM_RISCV_WORD_AT_A_TIME_H
>  
>  
> +#include <asm/asm-extable.h>
>  #include <linux/kernel.h>
>  
>  struct word_at_a_time {
> @@ -45,4 +46,26 @@ static inline unsigned long find_zero(unsigned long mask)
>  /* The mask we created is directly usable as a bytemask */
>  #define zero_bytemask(mask) (mask)
>  
> +/*
> + * Load an unaligned word from kernel space.
> + *
> + * In the (very unlikely) case of the word being a page-crosser
> + * and the next page not being mapped, take the exception and
> + * return zeroes in the non-existing part.
> + */
> +static inline unsigned long load_unaligned_zeropad(const void *addr)
> +{
> +	unsigned long ret;
> +
> +	/* Load word from unaligned pointer addr */
> +	asm(
> +	"1:	" REG_L " %0, %2\n"
> +	"2:\n"
> +	_ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(1b, 2b, %0, %1)
> +	: "=&r" (ret)
> +	: "r" (addr), "m" (*(unsigned long *)addr));
> +
> +	return ret;
> +}
> +
>  #endif /* _ASM_RISCV_WORD_AT_A_TIME_H */
> diff --git a/arch/riscv/mm/extable.c b/arch/riscv/mm/extable.c
> index 35484d830fd6..dd1530af3ef1 100644
> --- a/arch/riscv/mm/extable.c
> +++ b/arch/riscv/mm/extable.c
> @@ -27,6 +27,14 @@ static bool ex_handler_fixup(const struct exception_table_entry *ex,
>  	return true;
>  }
>  
> +static inline unsigned long regs_get_gpr(struct pt_regs *regs, unsigned int offset)
> +{
> +	if (unlikely(!offset || offset > MAX_REG_OFFSET))
> +		return 0;
> +
> +	return *(unsigned long *)((unsigned long)regs + offset);
> +}
> +
>  static inline void regs_set_gpr(struct pt_regs *regs, unsigned int offset,
>  				unsigned long val)
>  {
> @@ -50,6 +58,27 @@ static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex,
>  	return true;
>  }
>  
> +static bool
> +ex_handler_load_unaligned_zeropad(const struct exception_table_entry *ex,
> +				  struct pt_regs *regs)
> +{
> +	int reg_data = FIELD_GET(EX_DATA_REG_DATA, ex->data);
> +	int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->data);
> +	unsigned long data, addr, offset;
> +
> +	addr = regs_get_gpr(regs, reg_addr * sizeof(unsigned long));
> +
> +	offset = addr & 0x7UL;
> +	addr &= ~0x7UL;
> +
> +	data = *(unsigned long *)addr >> (offset * 8);
> +
> +	regs_set_gpr(regs, reg_data * sizeof(unsigned long), data);
> +
> +	regs->epc = get_ex_fixup(ex);
> +	return true;
> +}
> +
>  bool fixup_exception(struct pt_regs *regs)
>  {
>  	const struct exception_table_entry *ex;
> @@ -65,6 +94,8 @@ bool fixup_exception(struct pt_regs *regs)
>  		return ex_handler_bpf(ex, regs);
>  	case EX_TYPE_UACCESS_ERR_ZERO:
>  		return ex_handler_uaccess_err_zero(ex, regs);
> +	case EX_TYPE_LOAD_UNALIGNED_ZEROPAD:
> +		return ex_handler_load_unaligned_zeropad(ex, regs);
>  	}
>  
>  	BUG();
> -- 
> 2.42.0
>
  

Patch

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 0a76209e9b02..bb366eb1870e 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -657,6 +657,7 @@  config RISCV_MISALIGNED
 config RISCV_EFFICIENT_UNALIGNED_ACCESS
 	bool "Use unaligned access for some functions"
 	depends on NONPORTABLE
+	select DCACHE_WORD_ACCESS if MMU
 	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 	default n
 	help
diff --git a/arch/riscv/include/asm/asm-extable.h b/arch/riscv/include/asm/asm-extable.h
index 00a96e7a9664..0c8bfd54fc4e 100644
--- a/arch/riscv/include/asm/asm-extable.h
+++ b/arch/riscv/include/asm/asm-extable.h
@@ -6,6 +6,7 @@ 
 #define EX_TYPE_FIXUP			1
 #define EX_TYPE_BPF			2
 #define EX_TYPE_UACCESS_ERR_ZERO	3
+#define EX_TYPE_LOAD_UNALIGNED_ZEROPAD	4
 
 #ifdef CONFIG_MMU
 
@@ -47,6 +48,11 @@ 
 #define EX_DATA_REG_ZERO_SHIFT	5
 #define EX_DATA_REG_ZERO	GENMASK(9, 5)
 
+#define EX_DATA_REG_DATA_SHIFT	0
+#define EX_DATA_REG_DATA	GENMASK(4, 0)
+#define EX_DATA_REG_ADDR_SHIFT	5
+#define EX_DATA_REG_ADDR	GENMASK(9, 5)
+
 #define EX_DATA_REG(reg, gpr)						\
 	"((.L__gpr_num_" #gpr ") << " __stringify(EX_DATA_REG_##reg##_SHIFT) ")"
 
@@ -62,6 +68,15 @@ 
 #define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err)			\
 	_ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero)
 
+#define _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(insn, fixup, data, addr)		\
+	__DEFINE_ASM_GPR_NUMS							\
+	__ASM_EXTABLE_RAW(#insn, #fixup,					\
+			  __stringify(EX_TYPE_LOAD_UNALIGNED_ZEROPAD),		\
+			  "("							\
+			    EX_DATA_REG(DATA, data) " | "			\
+			    EX_DATA_REG(ADDR, addr)				\
+			  ")")
+
 #endif /* __ASSEMBLY__ */
 
 #else /* CONFIG_MMU */
diff --git a/arch/riscv/include/asm/word-at-a-time.h b/arch/riscv/include/asm/word-at-a-time.h
index 7c086ac6ecd4..5a3865ac3623 100644
--- a/arch/riscv/include/asm/word-at-a-time.h
+++ b/arch/riscv/include/asm/word-at-a-time.h
@@ -9,6 +9,7 @@ 
 #define _ASM_RISCV_WORD_AT_A_TIME_H
 
 
+#include <asm/asm-extable.h>
 #include <linux/kernel.h>
 
 struct word_at_a_time {
@@ -45,4 +46,26 @@  static inline unsigned long find_zero(unsigned long mask)
 /* The mask we created is directly usable as a bytemask */
 #define zero_bytemask(mask) (mask)
 
+/*
+ * Load an unaligned word from kernel space.
+ *
+ * In the (very unlikely) case of the word being a page-crosser
+ * and the next page not being mapped, take the exception and
+ * return zeroes in the non-existing part.
+ */
+static inline unsigned long load_unaligned_zeropad(const void *addr)
+{
+	unsigned long ret;
+
+	/* Load word from unaligned pointer addr */
+	asm(
+	"1:	" REG_L " %0, %2\n"
+	"2:\n"
+	_ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(1b, 2b, %0, %1)
+	: "=&r" (ret)
+	: "r" (addr), "m" (*(unsigned long *)addr));
+
+	return ret;
+}
+
 #endif /* _ASM_RISCV_WORD_AT_A_TIME_H */
diff --git a/arch/riscv/mm/extable.c b/arch/riscv/mm/extable.c
index 35484d830fd6..dd1530af3ef1 100644
--- a/arch/riscv/mm/extable.c
+++ b/arch/riscv/mm/extable.c
@@ -27,6 +27,14 @@  static bool ex_handler_fixup(const struct exception_table_entry *ex,
 	return true;
 }
 
+static inline unsigned long regs_get_gpr(struct pt_regs *regs, unsigned int offset)
+{
+	if (unlikely(!offset || offset > MAX_REG_OFFSET))
+		return 0;
+
+	return *(unsigned long *)((unsigned long)regs + offset);
+}
+
 static inline void regs_set_gpr(struct pt_regs *regs, unsigned int offset,
 				unsigned long val)
 {
@@ -50,6 +58,27 @@  static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex,
 	return true;
 }
 
+static bool
+ex_handler_load_unaligned_zeropad(const struct exception_table_entry *ex,
+				  struct pt_regs *regs)
+{
+	int reg_data = FIELD_GET(EX_DATA_REG_DATA, ex->data);
+	int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->data);
+	unsigned long data, addr, offset;
+
+	addr = regs_get_gpr(regs, reg_addr * sizeof(unsigned long));
+
+	offset = addr & 0x7UL;
+	addr &= ~0x7UL;
+
+	data = *(unsigned long *)addr >> (offset * 8);
+
+	regs_set_gpr(regs, reg_data * sizeof(unsigned long), data);
+
+	regs->epc = get_ex_fixup(ex);
+	return true;
+}
+
 bool fixup_exception(struct pt_regs *regs)
 {
 	const struct exception_table_entry *ex;
@@ -65,6 +94,8 @@  bool fixup_exception(struct pt_regs *regs)
 		return ex_handler_bpf(ex, regs);
 	case EX_TYPE_UACCESS_ERR_ZERO:
 		return ex_handler_uaccess_err_zero(ex, regs);
+	case EX_TYPE_LOAD_UNALIGNED_ZEROPAD:
+		return ex_handler_load_unaligned_zeropad(ex, regs);
 	}
 
 	BUG();
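
For readers tracing the fixup math: on little-endian RV64 the handler
re-reads the aligned word containing the first byte of the faulting
address (that word lies entirely within the mapped page) and shifts it
right so the valid bytes land in the low end of the destination register
while the bytes that would have come from the unmapped page read back as
zero. The stand-alone model below is illustrative only;
model_zeropad_fixup() is a hypothetical name and not kernel code.

 #include <stdint.h>
 #include <string.h>

 /*
  * "page" is the last mapped page and "unaligned_off" the offset of the
  * faulting 8-byte load within it (i.e. within 7 bytes of the page end).
  */
 static uint64_t model_zeropad_fixup(const unsigned char *page, size_t unaligned_off)
 {
 	size_t offset = unaligned_off & 0x7;		/* offset inside the aligned word */
 	size_t aligned_off = unaligned_off & ~(size_t)0x7;	/* aligned re-read */
 	uint64_t word;

 	memcpy(&word, page + aligned_off, sizeof(word));

 	/* Little-endian: valid bytes end up low, missing bytes become 0. */
 	return word >> (offset * 8);
 }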