[v1,2/2] LoongArch: Add prefetch insns.
Checks
Commit Message
Co-Authored-By: xujiahao <xujiahao@loongson.cn>
gcc/ChangeLog:
* config/loongarch/loongarch-def.c: Initialize the number of parallel prefetches.
* config/loongarch/loongarch-protos.h (loongarch_prefetch_cookie):
Function declaration.
* config/loongarch/loongarch-tune.h (struct loongarch_cache):
Define the number of parallel prefetches.
* config/loongarch/loongarch.cc (loongarch_option_override_internal):
Set up parameters to be used in prefetching algorithm.
(loongarch_prefetch_cookie): Select load or store based on the value of write.
* config/loongarch/loongarch.md (prefetch): New template.
(*prefetch_indexed_<mode>): New template.
---
gcc/config/loongarch/loongarch-def.c | 2 ++
gcc/config/loongarch/loongarch-protos.h | 1 +
gcc/config/loongarch/loongarch-tune.h | 1 +
gcc/config/loongarch/loongarch.cc | 48 +++++++++++++++++++++++++
gcc/config/loongarch/loongarch.md | 23 ++++++++++++
5 files changed, 75 insertions(+)
Comments
Sorry, there is a problem in this patch. I will send it again after
fixing it.
在 2022/10/29 下午3:05, Lulu Cheng 写道:
> Co-Authored-By: xujiahao <xujiahao@loongson.cn>
>
> gcc/ChangeLog:
>
> * config/loongarch/loongarch-def.c: Initialize the number of parallel prefetches.
> * config/loongarch/loongarch-protos.h (loongarch_prefetch_cookie):
> Function declaration.
> * config/loongarch/loongarch-tune.h (struct loongarch_cache):
> Define the number of parallel prefetches.
> * config/loongarch/loongarch.cc (loongarch_option_override_internal):
> Set up parameters to be used in prefetching algorithm.
> (loongarch_prefetch_cookie): Select load or store based on the value of write.
> * config/loongarch/loongarch.md (prefetch): New template.
> (*prefetch_indexed_<mode>): New template.
> ---
> gcc/config/loongarch/loongarch-def.c | 2 ++
> gcc/config/loongarch/loongarch-protos.h | 1 +
> gcc/config/loongarch/loongarch-tune.h | 1 +
> gcc/config/loongarch/loongarch.cc | 48 +++++++++++++++++++++++++
> gcc/config/loongarch/loongarch.md | 23 ++++++++++++
> 5 files changed, 75 insertions(+)
>
> diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c
> index cbf995d81b5..80ab10a52a8 100644
> --- a/gcc/config/loongarch/loongarch-def.c
> +++ b/gcc/config/loongarch/loongarch-def.c
> @@ -62,11 +62,13 @@ loongarch_cpu_cache[N_TUNE_TYPES] = {
> .l1d_line_size = 64,
> .l1d_size = 64,
> .l2d_size = 256,
> + .simultaneous_prefetches = 4,
> },
> [CPU_LA464] = {
> .l1d_line_size = 64,
> .l1d_size = 64,
> .l2d_size = 256,
> + .simultaneous_prefetches = 4,
> },
> };
>
> diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
> index 77b2217247d..489525b520e 100644
> --- a/gcc/config/loongarch/loongarch-protos.h
> +++ b/gcc/config/loongarch/loongarch-protos.h
> @@ -179,5 +179,6 @@ extern tree loongarch_builtin_decl (unsigned int, bool);
> extern rtx loongarch_expand_builtin (tree, rtx, rtx subtarget ATTRIBUTE_UNUSED,
> machine_mode, int);
> extern tree loongarch_build_builtin_va_list (void);
> +extern rtx loongarch_prefetch_cookie (rtx, rtx);
>
> #endif /* ! GCC_LOONGARCH_PROTOS_H */
> diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h
> index 6f3530f5c02..8e3eb29472b 100644
> --- a/gcc/config/loongarch/loongarch-tune.h
> +++ b/gcc/config/loongarch/loongarch-tune.h
> @@ -45,6 +45,7 @@ struct loongarch_cache {
> int l1d_line_size; /* bytes */
> int l1d_size; /* KiB */
> int l2d_size; /* kiB */
> + int simultaneous_prefetches; /* number of parallel prefetch */
> };
>
> #endif /* LOONGARCH_TUNE_H */
> diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
> index 5e8cd293645..d663afe434d 100644
> --- a/gcc/config/loongarch/loongarch.cc
> +++ b/gcc/config/loongarch/loongarch.cc
> @@ -63,6 +63,7 @@ along with GCC; see the file COPYING3. If not see
> #include "context.h"
> #include "builtins.h"
> #include "rtl-iter.h"
> +#include "params.h"
>
> /* This file should be included last. */
> #include "target-def.h"
> @@ -6126,6 +6127,33 @@ loongarch_option_override_internal (struct gcc_options *opts)
> if (loongarch_branch_cost == 0)
> loongarch_branch_cost = loongarch_cost->branch_cost;
>
> + /* Set up parameters to be used in prefetching algorithm. */
> + maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
> + loongarch_cpu_cache[LARCH_ACTUAL_TUNE].simultaneous_prefetches,
> + opts->x_param_values,
> + opts_set->x_param_values);
> +
> + maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
> + loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_line_size,
> + opts->x_param_values,
> + opts_set->x_param_values);
> +
> + maybe_set_param_value (PARAM_L1_CACHE_SIZE,
> + loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_size,
> + opts->x_param_values,
> + opts_set->x_param_values);
> +
> + maybe_set_param_value (PARAM_L2_CACHE_SIZE,
> + loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l2d_size,
> + opts->x_param_values,
> + opts_set->x_param_values);
> +
> + /* Enable sw prefetching at -O3 and higher. */
> + if (opts->x_flag_prefetch_loop_arrays < 0
> + && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
> + && !opts->x_optimize_size)
> + opts->x_flag_prefetch_loop_arrays = 1;
> +
> if (TARGET_DIRECT_EXTERN_ACCESS && flag_shlib)
> error ("%qs cannot be used for compiling a shared library",
> "-mdirect-extern-access");
> @@ -6506,6 +6534,26 @@ loongarch_asan_shadow_offset (void)
> return TARGET_64BIT ? (HOST_WIDE_INT_1 << 46) : 0;
> }
>
> +/* LoongArch only implements preld hint=0 (prefetch for load) and hint=8
> + (prefetch for store); other hints are just mapped to hint = 0 or hint = 8. */
> +
> +rtx
> +loongarch_prefetch_cookie (rtx write, rtx locality)
> +{
> + if (INTVAL (locality) == 1 && INTVAL (write) == 0)
> + return GEN_INT (INTVAL (write) + 2);
> +
> + /* store. */
> + if (INTVAL (write) == 1)
> + return GEN_INT (INTVAL (write) + 7);
> +
> + /* load. */
> + if (INTVAL (write) == 0)
> + return GEN_INT (INTVAL (write));
> +
> + gcc_unreachable ();
> +}
> +
> /* Initialize the GCC target structure. */
> #undef TARGET_ASM_ALIGNED_HI_OP
> #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
> diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
> index 7eaa9ab66e3..be247164eb4 100644
> --- a/gcc/config/loongarch/loongarch.md
> +++ b/gcc/config/loongarch/loongarch.md
> @@ -3201,6 +3201,29 @@ (define_expand "untyped_call"
> ;; ....................
> ;;
>
> +(define_insn "prefetch"
> + [(prefetch (match_operand 0 "address_operand" "p")
> + (match_operand 1 "const_int_operand" "n")
> + (match_operand 2 "const_int_operand" "n"))]
> + ""
> +{
> + operands[1] = loongarch_prefetch_cookie (operands[1], operands[2]);
> + return "preld\t%1,%a0";
> +}
> + [(set_attr "type" "prefetch")])
> +
> +(define_insn "*prefetch_indexed_<mode>"
> + [(prefetch (plus:P (match_operand 0 "register_operand" "r")
> + (match_operand 1 "register_operand" "r"))
> + (match_operand 2 "const_int_operand" "n")
> + (match_operand 3 "const_int_operand" "n"))]
> + ""
> +{
> + operands[2] = loongarch_prefetch_cookie (operands[2], operands[3]);
> + return "preldx\t%2,%1,%0";
> +}
> + [(set_attr "type" "prefetchx")])
> +
> (define_insn "nop"
> [(const_int 0)]
> ""
@@ -62,11 +62,13 @@ loongarch_cpu_cache[N_TUNE_TYPES] = {
.l1d_line_size = 64,
.l1d_size = 64,
.l2d_size = 256,
+ .simultaneous_prefetches = 4,
},
[CPU_LA464] = {
.l1d_line_size = 64,
.l1d_size = 64,
.l2d_size = 256,
+ .simultaneous_prefetches = 4,
},
};
@@ -179,5 +179,6 @@ extern tree loongarch_builtin_decl (unsigned int, bool);
extern rtx loongarch_expand_builtin (tree, rtx, rtx subtarget ATTRIBUTE_UNUSED,
machine_mode, int);
extern tree loongarch_build_builtin_va_list (void);
+extern rtx loongarch_prefetch_cookie (rtx, rtx);
#endif /* ! GCC_LOONGARCH_PROTOS_H */
@@ -45,6 +45,7 @@ struct loongarch_cache {
int l1d_line_size; /* bytes */
int l1d_size; /* KiB */
int l2d_size; /* kiB */
+ int simultaneous_prefetches; /* number of parallel prefetch */
};
#endif /* LOONGARCH_TUNE_H */
@@ -63,6 +63,7 @@ along with GCC; see the file COPYING3. If not see
#include "context.h"
#include "builtins.h"
#include "rtl-iter.h"
+#include "params.h"
/* This file should be included last. */
#include "target-def.h"
@@ -6126,6 +6127,33 @@ loongarch_option_override_internal (struct gcc_options *opts)
if (loongarch_branch_cost == 0)
loongarch_branch_cost = loongarch_cost->branch_cost;
+ /* Set up parameters to be used in prefetching algorithm. */
+ maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
+ loongarch_cpu_cache[LARCH_ACTUAL_TUNE].simultaneous_prefetches,
+ opts->x_param_values,
+ opts_set->x_param_values);
+
+ maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
+ loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_line_size,
+ opts->x_param_values,
+ opts_set->x_param_values);
+
+ maybe_set_param_value (PARAM_L1_CACHE_SIZE,
+ loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_size,
+ opts->x_param_values,
+ opts_set->x_param_values);
+
+ maybe_set_param_value (PARAM_L2_CACHE_SIZE,
+ loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l2d_size,
+ opts->x_param_values,
+ opts_set->x_param_values);
+
+ /* Enable sw prefetching at -O3 and higher. */
+ if (opts->x_flag_prefetch_loop_arrays < 0
+ && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
+ && !opts->x_optimize_size)
+ opts->x_flag_prefetch_loop_arrays = 1;
+
if (TARGET_DIRECT_EXTERN_ACCESS && flag_shlib)
error ("%qs cannot be used for compiling a shared library",
"-mdirect-extern-access");
@@ -6506,6 +6534,26 @@ loongarch_asan_shadow_offset (void)
return TARGET_64BIT ? (HOST_WIDE_INT_1 << 46) : 0;
}
+/* Return the preld/preldx hint operand for a prefetch.  WRITE is the
+   const_int read/write flag of the prefetch rtx (0 = load, 1 = store).
+   LoongArch only implements preld hint=0 (prefetch for load) and hint=8
+   (prefetch for store), so every request is mapped onto one of those two
+   hints and LOCALITY does not influence the result.  */
+
+rtx
+loongarch_prefetch_cookie (rtx write, rtx locality ATTRIBUTE_UNUSED)
+{
+  switch (INTVAL (write))
+    {
+    case 0:
+      return GEN_INT (0);	/* Prefetch for load.  */
+    case 1:
+      return GEN_INT (8);	/* Prefetch for store.  */
+    default:
+      gcc_unreachable ();
+    }
+}
+
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -3201,6 +3201,29 @@ (define_expand "untyped_call"
;; ....................
;;
+(define_insn "prefetch" ; Emit preld for a prefetch of an arbitrary address operand.
+ [(prefetch (match_operand 0 "address_operand" "p") ; Address to prefetch.
+ (match_operand 1 "const_int_operand" "n") ; Passed to loongarch_prefetch_cookie as WRITE.
+ (match_operand 2 "const_int_operand" "n"))] ; Passed to loongarch_prefetch_cookie as LOCALITY.
+ ""
+{
+ operands[1] = loongarch_prefetch_cookie (operands[1], operands[2]); /* Operand 1 now holds the preld hint.  */
+ return "preld\t%1,%a0"; /* Hint first, then operand 0 printed as an address.  */
+}
+ [(set_attr "type" "prefetch")])
+
+(define_insn "*prefetch_indexed_<mode>" ; Emit preldx for a register-plus-register prefetch address.
+ [(prefetch (plus:P (match_operand 0 "register_operand" "r") ; First register of the address sum.
+ (match_operand 1 "register_operand" "r")) ; Second register of the address sum.
+ (match_operand 2 "const_int_operand" "n") ; Passed to loongarch_prefetch_cookie as WRITE.
+ (match_operand 3 "const_int_operand" "n"))] ; Passed to loongarch_prefetch_cookie as LOCALITY.
+ ""
+{
+ operands[2] = loongarch_prefetch_cookie (operands[2], operands[3]); /* Operand 2 now holds the hint.  */
+ return "preldx\t%2,%1,%0"; /* Hint first, then operand 1, then operand 0.  */
+}
+ [(set_attr "type" "prefetchx")])
+
(define_insn "nop"
[(const_int 0)]
""