[v3] LoongArch: Add prefetch instructions.
Checks
Commit Message
v2 -> v3:
1. Remove preldx support.
---------------------------------------
Enable sw prefetching at -O3 and higher.
Co-Authored-By: xujiahao <xujiahao@loongson.cn>
gcc/ChangeLog:
* config/loongarch/constraints.md (ZD): New constraint.
* config/loongarch/loongarch-def.c: Initial number of parallel prefetch.
* config/loongarch/loongarch-tune.h (struct loongarch_cache):
Define number of parallel prefetch.
* config/loongarch/loongarch.cc (loongarch_option_override_internal):
Set up parameters to be used in prefetching algorithm.
* config/loongarch/loongarch.md (prefetch): New template.
---
gcc/config/loongarch/constraints.md | 10 ++++++++++
gcc/config/loongarch/loongarch-def.c | 2 ++
gcc/config/loongarch/loongarch-tune.h | 1 +
gcc/config/loongarch/loongarch.cc | 28 +++++++++++++++++++++++++++
gcc/config/loongarch/loongarch.md | 14 ++++++++++++++
5 files changed, 55 insertions(+)
Comments
On 2022/11/16 10:10, Lulu Cheng wrote:
> v2 -> v3:
> 1. Remove preldx support.
>
> ---------------------------------------
> Enable sw prefetching at -O3 and higher.
>
> Co-Authored-By: xujiahao <xujiahao@loongson.cn>
>
> gcc/ChangeLog:
>
> * config/loongarch/constraints.md (ZD): New constraint.
> * config/loongarch/loongarch-def.c: Initial number of parallel prefetch.
> * config/loongarch/loongarch-tune.h (struct loongarch_cache):
> Define number of parallel prefetch.
> * config/loongarch/loongarch.cc (loongarch_option_override_internal):
> Set up parameters to be used in prefetching algorithm.
> * config/loongarch/loongarch.md (prefetch): New template.
> ---
> gcc/config/loongarch/constraints.md | 10 ++++++++++
> gcc/config/loongarch/loongarch-def.c | 2 ++
> gcc/config/loongarch/loongarch-tune.h | 1 +
> gcc/config/loongarch/loongarch.cc | 28 +++++++++++++++++++++++++++
> gcc/config/loongarch/loongarch.md | 14 ++++++++++++++
> 5 files changed, 55 insertions(+)
>
> diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md
> index 43cb7b5f0f5..46f7f63ae31 100644
> --- a/gcc/config/loongarch/constraints.md
> +++ b/gcc/config/loongarch/constraints.md
> @@ -86,6 +86,10 @@
> ;; "ZB"
> ;; "An address that is held in a general-purpose register.
> ;; The offset is zero"
> +;; "ZD"
> +;; "An address operand whose address is formed by a base register
> +;; and offset that is suitable for use in instructions with the same
> +;; addressing mode as @code{preld}."
> ;; "<" "Matches a pre-dec or post-dec operand." (Global non-architectural)
> ;; ">" "Matches a pre-inc or post-inc operand." (Global non-architectural)
>
> @@ -190,3 +194,9 @@ (define_memory_constraint "ZB"
> The offset is zero"
> (and (match_code "mem")
> (match_test "REG_P (XEXP (op, 0))")))
> +
> +(define_address_constraint "ZD"
> + "An address operand whose address is formed by a base register
> + and offset that is suitable for use in instructions with the same
> + addressing mode as @code{preld}."
> + (match_test "loongarch_12bit_offset_address_p (op, mode)"))
How is this different from the "m" constraint? AFAIK preld and ld share
the same addressing mode (i.e. base register + 12-bit signed immediate
offset).
> diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c
> index cbf995d81b5..80ab10a52a8 100644
> --- a/gcc/config/loongarch/loongarch-def.c
> +++ b/gcc/config/loongarch/loongarch-def.c
> @@ -62,11 +62,13 @@ loongarch_cpu_cache[N_TUNE_TYPES] = {
> .l1d_line_size = 64,
> .l1d_size = 64,
> .l2d_size = 256,
> + .simultaneous_prefetches = 4,
> },
> [CPU_LA464] = {
> .l1d_line_size = 64,
> .l1d_size = 64,
> .l2d_size = 256,
> + .simultaneous_prefetches = 4,
> },
> };
>
> diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h
> index 6f3530f5c02..8e3eb29472b 100644
> --- a/gcc/config/loongarch/loongarch-tune.h
> +++ b/gcc/config/loongarch/loongarch-tune.h
> @@ -45,6 +45,7 @@ struct loongarch_cache {
> int l1d_line_size; /* bytes */
> int l1d_size; /* KiB */
> int l2d_size; /* kiB */
> + int simultaneous_prefetches; /* number of parallel prefetch */
nit: "prefetches" or "prefetch ops" or "int prefetch_width"?
> };
>
> #endif /* LOONGARCH_TUNE_H */
> diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
> index 8d5d8d965dd..8ee32c90573 100644
> --- a/gcc/config/loongarch/loongarch.cc
> +++ b/gcc/config/loongarch/loongarch.cc
> @@ -63,6 +63,7 @@ along with GCC; see the file COPYING3. If not see
> #include "context.h"
> #include "builtins.h"
> #include "rtl-iter.h"
> +#include "opts.h"
>
> /* This file should be included last. */
> #include "target-def.h"
> @@ -6100,6 +6101,33 @@ loongarch_option_override_internal (struct gcc_options *opts)
> if (loongarch_branch_cost == 0)
> loongarch_branch_cost = loongarch_cost->branch_cost;
>
> + /* Set up parameters to be used in prefetching algorithm. */
> + int simultaneous_prefetches
> + = loongarch_cpu_cache[LARCH_ACTUAL_TUNE].simultaneous_prefetches;
> +
> + SET_OPTION_IF_UNSET (opts, &global_options_set,
> + param_simultaneous_prefetches,
> + simultaneous_prefetches);
> +
> + SET_OPTION_IF_UNSET (opts, &global_options_set,
> + param_l1_cache_line_size,
> + loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_line_size);
> +
> + SET_OPTION_IF_UNSET (opts, &global_options_set,
> + param_l1_cache_size,
> + loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_size);
> +
> + SET_OPTION_IF_UNSET (opts, &global_options_set,
> + param_l2_cache_size,
> + loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l2d_size);
> +
> +
> + /* Enable sw prefetching at -O3 and higher. */
> + if (opts->x_flag_prefetch_loop_arrays < 0
> + && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
> + && !opts->x_optimize_size)
> + opts->x_flag_prefetch_loop_arrays = 1;
> +
> if (TARGET_DIRECT_EXTERN_ACCESS && flag_shlib)
> error ("%qs cannot be used for compiling a shared library",
> "-mdirect-extern-access");
> diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
> index 682ab961741..2fda5381904 100644
> --- a/gcc/config/loongarch/loongarch.md
> +++ b/gcc/config/loongarch/loongarch.md
> @@ -3282,6 +3282,20 @@ (define_expand "untyped_call"
> ;; ....................
> ;;
>
> +(define_insn "prefetch"
> + [(prefetch (match_operand 0 "address_operand" "ZD")
> + (match_operand 1 "const_int_operand" "n")
> + (match_operand 2 "const_int_operand" "n"))]
> + ""
> +{
> + switch (INTVAL (operands[1]))
> + {
> + case 0: return "preld\t0,%a0";
> + case 1: return "preld\t8,%a0";
> + default: gcc_unreachable ();
> + }
> +})
> +
> (define_insn "nop"
> [(const_int 0)]
> ""
在 2022/11/16 上午11:06, WANG Xuerui 写道:
>
> On 2022/11/16 10:10, Lulu Cheng wrote:
>> v2 -> v3:
>> 1. Remove preldx support.
>>
>> ---------------------------------------
>> Enable sw prefetching at -O3 and higher.
>>
>> Co-Authored-By: xujiahao <xujiahao@loongson.cn>
>>
>> gcc/ChangeLog:
>>
>> * config/loongarch/constraints.md (ZD): New constraint.
>> * config/loongarch/loongarch-def.c: Initial number of parallel
>> prefetch.
>> * config/loongarch/loongarch-tune.h (struct loongarch_cache):
>> Define number of parallel prefetch.
>> * config/loongarch/loongarch.cc
>> (loongarch_option_override_internal):
>> Set up parameters to be used in prefetching algorithm.
>> * config/loongarch/loongarch.md (prefetch): New template.
>> ---
>> gcc/config/loongarch/constraints.md | 10 ++++++++++
>> gcc/config/loongarch/loongarch-def.c | 2 ++
>> gcc/config/loongarch/loongarch-tune.h | 1 +
>> gcc/config/loongarch/loongarch.cc | 28 +++++++++++++++++++++++++++
>> gcc/config/loongarch/loongarch.md | 14 ++++++++++++++
>> 5 files changed, 55 insertions(+)
>>
>> diff --git a/gcc/config/loongarch/constraints.md
>> b/gcc/config/loongarch/constraints.md
>> index 43cb7b5f0f5..46f7f63ae31 100644
>> --- a/gcc/config/loongarch/constraints.md
>> +++ b/gcc/config/loongarch/constraints.md
>> @@ -86,6 +86,10 @@
>> ;; "ZB"
>> ;; "An address that is held in a general-purpose register.
>> ;; The offset is zero"
>> +;; "ZD"
>> +;; "An address operand whose address is formed by a base register
>> +;; and offset that is suitable for use in instructions with the
>> same
>> +;; addressing mode as @code{preld}."
>> ;; "<" "Matches a pre-dec or post-dec operand." (Global
>> non-architectural)
>> ;; ">" "Matches a pre-inc or post-inc operand." (Global
>> non-architectural)
>> @@ -190,3 +194,9 @@ (define_memory_constraint "ZB"
>> The offset is zero"
>> (and (match_code "mem")
>> (match_test "REG_P (XEXP (op, 0))")))
>> +
>> +(define_address_constraint "ZD"
>> + "An address operand whose address is formed by a base register
>> + and offset that is suitable for use in instructions with the same
>> + addressing mode as @code{preld}."
>> + (match_test "loongarch_12bit_offset_address_p (op, mode)"))
>
> How is this different with the "m" constraint? AFAIK preld and ld
> share the same addressing mode (i.e. base register + 12-bit signed
> immediate offset).
The "m" constraint is defined as follows:
(define_memory_constraint "m"
(and (match_code "mem")
(match_test "loongarch_12bit_offset_address_p (XEXP (op, 0),
mode)")))
An operand matching this constraint must be a memory operand.
The "ZD" constraint, on the other hand, matches an address operand.
I think (mem:mode (address operand)) = memory operand.
>
>> diff --git a/gcc/config/loongarch/loongarch-def.c
>> b/gcc/config/loongarch/loongarch-def.c
>> index cbf995d81b5..80ab10a52a8 100644
>> --- a/gcc/config/loongarch/loongarch-def.c
>> +++ b/gcc/config/loongarch/loongarch-def.c
>> @@ -62,11 +62,13 @@ loongarch_cpu_cache[N_TUNE_TYPES] = {
>> .l1d_line_size = 64,
>> .l1d_size = 64,
>> .l2d_size = 256,
>> + .simultaneous_prefetches = 4,
>> },
>> [CPU_LA464] = {
>> .l1d_line_size = 64,
>> .l1d_size = 64,
>> .l2d_size = 256,
>> + .simultaneous_prefetches = 4,
>> },
>> };
>> diff --git a/gcc/config/loongarch/loongarch-tune.h
>> b/gcc/config/loongarch/loongarch-tune.h
>> index 6f3530f5c02..8e3eb29472b 100644
>> --- a/gcc/config/loongarch/loongarch-tune.h
>> +++ b/gcc/config/loongarch/loongarch-tune.h
>> @@ -45,6 +45,7 @@ struct loongarch_cache {
>> int l1d_line_size; /* bytes */
>> int l1d_size; /* KiB */
>> int l2d_size; /* kiB */
>> + int simultaneous_prefetches; /* number of parallel prefetch */
> nit: "prefetches" or "prefetch ops" or "int prefetch_width"?
>> };
>> #endif /* LOONGARCH_TUNE_H */
>> diff --git a/gcc/config/loongarch/loongarch.cc
>> b/gcc/config/loongarch/loongarch.cc
>> index 8d5d8d965dd..8ee32c90573 100644
>> --- a/gcc/config/loongarch/loongarch.cc
>> +++ b/gcc/config/loongarch/loongarch.cc
>> @@ -63,6 +63,7 @@ along with GCC; see the file COPYING3. If not see
>> #include "context.h"
>> #include "builtins.h"
>> #include "rtl-iter.h"
>> +#include "opts.h"
>> /* This file should be included last. */
>> #include "target-def.h"
>> @@ -6100,6 +6101,33 @@ loongarch_option_override_internal (struct
>> gcc_options *opts)
>> if (loongarch_branch_cost == 0)
>> loongarch_branch_cost = loongarch_cost->branch_cost;
>> + /* Set up parameters to be used in prefetching algorithm. */
>> + int simultaneous_prefetches
>> + = loongarch_cpu_cache[LARCH_ACTUAL_TUNE].simultaneous_prefetches;
>> +
>> + SET_OPTION_IF_UNSET (opts, &global_options_set,
>> + param_simultaneous_prefetches,
>> + simultaneous_prefetches);
>> +
>> + SET_OPTION_IF_UNSET (opts, &global_options_set,
>> + param_l1_cache_line_size,
>> + loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_line_size);
>> +
>> + SET_OPTION_IF_UNSET (opts, &global_options_set,
>> + param_l1_cache_size,
>> + loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_size);
>> +
>> + SET_OPTION_IF_UNSET (opts, &global_options_set,
>> + param_l2_cache_size,
>> + loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l2d_size);
>> +
>> +
>> + /* Enable sw prefetching at -O3 and higher. */
>> + if (opts->x_flag_prefetch_loop_arrays < 0
>> + && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
>> + && !opts->x_optimize_size)
>> + opts->x_flag_prefetch_loop_arrays = 1;
>> +
>> if (TARGET_DIRECT_EXTERN_ACCESS && flag_shlib)
>> error ("%qs cannot be used for compiling a shared library",
>> "-mdirect-extern-access");
>> diff --git a/gcc/config/loongarch/loongarch.md
>> b/gcc/config/loongarch/loongarch.md
>> index 682ab961741..2fda5381904 100644
>> --- a/gcc/config/loongarch/loongarch.md
>> +++ b/gcc/config/loongarch/loongarch.md
>> @@ -3282,6 +3282,20 @@ (define_expand "untyped_call"
>> ;; ....................
>> ;;
>> +(define_insn "prefetch"
>> + [(prefetch (match_operand 0 "address_operand" "ZD")
>> + (match_operand 1 "const_int_operand" "n")
>> + (match_operand 2 "const_int_operand" "n"))]
>> + ""
>> +{
>> + switch (INTVAL (operands[1]))
>> + {
>> + case 0: return "preld\t0,%a0";
>> + case 1: return "preld\t8,%a0";
>> + default: gcc_unreachable ();
>> + }
>> +})
>> +
>> (define_insn "nop"
>> [(const_int 0)]
>> ""
On Wed, 2022-11-16 at 11:19 +0800, Lulu Cheng wrote:
> The "m" constraint is defined as follows:
> (define_memory_constraint "m"
> (and (match_code "mem")
> (match_test "loongarch_12bit_offset_address_p (XEXP (op, 0),
> mode)")))
> This setting must be a memory operand.
> ''ZD" constraint is a address operand.
> I think (mem:mode (address operand)) = memory operand.
Yes, they are different. I tried reusing "m" in my previous attempt to
add the prefetch instruction, but it didn't work.
LGTM. A minor issue is that "enabling -fprefetch-loop-arrays at -O3" is
not documented, but AArch64 and i386 are already doing this anyway. We can
add this to the documentation later.
On Wed, 2022-11-16 at 10:10 +0800, Lulu Cheng wrote:
> v2 -> v3:
> 1. Remove preldx support.
>
> ---------------------------------------
> Enable sw prefetching at -O3 and higher.
>
> Co-Authored-By: xujiahao <xujiahao@loongson.cn>
>
> gcc/ChangeLog:
>
> * config/loongarch/constraints.md (ZD): New constraint.
> * config/loongarch/loongarch-def.c: Initial number of parallel
> prefetch.
> * config/loongarch/loongarch-tune.h (struct loongarch_cache):
> Define number of parallel prefetch.
> * config/loongarch/loongarch.cc
> (loongarch_option_override_internal):
> Set up parameters to be used in prefetching algorithm.
> * config/loongarch/loongarch.md (prefetch): New template.
> ---
> gcc/config/loongarch/constraints.md | 10 ++++++++++
> gcc/config/loongarch/loongarch-def.c | 2 ++
> gcc/config/loongarch/loongarch-tune.h | 1 +
> gcc/config/loongarch/loongarch.cc | 28
> +++++++++++++++++++++++++++
> gcc/config/loongarch/loongarch.md | 14 ++++++++++++++
> 5 files changed, 55 insertions(+)
>
> diff --git a/gcc/config/loongarch/constraints.md
> b/gcc/config/loongarch/constraints.md
> index 43cb7b5f0f5..46f7f63ae31 100644
> --- a/gcc/config/loongarch/constraints.md
> +++ b/gcc/config/loongarch/constraints.md
> @@ -86,6 +86,10 @@
> ;; "ZB"
> ;; "An address that is held in a general-purpose register.
> ;; The offset is zero"
> +;; "ZD"
> +;; "An address operand whose address is formed by a base register
> +;; and offset that is suitable for use in instructions with the
> same
> +;; addressing mode as @code{preld}."
> ;; "<" "Matches a pre-dec or post-dec operand." (Global non-
> architectural)
> ;; ">" "Matches a pre-inc or post-inc operand." (Global non-
> architectural)
>
> @@ -190,3 +194,9 @@ (define_memory_constraint "ZB"
> The offset is zero"
> (and (match_code "mem")
> (match_test "REG_P (XEXP (op, 0))")))
> +
> +(define_address_constraint "ZD"
> + "An address operand whose address is formed by a base register
> + and offset that is suitable for use in instructions with the same
> + addressing mode as @code{preld}."
> + (match_test "loongarch_12bit_offset_address_p (op, mode)"))
> diff --git a/gcc/config/loongarch/loongarch-def.c
> b/gcc/config/loongarch/loongarch-def.c
> index cbf995d81b5..80ab10a52a8 100644
> --- a/gcc/config/loongarch/loongarch-def.c
> +++ b/gcc/config/loongarch/loongarch-def.c
> @@ -62,11 +62,13 @@ loongarch_cpu_cache[N_TUNE_TYPES] = {
> .l1d_line_size = 64,
> .l1d_size = 64,
> .l2d_size = 256,
> + .simultaneous_prefetches = 4,
> },
> [CPU_LA464] = {
> .l1d_line_size = 64,
> .l1d_size = 64,
> .l2d_size = 256,
> + .simultaneous_prefetches = 4,
> },
> };
>
> diff --git a/gcc/config/loongarch/loongarch-tune.h
> b/gcc/config/loongarch/loongarch-tune.h
> index 6f3530f5c02..8e3eb29472b 100644
> --- a/gcc/config/loongarch/loongarch-tune.h
> +++ b/gcc/config/loongarch/loongarch-tune.h
> @@ -45,6 +45,7 @@ struct loongarch_cache {
> int l1d_line_size; /* bytes */
> int l1d_size; /* KiB */
> int l2d_size; /* kiB */
> + int simultaneous_prefetches; /* number of parallel prefetch */
> };
>
> #endif /* LOONGARCH_TUNE_H */
> diff --git a/gcc/config/loongarch/loongarch.cc
> b/gcc/config/loongarch/loongarch.cc
> index 8d5d8d965dd..8ee32c90573 100644
> --- a/gcc/config/loongarch/loongarch.cc
> +++ b/gcc/config/loongarch/loongarch.cc
> @@ -63,6 +63,7 @@ along with GCC; see the file COPYING3. If not see
> #include "context.h"
> #include "builtins.h"
> #include "rtl-iter.h"
> +#include "opts.h"
>
> /* This file should be included last. */
> #include "target-def.h"
> @@ -6100,6 +6101,33 @@ loongarch_option_override_internal (struct
> gcc_options *opts)
> if (loongarch_branch_cost == 0)
> loongarch_branch_cost = loongarch_cost->branch_cost;
>
> + /* Set up parameters to be used in prefetching algorithm. */
> + int simultaneous_prefetches
> + = loongarch_cpu_cache[LARCH_ACTUAL_TUNE].simultaneous_prefetches;
> +
> + SET_OPTION_IF_UNSET (opts, &global_options_set,
> + param_simultaneous_prefetches,
> + simultaneous_prefetches);
> +
> + SET_OPTION_IF_UNSET (opts, &global_options_set,
> + param_l1_cache_line_size,
> +
> loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_line_size);
> +
> + SET_OPTION_IF_UNSET (opts, &global_options_set,
> + param_l1_cache_size,
> +
> loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_size);
> +
> + SET_OPTION_IF_UNSET (opts, &global_options_set,
> + param_l2_cache_size,
> +
> loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l2d_size);
> +
> +
> + /* Enable sw prefetching at -O3 and higher. */
> + if (opts->x_flag_prefetch_loop_arrays < 0
> + && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
> + && !opts->x_optimize_size)
> + opts->x_flag_prefetch_loop_arrays = 1;
> +
> if (TARGET_DIRECT_EXTERN_ACCESS && flag_shlib)
> error ("%qs cannot be used for compiling a shared library",
> "-mdirect-extern-access");
> diff --git a/gcc/config/loongarch/loongarch.md
> b/gcc/config/loongarch/loongarch.md
> index 682ab961741..2fda5381904 100644
> --- a/gcc/config/loongarch/loongarch.md
> +++ b/gcc/config/loongarch/loongarch.md
> @@ -3282,6 +3282,20 @@ (define_expand "untyped_call"
> ;; ....................
> ;;
>
> +(define_insn "prefetch"
> + [(prefetch (match_operand 0 "address_operand" "ZD")
> + (match_operand 1 "const_int_operand" "n")
> + (match_operand 2 "const_int_operand" "n"))]
> + ""
> +{
> + switch (INTVAL (operands[1]))
> + {
> + case 0: return "preld\t0,%a0";
> + case 1: return "preld\t8,%a0";
> + default: gcc_unreachable ();
> + }
> +})
> +
> (define_insn "nop"
> [(const_int 0)]
> ""
Pushed r13-4259.
在 2022/11/16 10:10, Lulu Cheng 写道:
> v2 -> v3:
> 1. Remove preldx support.
>
> ---------------------------------------
> Enable sw prefetching at -O3 and higher.
>
> Co-Authored-By: xujiahao <xujiahao@loongson.cn>
>
> gcc/ChangeLog:
>
> * config/loongarch/constraints.md (ZD): New constraint.
> * config/loongarch/loongarch-def.c: Initial number of parallel prefetch.
> * config/loongarch/loongarch-tune.h (struct loongarch_cache):
> Define number of parallel prefetch.
> * config/loongarch/loongarch.cc (loongarch_option_override_internal):
> Set up parameters to be used in prefetching algorithm.
> * config/loongarch/loongarch.md (prefetch): New template.
> ---
> gcc/config/loongarch/constraints.md | 10 ++++++++++
> gcc/config/loongarch/loongarch-def.c | 2 ++
> gcc/config/loongarch/loongarch-tune.h | 1 +
> gcc/config/loongarch/loongarch.cc | 28 +++++++++++++++++++++++++++
> gcc/config/loongarch/loongarch.md | 14 ++++++++++++++
> 5 files changed, 55 insertions(+)
>
> diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md
> index 43cb7b5f0f5..46f7f63ae31 100644
> --- a/gcc/config/loongarch/constraints.md
> +++ b/gcc/config/loongarch/constraints.md
> @@ -86,6 +86,10 @@
> ;; "ZB"
> ;; "An address that is held in a general-purpose register.
> ;; The offset is zero"
> +;; "ZD"
> +;; "An address operand whose address is formed by a base register
> +;; and offset that is suitable for use in instructions with the same
> +;; addressing mode as @code{preld}."
> ;; "<" "Matches a pre-dec or post-dec operand." (Global non-architectural)
> ;; ">" "Matches a pre-inc or post-inc operand." (Global non-architectural)
>
> @@ -190,3 +194,9 @@ (define_memory_constraint "ZB"
> The offset is zero"
> (and (match_code "mem")
> (match_test "REG_P (XEXP (op, 0))")))
> +
> +(define_address_constraint "ZD"
> + "An address operand whose address is formed by a base register
> + and offset that is suitable for use in instructions with the same
> + addressing mode as @code{preld}."
> + (match_test "loongarch_12bit_offset_address_p (op, mode)"))
> diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c
> index cbf995d81b5..80ab10a52a8 100644
> --- a/gcc/config/loongarch/loongarch-def.c
> +++ b/gcc/config/loongarch/loongarch-def.c
> @@ -62,11 +62,13 @@ loongarch_cpu_cache[N_TUNE_TYPES] = {
> .l1d_line_size = 64,
> .l1d_size = 64,
> .l2d_size = 256,
> + .simultaneous_prefetches = 4,
> },
> [CPU_LA464] = {
> .l1d_line_size = 64,
> .l1d_size = 64,
> .l2d_size = 256,
> + .simultaneous_prefetches = 4,
> },
> };
>
> diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h
> index 6f3530f5c02..8e3eb29472b 100644
> --- a/gcc/config/loongarch/loongarch-tune.h
> +++ b/gcc/config/loongarch/loongarch-tune.h
> @@ -45,6 +45,7 @@ struct loongarch_cache {
> int l1d_line_size; /* bytes */
> int l1d_size; /* KiB */
> int l2d_size; /* kiB */
> + int simultaneous_prefetches; /* number of parallel prefetch */
> };
>
> #endif /* LOONGARCH_TUNE_H */
> diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
> index 8d5d8d965dd..8ee32c90573 100644
> --- a/gcc/config/loongarch/loongarch.cc
> +++ b/gcc/config/loongarch/loongarch.cc
> @@ -63,6 +63,7 @@ along with GCC; see the file COPYING3. If not see
> #include "context.h"
> #include "builtins.h"
> #include "rtl-iter.h"
> +#include "opts.h"
>
> /* This file should be included last. */
> #include "target-def.h"
> @@ -6100,6 +6101,33 @@ loongarch_option_override_internal (struct gcc_options *opts)
> if (loongarch_branch_cost == 0)
> loongarch_branch_cost = loongarch_cost->branch_cost;
>
> + /* Set up parameters to be used in prefetching algorithm. */
> + int simultaneous_prefetches
> + = loongarch_cpu_cache[LARCH_ACTUAL_TUNE].simultaneous_prefetches;
> +
> + SET_OPTION_IF_UNSET (opts, &global_options_set,
> + param_simultaneous_prefetches,
> + simultaneous_prefetches);
> +
> + SET_OPTION_IF_UNSET (opts, &global_options_set,
> + param_l1_cache_line_size,
> + loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_line_size);
> +
> + SET_OPTION_IF_UNSET (opts, &global_options_set,
> + param_l1_cache_size,
> + loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_size);
> +
> + SET_OPTION_IF_UNSET (opts, &global_options_set,
> + param_l2_cache_size,
> + loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l2d_size);
> +
> +
> + /* Enable sw prefetching at -O3 and higher. */
> + if (opts->x_flag_prefetch_loop_arrays < 0
> + && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
> + && !opts->x_optimize_size)
> + opts->x_flag_prefetch_loop_arrays = 1;
> +
> if (TARGET_DIRECT_EXTERN_ACCESS && flag_shlib)
> error ("%qs cannot be used for compiling a shared library",
> "-mdirect-extern-access");
> diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
> index 682ab961741..2fda5381904 100644
> --- a/gcc/config/loongarch/loongarch.md
> +++ b/gcc/config/loongarch/loongarch.md
> @@ -3282,6 +3282,20 @@ (define_expand "untyped_call"
> ;; ....................
> ;;
>
> +(define_insn "prefetch"
> + [(prefetch (match_operand 0 "address_operand" "ZD")
> + (match_operand 1 "const_int_operand" "n")
> + (match_operand 2 "const_int_operand" "n"))]
> + ""
> +{
> + switch (INTVAL (operands[1]))
> + {
> + case 0: return "preld\t0,%a0";
> + case 1: return "preld\t8,%a0";
> + default: gcc_unreachable ();
> + }
> +})
> +
> (define_insn "nop"
> [(const_int 0)]
> ""
@@ -86,6 +86,10 @@
;; "ZB"
;; "An address that is held in a general-purpose register.
;; The offset is zero"
+;; "ZD"
+;; "An address operand whose address is formed by a base register
+;; and offset that is suitable for use in instructions with the same
+;; addressing mode as @code{preld}."
;; "<" "Matches a pre-dec or post-dec operand." (Global non-architectural)
;; ">" "Matches a pre-inc or post-inc operand." (Global non-architectural)
@@ -190,3 +194,9 @@ (define_memory_constraint "ZB"
The offset is zero"
(and (match_code "mem")
(match_test "REG_P (XEXP (op, 0))")))
+
+(define_address_constraint "ZD"
+ "An address operand whose address is formed by a base register
+ and offset that is suitable for use in instructions with the same
+ addressing mode as @code{preld}."
+ (match_test "loongarch_12bit_offset_address_p (op, mode)"))
@@ -62,11 +62,13 @@ loongarch_cpu_cache[N_TUNE_TYPES] = {
.l1d_line_size = 64,
.l1d_size = 64,
.l2d_size = 256,
+ .simultaneous_prefetches = 4,
},
[CPU_LA464] = {
.l1d_line_size = 64,
.l1d_size = 64,
.l2d_size = 256,
+ .simultaneous_prefetches = 4,
},
};
@@ -45,6 +45,7 @@ struct loongarch_cache {
int l1d_line_size; /* bytes */
int l1d_size; /* KiB */
int l2d_size; /* kiB */
+ int simultaneous_prefetches; /* number of parallel prefetch */
};
#endif /* LOONGARCH_TUNE_H */
@@ -63,6 +63,7 @@ along with GCC; see the file COPYING3. If not see
#include "context.h"
#include "builtins.h"
#include "rtl-iter.h"
+#include "opts.h"
/* This file should be included last. */
#include "target-def.h"
@@ -6100,6 +6101,33 @@ loongarch_option_override_internal (struct gcc_options *opts)
if (loongarch_branch_cost == 0)
loongarch_branch_cost = loongarch_cost->branch_cost;
+ /* Set up parameters to be used in prefetching algorithm. */
+ int simultaneous_prefetches
+ = loongarch_cpu_cache[LARCH_ACTUAL_TUNE].simultaneous_prefetches;
+
+ SET_OPTION_IF_UNSET (opts, &global_options_set,
+ param_simultaneous_prefetches,
+ simultaneous_prefetches);
+
+ SET_OPTION_IF_UNSET (opts, &global_options_set,
+ param_l1_cache_line_size,
+ loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_line_size);
+
+ SET_OPTION_IF_UNSET (opts, &global_options_set,
+ param_l1_cache_size,
+ loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_size);
+
+ SET_OPTION_IF_UNSET (opts, &global_options_set,
+ param_l2_cache_size,
+ loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l2d_size);
+
+
+ /* Enable sw prefetching at -O3 and higher. */
+ if (opts->x_flag_prefetch_loop_arrays < 0
+ && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
+ && !opts->x_optimize_size)
+ opts->x_flag_prefetch_loop_arrays = 1;
+
if (TARGET_DIRECT_EXTERN_ACCESS && flag_shlib)
error ("%qs cannot be used for compiling a shared library",
"-mdirect-extern-access");
@@ -3282,6 +3282,20 @@ (define_expand "untyped_call"
;; ....................
;;
+(define_insn "prefetch"
+ [(prefetch (match_operand 0 "address_operand" "ZD")
+ (match_operand 1 "const_int_operand" "n")
+ (match_operand 2 "const_int_operand" "n"))]
+ ""
+{
+ switch (INTVAL (operands[1]))
+ {
+ case 0: return "preld\t0,%a0";
+ case 1: return "preld\t8,%a0";
+ default: gcc_unreachable ();
+ }
+})
+
(define_insn "nop"
[(const_int 0)]
""