[V2,5/7] aarch64: Implement system register r/w arm ACLE intrinsic functions
Checks
Commit Message
Implement the aarch64 intrinsics for reading and writing system
registers with the following signatures:
uint32_t __arm_rsr(const char *special_register);
uint64_t __arm_rsr64(const char *special_register);
void* __arm_rsrp(const char *special_register);
float __arm_rsrf(const char *special_register);
double __arm_rsrf64(const char *special_register);
void __arm_wsr(const char *special_register, uint32_t value);
void __arm_wsr64(const char *special_register, uint64_t value);
void __arm_wsrp(const char *special_register, const void *value);
void __arm_wsrf(const char *special_register, float value);
void __arm_wsrf64(const char *special_register, double value);
gcc/ChangeLog:
* gcc/config/aarch64/aarch64-builtins.cc (enum aarch64_builtins):
Add enums for new builtins.
(aarch64_init_rwsr_builtins): New.
(aarch64_general_init_builtins): Call aarch64_init_rwsr_builtins.
(aarch64_expand_rwsr_builtin): New.
(aarch64_general_expand_builtin): Call aarch64_general_expand_builtin.
* gcc/config/aarch64/aarch64.md (read_sysregdi): New insn_and_split.
(write_sysregdi): Likewise.
* gcc/config/aarch64/arm_acle.h (__arm_rsr): New.
(__arm_rsrp): Likewise.
(__arm_rsr64): Likewise.
(__arm_rsrf): Likewise.
(__arm_rsrf64): Likewise.
(__arm_wsr): Likewise.
(__arm_wsrp): Likewise.
(__arm_wsr64): Likewise.
(__arm_wsrf): Likewise.
(__arm_wsrf64): Likewise.
gcc/testsuite/ChangeLog:
* gcc/testsuite/gcc.target/aarch64/acle/rwsr.c: New.
* gcc/testsuite/gcc.target/aarch64/acle/rwsr-1.c: Likewise.
---
gcc/config/aarch64/aarch64-builtins.cc | 200 ++++++++++++++++++
gcc/config/aarch64/aarch64.md | 17 ++
gcc/config/aarch64/arm_acle.h | 30 +++
.../gcc.target/aarch64/acle/rwsr-1.c | 20 ++
gcc/testsuite/gcc.target/aarch64/acle/rwsr.c | 144 +++++++++++++
5 files changed, 411 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/rwsr-1.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/rwsr.c
Comments
Victor Do Nascimento <victor.donascimento@arm.com> writes:
> Implement the aarch64 intrinsics for reading and writing system
> registers with the following signatures:
>
> uint32_t __arm_rsr(const char *special_register);
> uint64_t __arm_rsr64(const char *special_register);
> void* __arm_rsrp(const char *special_register);
> float __arm_rsrf(const char *special_register);
> double __arm_rsrf64(const char *special_register);
> void __arm_wsr(const char *special_register, uint32_t value);
> void __arm_wsr64(const char *special_register, uint64_t value);
> void __arm_wsrp(const char *special_register, const void *value);
> void __arm_wsrf(const char *special_register, float value);
> void __arm_wsrf64(const char *special_register, double value);
>
> gcc/ChangeLog:
>
> * gcc/config/aarch64/aarch64-builtins.cc (enum aarch64_builtins):
> Add enums for new builtins.
> (aarch64_init_rwsr_builtins): New.
> (aarch64_general_init_builtins): Call aarch64_init_rwsr_builtins.
> (aarch64_expand_rwsr_builtin): New.
> (aarch64_general_expand_builtin): Call aarch64_general_expand_builtin.
> * gcc/config/aarch64/aarch64.md (read_sysregdi): New insn_and_split.
> (write_sysregdi): Likewise.
> * gcc/config/aarch64/arm_acle.h (__arm_rsr): New.
> (__arm_rsrp): Likewise.
> (__arm_rsr64): Likewise.
> (__arm_rsrf): Likewise.
> (__arm_rsrf64): Likewise.
> (__arm_wsr): Likewise.
> (__arm_wsrp): Likewise.
> (__arm_wsr64): Likewise.
> (__arm_wsrf): Likewise.
> (__arm_wsrf64): Likewise.
>
> gcc/testsuite/ChangeLog:
>
> * gcc/testsuite/gcc.target/aarch64/acle/rwsr.c: New.
> * gcc/testsuite/gcc.target/aarch64/acle/rwsr-1.c: Likewise.
> ---
> gcc/config/aarch64/aarch64-builtins.cc | 200 ++++++++++++++++++
> gcc/config/aarch64/aarch64.md | 17 ++
> gcc/config/aarch64/arm_acle.h | 30 +++
> .../gcc.target/aarch64/acle/rwsr-1.c | 20 ++
> gcc/testsuite/gcc.target/aarch64/acle/rwsr.c | 144 +++++++++++++
> 5 files changed, 411 insertions(+)
> create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/rwsr-1.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/rwsr.c
>
> diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
> index 04f59fd9a54..d8bb2a989a5 100644
> --- a/gcc/config/aarch64/aarch64-builtins.cc
> +++ b/gcc/config/aarch64/aarch64-builtins.cc
> @@ -808,6 +808,17 @@ enum aarch64_builtins
> AARCH64_RBIT,
> AARCH64_RBITL,
> AARCH64_RBITLL,
> + /* System register builtins. */
> + AARCH64_RSR,
> + AARCH64_RSRP,
> + AARCH64_RSR64,
> + AARCH64_RSRF,
> + AARCH64_RSRF64,
> + AARCH64_WSR,
> + AARCH64_WSRP,
> + AARCH64_WSR64,
> + AARCH64_WSRF,
> + AARCH64_WSRF64,
> AARCH64_BUILTIN_MAX
> };
>
> @@ -1798,6 +1809,65 @@ aarch64_init_rng_builtins (void)
> AARCH64_BUILTIN_RNG_RNDRRS);
> }
>
> +/* Add builtins for reading system register. */
> +static void
> +aarch64_init_rwsr_builtins (void)
> +{
> + tree fntype = NULL;
> + tree const_char_ptr_type
> + = build_pointer_type (build_type_variant (char_type_node, true, false));
> +
> +#define AARCH64_INIT_RWSR_BUILTINS_DECL(F, N, T) \
> + aarch64_builtin_decls[AARCH64_##F] \
> + = aarch64_general_add_builtin ("__builtin_aarch64_"#N, T, AARCH64_##F);
> +
> + fntype
> + = build_function_type_list (uint32_type_node, const_char_ptr_type, NULL);
> + AARCH64_INIT_RWSR_BUILTINS_DECL (RSR, rsr, fntype);
> +
> + fntype
> + = build_function_type_list (ptr_type_node, const_char_ptr_type, NULL);
> + AARCH64_INIT_RWSR_BUILTINS_DECL (RSRP, rsrp, fntype);
> +
> + fntype
> + = build_function_type_list (uint64_type_node, const_char_ptr_type, NULL);
> + AARCH64_INIT_RWSR_BUILTINS_DECL (RSR64, rsr64, fntype);
> +
> + fntype
> + = build_function_type_list (float_type_node, const_char_ptr_type, NULL);
> + AARCH64_INIT_RWSR_BUILTINS_DECL (RSRF, rsrf, fntype);
> +
> + fntype
> + = build_function_type_list (double_type_node, const_char_ptr_type, NULL);
> + AARCH64_INIT_RWSR_BUILTINS_DECL (RSRF64, rsrf64, fntype);
> +
> + fntype
> + = build_function_type_list (void_type_node, const_char_ptr_type,
> + uint32_type_node, NULL);
> +
> + AARCH64_INIT_RWSR_BUILTINS_DECL (WSR, wsr, fntype);
> +
> + fntype
> + = build_function_type_list (void_type_node, const_char_ptr_type,
> + const_ptr_type_node, NULL);
> + AARCH64_INIT_RWSR_BUILTINS_DECL (WSRP, wsrp, fntype);
> +
> + fntype
> + = build_function_type_list (void_type_node, const_char_ptr_type,
> + uint64_type_node, NULL);
> + AARCH64_INIT_RWSR_BUILTINS_DECL (WSR64, wsr64, fntype);
> +
> + fntype
> + = build_function_type_list (void_type_node, const_char_ptr_type,
> + float_type_node, NULL);
> + AARCH64_INIT_RWSR_BUILTINS_DECL (WSRF, wsrf, fntype);
> +
> + fntype
> + = build_function_type_list (void_type_node, const_char_ptr_type,
> + double_type_node, NULL);
> + AARCH64_INIT_RWSR_BUILTINS_DECL (WSRF64, wsrf64, fntype);
> +}
> +
> /* Initialize the memory tagging extension (MTE) builtins. */
> struct
> {
> @@ -2019,6 +2089,8 @@ aarch64_general_init_builtins (void)
> aarch64_init_rng_builtins ();
> aarch64_init_data_intrinsics ();
>
> + aarch64_init_rwsr_builtins ();
> +
> tree ftype_jcvt
> = build_function_type_list (intSI_type_node, double_type_node, NULL);
> aarch64_builtin_decls[AARCH64_JSCVT]
> @@ -2599,6 +2671,123 @@ aarch64_expand_rng_builtin (tree exp, rtx target, int fcode, int ignore)
> return target;
> }
>
> +/* Expand the read/write system register builtin EXPs. */
> +rtx
> +aarch64_expand_rwsr_builtin (tree exp, rtx target, int fcode)
> +{
> + tree arg0, arg1;
> + rtx const_str, input_val, subreg;
> + enum machine_mode mode;
> + class expand_operand ops[2];
> +
> + arg0 = CALL_EXPR_ARG (exp, 0);
> +
> + bool write_op = (fcode == AARCH64_WSR
> + || fcode == AARCH64_WSRP
> + || fcode == AARCH64_WSR64
> + || fcode == AARCH64_WSRF
> + || fcode == AARCH64_WSRF64);
> + bool read_op = (fcode == AARCH64_RSR
> + || fcode == AARCH64_RSRP
> + || fcode == AARCH64_RSR64
> + || fcode == AARCH64_RSRF
> + || fcode == AARCH64_RSRF64);
Instead of this, how about using:
if (call_expr_nargs (exp) == 1)
for the read path?
> +
> + /* Argument 0 (system register name) must be a string literal. */
> + gcc_assert (!SSA_VAR_P (arg0)
> + && TREE_CODE (TREE_TYPE (arg0)) == POINTER_TYPE
> + && TREE_CODE (TREE_OPERAND (arg0,0)) == STRING_CST);
It looks like this requires arg0 to be an ADDR_EXPR. It would be good
to check that rather than !SSA_VAR_P.
Minor formatting nit: should be a space after the comma.
> +
> + const char *name_input = TREE_STRING_POINTER (TREE_OPERAND (arg0, 0));
> + size_t len = strlen (name_input);
This can use TREE_STRING_LENGTH. That's preferable since it copes
with embedded nuls, e.g. "foo\0bar".
> +
> + if (len == 0)
> + {
> + error_at (EXPR_LOCATION (exp), "invalid system register name provided");
> + return const0_rtx;
> + }
Is this check necessary? It looks like the following code would
correctly reject empty strings.
> +
> + char *sysreg_name = (char *) alloca (len + 1);
> + strcpy (sysreg_name, name_input);
We shouldn't use alloca for user input that hasn't been verified yet,
since it could be arbitrarily long. Libiberty provides an xmemdup that
might be useful. (xmemdup rather than xstrdup to cope with the embedded
nuls mentioned above. Or perhaps we should just reject embedded nuls
immediately, so that later code doesn't need to worry about them.)
> +
> + for (unsigned pos = 0; pos <= len; pos++)
> + sysreg_name[pos] = TOLOWER (sysreg_name[pos]);
> +
> + const char* name_output = aarch64_retrieve_sysreg (sysreg_name, write_op);
Does this TOLOWERing make the checks for capital letters in
is_implem_def_reg redundant?
BTW, I forgot to say, but it'd be better to add an "aarch64_" prefix to
is_implem_def_reg, to avoid accidental clashes with target-independent code.
> + if (name_output == NULL)
> + {
> + error_at (EXPR_LOCATION (exp), "invalid system register name provided");
> + return const0_rtx;
> + }
> +
> + /* Assign the string corresponding to the system register name to an RTX. */
> + const_str = rtx_alloc (CONST_STRING);
> + PUT_CODE (const_str, CONST_STRING);
> + XSTR (const_str, 0) = xstrdup (name_output);
I think this needs to use ggc_strdup rather than xstrdup, since the
memory is managed by the garbage collector.
> +
> + /* Set up expander operands and call instruction expansion. */
> + if (read_op)
> + {
> +
Nit: redundant blank line.
> + /* Emit the initial read_sysregdi rtx. */
> + create_output_operand (&ops[0], target, DImode);
> + create_fixed_operand (&ops[1], const_str);
> + expand_insn (CODE_FOR_aarch64_read_sysregdi, 2, ops);
> + target = ops[0].value;
> +
> + /* Do any necessary post-processing on the result. */
> + switch (fcode)
> + {
> + case AARCH64_RSR:
> + return gen_lowpart_SUBREG (SImode, target);
> + case AARCH64_RSRP:
> + case AARCH64_RSR64:
> + return target;
RSRP would return a 32-bit value for -mabi=ilp32. It might be easier
to do:
case AARCH64_RSR:
case AARCH64_RSRP:
case AARCH64_RSR64:
case AARCH64_RSRF64:
return lowpart_subreg (TYPE_MODE (TREE_TYPE (exp)), target, DImode);
lowpart_subreg is a no-op if the new mode is the same as the old mode.
> + case AARCH64_RSRF:
> + subreg = gen_reg_rtx (SImode);
This line looks redundant.
> + subreg = gen_lowpart_SUBREG (SImode, target);
> + return gen_lowpart_SUBREG (SFmode, subreg);
> + case AARCH64_RSRF64:
> + return gen_lowpart_SUBREG (DFmode, target);
> + }
If we do go for my:
call_expr_nargs (exp) == 1
suggestion above, this switch should probably have a:
default:
gcc_unreachable ();
to catch unexpected ops.
> + }
> + if (write_op)
> + {
> +
> + arg1 = CALL_EXPR_ARG (exp, 1);
> + mode = TYPE_MODE (TREE_TYPE (arg1));
> + input_val = copy_to_mode_reg (mode, expand_normal (arg1));
> +
> + switch (fcode)
> + {
> + case AARCH64_WSR:
> + subreg = gen_lowpart_SUBREG (DImode, input_val);
> + break;
> + case AARCH64_WSRP:
> + case AARCH64_WSR64:
> + subreg = input_val;
> + break;
> + case AARCH64_WSRF:
> + subreg = gen_lowpart_SUBREG (SImode, input_val);
> + subreg = gen_lowpart_SUBREG (DImode, subreg);
> + break;
> + case AARCH64_WSRF64:
> + subreg = gen_lowpart_SUBREG (DImode, input_val);
> + break;
> + }
Similarly here, I think all cases except AARCH64_WSRF could use:
input_val = lowpart_subreg (DImode, input_value, mode);
> +
> + create_fixed_operand (&ops[0], const_str);
> + create_input_operand (&ops[1], subreg, DImode);
> + expand_insn (CODE_FOR_aarch64_write_sysregdi, 2, ops);
> +
> + return target;
> + }
> +
> + error_at (EXPR_LOCATION (exp),
> + "Malformed call to read/write system register builtin");
> + return target;
> +}
> +
> /* Expand an expression EXP that calls a MEMTAG built-in FCODE
> with result going to TARGET. */
> static rtx
> @@ -2832,6 +3021,17 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target,
> case AARCH64_BUILTIN_RNG_RNDR:
> case AARCH64_BUILTIN_RNG_RNDRRS:
> return aarch64_expand_rng_builtin (exp, target, fcode, ignore);
> + case AARCH64_RSR:
> + case AARCH64_RSRP:
> + case AARCH64_RSR64:
> + case AARCH64_RSRF:
> + case AARCH64_RSRF64:
> + case AARCH64_WSR:
> + case AARCH64_WSRP:
> + case AARCH64_WSR64:
> + case AARCH64_WSRF:
> + case AARCH64_WSRF64:
> + return aarch64_expand_rwsr_builtin (exp, target, fcode);
> }
>
> if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX)
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 32c7adc8928..80da30bc30c 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -281,6 +281,8 @@
> UNSPEC_UPDATE_FFRT
> UNSPEC_RDFFR
> UNSPEC_WRFFR
> + UNSPEC_SYSREG_RDI
> + UNSPEC_SYSREG_WDI
> ;; Represents an SVE-style lane index, in which the indexing applies
> ;; within the containing 128-bit block.
> UNSPEC_SVE_LANE_SELECT
> @@ -476,6 +478,21 @@
> ;; Jumps and other miscellaneous insns
> ;; -------------------------------------------------------------------
>
> +(define_insn "aarch64_read_sysregdi"
> + [(set (match_operand:DI 0 "register_operand" "=r")
> + (unspec_volatile:DI [(match_operand 1 "aarch64_sysreg_string" "")]
> + UNSPEC_SYSREG_RDI))]
> + ""
> + "mrs\t%x0, %1"
> + )
> +
> +(define_insn "aarch64_write_sysregdi"
> + [(unspec_volatile:DI [(match_operand 0 "aarch64_sysreg_string" "")
> + (match_operand:DI 1 "register_operand" "r")] UNSPEC_SYSREG_WDI)]
> + ""
> + "msr\t%0, %x1"
> + )
I think the write pattern should accept rZ (x0-x30 + xzr) rather than just r.
Minor formatting nit, but other patterns are indented by 2 spaces
rather than 1, with the closing ")" not indented.
Thanks,
Richard
> +
> (define_insn "indirect_jump"
> [(set (pc) (match_operand:DI 0 "register_operand" "r"))]
> ""
> diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h
> index 7599a32301d..71ada878299 100644
> --- a/gcc/config/aarch64/arm_acle.h
> +++ b/gcc/config/aarch64/arm_acle.h
> @@ -314,6 +314,36 @@ __rndrrs (uint64_t *__res)
>
> #pragma GCC pop_options
>
> +#define __arm_rsr(__regname) \
> + __builtin_aarch64_rsr (__regname)
> +
> +#define __arm_rsrp(__regname) \
> + __builtin_aarch64_rsrp (__regname)
> +
> +#define __arm_rsr64(__regname) \
> + __builtin_aarch64_rsr64 (__regname)
> +
> +#define __arm_rsrf(__regname) \
> + __builtin_aarch64_rsrf (__regname)
> +
> +#define __arm_rsrf64(__regname) \
> + __builtin_aarch64_rsrf64 (__regname)
> +
> +#define __arm_wsr(__regname, __value) \
> + __builtin_aarch64_wsr (__regname, __value)
> +
> +#define __arm_wsrp(__regname, __value) \
> + __builtin_aarch64_wsrp (__regname, __value)
> +
> +#define __arm_wsr64(__regname, __value) \
> + __builtin_aarch64_wsr64 (__regname, __value)
> +
> +#define __arm_wsrf(__regname, __value) \
> + __builtin_aarch64_wsrf (__regname, __value)
> +
> +#define __arm_wsrf64(__regname, __value) \
> + __builtin_aarch64_wsrf64 (__regname, __value)
> +
> #ifdef __cplusplus
> }
> #endif
> diff --git a/gcc/testsuite/gcc.target/aarch64/acle/rwsr-1.c b/gcc/testsuite/gcc.target/aarch64/acle/rwsr-1.c
> new file mode 100644
> index 00000000000..bc9db098f16
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/acle/rwsr-1.c
> @@ -0,0 +1,20 @@
> +/* Test the __arm_[r,w]sr ACLE intrinsics family. */
> +/* Ensure that illegal behavior is rejected by the compiler. */
> +
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=armv8.4-a" } */
> +
> +#include <arm_acle.h>
> +
> +/* Ensure that read/write-only register attributes are respected by the compiler. */
> +void
> +test_rwsr_read_write_only ()
> +{
> + /* Attempt to write to read-only registers. */
> + long long a = __arm_rsr64 ("aidr_el1"); /* Read ok. */
> + __arm_wsr64 ("aidr_el1", a); /* { dg-error {invalid system register name provided} } */
> +
> + /* Attempt to read from write-only registers. */
> + __arm_wsr64("icc_asgi1r_el1", a); /* Write ok. */
> + long long b = __arm_rsr64("icc_asgi1r_el1"); /* { dg-error {invalid system register name provided} } */
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/acle/rwsr.c b/gcc/testsuite/gcc.target/aarch64/acle/rwsr.c
> new file mode 100644
> index 00000000000..3af4b960306
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/acle/rwsr.c
> @@ -0,0 +1,144 @@
> +/* Test the __arm_[r,w]sr ACLE intrinsics family. */
> +/* Check that function variants for different data types handle types correctly. */
> +/* { dg-do compile } */
> +/* { dg-options "-O1 -march=armv8.4-a" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#include <arm_acle.h>
> +
> +/*
> +** get_rsr:
> +** ...
> +** mrs x([0-9]+), s2_1_c0_c7_4
> +** add w\1, w\1, 1
> +** ...
> +*/
> +int
> +get_rsr ()
> +{
> + int a = __arm_rsr("trcseqstr");
> + return a+1;
> +}
> +
> +/*
> +** get_rsrf:
> +** mrs x([0-9]+), s2_1_c0_c7_4
> +** fmov s[0-9]+, w\1
> +** ...
> +*/
> +float
> +get_rsrf ()
> +{
> + return __arm_rsrf("trcseqstr");
> +}
> +
> +/*
> +** get_rsrp:
> +** mrs x0, s2_1_c0_c7_4
> +** ret
> +*/
> +void *
> +get_rsrp ()
> +{
> + return __arm_rsrp("trcseqstr");
> +}
> +
> +/*
> +** get_rsr64:
> +** mrs x0, s2_1_c0_c7_4
> +** ret
> +*/
> +long long
> +get_rsr64 ()
> +{
> + return __arm_rsr64("trcseqstr");
> +}
> +
> +/*
> +** get_rsrf64:
> +** mrs x([0-9]+), s2_1_c0_c7_4
> +** fmov d[0-9]+, x\1
> +** ...
> +*/
> +double
> +get_rsrf64 ()
> +{
> + return __arm_rsrf64("trcseqstr");
> +}
> +
> +/*
> +** set_wsr32:
> +** ...
> +** add w([0-9]+), w\1, 1
> +** msr s2_1_c0_c7_4, x\1
> +** ...
> +*/
> +void
> +set_wsr32 (int a)
> +{
> + __arm_wsr("trcseqstr", a+1);
> +}
> +
> +/*
> +** set_wsrp:
> +** ...
> +** msr s2_1_c0_c7_4, x[0-9]+
> +** ...
> +*/
> +void
> +set_wsrp (void *a)
> +{
> + __arm_wsrp("trcseqstr", a);
> +}
> +
> +/*
> +** set_wsr64:
> +** ...
> +** msr s2_1_c0_c7_4, x[0-9]+
> +** ...
> +*/
> +void
> +set_wsr64 (long long a)
> +{
> + __arm_wsr64("trcseqstr", a);
> +}
> +
> +/*
> +** set_wsrf32:
> +** ...
> +** fmov w([0-9]+), s[0-9]+
> +** msr s2_1_c0_c7_4, x\1
> +** ...
> +*/
> +void
> +set_wsrf32 (float a)
> +{
> + __arm_wsrf("trcseqstr", a);
> +}
> +
> +/*
> +** set_wsrf64:
> +** ...
> +** fmov x([0-9]+), d[0-9]+
> +** msr s2_1_c0_c7_4, x\1
> +** ...
> +*/
> +void
> +set_wsrf64(double a)
> +{
> + __arm_wsrf64("trcseqstr", a);
> +}
> +
> +/*
> +** set_custom:
> +** ...
> +** mrs x0, s1_2_c3_c4_5
> +** ...
> +** msr s1_2_c3_c4_5, x0
> +** ...
> +*/
> +void set_custom()
> +{
> + __uint64_t b = __arm_rsr64("S1_2_C3_C4_5");
> + __arm_wsr64("S1_2_C3_C4_5", b);
> +}
On 10/18/23 21:39, Richard Sandiford wrote:
> Victor Do Nascimento <victor.donascimento@arm.com> writes:
>> Implement the aarch64 intrinsics for reading and writing system
>> registers with the following signatures:
>>
>> uint32_t __arm_rsr(const char *special_register);
>> uint64_t __arm_rsr64(const char *special_register);
>> void* __arm_rsrp(const char *special_register);
>> float __arm_rsrf(const char *special_register);
>> double __arm_rsrf64(const char *special_register);
>> void __arm_wsr(const char *special_register, uint32_t value);
>> void __arm_wsr64(const char *special_register, uint64_t value);
>> void __arm_wsrp(const char *special_register, const void *value);
>> void __arm_wsrf(const char *special_register, float value);
>> void __arm_wsrf64(const char *special_register, double value);
>>
>> gcc/ChangeLog:
>>
>> * gcc/config/aarch64/aarch64-builtins.cc (enum aarch64_builtins):
>> Add enums for new builtins.
>> (aarch64_init_rwsr_builtins): New.
>> (aarch64_general_init_builtins): Call aarch64_init_rwsr_builtins.
>> (aarch64_expand_rwsr_builtin): New.
>> (aarch64_general_expand_builtin): Call aarch64_general_expand_builtin.
>> * gcc/config/aarch64/aarch64.md (read_sysregdi): New insn_and_split.
>> (write_sysregdi): Likewise.
>> * gcc/config/aarch64/arm_acle.h (__arm_rsr): New.
>> (__arm_rsrp): Likewise.
>> (__arm_rsr64): Likewise.
>> (__arm_rsrf): Likewise.
>> (__arm_rsrf64): Likewise.
>> (__arm_wsr): Likewise.
>> (__arm_wsrp): Likewise.
>> (__arm_wsr64): Likewise.
>> (__arm_wsrf): Likewise.
>> (__arm_wsrf64): Likewise.
>>
>> gcc/testsuite/ChangeLog:
>>
>> * gcc/testsuite/gcc.target/aarch64/acle/rwsr.c: New.
>> * gcc/testsuite/gcc.target/aarch64/acle/rwsr-1.c: Likewise.
>> ---
>> gcc/config/aarch64/aarch64-builtins.cc | 200 ++++++++++++++++++
>> gcc/config/aarch64/aarch64.md | 17 ++
>> gcc/config/aarch64/arm_acle.h | 30 +++
>> .../gcc.target/aarch64/acle/rwsr-1.c | 20 ++
>> gcc/testsuite/gcc.target/aarch64/acle/rwsr.c | 144 +++++++++++++
>> 5 files changed, 411 insertions(+)
>> create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/rwsr-1.c
>> create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/rwsr.c
>>
>> diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
>> index 04f59fd9a54..d8bb2a989a5 100644
>> --- a/gcc/config/aarch64/aarch64-builtins.cc
>> +++ b/gcc/config/aarch64/aarch64-builtins.cc
>> @@ -808,6 +808,17 @@ enum aarch64_builtins
>> AARCH64_RBIT,
>> AARCH64_RBITL,
>> AARCH64_RBITLL,
>> + /* System register builtins. */
>> + AARCH64_RSR,
>> + AARCH64_RSRP,
>> + AARCH64_RSR64,
>> + AARCH64_RSRF,
>> + AARCH64_RSRF64,
>> + AARCH64_WSR,
>> + AARCH64_WSRP,
>> + AARCH64_WSR64,
>> + AARCH64_WSRF,
>> + AARCH64_WSRF64,
>> AARCH64_BUILTIN_MAX
>> };
>>
>> @@ -1798,6 +1809,65 @@ aarch64_init_rng_builtins (void)
>> AARCH64_BUILTIN_RNG_RNDRRS);
>> }
>>
>> +/* Add builtins for reading system register. */
>> +static void
>> +aarch64_init_rwsr_builtins (void)
>> +{
>> + tree fntype = NULL;
>> + tree const_char_ptr_type
>> + = build_pointer_type (build_type_variant (char_type_node, true, false));
>> +
>> +#define AARCH64_INIT_RWSR_BUILTINS_DECL(F, N, T) \
>> + aarch64_builtin_decls[AARCH64_##F] \
>> + = aarch64_general_add_builtin ("__builtin_aarch64_"#N, T, AARCH64_##F);
>> +
>> + fntype
>> + = build_function_type_list (uint32_type_node, const_char_ptr_type, NULL);
>> + AARCH64_INIT_RWSR_BUILTINS_DECL (RSR, rsr, fntype);
>> +
>> + fntype
>> + = build_function_type_list (ptr_type_node, const_char_ptr_type, NULL);
>> + AARCH64_INIT_RWSR_BUILTINS_DECL (RSRP, rsrp, fntype);
>> +
>> + fntype
>> + = build_function_type_list (uint64_type_node, const_char_ptr_type, NULL);
>> + AARCH64_INIT_RWSR_BUILTINS_DECL (RSR64, rsr64, fntype);
>> +
>> + fntype
>> + = build_function_type_list (float_type_node, const_char_ptr_type, NULL);
>> + AARCH64_INIT_RWSR_BUILTINS_DECL (RSRF, rsrf, fntype);
>> +
>> + fntype
>> + = build_function_type_list (double_type_node, const_char_ptr_type, NULL);
>> + AARCH64_INIT_RWSR_BUILTINS_DECL (RSRF64, rsrf64, fntype);
>> +
>> + fntype
>> + = build_function_type_list (void_type_node, const_char_ptr_type,
>> + uint32_type_node, NULL);
>> +
>> + AARCH64_INIT_RWSR_BUILTINS_DECL (WSR, wsr, fntype);
>> +
>> + fntype
>> + = build_function_type_list (void_type_node, const_char_ptr_type,
>> + const_ptr_type_node, NULL);
>> + AARCH64_INIT_RWSR_BUILTINS_DECL (WSRP, wsrp, fntype);
>> +
>> + fntype
>> + = build_function_type_list (void_type_node, const_char_ptr_type,
>> + uint64_type_node, NULL);
>> + AARCH64_INIT_RWSR_BUILTINS_DECL (WSR64, wsr64, fntype);
>> +
>> + fntype
>> + = build_function_type_list (void_type_node, const_char_ptr_type,
>> + float_type_node, NULL);
>> + AARCH64_INIT_RWSR_BUILTINS_DECL (WSRF, wsrf, fntype);
>> +
>> + fntype
>> + = build_function_type_list (void_type_node, const_char_ptr_type,
>> + double_type_node, NULL);
>> + AARCH64_INIT_RWSR_BUILTINS_DECL (WSRF64, wsrf64, fntype);
>> +}
>> +
>> /* Initialize the memory tagging extension (MTE) builtins. */
>> struct
>> {
>> @@ -2019,6 +2089,8 @@ aarch64_general_init_builtins (void)
>> aarch64_init_rng_builtins ();
>> aarch64_init_data_intrinsics ();
>>
>> + aarch64_init_rwsr_builtins ();
>> +
>> tree ftype_jcvt
>> = build_function_type_list (intSI_type_node, double_type_node, NULL);
>> aarch64_builtin_decls[AARCH64_JSCVT]
>> @@ -2599,6 +2671,123 @@ aarch64_expand_rng_builtin (tree exp, rtx target, int fcode, int ignore)
>> return target;
>> }
>>
>> +/* Expand the read/write system register builtin EXPs. */
>> +rtx
>> +aarch64_expand_rwsr_builtin (tree exp, rtx target, int fcode)
>> +{
>> + tree arg0, arg1;
>> + rtx const_str, input_val, subreg;
>> + enum machine_mode mode;
>> + class expand_operand ops[2];
>> +
>> + arg0 = CALL_EXPR_ARG (exp, 0);
>> +
>> + bool write_op = (fcode == AARCH64_WSR
>> + || fcode == AARCH64_WSRP
>> + || fcode == AARCH64_WSR64
>> + || fcode == AARCH64_WSRF
>> + || fcode == AARCH64_WSRF64);
>> + bool read_op = (fcode == AARCH64_RSR
>> + || fcode == AARCH64_RSRP
>> + || fcode == AARCH64_RSR64
>> + || fcode == AARCH64_RSRF
>> + || fcode == AARCH64_RSRF64);
>
> Instead of this, how about using:
>
> if (call_expr_nargs (exp) == 1)
>
> for the read path?
>
Personally, I don't like that. It's a `read_op' because of the fcode it
has, the fact they share the same number of arguments is accidental.
With this implementation, we outline very early on exactly what
constitutes both a valid write and a valid read operation.
I'm happy to change things around if you feel strongly enough about it
or see some material advantage to doing things your way that I've missed :).
>> +
>> + /* Argument 0 (system register name) must be a string literal. */
>> + gcc_assert (!SSA_VAR_P (arg0)
>> + && TREE_CODE (TREE_TYPE (arg0)) == POINTER_TYPE
>> + && TREE_CODE (TREE_OPERAND (arg0,0)) == STRING_CST);
>
> It looks like this requires arg0 to be an ADDR_EXPR. It would be good
> to check that rather than !SSA_VAR_P.
>
> Minor formatting nit: should be a space after the comma.
>
>> +
>> + const char *name_input = TREE_STRING_POINTER (TREE_OPERAND (arg0, 0));
>> + size_t len = strlen (name_input);
>
> This can use TREE_STRING_LENGTH. That's preferable since it copes
> with embedded nuls, e.g. "foo\0bar".
>
>> +
>> + if (len == 0)
>> + {
>> + error_at (EXPR_LOCATION (exp), "invalid system register name provided");
>> + return const0_rtx;
>> + }
>
> Is this check necessary? It looks like the following code would
> correctly reject empty strings.
>
Not necessary. As the code evolved, some checks I added early on in
development went on to become redundant, this being on example. Thanks
for picking up on it.
>> +
>> + char *sysreg_name = (char *) alloca (len + 1);
>> + strcpy (sysreg_name, name_input);
>
> We shouldn't use alloca for user input that hasn't been verified yet,
> since it could be arbitrarily long. Libiberty provides an xmemdup that
> might be useful. (xmemdup rather than xstrdup to cope with the embedded
> nuls mentioned above. Or perhaps we should just reject embedded nuls
> immediately, so that later code doesn't need to worry about them.)
>
>> +
>> + for (unsigned pos = 0; pos <= len; pos++)
>> + sysreg_name[pos] = TOLOWER (sysreg_name[pos]);
>> +
>> + const char* name_output = aarch64_retrieve_sysreg (sysreg_name, write_op);
>
> Does this TOLOWERing make the checks for capital letters in
> is_implem_def_reg redundant?
>
Potentially.
The quesion is, do we want this explicit dependence of
`is_implem_def_reg' on this TOLOWERing?
My intention was to keep things fairly independent so they could be
reused elsewhere as easily as possible, should this need arise. This
meant I didn't want `is_implem_def_reg' to rely on the passed input
having been pre-processed in any way, keeping it as general-purpose as
possible.
Thanks,
V.
> BTW, I forgot to say, but it'd be better to add an "aarch64_" prefix to
> is_implem_def_reg, to avoid accidental clashes with target-independent code.
>
>> + if (name_output == NULL)
>> + {
>> + error_at (EXPR_LOCATION (exp), "invalid system register name provided");
>> + return const0_rtx;
>> + }
>> +
>> + /* Assign the string corresponding to the system register name to an RTX. */
>> + const_str = rtx_alloc (CONST_STRING);
>> + PUT_CODE (const_str, CONST_STRING);
>> + XSTR (const_str, 0) = xstrdup (name_output);
>
> I think this needs to use ggc_strdup rather than xstrdup, since the
> memory is managed by the garbage collector.
>
>> +
>> + /* Set up expander operands and call instruction expansion. */
>> + if (read_op)
>> + {
>> +
>
> Nit: redundant blank line.
>
>
>> + /* Emit the initial read_sysregdi rtx. */
>> + create_output_operand (&ops[0], target, DImode);
>> + create_fixed_operand (&ops[1], const_str);
>> + expand_insn (CODE_FOR_aarch64_read_sysregdi, 2, ops);
>> + target = ops[0].value;
>> +
>> + /* Do any necessary post-processing on the result. */
>> + switch (fcode)
>> + {
>> + case AARCH64_RSR:
>> + return gen_lowpart_SUBREG (SImode, target);
>> + case AARCH64_RSRP:
>> + case AARCH64_RSR64:
>> + return target;
>
> RSRP would return a 32-bit value for -mabi=ilp32. It might be easier
> to do:
>
> case AARCH64_RSR:
> case AARCH64_RSRP:
> case AARCH64_RSR64:
> case AARCH64_RSRF64:
> return lowpart_subreg (TYPE_MODE (TREE_TYPE (exp)), target, DImode);
>
> lowpart_subreg is a no-op if the new mode is the same as the old mode.
>
>> + case AARCH64_RSRF:
>> + subreg = gen_reg_rtx (SImode);
>
> This line looks redundant.
>
>> + subreg = gen_lowpart_SUBREG (SImode, target);
>> + return gen_lowpart_SUBREG (SFmode, subreg);
>> + case AARCH64_RSRF64:
>> + return gen_lowpart_SUBREG (DFmode, target);
>> + }
>
> If we do go for my:
>
> call_expr_nargs (exp) == 1
>
> suggestion above, this switch should probably have a:
>
> default:
> gcc_unreachable ();
>
> to catch unexpected ops.
>
>> + }
>> + if (write_op)
>> + {
>> +
>> + arg1 = CALL_EXPR_ARG (exp, 1);
>> + mode = TYPE_MODE (TREE_TYPE (arg1));
>> + input_val = copy_to_mode_reg (mode, expand_normal (arg1));
>> +
>> + switch (fcode)
>> + {
>> + case AARCH64_WSR:
>> + subreg = gen_lowpart_SUBREG (DImode, input_val);
>> + break;
>> + case AARCH64_WSRP:
>> + case AARCH64_WSR64:
>> + subreg = input_val;
>> + break;
>> + case AARCH64_WSRF:
>> + subreg = gen_lowpart_SUBREG (SImode, input_val);
>> + subreg = gen_lowpart_SUBREG (DImode, subreg);
>> + break;
>> + case AARCH64_WSRF64:
>> + subreg = gen_lowpart_SUBREG (DImode, input_val);
>> + break;
>> + }
>
> Similarly here, I think all cases except AARCH64_WSRF could use:
>
> input_val = lowpart_subreg (DImode, input_value, mode);
>
>> +
>> + create_fixed_operand (&ops[0], const_str);
>> + create_input_operand (&ops[1], subreg, DImode);
>> + expand_insn (CODE_FOR_aarch64_write_sysregdi, 2, ops);
>> +
>> + return target;
>> + }
>> +
>> + error_at (EXPR_LOCATION (exp),
>> + "Malformed call to read/write system register builtin");
>> + return target;
>> +}
>> +
>> /* Expand an expression EXP that calls a MEMTAG built-in FCODE
>> with result going to TARGET. */
>> static rtx
>> @@ -2832,6 +3021,17 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target,
>> case AARCH64_BUILTIN_RNG_RNDR:
>> case AARCH64_BUILTIN_RNG_RNDRRS:
>> return aarch64_expand_rng_builtin (exp, target, fcode, ignore);
>> + case AARCH64_RSR:
>> + case AARCH64_RSRP:
>> + case AARCH64_RSR64:
>> + case AARCH64_RSRF:
>> + case AARCH64_RSRF64:
>> + case AARCH64_WSR:
>> + case AARCH64_WSRP:
>> + case AARCH64_WSR64:
>> + case AARCH64_WSRF:
>> + case AARCH64_WSRF64:
>> + return aarch64_expand_rwsr_builtin (exp, target, fcode);
>> }
>>
>> if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX)
>> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
>> index 32c7adc8928..80da30bc30c 100644
>> --- a/gcc/config/aarch64/aarch64.md
>> +++ b/gcc/config/aarch64/aarch64.md
>> @@ -281,6 +281,8 @@
>> UNSPEC_UPDATE_FFRT
>> UNSPEC_RDFFR
>> UNSPEC_WRFFR
>> + UNSPEC_SYSREG_RDI
>> + UNSPEC_SYSREG_WDI
>> ;; Represents an SVE-style lane index, in which the indexing applies
>> ;; within the containing 128-bit block.
>> UNSPEC_SVE_LANE_SELECT
>> @@ -476,6 +478,21 @@
>> ;; Jumps and other miscellaneous insns
>> ;; -------------------------------------------------------------------
>>
>> +(define_insn "aarch64_read_sysregdi"
>> + [(set (match_operand:DI 0 "register_operand" "=r")
>> + (unspec_volatile:DI [(match_operand 1 "aarch64_sysreg_string" "")]
>> + UNSPEC_SYSREG_RDI))]
>> + ""
>> + "mrs\t%x0, %1"
>> + )
>> +
>> +(define_insn "aarch64_write_sysregdi"
>> + [(unspec_volatile:DI [(match_operand 0 "aarch64_sysreg_string" "")
>> + (match_operand:DI 1 "register_operand" "r")] UNSPEC_SYSREG_WDI)]
>> + ""
>> + "msr\t%0, %x1"
>> + )
>
> I think the write pattern should accept rZ (x0-x30 + xzr) rather than just r.
>
> Minor formatting nit, but other patterns are indented by 2 spaces
> rather than 1, with the closing ")" not indented.
>
> Thanks,
> Richard
>
>> +
>> (define_insn "indirect_jump"
>> [(set (pc) (match_operand:DI 0 "register_operand" "r"))]
>> ""
>> diff --git a/gcc/config/aarch64/arm_acle.h b/gcc/config/aarch64/arm_acle.h
>> index 7599a32301d..71ada878299 100644
>> --- a/gcc/config/aarch64/arm_acle.h
>> +++ b/gcc/config/aarch64/arm_acle.h
>> @@ -314,6 +314,36 @@ __rndrrs (uint64_t *__res)
>>
>> #pragma GCC pop_options
>>
>> +#define __arm_rsr(__regname) \
>> + __builtin_aarch64_rsr (__regname)
>> +
>> +#define __arm_rsrp(__regname) \
>> + __builtin_aarch64_rsrp (__regname)
>> +
>> +#define __arm_rsr64(__regname) \
>> + __builtin_aarch64_rsr64 (__regname)
>> +
>> +#define __arm_rsrf(__regname) \
>> + __builtin_aarch64_rsrf (__regname)
>> +
>> +#define __arm_rsrf64(__regname) \
>> + __builtin_aarch64_rsrf64 (__regname)
>> +
>> +#define __arm_wsr(__regname, __value) \
>> + __builtin_aarch64_wsr (__regname, __value)
>> +
>> +#define __arm_wsrp(__regname, __value) \
>> + __builtin_aarch64_wsrp (__regname, __value)
>> +
>> +#define __arm_wsr64(__regname, __value) \
>> + __builtin_aarch64_wsr64 (__regname, __value)
>> +
>> +#define __arm_wsrf(__regname, __value) \
>> + __builtin_aarch64_wsrf (__regname, __value)
>> +
>> +#define __arm_wsrf64(__regname, __value) \
>> + __builtin_aarch64_wsrf64 (__regname, __value)
>> +
>> #ifdef __cplusplus
>> }
>> #endif
>> diff --git a/gcc/testsuite/gcc.target/aarch64/acle/rwsr-1.c b/gcc/testsuite/gcc.target/aarch64/acle/rwsr-1.c
>> new file mode 100644
>> index 00000000000..bc9db098f16
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/acle/rwsr-1.c
>> @@ -0,0 +1,20 @@
>> +/* Test the __arm_[r,w]sr ACLE intrinsics family. */
>> +/* Ensure that illegal behavior is rejected by the compiler. */
>> +
>> +/* { dg-do compile } */
>> +/* { dg-options "-O3 -march=armv8.4-a" } */
>> +
>> +#include <arm_acle.h>
>> +
>> +/* Ensure that read/write-only register attributes are respected by the compiler. */
>> +void
>> +test_rwsr_read_write_only ()
>> +{
>> + /* Attempt to write to read-only registers. */
>> + long long a = __arm_rsr64 ("aidr_el1"); /* Read ok. */
>> + __arm_wsr64 ("aidr_el1", a); /* { dg-error {invalid system register name provided} } */
>> +
>> + /* Attempt to read from write-only registers. */
>> + __arm_wsr64("icc_asgi1r_el1", a); /* Write ok. */
>> + long long b = __arm_rsr64("icc_asgi1r_el1"); /* { dg-error {invalid system register name provided} } */
>> +}
>> diff --git a/gcc/testsuite/gcc.target/aarch64/acle/rwsr.c b/gcc/testsuite/gcc.target/aarch64/acle/rwsr.c
>> new file mode 100644
>> index 00000000000..3af4b960306
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/acle/rwsr.c
>> @@ -0,0 +1,144 @@
>> +/* Test the __arm_[r,w]sr ACLE intrinsics family. */
>> +/* Check that function variants for different data types handle types correctly. */
>> +/* { dg-do compile } */
>> +/* { dg-options "-O1 -march=armv8.4-a" } */
>> +/* { dg-final { check-function-bodies "**" "" } } */
>> +
>> +#include <arm_acle.h>
>> +
>> +/*
>> +** get_rsr:
>> +** ...
>> +** mrs x([0-9]+), s2_1_c0_c7_4
>> +** add w\1, w\1, 1
>> +** ...
>> +*/
>> +int
>> +get_rsr ()
>> +{
>> + int a = __arm_rsr("trcseqstr");
>> + return a+1;
>> +}
>> +
>> +/*
>> +** get_rsrf:
>> +** mrs x([0-9]+), s2_1_c0_c7_4
>> +** fmov s[0-9]+, w\1
>> +** ...
>> +*/
>> +float
>> +get_rsrf ()
>> +{
>> + return __arm_rsrf("trcseqstr");
>> +}
>> +
>> +/*
>> +** get_rsrp:
>> +** mrs x0, s2_1_c0_c7_4
>> +** ret
>> +*/
>> +void *
>> +get_rsrp ()
>> +{
>> + return __arm_rsrp("trcseqstr");
>> +}
>> +
>> +/*
>> +** get_rsr64:
>> +** mrs x0, s2_1_c0_c7_4
>> +** ret
>> +*/
>> +long long
>> +get_rsr64 ()
>> +{
>> + return __arm_rsr64("trcseqstr");
>> +}
>> +
>> +/*
>> +** get_rsrf64:
>> +** mrs x([0-9]+), s2_1_c0_c7_4
>> +** fmov d[0-9]+, x\1
>> +** ...
>> +*/
>> +double
>> +get_rsrf64 ()
>> +{
>> + return __arm_rsrf64("trcseqstr");
>> +}
>> +
>> +/*
>> +** set_wsr32:
>> +** ...
>> +** add w([0-9]+), w\1, 1
>> +** msr s2_1_c0_c7_4, x\1
>> +** ...
>> +*/
>> +void
>> +set_wsr32 (int a)
>> +{
>> + __arm_wsr("trcseqstr", a+1);
>> +}
>> +
>> +/*
>> +** set_wsrp:
>> +** ...
>> +** msr s2_1_c0_c7_4, x[0-9]+
>> +** ...
>> +*/
>> +void
>> +set_wsrp (void *a)
>> +{
>> + __arm_wsrp("trcseqstr", a);
>> +}
>> +
>> +/*
>> +** set_wsr64:
>> +** ...
>> +** msr s2_1_c0_c7_4, x[0-9]+
>> +** ...
>> +*/
>> +void
>> +set_wsr64 (long long a)
>> +{
>> + __arm_wsr64("trcseqstr", a);
>> +}
>> +
>> +/*
>> +** set_wsrf32:
>> +** ...
>> +** fmov w([0-9]+), s[0-9]+
>> +** msr s2_1_c0_c7_4, x\1
>> +** ...
>> +*/
>> +void
>> +set_wsrf32 (float a)
>> +{
>> + __arm_wsrf("trcseqstr", a);
>> +}
>> +
>> +/*
>> +** set_wsrf64:
>> +** ...
>> +** fmov x([0-9]+), d[0-9]+
>> +** msr s2_1_c0_c7_4, x\1
>> +** ...
>> +*/
>> +void
>> +set_wsrf64(double a)
>> +{
>> + __arm_wsrf64("trcseqstr", a);
>> +}
>> +
>> +/*
>> +** set_custom:
>> +** ...
>> +** mrs x0, s1_2_c3_c4_5
>> +** ...
>> +** msr s1_2_c3_c4_5, x0
>> +** ...
>> +*/
>> +void set_custom()
>> +{
>> + __uint64_t b = __arm_rsr64("S1_2_C3_C4_5");
>> + __arm_wsr64("S1_2_C3_C4_5", b);
>> +}
Victor Do Nascimento <Victor.DoNascimento@arm.com> writes:
> On 10/18/23 21:39, Richard Sandiford wrote:
>> Victor Do Nascimento <victor.donascimento@arm.com> writes:
>>> Implement the aarch64 intrinsics for reading and writing system
>>> registers with the following signatures:
>>>
>>> uint32_t __arm_rsr(const char *special_register);
>>> uint64_t __arm_rsr64(const char *special_register);
>>> void* __arm_rsrp(const char *special_register);
>>> float __arm_rsrf(const char *special_register);
>>> double __arm_rsrf64(const char *special_register);
>>> void __arm_wsr(const char *special_register, uint32_t value);
>>> void __arm_wsr64(const char *special_register, uint64_t value);
>>> void __arm_wsrp(const char *special_register, const void *value);
>>> void __arm_wsrf(const char *special_register, float value);
>>> void __arm_wsrf64(const char *special_register, double value);
>>>
>>> gcc/ChangeLog:
>>>
>>> * gcc/config/aarch64/aarch64-builtins.cc (enum aarch64_builtins):
>>> Add enums for new builtins.
>>> (aarch64_init_rwsr_builtins): New.
>>> (aarch64_general_init_builtins): Call aarch64_init_rwsr_builtins.
>>> (aarch64_expand_rwsr_builtin): New.
>>> (aarch64_general_expand_builtin): Call aarch64_general_expand_builtin.
>>> * gcc/config/aarch64/aarch64.md (read_sysregdi): New insn_and_split.
>>> (write_sysregdi): Likewise.
>>> * gcc/config/aarch64/arm_acle.h (__arm_rsr): New.
>>> (__arm_rsrp): Likewise.
>>> (__arm_rsr64): Likewise.
>>> (__arm_rsrf): Likewise.
>>> (__arm_rsrf64): Likewise.
>>> (__arm_wsr): Likewise.
>>> (__arm_wsrp): Likewise.
>>> (__arm_wsr64): Likewise.
>>> (__arm_wsrf): Likewise.
>>> (__arm_wsrf64): Likewise.
>>>
>>> gcc/testsuite/ChangeLog:
>>>
>>> * gcc/testsuite/gcc.target/aarch64/acle/rwsr.c: New.
>>> * gcc/testsuite/gcc.target/aarch64/acle/rwsr-1.c: Likewise.
>>> ---
>>> gcc/config/aarch64/aarch64-builtins.cc | 200 ++++++++++++++++++
>>> gcc/config/aarch64/aarch64.md | 17 ++
>>> gcc/config/aarch64/arm_acle.h | 30 +++
>>> .../gcc.target/aarch64/acle/rwsr-1.c | 20 ++
>>> gcc/testsuite/gcc.target/aarch64/acle/rwsr.c | 144 +++++++++++++
>>> 5 files changed, 411 insertions(+)
>>> create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/rwsr-1.c
>>> create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/rwsr.c
>>>
>>> diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
>>> index 04f59fd9a54..d8bb2a989a5 100644
>>> --- a/gcc/config/aarch64/aarch64-builtins.cc
>>> +++ b/gcc/config/aarch64/aarch64-builtins.cc
>>> @@ -808,6 +808,17 @@ enum aarch64_builtins
>>> AARCH64_RBIT,
>>> AARCH64_RBITL,
>>> AARCH64_RBITLL,
>>> + /* System register builtins. */
>>> + AARCH64_RSR,
>>> + AARCH64_RSRP,
>>> + AARCH64_RSR64,
>>> + AARCH64_RSRF,
>>> + AARCH64_RSRF64,
>>> + AARCH64_WSR,
>>> + AARCH64_WSRP,
>>> + AARCH64_WSR64,
>>> + AARCH64_WSRF,
>>> + AARCH64_WSRF64,
>>> AARCH64_BUILTIN_MAX
>>> };
>>>
>>> @@ -1798,6 +1809,65 @@ aarch64_init_rng_builtins (void)
>>> AARCH64_BUILTIN_RNG_RNDRRS);
>>> }
>>>
>>> +/* Add builtins for reading system register. */
>>> +static void
>>> +aarch64_init_rwsr_builtins (void)
>>> +{
>>> + tree fntype = NULL;
>>> + tree const_char_ptr_type
>>> + = build_pointer_type (build_type_variant (char_type_node, true, false));
>>> +
>>> +#define AARCH64_INIT_RWSR_BUILTINS_DECL(F, N, T) \
>>> + aarch64_builtin_decls[AARCH64_##F] \
>>> + = aarch64_general_add_builtin ("__builtin_aarch64_"#N, T, AARCH64_##F);
>>> +
>>> + fntype
>>> + = build_function_type_list (uint32_type_node, const_char_ptr_type, NULL);
>>> + AARCH64_INIT_RWSR_BUILTINS_DECL (RSR, rsr, fntype);
>>> +
>>> + fntype
>>> + = build_function_type_list (ptr_type_node, const_char_ptr_type, NULL);
>>> + AARCH64_INIT_RWSR_BUILTINS_DECL (RSRP, rsrp, fntype);
>>> +
>>> + fntype
>>> + = build_function_type_list (uint64_type_node, const_char_ptr_type, NULL);
>>> + AARCH64_INIT_RWSR_BUILTINS_DECL (RSR64, rsr64, fntype);
>>> +
>>> + fntype
>>> + = build_function_type_list (float_type_node, const_char_ptr_type, NULL);
>>> + AARCH64_INIT_RWSR_BUILTINS_DECL (RSRF, rsrf, fntype);
>>> +
>>> + fntype
>>> + = build_function_type_list (double_type_node, const_char_ptr_type, NULL);
>>> + AARCH64_INIT_RWSR_BUILTINS_DECL (RSRF64, rsrf64, fntype);
>>> +
>>> + fntype
>>> + = build_function_type_list (void_type_node, const_char_ptr_type,
>>> + uint32_type_node, NULL);
>>> +
>>> + AARCH64_INIT_RWSR_BUILTINS_DECL (WSR, wsr, fntype);
>>> +
>>> + fntype
>>> + = build_function_type_list (void_type_node, const_char_ptr_type,
>>> + const_ptr_type_node, NULL);
>>> + AARCH64_INIT_RWSR_BUILTINS_DECL (WSRP, wsrp, fntype);
>>> +
>>> + fntype
>>> + = build_function_type_list (void_type_node, const_char_ptr_type,
>>> + uint64_type_node, NULL);
>>> + AARCH64_INIT_RWSR_BUILTINS_DECL (WSR64, wsr64, fntype);
>>> +
>>> + fntype
>>> + = build_function_type_list (void_type_node, const_char_ptr_type,
>>> + float_type_node, NULL);
>>> + AARCH64_INIT_RWSR_BUILTINS_DECL (WSRF, wsrf, fntype);
>>> +
>>> + fntype
>>> + = build_function_type_list (void_type_node, const_char_ptr_type,
>>> + double_type_node, NULL);
>>> + AARCH64_INIT_RWSR_BUILTINS_DECL (WSRF64, wsrf64, fntype);
>>> +}
>>> +
>>> /* Initialize the memory tagging extension (MTE) builtins. */
>>> struct
>>> {
>>> @@ -2019,6 +2089,8 @@ aarch64_general_init_builtins (void)
>>> aarch64_init_rng_builtins ();
>>> aarch64_init_data_intrinsics ();
>>>
>>> + aarch64_init_rwsr_builtins ();
>>> +
>>> tree ftype_jcvt
>>> = build_function_type_list (intSI_type_node, double_type_node, NULL);
>>> aarch64_builtin_decls[AARCH64_JSCVT]
>>> @@ -2599,6 +2671,123 @@ aarch64_expand_rng_builtin (tree exp, rtx target, int fcode, int ignore)
>>> return target;
>>> }
>>>
>>> +/* Expand the read/write system register builtin EXPs. */
>>> +rtx
>>> +aarch64_expand_rwsr_builtin (tree exp, rtx target, int fcode)
>>> +{
>>> + tree arg0, arg1;
>>> + rtx const_str, input_val, subreg;
>>> + enum machine_mode mode;
>>> + class expand_operand ops[2];
>>> +
>>> + arg0 = CALL_EXPR_ARG (exp, 0);
>>> +
>>> + bool write_op = (fcode == AARCH64_WSR
>>> + || fcode == AARCH64_WSRP
>>> + || fcode == AARCH64_WSR64
>>> + || fcode == AARCH64_WSRF
>>> + || fcode == AARCH64_WSRF64);
>>> + bool read_op = (fcode == AARCH64_RSR
>>> + || fcode == AARCH64_RSRP
>>> + || fcode == AARCH64_RSR64
>>> + || fcode == AARCH64_RSRF
>>> + || fcode == AARCH64_RSRF64);
>>
>> Instead of this, how about using:
>>
>> if (call_expr_nargs (exp) == 1)
>>
>> for the read path?
>>
>
> Personally, I don't like that. It's a `read_op' because of the fcode it
> has, the fact they share the same number of arguments is accidental.
> With this implementation, we outline very early on exactly what
> constitutes both a valid write and a valid read operation.
I don't agree that it's accidental. :) But fair enough, I won't push it.
> I'm happy to change things around if you feel strongly enough about it
> or see some material advantage to doing things your way that I've missed :).
>
>>> +
>>> + /* Argument 0 (system register name) must be a string literal. */
>>> + gcc_assert (!SSA_VAR_P (arg0)
>>> + && TREE_CODE (TREE_TYPE (arg0)) == POINTER_TYPE
>>> + && TREE_CODE (TREE_OPERAND (arg0,0)) == STRING_CST);
>>
>> It looks like this requires arg0 to be an ADDR_EXPR. It would be good
>> to check that rather than !SSA_VAR_P.
>>
>> Minor formatting nit: should be a space after the comma.
>>
>>> +
>>> + const char *name_input = TREE_STRING_POINTER (TREE_OPERAND (arg0, 0));
>>> + size_t len = strlen (name_input);
>>
>> This can use TREE_STRING_LENGTH. That's preferable since it copes
>> with embedded nuls, e.g. "foo\0bar".
>>
>>> +
>>> + if (len == 0)
>>> + {
>>> + error_at (EXPR_LOCATION (exp), "invalid system register name provided");
>>> + return const0_rtx;
>>> + }
>>
>> Is this check necessary? It looks like the following code would
>> correctly reject empty strings.
>>
>
> Not necessary. As the code evolved, some checks I added early on in
> development went on to become redundant, this being on example. Thanks
> for picking up on it.
>
>>> +
>>> + char *sysreg_name = (char *) alloca (len + 1);
>>> + strcpy (sysreg_name, name_input);
>>
>> We shouldn't use alloca for user input that hasn't been verified yet,
>> since it could be arbitrarily long. Libiberty provides an xmemdup that
>> might be useful. (xmemdup rather than xstrdup to cope with the embedded
>> nuls mentioned above. Or perhaps we should just reject embedded nuls
>> immediately, so that later code doesn't need to worry about them.)
>>
>>> +
>>> + for (unsigned pos = 0; pos <= len; pos++)
>>> + sysreg_name[pos] = TOLOWER (sysreg_name[pos]);
>>> +
>>> + const char* name_output = aarch64_retrieve_sysreg (sysreg_name, write_op);
>>
>> Does this TOLOWERing make the checks for capital letters in
>> is_implem_def_reg redundant?
>>
>
> Potentially.
>
> The quesion is, do we want this explicit dependence of
> `is_implem_def_reg' on this TOLOWERing?
>
> My intention was to keep things fairly independent so they could be
> reused elsewhere as easily as possible, should this need arise. This
> meant I didn't want `is_implem_def_reg' to rely on the passed input
> having been pre-processed in any way, keeping it as general-purpose as
> possible.
I think it's good if we have a canonical form internally. And it looks
like the patches do have a canonical form, thanks to these TOLOWER calls.
IMO it's better to embrace that rather than support the possibility of
having multiple representations of the same thing in future.
Also, aarch64_lookup_sysreg_map requires the argument to be lower case
if it specifies a predefined register name, since we (rightly) don't
add every case variation of a register name to the hash table. So it
seems more consistent to have the same precondition for both types of
register name, rather than requiring one to be lower case and allowing
the other to be mixed case.
Thanks,
Richard
On 10/26/23 16:23, Richard Sandiford wrote:
> Victor Do Nascimento <Victor.DoNascimento@arm.com> writes:
>> On 10/18/23 21:39, Richard Sandiford wrote:
>>> Victor Do Nascimento <victor.donascimento@arm.com> writes:
>>>> Implement the aarch64 intrinsics for reading and writing system
>>>> registers with the following signatures:
>>>>
>>>> uint32_t __arm_rsr(const char *special_register);
>>>> uint64_t __arm_rsr64(const char *special_register);
>>>> void* __arm_rsrp(const char *special_register);
>>>> float __arm_rsrf(const char *special_register);
>>>> double __arm_rsrf64(const char *special_register);
>>>> void __arm_wsr(const char *special_register, uint32_t value);
>>>> void __arm_wsr64(const char *special_register, uint64_t value);
>>>> void __arm_wsrp(const char *special_register, const void *value);
>>>> void __arm_wsrf(const char *special_register, float value);
>>>> void __arm_wsrf64(const char *special_register, double value);
>>>>
>>>> gcc/ChangeLog:
>>>>
>>>> * gcc/config/aarch64/aarch64-builtins.cc (enum aarch64_builtins):
>>>> Add enums for new builtins.
>>>> (aarch64_init_rwsr_builtins): New.
>>>> (aarch64_general_init_builtins): Call aarch64_init_rwsr_builtins.
>>>> (aarch64_expand_rwsr_builtin): New.
>>>> (aarch64_general_expand_builtin): Call aarch64_general_expand_builtin.
>>>> * gcc/config/aarch64/aarch64.md (read_sysregdi): New insn_and_split.
>>>> (write_sysregdi): Likewise.
>>>> * gcc/config/aarch64/arm_acle.h (__arm_rsr): New.
>>>> (__arm_rsrp): Likewise.
>>>> (__arm_rsr64): Likewise.
>>>> (__arm_rsrf): Likewise.
>>>> (__arm_rsrf64): Likewise.
>>>> (__arm_wsr): Likewise.
>>>> (__arm_wsrp): Likewise.
>>>> (__arm_wsr64): Likewise.
>>>> (__arm_wsrf): Likewise.
>>>> (__arm_wsrf64): Likewise.
>>>>
>>>> gcc/testsuite/ChangeLog:
>>>>
>>>> * gcc/testsuite/gcc.target/aarch64/acle/rwsr.c: New.
>>>> * gcc/testsuite/gcc.target/aarch64/acle/rwsr-1.c: Likewise.
>>>> ---
>>>> gcc/config/aarch64/aarch64-builtins.cc | 200 ++++++++++++++++++
>>>> gcc/config/aarch64/aarch64.md | 17 ++
>>>> gcc/config/aarch64/arm_acle.h | 30 +++
>>>> .../gcc.target/aarch64/acle/rwsr-1.c | 20 ++
>>>> gcc/testsuite/gcc.target/aarch64/acle/rwsr.c | 144 +++++++++++++
>>>> 5 files changed, 411 insertions(+)
>>>> create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/rwsr-1.c
>>>> create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/rwsr.c
>>>>
>>>> diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
>>>> index 04f59fd9a54..d8bb2a989a5 100644
>>>> --- a/gcc/config/aarch64/aarch64-builtins.cc
>>>> +++ b/gcc/config/aarch64/aarch64-builtins.cc
>>>> @@ -808,6 +808,17 @@ enum aarch64_builtins
>>>> AARCH64_RBIT,
>>>> AARCH64_RBITL,
>>>> AARCH64_RBITLL,
>>>> + /* System register builtins. */
>>>> + AARCH64_RSR,
>>>> + AARCH64_RSRP,
>>>> + AARCH64_RSR64,
>>>> + AARCH64_RSRF,
>>>> + AARCH64_RSRF64,
>>>> + AARCH64_WSR,
>>>> + AARCH64_WSRP,
>>>> + AARCH64_WSR64,
>>>> + AARCH64_WSRF,
>>>> + AARCH64_WSRF64,
>>>> AARCH64_BUILTIN_MAX
>>>> };
>>>>
>>>> @@ -1798,6 +1809,65 @@ aarch64_init_rng_builtins (void)
>>>> AARCH64_BUILTIN_RNG_RNDRRS);
>>>> }
>>>>
>>>> +/* Add builtins for reading system register. */
>>>> +static void
>>>> +aarch64_init_rwsr_builtins (void)
>>>> +{
>>>> + tree fntype = NULL;
>>>> + tree const_char_ptr_type
>>>> + = build_pointer_type (build_type_variant (char_type_node, true, false));
>>>> +
>>>> +#define AARCH64_INIT_RWSR_BUILTINS_DECL(F, N, T) \
>>>> + aarch64_builtin_decls[AARCH64_##F] \
>>>> + = aarch64_general_add_builtin ("__builtin_aarch64_"#N, T, AARCH64_##F);
>>>> +
>>>> + fntype
>>>> + = build_function_type_list (uint32_type_node, const_char_ptr_type, NULL);
>>>> + AARCH64_INIT_RWSR_BUILTINS_DECL (RSR, rsr, fntype);
>>>> +
>>>> + fntype
>>>> + = build_function_type_list (ptr_type_node, const_char_ptr_type, NULL);
>>>> + AARCH64_INIT_RWSR_BUILTINS_DECL (RSRP, rsrp, fntype);
>>>> +
>>>> + fntype
>>>> + = build_function_type_list (uint64_type_node, const_char_ptr_type, NULL);
>>>> + AARCH64_INIT_RWSR_BUILTINS_DECL (RSR64, rsr64, fntype);
>>>> +
>>>> + fntype
>>>> + = build_function_type_list (float_type_node, const_char_ptr_type, NULL);
>>>> + AARCH64_INIT_RWSR_BUILTINS_DECL (RSRF, rsrf, fntype);
>>>> +
>>>> + fntype
>>>> + = build_function_type_list (double_type_node, const_char_ptr_type, NULL);
>>>> + AARCH64_INIT_RWSR_BUILTINS_DECL (RSRF64, rsrf64, fntype);
>>>> +
>>>> + fntype
>>>> + = build_function_type_list (void_type_node, const_char_ptr_type,
>>>> + uint32_type_node, NULL);
>>>> +
>>>> + AARCH64_INIT_RWSR_BUILTINS_DECL (WSR, wsr, fntype);
>>>> +
>>>> + fntype
>>>> + = build_function_type_list (void_type_node, const_char_ptr_type,
>>>> + const_ptr_type_node, NULL);
>>>> + AARCH64_INIT_RWSR_BUILTINS_DECL (WSRP, wsrp, fntype);
>>>> +
>>>> + fntype
>>>> + = build_function_type_list (void_type_node, const_char_ptr_type,
>>>> + uint64_type_node, NULL);
>>>> + AARCH64_INIT_RWSR_BUILTINS_DECL (WSR64, wsr64, fntype);
>>>> +
>>>> + fntype
>>>> + = build_function_type_list (void_type_node, const_char_ptr_type,
>>>> + float_type_node, NULL);
>>>> + AARCH64_INIT_RWSR_BUILTINS_DECL (WSRF, wsrf, fntype);
>>>> +
>>>> + fntype
>>>> + = build_function_type_list (void_type_node, const_char_ptr_type,
>>>> + double_type_node, NULL);
>>>> + AARCH64_INIT_RWSR_BUILTINS_DECL (WSRF64, wsrf64, fntype);
>>>> +}
>>>> +
>>>> /* Initialize the memory tagging extension (MTE) builtins. */
>>>> struct
>>>> {
>>>> @@ -2019,6 +2089,8 @@ aarch64_general_init_builtins (void)
>>>> aarch64_init_rng_builtins ();
>>>> aarch64_init_data_intrinsics ();
>>>>
>>>> + aarch64_init_rwsr_builtins ();
>>>> +
>>>> tree ftype_jcvt
>>>> = build_function_type_list (intSI_type_node, double_type_node, NULL);
>>>> aarch64_builtin_decls[AARCH64_JSCVT]
>>>> @@ -2599,6 +2671,123 @@ aarch64_expand_rng_builtin (tree exp, rtx target, int fcode, int ignore)
>>>> return target;
>>>> }
>>>>
>>>> +/* Expand the read/write system register builtin EXPs. */
>>>> +rtx
>>>> +aarch64_expand_rwsr_builtin (tree exp, rtx target, int fcode)
>>>> +{
>>>> + tree arg0, arg1;
>>>> + rtx const_str, input_val, subreg;
>>>> + enum machine_mode mode;
>>>> + class expand_operand ops[2];
>>>> +
>>>> + arg0 = CALL_EXPR_ARG (exp, 0);
>>>> +
>>>> + bool write_op = (fcode == AARCH64_WSR
>>>> + || fcode == AARCH64_WSRP
>>>> + || fcode == AARCH64_WSR64
>>>> + || fcode == AARCH64_WSRF
>>>> + || fcode == AARCH64_WSRF64);
>>>> + bool read_op = (fcode == AARCH64_RSR
>>>> + || fcode == AARCH64_RSRP
>>>> + || fcode == AARCH64_RSR64
>>>> + || fcode == AARCH64_RSRF
>>>> + || fcode == AARCH64_RSRF64);
>>>
>>> Instead of this, how about using:
>>>
>>> if (call_expr_nargs (exp) == 1)
>>>
>>> for the read path?
>>>
>>
>> Personally, I don't like that. It's a `read_op' because of the fcode it
>> has, the fact they share the same number of arguments is accidental.
>> With this implementation, we outline very early on exactly what
>> constitutes both a valid write and a valid read operation.
>
> I don't agree that it's accidental. :) But fair enough, I won't push it.
>
Iff your objection is not so big so as to you letting me have my way,
then I shall not complain! Thanks.
>> I'm happy to change things around if you feel strongly enough about it
>> or see some material advantage to doing things your way that I've missed :).
>>
>>>> +
>>>> + /* Argument 0 (system register name) must be a string literal. */
>>>> + gcc_assert (!SSA_VAR_P (arg0)
>>>> + && TREE_CODE (TREE_TYPE (arg0)) == POINTER_TYPE
>>>> + && TREE_CODE (TREE_OPERAND (arg0,0)) == STRING_CST);
>>>
>>> It looks like this requires arg0 to be an ADDR_EXPR. It would be good
>>> to check that rather than !SSA_VAR_P.
>>>
>>> Minor formatting nit: should be a space after the comma.
>>>
>>>> +
>>>> + const char *name_input = TREE_STRING_POINTER (TREE_OPERAND (arg0, 0));
>>>> + size_t len = strlen (name_input);
>>>
>>> This can use TREE_STRING_LENGTH. That's preferable since it copes
>>> with embedded nuls, e.g. "foo\0bar".
I figured that `c_strlen', as defined in `builtins.cc' would be helpful
here, having logic to handle "odd" strings, both those which contain an
embedded null and those lacking a null terminator altogether, returning
NULL_TREE in both scenarios. This allows us to deal with embedded nulls
early on, rejecting them straight away.
Any thoughts on this?
Many thanks,
V.
>>>
>>>> +
>>>> + if (len == 0)
>>>> + {
>>>> + error_at (EXPR_LOCATION (exp), "invalid system register name provided");
>>>> + return const0_rtx;
>>>> + }
>>>
>>> Is this check necessary? It looks like the following code would
>>> correctly reject empty strings.
>>>
>>
>> Not necessary. As the code evolved, some checks I added early on in
>> development went on to become redundant, this being on example. Thanks
>> for picking up on it.
>>
>>>> +
>>>> + char *sysreg_name = (char *) alloca (len + 1);
>>>> + strcpy (sysreg_name, name_input);
>>>
>>> We shouldn't use alloca for user input that hasn't been verified yet,
>>> since it could be arbitrarily long. Libiberty provides an xmemdup that
>>> might be useful. (xmemdup rather than xstrdup to cope with the embedded
>>> nuls mentioned above. Or perhaps we should just reject embedded nuls
>>> immediately, so that later code doesn't need to worry about them.)
>>>
>>>> +
>>>> + for (unsigned pos = 0; pos <= len; pos++)
>>>> + sysreg_name[pos] = TOLOWER (sysreg_name[pos]);
>>>> +
>>>> + const char* name_output = aarch64_retrieve_sysreg (sysreg_name, write_op);
>>>
>>> Does this TOLOWERing make the checks for capital letters in
>>> is_implem_def_reg redundant?
>>>
>>
>> Potentially.
>>
>> The quesion is, do we want this explicit dependence of
>> `is_implem_def_reg' on this TOLOWERing?
>>
>> My intention was to keep things fairly independent so they could be
>> reused elsewhere as easily as possible, should this need arise. This
>> meant I didn't want `is_implem_def_reg' to rely on the passed input
>> having been pre-processed in any way, keeping it as general-purpose as
>> possible.
>
> I think it's good if we have a canonical form internally. And it looks
> like the patches do have a canonical form, thanks to these TOLOWER calls.
> IMO it's better to embrace that rather than support the possibility of
> having multiple representations of the same thing in future.
>
> Also, aarch64_lookup_sysreg_map requires the argument to be lower case
> if it specifies a predefined register name, since we (rightly) don't
> add every case variation of a register name to the hash table. So it
> seems more consistent to have the same precondition for both types of
> register name, rather than requiring one to be lower case and allowing
> the other to be mixed case.
>
I see your point!
> Thanks,
> Richard
On 26/10/2023 16:23, Richard Sandiford wrote:
> Victor Do Nascimento <Victor.DoNascimento@arm.com> writes:
> > On 10/18/23 21:39, Richard Sandiford wrote:
> >> Victor Do Nascimento <victor.donascimento@arm.com> writes:
> >>> Implement the aarch64 intrinsics for reading and writing system
> >>> registers with the following signatures:
> >>>
> >>> uint32_t __arm_rsr(const char *special_register);
> >>> uint64_t __arm_rsr64(const char *special_register);
> >>> void* __arm_rsrp(const char *special_register);
> >>> float __arm_rsrf(const char *special_register);
> >>> double __arm_rsrf64(const char *special_register);
> >>> void __arm_wsr(const char *special_register, uint32_t value);
> >>> void __arm_wsr64(const char *special_register, uint64_t value);
> >>> void __arm_wsrp(const char *special_register, const void *value);
> >>> void __arm_wsrf(const char *special_register, float value);
> >>> void __arm_wsrf64(const char *special_register, double value);
> >>>
> >>> gcc/ChangeLog:
> >>>
> >>> * gcc/config/aarch64/aarch64-builtins.cc (enum aarch64_builtins):
> >>> Add enums for new builtins.
> >>> (aarch64_init_rwsr_builtins): New.
> >>> (aarch64_general_init_builtins): Call aarch64_init_rwsr_builtins.
> >>> (aarch64_expand_rwsr_builtin): New.
> >>> (aarch64_general_expand_builtin): Call aarch64_general_expand_builtin.
> >>> * gcc/config/aarch64/aarch64.md (read_sysregdi): New insn_and_split.
> >>> (write_sysregdi): Likewise.
> >>> * gcc/config/aarch64/arm_acle.h (__arm_rsr): New.
> >>> (__arm_rsrp): Likewise.
> >>> (__arm_rsr64): Likewise.
> >>> (__arm_rsrf): Likewise.
> >>> (__arm_rsrf64): Likewise.
> >>> (__arm_wsr): Likewise.
> >>> (__arm_wsrp): Likewise.
> >>> (__arm_wsr64): Likewise.
> >>> (__arm_wsrf): Likewise.
> >>> (__arm_wsrf64): Likewise.
> >>>
> >>> gcc/testsuite/ChangeLog:
> >>>
> >>> * gcc/testsuite/gcc.target/aarch64/acle/rwsr.c: New.
> >>> * gcc/testsuite/gcc.target/aarch64/acle/rwsr-1.c: Likewise.
> >>> ---
> >>> gcc/config/aarch64/aarch64-builtins.cc | 200 ++++++++++++++++++
> >>> gcc/config/aarch64/aarch64.md | 17 ++
> >>> gcc/config/aarch64/arm_acle.h | 30 +++
> >>> .../gcc.target/aarch64/acle/rwsr-1.c | 20 ++
> >>> gcc/testsuite/gcc.target/aarch64/acle/rwsr.c | 144 +++++++++++++
> >>> 5 files changed, 411 insertions(+)
> >>> create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/rwsr-1.c
> >>> create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/rwsr.c
> >>>
> >>> diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
> >>> index 04f59fd9a54..d8bb2a989a5 100644
> >>> --- a/gcc/config/aarch64/aarch64-builtins.cc
> >>> +++ b/gcc/config/aarch64/aarch64-builtins.cc
> >>> @@ -808,6 +808,17 @@ enum aarch64_builtins
> >>> AARCH64_RBIT,
> >>> AARCH64_RBITL,
> >>> AARCH64_RBITLL,
> >>> + /* System register builtins. */
> >>> + AARCH64_RSR,
> >>> + AARCH64_RSRP,
> >>> + AARCH64_RSR64,
> >>> + AARCH64_RSRF,
> >>> + AARCH64_RSRF64,
> >>> + AARCH64_WSR,
> >>> + AARCH64_WSRP,
> >>> + AARCH64_WSR64,
> >>> + AARCH64_WSRF,
> >>> + AARCH64_WSRF64,
> >>> AARCH64_BUILTIN_MAX
> >>> };
> >>>
> >>> @@ -1798,6 +1809,65 @@ aarch64_init_rng_builtins (void)
> >>> AARCH64_BUILTIN_RNG_RNDRRS);
> >>> }
> >>>
> >>> +/* Add builtins for reading system register. */
> >>> +static void
> >>> +aarch64_init_rwsr_builtins (void)
> >>> +{
> >>> + tree fntype = NULL;
> >>> + tree const_char_ptr_type
> >>> + = build_pointer_type (build_type_variant (char_type_node, true, false));
> >>> +
> >>> +#define AARCH64_INIT_RWSR_BUILTINS_DECL(F, N, T) \
> >>> + aarch64_builtin_decls[AARCH64_##F] \
> >>> + = aarch64_general_add_builtin ("__builtin_aarch64_"#N, T, AARCH64_##F);
> >>> +
> >>> + fntype
> >>> + = build_function_type_list (uint32_type_node, const_char_ptr_type, NULL);
> >>> + AARCH64_INIT_RWSR_BUILTINS_DECL (RSR, rsr, fntype);
> >>> +
> >>> + fntype
> >>> + = build_function_type_list (ptr_type_node, const_char_ptr_type, NULL);
> >>> + AARCH64_INIT_RWSR_BUILTINS_DECL (RSRP, rsrp, fntype);
> >>> +
> >>> + fntype
> >>> + = build_function_type_list (uint64_type_node, const_char_ptr_type, NULL);
> >>> + AARCH64_INIT_RWSR_BUILTINS_DECL (RSR64, rsr64, fntype);
> >>> +
> >>> + fntype
> >>> + = build_function_type_list (float_type_node, const_char_ptr_type, NULL);
> >>> + AARCH64_INIT_RWSR_BUILTINS_DECL (RSRF, rsrf, fntype);
> >>> +
> >>> + fntype
> >>> + = build_function_type_list (double_type_node, const_char_ptr_type, NULL);
> >>> + AARCH64_INIT_RWSR_BUILTINS_DECL (RSRF64, rsrf64, fntype);
> >>> +
> >>> + fntype
> >>> + = build_function_type_list (void_type_node, const_char_ptr_type,
> >>> + uint32_type_node, NULL);
> >>> +
> >>> + AARCH64_INIT_RWSR_BUILTINS_DECL (WSR, wsr, fntype);
> >>> +
> >>> + fntype
> >>> + = build_function_type_list (void_type_node, const_char_ptr_type,
> >>> + const_ptr_type_node, NULL);
> >>> + AARCH64_INIT_RWSR_BUILTINS_DECL (WSRP, wsrp, fntype);
> >>> +
> >>> + fntype
> >>> + = build_function_type_list (void_type_node, const_char_ptr_type,
> >>> + uint64_type_node, NULL);
> >>> + AARCH64_INIT_RWSR_BUILTINS_DECL (WSR64, wsr64, fntype);
> >>> +
> >>> + fntype
> >>> + = build_function_type_list (void_type_node, const_char_ptr_type,
> >>> + float_type_node, NULL);
> >>> + AARCH64_INIT_RWSR_BUILTINS_DECL (WSRF, wsrf, fntype);
> >>> +
> >>> + fntype
> >>> + = build_function_type_list (void_type_node, const_char_ptr_type,
> >>> + double_type_node, NULL);
> >>> + AARCH64_INIT_RWSR_BUILTINS_DECL (WSRF64, wsrf64, fntype);
> >>> +}
> >>> +
> >>> /* Initialize the memory tagging extension (MTE) builtins. */
> >>> struct
> >>> {
> >>> @@ -2019,6 +2089,8 @@ aarch64_general_init_builtins (void)
> >>> aarch64_init_rng_builtins ();
> >>> aarch64_init_data_intrinsics ();
> >>>
> >>> + aarch64_init_rwsr_builtins ();
> >>> +
> >>> tree ftype_jcvt
> >>> = build_function_type_list (intSI_type_node, double_type_node, NULL);
> >>> aarch64_builtin_decls[AARCH64_JSCVT]
> >>> @@ -2599,6 +2671,123 @@ aarch64_expand_rng_builtin (tree exp, rtx target, int fcode, int ignore)
> >>> return target;
> >>> }
> >>>
> >>> +/* Expand the read/write system register builtin EXPs. */
> >>> +rtx
> >>> +aarch64_expand_rwsr_builtin (tree exp, rtx target, int fcode)
> >>> +{
> >>> + tree arg0, arg1;
> >>> + rtx const_str, input_val, subreg;
> >>> + enum machine_mode mode;
> >>> + class expand_operand ops[2];
> >>> +
> >>> + arg0 = CALL_EXPR_ARG (exp, 0);
> >>> +
> >>> + bool write_op = (fcode == AARCH64_WSR
> >>> + || fcode == AARCH64_WSRP
> >>> + || fcode == AARCH64_WSR64
> >>> + || fcode == AARCH64_WSRF
> >>> + || fcode == AARCH64_WSRF64);
> >>> + bool read_op = (fcode == AARCH64_RSR
> >>> + || fcode == AARCH64_RSRP
> >>> + || fcode == AARCH64_RSR64
> >>> + || fcode == AARCH64_RSRF
> >>> + || fcode == AARCH64_RSRF64);
> >>
> >> Instead of this, how about using:
> >>
> >> if (call_expr_nargs (exp) == 1)
> >>
> >> for the read path?
> >>
> >
> > Personally, I don't like that. It's a `read_op' because of the fcode it
> > has, the fact they share the same number of arguments is accidental.
> > With this implementation, we outline very early on exactly what
> > constitutes both a valid write and a valid read operation.
>
> I don't agree that it's accidental. :) But fair enough, I won't push it.
FWIW I agree with Richard that it isn't accidental that reads have one argument
and writes have two, it's an inherent property of how the operations work.
I think it would be better to use call_expr_nargs (exp) == 1 as suggested
above.
IIUC, presumably the operation must either be a read or a write so having two
separate booleans (read_op and write_op) doesn't make much sense either?
>
> > I'm happy to change things around if you feel strongly enough about it
> > or see some material advantage to doing things your way that I've missed :).
Thanks,
Alex
On 10/27/23 14:18, Alex Coplan wrote:
> On 26/10/2023 16:23, Richard Sandiford wrote:
>> Victor Do Nascimento <Victor.DoNascimento@arm.com> writes:
>>> On 10/18/23 21:39, Richard Sandiford wrote:
>>>> Victor Do Nascimento <victor.donascimento@arm.com> writes:
>>>>> Implement the aarch64 intrinsics for reading and writing system
>>>>> registers with the following signatures:
>>>>>
>>>>> uint32_t __arm_rsr(const char *special_register);
>>>>> uint64_t __arm_rsr64(const char *special_register);
>>>>> void* __arm_rsrp(const char *special_register);
>>>>> float __arm_rsrf(const char *special_register);
>>>>> double __arm_rsrf64(const char *special_register);
>>>>> void __arm_wsr(const char *special_register, uint32_t value);
>>>>> void __arm_wsr64(const char *special_register, uint64_t value);
>>>>> void __arm_wsrp(const char *special_register, const void *value);
>>>>> void __arm_wsrf(const char *special_register, float value);
>>>>> void __arm_wsrf64(const char *special_register, double value);
>>>>>
>>>>> gcc/ChangeLog:
>>>>>
>>>>> * gcc/config/aarch64/aarch64-builtins.cc (enum aarch64_builtins):
>>>>> Add enums for new builtins.
>>>>> (aarch64_init_rwsr_builtins): New.
>>>>> (aarch64_general_init_builtins): Call aarch64_init_rwsr_builtins.
>>>>> (aarch64_expand_rwsr_builtin): New.
>>>>> (aarch64_general_expand_builtin): Call aarch64_general_expand_builtin.
>>>>> * gcc/config/aarch64/aarch64.md (read_sysregdi): New insn_and_split.
>>>>> (write_sysregdi): Likewise.
>>>>> * gcc/config/aarch64/arm_acle.h (__arm_rsr): New.
>>>>> (__arm_rsrp): Likewise.
>>>>> (__arm_rsr64): Likewise.
>>>>> (__arm_rsrf): Likewise.
>>>>> (__arm_rsrf64): Likewise.
>>>>> (__arm_wsr): Likewise.
>>>>> (__arm_wsrp): Likewise.
>>>>> (__arm_wsr64): Likewise.
>>>>> (__arm_wsrf): Likewise.
>>>>> (__arm_wsrf64): Likewise.
>>>>>
>>>>> gcc/testsuite/ChangeLog:
>>>>>
>>>>> * gcc/testsuite/gcc.target/aarch64/acle/rwsr.c: New.
>>>>> * gcc/testsuite/gcc.target/aarch64/acle/rwsr-1.c: Likewise.
>>>>> ---
>>>>> gcc/config/aarch64/aarch64-builtins.cc | 200 ++++++++++++++++++
>>>>> gcc/config/aarch64/aarch64.md | 17 ++
>>>>> gcc/config/aarch64/arm_acle.h | 30 +++
>>>>> .../gcc.target/aarch64/acle/rwsr-1.c | 20 ++
>>>>> gcc/testsuite/gcc.target/aarch64/acle/rwsr.c | 144 +++++++++++++
>>>>> 5 files changed, 411 insertions(+)
>>>>> create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/rwsr-1.c
>>>>> create mode 100644 gcc/testsuite/gcc.target/aarch64/acle/rwsr.c
>>>>>
>>>>> diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
>>>>> index 04f59fd9a54..d8bb2a989a5 100644
>>>>> --- a/gcc/config/aarch64/aarch64-builtins.cc
>>>>> +++ b/gcc/config/aarch64/aarch64-builtins.cc
>>>>> @@ -808,6 +808,17 @@ enum aarch64_builtins
>>>>> AARCH64_RBIT,
>>>>> AARCH64_RBITL,
>>>>> AARCH64_RBITLL,
>>>>> + /* System register builtins. */
>>>>> + AARCH64_RSR,
>>>>> + AARCH64_RSRP,
>>>>> + AARCH64_RSR64,
>>>>> + AARCH64_RSRF,
>>>>> + AARCH64_RSRF64,
>>>>> + AARCH64_WSR,
>>>>> + AARCH64_WSRP,
>>>>> + AARCH64_WSR64,
>>>>> + AARCH64_WSRF,
>>>>> + AARCH64_WSRF64,
>>>>> AARCH64_BUILTIN_MAX
>>>>> };
>>>>>
>>>>> @@ -1798,6 +1809,65 @@ aarch64_init_rng_builtins (void)
>>>>> AARCH64_BUILTIN_RNG_RNDRRS);
>>>>> }
>>>>>
>>>>> +/* Add builtins for reading system register. */
>>>>> +static void
>>>>> +aarch64_init_rwsr_builtins (void)
>>>>> +{
>>>>> + tree fntype = NULL;
>>>>> + tree const_char_ptr_type
>>>>> + = build_pointer_type (build_type_variant (char_type_node, true, false));
>>>>> +
>>>>> +#define AARCH64_INIT_RWSR_BUILTINS_DECL(F, N, T) \
>>>>> + aarch64_builtin_decls[AARCH64_##F] \
>>>>> + = aarch64_general_add_builtin ("__builtin_aarch64_"#N, T, AARCH64_##F);
>>>>> +
>>>>> + fntype
>>>>> + = build_function_type_list (uint32_type_node, const_char_ptr_type, NULL);
>>>>> + AARCH64_INIT_RWSR_BUILTINS_DECL (RSR, rsr, fntype);
>>>>> +
>>>>> + fntype
>>>>> + = build_function_type_list (ptr_type_node, const_char_ptr_type, NULL);
>>>>> + AARCH64_INIT_RWSR_BUILTINS_DECL (RSRP, rsrp, fntype);
>>>>> +
>>>>> + fntype
>>>>> + = build_function_type_list (uint64_type_node, const_char_ptr_type, NULL);
>>>>> + AARCH64_INIT_RWSR_BUILTINS_DECL (RSR64, rsr64, fntype);
>>>>> +
>>>>> + fntype
>>>>> + = build_function_type_list (float_type_node, const_char_ptr_type, NULL);
>>>>> + AARCH64_INIT_RWSR_BUILTINS_DECL (RSRF, rsrf, fntype);
>>>>> +
>>>>> + fntype
>>>>> + = build_function_type_list (double_type_node, const_char_ptr_type, NULL);
>>>>> + AARCH64_INIT_RWSR_BUILTINS_DECL (RSRF64, rsrf64, fntype);
>>>>> +
>>>>> + fntype
>>>>> + = build_function_type_list (void_type_node, const_char_ptr_type,
>>>>> + uint32_type_node, NULL);
>>>>> +
>>>>> + AARCH64_INIT_RWSR_BUILTINS_DECL (WSR, wsr, fntype);
>>>>> +
>>>>> + fntype
>>>>> + = build_function_type_list (void_type_node, const_char_ptr_type,
>>>>> + const_ptr_type_node, NULL);
>>>>> + AARCH64_INIT_RWSR_BUILTINS_DECL (WSRP, wsrp, fntype);
>>>>> +
>>>>> + fntype
>>>>> + = build_function_type_list (void_type_node, const_char_ptr_type,
>>>>> + uint64_type_node, NULL);
>>>>> + AARCH64_INIT_RWSR_BUILTINS_DECL (WSR64, wsr64, fntype);
>>>>> +
>>>>> + fntype
>>>>> + = build_function_type_list (void_type_node, const_char_ptr_type,
>>>>> + float_type_node, NULL);
>>>>> + AARCH64_INIT_RWSR_BUILTINS_DECL (WSRF, wsrf, fntype);
>>>>> +
>>>>> + fntype
>>>>> + = build_function_type_list (void_type_node, const_char_ptr_type,
>>>>> + double_type_node, NULL);
>>>>> + AARCH64_INIT_RWSR_BUILTINS_DECL (WSRF64, wsrf64, fntype);
>>>>> +}
>>>>> +
>>>>> /* Initialize the memory tagging extension (MTE) builtins. */
>>>>> struct
>>>>> {
>>>>> @@ -2019,6 +2089,8 @@ aarch64_general_init_builtins (void)
>>>>> aarch64_init_rng_builtins ();
>>>>> aarch64_init_data_intrinsics ();
>>>>>
>>>>> + aarch64_init_rwsr_builtins ();
>>>>> +
>>>>> tree ftype_jcvt
>>>>> = build_function_type_list (intSI_type_node, double_type_node, NULL);
>>>>> aarch64_builtin_decls[AARCH64_JSCVT]
>>>>> @@ -2599,6 +2671,123 @@ aarch64_expand_rng_builtin (tree exp, rtx target, int fcode, int ignore)
>>>>> return target;
>>>>> }
>>>>>
>>>>> +/* Expand the read/write system register builtin EXPs. */
>>>>> +rtx
>>>>> +aarch64_expand_rwsr_builtin (tree exp, rtx target, int fcode)
>>>>> +{
>>>>> + tree arg0, arg1;
>>>>> + rtx const_str, input_val, subreg;
>>>>> + enum machine_mode mode;
>>>>> + class expand_operand ops[2];
>>>>> +
>>>>> + arg0 = CALL_EXPR_ARG (exp, 0);
>>>>> +
>>>>> + bool write_op = (fcode == AARCH64_WSR
>>>>> + || fcode == AARCH64_WSRP
>>>>> + || fcode == AARCH64_WSR64
>>>>> + || fcode == AARCH64_WSRF
>>>>> + || fcode == AARCH64_WSRF64);
>>>>> + bool read_op = (fcode == AARCH64_RSR
>>>>> + || fcode == AARCH64_RSRP
>>>>> + || fcode == AARCH64_RSR64
>>>>> + || fcode == AARCH64_RSRF
>>>>> + || fcode == AARCH64_RSRF64);
>>>>
>>>> Instead of this, how about using:
>>>>
>>>> if (call_expr_nargs (exp) == 1)
>>>>
>>>> for the read path?
>>>>
>>>
>>> Personally, I don't like that. It's a `read_op' because of the fcode it
>>> has, the fact they share the same number of arguments is accidental.
>>> With this implementation, we outline very early on exactly what
>>> constitutes both a valid write and a valid read operation.
>>
>> I don't agree that it's accidental. :) But fair enough, I won't push it.
>
> FWIW I agree with Richard that it isn't accidental that reads have one argument
> and writes have two, it's an inherent property of how the operations work.
> I think it would be better to use call_expr_nargs (exp) == 1 as suggested
> above.
>
For the record, I do mean accidental in the "subsidiary" sense of the
word, if that makes sense. They are primarily RSR operations associated
with the mrs instruction. The number of args simply follows on from
this fact.
If in some hypothetical scenario, another single-operand expression was
to make it to `aarch64_expand_rwsr_builtin' with an fcode not covered by
those in `read_op', being single-operand alone wouldn't be a strong
enough condition for it to be classified a "read system register" operation.
That's the scenario that is guarded against by Richard's comment a
little further down:
"If we do go for my:
call_expr_nargs (exp) == 1
suggestion above, this switch should probably have a:
this switch should probably have a:
default:
gcc_unreachable ();"
> IIUC, presumably the operation must either be a read or a write so having two
> separate booleans (read_op and write_op) doesn't make much sense either?
>
In all fairness, you're 100% right here and I had thought about this in
the past. Assuming no stray fcode makes it into this function, read_op
is just !write_op. I did things this way to be nice and explicit early
on abouut what consistutes a valid read operation.
Happy to simplify.
Cheers,
V.
>>
>>> I'm happy to change things around if you feel strongly enough about it
>>> or see some material advantage to doing things your way that I've missed :).
>
> Thanks,
> Alex
@@ -808,6 +808,17 @@ enum aarch64_builtins
AARCH64_RBIT,
AARCH64_RBITL,
AARCH64_RBITLL,
+ /* System register builtins. */
+ AARCH64_RSR,
+ AARCH64_RSRP,
+ AARCH64_RSR64,
+ AARCH64_RSRF,
+ AARCH64_RSRF64,
+ AARCH64_WSR,
+ AARCH64_WSRP,
+ AARCH64_WSR64,
+ AARCH64_WSRF,
+ AARCH64_WSRF64,
AARCH64_BUILTIN_MAX
};
@@ -1798,6 +1809,65 @@ aarch64_init_rng_builtins (void)
AARCH64_BUILTIN_RNG_RNDRRS);
}
+/* Add builtins for reading system register. */
+static void
+aarch64_init_rwsr_builtins (void)
+{
+ tree fntype = NULL;
+ tree const_char_ptr_type
+ = build_pointer_type (build_type_variant (char_type_node, true, false));
+
+#define AARCH64_INIT_RWSR_BUILTINS_DECL(F, N, T) \
+ aarch64_builtin_decls[AARCH64_##F] \
+ = aarch64_general_add_builtin ("__builtin_aarch64_"#N, T, AARCH64_##F);
+
+ fntype
+ = build_function_type_list (uint32_type_node, const_char_ptr_type, NULL);
+ AARCH64_INIT_RWSR_BUILTINS_DECL (RSR, rsr, fntype);
+
+ fntype
+ = build_function_type_list (ptr_type_node, const_char_ptr_type, NULL);
+ AARCH64_INIT_RWSR_BUILTINS_DECL (RSRP, rsrp, fntype);
+
+ fntype
+ = build_function_type_list (uint64_type_node, const_char_ptr_type, NULL);
+ AARCH64_INIT_RWSR_BUILTINS_DECL (RSR64, rsr64, fntype);
+
+ fntype
+ = build_function_type_list (float_type_node, const_char_ptr_type, NULL);
+ AARCH64_INIT_RWSR_BUILTINS_DECL (RSRF, rsrf, fntype);
+
+ fntype
+ = build_function_type_list (double_type_node, const_char_ptr_type, NULL);
+ AARCH64_INIT_RWSR_BUILTINS_DECL (RSRF64, rsrf64, fntype);
+
+ fntype
+ = build_function_type_list (void_type_node, const_char_ptr_type,
+ uint32_type_node, NULL);
+
+ AARCH64_INIT_RWSR_BUILTINS_DECL (WSR, wsr, fntype);
+
+ fntype
+ = build_function_type_list (void_type_node, const_char_ptr_type,
+ const_ptr_type_node, NULL);
+ AARCH64_INIT_RWSR_BUILTINS_DECL (WSRP, wsrp, fntype);
+
+ fntype
+ = build_function_type_list (void_type_node, const_char_ptr_type,
+ uint64_type_node, NULL);
+ AARCH64_INIT_RWSR_BUILTINS_DECL (WSR64, wsr64, fntype);
+
+ fntype
+ = build_function_type_list (void_type_node, const_char_ptr_type,
+ float_type_node, NULL);
+ AARCH64_INIT_RWSR_BUILTINS_DECL (WSRF, wsrf, fntype);
+
+ fntype
+ = build_function_type_list (void_type_node, const_char_ptr_type,
+ double_type_node, NULL);
+ AARCH64_INIT_RWSR_BUILTINS_DECL (WSRF64, wsrf64, fntype);
+}
+
/* Initialize the memory tagging extension (MTE) builtins. */
struct
{
@@ -2019,6 +2089,8 @@ aarch64_general_init_builtins (void)
aarch64_init_rng_builtins ();
aarch64_init_data_intrinsics ();
+ aarch64_init_rwsr_builtins ();
+
tree ftype_jcvt
= build_function_type_list (intSI_type_node, double_type_node, NULL);
aarch64_builtin_decls[AARCH64_JSCVT]
@@ -2599,6 +2671,123 @@ aarch64_expand_rng_builtin (tree exp, rtx target, int fcode, int ignore)
return target;
}
+/* Expand the read/write system register builtin EXPs. */
+rtx
+aarch64_expand_rwsr_builtin (tree exp, rtx target, int fcode)
+{
+ tree arg0, arg1;
+ rtx const_str, input_val, subreg;
+ enum machine_mode mode;
+ class expand_operand ops[2];
+
+ arg0 = CALL_EXPR_ARG (exp, 0);
+
+ bool write_op = (fcode == AARCH64_WSR
+ || fcode == AARCH64_WSRP
+ || fcode == AARCH64_WSR64
+ || fcode == AARCH64_WSRF
+ || fcode == AARCH64_WSRF64);
+ bool read_op = (fcode == AARCH64_RSR
+ || fcode == AARCH64_RSRP
+ || fcode == AARCH64_RSR64
+ || fcode == AARCH64_RSRF
+ || fcode == AARCH64_RSRF64);
+
+ /* Argument 0 (system register name) must be a string literal. */
+ gcc_assert (!SSA_VAR_P (arg0)
+ && TREE_CODE (TREE_TYPE (arg0)) == POINTER_TYPE
+ && TREE_CODE (TREE_OPERAND (arg0,0)) == STRING_CST);
+
+ const char *name_input = TREE_STRING_POINTER (TREE_OPERAND (arg0, 0));
+ size_t len = strlen (name_input);
+
+ if (len == 0)
+ {
+ error_at (EXPR_LOCATION (exp), "invalid system register name provided");
+ return const0_rtx;
+ }
+
+ char *sysreg_name = (char *) alloca (len + 1);
+ strcpy (sysreg_name, name_input);
+
+ for (unsigned pos = 0; pos <= len; pos++)
+ sysreg_name[pos] = TOLOWER (sysreg_name[pos]);
+
+ const char* name_output = aarch64_retrieve_sysreg (sysreg_name, write_op);
+ if (name_output == NULL)
+ {
+ error_at (EXPR_LOCATION (exp), "invalid system register name provided");
+ return const0_rtx;
+ }
+
+ /* Assign the string corresponding to the system register name to an RTX. */
+ const_str = rtx_alloc (CONST_STRING);
+ PUT_CODE (const_str, CONST_STRING);
+ XSTR (const_str, 0) = xstrdup (name_output);
+
+ /* Set up expander operands and call instruction expansion. */
+ if (read_op)
+ {
+
+ /* Emit the initial read_sysregdi rtx. */
+ create_output_operand (&ops[0], target, DImode);
+ create_fixed_operand (&ops[1], const_str);
+ expand_insn (CODE_FOR_aarch64_read_sysregdi, 2, ops);
+ target = ops[0].value;
+
+ /* Do any necessary post-processing on the result. */
+ switch (fcode)
+ {
+ case AARCH64_RSR:
+ return gen_lowpart_SUBREG (SImode, target);
+ case AARCH64_RSRP:
+ case AARCH64_RSR64:
+ return target;
+ case AARCH64_RSRF:
+ subreg = gen_reg_rtx (SImode);
+ subreg = gen_lowpart_SUBREG (SImode, target);
+ return gen_lowpart_SUBREG (SFmode, subreg);
+ case AARCH64_RSRF64:
+ return gen_lowpart_SUBREG (DFmode, target);
+ }
+ }
+ if (write_op)
+ {
+
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ mode = TYPE_MODE (TREE_TYPE (arg1));
+ input_val = copy_to_mode_reg (mode, expand_normal (arg1));
+
+ switch (fcode)
+ {
+ case AARCH64_WSR:
+ subreg = gen_lowpart_SUBREG (DImode, input_val);
+ break;
+ case AARCH64_WSRP:
+ case AARCH64_WSR64:
+ subreg = input_val;
+ break;
+ case AARCH64_WSRF:
+ subreg = gen_lowpart_SUBREG (SImode, input_val);
+ subreg = gen_lowpart_SUBREG (DImode, subreg);
+ break;
+ case AARCH64_WSRF64:
+ subreg = gen_lowpart_SUBREG (DImode, input_val);
+ break;
+ }
+
+ create_fixed_operand (&ops[0], const_str);
+ create_input_operand (&ops[1], subreg, DImode);
+ expand_insn (CODE_FOR_aarch64_write_sysregdi, 2, ops);
+
+ return target;
+ }
+
+ error_at (EXPR_LOCATION (exp),
+ "Malformed call to read/write system register builtin");
+ return target;
+}
+
/* Expand an expression EXP that calls a MEMTAG built-in FCODE
with result going to TARGET. */
static rtx
@@ -2832,6 +3021,17 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target,
case AARCH64_BUILTIN_RNG_RNDR:
case AARCH64_BUILTIN_RNG_RNDRRS:
return aarch64_expand_rng_builtin (exp, target, fcode, ignore);
+ case AARCH64_RSR:
+ case AARCH64_RSRP:
+ case AARCH64_RSR64:
+ case AARCH64_RSRF:
+ case AARCH64_RSRF64:
+ case AARCH64_WSR:
+ case AARCH64_WSRP:
+ case AARCH64_WSR64:
+ case AARCH64_WSRF:
+ case AARCH64_WSRF64:
+ return aarch64_expand_rwsr_builtin (exp, target, fcode);
}
if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX)
@@ -281,6 +281,8 @@
UNSPEC_UPDATE_FFRT
UNSPEC_RDFFR
UNSPEC_WRFFR
+ UNSPEC_SYSREG_RDI
+ UNSPEC_SYSREG_WDI
;; Represents an SVE-style lane index, in which the indexing applies
;; within the containing 128-bit block.
UNSPEC_SVE_LANE_SELECT
@@ -476,6 +478,21 @@
;; Jumps and other miscellaneous insns
;; -------------------------------------------------------------------
+(define_insn "aarch64_read_sysregdi"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec_volatile:DI [(match_operand 1 "aarch64_sysreg_string" "")]
+ UNSPEC_SYSREG_RDI))]
+ ""
+ "mrs\t%x0, %1"
+ )
+
+(define_insn "aarch64_write_sysregdi"
+ [(unspec_volatile:DI [(match_operand 0 "aarch64_sysreg_string" "")
+ (match_operand:DI 1 "register_operand" "r")] UNSPEC_SYSREG_WDI)]
+ ""
+ "msr\t%0, %x1"
+ )
+
(define_insn "indirect_jump"
[(set (pc) (match_operand:DI 0 "register_operand" "r"))]
""
@@ -314,6 +314,36 @@ __rndrrs (uint64_t *__res)
#pragma GCC pop_options
+#define __arm_rsr(__regname) \
+ __builtin_aarch64_rsr (__regname)
+
+#define __arm_rsrp(__regname) \
+ __builtin_aarch64_rsrp (__regname)
+
+#define __arm_rsr64(__regname) \
+ __builtin_aarch64_rsr64 (__regname)
+
+#define __arm_rsrf(__regname) \
+ __builtin_aarch64_rsrf (__regname)
+
+#define __arm_rsrf64(__regname) \
+ __builtin_aarch64_rsrf64 (__regname)
+
+#define __arm_wsr(__regname, __value) \
+ __builtin_aarch64_wsr (__regname, __value)
+
+#define __arm_wsrp(__regname, __value) \
+ __builtin_aarch64_wsrp (__regname, __value)
+
+#define __arm_wsr64(__regname, __value) \
+ __builtin_aarch64_wsr64 (__regname, __value)
+
+#define __arm_wsrf(__regname, __value) \
+ __builtin_aarch64_wsrf (__regname, __value)
+
+#define __arm_wsrf64(__regname, __value) \
+ __builtin_aarch64_wsrf64 (__regname, __value)
+
#ifdef __cplusplus
}
#endif
new file mode 100644
@@ -0,0 +1,20 @@
+/* Test the __arm_[r,w]sr ACLE intrinsics family. */
+/* Ensure that illegal behavior is rejected by the compiler. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8.4-a" } */
+
+#include <arm_acle.h>
+
+/* Ensure that read/write-only register attributes are respected by the compiler. */
+void
+test_rwsr_read_write_only ()
+{
+ /* Attempt to write to read-only registers. */
+ long long a = __arm_rsr64 ("aidr_el1"); /* Read ok. */
+ __arm_wsr64 ("aidr_el1", a); /* { dg-error {invalid system register name provided} } */
+
+ /* Attempt to read from write-only registers. */
+ __arm_wsr64("icc_asgi1r_el1", a); /* Write ok. */
+ long long b = __arm_rsr64("icc_asgi1r_el1"); /* { dg-error {invalid system register name provided} } */
+}
new file mode 100644
@@ -0,0 +1,144 @@
+/* Test the __arm_[r,w]sr ACLE intrinsics family. */
+/* Check that function variants for different data types handle types correctly. */
+/* { dg-do compile } */
+/* { dg-options "-O1 -march=armv8.4-a" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_acle.h>
+
+/*
+** get_rsr:
+** ...
+** mrs x([0-9]+), s2_1_c0_c7_4
+** add w\1, w\1, 1
+** ...
+*/
+int
+get_rsr ()
+{
+ int a = __arm_rsr("trcseqstr");
+ return a+1;
+}
+
+/*
+** get_rsrf:
+** mrs x([0-9]+), s2_1_c0_c7_4
+** fmov s[0-9]+, w\1
+** ...
+*/
+float
+get_rsrf ()
+{
+ return __arm_rsrf("trcseqstr");
+}
+
+/*
+** get_rsrp:
+** mrs x0, s2_1_c0_c7_4
+** ret
+*/
+void *
+get_rsrp ()
+{
+ return __arm_rsrp("trcseqstr");
+}
+
+/*
+** get_rsr64:
+** mrs x0, s2_1_c0_c7_4
+** ret
+*/
+long long
+get_rsr64 ()
+{
+ return __arm_rsr64("trcseqstr");
+}
+
+/*
+** get_rsrf64:
+** mrs x([0-9]+), s2_1_c0_c7_4
+** fmov d[0-9]+, x\1
+** ...
+*/
+double
+get_rsrf64 ()
+{
+ return __arm_rsrf64("trcseqstr");
+}
+
+/*
+** set_wsr32:
+** ...
+** add w([0-9]+), w\1, 1
+** msr s2_1_c0_c7_4, x\1
+** ...
+*/
+void
+set_wsr32 (int a)
+{
+ __arm_wsr("trcseqstr", a+1);
+}
+
+/*
+** set_wsrp:
+** ...
+** msr s2_1_c0_c7_4, x[0-9]+
+** ...
+*/
+void
+set_wsrp (void *a)
+{
+ __arm_wsrp("trcseqstr", a);
+}
+
+/*
+** set_wsr64:
+** ...
+** msr s2_1_c0_c7_4, x[0-9]+
+** ...
+*/
+void
+set_wsr64 (long long a)
+{
+ __arm_wsr64("trcseqstr", a);
+}
+
+/*
+** set_wsrf32:
+** ...
+** fmov w([0-9]+), s[0-9]+
+** msr s2_1_c0_c7_4, x\1
+** ...
+*/
+void
+set_wsrf32 (float a)
+{
+ __arm_wsrf("trcseqstr", a);
+}
+
+/*
+** set_wsrf64:
+** ...
+** fmov x([0-9]+), d[0-9]+
+** msr s2_1_c0_c7_4, x\1
+** ...
+*/
+void
+set_wsrf64(double a)
+{
+ __arm_wsrf64("trcseqstr", a);
+}
+
+/*
+** set_custom:
+** ...
+** mrs x0, s1_2_c3_c4_5
+** ...
+** msr s1_2_c3_c4_5, x0
+** ...
+*/
+void set_custom()
+{
+ __uint64_t b = __arm_rsr64("S1_2_C3_C4_5");
+ __arm_wsr64("S1_2_C3_C4_5", b);
+}