@@ -808,6 +808,10 @@ enum aarch64_builtins
AARCH64_RBIT,
AARCH64_RBITL,
AARCH64_RBITLL,
+ AARCH64_PLD,
+ AARCH64_PLDX,
+ AARCH64_PLI,
+ AARCH64_PLIX,
AARCH64_BUILTIN_MAX
};
@@ -1798,6 +1802,34 @@ aarch64_init_rng_builtins (void)
AARCH64_BUILTIN_RNG_RNDRRS);
}
+/* Add builtins for data and instruction prefetch.
+
+   Registers four builtins, all returning void:
+     __builtin_aarch64_pld  (const volatile void *)
+     __builtin_aarch64_pli  (const volatile void *)
+     __builtin_aarch64_pldx (unsigned, unsigned, unsigned,
+                             const volatile void *)
+     __builtin_aarch64_plix (unsigned, unsigned, const volatile void *)
+   and records each decl in aarch64_builtin_decls under its function
+   code.  */
+static void
+aarch64_init_prefetch_builtin (void)
+{
+/* Register builtin "__builtin_aarch64_" N with the function type currently
+   held in the local FTYPE, storing the decl at INDEX.  */
+#define AARCH64_INIT_PREFETCH_BUILTIN(INDEX, N) \
+  aarch64_builtin_decls[INDEX] = \
+    aarch64_general_add_builtin ("__builtin_aarch64_" N, ftype, INDEX)
+
+  tree ftype;
+  tree cv_argtype;
+
+  /* The address argument has type "const volatile void *".  */
+  cv_argtype = build_qualified_type (void_type_node, TYPE_QUAL_CONST
+                                                     | TYPE_QUAL_VOLATILE);
+  cv_argtype = build_pointer_type (cv_argtype);
+
+  /* __pld and __pli take only the address.  */
+  ftype = build_function_type_list (void_type_node, cv_argtype, NULL);
+  AARCH64_INIT_PREFETCH_BUILTIN (AARCH64_PLD, "pld");
+  AARCH64_INIT_PREFETCH_BUILTIN (AARCH64_PLI, "pli");
+
+  /* __pldx takes three unsigned arguments followed by the address.  */
+  ftype = build_function_type_list (void_type_node, unsigned_type_node,
+                                    unsigned_type_node, unsigned_type_node,
+                                    cv_argtype, NULL);
+  AARCH64_INIT_PREFETCH_BUILTIN (AARCH64_PLDX, "pldx");
+
+  /* __plix takes two unsigned arguments followed by the address.  */
+  ftype = build_function_type_list (void_type_node, unsigned_type_node,
+                                    unsigned_type_node, cv_argtype, NULL);
+  AARCH64_INIT_PREFETCH_BUILTIN (AARCH64_PLIX, "plix");
+
+#undef AARCH64_INIT_PREFETCH_BUILTIN
+}
+
/* Initialize the memory tagging extension (MTE) builtins. */
struct
{
@@ -2019,6 +2051,8 @@ aarch64_general_init_builtins (void)
aarch64_init_rng_builtins ();
aarch64_init_data_intrinsics ();
+ aarch64_init_prefetch_builtin ();
+
tree ftype_jcvt
= build_function_type_list (intSI_type_node, double_type_node, NULL);
aarch64_builtin_decls[AARCH64_JSCVT]
@@ -2599,6 +2633,127 @@ aarch64_expand_rng_builtin (tree exp, rtx target, int fcode, int ignore)
return target;
}
+/* Expand a prefetch builtin EXP.
+
+   FCODE is one of AARCH64_PLD, AARCH64_PLDX, AARCH64_PLI or AARCH64_PLIX.
+   The call is expanded to a single aarch64_pldx insn.  Its first operand
+   is a CONST_STRING naming the prefetch operation, formed by concatenating
+   an access kind (PLD, PST or PLI), a cache level (L1, L2, L3 or SLC) and
+   a retention policy (KEEP or STRM).  Its second operand is the address,
+   which is always the last argument of the call.
+
+   A kind, level or policy argument that is not an integer constant, or an
+   address that is not a pointer to void, is reported as an error and no
+   insn is emitted.  A constant that is out of range is diagnosed with a
+   warning and replaced by zero.  */
+void
+aarch64_expand_prefetch_builtin (tree exp, int fcode)
+{
+/* Expand call argument IN_IDX, which must be an integer constant, into
+   ops[OUT_IDX].  If it is not constant, report ERRMSG and use zero so
+   that the checks which follow can still run.  */
+#define EXPAND_CONST_INT(IN_IDX, OUT_IDX, ERRMSG) \
+  do \
+    { \
+      if (TREE_CODE (args[IN_IDX]) != INTEGER_CST) \
+        { \
+          error_at (EXPR_LOCATION (exp), ERRMSG); \
+          args[IN_IDX] = integer_zero_node; \
+        } \
+      ops[OUT_IDX] = expand_normal (args[IN_IDX]); \
+    } \
+  while (0)
+
+/* Warn with ERRMSG that VAR is out of range and reset it to zero.  */
+#define WARN_INVALID(VAR, ERRMSG) \
+  do \
+    { \
+      warning_at (EXPR_LOCATION (exp), 0, ERRMSG); \
+      VAR = 0; \
+    } \
+  while (0)
+
+  unsigned narg;
+
+  tree args[4];
+  /* ops[0..2] hold the expanded kind, level and policy constants;
+     ops[3] holds the expanded address.  */
+  rtx ops[4];
+  int kind_id = 0, level_id = 0, rettn_id = 0;
+  /* Large enough for the longest name, "PLDSLCKEEP", plus the NUL.  */
+  char prfop[11];
+
+  static const char *const kind_s[] = {"PLD", "PST", "PLI"};
+  static const char *const level_s[] = {"L1", "L2", "L3", "SLC"};
+  static const char *const rettn_s[] = {"KEEP", "STRM"};
+
+  /* Each of the four prefetch builtins takes a different number of
+     arguments, but proceeds to call the PRFM insn which requires 4
+     pieces of information to be fully defined.
+
+     Specify the total number of arguments for each builtin and, where
+     one of these takes less than 4 arguments, rely on the defaults set
+     above (kind PLD, level L1, policy KEEP) or override the kind.  */
+  switch (fcode)
+    {
+    case AARCH64_PLDX:
+      /* Kind, level and policy are all supplied by the caller.  */
+      narg = 4;
+      break;
+    case AARCH64_PLIX:
+      kind_id = 2;
+      narg = 3;
+      break;
+    case AARCH64_PLI:
+      kind_id = 2;
+      narg = 1;
+      break;
+    case AARCH64_PLD:
+      narg = 1;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  /* The address is always the last argument.  */
+  int addr_arg_index = narg - 1;
+
+  /* Extract the correct number of arguments from our function call.  */
+  for (unsigned i = 0; i < narg; i++)
+    args[i] = CALL_EXPR_ARG (exp, i);
+
+  /* Check address argument.  */
+  tree addr_type = TREE_TYPE (args[addr_arg_index]);
+  if (!POINTER_TYPE_P (addr_type)
+      || TREE_CODE (TREE_TYPE (addr_type)) != VOID_TYPE)
+    error_at (EXPR_LOCATION (exp), "invalid address type specified;"
+                                   " void const volatile * required");
+
+  ops[3] = expand_expr (args[addr_arg_index], NULL_RTX, Pmode, EXPAND_NORMAL);
+
+  /* Check arguments common to both pldx and plix.  */
+  if (fcode == AARCH64_PLDX || fcode == AARCH64_PLIX)
+    {
+      /* pldx has a leading access-kind argument that plix lacks.  */
+      int cache_index = (fcode == AARCH64_PLIX) ? 0 : 1;
+      int policy_index = cache_index + 1;
+
+      /* Cache level must be 0, 1, 2 or 3.  */
+      EXPAND_CONST_INT (cache_index, 1,
+                        "cache-level argument must be a constant");
+      level_id = INTVAL (ops[1]);
+      if (level_id < 0 || level_id > 3)
+        WARN_INVALID (level_id, "invalid cache level selected; using zero");
+
+      /* Retention policy must be either zero or one.  */
+      EXPAND_CONST_INT (policy_index, 2,
+                        "retention policy argument must be a constant");
+      rettn_id = INTVAL (ops[2]);
+      if (rettn_id != 0 && rettn_id != 1)
+        WARN_INVALID (rettn_id, "invalid retention policy selected; "
+                                "using zero");
+    }
+
+  /* For PLDX, validate the access kind argument.  */
+  if (fcode == AARCH64_PLDX)
+    {
+      /* Argument 0 must be either zero or one.  */
+      EXPAND_CONST_INT (0, 0, "access kind argument must be a constant");
+      kind_id = INTVAL (ops[0]);
+      if (kind_id != 0 && kind_id != 1)
+        WARN_INVALID (kind_id, "invalid access kind argument; using zero");
+    }
+
+  /* Do not emit an insn for a call that has been diagnosed as invalid.  */
+  if (seen_error ())
+    return;
+
+  snprintf (prfop, sizeof (prfop), "%s%s%s", kind_s[kind_id],
+                                             level_s[level_id],
+                                             rettn_s[rettn_id]);
+
+  /* The string is referenced from an rtx, so allocate it in GC memory
+     rather than with malloc.  rtx_alloc already sets the rtx code.  */
+  rtx const_str = rtx_alloc (CONST_STRING);
+  XSTR (const_str, 0) = ggc_strdup (prfop);
+
+  class expand_operand exp_ops[2];
+
+  create_fixed_operand (&exp_ops[0], const_str);
+  create_address_operand (&exp_ops[1], ops[3]);
+  maybe_expand_insn (CODE_FOR_aarch64_pldx, 2, exp_ops);
+
+#undef EXPAND_CONST_INT
+#undef WARN_INVALID
+}
+
/* Expand an expression EXP that calls a MEMTAG built-in FCODE
with result going to TARGET. */
static rtx
@@ -2832,6 +2987,12 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target,
case AARCH64_BUILTIN_RNG_RNDR:
case AARCH64_BUILTIN_RNG_RNDRRS:
return aarch64_expand_rng_builtin (exp, target, fcode, ignore);
+ case AARCH64_PLD:
+ case AARCH64_PLDX:
+ case AARCH64_PLI:
+ case AARCH64_PLIX:
+ aarch64_expand_prefetch_builtin (exp, fcode);
+ return target;
}
if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX)
@@ -281,6 +281,7 @@
UNSPEC_UPDATE_FFRT
UNSPEC_RDFFR
UNSPEC_WRFFR
+ UNSPEC_PLDX
;; Represents an SVE-style lane index, in which the indexing applies
;; within the containing 128-bit block.
UNSPEC_SVE_LANE_SELECT
@@ -844,6 +845,17 @@
[(set_attr "type" "load_4")]
)
+;; Prefetch insn emitted when expanding the __builtin_aarch64_pld, pldx,
+;; pli and plix builtins.  Operand 0 names the prefetch operation (the
+;; expander passes a CONST_STRING such as "PLDL1KEEP") and operand 1 is
+;; the address to prefetch.
+(define_insn "aarch64_pldx"
+ [(unspec [(match_operand 0 "" "")
+ (match_operand:DI 1 "aarch64_prefetch_operand" "Dp")] UNSPEC_PLDX)]
+ ""
+ {
+ /* Operand 1 is a bare address; wrap it in a MEM before printing,
+    presumably so that %1 is output in memory-operand syntax.  */
+ operands[1] = gen_rtx_MEM (DImode, operands[1]);
+ return "prfm\\t%0, %1";
+ }
+ [(set_attr "type" "load_4")]
+)
+
(define_insn "trap"
[(trap_if (const_int 1) (const_int 8))]
""
@@ -78,6 +78,36 @@ _GCC_ARM_ACLE_DATA_FN (revll, bswap64, uint64_t, uint64_t)
#undef _GCC_ARM_ACLE_DATA_FN
+/* Prefetch data at __ADDR.  Expands to a PRFM with the PLDL1KEEP
+   operation: access kind PLD, cache level L1, retention policy KEEP.  */
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__pld (void const volatile *__addr)
+{
+  __builtin_aarch64_pld (__addr);
+}
+
+/* Prefetch instructions at __ADDR.  Expands to a PRFM with the PLIL1KEEP
+   operation: access kind PLI, cache level L1, retention policy KEEP.  */
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__pli (void const volatile *__addr)
+{
+  __builtin_aarch64_pli (__addr);
+}
+
+/* Prefetch instructions at __ADDR with an explicit cache level and
+   retention policy.  __CACHE must be a constant 0, 1, 2 or 3, selecting
+   L1, L2, L3 or SLC; __RETTN must be a constant 0 or 1, selecting KEEP
+   or STRM.  Expands to a PRFM with the matching PLI operation.  */
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__plix (unsigned int __cache, unsigned int __rettn,
+        void const volatile *__addr)
+{
+  __builtin_aarch64_plix (__cache, __rettn, __addr);
+}
+
+/* Prefetch data at __ADDR with an explicit access kind, cache level and
+   retention policy.  __ACCESS must be a constant 0 or 1, selecting PLD
+   or PST; __CACHE must be a constant 0, 1, 2 or 3, selecting L1, L2, L3
+   or SLC; __RETTN must be a constant 0 or 1, selecting KEEP or STRM.
+   Expands to a PRFM with the matching operation.  */
+__extension__ extern __inline void
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+__pldx (unsigned int __access, unsigned int __cache, unsigned int __rettn,
+        void const volatile *__addr)
+{
+  __builtin_aarch64_pldx (__access, __cache, __rettn, __addr);
+}
+
__extension__ extern __inline unsigned long
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__revl (unsigned long __value)
new file mode 100644
@@ -0,0 +1,90 @@
+/* { dg-do compile } */
+/* { dg-options "-march=armv8-a -O2" } */
+
+#include <arm_acle.h>
+
+/* Check that the ACLE prefetch intrinsics __pld, __pldx, __pli and __plix
+ each emit a PRFM instruction with the expected prefetch operation and a
+ base-register address. */
+
+/* Access kind specifiers. */
+#define PLD 0
+#define PST 1
+/* Cache levels. */
+#define L1 0
+#define L2 1
+#define L3 2
+#define SLC 3
+/* Retention policies. */
+#define KEEP 0
+#define STRM 1
+
+/* Call __pldx with every combination of access kind (PLD, PST), cache
+ level (L1, L2, L3, SLC) and retention policy (KEEP, STRM). */
+void
+prefetch_for_read_write (void *a)
+{
+ __pldx (PLD, L1, KEEP, a);
+ __pldx (PLD, L1, STRM, a);
+ __pldx (PLD, L2, KEEP, a);
+ __pldx (PLD, L2, STRM, a);
+ __pldx (PLD, L3, KEEP, a);
+ __pldx (PLD, L3, STRM, a);
+ __pldx (PLD, SLC, KEEP, a);
+ __pldx (PLD, SLC, STRM, a);
+ __pldx (PST, L1, KEEP, a);
+ __pldx (PST, L1, STRM, a);
+ __pldx (PST, L2, KEEP, a);
+ __pldx (PST, L2, STRM, a);
+ __pldx (PST, L3, KEEP, a);
+ __pldx (PST, L3, STRM, a);
+ __pldx (PST, SLC, KEEP, a);
+ __pldx (PST, SLC, STRM, a);
+}
+
+/* { dg-final { scan-assembler "prfm\tPLDL1KEEP, \\\[x\[0-9\]+\\\]" } } */
+/* { dg-final { scan-assembler "prfm\tPLDL1STRM, \\\[x\[0-9\]+\\\]" } } */
+/* { dg-final { scan-assembler "prfm\tPLDL2KEEP, \\\[x\[0-9\]+\\\]" } } */
+/* { dg-final { scan-assembler "prfm\tPLDL2STRM, \\\[x\[0-9\]+\\\]" } } */
+/* { dg-final { scan-assembler "prfm\tPLDL3KEEP, \\\[x\[0-9\]+\\\]" } } */
+/* { dg-final { scan-assembler "prfm\tPLDL3STRM, \\\[x\[0-9\]+\\\]" } } */
+/* { dg-final { scan-assembler "prfm\tPLDSLCKEEP, \\\[x\[0-9\]+\\\]" } } */
+/* { dg-final { scan-assembler "prfm\tPLDSLCSTRM, \\\[x\[0-9\]+\\\]" } } */
+/* { dg-final { scan-assembler "prfm\tPSTL1KEEP, \\\[x\[0-9\]+\\\]" } } */
+/* { dg-final { scan-assembler "prfm\tPSTL1STRM, \\\[x\[0-9\]+\\\]" } } */
+/* { dg-final { scan-assembler "prfm\tPSTL2KEEP, \\\[x\[0-9\]+\\\]" } } */
+/* { dg-final { scan-assembler "prfm\tPSTL2STRM, \\\[x\[0-9\]+\\\]" } } */
+/* { dg-final { scan-assembler "prfm\tPSTL3KEEP, \\\[x\[0-9\]+\\\]" } } */
+/* { dg-final { scan-assembler "prfm\tPSTL3STRM, \\\[x\[0-9\]+\\\]" } } */
+/* { dg-final { scan-assembler "prfm\tPSTSLCKEEP, \\\[x\[0-9\]+\\\]" } } */
+/* { dg-final { scan-assembler "prfm\tPSTSLCSTRM, \\\[x\[0-9\]+\\\]" } } */
+
+/* Call the address-only intrinsics __pld and __pli; the directives that
+ follow expect them to use the L1 cache level and KEEP policy. */
+void
+prefetch_simple (void *a)
+{
+ __pld (a);
+ __pli (a);
+}
+
+/* { dg-final { scan-assembler "prfm\tPLDL1KEEP, \\\[x\[0-9\]+\\\]" } } */
+/* { dg-final { scan-assembler "prfm\tPLIL1KEEP, \\\[x\[0-9\]+\\\]" } } */
+
+/* Call __plix with every combination of cache level (L1, L2, L3, SLC)
+ and retention policy (KEEP, STRM). */
+void
+prefetch_instructions (void *a)
+{
+ __plix (L1, KEEP, a);
+ __plix (L1, STRM, a);
+ __plix (L2, KEEP, a);
+ __plix (L2, STRM, a);
+ __plix (L3, KEEP, a);
+ __plix (L3, STRM, a);
+ __plix (SLC, KEEP, a);
+ __plix (SLC, STRM, a);
+}
+
+/* { dg-final { scan-assembler "prfm\tPLIL1KEEP, \\\[x\[0-9\]+\\\]" } } */
+/* { dg-final { scan-assembler "prfm\tPLIL1STRM, \\\[x\[0-9\]+\\\]" } } */
+/* { dg-final { scan-assembler "prfm\tPLIL2KEEP, \\\[x\[0-9\]+\\\]" } } */
+/* { dg-final { scan-assembler "prfm\tPLIL2STRM, \\\[x\[0-9\]+\\\]" } } */
+/* { dg-final { scan-assembler "prfm\tPLIL3KEEP, \\\[x\[0-9\]+\\\]" } } */
+/* { dg-final { scan-assembler "prfm\tPLIL3STRM, \\\[x\[0-9\]+\\\]" } } */
+/* { dg-final { scan-assembler "prfm\tPLISLCKEEP, \\\[x\[0-9\]+\\\]" } } */
+/* { dg-final { scan-assembler "prfm\tPLISLCSTRM, \\\[x\[0-9\]+\\\]" } } */
+