@@ -77,6 +77,12 @@
(define_register_constraint "v" "VGPR_REGS"
"VGPR registers")
+(define_register_constraint "a" "TARGET_CDNA1_PLUS ? AVGPR_REGS : NO_REGS"
+ "Accumulator VGPR registers")
+
+(define_register_constraint "b" "TARGET_CDNA2_PLUS ? AVGPR_REGS : NO_REGS"
+ "Accumulator VGPR registers (CDNA2 and later only)")
+
(define_register_constraint "Sg" "SGPR_REGS"
"SGPR registers")
@@ -389,12 +389,17 @@
(set_attr "length" "0")])
(define_insn "*mov<mode>"
- [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v,v")
- (match_operand:V_1REG 1 "general_operand" "vA,B"))]
+ [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v,v, v,$a, b")
+ (match_operand:V_1REG 1 "general_operand" "vA,B, a, v, b"))]
""
- "v_mov_b32\t%0, %1"
- [(set_attr "type" "vop1,vop1")
- (set_attr "length" "4,8")])
+ "@
+ v_mov_b32\t%0, %1
+ v_mov_b32\t%0, %1
+ v_accvgpr_read_b32\t%0, %1
+ v_accvgpr_write_b32\t%0, %1
+ v_accvgpr_mov_b32\t%0, %1"
+ [(set_attr "type" "vop1,vop1,vop3p_mai,vop3p_mai,vop1")
+ (set_attr "length" "4,8,8,8,4")])
(define_insn "mov<mode>_exec"
[(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v, v, v, v, v, m")
@@ -435,17 +440,28 @@
; (set_attr "length" "4,8,16,16")])
(define_insn "*mov<mode>"
- [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v")
- (match_operand:V_2REG 1 "general_operand" "vDB"))]
+ [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v, v,$a, b")
+ (match_operand:V_2REG 1 "general_operand" "vDB, a, v, b"))]
""
- {
- if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
- return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
- else
- return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
- }
- [(set_attr "type" "vmult")
- (set_attr "length" "16")])
+ "@
+ * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
+ return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
+ else \
+ return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
+ * if (REGNO (operands[0]) <= REGNO (operands[1])) \
+ return \"v_accvgpr_read_b32\t%L0, %L1\;v_accvgpr_read_b32\t%H0, %H1\"; \
+ else \
+ return \"v_accvgpr_read_b32\t%H0, %H1\;v_accvgpr_read_b32\t%L0, %L1\";
+ * if (REGNO (operands[0]) <= REGNO (operands[1])) \
+ return \"v_accvgpr_write_b32\t%L0, %L1\;v_accvgpr_write_b32\t%H0, %H1\"; \
+ else \
+ return \"v_accvgpr_write_b32\t%H0, %H1\;v_accvgpr_write_b32\t%L0, %L1\";
+ * if (REGNO (operands[0]) <= REGNO (operands[1])) \
+ return \"v_accvgpr_mov_b32\t%L0, %L1\;v_accvgpr_mov_b32\t%H0, %H1\"; \
+ else \
+ return \"v_accvgpr_mov_b32\t%H0, %H1\;v_accvgpr_mov_b32\t%L0, %L1\";"
+ [(set_attr "type" "vmult,vmult,vmult,vmult")
+ (set_attr "length" "16,16,16,8")])
(define_insn "mov<mode>_exec"
[(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v, v, v, v, m")
@@ -521,26 +537,28 @@
; flat_load v, vT
(define_insn "mov<mode>_sgprbase"
- [(set (match_operand:V_1REG 0 "nonimmediate_operand" "= v, v, v, m")
+ [(set (match_operand:V_1REG 0 "nonimmediate_operand" "= v, v, v, m, b, m")
(unspec:V_1REG
- [(match_operand:V_1REG 1 "general_operand" " vA,vB, m, v")]
+ [(match_operand:V_1REG 1 "general_operand" " vA,vB, m, v, m, b")]
UNSPEC_SGPRBASE))
- (clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v,&v,&v"))]
+ (clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v,&v,&v,&v,&v"))]
"lra_in_progress || reload_completed"
"@
v_mov_b32\t%0, %1
v_mov_b32\t%0, %1
#
+ #
+ #
#"
- [(set_attr "type" "vop1,vop1,*,*")
- (set_attr "length" "4,8,12,12")])
+ [(set_attr "type" "vop1,vop1,*,*,*,*")
+ (set_attr "length" "4,8,12,12,12,12")])
(define_insn "mov<mode>_sgprbase"
- [(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v, v, m")
+ [(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v, v, m, b, m")
(unspec:V_2REG
- [(match_operand:V_2REG 1 "general_operand" "vDB, m, v")]
+ [(match_operand:V_2REG 1 "general_operand" "vDB, m, v, m, b")]
UNSPEC_SGPRBASE))
- (clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v,&v"))]
+ (clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v,&v,&v,&v"))]
"lra_in_progress || reload_completed"
"@
* if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
@@ -548,17 +566,19 @@
else \
return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
#
+ #
+ #
#"
- [(set_attr "type" "vmult,*,*")
- (set_attr "length" "8,12,12")])
+ [(set_attr "type" "vmult,*,*,*,*")
+ (set_attr "length" "8,12,12,12,12")])
; reload_in was once a standard name, but here it's only referenced by
; gcn_secondary_reload. It allows a reload with a scratch register.
(define_expand "reload_in<mode>"
- [(set (match_operand:V_ALL 0 "register_operand" "= v")
- (match_operand:V_ALL 1 "memory_operand" " m"))
- (clobber (match_operand:<VnDI> 2 "register_operand" "=&v"))]
+ [(set (match_operand:V_ALL 0 "register_operand" "= v, b")
+ (match_operand:V_ALL 1 "memory_operand" " m, m"))
+ (clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v"))]
""
{
emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
@@ -568,9 +588,9 @@
; reload_out is similar to reload_in, above.
(define_expand "reload_out<mode>"
- [(set (match_operand:V_ALL 0 "memory_operand" "= m")
- (match_operand:V_ALL 1 "register_operand" " v"))
- (clobber (match_operand:<VnDI> 2 "register_operand" "=&v"))]
+ [(set (match_operand:V_ALL 0 "memory_operand" "= m, m")
+ (match_operand:V_ALL 1 "register_operand" " v, b"))
+ (clobber (match_operand:<VnDI> 2 "register_operand" "=&v,&v"))]
""
{
emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
@@ -953,7 +973,7 @@
{})
(define_insn "gather<mode>_insn_1offset<exec>"
- [(set (match_operand:V_ALL 0 "register_operand" "=v")
+ [(set (match_operand:V_ALL 0 "register_operand" "=vb")
(unspec:V_ALL
[(plus:<VnDI> (match_operand:<VnDI> 1 "register_operand" " v")
(vec_duplicate:<VnDI>
@@ -992,7 +1012,7 @@
(set_attr "length" "12")])
(define_insn "gather<mode>_insn_1offset_ds<exec>"
- [(set (match_operand:V_ALL 0 "register_operand" "=v")
+ [(set (match_operand:V_ALL 0 "register_operand" "=vb")
(unspec:V_ALL
[(plus:<VnSI> (match_operand:<VnSI> 1 "register_operand" " v")
(vec_duplicate:<VnSI>
@@ -1014,7 +1034,7 @@
(set_attr "length" "12")])
(define_insn "gather<mode>_insn_2offsets<exec>"
- [(set (match_operand:V_ALL 0 "register_operand" "=v")
+ [(set (match_operand:V_ALL 0 "register_operand" "=vb")
(unspec:V_ALL
[(plus:<VnDI>
(plus:<VnDI>
@@ -1085,7 +1105,7 @@
[(plus:<VnDI> (match_operand:<VnDI> 0 "register_operand" "v")
(vec_duplicate:<VnDI>
(match_operand 1 "immediate_operand" "n")))
- (match_operand:V_ALL 2 "register_operand" "v")
+ (match_operand:V_ALL 2 "register_operand" "vb")
(match_operand 3 "immediate_operand" "n")
(match_operand 4 "immediate_operand" "n")]
UNSPEC_SCATTER))]
@@ -1123,7 +1143,7 @@
[(plus:<VnSI> (match_operand:<VnSI> 0 "register_operand" "v")
(vec_duplicate:<VnSI>
(match_operand 1 "immediate_operand" "n")))
- (match_operand:V_ALL 2 "register_operand" "v")
+ (match_operand:V_ALL 2 "register_operand" "vb")
(match_operand 3 "immediate_operand" "n")
(match_operand 4 "immediate_operand" "n")]
UNSPEC_SCATTER))]
@@ -1149,7 +1169,7 @@
(sign_extend:<VnDI>
(match_operand:<VnSI> 1 "register_operand" " v")))
(vec_duplicate:<VnDI> (match_operand 2 "immediate_operand" " n")))
- (match_operand:V_ALL 3 "register_operand" " v")
+ (match_operand:V_ALL 3 "register_operand" " vb")
(match_operand 4 "immediate_operand" " n")
(match_operand 5 "immediate_operand" " n")]
UNSPEC_SCATTER))]
@@ -96,6 +96,7 @@ static hash_map<tree, int> lds_allocs;
#define MAX_NORMAL_SGPR_COUNT 62 // i.e. 64 with VCC
#define MAX_NORMAL_VGPR_COUNT 24
+#define MAX_NORMAL_AVGPR_COUNT 24
/* }}} */
/* {{{ Initialization and options. */
@@ -480,7 +481,7 @@ gcn_class_max_nregs (reg_class_t rclass, machine_mode mode)
{
/* Scalar registers are 32bit, vector registers are in fact tuples of
64 lanes. */
- if (rclass == VGPR_REGS)
+ if (rclass == VGPR_REGS || rclass == AVGPR_REGS)
{
if (vgpr_1reg_mode_p (mode))
return 1;
@@ -571,7 +572,7 @@ gcn_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
return (sgpr_1reg_mode_p (mode)
|| (!((regno - FIRST_SGPR_REG) & 1) && sgpr_2reg_mode_p (mode))
|| (((regno - FIRST_SGPR_REG) & 3) == 0 && mode == TImode));
- if (VGPR_REGNO_P (regno))
+ if (VGPR_REGNO_P (regno) || (AVGPR_REGNO_P (regno) && TARGET_CDNA1_PLUS))
/* Vector instructions do not care about the alignment of register
pairs, but where there is no 64-bit instruction, many of the
define_split do not work if the input and output registers partially
@@ -611,6 +612,8 @@ gcn_regno_reg_class (int regno)
}
if (VGPR_REGNO_P (regno))
return VGPR_REGS;
+ if (AVGPR_REGNO_P (regno))
+ return AVGPR_REGS;
if (SGPR_REGNO_P (regno))
return SGPR_REGS;
if (regno < FIRST_VGPR_REG)
@@ -801,7 +804,7 @@ gcn_spill_class (reg_class_t c, machine_mode /*mode */ )
|| c == VCC_CONDITIONAL_REG || c == EXEC_MASK_REG)
return SGPR_REGS;
else
- return NO_REGS;
+ return c == VGPR_REGS && TARGET_CDNA2_PLUS ? AVGPR_REGS : NO_REGS;
}
/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
@@ -2363,11 +2366,11 @@ gcn_sgpr_move_p (rtx op0, rtx op1)
if (MEM_P (op1) && AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op1)))
return true;
if (!REG_P (op0) || REGNO (op0) >= FIRST_PSEUDO_REGISTER
- || VGPR_REGNO_P (REGNO (op0)))
+ || VGPR_REGNO_P (REGNO (op0)) || AVGPR_REGNO_P (REGNO (op0)))
return false;
if (REG_P (op1)
&& REGNO (op1) < FIRST_PSEUDO_REGISTER
- && !VGPR_REGNO_P (REGNO (op1)))
+ && !VGPR_REGNO_P (REGNO (op1)) && !AVGPR_REGNO_P (REGNO (op1)))
return true;
return immediate_operand (op1, VOIDmode) || memory_operand (op1, VOIDmode);
}
@@ -2441,6 +2444,14 @@ gcn_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
result = (rclass == VGPR_REGS ? NO_REGS : VGPR_REGS);
break;
}
+
+ /* CDNA1 doesn't have an instruction for going between the accumulator
+ registers and memory. Go via a VGPR in this case. */
+ if (TARGET_CDNA1 && rclass == AVGPR_REGS && result != VGPR_REGS)
+ {
+ result = VGPR_REGS;
+ sri->icode = CODE_FOR_nothing;
+ }
}
if (dump_file && (dump_flags & TDF_DETAILS))
@@ -2462,7 +2473,8 @@ gcn_conditional_register_usage (void)
if (cfun->machine->normal_function)
{
- /* Restrict the set of SGPRs and VGPRs used by non-kernel functions. */
+ /* Restrict the set of SGPRs, VGPRs and AVGPRs used by non-kernel
+ functions. */
for (int i = SGPR_REGNO (MAX_NORMAL_SGPR_COUNT);
i <= LAST_SGPR_REG; i++)
fixed_regs[i] = 1, call_used_regs[i] = 1;
@@ -2471,6 +2483,9 @@ gcn_conditional_register_usage (void)
i <= LAST_VGPR_REG; i++)
fixed_regs[i] = 1, call_used_regs[i] = 1;
+ for (int i = AVGPR_REGNO (MAX_NORMAL_AVGPR_COUNT);
+ i <= LAST_AVGPR_REG; i++)
+ fixed_regs[i] = 1, call_used_regs[i] = 1;
return;
}
@@ -2524,6 +2539,20 @@ gcn_conditional_register_usage (void)
fixed_regs[cfun->machine->args.reg[WORK_ITEM_ID_Z_ARG]] = 1;
}
+/* Return true if X is accepted by gcn_vgpr_register_operand for MODE or,
+ when TARGET_CDNA2_PLUS, by gcn_avgpr_register_operand. gcn_valid_move_p
+ uses this to test the register side of flat, global and DS memory
+ moves. */
+static bool
+gcn_vgpr_equivalent_register_operand (rtx x, machine_mode mode)
+{
+ if (gcn_vgpr_register_operand (x, mode))
+ return true;
+ if (TARGET_CDNA2_PLUS && gcn_avgpr_register_operand (x, mode))
+ return true;
+ return false;
+}
+
/* Determine if a load or store is valid, according to the register classes
and address space. Used primarily by the machine description to decide
when to split a move into two steps. */
@@ -2532,21 +2557,38 @@ bool
gcn_valid_move_p (machine_mode mode, rtx dest, rtx src)
{
if (!MEM_P (dest) && !MEM_P (src))
- return true;
+ {
+ /* A VGPR operand may be copied to or from an AVGPR operand. */
+ if (gcn_vgpr_register_operand (src, mode)
+ && gcn_avgpr_register_operand (dest, mode))
+ return true;
+ if (gcn_avgpr_register_operand (src, mode)
+ && gcn_vgpr_register_operand (dest, mode))
+ return true;
+ /* AVGPR-to-AVGPR copies are only valid when TARGET_CDNA2_PLUS. */
+ if (TARGET_CDNA2_PLUS && gcn_avgpr_register_operand (src, mode)
+ && gcn_avgpr_register_operand (dest, mode))
+ return true;
+ /* Reject any other move that involves an AVGPR hard register. */
+ if (gcn_avgpr_hard_register_operand (src, mode)
+ || gcn_avgpr_hard_register_operand (dest, mode))
+ return false;
+ return true;
+ }
if (MEM_P (dest)
&& AS_FLAT_P (MEM_ADDR_SPACE (dest))
&& (gcn_flat_address_p (XEXP (dest, 0), mode)
|| GET_CODE (XEXP (dest, 0)) == SYMBOL_REF
|| GET_CODE (XEXP (dest, 0)) == LABEL_REF)
- && gcn_vgpr_register_operand (src, mode))
+ && gcn_vgpr_equivalent_register_operand (src, mode))
return true;
else if (MEM_P (src)
&& AS_FLAT_P (MEM_ADDR_SPACE (src))
&& (gcn_flat_address_p (XEXP (src, 0), mode)
|| GET_CODE (XEXP (src, 0)) == SYMBOL_REF
|| GET_CODE (XEXP (src, 0)) == LABEL_REF)
- && gcn_vgpr_register_operand (dest, mode))
+ && gcn_vgpr_equivalent_register_operand (dest, mode))
return true;
if (MEM_P (dest)
@@ -2554,14 +2593,14 @@ gcn_valid_move_p (machine_mode mode, rtx dest, rtx src)
&& (gcn_global_address_p (XEXP (dest, 0))
|| GET_CODE (XEXP (dest, 0)) == SYMBOL_REF
|| GET_CODE (XEXP (dest, 0)) == LABEL_REF)
- && gcn_vgpr_register_operand (src, mode))
+ && gcn_vgpr_equivalent_register_operand (src, mode))
return true;
else if (MEM_P (src)
&& AS_GLOBAL_P (MEM_ADDR_SPACE (src))
&& (gcn_global_address_p (XEXP (src, 0))
|| GET_CODE (XEXP (src, 0)) == SYMBOL_REF
|| GET_CODE (XEXP (src, 0)) == LABEL_REF)
- && gcn_vgpr_register_operand (dest, mode))
+ && gcn_vgpr_equivalent_register_operand (dest, mode))
return true;
if (MEM_P (dest)
@@ -2582,12 +2621,12 @@ gcn_valid_move_p (machine_mode mode, rtx dest, rtx src)
if (MEM_P (dest)
&& AS_ANY_DS_P (MEM_ADDR_SPACE (dest))
&& gcn_ds_address_p (XEXP (dest, 0))
- && gcn_vgpr_register_operand (src, mode))
+ && gcn_vgpr_equivalent_register_operand (src, mode))
return true;
else if (MEM_P (src)
&& AS_ANY_DS_P (MEM_ADDR_SPACE (src))
&& gcn_ds_address_p (XEXP (src, 0))
- && gcn_vgpr_register_operand (dest, mode))
+ && gcn_vgpr_equivalent_register_operand (dest, mode))
return true;
return false;
@@ -3919,6 +3958,11 @@ gcn_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
if (in)
return (LOAD_COST + 2) * nregs;
return STORE_COST * nregs;
+ case AVGPR_REGS:
+ case ALL_VGPR_REGS:
+ if (in)
+ return (LOAD_COST + (TARGET_CDNA2_PLUS ? 2 : 4)) * nregs;
+ return (STORE_COST + (TARGET_CDNA2_PLUS ? 0 : 2)) * nregs;
case ALL_REGS:
case ALL_GPR_REGS:
case SRCDST_REGS:
@@ -3938,6 +3982,15 @@ gcn_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
static int
gcn_register_move_cost (machine_mode, reg_class_t dst, reg_class_t src)
{
+ if (src == AVGPR_REGS)
+ {
+ if (dst == AVGPR_REGS)
+ return TARGET_CDNA1 ? 6 : 2;
+ if (dst != VGPR_REGS)
+ return 6;
+ }
+ if (dst == AVGPR_REGS && src != VGPR_REGS)
+ return 6;
/* Increase cost of moving from and to vector registers. While this is
fast in hardware (I think), it has hidden cost of setting up the exec
flags. */
@@ -5372,6 +5425,7 @@ gcn_vmem_insn_p (attr_type type)
case TYPE_VOPC:
case TYPE_VOP3A:
case TYPE_VOP3B:
+ case TYPE_VOP3P_MAI:
case TYPE_VOP_SDWA:
case TYPE_VOP_DPP:
case TYPE_MULT:
@@ -6098,7 +6152,7 @@ output_file_start (void)
void
gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
{
- int sgpr, vgpr;
+ int sgpr, vgpr, avgpr;
bool xnack_enabled = false;
fputs ("\n\n", file);
@@ -6123,6 +6177,10 @@ gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
if (df_regs_ever_live_p (FIRST_VGPR_REG + vgpr))
break;
vgpr++;
+ for (avgpr = 255; avgpr >= 0; avgpr--)
+ if (df_regs_ever_live_p (FIRST_AVGPR_REG + avgpr))
+ break;
+ avgpr++;
if (!leaf_function_p ())
{
@@ -6131,6 +6189,8 @@ gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
vgpr = MAX_NORMAL_VGPR_COUNT;
if (sgpr < MAX_NORMAL_SGPR_COUNT)
sgpr = MAX_NORMAL_SGPR_COUNT;
+ if (avgpr < MAX_NORMAL_AVGPR_COUNT)
+ avgpr = MAX_NORMAL_AVGPR_COUNT;
}
/* The gfx90a accum_offset field can't represent 0 registers. */
@@ -6234,8 +6294,8 @@ gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
cfun->machine->kernarg_segment_alignment,
LDS_SIZE,
sgpr, vgpr);
- if (gcn_arch == PROCESSOR_GFX90a)
- fprintf (file, " .agpr_count: 0\n"); // AGPRs are not used, yet
+ if (gcn_arch == PROCESSOR_GFX90a || gcn_arch == PROCESSOR_GFX908)
+ fprintf (file, " .agpr_count: %i\n", avgpr);
fputs (" .end_amdgpu_metadata\n", file);
#endif
@@ -6331,6 +6391,9 @@ print_reg (FILE *file, rtx x)
else if (VGPR_REGNO_P (REGNO (x)))
fprintf (file, "v[%i:%i]", REGNO (x) - FIRST_VGPR_REG,
REGNO (x) - FIRST_VGPR_REG + 1);
+ else if (AVGPR_REGNO_P (REGNO (x)))
+ fprintf (file, "a[%i:%i]", REGNO (x) - FIRST_AVGPR_REG,
+ REGNO (x) - FIRST_AVGPR_REG + 1);
else if (REGNO (x) == FLAT_SCRATCH_REG)
fprintf (file, "flat_scratch");
else if (REGNO (x) == EXEC_REG)
@@ -6349,6 +6412,9 @@ print_reg (FILE *file, rtx x)
else if (VGPR_REGNO_P (REGNO (x)))
fprintf (file, "v[%i:%i]", REGNO (x) - FIRST_VGPR_REG,
REGNO (x) - FIRST_VGPR_REG + 3);
+ else if (AVGPR_REGNO_P (REGNO (x)))
+ fprintf (file, "a[%i:%i]", REGNO (x) - FIRST_AVGPR_REG,
+ REGNO (x) - FIRST_AVGPR_REG + 3);
else
gcc_unreachable ();
}
@@ -7262,6 +7328,8 @@ gcn_dwarf_register_number (unsigned int regno)
}
else if (VGPR_REGNO_P (regno))
return (regno - FIRST_VGPR_REG + 2560);
+ else if (AVGPR_REGNO_P (regno))
+ return (regno - FIRST_AVGPR_REG + 3072);
/* Otherwise, there's nothing sensible to do. */
return regno + 100000;
@@ -142,6 +142,9 @@
#define FIRST_VGPR_REG 160
#define VGPR_REGNO(N) ((N)+FIRST_VGPR_REG)
#define LAST_VGPR_REG 415
+#define FIRST_AVGPR_REG 416
+#define AVGPR_REGNO(N) ((N)+FIRST_AVGPR_REG)
+#define LAST_AVGPR_REG 671
/* Frame Registers, and other registers */
@@ -153,10 +156,10 @@
#define RETURN_VALUE_REG 168 /* Must be divisible by 4. */
#define STATIC_CHAIN_REGNUM 30
#define WORK_ITEM_ID_Z_REG 162
-#define SOFT_ARG_REG 416
-#define FRAME_POINTER_REGNUM 418
-#define DWARF_LINK_REGISTER 420
-#define FIRST_PSEUDO_REGISTER 421
+#define SOFT_ARG_REG 672
+#define FRAME_POINTER_REGNUM 674
+#define DWARF_LINK_REGISTER 676
+#define FIRST_PSEUDO_REGISTER 677
#define FIRST_PARM_REG (FIRST_SGPR_REG + 24)
#define FIRST_VPARM_REG (FIRST_VGPR_REG + 8)
@@ -172,6 +175,7 @@
#define SGPR_OR_VGPR_REGNO_P(N) ((N)>=FIRST_VGPR_REG && (N) <= LAST_SGPR_REG)
#define SGPR_REGNO_P(N) ((N) <= LAST_SGPR_REG)
#define VGPR_REGNO_P(N) ((N)>=FIRST_VGPR_REG && (N) <= LAST_VGPR_REG)
+#define AVGPR_REGNO_P(N) ((N)>=FIRST_AVGPR_REG && (N) <= LAST_AVGPR_REG)
#define SSRC_REGNO_P(N) ((N) <= SCC_REG && (N) != VCCZ_REG)
#define SDST_REGNO_P(N) ((N) <= EXEC_HI_REG && (N) != VCCZ_REG)
#define CC_REG_P(X) (REG_P (X) && CC_REGNO_P (REGNO (X)))
@@ -202,7 +206,7 @@
1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, \
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
- /* VGRPs */ \
+ /* VGPRs */ \
0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
@@ -219,6 +223,23 @@
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ /* Accumulation VGPRs */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
/* Other registers. */ \
1, 1, 1, 1, 1 \
}
@@ -240,7 +261,7 @@
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
- /* VGRPs */ \
+ /* VGPRs */ \
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
@@ -257,6 +278,23 @@
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ /* Accumulation VGPRs */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
/* Other registers. */ \
1, 1, 1, 1, 1 \
}
@@ -316,6 +354,8 @@ enum reg_class
SGPR_SRC_REGS,
GENERAL_REGS,
VGPR_REGS,
+ AVGPR_REGS,
+ ALL_VGPR_REGS,
ALL_GPR_REGS,
SRCDST_REGS,
AFP_REGS,
@@ -341,6 +381,8 @@ enum reg_class
"SGPR_SRC_REGS", \
"GENERAL_REGS", \
"VGPR_REGS", \
+ "AVGPR_REGS", \
+ "ALL_VGPR_REGS", \
"ALL_GPR_REGS", \
"SRCDST_REGS", \
"AFP_REGS", \
@@ -353,40 +395,58 @@ enum reg_class
#define REG_CLASS_CONTENTS { \
/* NO_REGS. */ \
{0, 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
0, 0, 0, 0, \
0, 0, 0, 0, 0, 0}, \
/* SCC_CONDITIONAL_REG. */ \
{0, 0, 0, 0, \
NAMED_REG_MASK2 (SCC_REG), 0, 0, 0, \
- 0, 0, 0, 0, 0}, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0}, \
/* VCCZ_CONDITIONAL_REG. */ \
{0, 0, 0, NAMED_REG_MASK (VCCZ_REG), \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
0, 0, 0, 0, \
0, 0, 0, 0, 0, 0}, \
/* VCC_CONDITIONAL_REG. */ \
{0, 0, 0, NAMED_REG_MASK (VCC_LO_REG)|NAMED_REG_MASK (VCC_HI_REG), \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
0, 0, 0, 0, \
0, 0, 0, 0, 0, 0}, \
/* EXECZ_CONDITIONAL_REG. */ \
{0, 0, 0, 0, \
NAMED_REG_MASK2 (EXECZ_REG), 0, 0, 0, \
- 0, 0, 0, 0, 0}, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0}, \
/* ALL_CONDITIONAL_REGS. */ \
{0, 0, 0, NAMED_REG_MASK (VCCZ_REG), \
NAMED_REG_MASK2 (EXECZ_REG) | NAMED_REG_MASK2 (SCC_REG), 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
0, 0, 0, 0, 0, 0}, \
/* EXEC_MASK_REG. */ \
{0, 0, 0, NAMED_REG_MASK (EXEC_LO_REG) | NAMED_REG_MASK (EXEC_HI_REG), \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
0, 0, 0, 0, \
0, 0, 0, 0, 0, 0}, \
/* SGPR_REGS. */ \
{0xffffffff, 0xffffffff, 0xffffffff, 0xf1, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
0, 0, 0, 0, \
0, 0, 0, 0, 0, 0}, \
/* SGPR_EXEC_REGS. */ \
{0xffffffff, 0xffffffff, 0xffffffff, \
0xf1 | NAMED_REG_MASK (EXEC_LO_REG) | NAMED_REG_MASK (EXEC_HI_REG), \
0, 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
0, 0, 0, 0, 0, 0}, \
/* SGPR_VOP_SRC_REGS. */ \
{0xffffffff, 0xffffffff, 0xffffffff, \
@@ -394,12 +454,16 @@ enum reg_class
-NAMED_REG_MASK (EXEC_LO_REG) \
-NAMED_REG_MASK (EXEC_HI_REG), \
NAMED_REG_MASK2 (SCC_REG), 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
0, 0, 0, 0, 0, 0}, \
/* SGPR_MEM_SRC_REGS. */ \
{0xffffffff, 0xffffffff, 0xffffffff, \
0xffffffff-NAMED_REG_MASK (VCCZ_REG)-NAMED_REG_MASK (M0_REG) \
-NAMED_REG_MASK (EXEC_LO_REG)-NAMED_REG_MASK (EXEC_HI_REG), \
0, 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
0, 0, 0, 0, 0, 0}, \
/* SGPR_DST_REGS. */ \
{0xffffffff, 0xffffffff, 0xffffffff, \
@@ -409,30 +473,56 @@ enum reg_class
/* SGPR_SRC_REGS. */ \
{0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \
NAMED_REG_MASK2 (EXECZ_REG) | NAMED_REG_MASK2 (SCC_REG), 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
0, 0, 0, 0, 0, 0}, \
/* GENERAL_REGS. */ \
{0xffffffff, 0xffffffff, 0xffffffff, 0xf1, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
0, 0, 0, 0, \
0, 0, 0, 0, 0, 0}, \
/* VGPR_REGS. */ \
{0, 0, 0, 0, \
0, 0xffffffff, 0xffffffff, 0xffffffff, \
+ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \
+ 0xffffffff, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0}, \
+ /* AVGPR_REGS. */ \
+ {0, 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 0, 0xffffffff, 0xffffffff, 0xffffffff, \
+ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0}, \
+ /* ALL_VGPR_REGS. */ \
+ {0, 0, 0, 0, \
+ 0, 0xffffffff, 0xffffffff, 0xffffffff, \
+ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \
+ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0}, \
/* ALL_GPR_REGS. */ \
{0xffffffff, 0xffffffff, 0xffffffff, 0xf1, \
0, 0xffffffff, 0xffffffff, 0xffffffff, \
- 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0}, \
+ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \
+ 0xffffffff, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0}, \
/* SRCDST_REGS. */ \
{0xffffffff, 0xffffffff, 0xffffffff, \
0xffffffff-NAMED_REG_MASK (VCCZ_REG), \
0, 0xffffffff, 0xffffffff, 0xffffffff, \
- 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0}, \
+ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \
+ 0xffffffff, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0}, \
/* AFP_REGS. */ \
{0, 0, 0, 0, \
+ 0, 0, 0, 0, \
+ 0, 0, 0, 0, \
0, 0, 0, 0, \
0, 0, 0, 0, 0, 0xf}, \
/* ALL_REGS. */ \
{0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \
+ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \
+ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, \
0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0 }}
@@ -537,6 +627,34 @@ enum gcn_address_spaces
"v236", "v237", "v238", "v239", "v240", "v241", "v242", "v243", "v244", \
"v245", "v246", "v247", "v248", "v249", "v250", "v251", "v252", "v253", \
"v254", "v255", \
+ "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8", "a9", "a10", \
+ "a11", "a12", "a13", "a14", "a15", "a16", "a17", "a18", "a19", "a20", \
+ "a21", "a22", "a23", "a24", "a25", "a26", "a27", "a28", "a29", "a30", \
+ "a31", "a32", "a33", "a34", "a35", "a36", "a37", "a38", "a39", "a40", \
+ "a41", "a42", "a43", "a44", "a45", "a46", "a47", "a48", "a49", "a50", \
+ "a51", "a52", "a53", "a54", "a55", "a56", "a57", "a58", "a59", "a60", \
+ "a61", "a62", "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", \
+ "a71", "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80", \
+ "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89", "a90", \
+ "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98", "a99", "a100", \
+ "a101", "a102", "a103", "a104", "a105", "a106", "a107", "a108", "a109", \
+ "a110", "a111", "a112", "a113", "a114", "a115", "a116", "a117", "a118", \
+ "a119", "a120", "a121", "a122", "a123", "a124", "a125", "a126", "a127", \
+ "a128", "a129", "a130", "a131", "a132", "a133", "a134", "a135", "a136", \
+ "a137", "a138", "a139", "a140", "a141", "a142", "a143", "a144", "a145", \
+ "a146", "a147", "a148", "a149", "a150", "a151", "a152", "a153", "a154", \
+ "a155", "a156", "a157", "a158", "a159", "a160", "a161", "a162", "a163", \
+ "a164", "a165", "a166", "a167", "a168", "a169", "a170", "a171", "a172", \
+ "a173", "a174", "a175", "a176", "a177", "a178", "a179", "a180", "a181", \
+ "a182", "a183", "a184", "a185", "a186", "a187", "a188", "a189", "a190", \
+ "a191", "a192", "a193", "a194", "a195", "a196", "a197", "a198", "a199", \
+ "a200", "a201", "a202", "a203", "a204", "a205", "a206", "a207", "a208", \
+ "a209", "a210", "a211", "a212", "a213", "a214", "a215", "a216", "a217", \
+ "a218", "a219", "a220", "a221", "a222", "a223", "a224", "a225", "a226", \
+ "a227", "a228", "a229", "a230", "a231", "a232", "a233", "a234", "a235", \
+ "a236", "a237", "a238", "a239", "a240", "a241", "a242", "a243", "a244", \
+ "a245", "a246", "a247", "a248", "a249", "a250", "a251", "a252", "a253", \
+ "a254", "a255", \
"?ap0", "?ap1", "?fp0", "?fp1", "?dwlr" }
#define PRINT_OPERAND(FILE, X, CODE) print_operand(FILE, X, CODE)
@@ -51,13 +51,15 @@
(EXECZ_REG 128)
(SCC_REG 129)
(FIRST_VGPR_REG 160)
- (LAST_VGPR_REG 415)])
+ (LAST_VGPR_REG 415)
+ (FIRST_AVGPR_REG 416)
+ (LAST_AVGPR_REG 671)])
(define_constants
[(SP_REGNUM 16)
(LR_REGNUM 18)
- (AP_REGNUM 416)
- (FP_REGNUM 418)])
+ (AP_REGNUM 672)
+ (FP_REGNUM 674)])
(define_c_enum "unspecv" [
UNSPECV_PROLOGUE_USE
@@ -163,6 +165,11 @@
; vdst: vgpr0-255
; sdst: sgpr0-103/vcc/tba/tma/ttmp0-11
;
+; vop3p_mai - vector, three inputs, one vector output
+; vsrc0,vsrc1,vsrc2: inline constant -16 to 64, fp inline immediate,
+; (acc or arch) vgpr0-255
+; vdst: (acc or arch) vgpr0-255
+;
; vop_sdwa - second dword for vop1/vop2/vopc for specifying sub-dword address
; src0: vgpr0-255
; dst_sel: BYTE_0-3, WORD_0-1, DWORD
@@ -221,7 +228,8 @@
(define_attr "type"
"unknown,sop1,sop2,sopk,sopc,sopp,smem,ds,vop2,vop1,vopc,
- vop3a,vop3b,vop_sdwa,vop_dpp,mubuf,mtbuf,flat,mult,vmult"
+ vop3a,vop3b,vop3p_mai,vop_sdwa,vop_dpp,mubuf,mtbuf,flat,mult,
+ vmult"
(const_string "unknown"))
; Set if instruction is executed in scalar or vector unit
@@ -530,9 +538,9 @@
(define_insn "*mov<mode>_insn"
[(set (match_operand:SISF 0 "nonimmediate_operand"
- "=SD,SD,SD,SD,RB,Sm,RS,v,Sg, v, v,RF,v,RLRG, v,SD, v,RM")
+ "=SD,SD,SD,SD,RB,Sm,RS,v,Sg, v,vb,RF,v,RLRG, v,SD,vb,RM, v, a, b")
(match_operand:SISF 1 "gcn_load_operand"
- "SSA, J, B,RB,Sm,RS,Sm,v, v,Sv,RF, v,B, v,RLRG, Y,RM, v"))]
+ "SSA, J, B,RB,Sm,RS,Sm,v, v,Sv,RF,vb,B, v,RLRG, Y,RM,vb,^a, v, b"))]
""
"@
s_mov_b32\t%0, %1
@@ -552,20 +560,23 @@
ds_read_b32\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
s_mov_b32\t%0, %1
global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
- global_store_dword\t%A0, %1%O0%g0"
+ global_store_dword\t%A0, %1%O0%g0
+ v_accvgpr_read_b32\t%0, %1
+ v_accvgpr_write_b32\t%0, %1
+ v_accvgpr_mov_b32\t%0, %1"
[(set_attr "type" "sop1,sopk,sop1,smem,smem,smem,smem,vop1,vop3a,vop3a,flat,
- flat,vop1,ds,ds,sop1,flat,flat")
- (set_attr "exec" "*,*,*,*,*,*,*,*,none,none,*,*,*,*,*,*,*,*")
- (set_attr "length" "4,4,8,12,12,12,12,4,8,8,12,12,8,12,12,8,12,12")])
+ flat,vop1,ds,ds,sop1,flat,flat,vop3p_mai,vop3p_mai,vop1")
+ (set_attr "exec" "*,*,*,*,*,*,*,*,none,none,*,*,*,*,*,*,*,*,*,*,*")
+ (set_attr "length" "4,4,8,12,12,12,12,4,8,8,12,12,8,12,12,8,12,12,8,8,4")])
; 8/16bit move pattern
; TODO: implement combined load and zero_extend, but *only* for -msram-ecc=on
(define_insn "*mov<mode>_insn"
[(set (match_operand:QIHI 0 "nonimmediate_operand"
- "=SD,SD,SD,v,Sg, v, v,RF,v,RLRG, v, v,RM")
+ "=SD,SD,SD,v,Sg, v,vb,RF,v,RLRG, v,vb,RM, v, a, b")
(match_operand:QIHI 1 "gcn_load_operand"
- "SSA, J, B,v, v,Sv,RF, v,B, v,RLRG,RM, v"))]
+ "SSA, J, B,v, v,Sv,RF,vb,B, v,RLRG,RM,vb,^a, v, b"))]
"gcn_valid_move_p (<MODE>mode, operands[0], operands[1])"
"@
s_mov_b32\t%0, %1
@@ -580,19 +591,22 @@
ds_write%b0\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
ds_read%u1\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
- global_store%s0\t%A0, %1%O0%g0"
+ global_store%s0\t%A0, %1%O0%g0
+ v_accvgpr_read_b32\t%0, %1
+ v_accvgpr_write_b32\t%0, %1
+ v_accvgpr_mov_b32\t%0, %1"
[(set_attr "type"
- "sop1,sopk,sop1,vop1,vop3a,vop3a,flat,flat,vop1,ds,ds,flat,flat")
- (set_attr "exec" "*,*,*,*,none,none,*,*,*,*,*,*,*")
- (set_attr "length" "4,4,8,4,4,4,12,12,8,12,12,12,12")])
+ "sop1,sopk,sop1,vop1,vop3a,vop3a,flat,flat,vop1,ds,ds,flat,flat, vop3p_mai,vop3p_mai,vop1")
+ (set_attr "exec" "*,*,*,*,none,none,*,*,*,*,*,*,*,*,*,*")
+ (set_attr "length" "4,4,8,4,4,4,12,12,8,12,12,12,12,8,8,4")])
; 64bit move pattern
(define_insn_and_split "*mov<mode>_insn"
[(set (match_operand:DIDF 0 "nonimmediate_operand"
- "=SD,SD,SD,RS,Sm,v, v,Sg, v, v,RF,RLRG, v, v,RM")
+ "=SD,SD,SD,RS,Sm,v, v,Sg, v,vb,RF,RLRG, v,vb,RM, v, a, b")
(match_operand:DIDF 1 "general_operand"
- "SSA, C,DB,Sm,RS,v,DB, v,Sv,RF, v, v,RLRG,RM, v"))]
+ "SSA, C,DB,Sm,RS,v,DB, v,Sv,RF,vb, v,RLRG,RM,vb,^a, v, b"))]
"GET_CODE(operands[1]) != SYMBOL_REF"
"@
s_mov_b64\t%0, %1
@@ -609,7 +623,10 @@
ds_write_b64\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
ds_read_b64\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
global_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
- global_store_dwordx2\t%A0, %1%O0%g0"
+ global_store_dwordx2\t%A0, %1%O0%g0
+ #
+ #
+ #"
"reload_completed
&& ((!MEM_P (operands[0]) && !MEM_P (operands[1])
&& !gcn_sgpr_move_p (operands[0], operands[1]))
@@ -640,16 +657,16 @@
}
}
[(set_attr "type" "sop1,sop1,mult,smem,smem,vmult,vmult,vmult,vmult,flat,
- flat,ds,ds,flat,flat")
- (set_attr "length" "4,8,*,12,12,*,*,*,*,12,12,12,12,12,12")])
+ flat,ds,ds,flat,flat,vmult,vmult,vmult")
+ (set_attr "length" "4,8,*,12,12,*,*,*,*,12,12,12,12,12,12,*,*,*")])
; 128-bit move.
(define_insn_and_split "*movti_insn"
[(set (match_operand:TI 0 "nonimmediate_operand"
- "=SD,RS,Sm,RF, v,v, v,SD,RM, v,RL, v")
+ "=SD,RS,Sm,RF,vb,v, v,SD,RM,vb,RL, v, v, a, b")
(match_operand:TI 1 "general_operand"
- "SSB,Sm,RS, v,RF,v,Sv, v, v,RM, v,RL"))]
+ "SSB,Sm,RS,vb,RF,v,Sv, v,vb,RM, v,RL,^a, v, b"))]
""
"@
#
@@ -663,7 +680,10 @@
global_store_dwordx4\t%A0, %1%O0%g0
global_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
ds_write_b128\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
- ds_read_b128\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)"
+ ds_read_b128\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
+ #
+ #
+ #"
"reload_completed
&& REG_P (operands[0])
&& (REG_P (operands[1]) || GET_CODE (operands[1]) == CONST_INT)"
@@ -684,9 +704,9 @@
operands[1] = gcn_operand_part (TImode, operands[1], 0);
}
[(set_attr "type" "mult,smem,smem,flat,flat,vmult,vmult,vmult,flat,flat,\
- ds,ds")
- (set_attr "delayeduse" "*,*,yes,*,*,*,*,*,yes,*,*,*")
- (set_attr "length" "*,12,12,12,12,*,*,*,12,12,12,12")])
+ ds,ds,vmult,vmult,vmult")
+ (set_attr "delayeduse" "*,*,yes,*,*,*,*,*,yes,*,*,*,*,*,*")
+ (set_attr "length" "*,12,12,12,12,*,*,*,12,12,12,12,*,*,*")])
;; }}}
;; {{{ Prologue/Epilogue
@@ -70,6 +70,34 @@
return VGPR_REGNO_P (REGNO (op)) || REGNO (op) >= FIRST_PSEUDO_REGISTER;
})
+;; Match a register (or a SUBREG of one) that is either a hard accumulator
+;; VGPR or a pseudo register.
+(define_predicate "gcn_avgpr_register_operand"
+ (match_operand 0 "register_operand")
+ {
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ if (!REG_P (op))
+ return false;
+
+ return AVGPR_REGNO_P (REGNO (op)) || REGNO (op) >= FIRST_PSEUDO_REGISTER;
+})
+
+;; Match a register (or a SUBREG of one) that is a hard accumulator VGPR;
+;; unlike gcn_avgpr_register_operand, pseudo registers are rejected.
+(define_predicate "gcn_avgpr_hard_register_operand"
+ (match_operand 0 "register_operand")
+ {
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ if (!REG_P (op))
+ return false;
+
+ return AVGPR_REGNO_P (REGNO (op));
+})
+
(define_predicate "gcn_inline_immediate_operand"
(match_code "const_int,const_double,const_vector")
{