@@ -29,6 +29,48 @@ VECTOR_MODE (FLOAT, HF, 64); /* V64HF */
VECTOR_MODE (FLOAT, SF, 64); /* V64SF */
VECTOR_MODE (FLOAT, DF, 64); /* V64DF */
+/* Artificial vector modes, for when vector masking doesn't work (yet). */
+VECTOR_MODE (INT, QI, 32); /* V32QI */
+VECTOR_MODE (INT, HI, 32); /* V32HI */
+VECTOR_MODE (INT, SI, 32); /* V32SI */
+VECTOR_MODE (INT, DI, 32); /* V32DI */
+VECTOR_MODE (INT, TI, 32); /* V32TI */
+VECTOR_MODE (FLOAT, HF, 32); /* V32HF */
+VECTOR_MODE (FLOAT, SF, 32); /* V32SF */
+VECTOR_MODE (FLOAT, DF, 32); /* V32DF */
+VECTOR_MODE (INT, QI, 16); /* V16QI */
+VECTOR_MODE (INT, HI, 16); /* V16HI */
+VECTOR_MODE (INT, SI, 16); /* V16SI */
+VECTOR_MODE (INT, DI, 16); /* V16DI */
+VECTOR_MODE (INT, TI, 16); /* V16TI */
+VECTOR_MODE (FLOAT, HF, 16); /* V16HF */
+VECTOR_MODE (FLOAT, SF, 16); /* V16SF */
+VECTOR_MODE (FLOAT, DF, 16); /* V16DF */
+VECTOR_MODE (INT, QI, 8); /* V8QI */
+VECTOR_MODE (INT, HI, 8); /* V8HI */
+VECTOR_MODE (INT, SI, 8); /* V8SI */
+VECTOR_MODE (INT, DI, 8); /* V8DI */
+VECTOR_MODE (INT, TI, 8); /* V8TI */
+VECTOR_MODE (FLOAT, HF, 8); /* V8HF */
+VECTOR_MODE (FLOAT, SF, 8); /* V8SF */
+VECTOR_MODE (FLOAT, DF, 8); /* V8DF */
+VECTOR_MODE (INT, QI, 4); /* V4QI */
+VECTOR_MODE (INT, HI, 4); /* V4HI */
+VECTOR_MODE (INT, SI, 4); /* V4SI */
+VECTOR_MODE (INT, DI, 4); /* V4DI */
+VECTOR_MODE (INT, TI, 4); /* V4TI */
+VECTOR_MODE (FLOAT, HF, 4); /* V4HF */
+VECTOR_MODE (FLOAT, SF, 4); /* V4SF */
+VECTOR_MODE (FLOAT, DF, 4); /* V4DF */
+VECTOR_MODE (INT, QI, 2); /* V2QI */
+VECTOR_MODE (INT, HI, 2); /* V2HI */
+VECTOR_MODE (INT, SI, 2); /* V2SI */
+VECTOR_MODE (INT, DI, 2); /* V2DI */
+VECTOR_MODE (INT, TI, 2); /* V2TI */
+VECTOR_MODE (FLOAT, HF, 2); /* V2HF */
+VECTOR_MODE (FLOAT, SF, 2); /* V2SF */
+VECTOR_MODE (FLOAT, DF, 2); /* V2DF */
+
/* Vector units handle reads independently and thus no large alignment
needed. */
ADJUST_ALIGNMENT (V64QI, 1);
@@ -39,3 +81,43 @@ ADJUST_ALIGNMENT (V64TI, 16);
ADJUST_ALIGNMENT (V64HF, 2);
ADJUST_ALIGNMENT (V64SF, 4);
ADJUST_ALIGNMENT (V64DF, 8);
+ADJUST_ALIGNMENT (V32QI, 1);
+ADJUST_ALIGNMENT (V32HI, 2);
+ADJUST_ALIGNMENT (V32SI, 4);
+ADJUST_ALIGNMENT (V32DI, 8);
+ADJUST_ALIGNMENT (V32TI, 16);
+ADJUST_ALIGNMENT (V32HF, 2);
+ADJUST_ALIGNMENT (V32SF, 4);
+ADJUST_ALIGNMENT (V32DF, 8);
+ADJUST_ALIGNMENT (V16QI, 1);
+ADJUST_ALIGNMENT (V16HI, 2);
+ADJUST_ALIGNMENT (V16SI, 4);
+ADJUST_ALIGNMENT (V16DI, 8);
+ADJUST_ALIGNMENT (V16TI, 16);
+ADJUST_ALIGNMENT (V16HF, 2);
+ADJUST_ALIGNMENT (V16SF, 4);
+ADJUST_ALIGNMENT (V16DF, 8);
+ADJUST_ALIGNMENT (V8QI, 1);
+ADJUST_ALIGNMENT (V8HI, 2);
+ADJUST_ALIGNMENT (V8SI, 4);
+ADJUST_ALIGNMENT (V8DI, 8);
+ADJUST_ALIGNMENT (V8TI, 16);
+ADJUST_ALIGNMENT (V8HF, 2);
+ADJUST_ALIGNMENT (V8SF, 4);
+ADJUST_ALIGNMENT (V8DF, 8);
+ADJUST_ALIGNMENT (V4QI, 1);
+ADJUST_ALIGNMENT (V4HI, 2);
+ADJUST_ALIGNMENT (V4SI, 4);
+ADJUST_ALIGNMENT (V4DI, 8);
+ADJUST_ALIGNMENT (V4TI, 16);
+ADJUST_ALIGNMENT (V4HF, 2);
+ADJUST_ALIGNMENT (V4SF, 4);
+ADJUST_ALIGNMENT (V4DF, 8);
+ADJUST_ALIGNMENT (V2QI, 1);
+ADJUST_ALIGNMENT (V2HI, 2);
+ADJUST_ALIGNMENT (V2SI, 4);
+ADJUST_ALIGNMENT (V2DI, 8);
+ADJUST_ALIGNMENT (V2TI, 16);
+ADJUST_ALIGNMENT (V2HF, 2);
+ADJUST_ALIGNMENT (V2SF, 4);
+ADJUST_ALIGNMENT (V2DF, 8);
@@ -34,8 +34,6 @@ extern rtx gcn_expand_scalar_to_vector_address (machine_mode, rtx, rtx, rtx);
extern void gcn_expand_vector_init (rtx, rtx);
extern bool gcn_flat_address_p (rtx, machine_mode);
extern bool gcn_fp_constant_p (rtx, bool);
-extern rtx gcn_full_exec ();
-extern rtx gcn_full_exec_reg ();
extern rtx gcn_gen_undef (machine_mode);
extern bool gcn_global_address_p (rtx);
extern tree gcn_goacc_adjust_private_decl (location_t, tree var, int level);
@@ -67,8 +65,6 @@ extern rtx gcn_operand_part (machine_mode, rtx, int);
extern bool gcn_regno_mode_code_ok_for_base_p (int, machine_mode,
addr_space_t, int, int);
extern reg_class gcn_regno_reg_class (int regno);
-extern rtx gcn_scalar_exec ();
-extern rtx gcn_scalar_exec_reg ();
extern bool gcn_scalar_flat_address_p (rtx);
extern bool gcn_scalar_flat_mem_p (rtx);
extern bool gcn_sgpr_move_p (rtx, rtx);
@@ -105,9 +101,11 @@ extern gimple_opt_pass *make_pass_omp_gcn (gcc::context *ctxt);
inline bool
vgpr_1reg_mode_p (machine_mode mode)
{
- return (mode == SImode || mode == SFmode || mode == HImode || mode == QImode
- || mode == V64QImode || mode == V64HImode || mode == V64SImode
- || mode == V64HFmode || mode == V64SFmode || mode == BImode);
+ if (VECTOR_MODE_P (mode))
+ mode = GET_MODE_INNER (mode);
+
+ return (mode == SImode || mode == SFmode || mode == HImode || mode == HFmode
+ || mode == QImode || mode == BImode);
}
/* Return true if MODE is valid for 1 SGPR register. */
@@ -124,8 +122,10 @@ sgpr_1reg_mode_p (machine_mode mode)
inline bool
vgpr_2reg_mode_p (machine_mode mode)
{
- return (mode == DImode || mode == DFmode
- || mode == V64DImode || mode == V64DFmode);
+ if (VECTOR_MODE_P (mode))
+ mode = GET_MODE_INNER (mode);
+
+ return (mode == DImode || mode == DFmode);
}
/* Return true if MODE can be handled directly by VGPR operations. */
@@ -133,9 +133,7 @@ vgpr_2reg_mode_p (machine_mode mode)
inline bool
vgpr_vector_mode_p (machine_mode mode)
{
- return (mode == V64QImode || mode == V64HImode
- || mode == V64SImode || mode == V64DImode
- || mode == V64HFmode || mode == V64SFmode || mode == V64DFmode);
+ return VECTOR_MODE_P (mode);
}
@@ -17,88 +17,243 @@
;; {{{ Vector iterators
; Vector modes for specific types
-; (This will make more sense when there are multiple vector sizes)
(define_mode_iterator V_QI
- [V64QI])
+ [V2QI V4QI V8QI V16QI V32QI V64QI])
(define_mode_iterator V_HI
- [V64HI])
+ [V2HI V4HI V8HI V16HI V32HI V64HI])
(define_mode_iterator V_HF
- [V64HF])
+ [V2HF V4HF V8HF V16HF V32HF V64HF])
(define_mode_iterator V_SI
- [V64SI])
+ [V2SI V4SI V8SI V16SI V32SI V64SI])
(define_mode_iterator V_SF
- [V64SF])
+ [V2SF V4SF V8SF V16SF V32SF V64SF])
(define_mode_iterator V_DI
- [V64DI])
+ [V2DI V4DI V8DI V16DI V32DI V64DI])
(define_mode_iterator V_DF
- [V64DF])
+ [V2DF V4DF V8DF V16DF V32DF V64DF])
+
+(define_mode_iterator V64_SI
+ [V64SI])
+(define_mode_iterator V64_DI
+ [V64DI])
; Vector modes for sub-dword modes
(define_mode_iterator V_QIHI
- [V64QI V64HI])
+ [V2QI V2HI
+ V4QI V4HI
+ V8QI V8HI
+ V16QI V16HI
+ V32QI V32HI
+ V64QI V64HI])
; Vector modes for one vector register
(define_mode_iterator V_1REG
- [V64QI V64HI V64SI V64HF V64SF])
+ [V2QI V2HI V2SI V2HF V2SF
+ V4QI V4HI V4SI V4HF V4SF
+ V8QI V8HI V8SI V8HF V8SF
+ V16QI V16HI V16SI V16HF V16SF
+ V32QI V32HI V32SI V32HF V32SF
+ V64QI V64HI V64SI V64HF V64SF])
(define_mode_iterator V_INT_1REG
- [V64QI V64HI V64SI])
+ [V2QI V2HI V2SI
+ V4QI V4HI V4SI
+ V8QI V8HI V8SI
+ V16QI V16HI V16SI
+ V32QI V32HI V32SI
+ V64QI V64HI V64SI])
(define_mode_iterator V_INT_1REG_ALT
- [V64QI V64HI V64SI])
+ [V2QI V2HI V2SI
+ V4QI V4HI V4SI
+ V8QI V8HI V8SI
+ V16QI V16HI V16SI
+ V32QI V32HI V32SI
+ V64QI V64HI V64SI])
(define_mode_iterator V_FP_1REG
- [V64HF V64SF])
+ [V2HF V2SF
+ V4HF V4SF
+ V8HF V8SF
+ V16HF V16SF
+ V32HF V32SF
+ V64HF V64SF])
+
+; V64_* modes are for where more general support is unimplemented
+; (e.g. reductions)
+(define_mode_iterator V64_1REG
+ [V64QI V64HI V64SI V64HF V64SF])
+(define_mode_iterator V64_INT_1REG
+ [V64QI V64HI V64SI])
; Vector modes for two vector registers
(define_mode_iterator V_2REG
+ [V2DI V2DF
+ V4DI V4DF
+ V8DI V8DF
+ V16DI V16DF
+ V32DI V32DF
+ V64DI V64DF])
+
+(define_mode_iterator V64_2REG
[V64DI V64DF])
; Vector modes with native support
(define_mode_iterator V_noQI
- [V64HI V64HF V64SI V64SF V64DI V64DF])
+ [V2HI V2HF V2SI V2SF V2DI V2DF
+ V4HI V4HF V4SI V4SF V4DI V4DF
+ V8HI V8HF V8SI V8SF V8DI V8DF
+ V16HI V16HF V16SI V16SF V16DI V16DF
+ V32HI V32HF V32SI V32SF V32DI V32DF
+ V64HI V64HF V64SI V64SF V64DI V64DF])
(define_mode_iterator V_noHI
- [V64HF V64SI V64SF V64DI V64DF])
+ [V2HF V2SI V2SF V2DI V2DF
+ V4HF V4SI V4SF V4DI V4DF
+ V8HF V8SI V8SF V8DI V8DF
+ V16HF V16SI V16SF V16DI V16DF
+ V32HF V32SI V32SF V32DI V32DF
+ V64HF V64SI V64SF V64DI V64DF])
(define_mode_iterator V_INT_noQI
- [V64HI V64SI V64DI])
+ [V2HI V2SI V2DI
+ V4HI V4SI V4DI
+ V8HI V8SI V8DI
+ V16HI V16SI V16DI
+ V32HI V32SI V32DI
+ V64HI V64SI V64DI])
(define_mode_iterator V_INT_noHI
- [V64SI V64DI])
+ [V2SI V2DI
+ V4SI V4DI
+ V8SI V8DI
+ V16SI V16DI
+ V32SI V32DI
+ V64SI V64DI])
; All of above
(define_mode_iterator V_ALL
- [V64QI V64HI V64HF V64SI V64SF V64DI V64DF])
+ [V2QI V2HI V2HF V2SI V2SF V2DI V2DF
+ V4QI V4HI V4HF V4SI V4SF V4DI V4DF
+ V8QI V8HI V8HF V8SI V8SF V8DI V8DF
+ V16QI V16HI V16HF V16SI V16SF V16DI V16DF
+ V32QI V32HI V32HF V32SI V32SF V32DI V32DF
+ V64QI V64HI V64HF V64SI V64SF V64DI V64DF])
(define_mode_iterator V_ALL_ALT
- [V64QI V64HI V64HF V64SI V64SF V64DI V64DF])
+ [V2QI V2HI V2HF V2SI V2SF V2DI V2DF
+ V4QI V4HI V4HF V4SI V4SF V4DI V4DF
+ V8QI V8HI V8HF V8SI V8SF V8DI V8DF
+ V16QI V16HI V16HF V16SI V16SF V16DI V16DF
+ V32QI V32HI V32HF V32SI V32SF V32DI V32DF
+ V64QI V64HI V64HF V64SI V64SF V64DI V64DF])
(define_mode_iterator V_INT
- [V64QI V64HI V64SI V64DI])
+ [V2QI V2HI V2SI V2DI
+ V4QI V4HI V4SI V4DI
+ V8QI V8HI V8SI V8DI
+ V16QI V16HI V16SI V16DI
+ V32QI V32HI V32SI V32DI
+ V64QI V64HI V64SI V64DI])
(define_mode_iterator V_FP
+ [V2HF V2SF V2DF
+ V4HF V4SF V4DF
+ V8HF V8SF V8DF
+ V16HF V16SF V16DF
+ V32HF V32SF V32DF
+ V64HF V64SF V64DF])
+
+(define_mode_iterator V64_ALL
+ [V64QI V64HI V64HF V64SI V64SF V64DI V64DF])
+(define_mode_iterator V64_FP
[V64HF V64SF V64DF])
(define_mode_attr scalar_mode
- [(V64QI "qi") (V64HI "hi") (V64SI "si")
+ [(V2QI "qi") (V2HI "hi") (V2SI "si")
+ (V2HF "hf") (V2SF "sf") (V2DI "di") (V2DF "df")
+ (V4QI "qi") (V4HI "hi") (V4SI "si")
+ (V4HF "hf") (V4SF "sf") (V4DI "di") (V4DF "df")
+ (V8QI "qi") (V8HI "hi") (V8SI "si")
+ (V8HF "hf") (V8SF "sf") (V8DI "di") (V8DF "df")
+ (V16QI "qi") (V16HI "hi") (V16SI "si")
+ (V16HF "hf") (V16SF "sf") (V16DI "di") (V16DF "df")
+ (V32QI "qi") (V32HI "hi") (V32SI "si")
+ (V32HF "hf") (V32SF "sf") (V32DI "di") (V32DF "df")
+ (V64QI "qi") (V64HI "hi") (V64SI "si")
(V64HF "hf") (V64SF "sf") (V64DI "di") (V64DF "df")])
(define_mode_attr SCALAR_MODE
- [(V64QI "QI") (V64HI "HI") (V64SI "SI")
+ [(V2QI "QI") (V2HI "HI") (V2SI "SI")
+ (V2HF "HF") (V2SF "SF") (V2DI "DI") (V2DF "DF")
+ (V4QI "QI") (V4HI "HI") (V4SI "SI")
+ (V4HF "HF") (V4SF "SF") (V4DI "DI") (V4DF "DF")
+ (V8QI "QI") (V8HI "HI") (V8SI "SI")
+ (V8HF "HF") (V8SF "SF") (V8DI "DI") (V8DF "DF")
+ (V16QI "QI") (V16HI "HI") (V16SI "SI")
+ (V16HF "HF") (V16SF "SF") (V16DI "DI") (V16DF "DF")
+ (V32QI "QI") (V32HI "HI") (V32SI "SI")
+ (V32HF "HF") (V32SF "SF") (V32DI "DI") (V32DF "DF")
+ (V64QI "QI") (V64HI "HI") (V64SI "SI")
(V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")])
(define_mode_attr vnsi
- [(V64QI "v64si") (V64HI "v64si") (V64HF "v64si") (V64SI "v64si")
+ [(V2QI "v2si") (V2HI "v2si") (V2HF "v2si") (V2SI "v2si")
+ (V2SF "v2si") (V2DI "v2si") (V2DF "v2si")
+ (V4QI "v4si") (V4HI "v4si") (V4HF "v4si") (V4SI "v4si")
+ (V4SF "v4si") (V4DI "v4si") (V4DF "v4si")
+ (V8QI "v8si") (V8HI "v8si") (V8HF "v8si") (V8SI "v8si")
+ (V8SF "v8si") (V8DI "v8si") (V8DF "v8si")
+ (V16QI "v16si") (V16HI "v16si") (V16HF "v16si") (V16SI "v16si")
+ (V16SF "v16si") (V16DI "v16si") (V16DF "v16si")
+ (V32QI "v32si") (V32HI "v32si") (V32HF "v32si") (V32SI "v32si")
+ (V32SF "v32si") (V32DI "v32si") (V32DF "v32si")
+ (V64QI "v64si") (V64HI "v64si") (V64HF "v64si") (V64SI "v64si")
(V64SF "v64si") (V64DI "v64si") (V64DF "v64si")])
(define_mode_attr VnSI
- [(V64QI "V64SI") (V64HI "V64SI") (V64HF "V64SI") (V64SI "V64SI")
+ [(V2QI "V2SI") (V2HI "V2SI") (V2HF "V2SI") (V2SI "V2SI")
+ (V2SF "V2SI") (V2DI "V2SI") (V2DF "V2SI")
+ (V4QI "V4SI") (V4HI "V4SI") (V4HF "V4SI") (V4SI "V4SI")
+ (V4SF "V4SI") (V4DI "V4SI") (V4DF "V4SI")
+ (V8QI "V8SI") (V8HI "V8SI") (V8HF "V8SI") (V8SI "V8SI")
+ (V8SF "V8SI") (V8DI "V8SI") (V8DF "V8SI")
+ (V16QI "V16SI") (V16HI "V16SI") (V16HF "V16SI") (V16SI "V16SI")
+ (V16SF "V16SI") (V16DI "V16SI") (V16DF "V16SI")
+ (V32QI "V32SI") (V32HI "V32SI") (V32HF "V32SI") (V32SI "V32SI")
+ (V32SF "V32SI") (V32DI "V32SI") (V32DF "V32SI")
+ (V64QI "V64SI") (V64HI "V64SI") (V64HF "V64SI") (V64SI "V64SI")
(V64SF "V64SI") (V64DI "V64SI") (V64DF "V64SI")])
(define_mode_attr vndi
- [(V64QI "v64di") (V64HI "v64di") (V64HF "v64di") (V64SI "v64di")
+ [(V2QI "v2di") (V2HI "v2di") (V2HF "v2di") (V2SI "v2di")
+ (V2SF "v2di") (V2DI "v2di") (V2DF "v2di")
+ (V4QI "v4di") (V4HI "v4di") (V4HF "v4di") (V4SI "v4di")
+ (V4SF "v4di") (V4DI "v4di") (V4DF "v4di")
+ (V8QI "v8di") (V8HI "v8di") (V8HF "v8di") (V8SI "v8di")
+ (V8SF "v8di") (V8DI "v8di") (V8DF "v8di")
+ (V16QI "v16di") (V16HI "v16di") (V16HF "v16di") (V16SI "v16di")
+ (V16SF "v16di") (V16DI "v16di") (V16DF "v16di")
+ (V32QI "v32di") (V32HI "v32di") (V32HF "v32di") (V32SI "v32di")
+ (V32SF "v32di") (V32DI "v32di") (V32DF "v32di")
+ (V64QI "v64di") (V64HI "v64di") (V64HF "v64di") (V64SI "v64di")
(V64SF "v64di") (V64DI "v64di") (V64DF "v64di")])
(define_mode_attr VnDI
- [(V64QI "V64DI") (V64HI "V64DI") (V64HF "V64DI") (V64SI "V64DI")
+ [(V2QI "V2DI") (V2HI "V2DI") (V2HF "V2DI") (V2SI "V2DI")
+ (V2SF "V2DI") (V2DI "V2DI") (V2DF "V2DI")
+ (V4QI "V4DI") (V4HI "V4DI") (V4HF "V4DI") (V4SI "V4DI")
+ (V4SF "V4DI") (V4DI "V4DI") (V4DF "V4DI")
+ (V8QI "V8DI") (V8HI "V8DI") (V8HF "V8DI") (V8SI "V8DI")
+ (V8SF "V8DI") (V8DI "V8DI") (V8DF "V8DI")
+ (V16QI "V16DI") (V16HI "V16DI") (V16HF "V16DI") (V16SI "V16DI")
+ (V16SF "V16DI") (V16DI "V16DI") (V16DF "V16DI")
+ (V32QI "V32DI") (V32HI "V32DI") (V32HF "V32DI") (V32SI "V32DI")
+ (V32SF "V32DI") (V32DI "V32DI") (V32DF "V32DI")
+ (V64QI "V64DI") (V64HI "V64DI") (V64HF "V64DI") (V64SI "V64DI")
(V64SF "V64DI") (V64DI "V64DI") (V64DF "V64DI")])
-(define_mode_attr sdwa [(V64QI "BYTE_0") (V64HI "WORD_0") (V64SI "DWORD")])
+(define_mode_attr sdwa
+ [(V2QI "BYTE_0") (V2HI "WORD_0") (V2SI "DWORD")
+ (V4QI "BYTE_0") (V4HI "WORD_0") (V4SI "DWORD")
+ (V8QI "BYTE_0") (V8HI "WORD_0") (V8SI "DWORD")
+ (V16QI "BYTE_0") (V16HI "WORD_0") (V16SI "DWORD")
+ (V32QI "BYTE_0") (V32HI "WORD_0") (V32SI "DWORD")
+ (V64QI "BYTE_0") (V64HI "WORD_0") (V64SI "DWORD")])
;; }}}
;; {{{ Substitutions
@@ -180,6 +335,37 @@ (define_expand "mov<mode>"
(match_operand:V_ALL 1 "general_operand"))]
""
{
+ /* Bitwise reinterpret casts via SUBREG don't work with GCN vector
+ registers, but we can convert the MEM to a mode that does work. */
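+ /* For example, a V64SImode store of (subreg:V64SI (reg:V64SF) 0) is
+ re-emitted here as a V64SFmode store of the V64SF register itself. */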
+ if (MEM_P (operands[0]) && !SUBREG_P (operands[0])
+ && SUBREG_P (operands[1])
+ && GET_MODE_SIZE (GET_MODE (operands[1]))
+ == GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))))
+ {
+ rtx src = SUBREG_REG (operands[1]);
+ rtx mem = copy_rtx (operands[0]);
+ PUT_MODE_RAW (mem, GET_MODE (src));
+ emit_move_insn (mem, src);
+ DONE;
+ }
+ if (MEM_P (operands[1]) && !SUBREG_P (operands[1])
+ && SUBREG_P (operands[0])
+ && GET_MODE_SIZE (GET_MODE (operands[0]))
+ == GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[0]))))
+ {
+ rtx dest = SUBREG_REG (operands[0]);
+ rtx mem = copy_rtx (operands[1]);
+ PUT_MODE_RAW (mem, GET_MODE (dest));
+ emit_move_insn (dest, mem);
+ DONE;
+ }
+
+ /* SUBREG of MEM is not supported. */
+ gcc_assert ((!SUBREG_P (operands[0])
+ || !MEM_P (SUBREG_REG (operands[0])))
+ && (!SUBREG_P (operands[1])
+ || !MEM_P (SUBREG_REG (operands[1]))));
+
if (MEM_P (operands[0]) && !lra_in_progress && !reload_completed)
{
operands[1] = force_reg (<MODE>mode, operands[1]);
@@ -2419,10 +2605,10 @@ (define_insn "ldexp<mode>3"
(set_attr "length" "8")])
(define_insn "ldexp<mode>3<exec>"
- [(set (match_operand:V_FP 0 "register_operand" "=v")
+ [(set (match_operand:V_FP 0 "register_operand" "= v")
(unspec:V_FP
- [(match_operand:V_FP 1 "gcn_alu_operand" "vB")
- (match_operand:V64SI 2 "gcn_alu_operand" "vSvA")]
+ [(match_operand:V_FP 1 "gcn_alu_operand" " vB")
+ (match_operand:<VnSI> 2 "gcn_alu_operand" "vSvA")]
UNSPEC_LDEXP))]
""
"v_ldexp%i0\t%0, %1, %2"
@@ -2452,8 +2638,8 @@ (define_insn "frexp<mode>_mant2"
(set_attr "length" "8")])
(define_insn "frexp<mode>_exp2<exec>"
- [(set (match_operand:V64SI 0 "register_operand" "=v")
- (unspec:V64SI
+ [(set (match_operand:<VnSI> 0 "register_operand" "=v")
+ (unspec:<VnSI>
[(match_operand:V_FP 1 "gcn_alu_operand" "vB")]
UNSPEC_FREXP_EXP))]
""
@@ -2640,9 +2826,27 @@ (define_expand "div<mode>3"
(define_mode_iterator CVT_FROM_MODE [HI SI HF SF DF])
(define_mode_iterator CVT_TO_MODE [HI SI HF SF DF])
-(define_mode_iterator VCVT_MODE [V64HI V64SI V64HF V64SF V64DF])
-(define_mode_iterator VCVT_FMODE [V64HF V64SF V64DF])
-(define_mode_iterator VCVT_IMODE [V64HI V64SI])
+(define_mode_iterator VCVT_MODE
+ [V2HI V2SI V2HF V2SF V2DF
+ V4HI V4SI V4HF V4SF V4DF
+ V8HI V8SI V8HF V8SF V8DF
+ V16HI V16SI V16HF V16SF V16DF
+ V32HI V32SI V32HF V32SF V32DF
+ V64HI V64SI V64HF V64SF V64DF])
+(define_mode_iterator VCVT_FMODE
+ [V2HF V2SF V2DF
+ V4HF V4SF V4DF
+ V8HF V8SF V8DF
+ V16HF V16SF V16DF
+ V32HF V32SF V32DF
+ V64HF V64SF V64DF])
+(define_mode_iterator VCVT_IMODE
+ [V2HI V2SI
+ V4HI V4SI
+ V8HI V8SI
+ V16HI V16SI
+ V32HI V32SI
+ V64HI V64SI])
(define_code_iterator cvt_op [fix unsigned_fix
float unsigned_float
@@ -3265,7 +3469,7 @@ (define_int_attr reduc_insn [(UNSPEC_SMIN_DPP_SHR "v_min%i0")
(define_expand "reduc_<reduc_op>_scal_<mode>"
[(set (match_operand:<SCALAR_MODE> 0 "register_operand")
(unspec:<SCALAR_MODE>
- [(match_operand:V_ALL 1 "register_operand")]
+ [(match_operand:V64_ALL 1 "register_operand")]
REDUC_UNSPEC))]
""
{
@@ -3284,7 +3488,7 @@ (define_expand "reduc_<reduc_op>_scal_<mode>"
(define_expand "fold_left_plus_<mode>"
[(match_operand:<SCALAR_MODE> 0 "register_operand")
(match_operand:<SCALAR_MODE> 1 "gcn_alu_operand")
- (match_operand:V_FP 2 "gcn_alu_operand")]
+ (match_operand:V64_FP 2 "gcn_alu_operand")]
"can_create_pseudo_p ()
&& (flag_openacc || flag_openmp
|| flag_associative_math)"
@@ -3300,11 +3504,11 @@ (define_expand "fold_left_plus_<mode>"
})
(define_insn "*<reduc_op>_dpp_shr_<mode>"
- [(set (match_operand:V_1REG 0 "register_operand" "=v")
- (unspec:V_1REG
- [(match_operand:V_1REG 1 "register_operand" "v")
- (match_operand:V_1REG 2 "register_operand" "v")
- (match_operand:SI 3 "const_int_operand" "n")]
+ [(set (match_operand:V64_1REG 0 "register_operand" "=v")
+ (unspec:V64_1REG
+ [(match_operand:V64_1REG 1 "register_operand" "v")
+ (match_operand:V64_1REG 2 "register_operand" "v")
+ (match_operand:SI 3 "const_int_operand" "n")]
REDUC_UNSPEC))]
; GCN3 requires a carry out, GCN5 not
"!(TARGET_GCN3 && SCALAR_INT_MODE_P (<SCALAR_MODE>mode)
@@ -3317,11 +3521,11 @@ (define_insn "*<reduc_op>_dpp_shr_<mode>"
(set_attr "length" "8")])
(define_insn_and_split "*<reduc_op>_dpp_shr_<mode>"
- [(set (match_operand:V_DI 0 "register_operand" "=v")
- (unspec:V_DI
- [(match_operand:V_DI 1 "register_operand" "v")
- (match_operand:V_DI 2 "register_operand" "v")
- (match_operand:SI 3 "const_int_operand" "n")]
+ [(set (match_operand:V64_DI 0 "register_operand" "=v")
+ (unspec:V64_DI
+ [(match_operand:V64_DI 1 "register_operand" "v")
+ (match_operand:V64_DI 2 "register_operand" "v")
+ (match_operand:SI 3 "const_int_operand" "n")]
REDUC_2REG_UNSPEC))]
""
"#"
@@ -3346,10 +3550,10 @@ (define_insn_and_split "*<reduc_op>_dpp_shr_<mode>"
; Special cases for addition.
(define_insn "*plus_carry_dpp_shr_<mode>"
- [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
- (unspec:V_INT_1REG
- [(match_operand:V_INT_1REG 1 "register_operand" "v")
- (match_operand:V_INT_1REG 2 "register_operand" "v")
+ [(set (match_operand:V64_INT_1REG 0 "register_operand" "=v")
+ (unspec:V64_INT_1REG
+ [(match_operand:V64_INT_1REG 1 "register_operand" "v")
+ (match_operand:V64_INT_1REG 2 "register_operand" "v")
(match_operand:SI 3 "const_int_operand" "n")]
UNSPEC_PLUS_CARRY_DPP_SHR))
(clobber (reg:DI VCC_REG))]
@@ -3363,12 +3567,12 @@ (define_insn "*plus_carry_dpp_shr_<mode>"
(set_attr "length" "8")])
(define_insn "*plus_carry_in_dpp_shr_<mode>"
- [(set (match_operand:V_SI 0 "register_operand" "=v")
- (unspec:V_SI
- [(match_operand:V_SI 1 "register_operand" "v")
- (match_operand:V_SI 2 "register_operand" "v")
- (match_operand:SI 3 "const_int_operand" "n")
- (match_operand:DI 4 "register_operand" "cV")]
+ [(set (match_operand:V64_SI 0 "register_operand" "=v")
+ (unspec:V64_SI
+ [(match_operand:V64_SI 1 "register_operand" "v")
+ (match_operand:V64_SI 2 "register_operand" "v")
+ (match_operand:SI 3 "const_int_operand" "n")
+ (match_operand:DI 4 "register_operand" "cV")]
UNSPEC_PLUS_CARRY_IN_DPP_SHR))
(clobber (reg:DI VCC_REG))]
""
@@ -3381,11 +3585,11 @@ (define_insn "*plus_carry_in_dpp_shr_<mode>"
(set_attr "length" "8")])
(define_insn_and_split "*plus_carry_dpp_shr_<mode>"
- [(set (match_operand:V_DI 0 "register_operand" "=v")
- (unspec:V_DI
- [(match_operand:V_DI 1 "register_operand" "v")
- (match_operand:V_DI 2 "register_operand" "v")
- (match_operand:SI 3 "const_int_operand" "n")]
+ [(set (match_operand:V64_DI 0 "register_operand" "=v")
+ (unspec:V64_DI
+ [(match_operand:V64_DI 1 "register_operand" "v")
+ (match_operand:V64_DI 2 "register_operand" "v")
+ (match_operand:SI 3 "const_int_operand" "n")]
UNSPEC_PLUS_CARRY_DPP_SHR))
(clobber (reg:DI VCC_REG))]
""
@@ -3416,7 +3620,7 @@ (define_insn_and_split "*plus_carry_dpp_shr_<mode>"
(define_insn "mov_from_lane63_<mode>"
[(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg,v")
(unspec:<SCALAR_MODE>
- [(match_operand:V_1REG 1 "register_operand" " v,v")]
+ [(match_operand:V64_1REG 1 "register_operand" " v,v")]
UNSPEC_MOV_FROM_LANE63))]
""
"@
@@ -3429,7 +3633,7 @@ (define_insn "mov_from_lane63_<mode>"
(define_insn "mov_from_lane63_<mode>"
[(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg,v")
(unspec:<SCALAR_MODE>
- [(match_operand:V_2REG 1 "register_operand" " v,v")]
+ [(match_operand:V64_2REG 1 "register_operand" " v,v")]
UNSPEC_MOV_FROM_LANE63))]
""
"@
@@ -395,6 +395,97 @@ gcn_scalar_mode_supported_p (scalar_mode mode)
|| mode == TImode);
}
+/* Return a vector mode with N lanes of MODE. */
+
+static machine_mode
+VnMODE (int n, machine_mode mode)
+{
+ switch (mode)
+ {
+ case E_QImode:
+ switch (n)
+ {
+ case 2: return V2QImode;
+ case 4: return V4QImode;
+ case 8: return V8QImode;
+ case 16: return V16QImode;
+ case 32: return V32QImode;
+ case 64: return V64QImode;
+ }
+ break;
+ case E_HImode:
+ switch (n)
+ {
+ case 2: return V2HImode;
+ case 4: return V4HImode;
+ case 8: return V8HImode;
+ case 16: return V16HImode;
+ case 32: return V32HImode;
+ case 64: return V64HImode;
+ }
+ break;
+ case E_HFmode:
+ switch (n)
+ {
+ case 2: return V2HFmode;
+ case 4: return V4HFmode;
+ case 8: return V8HFmode;
+ case 16: return V16HFmode;
+ case 32: return V32HFmode;
+ case 64: return V64HFmode;
+ }
+ break;
+ case E_SImode:
+ switch (n)
+ {
+ case 2: return V2SImode;
+ case 4: return V4SImode;
+ case 8: return V8SImode;
+ case 16: return V16SImode;
+ case 32: return V32SImode;
+ case 64: return V64SImode;
+ }
+ break;
+ case E_SFmode:
+ switch (n)
+ {
+ case 2: return V2SFmode;
+ case 4: return V4SFmode;
+ case 8: return V8SFmode;
+ case 16: return V16SFmode;
+ case 32: return V32SFmode;
+ case 64: return V64SFmode;
+ }
+ break;
+ case E_DImode:
+ switch (n)
+ {
+ case 2: return V2DImode;
+ case 4: return V4DImode;
+ case 8: return V8DImode;
+ case 16: return V16DImode;
+ case 32: return V32DImode;
+ case 64: return V64DImode;
+ }
+ break;
+ case E_DFmode:
+ switch (n)
+ {
+ case 2: return V2DFmode;
+ case 4: return V4DFmode;
+ case 8: return V8DFmode;
+ case 16: return V16DFmode;
+ case 32: return V32DFmode;
+ case 64: return V64DFmode;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return VOIDmode;
+}
+
/* Implement TARGET_CLASS_MAX_NREGS.
Return the number of hard registers needed to hold a value of MODE in
@@ -556,6 +647,23 @@ gcn_can_change_mode_class (machine_mode from, machine_mode to,
{
if (!vgpr_vector_mode_p (from) && !vgpr_vector_mode_p (to))
return true;
+
+ /* Vector conversions are only valid when changing mode with a fixed number
+ of lanes, or changing number of lanes with a fixed mode. Anything else
+ would require actual data movement. */
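+ /* For instance, V64SI<->V64SF (same lane count) and V64SI<->V32SI (same
+ element mode) get past this check, but V64SI<->V32DI does not. */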
+ if (VECTOR_MODE_P (from) && VECTOR_MODE_P (to)
+ && GET_MODE_NUNITS (from) != GET_MODE_NUNITS (to)
+ && GET_MODE_INNER (from) != GET_MODE_INNER (to))
+ return false;
+
+ /* Vector/scalar conversions are only permitted when the scalar mode
+ is the same or smaller than the inner vector mode. */
+ if ((VECTOR_MODE_P (from) && !VECTOR_MODE_P (to)
+ && GET_MODE_SIZE (to) >= GET_MODE_SIZE (GET_MODE_INNER (from)))
+ || (VECTOR_MODE_P (to) && !VECTOR_MODE_P (from)
+ && GET_MODE_SIZE (from) >= GET_MODE_SIZE (GET_MODE_INNER (to))))
+ return false;
+
return (gcn_class_max_nregs (regclass, from)
== gcn_class_max_nregs (regclass, to));
}
@@ -595,6 +703,16 @@ gcn_class_likely_spilled_p (reg_class_t rclass)
bool
gcn_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
+ if (VECTOR_MODE_P (mode1) || VECTOR_MODE_P (mode2))
+ {
+ int vf1 = (VECTOR_MODE_P (mode1) ? GET_MODE_NUNITS (mode1) : 1);
+ int vf2 = (VECTOR_MODE_P (mode2) ? GET_MODE_NUNITS (mode2) : 1);
+ machine_mode inner1 = (vf1 > 1 ? GET_MODE_INNER (mode1) : mode1);
+ machine_mode inner2 = (vf2 > 1 ? GET_MODE_INNER (mode2) : mode2);
+
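+ /* Tie modes that have the same lane count, or the same element mode
+ with MODE2 no wider (in lanes) than MODE1. */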
+ return (vf1 == vf2 || (inner1 == inner2 && vf2 <= vf1));
+ }
+
return (GET_MODE_BITSIZE (mode1) <= MAX_FIXED_MODE_SIZE
&& GET_MODE_BITSIZE (mode2) <= MAX_FIXED_MODE_SIZE);
}
@@ -616,14 +734,16 @@ gcn_truly_noop_truncation (poly_uint64 outprec, poly_uint64 inprec)
rtx
gcn_operand_part (machine_mode mode, rtx op, int n)
{
- if (GET_MODE_SIZE (mode) >= 256)
+ int vf = VECTOR_MODE_P (mode) ? GET_MODE_NUNITS (mode) : 1;
+
+ if (vf > 1)
{
- /*gcc_assert (GET_MODE_SIZE (mode) == 256 || n == 0); */
+ machine_mode vsimode = VnMODE (vf, SImode);
if (REG_P (op))
{
gcc_assert (REGNO (op) + n < FIRST_PSEUDO_REGISTER);
- return gen_rtx_REG (V64SImode, REGNO (op) + n);
+ return gen_rtx_REG (vsimode, REGNO (op) + n);
}
if (GET_CODE (op) == CONST_VECTOR)
{
@@ -634,10 +754,10 @@ gcn_operand_part (machine_mode mode, rtx op, int n)
RTVEC_ELT (v, i) = gcn_operand_part (GET_MODE_INNER (mode),
CONST_VECTOR_ELT (op, i), n);
- return gen_rtx_CONST_VECTOR (V64SImode, v);
+ return gen_rtx_CONST_VECTOR (vsimode, v);
}
if (GET_CODE (op) == UNSPEC && XINT (op, 1) == UNSPEC_VECTOR)
- return gcn_gen_undef (V64SImode);
+ return gcn_gen_undef (vsimode);
gcc_unreachable ();
}
else if (GET_MODE_SIZE (mode) == 8 && REG_P (op))
@@ -734,38 +854,6 @@ get_exec (int64_t val)
return reg;
}
-/* Return value of scalar exec register. */
-
-rtx
-gcn_scalar_exec ()
-{
- return const1_rtx;
-}
-
-/* Return pseudo holding scalar exec register. */
-
-rtx
-gcn_scalar_exec_reg ()
-{
- return get_exec (1);
-}
-
-/* Return value of full exec register. */
-
-rtx
-gcn_full_exec ()
-{
- return constm1_rtx;
-}
-
-/* Return pseudo holding full exec register. */
-
-rtx
-gcn_full_exec_reg ()
-{
- return get_exec (-1);
-}
-
/* }}} */
/* {{{ Immediate constants. */
@@ -802,8 +890,13 @@ int
gcn_inline_fp_constant_p (rtx x, bool allow_vector)
{
machine_mode mode = GET_MODE (x);
+ int vf = VECTOR_MODE_P (mode) ? GET_MODE_NUNITS (mode) : 1;
- if ((mode == V64HFmode || mode == V64SFmode || mode == V64DFmode)
+ if (vf > 1)
+ mode = GET_MODE_INNER (mode);
+
+ if (vf > 1
+ && (mode == HFmode || mode == SFmode || mode == DFmode)
&& allow_vector)
{
int n;
@@ -812,7 +905,7 @@ gcn_inline_fp_constant_p (rtx x, bool allow_vector)
n = gcn_inline_fp_constant_p (CONST_VECTOR_ELT (x, 0), false);
if (!n)
return 0;
- for (int i = 1; i < 64; i++)
+ for (int i = 1; i < vf; i++)
if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
return 0;
return 1;
@@ -867,8 +960,13 @@ bool
gcn_fp_constant_p (rtx x, bool allow_vector)
{
machine_mode mode = GET_MODE (x);
+ int vf = VECTOR_MODE_P (mode) ? GET_MODE_NUNITS (mode) : 1;
- if ((mode == V64HFmode || mode == V64SFmode || mode == V64DFmode)
+ if (vf > 1)
+ mode = GET_MODE_INNER (mode);
+
+ if (vf > 1
+ && (mode == HFmode || mode == SFmode || mode == DFmode)
&& allow_vector)
{
int n;
@@ -877,7 +975,7 @@ gcn_fp_constant_p (rtx x, bool allow_vector)
n = gcn_fp_constant_p (CONST_VECTOR_ELT (x, 0), false);
if (!n)
return false;
- for (int i = 1; i < 64; i++)
+ for (int i = 1; i < vf; i++)
if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0))
return false;
return true;
@@ -1090,6 +1188,244 @@ gcn_gen_undef (machine_mode mode)
return gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), UNSPEC_VECTOR);
}
+/* }}} */
+/* {{{ Utility functions. */
+
+/* Generalised accessor functions for instruction patterns.
+ The machine description '@' prefix does something similar, but as of
+ GCC 10 is incompatible with define_subst, and anyway it doesn't
+ auto-handle the exec feature.
+
+ Four macros are provided; each function only needs one:
+
+ GEN_VN - create accessor functions for all sizes of one mode
+ GEN_VNM - create accessor functions for all sizes of all modes
+ GEN_VN_NOEXEC - for insns without "_exec" variants
+ GEN_VNM_NOEXEC - likewise
+
+ E.g. add<mode>3
+ GEN_VNM (add, 3, A(rtx dest, rtx s1, rtx s2), A(dest, s1, s2))
+
+ gen_addvNsi3 (dst, a, b)
+ -> calls gen_addv64si3, or gen_addv32si3, etc.
+
+ gen_addvNm3 (dst, a, b)
+ -> calls gen_addv64qi3, or gen_addv2di3, etc.
+
+ The mode is determined from the first parameter, which must be called
+ "dest" (or else the macro doesn't work).
+
+ Each function has two optional parameters at the end: merge_src and exec.
+ If exec is non-null, the function will call the "_exec" variant of the
+ insn. If exec is non-null but merge_src is null then an undef unspec
+ will be created.
+
+ E.g. cont.
+ gen_addvNsi3 (v64sidst, a, b, oldval, exec)
+ -> calls gen_addv64si3_exec (v64sidst, a, b, oldval, exec)
+
+ gen_addvNm3 (v2qidst, a, b, NULL, exec)
+ -> calls gen_addv2qi3_exec (v2qidst, a, b,
+ gcn_gen_undef (V2QImode), exec)
+ */
+
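+/* The A() wrapper passes comma-separated lists through the outer macros
+ below as a single macro argument. */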
+#define A(...) __VA_ARGS__
+#define GEN_VN_NOEXEC(PREFIX, SUFFIX, PARAMS, ARGS) \
+static rtx \
+gen_##PREFIX##vN##SUFFIX (PARAMS) \
+{ \
+ machine_mode mode = GET_MODE (dest); \
+ int n = GET_MODE_NUNITS (mode); \
+ \
+ switch (n) \
+ { \
+ case 2: return gen_##PREFIX##v2##SUFFIX (ARGS); \
+ case 4: return gen_##PREFIX##v4##SUFFIX (ARGS); \
+ case 8: return gen_##PREFIX##v8##SUFFIX (ARGS); \
+ case 16: return gen_##PREFIX##v16##SUFFIX (ARGS); \
+ case 32: return gen_##PREFIX##v32##SUFFIX (ARGS); \
+ case 64: return gen_##PREFIX##v64##SUFFIX (ARGS); \
+ } \
+ \
+ gcc_unreachable (); \
+ return NULL_RTX; \
+}
+
+#define GEN_VNM_NOEXEC(PREFIX, SUFFIX, PARAMS, ARGS) \
+GEN_VN_NOEXEC (PREFIX, qi##SUFFIX, A(PARAMS), A(ARGS)) \
+GEN_VN_NOEXEC (PREFIX, hi##SUFFIX, A(PARAMS), A(ARGS)) \
+GEN_VN_NOEXEC (PREFIX, hf##SUFFIX, A(PARAMS), A(ARGS)) \
+GEN_VN_NOEXEC (PREFIX, si##SUFFIX, A(PARAMS), A(ARGS)) \
+GEN_VN_NOEXEC (PREFIX, sf##SUFFIX, A(PARAMS), A(ARGS)) \
+GEN_VN_NOEXEC (PREFIX, di##SUFFIX, A(PARAMS), A(ARGS)) \
+GEN_VN_NOEXEC (PREFIX, df##SUFFIX, A(PARAMS), A(ARGS)) \
+static rtx \
+gen_##PREFIX##vNm##SUFFIX (PARAMS) \
+{ \
+ machine_mode mode = GET_MODE_INNER (GET_MODE (dest)); \
+ \
+ switch (mode) \
+ { \
+ case E_QImode: return gen_##PREFIX##vNqi##SUFFIX (ARGS); \
+ case E_HImode: return gen_##PREFIX##vNhi##SUFFIX (ARGS); \
+ case E_HFmode: return gen_##PREFIX##vNhf##SUFFIX (ARGS); \
+ case E_SImode: return gen_##PREFIX##vNsi##SUFFIX (ARGS); \
+ case E_SFmode: return gen_##PREFIX##vNsf##SUFFIX (ARGS); \
+ case E_DImode: return gen_##PREFIX##vNdi##SUFFIX (ARGS); \
+ case E_DFmode: return gen_##PREFIX##vNdf##SUFFIX (ARGS); \
+ default: \
+ break; \
+ } \
+ \
+ gcc_unreachable (); \
+ return NULL_RTX; \
+}
+
+#define GEN_VN(PREFIX, SUFFIX, PARAMS, ARGS) \
+static rtx \
+gen_##PREFIX##vN##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \
+{ \
+ machine_mode mode = GET_MODE (dest); \
+ int n = GET_MODE_NUNITS (mode); \
+ \
+ if (exec && !merge_src) \
+ merge_src = gcn_gen_undef (mode); \
+ \
+ if (exec) \
+ switch (n) \
+ { \
+ case 2: return gen_##PREFIX##v2##SUFFIX##_exec (ARGS, merge_src, exec); \
+ case 4: return gen_##PREFIX##v4##SUFFIX##_exec (ARGS, merge_src, exec); \
+ case 8: return gen_##PREFIX##v8##SUFFIX##_exec (ARGS, merge_src, exec); \
+ case 16: return gen_##PREFIX##v16##SUFFIX##_exec (ARGS, merge_src, exec); \
+ case 32: return gen_##PREFIX##v32##SUFFIX##_exec (ARGS, merge_src, exec); \
+ case 64: return gen_##PREFIX##v64##SUFFIX##_exec (ARGS, merge_src, exec); \
+ } \
+ else \
+ switch (n) \
+ { \
+ case 2: return gen_##PREFIX##v2##SUFFIX (ARGS); \
+ case 4: return gen_##PREFIX##v4##SUFFIX (ARGS); \
+ case 8: return gen_##PREFIX##v8##SUFFIX (ARGS); \
+ case 16: return gen_##PREFIX##v16##SUFFIX (ARGS); \
+ case 32: return gen_##PREFIX##v32##SUFFIX (ARGS); \
+ case 64: return gen_##PREFIX##v64##SUFFIX (ARGS); \
+ } \
+ \
+ gcc_unreachable (); \
+ return NULL_RTX; \
+}
+
+#define GEN_VNM(PREFIX, SUFFIX, PARAMS, ARGS) \
+GEN_VN (PREFIX, qi##SUFFIX, A(PARAMS), A(ARGS)) \
+GEN_VN (PREFIX, hi##SUFFIX, A(PARAMS), A(ARGS)) \
+GEN_VN (PREFIX, hf##SUFFIX, A(PARAMS), A(ARGS)) \
+GEN_VN (PREFIX, si##SUFFIX, A(PARAMS), A(ARGS)) \
+GEN_VN (PREFIX, sf##SUFFIX, A(PARAMS), A(ARGS)) \
+GEN_VN (PREFIX, di##SUFFIX, A(PARAMS), A(ARGS)) \
+GEN_VN (PREFIX, df##SUFFIX, A(PARAMS), A(ARGS)) \
+static rtx \
+gen_##PREFIX##vNm##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \
+{ \
+ machine_mode mode = GET_MODE_INNER (GET_MODE (dest)); \
+ \
+ switch (mode) \
+ { \
+ case E_QImode: return gen_##PREFIX##vNqi##SUFFIX (ARGS, merge_src, exec); \
+ case E_HImode: return gen_##PREFIX##vNhi##SUFFIX (ARGS, merge_src, exec); \
+ case E_HFmode: return gen_##PREFIX##vNhf##SUFFIX (ARGS, merge_src, exec); \
+ case E_SImode: return gen_##PREFIX##vNsi##SUFFIX (ARGS, merge_src, exec); \
+ case E_SFmode: return gen_##PREFIX##vNsf##SUFFIX (ARGS, merge_src, exec); \
+ case E_DImode: return gen_##PREFIX##vNdi##SUFFIX (ARGS, merge_src, exec); \
+ case E_DFmode: return gen_##PREFIX##vNdf##SUFFIX (ARGS, merge_src, exec); \
+ default: \
+ break; \
+ } \
+ \
+ gcc_unreachable (); \
+ return NULL_RTX; \
+}
+
+GEN_VNM (add,3, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
+GEN_VN (add,si3_dup, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
+GEN_VN (add,si3_vcc_dup, A(rtx dest, rtx src1, rtx src2, rtx vcc),
+ A(dest, src1, src2, vcc))
+GEN_VN (add,di3_sext_dup2, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
+GEN_VN (add,di3_vcc_zext_dup, A(rtx dest, rtx src1, rtx src2, rtx vcc),
+ A(dest, src1, src2, vcc))
+GEN_VN (add,di3_zext_dup2, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
+GEN_VN (add,di3_vcc_zext_dup2, A(rtx dest, rtx src1, rtx src2, rtx vcc),
+ A(dest, src1, src2, vcc))
+GEN_VN (addc,si3, A(rtx dest, rtx src1, rtx src2, rtx vccout, rtx vccin),
+ A(dest, src1, src2, vccout, vccin))
+GEN_VN (ashl,si3, A(rtx dest, rtx src, rtx shift), A(dest, src, shift))
+GEN_VNM_NOEXEC (ds_bpermute,, A(rtx dest, rtx addr, rtx src, rtx exec),
+ A(dest, addr, src, exec))
+GEN_VNM (mov,, A(rtx dest, rtx src), A(dest, src))
+GEN_VN (mul,si3_dup, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
+GEN_VNM (vec_duplicate,, A(rtx dest, rtx src), A(dest, src))
+
+#undef GEN_VNM
+#undef GEN_VN
+#undef GET_VN_FN
+#undef A
+
+/* Get icode for vector instructions without an optab. */
+
+#define CODE_FOR(PREFIX, SUFFIX) \
+static int \
+get_code_for_##PREFIX##vN##SUFFIX (int nunits) \
+{ \
+ switch (nunits) \
+ { \
+ case 2: return CODE_FOR_##PREFIX##v2##SUFFIX; \
+ case 4: return CODE_FOR_##PREFIX##v4##SUFFIX; \
+ case 8: return CODE_FOR_##PREFIX##v8##SUFFIX; \
+ case 16: return CODE_FOR_##PREFIX##v16##SUFFIX; \
+ case 32: return CODE_FOR_##PREFIX##v32##SUFFIX; \
+ case 64: return CODE_FOR_##PREFIX##v64##SUFFIX; \
+ } \
+ \
+ gcc_unreachable (); \
+ return CODE_FOR_nothing; \
+}
+
+#define CODE_FOR_OP(PREFIX) \
+ CODE_FOR (PREFIX, qi) \
+ CODE_FOR (PREFIX, hi) \
+ CODE_FOR (PREFIX, hf) \
+ CODE_FOR (PREFIX, si) \
+ CODE_FOR (PREFIX, sf) \
+ CODE_FOR (PREFIX, di) \
+ CODE_FOR (PREFIX, df) \
+static int \
+get_code_for_##PREFIX (machine_mode mode) \
+{ \
+ int vf = GET_MODE_NUNITS (mode); \
+ machine_mode smode = GET_MODE_INNER (mode); \
+ \
+ switch (smode) \
+ { \
+ case E_QImode: return get_code_for_##PREFIX##vNqi (vf); \
+ case E_HImode: return get_code_for_##PREFIX##vNhi (vf); \
+ case E_HFmode: return get_code_for_##PREFIX##vNhf (vf); \
+ case E_SImode: return get_code_for_##PREFIX##vNsi (vf); \
+ case E_SFmode: return get_code_for_##PREFIX##vNsf (vf); \
+ case E_DImode: return get_code_for_##PREFIX##vNdi (vf); \
+ case E_DFmode: return get_code_for_##PREFIX##vNdf (vf); \
+ default: break; \
+ } \
+ \
+ gcc_unreachable (); \
+ return CODE_FOR_nothing; \
+}
+
+CODE_FOR_OP (reload_in)
+CODE_FOR_OP (reload_out)
+
+#undef CODE_FOR_OP
+#undef CODE_FOR
+
/* }}} */
/* {{{ Addresses, pointers and moves. */
@@ -1644,60 +1980,6 @@ regno_ok_for_index_p (int regno)
return regno == M0_REG || VGPR_REGNO_P (regno);
}
-/* Generate move which uses the exec flags. If EXEC is NULL, then it is
- assumed that all lanes normally relevant to the mode of the move are
- affected. If PREV is NULL, then a sensible default is supplied for
- the inactive lanes. */
-
-static rtx
-gen_mov_with_exec (rtx op0, rtx op1, rtx exec = NULL, rtx prev = NULL)
-{
- machine_mode mode = GET_MODE (op0);
-
- if (vgpr_vector_mode_p (mode))
- {
- if (exec && exec != CONSTM1_RTX (DImode))
- {
- if (!prev)
- prev = op0;
- }
- else
- {
- if (!prev)
- prev = gcn_gen_undef (mode);
- exec = gcn_full_exec_reg ();
- }
-
- rtx set = gen_rtx_SET (op0, gen_rtx_VEC_MERGE (mode, op1, prev, exec));
-
- return gen_rtx_PARALLEL (VOIDmode,
- gen_rtvec (2, set,
- gen_rtx_CLOBBER (VOIDmode,
- gen_rtx_SCRATCH (V64DImode))));
- }
-
- return (gen_rtx_PARALLEL
- (VOIDmode,
- gen_rtvec (2, gen_rtx_SET (op0, op1),
- gen_rtx_USE (VOIDmode,
- exec ? exec : gcn_scalar_exec ()))));
-}
-
-/* Generate masked move. */
-
-static rtx
-gen_duplicate_load (rtx op0, rtx op1, rtx op2 = NULL, rtx exec = NULL)
-{
- if (exec)
- return (gen_rtx_SET (op0,
- gen_rtx_VEC_MERGE (GET_MODE (op0),
- gen_rtx_VEC_DUPLICATE (GET_MODE
- (op0), op1),
- op2, exec)));
- else
- return (gen_rtx_SET (op0, gen_rtx_VEC_DUPLICATE (GET_MODE (op0), op1)));
-}
-
/* Expand vector init of OP0 by VEC.
Implements vec_init instruction pattern. */
@@ -1707,10 +1989,11 @@ gcn_expand_vector_init (rtx op0, rtx vec)
int64_t initialized_mask = 0;
int64_t curr_mask = 1;
machine_mode mode = GET_MODE (op0);
+ int vf = GET_MODE_NUNITS (mode);
rtx val = XVECEXP (vec, 0, 0);
- for (int i = 1; i < 64; i++)
+ for (int i = 1; i < vf; i++)
if (rtx_equal_p (val, XVECEXP (vec, 0, i)))
curr_mask |= (int64_t) 1 << i;
@@ -1719,26 +2002,26 @@ gcn_expand_vector_init (rtx op0, rtx vec)
else
{
val = force_reg (GET_MODE_INNER (mode), val);
- emit_insn (gen_duplicate_load (op0, val));
+ emit_insn (gen_vec_duplicatevNm (op0, val));
}
initialized_mask |= curr_mask;
- for (int i = 1; i < 64; i++)
+ for (int i = 1; i < vf; i++)
if (!(initialized_mask & ((int64_t) 1 << i)))
{
curr_mask = (int64_t) 1 << i;
rtx val = XVECEXP (vec, 0, i);
- for (int j = i + 1; j < 64; j++)
+ for (int j = i + 1; j < vf; j++)
if (rtx_equal_p (val, XVECEXP (vec, 0, j)))
curr_mask |= (int64_t) 1 << j;
if (gcn_constant_p (val))
- emit_insn (gen_mov_with_exec (op0, gcn_vec_constant (mode, val),
- get_exec (curr_mask)));
+ emit_insn (gen_movvNm (op0, gcn_vec_constant (mode, val), op0,
+ get_exec (curr_mask)));
else
{
val = force_reg (GET_MODE_INNER (mode), val);
- emit_insn (gen_duplicate_load (op0, val, op0,
- get_exec (curr_mask)));
+ emit_insn (gen_vec_duplicatevNm (op0, val, op0,
+ get_exec (curr_mask)));
}
initialized_mask |= curr_mask;
}
@@ -1751,18 +2034,18 @@ strided_constant (machine_mode mode, int base, int val)
{
rtx x = gen_reg_rtx (mode);
emit_move_insn (x, gcn_vec_constant (mode, base));
- emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 32),
- x, get_exec (0xffffffff00000000)));
- emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 16),
- x, get_exec (0xffff0000ffff0000)));
- emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 8),
- x, get_exec (0xff00ff00ff00ff00)));
- emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 4),
- x, get_exec (0xf0f0f0f0f0f0f0f0)));
- emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 2),
- x, get_exec (0xcccccccccccccccc)));
- emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 1),
- x, get_exec (0xaaaaaaaaaaaaaaaa)));
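+ /* Add VAL*2^k to every lane whose index has bit k set; after all six
+ steps lane I holds BASE + I*VAL. */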
+ emit_insn (gen_addvNm3 (x, x, gcn_vec_constant (mode, val * 32),
+ x, get_exec (0xffffffff00000000)));
+ emit_insn (gen_addvNm3 (x, x, gcn_vec_constant (mode, val * 16),
+ x, get_exec (0xffff0000ffff0000)));
+ emit_insn (gen_addvNm3 (x, x, gcn_vec_constant (mode, val * 8),
+ x, get_exec (0xff00ff00ff00ff00)));
+ emit_insn (gen_addvNm3 (x, x, gcn_vec_constant (mode, val * 4),
+ x, get_exec (0xf0f0f0f0f0f0f0f0)));
+ emit_insn (gen_addvNm3 (x, x, gcn_vec_constant (mode, val * 2),
+ x, get_exec (0xcccccccccccccccc)));
+ emit_insn (gen_addvNm3 (x, x, gcn_vec_constant (mode, val * 1),
+ x, get_exec (0xaaaaaaaaaaaaaaaa)));
return x;
}
@@ -1792,15 +2075,17 @@ gcn_addr_space_legitimize_address (rtx x, rtx old, machine_mode mode,
case ADDR_SPACE_LDS:
case ADDR_SPACE_GDS:
/* FIXME: LDS support offsets, handle them!. */
- if (vgpr_vector_mode_p (mode) && GET_MODE (x) != V64SImode)
+ if (vgpr_vector_mode_p (mode)
+ && GET_MODE_INNER (GET_MODE (x)) != SImode)
{
- rtx addrs = gen_reg_rtx (V64SImode);
+ machine_mode simode = VnMODE (GET_MODE_NUNITS (mode), SImode);
+ rtx addrs = gen_reg_rtx (simode);
rtx base = force_reg (SImode, x);
- rtx offsets = strided_constant (V64SImode, 0,
+ rtx offsets = strided_constant (simode, 0,
GET_MODE_UNIT_SIZE (mode));
- emit_insn (gen_vec_duplicatev64si (addrs, base));
- emit_insn (gen_addv64si3 (addrs, offsets, addrs));
+ emit_insn (gen_vec_duplicatevNsi (addrs, base));
+ emit_insn (gen_addvNsi3 (addrs, offsets, addrs));
return addrs;
}
return x;
@@ -1808,16 +2093,18 @@ gcn_addr_space_legitimize_address (rtx x, rtx old, machine_mode mode,
gcc_unreachable ();
}
-/* Convert a (mem:<MODE> (reg:DI)) to (mem:<MODE> (reg:V64DI)) with the
+/* Convert a (mem:<MODE> (reg:DI)) to (mem:<MODE> (reg:VnDI)) with the
proper vector of stepped addresses.
MEM will be a DImode address of a vector in an SGPR.
- TMP will be a V64DImode VGPR pair or (scratch:V64DI). */
+ TMP will be a VnDImode VGPR pair or (scratch:VnDI). */
rtx
gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem,
rtx tmp)
{
+ machine_mode pmode = VnMODE (GET_MODE_NUNITS (mode), DImode);
+ machine_mode offmode = VnMODE (GET_MODE_NUNITS (mode), SImode);
gcc_assert (MEM_P (mem));
rtx mem_base = XEXP (mem, 0);
rtx mem_index = NULL_RTX;
@@ -1841,22 +2128,18 @@ gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem,
machine_mode inner = GET_MODE_INNER (mode);
int shift = exact_log2 (GET_MODE_SIZE (inner));
- rtx ramp = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
- rtx undef_v64si = gcn_gen_undef (V64SImode);
+ rtx ramp = gen_rtx_REG (offmode, VGPR_REGNO (1));
rtx new_base = NULL_RTX;
addr_space_t as = MEM_ADDR_SPACE (mem);
rtx tmplo = (REG_P (tmp)
- ? gcn_operand_part (V64DImode, tmp, 0)
- : gen_reg_rtx (V64SImode));
+ ? gcn_operand_part (pmode, tmp, 0)
+ : gen_reg_rtx (offmode));
/* tmplo[:] = ramp[:] << shift */
- if (exec)
- emit_insn (gen_ashlv64si3_exec (tmplo, ramp,
- gen_int_mode (shift, SImode),
- undef_v64si, exec));
- else
- emit_insn (gen_ashlv64si3 (tmplo, ramp, gen_int_mode (shift, SImode)));
+ emit_insn (gen_ashlvNsi3 (tmplo, ramp,
+ gen_int_mode (shift, SImode),
+ NULL, exec));
if (AS_FLAT_P (as))
{
@@ -1866,53 +2149,41 @@ gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem,
{
rtx mem_base_lo = gcn_operand_part (DImode, mem_base, 0);
rtx mem_base_hi = gcn_operand_part (DImode, mem_base, 1);
- rtx tmphi = gcn_operand_part (V64DImode, tmp, 1);
+ rtx tmphi = gcn_operand_part (pmode, tmp, 1);
/* tmphi[:] = mem_base_hi */
- if (exec)
- emit_insn (gen_vec_duplicatev64si_exec (tmphi, mem_base_hi,
- undef_v64si, exec));
- else
- emit_insn (gen_vec_duplicatev64si (tmphi, mem_base_hi));
+ emit_insn (gen_vec_duplicatevNsi (tmphi, mem_base_hi, NULL, exec));
/* tmp[:] += zext (mem_base) */
if (exec)
{
- emit_insn (gen_addv64si3_vcc_dup_exec (tmplo, mem_base_lo, tmplo,
- vcc, undef_v64si, exec));
- emit_insn (gen_addcv64si3_exec (tmphi, tmphi, const0_rtx,
- vcc, vcc, undef_v64si, exec));
+ emit_insn (gen_addvNsi3_vcc_dup (tmplo, mem_base_lo, tmplo,
+ vcc, NULL, exec));
+ emit_insn (gen_addcvNsi3 (tmphi, tmphi, const0_rtx,
+ vcc, vcc, NULL, exec));
}
else
- emit_insn (gen_addv64di3_vcc_zext_dup (tmp, mem_base_lo, tmp, vcc));
+ emit_insn (gen_addvNdi3_vcc_zext_dup (tmp, mem_base_lo, tmp, vcc));
}
else
{
- tmp = gen_reg_rtx (V64DImode);
- if (exec)
- emit_insn (gen_addv64di3_vcc_zext_dup2_exec
- (tmp, tmplo, mem_base, vcc, gcn_gen_undef (V64DImode),
- exec));
- else
- emit_insn (gen_addv64di3_vcc_zext_dup2 (tmp, tmplo, mem_base, vcc));
+ tmp = gen_reg_rtx (pmode);
+ emit_insn (gen_addvNdi3_vcc_zext_dup2 (tmp, tmplo, mem_base, vcc,
+ NULL, exec));
}
new_base = tmp;
}
else if (AS_ANY_DS_P (as))
{
- if (!exec)
- emit_insn (gen_addv64si3_dup (tmplo, tmplo, mem_base));
- else
- emit_insn (gen_addv64si3_dup_exec (tmplo, tmplo, mem_base,
- gcn_gen_undef (V64SImode), exec));
+ emit_insn (gen_addvNsi3_dup (tmplo, tmplo, mem_base, NULL, exec));
new_base = tmplo;
}
else
{
- mem_base = gen_rtx_VEC_DUPLICATE (V64DImode, mem_base);
- new_base = gen_rtx_PLUS (V64DImode, mem_base,
- gen_rtx_SIGN_EXTEND (V64DImode, tmplo));
+ mem_base = gen_rtx_VEC_DUPLICATE (pmode, mem_base);
+ new_base = gen_rtx_PLUS (pmode, mem_base,
+ gen_rtx_SIGN_EXTEND (pmode, tmplo));
}
return gen_rtx_PLUS (GET_MODE (new_base), new_base,
@@ -1929,42 +2200,33 @@ gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem,
If EXEC is set then _exec patterns will be used, otherwise plain.
Return values.
- ADDR_SPACE_FLAT - return V64DImode vector of absolute addresses.
- ADDR_SPACE_GLOBAL - return V64SImode vector of offsets. */
+ ADDR_SPACE_FLAT - return VnDImode vector of absolute addresses.
+ ADDR_SPACE_GLOBAL - return VnSImode vector of offsets. */
rtx
gcn_expand_scaled_offsets (addr_space_t as, rtx base, rtx offsets, rtx scale,
bool unsigned_p, rtx exec)
{
- rtx tmpsi = gen_reg_rtx (V64SImode);
- rtx tmpdi = gen_reg_rtx (V64DImode);
- rtx undefsi = exec ? gcn_gen_undef (V64SImode) : NULL;
- rtx undefdi = exec ? gcn_gen_undef (V64DImode) : NULL;
+ int vf = GET_MODE_NUNITS (GET_MODE (offsets));
+ rtx tmpsi = gen_reg_rtx (VnMODE (vf, SImode));
+ rtx tmpdi = gen_reg_rtx (VnMODE (vf, DImode));
if (CONST_INT_P (scale)
&& INTVAL (scale) > 0
&& exact_log2 (INTVAL (scale)) >= 0)
- emit_insn (gen_ashlv64si3 (tmpsi, offsets,
- GEN_INT (exact_log2 (INTVAL (scale)))));
+ emit_insn (gen_ashlvNsi3 (tmpsi, offsets,
+ GEN_INT (exact_log2 (INTVAL (scale))),
+ NULL, exec));
else
- (exec
- ? emit_insn (gen_mulv64si3_dup_exec (tmpsi, offsets, scale, undefsi,
- exec))
- : emit_insn (gen_mulv64si3_dup (tmpsi, offsets, scale)));
+ emit_insn (gen_mulvNsi3_dup (tmpsi, offsets, scale, NULL, exec));
/* "Global" instructions do not support negative register offsets. */
if (as == ADDR_SPACE_FLAT || !unsigned_p)
{
if (unsigned_p)
- (exec
- ? emit_insn (gen_addv64di3_zext_dup2_exec (tmpdi, tmpsi, base,
- undefdi, exec))
- : emit_insn (gen_addv64di3_zext_dup2 (tmpdi, tmpsi, base)));
+ emit_insn (gen_addvNdi3_zext_dup2 (tmpdi, tmpsi, base, NULL, exec));
else
- (exec
- ? emit_insn (gen_addv64di3_sext_dup2_exec (tmpdi, tmpsi, base,
- undefdi, exec))
- : emit_insn (gen_addv64di3_sext_dup2 (tmpdi, tmpsi, base)));
+ emit_insn (gen_addvNdi3_sext_dup2 (tmpdi, tmpsi, base, NULL, exec));
return tmpdi;
}
else if (as == ADDR_SPACE_GLOBAL)
@@ -2065,59 +2327,9 @@ gcn_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
|| GET_MODE_CLASS (reload_mode) == MODE_VECTOR_FLOAT)
{
if (in_p)
- switch (reload_mode)
- {
- case E_V64SImode:
- sri->icode = CODE_FOR_reload_inv64si;
- break;
- case E_V64SFmode:
- sri->icode = CODE_FOR_reload_inv64sf;
- break;
- case E_V64HImode:
- sri->icode = CODE_FOR_reload_inv64hi;
- break;
- case E_V64HFmode:
- sri->icode = CODE_FOR_reload_inv64hf;
- break;
- case E_V64QImode:
- sri->icode = CODE_FOR_reload_inv64qi;
- break;
- case E_V64DImode:
- sri->icode = CODE_FOR_reload_inv64di;
- break;
- case E_V64DFmode:
- sri->icode = CODE_FOR_reload_inv64df;
- break;
- default:
- gcc_unreachable ();
- }
+ sri->icode = get_code_for_reload_in (reload_mode);
else
- switch (reload_mode)
- {
- case E_V64SImode:
- sri->icode = CODE_FOR_reload_outv64si;
- break;
- case E_V64SFmode:
- sri->icode = CODE_FOR_reload_outv64sf;
- break;
- case E_V64HImode:
- sri->icode = CODE_FOR_reload_outv64hi;
- break;
- case E_V64HFmode:
- sri->icode = CODE_FOR_reload_outv64hf;
- break;
- case E_V64QImode:
- sri->icode = CODE_FOR_reload_outv64qi;
- break;
- case E_V64DImode:
- sri->icode = CODE_FOR_reload_outv64di;
- break;
- case E_V64DFmode:
- sri->icode = CODE_FOR_reload_outv64df;
- break;
- default:
- gcc_unreachable ();
- }
+ sri->icode = get_code_for_reload_out (reload_mode);
break;
}
/* Fallthrough. */
@@ -3428,6 +3640,9 @@ gcn_valid_cvt_p (machine_mode from, machine_mode to, enum gcn_cvt_t op)
if (VECTOR_MODE_P (from))
{
+ if (GET_MODE_NUNITS (from) != GET_MODE_NUNITS (to))
+ return false;
+
from = GET_MODE_INNER (from);
to = GET_MODE_INNER (to);
}
@@ -3926,7 +4141,7 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ ,
rtx mem = gen_rtx_MEM (GET_MODE (target), addrs);
/*set_mem_addr_space (mem, ADDR_SPACE_FLAT); */
/* FIXME: set attributes. */
- emit_insn (gen_mov_with_exec (target, mem, exec));
+ emit_insn (gen_movvNm (target, mem, NULL, exec));
return target;
}
case GCN_BUILTIN_FLAT_STORE_PTR_INT32:
@@ -3961,20 +4176,18 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ ,
rtx mem = gen_rtx_MEM (vmode, addrs);
/*set_mem_addr_space (mem, ADDR_SPACE_FLAT); */
/* FIXME: set attributes. */
- emit_insn (gen_mov_with_exec (mem, val, exec));
+ emit_insn (gen_movvNm (mem, val, NULL, exec));
return target;
}
case GCN_BUILTIN_SQRTVF:
{
if (ignore)
return target;
- rtx exec = gcn_full_exec_reg ();
rtx arg = force_reg (V64SFmode,
expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
V64SFmode,
EXPAND_NORMAL));
- emit_insn (gen_sqrtv64sf2_exec
- (target, arg, gcn_gen_undef (V64SFmode), exec));
+ emit_insn (gen_sqrtv64sf2 (target, arg));
return target;
}
case GCN_BUILTIN_SQRTF:
@@ -3992,20 +4205,17 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ ,
{
if (ignore)
return target;
- rtx exec = gcn_full_exec_reg ();
rtx arg = force_reg (V64SFmode,
expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
V64SFmode,
EXPAND_NORMAL));
- emit_insn (gen_absv64sf2_exec
- (target, arg, gcn_gen_undef (V64SFmode), exec));
+ emit_insn (gen_absv64sf2 (target, arg));
return target;
}
case GCN_BUILTIN_LDEXPVF:
{
if (ignore)
return target;
- rtx exec = gcn_full_exec_reg ();
rtx arg1 = force_reg (V64SFmode,
expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
V64SFmode,
@@ -4014,15 +4224,13 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ ,
expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX,
V64SImode,
EXPAND_NORMAL));
- emit_insn (gen_ldexpv64sf3_exec
- (target, arg1, arg2, gcn_gen_undef (V64SFmode), exec));
+ emit_insn (gen_ldexpv64sf3 (target, arg1, arg2));
return target;
}
case GCN_BUILTIN_LDEXPV:
{
if (ignore)
return target;
- rtx exec = gcn_full_exec_reg ();
rtx arg1 = force_reg (V64DFmode,
expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
V64SFmode,
@@ -4031,60 +4239,51 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ ,
expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX,
V64SImode,
EXPAND_NORMAL));
- emit_insn (gen_ldexpv64df3_exec
- (target, arg1, arg2, gcn_gen_undef (V64DFmode), exec));
+ emit_insn (gen_ldexpv64df3 (target, arg1, arg2));
return target;
}
case GCN_BUILTIN_FREXPVF_EXP:
{
if (ignore)
return target;
- rtx exec = gcn_full_exec_reg ();
rtx arg = force_reg (V64SFmode,
expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
V64SFmode,
EXPAND_NORMAL));
- emit_insn (gen_frexpv64sf_exp2_exec
- (target, arg, gcn_gen_undef (V64SImode), exec));
+ emit_insn (gen_frexpv64sf_exp2 (target, arg));
return target;
}
case GCN_BUILTIN_FREXPVF_MANT:
{
if (ignore)
return target;
- rtx exec = gcn_full_exec_reg ();
rtx arg = force_reg (V64SFmode,
expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
V64SFmode,
EXPAND_NORMAL));
- emit_insn (gen_frexpv64sf_mant2_exec
- (target, arg, gcn_gen_undef (V64SFmode), exec));
+ emit_insn (gen_frexpv64sf_mant2 (target, arg));
return target;
}
case GCN_BUILTIN_FREXPV_EXP:
{
if (ignore)
return target;
- rtx exec = gcn_full_exec_reg ();
rtx arg = force_reg (V64DFmode,
expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
V64DFmode,
EXPAND_NORMAL));
- emit_insn (gen_frexpv64df_exp2_exec
- (target, arg, gcn_gen_undef (V64SImode), exec));
+ emit_insn (gen_frexpv64df_exp2 (target, arg));
return target;
}
case GCN_BUILTIN_FREXPV_MANT:
{
if (ignore)
return target;
- rtx exec = gcn_full_exec_reg ();
rtx arg = force_reg (V64DFmode,
expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX,
V64DFmode,
EXPAND_NORMAL));
- emit_insn (gen_frexpv64df_mant2_exec
- (target, arg, gcn_gen_undef (V64DFmode), exec));
+ emit_insn (gen_frexpv64df_mant2 (target, arg));
return target;
}
case GCN_BUILTIN_OMP_DIM_SIZE:
@@ -4239,10 +4438,11 @@ gcn_vectorize_get_mask_mode (machine_mode)
Helper function for gcn_vectorize_vec_perm_const. */
static rtx
-gcn_make_vec_perm_address (unsigned int *perm)
+gcn_make_vec_perm_address (unsigned int *perm, int nelt)
{
- rtx x = gen_reg_rtx (V64SImode);
- emit_move_insn (x, gcn_vec_constant (V64SImode, 0));
+ machine_mode mode = VnMODE (nelt, SImode);
+ rtx x = gen_reg_rtx (mode);
+ emit_move_insn (x, gcn_vec_constant (mode, 0));
/* Permutation addresses use byte addressing. With each vector lane being
4 bytes wide, and with 64 lanes in total, only bits 2..7 are significant,
@@ -4258,15 +4458,13 @@ gcn_make_vec_perm_address (unsigned int *perm)
{
uint64_t exec_mask = 0;
uint64_t lane_mask = 1;
- for (int j = 0; j < 64; j++, lane_mask <<= 1)
- if ((perm[j] * 4) & bit_mask)
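+ /* Only the lane offset within a single vector matters here; whether a
+ lane reads SRC0 or SRC1 is handled by the exec-masked merge in the
+ caller, hence the modulo. */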
+ for (int j = 0; j < nelt; j++, lane_mask <<= 1)
+ if (((perm[j] % nelt) * 4) & bit_mask)
exec_mask |= lane_mask;
if (exec_mask)
- emit_insn (gen_addv64si3_exec (x, x,
- gcn_vec_constant (V64SImode,
- bit_mask),
- x, get_exec (exec_mask)));
+ emit_insn (gen_addvNsi3 (x, x, gcn_vec_constant (mode, bit_mask),
+ x, get_exec (exec_mask)));
}
return x;
@@ -4336,39 +4534,11 @@ gcn_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
src1_lanes |= lane_bit;
}
- rtx addr = gcn_make_vec_perm_address (perm);
- rtx (*ds_bpermute) (rtx, rtx, rtx, rtx);
-
- switch (vmode)
- {
- case E_V64QImode:
- ds_bpermute = gen_ds_bpermutev64qi;
- break;
- case E_V64HImode:
- ds_bpermute = gen_ds_bpermutev64hi;
- break;
- case E_V64SImode:
- ds_bpermute = gen_ds_bpermutev64si;
- break;
- case E_V64HFmode:
- ds_bpermute = gen_ds_bpermutev64hf;
- break;
- case E_V64SFmode:
- ds_bpermute = gen_ds_bpermutev64sf;
- break;
- case E_V64DImode:
- ds_bpermute = gen_ds_bpermutev64di;
- break;
- case E_V64DFmode:
- ds_bpermute = gen_ds_bpermutev64df;
- break;
- default:
- gcc_assert (false);
- }
+ rtx addr = gcn_make_vec_perm_address (perm, nelt);
/* Load elements from src0 to dst. */
- gcc_assert (~src1_lanes);
- emit_insn (ds_bpermute (dst, addr, src0, gcn_full_exec_reg ()));
+ gcc_assert ((~src1_lanes) & (0xffffffffffffffffUL >> (64-nelt)));
+ emit_insn (gen_ds_bpermutevNm (dst, addr, src0, get_exec (vmode)));
/* Load elements from src1 to dst. */
if (src1_lanes)
@@ -4379,8 +4549,8 @@ gcn_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
the two source vectors together.
*/
rtx tmp = gen_reg_rtx (vmode);
- emit_insn (ds_bpermute (tmp, addr, src1, gcn_full_exec_reg ()));
- emit_insn (gen_mov_with_exec (dst, tmp, get_exec (src1_lanes)));
+ emit_insn (gen_ds_bpermutevNm (tmp, addr, src1, get_exec (vmode)));
+ emit_insn (gen_movvNm (dst, tmp, dst, get_exec (src1_lanes)));
}
return true;
@@ -4396,7 +4566,22 @@ gcn_vector_mode_supported_p (machine_mode mode)
{
return (mode == V64QImode || mode == V64HImode
|| mode == V64SImode || mode == V64DImode
- || mode == V64SFmode || mode == V64DFmode);
+ || mode == V64SFmode || mode == V64DFmode
+ || mode == V32QImode || mode == V32HImode
+ || mode == V32SImode || mode == V32DImode
+ || mode == V32SFmode || mode == V32DFmode
+ || mode == V16QImode || mode == V16HImode
+ || mode == V16SImode || mode == V16DImode
+ || mode == V16SFmode || mode == V16DFmode
+ || mode == V8QImode || mode == V8HImode
+ || mode == V8SImode || mode == V8DImode
+ || mode == V8SFmode || mode == V8DFmode
+ || mode == V4QImode || mode == V4HImode
+ || mode == V4SImode || mode == V4DImode
+ || mode == V4SFmode || mode == V4DFmode
+ || mode == V2QImode || mode == V2HImode
+ || mode == V2SImode || mode == V2DImode
+ || mode == V2SFmode || mode == V2DFmode);
}
/* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE.
@@ -4425,23 +4610,74 @@ gcn_vectorize_preferred_simd_mode (scalar_mode mode)
}
}
+/* Implement TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES.
+
+ Try all the vector modes. */
+
+static unsigned int
+gcn_autovectorize_vector_modes (vector_modes *modes, bool ARG_UNUSED (all))
+{
+ modes->safe_push (V64QImode);
+ modes->safe_push (V64HImode);
+ modes->safe_push (V64SImode);
+ modes->safe_push (V64SFmode);
+ modes->safe_push (V64DImode);
+ modes->safe_push (V64DFmode);
+
+ modes->safe_push (V32QImode);
+ modes->safe_push (V32HImode);
+ modes->safe_push (V32SImode);
+ modes->safe_push (V32SFmode);
+ modes->safe_push (V32DImode);
+ modes->safe_push (V32DFmode);
+
+ modes->safe_push (V16QImode);
+ modes->safe_push (V16HImode);
+ modes->safe_push (V16SImode);
+ modes->safe_push (V16SFmode);
+ modes->safe_push (V16DImode);
+ modes->safe_push (V16DFmode);
+
+ modes->safe_push (V8QImode);
+ modes->safe_push (V8HImode);
+ modes->safe_push (V8SImode);
+ modes->safe_push (V8SFmode);
+ modes->safe_push (V8DImode);
+ modes->safe_push (V8DFmode);
+
+ modes->safe_push (V4QImode);
+ modes->safe_push (V4HImode);
+ modes->safe_push (V4SImode);
+ modes->safe_push (V4SFmode);
+ modes->safe_push (V4DImode);
+ modes->safe_push (V4DFmode);
+
+ modes->safe_push (V2QImode);
+ modes->safe_push (V2HImode);
+ modes->safe_push (V2SImode);
+ modes->safe_push (V2SFmode);
+ modes->safe_push (V2DImode);
+ modes->safe_push (V2DFmode);
+
+ /* We shouldn't need VECT_COMPARE_COSTS as they should all cost the same. */
+ return 0;
+}
+
/* Implement TARGET_VECTORIZE_RELATED_MODE.
All GCN vectors are 64-lane, so this is simpler than other architectures.
In particular, we do *not* want to match vector bit-size. */
static opt_machine_mode
-gcn_related_vector_mode (machine_mode ARG_UNUSED (vector_mode),
+gcn_related_vector_mode (machine_mode vector_mode,
scalar_mode element_mode, poly_uint64 nunits)
{
- if (known_ne (nunits, 0U) && known_ne (nunits, 64U))
- return VOIDmode;
+ int n = nunits.to_constant ();
- machine_mode pref_mode = gcn_vectorize_preferred_simd_mode (element_mode);
- if (!VECTOR_MODE_P (pref_mode))
- return VOIDmode;
+ if (n == 0)
+ n = GET_MODE_NUNITS (vector_mode);
- return pref_mode;
+ return VnMODE (n, element_mode);
}
/* Implement TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT.
@@ -4566,6 +4802,8 @@ gcn_expand_dpp_shr_insn (machine_mode mode, const char *insn,
The vector register SRC of mode MODE is reduced using the operation given
by UNSPEC, and the scalar result is returned in lane 63 of a vector
register. */
+/* FIXME: Implement reductions for sizes other than V64.
+ (They're currently disabled in the machine description.) */
rtx
gcn_expand_reduc_scalar (machine_mode mode, rtx src, int unspec)
@@ -4975,10 +5213,11 @@ gcn_md_reorg (void)
{
if (VECTOR_MODE_P (GET_MODE (x)))
{
- new_exec = -1;
- break;
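+ /* A write in a VF-lane vector mode only needs the low VF bits of
+ EXEC to be set. */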
+ int vf = GET_MODE_NUNITS (GET_MODE (x));
+ new_exec = MAX ((uint64_t)new_exec,
+ 0xffffffffffffffffUL >> (64-vf));
}
- else
+ else if (new_exec == 0)
new_exec = 1;
}
}
@@ -5693,13 +5932,12 @@ static void
print_reg (FILE *file, rtx x)
{
machine_mode mode = GET_MODE (x);
+ if (VECTOR_MODE_P (mode))
+ mode = GET_MODE_INNER (mode);
if (mode == BImode || mode == QImode || mode == HImode || mode == SImode
- || mode == HFmode || mode == SFmode
- || mode == V64SFmode || mode == V64SImode
- || mode == V64QImode || mode == V64HImode)
+ || mode == HFmode || mode == SFmode)
fprintf (file, "%s", reg_names[REGNO (x)]);
- else if (mode == DImode || mode == V64DImode
- || mode == DFmode || mode == V64DFmode)
+ else if (mode == DImode || mode == DFmode)
{
if (SGPR_REGNO_P (REGNO (x)))
fprintf (file, "s[%i:%i]", REGNO (x) - FIRST_SGPR_REG,
@@ -6146,20 +6384,20 @@ print_operand (FILE *file, rtx x, int code)
case 'o':
{
const char *s = 0;
- switch (GET_MODE_SIZE (GET_MODE (x)))
+ machine_mode mode = GET_MODE (x);
+ if (VECTOR_MODE_P (mode))
+ mode = GET_MODE_INNER (mode);
+
+ switch (mode)
{
- case 1:
+ case E_QImode:
s = "_ubyte";
break;
- case 2:
+ case E_HImode:
+ case E_HFmode:
s = "_ushort";
break;
- /* The following are full-vector variants. */
- case 64:
- s = "_ubyte";
- break;
- case 128:
- s = "_ushort";
+ default:
break;
}
@@ -6174,43 +6412,31 @@ print_operand (FILE *file, rtx x, int code)
}
case 's':
{
- const char *s = "";
- switch (GET_MODE_SIZE (GET_MODE (x)))
+ const char *s;
+ machine_mode mode = GET_MODE (x);
+ if (VECTOR_MODE_P (mode))
+ mode = GET_MODE_INNER (mode);
+
+ switch (mode)
{
- case 1:
+ case E_QImode:
s = "_byte";
break;
- case 2:
+ case E_HImode:
+ case E_HFmode:
s = "_short";
break;
- case 4:
+ case E_SImode:
+ case E_SFmode:
s = "_dword";
break;
- case 8:
+ case E_DImode:
+ case E_DFmode:
s = "_dwordx2";
break;
- case 12:
- s = "_dwordx3";
- break;
- case 16:
+ case E_TImode:
s = "_dwordx4";
break;
- case 32:
- s = "_dwordx8";
- break;
- case 64:
- s = VECTOR_MODE_P (GET_MODE (x)) ? "_byte" : "_dwordx16";
- break;
- /* The following are full-vector variants. */
- case 128:
- s = "_short";
- break;
- case 256:
- s = "_dword";
- break;
- case 512:
- s = "_dwordx2";
- break;
default:
output_operand_lossage ("invalid operand %%xn code");
return;
@@ -6714,6 +6940,9 @@ gcn_dwarf_register_span (rtx rtl)
#define TARGET_ASM_TRAMPOLINE_TEMPLATE gcn_asm_trampoline_template
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE gcn_attribute_table
+#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
+#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
+ gcn_autovectorize_vector_modes
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL gcn_builtin_decl
#undef TARGET_CAN_CHANGE_MODE_CLASS