s390: Implement vec_set with vec_merge and vec_duplicate.
Commit Message
Hi,
similar to other backends this patch implements vec_set via
vec_merge and vec_duplicate instead of an unspec. This opens up
more possibilities to combine instructions.
Bootstrapped and regtested. No regressions.
Is it OK?
Regards
Robin
gcc/ChangeLog:
* config/s390/s390.md: Implement vec_set with vec_merge and
vec_duplicate.
* config/s390/vector.md: Likewise.
* config/s390/vx-builtins.md: Likewise.
* config/s390/s390.cc (s390_expand_vec_init): Emit new pattern.
(print_operand_address): New output modifier.
(print_operand): New output modifier.
---
"vsce<V_HW_2:bhfgq>\t%v0,%O2(%v1,%R2),%3"
@@ -1858,14 +1861,18 @@ (define_expand "vec_ld2f"
; necessary since all elements of the vector will be set anyway.
; This is just to make it explicit to the data flow framework.
(set (match_dup 2) (match_dup 3))
- (set (match_dup 2) (unspec:V4SF [(match_operand:SF 1
"memory_operand" "")
- (const_int 0)
- (match_dup 2)]
- UNSPEC_VEC_SET))
- (set (match_dup 2) (unspec:V4SF [(match_dup 4)
- (const_int 2)
- (match_dup 2)]
- UNSPEC_VEC_SET))
+ (set (match_dup 2)
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (match_operand:SF 1 "memory_operand" ""))
+ (match_dup 2)
+ (const_int 1)))
+ (set (match_dup 2)
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (match_dup 4))
+ (match_dup 2)
+ (const_int 4)))
(set (match_operand:V2DF 0 "register_operand" "")
(unspec:V2DF [(match_dup 2)] UNSPEC_VEC_VFLL))]
"TARGET_VX"
@@ -2303,26 +2310,29 @@ (define_insn_and_split "*eltswap<mode>_emu"
; vlebrh, vlebrf, vlebrg
(define_insn "*vec_set_bswap_elem<mode>"
[(set (match_operand:V_HW_HSD 0
"register_operand" "=v")
- (unspec:V_HW_HSD [(bswap:<non_vec> (match_operand:<non_vec> 1
"memory_operand" "R"))
- (match_operand:SI 2
"const_int_operand" "C")
- (match_operand:V_HW_HSD 3 "register_operand" "0")]
- UNSPEC_VEC_SET))]
- "TARGET_VXE2 && UINTVAL (operands[2]) < GET_MODE_NUNITS
(<V_HW_HSD:MODE>mode)"
- "vlebr<bhfgq>\t%v0,%1,%2"
+ (vec_merge:V_HW_HSD
+ (vec_duplicate:V_HW_HSD
+ (bswap:<non_vec>
+ (match_operand:<non_vec> 1 "memory_operand" "R")))
+ (match_operand:V_HW_HSD 3 "register_operand" "0")
+ (match_operand:SI 2 "const_int_operand" "C")))]
+ "TARGET_VXE2 && exact_log2 (UINTVAL (operands[2])) < GET_MODE_NUNITS
(<V_HW_HSD:MODE>mode)"
+ "vlebr<bhfgq>\t%v0,%1,%p2"
[(set_attr "op_type" "VRX")])
; vec_revb (vec_insert (*a, vec_revb (b), 1)) set-element-bswap-1.c
; vlebrh, vlebrf, vlebrg
(define_insn "*vec_set_bswap_vec<mode>"
[(set (match_operand:V_HW_HSD 0
"register_operand" "=v")
- (bswap:V_HW_HSD
- (unspec:V_HW_HSD [(match_operand:<non_vec> 1
"memory_operand" "R")
- (match_operand:SI 2
"const_int_operand" "C")
- (bswap:V_HW_HSD (match_operand:V_HW_HSD 3 "register_operand"
"0"))]
- UNSPEC_VEC_SET)))
- (use (match_operand:V16QI 4
"permute_pattern_operand" "X"))]
- "TARGET_VXE2 && UINTVAL (operands[2]) < GET_MODE_NUNITS
(<V_HW_HSD:MODE>mode)"
- "vlebr<bhfgq>\t%v0,%1,%2"
+ (bswap:V_HW_HSD
+ (vec_merge:V_HW_HSD
+ (vec_duplicate:V_HW_HSD
+ (match_operand:<non_vec> 1 "memory_operand" "R"))
+ (match_operand:V_HW_HSD 3 "register_operand" "0")
+ (match_operand:SI 2 "const_int_operand" "C"))))
+ (use (match_operand:V16QI 4 "permute_pattern_operand" "X"))]
+ "TARGET_VXE2 && exact_log2 (UINTVAL (operands[2])) < GET_MODE_NUNITS
(<V_HW_HSD:MODE>mode)"
+ "vlebr<bhfgq>\t%v0,%1,%p2"
[(set_attr "op_type" "VRX")])
; *a = vec_extract (vec_revb (b), 1); get-element-bswap-3.c
Comments
On 8/12/22 16:48, Robin Dapp wrote:
> Hi,
>
> similar to other backends this patch implements vec_set via
> vec_merge and vec_duplicate instead of an unspec. This opens up
> more possibilities to combine instructions.
>
> Bootstrapped and regtested. No regressions.
>
> Is it OK?
>
> Regards
> Robin
>
> gcc/ChangeLog:
>
> * config/s390/s390.md: Implement vec_set with vec_merge and
> vec_duplicate.
> * config/s390/vector.md: Likewise.
> * config/s390/vx-builtins.md: Likewise.
> * config/s390/s390.cc (s390_expand_vec_init): Emit new pattern.
> (print_operand_address): New output modifier.
> (print_operand): New output modifier.
The way you handle the element selector doesn't look right to me. It appears to be a bitmask if it is
a CONST_INT and an index otherwise. I don't think it is legal to change operand semantics like this
depending on the operand type. This would break e.g. if LRA decided to load the immediate selector
into a register.
Couldn't you make the shift part of the RTX instead and have the parameter always as an index?
Bye,
Andreas
> ---
>
> diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
> index c86b26933d7a..ff89fb83360a 100644
> --- a/gcc/config/s390/s390.cc
> +++ b/gcc/config/s390/s390.cc
> @@ -7073,11 +7073,10 @@ s390_expand_vec_init (rtx target, rtx vals)
> if (!general_operand (elem, GET_MODE (elem)))
> elem = force_reg (inner_mode, elem);
>
> - emit_insn (gen_rtx_SET (target,
> - gen_rtx_UNSPEC (mode,
> - gen_rtvec (3, elem,
> - GEN_INT (i), target),
> - UNSPEC_VEC_SET)));
> + emit_insn
> + (gen_rtx_SET
> + (target, gen_rtx_VEC_MERGE
> + (mode, gen_rtx_VEC_DUPLICATE (mode, elem), target, GEN_INT (1 << i))));
> }
> }
>
> @@ -8057,6 +8056,8 @@ print_operand_address (FILE *file, rtx addr)
> 'S': print S-type memory reference (base+displacement).
> 'Y': print address style operand without index (e.g. shift count or
> setmem
> operand).
> + 'P': print address-style operand without index but with the offset as
> + if it were specified by a 'p' format flag.
>
> 'b': print integer X as if it's an unsigned byte.
> 'c': print integer X as if it's an signed byte.
> @@ -8068,6 +8069,7 @@ print_operand_address (FILE *file, rtx addr)
> 'k': print the first nonzero SImode part of X.
> 'm': print the first SImode part unequal to -1 of X.
> 'o': print integer X as if it's an unsigned 32bit word.
> + 'p': print N such that 2^N == X (X must be a power of 2 and const int).
> 's': "start" of contiguous bitmask X in either DImode or vector
> inner mode.
> 't': CONST_INT: "start" of contiguous bitmask X in SImode.
> CONST_VECTOR: Generate a bitmask for vgbm instruction.
> @@ -8237,6 +8239,16 @@ print_operand (FILE *file, rtx x, int code)
> print_shift_count_operand (file, x);
> return;
>
> + case 'P':
> + if (CONST_INT_P (x))
> + {
> + ival = exact_log2 (INTVAL (x));
> + fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
> + }
> + else
> + print_shift_count_operand (file, x);
> + return;
> +
> case 'K':
> /* Append @PLT to both local and non-local symbols in order to
> support
> Linux Kernel livepatching: patches contain individual functions and
> @@ -8321,6 +8333,9 @@ print_operand (FILE *file, rtx x, int code)
> case 'o':
> ival &= 0xffffffff;
> break;
> + case 'p':
> + ival = exact_log2 (INTVAL (x));
> + break;
> case 'e': case 'f':
> case 's': case 't':
> {
> diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
> index f37d8fd33a15..a82db4c624fa 100644
> --- a/gcc/config/s390/s390.md
> +++ b/gcc/config/s390/s390.md
> @@ -183,7 +183,6 @@ (define_c_enum "unspec" [
> UNSPEC_VEC_GFMSUM_128
> UNSPEC_VEC_GFMSUM_ACCUM
> UNSPEC_VEC_GFMSUM_ACCUM_128
> - UNSPEC_VEC_SET
>
> UNSPEC_VEC_VSUMG
> UNSPEC_VEC_VSUMQ
> diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
> index c50451a8326c..bde3a39db3d4 100644
> --- a/gcc/config/s390/vector.md
> +++ b/gcc/config/s390/vector.md
> @@ -467,12 +467,17 @@ (define_insn "mov<mode>"
> ; vec_set is supposed to *modify* an existing vector so operand 0 is
> ; duplicated as input operand.
> (define_expand "vec_set<mode>"
> - [(set (match_operand:V 0 "register_operand" "")
> - (unspec:V [(match_operand:<non_vec> 1 "general_operand" "")
> - (match_operand:SI 2 "nonmemory_operand" "")
> - (match_dup 0)]
> - UNSPEC_VEC_SET))]
> - "TARGET_VX")
> + [(set (match_operand:V 0 "register_operand" "")
> + (vec_merge:V
> + (vec_duplicate:V
> + (match_operand:<non_vec> 1 "general_operand" ""))
> + (match_dup 0)
> + (match_operand:SI 2 "nonmemory_operand")))]
> + ""
> +{
> + if (CONST_INT_P (operands[2]))
> + operands[2] = GEN_INT (1 << INTVAL (operands[2]));
> +})
>
> ; FIXME: Support also vector mode operands for 1
> ; FIXME: A target memory operand seems to be useful otherwise we end
> @@ -480,28 +485,31 @@ (define_expand "vec_set<mode>"
> ; that itself?
> ; vlvgb, vlvgh, vlvgf, vlvgg, vleb, vleh, vlef, vleg, vleib, vleih,
> vleif, vleig
> (define_insn "*vec_set<mode>"
> - [(set (match_operand:V 0 "register_operand" "=v,v,v")
> - (unspec:V [(match_operand:<non_vec> 1 "general_operand" "d,R,K")
> - (match_operand:SI 2 "nonmemory_operand" "an,I,I")
> - (match_operand:V 3 "register_operand" "0,0,0")]
> - UNSPEC_VEC_SET))]
> + [(set (match_operand:V 0 "register_operand" "=v,v,v")
> + (vec_merge:V
> + (vec_duplicate:V
> + (match_operand:<non_vec> 1 "general_operand" "d,R,K"))
> + (match_operand:V 3 "register_operand" "0,0,0")
> + (match_operand:SI 2 "nonmemory_operand" "an,I,I")))]
> "TARGET_VX
> && (!CONST_INT_P (operands[2])
> - || UINTVAL (operands[2]) < GET_MODE_NUNITS (<V:MODE>mode))"
> + || exact_log2 (UINTVAL (operands[2])) < (GET_MODE_NUNITS
> (<V:MODE>mode)))"
> "@
> - vlvg<bhfgq>\t%v0,%1,%Y2
> - vle<bhfgq>\t%v0,%1,%2
> - vlei<bhfgq>\t%v0,%1,%2"
> + vlvg<bhfgq>\t%v0,%1,%P2
> + vle<bhfgq>\t%v0,%1,%p2
> + vlei<bhfgq>\t%v0,%1,%p2"
> [(set_attr "op_type" "VRS,VRX,VRI")])
>
> ; vlvgb, vlvgh, vlvgf, vlvgg
> (define_insn "*vec_set<mode>_plus"
> - [(set (match_operand:V 0 "register_operand" "=v")
> - (unspec:V [(match_operand:<non_vec> 1 "general_operand" "d")
> - (plus:SI (match_operand:SI 2 "register_operand" "a")
> - (match_operand:SI 4 "const_int_operand" "n"))
> - (match_operand:V 3 "register_operand" "0")]
> - UNSPEC_VEC_SET))]
> + [(set (match_operand:V 0 "register_operand" "=v")
> + (vec_merge:V
> + (vec_duplicate:V
> + (match_operand:<non_vec> 1 "general_operand" "d"))
> + (match_operand:V 3 "register_operand" "0")
> + (plus:SI
> + (match_operand:SI 2 "register_operand" "a")
> + (match_operand:SI 4 "const_int_operand" "n"))))]
> "TARGET_VX"
> "vlvg<bhfgq>\t%v0,%1,%Y4(%2)"
> [(set_attr "op_type" "VRS")])
> @@ -575,7 +583,7 @@ (define_insn "*vec_splat<mode>"
> (match_operand:V_128_NOSINGLE 1 "register_operand" "v")
> (parallel
> [(match_operand:QI 2 "const_mask_operand" "C")]))))]
> - "TARGET_VX && UINTVAL (operands[2]) < GET_MODE_NUNITS (<MODE>mode)"
> + "TARGET_VX && exact_log2 (UINTVAL (operands[2])) < GET_MODE_NUNITS
> (<MODE>mode)"
> "vrep<bhfgq>\t%v0,%v1,%2"
> [(set_attr "op_type" "VRI")])
>
> @@ -678,13 +686,18 @@ (define_split
> (vec_duplicate:V_128_NOSINGLE (match_operand:<non_vec> 1
> "register_operand" "")))]
> "TARGET_VX && GENERAL_REG_P (operands[1])"
> [(set (match_dup 0)
> - (unspec:V_128_NOSINGLE [(match_dup 1) (match_dup 2) (match_dup 0)]
> UNSPEC_VEC_SET))
> + (vec_merge:V_128_NOSINGLE
> + (vec_duplicate:V_128_NOSINGLE
> + (match_dup 1))
> + (match_dup 0)
> + (match_dup 2)))
> (set (match_dup 0)
> (vec_duplicate:V_128_NOSINGLE
> (vec_select:<non_vec>
> - (match_dup 0) (parallel [(match_dup 2)]))))]
> + (match_dup 0) (parallel [(match_dup 3)]))))]
> {
> - operands[2] = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1);
> + operands[2] = GEN_INT (1 << (GET_MODE_NUNITS (<MODE>mode) - 1));
> + operands[3] = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1);
> })
>
> (define_predicate "vcond_comparison_operator"
> @@ -1136,9 +1149,12 @@ (define_expand "popcountv8hi2_vx"
> (set (match_dup 3) (match_dup 2))
> ; Generate the shift count operand in a VR (8->byte 7)
> (set (match_dup 4) (match_dup 5))
> - (set (match_dup 4) (unspec:V16QI [(const_int 8)
> - (const_int 7)
> - (match_dup 4)] UNSPEC_VEC_SET))
> + (set (match_dup 4)
> + (vec_merge:V16QI
> + (vec_duplicate:V16QI
> + (const_int 8))
> + (match_dup 4)
> + (const_int 128)))
> ; Vector shift right logical by one byte
> (set (match_dup 3)
> (unspec:V16QI [(match_dup 3) (match_dup 4)] UNSPEC_VEC_SRLB))
> @@ -1339,10 +1355,11 @@ (define_insn "*vec_slb<mode>"
> ; this means it is a left shift on BE targets!
> (define_expand "vec_shr_<mode>"
> [(set (match_dup 3)
> - (unspec:V16QI [(match_operand:SI 2 "const_shift_by_byte_operand" "")
> - (const_int 7)
> - (match_dup 3)]
> - UNSPEC_VEC_SET))
> + (vec_merge:V16QI
> + (vec_duplicate:V16QI
> + (match_operand:SI 2 "const_shift_by_byte_operand" ""))
> + (match_dup 3)
> + (const_int 128)))
> (set (match_operand:V_128 0 "register_operand" "")
> (unspec:V_128 [(match_operand:V_128 1 "register_operand" "")
> (match_dup 3)]
> diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md
> index 01dfde438af5..2bdd694112a1 100644
> --- a/gcc/config/s390/vx-builtins.md
> +++ b/gcc/config/s390/vx-builtins.md
> @@ -150,22 +150,25 @@ (define_expand "vec_splats<mode>"
> (vec_duplicate:VEC_HW (match_operand:<non_vec> 1 "general_operand" "")))]
> "TARGET_VX")
>
> +
> (define_expand "vec_insert<mode>"
> - [(set (match_operand:VEC_HW 0 "register_operand" "")
> - (unspec:VEC_HW [(match_operand:<non_vec> 2 "register_operand" "")
> - (match_operand:SI 3 "nonmemory_operand" "")
> - (match_operand:VEC_HW 1 "register_operand" "")]
> - UNSPEC_VEC_SET))]
> + [(set (match_operand:VEC_HW 0 "register_operand" "")
> + (vec_merge:VEC_HW
> + (vec_duplicate:VEC_HW
> + (match_operand:<non_vec> 2 "register_operand" ""))
> + (match_operand:VEC_HW 1 "register_operand" "")
> + (match_operand:SI 3 "nonmemory_operand" "")))]
> "TARGET_VX"
> "")
>
> ; This is vec_set + modulo arithmetic on the element selector (op 2)
> (define_expand "vec_promote<mode>"
> - [(set (match_operand:VEC_HW 0 "register_operand" "")
> - (unspec:VEC_HW [(match_operand:<non_vec> 1 "register_operand" "")
> - (match_operand:SI 2 "nonmemory_operand" "")
> - (match_dup 0)]
> - UNSPEC_VEC_SET))]
> + [(set (match_operand:VEC_HW 0 "register_operand" "")
> + (vec_merge:VEC_HW
> + (vec_duplicate:VEC_HW
> + (match_operand:<non_vec> 1 "register_operand" ""))
> + (match_dup 0)
> + (match_operand:SI 2 "nonmemory_operand" "")))]
> "TARGET_VX"
> "")
>
> @@ -457,11 +460,11 @@ (define_insn "vec_scatter_element<V_HW_2:mode>_SI"
> [(set (mem:<non_vec>
> (plus:SI (subreg:SI
> (vec_select:<non_vec_int>
> - (match_operand:V_HW_2 1 "register_operand" "v")
> - (parallel [(match_operand:QI 3 "const_mask_operand" "C")])) 4)
> - (match_operand:SI 2 "address_operand" "ZQ")))
> + (match_operand:<TOINTVEC> 1 "register_operand" "v")
> + (parallel [(match_operand:QI 3 "const_mask_operand" "C")])) 4)
> + (match_operand:SI 2 "address_operand" "ZQ")))
> (vec_select:<non_vec>
> - (match_operand:V_HW_2 0 "register_operand" "v")
> + (match_operand:V_HW_2 0 "register_operand" "v")
> (parallel [(match_dup 3)])))]
> "TARGET_VX && !TARGET_64BIT && UINTVAL (operands[3]) <
> GET_MODE_NUNITS (<V_HW_2:MODE>mode)"
> "vsce<V_HW_2:bhfgq>\t%v0,%O2(%v1,%R2),%3"
> @@ -1858,14 +1861,18 @@ (define_expand "vec_ld2f"
> ; necessary since all elements of the vector will be set anyway.
> ; This is just to make it explicit to the data flow framework.
> (set (match_dup 2) (match_dup 3))
> - (set (match_dup 2) (unspec:V4SF [(match_operand:SF 1
> "memory_operand" "")
> - (const_int 0)
> - (match_dup 2)]
> - UNSPEC_VEC_SET))
> - (set (match_dup 2) (unspec:V4SF [(match_dup 4)
> - (const_int 2)
> - (match_dup 2)]
> - UNSPEC_VEC_SET))
> + (set (match_dup 2)
> + (vec_merge:V4SF
> + (vec_duplicate:V4SF
> + (match_operand:SF 1 "memory_operand" ""))
> + (match_dup 2)
> + (const_int 1)))
> + (set (match_dup 2)
> + (vec_merge:V4SF
> + (vec_duplicate:V4SF
> + (match_dup 4))
> + (match_dup 2)
> + (const_int 4)))
> (set (match_operand:V2DF 0 "register_operand" "")
> (unspec:V2DF [(match_dup 2)] UNSPEC_VEC_VFLL))]
> "TARGET_VX"
> @@ -2303,26 +2310,29 @@ (define_insn_and_split "*eltswap<mode>_emu"
> ; vlebrh, vlebrf, vlebrg
> (define_insn "*vec_set_bswap_elem<mode>"
> [(set (match_operand:V_HW_HSD 0
> "register_operand" "=v")
> - (unspec:V_HW_HSD [(bswap:<non_vec> (match_operand:<non_vec> 1
> "memory_operand" "R"))
> - (match_operand:SI 2
> "const_int_operand" "C")
> - (match_operand:V_HW_HSD 3 "register_operand" "0")]
> - UNSPEC_VEC_SET))]
> - "TARGET_VXE2 && UINTVAL (operands[2]) < GET_MODE_NUNITS
> (<V_HW_HSD:MODE>mode)"
> - "vlebr<bhfgq>\t%v0,%1,%2"
> + (vec_merge:V_HW_HSD
> + (vec_duplicate:V_HW_HSD
> + (bswap:<non_vec>
> + (match_operand:<non_vec> 1 "memory_operand" "R")))
> + (match_operand:V_HW_HSD 3 "register_operand" "0")
> + (match_operand:SI 2 "const_int_operand" "C")))]
> + "TARGET_VXE2 && exact_log2 (UINTVAL (operands[2])) < GET_MODE_NUNITS
> (<V_HW_HSD:MODE>mode)"
> + "vlebr<bhfgq>\t%v0,%1,%p2"
> [(set_attr "op_type" "VRX")])
>
> ; vec_revb (vec_insert (*a, vec_revb (b), 1)) set-element-bswap-1.c
> ; vlebrh, vlebrf, vlebrg
> (define_insn "*vec_set_bswap_vec<mode>"
> [(set (match_operand:V_HW_HSD 0
> "register_operand" "=v")
> - (bswap:V_HW_HSD
> - (unspec:V_HW_HSD [(match_operand:<non_vec> 1
> "memory_operand" "R")
> - (match_operand:SI 2
> "const_int_operand" "C")
> - (bswap:V_HW_HSD (match_operand:V_HW_HSD 3 "register_operand"
> "0"))]
> - UNSPEC_VEC_SET)))
> - (use (match_operand:V16QI 4
> "permute_pattern_operand" "X"))]
> - "TARGET_VXE2 && UINTVAL (operands[2]) < GET_MODE_NUNITS
> (<V_HW_HSD:MODE>mode)"
> - "vlebr<bhfgq>\t%v0,%1,%2"
> + (bswap:V_HW_HSD
> + (vec_merge:V_HW_HSD
> + (vec_duplicate:V_HW_HSD
> + (match_operand:<non_vec> 1 "memory_operand" "R"))
> + (match_operand:V_HW_HSD 3 "register_operand" "0")
> + (match_operand:SI 2 "const_int_operand" "C"))))
> + (use (match_operand:V16QI 4 "permute_pattern_operand" "X"))]
> + "TARGET_VXE2 && exact_log2 (UINTVAL (operands[2])) < GET_MODE_NUNITS
> (<V_HW_HSD:MODE>mode)"
> + "vlebr<bhfgq>\t%v0,%1,%p2"
> [(set_attr "op_type" "VRX")])
>
> ; *a = vec_extract (vec_revb (b), 1); get-element-bswap-3.c
@@ -7073,11 +7073,10 @@ s390_expand_vec_init (rtx target, rtx vals)
if (!general_operand (elem, GET_MODE (elem)))
elem = force_reg (inner_mode, elem);
- emit_insn (gen_rtx_SET (target,
- gen_rtx_UNSPEC (mode,
- gen_rtvec (3, elem,
- GEN_INT (i), target),
- UNSPEC_VEC_SET)));
+ emit_insn
+ (gen_rtx_SET
+ (target, gen_rtx_VEC_MERGE
+ (mode, gen_rtx_VEC_DUPLICATE (mode, elem), target, GEN_INT (1 << i))));
}
}
@@ -8057,6 +8056,8 @@ print_operand_address (FILE *file, rtx addr)
'S': print S-type memory reference (base+displacement).
'Y': print address style operand without index (e.g. shift count or
setmem
operand).
+ 'P': print address-style operand without index but with the offset as
+ if it were specified by a 'p' format flag.
'b': print integer X as if it's an unsigned byte.
'c': print integer X as if it's an signed byte.
@@ -8068,6 +8069,7 @@ print_operand_address (FILE *file, rtx addr)
'k': print the first nonzero SImode part of X.
'm': print the first SImode part unequal to -1 of X.
'o': print integer X as if it's an unsigned 32bit word.
+ 'p': print N such that 2^N == X (X must be a power of 2 and const int).
's': "start" of contiguous bitmask X in either DImode or vector
inner mode.
't': CONST_INT: "start" of contiguous bitmask X in SImode.
CONST_VECTOR: Generate a bitmask for vgbm instruction.
@@ -8237,6 +8239,16 @@ print_operand (FILE *file, rtx x, int code)
print_shift_count_operand (file, x);
return;
+ case 'P':
+ if (CONST_INT_P (x))
+ {
+ ival = exact_log2 (INTVAL (x));
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
+ }
+ else
+ print_shift_count_operand (file, x);
+ return;
+
case 'K':
/* Append @PLT to both local and non-local symbols in order to
support
Linux Kernel livepatching: patches contain individual functions and
@@ -8321,6 +8333,9 @@ print_operand (FILE *file, rtx x, int code)
case 'o':
ival &= 0xffffffff;
break;
+ case 'p':
+ ival = exact_log2 (INTVAL (x));
+ break;
case 'e': case 'f':
case 's': case 't':
{
@@ -183,7 +183,6 @@ (define_c_enum "unspec" [
UNSPEC_VEC_GFMSUM_128
UNSPEC_VEC_GFMSUM_ACCUM
UNSPEC_VEC_GFMSUM_ACCUM_128
- UNSPEC_VEC_SET
UNSPEC_VEC_VSUMG
UNSPEC_VEC_VSUMQ
@@ -467,12 +467,17 @@ (define_insn "mov<mode>"
; vec_set is supposed to *modify* an existing vector so operand 0 is
; duplicated as input operand.
(define_expand "vec_set<mode>"
- [(set (match_operand:V 0 "register_operand" "")
- (unspec:V [(match_operand:<non_vec> 1 "general_operand" "")
- (match_operand:SI 2 "nonmemory_operand" "")
- (match_dup 0)]
- UNSPEC_VEC_SET))]
- "TARGET_VX")
+ [(set (match_operand:V 0 "register_operand" "")
+ (vec_merge:V
+ (vec_duplicate:V
+ (match_operand:<non_vec> 1 "general_operand" ""))
+ (match_dup 0)
+ (match_operand:SI 2 "nonmemory_operand")))]
+ ""
+{
+ if (CONST_INT_P (operands[2]))
+ operands[2] = GEN_INT (1 << INTVAL (operands[2]));
+})
; FIXME: Support also vector mode operands for 1
; FIXME: A target memory operand seems to be useful otherwise we end
@@ -480,28 +485,31 @@ (define_expand "vec_set<mode>"
; that itself?
; vlvgb, vlvgh, vlvgf, vlvgg, vleb, vleh, vlef, vleg, vleib, vleih,
vleif, vleig
(define_insn "*vec_set<mode>"
- [(set (match_operand:V 0 "register_operand" "=v,v,v")
- (unspec:V [(match_operand:<non_vec> 1 "general_operand" "d,R,K")
- (match_operand:SI 2 "nonmemory_operand" "an,I,I")
- (match_operand:V 3 "register_operand" "0,0,0")]
- UNSPEC_VEC_SET))]
+ [(set (match_operand:V 0 "register_operand" "=v,v,v")
+ (vec_merge:V
+ (vec_duplicate:V
+ (match_operand:<non_vec> 1 "general_operand" "d,R,K"))
+ (match_operand:V 3 "register_operand" "0,0,0")
+ (match_operand:SI 2 "nonmemory_operand" "an,I,I")))]
"TARGET_VX
&& (!CONST_INT_P (operands[2])
- || UINTVAL (operands[2]) < GET_MODE_NUNITS (<V:MODE>mode))"
+ || exact_log2 (UINTVAL (operands[2])) < (GET_MODE_NUNITS
(<V:MODE>mode)))"
"@
- vlvg<bhfgq>\t%v0,%1,%Y2
- vle<bhfgq>\t%v0,%1,%2
- vlei<bhfgq>\t%v0,%1,%2"
+ vlvg<bhfgq>\t%v0,%1,%P2
+ vle<bhfgq>\t%v0,%1,%p2
+ vlei<bhfgq>\t%v0,%1,%p2"
[(set_attr "op_type" "VRS,VRX,VRI")])
; vlvgb, vlvgh, vlvgf, vlvgg
(define_insn "*vec_set<mode>_plus"
- [(set (match_operand:V 0 "register_operand" "=v")
- (unspec:V [(match_operand:<non_vec> 1 "general_operand" "d")
- (plus:SI (match_operand:SI 2 "register_operand" "a")
- (match_operand:SI 4 "const_int_operand" "n"))
- (match_operand:V 3 "register_operand" "0")]
- UNSPEC_VEC_SET))]
+ [(set (match_operand:V 0 "register_operand" "=v")
+ (vec_merge:V
+ (vec_duplicate:V
+ (match_operand:<non_vec> 1 "general_operand" "d"))
+ (match_operand:V 3 "register_operand" "0")
+ (plus:SI
+ (match_operand:SI 2 "register_operand" "a")
+ (match_operand:SI 4 "const_int_operand" "n"))))]
"TARGET_VX"
"vlvg<bhfgq>\t%v0,%1,%Y4(%2)"
[(set_attr "op_type" "VRS")])
@@ -575,7 +583,7 @@ (define_insn "*vec_splat<mode>"
(match_operand:V_128_NOSINGLE 1 "register_operand" "v")
(parallel
[(match_operand:QI 2 "const_mask_operand" "C")]))))]
- "TARGET_VX && UINTVAL (operands[2]) < GET_MODE_NUNITS (<MODE>mode)"
+ "TARGET_VX && exact_log2 (UINTVAL (operands[2])) < GET_MODE_NUNITS
(<MODE>mode)"
"vrep<bhfgq>\t%v0,%v1,%2"
[(set_attr "op_type" "VRI")])
@@ -678,13 +686,18 @@ (define_split
(vec_duplicate:V_128_NOSINGLE (match_operand:<non_vec> 1
"register_operand" "")))]
"TARGET_VX && GENERAL_REG_P (operands[1])"
[(set (match_dup 0)
- (unspec:V_128_NOSINGLE [(match_dup 1) (match_dup 2) (match_dup 0)]
UNSPEC_VEC_SET))
+ (vec_merge:V_128_NOSINGLE
+ (vec_duplicate:V_128_NOSINGLE
+ (match_dup 1))
+ (match_dup 0)
+ (match_dup 2)))
(set (match_dup 0)
(vec_duplicate:V_128_NOSINGLE
(vec_select:<non_vec>
- (match_dup 0) (parallel [(match_dup 2)]))))]
+ (match_dup 0) (parallel [(match_dup 3)]))))]
{
- operands[2] = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1);
+ operands[2] = GEN_INT (1 << (GET_MODE_NUNITS (<MODE>mode) - 1));
+ operands[3] = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1);
})
(define_predicate "vcond_comparison_operator"
@@ -1136,9 +1149,12 @@ (define_expand "popcountv8hi2_vx"
(set (match_dup 3) (match_dup 2))
; Generate the shift count operand in a VR (8->byte 7)
(set (match_dup 4) (match_dup 5))
- (set (match_dup 4) (unspec:V16QI [(const_int 8)
- (const_int 7)
- (match_dup 4)] UNSPEC_VEC_SET))
+ (set (match_dup 4)
+ (vec_merge:V16QI
+ (vec_duplicate:V16QI
+ (const_int 8))
+ (match_dup 4)
+ (const_int 128)))
; Vector shift right logical by one byte
(set (match_dup 3)
(unspec:V16QI [(match_dup 3) (match_dup 4)] UNSPEC_VEC_SRLB))
@@ -1339,10 +1355,11 @@ (define_insn "*vec_slb<mode>"
; this means it is a left shift on BE targets!
(define_expand "vec_shr_<mode>"
[(set (match_dup 3)
- (unspec:V16QI [(match_operand:SI 2 "const_shift_by_byte_operand" "")
- (const_int 7)
- (match_dup 3)]
- UNSPEC_VEC_SET))
+ (vec_merge:V16QI
+ (vec_duplicate:V16QI
+ (match_operand:SI 2 "const_shift_by_byte_operand" ""))
+ (match_dup 3)
+ (const_int 128)))
(set (match_operand:V_128 0 "register_operand" "")
(unspec:V_128 [(match_operand:V_128 1 "register_operand" "")
(match_dup 3)]
@@ -150,22 +150,25 @@ (define_expand "vec_splats<mode>"
(vec_duplicate:VEC_HW (match_operand:<non_vec> 1 "general_operand" "")))]
"TARGET_VX")
+
(define_expand "vec_insert<mode>"
- [(set (match_operand:VEC_HW 0 "register_operand" "")
- (unspec:VEC_HW [(match_operand:<non_vec> 2 "register_operand" "")
- (match_operand:SI 3 "nonmemory_operand" "")
- (match_operand:VEC_HW 1 "register_operand" "")]
- UNSPEC_VEC_SET))]
+ [(set (match_operand:VEC_HW 0 "register_operand" "")
+ (vec_merge:VEC_HW
+ (vec_duplicate:VEC_HW
+ (match_operand:<non_vec> 2 "register_operand" ""))
+ (match_operand:VEC_HW 1 "register_operand" "")
+ (match_operand:SI 3 "nonmemory_operand" "")))]
"TARGET_VX"
"")
; This is vec_set + modulo arithmetic on the element selector (op 2)
(define_expand "vec_promote<mode>"
- [(set (match_operand:VEC_HW 0 "register_operand" "")
- (unspec:VEC_HW [(match_operand:<non_vec> 1 "register_operand" "")
- (match_operand:SI 2 "nonmemory_operand" "")
- (match_dup 0)]
- UNSPEC_VEC_SET))]
+ [(set (match_operand:VEC_HW 0 "register_operand" "")
+ (vec_merge:VEC_HW
+ (vec_duplicate:VEC_HW
+ (match_operand:<non_vec> 1 "register_operand" ""))
+ (match_dup 0)
+ (match_operand:SI 2 "nonmemory_operand" "")))]
"TARGET_VX"
"")
@@ -457,11 +460,11 @@ (define_insn "vec_scatter_element<V_HW_2:mode>_SI"
[(set (mem:<non_vec>
(plus:SI (subreg:SI
(vec_select:<non_vec_int>
- (match_operand:V_HW_2 1 "register_operand" "v")
- (parallel [(match_operand:QI 3 "const_mask_operand" "C")])) 4)
- (match_operand:SI 2 "address_operand" "ZQ")))
+ (match_operand:<TOINTVEC> 1 "register_operand" "v")
+ (parallel [(match_operand:QI 3 "const_mask_operand" "C")])) 4)
+ (match_operand:SI 2 "address_operand" "ZQ")))
(vec_select:<non_vec>
- (match_operand:V_HW_2 0 "register_operand" "v")
+ (match_operand:V_HW_2 0 "register_operand" "v")
(parallel [(match_dup 3)])))]
"TARGET_VX && !TARGET_64BIT && UINTVAL (operands[3]) <
GET_MODE_NUNITS (<V_HW_2:MODE>mode)"