[AArch64] Cleanup move immediate code
Checks
Commit Message
Hi Richard,
Here is the immediate cleanup, split off from the previous patch:
Simplify, refactor and improve various move immediate functions.
Allow 32-bit MOVZ/N as a valid 64-bit immediate which removes special
cases in aarch64_internal_mov_immediate. Add new constraint so the movdi
pattern only needs a single alternative for move immediate.
Passes bootstrap and regress, OK for commit?
gcc/ChangeLog:
* config/aarch64/aarch64.cc (aarch64_bitmask_imm): Use unsigned type.
(aarch64_zeroextended_move_imm): New function.
(aarch64_move_imm): Refactor, assert mode is SImode or DImode.
(aarch64_internal_mov_immediate): Assert mode is SImode or DImode.
Simplify special cases.
(aarch64_uimm12_shift): Simplify code.
(aarch64_clamp_to_uimm12_shift): Likewise.
(aarch64_movw_imm): Remove.
(aarch64_float_const_rtx_p): Pass either SImode or DImode to
aarch64_internal_mov_immediate.
(aarch64_rtx_costs): Likewise.
* config/aarch64/aarch64.md (movdi_aarch64): Merge 'N' and 'M'
constraints into single 'O'.
(mov<mode>_aarch64): Likewise.
* config/aarch64/aarch64-protos.h (aarch64_move_imm): Use unsigned.
(aarch64_bitmask_imm): Likewise.
(aarch64_uimm12_shift): Likewise.
(aarch64_zeroextended_move_imm): New prototype.
* config/aarch64/constraints.md: Add 'O' for 32/64-bit immediates,
limit 'N' to 64-bit only moves.
---
Comments
Sorry for the very long delay in reviewing this.
Wilco Dijkstra <Wilco.Dijkstra@arm.com> writes:
> Hi Richard,
>
> Here is the immediate cleanup, split off from the previous patch:
>
> Simplify, refactor and improve various move immediate functions.
> Allow 32-bit MOVZ/N as a valid 64-bit immediate which removes special
> cases in aarch64_internal_mov_immediate. Add new constraint so the movdi
> pattern only needs a single alternative for move immediate.
Just to make sure I understand: isn't it really just MOVN? I would have
expected a 32-bit MOVZ to be equivalent to (and add no capabilities over)
a 64-bit MOVZ.
> Passes bootstrap and regress, OK for commit?
>
> gcc/ChangeLog:
>
> * config/aarch64/aarch64.cc (aarch64_bitmask_imm): Use unsigned type.
> (aarch64_zeroextended_move_imm): New function.
> (aarch64_move_imm): Refactor, assert mode is SImode or DImode.
> (aarch64_internal_mov_immediate): Assert mode is SImode or DImode.
> Simplify special cases.
> (aarch64_uimm12_shift): Simplify code.
> (aarch64_clamp_to_uimm12_shift): Likewise.
> (aarch64_movw_imm): Remove.
> (aarch64_float_const_rtx_p): Pass either SImode or DImode to
> aarch64_internal_mov_immediate.
> (aarch64_rtx_costs): Likewise.
> * config/aarch64/aarch64.md (movdi_aarch64): Merge 'N' and 'M'
> constraints into single 'O'.
> (mov<mode>_aarch64): Likewise.
> * config/aarch64/aarch64-protos.h (aarch64_move_imm): Use unsigned.
> (aarch64_bitmask_imm): Likewise.
> (aarch64_uimm12_shift): Likewise.
> (aarch64_zeroextended_move_imm): New prototype.
> * config/aarch64/constraints.md: Add 'O' for 32/64-bit immediates,
> limit 'N' to 64-bit only moves.
>
> ---
>
> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
> index 1a71f02284137c64e7115b26e6aa00447596f105..a73bfa20acb9b92ae0475794c3f11c67d22feb97 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -755,7 +755,7 @@ void aarch64_post_cfi_startproc (void);
> poly_int64 aarch64_initial_elimination_offset (unsigned, unsigned);
> int aarch64_get_condition_code (rtx);
> bool aarch64_address_valid_for_prefetch_p (rtx, bool);
> -bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode);
> +bool aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode);
> unsigned HOST_WIDE_INT aarch64_and_split_imm1 (HOST_WIDE_INT val_in);
> unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in);
> bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode);
> @@ -792,7 +792,7 @@ bool aarch64_masks_and_shift_for_bfi_p (scalar_int_mode, unsigned HOST_WIDE_INT,
> unsigned HOST_WIDE_INT,
> unsigned HOST_WIDE_INT);
> bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx);
> -bool aarch64_move_imm (HOST_WIDE_INT, machine_mode);
> +bool aarch64_move_imm (unsigned HOST_WIDE_INT, machine_mode);
> machine_mode aarch64_sve_int_mode (machine_mode);
> opt_machine_mode aarch64_sve_pred_mode (unsigned int);
> machine_mode aarch64_sve_pred_mode (machine_mode);
> @@ -842,8 +842,9 @@ bool aarch64_sve_float_arith_immediate_p (rtx, bool);
> bool aarch64_sve_float_mul_immediate_p (rtx);
> bool aarch64_split_dimode_const_store (rtx, rtx);
> bool aarch64_symbolic_address_p (rtx);
> -bool aarch64_uimm12_shift (HOST_WIDE_INT);
> +bool aarch64_uimm12_shift (unsigned HOST_WIDE_INT);
> int aarch64_movk_shift (const wide_int_ref &, const wide_int_ref &);
> +bool aarch64_zeroextended_move_imm (unsigned HOST_WIDE_INT);
> bool aarch64_use_return_insn_p (void);
> const char *aarch64_output_casesi (rtx *);
>
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index 5d1ab5aa42b2cda0a655d2bc69c4df19da457ab3..798363bcc449c414de5bbb4f26b8e1c64a0cf71a 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -5558,12 +5558,10 @@ aarch64_bitmask_imm (unsigned HOST_WIDE_INT val)
>
> /* Return true if VAL is a valid bitmask immediate for MODE. */
> bool
> -aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
> +aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode mode)
> {
> if (mode == DImode)
> - return aarch64_bitmask_imm (val_in);
> -
> - unsigned HOST_WIDE_INT val = val_in;
> + return aarch64_bitmask_imm (val);
>
> if (mode == SImode)
> return aarch64_bitmask_imm ((val & 0xffffffff) | (val << 32));
> @@ -5602,51 +5600,60 @@ aarch64_check_bitmask (unsigned HOST_WIDE_INT val,
> }
>
>
> -/* Return true if val is an immediate that can be loaded into a
> - register by a MOVZ instruction. */
> -static bool
> -aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode)
> +/* Return true if immediate VAL can only be created by using a 32-bit
> + zero-extended move immediate, not by a 64-bit move. */
> +bool
> +aarch64_zeroextended_move_imm (unsigned HOST_WIDE_INT val)
> {
> - if (GET_MODE_SIZE (mode) > 4)
> - {
> - if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
> - || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
> - return 1;
> - }
> - else
> - {
> - /* Ignore sign extension. */
> - val &= (HOST_WIDE_INT) 0xffffffff;
> - }
> - return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
> - || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
> + if (val < 65536 || (val >> 32) != 0 || (val & 0xffff) == 0)
> + return false;
> + return !aarch64_bitmask_imm (val);
> }
>
>
> -/* Return true if VAL is an immediate that can be loaded into a
> - register in a single instruction. */
> +/* Return true if VAL is an immediate that can be created by a single
> + MOV instruction. */
> bool
> -aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
> +aarch64_move_imm (unsigned HOST_WIDE_INT val, machine_mode mode)
> {
> - scalar_int_mode int_mode;
> - if (!is_a <scalar_int_mode> (mode, &int_mode))
> - return false;
> + unsigned HOST_WIDE_INT val2;
>
> - if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode))
> - return 1;
> - return aarch64_bitmask_imm (val, int_mode);
> + gcc_assert (mode == SImode || mode == DImode);
> +
> + if (val < 65536)
> + return true;
> +
> + val2 = val ^ ((HOST_WIDE_INT) val >> 63);
I think we're supposed to avoid relying on >> being an arithmetic shift,
since C++11 doesn't guarantee it. So I think it needs to be something like:
val2 = ((HOST_WIDE_INT) val < 0 ? ~val : val);
> + if ((val2 >> (__builtin_ctzll (val2) & 48)) < 65536)
> + return true;
We should use ctz_hwi here, instead of using the GCC builtin directly.
> +
> + /* Special case 0xyyyyffffffffffff. */
> + if (((val2 + 1) << 16) == 0)
> + return true;
> +
> + /* Special case immediates 0xffffyyyy and 0xyyyyffff. */
> + val2 = (mode == DImode) ? val : val2;
> + if (((val2 + 1) & ~(unsigned HOST_WIDE_INT) 0xffff0000) == 0
> + || (val2 >> 16) == 0xffff)
> + return true;
> +
> + if (mode == SImode || (val >> 32) == 0)
> + val = (val & 0xffffffff) | (val << 32);
> + return aarch64_bitmask_imm (val);
I agree the ctz trick is more elegant than (and an improvement over)
the current approach to testing for movz. But I think the overall logic
is harder to follow than it was in the original version. Initially
canonicalising val2 based on the sign bit seems unintuitive since we
still need to handle all four combinations of (top bit set, top bit clear)
x (low 48 bits set, low 48 bits clear). I preferred the original
approach of testing once with the original value (for MOVZ) and once
with the inverted value (for MOVN).
Don't the new cases boil down to: if mode is DImode and the upper 32 bits
are clear, we can test based on SImode instead? In other words, couldn't
the "(val >> 32) == 0" part of the final test be done first, with the
effect of changing the mode to SImode? Something like:
gcc_assert (mode == SImode || mode == DImode);
/* We can't use a 64-bit MOVN if the upper 32 bits are clear, but we might
be able to use a 32-bit MOVN. We could also use a 32-bit ORR. */
if (mode == DImode && (val >> 32) == 0)
mode = SImode;
if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode))
return true;
if (mode == SImode)
val = (val & 0xffffffff) | (val << 32);
return aarch64_bitmask_imm (val);
with movw_imm rewritten to use ctz? (Completely untested :-))
Or am I missing a case that the patch handles and the code above doesn't?
Thanks,
Richard
> }
>
>
> static int
> aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
> - scalar_int_mode mode)
> + machine_mode mode)
> {
> int i;
> unsigned HOST_WIDE_INT val, val2, mask;
> int one_match, zero_match;
> int num_insns;
>
> + gcc_assert (mode == SImode || mode == DImode);
> +
> val = INTVAL (imm);
>
> if (aarch64_move_imm (val, mode))
> @@ -5656,31 +5663,6 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
> return 1;
> }
>
> - /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff
> - (with XXXX non-zero). In that case check to see if the move can be done in
> - a smaller mode. */
> - val2 = val & 0xffffffff;
> - if (mode == DImode
> - && aarch64_move_imm (val2, SImode)
> - && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0))
> - {
> - if (generate)
> - emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
> -
> - /* Check if we have to emit a second instruction by checking to see
> - if any of the upper 32 bits of the original DI mode value is set. */
> - if (val == val2)
> - return 1;
> -
> - i = (val >> 48) ? 48 : 32;
> -
> - if (generate)
> - emit_insn (gen_insv_immdi (dest, GEN_INT (i),
> - GEN_INT ((val >> i) & 0xffff)));
> -
> - return 2;
> - }
> -
> if ((val >> 32) == 0 || mode == SImode)
> {
> if (generate)
> @@ -5704,24 +5686,31 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
> one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
> ((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);
>
> + /* Try a bitmask immediate and a movk to generate the immediate
> + in 2 instructions. */
> +
> if (zero_match < 2 && one_match < 2)
> {
> - /* Try emitting a bitmask immediate with a movk replacing 16 bits.
> - For a 64-bit bitmask try whether changing 16 bits to all ones or
> - zeroes creates a valid bitmask. To check any repeated bitmask,
> - try using 16 bits from the other 32-bit half of val. */
> -
> for (i = 0; i < 64; i += 16)
> - if (aarch64_check_bitmask (val, val2, mask << i))
> - {
> - if (generate)
> - {
> - emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
> - emit_insn (gen_insv_immdi (dest, GEN_INT (i),
> - GEN_INT ((val >> i) & 0xffff)));
> - }
> - return 2;
> - }
> + {
> + if (aarch64_check_bitmask (val, val2, mask << i))
> + break;
> +
> + val2 = val & ~(mask << i);
> + if ((val2 >> 32) == 0 && aarch64_move_imm (val2, DImode))
> + break;
> + }
> +
> + if (i != 64)
> + {
> + if (generate)
> + {
> + emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
> + emit_insn (gen_insv_immdi (dest, GEN_INT (i),
> + GEN_INT ((val >> i) & 0xffff)));
> + }
> + return 2;
> + }
> }
>
> /* Try a bitmask plus 2 movk to generate the immediate in 3 instructions. */
> @@ -5790,26 +5779,24 @@ aarch64_mov128_immediate (rtx imm)
> /* Return true if val can be encoded as a 12-bit unsigned immediate with
> a left shift of 0 or 12 bits. */
> bool
> -aarch64_uimm12_shift (HOST_WIDE_INT val)
> +aarch64_uimm12_shift (unsigned HOST_WIDE_INT val)
> {
> - return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
> - || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
> - );
> + return val < 4096 || (val & 0xfff000) == val;
> }
>
> /* Returns the nearest value to VAL that will fit as a 12-bit unsigned immediate
> that can be created with a left shift of 0 or 12. */
> static HOST_WIDE_INT
> -aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val)
> +aarch64_clamp_to_uimm12_shift (unsigned HOST_WIDE_INT val)
> {
> /* Check to see if the value fits in 24 bits, as that is the maximum we can
> handle correctly. */
> - gcc_assert ((val & 0xffffff) == val);
> + gcc_assert (val < 0x1000000);
>
> - if (((val & 0xfff) << 0) == val)
> + if (val < 4096)
> return val;
>
> - return val & (0xfff << 12);
> + return val & 0xfff000;
> }
>
>
> @@ -6957,8 +6944,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
> return;
> }
>
> - aarch64_internal_mov_immediate (dest, imm, true,
> - as_a <scalar_int_mode> (mode));
> + aarch64_internal_mov_immediate (dest, imm, true, mode);
> }
>
> /* Return the MEM rtx that provides the canary value that should be used
> @@ -11130,9 +11116,7 @@ aarch64_float_const_rtx_p (rtx x)
> && SCALAR_FLOAT_MODE_P (mode)
> && aarch64_reinterpret_float_as_int (x, &ival))
> {
> - scalar_int_mode imode = (mode == HFmode
> - ? SImode
> - : int_mode_for_mode (mode).require ());
> + machine_mode imode = (mode == DFmode) ? DImode : SImode;
> int num_instr = aarch64_internal_mov_immediate
> (NULL_RTX, gen_int_mode (ival, imode), false, imode);
> return num_instr < 3;
> @@ -13790,10 +13774,9 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
> proportionally expensive to the number of instructions
> required to build that constant. This is true whether we
> are compiling for SPEED or otherwise. */
> - if (!is_a <scalar_int_mode> (mode, &int_mode))
> - int_mode = word_mode;
> + machine_mode imode = (mode == SImode) ? SImode : DImode;
> *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
> - (NULL_RTX, x, false, int_mode));
> + (NULL_RTX, x, false, imode));
> }
> return true;
>
> @@ -13809,9 +13792,7 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
> bool succeed = aarch64_reinterpret_float_as_int (x, &ival);
> gcc_assert (succeed);
>
> - scalar_int_mode imode = (mode == HFmode
> - ? SImode
> - : int_mode_for_mode (mode).require ());
> + machine_mode imode = (mode == DFmode) ? DImode : SImode;
> int ncost = aarch64_internal_mov_immediate
> (NULL_RTX, gen_int_mode (ival, imode), false, imode);
> *cost += COSTS_N_INSNS (ncost);
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index f2e3d905dbbeb2949f2947f5cfd68208c94c9272..604a67d87a7ef525efebed48f39de43c97f2a397 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -1309,16 +1309,15 @@ (define_insn_and_split "*movsi_aarch64"
> )
>
> (define_insn_and_split "*movdi_aarch64"
> - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,r, r,w, m,m, r, r, r, w,r,w, w")
> - (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd"))]
> + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m,m, r, r, r, w,r,w, w")
> + (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,O,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd"))]
> "(register_operand (operands[0], DImode)
> || aarch64_reg_or_zero (operands[1], DImode))"
> "@
> mov\\t%x0, %x1
> mov\\t%0, %x1
> mov\\t%x0, %1
> - mov\\t%x0, %1
> - mov\\t%w0, %1
> + * return aarch64_zeroextended_move_imm (INTVAL (operands[1])) ? \"mov\\t%w0, %1\" : \"mov\\t%x0, %1\";
> #
> * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
> ldr\\t%x0, %1
> @@ -1340,11 +1339,11 @@ (define_insn_and_split "*movdi_aarch64"
> DONE;
> }"
> ;; The "mov_imm" type for CNTD is just a placeholder.
> - [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,mov_imm,
> + [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,
> load_8,load_8,store_8,store_8,load_8,adr,adr,f_mcr,f_mrc,
> fmov,neon_move")
> - (set_attr "arch" "*,*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
> - (set_attr "length" "4,4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4")]
> + (set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
> + (set_attr "length" "4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4")]
> )
>
> (define_insn "insv_imm<mode>"
> @@ -1508,7 +1507,7 @@ (define_insn "*mov<mode>_aarch64"
>
> (define_insn "*mov<mode>_aarch64"
> [(set (match_operand:DFD 0 "nonimmediate_operand" "=w, w ,?r,w,w ,w ,w,m,r,m ,r,r")
> - (match_operand:DFD 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N"))]
> + (match_operand:DFD 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,O"))]
> "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
> || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
> "@
> @@ -1523,7 +1522,7 @@ (define_insn "*mov<mode>_aarch64"
> ldr\\t%x0, %1
> str\\t%x1, %0
> mov\\t%x0, %x1
> - mov\\t%x0, %1"
> + * return aarch64_zeroextended_move_imm (INTVAL (operands[1])) ? \"mov\\t%w0, %1\" : \"mov\\t%x0, %1\";"
> [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,neon_move,\
> f_loadd,f_stored,load_8,store_8,mov_reg,\
> fconstd")
> diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
> index ee7587cca1673208e2bfd6b503a21d0c8b69bf75..e91c7eab0b3674ca34ac2f790c38fcd27986c35f 100644
> --- a/gcc/config/aarch64/constraints.md
> +++ b/gcc/config/aarch64/constraints.md
> @@ -106,6 +106,12 @@ (define_constraint "M"
>
> (define_constraint "N"
> "A constant that can be used with a 64-bit MOV immediate operation."
> + (and (match_code "const_int")
> + (match_test "aarch64_move_imm (ival, DImode)")
> + (match_test "!aarch64_zeroextended_move_imm (ival)")))
> +
> +(define_constraint "O"
> + "A constant that can be used with a 32 or 64-bit MOV immediate operation."
> (and (match_code "const_int")
> (match_test "aarch64_move_imm (ival, DImode)")))
Hi Richard,
> Just to make sure I understand: isn't it really just MOVN? I would have
> expected a 32-bit MOVZ to be equivalent to (and add no capabilities over)
> a 64-bit MOVZ.
The 32-bit MOVZ immediates are equivalent, MOVN never overlaps, and
MOVI has some overlaps. Since we allow all 3 variants, the 2 alternatives
in the movdi pattern are overlapping for MOVZ and MOVI immediates.
> I agree the ctz trick is more elegant than (and an improvement over)
> the current approach to testing for movz. But I think the overall logic
> is harder to follow than it was in the original version. Initially
> canonicalising val2 based on the sign bit seems unintuitive since we
> still need to handle all four combinations of (top bit set, top bit clear)
> x (low 48 bits set, low 48 bits clear). I preferred the original
> approach of testing once with the original value (for MOVZ) and once
> with the inverted value (for MOVN).
Yes, the canonicalization on the sign ends up requiring 2 special cases.
Handling the MOVZ case first and then MOVN does avoid that, and makes
things simpler overall, so I've used that approach in v2.
> Don't the new cases boil down to: if mode is DImode and the upper 32 bits
> are clear, we can test based on SImode instead? In other words, couldn't
> the "(val >> 32) == 0" part of the final test be done first, with the
> effect of changing the mode to SImode? Something like:
Yes, that works. I used masking of the top bits to avoid repeatedly testing the
same condition. The new version removes most special cases and ends up
both smaller and simpler:
v2: Simplify the special cases in aarch64_move_imm, use aarch64_is_movz.
Simplify, refactor and improve various move immediate functions.
Allow 32-bit MOVZ/I/N as a valid 64-bit immediate which removes special
cases in aarch64_internal_mov_immediate. Add new constraint so the movdi
pattern only needs a single alternative for move immediate.
Passes bootstrap and regress, OK for commit?
gcc/ChangeLog:
* config/aarch64/aarch64.cc (aarch64_bitmask_imm): Use unsigned type.
(aarch64_zeroextended_move_imm): New function.
(aarch64_move_imm): Refactor, assert mode is SImode or DImode.
(aarch64_internal_mov_immediate): Assert mode is SImode or DImode.
Simplify special cases.
(aarch64_uimm12_shift): Simplify code.
(aarch64_clamp_to_uimm12_shift): Likewise.
(aarch64_movw_imm): Rename to aarch64_is_movz.
(aarch64_float_const_rtx_p): Pass either SImode or DImode to
aarch64_internal_mov_immediate.
(aarch64_rtx_costs): Likewise.
* config/aarch64/aarch64.md (movdi_aarch64): Merge 'N' and 'M'
constraints into single 'O'.
(mov<mode>_aarch64): Likewise.
* config/aarch64/aarch64-protos.h (aarch64_move_imm): Use unsigned.
(aarch64_bitmask_imm): Likewise.
(aarch64_uimm12_shift): Likewise.
(aarch64_zeroextended_move_imm): New prototype.
* config/aarch64/constraints.md: Add 'O' for 32/64-bit immediates,
limit 'N' to 64-bit only moves.
---
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 4be93c93c26e091f878bc8e4cf06e90888405fb2..8bce6ec7599edcc2e6a1d8006450f35c0ce7f61f 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -756,7 +756,7 @@ void aarch64_post_cfi_startproc (void);
poly_int64 aarch64_initial_elimination_offset (unsigned, unsigned);
int aarch64_get_condition_code (rtx);
bool aarch64_address_valid_for_prefetch_p (rtx, bool);
-bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode);
+bool aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode);
unsigned HOST_WIDE_INT aarch64_and_split_imm1 (HOST_WIDE_INT val_in);
unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in);
bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode);
@@ -793,7 +793,7 @@ bool aarch64_masks_and_shift_for_bfi_p (scalar_int_mode, unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT);
bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx);
-bool aarch64_move_imm (HOST_WIDE_INT, machine_mode);
+bool aarch64_move_imm (unsigned HOST_WIDE_INT, machine_mode);
machine_mode aarch64_sve_int_mode (machine_mode);
opt_machine_mode aarch64_sve_pred_mode (unsigned int);
machine_mode aarch64_sve_pred_mode (machine_mode);
@@ -843,8 +843,9 @@ bool aarch64_sve_float_arith_immediate_p (rtx, bool);
bool aarch64_sve_float_mul_immediate_p (rtx);
bool aarch64_split_dimode_const_store (rtx, rtx);
bool aarch64_symbolic_address_p (rtx);
-bool aarch64_uimm12_shift (HOST_WIDE_INT);
+bool aarch64_uimm12_shift (unsigned HOST_WIDE_INT);
int aarch64_movk_shift (const wide_int_ref &, const wide_int_ref &);
+bool aarch64_zeroextended_move_imm (unsigned HOST_WIDE_INT);
bool aarch64_use_return_insn_p (void);
const char *aarch64_output_casesi (rtx *);
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index e97f3b32f7c7f43564d6a4207eae5a34b9e9bfe7..fab478ada8d72d163c8daa3713e5ff6945de409f 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -5625,12 +5625,10 @@ aarch64_bitmask_imm (unsigned HOST_WIDE_INT val)
/* Return true if VAL is a valid bitmask immediate for MODE. */
bool
-aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
+aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode mode)
{
if (mode == DImode)
- return aarch64_bitmask_imm (val_in);
-
- unsigned HOST_WIDE_INT val = val_in;
+ return aarch64_bitmask_imm (val);
if (mode == SImode)
return aarch64_bitmask_imm ((val & 0xffffffff) | (val << 32));
@@ -5669,51 +5667,57 @@ aarch64_check_bitmask (unsigned HOST_WIDE_INT val,
}
-/* Return true if val is an immediate that can be loaded into a
- register by a MOVZ instruction. */
-static bool
-aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode)
+/* Return true if immediate VAL can only be created by using a 32-bit
+ zero-extended move immediate, not by a 64-bit move. */
+bool
+aarch64_zeroextended_move_imm (unsigned HOST_WIDE_INT val)
{
- if (GET_MODE_SIZE (mode) > 4)
- {
- if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
- || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
- return 1;
- }
- else
- {
- /* Ignore sign extension. */
- val &= (HOST_WIDE_INT) 0xffffffff;
- }
- return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
- || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
+ if (val < 65536 || (val >> 32) != 0 || (val & 0xffff) == 0)
+ return false;
+ return !aarch64_bitmask_imm (val);
}
-/* Return true if VAL is an immediate that can be loaded into a
- register in a single instruction. */
+/* Return true if VAL is a valid MOVZ immediate. */
+static inline bool
+aarch64_is_movz (unsigned HOST_WIDE_INT val)
+{
+ return (val >> (ctz_hwi (val) & 48)) < 65536;
+}
+
+
+/* Return true if VAL is an immediate that can be created by a single
+ MOV instruction. */
bool
-aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
+aarch64_move_imm (unsigned HOST_WIDE_INT val, machine_mode mode)
{
- scalar_int_mode int_mode;
- if (!is_a <scalar_int_mode> (mode, &int_mode))
- return false;
+ gcc_assert (mode == SImode || mode == DImode);
- if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode))
- return 1;
- return aarch64_bitmask_imm (val, int_mode);
+ if (val < 65536)
+ return true;
+
+ unsigned HOST_WIDE_INT mask =
+ (val >> 32) == 0 || mode == SImode ? 0xffffffff : HOST_WIDE_INT_M1U;
+
+ if (aarch64_is_movz (val & mask) || aarch64_is_movz (~val & mask))
+ return true;
+
+ val = (val & mask) | ((val << 32) & ~mask);
+ return aarch64_bitmask_imm (val);
}
static int
aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
- scalar_int_mode mode)
+ machine_mode mode)
{
int i;
unsigned HOST_WIDE_INT val, val2, mask;
int one_match, zero_match;
int num_insns;
+ gcc_assert (mode == SImode || mode == DImode);
+
val = INTVAL (imm);
if (aarch64_move_imm (val, mode))
@@ -5723,31 +5727,6 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
return 1;
}
- /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff
- (with XXXX non-zero). In that case check to see if the move can be done in
- a smaller mode. */
- val2 = val & 0xffffffff;
- if (mode == DImode
- && aarch64_move_imm (val2, SImode)
- && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0))
- {
- if (generate)
- emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
-
- /* Check if we have to emit a second instruction by checking to see
- if any of the upper 32 bits of the original DI mode value is set. */
- if (val == val2)
- return 1;
-
- i = (val >> 48) ? 48 : 32;
-
- if (generate)
- emit_insn (gen_insv_immdi (dest, GEN_INT (i),
- GEN_INT ((val >> i) & 0xffff)));
-
- return 2;
- }
-
if ((val >> 32) == 0 || mode == SImode)
{
if (generate)
@@ -5771,24 +5750,31 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);
+ /* Try a bitmask immediate and a movk to generate the immediate
+ in 2 instructions. */
+
if (zero_match < 2 && one_match < 2)
{
- /* Try emitting a bitmask immediate with a movk replacing 16 bits.
- For a 64-bit bitmask try whether changing 16 bits to all ones or
- zeroes creates a valid bitmask. To check any repeated bitmask,
- try using 16 bits from the other 32-bit half of val. */
-
for (i = 0; i < 64; i += 16)
- if (aarch64_check_bitmask (val, val2, mask << i))
- {
- if (generate)
- {
- emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
- emit_insn (gen_insv_immdi (dest, GEN_INT (i),
- GEN_INT ((val >> i) & 0xffff)));
- }
- return 2;
- }
+ {
+ if (aarch64_check_bitmask (val, val2, mask << i))
+ break;
+
+ val2 = val & ~(mask << i);
+ if ((val2 >> 32) == 0 && aarch64_move_imm (val2, DImode))
+ break;
+ }
+
+ if (i != 64)
+ {
+ if (generate)
+ {
+ emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
+ emit_insn (gen_insv_immdi (dest, GEN_INT (i),
+ GEN_INT ((val >> i) & 0xffff)));
+ }
+ return 2;
+ }
}
/* Try a bitmask plus 2 movk to generate the immediate in 3 instructions. */
@@ -5857,26 +5843,24 @@ aarch64_mov128_immediate (rtx imm)
/* Return true if val can be encoded as a 12-bit unsigned immediate with
a left shift of 0 or 12 bits. */
bool
-aarch64_uimm12_shift (HOST_WIDE_INT val)
+aarch64_uimm12_shift (unsigned HOST_WIDE_INT val)
{
- return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
- || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
- );
+ return val < 4096 || (val & 0xfff000) == val;
}
/* Returns the nearest value to VAL that will fit as a 12-bit unsigned immediate
that can be created with a left shift of 0 or 12. */
static HOST_WIDE_INT
-aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val)
+aarch64_clamp_to_uimm12_shift (unsigned HOST_WIDE_INT val)
{
/* Check to see if the value fits in 24 bits, as that is the maximum we can
handle correctly. */
- gcc_assert ((val & 0xffffff) == val);
+ gcc_assert (val < 0x1000000);
- if (((val & 0xfff) << 0) == val)
+ if (val < 4096)
return val;
- return val & (0xfff << 12);
+ return val & 0xfff000;
}
@@ -7024,8 +7008,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
return;
}
- aarch64_internal_mov_immediate (dest, imm, true,
- as_a <scalar_int_mode> (mode));
+ aarch64_internal_mov_immediate (dest, imm, true, mode);
}
/* Return the MEM rtx that provides the canary value that should be used
@@ -11197,9 +11180,7 @@ aarch64_float_const_rtx_p (rtx x)
&& SCALAR_FLOAT_MODE_P (mode)
&& aarch64_reinterpret_float_as_int (x, &ival))
{
- scalar_int_mode imode = (mode == HFmode
- ? SImode
- : int_mode_for_mode (mode).require ());
+ machine_mode imode = (mode == DFmode) ? DImode : SImode;
int num_instr = aarch64_internal_mov_immediate
(NULL_RTX, gen_int_mode (ival, imode), false, imode);
return num_instr < 3;
@@ -13857,10 +13838,9 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
proportionally expensive to the number of instructions
required to build that constant. This is true whether we
are compiling for SPEED or otherwise. */
- if (!is_a <scalar_int_mode> (mode, &int_mode))
- int_mode = word_mode;
+ machine_mode imode = (mode == SImode) ? SImode : DImode;
*cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
- (NULL_RTX, x, false, int_mode));
+ (NULL_RTX, x, false, imode));
}
return true;
@@ -13876,9 +13856,7 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
bool succeed = aarch64_reinterpret_float_as_int (x, &ival);
gcc_assert (succeed);
- scalar_int_mode imode = (mode == HFmode
- ? SImode
- : int_mode_for_mode (mode).require ());
+ machine_mode imode = (mode == DFmode) ? DImode : SImode;
int ncost = aarch64_internal_mov_immediate
(NULL_RTX, gen_int_mode (ival, imode), false, imode);
*cost += COSTS_N_INSNS (ncost);
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 76b6898ca048c559ff7f7fba78119161b3d382f6..5694a1efea1df8e532520475140c8748b54e3d89 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1309,16 +1309,15 @@ (define_insn_and_split "*movsi_aarch64"
)
(define_insn_and_split "*movdi_aarch64"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,r, r,w, m,m, r, r, r, w,r,w, w")
- (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd"))]
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m,m, r, r, r, w,r,w, w")
+ (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,O,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd"))]
"(register_operand (operands[0], DImode)
|| aarch64_reg_or_zero (operands[1], DImode))"
"@
mov\\t%x0, %x1
mov\\t%0, %x1
mov\\t%x0, %1
- mov\\t%x0, %1
- mov\\t%w0, %1
+ * return aarch64_zeroextended_move_imm (INTVAL (operands[1])) ? \"mov\\t%w0, %1\" : \"mov\\t%x0, %1\";
#
* return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
ldr\\t%x0, %1
@@ -1340,11 +1339,11 @@ (define_insn_and_split "*movdi_aarch64"
DONE;
}"
;; The "mov_imm" type for CNTD is just a placeholder.
- [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,mov_imm,
+ [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,
load_8,load_8,store_8,store_8,load_8,adr,adr,f_mcr,f_mrc,
fmov,neon_move")
- (set_attr "arch" "*,*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
- (set_attr "length" "4,4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4")]
+ (set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
+ (set_attr "length" "4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4")]
)
(define_insn "insv_imm<mode>"
@@ -1508,7 +1507,7 @@ (define_insn "*mov<mode>_aarch64"
(define_insn "*mov<mode>_aarch64"
[(set (match_operand:DFD 0 "nonimmediate_operand" "=w, w ,?r,w,w ,w ,w,m,r,m ,r,r")
- (match_operand:DFD 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N"))]
+ (match_operand:DFD 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,O"))]
"TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
|| aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
"@
@@ -1523,7 +1522,7 @@ (define_insn "*mov<mode>_aarch64"
ldr\\t%x0, %1
str\\t%x1, %0
mov\\t%x0, %x1
- mov\\t%x0, %1"
+ * return aarch64_zeroextended_move_imm (INTVAL (operands[1])) ? \"mov\\t%w0, %1\" : \"mov\\t%x0, %1\";"
[(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,neon_move,\
f_loadd,f_stored,load_8,store_8,mov_reg,\
fconstd")
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 29efb6c0cff7574c9b239ef358acaca96dd75d03..6168295814c3ce4b6f65030cddfd08172b6febd8 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -106,6 +106,12 @@ (define_constraint "M"
(define_constraint "N"
"A constant that can be used with a 64-bit MOV immediate operation."
+ (and (match_code "const_int")
+ (match_test "aarch64_move_imm (ival, DImode)")
+ (match_test "!aarch64_zeroextended_move_imm (ival)")))
+
+(define_constraint "O"
+ "A constant that can be used with a 32 or 64-bit MOV immediate operation."
(and (match_code "const_int")
(match_test "aarch64_move_imm (ival, DImode)")))
Wilco Dijkstra <Wilco.Dijkstra@arm.com> writes:
> Hi Richard,
>
>> Just to make sure I understand: isn't it really just MOVN? I would have
>> expected a 32-bit MOVZ to be equivalent to (and add no capabilities over)
>> a 64-bit MOVZ.
>
> The 32-bit MOVZ immediates are equivalent, MOVN never overlaps, and
> MOVI has some overlaps. Since we allow all 3 variants, the 2 alternatives
> in the movdi pattern are overlapping for MOVZ and MOVI immediates.
>
>> I agree the ctz trick is more elegant than (and an improvement over)
>> the current approach to testing for movz. But I think the overall logic
>> is harder to follow than it was in the original version. Initially
>> canonicalising val2 based on the sign bit seems unintuitive since we
>> still need to handle all four combinations of (top bit set, top bit clear)
>> x (low 48 bits set, low 48 bits clear). I preferred the original
>> approach of testing once with the original value (for MOVZ) and once
>> with the inverted value (for MOVN).
>
> Yes, the canonicalization on the sign ends up requiring 2 special cases.
> Handling the MOVZ case first and then MOVN does avoid that, and makes
> things simpler overall, so I've used that approach in v2.
>
>> Don't the new cases boil down to: if mode is DImode and the upper 32 bits
>> are clear, we can test based on SImode instead? In other words, couldn't
>> the "(val >> 32) == 0" part of the final test be done first, with the
>> effect of changing the mode to SImode? Something like:
>
> Yes that works. I used masking of the top bits to avoid repeatedly testing the
> same condition. The new version removes most special cases and ends up
> both smaller and simpler:
>
>
> v2: Simplify the special cases in aarch64_move_imm, use aarch64_is_movz.
>
> Simplify, refactor and improve various move immediate functions.
> Allow 32-bit MOVZ/I/N as a valid 64-bit immediate which removes special
> cases in aarch64_internal_mov_immediate. Add new constraint so the movdi
> pattern only needs a single alternative for move immediate.
>
> Passes bootstrap and regress, OK for commit?
>
> gcc/ChangeLog:
>
> * config/aarch64/aarch64.cc (aarch64_bitmask_imm): Use unsigned type.
> (aarch64_zeroextended_move_imm): New function.
> (aarch64_move_imm): Refactor, assert mode is SImode or DImode.
> (aarch64_internal_mov_immediate): Assert mode is SImode or DImode.
> Simplify special cases.
> (aarch64_uimm12_shift): Simplify code.
> (aarch64_clamp_to_uimm12_shift): Likewise.
> (aarch64_movw_imm): Rename to aarch64_is_movz.
> (aarch64_float_const_rtx_p): Pass either SImode or DImode to
> aarch64_internal_mov_immediate.
> (aarch64_rtx_costs): Likewise.
> * config/aarch64/aarch64.md (movdi_aarch64): Merge 'N' and 'M'
> constraints into single 'O'.
> (mov<mode>_aarch64): Likewise.
> * config/aarch64/aarch64-protos.h (aarch64_move_imm): Use unsigned.
> (aarch64_bitmask_imm): Likewise.
> (aarch64_uimm12_shift): Likewise.
> (aarch64_zeroextended_move_imm): New prototype.
> * config/aarch64/constraints.md: Add 'O' for 32/64-bit immediates,
> limit 'N' to 64-bit only moves.
>
> ---
>
> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
> index 4be93c93c26e091f878bc8e4cf06e90888405fb2..8bce6ec7599edcc2e6a1d8006450f35c0ce7f61f 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -756,7 +756,7 @@ void aarch64_post_cfi_startproc (void);
> poly_int64 aarch64_initial_elimination_offset (unsigned, unsigned);
> int aarch64_get_condition_code (rtx);
> bool aarch64_address_valid_for_prefetch_p (rtx, bool);
> -bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode);
> +bool aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode);
> unsigned HOST_WIDE_INT aarch64_and_split_imm1 (HOST_WIDE_INT val_in);
> unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in);
> bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode);
> @@ -793,7 +793,7 @@ bool aarch64_masks_and_shift_for_bfi_p (scalar_int_mode, unsigned HOST_WIDE_INT,
> unsigned HOST_WIDE_INT,
> unsigned HOST_WIDE_INT);
> bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx);
> -bool aarch64_move_imm (HOST_WIDE_INT, machine_mode);
> +bool aarch64_move_imm (unsigned HOST_WIDE_INT, machine_mode);
> machine_mode aarch64_sve_int_mode (machine_mode);
> opt_machine_mode aarch64_sve_pred_mode (unsigned int);
> machine_mode aarch64_sve_pred_mode (machine_mode);
> @@ -843,8 +843,9 @@ bool aarch64_sve_float_arith_immediate_p (rtx, bool);
> bool aarch64_sve_float_mul_immediate_p (rtx);
> bool aarch64_split_dimode_const_store (rtx, rtx);
> bool aarch64_symbolic_address_p (rtx);
> -bool aarch64_uimm12_shift (HOST_WIDE_INT);
> +bool aarch64_uimm12_shift (unsigned HOST_WIDE_INT);
> int aarch64_movk_shift (const wide_int_ref &, const wide_int_ref &);
> +bool aarch64_zeroextended_move_imm (unsigned HOST_WIDE_INT);
> bool aarch64_use_return_insn_p (void);
> const char *aarch64_output_casesi (rtx *);
>
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index e97f3b32f7c7f43564d6a4207eae5a34b9e9bfe7..fab478ada8d72d163c8daa3713e5ff6945de409f 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -5625,12 +5625,10 @@ aarch64_bitmask_imm (unsigned HOST_WIDE_INT val)
>
> /* Return true if VAL is a valid bitmask immediate for MODE. */
> bool
> -aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
> +aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode mode)
> {
> if (mode == DImode)
> - return aarch64_bitmask_imm (val_in);
> -
> - unsigned HOST_WIDE_INT val = val_in;
> + return aarch64_bitmask_imm (val);
>
> if (mode == SImode)
> return aarch64_bitmask_imm ((val & 0xffffffff) | (val << 32));
> @@ -5669,51 +5667,57 @@ aarch64_check_bitmask (unsigned HOST_WIDE_INT val,
> }
>
>
> -/* Return true if val is an immediate that can be loaded into a
> - register by a MOVZ instruction. */
> -static bool
> -aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode)
> +/* Return true if immediate VAL can only be created by using a 32-bit
> + zero-extended move immediate, not by a 64-bit move. */
> +bool
> +aarch64_zeroextended_move_imm (unsigned HOST_WIDE_INT val)
> {
> - if (GET_MODE_SIZE (mode) > 4)
> - {
> - if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
> - || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
> - return 1;
> - }
> - else
> - {
> - /* Ignore sign extension. */
> - val &= (HOST_WIDE_INT) 0xffffffff;
> - }
> - return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
> - || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
> + if (val < 65536 || (val >> 32) != 0 || (val & 0xffff) == 0)
> + return false;
> + return !aarch64_bitmask_imm (val);
> }
>
>
> -/* Return true if VAL is an immediate that can be loaded into a
> - register in a single instruction. */
> +/* Return true if VAL is a valid MOVZ immediate. */
> +static inline bool
> +aarch64_is_movz (unsigned HOST_WIDE_INT val)
> +{
> + return (val >> (ctz_hwi (val) & 48)) < 65536;
> +}
> +
> +
> +/* Return true if VAL is an immediate that can be created by a single
> + MOV instruction. */
> bool
> -aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
> +aarch64_move_imm (unsigned HOST_WIDE_INT val, machine_mode mode)
> {
> - scalar_int_mode int_mode;
> - if (!is_a <scalar_int_mode> (mode, &int_mode))
> - return false;
> + gcc_assert (mode == SImode || mode == DImode);
>
> - if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode))
> - return 1;
> - return aarch64_bitmask_imm (val, int_mode);
> + if (val < 65536)
> + return true;
> +
> + unsigned HOST_WIDE_INT mask =
> + (val >> 32) == 0 || mode == SImode ? 0xffffffff : HOST_WIDE_INT_M1U;
> +
> + if (aarch64_is_movz (val & mask) || aarch64_is_movz (~val & mask))
> + return true;
> +
> + val = (val & mask) | ((val << 32) & ~mask);
> + return aarch64_bitmask_imm (val);
> }
>
>
> static int
> aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
> - scalar_int_mode mode)
> + machine_mode mode)
> {
> int i;
> unsigned HOST_WIDE_INT val, val2, mask;
> int one_match, zero_match;
> int num_insns;
>
> + gcc_assert (mode == SImode || mode == DImode);
> +
> val = INTVAL (imm);
>
> if (aarch64_move_imm (val, mode))
> @@ -5723,31 +5727,6 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
> return 1;
> }
>
> - /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff
> - (with XXXX non-zero). In that case check to see if the move can be done in
> - a smaller mode. */
> - val2 = val & 0xffffffff;
> - if (mode == DImode
> - && aarch64_move_imm (val2, SImode)
> - && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0))
> - {
> - if (generate)
> - emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
> -
> - /* Check if we have to emit a second instruction by checking to see
> - if any of the upper 32 bits of the original DI mode value is set. */
> - if (val == val2)
> - return 1;
> -
> - i = (val >> 48) ? 48 : 32;
> -
> - if (generate)
> - emit_insn (gen_insv_immdi (dest, GEN_INT (i),
> - GEN_INT ((val >> i) & 0xffff)));
> -
> - return 2;
> - }
> -
> if ((val >> 32) == 0 || mode == SImode)
> {
> if (generate)
> @@ -5771,24 +5750,31 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
> one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
> ((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);
>
> + /* Try a bitmask immediate and a movk to generate the immediate
> + in 2 instructions. */
> +
> if (zero_match < 2 && one_match < 2)
> {
> - /* Try emitting a bitmask immediate with a movk replacing 16 bits.
> - For a 64-bit bitmask try whether changing 16 bits to all ones or
> - zeroes creates a valid bitmask. To check any repeated bitmask,
> - try using 16 bits from the other 32-bit half of val. */
> -
> for (i = 0; i < 64; i += 16)
> - if (aarch64_check_bitmask (val, val2, mask << i))
> - {
> - if (generate)
> - {
> - emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
> - emit_insn (gen_insv_immdi (dest, GEN_INT (i),
> - GEN_INT ((val >> i) & 0xffff)));
> - }
> - return 2;
> - }
> + {
> + if (aarch64_check_bitmask (val, val2, mask << i))
> + break;
> +
> + val2 = val & ~(mask << i);
> + if ((val2 >> 32) == 0 && aarch64_move_imm (val2, DImode))
> + break;
> + }
> +
> + if (i != 64)
> + {
> + if (generate)
> + {
> + emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
> + emit_insn (gen_insv_immdi (dest, GEN_INT (i),
> + GEN_INT ((val >> i) & 0xffff)));
> + }
> + return 2;
> + }
> }
>
> /* Try a bitmask plus 2 movk to generate the immediate in 3 instructions. */
> @@ -5857,26 +5843,24 @@ aarch64_mov128_immediate (rtx imm)
> /* Return true if val can be encoded as a 12-bit unsigned immediate with
> a left shift of 0 or 12 bits. */
> bool
> -aarch64_uimm12_shift (HOST_WIDE_INT val)
> +aarch64_uimm12_shift (unsigned HOST_WIDE_INT val)
> {
> - return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
> - || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
> - );
> + return val < 4096 || (val & 0xfff000) == val;
> }
>
> /* Returns the nearest value to VAL that will fit as a 12-bit unsigned immediate
> that can be created with a left shift of 0 or 12. */
> static HOST_WIDE_INT
> -aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val)
> +aarch64_clamp_to_uimm12_shift (unsigned HOST_WIDE_INT val)
> {
> /* Check to see if the value fits in 24 bits, as that is the maximum we can
> handle correctly. */
> - gcc_assert ((val & 0xffffff) == val);
> + gcc_assert (val < 0x1000000);
>
> - if (((val & 0xfff) << 0) == val)
> + if (val < 4096)
> return val;
>
> - return val & (0xfff << 12);
> + return val & 0xfff000;
> }
>
>
> @@ -7024,8 +7008,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
> return;
> }
>
> - aarch64_internal_mov_immediate (dest, imm, true,
> - as_a <scalar_int_mode> (mode));
> + aarch64_internal_mov_immediate (dest, imm, true, mode);
> }
>
> /* Return the MEM rtx that provides the canary value that should be used
> @@ -11197,9 +11180,7 @@ aarch64_float_const_rtx_p (rtx x)
> && SCALAR_FLOAT_MODE_P (mode)
> && aarch64_reinterpret_float_as_int (x, &ival))
> {
> - scalar_int_mode imode = (mode == HFmode
> - ? SImode
> - : int_mode_for_mode (mode).require ());
> + machine_mode imode = (mode == DFmode) ? DImode : SImode;
It looks like this might mishandle DDmode, if not now then in the future.
Seems safer to use known_eq (GET_MODE_SIZE (mode), 8)
> int num_instr = aarch64_internal_mov_immediate
> (NULL_RTX, gen_int_mode (ival, imode), false, imode);
> return num_instr < 3;
> @@ -13857,10 +13838,9 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
> proportionally expensive to the number of instructions
> required to build that constant. This is true whether we
> are compiling for SPEED or otherwise. */
> - if (!is_a <scalar_int_mode> (mode, &int_mode))
> - int_mode = word_mode;
> + machine_mode imode = (mode == SImode) ? SImode : DImode;
And here known_le (GET_MODE_SIZE (mode), 4), so that we handle QImode
and HImode as SImode rather than DImode.
> *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
> - (NULL_RTX, x, false, int_mode));
> + (NULL_RTX, x, false, imode));
> }
> return true;
>
> @@ -13876,9 +13856,7 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
> bool succeed = aarch64_reinterpret_float_as_int (x, &ival);
> gcc_assert (succeed);
>
> - scalar_int_mode imode = (mode == HFmode
> - ? SImode
> - : int_mode_for_mode (mode).require ());
> + machine_mode imode = (mode == DFmode) ? DImode : SImode;
Same GET_MODE_SIZE test here.
> int ncost = aarch64_internal_mov_immediate
> (NULL_RTX, gen_int_mode (ival, imode), false, imode);
> *cost += COSTS_N_INSNS (ncost);
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 76b6898ca048c559ff7f7fba78119161b3d382f6..5694a1efea1df8e532520475140c8748b54e3d89 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -1309,16 +1309,15 @@ (define_insn_and_split "*movsi_aarch64"
> )
>
> (define_insn_and_split "*movdi_aarch64"
> - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,r, r,w, m,m, r, r, r, w,r,w, w")
> - (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd"))]
> + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m,m, r, r, r, w,r,w, w")
> + (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,O,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd"))]
> "(register_operand (operands[0], DImode)
> || aarch64_reg_or_zero (operands[1], DImode))"
> "@
> mov\\t%x0, %x1
> mov\\t%0, %x1
> mov\\t%x0, %1
> - mov\\t%x0, %1
> - mov\\t%w0, %1
> + * return aarch64_zeroextended_move_imm (INTVAL (operands[1])) ? \"mov\\t%w0, %1\" : \"mov\\t%x0, %1\";
> #
> * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
> ldr\\t%x0, %1
> @@ -1340,11 +1339,11 @@ (define_insn_and_split "*movdi_aarch64"
> DONE;
> }"
> ;; The "mov_imm" type for CNTD is just a placeholder.
> - [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,mov_imm,
> + [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,
> load_8,load_8,store_8,store_8,load_8,adr,adr,f_mcr,f_mrc,
> fmov,neon_move")
> - (set_attr "arch" "*,*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
> - (set_attr "length" "4,4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4")]
> + (set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
> + (set_attr "length" "4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4")]
> )
>
> (define_insn "insv_imm<mode>"
> @@ -1508,7 +1507,7 @@ (define_insn "*mov<mode>_aarch64"
>
> (define_insn "*mov<mode>_aarch64"
> [(set (match_operand:DFD 0 "nonimmediate_operand" "=w, w ,?r,w,w ,w ,w,m,r,m ,r,r")
> - (match_operand:DFD 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N"))]
> + (match_operand:DFD 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,O"))]
> "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
> || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
> "@
> @@ -1523,7 +1522,7 @@ (define_insn "*mov<mode>_aarch64"
> ldr\\t%x0, %1
> str\\t%x1, %0
> mov\\t%x0, %x1
> - mov\\t%x0, %1"
> + * return aarch64_zeroextended_move_imm (INTVAL (operands[1])) ? \"mov\\t%w0, %1\" : \"mov\\t%x0, %1\";"
> [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,neon_move,\
> f_loadd,f_stored,load_8,store_8,mov_reg,\
> fconstd")
> diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
> index 29efb6c0cff7574c9b239ef358acaca96dd75d03..6168295814c3ce4b6f65030cddfd08172b6febd8 100644
> --- a/gcc/config/aarch64/constraints.md
> +++ b/gcc/config/aarch64/constraints.md
> @@ -106,6 +106,12 @@ (define_constraint "M"
>
> (define_constraint "N"
> "A constant that can be used with a 64-bit MOV immediate operation."
> + (and (match_code "const_int")
> + (match_test "aarch64_move_imm (ival, DImode)")
> + (match_test "!aarch64_zeroextended_move_imm (ival)")))
Sorry for not noticing last time, but: rather than have
aarch64_zeroextended_move_imm (which is quite a complicated test),
could we just add an extra (default off) parameter to aarch64_move_imm
that suppresses the (val >> 32) == 0 test?
Thanks,
Richard
> +
> +(define_constraint "O"
> + "A constant that can be used with a 32 or 64-bit MOV immediate operation."
> (and (match_code "const_int")
> (match_test "aarch64_move_imm (ival, DImode)")))
>
Hi Richard,
> - scalar_int_mode imode = (mode == HFmode
> - ? SImode
> - : int_mode_for_mode (mode).require ());
> + machine_mode imode = (mode == DFmode) ? DImode : SImode;
> It looks like this might mishandle DDmode, if not now then in the future.
> Seems safer to use known_eq (GET_MODE_SIZE (mode), 8)
I've changed that, but it does not matter for the narrow modes as the result
will be identical - only DDmode might get costed incorrectly.
> Sorry for not noticing last time, but: rather than have
> aarch64_zeroextended_move_imm (which is quite a complicated test),
> could we just add an extra (default off) parameter to aarch64_move_imm
> that suppresses the (val >> 32) == 0 test?
That makes things more complicated again - ultimately I'd like to get rid of the
mode parameter since most callers use a fixed mode, and ones that don't are
now creating and passing fake modes... I've changed it to work like aarch64_move_imm
and call aarch64_is_movz twice to check it is not a 64-bit MOVZ/MOVN.
Cheers,
Wilco
v3: Use aarch64_is_movz, use known_eq
Simplify, refactor and improve various move immediate functions.
Allow 32-bit MOVI/N as a valid 64-bit immediate which removes special
cases in aarch64_internal_mov_immediate. Add new constraint so the movdi
pattern only needs a single alternative for move immediate.
Passes bootstrap and regress, OK for commit?
gcc/ChangeLog:
* config/aarch64/aarch64.cc (aarch64_bitmask_imm): Use unsigned type.
(aarch64_zeroextended_move_imm): New function.
(aarch64_move_imm): Refactor, assert mode is SImode or DImode.
(aarch64_internal_mov_immediate): Assert mode is SImode or DImode.
Simplify special cases.
(aarch64_uimm12_shift): Simplify code.
(aarch64_clamp_to_uimm12_shift): Likewise.
(aarch64_movw_imm): Rename to aarch64_is_movz.
(aarch64_float_const_rtx_p): Pass either SImode or DImode to
aarch64_internal_mov_immediate.
(aarch64_rtx_costs): Likewise.
* config/aarch64/aarch64.md (movdi_aarch64): Merge 'N' and 'M'
constraints into single 'O'.
(mov<mode>_aarch64): Likewise.
* config/aarch64/aarch64-protos.h (aarch64_move_imm): Use unsigned.
(aarch64_bitmask_imm): Likewise.
(aarch64_uimm12_shift): Likewise.
(aarch64_zeroextended_move_imm): New prototype.
* config/aarch64/constraints.md: Add 'O' for 32/64-bit immediates,
limit 'N' to 64-bit only moves.
---
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 4be93c93c26e091f878bc8e4cf06e90888405fb2..8bce6ec7599edcc2e6a1d8006450f35c0ce7f61f 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -756,7 +756,7 @@ void aarch64_post_cfi_startproc (void);
poly_int64 aarch64_initial_elimination_offset (unsigned, unsigned);
int aarch64_get_condition_code (rtx);
bool aarch64_address_valid_for_prefetch_p (rtx, bool);
-bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode);
+bool aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode);
unsigned HOST_WIDE_INT aarch64_and_split_imm1 (HOST_WIDE_INT val_in);
unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in);
bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode);
@@ -793,7 +793,7 @@ bool aarch64_masks_and_shift_for_bfi_p (scalar_int_mode, unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT);
bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx);
-bool aarch64_move_imm (HOST_WIDE_INT, machine_mode);
+bool aarch64_move_imm (unsigned HOST_WIDE_INT, machine_mode);
machine_mode aarch64_sve_int_mode (machine_mode);
opt_machine_mode aarch64_sve_pred_mode (unsigned int);
machine_mode aarch64_sve_pred_mode (machine_mode);
@@ -843,8 +843,9 @@ bool aarch64_sve_float_arith_immediate_p (rtx, bool);
bool aarch64_sve_float_mul_immediate_p (rtx);
bool aarch64_split_dimode_const_store (rtx, rtx);
bool aarch64_symbolic_address_p (rtx);
-bool aarch64_uimm12_shift (HOST_WIDE_INT);
+bool aarch64_uimm12_shift (unsigned HOST_WIDE_INT);
int aarch64_movk_shift (const wide_int_ref &, const wide_int_ref &);
+bool aarch64_zeroextended_move_imm (unsigned HOST_WIDE_INT);
bool aarch64_use_return_insn_p (void);
const char *aarch64_output_casesi (rtx *);
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index a73741800c963ee6605fd2cfa918f4399da4bfdf..00269632eeb52c29ba2011c4c82274968b850d71 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -5625,12 +5625,10 @@ aarch64_bitmask_imm (unsigned HOST_WIDE_INT val)
/* Return true if VAL is a valid bitmask immediate for MODE. */
bool
-aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
+aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode mode)
{
if (mode == DImode)
- return aarch64_bitmask_imm (val_in);
-
- unsigned HOST_WIDE_INT val = val_in;
+ return aarch64_bitmask_imm (val);
if (mode == SImode)
return aarch64_bitmask_imm ((val & 0xffffffff) | (val << 32));
@@ -5669,51 +5667,58 @@ aarch64_check_bitmask (unsigned HOST_WIDE_INT val,
}
-/* Return true if val is an immediate that can be loaded into a
- register by a MOVZ instruction. */
-static bool
-aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode)
+/* Return true if VAL is a valid MOVZ immediate. */
+static inline bool
+aarch64_is_movz (unsigned HOST_WIDE_INT val)
{
- if (GET_MODE_SIZE (mode) > 4)
- {
- if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
- || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
- return 1;
- }
- else
- {
- /* Ignore sign extension. */
- val &= (HOST_WIDE_INT) 0xffffffff;
- }
- return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
- || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
+ return (val >> (ctz_hwi (val) & 48)) < 65536;
}
-/* Return true if VAL is an immediate that can be loaded into a
- register in a single instruction. */
+/* Return true if immediate VAL can only be created by using a 32-bit
+ zero-extended move immediate, not by a 64-bit move. */
bool
-aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
+aarch64_zeroextended_move_imm (unsigned HOST_WIDE_INT val)
{
- scalar_int_mode int_mode;
- if (!is_a <scalar_int_mode> (mode, &int_mode))
+ if (aarch64_is_movz (val) || aarch64_is_movz (~val))
return false;
- if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode))
- return 1;
- return aarch64_bitmask_imm (val, int_mode);
+ return !aarch64_bitmask_imm (val);
+}
+
+
+/* Return true if VAL is an immediate that can be created by a single
+ MOV instruction. */
+bool
+aarch64_move_imm (unsigned HOST_WIDE_INT val, machine_mode mode)
+{
+ gcc_assert (mode == SImode || mode == DImode);
+
+ if (val < 65536)
+ return true;
+
+ unsigned HOST_WIDE_INT mask =
+ (val >> 32) == 0 || mode == SImode ? 0xffffffff : HOST_WIDE_INT_M1U;
+
+ if (aarch64_is_movz (val & mask) || aarch64_is_movz (~val & mask))
+ return true;
+
+ val = (val & mask) | ((val << 32) & ~mask);
+ return aarch64_bitmask_imm (val);
}
static int
aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
- scalar_int_mode mode)
+ machine_mode mode)
{
int i;
unsigned HOST_WIDE_INT val, val2, mask;
int one_match, zero_match;
int num_insns;
+ gcc_assert (mode == SImode || mode == DImode);
+
val = INTVAL (imm);
if (aarch64_move_imm (val, mode))
@@ -5723,31 +5728,6 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
return 1;
}
- /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff
- (with XXXX non-zero). In that case check to see if the move can be done in
- a smaller mode. */
- val2 = val & 0xffffffff;
- if (mode == DImode
- && aarch64_move_imm (val2, SImode)
- && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0))
- {
- if (generate)
- emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
-
- /* Check if we have to emit a second instruction by checking to see
- if any of the upper 32 bits of the original DI mode value is set. */
- if (val == val2)
- return 1;
-
- i = (val >> 48) ? 48 : 32;
-
- if (generate)
- emit_insn (gen_insv_immdi (dest, GEN_INT (i),
- GEN_INT ((val >> i) & 0xffff)));
-
- return 2;
- }
-
if ((val >> 32) == 0 || mode == SImode)
{
if (generate)
@@ -5771,24 +5751,31 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);
+ /* Try a bitmask immediate and a movk to generate the immediate
+ in 2 instructions. */
+
if (zero_match < 2 && one_match < 2)
{
- /* Try emitting a bitmask immediate with a movk replacing 16 bits.
- For a 64-bit bitmask try whether changing 16 bits to all ones or
- zeroes creates a valid bitmask. To check any repeated bitmask,
- try using 16 bits from the other 32-bit half of val. */
-
for (i = 0; i < 64; i += 16)
- if (aarch64_check_bitmask (val, val2, mask << i))
- {
- if (generate)
- {
- emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
- emit_insn (gen_insv_immdi (dest, GEN_INT (i),
- GEN_INT ((val >> i) & 0xffff)));
- }
- return 2;
- }
+ {
+ if (aarch64_check_bitmask (val, val2, mask << i))
+ break;
+
+ val2 = val & ~(mask << i);
+ if ((val2 >> 32) == 0 && aarch64_move_imm (val2, DImode))
+ break;
+ }
+
+ if (i != 64)
+ {
+ if (generate)
+ {
+ emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
+ emit_insn (gen_insv_immdi (dest, GEN_INT (i),
+ GEN_INT ((val >> i) & 0xffff)));
+ }
+ return 2;
+ }
}
/* Try a bitmask plus 2 movk to generate the immediate in 3 instructions. */
@@ -5857,26 +5844,24 @@ aarch64_mov128_immediate (rtx imm)
/* Return true if val can be encoded as a 12-bit unsigned immediate with
a left shift of 0 or 12 bits. */
bool
-aarch64_uimm12_shift (HOST_WIDE_INT val)
+aarch64_uimm12_shift (unsigned HOST_WIDE_INT val)
{
- return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
- || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
- );
+ return val < 4096 || (val & 0xfff000) == val;
}
/* Returns the nearest value to VAL that will fit as a 12-bit unsigned immediate
that can be created with a left shift of 0 or 12. */
static HOST_WIDE_INT
-aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val)
+aarch64_clamp_to_uimm12_shift (unsigned HOST_WIDE_INT val)
{
/* Check to see if the value fits in 24 bits, as that is the maximum we can
handle correctly. */
- gcc_assert ((val & 0xffffff) == val);
+ gcc_assert (val < 0x1000000);
- if (((val & 0xfff) << 0) == val)
+ if (val < 4096)
return val;
- return val & (0xfff << 12);
+ return val & 0xfff000;
}
@@ -7024,8 +7009,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
return;
}
- aarch64_internal_mov_immediate (dest, imm, true,
- as_a <scalar_int_mode> (mode));
+ aarch64_internal_mov_immediate (dest, imm, true, mode);
}
/* Return the MEM rtx that provides the canary value that should be used
@@ -11197,9 +11181,7 @@ aarch64_float_const_rtx_p (rtx x)
&& SCALAR_FLOAT_MODE_P (mode)
&& aarch64_reinterpret_float_as_int (x, &ival))
{
- scalar_int_mode imode = (mode == HFmode
- ? SImode
- : int_mode_for_mode (mode).require ());
+ machine_mode imode = known_eq (GET_MODE_SIZE (mode), 8) ? DImode : SImode;
int num_instr = aarch64_internal_mov_immediate
(NULL_RTX, gen_int_mode (ival, imode), false, imode);
return num_instr < 3;
@@ -13857,10 +13839,10 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
proportionally expensive to the number of instructions
required to build that constant. This is true whether we
are compiling for SPEED or otherwise. */
- if (!is_a <scalar_int_mode> (mode, &int_mode))
- int_mode = word_mode;
+ machine_mode imode = known_le (GET_MODE_SIZE (mode), 4)
+ ? SImode : DImode;
*cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
- (NULL_RTX, x, false, int_mode));
+ (NULL_RTX, x, false, imode));
}
return true;
@@ -13876,9 +13858,8 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
bool succeed = aarch64_reinterpret_float_as_int (x, &ival);
gcc_assert (succeed);
- scalar_int_mode imode = (mode == HFmode
- ? SImode
- : int_mode_for_mode (mode).require ());
+ machine_mode imode = known_eq (GET_MODE_SIZE (mode), 8)
+ ? DImode : SImode;
int ncost = aarch64_internal_mov_immediate
(NULL_RTX, gen_int_mode (ival, imode), false, imode);
*cost += COSTS_N_INSNS (ncost);
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 76b6898ca048c559ff7f7fba78119161b3d382f6..5694a1efea1df8e532520475140c8748b54e3d89 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1309,16 +1309,15 @@ (define_insn_and_split "*movsi_aarch64"
)
(define_insn_and_split "*movdi_aarch64"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,r, r,w, m,m, r, r, r, w,r,w, w")
- (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd"))]
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m,m, r, r, r, w,r,w, w")
+ (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,O,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd"))]
"(register_operand (operands[0], DImode)
|| aarch64_reg_or_zero (operands[1], DImode))"
"@
mov\\t%x0, %x1
mov\\t%0, %x1
mov\\t%x0, %1
- mov\\t%x0, %1
- mov\\t%w0, %1
+ * return aarch64_zeroextended_move_imm (INTVAL (operands[1])) ? \"mov\\t%w0, %1\" : \"mov\\t%x0, %1\";
#
* return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
ldr\\t%x0, %1
@@ -1340,11 +1339,11 @@ (define_insn_and_split "*movdi_aarch64"
DONE;
}"
;; The "mov_imm" type for CNTD is just a placeholder.
- [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,mov_imm,
+ [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,
load_8,load_8,store_8,store_8,load_8,adr,adr,f_mcr,f_mrc,
fmov,neon_move")
- (set_attr "arch" "*,*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
- (set_attr "length" "4,4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4")]
+ (set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
+ (set_attr "length" "4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4")]
)
(define_insn "insv_imm<mode>"
@@ -1508,7 +1507,7 @@ (define_insn "*mov<mode>_aarch64"
(define_insn "*mov<mode>_aarch64"
[(set (match_operand:DFD 0 "nonimmediate_operand" "=w, w ,?r,w,w ,w ,w,m,r,m ,r,r")
- (match_operand:DFD 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N"))]
+ (match_operand:DFD 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,O"))]
"TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
|| aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
"@
@@ -1523,7 +1522,7 @@ (define_insn "*mov<mode>_aarch64"
ldr\\t%x0, %1
str\\t%x1, %0
mov\\t%x0, %x1
- mov\\t%x0, %1"
+ * return aarch64_zeroextended_move_imm (INTVAL (operands[1])) ? \"mov\\t%w0, %1\" : \"mov\\t%x0, %1\";"
[(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,neon_move,\
f_loadd,f_stored,load_8,store_8,mov_reg,\
fconstd")
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 29efb6c0cff7574c9b239ef358acaca96dd75d03..6168295814c3ce4b6f65030cddfd08172b6febd8 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -106,6 +106,12 @@ (define_constraint "M"
(define_constraint "N"
"A constant that can be used with a 64-bit MOV immediate operation."
+ (and (match_code "const_int")
+ (match_test "aarch64_move_imm (ival, DImode)")
+ (match_test "!aarch64_zeroextended_move_imm (ival)")))
+
+(define_constraint "O"
+ "A constant that can be used with a 32 or 64-bit MOV immediate operation."
(and (match_code "const_int")
(match_test "aarch64_move_imm (ival, DImode)")))
Wilco Dijkstra <Wilco.Dijkstra@arm.com> writes:
> Hi Richard,
>
>> - scalar_int_mode imode = (mode == HFmode
>> - ? SImode
>> - : int_mode_for_mode (mode).require ());
>> + machine_mode imode = (mode == DFmode) ? DImode : SImode;
>
>> It looks like this might mishandle DDmode, if not now then in the future.
>> Seems safer to use known_eq (GET_MODE_SIZE (mode), 8)
>
> I've changed that, but it does not matter for the narrow modes as the result
> will be identical - only DDmode might get costed incorrectly.
>
>> Sorry for not noticing last time, but: rather than have
>> aarch64_zeroextended_move_imm (which is quite a complicated test),
>> could we just add an extra (default off) parameter to aarch64_move_imm
>> that suppresses the (val >> 32) == 0 test?
>
> That makes things more complicated again - ultimately I'd like to get rid of the
> mode parameter since most callers use a fixed mode, and ones that don't are
> now creating and passing fake modes...
I guess we'll have to agree to disagree on that one.
> I've changed it to work like aarch64_move_imm, and it calls aarch64_is_movz
> twice to check that the value is not a 64-bit MOVZ/MOVN.
Unlike with my suggestion, the result of the function is only meaningful
if the caller has checked for a valid move immediate first. That is,
aarch64_zeroextended_move_imm takes for granted that the immediate is
valid for MOV Xn or MOV Wn, and the function returns false if the
immediate is valid for MOV Xn. It seems like an odd way round to me,
since it inherently means checking the same thing twice.
Maybe a compromise would be to reverse the sense of the return value
(return true for MOV Xns rather than false) and call the function
something like aarch64_mov_xn_imm. All callers could cope with that,
and:
(define_constraint "N"
"A constant that can be used with a 64-bit MOV immediate operation."
(and (match_code "const_int")
(match_test "aarch64_move_imm (ival, DImode)")
(match_test "!aarch64_zeroextended_move_imm (ival)")))
would become simply:
(define_constraint "N"
"A constant that can be used with a 64-bit MOV immediate operation."
(and (match_code "const_int")
(match_test "aarch64_mov_xn_imm (ival)")))
I'm still not too happy with the duplication, since the structure of
aarch64_mov_xn_imm is obviously aarch64_move_imm with some bits taken
out. If (somehow) one function learns a new trick, that trick would
need to be duplicated in the other function. But like I say, I think
we'll have to agree to disagree on that.
So the patch is OK with the aarch64_mov_xn_imm change suggested above,
or let me know if you disagree.
Thanks,
Richard
>
> Cheers,
> Wilco
>
> v3: Use aarch64_is_movz, use known_eq
>
> Simplify, refactor and improve various move immediate functions.
> Allow 32-bit MOVI/N as a valid 64-bit immediate which removes special
> cases in aarch64_internal_mov_immediate. Add new constraint so the movdi
> pattern only needs a single alternative for move immediate.
>
> Passes bootstrap and regress, OK for commit?
>
> gcc/ChangeLog:
>
> * config/aarch64/aarch64.cc (aarch64_bitmask_imm): Use unsigned type.
> (aarch64_zeroextended_move_imm): New function.
> (aarch64_move_imm): Refactor, assert mode is SImode or DImode.
> (aarch64_internal_mov_immediate): Assert mode is SImode or DImode.
> Simplify special cases.
> (aarch64_uimm12_shift): Simplify code.
> (aarch64_clamp_to_uimm12_shift): Likewise.
> (aarch64_movw_imm): Rename to aarch64_is_movz.
> (aarch64_float_const_rtx_p): Pass either SImode or DImode to
> aarch64_internal_mov_immediate.
> (aarch64_rtx_costs): Likewise.
> * config/aarch64/aarch64.md (movdi_aarch64): Merge 'N' and 'M'
> constraints into single 'O'.
> (mov<mode>_aarch64): Likewise.
> * config/aarch64/aarch64-protos.h (aarch64_move_imm): Use unsigned.
> (aarch64_bitmask_imm): Likewise.
> (aarch64_uimm12_shift): Likewise.
> (aarch64_zeroextended_move_imm): New prototype.
> * config/aarch64/constraints.md: Add 'O' for 32/64-bit immediates,
> limit 'N' to 64-bit only moves.
>
> ---
>
> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
> index 4be93c93c26e091f878bc8e4cf06e90888405fb2..8bce6ec7599edcc2e6a1d8006450f35c0ce7f61f 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -756,7 +756,7 @@ void aarch64_post_cfi_startproc (void);
> poly_int64 aarch64_initial_elimination_offset (unsigned, unsigned);
> int aarch64_get_condition_code (rtx);
> bool aarch64_address_valid_for_prefetch_p (rtx, bool);
> -bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode);
> +bool aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode);
> unsigned HOST_WIDE_INT aarch64_and_split_imm1 (HOST_WIDE_INT val_in);
> unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in);
> bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode);
> @@ -793,7 +793,7 @@ bool aarch64_masks_and_shift_for_bfi_p (scalar_int_mode, unsigned HOST_WIDE_INT,
> unsigned HOST_WIDE_INT,
> unsigned HOST_WIDE_INT);
> bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx);
> -bool aarch64_move_imm (HOST_WIDE_INT, machine_mode);
> +bool aarch64_move_imm (unsigned HOST_WIDE_INT, machine_mode);
> machine_mode aarch64_sve_int_mode (machine_mode);
> opt_machine_mode aarch64_sve_pred_mode (unsigned int);
> machine_mode aarch64_sve_pred_mode (machine_mode);
> @@ -843,8 +843,9 @@ bool aarch64_sve_float_arith_immediate_p (rtx, bool);
> bool aarch64_sve_float_mul_immediate_p (rtx);
> bool aarch64_split_dimode_const_store (rtx, rtx);
> bool aarch64_symbolic_address_p (rtx);
> -bool aarch64_uimm12_shift (HOST_WIDE_INT);
> +bool aarch64_uimm12_shift (unsigned HOST_WIDE_INT);
> int aarch64_movk_shift (const wide_int_ref &, const wide_int_ref &);
> +bool aarch64_zeroextended_move_imm (unsigned HOST_WIDE_INT);
> bool aarch64_use_return_insn_p (void);
> const char *aarch64_output_casesi (rtx *);
>
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index a73741800c963ee6605fd2cfa918f4399da4bfdf..00269632eeb52c29ba2011c4c82274968b850d71 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -5625,12 +5625,10 @@ aarch64_bitmask_imm (unsigned HOST_WIDE_INT val)
>
> /* Return true if VAL is a valid bitmask immediate for MODE. */
> bool
> -aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
> +aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode mode)
> {
> if (mode == DImode)
> - return aarch64_bitmask_imm (val_in);
> -
> - unsigned HOST_WIDE_INT val = val_in;
> + return aarch64_bitmask_imm (val);
>
> if (mode == SImode)
> return aarch64_bitmask_imm ((val & 0xffffffff) | (val << 32));
> @@ -5669,51 +5667,58 @@ aarch64_check_bitmask (unsigned HOST_WIDE_INT val,
> }
>
>
> -/* Return true if val is an immediate that can be loaded into a
> - register by a MOVZ instruction. */
> -static bool
> -aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode)
> +/* Return true if VAL is a valid MOVZ immediate. */
> +static inline bool
> +aarch64_is_movz (unsigned HOST_WIDE_INT val)
> {
> - if (GET_MODE_SIZE (mode) > 4)
> - {
> - if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
> - || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
> - return 1;
> - }
> - else
> - {
> - /* Ignore sign extension. */
> - val &= (HOST_WIDE_INT) 0xffffffff;
> - }
> - return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
> - || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
> + return (val >> (ctz_hwi (val) & 48)) < 65536;
> }
>
>
> -/* Return true if VAL is an immediate that can be loaded into a
> - register in a single instruction. */
> +/* Return true if immediate VAL can only be created by using a 32-bit
> + zero-extended move immediate, not by a 64-bit move. */
> bool
> -aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
> +aarch64_zeroextended_move_imm (unsigned HOST_WIDE_INT val)
> {
> - scalar_int_mode int_mode;
> - if (!is_a <scalar_int_mode> (mode, &int_mode))
> + if (aarch64_is_movz (val) || aarch64_is_movz (~val))
> return false;
>
> - if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode))
> - return 1;
> - return aarch64_bitmask_imm (val, int_mode);
> + return !aarch64_bitmask_imm (val);
> +}
> +
> +
> +/* Return true if VAL is an immediate that can be created by a single
> + MOV instruction. */
> +bool
> +aarch64_move_imm (unsigned HOST_WIDE_INT val, machine_mode mode)
> +{
> + gcc_assert (mode == SImode || mode == DImode);
> +
> + if (val < 65536)
> + return true;
> +
> + unsigned HOST_WIDE_INT mask =
> + (val >> 32) == 0 || mode == SImode ? 0xffffffff : HOST_WIDE_INT_M1U;
> +
> + if (aarch64_is_movz (val & mask) || aarch64_is_movz (~val & mask))
> + return true;
> +
> + val = (val & mask) | ((val << 32) & ~mask);
> + return aarch64_bitmask_imm (val);
> }
>
>
> static int
> aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
> - scalar_int_mode mode)
> + machine_mode mode)
> {
> int i;
> unsigned HOST_WIDE_INT val, val2, mask;
> int one_match, zero_match;
> int num_insns;
>
> + gcc_assert (mode == SImode || mode == DImode);
> +
> val = INTVAL (imm);
>
> if (aarch64_move_imm (val, mode))
> @@ -5723,31 +5728,6 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
> return 1;
> }
>
> - /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff
> - (with XXXX non-zero). In that case check to see if the move can be done in
> - a smaller mode. */
> - val2 = val & 0xffffffff;
> - if (mode == DImode
> - && aarch64_move_imm (val2, SImode)
> - && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0))
> - {
> - if (generate)
> - emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
> -
> - /* Check if we have to emit a second instruction by checking to see
> - if any of the upper 32 bits of the original DI mode value is set. */
> - if (val == val2)
> - return 1;
> -
> - i = (val >> 48) ? 48 : 32;
> -
> - if (generate)
> - emit_insn (gen_insv_immdi (dest, GEN_INT (i),
> - GEN_INT ((val >> i) & 0xffff)));
> -
> - return 2;
> - }
> -
> if ((val >> 32) == 0 || mode == SImode)
> {
> if (generate)
> @@ -5771,24 +5751,31 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
> one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
> ((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);
>
> + /* Try a bitmask immediate and a movk to generate the immediate
> + in 2 instructions. */
> +
> if (zero_match < 2 && one_match < 2)
> {
> - /* Try emitting a bitmask immediate with a movk replacing 16 bits.
> - For a 64-bit bitmask try whether changing 16 bits to all ones or
> - zeroes creates a valid bitmask. To check any repeated bitmask,
> - try using 16 bits from the other 32-bit half of val. */
> -
> for (i = 0; i < 64; i += 16)
> - if (aarch64_check_bitmask (val, val2, mask << i))
> - {
> - if (generate)
> - {
> - emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
> - emit_insn (gen_insv_immdi (dest, GEN_INT (i),
> - GEN_INT ((val >> i) & 0xffff)));
> - }
> - return 2;
> - }
> + {
> + if (aarch64_check_bitmask (val, val2, mask << i))
> + break;
> +
> + val2 = val & ~(mask << i);
> + if ((val2 >> 32) == 0 && aarch64_move_imm (val2, DImode))
> + break;
> + }
> +
> + if (i != 64)
> + {
> + if (generate)
> + {
> + emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
> + emit_insn (gen_insv_immdi (dest, GEN_INT (i),
> + GEN_INT ((val >> i) & 0xffff)));
> + }
> + return 2;
> + }
> }
>
> /* Try a bitmask plus 2 movk to generate the immediate in 3 instructions. */
> @@ -5857,26 +5844,24 @@ aarch64_mov128_immediate (rtx imm)
> /* Return true if val can be encoded as a 12-bit unsigned immediate with
> a left shift of 0 or 12 bits. */
> bool
> -aarch64_uimm12_shift (HOST_WIDE_INT val)
> +aarch64_uimm12_shift (unsigned HOST_WIDE_INT val)
> {
> - return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
> - || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
> - );
> + return val < 4096 || (val & 0xfff000) == val;
> }
>
> /* Returns the nearest value to VAL that will fit as a 12-bit unsigned immediate
> that can be created with a left shift of 0 or 12. */
> static HOST_WIDE_INT
> -aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val)
> +aarch64_clamp_to_uimm12_shift (unsigned HOST_WIDE_INT val)
> {
> /* Check to see if the value fits in 24 bits, as that is the maximum we can
> handle correctly. */
> - gcc_assert ((val & 0xffffff) == val);
> + gcc_assert (val < 0x1000000);
>
> - if (((val & 0xfff) << 0) == val)
> + if (val < 4096)
> return val;
>
> - return val & (0xfff << 12);
> + return val & 0xfff000;
> }
>
>
> @@ -7024,8 +7009,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
> return;
> }
>
> - aarch64_internal_mov_immediate (dest, imm, true,
> - as_a <scalar_int_mode> (mode));
> + aarch64_internal_mov_immediate (dest, imm, true, mode);
> }
>
> /* Return the MEM rtx that provides the canary value that should be used
> @@ -11197,9 +11181,7 @@ aarch64_float_const_rtx_p (rtx x)
> && SCALAR_FLOAT_MODE_P (mode)
> && aarch64_reinterpret_float_as_int (x, &ival))
> {
> - scalar_int_mode imode = (mode == HFmode
> - ? SImode
> - : int_mode_for_mode (mode).require ());
> + machine_mode imode = known_eq (GET_MODE_SIZE (mode), 8) ? DImode : SImode;
> int num_instr = aarch64_internal_mov_immediate
> (NULL_RTX, gen_int_mode (ival, imode), false, imode);
> return num_instr < 3;
> @@ -13857,10 +13839,10 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
> proportionally expensive to the number of instructions
> required to build that constant. This is true whether we
> are compiling for SPEED or otherwise. */
> - if (!is_a <scalar_int_mode> (mode, &int_mode))
> - int_mode = word_mode;
> + machine_mode imode = known_le (GET_MODE_SIZE (mode), 4)
> + ? SImode : DImode;
> *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
> - (NULL_RTX, x, false, int_mode));
> + (NULL_RTX, x, false, imode));
> }
> return true;
>
> @@ -13876,9 +13858,8 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
> bool succeed = aarch64_reinterpret_float_as_int (x, &ival);
> gcc_assert (succeed);
>
> - scalar_int_mode imode = (mode == HFmode
> - ? SImode
> - : int_mode_for_mode (mode).require ());
> + machine_mode imode = known_eq (GET_MODE_SIZE (mode), 8)
> + ? DImode : SImode;
> int ncost = aarch64_internal_mov_immediate
> (NULL_RTX, gen_int_mode (ival, imode), false, imode);
> *cost += COSTS_N_INSNS (ncost);
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 76b6898ca048c559ff7f7fba78119161b3d382f6..5694a1efea1df8e532520475140c8748b54e3d89 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -1309,16 +1309,15 @@ (define_insn_and_split "*movsi_aarch64"
> )
>
> (define_insn_and_split "*movdi_aarch64"
> - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,r, r,w, m,m, r, r, r, w,r,w, w")
> - (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd"))]
> + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m,m, r, r, r, w,r,w, w")
> + (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,O,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd"))]
> "(register_operand (operands[0], DImode)
> || aarch64_reg_or_zero (operands[1], DImode))"
> "@
> mov\\t%x0, %x1
> mov\\t%0, %x1
> mov\\t%x0, %1
> - mov\\t%x0, %1
> - mov\\t%w0, %1
> + * return aarch64_zeroextended_move_imm (INTVAL (operands[1])) ? \"mov\\t%w0, %1\" : \"mov\\t%x0, %1\";
> #
> * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
> ldr\\t%x0, %1
> @@ -1340,11 +1339,11 @@ (define_insn_and_split "*movdi_aarch64"
> DONE;
> }"
> ;; The "mov_imm" type for CNTD is just a placeholder.
> - [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,mov_imm,
> + [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,
> load_8,load_8,store_8,store_8,load_8,adr,adr,f_mcr,f_mrc,
> fmov,neon_move")
> - (set_attr "arch" "*,*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
> - (set_attr "length" "4,4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4")]
> + (set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
> + (set_attr "length" "4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4")]
> )
>
> (define_insn "insv_imm<mode>"
> @@ -1508,7 +1507,7 @@ (define_insn "*mov<mode>_aarch64"
>
> (define_insn "*mov<mode>_aarch64"
> [(set (match_operand:DFD 0 "nonimmediate_operand" "=w, w ,?r,w,w ,w ,w,m,r,m ,r,r")
> - (match_operand:DFD 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N"))]
> + (match_operand:DFD 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,O"))]
> "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
> || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
> "@
> @@ -1523,7 +1522,7 @@ (define_insn "*mov<mode>_aarch64"
> ldr\\t%x0, %1
> str\\t%x1, %0
> mov\\t%x0, %x1
> - mov\\t%x0, %1"
> + * return aarch64_zeroextended_move_imm (INTVAL (operands[1])) ? \"mov\\t%w0, %1\" : \"mov\\t%x0, %1\";"
> [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,neon_move,\
> f_loadd,f_stored,load_8,store_8,mov_reg,\
> fconstd")
> diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
> index 29efb6c0cff7574c9b239ef358acaca96dd75d03..6168295814c3ce4b6f65030cddfd08172b6febd8 100644
> --- a/gcc/config/aarch64/constraints.md
> +++ b/gcc/config/aarch64/constraints.md
> @@ -106,6 +106,12 @@ (define_constraint "M"
>
> (define_constraint "N"
> "A constant that can be used with a 64-bit MOV immediate operation."
> + (and (match_code "const_int")
> + (match_test "aarch64_move_imm (ival, DImode)")
> + (match_test "!aarch64_zeroextended_move_imm (ival)")))
> +
> +(define_constraint "O"
> + "A constant that can be used with a 32 or 64-bit MOV immediate operation."
> (and (match_code "const_int")
> (match_test "aarch64_move_imm (ival, DImode)")))
>
FAIL: gcc.target/aarch64/sve/cond_arith_5.c (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/const_3.c (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/loop_add_5.c (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/mask_load_slp_1.c (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/mul_highpart_3.c (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/slp_13.c (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/slp_2.c (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/slp_8.c (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/slp_9.c (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/spill_4.c (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/spill_6.c (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/vcond_18.c (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/vcond_19.c (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/vcond_20.c (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/vcond_3.c (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/vcond_7.c (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_f16.c -std=c90 -O0 -g -DTEST_FULL (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_f16.c -std=c90 -O0 -g -DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_f16.c -std=c90 -O1 -g -DTEST_FULL (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_f16.c -std=c90 -O1 -g -DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_f16.c -std=c99 -O2 -g -DTEST_FULL (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_f16.c -std=c99 -O2 -g -DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_f16.c -std=c11 -O3 -g -DTEST_FULL (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_f16.c -std=c11 -O3 -g -DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_f16.c -std=gnu90 -O2 -fno-schedule-insns -DCHECK_ASM --save-temps -DTEST_FULL (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_f16.c -std=gnu90 -O2 -fno-schedule-insns -DCHECK_ASM --save-temps -DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_f16.c -std=gnu99 -Ofast -g -DTEST_FULL (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_f16.c -std=gnu99 -Ofast -g -DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_f16.c -std=gnu11 -Os -g -DTEST_FULL (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_f16.c -std=gnu11 -Os -g -DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_s16.c -std=c90 -O0 -g -DTEST_FULL (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_s16.c -std=c90 -O0 -g -DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_s16.c -std=c90 -O1 -g -DTEST_FULL (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_s16.c -std=c90 -O1 -g -DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_s16.c -std=c99 -O2 -g -DTEST_FULL (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_s16.c -std=c99 -O2 -g -DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_s16.c -std=c11 -O3 -g -DTEST_FULL (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_s16.c -std=c11 -O3 -g -DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_s16.c -std=gnu90 -O2 -fno-schedule-insns -DCHECK_ASM --save-temps -DTEST_FULL (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_s16.c -std=gnu90 -O2 -fno-schedule-insns -DCHECK_ASM --save-temps -DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_s16.c -std=gnu99 -Ofast -g -DTEST_FULL (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_s16.c -std=gnu99 -Ofast -g -DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_s16.c -std=gnu11 -Os -g -DTEST_FULL (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_s16.c -std=gnu11 -Os -g -DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_u16.c -std=c90 -O0 -g -DTEST_FULL (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_u16.c -std=c90 -O0 -g -DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_u16.c -std=c90 -O1 -g -DTEST_FULL (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_u16.c -std=c90 -O1 -g -DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_u16.c -std=c99 -O2 -g -DTEST_FULL (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_u16.c -std=c99 -O2 -g -DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_u16.c -std=c11 -O3 -g -DTEST_FULL (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_u16.c -std=c11 -O3 -g -DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_u16.c -std=gnu90 -O2 -fno-schedule-insns -DCHECK_ASM --save-temps -DTEST_FULL (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_u16.c -std=gnu90 -O2 -fno-schedule-insns -DCHECK_ASM --save-temps -DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_u16.c -std=gnu99 -Ofast -g -DTEST_FULL (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_u16.c -std=gnu99 -Ofast -g -DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_u16.c -std=gnu11 -Os -g -DTEST_FULL (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dup_u16.c -std=gnu11 -Os -g -DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dupq_s8.c -std=c90 -O0 -g -DTEST_FULL (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dupq_s8.c -std=c90 -O0 -g -DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dupq_s8.c -std=c90 -O1 -g -DTEST_FULL (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dupq_s8.c -std=c90 -O1 -g -DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dupq_s8.c -std=c99 -O2 -g -DTEST_FULL (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dupq_s8.c -std=c99 -O2 -g -DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dupq_s8.c -std=c11 -O3 -g -DTEST_FULL (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dupq_s8.c -std=c11 -O3 -g -DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dupq_s8.c -std=gnu90 -O2 -fno-schedule-insns -DCHECK_ASM --save-temps -DTEST_FULL (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dupq_s8.c -std=gnu90 -O2 -fno-schedule-insns -DCHECK_ASM --save-temps -DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dupq_s8.c -std=gnu99 -Ofast -g -DTEST_FULL (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dupq_s8.c -std=gnu99 -Ofast -g -DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dupq_s8.c -std=gnu11 -Os -g -DTEST_FULL (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dupq_s8.c -std=gnu11 -Os -g -DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dupq_u8.c -std=c90 -O0 -g -DTEST_FULL (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dupq_u8.c -std=c90 -O0 -g -DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dupq_u8.c -std=c90 -O1 -g -DTEST_FULL (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dupq_u8.c -std=c90 -O1 -g -DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dupq_u8.c -std=c99 -O2 -g -DTEST_FULL (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dupq_u8.c -std=c99 -O2 -g -DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dupq_u8.c -std=c11 -O3 -g -DTEST_FULL (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dupq_u8.c -std=c11 -O3 -g -DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dupq_u8.c -std=gnu90 -O2 -fno-schedule-insns -DCHECK_ASM --save-temps -DTEST_FULL (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dupq_u8.c -std=gnu90 -O2 -fno-schedule-insns -DCHECK_ASM --save-temps -DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dupq_u8.c -std=gnu99 -Ofast -g -DTEST_FULL (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dupq_u8.c -std=gnu99 -Ofast -g -DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dupq_u8.c -std=gnu11 -Os -g -DTEST_FULL (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
FAIL: gcc.target/aarch64/sve/acle/asm/dupq_u8.c -std=gnu11 -Os -g -DTEST_OVERLOADS (internal compiler error: in aarch64_move_imm, at config/aarch64/aarch64.cc:5692)
Hi Andreas,
Thanks for the report, I've committed the fix: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108006
Cheers,
Wilco
@@ -755,7 +755,7 @@ void aarch64_post_cfi_startproc (void);
poly_int64 aarch64_initial_elimination_offset (unsigned, unsigned);
int aarch64_get_condition_code (rtx);
bool aarch64_address_valid_for_prefetch_p (rtx, bool);
-bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode);
+bool aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode);
unsigned HOST_WIDE_INT aarch64_and_split_imm1 (HOST_WIDE_INT val_in);
unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in);
bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode);
@@ -792,7 +792,7 @@ bool aarch64_masks_and_shift_for_bfi_p (scalar_int_mode, unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT);
bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx);
-bool aarch64_move_imm (HOST_WIDE_INT, machine_mode);
+bool aarch64_move_imm (unsigned HOST_WIDE_INT, machine_mode);
machine_mode aarch64_sve_int_mode (machine_mode);
opt_machine_mode aarch64_sve_pred_mode (unsigned int);
machine_mode aarch64_sve_pred_mode (machine_mode);
@@ -842,8 +842,9 @@ bool aarch64_sve_float_arith_immediate_p (rtx, bool);
bool aarch64_sve_float_mul_immediate_p (rtx);
bool aarch64_split_dimode_const_store (rtx, rtx);
bool aarch64_symbolic_address_p (rtx);
-bool aarch64_uimm12_shift (HOST_WIDE_INT);
+bool aarch64_uimm12_shift (unsigned HOST_WIDE_INT);
int aarch64_movk_shift (const wide_int_ref &, const wide_int_ref &);
+bool aarch64_zeroextended_move_imm (unsigned HOST_WIDE_INT);
bool aarch64_use_return_insn_p (void);
const char *aarch64_output_casesi (rtx *);
@@ -5558,12 +5558,10 @@ aarch64_bitmask_imm (unsigned HOST_WIDE_INT val)
/* Return true if VAL is a valid bitmask immediate for MODE. */
bool
-aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
+aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode mode)
{
if (mode == DImode)
- return aarch64_bitmask_imm (val_in);
-
- unsigned HOST_WIDE_INT val = val_in;
+ return aarch64_bitmask_imm (val);
if (mode == SImode)
return aarch64_bitmask_imm ((val & 0xffffffff) | (val << 32));
@@ -5602,51 +5600,60 @@ aarch64_check_bitmask (unsigned HOST_WIDE_INT val,
}
-/* Return true if val is an immediate that can be loaded into a
- register by a MOVZ instruction. */
-static bool
-aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode)
+/* Return true if immediate VAL can only be created by using a 32-bit
+ zero-extended move immediate, not by a 64-bit move. */
+bool
+aarch64_zeroextended_move_imm (unsigned HOST_WIDE_INT val)
{
- if (GET_MODE_SIZE (mode) > 4)
- {
- if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
- || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
- return 1;
- }
- else
- {
- /* Ignore sign extension. */
- val &= (HOST_WIDE_INT) 0xffffffff;
- }
- return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
- || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
+ if (val < 65536 || (val >> 32) != 0 || (val & 0xffff) == 0)
+ return false;
+ return !aarch64_bitmask_imm (val);
}
-/* Return true if VAL is an immediate that can be loaded into a
- register in a single instruction. */
+/* Return true if VAL is an immediate that can be created by a single
+ MOV instruction. */
bool
-aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
+aarch64_move_imm (unsigned HOST_WIDE_INT val, machine_mode mode)
{
- scalar_int_mode int_mode;
- if (!is_a <scalar_int_mode> (mode, &int_mode))
- return false;
+ unsigned HOST_WIDE_INT val2;
- if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode))
- return 1;
- return aarch64_bitmask_imm (val, int_mode);
+ gcc_assert (mode == SImode || mode == DImode);
+
+ if (val < 65536)
+ return true;
+
+ val2 = val ^ ((HOST_WIDE_INT) val >> 63);
+ if ((val2 >> (__builtin_ctzll (val2) & 48)) < 65536)
+ return true;
+
+ /* Special case 0xyyyyffffffffffff. */
+ if (((val2 + 1) << 16) == 0)
+ return true;
+
+ /* Special case immediates 0xffffyyyy and 0xyyyyffff. */
+ val2 = (mode == DImode) ? val : val2;
+ if (((val2 + 1) & ~(unsigned HOST_WIDE_INT) 0xffff0000) == 0
+ || (val2 >> 16) == 0xffff)
+ return true;
+
+ if (mode == SImode || (val >> 32) == 0)
+ val = (val & 0xffffffff) | (val << 32);
+ return aarch64_bitmask_imm (val);
}
static int
aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
- scalar_int_mode mode)
+ machine_mode mode)
{
int i;
unsigned HOST_WIDE_INT val, val2, mask;
int one_match, zero_match;
int num_insns;
+ gcc_assert (mode == SImode || mode == DImode);
+
val = INTVAL (imm);
if (aarch64_move_imm (val, mode))
@@ -5656,31 +5663,6 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
return 1;
}
- /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff
- (with XXXX non-zero). In that case check to see if the move can be done in
- a smaller mode. */
- val2 = val & 0xffffffff;
- if (mode == DImode
- && aarch64_move_imm (val2, SImode)
- && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0))
- {
- if (generate)
- emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
-
- /* Check if we have to emit a second instruction by checking to see
- if any of the upper 32 bits of the original DI mode value is set. */
- if (val == val2)
- return 1;
-
- i = (val >> 48) ? 48 : 32;
-
- if (generate)
- emit_insn (gen_insv_immdi (dest, GEN_INT (i),
- GEN_INT ((val >> i) & 0xffff)));
-
- return 2;
- }
-
if ((val >> 32) == 0 || mode == SImode)
{
if (generate)
@@ -5704,24 +5686,31 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);
+ /* Try a bitmask immediate and a movk to generate the immediate
+ in 2 instructions. */
+
if (zero_match < 2 && one_match < 2)
{
- /* Try emitting a bitmask immediate with a movk replacing 16 bits.
- For a 64-bit bitmask try whether changing 16 bits to all ones or
- zeroes creates a valid bitmask. To check any repeated bitmask,
- try using 16 bits from the other 32-bit half of val. */
-
for (i = 0; i < 64; i += 16)
- if (aarch64_check_bitmask (val, val2, mask << i))
- {
- if (generate)
- {
- emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
- emit_insn (gen_insv_immdi (dest, GEN_INT (i),
- GEN_INT ((val >> i) & 0xffff)));
- }
- return 2;
- }
+ {
+ if (aarch64_check_bitmask (val, val2, mask << i))
+ break;
+
+ val2 = val & ~(mask << i);
+ if ((val2 >> 32) == 0 && aarch64_move_imm (val2, DImode))
+ break;
+ }
+
+ if (i != 64)
+ {
+ if (generate)
+ {
+ emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
+ emit_insn (gen_insv_immdi (dest, GEN_INT (i),
+ GEN_INT ((val >> i) & 0xffff)));
+ }
+ return 2;
+ }
}
/* Try a bitmask plus 2 movk to generate the immediate in 3 instructions. */
@@ -5790,26 +5779,24 @@ aarch64_mov128_immediate (rtx imm)
/* Return true if val can be encoded as a 12-bit unsigned immediate with
a left shift of 0 or 12 bits. */
bool
-aarch64_uimm12_shift (HOST_WIDE_INT val)
+aarch64_uimm12_shift (unsigned HOST_WIDE_INT val)
{
- return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
- || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
- );
+ return val < 4096 || (val & 0xfff000) == val;
}
/* Returns the nearest value to VAL that will fit as a 12-bit unsigned immediate
that can be created with a left shift of 0 or 12. */
static HOST_WIDE_INT
-aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val)
+aarch64_clamp_to_uimm12_shift (unsigned HOST_WIDE_INT val)
{
/* Check to see if the value fits in 24 bits, as that is the maximum we can
handle correctly. */
- gcc_assert ((val & 0xffffff) == val);
+ gcc_assert (val < 0x1000000);
- if (((val & 0xfff) << 0) == val)
+ if (val < 4096)
return val;
- return val & (0xfff << 12);
+ return val & 0xfff000;
}
@@ -6957,8 +6944,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
return;
}
- aarch64_internal_mov_immediate (dest, imm, true,
- as_a <scalar_int_mode> (mode));
+ aarch64_internal_mov_immediate (dest, imm, true, mode);
}
/* Return the MEM rtx that provides the canary value that should be used
@@ -11130,9 +11116,7 @@ aarch64_float_const_rtx_p (rtx x)
&& SCALAR_FLOAT_MODE_P (mode)
&& aarch64_reinterpret_float_as_int (x, &ival))
{
- scalar_int_mode imode = (mode == HFmode
- ? SImode
- : int_mode_for_mode (mode).require ());
+ machine_mode imode = (mode == DFmode) ? DImode : SImode;
int num_instr = aarch64_internal_mov_immediate
(NULL_RTX, gen_int_mode (ival, imode), false, imode);
return num_instr < 3;
@@ -13790,10 +13774,9 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
proportionally expensive to the number of instructions
required to build that constant. This is true whether we
are compiling for SPEED or otherwise. */
- if (!is_a <scalar_int_mode> (mode, &int_mode))
- int_mode = word_mode;
+ machine_mode imode = (mode == SImode) ? SImode : DImode;
*cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
- (NULL_RTX, x, false, int_mode));
+ (NULL_RTX, x, false, imode));
}
return true;
@@ -13809,9 +13792,7 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
bool succeed = aarch64_reinterpret_float_as_int (x, &ival);
gcc_assert (succeed);
- scalar_int_mode imode = (mode == HFmode
- ? SImode
- : int_mode_for_mode (mode).require ());
+ machine_mode imode = (mode == DFmode) ? DImode : SImode;
int ncost = aarch64_internal_mov_immediate
(NULL_RTX, gen_int_mode (ival, imode), false, imode);
*cost += COSTS_N_INSNS (ncost);
@@ -1309,16 +1309,15 @@ (define_insn_and_split "*movsi_aarch64"
)
(define_insn_and_split "*movdi_aarch64"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,r, r,w, m,m, r, r, r, w,r,w, w")
- (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd"))]
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m,m, r, r, r, w,r,w, w")
+ (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,O,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd"))]
"(register_operand (operands[0], DImode)
|| aarch64_reg_or_zero (operands[1], DImode))"
"@
mov\\t%x0, %x1
mov\\t%0, %x1
mov\\t%x0, %1
- mov\\t%x0, %1
- mov\\t%w0, %1
+ * return aarch64_zeroextended_move_imm (INTVAL (operands[1])) ? \"mov\\t%w0, %1\" : \"mov\\t%x0, %1\";
#
* return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
ldr\\t%x0, %1
@@ -1340,11 +1339,11 @@ (define_insn_and_split "*movdi_aarch64"
DONE;
}"
;; The "mov_imm" type for CNTD is just a placeholder.
- [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,mov_imm,
+ [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,
load_8,load_8,store_8,store_8,load_8,adr,adr,f_mcr,f_mrc,
fmov,neon_move")
- (set_attr "arch" "*,*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
- (set_attr "length" "4,4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4")]
+ (set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
+ (set_attr "length" "4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4")]
)
(define_insn "insv_imm<mode>"
@@ -1508,7 +1507,7 @@ (define_insn "*mov<mode>_aarch64"
(define_insn "*mov<mode>_aarch64"
[(set (match_operand:DFD 0 "nonimmediate_operand" "=w, w ,?r,w,w ,w ,w,m,r,m ,r,r")
- (match_operand:DFD 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N"))]
+ (match_operand:DFD 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,O"))]
"TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
|| aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
"@
@@ -1523,7 +1522,7 @@ (define_insn "*mov<mode>_aarch64"
ldr\\t%x0, %1
str\\t%x1, %0
mov\\t%x0, %x1
- mov\\t%x0, %1"
+ * return aarch64_zeroextended_move_imm (INTVAL (operands[1])) ? \"mov\\t%w0, %1\" : \"mov\\t%x0, %1\";"
[(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,neon_move,\
f_loadd,f_stored,load_8,store_8,mov_reg,\
fconstd")
@@ -106,6 +106,12 @@ (define_constraint "M"
(define_constraint "N"
"A constant that can be used with a 64-bit MOV immediate operation."
+ (and (match_code "const_int")
+ (match_test "aarch64_move_imm (ival, DImode)")
+ (match_test "!aarch64_zeroextended_move_imm (ival)")))
+
+(define_constraint "O"
+ "A constant that can be used with a 32 or 64-bit MOV immediate operation."
(and (match_code "const_int")
(match_test "aarch64_move_imm (ival, DImode)")))