@@ -42,8 +42,7 @@ extern void xtensa_expand_conditional_branch (rtx *, machine_mode);
extern int xtensa_expand_conditional_move (rtx *, int);
extern int xtensa_expand_scc (rtx *, machine_mode);
extern int xtensa_expand_block_move (rtx *);
-extern int xtensa_expand_block_set_unrolled_loop (rtx *);
-extern int xtensa_expand_block_set_small_loop (rtx *);
+extern int xtensa_expand_block_set (rtx *);
extern void xtensa_split_operand_pair (rtx *, machine_mode);
extern int xtensa_constantsynth (rtx, HOST_WIDE_INT);
extern int xtensa_emit_move_sequence (rtx *, machine_mode);
@@ -57,6 +57,7 @@ along with GCC; see the file COPYING3. If not see
#include "rtl-iter.h"
#include "insn-attr.h"
#include "tree-pass.h"
+#include "print-rtl.h"
/* This file should be included last. */
#include "target-def.h"
@@ -1530,77 +1531,61 @@ xtensa_expand_block_move (rtx *operands)
}
-/* Try to expand a block set operation to a sequence of RTL move
- instructions. If not optimizing, or if the block size is not a
- constant, or if the block is too large, or if the value to
- initialize the block with is not a constant, the expansion
- fails and GCC falls back to calling memset().
+/* Worker function for xtensa_expand_block_set().
- operands[0] is the destination
- operands[1] is the length
- operands[2] is the initialization value
- operands[3] is the alignment */
+ Expand into an insn sequence that calls the "memset" function. */
-static int
-xtensa_sizeof_MOVI (HOST_WIDE_INT imm)
+static rtx_insn *
+xtensa_expand_block_set_libcall (rtx dst_mem,
+ HOST_WIDE_INT value,
+ HOST_WIDE_INT bytes)
{
- return (TARGET_DENSITY && IN_RANGE (imm, -32, 95)) ? 2 : 3;
+ rtx reg;
+ rtx_insn *seq;
+
+ start_sequence ();
+
+ reg = XEXP (dst_mem, 0);
+ if (! REG_P (reg))
+ reg = XEXP (replace_equiv_address (dst_mem,
+ force_reg (Pmode, reg)), 0);
+ emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "memset"),
+ LCT_NORMAL, VOIDmode,
+ reg, SImode,
+ GEN_INT (value), SImode,
+ GEN_INT (bytes), SImode);
+
+ seq = get_insns ();
+ end_sequence ();
+
+ return seq;
}
-int
-xtensa_expand_block_set_unrolled_loop (rtx *operands)
+/* Worker function for xtensa_expand_block_set().
+
+ Expand into an insn sequence of one constant load followed by multiple
+ memory stores. Returns NULL if the conditions for expansion are not
+ met. */
+
+static rtx_insn *
+xtensa_expand_block_set_unrolled_loop (rtx dst_mem,
+ HOST_WIDE_INT value,
+ HOST_WIDE_INT bytes,
+ HOST_WIDE_INT align)
{
- rtx dst_mem = operands[0];
- HOST_WIDE_INT bytes, value, align;
- int expand_len, funccall_len;
- rtx x, reg;
+ rtx reg;
int offset;
+ rtx_insn *seq;
- if (!CONST_INT_P (operands[1]) || !CONST_INT_P (operands[2]))
- return 0;
+ if (bytes > 64)
+ return NULL;
- bytes = INTVAL (operands[1]);
- if (bytes <= 0)
- return 0;
- value = (int8_t)INTVAL (operands[2]);
- align = INTVAL (operands[3]);
- if (align > MOVE_MAX)
- align = MOVE_MAX;
-
- /* Insn expansion: holding the init value.
- Either MOV(.N) or L32R w/litpool. */
- if (align == 1)
- expand_len = xtensa_sizeof_MOVI (value);
- else if (value == 0 || value == -1)
- expand_len = TARGET_DENSITY ? 2 : 3;
- else
- expand_len = 3 + 4;
- /* Insn expansion: a series of aligned memory stores.
- Consist of S8I, S16I or S32I(.N). */
- expand_len += (bytes / align) * (TARGET_DENSITY
- && align == 4 ? 2 : 3);
- /* Insn expansion: the remainder, sub-aligned memory stores.
- A combination of S8I and S16I as needed. */
- expand_len += ((bytes % align + 1) / 2) * 3;
-
- /* Function call: preparing two arguments. */
- funccall_len = xtensa_sizeof_MOVI (value);
- funccall_len += xtensa_sizeof_MOVI (bytes);
- /* Function call: calling memset(). */
- funccall_len += TARGET_LONGCALLS ? (3 + 4 + 3) : 3;
-
- /* Apply expansion bonus (2x) if optimizing for speed. */
- if (optimize > 1 && !optimize_size)
- funccall_len *= 2;
-
- /* Decide whether to expand or not, based on the sum of the length
- of instructions. */
- if (expand_len > funccall_len)
- return 0;
+ start_sequence ();
- x = XEXP (dst_mem, 0);
- if (!REG_P (x))
- dst_mem = replace_equiv_address (dst_mem, force_reg (Pmode, x));
+ reg = XEXP (dst_mem, 0);
+ if (! REG_P (reg))
+ dst_mem = replace_equiv_address (dst_mem,
+ force_reg (Pmode, reg));
switch (align)
{
case 1:
@@ -1621,45 +1606,43 @@ xtensa_expand_block_set_unrolled_loop (rtx *operands)
{
int unit_size = MIN (bytes, align);
machine_mode unit_mode = (unit_size >= 4 ? SImode :
- (unit_size >= 2 ? HImode :
- QImode));
- unit_size = GET_MODE_SIZE (unit_mode);
+ (unit_size >= 2 ? HImode : QImode));
+ unit_size = GET_MODE_SIZE (unit_mode);
emit_move_insn (adjust_address (dst_mem, unit_mode, offset),
- unit_mode == SImode ? reg
- : convert_to_mode (unit_mode, reg, true));
-
+ (unit_mode == SImode) ? reg
+ : convert_to_mode (unit_mode, reg, true));
offset += unit_size;
bytes -= unit_size;
}
while (bytes > 0);
- return 1;
+ seq = get_insns ();
+ end_sequence ();
+
+ return seq;
}
-int
-xtensa_expand_block_set_small_loop (rtx *operands)
+/* Worker function for xtensa_expand_block_set().
+
+ Expand into an insn sequence of a small loop that fills the memory
+ range. Returns NULL if the conditions for expansion are not met. */
+
+static rtx_insn *
+xtensa_expand_block_set_small_loop (rtx dst_mem,
+ HOST_WIDE_INT value,
+ HOST_WIDE_INT bytes,
+ HOST_WIDE_INT align)
{
- HOST_WIDE_INT bytes, value, align, count;
- int expand_len, funccall_len;
- rtx x, dst, end, reg;
+ HOST_WIDE_INT count;
+ rtx reg, dst, end;
machine_mode unit_mode;
rtx_code_label *label;
-
- if (!CONST_INT_P (operands[1]) || !CONST_INT_P (operands[2]))
- return 0;
-
- bytes = INTVAL (operands[1]);
- if (bytes <= 0)
- return 0;
- value = (int8_t)INTVAL (operands[2]);
- align = INTVAL (operands[3]);
- if (align > MOVE_MAX)
- align = MOVE_MAX;
+ rtx_insn *seq;
/* Totally-aligned block only. */
if (bytes % align != 0)
- return 0;
+ return NULL;
count = bytes / align;
/* If the Loop Option (zero-overhead looping) is configured and active,
@@ -1671,77 +1654,28 @@ xtensa_expand_block_set_small_loop (rtx *operands)
instruction. */
if (align == 4
&& ! (bytes <= 127 || xtensa_simm8x256 (bytes)))
- return 0;
+ return NULL;
/* If no 4-byte aligned, loop count should be treated as the
constraint. */
if (align != 4
&& count > ((optimize > 1 && !optimize_size) ? 8 : 15))
- return 0;
+ return NULL;
}
- /* Insn expansion: holding the init value.
- Either MOV(.N) or L32R w/litpool. */
- if (align == 1)
- expand_len = xtensa_sizeof_MOVI (value);
- else if (value == 0 || value == -1)
- expand_len = TARGET_DENSITY ? 2 : 3;
- else
- expand_len = 3 + 4;
- if (TARGET_LOOPS && optimize) /* zero-overhead looping */
- {
- /* Insn translation: Either MOV(.N) or L32R w/litpool for the
- loop count. */
- expand_len += xtensa_simm12b (count) ? xtensa_sizeof_MOVI (count)
- : 3 + 4;
- /* Insn translation: LOOP, the zero-overhead looping setup
- instruction. */
- expand_len += 3;
- /* Insn expansion: the loop body instructions.
- For store, one of S8I, S16I or S32I(.N).
- For advance, ADDI(.N). */
- expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3)
- + (TARGET_DENSITY ? 2 : 3);
- }
- else /* NO zero-overhead looping */
- {
- /* Insn expansion: Either ADDI(.N) or ADDMI for the end address. */
- expand_len += bytes > 127 ? 3
- : (TARGET_DENSITY && bytes <= 15) ? 2 : 3;
- /* Insn expansion: the loop body and branch instruction.
- For store, one of S8I, S16I or S32I(.N).
- For advance, ADDI(.N).
- For branch, BNE. */
- expand_len += (TARGET_DENSITY && align == 4 ? 2 : 3)
- + (TARGET_DENSITY ? 2 : 3) + 3;
- }
-
- /* Function call: preparing two arguments. */
- funccall_len = xtensa_sizeof_MOVI (value);
- funccall_len += xtensa_sizeof_MOVI (bytes);
- /* Function call: calling memset(). */
- funccall_len += TARGET_LONGCALLS ? (3 + 4 + 3) : 3;
-
- /* Apply expansion bonus (2x) if optimizing for speed. */
- if (optimize > 1 && !optimize_size)
- funccall_len *= 2;
-
- /* Decide whether to expand or not, based on the sum of the length
- of instructions. */
- if (expand_len > funccall_len)
- return 0;
+ start_sequence ();
- x = XEXP (operands[0], 0);
- if (!REG_P (x))
- x = XEXP (replace_equiv_address (operands[0], force_reg (Pmode, x)), 0);
- dst = gen_reg_rtx (SImode);
- emit_move_insn (dst, x);
- end = gen_reg_rtx (SImode);
- if (TARGET_LOOPS && optimize)
- x = force_reg (SImode, operands[1] /* the length */);
+ reg = XEXP (dst_mem, 0);
+ if (REG_P (reg))
+ emit_move_insn (dst = gen_reg_rtx (SImode), reg);
else
- x = operands[1];
- emit_insn (gen_addsi3 (end, dst, x));
+ dst = XEXP (replace_equiv_address (dst_mem,
+ force_reg (Pmode, reg)), 0);
+ emit_insn (gen_addsi3 (end = gen_reg_rtx (SImode),
+ dst,
+ (TARGET_LOOPS && optimize)
+ ? force_reg (SImode, GEN_INT (bytes))
+ : GEN_INT (bytes)));
switch (align)
{
case 1:
@@ -1760,12 +1694,99 @@ xtensa_expand_block_set_small_loop (rtx *operands)
}
reg = force_reg (unit_mode, GEN_INT (value));
- label = gen_label_rtx ();
- emit_label (label);
+ emit_label (label = gen_label_rtx ());
emit_move_insn (gen_rtx_MEM (unit_mode, dst), reg);
emit_insn (gen_addsi3 (dst, dst, GEN_INT (align)));
emit_cmp_and_jump_insns (dst, end, NE, const0_rtx, SImode, true, label);
+ seq = get_insns ();
+ end_sequence ();
+
+ return seq;
+}
+
+
+/* Try to expand a block set operation to a sequence of RTL move
+ instructions. Returns 1 if an inline sequence was emitted. Returns
+ 0, so that GCC falls back to calling memset(), if the block size or
+ the initialization value is not a constant, if the size is not
+ positive, or if the memset() libcall is costed as the cheapest.
+
+ operands[0] is the destination
+ operands[1] is the length
+ operands[2] is the initialization value
+ operands[3] is the alignment */
+
+int
+xtensa_expand_block_set (rtx *operands)
+{
+ rtx dst_mem = operands[0];
+ HOST_WIDE_INT bytes, value, align;
+ rtx_insn *seq[3];
+ int min_cost, min_index, i, n, cost;
+ rtx_insn *insn;
+
+ if (! CONST_INT_P (operands[1])
+ || ! CONST_INT_P (operands[2])
+ || (bytes = INTVAL (operands[1])) <= 0)
+ return 0;
+
+ value = (int8_t)INTVAL (operands[2]);
+ align = INTVAL (operands[3]);
+ if (align > MOVE_MAX)
+ align = MOVE_MAX;
+
+ /* Build up to three insn sequences that have the same effect but
+ differ in method and size (a worker may return NULL). */
+ seq[0] = xtensa_expand_block_set_libcall (dst_mem, value, bytes);
+ seq[1] = xtensa_expand_block_set_unrolled_loop (dst_mem, value,
+ bytes, align);
+ seq[2] = xtensa_expand_block_set_small_loop (dst_mem, value,
+ bytes, align);
+
+ /* Find the sequence that has the minimum size-based insn cost. */
+ if (dump_file)
+ fprintf (dump_file, "xtensa_expand_block_set:\n");
+ min_cost = INT_MAX, min_index = 0;
+ for (i = 0; i < 3; ++i)
+ if ((insn = seq[i]))
+ {
+ if (dump_file)
+ fprintf (dump_file, " method %d...\n", i);
+
+ for (n = 0, cost = 0; insn; insn = NEXT_INSN (insn))
+ {
+ if (active_insn_p (insn))
+ ++n, cost += xtensa_insn_cost (insn, false);
+ if (dump_file)
+ dump_insn_slim (dump_file, insn);
+ }
+
+ /* Favor the inline methods over the libcall (method 0): halve their
+ cost at -O2 unless optimizing for size, quarter it at -O3 and above. */
+ if (i > 0)
+ {
+ if (optimize == 2 && !optimize_size)
+ cost = (cost + 1) / 2;
+ else if (optimize >= 3)
+ cost = (cost + 2) / 4;
+ }
+
+ if (dump_file)
+ fprintf (dump_file, "\t%d active insns, %d cost.\n", n, cost);
+
+ if (cost < min_cost)
+ min_cost = cost, min_index = i;
+ }
+ if (dump_file)
+ fprintf (dump_file, " choose method %d.\n", min_index);
+
+ /* Fall back to the memset() call if the libcall is the cheapest. */
+ if (min_index == 0)
+ return 0;
+
+ emit_insn (seq[min_index]);
+
+ return 1;
+}
@@ -1547,9 +1547,7 @@
(match_operand:SI 3 "const_int_operand")]
"!optimize_debug && optimize"
{
- if (xtensa_expand_block_set_unrolled_loop (operands))
- DONE;
- if (xtensa_expand_block_set_small_loop (operands))
+ if (xtensa_expand_block_set (operands))
DONE;
FAIL;
})