@@ -17466,6 +17466,7 @@ c_parser_omp_clause_unroll_full (c_parser *parser, tree list)
location_t loc = c_parser_peek_token (parser)->location;
tree c = build_omp_clause (loc, OMP_CLAUSE_UNROLL_FULL);
+ OMP_CLAUSE_TRANSFORM_LEVEL (c) = build_int_cst (unsigned_type_node, 0);
OMP_CLAUSE_CHAIN (c) = list;
return c;
}
@@ -17486,6 +17487,7 @@ c_parser_omp_clause_unroll_partial (c_parser *parser, tree list)
loc = c_parser_peek_token (parser)->location;
c = build_omp_clause (loc, OMP_CLAUSE_UNROLL_PARTIAL);
OMP_CLAUSE_UNROLL_PARTIAL_EXPR (c) = NULL_TREE;
+ OMP_CLAUSE_TRANSFORM_LEVEL (c) = build_int_cst (unsigned_type_node, 0);
OMP_CLAUSE_CHAIN (c) = list;
if (!c_parser_next_token_is (parser, CPP_OPEN_PAREN))
@@ -24011,6 +24013,7 @@ c_parser_omp_tile_sizes (c_parser *parser, location_t loc)
gcc_assert (sizes);
tree c = build_omp_clause (loc, OMP_CLAUSE_TILE);
+ OMP_CLAUSE_TRANSFORM_LEVEL (c) = build_int_cst (unsigned_type_node, 0);
OMP_CLAUSE_TILE_SIZES (c) = sizes;
return c;
@@ -24036,7 +24039,11 @@ c_parser_omp_loop_transform_clause (c_parser *parser)
if (!c)
{
if (c_parser_next_token_is (parser, CPP_PRAGMA_EOL))
- c = build_omp_clause (tok->location, OMP_CLAUSE_UNROLL_NONE);
+ {
+ c = build_omp_clause (tok->location, OMP_CLAUSE_UNROLL_NONE);
+ OMP_CLAUSE_TRANSFORM_LEVEL (c) =
+ build_int_cst (unsigned_type_node, 0);
+ }
else
c = error_mark_node;
}
@@ -24191,6 +24198,7 @@ c_parser_omp_unroll (location_t loc, c_parser *parser, bool *if_p)
if (!clauses)
{
tree c = build_omp_clause (loc, OMP_CLAUSE_UNROLL_NONE);
+ OMP_CLAUSE_TRANSFORM_LEVEL (c) = build_int_cst (unsigned_type_node, 0);
OMP_CLAUSE_CHAIN (c) = clauses;
clauses = c;
}
@@ -39476,6 +39476,7 @@ cp_parser_omp_clause_unroll_full (tree list, location_t loc)
return list;
tree c = build_omp_clause (loc, OMP_CLAUSE_UNROLL_FULL);
+ OMP_CLAUSE_TRANSFORM_LEVEL (c) = build_int_cst (unsigned_type_node, 0);
OMP_CLAUSE_CHAIN (c) = list;
return c;
}
@@ -39494,6 +39495,7 @@ cp_parser_omp_clause_unroll_partial (cp_parser *parser, tree list,
tree c, num = error_mark_node;
c = build_omp_clause (loc, OMP_CLAUSE_UNROLL_PARTIAL);
OMP_CLAUSE_UNROLL_PARTIAL_EXPR (c) = NULL_TREE;
+ OMP_CLAUSE_TRANSFORM_LEVEL (c) = build_int_cst (unsigned_type_node, 0);
OMP_CLAUSE_CHAIN (c) = list;
if (!cp_lexer_next_token_is (parser->lexer, CPP_OPEN_PAREN))
@@ -45786,6 +45788,8 @@ cp_parser_omp_tile_sizes (cp_parser *parser, location_t loc)
gcc_assert (sizes);
tree c = build_omp_clause (loc, OMP_CLAUSE_TILE);
OMP_CLAUSE_TILE_SIZES (c) = sizes;
+ OMP_CLAUSE_TRANSFORM_LEVEL (c)
+ = build_int_cst (unsigned_type_node, 0);
return c;
}
@@ -45846,7 +45850,11 @@ cp_parser_omp_loop_transform_clause (cp_parser *parser)
if (!c)
{
if (cp_lexer_next_token_is (lexer, CPP_PRAGMA_EOL))
- c = build_omp_clause (tok->location, OMP_CLAUSE_UNROLL_NONE);
+ {
+ c = build_omp_clause (tok->location, OMP_CLAUSE_UNROLL_NONE);
+ OMP_CLAUSE_TRANSFORM_LEVEL (c)
+ = build_int_cst (unsigned_type_node, 0);
+ }
else
c = error_mark_node;
}
@@ -45926,6 +45934,7 @@ cp_parser_omp_nested_loop_transform_clauses (cp_parser *parser, tree &clauses,
default:
gcc_unreachable ();
}
+ OMP_CLAUSE_TRANSFORM_LEVEL (c) = build_int_cst (unsigned_type_node, 0);
if (depth < last_depth)
{
@@ -45974,6 +45983,7 @@ cp_parser_omp_unroll (cp_parser *parser, cp_token *tok, bool *if_p)
if (!clauses)
{
tree c = build_omp_clause (tok->location, OMP_CLAUSE_UNROLL_NONE);
+ OMP_CLAUSE_TRANSFORM_LEVEL (c) = build_int_cst (unsigned_type_node, 0);
OMP_CLAUSE_CHAIN (c) = clauses;
clauses = c;
}
@@ -18086,11 +18086,19 @@ tsubst_omp_clauses (tree clauses, enum c_omp_region_type ort,
case OMP_CLAUSE_ASYNC:
case OMP_CLAUSE_WAIT:
case OMP_CLAUSE_DETACH:
- case OMP_CLAUSE_UNROLL_PARTIAL:
- case OMP_CLAUSE_TILE:
OMP_CLAUSE_OPERAND (nc, 0)
= tsubst_expr (OMP_CLAUSE_OPERAND (oc, 0), args, complain, in_decl);
break;
+ case OMP_CLAUSE_UNROLL_PARTIAL:
+ OMP_CLAUSE_UNROLL_PARTIAL_EXPR (nc)
+ = tsubst_expr (OMP_CLAUSE_UNROLL_PARTIAL_EXPR (oc), args, complain,
+ in_decl);
+ break;
+ case OMP_CLAUSE_TILE:
+ OMP_CLAUSE_TILE_SIZES (nc)
+ = tsubst_expr (OMP_CLAUSE_TILE_SIZES (oc), args, complain,
+ in_decl);
+ break;
case OMP_CLAUSE_REDUCTION:
case OMP_CLAUSE_IN_REDUCTION:
case OMP_CLAUSE_TASK_REDUCTION:
@@ -9389,27 +9389,79 @@ gfc_resolve_omp_local_vars (gfc_namespace *ns)
gfc_traverse_ns (ns, handle_local_var);
}
+
+/* Forward declaration for mutually recursive functions. */
+static gfc_code *
+find_nested_loop_in_block (gfc_code *block);
+
+/* Return the first nested DO loop in CHAIN, or NULL if there
+ isn't one. Does no error checking on intervening code. */
+
+static gfc_code *
+find_nested_loop_in_chain (gfc_code *chain)
+{
+ gfc_code *code;
+
+ if (!chain)
+ return NULL;
+
+ for (code = chain; code; code = code->next)
+ {
+ if (code->op == EXEC_DO)
+ return code;
+ else if (loop_transform_p (code->op) && code->block)
+ {
+ code = code->block;
+ continue;
+ }
+ else if (code->op == EXEC_BLOCK)
+ {
+ gfc_code *c = find_nested_loop_in_block (code);
+ if (c)
+ return c;
+ }
+ }
+ return NULL;
+}
+
+/* Return the first nested DO loop in BLOCK, or NULL if there
+ isn't one. Does no error checking on intervening code. */
+static gfc_code *
+find_nested_loop_in_block (gfc_code *block)
+{
+ gfc_namespace *ns;
+ gcc_assert (block->op == EXEC_BLOCK);
+ ns = block->ext.block.ns;
+ gcc_assert (ns);
+ return find_nested_loop_in_chain (ns->code);
+}
/* CODE is an OMP loop construct. Return true if VAR matches an iteration
variable outer to level DEPTH. */
static bool
is_outer_iteration_variable (gfc_code *code, int depth, gfc_symbol *var)
{
int i;
- gfc_code *do_code = code->block->next;
- while (loop_transform_p (do_code->op)) {
- if (do_code->block)
- do_code = do_code->block->next;
- else
- do_code = do_code->next;
- }
- gcc_assert (!loop_transform_p (do_code->op));
+ gfc_code *chain;
+ if (code->block)
+ chain = code->block->next;
+ else
+ {
+ gcc_assert (loop_transform_p (code->op));
+ chain = code;
+ while (loop_transform_p (chain->op))
+ chain = chain->next;
+ }
for (i = 1; i < depth; i++)
{
+ gfc_code *do_code = find_nested_loop_in_chain (chain);
+ gcc_assert (do_code != code);
+ gcc_assert (do_code && do_code->op == EXEC_DO);
gfc_symbol *ivar = do_code->ext.iterator->var->symtree->n.sym;
if (var == ivar)
return true;
- do_code = do_code->block->next;
+
+ chain = do_code->block->next;
}
return false;
}
@@ -9420,21 +9472,22 @@ static bool
expr_is_invariant (gfc_code *code, int depth, gfc_expr *expr)
{
int i;
- gfc_code *do_code = code->block->next;
- while (loop_transform_p (do_code->op)) {
- if (do_code->block)
- do_code = do_code->block->next;
- else
- do_code = do_code->next;
- }
- gcc_assert (!loop_transform_p (do_code->op));
+ gfc_code *do_code = code;
+
+ /* Move over loop transformations until the
+ loop is found. It may also be represented by a
+ transformation construct (but then with a block)
+ if it is not associated with any other construct. */
+ while (loop_transform_p (do_code->op) && !do_code->block)
+ do_code = do_code->next;
for (i = 1; i < depth; i++)
{
+ do_code = find_nested_loop_in_chain (do_code->block->next);
+ gcc_assert (do_code);
gfc_symbol *ivar = do_code->ext.iterator->var->symtree->n.sym;
if (gfc_find_sym_in_expr (ivar, expr))
return false;
- do_code = do_code->block->next;
}
return true;
}
@@ -9828,6 +9881,8 @@ resolve_omp_do (gfc_code *code)
if (i == collapse || c)
break;
do_code = do_code->block;
+ do_code = resolve_nested_loop_transforms (do_code, name, collapse - i,
+ &code->loc);
if (do_code->op != EXEC_DO && do_code->op != EXEC_DO_WHILE)
{
gfc_error ("not enough DO loops for collapsed %s at %L",
@@ -9835,6 +9890,8 @@ resolve_omp_do (gfc_code *code)
break;
}
do_code = do_code->next;
+ do_code = resolve_nested_loop_transforms (do_code, name, collapse - i,
+ &code->loc);
if (do_code == NULL
|| (do_code->op != EXEC_DO && do_code->op != EXEC_DO_WHILE))
{
@@ -9848,7 +9905,7 @@ resolve_omp_do (gfc_code *code)
static void
resolve_omp_tile (gfc_code *code)
{
- gfc_code *do_code, *c;
+ gfc_code *do_code, *next;
gfc_symbol *dovar;
const char *name = "!$OMP TILE";
@@ -9862,65 +9919,78 @@ resolve_omp_tile (gfc_code *code)
for (unsigned i = 1; i <= num_loops; i++)
{
+
+ gfc_symbol *start_var = NULL, *end_var = NULL;
+
if (do_code->op == EXEC_DO_WHILE)
{
gfc_error ("%s cannot be a DO WHILE or DO without loop control "
"at %L", name, &do_code->loc);
- break;
+ return;
}
if (do_code->op == EXEC_DO_CONCURRENT)
{
gfc_error ("%s cannot be a DO CONCURRENT loop at %L", name,
&do_code->loc);
- break;
+ return;
}
if (do_code->op != EXEC_DO)
{
gfc_error ("%s must be DO loop at %L", name,
&do_code->loc);
- break;
+ return;
}
gcc_assert (do_code->op != EXEC_OMP_UNROLL);
gcc_assert (do_code->op == EXEC_DO);
dovar = do_code->ext.iterator->var->symtree->n.sym;
- if (i > 1)
+ if (is_outer_iteration_variable (code, i, dovar))
{
- gfc_code *do_code2 = code;
- while (loop_transform_p (do_code2->op))
- {
- if (do_code2->block)
- do_code2 = do_code2->block->next;
- else
- do_code2 = do_code2->next;
- }
- gcc_assert (!loop_transform_p (do_code2->op));
-
- for (unsigned j = 1; j < i; j++)
- {
- gfc_symbol *ivar = do_code2->ext.iterator->var->symtree->n.sym;
- if (dovar == ivar
- || gfc_find_sym_in_expr (ivar, do_code->ext.iterator->start)
- || gfc_find_sym_in_expr (ivar, do_code->ext.iterator->end)
- || gfc_find_sym_in_expr (ivar, do_code->ext.iterator->step))
- {
- gfc_error ("%s loops don't form rectangular "
- "iteration space at %L", name, &do_code->loc);
- break;
- }
- do_code2 = do_code2->block->next;
- }
+ gfc_error ("%s iteration variable used in more than one loop at %L (depth %d)",
+ name, &do_code->loc, i);
+ return;
}
- for (c = do_code->next; c; c = c->next)
- if (c->op != EXEC_NOP && c->op != EXEC_CONTINUE)
+ else if (!bound_expr_is_canonical (code, i,
+ do_code->ext.iterator->start,
+ &start_var))
+ {
+ gfc_error ("%s loop start expression not in canonical form at %L",
+ name, &do_code->loc);
+ return;
+ }
+ else if (!bound_expr_is_canonical (code, i,
+ do_code->ext.iterator->end,
+ &end_var))
+ {
+ gfc_error ("%s loop end expression not in canonical form at %L",
+ name, &do_code->loc);
+ return;
+ }
+ else if (start_var && end_var && start_var != end_var)
+ {
+ gfc_error ("%s loop bounds reference different "
+ "iteration variables at %L", name, &do_code->loc);
+ return;
+ }
+ else if (!expr_is_invariant (code, i, do_code->ext.iterator->step))
+ {
+ gfc_error ("%s loop increment not in canonical form at %L",
+ name, &do_code->loc);
+ return;
+ }
+ if (start_var || end_var)
+ code->ext.omp_clauses->non_rectangular = 1;
+ for (next = do_code->next; next; next = next->next)
+ if (next->op != EXEC_NOP && next->op != EXEC_CONTINUE)
{
gfc_error ("%s loops not perfectly nested at %L",
- name, &c->loc);
+ name, &next->loc);
break;
}
- if (i == num_loops || c)
+ if (i == num_loops || next)
break;
do_code = do_code->block;
+ do_code = resolve_nested_loop_transforms (do_code, name, num_loops - i, &code->loc);
if (do_code->op != EXEC_DO && do_code->op != EXEC_DO_WHILE)
{
gfc_error ("not enough DO loops for %s at %L",
@@ -9928,6 +9998,7 @@ resolve_omp_tile (gfc_code *code)
break;
}
do_code = do_code->next;
+ do_code = resolve_nested_loop_transforms (do_code, name, num_loops - i, &code->loc);
if (do_code == NULL
|| (do_code->op != EXEC_DO && do_code->op != EXEC_DO_WHILE))
{
@@ -3893,12 +3893,14 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses,
if (clauses->unroll_full)
{
c = build_omp_clause (gfc_get_location (&where), OMP_CLAUSE_UNROLL_FULL);
+ OMP_CLAUSE_TRANSFORM_LEVEL (c) = build_int_cst (unsigned_type_node, 0);
omp_clauses = gfc_trans_add_clause (c, omp_clauses);
}
if (clauses->unroll_none)
{
c = build_omp_clause (gfc_get_location (&where), OMP_CLAUSE_UNROLL_NONE);
+ OMP_CLAUSE_TRANSFORM_LEVEL (c) = build_int_cst (unsigned_type_node, 0);
omp_clauses = gfc_trans_add_clause (c, omp_clauses);
}
@@ -3906,6 +3908,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses,
{
c = build_omp_clause (gfc_get_location (&where),
OMP_CLAUSE_UNROLL_PARTIAL);
+ OMP_CLAUSE_TRANSFORM_LEVEL (c) = build_int_cst (unsigned_type_node, 0);
OMP_CLAUSE_UNROLL_PARTIAL_EXPR (c)
= clauses->unroll_partial_factor ? build_int_cst (
integer_type_node, clauses->unroll_partial_factor)
@@ -3926,6 +3929,7 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses,
c = build_omp_clause (gfc_get_location (&where),
OMP_CLAUSE_TILE);
OMP_CLAUSE_TILE_SIZES (c) = build_tree_list_vec (tvec);
+ OMP_CLAUSE_TRANSFORM_LEVEL (c) = build_int_cst (unsigned_type_node, 0);
omp_clauses = gfc_trans_add_clause (c, omp_clauses);
tvec->truncate (0);
@@ -5308,6 +5312,29 @@ gfc_expr_list_len (gfc_expr_list *list)
return len;
}
+/* Traverse the loops with nesting depth at most
+ COLLAPSE from CODE and determine the largest
+ loop nest depth required by the loop transformations
+ found on the loops. */
+int compute_transformed_depth (gfc_code *code, int collapse)
+{
+ int new_collapse = collapse;
+ for (int i = 0; i < new_collapse; i++)
+ {
+ gcc_assert (code->op == EXEC_DO || loop_transform_p (code->op));
+ while (loop_transform_p (code->op))
+ {
+ int tile_depth
+ = gfc_expr_list_len (code->ext.omp_clauses->tile_sizes);
+ new_collapse = MAX (new_collapse, i + tile_depth);
+ code = code->block ? code->block->next : code->next;
+ }
+ code = code->block->next;
+ }
+
+ return new_collapse;
+}
+
static tree
gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock,
gfc_omp_clauses *do_clauses, tree par_clauses)
@@ -5343,6 +5370,7 @@ gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock,
do" (or similar directive) are represented as clauses on the "omp do". */
loop_transform_clauses = NULL;
int omp_tile_depth = gfc_expr_list_len (omp_tile);
+ tree clauses_tail = NULL;
while (loop_transform_p (code->op))
{
tree clauses = gfc_trans_omp_clauses (pblock, code->ext.omp_clauses,
@@ -5354,7 +5382,14 @@ gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock,
directive, an error will be emitted in pass-omp_transform_loops. */
omp_tile_depth = gfc_expr_list_len (code->ext.omp_clauses->tile_sizes);
- loop_transform_clauses = chainon (loop_transform_clauses, clauses);
+ if (!loop_transform_clauses)
+ {
+ loop_transform_clauses = clauses;
+ clauses_tail = tree_last (clauses);
+ }
+ else
+ clauses_tail = chainon (clauses_tail, clauses);
+
code = code->block ? code->block->next : code->next;
}
gcc_assert (!loop_transform_p (code->op));
@@ -5371,9 +5406,12 @@ gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock,
collapse = clauses->orderedc;
if (collapse <= 0)
collapse = 1;
-
collapse = MAX (collapse, omp_tile_depth);
+ gfc_code *first_loop = loop_transform_p (orig_code->op) ?
+ orig_code : orig_code->block->next;
+ int transform_depth = compute_transformed_depth (first_loop, collapse);
+ collapse = transform_depth;
init = make_tree_vec (collapse);
cond = make_tree_vec (collapse);
incr = make_tree_vec (collapse);
@@ -5384,15 +5422,8 @@ gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock,
on the simd construct and DO's clauses are translated elsewhere. */
do_clauses->sched_simd = false;
- if (loop_transform_p (op))
- {
- /* This is a loop transformation on a loop which is not associated with
- any other directive. Use the directive location instead of the loop
- location for the clauses. */
- omp_clauses = gfc_trans_omp_clauses (pblock, do_clauses, top_loc);
- }
- else
- omp_clauses = gfc_trans_omp_clauses (pblock, do_clauses, code->loc);
+ omp_clauses = NULL;
+ omp_clauses = gfc_trans_omp_clauses (pblock, do_clauses, top_loc);
omp_clauses = chainon (omp_clauses, loop_transform_clauses);
for (i = 0; i < collapse; i++)
@@ -5665,7 +5696,26 @@ gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock,
}
if (i + 1 < collapse)
- code = code->block->next;
+ {
+ code = code->block->next;
+
+ loop_transform_clauses = NULL;
+ clauses_tail = omp_clauses;
+ while (loop_transform_p (code->op))
+ {
+ loop_transform_clauses = gfc_trans_omp_clauses (
+ pblock, code->ext.omp_clauses, code->loc);
+ for (tree c = loop_transform_clauses; c;
+ c = OMP_CLAUSE_CHAIN (c))
+ OMP_CLAUSE_TRANSFORM_LEVEL (c)
+ = build_int_cst (unsigned_type_node, i + 1);
+
+ clauses_tail = chainon (clauses_tail, loop_transform_clauses);
+ clauses_tail = tree_last (loop_transform_clauses);
+
+ code = code->block ? code->block->next : code->next;
+ }
+ }
}
if (pblock != &block)
@@ -127,7 +127,7 @@ extern tree
gimple_assign_rhs_to_tree (gimple *stmt);
/* Substitute all definitions from SEQ bottom-up into EXPR. This is used to
- reconstruct a tree for a gimplified expression for determinig whether or not
+ reconstruct a tree from a gimplified expression for determinig whether or not
the number of iterations of a loop is constant. */
tree
@@ -227,6 +227,7 @@ gomp_for_uncollapse (gomp_for *omp_for, int from_depth = 0, bool expand = false)
{
int collapse = gimple_omp_for_collapse (omp_for);
gcc_assert (from_depth < collapse);
+ gcc_assert (from_depth >= 0);
if (collapse <= 1)
return omp_for;
@@ -266,6 +267,7 @@ gomp_for_uncollapse (gomp_for *omp_for, int from_depth = 0, bool expand = false)
if (from_depth > 0)
{
gimple_omp_set_body (omp_for, body);
+ omp_for->collapse = from_depth;
return omp_for;
}
@@ -453,7 +455,7 @@ after transform: Misc 6.0: Loop transformations #3440") in the
non-public OpenMP spec repository. */
static gimple_seq
-partial_unroll (gomp_for *omp_for, tree unroll_factor,
+partial_unroll (gomp_for *omp_for, size_t level, tree unroll_factor,
location_t loc, tree transformation_clauses, walk_ctx *ctx)
{
gcc_assert (unroll_factor);
@@ -463,7 +465,7 @@ partial_unroll (gomp_for *omp_for, tree unroll_factor,
/* Partial unrolling reduces the loop nest depth of a canonical loop nest to 1
hence outer directives cannot require a greater collapse. */
- gcc_assert (gimple_omp_for_collapse (omp_for) <= 1);
+ gcc_assert (gimple_omp_for_collapse (omp_for) <= level + 1);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE | MSG_PRIORITY_INTERNALS,
@@ -473,12 +475,12 @@ partial_unroll (gomp_for *omp_for, tree unroll_factor,
gomp_for *unrolled_for = as_a<gomp_for *> (copy_gimple_seq_and_replace_locals (omp_for));
- tree final = gimple_omp_for_final (unrolled_for, 0);
- tree incr = gimple_omp_for_incr (unrolled_for, 0);
- tree index = gimple_omp_for_index (unrolled_for, 0);
+ tree final = gimple_omp_for_final (unrolled_for, level);
+ tree incr = gimple_omp_for_incr (unrolled_for, level);
+ tree index = gimple_omp_for_index (unrolled_for, level);
gimple_seq body = gimple_omp_body (unrolled_for);
- tree_code cond = gimple_omp_for_cond (unrolled_for, 0);
+ tree_code cond = gimple_omp_for_cond (unrolled_for, level);
tree step = TREE_OPERAND (incr, 1);
gimple_omp_set_body (unrolled_for,
build_unroll_body (body, unroll_factor, index, incr,
@@ -503,7 +505,7 @@ partial_unroll (gomp_for *omp_for, tree unroll_factor,
scaled_step = var;
}
TREE_OPERAND (incr, 1) = scaled_step;
- gimple_omp_for_set_incr (unrolled_for, 0, incr);
+ gimple_omp_for_set_incr (unrolled_for, level, incr);
pop_gimplify_context (result_bind);
@@ -864,7 +866,7 @@ canonicalize_conditions (gomp_for *omp_for)
*/
static gimple_seq
-tile (gomp_for *omp_for, location_t loc, tree tile_sizes,
+tile (gomp_for *omp_for, location_t loc, size_t start_level, tree tile_sizes,
tree transformation_clauses, walk_ctx *ctx)
{
if (dump_enabled_p ())
@@ -896,22 +898,21 @@ tile (gomp_for *omp_for, location_t loc, tree tile_sizes,
collapse_clause = c;
}
- /* The 'omp tile' construct creates a canonical loop-nest whose nesting depth
- equals tiling_depth. The whole loop-nest has depth at least 2 *
- omp_tile_depth, but the 'tile loops' at levels
- omp_tile_depth+1...2*omp_tile_depth are not in canonical loop-nest form
- and hence cannot be associated with a loop construct. */
- if (clause_collapse > tiling_depth)
+ /* The tiled loop nest is a canonical loop nest with nesting depth
+ tiling_depth. The tile loops below that level are not in
+ canonical loop nest form and hence cannot be associated with a
+ loop construct. */
+ if (clause_collapse > tiling_depth + start_level)
{
error_at (OMP_CLAUSE_LOCATION (collapse_clause),
"collapse cannot extend below the floor loops "
"generated by the %<omp tile%> construct");
OMP_CLAUSE_COLLAPSE_EXPR (collapse_clause)
- = build_int_cst (unsigned_type_node, tiling_depth);
+ = build_int_cst (unsigned_type_node, start_level + tiling_depth);
return transform_gomp_for (omp_for, NULL, ctx);
}
- if (tiling_depth > collapse)
+ if (start_level + tiling_depth > collapse)
return transform_gomp_for (omp_for, NULL, ctx);
gcc_assert (collapse >= clause_collapse);
@@ -919,13 +920,15 @@ tile (gomp_for *omp_for, location_t loc, tree tile_sizes,
push_gimplify_context ();
/* Create the index variables for iterating the tiles in the floor
- loops first tiling_depth loops transformed loop nest. */
+ loops which will be the loops at levels start_level
+ ... start_level + tiling_depth of the transformed loop nest. The
+ loops at level 0 ... start_level - 1 are left unchanged. */
gimple_seq floor_loops_pre_body = NULL;
size_t tile_level = 0;
auto_vec<tree> sizes_vec;
for (tree el = tile_sizes; el; el = TREE_CHAIN (el), tile_level++)
{
- size_t nest_level = tile_level;
+ size_t nest_level = start_level + tile_level;
tree index = gimple_omp_for_index (omp_for, nest_level);
tree init = gimple_omp_for_initial (omp_for, nest_level);
tree incr = gimple_omp_for_incr (omp_for, nest_level);
@@ -956,6 +959,7 @@ tile (gomp_for *omp_for, location_t loc, tree tile_sizes,
gimple_omp_for_set_incr (floor_loops, nest_level, incr);
gimple_omp_for_set_index (floor_loops, nest_level, tile_index);
}
+
gbind *result_bind = gimple_build_bind (NULL, NULL, NULL);
pop_gimplify_context (result_bind);
gimple_seq_add_seq (gimple_omp_for_pre_body_ptr (floor_loops),
@@ -972,6 +976,9 @@ tile (gomp_for *omp_for, location_t loc, tree tile_sizes,
to add the incomplete tile checks to each level loop. */
tile_loops = gomp_for_uncollapse (as_a <gomp_for *> (tile_loops));
+ for (size_t i = 0; i < start_level; i++)
+ tile_loops = gimple_omp_body (tile_loops);
+
gimple_omp_for_set_kind (as_a<gomp_for *> (tile_loops),
GF_OMP_FOR_KIND_TRANSFORM_LOOP);
gimple_omp_for_set_clauses (tile_loops, NULL_TREE);
@@ -990,50 +997,51 @@ tile (gomp_for *omp_for, location_t loc, tree tile_sizes,
tree break_label = create_artificial_label (UNKNOWN_LOCATION);
gimple_seq_add_stmt (surrounding_seq, gimple_build_label (break_label));
- for (size_t level = 0; level < tiling_depth; level++)
+ for (size_t tile_level = 0; tile_level < tiling_depth; tile_level++)
{
- tree original_index = gimple_omp_for_index (omp_for, level);
- tree original_final = gimple_omp_for_final (omp_for, level);
+ gimple_seq level_preamble = NULL;
+ gimple_seq level_body = gimple_omp_body (level_loop);
+ auto gsi = gsi_start (level_body);
- tree tile_index = gimple_omp_for_index (floor_loops, level);
- tree tile_size = sizes_vec[level];
+ int nest_level = start_level + tile_level;
+ tree original_index = gimple_omp_for_index (omp_for, nest_level);
+ tree original_final = gimple_omp_for_final (omp_for, nest_level);
+
+ tree tile_index
+ = gimple_omp_for_index (floor_loops, nest_level);
+ tree tile_size = sizes_vec[tile_level];
tree type = TREE_TYPE (tile_index);
tree plus_type = type;
- tree incr = gimple_omp_for_incr (omp_for, level);
+ tree incr = gimple_omp_for_incr (omp_for, nest_level);
tree step = omp_get_for_step_from_incr (gimple_location (omp_for), incr);
gimple_seq *pre_body = gimple_omp_for_pre_body_ptr (level_loop);
- gimple_seq level_body = gimple_omp_body (level_loop);
gcc_assert (gimple_omp_for_collapse (level_loop) == 1);
- tree_code original_cond = gimple_omp_for_cond (omp_for, level);
+ tree_code original_cond = gimple_omp_for_cond (omp_for, nest_level);
gimple_omp_for_set_initial (level_loop, 0, tile_index);
tree tile_final = create_tmp_var (type);
- tree scaled_tile_size = fold_build2 (MULT_EXPR, TREE_TYPE (tile_size),
- tile_size, step);
+ tree scaled_tile_size
+ = fold_build2 (MULT_EXPR, TREE_TYPE (tile_size), tile_size, step);
tree_code plus_code = PLUS_EXPR;
if (POINTER_TYPE_P (TREE_TYPE (tile_index)))
{
plus_code = POINTER_PLUS_EXPR;
int unsignedp = TYPE_UNSIGNED (TREE_TYPE (scaled_tile_size));
- plus_type = signed_or_unsigned_type_for (unsignedp, ptrdiff_type_node);
+ plus_type
+ = signed_or_unsigned_type_for (unsignedp, ptrdiff_type_node);
}
scaled_tile_size = fold_convert (plus_type, scaled_tile_size);
- gimplify_assign (tile_final,
- fold_build2 (plus_code, type,
- tile_index, scaled_tile_size),
- pre_body);
+ gimplify_assign (
+ tile_final,
+ fold_build2 (plus_code, type, tile_index, scaled_tile_size),
+ pre_body);
gimple_omp_for_set_final (level_loop, 0, tile_final);
- /* Redefine the original loop index variable of OMP_FOR in terms of the
- floor loop and the tiling loop index variable for the current
- dimension/level at the top of the loop. */
- gimple_seq level_preamble = NULL;
-
push_gimplify_context ();
tree body_label = create_artificial_label (UNKNOWN_LOCATION);
@@ -1047,7 +1055,6 @@ tile (gomp_for *omp_for, location_t loc, tree tile_sizes,
break_label));
gimple_seq_add_stmt (&level_preamble, gimple_build_label (body_label));
- auto gsi = gsi_start (level_body);
gsi_insert_seq_before (&gsi, level_preamble, GSI_SAME_STMT);
gbind *level_bind = gimple_build_bind (NULL, NULL, NULL);
pop_gimplify_context (level_bind);
@@ -1057,10 +1064,10 @@ tile (gomp_for *omp_for, location_t loc, tree tile_sizes,
surrounding_seq = &level_body;
level_loop = gsi_stmt (gsi);
- /* The label for jumping out of the loop at the next nesting
- level. For the outermost level, the label is put after the
- loop-nest, for the last one it is not necessary. */
- if (level != tiling_depth - 1)
+ /* The label for jumping out of the loop at the next
+ nesting level. For the outermost level, the label is put
+ after the loop-nest, for the last one it is not necessary. */
+ if (tile_level != tiling_depth - 1)
{
break_label = create_artificial_label (UNKNOWN_LOCATION);
gsi_insert_after (&gsi, gimple_build_label (break_label),
@@ -1093,13 +1100,15 @@ tile (gomp_for *omp_for, location_t loc, tree tile_sizes,
next_transform_depth
= list_length (OMP_CLAUSE_TILE_SIZES (remaining_clauses));
+ size_t next_level
+ = tree_to_uhwi (OMP_CLAUSE_TRANSFORM_LEVEL (remaining_clauses));
/* The current "omp tile" transformation reduces the nesting depth
of the canonical loop-nest to TILING_DEPTH.
Hence the following "omp tile" transformation is invalid if
it requires a greater nesting depth. */
- gcc_assert (next_transform_depth <= tiling_depth);
- if (next_transform_depth > new_collapse)
- new_collapse = next_transform_depth;
+ gcc_assert (next_level + next_transform_depth <= start_level + tiling_depth);
+ if (next_level + next_transform_depth > new_collapse)
+ new_collapse = next_level + next_transform_depth;
}
if (collapse > new_collapse)
@@ -1260,14 +1269,17 @@ transform_gomp_for (gomp_for *omp_for, tree transformation, walk_ctx *ctx)
gimple_seq result = NULL;
location_t loc = OMP_CLAUSE_LOCATION (transformation);
auto dump_loc = dump_user_location_t::from_location_t (loc);
+ size_t level = tree_to_uhwi (OMP_CLAUSE_TRANSFORM_LEVEL (transformation));
switch (OMP_CLAUSE_CODE (transformation))
{
case OMP_CLAUSE_UNROLL_FULL:
gcc_assert (TREE_CHAIN (transformation) == NULL);
+ gcc_assert (level == 0);
result = full_unroll (omp_for, loc, ctx);
break;
case OMP_CLAUSE_UNROLL_NONE:
gcc_assert (TREE_CHAIN (transformation) == NULL);
+ gcc_assert (level == 0);
if (assign_unroll_full_clause_p (omp_for, transformation))
{
result = full_unroll (omp_for, loc, ctx);
@@ -1275,7 +1287,7 @@ transform_gomp_for (gomp_for *omp_for, tree transformation, walk_ctx *ctx)
else if (tree unroll_factor
= assign_unroll_partial_clause_p (omp_for, transformation))
{
- result = partial_unroll (omp_for, unroll_factor, loc,
+ result = partial_unroll (omp_for, level, unroll_factor, loc,
transformation, ctx);
}
else {
@@ -1312,12 +1324,14 @@ transform_gomp_for (gomp_for *omp_for, tree transformation, walk_ctx *ctx)
"factor turned into %<partial(%u)%> clause\n",
factor);
}
- result = partial_unroll (omp_for, unroll_factor, loc, transformation,
- ctx);
+
+ result = partial_unroll (omp_for, level,
+ unroll_factor, loc, transformation, ctx);
}
break;
case OMP_CLAUSE_TILE:
- result = tile (omp_for, loc, OMP_CLAUSE_TILE_SIZES (transformation),
+ result = tile (omp_for, loc, level,
+ OMP_CLAUSE_TILE_SIZES (transformation),
transformation, ctx);
break;
default:
@@ -1418,6 +1432,9 @@ print_optimized_unroll_partial_msg (tree c)
static tree
optimize_transformation_clauses (tree clauses)
{
+ if (!clauses)
+ return NULL_TREE;
+
/* The last unroll_partial clause seen in clauses, if any,
or the last merged unroll partial clause. */
tree unroll_partial = NULL;
@@ -1429,6 +1446,7 @@ optimize_transformation_clauses (tree clauses)
since last_non_unroll was seen. */
bool merged_unroll_partial = false;
+ size_t level = tree_to_uhwi (OMP_CLAUSE_TRANSFORM_LEVEL (clauses));
for (tree c = clauses; c != NULL_TREE; c = OMP_CLAUSE_CHAIN (c))
{
enum omp_clause_code code = OMP_CLAUSE_CODE (c);
@@ -1516,6 +1534,24 @@ optimize_transformation_clauses (tree clauses)
default:
gcc_unreachable ();
}
+
+ /* The transformations are ordered by the level of the loop-nest to which
+ they apply in decreasing order. Handle the different levels separately
+ as long as we do not implement optimizations across the levels. */
+ tree next_c = OMP_CLAUSE_CHAIN (c);
+ if (!next_c)
+ break;
+
+ size_t next_level = tree_to_uhwi (OMP_CLAUSE_TRANSFORM_LEVEL (next_c));
+ if (next_level != level)
+ {
+ gcc_assert (next_level < level);
+ tree tail = optimize_transformation_clauses (next_c);
+ OMP_CLAUSE_CHAIN (c) = tail;
+ break;
+ }
+ else level = next_level;
+
}
if (merged_unroll_partial && dump_enabled_p ())
new file mode 100644
@@ -0,0 +1,124 @@
+subroutine test1
+ !$omp parallel do collapse(2)
+ do i=0,100
+ !$omp unroll partial(2)
+ do j=-300,100
+ call dummy (j)
+ end do
+ end do
+end subroutine test1
+
+subroutine test2
+ !$omp parallel do collapse(3)
+ do i=0,100
+ !$omp unroll partial(2) ! { dg-error {loop nest depth after \!\$OMP UNROLL at \(1\) is insufficient for outer \!\$OMP PARALLEL DO} }
+ do j=-300,100
+ do k=-300,100
+ call dummy (k)
+ end do
+ end do
+ end do
+end subroutine test2
+
+subroutine test3
+!$omp parallel do collapse(3)
+do i=0,100
+ do j=-300,100
+ !$omp unroll partial(2)
+ do k=-300,100
+ call dummy (k)
+ end do
+end do
+end do
+end subroutine test3
+
+subroutine test4
+!$omp parallel do collapse(3)
+do i=0,100
+ !$omp tile sizes(3) ! { dg-error {loop nest depth after \!\$OMP TILE at \(1\) is insufficient for outer \!\$OMP PARALLEL DO} }
+ do j=-300,100
+ !$omp unroll partial(2)
+ do k=-300,100
+ call dummy (k)
+ end do
+end do
+end do
+end subroutine test4
+
+subroutine test5
+ !$omp parallel do collapse(3)
+ !$omp tile sizes(3,2) ! { dg-error {loop nest depth after \!\$OMP TILE at \(1\) is insufficient for outer \!\$OMP PARALLEL DO} }
+ do i=0,100
+ do j=-300,100
+ do k=-300,100
+ call dummy (k)
+ end do
+ end do
+ end do
+end subroutine test5
+
+subroutine test6
+!$omp parallel do collapse(3)
+do i=0,100
+ !$omp tile sizes(3,2)
+ do j=-300,100
+ !$omp unroll partial(2)
+ do k=-300,100
+ call dummy (k)
+ end do
+end do
+end do
+end subroutine test6
+
+subroutine test7
+!$omp parallel do collapse(3)
+do i=0,100
+ !$omp tile sizes(3,3)
+ do j=-300,100
+ !$omp tile sizes(5)
+ do k=-300,100
+ call dummy (k)
+ end do
+end do
+end do
+end subroutine test7
+
+subroutine test8
+!$omp parallel do collapse(1)
+do i=0,100
+ !$omp tile sizes(3,3)
+ do j=-300,100
+ !$omp tile sizes(5)
+ do k=-300,100
+ call dummy (k)
+ end do
+end do
+end do
+end subroutine test8
+
+subroutine test9
+!$omp parallel do collapse(3)
+do i=0,100
+ !$omp tile sizes(3,3,3) ! { dg-error {not enough DO loops for \!\$OMP TILE at \(1\)} }
+ do j=-300,100
+ !$omp tile sizes(5) ! { dg-error {loop nest depth after \!\$OMP TILE at \(1\) is insufficient for outer \!\$OMP TILE} }
+ do k=-300,100
+ call dummy (k)
+ end do
+end do
+end do
+end subroutine test9
+
+subroutine test10
+!$omp parallel do
+do i=0,100
+ !$omp tile sizes(3,3,3) ! { dg-error {not enough DO loops for \!\$OMP TILE at \(1\)} }
+ do j=-300,100
+ !$omp tile sizes(5) ! { dg-error {loop nest depth after \!\$OMP TILE at \(1\) is insufficient for outer \!\$OMP TILE} }
+ do k=-300,100
+ call dummy (k)
+ end do
+end do
+end do
+end subroutine test10
+
@@ -2,9 +2,9 @@ subroutine test
implicit none
integer :: i, j, k
- !$omp parallel do collapse(2) ordered(2)
+ !$omp parallel do collapse(2) ordered(2) ! { dg-error {'ordered' invalid in conjunction with 'omp tile'} }
!$omp tile sizes (1,2)
- do i = 1,100 ! { dg-error {'ordered' invalid in conjunction with 'omp tile'} }
+ do i = 1,100
do j = 1,100
call dummy(j)
do k = 1,100
new file mode 100644
@@ -0,0 +1,93 @@
+subroutine test0
+ integer, allocatable, dimension (:,:) :: a,b,c
+ integer :: i, j, k, inner
+ !$omp parallel do collapse(2) private(inner)
+ !$omp tile sizes (8, 1)
+ do i = 1,m
+ !$omp tile sizes (8, 1)
+ do j = 1,n
+ !$omp unroll partial(10)
+ do k = 1, n
+ if (k == 1) then
+ inner = 0
+ endif
+ end do
+ end do
+ end do
+end subroutine test0
+
+subroutine test0m
+ integer, allocatable, dimension (:,:) :: a,b,c
+ integer :: i, j, k, inner
+ !$omp parallel do collapse(2) private(inner)
+ do i = 1,m
+ !$omp tile sizes (8, 1)
+ do j = 1,n
+ do k = 1, n
+ if (k == 1) then
+ inner = 0
+ endif
+ inner = inner + a(k, i) * b(j, k)
+ end do
+ c(j, i) = inner ! { dg-error {\!\$OMP TILE loops not perfectly nested at \(1\)} }
+ end do
+ end do
+end subroutine test0m
+
+subroutine test1
+ integer, allocatable, dimension (:,:) :: a,b,c
+ integer :: i, j, k, inner
+ !$omp parallel do collapse(2) private(inner)
+ !$omp tile sizes (8, 1)
+ do i = 1,m
+ !$omp tile sizes (8, 1)
+ do j = 1,n
+ !$omp unroll partial(10)
+ do k = 1, n
+ if (k == 1) then
+ inner = 0
+ endif
+ inner = inner + a(k, i) * b(j, k)
+ end do
+ c(j, i) = inner ! { dg-error {\!\$OMP TILE loops not perfectly nested at \(1\)} "TODO Fix with upcoming imperfect loop nest handling" { xfail *-*-* } }
+ end do
+ end do
+end subroutine test1
+
+
+subroutine test2
+ integer, allocatable, dimension (:,:) :: a,b,c
+ integer :: i, j, k, inner
+ !$omp parallel do collapse(2) private(inner)
+ !$omp tile sizes (8, 1)
+ do i = 1,m
+ !$omp tile sizes (8, 1)
+ do j = 1,n
+ do k = 1, n
+ if (k == 1) then
+ inner = 0
+ endif
+ inner = inner + a(k, i) * b(j, k)
+ end do
+ c(j, i) = inner ! { dg-error {\!\$OMP TILE loops not perfectly nested at \(1\)} }
+ end do
+ end do
+end subroutine test2
+
+subroutine test3
+ integer, allocatable, dimension (:,:) :: a,b,c
+ integer :: i, j, k, inner
+ !$omp parallel do collapse(2) private(inner)
+ do i = 1,m
+ !$omp tile sizes (8, 1)
+ do j = 1,n
+ do k = 1, n
+ if (k == 1) then
+ inner = 0
+ endif
+ inner = inner + a(k, i) * b(j, k)
+ end do
+ c(j, i) = inner ! { dg-error {\!\$OMP TILE loops not perfectly nested at \(1\)} }
+ end do
+ end do
+end subroutine test3
new file mode 100644
@@ -0,0 +1,16 @@
+! { dg-additional-options "-fdump-tree-original" }
+! { dg-additional-options "-fdump-tree-omp_transform_loops" }
+
+subroutine test1
+ !$omp parallel do collapse(2)
+ do i=0,100
+ !$omp tile sizes(4)
+ do j=-300,100
+ call dummy (j)
+ end do
+ end do
+end subroutine test1
+
+! Collapse of the gimple_omp_for should be unaffacted by the transformation
+! { dg-final { scan-tree-dump-times {\#pragma omp for nowait collapse\(2\) tile sizes\(4\).1\n +for \(i = 0; i <= 100; i = i \+ 1\)\n +for \(j = -300; j <= 100; j = j \+ 1\)} 1 "original" } }
+! { dg-final { scan-tree-dump-times {\#pragma omp for nowait collapse\(2\) private\(j.0\) private\(j\)\n +for \(i = 0; i < 101; i = i \+ 1\)\n +for \(.omp_tile_index.\d = -300; .omp_tile_index.\d < 101; .omp_tile_index.\d = .omp_tile_index.\d \+ 4\)} 1 "omp_transform_loops" } }
new file mode 100644
@@ -0,0 +1,23 @@
+! { dg-additional-options "-fdump-tree-original" }
+! { dg-additional-options "-fdump-tree-omp_transform_loops" }
+
+subroutine test2
+ !$omp parallel do
+ !$omp tile sizes(3,3)
+ do i=0,100
+ do j=-300,100
+ !$omp tile sizes(3,3)
+ do k=-300,100
+ do l=0,100
+ call dummy (l)
+ end do
+ end do
+ end do
+ end do
+end subroutine test2
+
+! One gimple_omp_for should cover the outer two loops, another the inner two loops
+! { dg-final { scan-tree-dump-times {\#pragma omp for nowait tile sizes\(3, 3\)@0\n +for \(i = 0; i <= 100; i = i \+ 1\)\n +for \(j = -300; j <= 100; j = j \+ 1\)\n} 1 "original" } }
+! { dg-final { scan-tree-dump-times {\#pragma omp loop_transform tile sizes\(3, 3\)@0\n +for \(k = -300; k <= 100; k = k \+ 1\)\n +for \(l = 0; l <= 100; l = l \+ 1\)} 1 "original" } }
+! Collapse after the transformations should be 1
+! { dg-final { scan-tree-dump-times {\#pragma omp for nowait\n +for \(.omp_tile_index.\d = 0; .omp_tile_index.\d < 101; .omp_tile_index.\d = .omp_tile_index.\d \+ \d\)} 1 "omp_transform_loops" } }
new file mode 100644
@@ -0,0 +1,22 @@
+! { dg-additional-options "-fdump-tree-original" }
+! { dg-additional-options "-fdump-tree-omp_transform_loops" }
+
+subroutine test3
+ !$omp parallel do
+ !$omp tile sizes(3,3,3)
+ do i=0,100
+ do j=-300,100
+ !$omp tile sizes(3,3)
+ do k=-300,100
+ do l=0,100
+ call dummy (l)
+ end do
+ end do
+ end do
+ end do
+end subroutine test3
+
+! gimple_omp_for collapse should be extended to cover all loops affected by the transformations (i.e. 4)
+! { dg-final { scan-tree-dump-times {\#pragma omp for nowait tile sizes\(3, 3, 3\)@0 tile sizes\(3, 3\)@2\n +for \(i = 0; i <= 100; i = i \+ 1\)\n +for \(j = -300; j <= 100; j = j \+ 1\)\n +for \(k = -300; k <= 100; k = k \+ 1\)\n +for \(l = 0; l <= 100; l = l \+ 1\)} 1 "original" } }
+! Collapse after the transformations should be 1
+! { dg-final { scan-tree-dump-times {\#pragma omp for nowait private\(l.0\) private\(k\)\n +for \(.omp_tile_index.\d = 0; .omp_tile_index.\d < 101; .omp_tile_index.\d = .omp_tile_index.\d \+ \d\)} 1 "omp_transform_loops" } }
new file mode 100644
@@ -0,0 +1,31 @@
+! { dg-additional-options "-fdump-tree-original" }
+! { dg-additional-options "-fdump-tree-omp_transform_loops" }
+
+subroutine test
+ !$omp tile sizes(3,3,3)
+ do i=0,100
+ do j=-300,100
+ !$omp tile sizes(3,3)
+ do k=-300,100
+ do l=0,100
+ call dummy (l)
+ end do
+ end do
+ end do
+ end do
+end subroutine test
+
+! gimple_omp_for collapse should be extended to cover all loops affected by the transformations (i.e. 4)
+! { dg-final { scan-tree-dump-times {\#pragma omp loop_transform tile sizes\(3, 3, 3\)@0 tile sizes\(3, 3\)@2\n +for \(i = 0; i <= 100; i = i \+ 1\)\n +for \(j = -300; j <= 100; j = j \+ 1\)\n +for \(k = -300; k <= 100; k = k \+ 1\)\n +for \(l = 0; l <= 100; l = l \+ 1\)} 1 "original" } }
+
+! The loops should be lowered after the tiling transformations
+! { dg-final { scan-tree-dump-not {\#pragma omp} "omp_transform_loops" } }
+
+! Third level is tiled first by the inner construct. The resulting floor loop is tiled by the outer construct.
+! { dg-final { scan-tree-dump-times {if \(.omp_tile_index.1} 2 "omp_transform_loops" } }
+
+! All other levels are tiled once
+! { dg-final { scan-tree-dump-times {if \(.omp_tile_index.2} 1 "omp_transform_loops" } }
+! { dg-final { scan-tree-dump-times {if \(.omp_tile_index.3} 1 "omp_transform_loops" } }
+! { dg-final { scan-tree-dump-times {if \(.omp_tile_index.4} 1 "omp_transform_loops" } }
+
new file mode 100644
@@ -0,0 +1,30 @@
+! { dg-additional-options "-fdump-tree-original" }
+! { dg-additional-options "-fdump-tree-omp_transform_loops" }
+
+subroutine test3
+ !$omp parallel do
+ !$omp tile sizes(3)
+ do i=0,100
+ do j=-300,100
+ !$omp tile sizes(3,3)
+ do k=-300,100
+ do l=0,100
+ call dummy (l)
+ end do
+ end do
+ end do
+ end do
+end subroutine test3
+
+! The outer gimple_omp_for should not cover the loop with the tile transformation
+! { dg-final { scan-tree-dump-times {\#pragma omp for nowait tile sizes\(3\)@0\n +for \(i = 0; i <= 100; i = i \+ 1\)\n} 1 "original" } }
+! { dg-final { scan-tree-dump-times {\#pragma omp loop_transform tile sizes\(3, 3\)@0\n +for \(k = -300; k <= 100; k = k \+ 1\)\n +for \(l = 0; l <= 100; l = l \+ 1\)} 1 "original" } }
+
+
+! After transformations, the outer loop should be a floor loop created
+! by the tiling and the outer construct type and non-transformation
+! clauses should be unaffected by the tiling
+! { dg-final { scan-tree-dump {\#pragma omp for nowait\n +for \(.omp_tile_index.\d = 0; .omp_tile_index.\d < 101; .omp_tile_index.\d = .omp_tile_index.\d \+ 3\)} "omp_transform_loops" } }
+! { dg-final { scan-tree-dump-times {\#pragma omp} 2 "omp_transform_loops" } }
+! { dg-final { scan-tree-dump-times {\#pragma omp parallel} 1 "omp_transform_loops" } }
+! { dg-final { scan-tree-dump-times {\#pragma omp for} 1 "omp_transform_loops" } }
new file mode 100644
@@ -0,0 +1,26 @@
+! { dg-additional-options "-fdump-tree-original" }
+! { dg-additional-options "-fdump-tree-omp_transform_loops" }
+
+subroutine test3
+ !$omp tile sizes(3)
+ do i=0,100
+ do j=-300,100
+ !$omp tile sizes(3,3)
+ do k=-300,100
+ do l=0,100
+ call dummy (l)
+ end do
+ end do
+ end do
+ end do
+end subroutine test3
+
+! There should be separate gimple_omp_for constructs for the tile constructs because the tiling depth
+! of the outer construct does not reach the level of the inner construct
+! { dg-final { scan-tree-dump-times {\#pragma omp loop_transform tile sizes\(3\)@0\n +for \(i = 0; i <= 100; i = i \+ 1\)\n} 1 "original" } }
+! { dg-final { scan-tree-dump-times {\#pragma omp loop_transform tile sizes\(3, 3\)@0\n +for \(k = -300; k <= 100; k = k \+ 1\)\n +for \(l = 0; l <= 100; l = l \+ 1\)} 1 "original" } }
+
+
+! The loops should be lowered after the tiling transformations
+! { dg-final { scan-tree-dump-not {\#pragma omp} "omp_transform_loops" } }
+! { dg-final { scan-tree-dump-times {if \(.omp_tile_index} 3 "omp_transform_loops" } }
new file mode 100644
@@ -0,0 +1,123 @@
+subroutine test1a
+ !$omp parallel do
+ !$omp tile sizes(3,3,3)
+ do i=0,100
+ do j=-300,100
+ !$omp tile sizes(5)
+ do k=-300,100
+ call dummy (k)
+ end do
+ end do
+ end do
+end subroutine test1a
+
+subroutine test2a
+ !$omp parallel do
+ !$omp tile sizes(3,3,3,3)
+ do i=0,100
+ do j=-300,100
+ !$omp tile sizes(5,5)
+ do k=-300,100
+ do l=-300,100
+ do m=-300,100
+ call dummy (m)
+ end do
+ end do
+ end do
+ end do
+ end do
+end subroutine test2a
+
+subroutine test3a
+ !$omp parallel do
+ !$omp tile sizes(3,3,3,3)
+ do i=0,100
+ do j=-300,100
+ !$omp tile sizes(5) ! { dg-error {loop nest depth after \!\$OMP TILE at \(1\) is insufficient for outer \!\$OMP TILE} }
+ do k=-300,100
+ do l=-300,100
+ call dummy (l)
+ end do
+ end do
+ end do
+ end do
+end subroutine test3a
+
+subroutine test4a
+ !$omp parallel do
+ !$omp tile sizes(3,3,3,3,3)
+ do i=0,100
+ do j=-300,100
+ !$omp tile sizes(5,5) ! { dg-error {loop nest depth after \!\$OMP TILE at \(1\) is insufficient for outer \!\$OMP TILE} }
+ do k=-300,100
+ do l=-300,100
+ do m=-300,100
+ call dummy (m)
+ end do
+ end do
+ end do
+ end do
+ end do
+end subroutine test4a
+
+subroutine test1b
+ !$omp parallel do
+ !$omp tile sizes(3,3,3)
+ do i=0,100
+ do j=-300,100
+ !$omp tile sizes(5)
+ do k=-300,100
+ call dummy (k)
+ end do
+ end do
+ end do
+end subroutine test1b
+
+subroutine test2b
+ !$omp parallel do
+ !$omp tile sizes(3,3,3,3)
+ do i=0,100
+ do j=-300,100
+ !$omp tile sizes(5,5)
+ do k=-300,100
+ do l=-300,100
+ do m=-300,100
+ call dummy (m)
+ end do
+ end do
+ end do
+ end do
+ end do
+end subroutine test2b
+
+subroutine test3b
+ !$omp parallel do
+ !$omp tile sizes(3,3,3,3)
+ do i=0,100
+ do j=-300,100
+ !$omp tile sizes(5) ! { dg-error {loop nest depth after \!\$OMP TILE at \(1\) is insufficient for outer \!\$OMP TILE} }
+ do k=-300,100
+ do l=-300,100
+ call dummy (l)
+ end do
+ end do
+ end do
+ end do
+end subroutine test3b
+
+subroutine test4b
+ !$omp parallel do
+ !$omp tile sizes(3,3,3,3,3)
+ do i=0,100
+ do j=-300,100
+ !$omp tile sizes(5,5) ! { dg-error {loop nest depth after \!\$OMP TILE at \(1\) is insufficient for outer \!\$OMP TILE} }
+ do k=-300,100
+ do l=-300,100
+ do m=-300,100
+ call dummy (m)
+ end do
+ end do
+ end do
+ end do
+ end do
+end subroutine test4b
new file mode 100644
@@ -0,0 +1,71 @@
+subroutine test1
+ !$omp tile sizes(1)
+ do i = 1,100
+ do j = 1,i
+ do k = 1,100
+ call dummy(i)
+ end do
+ end do
+ end do
+ !$end omp tile
+end subroutine test1
+
+subroutine test2
+ !$omp tile sizes(1,2) ! { dg-error {'tile' loop transformation may not appear on non-rectangular for} }
+ do i = 1,100
+ do j = 1,i
+ do k = 1,100
+ call dummy(i)
+ end do
+ end do
+ end do
+ !$end omp tile
+end subroutine test2
+
+subroutine test3
+ !$omp tile sizes(1,2,1) ! { dg-error {'tile' loop transformation may not appear on non-rectangular for} }
+ do i = 1,100
+ do j = 1,i
+ do k = 1,100
+ call dummy(i)
+ end do
+ end do
+ end do
+ !$end omp tile
+end subroutine test3
+
+subroutine test4
+ !$omp tile sizes(1,2,1) ! { dg-error {'tile' loop transformation may not appear on non-rectangular for} }
+ do i = 1,100
+ do j = 1,100
+ do k = 1,i
+ call dummy(i)
+ end do
+ end do
+ end do
+ !$end omp tile
+end subroutine test4
+
+subroutine test5
+ !$omp tile sizes(1,2)
+ do i = 1,100
+ do j = 1,100
+ do k = 1,j
+ call dummy(i)
+ end do
+ end do
+ end do
+ !$end omp tile
+end subroutine test5
+
+subroutine test6
+ !$omp tile sizes(1,2,1) ! { dg-error {'tile' loop transformation may not appear on non-rectangular for} }
+ do i = 1,100
+ do j = 1,100
+ do k = 1,j
+ call dummy(i)
+ end do
+ end do
+ end do
+ !$end omp tile
+end subroutine test6
new file mode 100644
@@ -0,0 +1,12 @@
+subroutine test
+ !$omp tile sizes(1,2,1) ! { dg-error {'tile' loop transformation may not appear on non-rectangular for} } ! { dg-error {'tile' loop transformation may not appear on non-rectangular for
} }
+ do i = 1,100
+ do j = 1,100
+ do k = 1,i
+ call dummy(i)
+ end do
+ end do
+ end do
+ !$end omp tile
+end subroutine test
+
@@ -16,7 +16,7 @@ end subroutine test1
! Loop should be unrolled 1 * 2 * 3 * 4 = 24 times
-! { dg-final { scan-tree-dump {#pragma omp for nowait collapse\(1\) unroll_partial\(4\) unroll_partial\(3\) unroll_partial\(2\) unroll_partial\(1\)} "original" } }
+! { dg-final { scan-tree-dump {#pragma omp for nowait collapse\(1\) unroll_partial\(4\).0 unroll_partial\(3\).0 unroll_partial\(2\).0 unroll_partial\(1\)} "original" } }
! { dg-final { scan-tree-dump-not "#pragma omp loop_transform" "omp_transform_loops" } }
! { dg-final { scan-tree-dump-times "dummy" 24 "omp_transform_loops" } }
! { dg-final { scan-tree-dump-times {#pragma omp for} 1 "omp_transform_loops" } }
@@ -13,6 +13,6 @@ subroutine test1
end do
end subroutine test1
-! { dg-final { scan-tree-dump {#pragma omp loop_transform unroll_full unroll_partial\(3\) unroll_partial\(2\) unroll_partial\(1\)} "original" } }
+! { dg-final { scan-tree-dump {#pragma omp loop_transform unroll_full.0 unroll_partial\(3\).0 unroll_partial\(2\).0 unroll_partial\(1\).0} "original" } }
! { dg-final { scan-tree-dump-not "#pragma omp unroll" "omp_transform_loops" } }
! { dg-final { scan-tree-dump-times "dummy" 100 "omp_transform_loops" } }
new file mode 100644
@@ -0,0 +1,57 @@
+subroutine test1a
+ !$omp parallel do
+ !$omp tile sizes(3,3,3)
+ do i=0,100
+ do j=-300,100
+ !$omp unroll partial(5)
+ do k=-300,100
+ do l=0,100
+ call dummy (l)
+ end do
+ end do
+ end do
+ end do
+end subroutine test1a
+
+subroutine test1b
+ !$omp tile sizes(3,3,3)
+ do i=0,100
+ do j=-300,100
+ !$omp unroll partial(5)
+ do k=-300,100
+ do l=0,100
+ call dummy (l)
+ end do
+ end do
+ end do
+ end do
+end subroutine test1b
+
+subroutine test2a
+ !$omp parallel do
+ !$omp tile sizes(3,3,3,3)
+ do i=0,100
+ do j=-300,100
+ !$omp unroll partial(5) ! { dg-error {loop nest depth after \!\$OMP UNROLL at \(1\) is insufficient for outer \!\$OMP TILE} }
+ do k=-300,100
+ do l=0,100
+ call dummy (l)
+ end do
+ end do
+ end do
+ end do
+end subroutine test2a
+
+subroutine test2b
+ !$omp tile sizes(3,3,3,3)
+ do i=0,100
+ do j=-300,100
+ !$omp unroll partial(5) ! { dg-error {loop nest depth after \!\$OMP UNROLL at \(1\) is insufficient for outer \!\$OMP TILE} }
+ do k=-300,100
+ do l=0,100
+ call dummy (l)
+ end do
+ end do
+ end do
+ end do
+end subroutine test2b
new file mode 100644
@@ -0,0 +1,31 @@
+subroutine test
+ implicit none
+
+ integer :: i, j, k
+ !$omp target parallel do collapse(2) ! { dg-error {invalid OpenMP non-rectangular loop step; '\(2 - 1\) \* 1' is not a multiple of loop 2 step '5'} }
+ do i = -300, 100
+ !$omp unroll partial
+ do j = i,i*2
+ call dummy (i)
+ end do
+ end do
+
+ !$omp target parallel do collapse(3) ! { dg-error {invalid OpenMP non-rectangular loop step; '\(2 - 1\) \* 1' is not a multiple of loop 3 step '5'} }
+ do i = -300, 100
+ do j = 1,10
+ !$omp unroll partial
+ do k = j,j*2 + 1
+ call dummy (i)
+ end do
+ end do
+ end do
+
+ !$omp unroll full
+ do i = -3, 5
+ do j = 1,10
+ do k = j,j*2 + 1
+ call dummy (i)
+ end do
+ end do
+ end do
+end subroutine
@@ -21,7 +21,7 @@ function mult (a, b) result (c)
end do
end function mult
-! { dg-final { scan-tree-dump-times {#pragma omp for nowait unroll_partial\(1\) tile sizes\(8, 8\)} 1 "original" } }
+! { dg-final { scan-tree-dump-times {#pragma omp for nowait unroll_partial\(1\)@0 tile sizes\(8, 8\)@0} 1 "original" } }
! { dg-final { scan-tree-dump-not "#pragma omp loop_transform unroll_partial" "omp_transform_loops" } }
! Tiling adds two floor and two tile loops.
@@ -22,7 +22,7 @@ function mult (a, b) result (c)
!$omp end target
end function mult
-! { dg-final { scan-tree-dump-times {#pragma omp for nowait unroll_partial\(2\) tile sizes\(8, 8, 4\)} 1 "original" } }
+! { dg-final { scan-tree-dump-times {#pragma omp for nowait unroll_partial\(2\)@0 tile sizes\(8, 8, 4\)@0} 1 "original" } }
! { dg-final { scan-tree-dump-not "#pragma omp loop_transform unroll_partial" "omp_transform_loops" } }
! Check the number of loops
new file mode 100644
@@ -0,0 +1,25 @@
+! { dg-additional-options "-fdump-tree-original" }
+! { dg-additional-options "-fdump-tree-omp_transform_loops" }
+
+function mult (a, b) result (c)
+ integer, allocatable, dimension (:,:) :: a,b,c
+ integer :: i, j, k, inner
+
+ allocate(c( n, m ))
+
+ !$omp parallel do collapse(2)
+ !$omp tile sizes (8,8)
+ do i = 1,m
+ do j = 1,n
+ inner = 0
+ !$omp unroll partial(10)
+ do k = 1, n
+ inner = inner + a(k, i) * b(j, k)
+ end do
+ c(j, i) = inner
+ end do
+ end do
+end function mult
+
+! { dg-final { scan-tree-dump-times "#pragma omp loop_transform unroll_partial" 1 "original" } }
+! { dg-final { scan-tree-dump-not "#pragma omp loop_transform unroll_partial" "omp_transform_loops" } }
@@ -507,9 +507,21 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, dump_flags_t flags)
goto print_remap;
case OMP_CLAUSE_UNROLL_FULL:
pp_string (pp, "unroll_full");
+ if (OMP_CLAUSE_TRANSFORM_LEVEL (clause))
+ {
+ pp_string (pp, "@");
+ dump_generic_node (pp, OMP_CLAUSE_TRANSFORM_LEVEL (clause),
+ spc, flags, false);
+ }
break;
case OMP_CLAUSE_UNROLL_NONE:
pp_string (pp, "unroll_none");
+ if (OMP_CLAUSE_TRANSFORM_LEVEL (clause))
+ {
+ pp_string (pp, "@");
+ dump_generic_node (pp, OMP_CLAUSE_TRANSFORM_LEVEL (clause),
+ spc, flags, false);
+ }
break;
case OMP_CLAUSE_UNROLL_PARTIAL:
pp_string (pp, "unroll_partial");
@@ -520,6 +532,12 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, dump_flags_t flags)
false);
pp_right_paren (pp);
}
+ if (OMP_CLAUSE_TRANSFORM_LEVEL (clause))
+ {
+ pp_string (pp, "@");
+ dump_generic_node (pp, OMP_CLAUSE_TRANSFORM_LEVEL (clause),
+ spc, flags, false);
+ }
break;
case OMP_CLAUSE_TILE:
pp_string (pp, "tile sizes");
@@ -528,6 +546,12 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, dump_flags_t flags)
dump_generic_node (pp, OMP_CLAUSE_TILE_SIZES (clause), spc, flags,
false);
pp_right_paren (pp);
+ if (OMP_CLAUSE_TRANSFORM_LEVEL (clause))
+ {
+ pp_string (pp, "@");
+ dump_generic_node (pp, OMP_CLAUSE_TRANSFORM_LEVEL (clause),
+ spc, flags, false);
+ }
break;
case OMP_CLAUSE__LOOPTEMP_:
name = "_looptemp_";
@@ -326,11 +326,11 @@ unsigned const char omp_clause_num_ops[] =
0, /* OMP_CLAUSE_IF_PRESENT */
0, /* OMP_CLAUSE_FINALIZE */
0, /* OMP_CLAUSE_NOHOST */
- 0, /* OMP_CLAUSE_UNROLL_FULL */
+ 1, /* OMP_CLAUSE_UNROLL_FULL */
- 0, /* OMP_CLAUSE_UNROLL_NONE */
- 1, /* OMP_CLAUSE_UNROLL_PARTIAL */
- 1 /* OMP_CLAUSE_TILE */
+ 1, /* OMP_CLAUSE_UNROLL_NONE */
+ 2, /* OMP_CLAUSE_UNROLL_PARTIAL */
+ 2 /* OMP_CLAUSE_TILE */
};
const char * const omp_clause_code_name[] =
@@ -1787,11 +1787,16 @@ class auto_suppress_location_wrappers
#define OMP_CLAUSE_USE_DEVICE_PTR_IF_PRESENT(NODE) \
(OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_USE_DEVICE_PTR)->base.public_flag)
+/* The level of a collapsed loop nest at which the tranformation represented
+ by this clause should be applied. */
+#define OMP_CLAUSE_TRANSFORM_LEVEL(NODE) \
+ OMP_CLAUSE_OPERAND (NODE, 0)
+
#define OMP_CLAUSE_UNROLL_PARTIAL_EXPR(NODE) \
- OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_UNROLL_PARTIAL), 0)
+ OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_UNROLL_PARTIAL), 1)
#define OMP_CLAUSE_TILE_SIZES(NODE) \
- OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_TILE), 0)
+ OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_TILE), 1)
#define OMP_CLAUSE_PROC_BIND_KIND(NODE) \
(OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_PROC_BIND)->omp_clause.subcode.proc_bind_kind)
new file mode 100644
@@ -0,0 +1,77 @@
+module matrix
+ implicit none
+ integer :: n = 10
+ integer :: m = 10
+
+contains
+ function mult (a, b) result (c)
+ integer, allocatable, dimension (:,:) :: a,b,c
+ integer :: i, j, k, inner
+
+ allocate(c( n, m ))
+ !$omp target parallel do collapse(2) private(inner) map(to:a,b) map(from:c)
+ !$omp tile sizes (8, 1)
+ do i = 1,m
+ !$omp tile sizes (8)
+ do j = 1,n
+ !$omp unroll partial(10)
+ do k = 1, n
+ if (k == 1) then
+ inner = 0
+ endif
+ inner = inner + a(k, i) * b(j, k)
+ if (k == n) then
+ c(j, i) = inner
+ endif
+ end do
+ end do
+ end do
+ end function mult
+
+ subroutine print_matrix (m)
+ integer, allocatable :: m(:,:)
+ integer :: i, j, n
+
+ n = size (m, 1)
+ do i = 1,n
+ do j = 1,n
+ write (*, fmt="(i4)", advance='no') m(j, i)
+ end do
+ write (*, *) ""
+ end do
+ write (*, *) ""
+ end subroutine
+
+end module matrix
+
+program main
+ use matrix
+ implicit none
+
+ integer, allocatable :: a(:,:),b(:,:),c(:,:)
+ integer :: i,j
+
+ allocate(a( n, m ))
+ allocate(b( n, m ))
+
+ do i = 1,n
+ do j = 1,m
+ a(j,i) = merge(1,0, i.eq.j)
+ b(j,i) = j
+ end do
+ end do
+
+ c = mult (a, b)
+
+ call print_matrix (a)
+ call print_matrix (b)
+ call print_matrix (c)
+
+ do i = 1,n
+ do j = 1,m
+ if (b(i,j) .ne. c(i,j)) call abort ()
+ end do
+ end do
+
+
+end program main