@@ -2062,6 +2062,18 @@ show_omp_clauses (gfc_omp_clauses *omp_clauses)
if (omp_clauses->unroll_partial_factor > 0)
fprintf (dumpfile, "(%u)", omp_clauses->unroll_partial_factor);
}
+ if (omp_clauses->tile_sizes)
+ {
+ gfc_expr_list *sizes;
+ fputs (" TILE SIZES(", dumpfile);
+ for (sizes = omp_clauses->tile_sizes; sizes; sizes = sizes->next)
+ {
+ show_expr (sizes->expr);
+ if (sizes->next)
+ fputs (", ", dumpfile);
+ }
+ fputc (')', dumpfile);
+ }
}
/* Show a single OpenMP or OpenACC directive node and everything underneath it
@@ -2172,6 +2184,7 @@ show_omp_node (int level, gfc_code *c)
name = "TEAMS DISTRIBUTE PARALLEL DO SIMD"; break;
case EXEC_OMP_TEAMS_DISTRIBUTE_SIMD: name = "TEAMS DISTRIBUTE SIMD"; break;
case EXEC_OMP_TEAMS_LOOP: name = "TEAMS LOOP"; break;
+ case EXEC_OMP_TILE: name = "TILE"; break;
case EXEC_OMP_UNROLL: name = "UNROLL"; break;
case EXEC_OMP_WORKSHARE: name = "WORKSHARE"; break;
default:
@@ -2249,6 +2262,7 @@ show_omp_node (int level, gfc_code *c)
case EXEC_OMP_TEAMS_DISTRIBUTE_PARALLEL_DO_SIMD:
case EXEC_OMP_TEAMS_DISTRIBUTE_SIMD:
case EXEC_OMP_TEAMS_LOOP:
+ case EXEC_OMP_TILE:
case EXEC_OMP_UNROLL:
case EXEC_OMP_WORKSHARE:
omp_clauses = c->ext.omp_clauses;
@@ -2311,7 +2325,7 @@ show_omp_node (int level, gfc_code *c)
d = d->block;
}
}
- else if (c->op == EXEC_OMP_UNROLL)
+ else if (c->op == EXEC_OMP_UNROLL || c->op == EXEC_OMP_TILE)
show_code (level + 1, c->block != NULL ? c->block->next : c->next);
else
show_code (level + 1, c->block->next);
@@ -3491,6 +3505,7 @@ show_code_node (int level, gfc_code *c)
case EXEC_OMP_TEAMS_DISTRIBUTE_PARALLEL_DO_SIMD:
case EXEC_OMP_TEAMS_DISTRIBUTE_SIMD:
case EXEC_OMP_TEAMS_LOOP:
+ case EXEC_OMP_TILE:
case EXEC_OMP_UNROLL:
case EXEC_OMP_WORKSHARE:
show_omp_node (level, c);
@@ -320,7 +320,8 @@ enum gfc_statement
ST_OMP_ERROR, ST_OMP_ASSUME, ST_OMP_END_ASSUME, ST_OMP_ASSUMES,
/* Note: gfc_match_omp_nothing returns ST_NONE. */
ST_OMP_NOTHING, ST_NONE,
- ST_OMP_UNROLL, ST_OMP_END_UNROLL
+ ST_OMP_UNROLL, ST_OMP_END_UNROLL,
+ ST_OMP_TILE, ST_OMP_END_TILE
};
/* Types of interfaces that we can have. Assignment interfaces are
@@ -1550,6 +1551,7 @@ typedef struct gfc_omp_clauses
struct gfc_expr *dist_chunk_size;
struct gfc_expr *message;
struct gfc_omp_assumptions *assume;
+ struct gfc_expr_list *tile_sizes;
const char *critical_name;
enum gfc_omp_default_sharing default_sharing;
enum gfc_omp_atomic_op atomic_op;
@@ -2977,7 +2979,7 @@ enum gfc_exec_op
EXEC_OMP_TARGET_TEAMS_LOOP, EXEC_OMP_MASKED, EXEC_OMP_PARALLEL_MASKED,
EXEC_OMP_PARALLEL_MASKED_TASKLOOP, EXEC_OMP_PARALLEL_MASKED_TASKLOOP_SIMD,
EXEC_OMP_MASKED_TASKLOOP, EXEC_OMP_MASKED_TASKLOOP_SIMD, EXEC_OMP_SCOPE,
- EXEC_OMP_UNROLL,
+ EXEC_OMP_UNROLL, EXEC_OMP_TILE,
EXEC_OMP_ERROR
};
@@ -3874,6 +3876,7 @@ bool gfc_inline_intrinsic_function_p (gfc_expr *);
/* trans-openmp.cc */
bool loop_transform_p (gfc_exec_op op);
+int gfc_expr_list_len (gfc_expr_list *);
/* bbt.cc */
typedef int (*compare_fn) (void *, void *);
@@ -226,6 +226,7 @@ match gfc_match_omp_teams_distribute_parallel_do_simd (void);
match gfc_match_omp_teams_distribute_simd (void);
match gfc_match_omp_teams_loop (void);
match gfc_match_omp_threadprivate (void);
+match gfc_match_omp_tile (void);
match gfc_match_omp_unroll (void);
match gfc_match_omp_workshare (void);
match gfc_match_omp_end_critical (void);
@@ -191,6 +191,7 @@ gfc_free_omp_clauses (gfc_omp_clauses *c)
i == OMP_LIST_ALLOCATE);
gfc_free_expr_list (c->wait_list);
gfc_free_expr_list (c->tile_list);
+ gfc_free_expr_list (c->tile_sizes);
free (CONST_CAST (char *, c->critical_name));
if (c->assume)
{
@@ -977,6 +978,76 @@ cleanup:
return MATCH_ERROR;
}
+static match
+match_tile_sizes (gfc_expr_list **list)
+{
+ gfc_expr_list *head, *tail, *p;
+ locus old_loc;
+ gfc_expr *expr;
+ match m;
+
+ head = tail = NULL;
+
+ old_loc = gfc_current_locus;
+
+ m = gfc_match_char ('(');
+ if (m != MATCH_YES)
+ goto syntax;
+
+ for (;;)
+ {
+ m = gfc_match_expr (&expr);
+ if (m == MATCH_YES)
+ {
+ p = gfc_get_expr_list ();
+ if (head == NULL)
+ head = tail = p;
+ else
+ {
+ tail->next = p;
+ tail = tail->next;
+ }
+ int size = 0;
+ if (m == MATCH_YES)
+ {
+ if (gfc_extract_int (expr, &size, 1))
+ goto cleanup;
+ else if (size < 1)
+ {
+ gfc_error_now ("tile size not constant "
+ "positive integer at %C");
+ goto cleanup;
+ }
+ tail->expr = expr;
+ }
+ goto next_item;
+ }
+ if (m == MATCH_ERROR)
+ goto cleanup;
+ goto syntax;
+
+ next_item:
+ if (gfc_match_char (')') == MATCH_YES)
+ break;
+ if (gfc_match_char (',') != MATCH_YES)
+ goto syntax;
+ }
+
+ while (*list)
+ list = &(*list)->next;
+
+ *list = head;
+ return MATCH_YES;
+
+syntax:
+ gfc_error ("Syntax error in 'tile sizes' list at %C");
+
+cleanup:
+ gfc_free_expr_list (head);
+ gfc_current_locus = old_loc;
+ return MATCH_ERROR;
+}
+
/* OpenMP clauses. */
enum omp_mask1
{
@@ -1054,6 +1125,7 @@ enum omp_mask2
OMP_CLAUSE_UNROLL_FULL, /* OpenMP 5.1. */
OMP_CLAUSE_UNROLL_NONE, /* OpenMP 5.1. */
OMP_CLAUSE_UNROLL_PARTIAL, /* OpenMP 5.1. */
+ OMP_CLAUSE_TILE, /* OpenMP 5.1. */
OMP_CLAUSE_ASYNC,
OMP_CLAUSE_NUM_GANGS,
OMP_CLAUSE_NUM_WORKERS,
@@ -4310,7 +4382,8 @@ cleanup:
omp_mask (OMP_CLAUSE_NOWAIT)
#define OMP_UNROLL_CLAUSES \
(omp_mask (OMP_CLAUSE_UNROLL_FULL) | OMP_CLAUSE_UNROLL_PARTIAL)
-
+#define OMP_TILE_CLAUSES \
+ (omp_mask (OMP_CLAUSE_TILE))
static match
match_omp (gfc_exec_op op, const omp_mask mask)
@@ -6409,6 +6482,16 @@ gfc_match_omp_teams_distribute_simd (void)
| OMP_SIMD_CLAUSES);
}
+match
+gfc_match_omp_tile (void)
+{
+ gfc_omp_clauses *c = gfc_get_omp_clauses();
+ new_st.op = EXEC_OMP_TILE;
+ new_st.ext.omp_clauses = c;
+
+ return match_tile_sizes (&c->tile_sizes);
+}
+
match
gfc_match_omp_unroll (void)
{
@@ -9289,75 +9372,6 @@ gfc_resolve_do_iterator (gfc_code *code, gfc_symbol *sym, bool add_clause)
}
}
-
-static bool
-omp_unroll_removes_loop_nest (gfc_code *code)
-{
- gcc_assert (code->op == EXEC_OMP_UNROLL);
- if (!code->ext.omp_clauses)
- return true;
-
- if (code->ext.omp_clauses->unroll_none)
- {
- gfc_warning (0, "!$OMP UNROLL without PARTIAL clause at %L turns loop "
- "into a non-loop",
- &code->loc);
- return true;
- }
- if (code->ext.omp_clauses->unroll_full)
- {
- gfc_warning (0, "!$OMP UNROLL with FULL clause at %L turns loop into a "
- "non-loop",
- &code->loc);
- return true;
- }
- return false;
-}
-
-static void
-resolve_loop_transform_generic (gfc_code *code, const char *descr)
-{
- gcc_assert (code->block);
-
- if (code->block->op == EXEC_OMP_UNROLL
- && !omp_unroll_removes_loop_nest (code->block))
- return;
-
- if (code->block->next->op == EXEC_OMP_UNROLL
- && !omp_unroll_removes_loop_nest (code->block->next))
- return;
-
- if (code->block->next->op == EXEC_DO_WHILE)
- {
- gfc_error ("%s invalid around DO WHILE or DO without loop "
- "control at %L", descr, &code->loc);
- return;
- }
- if (code->block->next->op == EXEC_DO_CONCURRENT)
- {
- gfc_error ("%s invalid around DO CONCURRENT loop at %L",
- descr, &code->loc);
- return;
- }
-
- gfc_error ("missing canonical loop nest after %s at %L",
- descr, &code->loc);
-
-}
-
-static void
-resolve_omp_unroll (gfc_code *code)
-{
- if (!code->block || code->block->op == EXEC_DO)
- return;
-
- if (code->block->next->op == EXEC_DO)
- return;
-
- resolve_loop_transform_generic (code, "!$OMP UNROLL");
-}
-
-
static void
handle_local_var (gfc_symbol *sym)
{
@@ -9488,6 +9502,106 @@ bound_expr_is_canonical (gfc_code *code, int depth, gfc_expr *expr,
return false;
}
+static bool
+omp_unroll_removes_loop_nest (gfc_code *code)
+{
+ gcc_assert (code->op == EXEC_OMP_UNROLL);
+ if (!code->ext.omp_clauses)
+ return true;
+
+ if (code->ext.omp_clauses->unroll_none)
+ {
+ gfc_warning (0, "!$OMP UNROLL without PARTIAL clause at %L turns loop "
+ "into a non-loop",
+ &code->loc);
+ return true;
+ }
+ if (code->ext.omp_clauses->unroll_full)
+ {
+ gfc_warning (0, "!$OMP UNROLL with FULL clause at %L turns loop into a "
+ "non-loop",
+ &code->loc);
+ return true;
+ }
+ return false;
+}
+
+static gfc_code *
+resolve_nested_loop_transforms (gfc_code *code, const char *name,
+ int required_depth, locus *loc)
+{
+ if (!code)
+ return code;
+
+ bool error = false;
+ while (loop_transform_p (code->op))
+ {
+ if (!error && code->op == EXEC_OMP_UNROLL)
+ {
+ if (omp_unroll_removes_loop_nest (code))
+ {
+ gfc_error ("missing canonical loop nest after %s at %L", name,
+ loc);
+ error = true;
+ }
+ else if (required_depth > 1)
+ {
+ gfc_error ("loop nest depth after !$OMP UNROLL at %L is insufficient "
+ "for outer %s", &code->loc, name);
+ error = true;
+ }
+ }
+ else if (!error && code->op == EXEC_OMP_TILE
+ && required_depth > gfc_expr_list_len (code->ext.omp_clauses->tile_sizes))
+ {
+ gfc_error ("loop nest depth after !$OMP TILE at %L is insufficient "
+ "for outer %s", &code->loc, name);
+ error = true;
+ }
+
+ if (code->block)
+ code = code->block->next;
+ else
+ code = code->next;
+ }
+ gcc_assert (!loop_transform_p (code->op));
+
+ return code;
+}
+
+static void
+resolve_omp_unroll (gfc_code *code)
+{
+ const char *descr = "!$OMP UNROLL";
+ locus *loc = &code->loc;
+
+ if (!code->block || code->block->op == EXEC_DO)
+ return;
+
+ code = resolve_nested_loop_transforms (code->block->next, descr, 1,
+ &code->loc);
+
+ if (code->op == EXEC_DO)
+ return;
+
+ if (code->op == EXEC_DO_WHILE)
+ {
+ gfc_error ("%s invalid around DO WHILE or DO without loop "
+ "control at %L", descr, loc);
+ return;
+ }
+
+ if (code->op == EXEC_DO_CONCURRENT)
+ {
+ gfc_error ("%s invalid around DO CONCURRENT loop at %L",
+ descr, loc);
+ return;
+ }
+
+ gfc_error ("missing canonical loop nest after %s at %L",
+ descr, loc);
+}
+
static void
resolve_omp_do (gfc_code *code)
{
@@ -9592,30 +9706,13 @@ resolve_omp_do (gfc_code *code)
break;
case EXEC_OMP_TEAMS_LOOP: name = "!$OMP TEAMS LOOP"; break;
case EXEC_OMP_UNROLL: name = "!$OMP UNROLL"; break;
+ case EXEC_OMP_TILE: name = "!$OMP TILE"; break;
default: gcc_unreachable ();
}
if (code->ext.omp_clauses)
resolve_omp_clauses (code, code->ext.omp_clauses, NULL);
- do_code = code->block->next;
- /* Move forward over any loop transformation directives to find the loop. */
- bool error = false;
- while (do_code->op == EXEC_OMP_UNROLL)
- {
- if (!error && omp_unroll_removes_loop_nest (do_code))
- {
- gfc_error ("missing canonical loop nest after %s at %L", name,
- &code->loc);
- error = true;
- }
- if (do_code->block)
- do_code = do_code->block->next;
- else
- do_code = do_code->next;
- }
- gcc_assert (do_code->op != EXEC_OMP_UNROLL);
-
if (code->ext.omp_clauses->orderedc)
collapse = code->ext.omp_clauses->orderedc;
else
@@ -9630,6 +9727,9 @@ resolve_omp_do (gfc_code *code)
depth and treats any further inner loops as the final-loop-body. So
here we also check canonical loop nest form only for the number of
outer loops specified by the COLLAPSE clause too. */
+ do_code = resolve_nested_loop_transforms (code->block->next, name, collapse,
+ &code->loc);
+
for (i = 1; i <= collapse; i++)
{
gfc_symbol *start_var = NULL, *end_var = NULL;
@@ -9745,6 +9845,98 @@ resolve_omp_do (gfc_code *code)
}
}
+static void
+resolve_omp_tile (gfc_code *code)
+{
+ gfc_code *do_code, *c;
+ gfc_symbol *dovar;
+ const char *name = "!$OMP TILE";
+
+ unsigned num_loops = 0;
+ gcc_assert (code->ext.omp_clauses->tile_sizes);
+ for (gfc_expr_list *el = code->ext.omp_clauses->tile_sizes; el;
+ el = el->next)
+ num_loops++;
+
+ do_code = resolve_nested_loop_transforms (code, name, num_loops, &code->loc);
+
+ for (unsigned i = 1; i <= num_loops; i++)
+ {
+ if (do_code->op == EXEC_DO_WHILE)
+ {
+ gfc_error ("%s cannot be a DO WHILE or DO without loop control "
+ "at %L", name, &do_code->loc);
+ break;
+ }
+ if (do_code->op == EXEC_DO_CONCURRENT)
+ {
+ gfc_error ("%s cannot be a DO CONCURRENT loop at %L", name,
+ &do_code->loc);
+ break;
+ }
+ if (do_code->op != EXEC_DO)
+ {
+ gfc_error ("%s must be DO loop at %L", name,
+ &do_code->loc);
+ break;
+ }
+
+ gcc_assert (do_code->op != EXEC_OMP_UNROLL);
+ gcc_assert (do_code->op == EXEC_DO);
+ dovar = do_code->ext.iterator->var->symtree->n.sym;
+ if (i > 1)
+ {
+ gfc_code *do_code2 = code;
+ while (loop_transform_p (do_code2->op))
+ {
+ if (do_code2->block)
+ do_code2 = do_code2->block->next;
+ else
+ do_code2 = do_code2->next;
+ }
+ gcc_assert (!loop_transform_p (do_code2->op));
+
+ for (unsigned j = 1; j < i; j++)
+ {
+ gfc_symbol *ivar = do_code2->ext.iterator->var->symtree->n.sym;
+ if (dovar == ivar
+ || gfc_find_sym_in_expr (ivar, do_code->ext.iterator->start)
+ || gfc_find_sym_in_expr (ivar, do_code->ext.iterator->end)
+ || gfc_find_sym_in_expr (ivar, do_code->ext.iterator->step))
+ {
+ gfc_error ("%s loops don't form rectangular "
+ "iteration space at %L", name, &do_code->loc);
+ break;
+ }
+ do_code2 = do_code2->block->next;
+ }
+ }
+ for (c = do_code->next; c; c = c->next)
+ if (c->op != EXEC_NOP && c->op != EXEC_CONTINUE)
+ {
+ gfc_error ("%s loops not perfectly nested at %L",
+ name, &c->loc);
+ break;
+ }
+ if (i == num_loops || c)
+ break;
+ do_code = do_code->block;
+ if (do_code->op != EXEC_DO && do_code->op != EXEC_DO_WHILE)
+ {
+ gfc_error ("not enough DO loops for %s at %L",
+ name, &code->loc);
+ break;
+ }
+ do_code = do_code->next;
+ if (do_code == NULL
+ || (do_code->op != EXEC_DO && do_code->op != EXEC_DO_WHILE))
+ {
+ gfc_error ("not enough DO loops for %s at %L",
+ name, &code->loc);
+ break;
+ }
+ }
+}
static gfc_statement
omp_code_to_statement (gfc_code *code)
@@ -9889,6 +10081,8 @@ omp_code_to_statement (gfc_code *code)
return ST_OMP_PARALLEL_LOOP;
case EXEC_OMP_DEPOBJ:
return ST_OMP_DEPOBJ;
+ case EXEC_OMP_TILE:
+ return ST_OMP_TILE;
case EXEC_OMP_UNROLL:
return ST_OMP_UNROLL;
default:
@@ -10320,6 +10514,9 @@ gfc_resolve_omp_directive (gfc_code *code, gfc_namespace *ns)
case EXEC_OMP_TEAMS_LOOP:
resolve_omp_do (code);
break;
+ case EXEC_OMP_TILE:
+ resolve_omp_tile (code);
+ break;
case EXEC_OMP_UNROLL:
resolve_omp_unroll (code);
break;
@@ -1009,6 +1009,7 @@ decode_omp_directive (void)
matcho ("end teams loop", gfc_match_omp_eos_error, ST_OMP_END_TEAMS_LOOP);
matcho ("end teams", gfc_match_omp_eos_error, ST_OMP_END_TEAMS);
matchs ("end unroll", gfc_match_omp_eos_error, ST_OMP_END_UNROLL);
+ matchs ("end tile", gfc_match_omp_eos_error, ST_OMP_END_TILE);
matcho ("end workshare", gfc_match_omp_end_nowait,
ST_OMP_END_WORKSHARE);
break;
@@ -1137,6 +1138,7 @@ decode_omp_directive (void)
matcho ("teams", gfc_match_omp_teams, ST_OMP_TEAMS);
matchdo ("threadprivate", gfc_match_omp_threadprivate,
ST_OMP_THREADPRIVATE);
+ matchs ("tile sizes", gfc_match_omp_tile, ST_OMP_TILE);
break;
case 'u':
matchs ("unroll", gfc_match_omp_unroll, ST_OMP_UNROLL);
@@ -1729,6 +1731,7 @@ next_statement (void)
case ST_OMP_TARGET_PARALLEL_LOOP: case ST_OMP_TARGET_TEAMS_LOOP: \
case ST_OMP_ASSUME: \
case ST_OMP_UNROLL: \
+ case ST_OMP_TILE: \
case ST_CRITICAL: \
case ST_OACC_PARALLEL_LOOP: case ST_OACC_PARALLEL: case ST_OACC_KERNELS: \
case ST_OACC_DATA: case ST_OACC_HOST_DATA: case ST_OACC_LOOP: \
@@ -2774,6 +2777,9 @@ gfc_ascii_statement (gfc_statement st, bool strip_sentinel)
case ST_OMP_THREADPRIVATE:
p = "!$OMP THREADPRIVATE";
break;
+ case ST_OMP_TILE:
+ p = "!$OMP TILE";
+ break;
case ST_OMP_UNROLL:
p = "!$OMP UNROLL";
break;
@@ -5214,6 +5220,11 @@ parse_omp_do (gfc_statement omp_st)
num_unroll++;
continue;
}
+ else if (st == ST_OMP_TILE)
+ {
+ accept_statement (st);
+ continue;
+ }
else
unexpected_statement (st);
}
@@ -5338,6 +5349,9 @@ parse_omp_do (gfc_statement omp_st)
case ST_OMP_TEAMS_LOOP:
omp_end_st = ST_OMP_END_TEAMS_LOOP;
break;
+ case ST_OMP_TILE:
+ omp_end_st = ST_OMP_END_TILE;
+ break;
case ST_OMP_UNROLL:
omp_end_st = ST_OMP_END_UNROLL;
break;
@@ -6025,6 +6039,7 @@ parse_executable (gfc_statement st)
case ST_OMP_TEAMS_DISTRIBUTE_PARALLEL_DO_SIMD:
case ST_OMP_TEAMS_DISTRIBUTE_SIMD:
case ST_OMP_TEAMS_LOOP:
+ case ST_OMP_TILE:
case ST_OMP_UNROLL:
st = parse_omp_do (st);
if (st == ST_IMPLIED_ENDDO)
@@ -11041,6 +11041,7 @@ gfc_resolve_blocks (gfc_code *b, gfc_namespace *ns)
case EXEC_OMP_TEAMS_DISTRIBUTE_PARALLEL_DO_SIMD:
case EXEC_OMP_TEAMS_LOOP:
case EXEC_OMP_TEAMS_DISTRIBUTE_SIMD:
+ case EXEC_OMP_TILE:
case EXEC_OMP_UNROLL:
case EXEC_OMP_WORKSHARE:
break;
@@ -12198,6 +12199,7 @@ gfc_resolve_code (gfc_code *code, gfc_namespace *ns)
case EXEC_OMP_LOOP:
case EXEC_OMP_SIMD:
case EXEC_OMP_TARGET_SIMD:
+ case EXEC_OMP_TILE:
case EXEC_OMP_UNROLL:
gfc_resolve_omp_do_blocks (code, ns);
break;
@@ -12695,6 +12697,7 @@ start:
case EXEC_OMP_TEAMS_DISTRIBUTE_PARALLEL_DO_SIMD:
case EXEC_OMP_TEAMS_DISTRIBUTE_SIMD:
case EXEC_OMP_TEAMS_LOOP:
+ case EXEC_OMP_TILE:
case EXEC_OMP_UNROLL:
case EXEC_OMP_WORKSHARE:
gfc_resolve_omp_directive (code, ns);
@@ -277,6 +277,7 @@ gfc_free_statement (gfc_code *p)
case EXEC_OMP_TEAMS_DISTRIBUTE_PARALLEL_DO_SIMD:
case EXEC_OMP_TEAMS_DISTRIBUTE_SIMD:
case EXEC_OMP_TEAMS_LOOP:
+ case EXEC_OMP_TILE:
case EXEC_OMP_UNROLL:
case EXEC_OMP_WORKSHARE:
gfc_free_omp_clauses (p->ext.omp_clauses);
@@ -3913,6 +3913,24 @@ gfc_trans_omp_clauses (stmtblock_t *block, gfc_omp_clauses *clauses,
omp_clauses = gfc_trans_add_clause (c, omp_clauses);
}
+ if (clauses->tile_sizes)
+ {
+ vec<tree, va_gc> *tvec;
+ gfc_expr_list *el;
+
+ vec_alloc (tvec, 4);
+
+ for (el = clauses->tile_sizes; el; el = el->next)
+ vec_safe_push (tvec, gfc_convert_expr_to_tree (block, el->expr));
+
+ c = build_omp_clause (gfc_get_location (&where),
+ OMP_CLAUSE_TILE);
+ OMP_CLAUSE_TILE_SIZES (c) = build_tree_list_vec (tvec);
+ omp_clauses = gfc_trans_add_clause (c, omp_clauses);
+
+ tvec->truncate (0);
+ }
+
if (clauses->ordered)
{
c = build_omp_clause (gfc_get_location (&where), OMP_CLAUSE_ORDERED);
@@ -5106,7 +5124,7 @@ gfc_trans_omp_cancel (gfc_code *code)
bool
loop_transform_p (gfc_exec_op op)
{
- return op == EXEC_OMP_UNROLL;
+ return op == EXEC_OMP_UNROLL || op == EXEC_OMP_TILE;
}
static tree
@@ -5280,6 +5298,16 @@ gfc_nonrect_loop_expr (stmtblock_t *pblock, gfc_se *sep, int loop_n,
return true;
}
+int
+gfc_expr_list_len (gfc_expr_list *list)
+{
+ unsigned len = 0;
+ for (; list; list = list->next)
+ len++;
+
+ return len;
+}
+
static tree
gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock,
gfc_omp_clauses *do_clauses, tree par_clauses)
@@ -5295,25 +5323,14 @@ gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock,
dovar_init *di;
unsigned ix;
vec<tree, va_heap, vl_embed> *saved_doacross_steps = doacross_steps;
- gfc_expr_list *tile = do_clauses ? do_clauses->tile_list : clauses->tile_list;
gfc_code *orig_code = code;
locus top_loc = code->loc;
-
- /* Both collapsed and tiled loops are lowered the same way. In
- OpenACC, those clauses are not compatible, so prioritize the tile
- clause, if present. */
- if (tile)
- {
- collapse = 0;
- for (gfc_expr_list *el = tile; el; el = el->next)
- collapse++;
- }
-
- doacross_steps = NULL;
- if (clauses->orderedc)
- collapse = clauses->orderedc;
- if (collapse <= 0)
- collapse = 1;
+ gfc_expr_list *oacc_tile
+ = do_clauses ? do_clauses->tile_list : clauses->tile_list;
+ gfc_expr_list *omp_tile
+ = do_clauses ? do_clauses->tile_sizes : clauses->tile_sizes;
+ gcc_assert (!omp_tile || op == EXEC_OMP_TILE);
+ gcc_assert (!(oacc_tile && omp_tile));
if (pblock == NULL)
{
@@ -5321,21 +5338,42 @@ gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock,
pblock = █
}
code = code->block->next;
- gcc_assert (code->op == EXEC_DO || code->op == EXEC_OMP_UNROLL);
+ gcc_assert (code->op == EXEC_DO || loop_transform_p (code->op));
/* Loop transformation directives surrounding the associated loop of an "omp
do" (or similar directive) are represented as clauses on the "omp do". */
loop_transform_clauses = NULL;
- while (code->op == EXEC_OMP_UNROLL)
+ int omp_tile_depth = gfc_expr_list_len (omp_tile);
+ while (loop_transform_p (code->op))
{
tree clauses = gfc_trans_omp_clauses (pblock, code->ext.omp_clauses,
code->loc);
- loop_transform_clauses = chainon (loop_transform_clauses, clauses);
+ /* There might be several "!$omp tile" transformations surrounding the
+ loop. Use the innermost one which must have the largest tiling depth.
+ If an inner directive has a smaller tiling depth than an outer
+ directive, an error will be emitted in pass-omp_transform_loops. */
+ omp_tile_depth = gfc_expr_list_len (code->ext.omp_clauses->tile_sizes);
+
+ loop_transform_clauses = chainon (loop_transform_clauses, clauses);
code = code->block ? code->block->next : code->next;
}
- gcc_assert (code->op != EXEC_OMP_UNROLL);
+ gcc_assert (!loop_transform_p (code->op));
gcc_assert (code->op == EXEC_DO);
+ /* Both collapsed and tiled loops are lowered the same way. In
+ OpenACC, those clauses are not compatible, so prioritize the tile
+ clause, if present. */
+ if (oacc_tile)
+ collapse = gfc_expr_list_len (oacc_tile);
+
+ doacross_steps = NULL;
+ if (clauses->orderedc)
+ collapse = clauses->orderedc;
+ if (collapse <= 0)
+ collapse = 1;
+
+ collapse = MAX (collapse, omp_tile_depth);
+
init = make_tree_vec (collapse);
cond = make_tree_vec (collapse);
incr = make_tree_vec (collapse);
@@ -5346,7 +5384,7 @@ gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock,
on the simd construct and DO's clauses are translated elsewhere. */
do_clauses->sched_simd = false;
- if (op == EXEC_OMP_UNROLL)
+ if (loop_transform_p (op))
{
/* This is a loop transformation on a loop which is not associated with
any other directive. Use the directive location instead of the loop
@@ -5695,6 +5733,7 @@ gfc_trans_omp_do (gfc_code *code, gfc_exec_op op, stmtblock_t *pblock,
case EXEC_OMP_LOOP: stmt = make_node (OMP_LOOP); break;
case EXEC_OMP_TASKLOOP: stmt = make_node (OMP_TASKLOOP); break;
case EXEC_OACC_LOOP: stmt = make_node (OACC_LOOP); break;
+ case EXEC_OMP_TILE: stmt = make_node (OMP_LOOP_TRANS); break;
case EXEC_OMP_UNROLL: stmt = make_node (OMP_LOOP_TRANS); break;
default: gcc_unreachable ();
}
@@ -7793,6 +7832,7 @@ gfc_trans_omp_directive (gfc_code *code)
case EXEC_OMP_LOOP:
case EXEC_OMP_SIMD:
case EXEC_OMP_TASKLOOP:
+ case EXEC_OMP_TILE:
case EXEC_OMP_UNROLL:
return gfc_trans_omp_do (code, code->op, NULL, code->ext.omp_clauses,
NULL);
@@ -2520,6 +2520,7 @@ trans_code (gfc_code * code, tree cond)
case EXEC_OMP_TEAMS_DISTRIBUTE_PARALLEL_DO_SIMD:
case EXEC_OMP_TEAMS_DISTRIBUTE_SIMD:
case EXEC_OMP_TEAMS_LOOP:
+ case EXEC_OMP_TILE:
case EXEC_OMP_UNROLL:
case EXEC_OMP_WORKSHARE:
res = gfc_trans_omp_directive (code);
@@ -12105,6 +12105,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
case OMP_CLAUSE_UNROLL_FULL:
case OMP_CLAUSE_UNROLL_NONE:
case OMP_CLAUSE_UNROLL_PARTIAL:
+ case OMP_CLAUSE_TILE:
break;
case OMP_CLAUSE_NOHOST:
default:
@@ -13076,6 +13077,7 @@ gimplify_adjust_omp_clauses (gimple_seq *pre_p, gimple_seq body, tree *list_p,
case OMP_CLAUSE_FINALIZE:
case OMP_CLAUSE_INCLUSIVE:
case OMP_CLAUSE_EXCLUSIVE:
+ case OMP_CLAUSE_TILE:
case OMP_CLAUSE_UNROLL_FULL:
case OMP_CLAUSE_UNROLL_NONE:
case OMP_CLAUSE_UNROLL_PARTIAL:
@@ -15134,6 +15136,7 @@ gimplify_omp_loop (tree *expr_p, gimple_seq *pre_p)
}
pc = &OMP_CLAUSE_CHAIN (*pc);
break;
+ case OMP_CLAUSE_TILE:
case OMP_CLAUSE_UNROLL_PARTIAL:
case OMP_CLAUSE_UNROLL_FULL:
case OMP_CLAUSE_UNROLL_NONE:
@@ -2264,7 +2264,7 @@ omp_loop_transform_clause_p (tree c)
enum omp_clause_code code = OMP_CLAUSE_CODE (c);
return (code == OMP_CLAUSE_UNROLL_FULL || code == OMP_CLAUSE_UNROLL_PARTIAL
- || code == OMP_CLAUSE_UNROLL_NONE);
+ || code == OMP_CLAUSE_UNROLL_NONE || code == OMP_CLAUSE_TILE);
}
/* Try to resolve declare variant, return the variant decl if it should
@@ -211,6 +211,9 @@ gomp_for_constant_iterations_p (gomp_for *omp_for,
return true;
}
+static gimple_seq
+expand_transformed_loop (gomp_for *omp_for);
+
/* Split a gomp_for that represents a collapsed loop-nest into single
loops. The result is a gomp_for of the same kind which is not collapsed
(i.e. gimple_omp_for_collapse (OMP_FOR) == 1) and which contains nested,
@@ -220,7 +223,7 @@ gomp_for_constant_iterations_p (gomp_for *omp_for,
FROM_DEPTH are left collapsed. */
static gomp_for*
-gomp_for_uncollapse (gomp_for *omp_for, int from_depth = 0)
+gomp_for_uncollapse (gomp_for *omp_for, int from_depth = 0, bool expand = false)
{
int collapse = gimple_omp_for_collapse (omp_for);
gcc_assert (from_depth < collapse);
@@ -251,7 +254,11 @@ gomp_for_uncollapse (gomp_for *omp_for, int from_depth = 0)
gimple_omp_for_set_index (level_omp_for, 0,
gimple_omp_for_index (omp_for, level));
- body = level_omp_for;
+
+ if (expand)
+ body = expand_transformed_loop (level_omp_for);
+ else
+ body = level_omp_for;
}
omp_for->collapse = from_depth;
@@ -808,6 +815,316 @@ canonicalize_conditions (gomp_for *omp_for)
return new_decls;
}
+/* Execute the tiling transformation for OMP_FOR with the given TILE_SIZES and
+ return the resulting gimple bind. TILE_SIZES must be a non-empty tree chain
+ of integer constants and the collapse of OMP_FOR must be at least the length
+ of TILE_SIZES. TRANSFORMATION_CLAUSES are the loop transformations that
+ must be applied to OMP_FOR. Those are applied on the result of the tiling
+ transformation. LOC is the location for diagnostic messages.
+
+ Example 1
+ ---------
+ ---------
+
+ Original loop
+ -------------
+
+ #pragma omp for
+ #pragma omp tile sizes(3)
+ for (i = 1; i <= n; i = i + 1)
+ {
+ body;
+ }
+
+ Internally, the tile directive is represented as a clause on the
+ omp for, i.e. as #pragma omp for tile_sizes(3).
+
+ Transformed loop
+ ----------------
+
+ #pragma omp for
+ for (.omp_tile_index = 1; .omp_tile_index < ceil(n/3); .omp_tile_index = .omp_tile_index + 3)
+ {
+ D.4287 = .omp_tile_index + 3 + 1
+ #pragma omp loop_transform
+ for (i = .omp_tile_index; i < D.4287; i = i + 1)
+ {
+ if (i.0 > n)
+ goto L.0
+ body;
+ }
+ L_0:
+ }
+
+ The outer loop is the "floor loop" and the inner loop is the "tile
+ loop". The tile loop is never in canonical loop nest form and
+ hence it cannot be associated with any loop construct. The
+ GCC-internal "omp loop transform" construct will be lowered after
+ the tiling transformation.
+ */
+
+static gimple_seq
+tile (gomp_for *omp_for, location_t loc, tree tile_sizes,
+ tree transformation_clauses, walk_ctx *ctx)
+{
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE | MSG_PRIORITY_INTERNALS,
+ dump_user_location_t::from_location_t (loc),
+ "Executing tile transformation %T:\n %G\n",
+ transformation_clauses, static_cast<gimple *> (omp_for));
+
+ gimple_seq tile_loops = copy_gimple_seq_and_replace_locals (omp_for);
+ gimple_seq floor_loops = copy_gimple_seq_and_replace_locals (omp_for);
+
+ size_t collapse = gimple_omp_for_collapse (omp_for);
+ size_t tiling_depth = list_length (tile_sizes);
+ tree clauses = gimple_omp_for_clauses (omp_for);
+ size_t clause_collapse = 1;
+ tree collapse_clause = NULL;
+
+ if (tree c = omp_find_clause (clauses, OMP_CLAUSE_ORDERED))
+ {
+ error_at (OMP_CLAUSE_LOCATION (c),
+ "%<ordered%> invalid in conjunction with %<omp tile%>");
+ return omp_for;
+ }
+
+ if (tree c = omp_find_clause (clauses, OMP_CLAUSE_COLLAPSE))
+ {
+ tree expr = OMP_CLAUSE_COLLAPSE_EXPR (c);
+ clause_collapse = tree_to_uhwi (expr);
+ collapse_clause = c;
+ }
+
+ /* The 'omp tile' construct creates a canonical loop-nest whose nesting depth
+ equals tiling_depth. The whole loop-nest has depth at least 2 *
+ omp_tile_depth, but the 'tile loops' at levels
+ omp_tile_depth+1...2*omp_tile_depth are not in canonical loop-nest form
+ and hence cannot be associated with a loop construct. */
+ if (clause_collapse > tiling_depth)
+ {
+ error_at (OMP_CLAUSE_LOCATION (collapse_clause),
+ "collapse cannot extend below the floor loops "
+ "generated by the %<omp tile%> construct");
+ OMP_CLAUSE_COLLAPSE_EXPR (collapse_clause)
+ = build_int_cst (unsigned_type_node, tiling_depth);
+ return transform_gomp_for (omp_for, NULL, ctx);
+ }
+
+ if (tiling_depth > collapse)
+ return transform_gomp_for (omp_for, NULL, ctx);
+
+ gcc_assert (collapse >= clause_collapse);
+
+ push_gimplify_context ();
+
+ /* Create the index variables for iterating the tiles in the floor
+ loops first tiling_depth loops transformed loop nest. */
+ gimple_seq floor_loops_pre_body = NULL;
+ size_t tile_level = 0;
+ auto_vec<tree> sizes_vec;
+ for (tree el = tile_sizes; el; el = TREE_CHAIN (el), tile_level++)
+ {
+ size_t nest_level = tile_level;
+ tree index = gimple_omp_for_index (omp_for, nest_level);
+ tree init = gimple_omp_for_initial (omp_for, nest_level);
+ tree incr = gimple_omp_for_incr (omp_for, nest_level);
+ tree step = TREE_OPERAND (incr, 1);
+
+ /* Initialize original index variables in the pre-body. The
+ loop lowering will not initialize them because of the changed
+ index variables. */
+ gimplify_assign (index, init, &floor_loops_pre_body);
+
+ tree tile_size = fold_convert (TREE_TYPE (step), TREE_VALUE (el));
+ sizes_vec.safe_push (tile_size);
+ tree tile_index = create_tmp_var (TREE_TYPE (index), ".omp_tile_index");
+ gimplify_assign (tile_index, init, &floor_loops_pre_body);
+
+ /* Floor loops */
+ step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, tile_size);
+ tree tile_step = step;
+ /* For combined constructs, step will be gimplified on the outer
+ gomp_for. */
+ if (!gimple_omp_for_combined_into_p (omp_for) && !TREE_CONSTANT (step))
+ {
+ tile_step = create_tmp_var (TREE_TYPE (step), ".omp_tile_step");
+ gimplify_assign (tile_step, step, &floor_loops_pre_body);
+ }
+ incr = fold_build2 (TREE_CODE (incr), TREE_TYPE (incr), tile_index,
+ tile_step);
+ gimple_omp_for_set_incr (floor_loops, nest_level, incr);
+ gimple_omp_for_set_index (floor_loops, nest_level, tile_index);
+ }
+ gbind *result_bind = gimple_build_bind (NULL, NULL, NULL);
+ pop_gimplify_context (result_bind);
+ gimple_seq_add_seq (gimple_omp_for_pre_body_ptr (floor_loops),
+ floor_loops_pre_body);
+
+ /* The tiling loops will not form a perfect loop nest because the
+ loop for each tiling dimension needs to check if the current tile
+ is incomplete and this check is intervening code. Since OpenMP
+ 5.1 does not allow the collapse of the loop-nest to extend beyond
+ the floor loops, this is not a problem.
+
+ "Uncollapse" the tiling loop nest, i.e. split the loop nest into
+ nested separate gomp_for structures for each level. This allows
+ to add the incomplete tile checks to each level loop. */
+
+ tile_loops = gomp_for_uncollapse (as_a <gomp_for *> (tile_loops));
+ gimple_omp_for_set_kind (as_a<gomp_for *> (tile_loops),
+ GF_OMP_FOR_KIND_TRANSFORM_LOOP);
+ gimple_omp_for_set_clauses (tile_loops, NULL_TREE);
+ gimple_omp_for_set_pre_body (tile_loops, NULL);
+
+ /* Transform the loop bodies of the "uncollapsed" tiling loops and
+ add them to the body of the floor loops. At this point, the
+ loop nest consists of perfectly nested gimple_omp_for constructs,
+ each representing a single loop. */
+ gimple_seq floor_loops_body = NULL;
+ gimple *level_loop = tile_loops;
+ gimple_seq_add_stmt (&floor_loops_body, tile_loops);
+ gimple_seq *surrounding_seq = &floor_loops_body;
+
+ push_gimplify_context ();
+
+ tree break_label = create_artificial_label (UNKNOWN_LOCATION);
+ gimple_seq_add_stmt (surrounding_seq, gimple_build_label (break_label));
+ for (size_t level = 0; level < tiling_depth; level++)
+ {
+ tree original_index = gimple_omp_for_index (omp_for, level);
+ tree original_final = gimple_omp_for_final (omp_for, level);
+
+ tree tile_index = gimple_omp_for_index (floor_loops, level);
+ tree tile_size = sizes_vec[level];
+ tree type = TREE_TYPE (tile_index);
+ tree plus_type = type;
+
+ tree incr = gimple_omp_for_incr (omp_for, level);
+ tree step = omp_get_for_step_from_incr (gimple_location (omp_for), incr);
+
+ gimple_seq *pre_body = gimple_omp_for_pre_body_ptr (level_loop);
+ gimple_seq level_body = gimple_omp_body (level_loop);
+ gcc_assert (gimple_omp_for_collapse (level_loop) == 1);
+ tree_code original_cond = gimple_omp_for_cond (omp_for, level);
+
+ gimple_omp_for_set_initial (level_loop, 0, tile_index);
+
+ tree tile_final = create_tmp_var (type);
+ tree scaled_tile_size = fold_build2 (MULT_EXPR, TREE_TYPE (tile_size),
+ tile_size, step);
+
+ tree_code plus_code = PLUS_EXPR;
+ if (POINTER_TYPE_P (TREE_TYPE (tile_index)))
+ {
+ plus_code = POINTER_PLUS_EXPR;
+ int unsignedp = TYPE_UNSIGNED (TREE_TYPE (scaled_tile_size));
+ plus_type = signed_or_unsigned_type_for (unsignedp, ptrdiff_type_node);
+ }
+
+ scaled_tile_size = fold_convert (plus_type, scaled_tile_size);
+ gimplify_assign (tile_final,
+ fold_build2 (plus_code, type,
+ tile_index, scaled_tile_size),
+ pre_body);
+ gimple_omp_for_set_final (level_loop, 0, tile_final);
+
+ /* Redefine the original loop index variable of OMP_FOR in terms of the
+ floor loop and the tiling loop index variable for the current
+ dimension/level at the top of the loop. */
+ gimple_seq level_preamble = NULL;
+
+ push_gimplify_context ();
+
+ tree body_label = create_artificial_label (UNKNOWN_LOCATION);
+
+ /* Handle partial tiles, i.e. add a check that breaks from the tile loop
+ if the new index value does not belong to the iteration space of the
+ original loop. */
+ gimple_seq_add_stmt (&level_preamble,
+ gimple_build_cond (original_cond, original_index,
+ original_final, body_label,
+ break_label));
+ gimple_seq_add_stmt (&level_preamble, gimple_build_label (body_label));
+
+ auto gsi = gsi_start (level_body);
+ gsi_insert_seq_before (&gsi, level_preamble, GSI_SAME_STMT);
+ gbind *level_bind = gimple_build_bind (NULL, NULL, NULL);
+ pop_gimplify_context (level_bind);
+ gimple_bind_set_body (level_bind, level_body);
+ gimple_omp_set_body (level_loop, level_bind);
+
+ surrounding_seq = &level_body;
+ level_loop = gsi_stmt (gsi);
+
+ /* The label for jumping out of the loop at the next nesting
+ level. For the outermost level, the label is put after the
+ loop-nest, for the last one it is not necessary. */
+ if (level != tiling_depth - 1)
+ {
+ break_label = create_artificial_label (UNKNOWN_LOCATION);
+ gsi_insert_after (&gsi, gimple_build_label (break_label),
+ GSI_NEW_STMT);
+ }
+ }
+
+ gbind *tile_loops_bind;
+ tile_loops_bind = gimple_build_bind (NULL, tile_loops, NULL);
+ pop_gimplify_context (tile_loops_bind);
+
+ gimple_omp_set_body (floor_loops, tile_loops_bind);
+
+ tree remaining_clauses = OMP_CLAUSE_CHAIN (transformation_clauses);
+
+ /* Collapsing of the OMP_FOR is used both for the "omp tile"
+ implementation and for the actual "collapse" clause. If the
+ tiling depth was greater than the collapse depth required by the
+ clauses on OMP_FOR, the collapse of OMP_FOR must be adjusted to
+ the latter value and all loops below the new collapse depth must
+ be transformed to GF_OMP_FOR_KIND_TRANSFORM_LOOP to ensure their
+ lowering in this pass. */
+ size_t new_collapse = clause_collapse;
+
+ /* Keep the omp_for collapsed if there are further transformations */
+ if (remaining_clauses)
+ {
+ size_t next_transform_depth = 1;
+ if (OMP_CLAUSE_CODE (remaining_clauses) == OMP_CLAUSE_TILE)
+ next_transform_depth
+ = list_length (OMP_CLAUSE_TILE_SIZES (remaining_clauses));
+
+ /* The current "omp tile" transformation reduces the nesting depth
+ of the canonical loop-nest to TILING_DEPTH.
+ Hence the following "omp tile" transformation is invalid if
+ it requires a greater nesting depth. */
+ gcc_assert (next_transform_depth <= tiling_depth);
+ if (next_transform_depth > new_collapse)
+ new_collapse = next_transform_depth;
+ }
+
+ if (collapse > new_collapse)
+ floor_loops = gomp_for_uncollapse (as_a<gomp_for *> (floor_loops),
+ new_collapse, true);
+
+ /* Lower the uncollapsed tile loops. */
+ walk_omp_for_loops (gimple_bind_body_ptr (tile_loops_bind), ctx);
+
+ gcc_assert (remaining_clauses || !collapse_clause
+ || gimple_omp_for_collapse (floor_loops)
+ == (size_t)clause_collapse);
+
+ if (gimple_omp_for_combined_into_p (omp_for))
+ ctx->inner_combined_loop = as_a<gomp_for *> (floor_loops);
+
+ /* Apply remaining transformation clauses and assemble the transformation
+ result. */
+ gimple_bind_set_body (result_bind,
+ transform_gomp_for (as_a<gomp_for *> (floor_loops),
+ remaining_clauses, ctx));
+
+ return result_bind;
+}
+
/* Combined distribute or taskloop constructs are represented by two
or more nested gomp_for constructs which are created during
gimplification. Loop transformations on the combined construct are
@@ -999,6 +1316,10 @@ transform_gomp_for (gomp_for *omp_for, tree transformation, walk_ctx *ctx)
ctx);
}
break;
+ case OMP_CLAUSE_TILE:
+ result = tile (omp_for, loc, OMP_CLAUSE_TILE_SIZES (transformation),
+ transformation, ctx);
+ break;
default:
gcc_unreachable ();
}
@@ -1177,6 +1498,21 @@ optimize_transformation_clauses (tree clauses)
unroll_partial = c;
}
break;
+ case OMP_CLAUSE_TILE:
+ {
+ /* No optimization for those clauses yet, but they end any chain of
+ "unroll partial" clauses. */
+ if (merged_unroll_partial && dump_enabled_p ())
+ print_optimized_unroll_partial_msg (unroll_partial);
+
+ if (unroll_partial)
+ OMP_CLAUSE_CHAIN (unroll_partial) = c;
+
+ unroll_partial = NULL;
+ merged_unroll_partial = false;
+ last_non_unroll = c;
+ }
+ break;
default:
gcc_unreachable ();
}
new file mode 100644
@@ -0,0 +1,163 @@
+subroutine test
+ implicit none
+ integer :: i, j, k
+
+ !$omp tile sizes(1)
+ do i = 1,100
+ call dummy(i)
+ end do
+
+ !$omp tile sizes(1)
+ do i = 1,100
+ call dummy(i)
+ end do
+ !$end omp tile
+
+ !$omp tile sizes(2+3)
+ do i = 1,100
+ call dummy(i)
+ end do
+ !$end omp tile
+
+ !$omp tile sizes(-21) ! { dg-error {tile size not constant positive integer at \(1\)} }
+ do i = 1,100
+ call dummy(i)
+ end do
+ !$end omp tile
+
+ !$omp tile sizes(0) ! { dg-error {tile size not constant positive integer at \(1\)} }
+ do i = 1,100
+ call dummy(i)
+ end do
+ !$end omp tile
+
+ !$omp tile sizes(i) ! { dg-error {Constant expression required at \(1\)} }
+ do i = 1,100
+ call dummy(i)
+ end do
+ !$end omp tile
+
+ !$omp tile sizes ! { dg-error {Syntax error in 'tile sizes' list at \(1\)} }
+ do i = 1,100
+ call dummy(i)
+ end do
+ !$end omp tile
+
+ !$omp tile sizes( ! { dg-error {Syntax error in 'tile sizes' list at \(1\)} }
+ do i = 1,100
+ call dummy(i)
+ end do
+ !$end omp tile
+
+ !$omp tile sizes(2 ! { dg-error {Syntax error in 'tile sizes' list at \(1\)} }
+ do i = 1,100
+ call dummy(i)
+ end do
+ !$end omp tile
+
+ !$omp tile sizes() ! { dg-error {Syntax error in 'tile sizes' list at \(1\)} }
+ do i = 1,100
+ call dummy(i)
+ end do
+ !$end omp tile
+
+ !$omp tile sizes(2,) ! { dg-error {Syntax error in 'tile sizes' list at \(1\)} }
+ do i = 1,100
+ call dummy(i)
+ end do
+ !$end omp tile
+
+ !$omp tile sizes(,2) ! { dg-error {Syntax error in 'tile sizes' list at \(1\)} }
+ do i = 1,100
+ call dummy(i)
+ end do
+ !$end omp tile
+
+ !$omp tile sizes(,i) ! { dg-error {Syntax error in 'tile sizes' list at \(1\)} }
+ do i = 1,100
+ call dummy(i)
+ end do
+ !$end omp tile
+
+ !$omp tile sizes(i,) ! { dg-error {Constant expression required at \(1\)} }
+ do i = 1,100
+ call dummy(i)
+ end do
+ !$end omp tile
+
+ !$omp tile sizes(1,2)
+ do i = 1,100
+ do j = 1,100
+ call dummy(j)
+ end do
+ end do
+ !$end omp tile
+
+ !$omp tile sizes(1,2) ! { dg-error {not enough DO loops for \!\$OMP TILE at \(1\)} }
+ do i = 1,100
+ call dummy(i)
+ end do
+ !$end omp tile
+
+ !$omp tile sizes(1,2,1) ! { dg-error {not enough DO loops for \!\$OMP TILE at \(1\)} }
+ do i = 1,100
+ do j = 1,100
+ call dummy(i)
+ end do
+ end do
+ !$end omp tile
+
+ !$omp tile sizes(1,2,1)
+ do i = 1,100
+ do j = 1,100
+ do k = 1,100
+ call dummy(i)
+ end do
+ end do
+ end do
+ !$end omp tile
+
+ !$omp tile sizes(1,2,1)
+ do i = 1,100
+ do j = 1,100
+ do k = 1,100
+ call dummy(i)
+ end do
+ end do
+ call dummy(i) ! { dg-error {\!\$OMP TILE loops not perfectly nested at \(1\)} }
+ end do
+ !$end omp tile
+
+ !$omp tile sizes(1,2,1)
+ do i = 1,100
+ do j = 1,100
+ do k = 1,100
+ call dummy(i)
+ end do
+ call dummy(j) ! { dg-error {\!\$OMP TILE loops not perfectly nested at \(1\)} }
+ end do
+ end do
+ !$end omp tile
+
+ !$omp tile sizes(1,2,1) ! { dg-error {not enough DO loops for \!\$OMP TILE at \(1\)} }
+ do i = 1,100
+ call dummy(i)
+ do j = 1,100
+ do k = 1,100
+ call dummy(i)
+ end do
+ end do
+ end do
+ !$end omp tile
+
+ !$omp tile sizes(1,2,1) ! { dg-error {not enough DO loops for \!\$OMP TILE at \(1\)} }
+ do i = 1,100
+ do j = 1,100
+ call dummy(j)
+ do k = 1,100
+ call dummy(i)
+ end do
+ end do
+ end do
+ !$end omp tile
+end subroutine test
new file mode 100644
@@ -0,0 +1,10 @@
+
+subroutine test
+ !$omp tile sizes(1,2,1) ! { dg-error {not enough DO loops for \!\$OMP TILE at \(1\)} }
+ do i = 1,100
+ do j = 1,100
+ call dummy(i)
+ end do
+ end do
+ !$end omp tile
+end subroutine test
new file mode 100644
@@ -0,0 +1,80 @@
+subroutine test1
+ implicit none
+ integer :: i, j, k
+
+ !$omp tile sizes (1,2)
+ !$omp tile sizes (1,2)
+ do i = 1,100
+ do j = 1,100
+ call dummy(j)
+ do k = 1,100
+ call dummy(i)
+ end do
+ end do
+ end do
+ !$end omp tile
+
+ !$omp tile sizes (8)
+ !$omp tile sizes (1,2)
+ !$omp tile sizes (1,2,3)
+ do i = 1,100
+ do j = 1,100
+ do k = 1,100
+ call dummy(i)
+ end do
+ end do
+ end do
+ !$end omp tile
+end subroutine test1
+
+subroutine test2
+ implicit none
+ integer :: i, j, k
+
+ !$omp taskloop collapse(2)
+ !$omp tile sizes (3,4)
+ !$omp tile sizes (1,2)
+ do i = 1,100
+ do j = 1,100
+ call dummy(j)
+ do k = 1,100
+ call dummy(i)
+ end do
+ end do
+ end do
+ !$end omp tile
+ !$omp end taskloop
+
+ !$omp taskloop simd
+ !$omp tile sizes (8)
+ !$omp tile sizes (1,2)
+ !$omp tile sizes (1,2,3)
+ do i = 1,100
+ do j = 1,100
+ do k = 1,100
+ call dummy(i)
+ end do
+ end do
+ end do
+ !$end omp tile
+ !$omp end taskloop simd
+end subroutine test2
+
+subroutine test3
+ implicit none
+ integer :: i, j, k
+
+ !$omp taskloop collapse(3) ! { dg-error {not enough DO loops for collapsed \!\$OMP TASKLOOP at \(1\)} }
+ !$omp tile sizes (1,2) ! { dg-error {loop nest depth after \!\$OMP TILE at \(1\) is insufficient for outer \!\$OMP TASKLOOP} }
+ !$omp tile sizes (1,2)
+ do i = 1,100
+ do j = 1,100
+ call dummy(j)
+ do k = 1,100
+ call dummy(i)
+ end do
+ end do
+ end do
+ !$end omp tile
+ !$omp end taskloop
+end subroutine test3
new file mode 100644
@@ -0,0 +1,18 @@
+subroutine test
+ implicit none
+ integer :: i, j, k
+
+ !$omp parallel do collapse(2) ordered(2)
+ !$omp tile sizes (1,2)
+ do i = 1,100 ! { dg-error {'ordered' invalid in conjunction with 'omp tile'} }
+ do j = 1,100
+ call dummy(j)
+ do k = 1,100
+ call dummy(i)
+ end do
+ end do
+ end do
+ !$end omp tile
+ !$end omp target
+
+end subroutine test
new file mode 100644
@@ -0,0 +1,95 @@
+
+subroutine test1
+ implicit none
+ integer :: i, j, k
+
+ !$omp tile sizes (1,2)
+ !$omp tile sizes (1) ! { dg-error {loop nest depth after \!\$OMP TILE at \(1\) is insufficient for outer \!\$OMP TILE} }
+ do i = 1,100
+ do j = 1,100
+ call dummy(j)
+ do k = 1,100
+ call dummy(i)
+ end do
+ end do
+ end do
+ !$end omp tile
+
+end subroutine test1
+
+subroutine test2
+ implicit none
+ integer :: i, j, k
+
+ !$omp tile sizes (1,2)
+ !$omp tile sizes (1) ! { dg-error {loop nest depth after \!\$OMP TILE at \(1\) is insufficient for outer \!\$OMP TILE} }
+ do i = 1,100
+ do j = 1,100
+ call dummy(j)
+ do k = 1,100
+ call dummy(i)
+ end do
+ end do
+ end do
+ !$end omp tile
+
+end subroutine test2
+
+subroutine test3
+ implicit none
+ integer :: i, j, k
+
+ !$omp target teams distribute
+ !$omp tile sizes (1,2)
+ !$omp tile sizes (1) ! { dg-error {loop nest depth after \!\$OMP TILE at \(1\) is insufficient for outer \!\$OMP TILE} }
+ do i = 1,100
+ do j = 1,100
+ call dummy(j)
+ do k = 1,100
+ call dummy(i)
+ end do
+ end do
+ end do
+ !$end omp tile
+
+end subroutine test3
+
+subroutine test4
+ implicit none
+ integer :: i, j, k
+
+ !$omp target teams distribute collapse(2)
+ !$omp tile sizes (8) ! { dg-error {loop nest depth after \!\$OMP TILE at \(1\) is insufficient for outer \!\$OMP TARGET TEAMS DISTRIBUTE} }
+ !$omp tile sizes (1,2)
+ do i = 1,100
+ do j = 1,100
+ call dummy(j)
+ do k = 1,100
+ call dummy(i)
+ end do
+ end do
+ end do
+ !$end omp tile
+
+end subroutine test4
+
+subroutine test5
+ implicit none
+ integer :: i, j, k
+
+ !$omp parallel do collapse(2) ordered(2)
+ !$omp tile sizes (8) ! { dg-error {loop nest depth after \!\$OMP TILE at \(1\) is insufficient for outer \!\$OMP PARALLEL DO} }
+ !$omp tile sizes (1,2)
+ do i = 1,100
+ do j = 1,100
+ call dummy(j)
+ do k = 1,100
+ call dummy(i)
+ end do
+ end do
+ end do
+ !$end omp tile
+ !$end omp tile
+ !$end omp target
+
+end subroutine test5
new file mode 100644
@@ -0,0 +1,57 @@
+function mult (a, b) result (c)
+ integer, allocatable, dimension (:,:) :: a,b,c
+ integer :: i, j, k, inner
+
+ allocate(c( n, m ))
+
+ !$omp parallel do collapse(2)
+ !$omp tile sizes (8,8)
+ !$omp unroll partial(2) ! { dg-error {loop nest depth after \!\$OMP UNROLL at \(1\) is insufficient for outer \!\$OMP TILE} }
+ ! { dg-error {loop nest depth after \!\$OMP UNROLL at \(1\) is insufficient for outer \!\$OMP PARALLEL DO} "" { target *-*-*} .-1 }
+ do i = 1,m
+ do j = 1,n
+ inner = 0
+ do k = 1, n
+ inner = inner + a(k, i) * b(j, k)
+ end do
+ c(j, i) = inner
+ end do
+ end do
+
+ !$omp tile sizes (8,8)
+ !$omp unroll partial(2) ! { dg-error {loop nest depth after \!\$OMP UNROLL at \(1\) is insufficient for outer \!\$OMP TILE} }
+ do i = 1,m
+ do j = 1,n
+ inner = 0
+ do k = 1, n
+ inner = inner + a(k, i) * b(j, k)
+ end do
+ c(j, i) = inner
+ end do
+ end do
+
+ !$omp tile sizes (8)
+ !$omp unroll partial(1)
+ do i = 1,m
+ do j = 1,n
+ inner = 0
+ do k = 1, n
+ inner = inner + a(k, i) * b(j, k)
+ end do
+ c(j, i) = inner
+ end do
+ end do
+
+ !$omp parallel do collapse(2) ! { dg-error {missing canonical loop nest after \!\$OMP PARALLEL DO at \(1\)} }
+ !$omp tile sizes (8,8) ! { dg-error {missing canonical loop nest after \!\$OMP TILE at \(1\)} }
+ !$omp unroll full ! { dg-warning {\!\$OMP UNROLL with FULL clause at \(1\) turns loop into a non-loop} }
+ do i = 1,m
+ do j = 1,n
+ inner = 0
+ do k = 1, n
+ inner = inner + a(k, i) * b(j, k)
+ end do
+ c(j, i) = inner
+ end do
+ end do
+end function mult
new file mode 100644
@@ -0,0 +1,37 @@
+! { dg-additional-options "-fdump-tree-original" }
+! { dg-additional-options "-fdump-tree-omp_transform_loops" }
+
+function mult (a, b) result (c)
+ integer, allocatable, dimension (:,:) :: a,b,c
+ integer :: i, j, k, inner
+
+ allocate(c( n, m ))
+
+ !$omp parallel do
+ !$omp unroll partial(1)
+ !$omp tile sizes (8,8)
+ do i = 1,m
+ do j = 1,n
+ inner = 0
+ do k = 1, n
+ inner = inner + a(k, i) * b(j, k)
+ end do
+ c(j, i) = inner
+ end do
+ end do
+end function mult
+
+! { dg-final { scan-tree-dump-times {#pragma omp for nowait unroll_partial\(1\) tile sizes\(8, 8\)} 1 "original" } }
+! { dg-final { scan-tree-dump-not "#pragma omp loop_transform unroll_partial" "omp_transform_loops" } }
+
+! Tiling adds two floor and two tile loops.
+
+! Number of conditional statements after tiling:
+! 5
+! = 2 (lowering of 2 tile loops)
+! + 1 (partial tile handling in 2 tile loops)
+! + 1 (lowering of non-associated floor loop)
+
+! The unrolling with unroll factor 1 currently gets executed (TODO could/should be skipped?)
+
+! { dg-final { scan-tree-dump-times {if \([A-Za-z0-9_.]+ < } 5 "omp_transform_loops" } }
new file mode 100644
@@ -0,0 +1,41 @@
+! { dg-additional-options "-fdump-tree-original" }
+! { dg-additional-options "-fdump-tree-omp_transform_loops" }
+
+function mult (a, b) result (c)
+ integer, allocatable, dimension (:,:) :: a,b,c
+ integer :: i, j, k, inner
+
+ allocate(c( n, m ))
+ c = 0
+
+ !$omp target
+ !$omp parallel do
+ !$omp unroll partial(2)
+ !$omp tile sizes (8,8,4)
+ do i = 1,m
+ do j = 1,n
+ do k = 1, n
+ c(j,i) = c(j,i) + a(k, i) * b(j, k)
+ end do
+ end do
+ end do
+ !$omp end target
+end function mult
+
+! { dg-final { scan-tree-dump-times {#pragma omp for nowait unroll_partial\(2\) tile sizes\(8, 8, 4\)} 1 "original" } }
+! { dg-final { scan-tree-dump-not "#pragma omp loop_transform unroll_partial" "omp_transform_loops" } }
+
+! Check the number of loops
+
+! Tiling adds three tile and three floor loops.
+! The outermost floor loop is associated with the "!$omp parallel do"
+! and hence it isn't lowered in the transformation pass.
+! Number of conditional statements after tiling:
+! 8
+! = 2 (inner floor loop lowering)
+! + 3 (partial tile handling in 3 tile loops)
+! + 3 (lowering of 3 tile loops)
+!
+! Unrolling creates 2 copies of the tiled loop nest.
+
+! { dg-final { scan-tree-dump-times {if \([A-Za-z0-9_.]+ < } 16 "omp_transform_loops" } }
@@ -534,6 +534,9 @@ enum omp_clause_code {
/* Internal representation for an "omp unroll partial" directive. */
OMP_CLAUSE_UNROLL_PARTIAL,
+
+ /* Represents a "tile" directive internally. */
+ OMP_CLAUSE_TILE
};
#undef DEFTREESTRUCT
@@ -521,6 +521,14 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, dump_flags_t flags)
pp_right_paren (pp);
}
break;
+ case OMP_CLAUSE_TILE:
+ pp_string (pp, "tile sizes");
+ pp_left_paren (pp);
+ gcc_assert (OMP_CLAUSE_TILE_SIZES (clause));
+ dump_generic_node (pp, OMP_CLAUSE_TILE_SIZES (clause), spc, flags,
+ false);
+ pp_right_paren (pp);
+ break;
case OMP_CLAUSE__LOOPTEMP_:
name = "_looptemp_";
goto print_remap;
@@ -327,8 +327,10 @@ unsigned const char omp_clause_num_ops[] =
0, /* OMP_CLAUSE_FINALIZE */
0, /* OMP_CLAUSE_NOHOST */
0, /* OMP_CLAUSE_UNROLL_FULL */
+
0, /* OMP_CLAUSE_UNROLL_NONE */
- 1 /* OMP_CLAUSE_UNROLL_PARTIAL */
+ 1, /* OMP_CLAUSE_UNROLL_PARTIAL */
+ 1 /* OMP_CLAUSE_TILE */
};
const char * const omp_clause_code_name[] =
@@ -422,7 +424,8 @@ const char * const omp_clause_code_name[] =
"nohost",
"unroll_full",
"unroll_none",
- "unroll_partial"
+ "unroll_partial",
+ "tile"
};
/* Unless specific to OpenACC, we tend to internally maintain OpenMP-centric
@@ -1790,6 +1790,9 @@ class auto_suppress_location_wrappers
#define OMP_CLAUSE_UNROLL_PARTIAL_EXPR(NODE) \
OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_UNROLL_PARTIAL), 0)
+#define OMP_CLAUSE_TILE_SIZES(NODE) \
+ OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_TILE), 0)
+
#define OMP_CLAUSE_PROC_BIND_KIND(NODE) \
(OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_PROC_BIND)->omp_clause.subcode.proc_bind_kind)
new file mode 100644
@@ -0,0 +1,84 @@
+#include <stdlib.h>
+#include <stdio.h>
+
+template <int dim0, int dim1>
+int sum ()
+{
+ int sum = 0;
+#pragma omp unroll full
+#pragma omp tile sizes(dim0, dim1)
+ for (unsigned i = 0; i < 4; i++)
+ for (unsigned j = 0; j < 5; j++)
+ sum++;
+
+ return sum;
+}
+
+int main ()
+{
+ if (sum <1,1> () != 20)
+ __builtin_abort ();
+ if (sum <1,2> () != 20)
+ __builtin_abort ();
+ if (sum <1,3> () != 20)
+ __builtin_abort ();
+ if (sum <1,4> () != 20)
+ __builtin_abort ();
+ if (sum <1,5> () != 20)
+ __builtin_abort ();
+
+ if (sum <2,1> () != 20)
+ __builtin_abort ();
+ if (sum <2,2> () != 20)
+ __builtin_abort ();
+ if (sum <2,3> () != 20)
+ __builtin_abort ();
+ if (sum <2,4> () != 20)
+ __builtin_abort ();
+ if (sum <2,5> () != 20)
+ __builtin_abort ();
+
+ if (sum <3,1> () != 20)
+ __builtin_abort ();
+ if (sum <3,2> () != 20)
+ __builtin_abort ();
+ if (sum <3,3> () != 20)
+ __builtin_abort ();
+ if (sum <3,4> () != 20)
+ __builtin_abort ();
+ if (sum <3,5> () != 20)
+ __builtin_abort ();
+
+ if (sum <4,1> () != 20)
+ __builtin_abort ();
+ if (sum <4,2> () != 20)
+ __builtin_abort ();
+ if (sum <4,3> () != 20)
+ __builtin_abort ();
+ if (sum <4,4> () != 20)
+ __builtin_abort ();
+ if (sum <4,5> () != 20)
+ __builtin_abort ();
+
+ if (sum <5,1> () != 20)
+ __builtin_abort ();
+ if (sum <5,2> () != 20)
+ __builtin_abort ();
+ if (sum <5,3> () != 20)
+ __builtin_abort ();
+ if (sum <5,4> () != 20)
+ __builtin_abort ();
+ if (sum <5,5> () != 20)
+ __builtin_abort ();
+
+ if (sum <6,1> () != 20)
+ __builtin_abort ();
+ if (sum <6,2> () != 20)
+ __builtin_abort ();
+ if (sum <6,3> () != 20)
+ __builtin_abort ();
+ if (sum <6,4> () != 20)
+ __builtin_abort ();
+ if (sum <6,5> () != 20)
+ __builtin_abort ();
+}
new file mode 100644
@@ -0,0 +1,71 @@
+module matrix
+ implicit none
+ integer :: n = 10
+ integer :: m = 10
+
+contains
+ function mult (a, b) result (c)
+ integer, allocatable, dimension (:,:) :: a,b,c
+ integer :: i, j, k, inner
+
+ allocate(c( n, m ))
+ !$omp parallel do collapse(2) private(inner)
+ !$omp tile sizes (8, 1)
+ do i = 1,m
+ do j = 1,n
+ inner = 0
+ do k = 1, n
+ inner = inner + a(k, i) * b(j, k)
+ end do
+ c(j, i) = inner
+ end do
+ end do
+ end function mult
+
+ subroutine print_matrix (m)
+ integer, allocatable :: m(:,:)
+ integer :: i, j, n
+
+ n = size (m, 1)
+ do i = 1,n
+ do j = 1,n
+ write (*, fmt="(i4)", advance='no') m(j, i)
+ end do
+ write (*, *) ""
+ end do
+ write (*, *) ""
+ end subroutine
+
+end module matrix
+
+program main
+ use matrix
+ implicit none
+
+ integer, allocatable :: a(:,:),b(:,:),c(:,:)
+ integer :: i,j
+
+ allocate(a( n, m ))
+ allocate(b( n, m ))
+
+ do i = 1,n
+ do j = 1,m
+ a(j,i) = merge(1,0, i.eq.j)
+ b(j,i) = j
+ end do
+ end do
+
+ c = mult (a, b)
+
+ call print_matrix (a)
+ call print_matrix (b)
+ call print_matrix (c)
+
+ do i = 1,n
+ do j = 1,m
+ if (b(i,j) .ne. c(i,j)) call abort ()
+ end do
+ end do
+
+
+end program main
new file mode 100644
@@ -0,0 +1,117 @@
+! { dg-additional-options "-fdump-tree-original" }
+! { dg-do run }
+
+module test_functions
+ contains
+ integer function compute_sum1() result(sum)
+ implicit none
+
+ integer :: i,j
+
+ sum = 0
+ !$omp do
+ do i = 1,10,3
+ !$omp tile sizes(2)
+ do j = 1,10,3
+ sum = sum + 1
+ end do
+ end do
+ end function
+
+ integer function compute_sum2() result(sum)
+ implicit none
+
+ integer :: i,j
+
+ sum = 0
+ !$omp do
+ do i = 1,10,3
+ !$omp tile sizes(16)
+ do j = 1,10,3
+ sum = sum + 1
+ end do
+ end do
+ end function
+
+ integer function compute_sum3() result(sum)
+ implicit none
+
+ integer :: i,j
+
+ sum = 0
+ !$omp do
+ do i = 1,10,3
+ !$omp tile sizes(100)
+ do j = 1,10,3
+ sum = sum + 1
+ end do
+ end do
+ end function
+
+ integer function compute_sum4() result(sum)
+ implicit none
+
+ integer :: i,j
+
+ sum = 0
+ !$omp do
+ !$omp tile sizes(6,10)
+ do i = 1,10,3
+ do j = 1,10,3
+ sum = sum + 1
+ end do
+ end do
+ end function
+
+ integer function compute_sum5() result(sum)
+ implicit none
+
+ integer :: i,j
+
+ sum = 0
+ !$omp parallel do collapse(2)
+ !$omp tile sizes(6,10)
+ do i = 1,10,3
+ do j = 1,10,3
+ sum = sum + 1
+ end do
+ end do
+ end function
+end module test_functions
+
+program test
+ use test_functions
+ implicit none
+
+ integer :: result
+
+ result = compute_sum1 ()
+ write (*,*) result
+ if (result .ne. 16) then
+ call abort
+ end if
+
+ result = compute_sum2 ()
+ write (*,*) result
+ if (result .ne. 16) then
+ call abort
+ end if
+
+ result = compute_sum3 ()
+ write (*,*) result
+ if (result .ne. 16) then
+ call abort
+ end if
+
+ result = compute_sum4 ()
+ write (*,*) result
+ if (result .ne. 16) then
+ call abort
+ end if
+
+ result = compute_sum5 ()
+ write (*,*) result
+ if (result .ne. 16) then
+ call abort
+ end if
+end program
new file mode 100644
@@ -0,0 +1,112 @@
+module matrix
+ implicit none
+ integer :: n = 10
+ integer :: m = 10
+
+contains
+
+ function mult (a, b) result (c)
+ integer, allocatable, dimension (:,:) :: a,b,c
+ integer :: i, j, k, inner
+
+ allocate(c( n, m ))
+ do i = 1,10
+ do j = 1,n
+ c(j,i) = 0
+ end do
+ end do
+
+ !$omp unroll partial(10)
+ !$omp tile sizes(1, 3)
+ do i = 1,10
+ do j = 1,n
+ do k = 1, n
+ write (*,*) i, j, k
+ c(j,i) = c(j,i) + a(k, i) * b(j, k)
+ end do
+ end do
+ end do
+ end function mult
+
+ function mult2 (a, b) result (c)
+ integer, allocatable, dimension (:,:) :: a,b,c
+ integer :: i, j, k, inner
+
+ allocate(c( n, m ))
+ do i = 1,10
+ do j = 1,n
+ c(j,i) = 0
+ end do
+ end do
+
+ !$omp unroll partial(2)
+ !$omp tile sizes(1,2)
+ do i = 1,10
+ do j = 1,n
+ do k = 1, n
+ write (*,*) i, j, k
+ c(j,i) = c(j,i) + a(k, i) * b(j, k)
+ end do
+ end do
+ end do
+ end function mult2
+
+ subroutine print_matrix (m)
+ integer, allocatable :: m(:,:)
+ integer :: i, j, n
+
+ n = size (m, 1)
+ do i = 1,n
+ do j = 1,n
+ write (*, fmt="(i4)", advance='no') m(j, i)
+ end do
+ write (*, *) ""
+ end do
+ write (*, *) ""
+ end subroutine
+
+end module matrix
+
+program main
+ use matrix
+ implicit none
+
+ integer, allocatable :: a(:,:),b(:,:),c(:,:)
+ integer :: i,j
+
+ allocate(a( n, m ))
+ allocate(b( n, m ))
+
+ do i = 1,n
+ do j = 1,m
+ a(j,i) = merge(1,0, i.eq.j)
+ b(j,i) = j
+ end do
+ end do
+
+ ! c = mult (a, b)
+
+ ! call print_matrix (a)
+ ! call print_matrix (b)
+ ! call print_matrix (c)
+
+ ! do i = 1,n
+ ! do j = 1,m
+ ! if (b(i,j) .ne. c(i,j)) call abort ()
+ ! end do
+ ! end do
+
+
+ c = mult2 (a, b)
+
+ call print_matrix (a)
+ call print_matrix (b)
+ call print_matrix (c)
+
+ do i = 1,n
+ do j = 1,m
+ if (b(i,j) .ne. c(i,j)) call abort ()
+ end do
+ end do
+
+end program main
new file mode 100644
@@ -0,0 +1,71 @@
+module matrix
+ implicit none
+ integer :: n = 10
+ integer :: m = 10
+
+contains
+
+ function copy (a, b) result (c)
+ integer, allocatable, dimension (:,:) :: a,b,c
+ integer :: i, j, k, inner
+
+ allocate(c( n, m ))
+ do i = 1,10
+ do j = 1,n
+ c(j,i) = 0
+ end do
+ end do
+
+ !$omp unroll partial(2)
+ !$omp tile sizes (1,5)
+ do i = 1,10
+ do j = 1,n
+ c(j,i) = c(j,i) + a(j, i)
+ end do
+ end do
+ end function copy
+
+ subroutine print_matrix (m)
+ integer, allocatable :: m(:,:)
+ integer :: i, j, n
+
+ n = size (m, 1)
+ do i = 1,n
+ do j = 1,n
+ write (*, fmt="(i4)", advance='no') m(j, i)
+ end do
+ write (*, *) ""
+ end do
+ write (*, *) ""
+ end subroutine
+end module matrix
+
+program main
+ use matrix
+ implicit none
+
+ integer, allocatable :: a(:,:),b(:,:),c(:,:)
+ integer :: i,j
+
+ allocate(a( n, m ))
+ allocate(b( n, m ))
+
+ do i = 1,n
+ do j = 1,m
+ a(j,i) = 1
+ end do
+ end do
+
+ c = copy (a, b)
+
+ call print_matrix (a)
+ call print_matrix (b)
+ call print_matrix (c)
+
+ do i = 1,n
+ do j = 1,m
+ if (c(i,j) .ne. a(i,j)) call abort ()
+ end do
+ end do
+
+end program main
new file mode 100644
@@ -0,0 +1,77 @@
+module matrix
+ implicit none
+ integer :: n = 4
+ integer :: m = 4
+
+contains
+ function mult (a, b) result (c)
+ integer, allocatable, dimension (:,:) :: a,b,c
+ integer :: i, j, k, inner
+
+ allocate(c( n, m ))
+ ! omp do private(inner)
+ do i = 1,m
+ !$omp unroll partial(4)
+ !$omp tile sizes (5)
+ do j = 1,n
+ do k = 1, n
+ write (*,*) "i", i, "j", j, "k", k
+ if (k == 1) then
+ inner = 0
+ endif
+ inner = inner + a(k, i) * b(j, k)
+ if (k == n) then
+ c(j, i) = inner
+ endif
+ end do
+ end do
+ end do
+ end function mult
+
+ subroutine print_matrix (m)
+ integer, allocatable :: m(:,:)
+ integer :: i, j, n
+
+ n = size (m, 1)
+ do i = 1,n
+ do j = 1,n
+ write (*, fmt="(i4)", advance='no') m(j, i)
+ end do
+ write (*, *) ""
+ end do
+ write (*, *) ""
+ end subroutine
+
+end module matrix
+
+program main
+ use matrix
+ implicit none
+
+ integer, allocatable :: a(:,:),b(:,:),c(:,:)
+ integer :: i,j
+
+ allocate(a( n, m ))
+ allocate(b( n, m ))
+
+ do i = 1,n
+ do j = 1,m
+ a(j,i) = merge(1,0, i.eq.j)
+ b(j,i) = j
+ end do
+ end do
+
+ c = mult (a, b)
+
+ call print_matrix (a)
+ call print_matrix (b)
+ call print_matrix (c)
+
+ do i = 1,n
+ do j = 1,m
+ if (b(i,j) .ne. c(i,j)) call abort ()
+ end do
+ end do
+
+
+end program main
new file mode 100644
@@ -0,0 +1,75 @@
+module matrix
+ implicit none
+ integer :: n = 4
+ integer :: m = 4
+
+contains
+ function mult (a, b) result (c)
+ integer, allocatable, dimension (:,:) :: a,b,c
+ integer :: i, j, k, inner
+
+ allocate(c( n, m ))
+ do i = 1,m
+ do j = 1,n
+ c(j, i) = 0
+ end do
+ end do
+
+ !$omp parallel do
+ do i = 1,m
+ !$omp tile sizes (5,2)
+ do j = 1,n
+ do k = 1, n
+ c(j,i) = c(j,i) + a(k, i) * b(j, k)
+ end do
+ end do
+ end do
+ end function mult
+
+ subroutine print_matrix (m)
+ integer, allocatable :: m(:,:)
+ integer :: i, j, n
+
+ n = size (m, 1)
+ do i = 1,n
+ do j = 1,n
+ write (*, fmt="(i4)", advance='no') m(j, i)
+ end do
+ write (*, *) ""
+ end do
+ write (*, *) ""
+ end subroutine
+
+end module matrix
+
+program main
+ use matrix
+ implicit none
+
+ integer, allocatable :: a(:,:),b(:,:),c(:,:)
+ integer :: i,j
+
+ allocate(a( n, m ))
+ allocate(b( n, m ))
+
+ do i = 1,n
+ do j = 1,m
+ a(j,i) = merge(1,0, i.eq.j)
+ b(j,i) = j
+ end do
+ end do
+
+ c = mult (a, b)
+
+ call print_matrix (a)
+ call print_matrix (b)
+ call print_matrix (c)
+
+ do i = 1,n
+ do j = 1,m
+ if (b(i,j) .ne. c(i,j)) call abort ()
+ end do
+ end do
+
+
+end program main
new file mode 100644
@@ -0,0 +1,112 @@
+module matrix
+ implicit none
+ integer :: n = 10
+ integer :: m = 10
+
+contains
+
+ function mult (a, b) result (c)
+ integer, allocatable, dimension (:,:) :: a,b,c
+ integer :: i, j, k, inner
+
+ allocate(c( n, m ))
+ do i = 1,10
+ do j = 1,n
+ c(j,i) = 0
+ end do
+ end do
+
+ !$omp unroll partial(10)
+ !$omp tile sizes(1, 3)
+ do i = 1,10
+ do j = 1,n
+ do k = 1, n
+ write (*,*) i, j, k
+ c(j,i) = c(j,i) + a(k, i) * b(j, k)
+ end do
+ end do
+ end do
+ end function mult
+
+ function mult2 (a, b) result (c)
+ integer, allocatable, dimension (:,:) :: a,b,c
+ integer :: i, j, k, inner
+
+ allocate(c( n, m ))
+ do i = 1,10
+ do j = 1,n
+ c(j,i) = 0
+ end do
+ end do
+
+ !$omp unroll partial(2)
+ !$omp tile sizes(1,2)
+ do i = 1,10
+ do j = 1,n
+ do k = 1, n
+ write (*,*) i, j, k
+ c(j,i) = c(j,i) + a(k, i) * b(j, k)
+ end do
+ end do
+ end do
+ end function mult2
+
+ subroutine print_matrix (m)
+ integer, allocatable :: m(:,:)
+ integer :: i, j, n
+
+ n = size (m, 1)
+ do i = 1,n
+ do j = 1,n
+ write (*, fmt="(i4)", advance='no') m(j, i)
+ end do
+ write (*, *) ""
+ end do
+ write (*, *) ""
+ end subroutine
+
+end module matrix
+
+program main
+ use matrix
+ implicit none
+
+ integer, allocatable :: a(:,:),b(:,:),c(:,:)
+ integer :: i,j
+
+ allocate(a( n, m ))
+ allocate(b( n, m ))
+
+ do i = 1,n
+ do j = 1,m
+ a(j,i) = merge(1,0, i.eq.j)
+ b(j,i) = j
+ end do
+ end do
+
+ ! c = mult (a, b)
+
+ ! call print_matrix (a)
+ ! call print_matrix (b)
+ ! call print_matrix (c)
+
+ ! do i = 1,n
+ ! do j = 1,m
+ ! if (b(i,j) .ne. c(i,j)) call abort ()
+ ! end do
+ ! end do
+
+
+ c = mult2 (a, b)
+
+ call print_matrix (a)
+ call print_matrix (b)
+ call print_matrix (c)
+
+ do i = 1,n
+ do j = 1,m
+ if (b(i,j) .ne. c(i,j)) call abort ()
+ end do
+ end do
+
+end program main
new file mode 100644
@@ -0,0 +1,71 @@
+module matrix
+ implicit none
+ integer :: n = 10
+ integer :: m = 10
+
+contains
+
+ function copy (a, b) result (c)
+ integer, allocatable, dimension (:,:) :: a,b,c
+ integer :: i, j, k, inner
+
+ allocate(c( n, m ))
+ do i = 1,10
+ do j = 1,n
+ c(j,i) = 0
+ end do
+ end do
+
+ !$omp unroll partial(2)
+ !$omp tile sizes (1,5)
+ do i = 1,10
+ do j = 1,n
+ c(j,i) = c(j,i) + a(j, i)
+ end do
+ end do
+ end function copy
+
+ subroutine print_matrix (m)
+ integer, allocatable :: m(:,:)
+ integer :: i, j, n
+
+ n = size (m, 1)
+ do i = 1,n
+ do j = 1,n
+ write (*, fmt="(i4)", advance='no') m(j, i)
+ end do
+ write (*, *) ""
+ end do
+ write (*, *) ""
+ end subroutine
+end module matrix
+
+program main
+ use matrix
+ implicit none
+
+ integer, allocatable :: a(:,:),b(:,:),c(:,:)
+ integer :: i,j
+
+ allocate(a( n, m ))
+ allocate(b( n, m ))
+
+ do i = 1,n
+ do j = 1,m
+ a(j,i) = 1
+ end do
+ end do
+
+ c = copy (a, b)
+
+ call print_matrix (a)
+ call print_matrix (b)
+ call print_matrix (c)
+
+ do i = 1,n
+ do j = 1,m
+ if (c(i,j) .ne. a(i,j)) call abort ()
+ end do
+ end do
+
+end program main