@@ -20246,7 +20246,7 @@ c_parser_omp_scan_loop_body (c_parser *parser, bool open_brace_parsed)
}
static int c_parser_omp_nested_loop_transform_clauses (c_parser *, tree &, int,
- const char *);
+ int, const char *);
/* Parse the restricted form of loop statements allowed by OpenACC and OpenMP.
The real trick here is to determine the loop control variable early
@@ -20300,7 +20300,7 @@ c_parser_omp_for_loop (location_t loc, c_parser *parser, enum tree_code code,
ordered = collapse;
}
- c_parser_omp_nested_loop_transform_clauses (parser, clauses, collapse,
+ c_parser_omp_nested_loop_transform_clauses (parser, clauses, 0, collapse,
"loop collapse");
/* Find the depth of the loop nest affected by "omp tile"
@@ -20489,6 +20489,22 @@ c_parser_omp_for_loop (location_t loc, c_parser *parser, enum tree_code code,
else if (bracecount
&& c_parser_next_token_is (parser, CPP_SEMICOLON))
c_parser_consume_token (parser);
+ else if (c_parser_peek_token (parser)->pragma_kind
+ == PRAGMA_OMP_UNROLL
+ || c_parser_peek_token (parser)->pragma_kind
+ == PRAGMA_OMP_TILE)
+ {
+ /* A loop transformation directive precedes the next inner loop.
+ Parse it; I + 1 is passed as the level to record in its clauses
+ and COUNT - I - 1 as the depth still required below this loop. */
+ int depth = c_parser_omp_nested_loop_transform_clauses (
+ parser, clauses, i + 1, count - i - 1, "loop collapse");
+ /* If the transformations reach deeper than COUNT currently covers,
+ extend COUNT and grow the per-loop vectors to match. */
+ if (i + 1 + depth > count)
+ {
+ count = i + 1 + depth;
+ declv = grow_tree_vec (declv, count);
+ initv = grow_tree_vec (initv, count);
+ condv = grow_tree_vec (condv, count);
+ incrv = grow_tree_vec (incrv, count);
+ }
+ }
else
{
c_parser_error (parser, "not enough perfectly nested loops");
@@ -20500,7 +20516,7 @@ c_parser_omp_for_loop (location_t loc, c_parser *parser, enum tree_code code,
fail = true;
count = 0;
break;
- }
+ }
}
while (1);
@@ -24066,9 +24082,9 @@ c_parser_omp_loop_transform_clause (c_parser *parser)
}
/* Parse zero or more OpenMP loop transformation directives that
- follow another directive that requires a canonical loop nest and
- append all to CLAUSES. Return the nesting depth
- of the transformed loop nest.
+ follow another directive that requires a canonical loop nest,
+ append all to CLAUSES and record the LEVEL at which the clauses
+ appear in the loop nest in each clause. Return the nesting depth
+ of the transformed loop nest.
REQUIRED_DEPTH is the nesting depth of the loop nest required by
the preceding directive. OUTER_DESCR is a description of the
@@ -24078,7 +24094,7 @@ c_parser_omp_loop_transform_clause (c_parser *parser)
static int
c_parser_omp_nested_loop_transform_clauses (c_parser *parser, tree &clauses,
- int required_depth,
+ int level, int required_depth,
const char *outer_descr)
{
tree c = NULL_TREE;
@@ -24139,6 +24155,7 @@ c_parser_omp_nested_loop_transform_clauses (c_parser *parser, tree &clauses,
if (!transformed_depth)
transformed_depth = last_depth;
+ OMP_CLAUSE_TRANSFORM_LEVEL (c) = build_int_cst (unsigned_type_node, level);
if (!clauses)
clauses = c;
else if (last_c)
@@ -24172,7 +24189,7 @@ c_parser_omp_tile (location_t loc, c_parser *parser, bool *if_p)
return error_mark_node;
int required_depth = list_length (OMP_CLAUSE_TILE_SIZES (clauses));
- c_parser_omp_nested_loop_transform_clauses (parser, clauses, required_depth,
+ c_parser_omp_nested_loop_transform_clauses (parser, clauses, 0, required_depth,
"outer transformation");
block = c_begin_compound_stmt (true);
@@ -24192,7 +24209,7 @@ c_parser_omp_unroll (location_t loc, c_parser *parser, bool *if_p)
tree clauses = c_parser_omp_all_clauses (parser, mask, p_name, false);
int required_depth = 1;
- c_parser_omp_nested_loop_transform_clauses (parser, clauses, required_depth,
+ c_parser_omp_nested_loop_transform_clauses (parser, clauses, 0, required_depth,
"outer transformation");
if (!clauses)
@@ -2974,6 +2974,14 @@ cp_parser_is_keyword (cp_token* token, enum rid keyword)
return token->keyword == keyword;
}
+/* Return true if cp_parser_pragma_kind reports KIND for TOKEN,
+ false otherwise. */
+
+static bool
+cp_parser_is_pragma (cp_token* token, enum pragma_kind kind)
+{
+ if (cp_parser_pragma_kind (token) != kind)
+ return false;
+ return true;
+}
+
/* Helper function for cp_parser_error.
Having peeked a token of kind TOK1_KIND that might signify
a conflict marker, peek successor tokens to determine
@@ -43634,7 +43642,8 @@ cp_parser_omp_scan_loop_body (cp_parser *parser)
}
static int cp_parser_omp_nested_loop_transform_clauses (cp_parser *, tree &,
- int, const char *);
+ int, int,
+ const char *);
/* Parse the restricted form of the for statement allowed by OpenMP. */
@@ -43686,7 +43695,7 @@ cp_parser_omp_for_loop (cp_parser *parser, enum tree_code code, tree clauses,
gcc_assert (oacc_tiling || (collapse >= 1 && ordered >= 0));
count = ordered ? ordered : collapse;
- cp_parser_omp_nested_loop_transform_clauses (parser, clauses, count,
+ cp_parser_omp_nested_loop_transform_clauses (parser, clauses, 0, count,
"loop collapse");
/* Find the depth of the loop nest affected by "omp tile"
@@ -43956,19 +43965,42 @@ cp_parser_omp_for_loop (cp_parser *parser, enum tree_code code, tree clauses,
cp_parser_parse_tentatively (parser);
for (;;)
{
- if (cp_lexer_next_token_is_keyword (parser->lexer, RID_FOR))
+ cp_token *tok = cp_lexer_peek_token (parser->lexer);
+ if (cp_parser_is_keyword (tok, RID_FOR))
break;
- else if (cp_lexer_next_token_is (parser->lexer, CPP_OPEN_BRACE))
+ else if (tok->type == CPP_OPEN_BRACE)
{
cp_lexer_consume_token (parser->lexer);
bracecount++;
}
- else if (bracecount
- && cp_lexer_next_token_is (parser->lexer, CPP_SEMICOLON))
+ else if (bracecount && tok->type == CPP_SEMICOLON)
cp_lexer_consume_token (parser->lexer);
+ else if (cp_parser_is_pragma (tok, PRAGMA_OMP_UNROLL)
+ || cp_parser_is_pragma (tok, PRAGMA_OMP_TILE))
+ {
+ /* A loop transformation directive precedes the next inner
+ loop. I + 1 is the level to record in its clauses and
+ COUNT - I - 1 the depth still required below this loop. */
+ int depth = cp_parser_omp_nested_loop_transform_clauses (
+ parser, clauses, i + 1, count - i - 1, "loop collapse");
+
+ /* Adjust the loop nest depth to the requirements of the
+ loop transformations. The collapse will be reduced
+ to value requested by the "collapse" and "ordered"
+ clauses after the execution of the loop transformations
+ in the middle end. */
+ if (i + 1 + depth > count)
+ {
+ count = i + 1 + depth;
+ if (declv)
+ declv = grow_tree_vec (declv, count);
+ initv = grow_tree_vec (initv, count);
+ condv = grow_tree_vec (condv, count);
+ incrv = grow_tree_vec (incrv, count);
+ /* ORIG_DECLV gets its own grown vector; storing it in DECLV
+ would discard the vector grown above and leave ORIG_DECLV
+ at its old length. */
+ if (orig_declv)
+ orig_declv = grow_tree_vec (orig_declv, count);
+ }
+ }
else
{
- loc = cp_lexer_peek_token (parser->lexer)->location;
+ loc = tok->location;
error_at (loc, "not enough for loops to collapse");
collapse_err = true;
cp_parser_abort_tentative_parse (parser);
@@ -44027,6 +44059,27 @@ cp_parser_omp_for_loop (cp_parser *parser, enum tree_code code, tree clauses,
}
else if (cp_lexer_next_token_is (parser->lexer, CPP_SEMICOLON))
cp_lexer_consume_token (parser->lexer);
+ else if (cp_parser_is_pragma (cp_lexer_peek_token (parser->lexer),
+ PRAGMA_OMP_UNROLL)
+ || cp_parser_is_pragma (cp_lexer_peek_token (parser->lexer),
+ PRAGMA_OMP_TILE))
+ {
+ /* A loop transformation directive precedes the next inner loop.
+ I + 1 is the level to record in its clauses and COUNT - I - 1
+ the depth still required below this loop. */
+ int depth =
+ cp_parser_omp_nested_loop_transform_clauses (parser, clauses,
+ i + 1, count - i - 1,
+ "loop collapse");
+ /* Extend COUNT and the per-loop vectors if the transformations
+ reach deeper than COUNT currently covers. */
+ if (i + 1 + depth > count)
+ {
+ count = i + 1 + depth;
+ if (declv)
+ declv = grow_tree_vec (declv, count);
+ initv = grow_tree_vec (initv, count);
+ condv = grow_tree_vec (condv, count);
+ incrv = grow_tree_vec (incrv, count);
+ /* ORIG_DECLV gets its own grown vector; storing it in DECLV
+ would discard the vector grown above and leave ORIG_DECLV
+ at its old length. */
+ if (orig_declv)
+ orig_declv = grow_tree_vec (orig_declv, count);
+ }
+ }
else
{
if (!collapse_err)
@@ -45787,6 +45840,7 @@ cp_parser_omp_tile_sizes (cp_parser *parser, location_t loc)
gcc_assert (sizes);
tree c = build_omp_clause (loc, OMP_CLAUSE_TILE);
+ OMP_CLAUSE_TRANSFORM_LEVEL (c) = build_int_cst (unsigned_type_node, 0);
OMP_CLAUSE_TILE_SIZES (c) = sizes;
OMP_CLAUSE_TRANSFORM_LEVEL (c)
= build_int_cst (unsigned_type_node, 0);
@@ -45810,8 +45864,9 @@ cp_parser_omp_tile (cp_parser *parser, cp_token *tok, bool *if_p)
return error_mark_node;
int required_depth = list_length (OMP_CLAUSE_TILE_SIZES (clauses));
- cp_parser_omp_nested_loop_transform_clauses (
- parser, clauses, required_depth, "outer transformation");
+ cp_parser_omp_nested_loop_transform_clauses (parser, clauses, 0,
+ required_depth,
+ "outer transformation");
block = begin_omp_structured_block ();
clauses = finish_omp_clauses (clauses, C_ORT_OMP);
@@ -45878,8 +45933,9 @@ cp_parser_omp_loop_transform_clause (cp_parser *parser)
}
/* Parse zero or more OpenMP loop transformation directives that
- follow another directive that requires a canonical loop nest and
- append all to CLAUSES. Return the nesting depth
+ follow another directive that requires a canonical loop nest,
+ append all to CLAUSES, and record the LEVEL at which the clauses
+ appear in the loop nest in each clause. Return the nesting depth
of the transformed loop nest.
REQUIRED_DEPTH is the nesting depth of the loop nest required by
@@ -45890,7 +45946,7 @@ cp_parser_omp_loop_transform_clause (cp_parser *parser)
static int
cp_parser_omp_nested_loop_transform_clauses (cp_parser *parser, tree &clauses,
- int required_depth,
+ int level, int required_depth,
const char *outer_descr)
{
tree c = NULL_TREE;
@@ -45934,7 +45990,8 @@ cp_parser_omp_nested_loop_transform_clauses (cp_parser *parser, tree &clauses,
default:
gcc_unreachable ();
}
- OMP_CLAUSE_TRANSFORM_LEVEL (c) = build_int_cst (unsigned_type_node, 0);
+ OMP_CLAUSE_TRANSFORM_LEVEL (c)
+ = build_int_cst (unsigned_type_node, level);
if (depth < last_depth)
{
@@ -45989,8 +46046,9 @@ cp_parser_omp_unroll (cp_parser *parser, cp_token *tok, bool *if_p)
}
int required_depth = 1;
- cp_parser_omp_nested_loop_transform_clauses (
- parser, clauses, required_depth, "outer transformation");
+ cp_parser_omp_nested_loop_transform_clauses (parser, clauses, 0,
+ required_depth,
+ "outer transformation");
block = begin_omp_structured_block ();
ret = cp_parser_omp_for_loop (parser, OMP_LOOP_TRANS, clauses, NULL, if_p);
new file mode 100644
@@ -0,0 +1,12 @@
+/* A three-deep tile whose innermost loop is separated from the second
+ loop by a declaration and an "omp unroll" directive. The diagnostics
+ named in the dg-bogus directives are marked xfail for C and C++. */
+void test ()
+{
+#pragma omp tile sizes (2,4,6)
+ for (unsigned i = 0; i < 10; i++)
+ for (unsigned j = 0; j < 10; j++)
+ {
+ float intervening_decl = 0; /* { dg-bogus "not enough for loops to collapse" "TODO C/C++ imperfect loop nest handling" { xfail c++ } } */
+ /* { dg-bogus "not enough perfectly nested loops" "TODO C/C++ imperfect loop nest handling" { xfail c } .-1 } */
+#pragma omp unroll partial(2)
+ for (unsigned k = 0; k < 10; k++);
+ }
+}
new file mode 100644
@@ -0,0 +1,15 @@
+/* { dg-additional-options "-std=c++11" { target c++} } */
+
+extern void dummy (int);
+
+/* A collapse(2) nest whose inner loop carries an "omp unroll partial"
+ directive; the function contains no diagnostic directives. */
+void
+test ()
+{
+
+#pragma omp target parallel for collapse(2)
+ for (int i = -300; i != 100; ++i)
+ #pragma omp unroll partial
+ for (int j = 0; j != 100; ++j)
+ dummy (i);
+}
+
new file mode 100644
@@ -0,0 +1,31 @@
+/* { dg-additional-options "-std=c++11" { target c++} } */
+
+extern void dummy (int);
+
+/* "omp tile" on the inner loop of a collapse(2) nest, in three forms:
+ one tile size with one loop below the directive, two tile sizes with
+ only one loop below it (errors expected for C and C++), and two tile
+ sizes with two loops below it. */
+void
+test ()
+{
+
+#pragma omp target parallel for collapse(2)
+ for (int i = -300; i != 100; ++i)
+#pragma omp tile sizes(2)
+ for (int j = 0; j != 100; ++j)
+ dummy (i);
+
+#pragma omp target parallel for collapse(2)
+ for (int i = -300; i != 100; ++i)
+#pragma omp tile sizes(2, 3)
+ for (int j = 0; j != 100; ++j)
+ dummy (i); /* { dg-error {not enough for loops to collapse} "" { target c++ } } */
+/* { dg-error {'i' was not declared in this scope} "" { target c++ } .-1 } */
+/* { dg-error {not enough perfectly nested loops before 'dummy'} "" { target c } .-2 } */
+
+#pragma omp target parallel for collapse(2)
+ for (int i = -300; i != 100; ++i)
+#pragma omp tile sizes(2, 3)
+ for (int j = 0; j != 100; ++j)
+ for (int k = 0; k != 100; ++k)
+ dummy (i);
+}
+
+
new file mode 100644
@@ -0,0 +1,37 @@
+extern void dummy (int);
+
+/* "omp unroll" directives at the top and at inner levels of loop nests,
+ including inner loops whose bounds depend on an outer iteration
+ variable. The function contains no diagnostic directives. */
+void
+test1 ()
+{
+#pragma omp target parallel for collapse(2)
+ for (int i = -300; i != 100; ++i)
+#pragma omp unroll partial(2)
+ for (int j = i * 2; j <= i * 4 + 1; ++j)
+ dummy (i);
+
+#pragma omp target parallel for collapse(3)
+ for (int i = -300; i != 100; ++i)
+ for (int j = i; j != i * 2; ++j)
+ #pragma omp unroll partial
+ for (int k = 2; k != 100; ++k)
+ dummy (i);
+
+#pragma omp unroll full
+ for (int i = -300; i != 100; ++i)
+ for (int j = i; j != i * 2; ++j)
+ for (int k = 2; k != 100; ++k)
+ dummy (i);
+
+ for (int i = -300; i != 100; ++i)
+#pragma omp unroll full
+ for (int j = i; j != i + 10; ++j)
+ for (int k = 2; k != 100; ++k)
+ dummy (i);
+
+ for (int i = -300; i != 100; ++i)
+#pragma omp unroll full
+ for (int j = i; j != i + 10; ++j)
+ for (int k = j; k != 100; ++k)
+ dummy (i);
+}
+
new file mode 100644
@@ -0,0 +1,22 @@
+extern void dummy (int);
+
+/* collapse(2) over a nest whose inner loop is bounded by the outer
+ iteration variable and carries "omp unroll partial"; an invalid
+ non-rectangular loop step error is expected (reported on the
+ directive line for C and on the outer loop line for C++). */
+void
+test1 ()
+{
+#pragma omp target parallel for collapse(2) /* { dg-error {invalid OpenMP non-rectangular loop step; \'\(1 - 0\) \* 1\' is not a multiple of loop 2 step \'5\'} "" { target c } } */
+ for (int i = -300; i != 100; ++i) /* { dg-error {invalid OpenMP non-rectangular loop step; \'\(1 - 0\) \* 1\' is not a multiple of loop 2 step \'5\'} "" { target c++ } } */
+#pragma omp unroll partial
+ for (int j = 2; j != i; ++j)
+ dummy (i);
+}
+
+/* Same loop nest as test1, but with the iteration variables declared
+ outside the loops; this function carries no diagnostic directives. */
+void
+test2 ()
+{
+ int i,j;
+#pragma omp target parallel for collapse(2)
+ for (i = -300; i != 100; ++i)
+ #pragma omp unroll partial
+ for (j = 2; j != i; ++j)
+ dummy (i);
+}
deleted file mode 100644
@@ -1,52 +0,0 @@
-#include <string.h>
-#include <stdio.h>
-#include <math.h>
-
-void
-mult (float *matrix1, float *matrix2, float *result, unsigned dim0,
- unsigned dim1)
-{
- memset (result, 0, sizeof (float) * dim0 * dim1);
-#pragma omp target parallel for collapse(3) map(tofrom:result[0:dim0*dim1]) map(to:matrix1[0:dim0*dim1], matrix2[0:dim0*dim1])
-#pragma omp tile sizes(8, 16, 4)
- for (unsigned i = 0; i < dim0; i++)
- for (unsigned j = 0; j < dim1; j++)
- for (unsigned k = 0; k < dim1; k++)
- result[i * dim1 + j] += matrix1[i * dim1 + k] * matrix2[k * dim0 + j];
-}
-
-int
-main ()
-{
- unsigned dim0 = 20;
- unsigned dim1 = 20;
-
- float *result = (float *)malloc (sizeof (float) * dim0 * dim1);
- float *matrix1 = (float *)malloc (sizeof (float) * dim0 * dim1);
- float *matrix2 = (float *)malloc (sizeof (float) * dim0 * dim1);
-
- for (unsigned i = 0; i < dim0; i++)
- for (unsigned j = 0; j < dim1; j++)
- matrix1[i * dim1 + j] = j;
-
- for (unsigned i = 0; i < dim1; i++)
- for (unsigned j = 0; j < dim0; j++)
- if (i == j)
- matrix2[i * dim0 + j] = 1;
- else
- matrix2[i * dim0 + j] = 0;
-
- mult (matrix1, matrix2, result, dim0, dim1);
-
- for (unsigned i = 0; i < dim0; i++)
- for (unsigned j = 0; j < dim1; j++)
- {
- if (matrix1[i * dim1 + j] != result[i * dim1 + j])
- {
- printf ("ERROR at %d, %d\n", i, j);
- __builtin_abort ();
- }
- }
-
- return 0;
-}
new file mode 100644
@@ -0,0 +1,70 @@
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <math.h>
+
+#ifndef FUN_NAME_SUFFIX
+#define FUN_NAME_SUFFIX
+#endif
+
+#ifdef MULT
+#undef MULT
+#endif
+#define MULT CAT(mult, FUN_NAME_SUFFIX)
+
+#ifdef MAIN
+#undef MAIN
+#endif
+#define MAIN CAT(main, FUN_NAME_SUFFIX)
+
+/* Zero RESULT and accumulate the product of MATRIX1 and MATRIX2 into it
+ with a three-deep loop nest. DIRECTIVE and TRANSFORMATION1..3 are
+ macros that must be defined by the including file; they are expanded
+ in front of the outer, middle and inner loop respectively. */
+void MULT (float *matrix1, float *matrix2, float *result,
+ unsigned dim0, unsigned dim1)
+{
+ /* The outermost iteration variable is declared outside the loop. */
+ unsigned i;
+
+ memset (result, 0, sizeof (float) * dim0 * dim1);
+ DIRECTIVE
+ TRANSFORMATION1
+ for (i = 0; i < dim0; i++)
+ TRANSFORMATION2
+ for (unsigned j = 0; j < dim1; j++)
+ TRANSFORMATION3
+ for (unsigned k = 0; k < dim1; k++)
+ result[i * dim1 + j] += matrix1[i * dim1 + k] * matrix2[k * dim0 + j];
+}
+
+/* Driver for MULT. Fills matrix1 so that every entry equals its column
+ index, fills matrix2 with ones on the diagonal and zeros elsewhere,
+ multiplies them and aborts (after dumping all three matrices to
+ stderr) if the result differs from matrix1. Returns 0 otherwise. */
+int MAIN ()
+{
+ unsigned dim0 = 20;
+ unsigned dim1 = 20;
+
+ float *result = (float *)malloc (sizeof (float) * dim0 * dim1);
+ float *matrix1 = (float *)malloc (sizeof (float) * dim0 * dim1);
+ float *matrix2 = (float *)malloc (sizeof (float) * dim0 * dim1);
+
+ for (unsigned i = 0; i < dim0; i++)
+ for (unsigned j = 0; j < dim1; j++)
+ matrix1[i * dim1 + j] = j;
+
+ for (unsigned i = 0; i < dim0; i++)
+ for (unsigned j = 0; j < dim1; j++)
+ if (i == j)
+ matrix2[i * dim1 + j] = 1;
+ else
+ matrix2[i * dim1 + j] = 0;
+
+ MULT (matrix1, matrix2, result, dim0, dim1);
+
+ for (unsigned i = 0; i < dim0; i++)
+ for (unsigned j = 0; j < dim1; j++) {
+ if (matrix1[i * dim1 + j] != result[i * dim1 + j]) {
+ print_matrix (matrix1, dim0, dim1);
+ print_matrix (matrix2, dim0, dim1);
+ print_matrix (result, dim0, dim1);
+ /* I and J are unsigned, so they are printed with %u. */
+ fprintf(stderr, "%s: ERROR at %u, %u\n", __FUNCTION__, i, j);
+ abort();
+ }
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,71 @@
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <math.h>
+
+#ifndef FUN_NAME_SUFFIX
+#define FUN_NAME_SUFFIX
+#endif
+
+#ifdef MULT
+#undef MULT
+#endif
+#define MULT CAT(mult, FUN_NAME_SUFFIX)
+
+#ifdef MAIN
+#undef MAIN
+#endif
+#define MAIN CAT(main, FUN_NAME_SUFFIX)
+
+/* Variant of the matrix product with constant dimensions (20 x 20):
+ zero RESULT and accumulate the product of MATRIX1 and MATRIX2 into it.
+ DIRECTIVE and TRANSFORMATION1..3 are macros that must be defined by
+ the including file; they are expanded in front of the outer, middle
+ and inner loop respectively. */
+void MULT (float *matrix1, float *matrix2, float *result)
+{
+ const unsigned dim0 = 20;
+ const unsigned dim1 = 20;
+
+ memset (result, 0, sizeof (float) * dim0 * dim1);
+ DIRECTIVE
+ TRANSFORMATION1
+ for (unsigned i = 0; i < dim0; i++)
+ TRANSFORMATION2
+ for (unsigned j = 0; j < dim1; j++)
+ TRANSFORMATION3
+ for (unsigned k = 0; k < dim1; k++)
+ result[i * dim1 + j] += matrix1[i * dim1 + k] * matrix2[k * dim0 + j];
+}
+
+/* Driver for the constant-dimension MULT. Fills matrix1 so that every
+ entry equals its column index, fills matrix2 with ones on the diagonal
+ and zeros elsewhere, multiplies them and aborts (after reporting the
+ position and dumping all three matrices) if the result differs from
+ matrix1. Returns 0 otherwise. */
+int MAIN ()
+{
+ const unsigned dim0 = 20;
+ const unsigned dim1 = 20;
+
+ float *result = (float *)malloc (sizeof (float) * dim0 * dim1);
+ float *matrix1 = (float *)malloc (sizeof (float) * dim0 * dim1);
+ float *matrix2 = (float *)malloc (sizeof (float) * dim0 * dim1);
+
+ for (unsigned i = 0; i < dim0; i++)
+ for (unsigned j = 0; j < dim1; j++)
+ matrix1[i * dim1 + j] = j;
+
+ for (unsigned i = 0; i < dim0; i++)
+ for (unsigned j = 0; j < dim1; j++)
+ if (i == j)
+ matrix2[i * dim1 + j] = 1;
+ else
+ matrix2[i * dim1 + j] = 0;
+
+ MULT (matrix1, matrix2, result);
+
+ for (unsigned i = 0; i < dim0; i++)
+ for (unsigned j = 0; j < dim1; j++) {
+ if (matrix1[i * dim1 + j] != result[i * dim1 + j]) {
+ /* I and J are unsigned, so they are printed with %u. */
+ __builtin_printf("%s: error at %u, %u\n", __FUNCTION__, i, j);
+ print_matrix (matrix1, dim0, dim1);
+ print_matrix (matrix2, dim0, dim1);
+ print_matrix (result, dim0, dim1);
+ __builtin_printf("\n");
+ __builtin_abort();
+ }
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,19 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#define CAT(x,y) XCAT(x,y)
+#define XCAT(x,y) x ## y
+#define DO_PRAGMA(x) XDO_PRAGMA(x)
+#define XDO_PRAGMA(x) _Pragma (#x)
+
+
+/* Write the DIM0 x DIM1 row-major MATRIX to stderr, one row per line
+ with each entry followed by a space, and finish with an empty line. */
+void print_matrix (float *matrix, unsigned dim0, unsigned dim1)
+{
+ unsigned row = 0;
+ while (row < dim0)
+ {
+ /* Offset of the first entry of the current row. */
+ unsigned row_start = row * dim1;
+ for (unsigned col = 0; col != dim1; ++col)
+ fprintf (stderr, "%f ", matrix[row_start + col]);
+ fprintf (stderr, "\n");
+ ++row;
+ }
+ fprintf (stderr, "\n");
+}
new file mode 100644
@@ -0,0 +1,11 @@
+/* { dg-additional-options {-fdump-tree-original} } */
+
+#define COMMON_DIRECTIVE
+#define COLLAPSE_1 collapse(1)
+#define COLLAPSE_2 collapse(2)
+#define COLLAPSE_3 collapse(3)
+
+#include "matrix-transform-variants-1.h"
+
+/* A consistency check to prevent broken macro usage. */
+/* { dg-final { scan-tree-dump-times "unroll_partial" 12 "original" } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-additional-options {-fdump-tree-original} } */
+
+#define COMMON_DIRECTIVE
+#define COMMON_TOP_TRANSFORM omp unroll full
+#define COLLAPSE_1
+#define COLLAPSE_2
+#define COLLAPSE_3
+#define IMPLEMENTATION_FILE "matrix-constant-iter.h"
+
+#include "matrix-transform-variants-1.h"
+
+/* A consistency check to prevent broken macro usage. */
+/* { dg-final { scan-tree-dump-times "unroll_full" 13 "original" } } */
new file mode 100644
@@ -0,0 +1,6 @@
+/* DO_PRAGMA stringifies these macros itself, so they must expand to
+ plain tokens; string literals would produce a pragma made of quoted
+ strings instead of an OpenMP directive. */
+#define COMMON_DIRECTIVE omp teams distribute parallel for
+#define COLLAPSE_1 collapse(1)
+#define COLLAPSE_2 collapse(2)
+#define COLLAPSE_3 collapse(3)
+
+#include "matrix-transform-variants-1.h"
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-additional-options {-fdump-tree-original} } */
+
+#define COMMON_DIRECTIVE omp for
+#define COLLAPSE_1 collapse(1)
+#define COLLAPSE_2 collapse(2)
+#define COLLAPSE_3 collapse(3)
+
+#include "matrix-transform-variants-1.h"
+
+
+/* A consistency check to prevent broken macro usage. */
+/* { dg-final { scan-tree-dump-times "omp for" 13 "original" } } */
+/* { dg-final { scan-tree-dump-times "collapse" 12 "original" } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-additional-options {-fdump-tree-original} } */
+
+#define COMMON_DIRECTIVE omp parallel for
+#define COLLAPSE_1 collapse(1)
+#define COLLAPSE_2 collapse(2)
+#define COLLAPSE_3
+
+#include "matrix-transform-variants-1.h"
+
+
+/* A consistency check to prevent broken macro usage. */
+/* { dg-final { scan-tree-dump-times "omp parallel" 13 "original" } } */
+/* { dg-final { scan-tree-dump-times "collapse" 9 "original" } } */
new file mode 100644
@@ -0,0 +1,6 @@
+#define COMMON_DIRECTIVE omp parallel masked taskloop
+#define COLLAPSE_1 collapse(1)
+#define COLLAPSE_2 collapse(2)
+#define COLLAPSE_3
+
+#include "matrix-transform-variants-1.h"
new file mode 100644
@@ -0,0 +1,6 @@
+#define COMMON_DIRECTIVE omp parallel masked taskloop simd
+#define COLLAPSE_1 collapse(1)
+#define COLLAPSE_2 collapse(2)
+#define COLLAPSE_3
+
+#include "matrix-transform-variants-1.h"
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-additional-options {-fdump-tree-original} } */
+
+#define COMMON_DIRECTIVE omp target parallel for map(tofrom:result[0:dim0*dim1]) map(to:matrix1[0:dim0*dim1], matrix2[0:dim0*dim1])
+#define COLLAPSE_1 collapse(1)
+#define COLLAPSE_2 collapse(2)
+#define COLLAPSE_3
+
+#include "matrix-transform-variants-1.h"
+
+/* A consistency check to prevent broken macro usage. */
+/* { dg-final { scan-tree-dump-times "omp target" 13 "original" } } */
+/* { dg-final { scan-tree-dump-times "collapse" 9 "original" } } */
+/* { dg-final { scan-tree-dump-times "unroll_partial" 12 "original" } } */
new file mode 100644
@@ -0,0 +1,6 @@
+#define COMMON_DIRECTIVE omp target teams distribute parallel for map(tofrom:result[:dim0*dim1]) map(to:matrix1[0:dim0*dim1], matrix2[0:dim0*dim1])
+#define COLLAPSE_1 collapse(1)
+#define COLLAPSE_2 collapse(2)
+#define COLLAPSE_3
+
+#include "matrix-transform-variants-1.h"
new file mode 100644
@@ -0,0 +1,6 @@
+#define COMMON_DIRECTIVE omp taskloop
+#define COLLAPSE_1 collapse(1)
+#define COLLAPSE_2 collapse(2)
+#define COLLAPSE_3 collapse(3)
+
+#include "matrix-transform-variants-1.h"
new file mode 100644
@@ -0,0 +1,6 @@
+#define COMMON_DIRECTIVE omp teams distribute parallel for
+#define COLLAPSE_1 collapse(1)
+#define COLLAPSE_2 collapse(2)
+#define COLLAPSE_3
+
+#include "matrix-transform-variants-1.h"
new file mode 100644
@@ -0,0 +1,6 @@
+#define COMMON_DIRECTIVE omp simd
+#define COLLAPSE_1 collapse(1)
+#define COLLAPSE_2 collapse(2)
+#define COLLAPSE_3 collapse(3)
+
+#include "matrix-transform-variants-1.h"
new file mode 100644
@@ -0,0 +1,191 @@
+/* Instantiates the implementation file several times. Each variant
+ defines FUN_NAME_SUFFIX, DIRECTIVE and TRANSFORMATION1..3, includes
+ IMPLEMENTATION_FILE and then undefines those macros again. The
+ including file provides COMMON_DIRECTIVE and COLLAPSE_1..3 and may
+ override COMMON_TOP_TRANSFORM and IMPLEMENTATION_FILE. */
+#include "matrix-helper.h"
+
+/* By default no extra transformation is put in front of TRANSFORMATION1. */
+#ifndef COMMON_TOP_TRANSFORM
+#define COMMON_TOP_TRANSFORM
+#endif
+
+/* Default implementation file. */
+#ifndef IMPLEMENTATION_FILE
+#define IMPLEMENTATION_FILE "matrix-1.h"
+#endif
+
+/* Variant 1: unroll partial(2) followed by tile sizes(10) on the
+ outermost loop, no collapse clause. */
+#define FUN_NAME_SUFFIX 1
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE)
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM) _Pragma("omp unroll partial(2)") _Pragma("omp tile sizes(10)")
+#define TRANSFORMATION2
+#define TRANSFORMATION3
+#include IMPLEMENTATION_FILE
+
+#undef DIRECTIVE
+#undef TRANSFORMATION1
+#undef TRANSFORMATION2
+#undef TRANSFORMATION3
+#undef FUN_NAME_SUFFIX
+
+#define FUN_NAME_SUFFIX 2
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE COLLAPSE_3)
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM) _Pragma("omp tile sizes(8,16,4)")
+#define TRANSFORMATION2
+#define TRANSFORMATION3
+#include IMPLEMENTATION_FILE
+
+#undef DIRECTIVE
+#undef TRANSFORMATION1
+#undef TRANSFORMATION2
+#undef TRANSFORMATION3
+#undef FUN_NAME_SUFFIX
+
+#define FUN_NAME_SUFFIX 3
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE COLLAPSE_2)
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM) _Pragma("omp tile sizes(8, 8)")
+#define TRANSFORMATION2
+#define TRANSFORMATION3
+#include IMPLEMENTATION_FILE
+
+#undef DIRECTIVE
+#undef TRANSFORMATION1
+#undef TRANSFORMATION2
+#undef TRANSFORMATION3
+#undef FUN_NAME_SUFFIX
+
+#define FUN_NAME_SUFFIX 4
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE COLLAPSE_1)
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM) _Pragma("omp tile sizes(8, 8)")
+#define TRANSFORMATION2
+#define TRANSFORMATION3
+#include IMPLEMENTATION_FILE
+
+#undef DIRECTIVE
+#undef TRANSFORMATION1
+#undef TRANSFORMATION2
+#undef TRANSFORMATION3
+#undef FUN_NAME_SUFFIX
+
+#define FUN_NAME_SUFFIX 5
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE COLLAPSE_1)
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM) _Pragma("omp tile sizes(8, 8, 8)")
+#define TRANSFORMATION2
+#define TRANSFORMATION3
+#include IMPLEMENTATION_FILE
+
+#undef DIRECTIVE
+#undef TRANSFORMATION1
+#undef TRANSFORMATION2
+#undef TRANSFORMATION3
+#undef FUN_NAME_SUFFIX
+
+#define FUN_NAME_SUFFIX 6
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE COLLAPSE_1)
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM) _Pragma("omp tile sizes(10)") _Pragma("omp unroll partial(2)")
+#define TRANSFORMATION2
+#define TRANSFORMATION3
+#include IMPLEMENTATION_FILE
+
+#undef DIRECTIVE
+#undef TRANSFORMATION1
+#undef TRANSFORMATION2
+#undef TRANSFORMATION3
+#undef FUN_NAME_SUFFIX
+
+#define FUN_NAME_SUFFIX 7
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE COLLAPSE_2)
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM) _Pragma("omp tile sizes(7, 11)")
+#define TRANSFORMATION2 _Pragma("omp unroll partial(7)")
+#define TRANSFORMATION3
+#include IMPLEMENTATION_FILE
+
+#undef DIRECTIVE
+#undef TRANSFORMATION1
+#undef TRANSFORMATION2
+#undef TRANSFORMATION3
+#undef FUN_NAME_SUFFIX
+
+#define FUN_NAME_SUFFIX 8
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE COLLAPSE_2)
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM) _Pragma("omp tile sizes(7, 11)")
+#define TRANSFORMATION2 _Pragma("omp tile sizes(7)") _Pragma("omp unroll partial(7)")
+#define TRANSFORMATION3
+#include IMPLEMENTATION_FILE
+
+#undef DIRECTIVE
+#undef TRANSFORMATION1
+#undef TRANSFORMATION2
+#undef TRANSFORMATION3
+#undef FUN_NAME_SUFFIX
+
+#define FUN_NAME_SUFFIX 9
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE COLLAPSE_2)
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM) _Pragma("omp tile sizes(7, 11)")
+#define TRANSFORMATION2 _Pragma("omp tile sizes(7)") _Pragma("omp unroll partial(3)") _Pragma("omp tile sizes(7)")
+#define TRANSFORMATION3
+#include IMPLEMENTATION_FILE
+
+#undef DIRECTIVE
+#undef TRANSFORMATION1
+#undef TRANSFORMATION2
+#undef TRANSFORMATION3
+#undef FUN_NAME_SUFFIX
+
+#define FUN_NAME_SUFFIX 10
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE COLLAPSE_1)
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM) _Pragma("omp unroll partial(5)") _Pragma("omp tile sizes(7)") _Pragma("omp unroll partial(3)") _Pragma("omp tile sizes(7)")
+#define TRANSFORMATION2
+#define TRANSFORMATION3
+#include IMPLEMENTATION_FILE
+
+#undef DIRECTIVE
+#undef TRANSFORMATION1
+#undef TRANSFORMATION2
+#undef TRANSFORMATION3
+#undef FUN_NAME_SUFFIX
+
+#define FUN_NAME_SUFFIX 11
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE COLLAPSE_2)
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM)
+#define TRANSFORMATION2 _Pragma("omp unroll partial(5)") _Pragma("omp tile sizes(7)") _Pragma("omp unroll partial(3)") _Pragma("omp tile sizes(7)")
+#define TRANSFORMATION3
+#include IMPLEMENTATION_FILE
+
+#undef DIRECTIVE
+#undef TRANSFORMATION1
+#undef TRANSFORMATION2
+#undef TRANSFORMATION3
+#undef FUN_NAME_SUFFIX
+
+#define FUN_NAME_SUFFIX 12
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE COLLAPSE_3)
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM)
+#define TRANSFORMATION2
+#define TRANSFORMATION3 _Pragma("omp unroll partial(5)") _Pragma("omp tile sizes(7)") _Pragma("omp unroll partial(3)") _Pragma("omp tile sizes(7)")
+#include IMPLEMENTATION_FILE
+
+#undef DIRECTIVE
+#undef TRANSFORMATION1
+#undef TRANSFORMATION2
+#undef TRANSFORMATION3
+#undef FUN_NAME_SUFFIX
+
+#define FUN_NAME_SUFFIX 13
+#define DIRECTIVE DO_PRAGMA(COMMON_DIRECTIVE COLLAPSE_3)
+#define TRANSFORMATION1 DO_PRAGMA(COMMON_TOP_TRANSFORM)
+#define TRANSFORMATION2 _Pragma("omp tile sizes(7,8)")
+#define TRANSFORMATION3 _Pragma("omp unroll partial(3)") _Pragma("omp tile sizes(7)")
+#include IMPLEMENTATION_FILE
+
+/* Run all thirteen variants instantiated in this file, in order. */
+int main ()
+{
+ main1 ();
+ main2 ();
+ main3 ();
+ main4 ();
+ main5 ();
+ main6 ();
+ main7 ();
+ main8 ();
+ main9 ();
+ main10 ();
+ main11 ();
+ main12 ();
+ main13 ();
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,129 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Baseline without any OpenMP directive. For i = -3, -2, -1, 0 the
+ inner loop runs 5, 4, 3 and 2 times, so SUM must end up as 14. */
+void test1 ()
+{
+ int sum = 0;
+ for (int i = -3; i != 1; ++i)
+ for (int j = -2; j < i * -1; ++j)
+ sum++;
+
+ if (sum != 14)
+ {
+ fprintf (stderr, "%s: Wrong sum: %d\n", __FUNCTION__, sum);
+ abort ();
+ }
+}
+
+/* Same nest as test1 with "omp unroll partial" on the outer loop; the
+ expected sum is still 14. */
+void test2 ()
+{
+ int sum = 0;
+ #pragma omp unroll partial
+ for (int i = -3; i != 1; ++i)
+ for (int j = -2; j < i * -1; ++j)
+ sum++;
+
+ if (sum != 14)
+ {
+ fprintf (stderr, "%s: Wrong sum: %d\n", __FUNCTION__, sum);
+ abort ();
+ }
+}
+
+/* Same nest as test1 with "omp unroll partial" on both the outer and
+ the inner loop; the expected sum is still 14. */
+void test3 ()
+{
+ int sum = 0;
+ #pragma omp unroll partial
+ for (int i = -3; i != 1; ++i)
+ #pragma omp unroll partial
+ for (int j = -2; j < i * -1; ++j)
+ sum++;
+
+ if (sum != 14)
+ {
+ fprintf (stderr, "%s: Wrong sum: %d\n", __FUNCTION__, sum);
+ abort ();
+ }
+}
+
+/* "omp for" applied to the outer loop unrolled by 5, with the inner
+ loop unrolled by 2; the expected sum is still 14. */
+void test4 ()
+{
+ int sum = 0;
+#pragma omp for
+#pragma omp unroll partial(5)
+ for (int i = -3; i != 1; ++i)
+#pragma omp unroll partial(2)
+ for (int j = -2; j < i * -1; ++j)
+ sum++;
+
+ if (sum != 14)
+ {
+ fprintf (stderr, "%s: Wrong sum: %d\n", __FUNCTION__, sum);
+ abort ();
+ }
+}
+
+/* "omp parallel for" with a sum reduction over the nest, with both
+ loops unrolled by 2; the expected sum is still 14. */
+void test5 ()
+{
+ int sum = 0;
+#pragma omp parallel for reduction(+:sum)
+#pragma omp unroll partial(2)
+ for (int i = -3; i != 1; ++i)
+#pragma omp unroll partial(2)
+ for (int j = -2; j < i * -1; ++j)
+ sum++;
+
+ if (sum != 14)
+ {
+ fprintf (stderr, "%s: Wrong sum: %d\n", __FUNCTION__, sum);
+ abort ();
+ }
+}
+
+/* "omp target parallel for" with a sum reduction; the outer loop is
+ unrolled by 7 and the inner loop by 2. The expected sum is still 14. */
+void test6 ()
+{
+ int sum = 0;
+#pragma omp target parallel for reduction(+:sum)
+#pragma omp unroll partial(7)
+ for (int i = -3; i != 1; ++i)
+#pragma omp unroll partial(2)
+ for (int j = -2; j < i * -1; ++j)
+ sum++;
+
+ if (sum != 14)
+ {
+ fprintf (stderr, "%s: Wrong sum: %d\n", __FUNCTION__, sum);
+ abort ();
+ }
+}
+
+/* "omp target teams distribute parallel for" with a sum reduction; the
+ outer loop is unrolled by 7 and the inner loop by 2. The expected sum
+ is still 14. */
+void test7 ()
+{
+ int sum = 0;
+#pragma omp target teams distribute parallel for reduction(+:sum)
+#pragma omp unroll partial(7)
+ for (int i = -3; i != 1; ++i)
+#pragma omp unroll partial(2)
+ for (int j = -2; j < i * -1; ++j)
+ sum++;
+
+ if (sum != 14)
+ {
+ fprintf (stderr, "%s: Wrong sum: %d\n", __FUNCTION__, sum);
+ abort ();
+ }
+}
+
+/* Run test1 through test7 in order; each one aborts on a wrong sum. */
+int
+main ()
+{
+ test1 ();
+ test2 ();
+ test3 ();
+ test4 ();
+ test5 ();
+ test6 ();
+ test7 ();
+
+ return 0;
+}