Dear all,
we are accepting loop annotations IVDEP, UNROLL n, VECTOR, and NOVECTOR
for ordinary do loops, but ICE when such an annotation is specified
before a DO CONCURRENT loop.
Since at least the Intel compilers recognize some of the annotations
also for DO CONCURRENT, it seems natural to extend gfortran instead
of rejecting or ignoring the attributes.
The attached patch handles the annotations as needed for the control
structures of FORALL/DO CONCURRENT.
Regarding the UNROLL directive, I don't have good references, so
feedback is welcome. The current patch applies UNROLL only to
the first loop control variable (for the case of loop nests),
which translates into the innermost loop in gcc's representation.
Regtested on x86_64-pc-linux-gnu.
OK for mainline?
Comments?
Thanks,
Harald
On Wed, 10 Jan 2024 23:24:22 +0100
Harald Anlauf <anlauf@gmx.de> wrote:
> diff --git a/gcc/fortran/gfortran.h b/gcc/fortran/gfortran.h
> index 82f388c05f8..88502c1e3f0 100644
> --- a/gcc/fortran/gfortran.h
> +++ b/gcc/fortran/gfortran.h
> @@ -2926,6 +2926,10 @@ gfc_dt;
> typedef struct gfc_forall_iterator
> {
> gfc_expr *var, *start, *end, *stride;
> + unsigned short unroll;
> + bool ivdep;
> + bool vector;
> + bool novector;
> struct gfc_forall_iterator *next;
> }
[]
> diff --git a/gcc/fortran/trans-stmt.cc b/gcc/fortran/trans-stmt.cc
> index a718dce237f..59a9cf99f9b 100644
> --- a/gcc/fortran/trans-stmt.cc
> +++ b/gcc/fortran/trans-stmt.cc
> @@ -41,6 +41,10 @@ typedef struct iter_info
> tree start;
> tree end;
> tree step;
> + unsigned short unroll;
> + bool ivdep;
> + bool vector;
> + bool novector;
> struct iter_info *next;
> }
Given that we already have in gfortran.h
> typedef struct
> {
> gfc_expr *var, *start, *end, *step;
> unsigned short unroll;
> bool ivdep;
> bool vector;
> bool novector;
> }
> gfc_iterator;
would it make sense to break out these loop annotation flags into its
own let's say struct gfc_iterator_flags and use pointers to that flags
instead?
LGTM otherwise.
Thanks for the patch!
From 0df60f02c399a6bf65850ecd5719b25b3de6676f Mon Sep 17 00:00:00 2001
From: Harald Anlauf <anlauf@gmx.de>
Date: Wed, 10 Jan 2024 23:10:02 +0100
Subject: [PATCH] Fortran: annotations for DO CONCURRENT loops [PR113305]
gcc/fortran/ChangeLog:
PR fortran/113305
* gfortran.h: Add annotation controls to gfc_forall_iterator.
* gfortran.texi: Document annotations IVDEP, UNROLL n, VECTOR,
NOVECTOR as applied to DO CONCURRENT.
* parse.cc (parse_do_block): Parse annotations IVDEP, UNROLL n,
VECTOR, NOVECTOR as applied to DO CONCURRENT. Apply UNROLL only to
first loop control variable.
* trans-stmt.cc (gfc_trans_forall_loop): Annotate loops with IVDEP,
UNROLL n, VECTOR, NOVECTOR as needed for DO CONCURRENT.
(gfc_trans_forall_1): Handle annotations IVDEP, UNROLL n, VECTOR,
NOVECTOR.
gcc/testsuite/ChangeLog:
PR fortran/113305
* gfortran.dg/do_concurrent_7.f90: New test.
---
gcc/fortran/gfortran.h | 4 +++
gcc/fortran/gfortran.texi | 12 ++++++++
gcc/fortran/parse.cc | 26 ++++++++++++++++-
gcc/fortran/trans-stmt.cc | 29 ++++++++++++++++++-
gcc/testsuite/gfortran.dg/do_concurrent_7.f90 | 26 +++++++++++++++++
5 files changed, 95 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gfortran.dg/do_concurrent_7.f90
@@ -2926,6 +2926,10 @@ gfc_dt;
typedef struct gfc_forall_iterator
{
gfc_expr *var, *start, *end, *stride;
+ unsigned short unroll;
+ bool ivdep;
+ bool vector;
+ bool novector;
struct gfc_forall_iterator *next;
}
gfc_forall_iterator;
@@ -3262,6 +3262,9 @@ It must be placed immediately before a @code{DO} loop and applies only to the
loop that follows. N is an integer constant specifying the unrolling factor.
The values of 0 and 1 block any unrolling of the loop.
+For @code{DO CONCURRENT} constructs the unrolling specification applies
+only to the first loop control variable.
+
@node BUILTIN directive
@subsection BUILTIN directive
@@ -3300,6 +3303,9 @@ whether a particular loop is vectorizable due to potential
dependencies between iterations. The purpose of the directive is to
tell the compiler that vectorization is safe.
+For @code{DO CONCURRENT} constructs this annotation is implicit to all
+loop control variables.
+
This directive is intended for annotation of existing code. For new
code it is recommended to consider OpenMP SIMD directives as potential
alternative.
@@ -3316,6 +3322,9 @@ This directive tells the compiler to vectorize the following loop. It
must be placed immediately before a @code{DO} loop and applies only to
the loop that follows.
+For @code{DO CONCURRENT} constructs this annotation applies to all loops
+specified in the concurrent header.
+
@node NOVECTOR directive
@subsection NOVECTOR directive
@@ -3328,6 +3337,9 @@ This directive tells the compiler to not vectorize the following loop.
It must be placed immediately before a @code{DO} loop and applies only
to the loop that follows.
+For @code{DO CONCURRENT} constructs this annotation applies to all loops
+specified in the concurrent header.
+
@node Non-Fortran Main Program
@section Non-Fortran Main Program
@@ -5307,7 +5307,31 @@ parse_do_block (void)
do_op = new_st.op;
s.ext.end_do_label = new_st.label1;
- if (new_st.ext.iterator != NULL)
+ if (do_op == EXEC_DO_CONCURRENT)
+ {
+ gfc_forall_iterator *fa;
+ for (fa = new_st.ext.forall_iterator; fa; fa = fa->next)
+ {
+ /* Apply unroll only to innermost loop (first control
+ variable). */
+ if (directive_unroll != -1)
+ {
+ fa->unroll = directive_unroll;
+ directive_unroll = -1;
+ }
+ if (directive_ivdep)
+ fa->ivdep = directive_ivdep;
+ if (directive_vector)
+ fa->vector = directive_vector;
+ if (directive_novector)
+ fa->novector = directive_novector;
+ }
+ directive_ivdep = false;
+ directive_vector = false;
+ directive_novector = false;
+ stree = NULL;
+ }
+ else if (new_st.ext.iterator != NULL)
{
stree = new_st.ext.iterator->var->symtree;
if (directive_unroll != -1)
@@ -41,6 +41,10 @@ typedef struct iter_info
tree start;
tree end;
tree step;
+ unsigned short unroll;
+ bool ivdep;
+ bool vector;
+ bool novector;
struct iter_info *next;
}
iter_info;
@@ -4117,12 +4121,30 @@ gfc_trans_forall_loop (forall_info *forall_tmp, tree body,
/* PR 83064 means that we cannot use annot_expr_parallel_kind until
the autoparallelizer can handle this. */
- if (forall_tmp->do_concurrent)
+ if (forall_tmp->do_concurrent || iter->ivdep)
cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
build_int_cst (integer_type_node,
annot_expr_ivdep_kind),
integer_zero_node);
+ if (iter->unroll && cond != error_mark_node)
+ cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+ build_int_cst (integer_type_node,
+ annot_expr_unroll_kind),
+ build_int_cst (integer_type_node, iter->unroll));
+
+ if (iter->vector && cond != error_mark_node)
+ cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+ build_int_cst (integer_type_node,
+ annot_expr_vector_kind),
+ integer_zero_node);
+
+ if (iter->novector && cond != error_mark_node)
+ cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+ build_int_cst (integer_type_node,
+ annot_expr_no_vector_kind),
+ integer_zero_node);
+
tmp = build1_v (GOTO_EXPR, exit_label);
tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node,
cond, tmp, build_empty_stmt (input_location));
@@ -5090,6 +5112,11 @@ gfc_trans_forall_1 (gfc_code * code, forall_info * nested_forall_info)
gfc_add_block_to_block (&block, &se.pre);
step[n] = se.expr;
+ this_forall->unroll = fa->unroll;
+ this_forall->ivdep = fa->ivdep;
+ this_forall->vector = fa->vector;
+ this_forall->novector = fa->novector;
+
/* Set the NEXT field of this_forall to NULL. */
this_forall->next = NULL;
/* Link this_forall to the info construct. */
new file mode 100644
@@ -0,0 +1,26 @@
+! { dg-do compile }
+! { dg-additional-options "-fdump-tree-original" }
+! PR fortran/113305
+
+program dc
+ implicit none
+ real :: a(12), b(12), c(16,8), d(16,8)
+ integer :: i, j
+ call random_number(b)
+!GCC$ ivdep
+!GCC$ vector
+ do concurrent (i=1:12)
+ a(i) = 2*b(i)
+ end do
+ c = b(1)
+ d = a(2)
+!GCC$ novector
+!GCC$ unroll 4
+ do concurrent (i=1:16:2,j=1:8:2)
+ d(i,j) = 3*c(i,j)
+ end do
+end program
+
+! { dg-final { scan-tree-dump "ANNOTATE_EXPR .* ivdep>, vector" "original" } }
+! { dg-final { scan-tree-dump "ANNOTATE_EXPR .* ivdep>, no-vector" "original" } }
+! { dg-final { scan-tree-dump "ANNOTATE_EXPR .* ivdep>, unroll 4>, no-vector" "original" } }
--
2.35.3