Handle OpenACC 'self' clause for compute constructs in OpenACC 'kernels' decomposition (was: Extend test suite coverage for OpenACC 'self' clause for compute constructs (was: [PATCH, OpenACC 2.7] Implement self clause for compute constructs))
Checks
Commit Message
Hi!
On 2023-10-25T11:29:52+0200, I wrote:
> On 2023-10-25T10:57:06+0200, I wrote:
>> With minor textual conflicts resolved, I've pushed this to master branch
>> in commit 3a3596389c2e539cb8fd5dc5784a4e2afe193a2a
>> "OpenACC 2.7: Implement self clause for compute constructs", see
>> attached.
>>
>>
>> I'll then apply/submit a number of follow-on commits.
>
>> From 3a3596389c2e539cb8fd5dc5784a4e2afe193a2a Mon Sep 17 00:00:00 2001
>> From: Chung-Lin Tang <cltang@codesourcery.com>
>> Date: Tue, 13 Jun 2023 08:44:31 -0700
>> Subject: [PATCH] OpenACC 2.7: Implement self clause for compute constructs
>
>> .../c-c++-common/goacc/self-clause-1.c | 22 +
>> .../c-c++-common/goacc/self-clause-2.c | 17 +
>> gcc/testsuite/gfortran.dg/goacc/self.f95 | 53 +
>
>> .../libgomp.oacc-c-c++-common/self-1.c | 962 ++++++++++++++++++
>
> I found that insufficient, and added some more. Pushed to
> master branch commit 047841a68ebf5f991e842961f9e54f3c10b94f2c
> "Extend test suite coverage for OpenACC 'self' clause for compute constructs",
> see attached. This is mostly just adapting and cross-linking some
> existing 'if' clause test cases. (..., which turned up a problem when
> the 'self' clause is used with OpenACC 'kernels'.)
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.oacc-fortran/self-1.f90
> @@ -0,0 +1,996 @@
> +! OpenACC 'self' clause.
> +
> +! This is 'if-1.f90' with 'self(!cond)' instead of 'if(cond)' on compute
> +! constructs.
> +! ..., which the exception of certain 'kernels' constructs.
..., which I've then fixed up per master branch
commit 7b2ae64b68132c1c643cb34d58cd5eab6f9de652
"Handle OpenACC 'self' clause for compute constructs in OpenACC 'kernels' decomposition",
see attached.
Grüße
Thomas
-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
From 7b2ae64b68132c1c643cb34d58cd5eab6f9de652 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge <thomas@codesourcery.com>
Date: Mon, 23 Oct 2023 15:28:30 +0200
Subject: [PATCH] Handle OpenACC 'self' clause for compute constructs in
OpenACC 'kernels' decomposition
... to fix up recent commit 3a3596389c2e539cb8fd5dc5784a4e2afe193a2a
"OpenACC 2.7: Implement self clause for compute constructs" for that case.
gcc/
* omp-oacc-kernels-decompose.cc (omp_oacc_kernels_decompose_1):
Handle 'OMP_CLAUSE_SELF' like 'OMP_CLAUSE_IF'.
* omp-expand.cc (expand_omp_target): Handle 'OMP_CLAUSE_SELF' for
'GF_OMP_TARGET_KIND_OACC_DATA_KERNELS'.
gcc/testsuite/
* c-c++-common/goacc/self-clause-2.c: Verify
'--param=openacc-kernels=decompose'.
* gfortran.dg/goacc/kernels-tree.f95: Adjust.
libgomp/
* oacc-parallel.c (GOACC_data_start): Handle
'GOACC_FLAG_LOCAL_DEVICE'.
(GOACC_parallel_keyed): Simplify accordingly.
* testsuite/libgomp.oacc-fortran/self-1.f90: Adjust.
---
gcc/omp-expand.cc | 14 ++++++++++++--
gcc/omp-oacc-kernels-decompose.cc | 15 ++++++++-------
.../c-c++-common/goacc/self-clause-2.c | 6 ++++++
.../gfortran.dg/goacc/kernels-tree.f95 | 2 +-
libgomp/oacc-parallel.c | 17 +++++------------
.../testsuite/libgomp.oacc-fortran/self-1.f90 | 15 +++++++--------
6 files changed, 39 insertions(+), 30 deletions(-)
@@ -10334,9 +10334,19 @@ expand_omp_target (struct omp_region *region)
if ((c = omp_find_clause (clauses, OMP_CLAUSE_SELF)) != NULL_TREE)
{
- gcc_assert (is_gimple_omp_oacc (entry_stmt) && offloaded);
+ gcc_assert ((is_gimple_omp_oacc (entry_stmt) && offloaded)
+ || (gimple_omp_target_kind (entry_stmt)
+ == GF_OMP_TARGET_KIND_OACC_DATA_KERNELS));
- edge e = split_block_after_labels (new_bb);
+ edge e;
+ if (offloaded)
+ e = split_block_after_labels (new_bb);
+ else
+ {
+ gsi = gsi_last_nondebug_bb (new_bb);
+ gsi_prev (&gsi);
+ e = split_block (new_bb, gsi_stmt (gsi));
+ }
basic_block cond_bb = e->src;
new_bb = e->dest;
remove_edge (e);
@@ -1519,17 +1519,18 @@ omp_oacc_kernels_decompose_1 (gimple *kernels_stmt)
break;
}
}
- else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_IF)
+ else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_IF
+ || OMP_CLAUSE_CODE (c) == OMP_CLAUSE_SELF)
{
- /* If there is an 'if' clause, it must be duplicated to the
- enclosing data region. Temporarily remove the if clause's
- chain to avoid copying it. */
+ /* If there is an 'if' or 'self' clause, it must be duplicated to the
+ enclosing data region. Temporarily remove its chain to avoid
+ copying it. */
tree saved_chain = OMP_CLAUSE_CHAIN (c);
OMP_CLAUSE_CHAIN (c) = NULL;
- tree new_if_clause = unshare_expr (c);
+ tree new_clause = unshare_expr (c);
OMP_CLAUSE_CHAIN (c) = saved_chain;
- OMP_CLAUSE_CHAIN (new_if_clause) = data_clauses;
- data_clauses = new_if_clause;
+ OMP_CLAUSE_CHAIN (new_clause) = data_clauses;
+ data_clauses = new_clause;
}
}
/* Restore the original order of the clauses. */
@@ -1,6 +1,8 @@
/* See also 'if-clause-2.c'. */
/* { dg-additional-options "-fdump-tree-gimple" } */
+/* { dg-additional-options "--param=openacc-kernels=decompose" }
+ { dg-additional-options "-fdump-tree-omp_oacc_kernels_decompose" } */
void
f (short c)
@@ -11,6 +13,8 @@ f (short c)
#pragma acc kernels self(c) copy(c)
/* { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_kernels map\(tofrom:c \[len: [0-9]+\]\) self\(_[0-9]+\)$} 1 "gimple" } } */
+ /* { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_data_kernels map\(tofrom:c \[len: [0-9]+\]\) self\(_[0-9]+\)$} 1 "omp_oacc_kernels_decompose" } }
+ { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_parallel_kernels_gang_single async\(-1\) num_gangs\(1\) map\(force_present:c \[len: [0-9]+\]\) self\(_[0-9]+\)$} 1 "omp_oacc_kernels_decompose" } } */
++c;
#pragma acc serial self(c) copy(c)
@@ -29,6 +33,8 @@ g (short d)
#pragma acc kernels self copy(d)
/* { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_kernels map\(tofrom:d \[len: [0-9]+\]\) self\(1\)$} 1 "gimple" } } */
+ /* { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_data_kernels map\(tofrom:d \[len: [0-9]+\]\) self\(1\)$} 1 "omp_oacc_kernels_decompose" } }
+ { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_parallel_kernels_gang_single async\(-1\) num_gangs\(1\) map\(force_present:d \[len: [0-9]+\]\) self\(1+\)$} 1 "omp_oacc_kernels_decompose" } } */
++d;
#pragma acc serial self copy(d)
@@ -42,5 +42,5 @@ end program test
! { dg-final { scan-tree-dump-times "map\\(force_deviceptr:u\\)" 1 "original" } }
-! { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_data_kernels if\((?:D\.|_)[0-9]+\)$} 1 "omp_oacc_kernels_decompose" } }
+! { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_data_kernels if\((?:D\.|_)[0-9]+\) self\(1\)$} 1 "omp_oacc_kernels_decompose" } }
! { dg-final { scan-tree-dump-times {(?n)#pragma omp target oacc_parallel_kernels_gang_single num_gangs\(1\) if\((?:D\.|_)[0-9]+\) self\(1\) async\(-1\)$} 1 "omp_oacc_kernels_decompose" } }
@@ -184,19 +184,11 @@ GOACC_parallel_keyed (int flags_m, void (*fn) (void *),
/* Host fallback if "if" clause is false or if the current device is set to
the host. */
- if (flags & GOACC_FLAG_HOST_FALLBACK)
- {
- prof_info.device_type = acc_device_host;
- api_info.device_type = prof_info.device_type;
- goacc_save_and_set_bind (acc_device_host);
- fn (hostaddrs);
- goacc_restore_bind ();
- goto out_prof;
- }
- else if (flags & GOACC_FLAG_LOCAL_DEVICE)
- {
+ if ((flags & GOACC_FLAG_HOST_FALLBACK)
/* TODO: a proper pthreads based "multi-core CPU" local device
implementation. Currently, this is still the same as host-fallback. */
+ || (flags & GOACC_FLAG_LOCAL_DEVICE))
+ {
prof_info.device_type = acc_device_host;
api_info.device_type = prof_info.device_type;
goacc_save_and_set_bind (acc_device_host);
@@ -457,7 +449,8 @@ GOACC_data_start (int flags_m, size_t mapnum,
/* Host fallback or 'do nothing'. */
if ((acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
- || (flags & GOACC_FLAG_HOST_FALLBACK))
+ || (flags & GOACC_FLAG_HOST_FALLBACK)
+ || (flags & GOACC_FLAG_LOCAL_DEVICE))
{
prof_info.device_type = acc_device_host;
api_info.device_type = prof_info.device_type;
@@ -2,7 +2,6 @@
! This is 'if-1.f90' with 'self(!cond)' instead of 'if(cond)' on compute
! constructs.
-! ..., which the exception of certain 'kernels' constructs.
! { dg-do run }
! { dg-additional-options "-cpp" }
@@ -523,7 +522,7 @@ program main
a(:) = 16.0
- !$acc kernels if (0 == 1) ! { dg-line l_compute[incr c_compute] }
+ !$acc kernels self (0 /= 1) ! { dg-line l_compute[incr c_compute] }
! { dg-note {OpenACC 'kernels' decomposition: variable 'i' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
! { dg-note {variable 'i' already made addressable} {} { target *-*-* } l_compute$c_compute } */
! { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} "" { target *-*-* } .+1 }
@@ -569,7 +568,7 @@ program main
a(:) = 22.0
- !$acc kernels if (zero == 1) ! { dg-line l_compute[incr c_compute] }
+ !$acc kernels self (zero /= 1) ! { dg-line l_compute[incr c_compute] }
! { dg-note {OpenACC 'kernels' decomposition: variable 'i' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
! { dg-note {variable 'i' already made addressable} {} { target *-*-* } l_compute$c_compute } */
! { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} "" { target *-*-* } .+1 }
@@ -615,7 +614,7 @@ program main
a(:) = 76.0
- !$acc kernels if (.FALSE.) ! { dg-line l_compute[incr c_compute] }
+ !$acc kernels self (.TRUE.) ! { dg-line l_compute[incr c_compute] }
! { dg-note {OpenACC 'kernels' decomposition: variable 'i' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
! { dg-note {variable 'i' already made addressable} {} { target *-*-* } l_compute$c_compute } */
! { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} "" { target *-*-* } .+1 }
@@ -665,7 +664,7 @@ program main
nn = 0
- !$acc kernels if (nn == 1) ! { dg-line l_compute[incr c_compute] }
+ !$acc kernels self (nn /= 1) ! { dg-line l_compute[incr c_compute] }
! { dg-note {OpenACC 'kernels' decomposition: variable 'i' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
! { dg-note {variable 'i' already made addressable} {} { target *-*-* } l_compute$c_compute } */
! { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} "" { target *-*-* } .+1 }
@@ -715,7 +714,7 @@ program main
nn = 0;
- !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if ((nn + nn) > 0) ! { dg-line l_compute[incr c_compute] }
+ !$acc kernels copyin (a(1:N)) copyout (b(1:N)) self (.NOT. ((nn + nn) > 0)) ! { dg-line l_compute[incr c_compute] }
! { dg-note {OpenACC 'kernels' decomposition: variable 'i' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
! { dg-note {variable 'i' already made addressable} {} { target *-*-* } l_compute$c_compute } */
! { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} "" { target *-*-* } .+1 }
@@ -735,7 +734,7 @@ program main
a(:) = 91.0
- !$acc kernels copyin (a(1:N)) copyout (b(1:N)) if (-2 > 0) ! { dg-line l_compute[incr c_compute] }
+ !$acc kernels copyin (a(1:N)) copyout (b(1:N)) self (.NOT. (-2 > 0)) ! { dg-line l_compute[incr c_compute] }
! { dg-note {OpenACC 'kernels' decomposition: variable 'i' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
! { dg-note {variable 'i' already made addressable} {} { target *-*-* } l_compute$c_compute } */
! { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} "" { target *-*-* } .+1 }
@@ -781,7 +780,7 @@ program main
a(:) = 87.0
- !$acc kernels if (one == 0) ! { dg-line l_compute[incr c_compute] }
+ !$acc kernels self (one /= 0) ! { dg-line l_compute[incr c_compute] }
! { dg-note {OpenACC 'kernels' decomposition: variable 'i' in 'copy' clause requested to be made addressable} {} { target *-*-* } l_compute$c_compute }
! { dg-note {variable 'i' already made addressable} {} { target *-*-* } l_compute$c_compute } */
! { dg-note {beginning 'parloops' part in OpenACC 'kernels' region} "" { target *-*-* } .+1 }
--
2.34.1