Hi Sandra,
looks quite okay, but I have a couple of remarks:
Sandra Loosemore wrote:
> From: Kwok Cheung Yeung<kcy@codesourcery.com>
>
> This patch implements the libgomp runtime support for the dynamic
> target_device selector via the GOMP_evaluate_target_device function.
...
> --- /dev/null
> +++ b/libgomp/config/gcn/selector.c
...
> +GOMP_evaluate_current_device (const char *kind, const char *arch,
> + const char *isa)
> +{
> + if (kind && strcmp (kind, "gpu") != 0)
> + return false;
This should also match: kind == nohost.
> +
> + if (arch && strcmp (arch, "gcn") != 0)
> + return false;
"amdgcn" missing - we support both for better compatibility with LLVM.
> + if (!isa)
> + return true;
> +
> +#ifdef __GCN3__
> + if (strcmp (isa, "fiji") == 0 || strcmp (isa, "gfx803") == 0)
> + return true;
> +#endif
> +
> +#ifdef __GCN5__
> + if (strcmp (isa, "gfx900") == 0 || strcmp (isa, "gfx906") != 0
> + || strcmp (isa, "gfx908") == 0)
> + return true;
> +#endif
This misses gfx90a and gfx1030. Additionally, the last condition
matches too much. Can you use
#ifdef __fiji__
#ifdef __gfx900__
etc.
instead?
> --- /dev/null
> +++ b/libgomp/config/linux/selector.c
...
> +bool
> +GOMP_evaluate_current_device (const char *kind, const char *arch,
> + const char *isa)
> +{
> + if (kind && strcmp (kind, "cpu") != 0)
> + return false;
You also need to match "host".
> diff --git a/libgomp/config/linux/x86/selector.c b/libgomp/config/linux/x86/selector.c
> new file mode 100644
> index 00000000000..2b6c2ba165b
> --- /dev/null
> +++ b/libgomp/config/linux/x86/selector.c
...
> +bool
> +GOMP_evaluate_current_device (const char *kind, const char *arch,
> + const char *isa)
> +{
> + if (kind && strcmp (kind, "cpu") != 0)
> + return false;
This misses "host" as well.
> + if (arch
> + && strcmp (arch, "x86") != 0
> + && strcmp (arch, "ia32") != 0
> +#ifdef __x86_64__
> + && strcmp (arch, "x86_64") != 0
> +#endif
> +#ifdef __ILP32__
> + && strcmp (arch, "x32") != 0
> +#endif
> + && strcmp (arch, "i386") != 0
> + && strcmp (arch, "i486") != 0
> +#ifndef __i486__
> + && strcmp (arch, "i586") != 0
> +#endif
> +#if !defined (__i486__) && !defined (__i586__)
> + && strcmp (arch, "i686") != 0
> +#endif
The 'i486' seems to lack a #ifdef __i486__ check.
And it seems to be such that
i486 implies i386
i586 implies i486 and i386
etc.
if I understand ix86_omp_device_kind_arch_isa in
gcc/config/i386/i386-options.cc correctly.
There is of course the problem that the compilation flags used for
libgomp are very likely different to the compilation flags of the user
program, which in turn can differ between files.
Thus, I think we should update
https://gcc.gnu.org/onlinedocs/libgomp/OpenMP-Context-Selectors.html
(a) the host compiler always also matches "cpu"
(b) We probably should state somewhere that:
* on x86, both the arch = i486 to i686 and the isa flags depend on
the command line arguments more than on the actual hardware.
* that's especially true for dynamic selectors as the flags used
can differ between 'compilation units' and also the flags used
for the run-time library.
* For nvptx: on the device side, the -march= implies that all
sm_* lower than that value are set.
For target_device, the actual hardware is checked at run time,
implying the highest of the gcc-manual listed -march= values is
selected that the hardware actually supports at runtime.
For (b) we will have to find some better wording and possibly be less
precise, but I think some kind of warning/note is needed here.
> + if (!isa)
> + return true;
> +
> +#ifdef __WBNOINVD__
> + if (strcmp (isa, "wbnoinvd") == 0) return true;
> +#endif
I think at least the following are missing:
-mavx10.1-256 and -mavx10.1-512
do not seem to have a #define
→ Maybe we should file a PR given that those
seem to be the only missing ones.
otherwise:
__AVX10_512BIT__ and "avx10-max-512bit"
__AVX10_1__ and "avx10.1"
__AMX_FP16__ and -mamx-fp16
__CMPCCXADD__ and "cmpccxadd"
__AVXNECONVERT__ and "avxneconvert"
__RAOINT__ and "raoint"
__PREFETCHI__ and "prefetchi"
__USER_MSR__ and "usermsr".
__EVEX256__ and "evex512".
__AVXVNNIINT8__ and "avxvnniint8"
__SM4__ and "sm4"
__SHA512__ and "sha512"
__SM3__ and "sm3"
__AVXVNNIINT16__ and "avxvnniint16"
__AMX_COMPLEX__ and "amx-complex"
__AVXIFMA__ and "avxifma"
and possibly some more, but it might also be complete.
> +++ b/libgomp/config/nvptx/selector.c
> +bool
> +GOMP_evaluate_current_device (const char *kind, const char *arch,
> + const char *isa)
> +{
> + if (kind && strcmp (kind, "gpu") != 0)
> + return false;
"nohost" missing.
> --- a/libgomp/libgomp.map
> +++ b/libgomp/libgomp.map
> @@ -414,6 +414,7 @@ GOMP_5.1 {
> GOMP_scope_start;
> GOMP_warning;
> GOMP_teams4;
> + GOMP_evaluate_target_device;
> } GOMP_5.0.1;
This looks wrong. In my understanding you cannot just randomly
add entries to old map entries but it needs to be a new group
in a new compiler release. In any case, I believe for GCC 14
it should be added to GOMP_5.1.2.
But if in doubt, ask Jakub, who knows this inside out.
> --- a/libgomp/plugin/plugin-gcn.c
> +++ b/libgomp/plugin/plugin-gcn.c
> @@ -3984,6 +3984,20 @@ GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
> GOMP_PLUGIN_target_task_completion, async_data);
> }
>
> +bool
> +GOMP_OFFLOAD_evaluate_device (int device_num, const char *kind,
> + const char *arch, const char *isa)
> +{
> + struct agent_info *agent = get_agent_info (device_num);
> +
> + if (kind && strcmp (kind, "gpu") != 0)
> + return false;
"nohost" missing
> + if (arch && strcmp (arch, "gcn") != 0)
> + return false;
"amdgcn" missing.
> index c04c3acd679..9dcd8a6f6eb 100644
> --- a/libgomp/plugin/plugin-nvptx.c
> +++ b/libgomp/plugin/plugin-nvptx.c
...
> +bool
> +GOMP_OFFLOAD_evaluate_device (int device_num, const char *kind,
> + const char *arch, const char *isa)
> +{
> + if (kind && strcmp (kind, "gpu") != 0)
> + return false;
"nohost" missing.
> +++ b/libgomp/selector.c
> @@ -0,0 +1,36 @@
> +/* Copyright (C) 2022 Free Software Foundation, Inc.
> + Contributed by Mentor, a Siemens Business.
> +
> + This file is part of the GNU Offloading and Multi Processing Library
> + (libgomp).
> +
> + Libgomp is free software; you can redistribute it and/or modify it
> + under the terms of the GNU General Public License as published by
> + the Free Software Foundation; either version 3, or (at your option)
> + any later version.
> +
> + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
> + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
> + FOR A PARTICULAR PURPOSE. See the GNU General Public License for
> + more details.
> +
> + Under Section 7 of GPL version 3, you are granted additional
> + permissions described in the GCC Runtime Library Exception, version
> + 3.1, as published by the Free Software Foundation.
> +
> + You should have received a copy of the GNU General Public License and
> + a copy of the GCC Runtime Library Exception along with this program;
> + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
> +<http://www.gnu.org/licenses/>. */
> +
> +/* This file contains a placeholder implementation of
> + GOMP_evaluate_current_device. */
> +
> +#include "libgomp.h"
> +
> +bool
> +GOMP_evaluate_current_device (const char *kind, const char *arch,
> + const char *isa)
> +{
> + return false;
> +}
Isn't this called in some cases on the host? If so, it should
support kind == "host" and kind == "cpu".
> diff --git a/libgomp/target.c b/libgomp/target.c
> index 1367e9cce6c..206987953dc 100644
> --- a/libgomp/target.c
> +++ b/libgomp/target.c
> @@ -5088,6 +5088,43 @@ omp_pause_resource_all (omp_pause_resource_t kind)
> ialias (omp_pause_resource)
> ialias (omp_pause_resource_all)
>
> +bool
> +GOMP_evaluate_target_device (int device_num, const char *kind,
> + const char *arch, const char *isa)
> +{
> + bool result = true;
> +
> + if (device_num < 0)
> + device_num = omp_get_default_device ();
As mentioned with regards to 1/8, 'omp_initial_device == -1' according
to the OpenMP standard and there is additionally 'omp_invalid_device'.
Thanks,
Tobias
@@ -82,6 +82,8 @@ typedef enum {
CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39,
CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40,
CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41,
+ CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75,
+ CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76,
CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82
} CUdevice_attribute;
@@ -72,7 +72,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \
target.c splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \
oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \
priority_queue.c affinity-fmt.c teams.c allocator.c oacc-profiling.c \
- oacc-target.c target-indirect.c
+ oacc-target.c target-indirect.c selector.c
include $(top_srcdir)/plugin/Makefrag.am
@@ -219,7 +219,7 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \
oacc-parallel.lo oacc-host.lo oacc-init.lo oacc-mem.lo \
oacc-async.lo oacc-plugin.lo oacc-cuda.lo priority_queue.lo \
affinity-fmt.lo teams.lo allocator.lo oacc-profiling.lo \
- oacc-target.lo target-indirect.lo $(am__objects_1)
+ oacc-target.lo target-indirect.lo selector.lo $(am__objects_1)
libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
@@ -552,7 +552,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \
oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c \
oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
affinity-fmt.c teams.c allocator.c oacc-profiling.c \
- oacc-target.c target-indirect.c $(am__append_3)
+ oacc-target.c target-indirect.c selector.c $(am__append_3)
# Nvidia PTX OpenACC plugin.
@PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
@@ -777,6 +777,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ptrlock.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scope.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sections.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/selector.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sem.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/single.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/splay-tree.Plo@am__quote@
new file mode 100644
@@ -0,0 +1,57 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+ Contributed by Mentor, a Siemens Business.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file contains an implementation of GOMP_evaluate_current_device for
+ an AMD GCN GPU. */
+
+#include "libgomp.h"
+#include <string.h>
+
+/* Evaluate a target_device selector against the AMD GCN device this code
+   is compiled for.  KIND, ARCH and ISA are the selector's trait strings;
+   a NULL trait is not checked.  Returns true only if every non-NULL trait
+   matches.  The ISA test relies solely on the preprocessor macros in
+   effect when this file is compiled.  */
+bool
+GOMP_evaluate_current_device (const char *kind, const char *arch,
+                              const char *isa)
+{
+  /* A GCN offload device is a "gpu" and, not being the host, "nohost".  */
+  if (kind && strcmp (kind, "gpu") != 0 && strcmp (kind, "nohost") != 0)
+    return false;
+
+  /* Accept both spellings of the architecture name; "amdgcn" is the one
+     LLVM uses.  */
+  if (arch && strcmp (arch, "gcn") != 0 && strcmp (arch, "amdgcn") != 0)
+    return false;
+
+  if (!isa)
+    return true;
+
+#ifdef __GCN3__
+  if (strcmp (isa, "fiji") == 0 || strcmp (isa, "gfx803") == 0)
+    return true;
+#endif
+
+#ifdef __GCN5__
+  /* Every comparison must test for equality: a "!= 0" here would accept
+     any ISA string other than "gfx906".  */
+  if (strcmp (isa, "gfx900") == 0 || strcmp (isa, "gfx906") == 0
+      || strcmp (isa, "gfx908") == 0)
+    return true;
+#endif
+
+  return false;
+}
new file mode 100644
@@ -0,0 +1,43 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+ Contributed by Mentor, a Siemens Business.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file contains a generic implementation of
+ GOMP_evaluate_current_device when run on a Linux host. */
+
+#include <string.h>
+#include "libgomp.h"
+
+/* Evaluate a target_device selector against a generic Linux host.
+   KIND, ARCH and ISA are the selector's trait strings; a NULL trait is
+   not checked.  This version knows no architecture or ISA names, so it
+   returns true only when KIND is acceptable and neither ARCH nor ISA is
+   constrained.  */
+bool
+GOMP_evaluate_current_device (const char *kind, const char *arch,
+                              const char *isa)
+{
+  /* Code running on the host matches both the "cpu" and "host" kinds.  */
+  if (kind && strcmp (kind, "cpu") != 0 && strcmp (kind, "host") != 0)
+    return false;
+
+  return !arch && !isa;
+}
new file mode 100644
@@ -0,0 +1,325 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+ Contributed by Mentor, a Siemens Business.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file contains an implementation of GOMP_evaluate_current_device for
+ an x86/x64-based Linux host. */
+
+#include <string.h>
+#include "libgomp.h"
+
+/* Evaluate a target_device selector against an x86/x86-64 Linux host.
+   KIND, ARCH and ISA are the selector's trait strings; a NULL trait is
+   not checked.  Returns true only if every non-NULL trait matches.
+   ARCH and ISA are decided purely by the preprocessor macros in effect
+   when this file is compiled; the running CPU is not probed.  */
+bool
+GOMP_evaluate_current_device (const char *kind, const char *arch,
+                              const char *isa)
+{
+  /* Code running on the host matches both the "cpu" and "host" kinds.  */
+  if (kind && strcmp (kind, "cpu") != 0 && strcmp (kind, "host") != 0)
+    return false;
+
+  /* Reject ARCH unless it equals one of the names enabled below.
+     NOTE(review): "i486" is accepted unconditionally, while "i586" and
+     "i686" are accepted only when __i486__ (and __i586__) are NOT
+     defined -- confirm this matches the compile-time selector.  */
+  if (arch
+      && strcmp (arch, "x86") != 0
+      && strcmp (arch, "ia32") != 0
+#ifdef __x86_64__
+      && strcmp (arch, "x86_64") != 0
+#endif
+#ifdef __ILP32__
+      && strcmp (arch, "x32") != 0
+#endif
+      && strcmp (arch, "i386") != 0
+      && strcmp (arch, "i486") != 0
+#ifndef __i486__
+      && strcmp (arch, "i586") != 0
+#endif
+#if !defined (__i486__) && !defined (__i586__)
+      && strcmp (arch, "i686") != 0
+#endif
+      )
+    return false;
+
+  if (!isa)
+    return true;
+
+  /* One test per ISA extension, each compiled in only when the matching
+     feature macro is defined for this translation unit.  */
+#ifdef __WBNOINVD__
+  if (strcmp (isa, "wbnoinvd") == 0) return true;
+#endif
+#ifdef __AVX512VP2INTERSECT__
+  if (strcmp (isa, "avx512vp2intersect") == 0) return true;
+#endif
+#ifdef __MMX__
+  if (strcmp (isa, "mmx") == 0) return true;
+#endif
+#ifdef __3dNOW__
+  if (strcmp (isa, "3dnow") == 0) return true;
+#endif
+#ifdef __3dNOW_A__
+  if (strcmp (isa, "3dnowa") == 0) return true;
+#endif
+#ifdef __SSE__
+  if (strcmp (isa, "sse") == 0) return true;
+#endif
+#ifdef __SSE2__
+  if (strcmp (isa, "sse2") == 0) return true;
+#endif
+#ifdef __SSE3__
+  if (strcmp (isa, "sse3") == 0) return true;
+#endif
+#ifdef __SSSE3__
+  if (strcmp (isa, "ssse3") == 0) return true;
+#endif
+#ifdef __SSE4_1__
+  if (strcmp (isa, "sse4.1") == 0) return true;
+#endif
+#ifdef __SSE4_2__
+  if (strcmp (isa, "sse4") == 0 || strcmp (isa, "sse4.2") == 0) return true;
+#endif
+#ifdef __AES__
+  if (strcmp (isa, "aes") == 0) return true;
+#endif
+#ifdef __SHA__
+  if (strcmp (isa, "sha") == 0) return true;
+#endif
+#ifdef __PCLMUL__
+  if (strcmp (isa, "pclmul") == 0) return true;
+#endif
+#ifdef __AVX__
+  if (strcmp (isa, "avx") == 0) return true;
+#endif
+#ifdef __AVX2__
+  if (strcmp (isa, "avx2") == 0) return true;
+#endif
+#ifdef __AVX512F__
+  if (strcmp (isa, "avx512f") == 0) return true;
+#endif
+#ifdef __AVX512ER__
+  if (strcmp (isa, "avx512er") == 0) return true;
+#endif
+#ifdef __AVX512CD__
+  if (strcmp (isa, "avx512cd") == 0) return true;
+#endif
+#ifdef __AVX512PF__
+  if (strcmp (isa, "avx512pf") == 0) return true;
+#endif
+#ifdef __AVX512DQ__
+  if (strcmp (isa, "avx512dq") == 0) return true;
+#endif
+#ifdef __AVX512BW__
+  if (strcmp (isa, "avx512bw") == 0) return true;
+#endif
+#ifdef __AVX512VL__
+  if (strcmp (isa, "avx512vl") == 0) return true;
+#endif
+#ifdef __AVX512VBMI__
+  if (strcmp (isa, "avx512vbmi") == 0) return true;
+#endif
+#ifdef __AVX512IFMA__
+  if (strcmp (isa, "avx512ifma") == 0) return true;
+#endif
+#ifdef __AVX5124VNNIW__
+  if (strcmp (isa, "avx5124vnniw") == 0) return true;
+#endif
+#ifdef __AVX512VBMI2__
+  if (strcmp (isa, "avx512vbmi2") == 0) return true;
+#endif
+#ifdef __AVX512VNNI__
+  if (strcmp (isa, "avx512vnni") == 0) return true;
+#endif
+#ifdef __PCONFIG__
+  if (strcmp (isa, "pconfig") == 0) return true;
+#endif
+#ifdef __SGX__
+  if (strcmp (isa, "sgx") == 0) return true;
+#endif
+#ifdef __AVX5124FMAPS__
+  if (strcmp (isa, "avx5124fmaps") == 0) return true;
+#endif
+#ifdef __AVX512BITALG__
+  if (strcmp (isa, "avx512bitalg") == 0) return true;
+#endif
+#ifdef __AVX512VPOPCNTDQ__
+  if (strcmp (isa, "avx512vpopcntdq") == 0) return true;
+#endif
+#ifdef __FMA__
+  if (strcmp (isa, "fma") == 0) return true;
+#endif
+#ifdef __RTM__
+  if (strcmp (isa, "rtm") == 0) return true;
+#endif
+#ifdef __SSE4A__
+  if (strcmp (isa, "sse4a") == 0) return true;
+#endif
+#ifdef __FMA4__
+  if (strcmp (isa, "fma4") == 0) return true;
+#endif
+#ifdef __XOP__
+  if (strcmp (isa, "xop") == 0) return true;
+#endif
+#ifdef __LWP__
+  if (strcmp (isa, "lwp") == 0) return true;
+#endif
+#ifdef __ABM__
+  if (strcmp (isa, "abm") == 0) return true;
+#endif
+#ifdef __BMI__
+  if (strcmp (isa, "bmi") == 0) return true;
+#endif
+#ifdef __BMI2__
+  if (strcmp (isa, "bmi2") == 0) return true;
+#endif
+#ifdef __LZCNT__
+  if (strcmp (isa, "lzcnt") == 0) return true;
+#endif
+#ifdef __TBM__
+  if (strcmp (isa, "tbm") == 0) return true;
+#endif
+#ifdef __CRC32__
+  if (strcmp (isa, "crc32") == 0) return true;
+#endif
+#ifdef __POPCNT__
+  if (strcmp (isa, "popcnt") == 0) return true;
+#endif
+#ifdef __FSGSBASE__
+  if (strcmp (isa, "fsgsbase") == 0) return true;
+#endif
+#ifdef __RDRND__
+  if (strcmp (isa, "rdrnd") == 0) return true;
+#endif
+#ifdef __F16C__
+  if (strcmp (isa, "f16c") == 0) return true;
+#endif
+#ifdef __RDSEED__
+  if (strcmp (isa, "rdseed") == 0) return true;
+#endif
+#ifdef __PRFCHW__
+  if (strcmp (isa, "prfchw") == 0) return true;
+#endif
+#ifdef __ADX__
+  if (strcmp (isa, "adx") == 0) return true;
+#endif
+#ifdef __FXSR__
+  if (strcmp (isa, "fxsr") == 0) return true;
+#endif
+#ifdef __XSAVE__
+  if (strcmp (isa, "xsave") == 0) return true;
+#endif
+#ifdef __XSAVEOPT__
+  if (strcmp (isa, "xsaveopt") == 0) return true;
+#endif
+#ifdef __PREFETCHWT1__
+  if (strcmp (isa, "prefetchwt1") == 0) return true;
+#endif
+#ifdef __CLFLUSHOPT__
+  if (strcmp (isa, "clflushopt") == 0) return true;
+#endif
+#ifdef __CLZERO__
+  if (strcmp (isa, "clzero") == 0) return true;
+#endif
+#ifdef __XSAVEC__
+  if (strcmp (isa, "xsavec") == 0) return true;
+#endif
+#ifdef __XSAVES__
+  if (strcmp (isa, "xsaves") == 0) return true;
+#endif
+#ifdef __CLWB__
+  if (strcmp (isa, "clwb") == 0) return true;
+#endif
+#ifdef __MWAITX__
+  if (strcmp (isa, "mwaitx") == 0) return true;
+#endif
+#ifdef __PKU__
+  if (strcmp (isa, "pku") == 0) return true;
+#endif
+#ifdef __RDPID__
+  if (strcmp (isa, "rdpid") == 0) return true;
+#endif
+#ifdef __GFNI__
+  if (strcmp (isa, "gfni") == 0) return true;
+#endif
+#ifdef __SHSTK__
+  if (strcmp (isa, "shstk") == 0) return true;
+#endif
+#ifdef __VAES__
+  if (strcmp (isa, "vaes") == 0) return true;
+#endif
+#ifdef __VPCLMULQDQ__
+  if (strcmp (isa, "vpclmulqdq") == 0) return true;
+#endif
+#ifdef __MOVDIRI__
+  if (strcmp (isa, "movdiri") == 0) return true;
+#endif
+#ifdef __MOVDIR64B__
+  if (strcmp (isa, "movdir64b") == 0) return true;
+#endif
+#ifdef __WAITPKG__
+  if (strcmp (isa, "waitpkg") == 0) return true;
+#endif
+#ifdef __CLDEMOTE__
+  if (strcmp (isa, "cldemote") == 0) return true;
+#endif
+#ifdef __SERIALIZE__
+  if (strcmp (isa, "serialize") == 0) return true;
+#endif
+#ifdef __PTWRITE__
+  if (strcmp (isa, "ptwrite") == 0) return true;
+#endif
+#ifdef __AVX512BF16__
+  if (strcmp (isa, "avx512bf16") == 0) return true;
+#endif
+#ifdef __AVX512FP16__
+  if (strcmp (isa, "avx512fp16") == 0) return true;
+#endif
+#ifdef __ENQCMD__
+  if (strcmp (isa, "enqcmd") == 0) return true;
+#endif
+#ifdef __TSXLDTRK__
+  if (strcmp (isa, "tsxldtrk") == 0) return true;
+#endif
+#ifdef __AMX_TILE__
+  if (strcmp (isa, "amx-tile") == 0) return true;
+#endif
+#ifdef __AMX_INT8__
+  if (strcmp (isa, "amx-int8") == 0) return true;
+#endif
+#ifdef __AMX_BF16__
+  if (strcmp (isa, "amx-bf16") == 0) return true;
+#endif
+#ifdef __LAHF_SAHF__
+  if (strcmp (isa, "sahf") == 0) return true;
+#endif
+#ifdef __MOVBE__
+  if (strcmp (isa, "movbe") == 0) return true;
+#endif
+#ifdef __UINTR__
+  if (strcmp (isa, "uintr") == 0) return true;
+#endif
+#ifdef __HRESET__
+  if (strcmp (isa, "hreset") == 0) return true;
+#endif
+#ifdef __KL__
+  if (strcmp (isa, "kl") == 0) return true;
+#endif
+#ifdef __WIDEKL__
+  if (strcmp (isa, "widekl") == 0) return true;
+#endif
+
+  return false;
+}
new file mode 100644
@@ -0,0 +1,65 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+ Contributed by Mentor, a Siemens Business.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file contains an implementation of GOMP_evaluate_current_device for
+ a Nvidia GPU. */
+
+#include "libgomp.h"
+#include <string.h>
+
+/* Evaluate a target_device selector against the Nvidia PTX device this
+   code is compiled for.  KIND, ARCH and ISA are the selector's trait
+   strings; a NULL trait is not checked.  Returns true only if every
+   non-NULL trait matches.  The ISA test compares against __PTX_SM__ as
+   seen when this file is compiled.  */
+bool
+GOMP_evaluate_current_device (const char *kind, const char *arch,
+                              const char *isa)
+{
+  /* An nvptx offload device is a "gpu" and, not being the host,
+     "nohost".  */
+  if (kind && strcmp (kind, "gpu") != 0 && strcmp (kind, "nohost") != 0)
+    return false;
+
+  if (arch && strcmp (arch, "nvptx") != 0)
+    return false;
+
+  if (!isa)
+    return true;
+
+  /* "sm_30" is accepted unconditionally; each higher level is accepted
+     when __PTX_SM__ is at least that level, so a build for a newer
+     level also matches the older names listed here.  */
+  if (strcmp (isa, "sm_30") == 0)
+    return true;
+#if __PTX_SM__ >= 350
+  if (strcmp (isa, "sm_35") == 0)
+    return true;
+#endif
+#if __PTX_SM__ >= 530
+  if (strcmp (isa, "sm_53") == 0)
+    return true;
+#endif
+#if __PTX_SM__ >= 750
+  if (strcmp (isa, "sm_75") == 0)
+    return true;
+#endif
+#if __PTX_SM__ >= 800
+  if (strcmp (isa, "sm_80") == 0)
+    return true;
+#endif
+
+  return false;
+}
@@ -152,6 +152,8 @@ extern int GOMP_OFFLOAD_memcpy3d (int, int, size_t, size_t, size_t, void *,
extern bool GOMP_OFFLOAD_can_run (void *);
extern void GOMP_OFFLOAD_run (int, void *, void *, void **);
extern void GOMP_OFFLOAD_async_run (int, void *, void *, void **, void *);
+extern bool GOMP_OFFLOAD_evaluate_device (int, const char *, const char *,
+ const char *);
extern void GOMP_OFFLOAD_openacc_exec (void (*) (void *), size_t, void **,
void **, unsigned *, void *);
@@ -1415,6 +1415,7 @@ struct gomp_device_descr
__typeof (GOMP_OFFLOAD_can_run) *can_run_func;
__typeof (GOMP_OFFLOAD_run) *run_func;
__typeof (GOMP_OFFLOAD_async_run) *async_run_func;
+ __typeof (GOMP_OFFLOAD_evaluate_device) *evaluate_device_func;
/* Splay tree containing information about mapped memory regions. */
struct splay_tree_s mem_map;
@@ -414,6 +414,7 @@ GOMP_5.1 {
GOMP_scope_start;
GOMP_warning;
GOMP_teams4;
+ GOMP_evaluate_target_device;
} GOMP_5.0.1;
GOMP_5.1.1 {
@@ -337,6 +337,11 @@ extern void GOMP_single_copy_end (void *);
extern void GOMP_scope_start (uintptr_t *);
+/* selector.c */
+
+extern bool GOMP_evaluate_current_device (const char *, const char *,
+ const char *);
+
/* target.c */
extern void GOMP_target (int, void (*) (void *), const void *,
@@ -359,6 +364,9 @@ extern void GOMP_teams (unsigned int, unsigned int);
extern bool GOMP_teams4 (unsigned int, unsigned int, unsigned int, bool);
extern void *GOMP_target_map_indirect_ptr (void *);
+extern bool GOMP_evaluate_target_device (int, const char *, const char *,
+ const char *);
+
/* teams.c */
extern void GOMP_teams_reg (void (*) (void *), void *, unsigned, unsigned,
@@ -136,6 +136,16 @@ host_run (int n __attribute__ ((unused)), void *fn_ptr, void *vars,
fn (vars);
}
+static bool
+host_evaluate_device (int device_num __attribute__ ((unused)),
+ const char *kind __attribute__ ((unused)),
+ const char *arch __attribute__ ((unused)),
+ const char *isa __attribute__ ((unused)))
+{
+ __builtin_unreachable (); /* Stub hook: control is declared never to get here.  */
+ return false; /* Dead code after the builtin above.  */
+}
+
static void
host_openacc_exec (void (*fn) (void *),
size_t mapnum __attribute__ ((unused)),
@@ -285,6 +295,7 @@ static struct gomp_device_descr host_dispatch =
.memcpy2d_func = NULL,
.memcpy3d_func = NULL,
.run_func = host_run,
+ .evaluate_device_func = host_evaluate_device,
.mem_map = { NULL },
.mem_map_rev = { NULL },
@@ -3984,6 +3984,20 @@ GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars,
GOMP_PLUGIN_target_task_completion, async_data);
}
+/* Plugin hook: evaluate a target_device selector against GCN device
+   DEVICE_NUM.  KIND, ARCH and ISA are the selector's trait strings; a
+   NULL trait is not checked.  Returns true only if every non-NULL trait
+   matches; ISA is compared, via isa_code, with the ISA recorded for the
+   agent.  */
+bool
+GOMP_OFFLOAD_evaluate_device (int device_num, const char *kind,
+                              const char *arch, const char *isa)
+{
+  struct agent_info *agent = get_agent_info (device_num);
+
+  /* A GCN offload device is a "gpu" and, not being the host, "nohost".  */
+  if (kind && strcmp (kind, "gpu") != 0 && strcmp (kind, "nohost") != 0)
+    return false;
+  /* Accept both spellings of the architecture name.  */
+  if (arch && strcmp (arch, "gcn") != 0 && strcmp (arch, "amdgcn") != 0)
+    return false;
+  if (!isa)
+    return true;
+
+  /* NOTE(review): get_agent_info is defined outside this hunk; the null
+     test guards the dereference in case it can fail -- confirm.  */
+  return agent != NULL && isa_code (isa) == agent->device_isa;
+}
+
/* }}} */
/* {{{ OpenACC Plugin API */
@@ -317,6 +317,7 @@ struct ptx_device
int max_threads_per_block;
int max_threads_per_multiprocessor;
int default_dims[GOMP_DIM_MAX];
+ int compute_major, compute_minor;
/* Length as used by the CUDA Runtime API ('struct cudaDeviceProp'). */
char name[256];
@@ -541,6 +542,14 @@ nvptx_open_device (int n)
for (int i = 0; i != GOMP_DIM_MAX; i++)
ptx_dev->default_dims[i] = 0;
+ CUDA_CALL_ERET (NULL, cuDeviceGetAttribute, &pi,
+ CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, dev);
+ ptx_dev->compute_major = pi;
+
+ CUDA_CALL_ERET (NULL, cuDeviceGetAttribute, &pi,
+ CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, dev);
+ ptx_dev->compute_minor = pi;
+
CUDA_CALL_ERET (NULL, cuDeviceGetName, ptx_dev->name, sizeof ptx_dev->name,
dev);
@@ -2312,3 +2321,39 @@ GOMP_OFFLOAD_run (int ord, void *tgt_fn, void *tgt_vars, void **args)
}
/* TODO: Implement GOMP_OFFLOAD_async_run. */
+
+/* Return true from the enclosing function if ISA names compute
+   capability MAJOR.MINOR and the device supports at least that
+   capability.  Uses the enclosing function's `device' and `isa'.
+   Capabilities are ordered by major first and minor second, so 8.0 is
+   above 3.5; comparing the two fields independently would reject it.  */
+#define CHECK_ISA(major, minor)                                 \
+  do                                                            \
+    {                                                           \
+      if ((device->compute_major > (major)                      \
+           || (device->compute_major == (major)                 \
+               && device->compute_minor >= (minor)))            \
+          && strcmp (isa, "sm_" #major #minor) == 0)            \
+        return true;                                            \
+    }                                                           \
+  while (0)
+
+/* Plugin hook: evaluate a target_device selector against nvptx device
+   DEVICE_NUM.  KIND, ARCH and ISA are the selector's trait strings; a
+   NULL trait is not checked.  Returns true only if every non-NULL trait
+   matches.  ISA matches when it is one of the sm_XY names listed below
+   and does not exceed the device's compute capability.  */
+bool
+GOMP_OFFLOAD_evaluate_device (int device_num, const char *kind,
+                              const char *arch, const char *isa)
+{
+  /* An nvptx offload device is a "gpu" and, not being the host,
+     "nohost".  */
+  if (kind && strcmp (kind, "gpu") != 0 && strcmp (kind, "nohost") != 0)
+    return false;
+  if (arch && strcmp (arch, "nvptx") != 0)
+    return false;
+  if (!isa)
+    return true;
+
+  struct ptx_device *device = ptx_devices[device_num];
+
+  CHECK_ISA (3, 0);
+  CHECK_ISA (3, 5);
+  CHECK_ISA (3, 7);
+  CHECK_ISA (5, 0);
+  CHECK_ISA (5, 2);
+  CHECK_ISA (5, 3);
+  CHECK_ISA (6, 0);
+  CHECK_ISA (6, 1);
+  CHECK_ISA (6, 2);
+  CHECK_ISA (7, 0);
+  CHECK_ISA (7, 2);
+  CHECK_ISA (7, 5);
+  CHECK_ISA (8, 0);
+  CHECK_ISA (8, 6);
+
+  return false;
+}
new file mode 100644
@@ -0,0 +1,36 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+ Contributed by Mentor, a Siemens Business.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This file contains a placeholder implementation of
+ GOMP_evaluate_current_device. */
+
+#include "libgomp.h"
+
+/* Fallback evaluation of a target_device selector against the device
+   this code runs on.  KIND, ARCH and ISA are the selector's trait
+   strings; a NULL trait is not checked.  No architecture or ISA names
+   are known here, so the result is true only when KIND is NULL, "cpu"
+   or "host" and neither ARCH nor ISA is constrained.
+   NOTE(review): assumes this fallback is only built for host
+   configurations -- confirm against the config search path.  */
+bool
+GOMP_evaluate_current_device (const char *kind, const char *arch,
+                              const char *isa)
+{
+  /* The builtin is used because this file includes no string header.  */
+  if (kind
+      && __builtin_strcmp (kind, "cpu") != 0
+      && __builtin_strcmp (kind, "host") != 0)
+    return false;
+
+  return !arch && !isa;
+}
@@ -5088,6 +5088,43 @@ omp_pause_resource_all (omp_pause_resource_t kind)
ialias (omp_pause_resource)
ialias (omp_pause_resource_all)
+/* Evaluate a dynamic target_device selector for device DEVICE_NUM.
+   KIND, ARCH and ISA are the selector's trait strings; a NULL trait is
+   not checked, and a KIND of "any" is treated like NULL.  If DEVICE_NUM
+   is the device this code runs on, the decision is made by
+   GOMP_evaluate_current_device; otherwise, on the initial device, it is
+   delegated to the device's evaluate_device_func hook.  Returns false if
+   the device cannot be resolved or if asked from a non-initial device
+   about another device.  */
+bool
+GOMP_evaluate_target_device (int device_num, const char *kind,
+                             const char *arch, const char *isa)
+{
+  bool result = true;
+
+  /* NOTE(review): every negative number is mapped to the default device
+     here, which also swallows any negative sentinel values such as an
+     initial-device or invalid-device number -- confirm what the compiler
+     passes when no device number is given.  */
+  if (device_num < 0)
+    device_num = omp_get_default_device ();
+
+  if (kind && strcmp (kind, "any") == 0)
+    kind = NULL;
+
+  /* DEVICE_NUM is a signed int, and passing a null pointer for %s is
+     undefined, so print a marker for traits that are absent.  */
+  gomp_debug (1, "%s: device_num = %d, kind=%s, arch=%s, isa=%s",
+              __FUNCTION__, device_num, kind ? kind : "(null)",
+              arch ? arch : "(null)", isa ? isa : "(null)");
+
+  if (omp_get_device_num () == device_num)
+    result = GOMP_evaluate_current_device (kind, arch, isa);
+  else
+    {
+      if (!omp_is_initial_device ())
+        /* Accelerators are not expected to know about other devices.  */
+        result = false;
+      else
+        {
+          struct gomp_device_descr *device = resolve_device (device_num, true);
+          if (device == NULL)
+            result = false;
+          else if (device->evaluate_device_func)
+            result = device->evaluate_device_func (device_num, kind, arch,
+                                                   isa);
+          /* With no hook installed RESULT keeps its initial value.  */
+        }
+    }
+
+  gomp_debug (1, " -> %s\n", result ? "true" : "false");
+  return result;
+}
+
#ifdef PLUGIN_SUPPORT
/* This function tries to load a plugin for DEVICE. Name of plugin is passed
@@ -5140,6 +5177,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
DLSYM (free);
DLSYM (dev2host);
DLSYM (host2dev);
+ DLSYM (evaluate_device);
DLSYM_OPT (memcpy2d, memcpy2d);
DLSYM_OPT (memcpy3d, memcpy3d);
device->capabilities = device->get_caps_func ();