libgompd: Add thread handles

Message ID 4086807d-97d1-ec58-1617-24dda537010a@gmail.com
State New, archived
Headers
Series libgompd: Add thread handles |

Commit Message

Ahmed Sayed Mousse Sept. 27, 2022, 1:12 a.m. UTC
  /This patch is the initial implementation of OpenMP-API specs book section //20.5.5 with title "Thread Handles". //libgomp/ChangeLog ///2022-07-01 Ahmed Sayed <ahmedsayedmousse@gmail.com> ///* Makefile.am (libgompd_la_SOURCES): Add ompd-threads.c.///* Makefile.in: Regenerate. * team.c (gomp_free_thread): Call 
ompd_bp_thread_end./* ompd-support.c (gompd_thread_initial_tls_bias): 
New variable. (gompd_load): Initialize gompd_thread_initial_tls_bias. * 
ompd-threads.c: New file.///
  

Comments

Ahmed Sayed Mousse Sept. 27, 2022, 1:20 a.m. UTC | #1
For some reason my mail client keeps missing things up. Maybe it is my 
fault.

Anyway this is it again with the right format I hope.

/This patch is the initial implementation of OpenMP-API specs book section //20.5.5 with title "Thread Handles". //libgomp/ChangeLog ///2022-07-01 Ahmed Sayed <ahmedsayedmousse@gmail.com> ///* Makefile.am (libgompd_la_SOURCES): Add ompd-threads.c.///* Makefile.in: Regenerate. * team.c (gomp_free_thread): Call 
ompd_bp_thread_end./* ompd-support.c (gompd_thread_initial_tls_bias): 
New variable. (gompd_load): Initialize gompd_thread_initial_tls_bias. * 
ompd-threads.c: New file.///
diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am
index 6d913a93e7f..23f5bede1bf 100644
--- a/libgomp/Makefile.am
+++ b/libgomp/Makefile.am
@@ -94,7 +94,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \
 	priority_queue.c affinity-fmt.c teams.c allocator.c oacc-profiling.c \
 	oacc-target.c ompd-support.c
 
-libgompd_la_SOURCES = ompd-init.c ompd-helper.c ompd-icv.c
+libgompd_la_SOURCES = ompd-init.c ompd-helper.c ompd-icv.c ompd-threads.c
 
 include $(top_srcdir)/plugin/Makefrag.am
 
diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in
index 40f896b5f03..7acdcbf31d5 100644
--- a/libgomp/Makefile.in
+++ b/libgomp/Makefile.in
@@ -233,7 +233,8 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \
 	affinity-fmt.lo teams.lo allocator.lo oacc-profiling.lo \
 	oacc-target.lo ompd-support.lo $(am__objects_1)
 libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
-am_libgompd_la_OBJECTS = ompd-init.lo ompd-helper.lo ompd-icv.lo
+am_libgompd_la_OBJECTS = ompd-init.lo ompd-helper.lo ompd-icv.lo \
+	ompd-threads.lo
 libgompd_la_OBJECTS = $(am_libgompd_la_OBJECTS)
 AM_V_P = $(am__v_P_@AM_V@)
 am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
@@ -583,7 +584,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \
 	oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
 	affinity-fmt.c teams.c allocator.c oacc-profiling.c \
 	oacc-target.c ompd-support.c $(am__append_7)
-libgompd_la_SOURCES = ompd-init.c ompd-helper.c ompd-icv.c
+libgompd_la_SOURCES = ompd-init.c ompd-helper.c ompd-icv.c ompd-threads.c
 
 # Nvidia PTX OpenACC plugin.
 @PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
@@ -801,6 +802,7 @@ distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ompd-icv.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ompd-init.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ompd-support.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ompd-threads.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parallel.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/priority_queue.Plo@am__quote@
diff --git a/libgomp/ompd-support.c b/libgomp/ompd-support.c
index 27c5ad148e0..5b1afd37788 100644
--- a/libgomp/ompd-support.c
+++ b/libgomp/ompd-support.c
@@ -33,6 +33,8 @@ const unsigned short gompd_sizeof_gomp_thread_handle
   __attribute__ ((used)) OMPD_SECTION = 0;
 #endif
 
+unsigned long gompd_thread_initial_tls_bias __attribute__ ((used));
+
 /* Get offset of the member m in struct t.  */
 #define gompd_get_offset(t, m) \
   const unsigned short gompd_access_##t##_##m __attribute__ ((used)) \
@@ -67,6 +69,11 @@ gompd_load (void)
   gompd_state |= OMPD_ENABLED;
   ompd_dll_locations = &ompd_dll_locations_array[0];
   ompd_dll_locations_valid ();
+
+  #if defined(LIBGOMP_USE_PTHREADS) && !defined(GOMP_NEEDS_THREAD_HANDLE)
+  gompd_thread_initial_tls_bias = (unsigned long) ((char *) &gomp_tls_data
+						   - (char *) pthread_self ());
+  #endif
 }
 
 #ifndef __ELF__
diff --git a/libgomp/ompd-threads.c b/libgomp/ompd-threads.c
new file mode 100644
index 00000000000..723ef740181
--- /dev/null
+++ b/libgomp/ompd-threads.c
@@ -0,0 +1,222 @@
+/* Copyright (C) The GNU Toolchain Authors.
+   Contributed by Ahmed Sayed <ahmedsayedmousse@gmail.com>.
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This file contains the implementation of functions defined in
+   Section 5.5 ThreadHandles. */
+
+
+#include "ompd-helper.h"
+
+ompd_rc_t
+ompd_get_thread_in_parallel (ompd_parallel_handle_t *parallel_handle,
+			     int thread_num,
+			     ompd_thread_handle_t **thread_handle)
+{
+
+  if (parallel_handle == NULL)
+    return ompd_rc_stale_handle;
+  CHECK (parallel_handle->ah);
+
+  ompd_address_space_context_t *context = parallel_handle->ah->context;
+  ompd_rc_t ret;
+
+  ompd_word_t team_size_var = 1;
+  if (parallel_handle->th.address)
+    gompd_get_team_size (parallel_handle, &team_size_var);
+
+  if (thread_num < 0 || thread_num >= team_size_var)
+    return ompd_rc_bad_input;
+
+  ompd_word_t temp_offset;
+  ompd_address_t temp_symbol_addr, symbol_addr = {OMPD_SEGMENT_UNSPECIFIED, 0};
+  ompd_addr_t temp_addr;
+
+  ACCESS_VALUE (context, NULL, "gompd_access_gomp_thread_pool_threads",
+		temp_offset, 1, ret, symbol_addr, temp_symbol_addr, temp_addr);
+
+  symbol_addr.address += thread_num * target_sizes.sizeof_pointer;
+
+  DEREFERENCE (context, NULL, symbol_addr, target_sizes.sizeof_pointer, 1,
+	       temp_addr, ret, 1);
+
+  ret = callbacks->alloc_memory (sizeof (ompd_thread_handle_t),
+				 (void **) thread_handle);
+
+  CHECK_RET (ret);
+
+  if (symbol_addr.address == 0)
+    return ompd_rc_unsupported;
+
+  (*thread_handle)->th = symbol_addr;
+  (*thread_handle)->ah = parallel_handle->ah;
+  return ret;
+}
+
+/* The ompd_get_thread_handle function that maps a native thread to an
+   OMPD thread handle.  */
+
+ompd_rc_t
+ompd_get_thread_handle (ompd_address_space_handle_t *handle,
+			ompd_thread_id_t kind, ompd_size_t sizeof_thread_id,
+			const void *thread_id,
+			ompd_thread_handle_t **thread_handle)
+{
+  CHECK (handle);
+  if (kind != OMPD_THREAD_ID_PTHREAD)
+    return ompd_rc_unsupported;
+
+  ompd_address_space_context_t *context = handle->context;
+  ompd_thread_context_t *tcontext;
+  ompd_rc_t ret;
+
+  ret = callbacks->get_thread_context_for_thread_id (context, kind,
+						     sizeof_thread_id,
+						     thread_id, &tcontext);
+  CHECK_RET (ret);
+
+  ompd_size_t temp_symbol_size, symbol_size;
+  ompd_address_t temp_symbol_addr, symbol_addr = {OMPD_SEGMENT_UNSPECIFIED, 0};
+
+  GET_VALUE (context, NULL, "gompd_sizeof_gomp_thread", symbol_size,
+	     temp_symbol_size, target_sizes.sizeof_short, 1, ret,
+	     temp_symbol_addr);
+
+  GET_VALUE (context, tcontext, "gomp_tls_data", symbol_addr.address,
+	     temp_symbol_addr.address, symbol_size, 1, ret, symbol_addr);
+
+  ret = callbacks->alloc_memory (sizeof (ompd_thread_handle_t),
+				 (void **) thread_handle);
+
+  CHECK_RET (ret);
+
+  (*thread_handle)->ah = handle;
+  (*thread_handle)->th = symbol_addr;
+  (*thread_handle)->thread_context = tcontext;
+  return ret;
+}
+
+
+ompd_rc_t
+ompd_rel_thread_handle (ompd_thread_handle_t *thread_handle)
+{
+  if (thread_handle == NULL)
+    return ompd_rc_stale_handle;
+
+  ompd_rc_t ret;
+  ret = callbacks->free_memory ((void *) thread_handle);
+  if (ret != ompd_rc_ok)
+  return ret;
+
+  return ompd_rc_ok;
+}
+
+
+/* Return -1, 0 or 1 for thread_handle_1 <, == or > thread_handle_2.  */
+ompd_rc_t
+ompd_thread_handle_compare (ompd_thread_handle_t *thread_handle_1,
+			    ompd_thread_handle_t *thread_handle_2,
+			    int	*cmp_value)
+{
+
+  if (thread_handle_1 == NULL || thread_handle_2 == NULL)
+    return ompd_rc_stale_handle;
+  if (cmp_value == NULL)
+    return ompd_rc_bad_input;
+  if (thread_handle_1->ah->kind != thread_handle_2->ah->kind)
+    return ompd_rc_bad_input;
+
+  if (thread_handle_1->th.address < thread_handle_2->th.address)
+    *cmp_value = -1;
+  else if (thread_handle_1->th.address > thread_handle_2->th.address)
+    *cmp_value = 1;
+  else
+    *cmp_value = 0;
+
+  return ompd_rc_ok;
+}
+
+
+ompd_rc_t
+ompd_get_thread_id (ompd_thread_handle_t *thread_handle, ompd_thread_id_t kind,
+		    ompd_size_t sizeof_thread_id, void *thread_id)
+{
+  if (kind != OMPD_THREAD_ID_PTHREAD)
+    return ompd_rc_unsupported;
+  if (thread_id == NULL)
+    return ompd_rc_bad_input;
+  if (thread_handle == NULL)
+    return ompd_rc_stale_handle;
+
+  CHECK (thread_handle->ah);
+  ompd_address_space_context_t *context = thread_handle->ah->context;
+
+  ompd_rc_t ret;
+  ompd_address_t taddr = thread_handle->th;
+  ompd_address_t temp_symbol_addr, symbol_addr = {OMPD_SEGMENT_UNSPECIFIED, 0};
+  ompd_size_t temp_symbol_size, symbol_size;
+  ompd_word_t temp_offset, offset;
+
+  GET_VALUE (context, NULL, "gompd_sizeof_gomp_thread_handle", symbol_size,
+	     temp_symbol_size, target_sizes.sizeof_short, 1, ret, symbol_addr);
+
+  if (symbol_size == 0)
+    {
+      GET_VALUE (context, NULL, "gompd_thread_initial_tls_bias", offset,
+		 temp_offset, target_sizes.sizeof_long, 1, ret, symbol_addr);
+
+      ret = callbacks->symbol_addr_lookup (context, NULL,"gomp_tls_data",
+					   &symbol_addr, NULL);
+      ret = callbacks->device_to_host (context, &temp_symbol_addr.address,
+				       target_sizes.sizeof_long_long, 1,
+				       &symbol_addr.address);
+      CHECK_RET (ret);
+
+      taddr.address = symbol_addr.address + offset;
+      ret = callbacks->read_memory (context, NULL, &taddr,
+				    target_sizes.sizeof_long_long, thread_id);
+    }
+  else
+    {
+      if (sizeof_thread_id != symbol_size)
+        return ompd_rc_bad_input;
+
+      GET_VALUE (context, NULL, "gompd_access_gomp_thread_handle", offset,
+		 temp_offset, target_sizes.sizeof_short, 1, ret, symbol_addr);
+      taddr.address += offset;
+
+      ret = callbacks->read_memory (context, NULL, &taddr, symbol_size,
+				    thread_id);
+    }
+  return ret;
+}
+
+
+/* OMPD doesn't support GPUs for now.  */
+ompd_rc_t ompd_get_device_from_thread (ompd_thread_handle_t *thread_handle,
+				       ompd_address_space_handle_t **device)
+{
+  if (thread_handle == NULL)
+    return ompd_rc_stale_handle;
+  return ompd_rc_unsupported;
+}
diff --git a/libgomp/team.c b/libgomp/team.c
index d53246961b7..9a84dc18bdb 100644
--- a/libgomp/team.c
+++ b/libgomp/team.c
@@ -77,6 +77,7 @@ gomp_thread_start (void *xdata)
   void *local_data;
 
   ompd_bp_thread_begin ();
+
 #if defined HAVE_TLS || defined USE_EMUTLS
   thr = &gomp_tls_data;
 #else
@@ -313,6 +314,8 @@ gomp_free_thread (void *arg __attribute__((unused)))
       gomp_end_task ();
       free (task);
     }
+
+  ompd_bp_thread_end ();
 }
 
 /* Launch a team.  */
  
Bernhard Reutner-Fischer Sept. 27, 2022, 6:40 a.m. UTC | #2
On Tue, 27 Sep 2022 03:20:51 +0200
Ahmed Sayed Mousse via Gcc-patches <gcc-patches@gcc.gnu.org> wrote:

> diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am
> index 6d913a93e7f..23f5bede1bf 100644
> --- a/libgomp/Makefile.am
> +++ b/libgomp/Makefile.am
> @@ -94,7 +94,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \
>  	priority_queue.c affinity-fmt.c teams.c allocator.c oacc-profiling.c \
>  	oacc-target.c ompd-support.c
>  
> -libgompd_la_SOURCES = ompd-init.c ompd-helper.c ompd-icv.c
> +libgompd_la_SOURCES = ompd-init.c ompd-helper.c ompd-icv.c ompd-threads.c
>  
>  include $(top_srcdir)/plugin/Makefrag.am
>  
> diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in
> index 40f896b5f03..7acdcbf31d5 100644
> --- a/libgomp/Makefile.in
> +++ b/libgomp/Makefile.in
> @@ -233,7 +233,8 @@ am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \
>  	affinity-fmt.lo teams.lo allocator.lo oacc-profiling.lo \
>  	oacc-target.lo ompd-support.lo $(am__objects_1)
>  libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
> -am_libgompd_la_OBJECTS = ompd-init.lo ompd-helper.lo ompd-icv.lo
> +am_libgompd_la_OBJECTS = ompd-init.lo ompd-helper.lo ompd-icv.lo \
> +	ompd-threads.lo
>  libgompd_la_OBJECTS = $(am_libgompd_la_OBJECTS)
>  AM_V_P = $(am__v_P_@AM_V@)
>  am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
> @@ -583,7 +584,7 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \
>  	oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
>  	affinity-fmt.c teams.c allocator.c oacc-profiling.c \
>  	oacc-target.c ompd-support.c $(am__append_7)
> -libgompd_la_SOURCES = ompd-init.c ompd-helper.c ompd-icv.c
> +libgompd_la_SOURCES = ompd-init.c ompd-helper.c ompd-icv.c ompd-threads.c
>  
>  # Nvidia PTX OpenACC plugin.
>  @PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
> @@ -801,6 +802,7 @@ distclean-compile:
>  @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ompd-icv.Plo@am__quote@
>  @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ompd-init.Plo@am__quote@
>  @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ompd-support.Plo@am__quote@
> +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ompd-threads.Plo@am__quote@
>  @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@
>  @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parallel.Plo@am__quote@
>  @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/priority_queue.Plo@am__quote@
> diff --git a/libgomp/ompd-support.c b/libgomp/ompd-support.c
> index 27c5ad148e0..5b1afd37788 100644
> --- a/libgomp/ompd-support.c
> +++ b/libgomp/ompd-support.c
> @@ -33,6 +33,8 @@ const unsigned short gompd_sizeof_gomp_thread_handle
>    __attribute__ ((used)) OMPD_SECTION = 0;
>  #endif
>  
> +unsigned long gompd_thread_initial_tls_bias __attribute__ ((used));
> +
>  /* Get offset of the member m in struct t.  */
>  #define gompd_get_offset(t, m) \
>    const unsigned short gompd_access_##t##_##m __attribute__ ((used)) \
> @@ -67,6 +69,11 @@ gompd_load (void)
>    gompd_state |= OMPD_ENABLED;
>    ompd_dll_locations = &ompd_dll_locations_array[0];
>    ompd_dll_locations_valid ();
> +
> +  #if defined(LIBGOMP_USE_PTHREADS) && !defined(GOMP_NEEDS_THREAD_HANDLE)
> +  gompd_thread_initial_tls_bias = (unsigned long) ((char *) &gomp_tls_data
> +						   - (char *) pthread_self ());
> +  #endif
>  }
>  
>  #ifndef __ELF__
> diff --git a/libgomp/ompd-threads.c b/libgomp/ompd-threads.c
> new file mode 100644
> index 00000000000..723ef740181
> --- /dev/null
> +++ b/libgomp/ompd-threads.c
> @@ -0,0 +1,222 @@
> +/* Copyright (C) The GNU Toolchain Authors.
> +   Contributed by Ahmed Sayed <ahmedsayedmousse@gmail.com>.
> +   This file is part of the GNU Offloading and Multi Processing Library
> +   (libgomp).
> +
> +   Libgomp is free software; you can redistribute it and/or modify it
> +   under the terms of the GNU General Public License as published by
> +   the Free Software Foundation; either version 3, or (at your option)
> +   any later version.
> +
> +   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
> +   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
> +   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> +   more details.
> +
> +   Under Section 7 of GPL version 3, you are granted additional
> +   permissions described in the GCC Runtime Library Exception, version
> +   3.1, as published by the Free Software Foundation.
> +
> +   You should have received a copy of the GNU General Public License and
> +   a copy of the GCC Runtime Library Exception along with this program;
> +   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +/* This file contains the implementation of functions defined in
> +   Section 5.5 ThreadHandles. */
> +
> +
> +#include "ompd-helper.h"
> +
> +ompd_rc_t
> +ompd_get_thread_in_parallel (ompd_parallel_handle_t *parallel_handle,
> +			     int thread_num,
> +			     ompd_thread_handle_t **thread_handle)
> +{
> +
> +  if (parallel_handle == NULL)
> +    return ompd_rc_stale_handle;
> +  CHECK (parallel_handle->ah);
> +
> +  ompd_address_space_context_t *context = parallel_handle->ah->context;
> +  ompd_rc_t ret;
> +
> +  ompd_word_t team_size_var = 1;
> +  if (parallel_handle->th.address)
> +    gompd_get_team_size (parallel_handle, &team_size_var);
> +
> +  if (thread_num < 0 || thread_num >= team_size_var)
> +    return ompd_rc_bad_input;
> +
> +  ompd_word_t temp_offset;
> +  ompd_address_t temp_symbol_addr, symbol_addr = {OMPD_SEGMENT_UNSPECIFIED, 0};
> +  ompd_addr_t temp_addr;
> +
> +  ACCESS_VALUE (context, NULL, "gompd_access_gomp_thread_pool_threads",
> +		temp_offset, 1, ret, symbol_addr, temp_symbol_addr, temp_addr);
> +
> +  symbol_addr.address += thread_num * target_sizes.sizeof_pointer;
> +
> +  DEREFERENCE (context, NULL, symbol_addr, target_sizes.sizeof_pointer, 1,
> +	       temp_addr, ret, 1);
> +
> +  ret = callbacks->alloc_memory (sizeof (ompd_thread_handle_t),
> +				 (void **) thread_handle);
> +
> +  CHECK_RET (ret);
> +
> +  if (symbol_addr.address == 0)
> +    return ompd_rc_unsupported;

Does the above leak the allocated memory, i.e. do you have to move the
check to right before the DEREFERENCE?

> +
> +  (*thread_handle)->th = symbol_addr;
> +  (*thread_handle)->ah = parallel_handle->ah;
> +  return ret;
> +}
> +
> +/* The ompd_get_thread_handle function that maps a native thread to an
> +   OMPD thread handle.  */
> +
> +ompd_rc_t
> +ompd_get_thread_handle (ompd_address_space_handle_t *handle,
> +			ompd_thread_id_t kind, ompd_size_t sizeof_thread_id,
> +			const void *thread_id,
> +			ompd_thread_handle_t **thread_handle)
> +{
> +  CHECK (handle);
> +  if (kind != OMPD_THREAD_ID_PTHREAD)
> +    return ompd_rc_unsupported;
> +
> +  ompd_address_space_context_t *context = handle->context;
> +  ompd_thread_context_t *tcontext;
> +  ompd_rc_t ret;
> +
> +  ret = callbacks->get_thread_context_for_thread_id (context, kind,
> +						     sizeof_thread_id,
> +						     thread_id, &tcontext);
> +  CHECK_RET (ret);
> +
> +  ompd_size_t temp_symbol_size, symbol_size;
> +  ompd_address_t temp_symbol_addr, symbol_addr = {OMPD_SEGMENT_UNSPECIFIED, 0};
> +
> +  GET_VALUE (context, NULL, "gompd_sizeof_gomp_thread", symbol_size,
> +	     temp_symbol_size, target_sizes.sizeof_short, 1, ret,
> +	     temp_symbol_addr);
> +
> +  GET_VALUE (context, tcontext, "gomp_tls_data", symbol_addr.address,
> +	     temp_symbol_addr.address, symbol_size, 1, ret, symbol_addr);
> +
> +  ret = callbacks->alloc_memory (sizeof (ompd_thread_handle_t),
> +				 (void **) thread_handle);
> +
> +  CHECK_RET (ret);
> +
> +  (*thread_handle)->ah = handle;
> +  (*thread_handle)->th = symbol_addr;
> +  (*thread_handle)->thread_context = tcontext;
> +  return ret;
> +}
> +
> +
> +ompd_rc_t
> +ompd_rel_thread_handle (ompd_thread_handle_t *thread_handle)
> +{
> +  if (thread_handle == NULL)
> +    return ompd_rc_stale_handle;
> +
> +  ompd_rc_t ret;
> +  ret = callbacks->free_memory ((void *) thread_handle);
> +  if (ret != ompd_rc_ok)
> +  return ret;

You seem to usually use CHECK_RET for the above.

> +
> +  return ompd_rc_ok;
> +}
> +
> +
> +/* Return -1, 0 or 1 for thread_handle_1 <, == or > thread_handle_2.  */
> +ompd_rc_t
> +ompd_thread_handle_compare (ompd_thread_handle_t *thread_handle_1,
> +			    ompd_thread_handle_t *thread_handle_2,
> +			    int	*cmp_value)
> +{
> +
> +  if (thread_handle_1 == NULL || thread_handle_2 == NULL)
> +    return ompd_rc_stale_handle;
> +  if (cmp_value == NULL)
> +    return ompd_rc_bad_input;
> +  if (thread_handle_1->ah->kind != thread_handle_2->ah->kind)
> +    return ompd_rc_bad_input;
> +
> +  if (thread_handle_1->th.address < thread_handle_2->th.address)
> +    *cmp_value = -1;
> +  else if (thread_handle_1->th.address > thread_handle_2->th.address)
> +    *cmp_value = 1;
> +  else
> +    *cmp_value = 0;
> +
> +  return ompd_rc_ok;
> +}
> +
> +
> +ompd_rc_t
> +ompd_get_thread_id (ompd_thread_handle_t *thread_handle, ompd_thread_id_t kind,
> +		    ompd_size_t sizeof_thread_id, void *thread_id)
> +{
> +  if (kind != OMPD_THREAD_ID_PTHREAD)
> +    return ompd_rc_unsupported;
> +  if (thread_id == NULL)
> +    return ompd_rc_bad_input;
> +  if (thread_handle == NULL)
> +    return ompd_rc_stale_handle;
> +
> +  CHECK (thread_handle->ah);
> +  ompd_address_space_context_t *context = thread_handle->ah->context;
> +
> +  ompd_rc_t ret;
> +  ompd_address_t taddr = thread_handle->th;
> +  ompd_address_t temp_symbol_addr, symbol_addr = {OMPD_SEGMENT_UNSPECIFIED, 0};
> +  ompd_size_t temp_symbol_size, symbol_size;
> +  ompd_word_t temp_offset, offset;
> +
> +  GET_VALUE (context, NULL, "gompd_sizeof_gomp_thread_handle", symbol_size,
> +	     temp_symbol_size, target_sizes.sizeof_short, 1, ret, symbol_addr);
> +
> +  if (symbol_size == 0)
> +    {
> +      GET_VALUE (context, NULL, "gompd_thread_initial_tls_bias", offset,
> +		 temp_offset, target_sizes.sizeof_long, 1, ret, symbol_addr);
> +
> +      ret = callbacks->symbol_addr_lookup (context, NULL,"gomp_tls_data",
> +					   &symbol_addr, NULL);

The above can never fail, no?
thanks,

> +      ret = callbacks->device_to_host (context, &temp_symbol_addr.address,
> +				       target_sizes.sizeof_long_long, 1,
> +				       &symbol_addr.address);
> +      CHECK_RET (ret);
> +
> +      taddr.address = symbol_addr.address + offset;
> +      ret = callbacks->read_memory (context, NULL, &taddr,
> +				    target_sizes.sizeof_long_long, thread_id);
> +    }
> +  else
> +    {
> +      if (sizeof_thread_id != symbol_size)
> +        return ompd_rc_bad_input;
> +
> +      GET_VALUE (context, NULL, "gompd_access_gomp_thread_handle", offset,
> +		 temp_offset, target_sizes.sizeof_short, 1, ret, symbol_addr);
> +      taddr.address += offset;
> +
> +      ret = callbacks->read_memory (context, NULL, &taddr, symbol_size,
> +				    thread_id);
> +    }
> +  return ret;
> +}
> +
> +
> +/* OMPD doesn't support GPUs for now.  */
> +ompd_rc_t ompd_get_device_from_thread (ompd_thread_handle_t *thread_handle,
> +				       ompd_address_space_handle_t **device)
> +{
> +  if (thread_handle == NULL)
> +    return ompd_rc_stale_handle;
> +  return ompd_rc_unsupported;
> +}
> diff --git a/libgomp/team.c b/libgomp/team.c
> index d53246961b7..9a84dc18bdb 100644
> --- a/libgomp/team.c
> +++ b/libgomp/team.c
> @@ -77,6 +77,7 @@ gomp_thread_start (void *xdata)
>    void *local_data;
>  
>    ompd_bp_thread_begin ();
> +
>  #if defined HAVE_TLS || defined USE_EMUTLS
>    thr = &gomp_tls_data;
>  #else
> @@ -313,6 +314,8 @@ gomp_free_thread (void *arg __attribute__((unused)))
>        gomp_end_task ();
>        free (task);
>      }
> +
> +  ompd_bp_thread_end ();
>  }
>  
>  /* Launch a team.  */
  

Patch

diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am
index 6d913a93e7f..23f5bede1bf 100644
--- a/libgomp/Makefile.am
+++ b/libgomp/Makefile.am
@@ -94,7 +94,7 @@  libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c error.c \
 	priority_queue.c affinity-fmt.c teams.c allocator.c oacc-profiling.c \
 	oacc-target.c ompd-support.c
 
-libgompd_la_SOURCES = ompd-init.c ompd-helper.c ompd-icv.c
+libgompd_la_SOURCES = ompd-init.c ompd-helper.c ompd-icv.c ompd-threads.c
 
 include $(top_srcdir)/plugin/Makefrag.am
 
diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in
index 40f896b5f03..7acdcbf31d5 100644
--- a/libgomp/Makefile.in
+++ b/libgomp/Makefile.in
@@ -233,7 +233,8 @@  am_libgomp_la_OBJECTS = alloc.lo atomic.lo barrier.lo critical.lo \
 	affinity-fmt.lo teams.lo allocator.lo oacc-profiling.lo \
 	oacc-target.lo ompd-support.lo $(am__objects_1)
 libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS)
-am_libgompd_la_OBJECTS = ompd-init.lo ompd-helper.lo ompd-icv.lo
+am_libgompd_la_OBJECTS = ompd-init.lo ompd-helper.lo ompd-icv.lo \
+	ompd-threads.lo
 libgompd_la_OBJECTS = $(am_libgompd_la_OBJECTS)
 AM_V_P = $(am__v_P_@AM_V@)
 am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
@@ -583,7 +584,7 @@  libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \
 	oacc-async.c oacc-plugin.c oacc-cuda.c priority_queue.c \
 	affinity-fmt.c teams.c allocator.c oacc-profiling.c \
 	oacc-target.c ompd-support.c $(am__append_7)
-libgompd_la_SOURCES = ompd-init.c ompd-helper.c ompd-icv.c
+libgompd_la_SOURCES = ompd-init.c ompd-helper.c ompd-icv.c ompd-threads.c
 
 # Nvidia PTX OpenACC plugin.
 @PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION)
@@ -801,6 +802,7 @@  distclean-compile:
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ompd-icv.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ompd-init.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ompd-support.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ompd-threads.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ordered.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parallel.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/priority_queue.Plo@am__quote@
diff --git a/libgomp/ompd-support.c b/libgomp/ompd-support.c
index 27c5ad148e0..5b1afd37788 100644
--- a/libgomp/ompd-support.c
+++ b/libgomp/ompd-support.c
@@ -33,6 +33,8 @@  const unsigned short gompd_sizeof_gomp_thread_handle
   __attribute__ ((used)) OMPD_SECTION = 0;
 #endif
 
+unsigned long gompd_thread_initial_tls_bias __attribute__ ((used));
+
 /* Get offset of the member m in struct t.  */
 #define gompd_get_offset(t, m) \
   const unsigned short gompd_access_##t##_##m __attribute__ ((used)) \
@@ -67,6 +69,11 @@  gompd_load (void)
   gompd_state |= OMPD_ENABLED;
   ompd_dll_locations = &ompd_dll_locations_array[0];
   ompd_dll_locations_valid ();
+
+  #if defined(LIBGOMP_USE_PTHREADS) && !defined(GOMP_NEEDS_THREAD_HANDLE)
+  gompd_thread_initial_tls_bias = (unsigned long) ((char *) &gomp_tls_data
+						   - (char *) pthread_self ());
+  #endif
 }
 
 #ifndef __ELF__
diff --git a/libgomp/ompd-threads.c b/libgomp/ompd-threads.c
new file mode 100644
index 00000000000..723ef740181
--- /dev/null
+++ b/libgomp/ompd-threads.c
@@ -0,0 +1,222 @@ 
+/* Copyright (C) The GNU Toolchain Authors.
+   Contributed by Ahmed Sayed <ahmedsayedmousse@gmail.com>.
+   This file is part of the GNU Offloading and Multi Processing Library
+   (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This file contains the implementation of functions defined in
+   Section 5.5 ThreadHandles. */
+
+
+#include "ompd-helper.h"
+
+ompd_rc_t
+ompd_get_thread_in_parallel (ompd_parallel_handle_t *parallel_handle,
+			     int thread_num,
+			     ompd_thread_handle_t **thread_handle)
+{
+
+  if (parallel_handle == NULL)
+    return ompd_rc_stale_handle;
+  CHECK (parallel_handle->ah);
+
+  ompd_address_space_context_t *context = parallel_handle->ah->context;
+  ompd_rc_t ret;
+
+  ompd_word_t team_size_var = 1;
+  if (parallel_handle->th.address)
+    gompd_get_team_size (parallel_handle, &team_size_var);
+
+  if (thread_num < 0 || thread_num >= team_size_var)
+    return ompd_rc_bad_input;
+
+  ompd_word_t temp_offset;
+  ompd_address_t temp_symbol_addr, symbol_addr = {OMPD_SEGMENT_UNSPECIFIED, 0};
+  ompd_addr_t temp_addr;
+
+  ACCESS_VALUE (context, NULL, "gompd_access_gomp_thread_pool_threads",
+		temp_offset, 1, ret, symbol_addr, temp_symbol_addr, temp_addr);
+
+  symbol_addr.address += thread_num * target_sizes.sizeof_pointer;
+
+  DEREFERENCE (context, NULL, symbol_addr, target_sizes.sizeof_pointer, 1,
+	       temp_addr, ret, 1);
+
+  ret = callbacks->alloc_memory (sizeof (ompd_thread_handle_t),
+				 (void **) thread_handle);
+
+  CHECK_RET (ret);
+
+  if (symbol_addr.address == 0)
+    return ompd_rc_unsupported;
+
+  (*thread_handle)->th = symbol_addr;
+  (*thread_handle)->ah = parallel_handle->ah;
+  return ret;
+}
+
+/* The ompd_get_thread_handle function that maps a native thread to an
+   OMPD thread handle.  */
+
+ompd_rc_t
+ompd_get_thread_handle (ompd_address_space_handle_t *handle,
+			ompd_thread_id_t kind, ompd_size_t sizeof_thread_id,
+			const void *thread_id,
+			ompd_thread_handle_t **thread_handle)
+{
+  CHECK (handle);
+  if (kind != OMPD_THREAD_ID_PTHREAD)
+    return ompd_rc_unsupported;
+
+  ompd_address_space_context_t *context = handle->context;
+  ompd_thread_context_t *tcontext;
+  ompd_rc_t ret;
+
+  ret = callbacks->get_thread_context_for_thread_id (context, kind,
+						     sizeof_thread_id,
+						     thread_id, &tcontext);
+  CHECK_RET (ret);
+
+  ompd_size_t temp_symbol_size, symbol_size;
+  ompd_address_t temp_symbol_addr, symbol_addr = {OMPD_SEGMENT_UNSPECIFIED, 0};
+
+  GET_VALUE (context, NULL, "gompd_sizeof_gomp_thread", symbol_size,
+	     temp_symbol_size, target_sizes.sizeof_short, 1, ret,
+	     temp_symbol_addr);
+
+  GET_VALUE (context, tcontext, "gomp_tls_data", symbol_addr.address,
+	     temp_symbol_addr.address, symbol_size, 1, ret, symbol_addr);
+
+  ret = callbacks->alloc_memory (sizeof (ompd_thread_handle_t),
+				 (void **) thread_handle);
+
+  CHECK_RET (ret);
+
+  (*thread_handle)->ah = handle;
+  (*thread_handle)->th = symbol_addr;
+  (*thread_handle)->thread_context = tcontext;
+  return ret;
+}
+
+
+ompd_rc_t
+ompd_rel_thread_handle (ompd_thread_handle_t *thread_handle)
+{
+  if (thread_handle == NULL)
+    return ompd_rc_stale_handle;
+
+  ompd_rc_t ret;
+  ret = callbacks->free_memory ((void *) thread_handle);
+  if (ret != ompd_rc_ok)
+  return ret;
+
+  return ompd_rc_ok;
+}
+
+
+/* Return -1, 0 or 1 for thread_handle_1 <, == or > thread_handle_2.  */
+ompd_rc_t
+ompd_thread_handle_compare (ompd_thread_handle_t *thread_handle_1,
+			    ompd_thread_handle_t *thread_handle_2,
+			    int	*cmp_value)
+{
+
+  if (thread_handle_1 == NULL || thread_handle_2 == NULL)
+    return ompd_rc_stale_handle;
+  if (cmp_value == NULL)
+    return ompd_rc_bad_input;
+  if (thread_handle_1->ah->kind != thread_handle_2->ah->kind)
+    return ompd_rc_bad_input;
+
+  if (thread_handle_1->th.address < thread_handle_2->th.address)
+    *cmp_value = -1;
+  else if (thread_handle_1->th.address > thread_handle_2->th.address)
+    *cmp_value = 1;
+  else
+    *cmp_value = 0;
+
+  return ompd_rc_ok;
+}
+
+
+ompd_rc_t
+ompd_get_thread_id (ompd_thread_handle_t *thread_handle, ompd_thread_id_t kind,
+		    ompd_size_t sizeof_thread_id, void *thread_id)
+{
+  if (kind != OMPD_THREAD_ID_PTHREAD)
+    return ompd_rc_unsupported;
+  if (thread_id == NULL)
+    return ompd_rc_bad_input;
+  if (thread_handle == NULL)
+    return ompd_rc_stale_handle;
+
+  CHECK (thread_handle->ah);
+  ompd_address_space_context_t *context = thread_handle->ah->context;
+
+  ompd_rc_t ret;
+  ompd_address_t taddr = thread_handle->th;
+  ompd_address_t temp_symbol_addr, symbol_addr = {OMPD_SEGMENT_UNSPECIFIED, 0};
+  ompd_size_t temp_symbol_size, symbol_size;
+  ompd_word_t temp_offset, offset;
+
+  GET_VALUE (context, NULL, "gompd_sizeof_gomp_thread_handle", symbol_size,
+	     temp_symbol_size, target_sizes.sizeof_short, 1, ret, symbol_addr);
+
+  if (symbol_size == 0)
+    {
+      GET_VALUE (context, NULL, "gompd_thread_initial_tls_bias", offset,
+		 temp_offset, target_sizes.sizeof_long, 1, ret, symbol_addr);
+
+      ret = callbacks->symbol_addr_lookup (context, NULL,"gomp_tls_data",
+					   &symbol_addr, NULL);
+      ret = callbacks->device_to_host (context, &temp_symbol_addr.address,
+				       target_sizes.sizeof_long_long, 1,
+				       &symbol_addr.address);
+      CHECK_RET (ret);
+
+      taddr.address = symbol_addr.address + offset;
+      ret = callbacks->read_memory (context, NULL, &taddr,
+				    target_sizes.sizeof_long_long, thread_id);
+    }
+  else
+    {
+      if (sizeof_thread_id != symbol_size)
+        return ompd_rc_bad_input;
+
+      GET_VALUE (context, NULL, "gompd_access_gomp_thread_handle", offset,
+		 temp_offset, target_sizes.sizeof_short, 1, ret, symbol_addr);
+      taddr.address += offset;
+
+      ret = callbacks->read_memory (context, NULL, &taddr, symbol_size,
+				    thread_id);
+    }
+  return ret;
+}
+
+
+/* OMPD doesn't support GPUs for now.  */
+ompd_rc_t ompd_get_device_from_thread (ompd_thread_handle_t *thread_handle,
+				       ompd_address_space_handle_t **device)
+{
+  if (thread_handle == NULL)
+    return ompd_rc_stale_handle;
+  return ompd_rc_unsupported;
+}
diff --git a/libgomp/team.c b/libgomp/team.c
index d53246961b7..9a84dc18bdb 100644
--- a/libgomp/team.c
+++ b/libgomp/team.c
@@ -77,6 +77,7 @@  gomp_thread_start (void *xdata)
   void *local_data;
 
   ompd_bp_thread_begin ();
+
 #if defined HAVE_TLS || defined USE_EMUTLS
   thr = &gomp_tls_data;
 #else
@@ -313,6 +314,8 @@  gomp_free_thread (void *arg __attribute__((unused)))
       gomp_end_task ();
       free (task);
     }
+
+  ompd_bp_thread_end ();
 }
 
 /* Launch a team.  */