new file mode 100644
@@ -0,0 +1,5 @@
+/* Generic fallback for RUNLOCALFN: call function A with the single
+   argument B.  The third macro argument C is accepted so that call
+   sites look the same everywhere, but it is never expanded here.  */
+#define RUNLOCALFN(a, b, c) \
+  do { a (b); } while (0)
new file mode 100644
@@ -0,0 +1,20 @@
+/* Busy-wait until *ADDR stops being equal to VAL, polling at most COUNT
+   times.  Returns 0 as soon as a relaxed atomic load of *ADDR yields a
+   value other than VAL, and 1 when all COUNT polls saw VAL (including
+   the case COUNT == 0, where *ADDR is never read).  */
+static inline int
+do_spin_for_count (int *addr, int val, unsigned long long count)
+{
+  unsigned long long remaining = count;
+
+  while (remaining-- > 0)
+    {
+      int seen = __atomic_load_n (addr, MEMMODEL_RELAXED);
+      if (__builtin_expect (seen != val, 0))
+        return 0;
+      /* Still VAL: relax before polling again.  */
+      cpu_relax ();
+    }
+  return 1;
+}
+
@@ -44,21 +44,16 @@
extern int gomp_futex_wait, gomp_futex_wake;
#include <futex.h>
-
+#include <spin.h>
static inline int do_spin (int *addr, int val)
{
- unsigned long long i, count = gomp_spin_count_var;
+ unsigned long long count = gomp_spin_count_var; /* Default spin budget.  */
if (__builtin_expect (__atomic_load_n (&gomp_managed_threads,
MEMMODEL_RELAXED)
> gomp_available_cpus, 0))
count = gomp_throttled_spin_count_var;
- for (i = 0; i < count; i++)
- if (__builtin_expect (__atomic_load_n (addr, MEMMODEL_RELAXED) != val, 0))
- return 0;
- else
- cpu_relax ();
- return 1;
+ return do_spin_for_count (addr, val, count); /* Loop comes from <spin.h>.  */
}
static inline void do_wait (int *addr, int val)
new file mode 100644
@@ -0,0 +1,30 @@
+#ifdef __x86_64__
+/* Stall the calling thread by executing COUNT * gomp_thread_delay_count
+   PAUSE instructions.  A COUNT of 0 (or a zero gomp_thread_delay_count)
+   returns immediately.  gomp_thread_delay_count must already be declared
+   where this header is included.  */
+static inline void
+gomp_thread_delay (unsigned int count)
+{
+  /* Compute the iteration bound once, in unsigned long long, instead of
+     redoing the multiplication on every trip through the loop.  */
+  const unsigned long long limit = count * gomp_thread_delay_count;
+  unsigned long long i;
+
+  for (i = 0; i < limit; i++)
+    __builtin_ia32_pause ();
+}
+
+/* x86-64 RUNLOCALFN: delay the calling thread in proportion to C via
+   gomp_thread_delay, then call function A with the single argument B.  */
+#define RUNLOCALFN(a, b, c) \
+  do \
+    { \
+      gomp_thread_delay (c); \
+      a (b); \
+    } \
+  while (0)
+#else
+/* Other targets pull in the definition from include/localfn.h.  */
+# include "../../../../include/localfn.h"
+#endif
new file mode 100644
@@ -0,0 +1,84 @@
+#include "../mutex.c"
+
+#ifdef __x86_64__
+/* Portable spin loop: poll *ADDR up to COUNT times with relaxed atomic
+   loads, calling cpu_relax between polls.  Returns 0 as soon as *ADDR is
+   seen to differ from VAL, and 1 if every poll saw VAL.  */
+static inline int
+do_spin_for_count_generic (int *addr, int val, unsigned long long count)
+{
+  unsigned long long i;
+  for (i = 0; i < count; i++)
+    if (__builtin_expect (__atomic_load_n (addr, MEMMODEL_RELAXED) != val,
+                          0))
+      return 0;
+    else
+      cpu_relax ();
+  return 1;
+}
+
+#ifndef __WAITPKG__
+#pragma GCC push_options
+#pragma GCC target("waitpkg")
+#define __DISABLE_WAITPKG__
+#endif /* __WAITPKG__ */
+
+/* Factor that turns a spin count into the TPAUSE wait budget, which is
+   measured in time-stamp-counter ticks.
+   NOTE(review): 200 is presumably an estimate of ticks per generic spin
+   iteration -- confirm.  */
+#define PAUSE_TP 200
+
+/* Spin on *ADDR with TPAUSE and exponential back-off: wait 1, 3, 7, ...
+   time-stamp-counter ticks, re-reading *ADDR after each wait, and stop
+   once the next wait length would reach the most significant bit of
+   COUNT * PAUSE_TP.  Returns 0 as soon as *ADDR is seen to differ from
+   VAL, 1 otherwise.  A COUNT of 0 returns 1 without waiting.  */
+static inline int
+do_spin_for_backoff_tpause (int *addr, int val, unsigned long long count)
+{
+  /* Bit 0 set selects the C0.1 wait state (faster wake-up).  */
+  const unsigned int ctrl = 1;
+  unsigned long long wait_time = 1;
+  unsigned long long budget, mask;
+
+  /* Like the generic loop, do not spin for a zero count.  This also keeps
+     zero away from the bit scan below, whose result would be undefined.  */
+  if (count == 0)
+    return 1;
+
+  /* Saturate instead of wrapping when COUNT is very large.  */
+  budget = count > ~0ULL / PAUSE_TP ? ~0ULL : count * PAUSE_TP;
+  mask = 1ULL << __builtin_ia32_bsrdi ((long long) budget);
+  do
+    {
+      /* Read the TSC through the compiler builtin; the name __rdtsc is
+         reserved and already provided by <x86intrin.h>.  */
+      __builtin_ia32_tpause (ctrl, wait_time + __builtin_ia32_rdtsc ());
+      wait_time = (wait_time << 1) | 1;
+      if (__builtin_expect (__atomic_load_n (addr, MEMMODEL_RELAXED) != val,
+                            0))
+        return 0;
+    }
+  while ((wait_time & mask) == 0);
+  return 1;
+}
+
+#ifdef __DISABLE_WAITPKG__
+#undef __DISABLE_WAITPKG__
+#pragma GCC pop_options
+#endif /* __DISABLE_WAITPKG__ */
+
+/* Spin until *ADDR differs from VAL or the budget derived from COUNT is
+   used up.  Returns 0 if a changed value was observed, 1 otherwise.  Uses
+   the TPAUSE back-off loop when the running CPU reports WAITPKG support
+   and the cpu_relax polling loop otherwise.  */
+int
+do_spin_for_count (int *addr, int val, unsigned long long count)
+{
+  if (__builtin_cpu_supports ("waitpkg"))
+    return do_spin_for_backoff_tpause (addr, val, count);
+  return do_spin_for_count_generic (addr, val, count);
+}
+
+#endif
new file mode 100644
@@ -0,0 +1,8 @@
+#ifdef __x86_64__
+/* On x86-64 the spin loop is an out-of-line function.  Returns 0 when
+   *ADDR was seen to differ from VAL, 1 when the COUNT budget ran out.  */
+extern int do_spin_for_count (int *addr, int val, unsigned long long count);
+#else
+/* Every other target uses the definition from the parent directory.  */
+# include "../spin.h"
+#endif
@@ -106,6 +106,7 @@ gomp_mutex_t gomp_managed_threads_lock;
#endif
unsigned long gomp_available_cpus = 1, gomp_managed_threads = 1;
unsigned long long gomp_spin_count_var, gomp_throttled_spin_count_var;
+unsigned long long gomp_thread_delay_count; /* Set from GOMP_DELAYCOUNT in initialize_env.  */
unsigned long *gomp_nthreads_var_list, gomp_nthreads_var_list_len;
char *gomp_bind_var_list;
unsigned long gomp_bind_var_list_len;
@@ -2419,6 +2420,9 @@ initialize_env (void)
else if (all != NULL && gomp_get_icv_flag (all->flags, GOMP_ICV_WAIT_POLICY))
wait_policy = all->icvs.wait_policy;
+ if (!parse_spincount ("GOMP_DELAYCOUNT", &gomp_thread_delay_count))
+ gomp_thread_delay_count = 300;
+
if (!parse_spincount ("GOMP_SPINCOUNT", &gomp_spin_count_var))
{
/* Using a rough estimation of 100000 spins per msec,
@@ -596,6 +596,7 @@ extern bool gomp_cancel_var;
extern enum gomp_target_offload_t gomp_target_offload_var;
extern int gomp_max_task_priority_var;
extern unsigned long long gomp_spin_count_var, gomp_throttled_spin_count_var;
+extern unsigned long long gomp_thread_delay_count; /* Multiplier used by gomp_thread_delay.  */
extern unsigned long gomp_available_cpus, gomp_managed_threads;
extern unsigned long *gomp_nthreads_var_list, gomp_nthreads_var_list_len;
extern char *gomp_bind_var_list;
@@ -30,6 +30,7 @@
#include "pool.h"
#include <stdlib.h>
#include <string.h>
+#include "localfn.h"
#ifdef LIBGOMP_USE_PTHREADS
pthread_attr_t gomp_thread_attr;
@@ -62,7 +63,6 @@ struct gomp_thread_start_data
pthread_t handle;
};
-
/* This function is a pthread_create entry point. This contains the idle
loop in which a thread waits to be called up to become part of a team. */
@@ -111,7 +111,8 @@ gomp_thread_start (void *xdata)
gomp_barrier_wait (&team->barrier);
- local_fn (local_data);
+ RUNLOCALFN(local_fn, local_data, thr->ts.team_id);
+
gomp_team_barrier_wait_final (&team->barrier);
gomp_finish_task (task);
gomp_barrier_wait_last (&team->barrier);
@@ -126,7 +127,8 @@ gomp_thread_start (void *xdata)
struct gomp_team *team = thr->ts.team;
struct gomp_task *task = thr->task;
- local_fn (local_data);
+ RUNLOCALFN(local_fn, local_data, thr->ts.team_id);
+
gomp_team_barrier_wait_final (&team->barrier);
gomp_finish_task (task);