Diffstat (limited to 'openmp/runtime/src/kmp.h')
 openmp/runtime/src/kmp.h | 280 ++++++++++++++++++++++++++++++------------
 1 file changed, 184 insertions(+), 96 deletions(-)
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 87e91a0f8d10..05264f4433d3 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -138,6 +138,10 @@ typedef unsigned int kmp_hwloc_depth_t;
#include "ompt-internal.h"
#endif
+#if OMPD_SUPPORT
+#include "ompd-specific.h"
+#endif
+
#ifndef UNLIKELY
#define UNLIKELY(x) (x)
#endif
@@ -595,6 +599,35 @@ typedef int PACKED_REDUCTION_METHOD_T;
#include <pthread.h>
#endif
+enum kmp_hw_t : int {
+ KMP_HW_UNKNOWN = -1,
+ KMP_HW_SOCKET = 0,
+ KMP_HW_PROC_GROUP,
+ KMP_HW_NUMA,
+ KMP_HW_DIE,
+ KMP_HW_LLC,
+ KMP_HW_L3,
+ KMP_HW_TILE,
+ KMP_HW_MODULE,
+ KMP_HW_L2,
+ KMP_HW_L1,
+ KMP_HW_CORE,
+ KMP_HW_THREAD,
+ KMP_HW_LAST
+};
+
+#define KMP_DEBUG_ASSERT_VALID_HW_TYPE(type) \
+ KMP_DEBUG_ASSERT(type >= (kmp_hw_t)0 && type < KMP_HW_LAST)
+#define KMP_ASSERT_VALID_HW_TYPE(type) \
+ KMP_ASSERT(type >= (kmp_hw_t)0 && type < KMP_HW_LAST)
+
+#define KMP_FOREACH_HW_TYPE(type) \
+ for (kmp_hw_t type = (kmp_hw_t)0; type < KMP_HW_LAST; \
+ type = (kmp_hw_t)((int)type + 1))
+
+const char *__kmp_hw_get_keyword(kmp_hw_t type, bool plural = false);
+const char *__kmp_hw_get_catalog_string(kmp_hw_t type, bool plural = false);
+
/* Only Linux* OS and Windows* OS support thread affinity. */
#if KMP_AFFINITY_SUPPORTED
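The new kmp_hw_t enum orders topology layers roughly from coarsest (socket) to finest (thread), and KMP_FOREACH_HW_TYPE visits them in that order. A minimal sketch of how these helpers compose (a hypothetical debug dump; printf stands in for the runtime's own logging):

  // Sketch: print every known topology layer with its user-facing keyword,
  // singular and plural. Each value is validated against KMP_HW_LAST first.
  static void __kmp_dump_hw_types() {
    KMP_FOREACH_HW_TYPE(type) {
      KMP_DEBUG_ASSERT_VALID_HW_TYPE(type);
      printf("%d: %s / %s\n", (int)type, __kmp_hw_get_keyword(type),
             __kmp_hw_get_keyword(type, /*plural=*/true));
    }
  }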
@@ -629,8 +662,6 @@ extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity;
#if KMP_USE_HWLOC
extern hwloc_topology_t __kmp_hwloc_topology;
extern int __kmp_hwloc_error;
-extern int __kmp_numa_detected;
-extern int __kmp_tile_depth;
#endif
extern size_t __kmp_affin_mask_size;
@@ -758,27 +789,12 @@ enum affinity_type {
affinity_default
};
-enum affinity_gran {
- affinity_gran_fine = 0,
- affinity_gran_thread,
- affinity_gran_core,
- affinity_gran_tile,
- affinity_gran_numa,
- affinity_gran_package,
- affinity_gran_node,
-#if KMP_GROUP_AFFINITY
- // The "group" granularity isn't necesssarily coarser than all of the
- // other levels, but we put it last in the enum.
- affinity_gran_group,
-#endif /* KMP_GROUP_AFFINITY */
- affinity_gran_default
-};
-
enum affinity_top_method {
affinity_top_method_all = 0, // try all (supported) methods, in order
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
affinity_top_method_apicid,
affinity_top_method_x2apicid,
+ affinity_top_method_x2apicid_1f,
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
affinity_top_method_cpuinfo, // KMP_CPUINFO_FILE is usable on Windows* OS, too
#if KMP_GROUP_AFFINITY
@@ -794,7 +810,7 @@ enum affinity_top_method {
#define affinity_respect_mask_default (-1)
extern enum affinity_type __kmp_affinity_type; /* Affinity type */
-extern enum affinity_gran __kmp_affinity_gran; /* Affinity granularity */
+extern kmp_hw_t __kmp_affinity_gran; /* Affinity granularity */
extern int __kmp_affinity_gran_levels; /* corresponding int value */
extern int __kmp_affinity_dups; /* Affinity duplicate masks */
extern enum affinity_top_method __kmp_affinity_top_method;
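Replacing affinity_gran with kmp_hw_t means the granularity setting now names a topology layer directly, so granularity parsing can reuse the per-layer keywords instead of maintaining a parallel enum. A hedged sketch only (the helper name is invented; __kmp_str_match is the runtime's existing string matcher):

  // Sketch: map a "granularity=<token>" value onto a topology layer.
  // Returns KMP_HW_UNKNOWN if the token names no known layer.
  static kmp_hw_t __kmp_gran_from_token(const char *token) {
    KMP_FOREACH_HW_TYPE(type) {
      if (__kmp_str_match(__kmp_hw_get_keyword(type), 1, token))
        return type;
    }
    return KMP_HW_UNKNOWN;
  }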
@@ -817,7 +833,7 @@ extern char *__kmp_cpuinfo_file;
typedef enum kmp_proc_bind_t {
proc_bind_false = 0,
proc_bind_true,
- proc_bind_master,
+ proc_bind_primary,
proc_bind_close,
proc_bind_spread,
proc_bind_intel, // use KMP_AFFINITY interface
@@ -835,6 +851,10 @@ extern kmp_nested_proc_bind_t __kmp_nested_proc_bind;
extern int __kmp_display_affinity;
extern char *__kmp_affinity_format;
static const size_t KMP_AFFINITY_FORMAT_SIZE = 512;
+#if OMPT_SUPPORT
+extern int __kmp_tool;
+extern char *__kmp_tool_libraries;
+#endif // OMPT_SUPPORT
#if KMP_AFFINITY_SUPPORTED
#define KMP_PLACE_ALL (-1)
@@ -863,6 +883,7 @@ typedef struct kmp_hws_item {
} kmp_hws_item_t;
extern kmp_hws_item_t __kmp_hws_socket;
+extern kmp_hws_item_t __kmp_hws_die;
extern kmp_hws_item_t __kmp_hws_node;
extern kmp_hws_item_t __kmp_hws_tile;
extern kmp_hws_item_t __kmp_hws_core;
@@ -929,6 +950,10 @@ extern omp_memspace_handle_t const omp_large_cap_mem_space;
extern omp_memspace_handle_t const omp_const_mem_space;
extern omp_memspace_handle_t const omp_high_bw_mem_space;
extern omp_memspace_handle_t const omp_low_lat_mem_space;
+// Preview of target memory support
+extern omp_memspace_handle_t const llvm_omp_target_host_mem_space;
+extern omp_memspace_handle_t const llvm_omp_target_shared_mem_space;
+extern omp_memspace_handle_t const llvm_omp_target_device_mem_space;
typedef struct {
omp_alloctrait_key_t key;
@@ -945,6 +970,10 @@ extern omp_allocator_handle_t const omp_low_lat_mem_alloc;
extern omp_allocator_handle_t const omp_cgroup_mem_alloc;
extern omp_allocator_handle_t const omp_pteam_mem_alloc;
extern omp_allocator_handle_t const omp_thread_mem_alloc;
+// Preview of target memory support
+extern omp_allocator_handle_t const llvm_omp_target_host_mem_alloc;
+extern omp_allocator_handle_t const llvm_omp_target_shared_mem_alloc;
+extern omp_allocator_handle_t const llvm_omp_target_device_mem_alloc;
extern omp_allocator_handle_t const kmp_max_mem_alloc;
extern omp_allocator_handle_t __kmp_def_allocator;
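The preview handles plug into the standard allocator entry points. A sketch, assuming the preview target-memory support is enabled and a device runtime is available:

  #include <omp.h>
  // Sketch: request pinned host memory for host<->device staging via the
  // preview allocator; a NULL return means the memory space is unavailable.
  double *buf = (double *)omp_alloc(1024 * sizeof(double),
                                    llvm_omp_target_host_mem_alloc);
  if (buf) {
    // ... stage transfers through buf ...
    omp_free(buf, llvm_omp_target_host_mem_alloc);
  }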
@@ -982,6 +1011,7 @@ extern void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
extern void __kmp_init_memkind();
extern void __kmp_fini_memkind();
+extern void __kmp_init_target_mem();
/* ------------------------------------------------------------------------ */
@@ -1046,13 +1076,11 @@ extern void __kmp_fini_memkind();
/* Calculate new number of monitor wakeups for a specific block time based on
previous monitor_wakeups. Only allow increasing number of wakeups */
#define KMP_WAKEUPS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \
- (((blocktime) == KMP_MAX_BLOCKTIME) \
+ (((blocktime) == KMP_MAX_BLOCKTIME) ? (monitor_wakeups) \
+ : ((blocktime) == KMP_MIN_BLOCKTIME) ? KMP_MAX_MONITOR_WAKEUPS \
+ : ((monitor_wakeups) > (KMP_BLOCKTIME_MULTIPLIER / (blocktime))) \
? (monitor_wakeups) \
- : ((blocktime) == KMP_MIN_BLOCKTIME) \
- ? KMP_MAX_MONITOR_WAKEUPS \
- : ((monitor_wakeups) > (KMP_BLOCKTIME_MULTIPLIER / (blocktime))) \
- ? (monitor_wakeups) \
- : (KMP_BLOCKTIME_MULTIPLIER) / (blocktime))
+ : (KMP_BLOCKTIME_MULTIPLIER) / (blocktime))
/* Calculate number of intervals for a specific block time based on
monitor_wakeups */
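The reformatted conditional chain is behavior-preserving. Worked through (assuming blocktime is in ms and KMP_BLOCKTIME_MULTIPLIER is 1000 blocktime units per second, as defined elsewhere in this header):

  // KMP_WAKEUPS_FROM_BLOCKTIME(200, 1) -> 1000/200 = 5 wakeups/sec
  // KMP_WAKEUPS_FROM_BLOCKTIME(200, 8) -> 8 (wakeups only ever increase)
  // KMP_WAKEUPS_FROM_BLOCKTIME(KMP_MIN_BLOCKTIME, w) -> KMP_MAX_MONITOR_WAKEUPS
  // KMP_WAKEUPS_FROM_BLOCKTIME(KMP_MAX_BLOCKTIME, w) -> w ("infinite" blocktime)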
@@ -1097,7 +1125,10 @@ extern kmp_uint64 __kmp_now_nsec();
#define KMP_MAX_CHUNK (INT_MAX - 1)
#define KMP_DEFAULT_CHUNK 1
+#define KMP_MIN_DISP_NUM_BUFF 1
#define KMP_DFLT_DISP_NUM_BUFF 7
+#define KMP_MAX_DISP_NUM_BUFF 4096
+
#define KMP_MAX_ORDERED 8
#define KMP_MAX_FIELDS 32
@@ -1182,7 +1213,6 @@ typedef struct kmp_cpuinfo {
int stepping; // CPUID(1).EAX[3:0] ( Stepping )
int sse2; // 0 if SSE2 instructions are not supported, 1 otherwise.
int rtm; // 0 if RTM instructions are not supported, 1 otherwise.
- int cpu_stackoffset;
int apic_id;
int physical_id;
int logical_id;
@@ -1339,8 +1369,7 @@ static inline void __kmp_x86_pause(void) { _mm_pause(); }
#endif
#endif // KMP_HAVE_WAITPKG_INTRINSICS
KMP_ATTRIBUTE_TARGET_WAITPKG
-static inline int
-__kmp_tpause(uint32_t hint, uint64_t counter) {
+static inline int __kmp_tpause(uint32_t hint, uint64_t counter) {
#if !KMP_HAVE_WAITPKG_INTRINSICS
uint32_t timeHi = uint32_t(counter >> 32);
uint32_t timeLo = uint32_t(counter & 0xffffffff);
@@ -1356,8 +1385,7 @@ __kmp_tpause(uint32_t hint, uint64_t counter) {
#endif
}
KMP_ATTRIBUTE_TARGET_WAITPKG
-static inline void
-__kmp_umonitor(void *cacheline) {
+static inline void __kmp_umonitor(void *cacheline) {
#if !KMP_HAVE_WAITPKG_INTRINSICS
__asm__ volatile("# umonitor\n.byte 0xF3, 0x0F, 0xAE, 0x01 "
:
@@ -1368,8 +1396,7 @@ __kmp_umonitor(void *cacheline) {
#endif
}
KMP_ATTRIBUTE_TARGET_WAITPKG
-static inline int
-__kmp_umwait(uint32_t hint, uint64_t counter) {
+static inline int __kmp_umwait(uint32_t hint, uint64_t counter) {
#if !KMP_HAVE_WAITPKG_INTRINSICS
uint32_t timeHi = uint32_t(counter >> 32);
uint32_t timeLo = uint32_t(counter & 0xffffffff);
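Together these wrap the WAITPKG user-level monitor/wait pair (with byte-encoded fallbacks when the intrinsics are unavailable). A hedged sketch of the intended spin-wait pattern; the flag-polling wrapper itself is invented for illustration:

  // Sketch: lightweight wait on a flag word using UMONITOR/UMWAIT.
  // 'deadline' is an absolute TSC value; hint 0 allows the deeper C0.2 state.
  static inline void __wait_for_flag(volatile kmp_uint32 *flag,
                                     uint64_t deadline) {
    while (*flag == 0) {
      __kmp_umonitor((void *)flag); // arm the monitor on flag's cacheline
      if (*flag != 0) // re-check: the store may have landed before arming
        break;
      __kmp_umwait(0, deadline); // wakes on write, deadline, or interrupt
    }
  }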
@@ -1422,7 +1449,8 @@ enum cons_type {
ct_ordered_in_pdo,
ct_master,
ct_reduce,
- ct_barrier
+ ct_barrier,
+ ct_masked
};
#define IS_CONS_TYPE_ORDERED(ct) ((ct) == ct_pdo_ordered)
@@ -1570,7 +1598,7 @@ struct private_common {
struct private_common *next;
struct private_common *link;
void *gbl_addr;
- void *par_addr; /* par_addr == gbl_addr for MASTER thread */
+ void *par_addr; /* par_addr == gbl_addr for PRIMARY thread */
size_t cmn_size;
};
@@ -1645,14 +1673,12 @@ typedef struct KMP_ALIGN_CACHE dispatch_private_info32 {
kmp_int32 lb;
kmp_int32 st;
kmp_int32 tc;
- kmp_int32 static_steal_counter; /* for static_steal only; maybe better to put
- after ub */
- kmp_lock_t *th_steal_lock; // lock used for chunk stealing
- // KMP_ALIGN( 16 ) ensures ( if the KMP_ALIGN macro is turned on )
+ kmp_lock_t *steal_lock; // lock used for chunk stealing
+ // KMP_ALIGN(32) ensures (if the KMP_ALIGN macro is turned on)
// a) parm3 is properly aligned and
- // b) all parm1-4 are in the same cache line.
+ // b) all parm1-4 are on the same cache line.
// Because parm1-4 are used together, performance seems to be better
- // if they are in the same line (not measured though).
+ // if they are on the same cache line (not measured though).
struct KMP_ALIGN(32) { // AC: changed 16 to 32 in order to simplify template
kmp_int32 parm1; // structures in kmp_dispatch.cpp. This should
@@ -1664,9 +1690,6 @@ typedef struct KMP_ALIGN_CACHE dispatch_private_info32 {
kmp_uint32 ordered_lower;
kmp_uint32 ordered_upper;
#if KMP_OS_WINDOWS
- // This var can be placed in the hole between 'tc' and 'parm1', instead of
- // 'static_steal_counter'. It would be nice to measure execution times.
- // Conditional if/endif can be removed at all.
kmp_int32 last_upper;
#endif /* KMP_OS_WINDOWS */
} dispatch_private_info32_t;
@@ -1678,9 +1701,7 @@ typedef struct KMP_ALIGN_CACHE dispatch_private_info64 {
kmp_int64 lb; /* lower-bound */
kmp_int64 st; /* stride */
kmp_int64 tc; /* trip count (number of iterations) */
- kmp_int64 static_steal_counter; /* for static_steal only; maybe better to put
- after ub */
- kmp_lock_t *th_steal_lock; // lock used for chunk stealing
+ kmp_lock_t *steal_lock; // lock used for chunk stealing
/* parm[1-4] are used in different ways by different scheduling algorithms */
// KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on )
@@ -1699,9 +1720,6 @@ typedef struct KMP_ALIGN_CACHE dispatch_private_info64 {
kmp_uint64 ordered_lower;
kmp_uint64 ordered_upper;
#if KMP_OS_WINDOWS
- // This var can be placed in the hole between 'tc' and 'parm1', instead of
- // 'static_steal_counter'. It would be nice to measure execution times.
- // Conditional if/endif can be removed at all.
kmp_int64 last_upper;
#endif /* KMP_OS_WINDOWS */
} dispatch_private_info64_t;
@@ -1755,9 +1773,8 @@ typedef struct KMP_ALIGN_CACHE dispatch_private_info {
} u;
enum sched_type schedule; /* scheduling algorithm */
kmp_sched_flags_t flags; /* flags (e.g., ordered, nomerge, etc.) */
+ std::atomic<kmp_uint32> steal_flag; // static_steal only, state of a buffer
kmp_int32 ordered_bumped;
- // To retain the structure size after making ordered_iteration scalar
- kmp_int32 ordered_dummy[KMP_MAX_ORDERED - 3];
// Stack of buffers for nest of serial regions
struct dispatch_private_info *next;
kmp_int32 type_size; /* the size of types in private_info */
@@ -1772,7 +1789,7 @@ typedef struct dispatch_shared_info32 {
/* chunk index under dynamic, number of idle threads under static-steal;
iteration index otherwise */
volatile kmp_uint32 iteration;
- volatile kmp_uint32 num_done;
+ volatile kmp_int32 num_done;
volatile kmp_uint32 ordered_iteration;
// Dummy to retain the structure size after making ordered_iteration scalar
kmp_int32 ordered_dummy[KMP_MAX_ORDERED - 1];
@@ -1782,7 +1799,7 @@ typedef struct dispatch_shared_info64 {
/* chunk index under dynamic, number of idle threads under static-steal;
iteration index otherwise */
volatile kmp_uint64 iteration;
- volatile kmp_uint64 num_done;
+ volatile kmp_int64 num_done;
volatile kmp_uint64 ordered_iteration;
// Dummy to retain the structure size after making ordered_iteration scalar
kmp_int64 ordered_dummy[KMP_MAX_ORDERED - 3];
@@ -1818,7 +1835,7 @@ typedef struct kmp_disp {
dispatch_private_info_t *th_dispatch_pr_current;
dispatch_private_info_t *th_disp_buffer;
- kmp_int32 th_disp_index;
+ kmp_uint32 th_disp_index;
kmp_int32 th_doacross_buf_idx; // thread's doacross buffer index
volatile kmp_uint32 *th_doacross_flags; // pointer to shared array of flags
kmp_int64 *th_doacross_info; // info on loop bounds
@@ -1882,9 +1899,8 @@ typedef enum kmp_bar_pat { /* Barrier communication patterns */
0, /* Single level (degenerate) tree */
bp_tree_bar =
1, /* Balanced tree with branching factor 2^n */
- bp_hyper_bar =
- 2, /* Hypercube-embedded tree with min branching
- factor 2^n */
+ bp_hyper_bar = 2, /* Hypercube-embedded tree with min
+ branching factor 2^n */
bp_hierarchical_bar = 3, /* Machine hierarchy tree */
bp_last_bar /* Placeholder to mark the end */
} kmp_bar_pat_e;
@@ -1969,9 +1985,9 @@ union KMP_ALIGN_CACHE kmp_barrier_team_union {
kmp_uint64 b_arrived; /* STATE => task reached synch point. */
#if USE_DEBUGGER
// The following two fields are intended solely for the debugger. Only the
- // master of the team accesses these fields: the first one is increased by
- // 1 when master arrives to a barrier, the second one is increased by one
- // when all the threads arrived.
+ // primary thread of the team accesses these fields: the first one is
+ // increased by 1 when the primary thread arrives at a barrier, the second
+ // one is increased by 1 when all the threads have arrived.
kmp_uint b_master_arrived;
kmp_uint b_team_arrived;
#endif
@@ -2217,6 +2233,7 @@ typedef struct kmp_taskgroup {
// Block of data to perform task reduction
void *reduce_data; // reduction related info
kmp_int32 reduce_num_data; // number of data items to reduce
+ uintptr_t *gomp_data; // gomp reduction data
} kmp_taskgroup_t;
// forward declarations
@@ -2224,15 +2241,24 @@ typedef union kmp_depnode kmp_depnode_t;
typedef struct kmp_depnode_list kmp_depnode_list_t;
typedef struct kmp_dephash_entry kmp_dephash_entry_t;
+#define KMP_DEP_IN 0x1
+#define KMP_DEP_OUT 0x2
+#define KMP_DEP_INOUT 0x3
+#define KMP_DEP_MTX 0x4
+#define KMP_DEP_SET 0x8
// Compiler sends us this info:
typedef struct kmp_depend_info {
kmp_intptr_t base_addr;
size_t len;
- struct {
- bool in : 1;
- bool out : 1;
- bool mtx : 1;
- } flags;
+ union {
+ kmp_uint8 flag;
+ struct {
+ unsigned in : 1;
+ unsigned out : 1;
+ unsigned mtx : 1;
+ unsigned set : 1;
+ } flags;
+ };
} kmp_depend_info_t;
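Wrapping the bit-fields in a union with a kmp_uint8 view lets the runtime read or compare the whole dependence type in one byte operation; the KMP_DEP_* masks above mirror the field order. A sketch (relying, as the runtime does on its supported ABIs, on bit-fields being packed from bit 0):

  // Sketch: the byte view and the bit-field view alias the same bits.
  kmp_depend_info_t d = {};
  d.flags.in = 1;  // KMP_DEP_IN  == 0x1
  d.flags.out = 1; // KMP_DEP_OUT == 0x2
  KMP_DEBUG_ASSERT(d.flag == KMP_DEP_INOUT); // 0x3 == in|out
  // A single compare now distinguishes a pure 'in' dep from anything stronger:
  bool is_pure_in = (d.flag == KMP_DEP_IN);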
// Internal structures to work with task dependencies:
@@ -2266,9 +2292,9 @@ union KMP_ALIGN_CACHE kmp_depnode {
struct kmp_dephash_entry {
kmp_intptr_t addr;
kmp_depnode_t *last_out;
- kmp_depnode_list_t *last_ins;
- kmp_depnode_list_t *last_mtxs;
- kmp_int32 last_flag;
+ kmp_depnode_list_t *last_set;
+ kmp_depnode_list_t *prev_set;
+ kmp_uint8 last_flag;
kmp_lock_t *mtx_lock; /* is referenced by depnodes w/mutexinoutset dep */
kmp_dephash_entry_t *next_in_bucket;
};
@@ -2501,7 +2527,7 @@ typedef struct kmp_teams_size {
// This struct stores a thread that acts as a "root" for a contention
// group. Contention groups are rooted at kmp_root threads, but also at
-// each master thread of each team created in the teams construct.
+// each primary thread of each team created in the teams construct.
// This struct therefore also stores a thread_limit associated with
// that contention group, and a counter to track the number of threads
// active in that contention group. Each thread has a list of these: CG
@@ -2513,7 +2539,7 @@ typedef struct kmp_teams_size {
typedef struct kmp_cg_root {
kmp_info_p *cg_root; // "root" thread for a contention group
// The CG root's limit comes from OMP_THREAD_LIMIT for root threads, or
- // thread_limit clause for teams masters
+ // thread_limit clause for teams primary threads
kmp_int32 cg_thread_limit;
kmp_int32 cg_nthreads; // Count of active threads in CG rooted at cg_root
struct kmp_cg_root *up; // pointer to higher level CG root in list
@@ -2523,8 +2549,9 @@ typedef struct kmp_cg_root {
typedef struct KMP_ALIGN_CACHE kmp_base_info {
/* Start with the readonly data which is cache aligned and padded. This is
- written before the thread starts working by the master. Uber masters may
- update themselves later. Usage does not consider serialized regions. */
+ written by the primary thread before the thread starts working. Uber
+ masters may update themselves later. Usage does not consider serialized
+ regions. */
kmp_desc_t th_info;
kmp_team_p *th_team; /* team we belong to */
kmp_root_p *th_root; /* pointer to root of task hierarchy */
@@ -2535,7 +2562,7 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info {
/* The following are cached from the team info structure */
/* TODO use these in more places as determined to be needed via profiling */
int th_team_nproc; /* number of threads in a team */
- kmp_info_p *th_team_master; /* the team's master thread */
+ kmp_info_p *th_team_master; /* the team's primary thread */
int th_team_serialized; /* team is serialized */
microtask_t th_teams_microtask; /* save entry address for teams construct */
int th_teams_level; /* save initial level of teams construct */
@@ -2556,7 +2583,7 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info {
kmp_affin_mask_t *th_affin_mask; /* thread's current affinity mask */
#endif
omp_allocator_handle_t th_def_allocator; /* default allocator */
- /* The data set by the master at reinit, then R/W by the worker */
+ /* The data set by the primary thread at reinit, then R/W by the worker */
KMP_ALIGN_CACHE int
th_set_nproc; /* if > 0, then only use this request for the next fork */
#if KMP_NESTED_HOT_TEAMS
@@ -2592,7 +2619,7 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info {
ompt_thread_info_t ompt_thread_info;
#endif
- /* The following are also read by the master during reinit */
+ /* The following are also read by the primary thread during reinit */
struct common_table *th_pri_common;
volatile kmp_uint32 th_spin_here; /* thread-local location for spinning */
@@ -2669,7 +2696,9 @@ typedef union KMP_ALIGN_CACHE kmp_info {
// OpenMP thread team data structures
-typedef struct kmp_base_data { volatile kmp_uint32 t_value; } kmp_base_data_t;
+typedef struct kmp_base_data {
+ volatile kmp_uint32 t_value;
+} kmp_base_data_t;
typedef union KMP_ALIGN_CACHE kmp_sleep_team {
double dt_align; /* use worst case alignment */
@@ -2690,7 +2719,7 @@ typedef int (*launch_t)(int gtid);
// Set up how many argv pointers will fit in cache lines containing
// t_inline_argv. Historically, we have supported at least 96 bytes. Using a
-// larger value for more space between the master write/worker read section and
+// larger value for more space between the primary write/worker read section and
// read/write by all section seems to buy more performance on EPCC PARALLEL.
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_INLINE_ARGV_BYTES \
@@ -2716,11 +2745,11 @@ typedef struct KMP_ALIGN_CACHE kmp_base_team {
std::atomic<void *> t_tg_reduce_data[2]; // to support task modifier
std::atomic<int> t_tg_fini_counter[2]; // sync end of task reductions
- // Master only
+ // Primary thread only
// ---------------------------------------------------------------------------
- KMP_ALIGN_CACHE int t_master_tid; // tid of master in parent team
- int t_master_this_cons; // "this_construct" single counter of master in parent
- // team
+ KMP_ALIGN_CACHE int t_master_tid; // tid of primary thread in parent team
+ int t_master_this_cons; // "this_construct" single counter of primary thread
+ // in parent team
ident_t *t_ident; // if volatile, have to change too much other crud to
// volatile too
kmp_team_p *t_parent; // parent team
@@ -2732,7 +2761,7 @@ typedef struct KMP_ALIGN_CACHE kmp_base_team {
kmp_uint64 t_region_time; // region begin timestamp
#endif /* USE_ITT_BUILD */
- // Master write, workers read
+ // Primary thread write, workers read
// --------------------------------------------------------------------------
KMP_ALIGN_CACHE void **t_argv;
int t_argc;
@@ -2768,7 +2797,7 @@ typedef struct KMP_ALIGN_CACHE kmp_base_team {
kmp_r_sched_t t_sched; // run-time schedule for the team
#if KMP_AFFINITY_SUPPORTED
int t_first_place; // first & last place in parent thread's partition.
- int t_last_place; // Restore these values to master after par region.
+ int t_last_place; // Restore these values to primary thread after par region.
#endif // KMP_AFFINITY_SUPPORTED
int t_display_affinity;
int t_size_changed; // team size was changed?: 0: no, 1: yes, -1: changed via
@@ -2843,6 +2872,9 @@ typedef struct kmp_base_root {
kmp_lock_t r_begin_lock;
volatile int r_begin;
int r_blocktime; /* blocktime for this root and descendants */
+#if KMP_AFFINITY_SUPPORTED
+ int r_affinity_assigned;
+#endif // KMP_AFFINITY_SUPPORTED
} kmp_base_root_t;
typedef union KMP_ALIGN_CACHE kmp_root {
@@ -2975,6 +3007,7 @@ extern enum sched_type __kmp_static; /* default static scheduling method */
extern enum sched_type __kmp_guided; /* default guided scheduling method */
extern enum sched_type __kmp_auto; /* default auto scheduling method */
extern int __kmp_chunk; /* default runtime chunk size */
+extern int __kmp_force_monotonic; /* whether monotonic scheduling forced */
extern size_t __kmp_stksize; /* stack size per thread */
#if KMP_USE_MONITOR
@@ -3043,9 +3076,8 @@ extern int __kmp_ncores; /* Total number of cores for threads placement */
extern int __kmp_abort_delay;
extern int __kmp_need_register_atfork_specified;
-extern int
- __kmp_need_register_atfork; /* At initialization, call pthread_atfork to
- install fork handler */
+extern int __kmp_need_register_atfork; /* At initialization, call pthread_atfork
+ to install fork handler */
extern int __kmp_gtid_mode; /* Method of getting gtid, values:
0 - not set, will be set at runtime
1 - using stack search
@@ -3123,6 +3155,8 @@ extern const char *__kmp_speculative_statsfile;
extern int __kmp_display_env; /* TRUE or FALSE */
extern int __kmp_display_env_verbose; /* TRUE if OMP_DISPLAY_ENV=VERBOSE */
extern int __kmp_omp_cancellation; /* TRUE or FALSE */
+extern int __kmp_nteams;
+extern int __kmp_teams_thread_limit;
/* ------------------------------------------------------------------------- */
@@ -3331,6 +3365,8 @@ extern void __kmp_push_proc_bind(ident_t *loc, int gtid,
kmp_proc_bind_t proc_bind);
extern void __kmp_push_num_teams(ident_t *loc, int gtid, int num_teams,
int num_threads);
+extern void __kmp_push_num_teams_51(ident_t *loc, int gtid, int num_teams_lb,
+ int num_teams_ub, int num_threads);
extern void __kmp_yield();
@@ -3409,7 +3445,7 @@ extern void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64<> *flag,
,
void *itt_sync_obj
#endif
- );
+);
extern void __kmp_release_64(kmp_flag_64<> *flag);
extern void __kmp_infinite_loop(void);
@@ -3430,7 +3466,7 @@ extern void __kmp_check_stack_overlap(kmp_info_t *thr);
extern void __kmp_expand_host_name(char *buffer, size_t size);
extern void __kmp_expand_file_name(char *result, size_t rlen, char *pattern);
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || (KMP_OS_WINDOWS && KMP_ARCH_AARCH64)
extern void
__kmp_initialize_system_tick(void); /* Initialize timer tick value */
#endif
@@ -3460,6 +3496,16 @@ extern void __kmp_balanced_affinity(kmp_info_t *th, int team_size);
#if KMP_OS_LINUX || KMP_OS_FREEBSD
extern int kmp_set_thread_affinity_mask_initial(void);
#endif
+static inline void __kmp_assign_root_init_mask() {
+ int gtid = __kmp_entry_gtid();
+ kmp_root_t *r = __kmp_threads[gtid]->th.th_root;
+ if (r->r.r_uber_thread == __kmp_threads[gtid] && !r->r.r_affinity_assigned) {
+ __kmp_affinity_set_init_mask(gtid, TRUE);
+ r->r.r_affinity_assigned = TRUE;
+ }
+}
+#else /* KMP_AFFINITY_SUPPORTED */
+#define __kmp_assign_root_init_mask() /* Nothing */
#endif /* KMP_AFFINITY_SUPPORTED */
// No need for KMP_AFFINITY_SUPPORTED guard as only one field in the
// format string is for affinity, so platforms that do not support
@@ -3667,7 +3713,7 @@ extern int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int npr, int argc,
,
void **exit_frame_ptr
#endif
- );
+);
/* ------------------------------------------------------------------------ */
@@ -3701,6 +3747,9 @@ KMP_EXPORT void __kmpc_flush(ident_t *);
KMP_EXPORT void __kmpc_barrier(ident_t *, kmp_int32 global_tid);
KMP_EXPORT kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
KMP_EXPORT void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
+KMP_EXPORT kmp_int32 __kmpc_masked(ident_t *, kmp_int32 global_tid,
+ kmp_int32 filter);
+KMP_EXPORT void __kmpc_end_masked(ident_t *, kmp_int32 global_tid);
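This pair backs the OpenMP 5.1 masked construct, mirroring __kmpc_master/__kmpc_end_master but with a thread filter. A sketch of the compiler-shaped lowering (loc/gtid setup elided):

  // Sketch of generated code for:
  //   #pragma omp masked filter(2)
  //   { body(); }
  if (__kmpc_masked(&loc, gtid, /*filter=*/2)) { // nonzero only on tid 2
    body();
    __kmpc_end_masked(&loc, gtid);
  }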
KMP_EXPORT void __kmpc_ordered(ident_t *, kmp_int32 global_tid);
KMP_EXPORT void __kmpc_end_ordered(ident_t *, kmp_int32 global_tid);
KMP_EXPORT void __kmpc_critical(ident_t *, kmp_int32 global_tid,
@@ -3744,12 +3793,9 @@ KMP_EXPORT kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
size_t sizeof_kmp_task_t,
size_t sizeof_shareds,
kmp_routine_entry_t task_entry);
-KMP_EXPORT kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
- kmp_int32 flags,
- size_t sizeof_kmp_task_t,
- size_t sizeof_shareds,
- kmp_routine_entry_t task_entry,
- kmp_int64 device_id);
+KMP_EXPORT kmp_task_t *__kmpc_omp_target_task_alloc(
+ ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
+ size_t sizeof_shareds, kmp_routine_entry_t task_entry, kmp_int64 device_id);
KMP_EXPORT void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
kmp_task_t *task);
KMP_EXPORT void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,
@@ -3817,6 +3863,10 @@ KMP_EXPORT void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
KMP_EXPORT kmp_int32 __kmpc_omp_reg_task_with_affinity(
ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 naffins,
kmp_task_affinity_info_t *affin_list);
+KMP_EXPORT void __kmp_set_num_teams(int num_teams);
+KMP_EXPORT int __kmp_get_max_teams(void);
+KMP_EXPORT void __kmp_set_teams_thread_limit(int limit);
+KMP_EXPORT int __kmp_get_teams_thread_limit(void);
/* Lock interface routines (fast versions with gtid passed in) */
KMP_EXPORT void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid,
@@ -3885,6 +3935,11 @@ KMP_EXPORT void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
KMP_EXPORT void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
kmp_int32 num_teams,
kmp_int32 num_threads);
+/* Function for OpenMP 5.1 num_teams clause */
+KMP_EXPORT void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
+ kmp_int32 num_teams_lb,
+ kmp_int32 num_teams_ub,
+ kmp_int32 num_threads);
KMP_EXPORT void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc,
kmpc_micro microtask, ...);
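A sketch of how a 5.1 num_teams(lower:upper) clause would reach the new entry point ahead of the teams fork (the outlined function name is invented):

  // Sketch of generated code for:
  //   #pragma omp teams num_teams(4 : 16) thread_limit(8)
  __kmpc_push_num_teams_51(&loc, gtid, /*num_teams_lb=*/4,
                           /*num_teams_ub=*/16, /*num_threads=*/8);
  __kmpc_fork_teams(&loc, /*argc=*/0, teams_outlined_fn);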
struct kmp_dim { // loop bounds info casted to kmp_int64
@@ -3955,6 +4010,11 @@ KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize_s(size_t);
KMP_EXPORT void KMPC_CONVENTION kmpc_set_library(int);
KMP_EXPORT void KMPC_CONVENTION kmpc_set_defaults(char const *);
KMP_EXPORT void KMPC_CONVENTION kmpc_set_disp_num_buffers(int);
+void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format);
+size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size);
+void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format);
+size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
+ char const *format);
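These expanded names back the standard OpenMP 5.0 affinity-format API, so the user-facing usage is the usual one:

  #include <omp.h>
  // Standard usage that these entry points implement.
  omp_set_affinity_format("OMP: thread %n bound to OS procs %A");
  #pragma omp parallel
  {
    char buf[512]; // KMP_AFFINITY_FORMAT_SIZE above caps the stored format
    omp_capture_affinity(buf, sizeof(buf), NULL); // NULL -> stored format
    // ... log buf per thread ...
  }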
enum kmp_target_offload_kind {
tgt_disabled = 0,
@@ -4031,11 +4091,33 @@ extern void __kmp_hidden_helper_main_thread_release();
#define KMP_HIDDEN_HELPER_WORKER_THREAD(gtid) \
((gtid) > 1 && (gtid) <= __kmp_hidden_helper_threads_num)
+#define KMP_HIDDEN_HELPER_TEAM(team) \
+ (team->t.t_threads[0] == __kmp_hidden_helper_main_thread)
+
// Map a gtid to a hidden helper thread. The first hidden helper thread,
// a.k.a. the main thread, is skipped.
#define KMP_GTID_TO_SHADOW_GTID(gtid) \
((gtid) % (__kmp_hidden_helper_threads_num - 1) + 2)
+// Return the adjusted gtid value by subtracting from gtid the number
+// of hidden helper threads. This adjusted value is the gtid the thread would
+// have received if there were no hidden helper threads.
+static inline int __kmp_adjust_gtid_for_hidden_helpers(int gtid) {
+ int adjusted_gtid = gtid;
+ if (__kmp_hidden_helper_threads_num > 0 && gtid > 0 &&
+ gtid - __kmp_hidden_helper_threads_num >= 0) {
+ adjusted_gtid -= __kmp_hidden_helper_threads_num;
+ }
+ return adjusted_gtid;
+}
+
+// Support for error directive
+typedef enum kmp_severity_t {
+ severity_warning = 1,
+ severity_fatal = 2
+} kmp_severity_t;
+extern void __kmpc_error(ident_t *loc, int severity, const char *message);
+
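A sketch of the lowering for the 5.1 error directive that this entry point supports:

  // Sketch of generated code for:
  //   #pragma omp error severity(warning) message("deprecated construct")
  __kmpc_error(&loc, severity_warning, "deprecated construct");
  // severity(fatal) lowers the same way but the call does not return:
  //   __kmpc_error(&loc, severity_fatal, "hard stop");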
#ifdef __cplusplus
}
#endif
@@ -4082,6 +4164,12 @@ int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid,
#endif /* USE_ITT_BUILD */
kmp_int32 is_constrained);
+extern int __kmp_nesting_mode;
+extern int __kmp_nesting_mode_nlevels;
+extern int *__kmp_nesting_nth_level;
+extern void __kmp_init_nesting_mode();
+extern void __kmp_set_nesting_mode_threads();
+
/// This class safely opens and closes a C-style FILE* object using RAII
/// semantics. There are also methods which allow using stdout or stderr as
/// the underlying FILE* object. With the implicit conversion operator to