Diffstat (limited to 'openmp/runtime/src/kmp.h')
-rw-r--r-- | openmp/runtime/src/kmp.h | 280
1 file changed, 184 insertions, 96 deletions
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 87e91a0f8d10..05264f4433d3 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -138,6 +138,10 @@ typedef unsigned int kmp_hwloc_depth_t;
 #include "ompt-internal.h"
 #endif

+#if OMPD_SUPPORT
+#include "ompd-specific.h"
+#endif
+
 #ifndef UNLIKELY
 #define UNLIKELY(x) (x)
 #endif
@@ -595,6 +599,35 @@ typedef int PACKED_REDUCTION_METHOD_T;
 #include <pthread.h>
 #endif

+enum kmp_hw_t : int {
+  KMP_HW_UNKNOWN = -1,
+  KMP_HW_SOCKET = 0,
+  KMP_HW_PROC_GROUP,
+  KMP_HW_NUMA,
+  KMP_HW_DIE,
+  KMP_HW_LLC,
+  KMP_HW_L3,
+  KMP_HW_TILE,
+  KMP_HW_MODULE,
+  KMP_HW_L2,
+  KMP_HW_L1,
+  KMP_HW_CORE,
+  KMP_HW_THREAD,
+  KMP_HW_LAST
+};
+
+#define KMP_DEBUG_ASSERT_VALID_HW_TYPE(type)                                   \
+  KMP_DEBUG_ASSERT(type >= (kmp_hw_t)0 && type < KMP_HW_LAST)
+#define KMP_ASSERT_VALID_HW_TYPE(type)                                         \
+  KMP_ASSERT(type >= (kmp_hw_t)0 && type < KMP_HW_LAST)
+
+#define KMP_FOREACH_HW_TYPE(type)                                              \
+  for (kmp_hw_t type = (kmp_hw_t)0; type < KMP_HW_LAST;                        \
+       type = (kmp_hw_t)((int)type + 1))
+
+const char *__kmp_hw_get_keyword(kmp_hw_t type, bool plural = false);
+const char *__kmp_hw_get_catalog_string(kmp_hw_t type, bool plural = false);
+
 /* Only Linux* OS and Windows* OS support thread affinity. */
 #if KMP_AFFINITY_SUPPORTED
@@ -629,8 +662,6 @@ extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity;
 #if KMP_USE_HWLOC
 extern hwloc_topology_t __kmp_hwloc_topology;
 extern int __kmp_hwloc_error;
-extern int __kmp_numa_detected;
-extern int __kmp_tile_depth;
 #endif

 extern size_t __kmp_affin_mask_size;
@@ -758,27 +789,12 @@ enum affinity_type {
   affinity_default
 };

-enum affinity_gran {
-  affinity_gran_fine = 0,
-  affinity_gran_thread,
-  affinity_gran_core,
-  affinity_gran_tile,
-  affinity_gran_numa,
-  affinity_gran_package,
-  affinity_gran_node,
-#if KMP_GROUP_AFFINITY
-  // The "group" granularity isn't necesssarily coarser than all of the
-  // other levels, but we put it last in the enum.
-  affinity_gran_group,
-#endif /* KMP_GROUP_AFFINITY */
-  affinity_gran_default
-};
-
 enum affinity_top_method {
   affinity_top_method_all = 0, // try all (supported) methods, in order
 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
   affinity_top_method_apicid,
   affinity_top_method_x2apicid,
+  affinity_top_method_x2apicid_1f,
 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
   affinity_top_method_cpuinfo, // KMP_CPUINFO_FILE is usable on Windows* OS, too
 #if KMP_GROUP_AFFINITY
@@ -794,7 +810,7 @@ enum affinity_top_method {
 #define affinity_respect_mask_default (-1)

 extern enum affinity_type __kmp_affinity_type; /* Affinity type */
-extern enum affinity_gran __kmp_affinity_gran; /* Affinity granularity */
+extern kmp_hw_t __kmp_affinity_gran; /* Affinity granularity */
 extern int __kmp_affinity_gran_levels; /* corresponding int value */
 extern int __kmp_affinity_dups; /* Affinity duplicate masks */
 extern enum affinity_top_method __kmp_affinity_top_method;
@@ -817,7 +833,7 @@ extern char *__kmp_cpuinfo_file;
 typedef enum kmp_proc_bind_t {
   proc_bind_false = 0,
   proc_bind_true,
-  proc_bind_master,
+  proc_bind_primary,
   proc_bind_close,
   proc_bind_spread,
   proc_bind_intel, // use KMP_AFFINITY interface
@@ -835,6 +851,10 @@ extern kmp_nested_proc_bind_t __kmp_nested_proc_bind;
 extern int __kmp_display_affinity;
 extern char *__kmp_affinity_format;
 static const size_t KMP_AFFINITY_FORMAT_SIZE = 512;
+#if OMPT_SUPPORT
+extern int __kmp_tool;
+extern char *__kmp_tool_libraries;
+#endif // OMPT_SUPPORT

 #if KMP_AFFINITY_SUPPORTED
 #define KMP_PLACE_ALL (-1)
@@ -863,6 +883,7 @@ typedef struct kmp_hws_item {
 } kmp_hws_item_t;

 extern kmp_hws_item_t __kmp_hws_socket;
+extern kmp_hws_item_t __kmp_hws_die;
 extern kmp_hws_item_t __kmp_hws_node;
 extern kmp_hws_item_t __kmp_hws_tile;
 extern kmp_hws_item_t __kmp_hws_core;
@@ -929,6 +950,10 @@ extern omp_memspace_handle_t const omp_large_cap_mem_space;
 extern omp_memspace_handle_t const omp_const_mem_space;
 extern omp_memspace_handle_t const omp_high_bw_mem_space;
 extern omp_memspace_handle_t const omp_low_lat_mem_space;
+// Preview of target memory support
+extern omp_memspace_handle_t const llvm_omp_target_host_mem_space;
+extern omp_memspace_handle_t const llvm_omp_target_shared_mem_space;
+extern omp_memspace_handle_t const llvm_omp_target_device_mem_space;

 typedef struct {
   omp_alloctrait_key_t key;
@@ -945,6 +970,10 @@ extern omp_allocator_handle_t const omp_low_lat_mem_alloc;
 extern omp_allocator_handle_t const omp_cgroup_mem_alloc;
 extern omp_allocator_handle_t const omp_pteam_mem_alloc;
 extern omp_allocator_handle_t const omp_thread_mem_alloc;
+// Preview of target memory support
+extern omp_allocator_handle_t const llvm_omp_target_host_mem_alloc;
+extern omp_allocator_handle_t const llvm_omp_target_shared_mem_alloc;
+extern omp_allocator_handle_t const llvm_omp_target_device_mem_alloc;
 extern omp_allocator_handle_t const kmp_max_mem_alloc;
 extern omp_allocator_handle_t __kmp_def_allocator;
@@ -982,6 +1011,7 @@ extern void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);

 extern void __kmp_init_memkind();
 extern void __kmp_fini_memkind();
+extern void __kmp_init_target_mem();

 /* ------------------------------------------------------------------------ */
@@ -1046,13 +1076,11 @@ extern void __kmp_fini_memkind();
 /* Calculate new number of monitor wakeups for a specific block time based on
    previous monitor_wakeups. Only allow increasing number of wakeups */
 #define KMP_WAKEUPS_FROM_BLOCKTIME(blocktime, monitor_wakeups)                 \
-  (((blocktime) == KMP_MAX_BLOCKTIME)                                          \
+  (((blocktime) == KMP_MAX_BLOCKTIME)   ? (monitor_wakeups)                    \
+   : ((blocktime) == KMP_MIN_BLOCKTIME) ? KMP_MAX_MONITOR_WAKEUPS              \
+   : ((monitor_wakeups) > (KMP_BLOCKTIME_MULTIPLIER / (blocktime)))            \
       ? (monitor_wakeups)                                                      \
-      : ((blocktime) == KMP_MIN_BLOCKTIME)                                     \
-            ? KMP_MAX_MONITOR_WAKEUPS                                          \
-            : ((monitor_wakeups) > (KMP_BLOCKTIME_MULTIPLIER / (blocktime)))   \
-                  ? (monitor_wakeups)                                          \
-                  : (KMP_BLOCKTIME_MULTIPLIER) / (blocktime))
+      : (KMP_BLOCKTIME_MULTIPLIER) / (blocktime))

 /* Calculate number of intervals for a specific block time based on
    monitor_wakeups */
@@ -1097,7 +1125,10 @@ extern kmp_uint64 __kmp_now_nsec();
 #define KMP_MAX_CHUNK (INT_MAX - 1)
 #define KMP_DEFAULT_CHUNK 1

+#define KMP_MIN_DISP_NUM_BUFF 1
 #define KMP_DFLT_DISP_NUM_BUFF 7
+#define KMP_MAX_DISP_NUM_BUFF 4096
+
 #define KMP_MAX_ORDERED 8

 #define KMP_MAX_FIELDS 32
@@ -1182,7 +1213,6 @@ typedef struct kmp_cpuinfo {
   int stepping; // CPUID(1).EAX[3:0] ( Stepping )
   int sse2; // 0 if SSE2 instructions are not supported, 1 otherwise.
   int rtm; // 0 if RTM instructions are not supported, 1 otherwise.
-  int cpu_stackoffset;
   int apic_id;
   int physical_id;
   int logical_id;
@@ -1339,8 +1369,7 @@ static inline void __kmp_x86_pause(void) { _mm_pause(); }
 #endif
 #endif // KMP_HAVE_WAITPKG_INTRINSICS
 KMP_ATTRIBUTE_TARGET_WAITPKG
-static inline int
-__kmp_tpause(uint32_t hint, uint64_t counter) {
+static inline int __kmp_tpause(uint32_t hint, uint64_t counter) {
 #if !KMP_HAVE_WAITPKG_INTRINSICS
   uint32_t timeHi = uint32_t(counter >> 32);
   uint32_t timeLo = uint32_t(counter & 0xffffffff);
@@ -1356,8 +1385,7 @@ __kmp_tpause(uint32_t hint, uint64_t counter) {
 #endif
 }
 KMP_ATTRIBUTE_TARGET_WAITPKG
-static inline void
-__kmp_umonitor(void *cacheline) {
+static inline void __kmp_umonitor(void *cacheline) {
 #if !KMP_HAVE_WAITPKG_INTRINSICS
   __asm__ volatile("# umonitor\n.byte 0xF3, 0x0F, 0xAE, 0x01 "
                    :
@@ -1368,8 +1396,7 @@ __kmp_umonitor(void *cacheline) {
 #endif
 }
 KMP_ATTRIBUTE_TARGET_WAITPKG
-static inline int
-__kmp_umwait(uint32_t hint, uint64_t counter) {
+static inline int __kmp_umwait(uint32_t hint, uint64_t counter) {
 #if !KMP_HAVE_WAITPKG_INTRINSICS
   uint32_t timeHi = uint32_t(counter >> 32);
   uint32_t timeLo = uint32_t(counter & 0xffffffff);
@@ -1422,7 +1449,8 @@ enum cons_type {
   ct_ordered_in_pdo,
   ct_master,
   ct_reduce,
-  ct_barrier
+  ct_barrier,
+  ct_masked
 };

 #define IS_CONS_TYPE_ORDERED(ct) ((ct) == ct_pdo_ordered)
@@ -1570,7 +1598,7 @@ struct private_common {
   struct private_common *next;
   struct private_common *link;
   void *gbl_addr;
-  void *par_addr; /* par_addr == gbl_addr for MASTER thread */
+  void *par_addr; /* par_addr == gbl_addr for PRIMARY thread */
   size_t cmn_size;
 };
@@ -1645,14 +1673,12 @@ typedef struct KMP_ALIGN_CACHE dispatch_private_info32 {
   kmp_int32 lb;
   kmp_int32 st;
   kmp_int32 tc;
-  kmp_int32 static_steal_counter; /* for static_steal only; maybe better to put
-                                     after ub */
-  kmp_lock_t *th_steal_lock; // lock used for chunk stealing
-  // KMP_ALIGN( 16 ) ensures ( if the KMP_ALIGN macro is turned on )
+  kmp_lock_t *steal_lock; // lock used for chunk stealing
+  // KMP_ALIGN(32) ensures (if the KMP_ALIGN macro is turned on)
   // a) parm3 is properly aligned and
-  // b) all parm1-4 are in the same cache line.
+  // b) all parm1-4 are on the same cache line.
   // Because of parm1-4 are used together, performance seems to be better
-  // if they are in the same line (not measured though).
+  // if they are on the same cache line (not measured though).

   struct KMP_ALIGN(32) { // AC: changed 16 to 32 in order to simplify template
     kmp_int32 parm1; //     structures in kmp_dispatch.cpp. This should
@@ -1664,9 +1690,6 @@ typedef struct KMP_ALIGN_CACHE dispatch_private_info32 {
   kmp_uint32 ordered_lower;
   kmp_uint32 ordered_upper;
 #if KMP_OS_WINDOWS
-  // This var can be placed in the hole between 'tc' and 'parm1', instead of
-  // 'static_steal_counter'. It would be nice to measure execution times.
-  // Conditional if/endif can be removed at all.
   kmp_int32 last_upper;
 #endif /* KMP_OS_WINDOWS */
 } dispatch_private_info32_t;
@@ -1678,9 +1701,7 @@ typedef struct KMP_ALIGN_CACHE dispatch_private_info64 {
   kmp_int64 lb; /* lower-bound */
   kmp_int64 st; /* stride */
   kmp_int64 tc; /* trip count (number of iterations) */
-  kmp_int64 static_steal_counter; /* for static_steal only; maybe better to put
-                                     after ub */
-  kmp_lock_t *th_steal_lock; // lock used for chunk stealing
+  kmp_lock_t *steal_lock; // lock used for chunk stealing

   /* parm[1-4] are used in different ways by different scheduling algorithms */

   // KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on )
@@ -1699,9 +1720,6 @@ typedef struct KMP_ALIGN_CACHE dispatch_private_info64 {
   kmp_uint64 ordered_lower;
   kmp_uint64 ordered_upper;
 #if KMP_OS_WINDOWS
-  // This var can be placed in the hole between 'tc' and 'parm1', instead of
-  // 'static_steal_counter'. It would be nice to measure execution times.
-  // Conditional if/endif can be removed at all.
   kmp_int64 last_upper;
 #endif /* KMP_OS_WINDOWS */
 } dispatch_private_info64_t;
@@ -1755,9 +1773,8 @@ typedef struct KMP_ALIGN_CACHE dispatch_private_info {
   } u;
   enum sched_type schedule; /* scheduling algorithm */
   kmp_sched_flags_t flags; /* flags (e.g., ordered, nomerge, etc.) */
+  std::atomic<kmp_uint32> steal_flag; // static_steal only, state of a buffer
   kmp_int32 ordered_bumped;
-  // To retain the structure size after making ordered_iteration scalar
-  kmp_int32 ordered_dummy[KMP_MAX_ORDERED - 3];
   // Stack of buffers for nest of serial regions
   struct dispatch_private_info *next;
   kmp_int32 type_size; /* the size of types in private_info */
@@ -1772,7 +1789,7 @@ typedef struct dispatch_shared_info32 {
   /* chunk index under dynamic, number of idle threads under static-steal;
      iteration index otherwise */
   volatile kmp_uint32 iteration;
-  volatile kmp_uint32 num_done;
+  volatile kmp_int32 num_done;
   volatile kmp_uint32 ordered_iteration;
   // Dummy to retain the structure size after making ordered_iteration scalar
   kmp_int32 ordered_dummy[KMP_MAX_ORDERED - 1];
@@ -1782,7 +1799,7 @@ typedef struct dispatch_shared_info64 {
   /* chunk index under dynamic, number of idle threads under static-steal;
      iteration index otherwise */
   volatile kmp_uint64 iteration;
-  volatile kmp_uint64 num_done;
+  volatile kmp_int64 num_done;
   volatile kmp_uint64 ordered_iteration;
   // Dummy to retain the structure size after making ordered_iteration scalar
   kmp_int64 ordered_dummy[KMP_MAX_ORDERED - 3];
@@ -1818,7 +1835,7 @@ typedef struct kmp_disp {
   dispatch_private_info_t *th_dispatch_pr_current;

   dispatch_private_info_t *th_disp_buffer;
-  kmp_int32 th_disp_index;
+  kmp_uint32 th_disp_index;
   kmp_int32 th_doacross_buf_idx; // thread's doacross buffer index
   volatile kmp_uint32 *th_doacross_flags; // pointer to shared array of flags
   kmp_int64 *th_doacross_info; // info on loop bounds
@@ -1882,9 +1899,8 @@ typedef enum kmp_bar_pat { /* Barrier communication patterns */
       0, /* Single level (degenerate) tree */
   bp_tree_bar =
       1, /* Balanced tree with branching factor 2^n */
-  bp_hyper_bar =
-      2, /* Hypercube-embedded tree with min branching
-            factor 2^n */
+  bp_hyper_bar = 2, /* Hypercube-embedded tree with min
+                       branching factor 2^n */
   bp_hierarchical_bar = 3, /* Machine hierarchy tree */
   bp_last_bar /* Placeholder to mark the end */
 } kmp_bar_pat_e;
@@ -1969,9 +1985,9 @@ union KMP_ALIGN_CACHE kmp_barrier_team_union {
     kmp_uint64 b_arrived; /* STATE => task reached synch point. */
 #if USE_DEBUGGER
     // The following two fields are indended for the debugger solely. Only
-    // master of the team accesses these fields: the first one is increased by
-    // 1 when master arrives to a barrier, the second one is increased by one
-    // when all the threads arrived.
+    // primary thread of the team accesses these fields: the first one is
+    // increased by 1 when the primary thread arrives to a barrier, the second
+    // one is increased by one when all the threads arrived.
     kmp_uint b_master_arrived;
     kmp_uint b_team_arrived;
 #endif
@@ -2217,6 +2233,7 @@ typedef struct kmp_taskgroup {
   // Block of data to perform task reduction
   void *reduce_data; // reduction related info
   kmp_int32 reduce_num_data; // number of data items to reduce
+  uintptr_t *gomp_data; // gomp reduction data
 } kmp_taskgroup_t;

 // forward declarations
@@ -2224,15 +2241,24 @@ typedef union kmp_depnode kmp_depnode_t;
 typedef struct kmp_depnode_list kmp_depnode_list_t;
 typedef struct kmp_dephash_entry kmp_dephash_entry_t;

+#define KMP_DEP_IN 0x1
+#define KMP_DEP_OUT 0x2
+#define KMP_DEP_INOUT 0x3
+#define KMP_DEP_MTX 0x4
+#define KMP_DEP_SET 0x8
 // Compiler sends us this info:
 typedef struct kmp_depend_info {
   kmp_intptr_t base_addr;
   size_t len;
-  struct {
-    bool in : 1;
-    bool out : 1;
-    bool mtx : 1;
-  } flags;
+  union {
+    kmp_uint8 flag;
+    struct {
+      unsigned in : 1;
+      unsigned out : 1;
+      unsigned mtx : 1;
+      unsigned set : 1;
+    } flags;
+  };
 } kmp_depend_info_t;

 // Internal structures to work with task dependencies:
@@ -2266,9 +2292,9 @@ union KMP_ALIGN_CACHE kmp_depnode {
 struct kmp_dephash_entry {
   kmp_intptr_t addr;
   kmp_depnode_t *last_out;
-  kmp_depnode_list_t *last_ins;
-  kmp_depnode_list_t *last_mtxs;
-  kmp_int32 last_flag;
+  kmp_depnode_list_t *last_set;
+  kmp_depnode_list_t *prev_set;
+  kmp_uint8 last_flag;
   kmp_lock_t *mtx_lock; /* is referenced by depnodes w/mutexinoutset dep */
   kmp_dephash_entry_t *next_in_bucket;
 };
@@ -2501,7 +2527,7 @@ typedef struct kmp_teams_size {

 // This struct stores a thread that acts as a "root" for a contention
 // group. Contention groups are rooted at kmp_root threads, but also at
-// each master thread of each team created in the teams construct.
+// each primary thread of each team created in the teams construct.
 // This struct therefore also stores a thread_limit associated with
 // that contention group, and a counter to track the number of threads
 // active in that contention group. Each thread has a list of these: CG
@@ -2513,7 +2539,7 @@ typedef struct kmp_teams_size {
 typedef struct kmp_cg_root {
   kmp_info_p *cg_root; // "root" thread for a contention group
   // The CG root's limit comes from OMP_THREAD_LIMIT for root threads, or
-  // thread_limit clause for teams masters
+  // thread_limit clause for teams primary threads
   kmp_int32 cg_thread_limit;
   kmp_int32 cg_nthreads; // Count of active threads in CG rooted at cg_root
   struct kmp_cg_root *up; // pointer to higher level CG root in list
@@ -2523,8 +2549,9 @@ typedef struct kmp_cg_root {

 typedef struct KMP_ALIGN_CACHE kmp_base_info {
   /* Start with the readonly data which is cache aligned and padded. This is
-     written before the thread starts working by the master. Uber masters may
-     update themselves later. Usage does not consider serialized regions. */
+     written before the thread starts working by the primary thread. Uber
+     masters may update themselves later. Usage does not consider serialized
+     regions. */
   kmp_desc_t th_info;
   kmp_team_p *th_team; /* team we belong to */
   kmp_root_p *th_root; /* pointer to root of task hierarchy */
@@ -2535,7 +2562,7 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info {
   /* The following are cached from the team info structure */
   /* TODO use these in more places as determined to be needed via profiling */
   int th_team_nproc; /* number of threads in a team */
-  kmp_info_p *th_team_master; /* the team's master thread */
+  kmp_info_p *th_team_master; /* the team's primary thread */
   int th_team_serialized; /* team is serialized */
   microtask_t th_teams_microtask; /* save entry address for teams construct */
   int th_teams_level; /* save initial level of teams construct */
@@ -2556,7 +2583,7 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info {
   kmp_affin_mask_t *th_affin_mask; /* thread's current affinity mask */
 #endif
   omp_allocator_handle_t th_def_allocator; /* default allocator */
-  /* The data set by the master at reinit, then R/W by the worker */
+  /* The data set by the primary thread at reinit, then R/W by the worker */
   KMP_ALIGN_CACHE int th_set_nproc; /* if > 0, then only use this request for
                                        the next fork */
 #if KMP_NESTED_HOT_TEAMS
@@ -2592,7 +2619,7 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info {
   ompt_thread_info_t ompt_thread_info;
 #endif

-  /* The following are also read by the master during reinit */
+  /* The following are also read by the primary thread during reinit */
   struct common_table *th_pri_common;

   volatile kmp_uint32 th_spin_here; /* thread-local location for spinning */
@@ -2669,7 +2696,9 @@ typedef union KMP_ALIGN_CACHE kmp_info {

 // OpenMP thread team data structures

-typedef struct kmp_base_data { volatile kmp_uint32 t_value; } kmp_base_data_t;
+typedef struct kmp_base_data {
+  volatile kmp_uint32 t_value;
+} kmp_base_data_t;

 typedef union KMP_ALIGN_CACHE kmp_sleep_team {
   double dt_align; /* use worst case alignment */
@@ -2690,7 +2719,7 @@ typedef int (*launch_t)(int gtid);

 // Set up how many argv pointers will fit in cache lines containing
 // t_inline_argv. Historically, we have supported at least 96 bytes. Using a
-// larger value for more space between the master write/worker read section and
+// larger value for more space between the primary write/worker read section and
 // read/write by all section seems to buy more performance on EPCC PARALLEL.
 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
 #define KMP_INLINE_ARGV_BYTES                                                  \
@@ -2716,11 +2745,11 @@ typedef struct KMP_ALIGN_CACHE kmp_base_team {
   std::atomic<void *> t_tg_reduce_data[2]; // to support task modifier
   std::atomic<int> t_tg_fini_counter[2]; // sync end of task reductions

-  // Master only
+  // Primary thread only
   // ---------------------------------------------------------------------------
-  KMP_ALIGN_CACHE int t_master_tid; // tid of master in parent team
-  int t_master_this_cons; // "this_construct" single counter of master in parent
-  // team
+  KMP_ALIGN_CACHE int t_master_tid; // tid of primary thread in parent team
+  int t_master_this_cons; // "this_construct" single counter of primary thread
+  // in parent team
   ident_t *t_ident; // if volatile, have to change too much other crud to
   // volatile too
   kmp_team_p *t_parent; // parent team
@@ -2732,7 +2761,7 @@ typedef struct KMP_ALIGN_CACHE kmp_base_team {
   kmp_uint64 t_region_time; // region begin timestamp
 #endif /* USE_ITT_BUILD */

-  // Master write, workers read
+  // Primary thread write, workers read
   // --------------------------------------------------------------------------
   KMP_ALIGN_CACHE void **t_argv;
   int t_argc;
@@ -2768,7 +2797,7 @@ typedef struct KMP_ALIGN_CACHE kmp_base_team {
   kmp_r_sched_t t_sched; // run-time schedule for the team
 #if KMP_AFFINITY_SUPPORTED
   int t_first_place; // first & last place in parent thread's partition.
-  int t_last_place; // Restore these values to master after par region.
+  int t_last_place; // Restore these values to primary thread after par region.
 #endif // KMP_AFFINITY_SUPPORTED
   int t_display_affinity;
   int t_size_changed; // team size was changed?: 0: no, 1: yes, -1: changed via
@@ -2843,6 +2872,9 @@ typedef struct kmp_base_root {
   kmp_lock_t r_begin_lock;
   volatile int r_begin;
   int r_blocktime; /* blocktime for this root and descendants */
+#if KMP_AFFINITY_SUPPORTED
+  int r_affinity_assigned;
+#endif // KMP_AFFINITY_SUPPORTED
 } kmp_base_root_t;

 typedef union KMP_ALIGN_CACHE kmp_root {
@@ -2975,6 +3007,7 @@ extern enum sched_type __kmp_static; /* default static scheduling method */
 extern enum sched_type __kmp_guided; /* default guided scheduling method */
 extern enum sched_type __kmp_auto; /* default auto scheduling method */
 extern int __kmp_chunk; /* default runtime chunk size */
+extern int __kmp_force_monotonic; /* whether monotonic scheduling forced */
 extern size_t __kmp_stksize; /* stack size per thread */

 #if KMP_USE_MONITOR
@@ -3043,9 +3076,8 @@ extern int __kmp_ncores; /* Total number of cores for threads placement */
 extern int __kmp_abort_delay;

 extern int __kmp_need_register_atfork_specified;
-extern int
-    __kmp_need_register_atfork; /* At initialization, call pthread_atfork to
-                                   install fork handler */
+extern int __kmp_need_register_atfork; /* At initialization, call pthread_atfork
+                                          to install fork handler */
 extern int __kmp_gtid_mode; /* Method of getting gtid, values:
                                0 - not set, will be set at runtime
                                1 - using stack search
@@ -3123,6 +3155,8 @@ extern const char *__kmp_speculative_statsfile;
 extern int __kmp_display_env; /* TRUE or FALSE */
 extern int __kmp_display_env_verbose; /* TRUE if OMP_DISPLAY_ENV=VERBOSE */
 extern int __kmp_omp_cancellation; /* TRUE or FALSE */
+extern int __kmp_nteams;
+extern int __kmp_teams_thread_limit;

 /* ------------------------------------------------------------------------- */
@@ -3331,6 +3365,8 @@ extern void __kmp_push_proc_bind(ident_t *loc, int gtid,
                                  kmp_proc_bind_t proc_bind);
 extern void __kmp_push_num_teams(ident_t *loc, int gtid, int num_teams,
                                  int num_threads);
+extern void __kmp_push_num_teams_51(ident_t *loc, int gtid, int num_teams_lb,
+                                    int num_teams_ub, int num_threads);
 extern void __kmp_yield();
@@ -3409,7 +3445,7 @@ extern void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64<> *flag,
                           ,
                           void *itt_sync_obj
 #endif
-                          );
+);
 extern void __kmp_release_64(kmp_flag_64<> *flag);

 extern void __kmp_infinite_loop(void);
@@ -3430,7 +3466,7 @@ extern void __kmp_check_stack_overlap(kmp_info_t *thr);
 extern void __kmp_expand_host_name(char *buffer, size_t size);
 extern void __kmp_expand_file_name(char *result, size_t rlen, char *pattern);

-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || (KMP_OS_WINDOWS && KMP_ARCH_AARCH64)
 extern void __kmp_initialize_system_tick(void); /* Initialize timer tick value */
 #endif
@@ -3460,6 +3496,16 @@ extern void __kmp_balanced_affinity(kmp_info_t *th, int team_size);
 #if KMP_OS_LINUX || KMP_OS_FREEBSD
 extern int kmp_set_thread_affinity_mask_initial(void);
 #endif
+static inline void __kmp_assign_root_init_mask() {
+  int gtid = __kmp_entry_gtid();
+  kmp_root_t *r = __kmp_threads[gtid]->th.th_root;
+  if (r->r.r_uber_thread == __kmp_threads[gtid] && !r->r.r_affinity_assigned) {
+    __kmp_affinity_set_init_mask(gtid, TRUE);
+    r->r.r_affinity_assigned = TRUE;
+  }
+}
+#else /* KMP_AFFINITY_SUPPORTED */
+#define __kmp_assign_root_init_mask() /* Nothing */
 #endif /* KMP_AFFINITY_SUPPORTED */
 // No need for KMP_AFFINITY_SUPPORTED guard as only one field in the
 // format string is for affinity, so platforms that do not support
@@ -3667,7 +3713,7 @@ extern int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int npr, int argc,
                                   ,
                                   void **exit_frame_ptr
 #endif
-                                  );
+);

 /* ------------------------------------------------------------------------ */
@@ -3701,6 +3747,9 @@ KMP_EXPORT void __kmpc_flush(ident_t *);
 KMP_EXPORT void __kmpc_barrier(ident_t *, kmp_int32 global_tid);
 KMP_EXPORT kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
 KMP_EXPORT void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
+KMP_EXPORT kmp_int32 __kmpc_masked(ident_t *, kmp_int32 global_tid,
+                                   kmp_int32 filter);
+KMP_EXPORT void __kmpc_end_masked(ident_t *, kmp_int32 global_tid);
 KMP_EXPORT void __kmpc_ordered(ident_t *, kmp_int32 global_tid);
 KMP_EXPORT void __kmpc_end_ordered(ident_t *, kmp_int32 global_tid);
 KMP_EXPORT void __kmpc_critical(ident_t *, kmp_int32 global_tid,
@@ -3744,12 +3793,9 @@ KMP_EXPORT kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
                                              size_t sizeof_kmp_task_t,
                                              size_t sizeof_shareds,
                                              kmp_routine_entry_t task_entry);
-KMP_EXPORT kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
-                                                    kmp_int32 flags,
-                                                    size_t sizeof_kmp_task_t,
-                                                    size_t sizeof_shareds,
-                                                    kmp_routine_entry_t task_entry,
-                                                    kmp_int64 device_id);
+KMP_EXPORT kmp_task_t *__kmpc_omp_target_task_alloc(
+    ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
+    size_t sizeof_shareds, kmp_routine_entry_t task_entry, kmp_int64 device_id);
 KMP_EXPORT void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,
                                           kmp_task_t *task);
 KMP_EXPORT void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,
@@ -3817,6 +3863,10 @@ KMP_EXPORT void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
 KMP_EXPORT kmp_int32 __kmpc_omp_reg_task_with_affinity(
     ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 naffins,
     kmp_task_affinity_info_t *affin_list);
+KMP_EXPORT void __kmp_set_num_teams(int num_teams);
+KMP_EXPORT int __kmp_get_max_teams(void);
+KMP_EXPORT void __kmp_set_teams_thread_limit(int limit);
+KMP_EXPORT int __kmp_get_teams_thread_limit(void);

 /* Lock interface routines (fast versions with gtid passed in) */
 KMP_EXPORT void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid,
@@ -3885,6 +3935,11 @@ KMP_EXPORT void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
 KMP_EXPORT void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                                       kmp_int32 num_teams,
                                       kmp_int32 num_threads);
+/* Function for OpenMP 5.1 num_teams clause */
+KMP_EXPORT void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
+                                         kmp_int32 num_teams_lb,
+                                         kmp_int32 num_teams_ub,
+                                         kmp_int32 num_threads);
 KMP_EXPORT void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc,
                                   kmpc_micro microtask, ...);
 struct kmp_dim { // loop bounds info casted to kmp_int64
@@ -3955,6 +4010,11 @@ KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize_s(size_t);
 KMP_EXPORT void KMPC_CONVENTION kmpc_set_library(int);
 KMP_EXPORT void KMPC_CONVENTION kmpc_set_defaults(char const *);
 KMP_EXPORT void KMPC_CONVENTION kmpc_set_disp_num_buffers(int);
+void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format);
+size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size);
+void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format);
+size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
+                                              char const *format);

 enum kmp_target_offload_kind {
   tgt_disabled = 0,
@@ -4031,11 +4091,33 @@ extern void __kmp_hidden_helper_main_thread_release();
 #define KMP_HIDDEN_HELPER_WORKER_THREAD(gtid)                                  \
   ((gtid) > 1 && (gtid) <= __kmp_hidden_helper_threads_num)

+#define KMP_HIDDEN_HELPER_TEAM(team)                                           \
+  (team->t.t_threads[0] == __kmp_hidden_helper_main_thread)
+
 // Map a gtid to a hidden helper thread. The first hidden helper thread, a.k.a
 // main thread, is skipped.
 #define KMP_GTID_TO_SHADOW_GTID(gtid)                                          \
   ((gtid) % (__kmp_hidden_helper_threads_num - 1) + 2)

+// Return the adjusted gtid value by subtracting from gtid the number
+// of hidden helper threads. This adjusted value is the gtid the thread would
+// have received if there were no hidden helper threads.
+static inline int __kmp_adjust_gtid_for_hidden_helpers(int gtid) {
+  int adjusted_gtid = gtid;
+  if (__kmp_hidden_helper_threads_num > 0 && gtid > 0 &&
+      gtid - __kmp_hidden_helper_threads_num >= 0) {
+    adjusted_gtid -= __kmp_hidden_helper_threads_num;
+  }
+  return adjusted_gtid;
+}
+
+// Support for error directive
+typedef enum kmp_severity_t {
+  severity_warning = 1,
+  severity_fatal = 2
+} kmp_severity_t;
+extern void __kmpc_error(ident_t *loc, int severity, const char *message);
+
 #ifdef __cplusplus
 }
 #endif
@@ -4082,6 +4164,12 @@ int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid,
 #endif /* USE_ITT_BUILD */
                                kmp_int32 is_constrained);

+extern int __kmp_nesting_mode;
+extern int __kmp_nesting_mode_nlevels;
+extern int *__kmp_nesting_nth_level;
+extern void __kmp_init_nesting_mode();
+extern void __kmp_set_nesting_mode_threads();
+
 /// This class safely opens and closes a C-style FILE* object using RAII
 /// semantics. There are also methods which allow using stdout or stderr as
 /// the underlying FILE* object. With the implicit conversion operator to
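
Notes on selected hunks follow. The old affinity_gran enum is gone: granularity is now expressed directly in terms of the new kmp_hw_t topology levels, which is why __kmp_affinity_gran changes type from enum affinity_gran to kmp_hw_t. Below is a minimal standalone sketch of the new enum and the KMP_FOREACH_HW_TYPE iteration macro, both copied from the hunk above; the local names table is purely illustrative, standing in for __kmp_hw_get_keyword(), whose definition lives elsewhere in the runtime.

    #include <cstdio>

    enum kmp_hw_t : int {
      KMP_HW_UNKNOWN = -1,
      KMP_HW_SOCKET = 0,
      KMP_HW_PROC_GROUP,
      KMP_HW_NUMA,
      KMP_HW_DIE,
      KMP_HW_LLC,
      KMP_HW_L3,
      KMP_HW_TILE,
      KMP_HW_MODULE,
      KMP_HW_L2,
      KMP_HW_L1,
      KMP_HW_CORE,
      KMP_HW_THREAD,
      KMP_HW_LAST
    };

    #define KMP_FOREACH_HW_TYPE(type)                                          \
      for (kmp_hw_t type = (kmp_hw_t)0; type < KMP_HW_LAST;                    \
           type = (kmp_hw_t)((int)type + 1))

    int main() {
      // Illustrative names only; the runtime's real strings come from
      // __kmp_hw_get_keyword()/__kmp_hw_get_catalog_string().
      static const char *names[KMP_HW_LAST] = {
          "socket", "proc_group", "numa", "die",  "llc",  "l3",    "tile",
          "module", "l2",         "l1",   "core", "thread"};
      KMP_FOREACH_HW_TYPE(type) {
        std::printf("%d: %s\n", (int)type, names[type]);
      }
      return 0;
    }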
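
The llvm_omp_target_*_mem_space and _mem_alloc handles are a preview of target memory support, initialized via the new __kmp_init_target_mem(). Assuming a matching LLVM omp.h that exposes these handles to user code, usage would look like the sketch below; whether the allocation succeeds without an offload runtime present is implementation-dependent, so treat this as an assumption-laden illustration rather than guaranteed behavior.

    #include <cstdio>
    #include <omp.h>

    int main() {
      // Preview target-memory allocator from the diff; assumes an LLVM omp.h
      // declaring llvm_omp_target_host_mem_alloc. Without an offload runtime
      // the call may fall back or return NULL (assumption).
      void *p = omp_alloc(1024, llvm_omp_target_host_mem_alloc);
      std::printf("host-pinned block at %p\n", p);
      omp_free(p, llvm_omp_target_host_mem_alloc);
      return 0;
    }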
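
The KMP_WAKEUPS_FROM_BLOCKTIME hunk only reflows the nested ternaries into a flat chain; the logic is unchanged: keep the current wakeup rate at either blocktime extreme or when it already delivers at least one wakeup per blocktime interval, otherwise raise it to KMP_BLOCKTIME_MULTIPLIER / blocktime. A standalone sketch with stand-in constants (the real KMP_MIN_BLOCKTIME, KMP_MAX_BLOCKTIME, KMP_MAX_MONITOR_WAKEUPS, and KMP_BLOCKTIME_MULTIPLIER are defined elsewhere in kmp.h, so the values below are assumptions):

    #include <climits>
    #include <cstdio>

    #define KMP_MAX_BLOCKTIME INT_MAX     // assumed "infinite" sentinel
    #define KMP_MIN_BLOCKTIME 0           // assumed
    #define KMP_MAX_MONITOR_WAKEUPS 125   // assumed
    #define KMP_BLOCKTIME_MULTIPLIER 1000 // assumed blocktime units per second

    #define KMP_WAKEUPS_FROM_BLOCKTIME(blocktime, monitor_wakeups)             \
      (((blocktime) == KMP_MAX_BLOCKTIME)   ? (monitor_wakeups)                \
       : ((blocktime) == KMP_MIN_BLOCKTIME) ? KMP_MAX_MONITOR_WAKEUPS          \
       : ((monitor_wakeups) > (KMP_BLOCKTIME_MULTIPLIER / (blocktime)))        \
           ? (monitor_wakeups)                                                 \
           : (KMP_BLOCKTIME_MULTIPLIER) / (blocktime))

    int main() {
      // A 200 ms blocktime needs at least 1000/200 = 5 wakeups per second,
      // so a current rate of 2 is raised to 5, while 8 is already enough.
      std::printf("%d\n", KMP_WAKEUPS_FROM_BLOCKTIME(200, 2)); // prints 5
      std::printf("%d\n", KMP_WAKEUPS_FROM_BLOCKTIME(200, 8)); // prints 8
      return 0;
    }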
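
kmp_depend_info now overlays its bitfields with a single kmp_uint8 flag, so the runtime can compare a dependence's whole flag byte against the new KMP_DEP_* masks (note KMP_DEP_INOUT == KMP_DEP_IN | KMP_DEP_OUT) instead of testing bits individually; kmp_dephash_entry correspondingly tracks last_set/prev_set lists. A sketch of the layout, with plain C++ types substituted for the kmp_* typedefs; reading flag after writing the bitfields relies on union punning and a low-bit-first bitfield layout, which mainstream ABIs provide but the C++ standard does not strictly guarantee.

    #include <cstdio>

    #define KMP_DEP_IN 0x1
    #define KMP_DEP_OUT 0x2
    #define KMP_DEP_INOUT 0x3
    #define KMP_DEP_MTX 0x4
    #define KMP_DEP_SET 0x8

    struct depend_info_sketch { // stand-in for kmp_depend_info_t
      union {
        unsigned char flag; // aggregate view of the bits below (kmp_uint8)
        struct {
          unsigned in : 1;  // bit 0 == KMP_DEP_IN
          unsigned out : 1; // bit 1 == KMP_DEP_OUT
          unsigned mtx : 1; // bit 2 == KMP_DEP_MTX
          unsigned set : 1; // bit 3 == KMP_DEP_SET
        } flags;
      };
    };

    int main() {
      depend_info_sketch d{};
      d.flags.in = 1;
      d.flags.out = 1; // in + out together form an inout dependence
      std::printf("flag=0x%x inout=%d\n", d.flag, d.flag == KMP_DEP_INOUT);
      return 0;
    }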
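
__kmpc_masked/__kmpc_end_masked (plus the new ct_masked consistency-check entry) back the OpenMP 5.1 masked construct, the successor of master. User code like the following would exercise them, assuming an OpenMP 5.1-capable compiler; the exact lowering onto these entry points is the compiler's choice and is only assumed here.

    #include <cstdio>
    #include <omp.h>

    int main() {
      #pragma omp parallel num_threads(4)
      {
        // Only the thread whose id matches the filter runs the region;
        // plain '#pragma omp masked' defaults to thread 0.
        #pragma omp masked filter(2)
        std::printf("masked region run by thread %d\n", omp_get_thread_num());
      }
      return 0;
    }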
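
__kmpc_push_num_teams_51 and __kmp_push_num_teams_51 carry the OpenMP 5.1 num_teams lower/upper bound pair, while the new globals __kmp_nteams and __kmp_teams_thread_limit hold team defaults. Assuming 5.1 support in the compiler, a user-level trigger would be the following; which exact count in the range the runtime picks is up to the implementation.

    #include <cstdio>
    #include <omp.h>

    int main() {
      // The runtime may create any number of teams in [4, 8]; the bounds are
      // presumably forwarded via __kmpc_push_num_teams_51 (assumed lowering).
      #pragma omp teams num_teams(4 : 8)
      std::printf("team %d of %d\n", omp_get_team_num(), omp_get_num_teams());
      return 0;
    }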
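
__kmp_adjust_gtid_for_hidden_helpers undoes the gtid shift introduced by the hidden helper team, which occupies gtids 1 through __kmp_hidden_helper_threads_num. A standalone sketch mirroring the function's logic from the hunk above, with the runtime global modeled as a local variable (the value 8 is only a demo assumption for the helper count):

    #include <cstdio>

    static int hidden_helper_threads_num = 8; // stand-in for the runtime global

    static inline int adjust_gtid_for_hidden_helpers(int gtid) {
      int adjusted_gtid = gtid;
      if (hidden_helper_threads_num > 0 && gtid > 0 &&
          gtid - hidden_helper_threads_num >= 0) {
        adjusted_gtid -= hidden_helper_threads_num;
      }
      return adjusted_gtid;
    }

    int main() {
      std::printf("%d\n", adjust_gtid_for_hidden_helpers(0));  // 0: initial thread
      std::printf("%d\n", adjust_gtid_for_hidden_helpers(9));  // 1: first worker
      std::printf("%d\n", adjust_gtid_for_hidden_helpers(12)); // 4
      return 0;
    }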