diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2020-01-17 20:45:01 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2020-01-17 20:45:01 +0000 |
commit | 706b4fc47bbc608932d3b491ae19a3b9cde9497b (patch) | |
tree | 4adf86a776049cbf7f69a1929c4babcbbef925eb /openmp | |
parent | 7cc9cf2bf09f069cb2dd947ead05d0b54301fb71 (diff) | |
download | src-706b4fc47bbc608932d3b491ae19a3b9cde9497b.tar.gz src-706b4fc47bbc608932d3b491ae19a3b9cde9497b.zip |
Vendor import of llvm-project master e26a78e70, the last commit beforevendor/llvm-project/llvmorg-10-init-17466-ge26a78e7085
the llvmorg-11-init tag, from which release/10.x was branched.
Notes
Notes:
svn path=/vendor/llvm-project/master/; revision=356843
svn path=/vendor/llvm-project/llvmorg-10-init-17466-ge26a78e7085/; revision=356844; tag=vendor/llvm-project/llvmorg-10-init-17466-ge26a78e7085
Diffstat (limited to 'openmp')
35 files changed, 207 insertions, 110 deletions
diff --git a/openmp/CREDITS.txt b/openmp/CREDITS.txt index b14bb9a1e6e6..ede45b10fea2 100644 --- a/openmp/CREDITS.txt +++ b/openmp/CREDITS.txt @@ -53,6 +53,10 @@ N: Steven Noonan E: steven@uplinklabs.net D: Patches for the ARM architecture and removal of several inconsistencies. +N: Joachim Protze +E: protze@itc.rwth-aachen.de +D: OpenMP Tools Interface, Archer tool + N: Alp Toker E: alp@nuanti.com D: Making build work for FreeBSD. diff --git a/openmp/runtime/src/extractExternal.cpp b/openmp/runtime/src/extractExternal.cpp index b3e55b555d28..f512ecb2b16d 100644 --- a/openmp/runtime/src/extractExternal.cpp +++ b/openmp/runtime/src/extractExternal.cpp @@ -57,7 +57,7 @@ protected: ~_rstream() { delete[] buf; } }; -// A stream encapuslating the content of a file or the content of a string, +// A stream encapsulating the content of a file or the content of a string, // overriding the >> operator to read various integer types in binary form, // as well as a symbol table entry. class rstream : public _rstream { diff --git a/openmp/runtime/src/i18n/en_US.txt b/openmp/runtime/src/i18n/en_US.txt index 822f73c0ef1d..3a3035b26673 100644 --- a/openmp/runtime/src/i18n/en_US.txt +++ b/openmp/runtime/src/i18n/en_US.txt @@ -293,7 +293,7 @@ AffUseGlobCpuid "%1$s: Affinity capable, using global cpuid info" AffCapableUseFlat "%1$s: Affinity capable, using default \"flat\" topology" AffNotCapableUseLocCpuid "%1$s: Affinity not capable, using local cpuid info" AffNotCapableUseCpuinfo "%1$s: Affinity not capable, using cpuinfo file" -AffFlatTopology "%1$s: Affinity not capable, assumming \"flat\" topology" +AffFlatTopology "%1$s: Affinity not capable, assuming \"flat\" topology" InitOSProcSetRespect "%1$s: Initial OS proc set respected: %2$s" InitOSProcSetNotRespect "%1$s: Initial OS proc set not respected: %2$s" AvailableOSProc "%1$s: %2$d available OS procs" @@ -372,7 +372,7 @@ AffParseFilename "%1$s: parsing %2$s." MsgExiting "%1$s - exiting." IncompatibleLibrary "Incompatible %1$s library with version %2$s found." IttFunctionError "ittnotify: Function %1$s failed:" -IttUnknownError "ittnofify: Error #%1$d." +IttUnknownError "ittnotify: Error #%1$d." EnvMiddleWarn "%1$s must be set prior to first parallel region or certain API calls; ignored." CnsLockNotDestroyed "Lock initialized at %1$s(%2$d) was not destroyed" # %1, %2, %3, %4 -- file, line, func, col diff --git a/openmp/runtime/src/include/omp_lib.f.var b/openmp/runtime/src/include/omp_lib.f.var index 19f14d75f21c..d631438f55ad 100644 --- a/openmp/runtime/src/include/omp_lib.f.var +++ b/openmp/runtime/src/include/omp_lib.f.var @@ -953,7 +953,7 @@ !dec$ if defined(__APPLE__) !*** -!*** The Mac entry points are in lowercase, with an both an underscore +!*** The Mac entry points are in lowercase, with both an underscore !*** appended and an underscore prepended. !*** diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index 23eebe673126..086ab3bb011e 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -203,7 +203,7 @@ enum { KMP_IDENT_WORK_LOOP = 0x200, /*! To mark a sections directive in OMPT callbacks */ KMP_IDENT_WORK_SECTIONS = 0x400, - /*! To mark a distirbute construct in OMPT callbacks */ + /*! To mark a distribute construct in OMPT callbacks */ KMP_IDENT_WORK_DISTRIBUTE = 0x800, /*! Atomic hint; bottom four bits as omp_sync_hint_t. Top four reserved and not currently used. If one day we need more bits, then we can use @@ -868,7 +868,7 @@ extern int __kmp_hws_abs_flag; // absolute or per-item number requested /* OpenMP 5.0 Memory Management support */ #ifndef __OMP_H -// Duplicate type definitios from omp.h +// Duplicate type definitions from omp.h typedef uintptr_t omp_uintptr_t; typedef enum { @@ -929,7 +929,7 @@ extern omp_allocator_handle_t const omp_thread_mem_alloc; extern omp_allocator_handle_t const kmp_max_mem_alloc; extern omp_allocator_handle_t __kmp_def_allocator; -// end of duplicate type definitios from omp.h +// end of duplicate type definitions from omp.h #endif extern int __kmp_memkind_available; @@ -1389,7 +1389,7 @@ The type for a microtask which gets passed to @ref __kmpc_fork_call(). The arguments to the outlined function are @param global_tid the global thread identity of the thread executing the function. -@param bound_tid the local identitiy of the thread executing the function +@param bound_tid the local identity of the thread executing the function @param ... pointers to shared variables accessed by the function. */ typedef void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...); @@ -2253,7 +2253,7 @@ typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */ unsigned started : 1; /* 1==started, 0==not started */ unsigned executing : 1; /* 1==executing, 0==not executing */ unsigned complete : 1; /* 1==complete, 0==not complete */ - unsigned freed : 1; /* 1==freed, 0==allocateed */ + unsigned freed : 1; /* 1==freed, 0==allocated */ unsigned native : 1; /* 1==gcc-compiled task, 0==intel */ unsigned reserved31 : 7; /* reserved for library use */ @@ -2654,7 +2654,7 @@ typedef struct KMP_ALIGN_CACHE kmp_base_team { int t_level; // nested parallel level KMP_ALIGN_CACHE int t_max_argc; - int t_max_nproc; // max threads this team can handle (dynamicly expandable) + int t_max_nproc; // max threads this team can handle (dynamically expandable) int t_serialized; // levels deep of serialized teams dispatch_shared_info_t *t_disp_buffer; // buffers for dispatch system int t_id; // team's id, assigned by debugger. @@ -3115,7 +3115,6 @@ extern void __kmp_internal_begin(void); extern void __kmp_internal_end_library(int gtid); extern void __kmp_internal_end_thread(int gtid); extern void __kmp_internal_end_atexit(void); -extern void __kmp_internal_end_fini(void); extern void __kmp_internal_end_dtor(void); extern void __kmp_internal_end_dest(void *); diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp index 4c7ed3181197..650e9ff35e1b 100644 --- a/openmp/runtime/src/kmp_affinity.cpp +++ b/openmp/runtime/src/kmp_affinity.cpp @@ -1124,7 +1124,7 @@ static int __kmp_affinity_create_apicid_map(AddrUnsPair **address2os, // - Older OSes are usually found on machines with older chips, which do not // support HT. // - The performance penalty for mistakenly identifying a machine as HT when - // it isn't (which results in blocktime being incorrecly set to 0) is + // it isn't (which results in blocktime being incorrectly set to 0) is // greater than the penalty when for mistakenly identifying a machine as // being 1 thread/core when it is really HT enabled (which results in // blocktime being incorrectly set to a positive value). @@ -2076,7 +2076,7 @@ static int __kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, return -1; } - // Set the file pointer back to the begginning, so that we can scan the file + // Set the file pointer back to the beginning, so that we can scan the file // again, this time performing a full parse of the data. Allocate a vector of // ProcCpuInfo object, where we will place the data. Adding an extra element // at the end allows us to remove a lot of extra checks for termination @@ -2461,7 +2461,7 @@ restart_radix_check: threadInfo[i][threadIdIndex] = threadIdCt++; } - // Aparrently the thread id field was specified for some entries and + // Apparently the thread id field was specified for some entries and // not others. Start the thread id counter off at the next higher // thread id. else if (threadIdCt <= threadInfo[i][threadIdIndex]) { @@ -4194,7 +4194,7 @@ static void __kmp_aux_affinity_initialize(void) { if (__kmp_affinity_top_method == affinity_top_method_all) { // In the default code path, errors are not fatal - we just try using // another method. We only emit a warning message if affinity is on, or the - // verbose flag is set, an the nowarnings flag was not set. + // verbose flag is set, and the nowarnings flag was not set. const char *file_name = NULL; int line = 0; #if KMP_USE_HWLOC @@ -5300,7 +5300,7 @@ void __kmp_balanced_affinity(kmp_info_t *th, int nthreads) { } } -#if KMP_OS_LINUX +#if KMP_OS_LINUX || KMP_OS_FREEBSD // We don't need this entry for Windows because // there is GetProcessAffinityMask() api // diff --git a/openmp/runtime/src/kmp_alloc.cpp b/openmp/runtime/src/kmp_alloc.cpp index 861940120e90..16893d0ffca5 100644 --- a/openmp/runtime/src/kmp_alloc.cpp +++ b/openmp/runtime/src/kmp_alloc.cpp @@ -57,7 +57,7 @@ static void bectl(kmp_info_t *th, bget_compact_t compact, multiple of this size. This MUST be a power of two. */ /* On IA-32 architecture with Linux* OS, malloc() does not - ensure 16 byte alignmnent */ + ensure 16 byte alignment */ #if KMP_ARCH_X86 || !KMP_HAVE_QUAD diff --git a/openmp/runtime/src/kmp_atomic.cpp b/openmp/runtime/src/kmp_atomic.cpp index f1ee3d2cd486..148b2da3f524 100644 --- a/openmp/runtime/src/kmp_atomic.cpp +++ b/openmp/runtime/src/kmp_atomic.cpp @@ -141,7 +141,7 @@ Full list of functions ====================== This leads to the generation of 376 atomic functions, as follows. -Functons for integers +Functions for integers --------------------- There are versions here for integers of size 1,2,4 and 8 bytes both signed and unsigned (where that matters). @@ -483,8 +483,8 @@ Functions for Complex types --------------------------- Functions for complex types whose component floating point variables are of size 4,8,10 or 16 bytes. The names here are based on the size of the component float, -*not* the size of the complex type. So `__kmpc_atomc_cmplx8_add` is an operation -on a `complex<double>` or `complex(kind=8)`, *not* `complex<float>`. +*not* the size of the complex type. So `__kmpc_atomic_cmplx8_add` is an +operation on a `complex<double>` or `complex(kind=8)`, *not* `complex<float>`. @code __kmpc_atomic_cmplx4_add diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp index e17986b16a95..a6d87b5d7a2e 100644 --- a/openmp/runtime/src/kmp_barrier.cpp +++ b/openmp/runtime/src/kmp_barrier.cpp @@ -15,9 +15,7 @@ #include "kmp_itt.h" #include "kmp_os.h" #include "kmp_stats.h" -#if OMPT_SUPPORT #include "ompt-specific.h" -#endif #if KMP_MIC #include <immintrin.h> @@ -128,8 +126,11 @@ static bool __kmp_linear_barrier_gather_template( gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team), team->t.t_id, i)); ANNOTATE_REDUCE_AFTER(reduce); + OMPT_REDUCTION_DECL(this_thr, gtid); + OMPT_REDUCTION_BEGIN; (*reduce)(this_thr->th.th_local.reduce_data, other_threads[i]->th.th_local.reduce_data); + OMPT_REDUCTION_END; ANNOTATE_REDUCE_BEFORE(reduce); ANNOTATE_REDUCE_BEFORE(&team->t.t_bar); } @@ -355,8 +356,11 @@ __kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid)); ANNOTATE_REDUCE_AFTER(reduce); + OMPT_REDUCTION_DECL(this_thr, gtid); + OMPT_REDUCTION_BEGIN; (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data); + OMPT_REDUCTION_END; ANNOTATE_REDUCE_BEFORE(reduce); ANNOTATE_REDUCE_BEFORE(&team->t.t_bar); } @@ -600,8 +604,11 @@ __kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid)); ANNOTATE_REDUCE_AFTER(reduce); + OMPT_REDUCTION_DECL(this_thr, gtid); + OMPT_REDUCTION_BEGIN; (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data); + OMPT_REDUCTION_END; ANNOTATE_REDUCE_BEFORE(reduce); ANNOTATE_REDUCE_BEFORE(&team->t.t_bar); } @@ -912,6 +919,8 @@ static void __kmp_hierarchical_barrier_gather( flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); if (reduce) { ANNOTATE_REDUCE_AFTER(reduce); + OMPT_REDUCTION_DECL(this_thr, gtid); + OMPT_REDUCTION_BEGIN; for (child_tid = tid + 1; child_tid <= tid + thr_bar->leaf_kids; ++child_tid) { KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " @@ -923,6 +932,7 @@ static void __kmp_hierarchical_barrier_gather( (*reduce)(this_thr->th.th_local.reduce_data, other_threads[child_tid]->th.th_local.reduce_data); } + OMPT_REDUCTION_END; ANNOTATE_REDUCE_BEFORE(reduce); ANNOTATE_REDUCE_BEFORE(&team->t.t_bar); } diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp index d39bf9af4334..ac9a93590ad0 100644 --- a/openmp/runtime/src/kmp_csupport.cpp +++ b/openmp/runtime/src/kmp_csupport.cpp @@ -18,10 +18,7 @@ #include "kmp_itt.h" #include "kmp_lock.h" #include "kmp_stats.h" - -#if OMPT_SUPPORT #include "ompt-specific.h" -#endif #define MAX_MESSAGE 512 @@ -3429,13 +3426,18 @@ __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); + OMPT_REDUCTION_DECL(th, global_tid); if (packed_reduction_method == critical_reduce_block) { + OMPT_REDUCTION_BEGIN; + __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); retval = 1; } else if (packed_reduction_method == empty_reduce_block) { + OMPT_REDUCTION_BEGIN; + // usage: if team size == 1, no synchronization is required ( Intel // platforms only ) retval = 1; @@ -3536,15 +3538,20 @@ void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); + OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid); + if (packed_reduction_method == critical_reduce_block) { __kmp_end_critical_section_reduce_block(loc, global_tid, lck); + OMPT_REDUCTION_END; } else if (packed_reduction_method == empty_reduce_block) { // usage: if team size == 1, no synchronization is required ( on Intel // platforms only ) + OMPT_REDUCTION_END; + } else if (packed_reduction_method == atomic_reduce_block) { // neither master nor other workers should get here @@ -3556,6 +3563,7 @@ void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, tree_reduce_block)) { // only master gets here + // OMPT: tree reduction is annotated in the barrier code } else { @@ -3629,13 +3637,17 @@ kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); + OMPT_REDUCTION_DECL(th, global_tid); + if (packed_reduction_method == critical_reduce_block) { + OMPT_REDUCTION_BEGIN; __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); retval = 1; } else if (packed_reduction_method == empty_reduce_block) { + OMPT_REDUCTION_BEGIN; // usage: if team size == 1, no synchronization is required ( Intel // platforms only ) retval = 1; @@ -3723,10 +3735,13 @@ void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, // this barrier should be visible to a customer and to the threading profile // tool (it's a terminating barrier on constructs if NOWAIT not specified) + OMPT_REDUCTION_DECL(th, global_tid); if (packed_reduction_method == critical_reduce_block) { __kmp_end_critical_section_reduce_block(loc, global_tid, lck); + OMPT_REDUCTION_END; + // TODO: implicit barrier: should be exposed #if OMPT_SUPPORT ompt_frame_t *ompt_frame; @@ -3749,6 +3764,8 @@ void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, } else if (packed_reduction_method == empty_reduce_block) { + OMPT_REDUCTION_END; + // usage: if team size==1, no synchronization is required (Intel platforms only) // TODO: implicit barrier: should be exposed diff --git a/openmp/runtime/src/kmp_dispatch.cpp b/openmp/runtime/src/kmp_dispatch.cpp index 161a2c696357..a91ffa2ba299 100644 --- a/openmp/runtime/src/kmp_dispatch.cpp +++ b/openmp/runtime/src/kmp_dispatch.cpp @@ -379,14 +379,15 @@ void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid, } break; } else { - KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d falling-through to " - "kmp_sch_static_balanced\n", - gtid)); - schedule = kmp_sch_static_balanced; - /* too few iterations: fall-through to kmp_sch_static_balanced */ + /* too few chunks: switching to kmp_sch_dynamic_chunked */ + schedule = kmp_sch_dynamic_chunked; + KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d switching to " + "kmp_sch_dynamic_chunked\n", + gtid)); + if (pr->u.p.parm1 <= 0) + pr->u.p.parm1 = KMP_DEFAULT_CHUNK; + break; } // if - /* FALL-THROUGH to static balanced */ - KMP_FALLTHROUGH(); } // case #endif case kmp_sch_static_balanced: { @@ -1532,7 +1533,7 @@ int __kmp_dispatch_next_algorithm(int gtid, if ((T)remaining < pr->u.p.parm2) { // compare with K*nproc*(chunk+1), K=2 by default // use dynamic-style shcedule - // atomically inrement iterations, get old value + // atomically increment iterations, get old value init = test_then_add<ST>(RCAST(volatile ST *, &sh->u.s.iteration), (ST)chunkspec); remaining = trip - init; @@ -1601,7 +1602,7 @@ int __kmp_dispatch_next_algorithm(int gtid, // compare with K*nproc*(chunk+1), K=2 by default if ((T)remaining < pr->u.p.parm2) { // use dynamic-style shcedule - // atomically inrement iterations, get old value + // atomically increment iterations, get old value init = test_then_add<ST>(RCAST(volatile ST *, &sh->u.s.iteration), (ST)chunk); remaining = trip - init; @@ -1892,7 +1893,7 @@ static int __kmp_dispatch_next(ident_t *loc, int gtid, kmp_int32 *p_last, typedef typename traits_t<T>::signed_t ST; // This is potentially slightly misleading, schedule(runtime) will appear here // even if the actual runtme schedule is static. (Which points out a - // disadavantage of schedule(runtime): even when static scheduling is used it + // disadvantage of schedule(runtime): even when static scheduling is used it // costs more than a compile time choice to use static scheduling would.) KMP_TIME_PARTITIONED_BLOCK(OMP_loop_dynamic_scheduling); diff --git a/openmp/runtime/src/kmp_dispatch_hier.h b/openmp/runtime/src/kmp_dispatch_hier.h index 24a6d6691240..3d7faea04272 100644 --- a/openmp/runtime/src/kmp_dispatch_hier.h +++ b/openmp/runtime/src/kmp_dispatch_hier.h @@ -1071,7 +1071,7 @@ void __kmp_dispatch_init_hierarchy(ident_t *loc, int n, my_unit->reset_shared_barrier(); my_unit->hier_pr.flags.contains_last = FALSE; // Last layer, initialize the private buffers with entire loop information - // Now the next next_algorithim() call will get the first chunk of + // Now the next next_algorithm() call will get the first chunk of // iterations properly if (i == n - 1) { __kmp_dispatch_init_algorithm<T>( diff --git a/openmp/runtime/src/kmp_ftn_entry.h b/openmp/runtime/src/kmp_ftn_entry.h index e480e0151e1c..89172c0b704c 100644 --- a/openmp/runtime/src/kmp_ftn_entry.h +++ b/openmp/runtime/src/kmp_ftn_entry.h @@ -531,7 +531,7 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_THREAD_NUM)(void) { int gtid; #if KMP_OS_DARWIN || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \ - KMP_OS_HURD + KMP_OS_HURD|| KMP_OS_OPENBSD gtid = __kmp_entry_gtid(); #elif KMP_OS_WINDOWS if (!__kmp_init_parallel || @@ -956,7 +956,7 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NUM_DEVICES)(void) { } // This function always returns true when called on host device. -// Compilier/libomptarget should handle when it is called inside target region. +// Compiler/libomptarget should handle when it is called inside target region. int FTN_STDCALL KMP_EXPAND_NAME(FTN_IS_INITIAL_DEVICE)(void) KMP_WEAK_ATTRIBUTE; int FTN_STDCALL KMP_EXPAND_NAME(FTN_IS_INITIAL_DEVICE)(void) { return 1; // This is the host diff --git a/openmp/runtime/src/kmp_gsupport.cpp b/openmp/runtime/src/kmp_gsupport.cpp index 10841d265958..e0739a737d9c 100644 --- a/openmp/runtime/src/kmp_gsupport.cpp +++ b/openmp/runtime/src/kmp_gsupport.cpp @@ -495,8 +495,8 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)(void) { // argument to __kmp_GOMP_fork_call). // // Conversely, KMP_DISPATCH_NEXT returns and inclusive upper bound in *p_ub, -// but the Gnu codegen expects an excluside upper bound, so the adjustment -// "*p_ub += stride" compenstates for the discrepancy. +// but the Gnu codegen expects an exclusive upper bound, so the adjustment +// "*p_ub += stride" compensates for the discrepancy. // // Correction: the gnu codegen always adjusts the upper bound by +-1, not the // stride value. We adjust the dispatch parameters accordingly (by +-1), but @@ -1743,7 +1743,7 @@ void __GOMP_taskloop(void (*func)(void *), void *data, // 4 byte version of GOMP_doacross_post // This verison needs to create a temporary array which converts 4 byte -// integers into 8 byte integeres +// integers into 8 byte integers template <typename T, bool need_conversion = (sizeof(long) == 4)> void __kmp_GOMP_doacross_post(T *count); diff --git a/openmp/runtime/src/kmp_itt.inl b/openmp/runtime/src/kmp_itt.inl index 6e37ce0f083c..04f00285ba35 100644 --- a/openmp/runtime/src/kmp_itt.inl +++ b/openmp/runtime/src/kmp_itt.inl @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// // Inline function definitions. This file should be included into kmp_itt.h file -// for production build (to let compliler inline functions) or into kmp_itt.c +// for production build (to let compiler inline functions) or into kmp_itt.c // file for debug build (to reduce the number of files to recompile and save // build time). @@ -474,7 +474,7 @@ LINKAGE void __kmp_itt_region_joined(int gtid) { ITT need an address (void *) to be specified as a sync object. OpenMP RTL does not have barrier object or barrier data structure. Barrier is just a counter in team and thread structures. We could use an address of team - structure as an barrier sync object, but ITT wants different objects for + structure as a barrier sync object, but ITT wants different objects for different barriers (even whithin the same team). So let us use team address as barrier sync object for the first barrier, then increase it by one for the next barrier, and so on (but wrap it not to use addresses outside of team @@ -502,10 +502,10 @@ void *__kmp_itt_barrier_object(int gtid, int bt, int set_name, // Now form the barrier id. Encode barrier type (bt) in barrier id too, so // barriers of different types do not have the same ids. KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= bs_last_barrier); - // This conditon is a must (we would have zero divide otherwise). + // This condition is a must (we would have zero divide otherwise). KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= 2 * bs_last_barrier); // More strong condition: make sure we have room at least for for two - // differtent ids (for each barrier type). + // different ids (for each barrier type). object = reinterpret_cast<void *>( kmp_uintptr_t(team) + counter % (sizeof(kmp_team_t) / bs_last_barrier) * bs_last_barrier + diff --git a/openmp/runtime/src/kmp_lock.h b/openmp/runtime/src/kmp_lock.h index 9ad86a51657d..75a15f084c69 100644 --- a/openmp/runtime/src/kmp_lock.h +++ b/openmp/runtime/src/kmp_lock.h @@ -462,7 +462,7 @@ struct kmp_base_drdpa_lock { // written by the acquiring thread) than it does in the simple ticket locks // (where it is written by the releasing thread). // - // Since now_serving is only read an written in the critical section, + // Since now_serving is only read and written in the critical section, // it is non-volatile, but it needs to exist on a separate cache line, // as it is invalidated at every lock acquire. // diff --git a/openmp/runtime/src/kmp_os.h b/openmp/runtime/src/kmp_os.h index cd942a9c4430..bfe7765b2a96 100644 --- a/openmp/runtime/src/kmp_os.h +++ b/openmp/runtime/src/kmp_os.h @@ -313,7 +313,7 @@ extern "C" { # define KMP_FALLTHROUGH() [[fallthrough]] #elif __has_cpp_attribute(clang::fallthrough) # define KMP_FALLTHROUGH() [[clang::fallthrough]] -#elif __has_attribute(fallthough) || __GNUC__ >= 7 +#elif __has_attribute(fallthrough) || __GNUC__ >= 7 # define KMP_FALLTHROUGH() __attribute__((__fallthrough__)) #else # define KMP_FALLTHROUGH() ((void)0) diff --git a/openmp/runtime/src/kmp_platform.h b/openmp/runtime/src/kmp_platform.h index 3238deafc01b..779c08e9771d 100644 --- a/openmp/runtime/src/kmp_platform.h +++ b/openmp/runtime/src/kmp_platform.h @@ -93,9 +93,9 @@ #define KMP_ARCH_X86 0 #define KMP_ARCH_X86_64 0 #define KMP_ARCH_AARCH64 0 -#define KMP_ARCH_PPC64_BE 0 -#define KMP_ARCH_PPC64_LE 0 -#define KMP_ARCH_PPC64 (KMP_ARCH_PPC64_LE || KMP_ARCH_PPC64_BE) +#define KMP_ARCH_PPC64_ELFv1 0 +#define KMP_ARCH_PPC64_ELFv2 0 +#define KMP_ARCH_PPC64 (KMP_ARCH_PPC64_ELFv2 || KMP_ARCH_PPC64_ELFv1) #define KMP_ARCH_MIPS 0 #define KMP_ARCH_MIPS64 0 #define KMP_ARCH_RISCV64 0 @@ -118,12 +118,12 @@ #undef KMP_ARCH_X86 #define KMP_ARCH_X86 1 #elif defined __powerpc64__ -#if defined __LITTLE_ENDIAN__ -#undef KMP_ARCH_PPC64_LE -#define KMP_ARCH_PPC64_LE 1 +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#undef KMP_ARCH_PPC64_ELFv2 +#define KMP_ARCH_PPC64_ELFv2 1 #else -#undef KMP_ARCH_PPC64_BE -#define KMP_ARCH_PPC64_BE 1 +#undef KMP_ARCH_PPC64_ELFv1 +#define KMP_ARCH_PPC64_ELFv1 1 #endif #elif defined __aarch64__ #undef KMP_ARCH_AARCH64 @@ -143,7 +143,7 @@ #endif #if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7R__) || \ - defined(__ARM_ARCH_7A__) + defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7VE__) #define KMP_ARCH_ARMV7 1 #endif diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index dd6e0ff70193..acd157db8e52 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -3927,8 +3927,8 @@ static int __kmp_reset_root(int gtid, kmp_root_t *root) { } __kmp_reap_thread(root->r.r_uber_thread, 1); - // We canot put root thread to __kmp_thread_pool, so we have to reap it istead - // of freeing. + // We canot put root thread to __kmp_thread_pool, so we have to reap it + // instead of freeing. root->r.r_uber_thread = NULL; /* mark root as no longer in use */ root->r.r_begin = FALSE; @@ -5799,16 +5799,10 @@ void __kmp_internal_end_dest(void *specific_gtid) { #if KMP_OS_UNIX && KMP_DYNAMIC_LIB -// 2009-09-08 (lev): It looks the destructor does not work. In simple test cases -// destructors work perfectly, but in real libomp.so I have no evidence it is -// ever called. However, -fini linker option in makefile.mk works fine. - __attribute__((destructor)) void __kmp_internal_end_dtor(void) { __kmp_internal_end_atexit(); } -void __kmp_internal_end_fini(void) { __kmp_internal_end_atexit(); } - #endif /* [Windows] josh: when the atexit handler is called, there may still be more @@ -7158,7 +7152,7 @@ int __kmp_invoke_teams_master(int gtid) { /* this sets the requested number of threads for the next parallel region encountered by this team. since this should be enclosed in the forkjoin - critical section it should avoid race conditions with assymmetrical nested + critical section it should avoid race conditions with asymmetrical nested parallelism */ void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) { diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp index 692ca26d0e42..c7dec4d218c6 100644 --- a/openmp/runtime/src/kmp_settings.cpp +++ b/openmp/runtime/src/kmp_settings.cpp @@ -1034,7 +1034,7 @@ static void __kmp_parse_nested_num_threads(const char *var, const char *env, } // The next character is ',' if (*next == ',') { - // ',' is the fisrt character + // ',' is the first character if (total == 0 || prev_comma) { total++; } @@ -4205,7 +4205,7 @@ static void __kmp_stg_parse_spin_backoff_params(const char *name, } // The next character is ',' if (*next == ',') { - // ',' is the fisrt character + // ',' is the first character if (total == 0 || prev_comma) { total++; } @@ -4304,7 +4304,7 @@ static void __kmp_stg_parse_adaptive_lock_props(const char *name, } // The next character is ',' if (*next == ',') { - // ',' is the fisrt character + // ',' is the first character if (total == 0 || prev_comma) { total++; } diff --git a/openmp/runtime/src/kmp_stats.cpp b/openmp/runtime/src/kmp_stats.cpp index 71f2dd93be79..dabd0c35b85c 100644 --- a/openmp/runtime/src/kmp_stats.cpp +++ b/openmp/runtime/src/kmp_stats.cpp @@ -679,7 +679,7 @@ void kmp_stats_output_module::printEvents(FILE *eventsOut, void kmp_stats_output_module::windupExplicitTimers() { // Wind up any explicit timers. We assume that it's fair at this point to just - // walk all the explcit timers in all threads and say "it's over". + // walk all the explicit timers in all threads and say "it's over". // If the timer wasn't running, this won't record anything anyway. kmp_stats_list::iterator it; for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) { diff --git a/openmp/runtime/src/kmp_str.cpp b/openmp/runtime/src/kmp_str.cpp index fb748d1a54ab..75fd1e25f347 100644 --- a/openmp/runtime/src/kmp_str.cpp +++ b/openmp/runtime/src/kmp_str.cpp @@ -251,7 +251,7 @@ void __kmp_str_fname_init(kmp_str_fname_t *fname, char const *path) { char *base = NULL; // Pointer to the beginning of basename. fname->path = __kmp_str_format("%s", path); // Original code used strdup() function to copy a string, but on Windows* OS - // Intel(R) 64 it causes assertioon id debug heap, so I had to replace + // Intel(R) 64 it causes assertion id debug heap, so I had to replace // strdup with __kmp_str_format(). if (KMP_OS_WINDOWS) { __kmp_str_replace(fname->path, '\\', '/'); diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp index f8aa51dd904a..e1618f5cd9df 100644 --- a/openmp/runtime/src/kmp_taskdeps.cpp +++ b/openmp/runtime/src/kmp_taskdeps.cpp @@ -85,19 +85,19 @@ static kmp_dephash_t *__kmp_dephash_extend(kmp_info_t *thread, h->nelements = current_dephash->nelements; h->buckets = (kmp_dephash_entry **)(h + 1); h->generation = gen; - + h->nconflicts = 0; // insert existing elements in the new table for (size_t i = 0; i < current_dephash->size; i++) { - kmp_dephash_entry_t *next; - for (kmp_dephash_entry_t *entry = current_dephash->buckets[i]; entry; entry = next) { + kmp_dephash_entry_t *next, *entry; + for (entry = current_dephash->buckets[i]; entry; entry = next) { next = entry->next_in_bucket; // Compute the new hash using the new size, and insert the entry in // the new bucket. kmp_int32 new_bucket = __kmp_dephash_hash(entry->addr, h->size); + entry->next_in_bucket = h->buckets[new_bucket]; if (entry->next_in_bucket) { h->nconflicts++; } - entry->next_in_bucket = h->buckets[new_bucket]; h->buckets[new_bucket] = entry; } } @@ -417,7 +417,7 @@ static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); #endif KA_TRACE(20, ("__kmp_check_deps: T#%d checking dependencies for task %p : %d " - "possibly aliased dependencies, %d non-aliased depedencies : " + "possibly aliased dependencies, %d non-aliased dependencies : " "dep_barrier=%d .\n", gtid, taskdata, ndeps, ndeps_noalias, dep_barrier)); diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp index d037299f1477..15ffc1454fe9 100644 --- a/openmp/runtime/src/kmp_tasking.cpp +++ b/openmp/runtime/src/kmp_tasking.cpp @@ -933,7 +933,7 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task, } } else { KMP_DEBUG_ASSERT(resumed_task != - NULL); // verify that resumed task is passed as arguemnt + NULL); // verify that resumed task is passed as argument } // Free this task and then ancestor tasks if they have no children. @@ -3090,7 +3090,7 @@ static void __kmp_enable_tasking(kmp_task_team_t *task_team, * master thread may exit the barrier code and free the team data structure, * and return the threads to the thread pool). * - * This does not work with the the tasking code, as the thread is still + * This does not work with the tasking code, as the thread is still * expected to participate in the execution of any tasks that may have been * spawned my a member of the team, and the thread still needs access to all * to each thread in the team, so that it can steal work from it. @@ -3169,7 +3169,7 @@ static void __kmp_free_task_deque(kmp_thread_data_t *thread_data) { // __kmp_realloc_task_threads_data: // Allocates a threads_data array for a task team, either by allocating an // initial array or enlarging an existing array. Only the first thread to get -// the lock allocs or enlarges the array and re-initializes the array eleemnts. +// the lock allocs or enlarges the array and re-initializes the array elements. // That thread returns "TRUE", the rest return "FALSE". // Assumes that the new array size is given by task_team -> tt.tt_nproc. // The current size is given by task_team -> tt.tt_max_threads. @@ -3757,7 +3757,7 @@ static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask) { @param gtid Global Thread ID of encountering thread @param ptask Task which execution is completed -Execute the completation of a proxy task from a thread of that is part of the +Execute the completion of a proxy task from a thread of that is part of the team. Run first and bottom halves directly. */ void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask) { @@ -3782,7 +3782,7 @@ void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask) { @ingroup TASKING @param ptask Task which execution is completed -Execute the completation of a proxy task from a thread that could not belong to +Execute the completion of a proxy task from a thread that could not belong to the team. */ void __kmpc_proxy_task_completed_ooo(kmp_task_t *ptask) { @@ -4144,7 +4144,8 @@ void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task, } else { next_task_bounds.set_ub(upper); } - if (ptask_dup != NULL) // set lastprivate flag, construct fistprivates, etc. + if (ptask_dup != NULL) // set lastprivate flag, construct firstprivates, + // etc. ptask_dup(next_task, task, lastpriv); KA_TRACE(40, ("__kmp_taskloop_linear: T#%d; task #%llu: task %p: lower %lld, " @@ -4193,7 +4194,7 @@ void __kmp_taskloop_recur(ident_t *, int, kmp_task_t *, kmp_uint64 *, #endif void *); -// Execute part of the the taskloop submitted as a task. +// Execute part of the taskloop submitted as a task. int __kmp_taskloop_task(int gtid, void *ptask) { __taskloop_params_t *p = (__taskloop_params_t *)((kmp_task_t *)ptask)->shareds; @@ -4240,8 +4241,8 @@ int __kmp_taskloop_task(int gtid, void *ptask) { return 0; } -// Schedule part of the the taskloop as a task, -// execute the rest of the the taskloop. +// Schedule part of the taskloop as a task, +// execute the rest of the taskloop. // // loc Source location information // gtid Global thread ID @@ -4313,7 +4314,7 @@ void __kmp_taskloop_recur(ident_t *loc, int gtid, kmp_task_t *task, next_task = __kmp_task_dup_alloc(thread, task); // duplicate the task // adjust lower bound (upper bound is not changed) for the 2nd half *(kmp_uint64 *)((char *)next_task + lower_offset) = lb1; - if (ptask_dup != NULL) // construct fistprivates, etc. + if (ptask_dup != NULL) // construct firstprivates, etc. ptask_dup(next_task, task, 0); *ub = ub0; // adjust upper bound for the 1st half diff --git a/openmp/runtime/src/kmp_wrapper_getpid.h b/openmp/runtime/src/kmp_wrapper_getpid.h index 70db857bcbae..257772ad92bc 100644 --- a/openmp/runtime/src/kmp_wrapper_getpid.h +++ b/openmp/runtime/src/kmp_wrapper_getpid.h @@ -29,6 +29,8 @@ #elif KMP_OS_NETBSD #include <lwp.h> #define __kmp_gettid() _lwp_self() +#elif KMP_OS_OPENBSD +#define __kmp_gettid() syscall(SYS_getthrid) #elif defined(SYS_gettid) // Hopefully other Unix systems define SYS_gettid syscall for getting os thread // id diff --git a/openmp/runtime/src/kmp_wrapper_malloc.h b/openmp/runtime/src/kmp_wrapper_malloc.h index a50387c7f7cc..1544c5df3d64 100644 --- a/openmp/runtime/src/kmp_wrapper_malloc.h +++ b/openmp/runtime/src/kmp_wrapper_malloc.h @@ -24,7 +24,7 @@ On Linux* OS, alloca() function is declared in <alloca.h> header, while on Windows* OS there is no <alloca.h> header, function _alloca() (note underscore!) is declared in <malloc.h>. This header eliminates these - differences, so client code incluiding "kmp_wrapper_malloc.h" can rely on + differences, so client code including "kmp_wrapper_malloc.h" can rely on following routines: malloc diff --git a/openmp/runtime/src/ompt-event-specific.h b/openmp/runtime/src/ompt-event-specific.h index da6a0e424726..a5901b511148 100644 --- a/openmp/runtime/src/ompt-event-specific.h +++ b/openmp/runtime/src/ompt-event-specific.h @@ -99,7 +99,7 @@ #define ompt_callback_cancel_implemented ompt_event_MAY_ALWAYS_OPTIONAL -#define ompt_callback_reduction_implemented ompt_event_UNIMPLEMENTED +#define ompt_callback_reduction_implemented ompt_event_MAY_ALWAYS_OPTIONAL #define ompt_callback_dispatch_implemented ompt_event_UNIMPLEMENTED diff --git a/openmp/runtime/src/ompt-general.cpp b/openmp/runtime/src/ompt-general.cpp index 41b2827007b6..22eac2ebf7b8 100644 --- a/openmp/runtime/src/ompt-general.cpp +++ b/openmp/runtime/src/ompt-general.cpp @@ -268,6 +268,22 @@ ompt_try_start_tool(unsigned int omp_version, const char *runtime_version) { } __kmp_str_free(&libs); } + if (ret) + return ret; + +#if KMP_OS_UNIX + { // Non-standard: load archer tool if application is built with TSan + const char *fname = "libarcher.so"; + void *h = dlopen(fname, RTLD_LAZY); + if (h) { + start_tool = (ompt_start_tool_t)dlsym(h, "ompt_start_tool"); + if (start_tool) + ret = (*start_tool)(omp_version, runtime_version); + if (ret) + return ret; + } + } +#endif return ret; } diff --git a/openmp/runtime/src/ompt-specific.h b/openmp/runtime/src/ompt-specific.h index 47d8a1669846..5ba240c1a950 100644 --- a/openmp/runtime/src/ompt-specific.h +++ b/openmp/runtime/src/ompt-specific.h @@ -15,6 +15,7 @@ #include "kmp.h" +#if OMPT_SUPPORT /***************************************************************************** * forward declarations ****************************************************************************/ @@ -101,5 +102,30 @@ inline void ompt_set_thread_state(kmp_info_t *thread, ompt_state_t state) { inline const char *ompt_get_runtime_version() { return &__kmp_version_lib_ver[KMP_VERSION_MAGIC_LEN]; } +#endif // OMPT_SUPPRORT + +// macros providing the OMPT callbacks for reduction clause +#if OMPT_SUPPORT && OMPT_OPTIONAL +#define OMPT_REDUCTION_DECL(this_thr, gtid) \ + ompt_data_t *my_task_data = OMPT_CUR_TASK_DATA(this_thr); \ + ompt_data_t *my_parallel_data = OMPT_CUR_TEAM_DATA(this_thr); \ + void *return_address = OMPT_LOAD_RETURN_ADDRESS(gtid); +#define OMPT_REDUCTION_BEGIN \ + if (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction) { \ + ompt_callbacks.ompt_callback(ompt_callback_reduction)( \ + ompt_sync_region_reduction, ompt_scope_begin, my_parallel_data, \ + my_task_data, return_address); \ + } +#define OMPT_REDUCTION_END \ + if (ompt_enabled.enabled && ompt_enabled.ompt_callback_reduction) { \ + ompt_callbacks.ompt_callback(ompt_callback_reduction)( \ + ompt_sync_region_reduction, ompt_scope_end, my_parallel_data, \ + my_task_data, return_address); \ + } +#else // OMPT_SUPPORT && OMPT_OPTIONAL +#define OMPT_REDUCTION_DECL(this_thr, gtid) +#define OMPT_REDUCTION_BEGIN +#define OMPT_REDUCTION_END +#endif // ! OMPT_SUPPORT && OMPT_OPTIONAL #endif diff --git a/openmp/runtime/src/thirdparty/ittnotify/ittnotify.h b/openmp/runtime/src/thirdparty/ittnotify/ittnotify.h index ed46cd7761c3..d730c48ec705 100644 --- a/openmp/runtime/src/thirdparty/ittnotify/ittnotify.h +++ b/openmp/runtime/src/thirdparty/ittnotify/ittnotify.h @@ -1448,7 +1448,7 @@ ITT_STUBV(ITTAPI, void, heap_allocate_end, (__itt_heap_function h, void** addr, /** @endcond */ /** - * @brief Record an free begin occurrence. + * @brief Record a free begin occurrence. */ void ITTAPI __itt_heap_free_begin(__itt_heap_function h, void* addr); @@ -1468,7 +1468,7 @@ ITT_STUBV(ITTAPI, void, heap_free_begin, (__itt_heap_function h, void* addr)) /** @endcond */ /** - * @brief Record an free end occurrence. + * @brief Record a free end occurrence. */ void ITTAPI __itt_heap_free_end(__itt_heap_function h, void* addr); @@ -1488,7 +1488,7 @@ ITT_STUBV(ITTAPI, void, heap_free_end, (__itt_heap_function h, void* addr)) /** @endcond */ /** - * @brief Record an reallocation begin occurrence. + * @brief Record a reallocation begin occurrence. */ void ITTAPI __itt_heap_reallocate_begin(__itt_heap_function h, void* addr, size_t new_size, int initialized); @@ -1508,7 +1508,7 @@ ITT_STUBV(ITTAPI, void, heap_reallocate_begin, (__itt_heap_function h, void* add /** @endcond */ /** - * @brief Record an reallocation end occurrence. + * @brief Record a reallocation end occurrence. */ void ITTAPI __itt_heap_reallocate_end(__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized); @@ -2644,7 +2644,7 @@ ITT_STUB(ITTAPI, __itt_clock_domain*, clock_domain_create, (__itt_get_clock_info /** * @ingroup clockdomains - * @brief Recalculate clock domains frequences and clock base timestamps. + * @brief Recalculate clock domains frequencies and clock base timestamps. */ void ITTAPI __itt_clock_domain_reset(void); @@ -3957,7 +3957,7 @@ ITT_STUB(ITTAPI, __itt_caller, stack_caller_create, (void)) /** @endcond */ /** - * @brief Destroy the inforamtion about stitch point identified by the pointer previously returned by __itt_stack_caller_create() + * @brief Destroy the information about stitch point identified by the pointer previously returned by __itt_stack_caller_create() */ void ITTAPI __itt_stack_caller_destroy(__itt_caller id); diff --git a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.cpp b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.cpp index c48b3f420bb7..8f9e2a655ae4 100644 --- a/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.cpp +++ b/openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.cpp @@ -786,7 +786,7 @@ static const char* __itt_get_env_var(const char* name) } else { - /* If environment variable is empty, GetEnvirornmentVariables() + /* If environment variable is empty, GetEnvironmentVariables() * returns zero (number of characters (not including terminating null), * and GetLastError() returns ERROR_SUCCESS. */ DWORD err = GetLastError(); diff --git a/openmp/runtime/src/thirdparty/ittnotify/legacy/ittnotify.h b/openmp/runtime/src/thirdparty/ittnotify/legacy/ittnotify.h index eae33e0b1942..a4061e168d1d 100644 --- a/openmp/runtime/src/thirdparty/ittnotify/legacy/ittnotify.h +++ b/openmp/runtime/src/thirdparty/ittnotify/legacy/ittnotify.h @@ -957,9 +957,9 @@ ITT_STUB(ITTAPI, __itt_frame, frame_create, (const char *domain)) #endif /* INTEL_NO_MACRO_BODY */ /** @endcond */ -/** @brief Record an frame begin occurrence. */ +/** @brief Record a frame begin occurrence. */ void ITTAPI __itt_frame_begin(__itt_frame frame); -/** @brief Record an frame end occurrence. */ +/** @brief Record a frame end occurrence. */ void ITTAPI __itt_frame_end (__itt_frame frame); /** @cond exclude_from_documentation */ diff --git a/openmp/runtime/src/z_Linux_asm.S b/openmp/runtime/src/z_Linux_asm.S index b491fcf186aa..8090ff759fe1 100644 --- a/openmp/runtime/src/z_Linux_asm.S +++ b/openmp/runtime/src/z_Linux_asm.S @@ -1367,12 +1367,12 @@ KMP_LABEL(kmp_1): // return: r3 (always 1/TRUE) // .text -# if KMP_ARCH_PPC64_LE +# if KMP_ARCH_PPC64_ELFv2 .abiversion 2 # endif .globl __kmp_invoke_microtask -# if KMP_ARCH_PPC64_LE +# if KMP_ARCH_PPC64_ELFv2 .p2align 4 # else .p2align 2 @@ -1380,7 +1380,7 @@ KMP_LABEL(kmp_1): .type __kmp_invoke_microtask,@function -# if KMP_ARCH_PPC64_LE +# if KMP_ARCH_PPC64_ELFv2 __kmp_invoke_microtask: .Lfunc_begin0: .Lfunc_gep0: @@ -1424,7 +1424,7 @@ __kmp_invoke_microtask: .cfi_offset lr, 16 // Compute the size necessary for the local stack frame. -# if KMP_ARCH_PPC64_LE +# if KMP_ARCH_PPC64_ELFv2 li 12, 72 # else li 12, 88 @@ -1502,7 +1502,7 @@ __kmp_invoke_microtask: // for the microtask begins 48 + 8*8 == 112 bytes above r1 for ELFv1 and // 32 + 8*8 == 96 bytes above r1 for ELFv2. addi 4, 4, 40 -# if KMP_ARCH_PPC64_LE +# if KMP_ARCH_PPC64_ELFv2 addi 12, 1, 88 # else addi 12, 1, 104 @@ -1514,7 +1514,7 @@ __kmp_invoke_microtask: bdnz .Lnext .Lcall: -# if KMP_ARCH_PPC64_LE +# if KMP_ARCH_PPC64_ELFv2 std 2, 24(1) mr 12, 3 #else @@ -1530,7 +1530,7 @@ __kmp_invoke_microtask: mtctr 12 bctrl -# if KMP_ARCH_PPC64_LE +# if KMP_ARCH_PPC64_ELFv2 ld 2, 24(1) # else ld 2, 40(1) diff --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp index 0ee12927e4bf..1daa3d31047e 100644 --- a/openmp/runtime/src/z_Linux_util.cpp +++ b/openmp/runtime/src/z_Linux_util.cpp @@ -54,7 +54,7 @@ #include <sys/sysctl.h> #include <sys/user.h> #include <pthread_np.h> -#elif KMP_OS_NETBSD +#elif KMP_OS_NETBSD || KMP_OS_OPENBSD #include <sys/types.h> #include <sys/sysctl.h> #endif @@ -1287,7 +1287,7 @@ static void __kmp_atfork_child(void) { ++__kmp_fork_count; #if KMP_AFFINITY_SUPPORTED -#if KMP_OS_LINUX +#if KMP_OS_LINUX || KMP_OS_FREEBSD // reset the affinity in the child to the initial thread // affinity in the parent kmp_set_thread_affinity_mask_initial(); @@ -2130,9 +2130,36 @@ int __kmp_is_address_mapped(void *addr) { } } KMP_INTERNAL_FREE(kiv); -#elif KMP_OS_DRAGONFLY || KMP_OS_OPENBSD +#elif KMP_OS_OPENBSD + + int mib[3]; + mib[0] = CTL_KERN; + mib[1] = KERN_PROC_VMMAP; + mib[2] = getpid(); + + size_t size; + uint64_t end; + rc = sysctl(mib, 3, NULL, &size, NULL, 0); + KMP_ASSERT(!rc); + KMP_ASSERT(size); + end = size; + + struct kinfo_vmentry kiv = {.kve_start = 0}; + + while ((rc = sysctl(mib, 3, &kiv, &size, NULL, 0)) == 0) { + KMP_ASSERT(size); + if (kiv.kve_end == end) + break; + + if (kiv.kve_start >= (uint64_t)addr && kiv.kve_end <= (uint64_t)addr) { + found = 1; + break; + } + kiv.kve_start += 1; + } +#elif KMP_OS_DRAGONFLY - // FIXME(DragonFly, OpenBSD): Implement this + // FIXME(DragonFly): Implement this found = 1; #else @@ -2187,7 +2214,7 @@ int __kmp_get_load_balance(int max) { int __kmp_get_load_balance(int max) { static int permanent_error = 0; static int glb_running_threads = 0; // Saved count of the running threads for - // the thread balance algortihm + // the thread balance algorithm static double glb_call_time = 0; /* Thread balance algorithm call time */ int running_threads = 0; // Number of running threads in the system. @@ -2295,7 +2322,7 @@ int __kmp_get_load_balance(int max) { if (proc_entry->d_type == DT_DIR && isdigit(task_entry->d_name[0])) { ++total_threads; - // Consruct complete stat file path. Easiest way would be: + // Construct complete stat file path. Easiest way would be: // __kmp_str_buf_print( & stat_path, "%s/%s/stat", task_path.str, // task_entry->d_name ); // but seriae of __kmp_str_buf_cat works a bit faster. diff --git a/openmp/runtime/src/z_Windows_NT_util.cpp b/openmp/runtime/src/z_Windows_NT_util.cpp index c149dda56e8e..f463ef6d6edc 100644 --- a/openmp/runtime/src/z_Windows_NT_util.cpp +++ b/openmp/runtime/src/z_Windows_NT_util.cpp @@ -1504,7 +1504,7 @@ void __kmp_free_handle(kmp_thread_t tHandle) { int __kmp_get_load_balance(int max) { static ULONG glb_buff_size = 100 * 1024; - // Saved count of the running threads for the thread balance algortihm + // Saved count of the running threads for the thread balance algorithm static int glb_running_threads = 0; static double glb_call_time = 0; /* Thread balance algorithm call time */ |