author    Dimitry Andric <dim@FreeBSD.org>  2021-12-02 21:02:54 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2021-12-02 21:02:54 +0000
commit    f65dcba83ce5035ab88a85fe17628b447eb56e1b (patch)
tree      35f37bb72b3cfc6060193e66c76ee7c9478969b0
parent    846a2208a8ab099f595fe7e8b2e6d54a7b5e67fb (diff)
download  src-f65dcba83ce5035ab88a85fe17628b447eb56e1b.tar.gz
          src-f65dcba83ce5035ab88a85fe17628b447eb56e1b.zip
Vendor import of llvm-project main llvmorg-14-init-11187-g222442ec2d71. (tag: vendor/llvm-project/llvmorg-14-init-11187-g222442ec2d71)
-rw-r--r--clang/include/clang/AST/Decl.h7
-rw-r--r--clang/include/clang/AST/DeclBase.h6
-rw-r--r--clang/include/clang/AST/Expr.h4
-rw-r--r--clang/include/clang/AST/GlobalDecl.h9
-rw-r--r--clang/include/clang/ASTMatchers/ASTMatchers.h7
-rw-r--r--clang/include/clang/Analysis/CFG.h2
-rw-r--r--clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h134
-rw-r--r--clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h27
-rw-r--r--clang/include/clang/Analysis/FlowSensitive/DataflowLattice.h29
-rw-r--r--clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h95
-rw-r--r--clang/include/clang/Basic/Attr.td34
-rw-r--r--clang/include/clang/Basic/AttrDocs.td34
-rw-r--r--clang/include/clang/Basic/Builtins.def13
-rw-r--r--clang/include/clang/Basic/BuiltinsPPC.def7
-rw-r--r--clang/include/clang/Basic/DiagnosticDriverKinds.td8
-rw-r--r--clang/include/clang/Basic/DiagnosticGroups.td14
-rw-r--r--clang/include/clang/Basic/DiagnosticIDs.h2
-rw-r--r--clang/include/clang/Basic/DiagnosticParseKinds.td3
-rw-r--r--clang/include/clang/Basic/DiagnosticSemaKinds.td42
-rw-r--r--clang/include/clang/Basic/SyncScope.h73
-rw-r--r--clang/include/clang/Driver/Options.td28
-rw-r--r--clang/include/clang/Frontend/PrecompiledPreamble.h4
-rw-r--r--clang/include/clang/Sema/Sema.h29
-rw-r--r--clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h16
-rw-r--r--clang/lib/AST/ASTContext.cpp9
-rw-r--r--clang/lib/AST/ASTStructuralEquivalence.cpp42
-rw-r--r--clang/lib/AST/Decl.cpp6
-rw-r--r--clang/lib/AST/DeclBase.cpp4
-rw-r--r--clang/lib/AST/Expr.cpp12
-rw-r--r--clang/lib/AST/ExprConstant.cpp2
-rw-r--r--clang/lib/AST/MicrosoftMangle.cpp103
-rw-r--r--clang/lib/AST/StmtPrinter.cpp3
-rw-r--r--clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp35
-rw-r--r--clang/lib/Basic/Targets/AArch64.cpp4
-rw-r--r--clang/lib/Basic/Targets/ARM.cpp32
-rw-r--r--clang/lib/Basic/Targets/ARM.h3
-rw-r--r--clang/lib/Basic/Targets/OSTargets.cpp14
-rw-r--r--clang/lib/Basic/Targets/SPIR.h38
-rw-r--r--clang/lib/Basic/Targets/X86.cpp14
-rw-r--r--clang/lib/CodeGen/BackendUtil.cpp20
-rw-r--r--clang/lib/CodeGen/CGAtomic.cpp46
-rw-r--r--clang/lib/CodeGen/CGBuiltin.cpp3
-rw-r--r--clang/lib/CodeGen/CGStmtOpenMP.cpp3
-rw-r--r--clang/lib/CodeGen/CodeGenModule.cpp132
-rw-r--r--clang/lib/CodeGen/CodeGenModule.h1
-rw-r--r--clang/lib/CodeGen/TargetInfo.cpp34
-rw-r--r--clang/lib/Driver/Driver.cpp7
-rw-r--r--clang/lib/Driver/ToolChains/Arch/AArch64.cpp15
-rw-r--r--clang/lib/Driver/ToolChains/Clang.cpp121
-rw-r--r--clang/lib/Driver/ToolChains/Cuda.cpp2
-rw-r--r--clang/lib/Driver/ToolChains/Flang.cpp3
-rw-r--r--clang/lib/Driver/ToolChains/FreeBSD.cpp3
-rw-r--r--clang/lib/Driver/ToolChains/Linux.cpp3
-rw-r--r--clang/lib/Driver/ToolChains/PPCFreeBSD.cpp28
-rw-r--r--clang/lib/Driver/ToolChains/PPCFreeBSD.h33
-rw-r--r--clang/lib/Driver/ToolChains/PS4CPU.h1
-rw-r--r--clang/lib/Format/ContinuationIndenter.cpp13
-rw-r--r--clang/lib/Format/Format.cpp13
-rw-r--r--clang/lib/Format/FormatToken.h1
-rw-r--r--clang/lib/Format/FormatTokenLexer.cpp49
-rw-r--r--clang/lib/Format/SortJavaScriptImports.cpp4
-rw-r--r--clang/lib/Format/TokenAnalyzer.cpp8
-rw-r--r--clang/lib/Format/TokenAnnotator.cpp80
-rw-r--r--clang/lib/Format/UnwrappedLineFormatter.cpp23
-rw-r--r--clang/lib/Format/UnwrappedLineParser.cpp159
-rw-r--r--clang/lib/Format/UnwrappedLineParser.h1
-rw-r--r--clang/lib/Format/WhitespaceManager.cpp13
-rw-r--r--clang/lib/Frontend/InitPreprocessor.cpp5
-rw-r--r--clang/lib/Frontend/PrecompiledPreamble.cpp5
-rw-r--r--clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp2
-rw-r--r--clang/lib/Frontend/Rewrite/RewriteObjC.cpp2
-rw-r--r--clang/lib/Headers/altivec.h60
-rw-r--r--clang/lib/Headers/ppc_wrappers/emmintrin.h5
-rw-r--r--clang/lib/Headers/ppc_wrappers/mm_malloc.h2
-rw-r--r--clang/lib/Headers/ppc_wrappers/mmintrin.h5
-rw-r--r--clang/lib/Headers/ppc_wrappers/pmmintrin.h5
-rw-r--r--clang/lib/Headers/ppc_wrappers/smmintrin.h5
-rw-r--r--clang/lib/Headers/ppc_wrappers/tmmintrin.h5
-rw-r--r--clang/lib/Headers/ppc_wrappers/xmmintrin.h5
-rw-r--r--clang/lib/Headers/stdatomic.h6
-rw-r--r--clang/lib/Parse/ParseDecl.cpp14
-rw-r--r--clang/lib/Parse/ParseStmt.cpp3
-rw-r--r--clang/lib/Sema/AnalysisBasedWarnings.cpp2
-rw-r--r--clang/lib/Sema/SemaChecking.cpp36
-rw-r--r--clang/lib/Sema/SemaDecl.cpp118
-rw-r--r--clang/lib/Sema/SemaDeclAttr.cpp161
-rw-r--r--clang/lib/Sema/SemaExpr.cpp15
-rw-r--r--clang/lib/Sema/SemaExprCXX.cpp5
-rw-r--r--clang/lib/Sema/SemaStmt.cpp9
-rw-r--r--clang/lib/StaticAnalyzer/Core/CoreEngine.cpp4
-rw-r--r--clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp39
-rw-r--r--clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp62
-rw-r--r--clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp26
-rw-r--r--clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp59
-rw-r--r--clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp21
-rw-r--r--clang/utils/TableGen/ASTTableGen.cpp2
-rw-r--r--compiler-rt/include/profile/MemProfData.inc61
-rw-r--r--compiler-rt/lib/asan/asan_interceptors.cpp35
-rw-r--r--compiler-rt/lib/asan/asan_report.cpp4
-rw-r--r--compiler-rt/lib/asan/asan_rtl.cpp8
-rw-r--r--compiler-rt/lib/cfi/cfi.cpp4
-rw-r--r--compiler-rt/lib/memprof/memprof_allocator.cpp5
-rw-r--r--compiler-rt/lib/memprof/memprof_interceptors.cpp4
-rw-r--r--compiler-rt/lib/memprof/memprof_rawprofile.cpp69
-rw-r--r--compiler-rt/lib/memprof/memprof_rawprofile.h11
-rw-r--r--compiler-rt/lib/memprof/tests/rawprofile.cpp17
-rw-r--r--compiler-rt/lib/profile/InstrProfilingMerge.c3
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_allocator.cpp72
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_allocator_internal.h2
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_common.h4
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc11
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_dense_map.h53
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_dense_map_info.h140
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp9
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_leb128.h87
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp5
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp40
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_stack_store.cpp221
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_stack_store.h92
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.cpp16
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h2
-rw-r--r--compiler-rt/lib/sanitizer_common/sanitizer_win.cpp5
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_defs.h1
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp47
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_mman.cpp32
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_mman.h2
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp143
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_rtl.cpp56
-rw-r--r--compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp2
-rw-r--r--libcxx/CREDITS.TXT4
-rw-r--r--libcxx/include/__bit/byteswap.h55
-rw-r--r--libcxx/include/__bsd_locale_fallbacks.h6
-rw-r--r--libcxx/include/__compare/partial_order.h71
-rw-r--r--libcxx/include/__compare/strong_order.h136
-rw-r--r--libcxx/include/__compare/weak_order.h100
-rw-r--r--libcxx/include/__config35
-rw-r--r--libcxx/include/__iterator/reverse_iterator.h12
-rw-r--r--libcxx/include/__iterator/wrap_iter.h86
-rw-r--r--libcxx/include/__memory/allocator_traits.h8
-rw-r--r--libcxx/include/__memory/unique_ptr.h14
-rw-r--r--libcxx/include/__numeric/accumulate.h52
-rw-r--r--libcxx/include/__numeric/adjacent_difference.h72
-rw-r--r--libcxx/include/__numeric/exclusive_scan.h53
-rw-r--r--libcxx/include/__numeric/gcd_lcm.h96
-rw-r--r--libcxx/include/__numeric/inclusive_scan.h60
-rw-r--r--libcxx/include/__numeric/inner_product.h53
-rw-r--r--libcxx/include/__numeric/iota.h32
-rw-r--r--libcxx/include/__numeric/midpoint.h85
-rw-r--r--libcxx/include/__numeric/partial_sum.h70
-rw-r--r--libcxx/include/__numeric/reduce.h47
-rw-r--r--libcxx/include/__numeric/transform_exclusive_scan.h49
-rw-r--r--libcxx/include/__numeric/transform_inclusive_scan.h58
-rw-r--r--libcxx/include/__numeric/transform_reduce.h54
-rw-r--r--libcxx/include/__random/bernoulli_distribution.h143
-rw-r--r--libcxx/include/__random/binomial_distribution.h225
-rw-r--r--libcxx/include/__random/cauchy_distribution.h162
-rw-r--r--libcxx/include/__random/chi_squared_distribution.h144
-rw-r--r--libcxx/include/__random/default_random_engine.h25
-rw-r--r--libcxx/include/__random/discard_block_engine.h203
-rw-r--r--libcxx/include/__random/discrete_distribution.h260
-rw-r--r--libcxx/include/__random/exponential_distribution.h155
-rw-r--r--libcxx/include/__random/extreme_value_distribution.h161
-rw-r--r--libcxx/include/__random/fisher_f_distribution.h160
-rw-r--r--libcxx/include/__random/gamma_distribution.h213
-rw-r--r--libcxx/include/__random/generate_canonical.h53
-rw-r--r--libcxx/include/__random/geometric_distribution.h141
-rw-r--r--libcxx/include/__random/independent_bits_engine.h271
-rw-r--r--libcxx/include/__random/is_seed_sequence.h31
-rw-r--r--libcxx/include/__random/knuth_b.h26
-rw-r--r--libcxx/include/__random/linear_congruential_engine.h398
-rw-r--r--libcxx/include/__random/log2.h74
-rw-r--r--libcxx/include/__random/lognormal_distribution.h163
-rw-r--r--libcxx/include/__random/mersenne_twister_engine.h534
-rw-r--r--libcxx/include/__random/negative_binomial_distribution.h176
-rw-r--r--libcxx/include/__random/normal_distribution.h208
-rw-r--r--libcxx/include/__random/piecewise_constant_distribution.h356
-rw-r--r--libcxx/include/__random/piecewise_linear_distribution.h372
-rw-r--r--libcxx/include/__random/poisson_distribution.h276
-rw-r--r--libcxx/include/__random/random_device.h71
-rw-r--r--libcxx/include/__random/ranlux.h31
-rw-r--r--libcxx/include/__random/seed_seq.h150
-rw-r--r--libcxx/include/__random/shuffle_order_engine.h283
-rw-r--r--libcxx/include/__random/student_t_distribution.h153
-rw-r--r--libcxx/include/__random/subtract_with_carry_engine.h352
-rw-r--r--libcxx/include/__random/uniform_int_distribution.h38
-rw-r--r--libcxx/include/__random/uniform_random_bit_generator.h45
-rw-r--r--libcxx/include/__random/uniform_real_distribution.h160
-rw-r--r--libcxx/include/__random/weibull_distribution.h155
-rw-r--r--libcxx/include/__ranges/concepts.h2
-rw-r--r--libcxx/include/__utility/priority_tag.h26
-rw-r--r--libcxx/include/bit13
-rw-r--r--libcxx/include/compare21
-rw-r--r--libcxx/include/deque59
-rw-r--r--libcxx/include/filesystem18
-rw-r--r--libcxx/include/format127
-rw-r--r--libcxx/include/forward_list29
-rw-r--r--libcxx/include/list50
-rw-r--r--libcxx/include/module.modulemap57
-rw-r--r--libcxx/include/numeric489
-rw-r--r--libcxx/include/random5364
-rw-r--r--libcxx/include/ranges2
-rw-r--r--libcxx/include/string_view63
-rw-r--r--libcxx/include/type_traits4
-rw-r--r--libcxx/include/utility1
-rw-r--r--libcxx/include/vector62
-rw-r--r--libcxx/include/version6
-rw-r--r--libcxx/src/filesystem/filesystem_common.h10
-rw-r--r--libunwind/src/Unwind-EHABI.cpp29
-rw-r--r--lld/COFF/Chunks.cpp3
-rw-r--r--lld/COFF/Writer.cpp6
-rw-r--r--lld/ELF/AArch64ErrataFix.cpp4
-rw-r--r--lld/ELF/ARMErrataFix.cpp4
-rw-r--r--lld/ELF/Arch/ARM.cpp202
-rw-r--r--lld/ELF/Arch/Hexagon.cpp2
-rw-r--r--lld/ELF/Arch/RISCV.cpp2
-rw-r--r--lld/ELF/Arch/X86.cpp9
-rw-r--r--lld/ELF/Arch/X86_64.cpp60
-rw-r--r--lld/ELF/Config.h19
-rw-r--r--lld/ELF/Driver.cpp45
-rw-r--r--lld/ELF/ICF.cpp8
-rw-r--r--lld/ELF/InputFiles.cpp56
-rw-r--r--lld/ELF/InputFiles.h28
-rw-r--r--lld/ELF/InputSection.cpp4
-rw-r--r--lld/ELF/InputSection.h16
-rw-r--r--lld/ELF/LTO.cpp2
-rw-r--r--lld/ELF/LinkerScript.cpp301
-rw-r--r--lld/ELF/LinkerScript.h59
-rw-r--r--lld/ELF/MapFile.cpp96
-rw-r--r--lld/ELF/MapFile.h3
-rw-r--r--lld/ELF/Options.td30
-rw-r--r--lld/ELF/OutputSections.cpp48
-rw-r--r--lld/ELF/OutputSections.h7
-rw-r--r--lld/ELF/Relocations.cpp72
-rw-r--r--lld/ELF/ScriptParser.cpp70
-rw-r--r--lld/ELF/SymbolTable.cpp2
-rw-r--r--lld/ELF/Symbols.cpp41
-rw-r--r--lld/ELF/Symbols.h6
-rw-r--r--lld/ELF/SyntheticSections.cpp156
-rw-r--r--lld/ELF/SyntheticSections.h15
-rw-r--r--lld/ELF/Thunks.cpp64
-rw-r--r--lld/ELF/Writer.cpp244
-rw-r--r--lld/ELF/Writer.h1
-rw-r--r--lld/MachO/InputSection.cpp7
-rw-r--r--lld/MachO/Symbols.h7
-rw-r--r--lld/MachO/SyntheticSections.cpp22
-rw-r--r--lld/MachO/UnwindInfoSection.cpp5
-rw-r--r--lld/docs/ReleaseNotes.rst5
-rw-r--r--lld/docs/ld.lld.118
-rw-r--r--lldb/bindings/interface/SBDebugger.i3
-rw-r--r--lldb/bindings/interface/SBTarget.i3
-rw-r--r--lldb/bindings/interface/SBValue.i3
-rw-r--r--lldb/bindings/python/python-swigsafecast.swig58
-rw-r--r--lldb/bindings/python/python-wrapper.swig299
-rw-r--r--lldb/bindings/python/python.swig1
-rw-r--r--lldb/include/lldb/API/SBDebugger.h2
-rw-r--r--lldb/include/lldb/API/SBTarget.h5
-rw-r--r--lldb/include/lldb/API/SBValue.h6
-rw-r--r--lldb/include/lldb/Core/Debugger.h8
-rw-r--r--lldb/include/lldb/Interpreter/OptionGroupFormat.h9
-rw-r--r--lldb/include/lldb/Symbol/ObjectFile.h28
-rw-r--r--lldb/include/lldb/Symbol/Symtab.h9
-rw-r--r--lldb/include/lldb/Target/Platform.h24
-rw-r--r--lldb/include/lldb/Target/Process.h38
-rw-r--r--lldb/source/API/SBDebugger.cpp62
-rw-r--r--lldb/source/API/SBTarget.cpp11
-rw-r--r--lldb/source/API/SBValue.cpp13
-rw-r--r--lldb/source/Commands/CommandObjectMemory.cpp17
-rw-r--r--lldb/source/Core/Debugger.cpp89
-rw-r--r--lldb/source/Core/Module.cpp11
-rw-r--r--lldb/source/Interpreter/CommandObject.cpp3
-rw-r--r--lldb/source/Interpreter/OptionGroupFormat.cpp40
-rw-r--r--lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.cpp2
-rw-r--r--lldb/source/Plugins/ExpressionParser/Clang/CppModuleConfiguration.cpp63
-rw-r--r--lldb/source/Plugins/ExpressionParser/Clang/CppModuleConfiguration.h13
-rw-r--r--lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp22
-rw-r--r--lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp3
-rw-r--r--lldb/source/Plugins/Language/CPlusPlus/LibCxxOptional.cpp2
-rw-r--r--lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp13
-rw-r--r--lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.cpp7
-rw-r--r--lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.h2
-rw-r--r--lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp244
-rw-r--r--lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h11
-rw-r--r--lldb/source/Plugins/ObjectFile/JIT/ObjectFileJIT.cpp21
-rw-r--r--lldb/source/Plugins/ObjectFile/JIT/ObjectFileJIT.h2
-rw-r--r--lldb/source/Plugins/ObjectFile/PDB/ObjectFilePDB.h2
-rw-r--r--lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp2
-rw-r--r--lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.h2
-rw-r--r--lldb/source/Plugins/Platform/QemuUser/PlatformQemuUser.cpp148
-rw-r--r--lldb/source/Plugins/Platform/QemuUser/PlatformQemuUser.h57
-rw-r--r--lldb/source/Plugins/Platform/QemuUser/PlatformQemuUserProperties.td12
-rw-r--r--lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp4
-rw-r--r--lldb/source/Plugins/Process/elf-core/ProcessElfCore.h8
-rw-r--r--lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp96
-rw-r--r--lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.h23
-rw-r--r--lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp13
-rw-r--r--lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.cpp6
-rw-r--r--lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.h4
-rw-r--r--lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp12
-rw-r--r--lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.h1
-rw-r--r--lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp62
-rw-r--r--lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h10
-rw-r--r--lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp6
-rw-r--r--lldb/source/Plugins/Process/minidump/ProcessMinidump.h6
-rw-r--r--lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp4
-rw-r--r--lldb/source/Plugins/Process/scripted/ScriptedProcess.h6
-rw-r--r--lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h152
-rw-r--r--lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp183
-rw-r--r--lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp3
-rw-r--r--lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp12
-rw-r--r--lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp5
-rw-r--r--lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp1
-rw-r--r--lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp2
-rw-r--r--lldb/source/Symbol/ObjectFile.cpp38
-rw-r--r--lldb/source/Symbol/Symtab.cpp9
-rw-r--r--lldb/source/Target/Platform.cpp40
-rw-r--r--lldb/source/Target/Process.cpp7
-rw-r--r--lldb/source/Target/RemoteAwarePlatform.cpp11
-rw-r--r--lldb/tools/driver/Driver.cpp88
-rw-r--r--lldb/tools/lldb-server/lldb-platform.cpp26
-rw-r--r--llvm/include/llvm-c/Core.h11
-rw-r--r--llvm/include/llvm/ADT/APInt.h4
-rw-r--r--llvm/include/llvm/ADT/SCCIterator.h133
-rw-r--r--llvm/include/llvm/ADT/STLExtras.h31
-rw-r--r--llvm/include/llvm/Analysis/IVDescriptors.h8
-rw-r--r--llvm/include/llvm/Analysis/Lint.h9
-rw-r--r--llvm/include/llvm/Analysis/ScalarEvolution.h29
-rw-r--r--llvm/include/llvm/Analysis/TargetLibraryInfo.def6
-rw-r--r--llvm/include/llvm/Analysis/VectorUtils.h2
-rw-r--r--llvm/include/llvm/BinaryFormat/ELF.h7
-rw-r--r--llvm/include/llvm/CodeGen/CommandFlags.h5
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h49
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h42
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/Utils.h12
-rw-r--r--llvm/include/llvm/CodeGen/MachineFunction.h3
-rw-r--r--llvm/include/llvm/CodeGen/TargetFrameLowering.h14
-rw-r--r--llvm/include/llvm/CodeGen/TargetInstrInfo.h6
-rw-r--r--llvm/include/llvm/CodeGen/TargetLowering.h65
-rw-r--r--llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h5
-rw-r--r--llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h5
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/Core.h175
-rw-r--r--llvm/include/llvm/Frontend/OpenMP/OMPConstants.h4
-rw-r--r--llvm/include/llvm/IR/IRBuilder.h125
-rw-r--r--llvm/include/llvm/IR/Instructions.h18
-rw-r--r--llvm/include/llvm/IR/IntrinsicsPowerPC.td18
-rw-r--r--llvm/include/llvm/IR/Operator.h8
-rw-r--r--llvm/include/llvm/IR/PatternMatch.h25
-rw-r--r--llvm/include/llvm/IR/Type.h2
-rw-r--r--llvm/include/llvm/IR/VPIntrinsics.def156
-rw-r--r--llvm/include/llvm/IR/Verifier.h6
-rw-r--r--llvm/include/llvm/MCA/HardwareUnits/Scheduler.h7
-rw-r--r--llvm/include/llvm/ObjectYAML/MachOYAML.h1
-rw-r--r--llvm/include/llvm/ProfileData/MemProfData.inc61
-rw-r--r--llvm/include/llvm/ProfileData/RawMemProfReader.h43
-rw-r--r--llvm/include/llvm/Support/AArch64TargetParser.def3
-rw-r--r--llvm/include/llvm/Support/AArch64TargetParser.h9
-rw-r--r--llvm/include/llvm/Support/ARMAttributeParser.h4
-rw-r--r--llvm/include/llvm/Support/ARMBuildAttributes.h24
-rw-r--r--llvm/include/llvm/Support/ARMTargetParser.def1
-rw-r--r--llvm/include/llvm/Support/ARMTargetParser.h2
-rw-r--r--llvm/include/llvm/Support/GenericDomTree.h16
-rw-r--r--llvm/include/llvm/Support/HTTPClient.h113
-rw-r--r--llvm/include/llvm/Support/Mutex.h4
-rw-r--r--llvm/include/llvm/Support/RWMutex.h8
-rw-r--r--llvm/include/llvm/Support/TargetParser.h12
-rw-r--r--llvm/include/llvm/Support/ThreadPool.h70
-rw-r--r--llvm/include/llvm/Target/GlobalISel/Combine.td96
-rw-r--r--llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h104
-rw-r--r--llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h2
-rw-r--r--llvm/include/llvm/Transforms/Scalar/AnnotationRemarks.h1
-rw-r--r--llvm/include/llvm/Transforms/Utils/Local.h5
-rw-r--r--llvm/include/llvm/Transforms/Utils/SSAUpdater.h4
-rw-r--r--llvm/include/llvm/Transforms/Utils/SampleProfileInference.h284
-rw-r--r--llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h162
-rw-r--r--llvm/lib/Analysis/BranchProbabilityInfo.cpp201
-rw-r--r--llvm/lib/Analysis/DivergenceAnalysis.cpp10
-rw-r--r--llvm/lib/Analysis/IRSimilarityIdentifier.cpp2
-rw-r--r--llvm/lib/Analysis/IVDescriptors.cpp46
-rw-r--r--llvm/lib/Analysis/InstructionSimplify.cpp184
-rw-r--r--llvm/lib/Analysis/IntervalPartition.cpp16
-rw-r--r--llvm/lib/Analysis/LazyValueInfo.cpp15
-rw-r--r--llvm/lib/Analysis/LoopAccessAnalysis.cpp60
-rw-r--r--llvm/lib/Analysis/MemoryDependenceAnalysis.cpp6
-rw-r--r--llvm/lib/Analysis/MemoryLocation.cpp22
-rw-r--r--llvm/lib/Analysis/PHITransAddr.cpp2
-rw-r--r--llvm/lib/Analysis/RegionPass.cpp3
-rw-r--r--llvm/lib/Analysis/ScalarEvolution.cpp363
-rw-r--r--llvm/lib/Analysis/StackSafetyAnalysis.cpp126
-rw-r--r--llvm/lib/Analysis/SyncDependenceAnalysis.cpp59
-rw-r--r--llvm/lib/Analysis/TargetLibraryInfo.cpp19
-rw-r--r--llvm/lib/Analysis/VFABIDemangling.cpp1
-rw-r--r--llvm/lib/AsmParser/LLParser.cpp13
-rw-r--r--llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp4
-rw-r--r--llvm/lib/Bitcode/Reader/BitcodeReader.cpp9
-rw-r--r--llvm/lib/Bitcode/Reader/MetadataLoader.cpp7
-rw-r--r--llvm/lib/Bitcode/Writer/BitcodeWriter.cpp28
-rw-r--r--llvm/lib/Bitcode/Writer/ValueEnumerator.cpp4
-rw-r--r--llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp26
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp14
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp213
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp134
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h8
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp8
-rw-r--r--llvm/lib/CodeGen/BranchFolding.cpp4
-rw-r--r--llvm/lib/CodeGen/BranchRelaxation.cpp4
-rw-r--r--llvm/lib/CodeGen/CodeGen.cpp2
-rw-r--r--llvm/lib/CodeGen/CommandFlags.cpp15
-rw-r--r--llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp16
-rw-r--r--llvm/lib/CodeGen/DeadMachineInstructionElim.cpp11
-rw-r--r--llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp619
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp36
-rw-r--r--llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp2
-rw-r--r--llvm/lib/CodeGen/GlobalISel/Utils.cpp27
-rw-r--r--llvm/lib/CodeGen/GlobalMerge.cpp6
-rw-r--r--llvm/lib/CodeGen/IndirectBrExpandPass.cpp10
-rw-r--r--llvm/lib/CodeGen/InlineSpiller.cpp8
-rw-r--r--llvm/lib/CodeGen/LatencyPriorityQueue.cpp6
-rw-r--r--llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp70
-rw-r--r--llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h63
-rw-r--r--llvm/lib/CodeGen/LiveDebugVariables.cpp51
-rw-r--r--llvm/lib/CodeGen/LiveRangeEdit.cpp14
-rw-r--r--llvm/lib/CodeGen/LiveRangeUtils.h2
-rw-r--r--llvm/lib/CodeGen/LiveVariables.cpp10
-rw-r--r--llvm/lib/CodeGen/LocalStackSlotAllocation.cpp8
-rw-r--r--llvm/lib/CodeGen/MIRSampleProfile.cpp2
-rw-r--r--llvm/lib/CodeGen/MachineFunction.cpp21
-rw-r--r--llvm/lib/CodeGen/MachineInstr.cpp6
-rw-r--r--llvm/lib/CodeGen/MachineOperand.cpp4
-rw-r--r--llvm/lib/CodeGen/MachineOutliner.cpp17
-rw-r--r--llvm/lib/CodeGen/MachinePipeliner.cpp24
-rw-r--r--llvm/lib/CodeGen/MachineSink.cpp12
-rw-r--r--llvm/lib/CodeGen/MachineVerifier.cpp24
-rw-r--r--llvm/lib/CodeGen/ModuloSchedule.cpp6
-rw-r--r--llvm/lib/CodeGen/PrologEpilogInserter.cpp13
-rw-r--r--llvm/lib/CodeGen/RegAllocFast.cpp9
-rw-r--r--llvm/lib/CodeGen/RegAllocGreedy.cpp119
-rw-r--r--llvm/lib/CodeGen/RegisterCoalescer.cpp12
-rw-r--r--llvm/lib/CodeGen/ScheduleDAGInstrs.cpp5
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp131
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp9
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp7
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp5
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp21
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp4
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp4
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp50
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp109
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp102
-rw-r--r--llvm/lib/CodeGen/StackSlotColoring.cpp6
-rw-r--r--llvm/lib/CodeGen/TailDuplicator.cpp29
-rw-r--r--llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp10
-rw-r--r--llvm/lib/CodeGen/TargetInstrInfo.cpp21
-rw-r--r--llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp4
-rw-r--r--llvm/lib/CodeGen/TwoAddressInstructionPass.cpp35
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp80
-rw-r--r--llvm/lib/Demangle/DLangDemangle.cpp253
-rw-r--r--llvm/lib/ExecutionEngine/ExecutionEngine.cpp3
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h40
-rw-r--r--llvm/lib/ExecutionEngine/Orc/Core.cpp234
-rw-r--r--llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp2
-rw-r--r--llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp2
-rw-r--r--llvm/lib/IR/AsmWriter.cpp26
-rw-r--r--llvm/lib/IR/Core.cpp8
-rw-r--r--llvm/lib/IR/DIBuilder.cpp15
-rw-r--r--llvm/lib/IR/Instructions.cpp2
-rw-r--r--llvm/lib/IR/IntrinsicInst.cpp67
-rw-r--r--llvm/lib/IR/Operator.cpp21
-rw-r--r--llvm/lib/IR/PassTimingInfo.cpp2
-rw-r--r--llvm/lib/IR/SafepointIRVerifier.cpp6
-rw-r--r--llvm/lib/IR/Verifier.cpp31
-rw-r--r--llvm/lib/InterfaceStub/ELFObjHandler.cpp2
-rw-r--r--llvm/lib/MC/MCAsmStreamer.cpp6
-rw-r--r--llvm/lib/MC/MCELFStreamer.cpp3
-rw-r--r--llvm/lib/MC/MCParser/ELFAsmParser.cpp8
-rw-r--r--llvm/lib/MC/WinCOFFObjectWriter.cpp48
-rw-r--r--llvm/lib/MCA/InstrBuilder.cpp2
-rw-r--r--llvm/lib/MCA/Stages/ExecuteStage.cpp2
-rw-r--r--llvm/lib/Object/ELFObjectFile.cpp4
-rw-r--r--llvm/lib/ObjectYAML/COFFEmitter.cpp32
-rw-r--r--llvm/lib/ObjectYAML/ELFYAML.cpp4
-rw-r--r--llvm/lib/ObjectYAML/MachOEmitter.cpp13
-rw-r--r--llvm/lib/ObjectYAML/MachOYAML.cpp1
-rw-r--r--llvm/lib/Option/OptTable.cpp7
-rw-r--r--llvm/lib/Passes/PassBuilderPipelines.cpp2
-rw-r--r--llvm/lib/Passes/StandardInstrumentations.cpp4
-rw-r--r--llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp1
-rw-r--r--llvm/lib/ProfileData/InstrProf.cpp11
-rw-r--r--llvm/lib/ProfileData/InstrProfReader.cpp2
-rw-r--r--llvm/lib/ProfileData/RawMemProfReader.cpp121
-rw-r--r--llvm/lib/ProfileData/SampleProfReader.cpp2
-rw-r--r--llvm/lib/Support/AArch64TargetParser.cpp50
-rw-r--r--llvm/lib/Support/ARMAttributeParser.cpp26
-rw-r--r--llvm/lib/Support/ARMBuildAttrs.cpp4
-rw-r--r--llvm/lib/Support/CommandLine.cpp16
-rw-r--r--llvm/lib/Support/HTTPClient.cpp97
-rw-r--r--llvm/lib/Support/KnownBits.cpp9
-rw-r--r--llvm/lib/Support/Regex.cpp6
-rw-r--r--llvm/lib/Support/StringExtras.cpp3
-rw-r--r--llvm/lib/Support/StringRef.cpp16
-rw-r--r--llvm/lib/Support/TargetParser.cpp48
-rw-r--r--llvm/lib/Support/ThreadPool.cpp29
-rw-r--r--llvm/lib/TableGen/TGLexer.cpp8
-rw-r--r--llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp13
-rw-r--r--llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp9
-rw-r--r--llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp10
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.cpp297
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.h6
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.td16
-rw-r--r--llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td11
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp10
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp57
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp4
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCombine.td11
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp41
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h5
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp49
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h1
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp16
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h4
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstructions.td29
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp46
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp22
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp12
-rw-r--r--llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp17
-rw-r--r--llvm/lib/Target/AMDGPU/CaymanInstructions.td2
-rw-r--r--llvm/lib/Target/AMDGPU/EvergreenInstructions.td1
-rw-r--r--llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp23
-rw-r--r--llvm/lib/Target/AMDGPU/GCNRegPressure.cpp10
-rw-r--r--llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/R600ISelLowering.cpp5
-rw-r--r--llvm/lib/Target/AMDGPU/R600Instructions.td1
-rw-r--r--llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/R600Packetizer.cpp19
-rw-r--r--llvm/lib/Target/AMDGPU/SIDefines.h3
-rw-r--r--llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp10
-rw-r--r--llvm/lib/Target/AMDGPU/SIFoldOperands.cpp12
-rw-r--r--llvm/lib/Target/AMDGPU/SIFrameLowering.cpp28
-rw-r--r--llvm/lib/Target/AMDGPU/SIFrameLowering.h3
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp80
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.h8
-rw-r--r--llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp9
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp67
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.td2
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstructions.td176
-rw-r--r--llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp38
-rw-r--r--llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp11
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp155
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.h29
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.td135
-rw-r--r--llvm/lib/Target/AMDGPU/SISchedule.td6
-rw-r--r--llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp10
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp16
-rw-r--r--llvm/lib/Target/ARC/ARCMCInstLower.cpp3
-rw-r--r--llvm/lib/Target/ARM/ARM.h2
-rw-r--r--llvm/lib/Target/ARM/ARM.td4
-rw-r--r--llvm/lib/Target/ARM/ARMAsmPrinter.cpp48
-rw-r--r--llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp126
-rw-r--r--llvm/lib/Target/ARM/ARMBaseInstrInfo.h24
-rw-r--r--llvm/lib/Target/ARM/ARMBranchTargets.cpp135
-rw-r--r--llvm/lib/Target/ARM/ARMConstantIslandPass.cpp92
-rw-r--r--llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp24
-rw-r--r--llvm/lib/Target/ARM/ARMFrameLowering.cpp20
-rw-r--r--llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp5
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.cpp206
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.h2
-rw-r--r--llvm/lib/Target/ARM/ARMInstrFormats.td29
-rw-r--r--llvm/lib/Target/ARM/ARMInstrInfo.td8
-rw-r--r--llvm/lib/Target/ARM/ARMInstrMVE.td60
-rw-r--r--llvm/lib/Target/ARM/ARMInstrThumb.td2
-rw-r--r--llvm/lib/Target/ARM/ARMInstrThumb2.td138
-rw-r--r--llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp20
-rw-r--r--llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp57
-rw-r--r--llvm/lib/Target/ARM/ARMMachineFunctionInfo.h25
-rw-r--r--llvm/lib/Target/ARM/ARMPredicates.td2
-rw-r--r--llvm/lib/Target/ARM/ARMRegisterInfo.td10
-rw-r--r--llvm/lib/Target/ARM/ARMSubtarget.h3
-rw-r--r--llvm/lib/Target/ARM/ARMSystemRegister.td18
-rw-r--r--llvm/lib/Target/ARM/ARMTargetMachine.cpp2
-rw-r--r--llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp33
-rw-r--r--llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp17
-rw-r--r--llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp67
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h24
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp5
-rw-r--r--llvm/lib/Target/ARM/Thumb1FrameLowering.cpp13
-rw-r--r--llvm/lib/Target/ARM/Thumb2InstrInfo.cpp5
-rw-r--r--llvm/lib/Target/ARM/Thumb2SizeReduction.cpp12
-rw-r--r--llvm/lib/Target/BPF/BPFMCInstLower.cpp4
-rw-r--r--llvm/lib/Target/Hexagon/BitTracker.cpp7
-rw-r--r--llvm/lib/Target/Hexagon/HexagonBitTracker.cpp2
-rw-r--r--llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp18
-rw-r--r--llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp2
-rw-r--r--llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp23
-rw-r--r--llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp30
-rw-r--r--llvm/lib/Target/Hexagon/HexagonGenInsert.cpp31
-rw-r--r--llvm/lib/Target/Hexagon/HexagonGenMux.cpp2
-rw-r--r--llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp23
-rw-r--r--llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp22
-rw-r--r--llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp8
-rw-r--r--llvm/lib/Target/Hexagon/HexagonISelLowering.cpp9
-rw-r--r--llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp11
-rw-r--r--llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp7
-rw-r--r--llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp3
-rw-r--r--llvm/lib/Target/Hexagon/HexagonPeephole.cpp18
-rw-r--r--llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp3
-rw-r--r--llvm/lib/Target/Hexagon/HexagonSubtarget.cpp3
-rw-r--r--llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp1
-rw-r--r--llvm/lib/Target/Lanai/LanaiDelaySlotFiller.cpp8
-rw-r--r--llvm/lib/Target/Lanai/LanaiFrameLowering.cpp9
-rw-r--r--llvm/lib/Target/Lanai/LanaiInstrInfo.cpp3
-rw-r--r--llvm/lib/Target/Lanai/LanaiMCInstLower.cpp4
-rw-r--r--llvm/lib/Target/MSP430/MSP430FrameLowering.cpp4
-rw-r--r--llvm/lib/Target/MSP430/MSP430MCInstLower.cpp4
-rw-r--r--llvm/lib/Target/Mips/Mips16FrameLowering.cpp11
-rw-r--r--llvm/lib/Target/Mips/MipsAsmPrinter.cpp5
-rw-r--r--llvm/lib/Target/Mips/MipsConstantIslandPass.cpp36
-rw-r--r--llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp5
-rw-r--r--llvm/lib/Target/Mips/MipsExpandPseudo.cpp5
-rw-r--r--llvm/lib/Target/Mips/MipsFrameLowering.h5
-rw-r--r--llvm/lib/Target/Mips/MipsMCInstLower.cpp3
-rw-r--r--llvm/lib/Target/Mips/MipsSEFrameLowering.cpp14
-rw-r--r--llvm/lib/Target/Mips/MipsSEISelLowering.cpp4
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp54
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h5
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp3
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp15
-rw-r--r--llvm/lib/Target/PowerPC/P10InstrResources.td5
-rw-r--r--llvm/lib/Target/PowerPC/P9InstrResources.td6
-rw-r--r--llvm/lib/Target/PowerPC/PPC.td18
-rw-r--r--llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp11
-rw-r--r--llvm/lib/Target/PowerPC/PPCBranchSelector.cpp30
-rw-r--r--llvm/lib/Target/PowerPC/PPCCTRLoops.cpp15
-rw-r--r--llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp3
-rw-r--r--llvm/lib/Target/PowerPC/PPCFrameLowering.cpp41
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp150
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.cpp34
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.h4
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstr64Bit.td12
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrAltivec.td39
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrInfo.cpp8
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrVSX.td5
-rw-r--r--llvm/lib/Target/PowerPC/PPCMCInstLower.cpp4
-rw-r--r--llvm/lib/Target/PowerPC/PPCMacroFusion.cpp73
-rw-r--r--llvm/lib/Target/PowerPC/PPCMacroFusion.def75
-rw-r--r--llvm/lib/Target/PowerPC/PPCSubtarget.cpp4
-rw-r--r--llvm/lib/Target/PowerPC/PPCSubtarget.h8
-rw-r--r--llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp91
-rw-r--r--llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h4
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp8
-rw-r--r--llvm/lib/Target/RISCV/RISCVFrameLowering.cpp16
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelLowering.cpp137
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelLowering.h2
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfo.td1
-rw-r--r--llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp28
-rw-r--r--llvm/lib/Target/RISCV/RISCVRegisterInfo.h3
-rw-r--r--llvm/lib/Target/RISCV/RISCVRegisterInfo.td2
-rw-r--r--llvm/lib/Target/RISCV/RISCVSystemOperands.td2
-rw-r--r--llvm/lib/Target/Sparc/DelaySlotFiller.cpp8
-rw-r--r--llvm/lib/Target/Sparc/LeonPasses.cpp12
-rw-r--r--llvm/lib/Target/Sparc/SparcFrameLowering.cpp15
-rw-r--r--llvm/lib/Target/Sparc/SparcMCInstLower.cpp3
-rw-r--r--llvm/lib/Target/SystemZ/SystemZElimCompare.cpp3
-rw-r--r--llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp36
-rw-r--r--llvm/lib/Target/SystemZ/SystemZFrameLowering.h14
-rw-r--r--llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp4
-rw-r--r--llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp4
-rw-r--r--llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.cpp19
-rw-r--r--llvm/lib/Target/VE/VEInstrInfo.cpp3
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp6
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp6
-rw-r--r--llvm/lib/Target/X86/X86DomainReassignment.cpp4
-rw-r--r--llvm/lib/Target/X86/X86DynAllocaExpander.cpp8
-rw-r--r--llvm/lib/Target/X86/X86ExpandPseudo.cpp4
-rw-r--r--llvm/lib/Target/X86/X86FixupLEAs.cpp7
-rw-r--r--llvm/lib/Target/X86/X86FloatingPoint.cpp7
-rw-r--r--llvm/lib/Target/X86/X86FrameLowering.cpp66
-rw-r--r--llvm/lib/Target/X86/X86FrameLowering.h19
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp126
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td4
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.cpp7
-rw-r--r--llvm/lib/Target/X86/X86RegisterBanks.td2
-rw-r--r--llvm/lib/Target/X86/X86TargetTransformInfo.cpp251
-rw-r--r--llvm/lib/Target/XCore/XCoreFrameLowering.cpp8
-rw-r--r--llvm/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp4
-rw-r--r--llvm/lib/Target/XCore/XCoreMCInstLower.cpp3
-rw-r--r--llvm/lib/Transforms/IPO/GlobalOpt.cpp132
-rw-r--r--llvm/lib/Transforms/IPO/OpenMPOpt.cpp37
-rw-r--r--llvm/lib/Transforms/IPO/PartialInlining.cpp3
-rw-r--r--llvm/lib/Transforms/IPO/SampleProfile.cpp41
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp90
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp2
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp2
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp69
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineInternal.h1
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp2
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp4
-rw-r--r--llvm/lib/Transforms/InstCombine/InstructionCombining.cpp23
-rw-r--r--llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp3
-rw-r--r--llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp4
-rw-r--r--llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp66
-rw-r--r--llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp2
-rw-r--r--llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp4
-rw-r--r--llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp54
-rw-r--r--llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp127
-rw-r--r--llvm/lib/Transforms/Scalar/IndVarSimplify.cpp3
-rw-r--r--llvm/lib/Transforms/Scalar/LICM.cpp67
-rw-r--r--llvm/lib/Transforms/Scalar/LoopPassManager.cpp14
-rw-r--r--llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp5
-rw-r--r--llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp189
-rw-r--r--llvm/lib/Transforms/Scalar/Reassociate.cpp9
-rw-r--r--llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp3
-rw-r--r--llvm/lib/Transforms/Utils/BasicBlockUtils.cpp71
-rw-r--r--llvm/lib/Transforms/Utils/BuildLibCalls.cpp18
-rw-r--r--llvm/lib/Transforms/Utils/CloneModule.cpp12
-rw-r--r--llvm/lib/Transforms/Utils/GuardUtils.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/InlineFunction.cpp7
-rw-r--r--llvm/lib/Transforms/Utils/Local.cpp57
-rw-r--r--llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp6
-rw-r--r--llvm/lib/Transforms/Utils/LoopUtils.cpp4
-rw-r--r--llvm/lib/Transforms/Utils/SSAUpdater.cpp3
-rw-r--r--llvm/lib/Transforms/Utils/SampleProfileInference.cpp462
-rw-r--r--llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp4
-rw-r--r--llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp68
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyCFG.cpp52
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp980
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp612
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.cpp20
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.h23
-rw-r--r--llvm/tools/llvm-cov/CodeCoverage.cpp42
-rw-r--r--llvm/tools/llvm-cov/CoverageFilters.cpp7
-rw-r--r--llvm/tools/llvm-cov/CoverageFilters.h14
-rw-r--r--llvm/tools/llvm-diff/lib/DifferenceEngine.cpp26
-rw-r--r--llvm/tools/llvm-dwarfdump/Statistics.cpp139
-rw-r--r--llvm/tools/llvm-profdata/llvm-profdata.cpp24
-rw-r--r--llvm/tools/llvm-readobj/ELFDumper.cpp13
-rw-r--r--llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp71
-rw-r--r--llvm/utils/TableGen/AsmWriterInst.cpp3
-rw-r--r--llvm/utils/TableGen/CodeGenTarget.cpp11
743 files changed, 23440 insertions, 13594 deletions
diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h
index 85a3a8ab6970..2eacf1105c18 100644
--- a/clang/include/clang/AST/Decl.h
+++ b/clang/include/clang/AST/Decl.h
@@ -1840,7 +1840,8 @@ enum class MultiVersionKind {
None,
Target,
CPUSpecific,
- CPUDispatch
+ CPUDispatch,
+ TargetClones
};
/// Represents a function declaration or definition.
@@ -2459,6 +2460,10 @@ public:
/// the target functionality.
bool isTargetMultiVersion() const;
+ /// True if this function is a multiversioned dispatch function as a part of
+ /// the target-clones functionality.
+ bool isTargetClonesMultiVersion() const;
+
/// \brief Get the associated-constraints of this function declaration.
/// Currently, this will either be a vector of size 1 containing the
/// trailing-requires-clause or an empty vector.
diff --git a/clang/include/clang/AST/DeclBase.h b/clang/include/clang/AST/DeclBase.h
index 18468c8ca1c4..2a0a19597391 100644
--- a/clang/include/clang/AST/DeclBase.h
+++ b/clang/include/clang/AST/DeclBase.h
@@ -352,7 +352,7 @@ protected:
DeclContext *Parent, std::size_t Extra = 0);
private:
- bool AccessDeclContextSanity() const;
+ bool AccessDeclContextCheck() const;
/// Get the module ownership kind to use for a local lexical child of \p DC,
/// which may be either a local or (rarely) an imported declaration.
@@ -472,11 +472,11 @@ public:
void setAccess(AccessSpecifier AS) {
Access = AS;
- assert(AccessDeclContextSanity());
+ assert(AccessDeclContextCheck());
}
AccessSpecifier getAccess() const {
- assert(AccessDeclContextSanity());
+ assert(AccessDeclContextCheck());
return AccessSpecifier(Access);
}
diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index 991abef73363..2c63406fba18 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -6305,8 +6305,10 @@ public:
bool isCmpXChg() const {
return getOp() == AO__c11_atomic_compare_exchange_strong ||
getOp() == AO__c11_atomic_compare_exchange_weak ||
+ getOp() == AO__hip_atomic_compare_exchange_strong ||
getOp() == AO__opencl_atomic_compare_exchange_strong ||
getOp() == AO__opencl_atomic_compare_exchange_weak ||
+ getOp() == AO__hip_atomic_compare_exchange_weak ||
getOp() == AO__atomic_compare_exchange ||
getOp() == AO__atomic_compare_exchange_n;
}
@@ -6341,6 +6343,8 @@ public:
auto Kind =
(Op >= AO__opencl_atomic_load && Op <= AO__opencl_atomic_fetch_max)
? AtomicScopeModelKind::OpenCL
+ : (Op >= AO__hip_atomic_load && Op <= AO__hip_atomic_fetch_max)
+ ? AtomicScopeModelKind::HIP
: AtomicScopeModelKind::None;
return AtomicScopeModel::create(Kind);
}
diff --git a/clang/include/clang/AST/GlobalDecl.h b/clang/include/clang/AST/GlobalDecl.h
index 8cb56fb4ae90..88abba28c991 100644
--- a/clang/include/clang/AST/GlobalDecl.h
+++ b/clang/include/clang/AST/GlobalDecl.h
@@ -18,6 +18,7 @@
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclOpenMP.h"
+#include "clang/AST/DeclTemplate.h"
#include "clang/Basic/ABI.h"
#include "clang/Basic/LLVM.h"
#include "llvm/ADT/DenseMapInfo.h"
@@ -129,8 +130,12 @@ public:
}
KernelReferenceKind getKernelReferenceKind() const {
- assert(isa<FunctionDecl>(getDecl()) &&
- cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>() &&
+ assert(((isa<FunctionDecl>(getDecl()) &&
+ cast<FunctionDecl>(getDecl())->hasAttr<CUDAGlobalAttr>()) ||
+ (isa<FunctionTemplateDecl>(getDecl()) &&
+ cast<FunctionTemplateDecl>(getDecl())
+ ->getTemplatedDecl()
+ ->hasAttr<CUDAGlobalAttr>())) &&
"Decl is not a GPU kernel!");
return static_cast<KernelReferenceKind>(Value.getInt());
}
diff --git a/clang/include/clang/ASTMatchers/ASTMatchers.h b/clang/include/clang/ASTMatchers/ASTMatchers.h
index d6e5b215462b..5221d05477d0 100644
--- a/clang/include/clang/ASTMatchers/ASTMatchers.h
+++ b/clang/include/clang/ASTMatchers/ASTMatchers.h
@@ -4227,8 +4227,8 @@ AST_MATCHER(VarDecl, isInitCapture) { return Node.isInitCapture(); }
/// lambdaExpr(forEachLambdaCapture(
/// lambdaCapture(capturesVar(varDecl(hasType(isInteger()))))))
/// will trigger two matches, binding for 'x' and 'y' respectively.
-AST_MATCHER_P(LambdaExpr, forEachLambdaCapture, LambdaCaptureMatcher,
- InnerMatcher) {
+AST_MATCHER_P(LambdaExpr, forEachLambdaCapture,
+ internal::Matcher<LambdaCapture>, InnerMatcher) {
BoundNodesTreeBuilder Result;
bool Matched = false;
for (const auto &Capture : Node.captures()) {
@@ -4655,7 +4655,8 @@ extern const internal::VariadicAllOfMatcher<LambdaCapture> lambdaCapture;
/// lambdaExpr(hasAnyCapture(lambdaCapture())) and
/// lambdaExpr(hasAnyCapture(lambdaCapture(refersToVarDecl(hasName("t")))))
/// both match `[=](){ return t; }`.
-AST_MATCHER_P(LambdaExpr, hasAnyCapture, LambdaCaptureMatcher, InnerMatcher) {
+AST_MATCHER_P(LambdaExpr, hasAnyCapture, internal::Matcher<LambdaCapture>,
+ InnerMatcher) {
for (const LambdaCapture &Capture : Node.captures()) {
clang::ast_matchers::internal::BoundNodesTreeBuilder Result(*Builder);
if (InnerMatcher.matches(Capture, Finder, &Result)) {
diff --git a/clang/include/clang/Analysis/CFG.h b/clang/include/clang/Analysis/CFG.h
index 3b9b22e87f35..b8e453fcc235 100644
--- a/clang/include/clang/Analysis/CFG.h
+++ b/clang/include/clang/Analysis/CFG.h
@@ -515,7 +515,7 @@ public:
/// of the most derived class while we're in the base class.
VirtualBaseBranch,
- /// Number of different kinds, for validity checks. We subtract 1 so that
+ /// Number of different kinds, for assertions. We subtract 1 so that
/// to keep receiving compiler warnings when we don't cover all enum values
/// in a switch.
NumKindsMinusOne = VirtualBaseBranch
diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h
new file mode 100644
index 000000000000..a5d4a5d6ba40
--- /dev/null
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h
@@ -0,0 +1,134 @@
+//===- DataflowAnalysis.h ---------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines base types and functions for building dataflow analyses
+// that run over Control-Flow Graphs (CFGs).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H
+#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H
+
+#include <iterator>
+#include <utility>
+#include <vector>
+
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/Stmt.h"
+#include "clang/Analysis/CFG.h"
+#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
+#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
+#include "llvm/ADT/Any.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+
+namespace clang {
+namespace dataflow {
+
+/// Base class template for dataflow analyses built on a single lattice type.
+///
+/// Requirements:
+///
+/// `Derived` must be derived from a specialization of this class template and
+/// must provide the following public members:
+/// * `LatticeT initialElement()` - returns a lattice element that models the
+/// initial state of a basic block;
+/// * `LatticeT transfer(const Stmt *, const LatticeT &, Environment &)` -
+/// applies the analysis transfer function for a given statement and lattice
+/// element.
+///
+/// `LatticeT` is a bounded join-semilattice that is used by `Derived` and must
+/// provide the following public members:
+/// * `LatticeJoinEffect join(const LatticeT &)` - joins the object and the
+/// argument by computing their least upper bound, modifies the object if
+/// necessary, and returns an effect indicating whether any changes were
+/// made to it;
+/// * `bool operator==(const LatticeT &) const` - returns true if and only if
+/// the object is equal to the argument.
+template <typename Derived, typename LatticeT>
+class DataflowAnalysis : public TypeErasedDataflowAnalysis {
+public:
+ /// Bounded join-semilattice that is used in the analysis.
+ using Lattice = LatticeT;
+
+ explicit DataflowAnalysis(ASTContext &Context) : Context(Context) {}
+
+ ASTContext &getASTContext() final { return Context; }
+
+ TypeErasedLattice typeErasedInitialElement() final {
+ return {static_cast<Derived *>(this)->initialElement()};
+ }
+
+ LatticeJoinEffect joinTypeErased(TypeErasedLattice &E1,
+ const TypeErasedLattice &E2) final {
+ Lattice &L1 = llvm::any_cast<Lattice &>(E1.Value);
+ const Lattice &L2 = llvm::any_cast<const Lattice &>(E2.Value);
+ return L1.join(L2);
+ }
+
+ bool isEqualTypeErased(const TypeErasedLattice &E1,
+ const TypeErasedLattice &E2) final {
+ const Lattice &L1 = llvm::any_cast<const Lattice &>(E1.Value);
+ const Lattice &L2 = llvm::any_cast<const Lattice &>(E2.Value);
+ return L1 == L2;
+ }
+
+ TypeErasedLattice transferTypeErased(const Stmt *Stmt,
+ const TypeErasedLattice &E,
+ Environment &Env) final {
+ const Lattice &L = llvm::any_cast<const Lattice &>(E.Value);
+ return {static_cast<Derived *>(this)->transfer(Stmt, L, Env)};
+ }
+
+private:
+ ASTContext &Context;
+};
+
+// Model of the program at a given program point.
+template <typename LatticeT> struct DataflowAnalysisState {
+ // Model of a program property.
+ LatticeT Lattice;
+
+ // Model of the state of the program (store and heap).
+ Environment Env;
+};
+
+/// Performs dataflow analysis and returns a mapping from basic block IDs to
+/// dataflow analysis states that model the respective basic blocks. Indices
+/// of the returned vector correspond to basic block IDs.
+///
+/// Requirements:
+///
+/// `Cfg` must have been built with `CFG::BuildOptions::setAllAlwaysAdd()` to
+/// ensure that all sub-expressions in a basic block are evaluated.
+template <typename AnalysisT>
+std::vector<llvm::Optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>
+runDataflowAnalysis(const CFG &Cfg, AnalysisT &Analysis,
+ const Environment &InitEnv) {
+ auto TypeErasedBlockStates =
+ runTypeErasedDataflowAnalysis(Cfg, Analysis, InitEnv);
+ std::vector<
+ llvm::Optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>
+ BlockStates;
+ BlockStates.reserve(TypeErasedBlockStates.size());
+ llvm::transform(std::move(TypeErasedBlockStates),
+ std::back_inserter(BlockStates), [](auto &OptState) {
+ return std::move(OptState).map([](auto &&State) {
+ return DataflowAnalysisState<typename AnalysisT::Lattice>{
+ llvm::any_cast<typename AnalysisT::Lattice>(
+ std::move(State.Lattice.Value)),
+ std::move(State.Env)};
+ });
+ });
+ return BlockStates;
+}
+
+} // namespace dataflow
+} // namespace clang
+
+#endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H
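[Editorial note: to make the `Derived`/`LatticeT` contract documented in this new header concrete, a minimal client analysis could look like the following sketch. It is illustrative only, not part of the import; `ReachedLattice` and `ReachedAnalysis` are hypothetical names.]

// Illustrative sketch only (not part of this patch). A two-point lattice and
// an analysis that marks every basic block reached by the fixpoint iteration,
// satisfying the Derived/LatticeT requirements documented above.
#include "clang/Analysis/FlowSensitive/DataflowAnalysis.h"

namespace clang {
namespace dataflow {

struct ReachedLattice {
  bool Reached = false;

  // Least upper bound of the two elements; reports whether `this` changed.
  LatticeJoinEffect join(const ReachedLattice &Other) {
    if (!Reached && Other.Reached) {
      Reached = true;
      return LatticeJoinEffect::Changed;
    }
    return LatticeJoinEffect::Unchanged;
  }

  bool operator==(const ReachedLattice &Other) const {
    return Reached == Other.Reached;
  }
};

class ReachedAnalysis
    : public DataflowAnalysis<ReachedAnalysis, ReachedLattice> {
public:
  explicit ReachedAnalysis(ASTContext &Context) : DataflowAnalysis(Context) {}

  // Initial element for a basic block: not reached yet.
  ReachedLattice initialElement() { return {}; }

  // Transfer function: visiting any statement marks the path as reached.
  ReachedLattice transfer(const Stmt *, const ReachedLattice &L,
                          Environment &) {
    ReachedLattice Result = L;
    Result.Reached = true;
    return Result;
  }
};

} // namespace dataflow
} // namespace clang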
diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
new file mode 100644
index 000000000000..69a5c2e47b66
--- /dev/null
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
@@ -0,0 +1,27 @@
+//===-- DataflowEnvironment.h -----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an Environment class that is used by dataflow analyses
+// that run over Control-Flow Graphs (CFGs) to keep track of the state of the
+// program at given program points.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWENVIRONMENT_H
+#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWENVIRONMENT_H
+
+namespace clang {
+namespace dataflow {
+
+/// Holds the state of the program (store and heap) at a given program point.
+class Environment {};
+
+} // namespace dataflow
+} // namespace clang
+
+#endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWENVIRONMENT_H
diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowLattice.h b/clang/include/clang/Analysis/FlowSensitive/DataflowLattice.h
new file mode 100644
index 000000000000..37d2e0200410
--- /dev/null
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowLattice.h
@@ -0,0 +1,29 @@
+//===- DataflowLattice.h ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines base types for building lattices to be used in dataflow
+// analyses that run over Control-Flow Graphs (CFGs).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWLATTICE_H
+#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWLATTICE_H
+
+namespace clang {
+namespace dataflow {
+
+/// Effect indicating whether a lattice join operation resulted in a new value.
+enum class LatticeJoinEffect {
+ Unchanged,
+ Changed,
+};
+
+} // namespace dataflow
+} // namespace clang
+
+#endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWLATTICE_H
diff --git a/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h b/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h
new file mode 100644
index 000000000000..9448b911f471
--- /dev/null
+++ b/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h
@@ -0,0 +1,95 @@
+//===- TypeErasedDataflowAnalysis.h -----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines type-erased base types and functions for building dataflow
+// analyses that run over Control-Flow Graphs (CFGs).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_TYPEERASEDDATAFLOWANALYSIS_H
+#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_TYPEERASEDDATAFLOWANALYSIS_H
+
+#include <vector>
+
+#include "clang/AST/ASTContext.h"
+#include "clang/AST/Stmt.h"
+#include "clang/Analysis/CFG.h"
+#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
+#include "clang/Analysis/FlowSensitive/DataflowLattice.h"
+#include "llvm/ADT/Any.h"
+#include "llvm/ADT/Optional.h"
+
+namespace clang {
+namespace dataflow {
+
+/// Type-erased lattice element container.
+///
+/// Requirements:
+///
+/// The type of the object stored in the container must be a bounded
+/// join-semilattice.
+struct TypeErasedLattice {
+ llvm::Any Value;
+};
+
+/// Type-erased base class for dataflow analyses built on a single lattice type.
+class TypeErasedDataflowAnalysis {
+public:
+ virtual ~TypeErasedDataflowAnalysis() {}
+
+ /// Returns the `ASTContext` that is used by the analysis.
+ virtual ASTContext &getASTContext() = 0;
+
+ /// Returns a type-erased lattice element that models the initial state of a
+ /// basic block.
+ virtual TypeErasedLattice typeErasedInitialElement() = 0;
+
+ /// Joins two type-erased lattice elements by computing their least upper
+ /// bound. Places the join result in the left element and returns an effect
+ /// indicating whether any changes were made to it.
+ virtual LatticeJoinEffect joinTypeErased(TypeErasedLattice &,
+ const TypeErasedLattice &) = 0;
+
+ /// Returns true if and only if the two given type-erased lattice elements are
+ /// equal.
+ virtual bool isEqualTypeErased(const TypeErasedLattice &,
+ const TypeErasedLattice &) = 0;
+
+ /// Applies the analysis transfer function for a given statement and
+ /// type-erased lattice element.
+ virtual TypeErasedLattice transferTypeErased(const Stmt *,
+ const TypeErasedLattice &,
+ Environment &) = 0;
+};
+
+/// Type-erased model of the program at a given program point.
+struct TypeErasedDataflowAnalysisState {
+ /// Type-erased model of a program property.
+ TypeErasedLattice Lattice;
+
+ /// Model of the state of the program (store and heap).
+ Environment Env;
+};
+
+/// Performs dataflow analysis and returns a mapping from basic block IDs to
+/// dataflow analysis states that model the respective basic blocks. Indices
+/// of the returned vector correspond to basic block IDs.
+///
+/// Requirements:
+///
+/// `Cfg` must have been built with `CFG::BuildOptions::setAllAlwaysAdd()` to
+/// ensure that all sub-expressions in a basic block are evaluated.
+std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>>
+runTypeErasedDataflowAnalysis(const CFG &Cfg,
+ TypeErasedDataflowAnalysis &Analysis,
+ const Environment &InitEnv);
+
+} // namespace dataflow
+} // namespace clang
+
+#endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_TYPEERASEDDATAFLOWANALYSIS_H
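
A minimal caller sketch for the entry point above; `MyTypeErasedAnalysis`, `ASTCtx`, and `Cfg` are assumptions (a subclass implementing the pure virtual methods, an ASTContext, and a CFG built per the stated requirements):

    MyTypeErasedAnalysis Analysis(ASTCtx);   // hypothetical subclass
    Environment InitEnv;                     // models the state at function entry
    std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> BlockStates =
        runTypeErasedDataflowAnalysis(*Cfg, Analysis, InitEnv);
    // BlockStates[B->getBlockID()] is the state computed for block B, or
    // llvm::None if that block was not evaluated.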
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 39588d94cf09..fab3f3edfb83 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -2677,6 +2677,40 @@ def Target : InheritableAttr {
}];
}
+def TargetClones : InheritableAttr {
+ let Spellings = [GCC<"target_clones">];
+ let Args = [VariadicStringArgument<"featuresStrs">];
+ let Documentation = [TargetClonesDocs];
+ let Subjects = SubjectList<[Function], ErrorDiag>;
+ let AdditionalMembers = [{
+ StringRef getFeatureStr(unsigned Index) const {
+ return *(featuresStrs_begin() + Index);
+ }
+ // 'default' is always moved to the end, so it isn't considered
+ // when mangling the index.
+ unsigned getMangledIndex(unsigned Index) const {
+ if (getFeatureStr(Index) == "default")
+ return std::count_if(featuresStrs_begin(), featuresStrs_end(),
+ [](StringRef S) { return S != "default"; });
+
+ return std::count_if(featuresStrs_begin(), featuresStrs_begin() + Index,
+ [](StringRef S) { return S != "default"; });
+ }
+
+ // True if this is the first time this version appears in the feature string.
+ // This is used to make sure we don't try to emit this function multiple
+ // times.
+ bool isFirstOfVersion(unsigned Index) const {
+ StringRef FeatureStr(getFeatureStr(Index));
+ return 0 == std::count_if(
+ featuresStrs_begin(), featuresStrs_begin() + Index,
+ [FeatureStr](StringRef S) { return S == FeatureStr; });
+ }
+ }];
+}
+
+def : MutualExclusions<[TargetClones, Target, CPUDispatch, CPUSpecific]>;
+
def MinVectorWidth : InheritableAttr {
let Spellings = [Clang<"min_vector_width">];
let Args = [UnsignedArgument<"VectorWidth">];
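
A worked example of the TargetClones helpers above, for a hypothetical feature list; the duplicate entry exists purely to illustrate isFirstOfVersion (it would also trigger the duplicate-entries warning added later in this patch):

    // target_clones("avx2", "default", "arch=atom", "avx2")
    // Index:             0         1            2            3
    // getFeatureStr:     "avx2"    "default"    "arch=atom"  "avx2"
    // getMangledIndex:   0         3            1            2    ('default' last)
    // isFirstOfVersion:  true      true         true         false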
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index e7afb3699eb1..10cce4c2d689 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -2233,6 +2233,40 @@ Additionally, a function may not become multiversioned after its first use.
}];
}
+def TargetClonesDocs : Documentation {
+ let Category = DocCatFunction;
+ let Content = [{
+Clang supports the ``target_clones("OPTIONS")`` attribute. This attribute may be
+attached to a function declaration and causes function multiversioning, where
+multiple versions of the function will be emitted with different code
+generation options. Additionally, these versions will be resolved at runtime
+based on the priority of their attribute options. All ``target_clones`` functions
+are considered multiversioned functions.
+
+All multiversioned functions must contain a ``default`` (fallback)
+implementation, otherwise usages of the function are considered invalid.
+Additionally, a function may not become multiversioned after its first use.
+
+The options to ``target_clones`` can either be a target-specific architecture
+(specified as ``arch=CPU``), or one of a list of subtarget features.
+
+Example "subtarget features" from the x86 backend include: "mmx", "sse", "sse4.2",
+"avx", "xop" and largely correspond to the machine specific options handled by
+the front end.
+
+The versions can either be listed as a comma-separated sequence of string
+literals or as a single string literal containing a comma-separated list of
+versions. For compatibility with GCC, the two formats can be mixed. For
+example, the following will emit 4 versions of the function:
+
+ .. code-block:: c++
+
+ __attribute__((target_clones("arch=atom,avx2","arch=ivybridge","default")))
+ void foo() {}
+
+}];
+}
+
def MinVectorWidthDocs : Documentation {
let Category = DocCatFunction;
let Content = [{
diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def
index b05777889e79..ad8b66aa490b 100644
--- a/clang/include/clang/Basic/Builtins.def
+++ b/clang/include/clang/Basic/Builtins.def
@@ -854,6 +854,19 @@ ATOMIC_BUILTIN(__opencl_atomic_fetch_max, "v.", "t")
ATOMIC_BUILTIN(__atomic_fetch_min, "v.", "t")
ATOMIC_BUILTIN(__atomic_fetch_max, "v.", "t")
+// HIP atomic builtins.
+ATOMIC_BUILTIN(__hip_atomic_load, "v.", "t")
+ATOMIC_BUILTIN(__hip_atomic_store, "v.", "t")
+ATOMIC_BUILTIN(__hip_atomic_compare_exchange_weak, "v.", "t")
+ATOMIC_BUILTIN(__hip_atomic_compare_exchange_strong, "v.", "t")
+ATOMIC_BUILTIN(__hip_atomic_exchange, "v.", "t")
+ATOMIC_BUILTIN(__hip_atomic_fetch_add, "v.", "t")
+ATOMIC_BUILTIN(__hip_atomic_fetch_and, "v.", "t")
+ATOMIC_BUILTIN(__hip_atomic_fetch_or, "v.", "t")
+ATOMIC_BUILTIN(__hip_atomic_fetch_xor, "v.", "t")
+ATOMIC_BUILTIN(__hip_atomic_fetch_min, "v.", "t")
+ATOMIC_BUILTIN(__hip_atomic_fetch_max, "v.", "t")
+
#undef ATOMIC_BUILTIN
// Non-overloaded atomic builtins.
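
These mirror the existing __atomic_* forms but take an extra synchronization-scope argument; a sketch of HIP device code using one of them, assuming the __HIP_MEMORY_SCOPE_* macros from the HIP headers (their values match the AtomicScopeHIPModel enum added later in this patch):

    __device__ unsigned Counter;

    __device__ unsigned bump() {
      // Relaxed add, visible to all threads on the same agent (device).
      return __hip_atomic_fetch_add(&Counter, 1u, __ATOMIC_RELAXED,
                                    __HIP_MEMORY_SCOPE_AGENT);
    }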
diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def
index cd6b2df10e52..70b0184f199f 100644
--- a/clang/include/clang/Basic/BuiltinsPPC.def
+++ b/clang/include/clang/Basic/BuiltinsPPC.def
@@ -404,6 +404,7 @@ BUILTIN(__builtin_altivec_vbpermd, "V2ULLiV2ULLiV16Uc", "")
// P8 Crypto built-ins.
BUILTIN(__builtin_altivec_crypto_vsbox, "V2ULLiV2ULLi", "")
BUILTIN(__builtin_altivec_crypto_vpermxor, "V16UcV16UcV16UcV16Uc", "")
+BUILTIN(__builtin_altivec_crypto_vpermxor_be, "V16UcV16UcV16UcV16Uc", "")
BUILTIN(__builtin_altivec_crypto_vshasigmaw, "V4UiV4UiIiIi", "")
BUILTIN(__builtin_altivec_crypto_vshasigmad, "V2ULLiV2ULLiIiIi", "")
BUILTIN(__builtin_altivec_crypto_vcipher, "V2ULLiV2ULLiV2ULLi", "")
@@ -424,6 +425,12 @@ BUILTIN(__builtin_altivec_vctzh, "V8UsV8Us", "")
BUILTIN(__builtin_altivec_vctzw, "V4UiV4Ui", "")
BUILTIN(__builtin_altivec_vctzd, "V2ULLiV2ULLi", "")
+// P8 BCD builtins.
+BUILTIN(__builtin_ppc_bcdadd, "V16UcV16UcV16UcIi", "")
+BUILTIN(__builtin_ppc_bcdsub, "V16UcV16UcV16UcIi", "")
+BUILTIN(__builtin_ppc_bcdadd_p, "iiV16UcV16Uc", "")
+BUILTIN(__builtin_ppc_bcdsub_p, "iiV16UcV16Uc", "")
+
BUILTIN(__builtin_altivec_vclzlsbb, "SiV16Uc", "")
BUILTIN(__builtin_altivec_vctzlsbb, "SiV16Uc", "")
BUILTIN(__builtin_altivec_vprtybw, "V4UiV4Ui", "")
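
From the signatures above, __builtin_ppc_bcdadd takes two 16-byte vectors plus an immediate and returns a vector, while the _p predicate forms take an extra leading int and return an int; a sketch of the shapes only (the immediate values are placeholders, not verified semantics):

    #include <altivec.h>

    vector unsigned char bcd_sum(vector unsigned char A, vector unsigned char B) {
      return __builtin_ppc_bcdadd(A, B, 0 /* immediate operand; value assumed */);
    }

    int bcd_add_pred(vector unsigned char A, vector unsigned char B) {
      return __builtin_ppc_bcdadd_p(0 /* predicate selector; value assumed */, A, B);
    }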
diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index ff8c36910e13..2f50918b527b 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -189,6 +189,12 @@ def err_drv_invalid_mtp : Error<
"invalid thread pointer reading mode '%0'">;
def err_drv_missing_arg_mtp : Error<
"missing argument to '%0'">;
+def warn_drv_missing_plugin_name : Warning<
+ "missing plugin name in %0">,
+ InGroup<InvalidCommandLineArgument>;
+def warn_drv_missing_plugin_arg : Warning<
+ "missing plugin argument for plugin %0 in %1">,
+ InGroup<InvalidCommandLineArgument>;
def err_drv_invalid_libcxx_deployment : Error<
"invalid deployment target for -stdlib=libc++ (requires %0 or later)">;
def err_drv_invalid_argument_to_option : Error<
@@ -394,6 +400,8 @@ def warn_ignoring_verify_debuginfo_preserve_export : Warning<
InGroup<UnusedCommandLineArgument>;
def err_invalid_branch_protection: Error <
"invalid branch protection option '%0' in '%1'">;
+def warn_unsupported_branch_protection: Warning <
+ "invalid branch protection option '%0' in '%1'">, InGroup<BranchProtection>;
def err_invalid_sls_hardening : Error<
"invalid sls hardening option '%0' in '%1'">;
def err_sls_hardening_arm_not_supported : Error<
diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td
index 85d373845c81..90df3a424406 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -56,7 +56,9 @@ def CoroutineMissingUnhandledException :
DiagGroup<"coroutine-missing-unhandled-exception">;
def DeprecatedExperimentalCoroutine :
DiagGroup<"deprecated-experimental-coroutine">;
-def Coroutine : DiagGroup<"coroutine", [CoroutineMissingUnhandledException, DeprecatedExperimentalCoroutine]>;
+def DeprecatedCoroutine :
+ DiagGroup<"deprecated-coroutine", [DeprecatedExperimentalCoroutine]>;
+def Coroutine : DiagGroup<"coroutine", [CoroutineMissingUnhandledException, DeprecatedCoroutine]>;
def ObjCBoolConstantConversion : DiagGroup<"objc-bool-constant-conversion">;
def ConstantConversion : DiagGroup<"constant-conversion",
[BitFieldConstantConversion,
@@ -1273,9 +1275,14 @@ def : DiagGroup<"spirv-compat", [SpirCompat]>; // Alias.
// Warning for the GlobalISel options.
def GlobalISel : DiagGroup<"global-isel">;
+// A warning group for the GNU extension to allow mixed specifier types for
+// target-clones multiversioning.
+def TargetClonesMixedSpecifiers : DiagGroup<"target-clones-mixed-specifiers">;
+
// A warning group specifically for warnings related to function
// multiversioning.
-def FunctionMultiVersioning : DiagGroup<"function-multiversion">;
+def FunctionMultiVersioning
+ : DiagGroup<"function-multiversion", [TargetClonesMixedSpecifiers]>;
def NoDeref : DiagGroup<"noderef">;
@@ -1331,3 +1338,6 @@ def PedanticMacros : DiagGroup<"pedantic-macros",
BuiltinMacroRedefined,
RestrictExpansionMacro,
FinalMacro]>;
+
+def BranchProtection : DiagGroup<"branch-protection">;
+
diff --git a/clang/include/clang/Basic/DiagnosticIDs.h b/clang/include/clang/Basic/DiagnosticIDs.h
index aef86516707c..375930c14848 100644
--- a/clang/include/clang/Basic/DiagnosticIDs.h
+++ b/clang/include/clang/Basic/DiagnosticIDs.h
@@ -30,7 +30,7 @@ namespace clang {
// Size of each of the diagnostic categories.
enum {
DIAG_SIZE_COMMON = 300,
- DIAG_SIZE_DRIVER = 250,
+ DIAG_SIZE_DRIVER = 300,
DIAG_SIZE_FRONTEND = 150,
DIAG_SIZE_SERIALIZATION = 120,
DIAG_SIZE_LEX = 400,
diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td
index 1bc2e8b0c7ef..92e877074ad3 100644
--- a/clang/include/clang/Basic/DiagnosticParseKinds.td
+++ b/clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -1548,6 +1548,9 @@ def note_meant_to_use_typename : Note<
let CategoryName = "Coroutines Issue" in {
def err_for_co_await_not_range_for : Error<
"'co_await' modifier can only be applied to range-based for loop">;
+def warn_deprecated_for_co_await : Warning<
+ "'for co_await' belongs to CoroutineTS instead of C++20, which is deprecated">,
+ InGroup<DeprecatedCoroutine>;
}
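
Code that would now trigger the new warning under -std=c++20; the coroutine types `task`, `generator`, and the helper `use` are hypothetical:

    task<void> consume(generator<int> G) {
      for co_await (int X : G) {   // warning: deprecated Coroutines TS syntax
        use(X);
      }
    }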
let CategoryName = "Concepts Issue" in {
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index dc67f86f25ca..fb5bd53f7432 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -2979,9 +2979,13 @@ def err_attribute_requires_opencl_version : Error<
"attribute %0 is supported in the OpenCL version %1%select{| onwards}2">;
def err_invalid_branch_protection_spec : Error<
"invalid or misplaced branch protection specification '%0'">;
+def warn_unsupported_branch_protection_spec : Warning<
+ "unsupported branch protection specification '%0'">, InGroup<BranchProtection>;
+
def warn_unsupported_target_attribute
: Warning<"%select{unsupported|duplicate|unknown}0%select{| architecture|"
- " tune CPU}1 '%2' in the 'target' attribute string; 'target' "
+ " tune CPU}1 '%2' in the '%select{target|target_clones}3' "
+ "attribute string; '%select{target|target_clones}3' "
"attribute ignored">,
InGroup<IgnoredAttributes>;
def err_attribute_unsupported
@@ -9864,6 +9868,8 @@ def warn_duplicate_attribute_exact : Warning<
def warn_duplicate_attribute : Warning<
"attribute %0 is already applied with different arguments">,
InGroup<IgnoredAttributes>;
+def err_disallowed_duplicate_attribute : Error<
+ "attribute %0 cannot appear more than once on a declaration">;
def warn_sync_fetch_and_nand_semantics_change : Warning<
"the semantics of this intrinsic changed with GCC "
@@ -11254,9 +11260,11 @@ def err_multiversion_duplicate : Error<
"multiversioned function redeclarations require identical target attributes">;
def err_multiversion_noproto : Error<
"multiversioned function must have a prototype">;
-def err_multiversion_disallowed_other_attr : Error<
- "attribute '%select{target|cpu_specific|cpu_dispatch}0' multiversioning cannot be combined"
- " with attribute %1">;
+def err_multiversion_disallowed_other_attr
+ : Error<"attribute "
+ "'%select{|target|cpu_specific|cpu_dispatch|target_clones}0' "
+ "multiversioning cannot be combined"
+ " with attribute %1">;
def err_multiversion_mismatched_attrs
: Error<"attributes on multiversioned functions must all match, attribute "
"%0 %select{is missing|has different arguments}1">;
@@ -11264,11 +11272,14 @@ def err_multiversion_diff : Error<
"multiversioned function declaration has a different %select{calling convention"
"|return type|constexpr specification|inline specification|linkage|"
"language linkage}0">;
-def err_multiversion_doesnt_support : Error<
- "attribute '%select{target|cpu_specific|cpu_dispatch}0' multiversioned functions do not "
- "yet support %select{function templates|virtual functions|"
- "deduced return types|constructors|destructors|deleted functions|"
- "defaulted functions|constexpr functions|consteval function}1">;
+def err_multiversion_doesnt_support
+ : Error<"attribute "
+ "'%select{|target|cpu_specific|cpu_dispatch|target_clones}0' "
+ "multiversioned functions do not "
+ "yet support %select{function templates|virtual functions|"
+ "deduced return types|constructors|destructors|deleted functions|"
+ "defaulted functions|constexpr functions|consteval "
+ "function|lambdas}1">;
def err_multiversion_not_allowed_on_main : Error<
"'main' cannot be a multiversioned function">;
def err_multiversion_not_supported : Error<
@@ -11285,6 +11296,19 @@ def warn_multiversion_duplicate_entries : Warning<
def warn_dispatch_body_ignored : Warning<
"body of cpu_dispatch function will be ignored">,
InGroup<FunctionMultiVersioning>;
+def err_target_clone_must_have_default
+ : Error<"'target_clones' multiversioning requires a default target">;
+def err_target_clone_doesnt_match
+ : Error<"'target_clones' attribute does not match previous declaration">;
+def warn_target_clone_mixed_values
+ : ExtWarn<
+ "mixing 'target_clones' specifier mechanisms is permitted for GCC "
+ "compatibility; use a comma separated sequence of string literals, "
+ "or a string literal containing a comma-separated list of versions">,
+ InGroup<TargetClonesMixedSpecifiers>;
+def warn_target_clone_duplicate_options
+ : Warning<"version list contains duplicate entries">,
+ InGroup<FunctionMultiVersioning>;
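
Declarations that would trigger the new target_clones diagnostics above (a sketch; spellings follow the attribute documentation added earlier in this patch):

    __attribute__((target_clones("avx2", "arch=atom")))
    int f();  // error: 'target_clones' multiversioning requires a default target

    __attribute__((target_clones("avx2", "arch=atom,default")))
    int g();  // ExtWarn: mixed specifier mechanisms, accepted for GCC compatibility

    __attribute__((target_clones("avx2", "avx2", "default")))
    int h();  // warning: version list contains duplicate entries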
// three-way comparison operator diagnostics
def err_implied_comparison_category_type_not_found : Error<
diff --git a/clang/include/clang/Basic/SyncScope.h b/clang/include/clang/Basic/SyncScope.h
index ce8fb9cbed13..34703310af2b 100644
--- a/clang/include/clang/Basic/SyncScope.h
+++ b/clang/include/clang/Basic/SyncScope.h
@@ -40,6 +40,11 @@ namespace clang {
/// Update getAsString.
///
enum class SyncScope {
+ HIPSingleThread,
+ HIPWavefront,
+ HIPWorkgroup,
+ HIPAgent,
+ HIPSystem,
OpenCLWorkGroup,
OpenCLDevice,
OpenCLAllSVMDevices,
@@ -49,6 +54,16 @@ enum class SyncScope {
inline llvm::StringRef getAsString(SyncScope S) {
switch (S) {
+ case SyncScope::HIPSingleThread:
+ return "hip_singlethread";
+ case SyncScope::HIPWavefront:
+ return "hip_wavefront";
+ case SyncScope::HIPWorkgroup:
+ return "hip_workgroup";
+ case SyncScope::HIPAgent:
+ return "hip_agent";
+ case SyncScope::HIPSystem:
+ return "hip_system";
case SyncScope::OpenCLWorkGroup:
return "opencl_workgroup";
case SyncScope::OpenCLDevice:
@@ -62,7 +77,7 @@ inline llvm::StringRef getAsString(SyncScope S) {
}
/// Defines the kind of atomic scope models.
-enum class AtomicScopeModelKind { None, OpenCL };
+enum class AtomicScopeModelKind { None, OpenCL, HIP };
/// Defines the interface for synch scope model.
class AtomicScopeModel {
@@ -138,6 +153,58 @@ public:
}
};
+/// Defines the synch scope model for HIP.
+class AtomicScopeHIPModel : public AtomicScopeModel {
+public:
+ /// The enum values match the pre-defined macros
+ /// __HIP_MEMORY_SCOPE_*, which are used to define memory_scope_*
+ /// enums in hip-c.h.
+ enum ID {
+ SingleThread = 1,
+ Wavefront = 2,
+ Workgroup = 3,
+ Agent = 4,
+ System = 5,
+ Last = System
+ };
+
+ AtomicScopeHIPModel() {}
+
+ SyncScope map(unsigned S) const override {
+ switch (static_cast<ID>(S)) {
+ case SingleThread:
+ return SyncScope::HIPSingleThread;
+ case Wavefront:
+ return SyncScope::HIPWavefront;
+ case Workgroup:
+ return SyncScope::HIPWorkgroup;
+ case Agent:
+ return SyncScope::HIPAgent;
+ case System:
+ return SyncScope::HIPSystem;
+ }
+ llvm_unreachable("Invalid language synch scope value");
+ }
+
+ bool isValid(unsigned S) const override {
+ return S >= static_cast<unsigned>(SingleThread) &&
+ S <= static_cast<unsigned>(Last);
+ }
+
+ ArrayRef<unsigned> getRuntimeValues() const override {
+ static_assert(Last == System, "Does not include all synch scopes");
+ static const unsigned Scopes[] = {
+ static_cast<unsigned>(SingleThread), static_cast<unsigned>(Wavefront),
+ static_cast<unsigned>(Workgroup), static_cast<unsigned>(Agent),
+ static_cast<unsigned>(System)};
+ return llvm::makeArrayRef(Scopes);
+ }
+
+ unsigned getFallBackValue() const override {
+ return static_cast<unsigned>(System);
+ }
+};
+
inline std::unique_ptr<AtomicScopeModel>
AtomicScopeModel::create(AtomicScopeModelKind K) {
switch (K) {
@@ -145,9 +212,11 @@ AtomicScopeModel::create(AtomicScopeModelKind K) {
return std::unique_ptr<AtomicScopeModel>{};
case AtomicScopeModelKind::OpenCL:
return std::make_unique<AtomicScopeOpenCLModel>();
+ case AtomicScopeModelKind::HIP:
+ return std::make_unique<AtomicScopeHIPModel>();
}
llvm_unreachable("Invalid atomic scope model kind");
}
-}
+} // namespace clang
#endif
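
Putting the pieces together, a caller selects the HIP model through the factory and maps runtime scope values (the __HIP_MEMORY_SCOPE_* macro values) to language-level scopes; a short sketch:

    std::unique_ptr<AtomicScopeModel> M =
        AtomicScopeModel::create(AtomicScopeModelKind::HIP);
    assert(M->isValid(3));      // 3 == AtomicScopeHIPModel::Workgroup
    SyncScope SS = M->map(3);   // yields SyncScope::HIPWorkgroup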
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 9bde64cf49fd..4e6dd2050344 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -2504,6 +2504,9 @@ defm rwpi : BoolFOption<"rwpi",
NegFlag<SetFalse>>;
def fplugin_EQ : Joined<["-"], "fplugin=">, Group<f_Group>, Flags<[NoXarchOption]>, MetaVarName<"<dsopath>">,
HelpText<"Load the named plugin (dynamic shared object)">;
+def fplugin_arg : Joined<["-"], "fplugin-arg-">,
+ MetaVarName<"<name>-<arg>">,
+ HelpText<"Pass <arg> to plugin <name>">;
def fpass_plugin_EQ : Joined<["-"], "fpass-plugin=">,
Group<f_Group>, Flags<[CC1Option]>, MetaVarName<"<dsopath>">,
HelpText<"Load pass plugin from a dynamic shared object file (only with new pass manager).">,
@@ -2786,10 +2789,11 @@ def fvisibility_ms_compat : Flag<["-"], "fvisibility-ms-compat">, Group<f_Group>
def fvisibility_global_new_delete_hidden : Flag<["-"], "fvisibility-global-new-delete-hidden">, Group<f_Group>,
HelpText<"Give global C++ operator new and delete declarations hidden visibility">, Flags<[CC1Option]>,
MarshallingInfoFlag<LangOpts<"GlobalAllocationFunctionVisibilityHidden">>;
-def fnew_infallible : Flag<["-"], "fnew-infallible">, Group<f_Group>,
- HelpText<"Treats throwing global C++ operator new as always returning valid memory "
- "(annotates with __attribute__((returns_nonnull)) and throw()). This is detectable in source.">,
- Flags<[CC1Option]>, MarshallingInfoFlag<LangOpts<"NewInfallible">>;
+defm new_infallible : BoolFOption<"new-infallible",
+ LangOpts<"NewInfallible">, DefaultFalse,
+ PosFlag<SetTrue, [], "Enable">, NegFlag<SetFalse, [], "Disable">,
+ BothFlags<[CC1Option], " treating throwing global C++ operator new as always returning valid memory "
+ "(annotates with __attribute__((returns_nonnull)) and throw()). This is detectable in source.">>;
defm whole_program_vtables : BoolFOption<"whole-program-vtables",
CodeGenOpts<"WholeProgramVTables">, DefaultFalse,
PosFlag<SetTrue, [CC1Option], "Enables whole-program vtable optimization. Requires -flto">,
@@ -4519,7 +4523,7 @@ def frecord_marker_EQ : Joined<["-"], "frecord-marker=">, Group<gfortran_Group>;
defm aggressive_function_elimination : BooleanFFlag<"aggressive-function-elimination">, Group<gfortran_Group>;
defm align_commons : BooleanFFlag<"align-commons">, Group<gfortran_Group>;
defm all_intrinsics : BooleanFFlag<"all-intrinsics">, Group<gfortran_Group>;
-defm automatic : BooleanFFlag<"automatic">, Group<gfortran_Group>;
+def fautomatic : Flag<["-"], "fautomatic">; // -fno-automatic is significant
defm backtrace : BooleanFFlag<"backtrace">, Group<gfortran_Group>;
defm bounds_check : BooleanFFlag<"bounds-check">, Group<gfortran_Group>;
defm check_array_temporaries : BooleanFFlag<"check-array-temporaries">, Group<gfortran_Group>;
@@ -4616,6 +4620,9 @@ defm backslash : OptInFC1FFlag<"backslash", "Specify that backslash in string in
defm xor_operator : OptInFC1FFlag<"xor-operator", "Enable .XOR. as a synonym of .NEQV.">;
defm logical_abbreviations : OptInFC1FFlag<"logical-abbreviations", "Enable logical abbreviations">;
defm implicit_none : OptInFC1FFlag<"implicit-none", "No implicit typing allowed unless overridden by IMPLICIT statements">;
+
+def fno_automatic : Flag<["-"], "fno-automatic">, Group<f_Group>,
+ HelpText<"Implies the SAVE attribute for non-automatic local objects in subprograms unless RECURSIVE">;
}
def J : JoinedOrSeparate<["-"], "J">,
@@ -5059,9 +5066,10 @@ def msmall_data_limit : Separate<["-"], "msmall-data-limit">,
def funwind_tables_EQ : Joined<["-"], "funwind-tables=">,
HelpText<"Generate unwinding tables for all functions">,
MarshallingInfoInt<CodeGenOpts<"UnwindTables">>;
-def mconstructor_aliases : Flag<["-"], "mconstructor-aliases">,
- HelpText<"Emit complete constructors and destructors as aliases when possible">,
- MarshallingInfoFlag<CodeGenOpts<"CXXCtorDtorAliases">>;
+defm constructor_aliases : BoolOption<"m", "constructor-aliases",
+ CodeGenOpts<"CXXCtorDtorAliases">, DefaultFalse,
+ PosFlag<SetTrue, [], "Enable">, NegFlag<SetFalse, [], "Disable">,
+ BothFlags<[CC1Option], " emitting complete constructors and destructors as aliases when possible">>;
def mlink_bitcode_file : Separate<["-"], "mlink-bitcode-file">,
HelpText<"Link the given bitcode file before performing optimizations.">;
def mlink_builtin_bitcode : Separate<["-"], "mlink-builtin-bitcode">,
@@ -5174,10 +5182,6 @@ defm debug_pass_manager : BoolOption<"f", "debug-pass-manager",
CodeGenOpts<"DebugPassManager">, DefaultFalse,
PosFlag<SetTrue, [], "Prints debug information for the new pass manager">,
NegFlag<SetFalse, [], "Disables debug printing for the new pass manager">>;
-def fexperimental_debug_variable_locations : Flag<["-"],
- "fexperimental-debug-variable-locations">,
- HelpText<"Use experimental new value-tracking variable locations">,
- MarshallingInfoFlag<CodeGenOpts<"ValueTrackingVariableLocations">>;
def fverify_debuginfo_preserve
: Flag<["-"], "fverify-debuginfo-preserve">,
HelpText<"Enable Debug Info Metadata preservation testing in "
diff --git a/clang/include/clang/Frontend/PrecompiledPreamble.h b/clang/include/clang/Frontend/PrecompiledPreamble.h
index bb7fd97fe5df..dacbffef0b12 100644
--- a/clang/include/clang/Frontend/PrecompiledPreamble.h
+++ b/clang/include/clang/Frontend/PrecompiledPreamble.h
@@ -274,7 +274,7 @@ class PreambleCallbacks {
public:
virtual ~PreambleCallbacks() = default;
- /// Called before FrontendAction::BeginSourceFile.
+ /// Called before FrontendAction::Execute.
/// Can be used to store references to various CompilerInstance fields
/// (e.g. SourceManager) that may be interesting to the consumers of other
/// callbacks.
@@ -291,7 +291,7 @@ public:
/// used instead, but having only this method allows a simpler API.
virtual void HandleTopLevelDecl(DeclGroupRef DG);
/// Creates wrapper class for PPCallbacks so we can also process information
- /// about includes that are inside of a preamble
+ /// about includes that are inside of a preamble. Called after BeforeExecute.
virtual std::unique_ptr<PPCallbacks> createPPCallbacks();
/// The returned CommentHandler will be added to the preprocessor if not null.
virtual CommentHandler *getCommentHandler();
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 43ce5d983217..1a82a9498d1d 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -1296,6 +1296,11 @@ public:
EK_Decltype, EK_TemplateArgument, EK_Other
} ExprContext;
+ // A context can be nested in both a discarded statement context and
+ // an immediate function context, so they need to be tracked independently.
+ bool InDiscardedStatement;
+ bool InImmediateFunctionContext;
+
ExpressionEvaluationContextRecord(ExpressionEvaluationContext Context,
unsigned NumCleanupObjects,
CleanupInfo ParentCleanup,
@@ -1303,7 +1308,8 @@ public:
ExpressionKind ExprContext)
: Context(Context), ParentCleanup(ParentCleanup),
NumCleanupObjects(NumCleanupObjects), NumTypos(0),
- ManglingContextDecl(ManglingContextDecl), ExprContext(ExprContext) {}
+ ManglingContextDecl(ManglingContextDecl), ExprContext(ExprContext),
+ InDiscardedStatement(false), InImmediateFunctionContext(false) {}
bool isUnevaluated() const {
return Context == ExpressionEvaluationContext::Unevaluated ||
@@ -1317,7 +1323,13 @@ public:
}
bool isImmediateFunctionContext() const {
- return Context == ExpressionEvaluationContext::ImmediateFunctionContext;
+ return Context == ExpressionEvaluationContext::ImmediateFunctionContext ||
+ InImmediateFunctionContext;
+ }
+
+ bool isDiscardedStatementContext() const {
+ return Context == ExpressionEvaluationContext::DiscardedStatement ||
+ InDiscardedStatement;
}
};
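
A sketch of the nesting the new flags describe (names and values are illustrative): when B is false, the branch below is a discarded statement context, and the consteval call nested inside it additionally involves an immediate function context, so both properties can hold at once.

    consteval int sq(int N) { return N * N; }

    template <bool B> constexpr int pick() {
      if constexpr (B) {   // discarded statement context when B is false
        return sq(2);      // consteval call nested inside that statement
      }
      return 0;
    }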
@@ -4351,6 +4363,10 @@ public:
llvm::Error isValidSectionSpecifier(StringRef Str);
bool checkSectionName(SourceLocation LiteralLoc, StringRef Str);
bool checkTargetAttr(SourceLocation LiteralLoc, StringRef Str);
+ bool checkTargetClonesAttrString(SourceLocation LiteralLoc, StringRef Str,
+ const StringLiteral *Literal,
+ bool &HasDefault, bool &HasCommas,
+ SmallVectorImpl<StringRef> &Strings);
bool checkMSInheritanceAttrOnDefinition(
CXXRecordDecl *RD, SourceRange Range, bool BestCase,
MSInheritanceModel SemanticSpelling);
@@ -9150,14 +9166,7 @@ public:
bool isImmediateFunctionContext() const {
assert(!ExprEvalContexts.empty() &&
"Must be in an expression evaluation context");
- for (const ExpressionEvaluationContextRecord &context :
- llvm::reverse(ExprEvalContexts)) {
- if (context.isImmediateFunctionContext())
- return true;
- if (context.isUnevaluated())
- return false;
- }
- return false;
+ return ExprEvalContexts.back().isImmediateFunctionContext();
}
/// RAII class used to determine whether SFINAE has
diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
index c52da3305f7c..af02fa2e7e87 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -195,15 +195,23 @@ public:
llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
openFileForRead(const Twine &Path) override;
- void clearIgnoredFiles() { IgnoredFiles.clear(); }
- void ignoreFile(StringRef Filename);
+ /// Disable minimization of the given file.
+ void disableMinimization(StringRef Filename);
+ /// Enable minimization of all files.
+ void enableMinimizationOfAllFiles() { NotToBeMinimized.clear(); }
private:
- bool shouldIgnoreFile(StringRef Filename);
+ /// Check whether the file should be minimized.
+ bool shouldMinimize(StringRef Filename);
llvm::ErrorOr<const CachedFileSystemEntry *>
getOrCreateFileSystemEntry(const StringRef Filename);
+ /// Create a cached file system entry based on the initial status result.
+ CachedFileSystemEntry
+ createFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> &&MaybeStatus,
+ StringRef Filename, bool ShouldMinimize);
+
/// The global cache shared between worker threads.
DependencyScanningFilesystemSharedCache &SharedCache;
/// The local cache is used by the worker thread to cache file system queries
@@ -214,7 +222,7 @@ private:
/// currently active preprocessor.
ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings;
/// The set of files that should not be minimized.
- llvm::StringSet<> IgnoredFiles;
+ llvm::StringSet<> NotToBeMinimized;
};
} // end namespace dependencies
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 294cc20f76c5..2d85d72e5b8a 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -11800,6 +11800,15 @@ void ASTContext::getFunctionFeatureMap(llvm::StringMap<bool> &FeatureMap,
Target->getTargetOpts().FeaturesAsWritten.begin(),
Target->getTargetOpts().FeaturesAsWritten.end());
Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features);
+ } else if (const auto *TC = FD->getAttr<TargetClonesAttr>()) {
+ std::vector<std::string> Features;
+ StringRef VersionStr = TC->getFeatureStr(GD.getMultiVersionIndex());
+ if (VersionStr.startswith("arch="))
+ TargetCPU = VersionStr.drop_front(sizeof("arch=") - 1);
+ else if (VersionStr != "default")
+ Features.push_back((StringRef{"+"} + VersionStr).str());
+
+ Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features);
} else {
FeatureMap = Target->getTargetOpts().FeatureMap;
}
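
A worked example of the new branch; indices follow GD.getMultiVersionIndex(), and the attribute spelling matches the documentation added earlier in this patch:

    __attribute__((target_clones("arch=ivybridge", "avx2", "default")))
    int work();
    // index 0: VersionStr "arch=ivybridge" -> TargetCPU = "ivybridge"
    // index 1: VersionStr "avx2"           -> Features = {"+avx2"}
    // index 2: VersionStr "default"        -> base feature map, no extras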
diff --git a/clang/lib/AST/ASTStructuralEquivalence.cpp b/clang/lib/AST/ASTStructuralEquivalence.cpp
index e85feb779190..7fd24e2aa9ad 100644
--- a/clang/lib/AST/ASTStructuralEquivalence.cpp
+++ b/clang/lib/AST/ASTStructuralEquivalence.cpp
@@ -1347,6 +1347,42 @@ IsStructurallyEquivalentLambdas(StructuralEquivalenceContext &Context,
return true;
}
+/// Determine whether the contexts of two classes are equivalent.
+static bool IsRecordContextStructurallyEquivalent(RecordDecl *D1,
+ RecordDecl *D2) {
+ // The context should be completely equal, including anonymous and inline
+ // namespaces.
+ // We compare objects as part of full translation units, not subtrees of
+ // translation units.
+ DeclContext *DC1 = D1->getDeclContext()->getNonTransparentContext();
+ DeclContext *DC2 = D2->getDeclContext()->getNonTransparentContext();
+ while (true) {
+ // Special case: We allow a struct defined in a function to be equivalent
+ // with a similar struct defined outside of a function.
+ if ((DC1->isFunctionOrMethod() && DC2->isTranslationUnit()) ||
+ (DC2->isFunctionOrMethod() && DC1->isTranslationUnit()))
+ return true;
+
+ if (DC1->getDeclKind() != DC2->getDeclKind())
+ return false;
+ if (DC1->isTranslationUnit())
+ break;
+ if (DC1->isInlineNamespace() != DC2->isInlineNamespace())
+ return false;
+ if (const auto *ND1 = dyn_cast<NamedDecl>(DC1)) {
+ const auto *ND2 = cast<NamedDecl>(DC2);
+ if (!DC1->isInlineNamespace() &&
+ !IsStructurallyEquivalent(ND1->getIdentifier(), ND2->getIdentifier()))
+ return false;
+ }
+
+ DC1 = DC1->getParent()->getNonTransparentContext();
+ DC2 = DC2->getParent()->getNonTransparentContext();
+ }
+
+ return true;
+}
+
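Concretely, a sketch of what the new check accepts and rejects; imagine each pair of declarations coming from two different translation units being compared:

    // Rejected: contexts differ (Namespace vs. TranslationUnit).
    namespace N { struct S { int I; }; }
    struct S { int I; };

    // Accepted via the function/translation-unit special case above.
    void f() { struct L { int I; }; }
    struct L { int I; };
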
/// Determine structural equivalence of two records.
static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context,
RecordDecl *D1, RecordDecl *D2) {
@@ -1386,6 +1422,12 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context,
}
}
+ // If the records occur in different contexts (namespaces), they should be
+ // considered different. This is especially important if the definition of
+ // one or both records is missing.
+ if (!IsRecordContextStructurallyEquivalent(D1, D2))
+ return false;
+
// If both declarations are class template specializations, we know
// the ODR applies, so check the template and template arguments.
const auto *Spec1 = dyn_cast<ClassTemplateSpecializationDecl>(D1);
diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index 5ea091edcf4c..68dfef248f65 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -3271,6 +3271,8 @@ MultiVersionKind FunctionDecl::getMultiVersionKind() const {
return MultiVersionKind::CPUDispatch;
if (hasAttr<CPUSpecificAttr>())
return MultiVersionKind::CPUSpecific;
+ if (hasAttr<TargetClonesAttr>())
+ return MultiVersionKind::TargetClones;
return MultiVersionKind::None;
}
@@ -3286,6 +3288,10 @@ bool FunctionDecl::isTargetMultiVersion() const {
return isMultiVersion() && hasAttr<TargetAttr>();
}
+bool FunctionDecl::isTargetClonesMultiVersion() const {
+ return isMultiVersion() && hasAttr<TargetClonesAttr>();
+}
+
void
FunctionDecl::setPreviousDeclaration(FunctionDecl *PrevDecl) {
redeclarable_base::setPreviousDecl(PrevDecl);
diff --git a/clang/lib/AST/DeclBase.cpp b/clang/lib/AST/DeclBase.cpp
index 4044404f74ef..064012ba865c 100644
--- a/clang/lib/AST/DeclBase.cpp
+++ b/clang/lib/AST/DeclBase.cpp
@@ -964,7 +964,7 @@ SourceLocation Decl::getBodyRBrace() const {
return {};
}
-bool Decl::AccessDeclContextSanity() const {
+bool Decl::AccessDeclContextCheck() const {
#ifndef NDEBUG
// Suppress this check if any of the following hold:
// 1. this is the translation unit (and thus has no parent)
@@ -1212,7 +1212,7 @@ bool DeclContext::Encloses(const DeclContext *DC) const {
return getPrimaryContext()->Encloses(DC);
for (; DC; DC = DC->getParent())
- if (DC->getPrimaryContext() == this)
+ if (!isa<LinkageSpecDecl>(DC) && DC->getPrimaryContext() == this)
return true;
return false;
}
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 7bd3dce43f4d..d3cb2ff3734c 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -4681,6 +4681,7 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
return 2;
case AO__opencl_atomic_load:
+ case AO__hip_atomic_load:
case AO__c11_atomic_store:
case AO__c11_atomic_exchange:
case AO__atomic_load:
@@ -4713,7 +4714,15 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
case AO__atomic_fetch_max:
return 3;
+ case AO__hip_atomic_exchange:
+ case AO__hip_atomic_fetch_add:
+ case AO__hip_atomic_fetch_and:
+ case AO__hip_atomic_fetch_or:
+ case AO__hip_atomic_fetch_xor:
+ case AO__hip_atomic_fetch_min:
+ case AO__hip_atomic_fetch_max:
case AO__opencl_atomic_store:
+ case AO__hip_atomic_store:
case AO__opencl_atomic_exchange:
case AO__opencl_atomic_fetch_add:
case AO__opencl_atomic_fetch_sub:
@@ -4728,9 +4737,10 @@ unsigned AtomicExpr::getNumSubExprs(AtomicOp Op) {
case AO__c11_atomic_compare_exchange_strong:
case AO__c11_atomic_compare_exchange_weak:
return 5;
-
+ case AO__hip_atomic_compare_exchange_strong:
case AO__opencl_atomic_compare_exchange_strong:
case AO__opencl_atomic_compare_exchange_weak:
+ case AO__hip_atomic_compare_exchange_weak:
case AO__atomic_compare_exchange:
case AO__atomic_compare_exchange_n:
return 6;
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index fe96db9ca918..99babd58b027 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -7483,7 +7483,7 @@ public:
const Expr *Source = E->getSourceExpr();
if (!Source)
return Error(E);
- if (Source == E) { // sanity checking.
+ if (Source == E) {
assert(0 && "OpaqueValueExpr recursively refers to itself");
return Error(E);
}
diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp
index 163d4e95386e..79a448a2435c 100644
--- a/clang/lib/AST/MicrosoftMangle.cpp
+++ b/clang/lib/AST/MicrosoftMangle.cpp
@@ -21,6 +21,7 @@
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
+#include "clang/AST/GlobalDecl.h"
#include "clang/AST/Mangle.h"
#include "clang/AST/VTableBuilder.h"
#include "clang/Basic/ABI.h"
@@ -39,6 +40,18 @@ using namespace clang;
namespace {
+// Get the GlobalDecl of the DeclContext enclosing local entities.
+static GlobalDecl getGlobalDeclAsDeclContext(const DeclContext *DC) {
+ GlobalDecl GD;
+ if (auto *CD = dyn_cast<CXXConstructorDecl>(DC))
+ GD = GlobalDecl(CD, Ctor_Complete);
+ else if (auto *DD = dyn_cast<CXXDestructorDecl>(DC))
+ GD = GlobalDecl(DD, Dtor_Complete);
+ else
+ GD = GlobalDecl(cast<FunctionDecl>(DC));
+ return GD;
+}
+
struct msvc_hashing_ostream : public llvm::raw_svector_ostream {
raw_ostream &OS;
llvm::SmallString<64> Buffer;
@@ -345,9 +358,9 @@ public:
raw_ostream &getStream() const { return Out; }
- void mangle(const NamedDecl *D, StringRef Prefix = "?");
- void mangleName(const NamedDecl *ND);
- void mangleFunctionEncoding(const FunctionDecl *FD, bool ShouldMangle);
+ void mangle(GlobalDecl GD, StringRef Prefix = "?");
+ void mangleName(GlobalDecl GD);
+ void mangleFunctionEncoding(GlobalDecl GD, bool ShouldMangle);
void mangleVariableEncoding(const VarDecl *VD);
void mangleMemberDataPointer(const CXXRecordDecl *RD, const ValueDecl *VD,
StringRef Prefix = "$");
@@ -370,7 +383,7 @@ public:
const FunctionDecl *D = nullptr,
bool ForceThisQuals = false,
bool MangleExceptionSpec = true);
- void mangleNestedName(const NamedDecl *ND);
+ void mangleNestedName(GlobalDecl GD);
private:
bool isStructorDecl(const NamedDecl *ND) const {
@@ -384,10 +397,10 @@ private:
AddrSpace == LangAS::ptr32_uptr));
}
- void mangleUnqualifiedName(const NamedDecl *ND) {
- mangleUnqualifiedName(ND, ND->getDeclName());
+ void mangleUnqualifiedName(GlobalDecl GD) {
+ mangleUnqualifiedName(GD, cast<NamedDecl>(GD.getDecl())->getDeclName());
}
- void mangleUnqualifiedName(const NamedDecl *ND, DeclarationName Name);
+ void mangleUnqualifiedName(GlobalDecl GD, DeclarationName Name);
void mangleSourceName(StringRef Name);
void mangleOperatorName(OverloadedOperatorKind OO, SourceLocation Loc);
void mangleCXXDtorType(CXXDtorType T);
@@ -396,9 +409,9 @@ private:
void manglePointerCVQualifiers(Qualifiers Quals);
void manglePointerExtQualifiers(Qualifiers Quals, QualType PointeeType);
- void mangleUnscopedTemplateName(const TemplateDecl *ND);
+ void mangleUnscopedTemplateName(GlobalDecl GD);
void
- mangleTemplateInstantiationName(const TemplateDecl *TD,
+ mangleTemplateInstantiationName(GlobalDecl GD,
const TemplateArgumentList &TemplateArgs);
void mangleObjCMethodName(const ObjCMethodDecl *MD);
@@ -533,7 +546,8 @@ MicrosoftMangleContextImpl::shouldMangleStringLiteral(const StringLiteral *SL) {
return true;
}
-void MicrosoftCXXNameMangler::mangle(const NamedDecl *D, StringRef Prefix) {
+void MicrosoftCXXNameMangler::mangle(GlobalDecl GD, StringRef Prefix) {
+ const NamedDecl *D = cast<NamedDecl>(GD.getDecl());
// MSVC doesn't mangle C++ names the same way it mangles extern "C" names.
// Therefore it's really important that we don't decorate the
// name with leading underscores or leading/trailing at signs. So, by
@@ -542,9 +556,9 @@ void MicrosoftCXXNameMangler::mangle(const NamedDecl *D, StringRef Prefix) {
// <mangled-name> ::= ? <name> <type-encoding>
Out << Prefix;
- mangleName(D);
+ mangleName(GD);
if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D))
- mangleFunctionEncoding(FD, Context.shouldMangleDeclName(FD));
+ mangleFunctionEncoding(GD, Context.shouldMangleDeclName(FD));
else if (const VarDecl *VD = dyn_cast<VarDecl>(D))
mangleVariableEncoding(VD);
else if (isa<MSGuidDecl>(D))
@@ -558,8 +572,9 @@ void MicrosoftCXXNameMangler::mangle(const NamedDecl *D, StringRef Prefix) {
llvm_unreachable("Tried to mangle unexpected NamedDecl!");
}
-void MicrosoftCXXNameMangler::mangleFunctionEncoding(const FunctionDecl *FD,
+void MicrosoftCXXNameMangler::mangleFunctionEncoding(GlobalDecl GD,
bool ShouldMangle) {
+ const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
// <type-encoding> ::= <function-class> <function-type>
// Since MSVC operates on the type as written and not the canonical type, it
@@ -770,13 +785,13 @@ void MicrosoftCXXNameMangler::mangleVirtualMemPtrThunk(
mangleCallingConvention(MD->getType()->castAs<FunctionProtoType>());
}
-void MicrosoftCXXNameMangler::mangleName(const NamedDecl *ND) {
+void MicrosoftCXXNameMangler::mangleName(GlobalDecl GD) {
// <name> ::= <unscoped-name> {[<named-scope>]+ | [<nested-name>]}? @
// Always start with the unqualified name.
- mangleUnqualifiedName(ND);
+ mangleUnqualifiedName(GD);
- mangleNestedName(ND);
+ mangleNestedName(GD);
// Terminate the whole name with an '@'.
Out << '@';
@@ -844,13 +859,14 @@ void MicrosoftCXXNameMangler::mangleBits(llvm::APInt Value) {
}
}
-static const TemplateDecl *
-isTemplate(const NamedDecl *ND, const TemplateArgumentList *&TemplateArgs) {
+static GlobalDecl isTemplate(GlobalDecl GD,
+ const TemplateArgumentList *&TemplateArgs) {
+ const NamedDecl *ND = cast<NamedDecl>(GD.getDecl());
// Check if we have a function template.
if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(ND)) {
if (const TemplateDecl *TD = FD->getPrimaryTemplate()) {
TemplateArgs = FD->getTemplateSpecializationArgs();
- return TD;
+ return GD.getWithDecl(TD);
}
}
@@ -858,21 +874,22 @@ isTemplate(const NamedDecl *ND, const TemplateArgumentList *&TemplateArgs) {
if (const ClassTemplateSpecializationDecl *Spec =
dyn_cast<ClassTemplateSpecializationDecl>(ND)) {
TemplateArgs = &Spec->getTemplateArgs();
- return Spec->getSpecializedTemplate();
+ return GD.getWithDecl(Spec->getSpecializedTemplate());
}
// Check if we have a variable template.
if (const VarTemplateSpecializationDecl *Spec =
dyn_cast<VarTemplateSpecializationDecl>(ND)) {
TemplateArgs = &Spec->getTemplateArgs();
- return Spec->getSpecializedTemplate();
+ return GD.getWithDecl(Spec->getSpecializedTemplate());
}
- return nullptr;
+ return GlobalDecl();
}
-void MicrosoftCXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND,
+void MicrosoftCXXNameMangler::mangleUnqualifiedName(GlobalDecl GD,
DeclarationName Name) {
+ const NamedDecl *ND = cast<NamedDecl>(GD.getDecl());
// <unqualified-name> ::= <operator-name>
// ::= <ctor-dtor-name>
// ::= <source-name>
@@ -880,11 +897,11 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND,
// Check if we have a template.
const TemplateArgumentList *TemplateArgs = nullptr;
- if (const TemplateDecl *TD = isTemplate(ND, TemplateArgs)) {
+ if (GlobalDecl TD = isTemplate(GD, TemplateArgs)) {
// Function templates aren't considered for name back referencing. This
// makes sense since function templates aren't likely to occur multiple
// times in a symbol.
- if (isa<FunctionTemplateDecl>(TD)) {
+ if (isa<FunctionTemplateDecl>(TD.getDecl())) {
mangleTemplateInstantiationName(TD, *TemplateArgs);
Out << '@';
return;
@@ -945,7 +962,19 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND,
switch (Name.getNameKind()) {
case DeclarationName::Identifier: {
if (const IdentifierInfo *II = Name.getAsIdentifierInfo()) {
- mangleSourceName(II->getName());
+ bool IsDeviceStub =
+ ND &&
+ ((isa<FunctionDecl>(ND) && ND->hasAttr<CUDAGlobalAttr>()) ||
+ (isa<FunctionTemplateDecl>(ND) &&
+ cast<FunctionTemplateDecl>(ND)
+ ->getTemplatedDecl()
+ ->hasAttr<CUDAGlobalAttr>())) &&
+ GD.getKernelReferenceKind() == KernelReferenceKind::Stub;
+ if (IsDeviceStub)
+ mangleSourceName(
+ (llvm::Twine("__device_stub__") + II->getName()).str());
+ else
+ mangleSourceName(II->getName());
break;
}
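
The effect on kernel stubs, roughly; the manglings below are illustrative for `void kern(int)` with the default calling convention, not taken from a test in this patch:

    // __global__ void kern(int);
    // kernel itself:  ?kern@@YAXH@Z
    // host-side stub: ?__device_stub__kern@@YAXH@Z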
@@ -1146,7 +1175,8 @@ void MicrosoftCXXNameMangler::mangleUnqualifiedName(const NamedDecl *ND,
// <postfix> ::= <unqualified-name> [<postfix>]
// ::= <substitution> [<postfix>]
-void MicrosoftCXXNameMangler::mangleNestedName(const NamedDecl *ND) {
+void MicrosoftCXXNameMangler::mangleNestedName(GlobalDecl GD) {
+ const NamedDecl *ND = cast<NamedDecl>(GD.getDecl());
const DeclContext *DC = getEffectiveDeclContext(ND);
while (!DC->isTranslationUnit()) {
if (isa<TagDecl>(ND) || isa<VarDecl>(ND)) {
@@ -1229,7 +1259,7 @@ void MicrosoftCXXNameMangler::mangleNestedName(const NamedDecl *ND) {
} else if (isa<NamedDecl>(DC)) {
ND = cast<NamedDecl>(DC);
if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(ND)) {
- mangle(FD, "?");
+ mangle(getGlobalDeclAsDeclContext(FD), "?");
break;
} else {
mangleUnqualifiedName(ND);
@@ -1418,7 +1448,7 @@ void MicrosoftCXXNameMangler::mangleObjCMethodName(const ObjCMethodDecl *MD) {
}
void MicrosoftCXXNameMangler::mangleTemplateInstantiationName(
- const TemplateDecl *TD, const TemplateArgumentList &TemplateArgs) {
+ GlobalDecl GD, const TemplateArgumentList &TemplateArgs) {
// <template-name> ::= <unscoped-template-name> <template-args>
// ::= <substitution>
// Always start with the unqualified name.
@@ -1433,8 +1463,8 @@ void MicrosoftCXXNameMangler::mangleTemplateInstantiationName(
TemplateArgBackReferences.swap(OuterTemplateArgsContext);
PassObjectSizeArgs.swap(OuterPassObjectSizeArgs);
- mangleUnscopedTemplateName(TD);
- mangleTemplateArgs(TD, TemplateArgs);
+ mangleUnscopedTemplateName(GD);
+ mangleTemplateArgs(cast<TemplateDecl>(GD.getDecl()), TemplateArgs);
// Restore the previous back reference contexts.
NameBackReferences.swap(OuterTemplateContext);
@@ -1443,11 +1473,10 @@ void MicrosoftCXXNameMangler::mangleTemplateInstantiationName(
PassObjectSizeArgs.swap(OuterPassObjectSizeArgs);
}
-void
-MicrosoftCXXNameMangler::mangleUnscopedTemplateName(const TemplateDecl *TD) {
+void MicrosoftCXXNameMangler::mangleUnscopedTemplateName(GlobalDecl GD) {
// <unscoped-template-name> ::= ?$ <unqualified-name>
Out << "?$";
- mangleUnqualifiedName(TD);
+ mangleUnqualifiedName(GD);
}
void MicrosoftCXXNameMangler::mangleIntegerLiteral(
@@ -3323,17 +3352,17 @@ void MicrosoftMangleContextImpl::mangleCXXName(GlobalDecl GD,
if (auto *CD = dyn_cast<CXXConstructorDecl>(D)) {
auto Type = GD.getCtorType();
MicrosoftCXXNameMangler mangler(*this, MHO, CD, Type);
- return mangler.mangle(D);
+ return mangler.mangle(GD);
}
if (auto *DD = dyn_cast<CXXDestructorDecl>(D)) {
auto Type = GD.getDtorType();
MicrosoftCXXNameMangler mangler(*this, MHO, DD, Type);
- return mangler.mangle(D);
+ return mangler.mangle(GD);
}
MicrosoftCXXNameMangler Mangler(*this, MHO);
- return Mangler.mangle(D);
+ return Mangler.mangle(GD);
}
void MicrosoftCXXNameMangler::mangleType(const ExtIntType *T, Qualifiers,
diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index fc267d7006a1..b65a38d1e566 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -1691,7 +1691,8 @@ void StmtPrinter::VisitAtomicExpr(AtomicExpr *Node) {
PrintExpr(Node->getPtr());
if (Node->getOp() != AtomicExpr::AO__c11_atomic_load &&
Node->getOp() != AtomicExpr::AO__atomic_load_n &&
- Node->getOp() != AtomicExpr::AO__opencl_atomic_load) {
+ Node->getOp() != AtomicExpr::AO__opencl_atomic_load &&
+ Node->getOp() != AtomicExpr::AO__hip_atomic_load) {
OS << ", ";
PrintExpr(Node->getVal1());
}
diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
new file mode 100644
index 000000000000..bb7eb9971068
--- /dev/null
+++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
@@ -0,0 +1,35 @@
+//===- TypeErasedDataflowAnalysis.cpp -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines type-erased base types and functions for building dataflow
+// analyses that run over Control-Flow Graphs (CFGs).
+//
+//===----------------------------------------------------------------------===//
+
+#include <vector>
+
+#include "clang/Analysis/CFG.h"
+#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
+#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
+#include "llvm/ADT/Optional.h"
+
+using namespace clang;
+using namespace dataflow;
+
+std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>>
+runTypeErasedDataflowAnalysis(const CFG &Cfg,
+ TypeErasedDataflowAnalysis &Analysis,
+ const Environment &InitEnv) {
+ // FIXME: Consider enforcing that `Cfg` meets the requirements that
+ // are specified in the header. This could be done by remembering
+ // what options were used to build `Cfg` and asserting on them here.
+
+ // FIXME: Implement work list-based algorithm to compute the fixed
+ // point of `Analysis::transform` for every basic block in `Cfg`.
+ return {};
+}
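
Until the FIXME is resolved, the intended shape of the fixed-point computation can be sketched against the interfaces declared in the header; this is illustrative only (it ignores Environment joins and widening) and is not the patch's implementation:

    std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> States(
        Cfg.getNumBlockIDs());
    llvm::SmallVector<const CFGBlock *, 16> Worklist;
    Worklist.push_back(&Cfg.getEntry());
    while (!Worklist.empty()) {
      const CFGBlock *B = Worklist.pop_back_val();
      // Join the lattice elements of all evaluated predecessors.
      TypeErasedDataflowAnalysisState S{Analysis.typeErasedInitialElement(),
                                        InitEnv};
      for (const CFGBlock *Pred : B->preds())
        if (Pred && States[Pred->getBlockID()])
          Analysis.joinTypeErased(S.Lattice, States[Pred->getBlockID()]->Lattice);
      // Apply the transfer function to each statement in the block.
      for (const CFGElement &Elt : *B)
        if (llvm::Optional<CFGStmt> CS = Elt.getAs<CFGStmt>())
          S.Lattice = Analysis.transferTypeErased(CS->getStmt(), S.Lattice, S.Env);
      // Re-queue successors whenever this block's state changes.
      llvm::Optional<TypeErasedDataflowAnalysisState> &Old =
          States[B->getBlockID()];
      if (!Old || !Analysis.isEqualTypeErased(Old->Lattice, S.Lattice)) {
        Old = std::move(S);
        for (const CFGBlock *Succ : B->succs())
          if (Succ)
            Worklist.push_back(Succ);
      }
    }
    return States;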
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index f75b8ffcb53d..4d403ae1809d 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -140,8 +140,8 @@ bool AArch64TargetInfo::setABI(const std::string &Name) {
bool AArch64TargetInfo::validateBranchProtection(StringRef Spec,
BranchProtectionInfo &BPI,
StringRef &Err) const {
- llvm::AArch64::ParsedBranchProtection PBP;
- if (!llvm::AArch64::parseBranchProtection(Spec, PBP, Err))
+ llvm::ARM::ParsedBranchProtection PBP;
+ if (!llvm::ARM::parseBranchProtection(Spec, PBP, Err))
return false;
BPI.SignReturnAddr =
diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp
index fc6b01c87fd2..f330780300f2 100644
--- a/clang/lib/Basic/Targets/ARM.cpp
+++ b/clang/lib/Basic/Targets/ARM.cpp
@@ -367,6 +367,28 @@ bool ARMTargetInfo::setABI(const std::string &Name) {
return false;
}
+bool ARMTargetInfo::validateBranchProtection(StringRef Spec,
+ BranchProtectionInfo &BPI,
+ StringRef &Err) const {
+ llvm::ARM::ParsedBranchProtection PBP;
+ if (!llvm::ARM::parseBranchProtection(Spec, PBP, Err))
+ return false;
+
+ BPI.SignReturnAddr =
+ llvm::StringSwitch<LangOptions::SignReturnAddressScopeKind>(PBP.Scope)
+ .Case("non-leaf", LangOptions::SignReturnAddressScopeKind::NonLeaf)
+ .Case("all", LangOptions::SignReturnAddressScopeKind::All)
+ .Default(LangOptions::SignReturnAddressScopeKind::None);
+
+ // We do not care about the signing key beyond issuing a warning.
+ if (PBP.Key == "b_key")
+ Err = "b-key";
+ BPI.SignKey = LangOptions::SignReturnAddressKeyKind::AKey;
+
+ BPI.BranchTargetEnforcement = PBP.BranchTargetEnforcement;
+ return true;
+}
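
Tracing a plausible spec string through the new hook; the spelling follows the AArch64 -mbranch-protection syntax, and its acceptance on ARM here is an assumption:

    // Spec = "pac-ret+leaf+bti"
    //   PBP.Scope == "all"              -> BPI.SignReturnAddr = All
    //   PBP.Key   == "b_key" warns only -> BPI.SignKey is always AKey on ARM
    //   PBP.BranchTargetEnforcement     -> BPI.BranchTargetEnforcement = true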
+
// FIXME: This should be based on Arch attributes, not CPU names.
bool ARMTargetInfo::initFeatureMap(
llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
@@ -874,6 +896,16 @@ void ARMTargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__ARM_BF16_FORMAT_ALTERNATIVE", "1");
}
+ if (Opts.BranchTargetEnforcement)
+ Builder.defineMacro("__ARM_FEATURE_BTI_DEFAULT", "1");
+
+ if (Opts.hasSignReturnAddress()) {
+ unsigned Value = Opts.isSignReturnAddressWithAKey() ? 1 : 2;
+ if (Opts.isSignReturnAddressScopeAll())
+ Value |= 1 << 2;
+ Builder.defineMacro("__ARM_FEATURE_PAC_DEFAULT", Twine(Value));
+ }
+
switch (ArchKind) {
default:
break;
diff --git a/clang/lib/Basic/Targets/ARM.h b/clang/lib/Basic/Targets/ARM.h
index d54a049042d6..7d0011d134ea 100644
--- a/clang/lib/Basic/Targets/ARM.h
+++ b/clang/lib/Basic/Targets/ARM.h
@@ -123,6 +123,9 @@ public:
StringRef getABI() const override;
bool setABI(const std::string &Name) override;
+ bool validateBranchProtection(StringRef, BranchProtectionInfo &,
+ StringRef &) const override;
+
// FIXME: This should be based on Arch attributes, not CPU names.
bool
initFeatureMap(llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags,
diff --git a/clang/lib/Basic/Targets/OSTargets.cpp b/clang/lib/Basic/Targets/OSTargets.cpp
index 7cd4a5190120..53748bf067cd 100644
--- a/clang/lib/Basic/Targets/OSTargets.cpp
+++ b/clang/lib/Basic/Targets/OSTargets.cpp
@@ -181,8 +181,10 @@ static void addVisualCDefines(const LangOptions &Opts, MacroBuilder &Builder) {
Builder.defineMacro("_HAS_CHAR16_T_LANGUAGE_SUPPORT", Twine(1));
if (Opts.isCompatibleWithMSVC(LangOptions::MSVC2015)) {
- if (Opts.CPlusPlus20)
- Builder.defineMacro("_MSVC_LANG", "201705L");
+ if (Opts.CPlusPlus2b)
+ Builder.defineMacro("_MSVC_LANG", "202004L");
+ else if (Opts.CPlusPlus20)
+ Builder.defineMacro("_MSVC_LANG", "202002L");
else if (Opts.CPlusPlus17)
Builder.defineMacro("_MSVC_LANG", "201703L");
else if (Opts.CPlusPlus14)
@@ -201,6 +203,14 @@ static void addVisualCDefines(const LangOptions &Opts, MacroBuilder &Builder) {
}
Builder.defineMacro("_INTEGRAL_MAX_BITS", "64");
+
+ // Starting with VS 2022 17.1, MSVC predefines the macro below to inform
+ // users of the execution character set defined at compile time.
+ // The value given is the Windows Code Page Identifier:
+ // https://docs.microsoft.com/en-us/windows/win32/intl/code-page-identifiers
+ //
+ // Clang currently only supports UTF-8, so we use 65001.
+ Builder.defineMacro("_MSVC_EXECUTION_CHARACTER_SET", "65001");
}
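
The resulting values, by language mode, as set by the code above:

    // -std=c++17 -> _MSVC_LANG == 201703L
    // -std=c++20 -> _MSVC_LANG == 202002L (was 201705L before this change)
    // -std=c++2b -> _MSVC_LANG == 202004L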
void addWindowsDefines(const llvm::Triple &Triple, const LangOptions &Opts,
diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h
index 704b1843dfed..8cf18b6c20f1 100644
--- a/clang/lib/Basic/Targets/SPIR.h
+++ b/clang/lib/Basic/Targets/SPIR.h
@@ -56,9 +56,14 @@ static const unsigned SPIRDefIsGenMap[] = {
0, // opencl_generic
0, // opencl_global_device
0, // opencl_global_host
- 0, // cuda_device
- 0, // cuda_constant
- 0, // cuda_shared
+ // cuda_* address space mapping is intended for HIPSPV (HIP to SPIR-V
+ // translation). This mapping is enabled when the language mode is HIP.
+ 1, // cuda_device
+ // A cuda_constant pointer can be cast to a default/"flat" pointer, but in
+ // SPIR-V casts between constant and generic pointers are not allowed. For
+ // this reason cuda_constant is mapped to SPIR-V CrossWorkgroup.
+ 1, // cuda_constant
+ 3, // cuda_shared
1, // sycl_global
5, // sycl_global_device
6, // sycl_global_host
@@ -74,6 +79,8 @@ class LLVM_LIBRARY_VISIBILITY BaseSPIRTargetInfo : public TargetInfo {
protected:
BaseSPIRTargetInfo(const llvm::Triple &Triple, const TargetOptions &)
: TargetInfo(Triple) {
+ assert((Triple.isSPIR() || Triple.isSPIRV()) &&
+ "Invalid architecture for SPIR or SPIR-V.");
assert(getTriple().getOS() == llvm::Triple::UnknownOS &&
"SPIR(-V) target must use unknown OS");
assert(getTriple().getEnvironment() == llvm::Triple::UnknownEnvironment &&
@@ -137,11 +144,16 @@ public:
// FIXME: SYCL specification considers unannotated pointers and references
// to be pointing to the generic address space. See section 5.9.3 of
// SYCL 2020 specification.
- // Currently, there is no way of representing SYCL's default address space
- // language semantic along with the semantics of embedded C's default
- // address space in the same address space map. Hence the map needs to be
- // reset to allow mapping to the desired value of 'Default' entry for SYCL.
- setAddressSpaceMap(/*DefaultIsGeneric=*/Opts.SYCLIsDevice);
+ // Currently, there is no way of representing SYCL's and HIP's default
+ // address space language semantic along with the semantics of embedded C's
+ // default address space in the same address space map. Hence the map needs
+ // to be reset to allow mapping to the desired value of 'Default' entry for
+ // SYCL and HIP.
+ setAddressSpaceMap(
+ /*DefaultIsGeneric=*/Opts.SYCLIsDevice ||
+ // The address mapping from HIP language for device code is only defined
+ // for SPIR-V.
+ (getTriple().isSPIRV() && Opts.HIP && Opts.CUDAIsDevice));
}
void setSupportedOpenCLOpts() override {
@@ -159,6 +171,7 @@ class LLVM_LIBRARY_VISIBILITY SPIRTargetInfo : public BaseSPIRTargetInfo {
public:
SPIRTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: BaseSPIRTargetInfo(Triple, Opts) {
+ assert(Triple.isSPIR() && "Invalid architecture for SPIR.");
assert(getTriple().getOS() == llvm::Triple::UnknownOS &&
"SPIR target must use unknown OS");
assert(getTriple().getEnvironment() == llvm::Triple::UnknownEnvironment &&
@@ -177,6 +190,8 @@ class LLVM_LIBRARY_VISIBILITY SPIR32TargetInfo : public SPIRTargetInfo {
public:
SPIR32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: SPIRTargetInfo(Triple, Opts) {
+ assert(Triple.getArch() == llvm::Triple::spir &&
+ "Invalid architecture for 32-bit SPIR.");
PointerWidth = PointerAlign = 32;
SizeType = TargetInfo::UnsignedInt;
PtrDiffType = IntPtrType = TargetInfo::SignedInt;
@@ -192,6 +207,8 @@ class LLVM_LIBRARY_VISIBILITY SPIR64TargetInfo : public SPIRTargetInfo {
public:
SPIR64TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: SPIRTargetInfo(Triple, Opts) {
+ assert(Triple.getArch() == llvm::Triple::spir64 &&
+ "Invalid architecture for 64-bit SPIR.");
PointerWidth = PointerAlign = 64;
SizeType = TargetInfo::UnsignedLong;
PtrDiffType = IntPtrType = TargetInfo::SignedLong;
@@ -207,6 +224,7 @@ class LLVM_LIBRARY_VISIBILITY SPIRVTargetInfo : public BaseSPIRTargetInfo {
public:
SPIRVTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: BaseSPIRTargetInfo(Triple, Opts) {
+ assert(Triple.isSPIRV() && "Invalid architecture for SPIR-V.");
assert(getTriple().getOS() == llvm::Triple::UnknownOS &&
"SPIR-V target must use unknown OS");
assert(getTriple().getEnvironment() == llvm::Triple::UnknownEnvironment &&
@@ -225,6 +243,8 @@ class LLVM_LIBRARY_VISIBILITY SPIRV32TargetInfo : public SPIRVTargetInfo {
public:
SPIRV32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: SPIRVTargetInfo(Triple, Opts) {
+ assert(Triple.getArch() == llvm::Triple::spirv32 &&
+ "Invalid architecture for 32-bit SPIR-V.");
PointerWidth = PointerAlign = 32;
SizeType = TargetInfo::UnsignedInt;
PtrDiffType = IntPtrType = TargetInfo::SignedInt;
@@ -240,6 +260,8 @@ class LLVM_LIBRARY_VISIBILITY SPIRV64TargetInfo : public SPIRVTargetInfo {
public:
SPIRV64TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
: SPIRVTargetInfo(Triple, Opts) {
+ assert(Triple.getArch() == llvm::Triple::spirv64 &&
+ "Invalid architecture for 64-bit SPIR-V.");
PointerWidth = PointerAlign = 64;
SizeType = TargetInfo::UnsignedLong;
PtrDiffType = IntPtrType = TargetInfo::SignedLong;
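
For orientation, a minimal HIP sketch (hypothetical names) of what the new
cuda_* rows mean under HIPSPV; the storage-class names in the comments follow
the mapping table above:

    __device__ int D;    // cuda_device   -> address space 1 (CrossWorkgroup)
    __constant__ int C;  // cuda_constant -> address space 1 as well, since
                         // SPIR-V forbids constant-to-generic casts
    __global__ void K() {
      __shared__ int S;  // cuda_shared   -> address space 3 (Workgroup)
      S = D + C;
    }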
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 5e3686893719..5c4bd364b06a 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -239,9 +239,9 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasAVX512ER = true;
} else if (Feature == "+avx512fp16") {
HasAVX512FP16 = true;
+ HasFloat16 = true;
} else if (Feature == "+avx512pf") {
HasAVX512PF = true;
- HasLegalHalfType = true;
} else if (Feature == "+avx512dq") {
HasAVX512DQ = true;
} else if (Feature == "+avx512bitalg") {
@@ -369,8 +369,6 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
.Default(NoXOP);
XOPLevel = std::max(XOPLevel, XLevel);
}
- // Turn on _float16 for x86 (feature sse2)
- HasFloat16 = SSELevel >= SSE2;
// LLVM doesn't have a separate switch for fpmath, so only accept it if it
// matches the selected sse level.
@@ -384,12 +382,10 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
SimdDefaultAlign =
hasFeature("avx512f") ? 512 : hasFeature("avx") ? 256 : 128;
- if (!HasX87) {
- if (LongDoubleFormat == &llvm::APFloat::x87DoubleExtended())
- HasLongDouble = false;
- if (getTriple().getArch() == llvm::Triple::x86)
- HasFPReturn = false;
- }
+ // FIXME: We should allow the long double type on 32-bit targets to match
+ // GCC. This requires the backend to be able to lower f80 without x87 first.
+ if (!HasX87 && LongDoubleFormat == &llvm::APFloat::x87DoubleExtended())
+ HasLongDouble = false;
return true;
}
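
The upshot of this X86.cpp change: _Float16 is now gated on AVX512-FP16
rather than being enabled for any SSE2 target. A hedged sketch (hypothetical
function):

    // Accepted when building with -mavx512fp16 ("+avx512fp16" above);
    // plain SSE2 targets no longer get _Float16 automatically.
    _Float16 half_sum(_Float16 A, _Float16 B) { return A + B; }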
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 648c7b3df8ed..510f3911939c 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -1034,8 +1034,9 @@ void EmitAssemblyHelper::EmitAssemblyWithLegacyPassManager(
if (!ThinLinkOS)
return;
}
- TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit",
- CodeGenOpts.EnableSplitLTOUnit);
+ if (!TheModule->getModuleFlag("EnableSplitLTOUnit"))
+ TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit",
+ CodeGenOpts.EnableSplitLTOUnit);
PerModulePasses.add(createWriteThinLTOBitcodePass(
*OS, ThinLinkOS ? &ThinLinkOS->os() : nullptr));
} else {
@@ -1049,8 +1050,9 @@ void EmitAssemblyHelper::EmitAssemblyWithLegacyPassManager(
if (EmitLTOSummary) {
if (!TheModule->getModuleFlag("ThinLTO"))
TheModule->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0));
- TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit",
- uint32_t(1));
+ if (!TheModule->getModuleFlag("EnableSplitLTOUnit"))
+ TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit",
+ uint32_t(1));
}
PerModulePasses.add(createBitcodeWriterPass(
@@ -1451,8 +1453,9 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
if (!ThinLinkOS)
return;
}
- TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit",
- CodeGenOpts.EnableSplitLTOUnit);
+ if (!TheModule->getModuleFlag("EnableSplitLTOUnit"))
+ TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit",
+ CodeGenOpts.EnableSplitLTOUnit);
MPM.addPass(ThinLTOBitcodeWriterPass(*OS, ThinLinkOS ? &ThinLinkOS->os()
: nullptr));
} else {
@@ -1465,8 +1468,9 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
if (EmitLTOSummary) {
if (!TheModule->getModuleFlag("ThinLTO"))
TheModule->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0));
- TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit",
- uint32_t(1));
+ if (!TheModule->getModuleFlag("EnableSplitLTOUnit"))
+ TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit",
+ uint32_t(1));
}
MPM.addPass(
BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists, EmitLTOSummary));
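
All four BackendUtil.cpp hunks apply the same idiom: only add the
EnableSplitLTOUnit module flag when it is not already present, because adding
a Module::Error flag twice with conflicting values is a hard error. A minimal
sketch of the idiom (hypothetical helper, using only the llvm::Module API
visible in the hunks):

    #include "llvm/IR/Module.h"
    #include <cstdint>

    static void setSplitLTOUnitOnce(llvm::Module &M, uint32_t Value) {
      // Respect a flag that an earlier phase or a linked module already set.
      if (!M.getModuleFlag("EnableSplitLTOUnit"))
        M.addModuleFlag(llvm::Module::Error, "EnableSplitLTOUnit", Value);
    }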
diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index 326ca8d50533..b68e6328acdf 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -524,12 +524,14 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
llvm_unreachable("Already handled!");
case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
+ case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
emitAtomicCmpXchgFailureSet(CGF, E, false, Dest, Ptr, Val1, Val2,
FailureOrder, Size, Order, Scope);
return;
case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
+ case AtomicExpr::AO__hip_atomic_compare_exchange_weak:
emitAtomicCmpXchgFailureSet(CGF, E, true, Dest, Ptr, Val1, Val2,
FailureOrder, Size, Order, Scope);
return;
@@ -565,6 +567,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
}
case AtomicExpr::AO__c11_atomic_load:
case AtomicExpr::AO__opencl_atomic_load:
+ case AtomicExpr::AO__hip_atomic_load:
case AtomicExpr::AO__atomic_load_n:
case AtomicExpr::AO__atomic_load: {
llvm::LoadInst *Load = CGF.Builder.CreateLoad(Ptr);
@@ -576,6 +579,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
case AtomicExpr::AO__c11_atomic_store:
case AtomicExpr::AO__opencl_atomic_store:
+ case AtomicExpr::AO__hip_atomic_store:
case AtomicExpr::AO__atomic_store:
case AtomicExpr::AO__atomic_store_n: {
llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1);
@@ -586,6 +590,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
}
case AtomicExpr::AO__c11_atomic_exchange:
+ case AtomicExpr::AO__hip_atomic_exchange:
case AtomicExpr::AO__opencl_atomic_exchange:
case AtomicExpr::AO__atomic_exchange_n:
case AtomicExpr::AO__atomic_exchange:
@@ -597,6 +602,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
: llvm::Instruction::Add;
LLVM_FALLTHROUGH;
case AtomicExpr::AO__c11_atomic_fetch_add:
+ case AtomicExpr::AO__hip_atomic_fetch_add:
case AtomicExpr::AO__opencl_atomic_fetch_add:
case AtomicExpr::AO__atomic_fetch_add:
Op = E->getValueType()->isFloatingType() ? llvm::AtomicRMWInst::FAdd
@@ -618,6 +624,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
PostOpMinMax = true;
LLVM_FALLTHROUGH;
case AtomicExpr::AO__c11_atomic_fetch_min:
+ case AtomicExpr::AO__hip_atomic_fetch_min:
case AtomicExpr::AO__opencl_atomic_fetch_min:
case AtomicExpr::AO__atomic_fetch_min:
Op = E->getValueType()->isSignedIntegerType() ? llvm::AtomicRMWInst::Min
@@ -628,6 +635,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
PostOpMinMax = true;
LLVM_FALLTHROUGH;
case AtomicExpr::AO__c11_atomic_fetch_max:
+ case AtomicExpr::AO__hip_atomic_fetch_max:
case AtomicExpr::AO__opencl_atomic_fetch_max:
case AtomicExpr::AO__atomic_fetch_max:
Op = E->getValueType()->isSignedIntegerType() ? llvm::AtomicRMWInst::Max
@@ -638,6 +646,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
PostOp = llvm::Instruction::And;
LLVM_FALLTHROUGH;
case AtomicExpr::AO__c11_atomic_fetch_and:
+ case AtomicExpr::AO__hip_atomic_fetch_and:
case AtomicExpr::AO__opencl_atomic_fetch_and:
case AtomicExpr::AO__atomic_fetch_and:
Op = llvm::AtomicRMWInst::And;
@@ -647,6 +656,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
PostOp = llvm::Instruction::Or;
LLVM_FALLTHROUGH;
case AtomicExpr::AO__c11_atomic_fetch_or:
+ case AtomicExpr::AO__hip_atomic_fetch_or:
case AtomicExpr::AO__opencl_atomic_fetch_or:
case AtomicExpr::AO__atomic_fetch_or:
Op = llvm::AtomicRMWInst::Or;
@@ -656,6 +666,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
PostOp = llvm::Instruction::Xor;
LLVM_FALLTHROUGH;
case AtomicExpr::AO__c11_atomic_fetch_xor:
+ case AtomicExpr::AO__hip_atomic_fetch_xor:
case AtomicExpr::AO__opencl_atomic_fetch_xor:
case AtomicExpr::AO__atomic_fetch_xor:
Op = llvm::AtomicRMWInst::Xor;
@@ -838,6 +849,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
case AtomicExpr::AO__c11_atomic_load:
case AtomicExpr::AO__opencl_atomic_load:
+ case AtomicExpr::AO__hip_atomic_load:
case AtomicExpr::AO__atomic_load_n:
break;
@@ -857,7 +869,9 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
+ case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
+ case AtomicExpr::AO__hip_atomic_compare_exchange_weak:
case AtomicExpr::AO__atomic_compare_exchange_n:
case AtomicExpr::AO__atomic_compare_exchange:
Val1 = EmitPointerWithAlignment(E->getVal1());
@@ -873,6 +887,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
case AtomicExpr::AO__c11_atomic_fetch_add:
case AtomicExpr::AO__c11_atomic_fetch_sub:
+ case AtomicExpr::AO__hip_atomic_fetch_add:
case AtomicExpr::AO__opencl_atomic_fetch_add:
case AtomicExpr::AO__opencl_atomic_fetch_sub:
if (MemTy->isPointerType()) {
@@ -901,7 +916,9 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
case AtomicExpr::AO__c11_atomic_store:
case AtomicExpr::AO__c11_atomic_exchange:
case AtomicExpr::AO__opencl_atomic_store:
+ case AtomicExpr::AO__hip_atomic_store:
case AtomicExpr::AO__opencl_atomic_exchange:
+ case AtomicExpr::AO__hip_atomic_exchange:
case AtomicExpr::AO__atomic_store_n:
case AtomicExpr::AO__atomic_exchange_n:
case AtomicExpr::AO__c11_atomic_fetch_and:
@@ -916,8 +933,11 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
case AtomicExpr::AO__opencl_atomic_fetch_min:
case AtomicExpr::AO__opencl_atomic_fetch_max:
case AtomicExpr::AO__atomic_fetch_and:
+ case AtomicExpr::AO__hip_atomic_fetch_and:
case AtomicExpr::AO__atomic_fetch_or:
+ case AtomicExpr::AO__hip_atomic_fetch_or:
case AtomicExpr::AO__atomic_fetch_xor:
+ case AtomicExpr::AO__hip_atomic_fetch_xor:
case AtomicExpr::AO__atomic_fetch_nand:
case AtomicExpr::AO__atomic_and_fetch:
case AtomicExpr::AO__atomic_or_fetch:
@@ -926,7 +946,9 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
case AtomicExpr::AO__atomic_max_fetch:
case AtomicExpr::AO__atomic_min_fetch:
case AtomicExpr::AO__atomic_fetch_max:
+ case AtomicExpr::AO__hip_atomic_fetch_max:
case AtomicExpr::AO__atomic_fetch_min:
+ case AtomicExpr::AO__hip_atomic_fetch_min:
Val1 = EmitValToTemp(*this, E->getVal1());
break;
}
@@ -968,11 +990,14 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
case AtomicExpr::AO__c11_atomic_fetch_add:
case AtomicExpr::AO__opencl_atomic_fetch_add:
case AtomicExpr::AO__atomic_fetch_add:
+ case AtomicExpr::AO__hip_atomic_fetch_add:
case AtomicExpr::AO__c11_atomic_fetch_and:
case AtomicExpr::AO__opencl_atomic_fetch_and:
+ case AtomicExpr::AO__hip_atomic_fetch_and:
case AtomicExpr::AO__atomic_fetch_and:
case AtomicExpr::AO__c11_atomic_fetch_or:
case AtomicExpr::AO__opencl_atomic_fetch_or:
+ case AtomicExpr::AO__hip_atomic_fetch_or:
case AtomicExpr::AO__atomic_fetch_or:
case AtomicExpr::AO__c11_atomic_fetch_nand:
case AtomicExpr::AO__atomic_fetch_nand:
@@ -984,6 +1009,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
case AtomicExpr::AO__opencl_atomic_fetch_min:
case AtomicExpr::AO__opencl_atomic_fetch_max:
case AtomicExpr::AO__atomic_fetch_xor:
+ case AtomicExpr::AO__hip_atomic_fetch_xor:
case AtomicExpr::AO__c11_atomic_fetch_max:
case AtomicExpr::AO__c11_atomic_fetch_min:
case AtomicExpr::AO__atomic_add_fetch:
@@ -993,7 +1019,9 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
case AtomicExpr::AO__atomic_sub_fetch:
case AtomicExpr::AO__atomic_xor_fetch:
case AtomicExpr::AO__atomic_fetch_max:
+ case AtomicExpr::AO__hip_atomic_fetch_max:
case AtomicExpr::AO__atomic_fetch_min:
+ case AtomicExpr::AO__hip_atomic_fetch_min:
case AtomicExpr::AO__atomic_max_fetch:
case AtomicExpr::AO__atomic_min_fetch:
// For these, only library calls for certain sizes exist.
@@ -1014,10 +1042,15 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
case AtomicExpr::AO__c11_atomic_exchange:
case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
+ case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
case AtomicExpr::AO__opencl_atomic_load:
+ case AtomicExpr::AO__hip_atomic_load:
case AtomicExpr::AO__opencl_atomic_store:
+ case AtomicExpr::AO__hip_atomic_store:
case AtomicExpr::AO__opencl_atomic_exchange:
+ case AtomicExpr::AO__hip_atomic_exchange:
case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
+ case AtomicExpr::AO__hip_atomic_compare_exchange_weak:
case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
case AtomicExpr::AO__atomic_load_n:
case AtomicExpr::AO__atomic_store_n:
@@ -1079,7 +1112,9 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
+ case AtomicExpr::AO__hip_atomic_compare_exchange_weak:
case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
+ case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
case AtomicExpr::AO__atomic_compare_exchange:
case AtomicExpr::AO__atomic_compare_exchange_n:
LibCallName = "__atomic_compare_exchange";
@@ -1101,6 +1136,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
case AtomicExpr::AO__opencl_atomic_exchange:
case AtomicExpr::AO__atomic_exchange_n:
case AtomicExpr::AO__atomic_exchange:
+ case AtomicExpr::AO__hip_atomic_exchange:
LibCallName = "__atomic_exchange";
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
MemTy, E->getExprLoc(), TInfo.Width);
@@ -1109,6 +1145,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
// void __atomic_store_N(T *mem, T val, int order)
case AtomicExpr::AO__c11_atomic_store:
case AtomicExpr::AO__opencl_atomic_store:
+ case AtomicExpr::AO__hip_atomic_store:
case AtomicExpr::AO__atomic_store:
case AtomicExpr::AO__atomic_store_n:
LibCallName = "__atomic_store";
@@ -1121,6 +1158,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
// T __atomic_load_N(T *mem, int order)
case AtomicExpr::AO__c11_atomic_load:
case AtomicExpr::AO__opencl_atomic_load:
+ case AtomicExpr::AO__hip_atomic_load:
case AtomicExpr::AO__atomic_load:
case AtomicExpr::AO__atomic_load_n:
LibCallName = "__atomic_load";
@@ -1133,6 +1171,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
case AtomicExpr::AO__c11_atomic_fetch_add:
case AtomicExpr::AO__opencl_atomic_fetch_add:
case AtomicExpr::AO__atomic_fetch_add:
+ case AtomicExpr::AO__hip_atomic_fetch_add:
LibCallName = "__atomic_fetch_add";
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
LoweredMemTy, E->getExprLoc(), TInfo.Width);
@@ -1144,6 +1183,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
LLVM_FALLTHROUGH;
case AtomicExpr::AO__c11_atomic_fetch_and:
case AtomicExpr::AO__opencl_atomic_fetch_and:
+ case AtomicExpr::AO__hip_atomic_fetch_and:
case AtomicExpr::AO__atomic_fetch_and:
LibCallName = "__atomic_fetch_and";
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
@@ -1156,6 +1196,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
LLVM_FALLTHROUGH;
case AtomicExpr::AO__c11_atomic_fetch_or:
case AtomicExpr::AO__opencl_atomic_fetch_or:
+ case AtomicExpr::AO__hip_atomic_fetch_or:
case AtomicExpr::AO__atomic_fetch_or:
LibCallName = "__atomic_fetch_or";
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
@@ -1180,6 +1221,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
LLVM_FALLTHROUGH;
case AtomicExpr::AO__c11_atomic_fetch_xor:
case AtomicExpr::AO__opencl_atomic_fetch_xor:
+ case AtomicExpr::AO__hip_atomic_fetch_xor:
case AtomicExpr::AO__atomic_fetch_xor:
LibCallName = "__atomic_fetch_xor";
AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1.getPointer(),
@@ -1190,6 +1232,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
LLVM_FALLTHROUGH;
case AtomicExpr::AO__c11_atomic_fetch_min:
case AtomicExpr::AO__atomic_fetch_min:
+ case AtomicExpr::AO__hip_atomic_fetch_min:
case AtomicExpr::AO__opencl_atomic_fetch_min:
LibCallName = E->getValueType()->isSignedIntegerType()
? "__atomic_fetch_min"
@@ -1202,6 +1245,7 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
LLVM_FALLTHROUGH;
case AtomicExpr::AO__c11_atomic_fetch_max:
case AtomicExpr::AO__atomic_fetch_max:
+ case AtomicExpr::AO__hip_atomic_fetch_max:
case AtomicExpr::AO__opencl_atomic_fetch_max:
LibCallName = E->getValueType()->isSignedIntegerType()
? "__atomic_fetch_max"
@@ -1291,10 +1335,12 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
bool IsStore = E->getOp() == AtomicExpr::AO__c11_atomic_store ||
E->getOp() == AtomicExpr::AO__opencl_atomic_store ||
+ E->getOp() == AtomicExpr::AO__hip_atomic_store ||
E->getOp() == AtomicExpr::AO__atomic_store ||
E->getOp() == AtomicExpr::AO__atomic_store_n;
bool IsLoad = E->getOp() == AtomicExpr::AO__c11_atomic_load ||
E->getOp() == AtomicExpr::AO__opencl_atomic_load ||
+ E->getOp() == AtomicExpr::AO__hip_atomic_load ||
E->getOp() == AtomicExpr::AO__atomic_load ||
E->getOp() == AtomicExpr::AO__atomic_load_n;
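
These switch cases route the new scoped HIP atomic builtins through the same
paths as their C11/OpenCL counterparts. A hedged usage sketch (hypothetical
function; the __HIP_MEMORY_SCOPE_* spelling is the one the HIP headers use
and is an assumption here):

    __device__ int Counter;

    __device__ int bump() {
      // Scoped fetch-add: ordering only has to be visible within the
      // workgroup, letting the backend pick a narrower sync scope.
      return __hip_atomic_fetch_add(&Counter, 1, __ATOMIC_RELAXED,
                                    __HIP_MEMORY_SCOPE_WORKGROUP);
    }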
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 849423c8b9ba..5d6df59cc405 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -170,8 +170,9 @@ static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
// Convert the type of the pointer to a pointer to the stored type.
Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
+ unsigned SrcAddrSpace = Address->getType()->getPointerAddressSpace();
Value *BC = CGF.Builder.CreateBitCast(
- Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
+ Address, llvm::PointerType::get(Val->getType(), SrcAddrSpace), "cast");
LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
LV.setNontemporal(true);
CGF.EmitStoreOfScalar(Val, LV, false);
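
This CGBuiltin.cpp fix keeps the source pointer's address space when
bitcasting for __builtin_nontemporal_store; forcing an unqualified pointer
type produced invalid IR for non-zero address spaces. A small sketch
(hypothetical names, with the address_space attribute standing in for a real
target address space):

    // P points into addrspace(3); the bitcast emitted for the builtin now
    // stays in addrspace(3) instead of being forced to addrspace(0).
    typedef int __attribute__((address_space(3))) AS3Int;
    void NTStore(AS3Int *P, int V) {
      __builtin_nontemporal_store(V, P);
    }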
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 4f14459e4d28..f6853a22cd36 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -4510,6 +4510,9 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
Address Replacement(CGF.Builder.CreateLoad(Pair.second),
CGF.getContext().getDeclAlign(Pair.first));
Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
+ if (auto *DI = CGF.getDebugInfo())
+ DI->EmitDeclareOfAutoVariable(Pair.first, Pair.second.getPointer(),
+ CGF.Builder, /*UsePointerValue*/ true);
}
// Adjust mapping for internal locals by mapping actual memory instead of
// a pointer to this memory.
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 59f3e0270571..9ba1a5c25e81 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -710,10 +710,25 @@ void CodeGenModule::Release() {
1);
}
- if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_32 ||
+ // Add module metadata for return address signing (ignoring
+ // non-leaf/all) and stack tagging. These are actually turned on by function
+ // attributes, but we use module metadata to emit build attributes. This is
+ // needed for LTO, where the function attributes are inside bitcode
+ // serialised into a global variable by the time build attributes are
+ // emitted, so we can't access them.
+ if (Context.getTargetInfo().hasFeature("ptrauth") &&
+ LangOpts.getSignReturnAddressScope() !=
+ LangOptions::SignReturnAddressScopeKind::None)
+ getModule().addModuleFlag(llvm::Module::Override,
+ "sign-return-address-buildattr", 1);
+ if (LangOpts.Sanitize.has(SanitizerKind::MemTag))
+ getModule().addModuleFlag(llvm::Module::Override,
+ "tag-stack-memory-buildattr", 1);
+
+ if (Arch == llvm::Triple::thumb || Arch == llvm::Triple::thumbeb ||
+ Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_32 ||
Arch == llvm::Triple::aarch64_be) {
- getModule().addModuleFlag(llvm::Module::Error,
- "branch-target-enforcement",
+ getModule().addModuleFlag(llvm::Module::Error, "branch-target-enforcement",
LangOpts.BranchTargetEnforcement);
getModule().addModuleFlag(llvm::Module::Error, "sign-return-address",
@@ -722,9 +737,11 @@ void CodeGenModule::Release() {
getModule().addModuleFlag(llvm::Module::Error, "sign-return-address-all",
LangOpts.isSignReturnAddressScopeAll());
- getModule().addModuleFlag(llvm::Module::Error,
- "sign-return-address-with-bkey",
- !LangOpts.isSignReturnAddressWithAKey());
+ if (Arch != llvm::Triple::thumb && Arch != llvm::Triple::thumbeb) {
+ getModule().addModuleFlag(llvm::Module::Error,
+ "sign-return-address-with-bkey",
+ !LangOpts.isSignReturnAddressWithAKey());
+ }
}
if (!CodeGenOpts.MemoryProfileOutput.empty()) {
@@ -1266,6 +1283,20 @@ static bool isUniqueInternalLinkageDecl(GlobalDecl GD,
(CGM.getFunctionLinkage(GD) == llvm::GlobalValue::InternalLinkage);
}
+static void AppendTargetClonesMangling(const CodeGenModule &CGM,
+ const TargetClonesAttr *Attr,
+ unsigned VersionIndex,
+ raw_ostream &Out) {
+ Out << '.';
+ StringRef FeatureStr = Attr->getFeatureStr(VersionIndex);
+ if (FeatureStr.startswith("arch="))
+ Out << "arch_" << FeatureStr.substr(sizeof("arch=") - 1);
+ else
+ Out << FeatureStr;
+
+ Out << '.' << Attr->getMangledIndex(VersionIndex);
+}
+
static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
const NamedDecl *ND,
bool OmitMultiVersionMangling = false) {
@@ -1319,6 +1350,10 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,
case MultiVersionKind::Target:
AppendTargetMangling(CGM, FD->getAttr<TargetAttr>(), Out);
break;
+ case MultiVersionKind::TargetClones:
+ AppendTargetClonesMangling(CGM, FD->getAttr<TargetClonesAttr>(),
+ GD.getMultiVersionIndex(), Out);
+ break;
case MultiVersionKind::None:
llvm_unreachable("None multiversion type isn't valid here");
}
@@ -1983,8 +2018,9 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD,
FD = FD ? FD->getMostRecentDecl() : FD;
const auto *TD = FD ? FD->getAttr<TargetAttr>() : nullptr;
const auto *SD = FD ? FD->getAttr<CPUSpecificAttr>() : nullptr;
+ const auto *TC = FD ? FD->getAttr<TargetClonesAttr>() : nullptr;
bool AddedAttr = false;
- if (TD || SD) {
+ if (TD || SD || TC) {
llvm::StringMap<bool> FeatureMap;
getContext().getFunctionFeatureMap(FeatureMap, GD);
@@ -3226,6 +3262,12 @@ void CodeGenModule::EmitMultiVersionFunctionDefinition(GlobalDecl GD,
for (unsigned I = 0; I < Spec->cpus_size(); ++I)
EmitGlobalFunctionDefinition(GD.getWithMultiVersionIndex(I), nullptr);
// Requires multiple emits.
+ } else if (FD->isTargetClonesMultiVersion()) {
+ auto *Clone = FD->getAttr<TargetClonesAttr>();
+ for (unsigned I = 0; I < Clone->featuresStrs_size(); ++I)
+ if (Clone->isFirstOfVersion(I))
+ EmitGlobalFunctionDefinition(GD.getWithMultiVersionIndex(I), nullptr);
+ EmitTargetClonesResolver(GD);
} else
EmitGlobalFunctionDefinition(GD, GV);
}
@@ -3307,6 +3349,63 @@ llvm::GlobalValue::LinkageTypes getMultiversionLinkage(CodeGenModule &CGM,
return llvm::GlobalValue::WeakODRLinkage;
}
+void CodeGenModule::EmitTargetClonesResolver(GlobalDecl GD) {
+ const auto *FD = cast<FunctionDecl>(GD.getDecl());
+ assert(FD && "Not a FunctionDecl?");
+ const auto *TC = FD->getAttr<TargetClonesAttr>();
+ assert(TC && "Not a target_clones Function?");
+
+ QualType CanonTy = Context.getCanonicalType(FD->getType());
+ llvm::Type *DeclTy = getTypes().ConvertType(CanonTy);
+
+ if (const auto *CXXFD = dyn_cast<CXXMethodDecl>(FD)) {
+ const CGFunctionInfo &FInfo = getTypes().arrangeCXXMethodDeclaration(CXXFD);
+ DeclTy = getTypes().GetFunctionType(FInfo);
+ }
+
+ llvm::Function *ResolverFunc;
+ if (getTarget().supportsIFunc()) {
+ auto *IFunc = cast<llvm::GlobalIFunc>(
+ GetOrCreateMultiVersionResolver(GD, DeclTy, FD));
+ ResolverFunc = cast<llvm::Function>(IFunc->getResolver());
+ } else
+ ResolverFunc =
+ cast<llvm::Function>(GetOrCreateMultiVersionResolver(GD, DeclTy, FD));
+
+ SmallVector<CodeGenFunction::MultiVersionResolverOption, 10> Options;
+ for (unsigned VersionIndex = 0; VersionIndex < TC->featuresStrs_size();
+ ++VersionIndex) {
+ if (!TC->isFirstOfVersion(VersionIndex))
+ continue;
+ StringRef Version = TC->getFeatureStr(VersionIndex);
+ StringRef MangledName =
+ getMangledName(GD.getWithMultiVersionIndex(VersionIndex));
+ llvm::Constant *Func = GetGlobalValue(MangledName);
+ assert(Func &&
+ "Should have already been created before calling resolver emit");
+
+ StringRef Architecture;
+ llvm::SmallVector<StringRef, 1> Feature;
+
+ if (Version.startswith("arch="))
+ Architecture = Version.drop_front(sizeof("arch=") - 1);
+ else if (Version != "default")
+ Feature.push_back(Version);
+
+ Options.emplace_back(cast<llvm::Function>(Func), Architecture, Feature);
+ }
+
+ const TargetInfo &TI = getTarget();
+ std::stable_sort(
+ Options.begin(), Options.end(),
+ [&TI](const CodeGenFunction::MultiVersionResolverOption &LHS,
+ const CodeGenFunction::MultiVersionResolverOption &RHS) {
+ return TargetMVPriority(TI, LHS) > TargetMVPriority(TI, RHS);
+ });
+ CodeGenFunction CGF(*this);
+ CGF.EmitMultiVersionResolver(ResolverFunc, Options);
+}
+
void CodeGenModule::emitMultiVersionFunctions() {
std::vector<GlobalDecl> MVFuncsToEmit;
MultiVersionFuncs.swap(MVFuncsToEmit);
@@ -3511,8 +3610,25 @@ llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(
// Since this is the first time we've created this IFunc, make sure
// that we put this multiversioned function into the list to be
// replaced later if necessary (target multiversioning only).
- if (!FD->isCPUDispatchMultiVersion() && !FD->isCPUSpecificMultiVersion())
+ if (FD->isTargetMultiVersion())
MultiVersionFuncs.push_back(GD);
+ else if (FD->isTargetClonesMultiVersion()) {
+ // In target_clones multiversioning, make sure we emit this if used.
+ auto DDI =
+ DeferredDecls.find(getMangledName(GD.getWithMultiVersionIndex(0)));
+ if (DDI != DeferredDecls.end()) {
+ addDeferredDeclToEmit(GD);
+ DeferredDecls.erase(DDI);
+ } else {
+ // Emit the symbol of the 1st variant, so that the deferred decls know we
+ // need it; otherwise the only global value will be the resolver/ifunc,
+ // which ends up getting broken if we search for it with GetGlobalValue().
+ GetOrCreateLLVMFunction(
+ getMangledName(GD.getWithMultiVersionIndex(0)), DeclTy, FD,
+ /*ForVTable=*/false, /*DontDefer=*/true,
+ /*IsThunk=*/false, llvm::AttributeList(), ForDefinition);
+ }
+ }
if (getTarget().supportsIFunc()) {
llvm::Type *ResolverType = llvm::FunctionType::get(
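
For reference, EmitTargetClonesResolver serves the target_clones attribute.
Per AppendTargetClonesMangling above, each variant is suffixed with
'.<feature>.<index>' (with "arch=" rewritten to "arch_"), and callers
dispatch through a resolver, emitted as an ifunc where the target supports
one. A sketch of the user-facing feature (hypothetical functions):

    // Variants come out roughly as foo.avx2.0, foo.arch_atom.1 and
    // foo.default.2 (exact indices are an implementation detail), plus a
    // resolver exposed under the plain name.
    __attribute__((target_clones("avx2", "arch=atom", "default")))
    int foo(void) { return 1; }

    int caller(void) { return foo(); } // dispatches through the resolver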
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index fbed22376c82..e1c7f486d334 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -1500,6 +1500,7 @@ private:
void EmitAliasDefinition(GlobalDecl GD);
void emitIFuncDefinition(GlobalDecl GD);
void emitCPUDispatchDefinition(GlobalDecl GD);
+ void EmitTargetClonesResolver(GlobalDecl GD);
void EmitObjCPropertyImplementations(const ObjCImplementationDecl *D);
void EmitObjCIvarInitializations(ObjCImplementationDecl *D);
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index 302dc653c46e..36e0319c8ab9 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -6364,6 +6364,26 @@ public:
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD)
return;
+ auto *Fn = cast<llvm::Function>(GV);
+
+ if (const auto *TA = FD->getAttr<TargetAttr>()) {
+ ParsedTargetAttr Attr = TA->parse();
+ if (!Attr.BranchProtection.empty()) {
+ TargetInfo::BranchProtectionInfo BPI;
+ StringRef DiagMsg;
+ (void)CGM.getTarget().validateBranchProtection(Attr.BranchProtection,
+ BPI, DiagMsg);
+
+ static const char *SignReturnAddrStr[] = {"none", "non-leaf", "all"};
+ assert(static_cast<unsigned>(BPI.SignReturnAddr) <= 2 &&
+ "Unexpected SignReturnAddressScopeKind");
+ Fn->addFnAttr("sign-return-address",
+ SignReturnAddrStr[static_cast<int>(BPI.SignReturnAddr)]);
+
+ Fn->addFnAttr("branch-target-enforcement",
+ BPI.BranchTargetEnforcement ? "true" : "false");
+ }
+ }
const ARMInterruptAttr *Attr = FD->getAttr<ARMInterruptAttr>();
if (!Attr)
@@ -6379,8 +6399,6 @@ public:
case ARMInterruptAttr::UNDEF: Kind = "UNDEF"; break;
}
- llvm::Function *Fn = cast<llvm::Function>(GV);
-
Fn->addFnAttr("interrupt", Kind);
ARMABIInfo::ABIKind ABI = cast<ARMABIInfo>(getABIInfo()).getABIKind();
@@ -9339,17 +9357,25 @@ AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
llvm::LLVMContext &Ctx) const {
std::string Name;
switch (Scope) {
+ case SyncScope::HIPSingleThread:
+ Name = "singlethread";
+ break;
+ case SyncScope::HIPWavefront:
+ case SyncScope::OpenCLSubGroup:
+ Name = "wavefront";
+ break;
+ case SyncScope::HIPWorkgroup:
case SyncScope::OpenCLWorkGroup:
Name = "workgroup";
break;
+ case SyncScope::HIPAgent:
case SyncScope::OpenCLDevice:
Name = "agent";
break;
+ case SyncScope::HIPSystem:
case SyncScope::OpenCLAllSVMDevices:
Name = "";
break;
- case SyncScope::OpenCLSubGroup:
- Name = "wavefront";
}
if (Ordering != llvm::AtomicOrdering::SequentiallyConsistent) {
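
The ARM block above translates a per-function branch-protection string from
the target attribute into "sign-return-address" and
"branch-target-enforcement" function attributes, mirroring the module-level
flags added in CodeGenModule.cpp. A hedged sketch of the source-level trigger
(hypothetical function):

    // Assumption: "branch-protection=..." inside __attribute__((target(...)))
    // is the spelling parsed via TA->parse() above.
    __attribute__((target("branch-protection=pac-ret+bti")))
    void sensitive(void) {}
    // Expected attributes: "sign-return-address"="non-leaf",
    //                      "branch-target-enforcement"="true"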
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 8023d03013a1..d501bd026219 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -38,6 +38,7 @@
#include "ToolChains/NaCl.h"
#include "ToolChains/NetBSD.h"
#include "ToolChains/OpenBSD.h"
+#include "ToolChains/PPCFreeBSD.h"
#include "ToolChains/PPCLinux.h"
#include "ToolChains/PS4CPU.h"
#include "ToolChains/RISCVToolchain.h"
@@ -5302,7 +5303,11 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
TC = std::make_unique<toolchains::NetBSD>(*this, Target, Args);
break;
case llvm::Triple::FreeBSD:
- TC = std::make_unique<toolchains::FreeBSD>(*this, Target, Args);
+ if (Target.isPPC())
+ TC = std::make_unique<toolchains::PPCFreeBSDToolChain>(*this, Target,
+ Args);
+ else
+ TC = std::make_unique<toolchains::FreeBSD>(*this, Target, Args);
break;
case llvm::Triple::Minix:
TC = std::make_unique<toolchains::Minix>(*this, Target, Args);
diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
index 0b60d097b9ca..abc32f22d2a1 100644
--- a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
@@ -225,7 +225,7 @@ void aarch64::getAArch64TargetFeatures(const Driver &D,
bool success = true;
// Enable NEON by default.
Features.push_back("+neon");
- llvm::StringRef WaMArch = "";
+ llvm::StringRef WaMArch;
if (ForAS)
for (const auto *A :
Args.filtered(options::OPT_Wa_COMMA, options::OPT_Xassembler))
@@ -235,7 +235,7 @@ void aarch64::getAArch64TargetFeatures(const Driver &D,
// Call getAArch64ArchFeaturesFromMarch only if "-Wa,-march=" or
// "-Xassembler -march" is detected. Otherwise it may return false
// and causes Clang to error out.
- if (WaMArch.size())
+ if (!WaMArch.empty())
success = getAArch64ArchFeaturesFromMarch(D, WaMArch, Args, Features);
else if ((A = Args.getLastArg(options::OPT_march_EQ)))
success = getAArch64ArchFeaturesFromMarch(D, A->getValue(), Args, Features);
@@ -259,8 +259,15 @@ void aarch64::getAArch64TargetFeatures(const Driver &D,
success = getAArch64MicroArchFeaturesFromMcpu(
D, getAArch64TargetCPU(Args, Triple, A), Args, Features);
- if (!success)
- D.Diag(diag::err_drv_clang_unsupported) << A->getAsString(Args);
+ if (!success) {
+ auto Diag = D.Diag(diag::err_drv_clang_unsupported);
+ // If "-Wa,-march=" is used, 'WaMArch' will contain the argument's value,
+ // while 'A' is uninitialized. Only dereference 'A' in the other case.
+ if (!WaMArch.empty())
+ Diag << "-march=" + WaMArch.str();
+ else
+ Diag << A->getAsString(Args);
+ }
if (Args.getLastArg(options::OPT_mgeneral_regs_only)) {
Features.push_back("-fp-armv8");
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 55518cd7926f..c5aaa067c4f5 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -403,7 +403,7 @@ shouldUseExceptionTablesForObjCExceptions(const ObjCRuntime &runtime,
}
/// Adds exception related arguments to the driver command arguments. There's a
-/// master flag, -fexceptions and also language specific flags to enable/disable
+/// main flag, -fexceptions, and also language-specific flags to enable/disable
/// C++ and Objective-C exceptions. This makes it possible to for example
/// disable C++ exceptions but enable Objective-C exceptions.
static bool addExceptionArgs(const ArgList &Args, types::ID InputType,
@@ -1603,6 +1603,49 @@ void RenderARMABI(const Driver &D, const llvm::Triple &Triple,
}
}
+static void CollectARMPACBTIOptions(const Driver &D, const ArgList &Args,
+ ArgStringList &CmdArgs, bool isAArch64) {
+ const Arg *A = isAArch64
+ ? Args.getLastArg(options::OPT_msign_return_address_EQ,
+ options::OPT_mbranch_protection_EQ)
+ : Args.getLastArg(options::OPT_mbranch_protection_EQ);
+ if (!A)
+ return;
+
+ StringRef Scope, Key;
+ bool IndirectBranches;
+
+ if (A->getOption().matches(options::OPT_msign_return_address_EQ)) {
+ Scope = A->getValue();
+ if (!Scope.equals("none") && !Scope.equals("non-leaf") &&
+ !Scope.equals("all"))
+ D.Diag(diag::err_invalid_branch_protection)
+ << Scope << A->getAsString(Args);
+ Key = "a_key";
+ IndirectBranches = false;
+ } else {
+ StringRef DiagMsg;
+ llvm::ARM::ParsedBranchProtection PBP;
+ if (!llvm::ARM::parseBranchProtection(A->getValue(), PBP, DiagMsg))
+ D.Diag(diag::err_invalid_branch_protection)
+ << DiagMsg << A->getAsString(Args);
+ if (!isAArch64 && PBP.Key == "b_key")
+ D.Diag(diag::warn_unsupported_branch_protection)
+ << "b-key" << A->getAsString(Args);
+ Scope = PBP.Scope;
+ Key = PBP.Key;
+ IndirectBranches = PBP.BranchTargetEnforcement;
+ }
+
+ CmdArgs.push_back(
+ Args.MakeArgString(Twine("-msign-return-address=") + Scope));
+ if (!Scope.equals("none"))
+ CmdArgs.push_back(
+ Args.MakeArgString(Twine("-msign-return-address-key=") + Key));
+ if (IndirectBranches)
+ CmdArgs.push_back("-mbranch-target-enforce");
+}
+
void Clang::AddARMTargetArgs(const llvm::Triple &Triple, const ArgList &Args,
ArgStringList &CmdArgs, bool KernelOrKext) const {
RenderARMABI(getToolChain().getDriver(), Triple, Args, CmdArgs);
@@ -1644,6 +1687,10 @@ void Clang::AddARMTargetArgs(const llvm::Triple &Triple, const ArgList &Args,
CmdArgs.push_back("-mcmse");
AddAAPCSVolatileBitfieldArgs(Args, CmdArgs);
+
+ // Enable/disable return address signing and indirect branch targets.
+ CollectARMPACBTIOptions(getToolChain().getDriver(), Args, CmdArgs,
+ false /*isAArch64*/);
}
void Clang::RenderTargetOptions(const llvm::Triple &EffectiveTriple,
@@ -1783,40 +1830,8 @@ void Clang::AddAArch64TargetArgs(const ArgList &Args,
}
// Enable/disable return address signing and indirect branch targets.
- if (Arg *A = Args.getLastArg(options::OPT_msign_return_address_EQ,
- options::OPT_mbranch_protection_EQ)) {
-
- const Driver &D = getToolChain().getDriver();
-
- StringRef Scope, Key;
- bool IndirectBranches;
-
- if (A->getOption().matches(options::OPT_msign_return_address_EQ)) {
- Scope = A->getValue();
- if (!Scope.equals("none") && !Scope.equals("non-leaf") &&
- !Scope.equals("all"))
- D.Diag(diag::err_invalid_branch_protection)
- << Scope << A->getAsString(Args);
- Key = "a_key";
- IndirectBranches = false;
- } else {
- StringRef Err;
- llvm::AArch64::ParsedBranchProtection PBP;
- if (!llvm::AArch64::parseBranchProtection(A->getValue(), PBP, Err))
- D.Diag(diag::err_invalid_branch_protection)
- << Err << A->getAsString(Args);
- Scope = PBP.Scope;
- Key = PBP.Key;
- IndirectBranches = PBP.BranchTargetEnforcement;
- }
-
- CmdArgs.push_back(
- Args.MakeArgString(Twine("-msign-return-address=") + Scope));
- CmdArgs.push_back(
- Args.MakeArgString(Twine("-msign-return-address-key=") + Key));
- if (IndirectBranches)
- CmdArgs.push_back("-mbranch-target-enforce");
- }
+ CollectARMPACBTIOptions(getToolChain().getDriver(), Args, CmdArgs,
+ true /*isAArch64*/);
// Handle -msve_vector_bits=<bits>
if (Arg *A = Args.getLastArg(options::OPT_msve_vector_bits_EQ)) {
@@ -5821,9 +5836,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
Args.AddLastArg(CmdArgs, options::OPT_fvisibility_inlines_hidden_static_local_var,
options::OPT_fno_visibility_inlines_hidden_static_local_var);
Args.AddLastArg(CmdArgs, options::OPT_fvisibility_global_new_delete_hidden);
- Args.AddLastArg(CmdArgs, options::OPT_fnew_infallible);
Args.AddLastArg(CmdArgs, options::OPT_ftlsmodel_EQ);
+ if (Args.hasFlag(options::OPT_fnew_infallible,
+ options::OPT_fno_new_infallible, false))
+ CmdArgs.push_back("-fnew-infallible");
+
if (Args.hasFlag(options::OPT_fno_operator_names,
options::OPT_foperator_names, false))
CmdArgs.push_back("-fno-operator-names");
@@ -5886,7 +5904,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
// runtime.
if (Args.hasFlag(options::OPT_fopenmp_target_new_runtime,
options::OPT_fno_openmp_target_new_runtime,
- /*Default=*/false))
+ /*Default=*/!getToolChain().getTriple().isAMDGCN()))
CmdArgs.push_back("-fopenmp-target-new-runtime");
// When in OpenMP offloading mode, enable debugging on the device.
@@ -6659,6 +6677,35 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
A->claim();
}
+ // Turn -fplugin-arg-pluginname-key=value into
+ // -plugin-arg-pluginname key=value
+ // GCC has an actual plugin_argument struct with key/value pairs that it
+ // passes to its plugins, but we don't, so just pass it on as-is.
+ //
+ // The syntax for -fplugin-arg- is ambiguous if both plugin name and
+ // argument key are allowed to contain dashes. GCC therefore only
+ // allows dashes in the key. We do the same.
+ for (const Arg *A : Args.filtered(options::OPT_fplugin_arg)) {
+ auto ArgValue = StringRef(A->getValue());
+ auto FirstDashIndex = ArgValue.find('-');
+ StringRef PluginName = ArgValue.substr(0, FirstDashIndex);
+ StringRef Arg = ArgValue.substr(FirstDashIndex + 1);
+
+ A->claim();
+ if (FirstDashIndex == StringRef::npos || Arg.empty()) {
+ if (PluginName.empty()) {
+ D.Diag(diag::warn_drv_missing_plugin_name) << A->getAsString(Args);
+ } else {
+ D.Diag(diag::warn_drv_missing_plugin_arg)
+ << PluginName << A->getAsString(Args);
+ }
+ continue;
+ }
+
+ CmdArgs.push_back(Args.MakeArgString(Twine("-plugin-arg-") + PluginName));
+ CmdArgs.push_back(Args.MakeArgString(Arg));
+ }
+
// Forward -fpass-plugin=name.so to -cc1.
for (const Arg *A : Args.filtered(options::OPT_fpass_plugin_EQ)) {
CmdArgs.push_back(
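
Because the split happens at the first dash, plugin names must be dash-free
while keys may contain dashes, matching GCC. A self-contained sketch of the
split semantics (hypothetical helper, std::string_view in place of StringRef):

    #include <string_view>
    #include <utility>

    // "-fplugin-arg-foo-key=value" reaches the loop as "foo-key=value" and
    // splits into {"foo", "key=value"}; a missing key draws a driver warning.
    std::pair<std::string_view, std::string_view>
    splitPluginArg(std::string_view V) {
      size_t Dash = V.find('-');
      if (Dash == std::string_view::npos)
        return {V, {}};
      return {V.substr(0, Dash), V.substr(Dash + 1)};
    }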
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index 5397c7a9a0e6..ee573b89bed1 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -745,7 +745,7 @@ void CudaToolChain::addClangTargetOptions(
std::string BitcodeSuffix;
if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime,
- options::OPT_fno_openmp_target_new_runtime, false))
+ options::OPT_fno_openmp_target_new_runtime, true))
BitcodeSuffix = "new-nvptx-" + GpuArch.str();
else
BitcodeSuffix = "nvptx-" + GpuArch.str();
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index b82c5d7600df..c169e3d45793 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -32,7 +32,8 @@ void Flang::AddFortranDialectOptions(const ArgList &Args,
options::OPT_fxor_operator, options::OPT_fno_xor_operator,
options::OPT_falternative_parameter_statement,
options::OPT_fdefault_real_8, options::OPT_fdefault_integer_8,
- options::OPT_fdefault_double_8, options::OPT_flarge_sizes});
+ options::OPT_fdefault_double_8, options::OPT_flarge_sizes,
+ options::OPT_fno_automatic});
}
void Flang::AddPreprocessingOptions(const ArgList &Args,
diff --git a/clang/lib/Driver/ToolChains/FreeBSD.cpp b/clang/lib/Driver/ToolChains/FreeBSD.cpp
index dc05f9893465..d08ea282f6df 100644
--- a/clang/lib/Driver/ToolChains/FreeBSD.cpp
+++ b/clang/lib/Driver/ToolChains/FreeBSD.cpp
@@ -391,7 +391,8 @@ FreeBSD::FreeBSD(const Driver &D, const llvm::Triple &Triple,
}
ToolChain::CXXStdlibType FreeBSD::GetDefaultCXXStdlibType() const {
- if (getTriple().getOSMajorVersion() >= 10)
+ unsigned Major = getTriple().getOSMajorVersion();
+ if (Major >= 10 || Major == 0)
return ToolChain::CST_Libcxx;
return ToolChain::CST_Libstdcxx;
}
diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp
index 0224383e63a1..198774506e5e 100644
--- a/clang/lib/Driver/ToolChains/Linux.cpp
+++ b/clang/lib/Driver/ToolChains/Linux.cpp
@@ -421,6 +421,9 @@ std::string Linux::getDynamicLinker(const ArgList &Args) const {
(Triple.getEnvironment() == llvm::Triple::MuslEABIHF ||
tools::arm::getARMFloatABI(*this, Args) == tools::arm::FloatABI::Hard))
ArchName += "hf";
+ if (Arch == llvm::Triple::ppc &&
+ Triple.getSubArch() == llvm::Triple::PPCSubArch_spe)
+ ArchName = "powerpc-sf";
return "/lib/ld-musl-" + ArchName + ".so.1";
}
diff --git a/clang/lib/Driver/ToolChains/PPCFreeBSD.cpp b/clang/lib/Driver/ToolChains/PPCFreeBSD.cpp
new file mode 100644
index 000000000000..8d381c4f1437
--- /dev/null
+++ b/clang/lib/Driver/ToolChains/PPCFreeBSD.cpp
@@ -0,0 +1,28 @@
+//===-- PPCFreeBSD.cpp - PowerPC ToolChain Implementations ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCFreeBSD.h"
+#include "clang/Driver/Driver.h"
+#include "clang/Driver/Options.h"
+#include "llvm/Support/Path.h"
+
+using namespace clang::driver::toolchains;
+using namespace llvm::opt;
+
+void PPCFreeBSDToolChain::AddClangSystemIncludeArgs(
+ const ArgList &DriverArgs, ArgStringList &CC1Args) const {
+ if (!DriverArgs.hasArg(clang::driver::options::OPT_nostdinc) &&
+ !DriverArgs.hasArg(options::OPT_nobuiltininc)) {
+ const Driver &D = getDriver();
+ SmallString<128> P(D.ResourceDir);
+ llvm::sys::path::append(P, "include", "ppc_wrappers");
+ addSystemInclude(DriverArgs, CC1Args, P);
+ }
+
+ FreeBSD::AddClangSystemIncludeArgs(DriverArgs, CC1Args);
+}
diff --git a/clang/lib/Driver/ToolChains/PPCFreeBSD.h b/clang/lib/Driver/ToolChains/PPCFreeBSD.h
new file mode 100644
index 000000000000..d5d9cf4e83a0
--- /dev/null
+++ b/clang/lib/Driver/ToolChains/PPCFreeBSD.h
@@ -0,0 +1,33 @@
+//===--- PPCFreeBSD.h - PowerPC ToolChain Implementations -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_PPC_FREEBSD_H
+#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_PPC_FREEBSD_H
+
+#include "FreeBSD.h"
+
+namespace clang {
+namespace driver {
+namespace toolchains {
+
+class LLVM_LIBRARY_VISIBILITY PPCFreeBSDToolChain : public FreeBSD {
+public:
+ PPCFreeBSDToolChain(const Driver &D, const llvm::Triple &Triple,
+ const llvm::opt::ArgList &Args)
+ : FreeBSD(D, Triple, Args) {}
+
+ void
+ AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
+ llvm::opt::ArgStringList &CC1Args) const override;
+};
+
+} // end namespace toolchains
+} // end namespace driver
+} // end namespace clang
+
+#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_PPC_FREEBSD_H
diff --git a/clang/lib/Driver/ToolChains/PS4CPU.h b/clang/lib/Driver/ToolChains/PS4CPU.h
index 82f9523f84fb..4bedabaf267c 100644
--- a/clang/lib/Driver/ToolChains/PS4CPU.h
+++ b/clang/lib/Driver/ToolChains/PS4CPU.h
@@ -80,6 +80,7 @@ public:
return LangOptions::SSPStrong;
}
+ unsigned GetDefaultDwarfVersion() const override { return 4; }
llvm::DebuggerKind getDefaultDebuggerTuning() const override {
return llvm::DebuggerKind::SCE;
}
diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp
index 1e4f5690ef24..5073f5105d05 100644
--- a/clang/lib/Format/ContinuationIndenter.cpp
+++ b/clang/lib/Format/ContinuationIndenter.cpp
@@ -1984,9 +1984,17 @@ ContinuationIndenter::createBreakableToken(const FormatToken &Current,
} else if (Current.is(TT_LineComment) &&
(Current.Previous == nullptr ||
Current.Previous->isNot(TT_ImplicitStringLiteral))) {
+ bool RegularComments = [&]() {
+ for (const FormatToken *T = &Current; T && T->is(TT_LineComment);
+ T = T->Next) {
+ if (!(T->TokenText.startswith("//") || T->TokenText.startswith("#")))
+ return false;
+ }
+ return true;
+ }();
if (!Style.ReflowComments ||
CommentPragmasRegex.match(Current.TokenText.substr(2)) ||
- switchesFormatting(Current))
+ switchesFormatting(Current) || !RegularComments)
return nullptr;
return std::make_unique<BreakableLineCommentSection>(
Current, StartColumn, /*InPPDirective=*/false, Encoding, Style);
@@ -2195,11 +2203,10 @@ ContinuationIndenter::breakProtrudingToken(const FormatToken &Current,
// When breaking before a tab character, it may be moved by a few columns,
// but will still be expanded to the next tab stop, so we don't save any
// columns.
- if (NewRemainingTokenColumns == RemainingTokenColumns) {
+ if (NewRemainingTokenColumns >= RemainingTokenColumns) {
// FIXME: Do we need to adjust the penalty?
break;
}
- assert(NewRemainingTokenColumns < RemainingTokenColumns);
LLVM_DEBUG(llvm::dbgs() << " Breaking at: " << TailOffset + Split.first
<< ", " << Split.second << "\n");
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index 8ae29c54a762..17de1075aeaa 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -2988,9 +2988,8 @@ reformat(const FormatStyle &Style, StringRef Code,
// JSON only needs the formatting passing.
if (Style.isJson()) {
std::vector<tooling::Range> Ranges(1, tooling::Range(0, Code.size()));
- auto Env =
- Environment::make(Code, FileName, Ranges, FirstStartColumn,
- NextStartColumn, LastStartColumn);
+ auto Env = Environment::make(Code, FileName, Ranges, FirstStartColumn,
+ NextStartColumn, LastStartColumn);
if (!Env)
return {};
// Perform the actual formatting pass.
@@ -3118,9 +3117,7 @@ tooling::Replacements fixNamespaceEndComments(const FormatStyle &Style,
auto Env = Environment::make(Code, FileName, Ranges);
if (!Env)
return {};
- return NamespaceEndCommentsFixer(*Env, Style)
- .process()
- .first;
+ return NamespaceEndCommentsFixer(*Env, Style).process().first;
}
tooling::Replacements sortUsingDeclarations(const FormatStyle &Style,
@@ -3130,9 +3127,7 @@ tooling::Replacements sortUsingDeclarations(const FormatStyle &Style,
auto Env = Environment::make(Code, FileName, Ranges);
if (!Env)
return {};
- return UsingDeclarationsSorter(*Env, Style)
- .process()
- .first;
+ return UsingDeclarationsSorter(*Env, Style).process().first;
}
LangOptions getFormattingLangOpts(const FormatStyle &Style) {
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index 06d51dd95f50..1a2858018fde 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -76,6 +76,7 @@ namespace format {
TYPE(LineComment) \
TYPE(MacroBlockBegin) \
TYPE(MacroBlockEnd) \
+ TYPE(ModulePartitionColon) \
TYPE(NamespaceMacro) \
TYPE(NonNullAssertion) \
TYPE(NullCoalescingEqual) \
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index 8075756cca03..64fbd2d5d45b 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -37,27 +37,40 @@ FormatTokenLexer::FormatTokenLexer(
getFormattingLangOpts(Style)));
Lex->SetKeepWhitespaceMode(true);
- for (const std::string &ForEachMacro : Style.ForEachMacros)
- Macros.insert({&IdentTable.get(ForEachMacro), TT_ForEachMacro});
- for (const std::string &IfMacro : Style.IfMacros)
- Macros.insert({&IdentTable.get(IfMacro), TT_IfMacro});
- for (const std::string &AttributeMacro : Style.AttributeMacros)
- Macros.insert({&IdentTable.get(AttributeMacro), TT_AttributeMacro});
- for (const std::string &StatementMacro : Style.StatementMacros)
- Macros.insert({&IdentTable.get(StatementMacro), TT_StatementMacro});
- for (const std::string &TypenameMacro : Style.TypenameMacros)
- Macros.insert({&IdentTable.get(TypenameMacro), TT_TypenameMacro});
- for (const std::string &NamespaceMacro : Style.NamespaceMacros)
- Macros.insert({&IdentTable.get(NamespaceMacro), TT_NamespaceMacro});
+ for (const std::string &ForEachMacro : Style.ForEachMacros) {
+ auto Identifier = &IdentTable.get(ForEachMacro);
+ Macros.insert({Identifier, TT_ForEachMacro});
+ }
+ for (const std::string &IfMacro : Style.IfMacros) {
+ auto Identifier = &IdentTable.get(IfMacro);
+ Macros.insert({Identifier, TT_IfMacro});
+ }
+ for (const std::string &AttributeMacro : Style.AttributeMacros) {
+ auto Identifier = &IdentTable.get(AttributeMacro);
+ Macros.insert({Identifier, TT_AttributeMacro});
+ }
+ for (const std::string &StatementMacro : Style.StatementMacros) {
+ auto Identifier = &IdentTable.get(StatementMacro);
+ Macros.insert({Identifier, TT_StatementMacro});
+ }
+ for (const std::string &TypenameMacro : Style.TypenameMacros) {
+ auto Identifier = &IdentTable.get(TypenameMacro);
+ Macros.insert({Identifier, TT_TypenameMacro});
+ }
+ for (const std::string &NamespaceMacro : Style.NamespaceMacros) {
+ auto Identifier = &IdentTable.get(NamespaceMacro);
+ Macros.insert({Identifier, TT_NamespaceMacro});
+ }
for (const std::string &WhitespaceSensitiveMacro :
Style.WhitespaceSensitiveMacros) {
- Macros.insert(
- {&IdentTable.get(WhitespaceSensitiveMacro), TT_UntouchableMacroFunc});
+ auto Identifier = &IdentTable.get(WhitespaceSensitiveMacro);
+ Macros.insert({Identifier, TT_UntouchableMacroFunc});
}
for (const std::string &StatementAttributeLikeMacro :
- Style.StatementAttributeLikeMacros)
- Macros.insert({&IdentTable.get(StatementAttributeLikeMacro),
- TT_StatementAttributeLikeMacro});
+ Style.StatementAttributeLikeMacros) {
+ auto Identifier = &IdentTable.get(StatementAttributeLikeMacro);
+ Macros.insert({Identifier, TT_StatementAttributeLikeMacro});
+ }
}
ArrayRef<FormatToken *> FormatTokenLexer::lex() {
@@ -739,6 +752,8 @@ bool FormatTokenLexer::tryMerge_TMacro() {
Tokens.pop_back();
Tokens.pop_back();
Tokens.back() = String;
+ if (FirstInLineIndex >= Tokens.size())
+ FirstInLineIndex = Tokens.size() - 1;
return true;
}
diff --git a/clang/lib/Format/SortJavaScriptImports.cpp b/clang/lib/Format/SortJavaScriptImports.cpp
index 515cfce725a4..77dc0d683e5f 100644
--- a/clang/lib/Format/SortJavaScriptImports.cpp
+++ b/clang/lib/Format/SortJavaScriptImports.cpp
@@ -553,9 +553,7 @@ tooling::Replacements sortJavaScriptImports(const FormatStyle &Style,
auto Env = Environment::make(Code, FileName, Ranges);
if (!Env)
return {};
- return JavaScriptImportSorter(*Env, Style)
- .process()
- .first;
+ return JavaScriptImportSorter(*Env, Style).process().first;
}
} // end namespace format
diff --git a/clang/lib/Format/TokenAnalyzer.cpp b/clang/lib/Format/TokenAnalyzer.cpp
index a619c6d939e9..d83e837ca134 100644
--- a/clang/lib/Format/TokenAnalyzer.cpp
+++ b/clang/lib/Format/TokenAnalyzer.cpp
@@ -37,7 +37,7 @@ namespace format {
// FIXME: Instead of printing the diagnostic we should store it and have a
// better way to return errors through the format APIs.
-class FatalDiagnosticConsumer: public DiagnosticConsumer {
+class FatalDiagnosticConsumer : public DiagnosticConsumer {
public:
void HandleDiagnostic(DiagnosticsEngine::Level DiagLevel,
const Diagnostic &Info) override {
@@ -71,7 +71,8 @@ Environment::make(StringRef Code, StringRef FileName,
}
// Validate that we can get the buffer data without a fatal error.
Env->SM.getBufferData(Env->ID);
- if (Diags.fatalError()) return nullptr;
+ if (Diags.fatalError())
+ return nullptr;
return Env;
}
@@ -80,8 +81,7 @@ Environment::Environment(StringRef Code, StringRef FileName,
unsigned LastStartColumn)
: VirtualSM(new SourceManagerForFile(FileName, Code)), SM(VirtualSM->get()),
ID(VirtualSM->get().getMainFileID()), FirstStartColumn(FirstStartColumn),
- NextStartColumn(NextStartColumn), LastStartColumn(LastStartColumn) {
-}
+ NextStartColumn(NextStartColumn), LastStartColumn(LastStartColumn) {}
TokenAnalyzer::TokenAnalyzer(const Environment &Env, const FormatStyle &Style)
: Style(Style), Env(Env),
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 3897241cb858..a94d8cdc3b04 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -314,10 +314,11 @@ private:
//
// void (*FunctionPointer)(void);
// void (&FunctionReference)(void);
+ // void (&&FunctionReference)(void);
// void (^ObjCBlock)(void);
bool MightBeFunctionType = !Contexts[Contexts.size() - 2].IsExpression;
bool ProbablyFunctionType =
- CurrentToken->isOneOf(tok::star, tok::amp, tok::caret);
+ CurrentToken->isOneOf(tok::star, tok::amp, tok::ampamp, tok::caret);
bool HasMultipleLines = false;
bool HasMultipleParametersOnALine = false;
bool MightBeObjCForRangeLoop =
@@ -902,9 +903,13 @@ private:
break;
}
}
- if (Contexts.back().ColonIsDictLiteral ||
- Style.Language == FormatStyle::LK_Proto ||
- Style.Language == FormatStyle::LK_TextProto) {
+ if (Line.First->isOneOf(Keywords.kw_module, Keywords.kw_import) ||
+ Line.First->startsSequence(tok::kw_export, Keywords.kw_module) ||
+ Line.First->startsSequence(tok::kw_export, Keywords.kw_import)) {
+ Tok->setType(TT_ModulePartitionColon);
+ } else if (Contexts.back().ColonIsDictLiteral ||
+ Style.Language == FormatStyle::LK_Proto ||
+ Style.Language == FormatStyle::LK_TextProto) {
Tok->setType(TT_DictLiteral);
if (Style.Language == FormatStyle::LK_TextProto) {
if (FormatToken *Previous = Tok->getPreviousNonComment())
@@ -946,11 +951,15 @@ private:
!Line.First->isOneOf(tok::kw_enum, tok::kw_case,
tok::kw_default)) {
FormatToken *Prev = Tok->getPreviousNonComment();
+ if (!Prev)
+ break;
if (Prev->isOneOf(tok::r_paren, tok::kw_noexcept))
Tok->setType(TT_CtorInitializerColon);
else if (Prev->is(tok::kw_try)) {
// Member initializer list within function try block.
FormatToken *PrevPrev = Prev->getPreviousNonComment();
+ if (!PrevPrev)
+ break;
if (PrevPrev && PrevPrev->isOneOf(tok::r_paren, tok::kw_noexcept))
Tok->setType(TT_CtorInitializerColon);
} else
@@ -995,6 +1004,8 @@ private:
if (CurrentToken && CurrentToken->is(Keywords.kw_await))
next();
}
+ if (Style.isCpp() && CurrentToken && CurrentToken->is(tok::kw_co_await))
+ next();
Contexts.back().ColonIsForRangeExpr = true;
next();
if (!parseParens())
@@ -1578,6 +1589,8 @@ private:
if (TemplateCloser->is(tok::l_paren)) {
// No Matching Paren yet so skip to matching paren
TemplateCloser = untilMatchingParen(TemplateCloser);
+ if (!TemplateCloser)
+ break;
}
if (TemplateCloser->is(tok::less))
NestingLevel++;
@@ -2336,16 +2349,15 @@ void TokenAnnotator::setCommentLineLevels(
if (NextNonCommentLine && CommentLine &&
NextNonCommentLine->First->NewlinesBefore <= 1 &&
NextNonCommentLine->First->OriginalColumn ==
- AL->First->OriginalColumn) {
+ AL->First->OriginalColumn) {
// Align comments for preprocessor lines with the # in column 0 if
// preprocessor lines are not indented. Otherwise, align with the next
// line.
- AL->Level =
- (Style.IndentPPDirectives != FormatStyle::PPDIS_BeforeHash &&
- (NextNonCommentLine->Type == LT_PreprocessorDirective ||
- NextNonCommentLine->Type == LT_ImportStatement))
- ? 0
- : NextNonCommentLine->Level;
+ AL->Level = (Style.IndentPPDirectives != FormatStyle::PPDIS_BeforeHash &&
+ (NextNonCommentLine->Type == LT_PreprocessorDirective ||
+ NextNonCommentLine->Type == LT_ImportStatement))
+ ? 0
+ : NextNonCommentLine->Level;
} else {
NextNonCommentLine = AL->First->isNot(tok::r_brace) ? AL : nullptr;
}
@@ -2639,8 +2651,8 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
if (Current->Role)
Current->Role->precomputeFormattingInfos(Current);
if (Current->MatchingParen &&
- Current->MatchingParen->opensBlockOrBlockTypeList(Style)) {
- assert(IndentLevel > 0);
+ Current->MatchingParen->opensBlockOrBlockTypeList(Style) &&
+ IndentLevel > 0) {
--IndentLevel;
}
Current->IndentLevel = IndentLevel;
@@ -2942,6 +2954,14 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
if (Left.is(tok::kw_auto) && Right.isOneOf(tok::l_paren, tok::l_brace))
return false;
+ // operator co_await(x)
+ if (Right.is(tok::l_paren) && Left.is(tok::kw_co_await) && Left.Previous &&
+ Left.Previous->is(tok::kw_operator))
+ return false;
+ // co_await (x), co_yield (x), co_return (x)
+ if (Left.isOneOf(tok::kw_co_await, tok::kw_co_yield, tok::kw_co_return) &&
+ Right.isNot(tok::semi))
+ return true;
// requires clause Concept1<T> && Concept2<T>
if (Left.is(TT_ConstraintJunctions) && Right.is(tok::identifier))
return true;
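
For illustration only (this snippet is not part of the patch), the two coroutine rules above produce spacing like:

    operator co_await(x);  // no space before '(' after 'operator co_await'
    co_await (x);          // space kept after the coroutine keywords
    co_yield (value);
    co_return (result);
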
@@ -3159,9 +3179,13 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
if (Left.isIf(Line.Type != LT_PreprocessorDirective))
return Style.SpaceBeforeParensOptions.AfterControlStatements ||
spaceRequiredBeforeParens(Right);
+
+ // TODO add Operator overloading specific Options to
+ // SpaceBeforeParensOptions
+ if (Right.is(TT_OverloadedOperatorLParen))
+ return spaceRequiredBeforeParens(Right);
// Function declaration or definition
- if (Line.MightBeFunctionDecl && (Left.is(TT_FunctionDeclarationName) ||
- Right.is(TT_OverloadedOperatorLParen))) {
+ if (Line.MightBeFunctionDecl && (Left.is(TT_FunctionDeclarationName))) {
if (Line.mightBeFunctionDefinition())
return Style.SpaceBeforeParensOptions.AfterFunctionDefinitionName ||
spaceRequiredBeforeParens(Right);
@@ -3238,9 +3262,35 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
auto HasExistingWhitespace = [&Right]() {
return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd();
};
+
if (Right.Tok.getIdentifierInfo() && Left.Tok.getIdentifierInfo())
return true; // Never ever merge two identifiers.
+
+ // Leave a space between * and /* to avoid MSVC warning C4138 (`comment
+ // end` found outside of comment).
+ if (Left.is(tok::star) && Right.is(tok::comment))
+ return true;
+
if (Style.isCpp()) {
+ // Space after `import` in import <iostream>;
+ // and in import .....;
+ if (Left.is(Keywords.kw_import) && Right.isOneOf(tok::less, tok::ellipsis))
+ return true;
+ // Space between `module :` and `import :`.
+ if (Left.isOneOf(Keywords.kw_module, Keywords.kw_import) &&
+ Right.is(TT_ModulePartitionColon))
+ return true;
+ // No space in import foo:bar, but keep the space in import :bar;
+ if (Left.is(tok::identifier) && Right.is(TT_ModulePartitionColon))
+ return false;
+ // No space between ':' and the partition name in :bar;
+ if (Left.is(TT_ModulePartitionColon) &&
+ Right.isOneOf(tok::identifier, tok::kw_private))
+ return false;
+ if (Left.is(tok::ellipsis) && Right.is(tok::identifier) &&
+ Line.First->is(Keywords.kw_import))
+ return false;
+
if (Left.is(tok::kw_operator))
return Right.is(tok::coloncolon);
if (Right.is(tok::l_brace) && Right.is(BK_BracedInit) &&
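
As a sketch (illustrative code, not from the diff), the module and import rules above yield spacing such as:

    import <iostream>;       // space after 'import'; formatted like an include
    export module foo:bar;   // no space around the partition ':'
    import :bar;             // space before ':' kept when the module name is omitted
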
diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp
index 299536cd806e..d099cfee9dea 100644
--- a/clang/lib/Format/UnwrappedLineFormatter.cpp
+++ b/clang/lib/Format/UnwrappedLineFormatter.cpp
@@ -320,9 +320,9 @@ private:
}
// Try to merge a control statement block with left brace wrapped
if (I[1]->First->is(tok::l_brace) &&
- (TheLine->First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_for,
- tok::kw_switch, tok::kw_try, tok::kw_do,
- TT_ForEachMacro) ||
+ (TheLine->First->isOneOf(tok::kw_if, tok::kw_else, tok::kw_while,
+ tok::kw_for, tok::kw_switch, tok::kw_try,
+ tok::kw_do, TT_ForEachMacro) ||
(TheLine->First->is(tok::r_brace) && TheLine->First->Next &&
TheLine->First->Next->isOneOf(tok::kw_else, tok::kw_catch))) &&
Style.BraceWrapping.AfterControlStatement ==
@@ -335,7 +335,7 @@ private:
? 1
: 0;
} else if (I[1]->First->is(tok::l_brace) &&
- TheLine->First->isOneOf(tok::kw_if, tok::kw_while,
+ TheLine->First->isOneOf(tok::kw_if, tok::kw_else, tok::kw_while,
tok::kw_for)) {
return (Style.BraceWrapping.AfterControlStatement ==
FormatStyle::BWACS_Always)
@@ -569,7 +569,7 @@ private:
// Check that the current line allows merging. This depends on whether we
// are in a control flow statements as well as several style flags.
- if (Line.First->isOneOf(tok::kw_else, tok::kw_case) ||
+ if (Line.First->is(tok::kw_case) ||
(Line.First->Next && Line.First->Next->is(tok::kw_else)))
return 0;
// default: in switch statement
@@ -578,20 +578,21 @@ private:
if (Tok && Tok->is(tok::colon))
return 0;
}
- if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::kw_try,
- tok::kw___try, tok::kw_catch, tok::kw___finally,
- tok::kw_for, tok::r_brace, Keywords.kw___except)) {
+ if (Line.First->isOneOf(tok::kw_if, tok::kw_else, tok::kw_while, tok::kw_do,
+ tok::kw_try, tok::kw___try, tok::kw_catch,
+ tok::kw___finally, tok::kw_for, tok::r_brace,
+ Keywords.kw___except)) {
if (Style.AllowShortBlocksOnASingleLine == FormatStyle::SBS_Never)
return 0;
// Don't merge when we can't, except when
// the control statement block is empty
if (!Style.AllowShortIfStatementsOnASingleLine &&
- Line.startsWith(tok::kw_if) &&
+ Line.First->isOneOf(tok::kw_if, tok::kw_else) &&
!Style.BraceWrapping.AfterControlStatement &&
!I[1]->First->is(tok::r_brace))
return 0;
if (!Style.AllowShortIfStatementsOnASingleLine &&
- Line.startsWith(tok::kw_if) &&
+ Line.First->isOneOf(tok::kw_if, tok::kw_else) &&
Style.BraceWrapping.AfterControlStatement ==
FormatStyle::BWACS_Always &&
I + 2 != E && !I[2]->First->is(tok::r_brace))
@@ -676,7 +677,7 @@ private:
// { <-- current Line
// baz();
// }
- if (Line.First == Line.Last &&
+ if (Line.First == Line.Last && Line.First->isNot(TT_FunctionLBrace) &&
Style.BraceWrapping.AfterControlStatement ==
FormatStyle::BWACS_MultiLine)
return 0;
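
A rough illustration of the effect (hypothetical input; the exact result depends on the BraceWrapping and AllowShort* options): 'else' lines with a wrapped left brace can now merge the same way 'if' lines do:

    if (a)
    { f(); }
    else
    { g(); }
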
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index 28d925858f77..5b9fe267aae6 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -28,9 +28,28 @@ namespace format {
class FormatTokenSource {
public:
virtual ~FormatTokenSource() {}
+
+ // Returns the next token in the token stream.
virtual FormatToken *getNextToken() = 0;
+ // Returns the token preceding the token returned by the last call to
+ // getNextToken() in the token stream, or nullptr if no such token exists.
+ virtual FormatToken *getPreviousToken() = 0;
+
+ // Returns the token that would be returned by the next call to
+ // getNextToken().
+ virtual FormatToken *peekNextToken() = 0;
+
+ // Returns whether we are at the end of the file.
+ // This can be different from whether getNextToken() returned an eof token
+ // when the FormatTokenSource is a view on a part of the token stream.
+ virtual bool isEOF() = 0;
+
+ // Gets the current position in the token stream, to be used by setPosition().
virtual unsigned getPosition() = 0;
+
+ // Resets the token stream to the state it was in when getPosition() returned
+ // Position, and returns the token at that position in the stream.
virtual FormatToken *setPosition(unsigned Position) = 0;
};
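
A minimal caller sketch for the extended interface (assumed context: 'Tokens' and 'FormatTok' as used by UnwrappedLineParser elsewhere in this patch):

    // Look ahead without consuming the token.
    FormatToken *Next = Tokens->peekNextToken();
    // Equivalent save/rewind idiom using positions.
    unsigned StoredPosition = Tokens->getPosition();
    FormatToken *Ahead = Tokens->getNextToken();
    FormatTok = Tokens->setPosition(StoredPosition);
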
@@ -108,6 +127,18 @@ public:
return Token;
}
+ FormatToken *getPreviousToken() override {
+ return PreviousTokenSource->getPreviousToken();
+ }
+
+ FormatToken *peekNextToken() override {
+ if (eof())
+ return &FakeEOF;
+ return PreviousTokenSource->peekNextToken();
+ }
+
+ bool isEOF() override { return PreviousTokenSource->isEOF(); }
+
unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
FormatToken *setPosition(unsigned Position) override {
@@ -199,16 +230,45 @@ public:
: Tokens(Tokens), Position(-1) {}
FormatToken *getNextToken() override {
+ if (Position >= 0 && Tokens[Position]->is(tok::eof)) {
+ LLVM_DEBUG({
+ llvm::dbgs() << "Next ";
+ dbgToken(Position);
+ });
+ return Tokens[Position];
+ }
++Position;
+ LLVM_DEBUG({
+ llvm::dbgs() << "Next ";
+ dbgToken(Position);
+ });
return Tokens[Position];
}
+ FormatToken *getPreviousToken() override {
+ assert(Position > 0);
+ return Tokens[Position - 1];
+ }
+
+ FormatToken *peekNextToken() override {
+ int Next = Position + 1;
+ LLVM_DEBUG({
+ llvm::dbgs() << "Peeking ";
+ dbgToken(Next);
+ });
+ return Tokens[Next];
+ }
+
+ bool isEOF() override { return Tokens[Position]->is(tok::eof); }
+
unsigned getPosition() override {
+ LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
assert(Position >= 0);
return Position;
}
FormatToken *setPosition(unsigned P) override {
+ LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
Position = P;
return Tokens[Position];
}
@@ -216,6 +276,13 @@ public:
void reset() { Position = -1; }
private:
+ void dbgToken(int Position, llvm::StringRef Indent = "") {
+ FormatToken *Tok = Tokens[Position];
+ llvm::dbgs() << Indent << "[" << Position
+ << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
+ << ", Macro: " << !!Tok->MacroCtx << "\n";
+ }
+
ArrayRef<FormatToken *> Tokens;
int Position;
};
@@ -399,7 +466,7 @@ void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
FormatToken *Next;
do {
Next = Tokens->getNextToken();
- } while (Next && Next->is(tok::comment));
+ } while (Next->is(tok::comment));
FormatTok = Tokens->setPosition(StoredPosition);
if (Next && Next->isNot(tok::colon)) {
// default not followed by ':' is not a case label; treat it like
@@ -875,10 +942,7 @@ void UnwrappedLineParser::parsePPEndIf() {
parsePPUnknown();
// If the #endif of a potential include guard is the last thing in the file,
// then we found an include guard.
- unsigned TokenPosition = Tokens->getPosition();
- FormatToken *PeekNext = AllTokens[TokenPosition];
- if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
- PeekNext->is(tok::eof) &&
+ if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
Style.IndentPPDirectives != FormatStyle::PPDIS_None)
IncludeGuard = IG_Found;
}
@@ -1050,6 +1114,35 @@ static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
}
+void UnwrappedLineParser::parseModuleImport() {
+ nextToken();
+ while (!eof()) {
+ if (FormatTok->is(tok::colon)) {
+ FormatTok->setType(TT_ModulePartitionColon);
+ }
+ // Handle import <foo/bar.h> as we would an include statement.
+ else if (FormatTok->is(tok::less)) {
+ nextToken();
+ while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
+ // Mark tokens up to the trailing line comments as implicit string
+ // literals.
+ if (FormatTok->isNot(tok::comment) &&
+ !FormatTok->TokenText.startswith("//"))
+ FormatTok->setType(TT_ImplicitStringLiteral);
+ nextToken();
+ }
+ }
+ if (FormatTok->is(tok::semi)) {
+ nextToken();
+ break;
+ }
+ nextToken();
+ }
+
+ addUnwrappedLine();
+ return;
+}
+
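For reference (illustrative inputs, not part of the patch), parseModuleImport() consumes lines such as:

    import <foo/bar.h>;     // header-unit import; the <...> tokens are marked
                            // TT_ImplicitStringLiteral, like an #include path
    export module m:part;   // ':' is typed as TT_ModulePartitionColon
    import :part;
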
// readTokenWithJavaScriptASI reads the next token and terminates the current
// line if JavaScript Automatic Semicolon Insertion must
// happen between the current token and the next token.
@@ -1097,7 +1190,6 @@ void UnwrappedLineParser::readTokenWithJavaScriptASI() {
}
void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) {
- assert(!FormatTok->is(tok::l_brace));
if (Style.Language == FormatStyle::LK_TableGen &&
FormatTok->is(tok::pp_include)) {
nextToken();
@@ -1249,6 +1341,10 @@ void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) {
addUnwrappedLine();
return;
}
+ if (Style.isCpp()) {
+ parseModuleImport();
+ return;
+ }
}
if (Style.isCpp() &&
FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
@@ -1402,9 +1498,7 @@ void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) {
// declaration.
if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
break;
- const unsigned Position = Tokens->getPosition() + 1;
- assert(Position < AllTokens.size());
- if (isC78ParameterDecl(FormatTok, AllTokens[Position], Previous)) {
+ if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) {
addUnwrappedLine();
return;
}
@@ -1488,7 +1582,7 @@ void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) {
unsigned StoredPosition = Tokens->getPosition();
FormatToken *Next = Tokens->getNextToken();
FormatTok = Tokens->setPosition(StoredPosition);
- if (Next && !mustBeJSIdent(Keywords, Next)) {
+ if (!mustBeJSIdent(Keywords, Next)) {
nextToken();
break;
}
@@ -2099,8 +2193,8 @@ void UnwrappedLineParser::parseIfThenElse() {
parseBlock();
addUnwrappedLine();
} else if (FormatTok->Tok.is(tok::kw_if)) {
- FormatToken *Previous = AllTokens[Tokens->getPosition() - 1];
- bool PrecededByComment = Previous->is(tok::comment);
+ FormatToken *Previous = Tokens->getPreviousToken();
+ bool PrecededByComment = Previous && Previous->is(tok::comment);
if (PrecededByComment) {
addUnwrappedLine();
++Line->Level;
@@ -2305,6 +2399,8 @@ void UnwrappedLineParser::parseForOrWhileLoop() {
if (Style.Language == FormatStyle::LK_JavaScript &&
FormatTok->is(Keywords.kw_await))
nextToken();
+ if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
+ nextToken();
if (FormatTok->Tok.is(tok::l_paren))
parseParens();
if (FormatTok->Tok.is(tok::l_brace)) {
@@ -2653,23 +2749,25 @@ bool UnwrappedLineParser::tryToParseSimpleAttribute() {
ScopedTokenPosition AutoPosition(Tokens);
FormatToken *Tok = Tokens->getNextToken();
// We already read the first [ check for the second.
- if (Tok && !Tok->is(tok::l_square)) {
+ if (!Tok->is(tok::l_square)) {
return false;
}
// Double check that the attribute is just something
// fairly simple.
- while (Tok) {
+ while (Tok->isNot(tok::eof)) {
if (Tok->is(tok::r_square)) {
break;
}
Tok = Tokens->getNextToken();
}
+ if (Tok->is(tok::eof))
+ return false;
Tok = Tokens->getNextToken();
- if (Tok && !Tok->is(tok::r_square)) {
+ if (!Tok->is(tok::r_square)) {
return false;
}
Tok = Tokens->getNextToken();
- if (Tok && Tok->is(tok::semi)) {
+ if (Tok->is(tok::semi)) {
return false;
}
return true;
@@ -2682,7 +2780,7 @@ void UnwrappedLineParser::parseJavaEnumBody() {
unsigned StoredPosition = Tokens->getPosition();
bool IsSimple = true;
FormatToken *Tok = Tokens->getNextToken();
- while (Tok) {
+ while (!Tok->is(tok::eof)) {
if (Tok->is(tok::r_brace))
break;
if (Tok->isOneOf(tok::l_brace, tok::semi)) {
@@ -3292,6 +3390,20 @@ void UnwrappedLineParser::readToken(int LevelDifference) {
do {
FormatTok = Tokens->getNextToken();
assert(FormatTok);
+ while (FormatTok->getType() == TT_ConflictStart ||
+ FormatTok->getType() == TT_ConflictEnd ||
+ FormatTok->getType() == TT_ConflictAlternative) {
+ if (FormatTok->getType() == TT_ConflictStart) {
+ conditionalCompilationStart(/*Unreachable=*/false);
+ } else if (FormatTok->getType() == TT_ConflictAlternative) {
+ conditionalCompilationAlternative();
+ } else if (FormatTok->getType() == TT_ConflictEnd) {
+ conditionalCompilationEnd();
+ }
+ FormatTok = Tokens->getNextToken();
+ FormatTok->MustBreakBefore = true;
+ }
+
while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
(FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
distributeComments(Comments, FormatTok);
@@ -3313,19 +3425,6 @@ void UnwrappedLineParser::readToken(int LevelDifference) {
flushComments(isOnNewLine(*FormatTok));
parsePPDirective();
}
- while (FormatTok->getType() == TT_ConflictStart ||
- FormatTok->getType() == TT_ConflictEnd ||
- FormatTok->getType() == TT_ConflictAlternative) {
- if (FormatTok->getType() == TT_ConflictStart) {
- conditionalCompilationStart(/*Unreachable=*/false);
- } else if (FormatTok->getType() == TT_ConflictAlternative) {
- conditionalCompilationAlternative();
- } else if (FormatTok->getType() == TT_ConflictEnd) {
- conditionalCompilationEnd();
- }
- FormatTok = Tokens->getNextToken();
- FormatTok->MustBreakBefore = true;
- }
if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
!Line->InPPDirective) {
diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h
index bcae0f3ad258..b4c082654597 100644
--- a/clang/lib/Format/UnwrappedLineParser.h
+++ b/clang/lib/Format/UnwrappedLineParser.h
@@ -110,6 +110,7 @@ private:
void parseCaseLabel();
void parseSwitch();
void parseNamespace();
+ void parseModuleImport();
void parseNew();
void parseAccessSpecifier();
bool parseEnum();
diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp
index 74136d2f5caa..fae8a1c3fdc6 100644
--- a/clang/lib/Format/WhitespaceManager.cpp
+++ b/clang/lib/Format/WhitespaceManager.cpp
@@ -372,8 +372,6 @@ AlignTokenSequence(const FormatStyle &Style, unsigned Start, unsigned End,
if (ContinuedStringLiteral)
Changes[i].Spaces += Shift;
- assert(Shift >= 0);
-
Changes[i].StartOfTokenColumn += Shift;
if (i + 1 != Changes.size())
Changes[i + 1].PreviousEndOfTokenColumn += Shift;
@@ -915,7 +913,8 @@ void WhitespaceManager::alignTrailingComments(unsigned Start, unsigned End,
Changes[i].StartOfBlockComment->StartOfTokenColumn -
Changes[i].StartOfTokenColumn;
}
- assert(Shift >= 0);
+ if (Shift < 0)
+ continue;
Changes[i].Spaces += Shift;
if (i + 1 != Changes.size())
Changes[i + 1].PreviousEndOfTokenColumn += Shift;
@@ -1270,10 +1269,10 @@ WhitespaceManager::linkCells(CellDescriptions &&CellDesc) {
void WhitespaceManager::generateChanges() {
for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
const Change &C = Changes[i];
- if (i > 0) {
- assert(Changes[i - 1].OriginalWhitespaceRange.getBegin() !=
- C.OriginalWhitespaceRange.getBegin() &&
- "Generating two replacements for the same location");
+ if (i > 0 && Changes[i - 1].OriginalWhitespaceRange.getBegin() ==
+ C.OriginalWhitespaceRange.getBegin()) {
+ // Do not generate two replacements for the same location.
+ continue;
}
if (C.CreateReplacement) {
std::string ReplacementText = C.PreviousLinePostfix;
diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp
index 0ecb024fc6b9..0c153446142e 100644
--- a/clang/lib/Frontend/InitPreprocessor.cpp
+++ b/clang/lib/Frontend/InitPreprocessor.cpp
@@ -505,6 +505,11 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI,
if (LangOpts.HIP) {
Builder.defineMacro("__HIP__");
Builder.defineMacro("__HIPCC__");
+ Builder.defineMacro("__HIP_MEMORY_SCOPE_SINGLETHREAD", "1");
+ Builder.defineMacro("__HIP_MEMORY_SCOPE_WAVEFRONT", "2");
+ Builder.defineMacro("__HIP_MEMORY_SCOPE_WORKGROUP", "3");
+ Builder.defineMacro("__HIP_MEMORY_SCOPE_AGENT", "4");
+ Builder.defineMacro("__HIP_MEMORY_SCOPE_SYSTEM", "5");
if (LangOpts.CUDAIsDevice)
Builder.defineMacro("__HIP_DEVICE_COMPILE__");
}
diff --git a/clang/lib/Frontend/PrecompiledPreamble.cpp b/clang/lib/Frontend/PrecompiledPreamble.cpp
index af82ab3f5558..8aa80a4c96fb 100644
--- a/clang/lib/Frontend/PrecompiledPreamble.cpp
+++ b/clang/lib/Frontend/PrecompiledPreamble.cpp
@@ -412,10 +412,13 @@ llvm::ErrorOr<PrecompiledPreamble> PrecompiledPreamble::Build(
std::unique_ptr<PrecompilePreambleAction> Act;
Act.reset(new PrecompilePreambleAction(
StoreInMemory ? &Storage.asMemory().Data : nullptr, Callbacks));
- Callbacks.BeforeExecute(*Clang);
if (!Act->BeginSourceFile(*Clang.get(), Clang->getFrontendOpts().Inputs[0]))
return BuildPreambleError::BeginSourceFileFailed;
+ // Performed after BeginSourceFile to ensure Clang->Preprocessor can be
+ // referenced in the callback.
+ Callbacks.BeforeExecute(*Clang);
+
std::unique_ptr<PPCallbacks> DelegatedPPCallbacks =
Callbacks.createPPCallbacks();
if (DelegatedPPCallbacks)
diff --git a/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp b/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp
index 626ec4d71ccd..b4487f004715 100644
--- a/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp
+++ b/clang/lib/Frontend/Rewrite/RewriteModernObjC.cpp
@@ -633,7 +633,7 @@ static bool IsHeaderFile(const std::string &Filename) {
return false;
}
- std::string Ext = std::string(Filename.begin()+DotPos+1, Filename.end());
+ std::string Ext = Filename.substr(DotPos + 1);
// C header: .h
// C++ header: .hh or .H;
return Ext == "h" || Ext == "hh" || Ext == "H";
diff --git a/clang/lib/Frontend/Rewrite/RewriteObjC.cpp b/clang/lib/Frontend/Rewrite/RewriteObjC.cpp
index 0750d36b02ac..b2ecb42c43dd 100644
--- a/clang/lib/Frontend/Rewrite/RewriteObjC.cpp
+++ b/clang/lib/Frontend/Rewrite/RewriteObjC.cpp
@@ -569,7 +569,7 @@ static bool IsHeaderFile(const std::string &Filename) {
return false;
}
- std::string Ext = std::string(Filename.begin()+DotPos+1, Filename.end());
+ std::string Ext = Filename.substr(DotPos + 1);
// C header: .h
// C++ header: .hh or .H;
return Ext == "h" || Ext == "hh" || Ext == "H";
diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h
index fb808d7b0a4f..55195b0781fb 100644
--- a/clang/lib/Headers/altivec.h
+++ b/clang/lib/Headers/altivec.h
@@ -19,6 +19,10 @@
#define __CR6_EQ_REV 1
#define __CR6_LT 2
#define __CR6_LT_REV 3
+#define __CR6_GT 4
+#define __CR6_GT_REV 5
+#define __CR6_SO 6
+#define __CR6_SO_REV 7
/* Constants for vec_test_data_class */
#define __VEC_CLASS_FP_SUBNORMAL_N (1 << 0)
@@ -8413,9 +8417,20 @@ static __inline__ vector float __ATTRS_o_ai vec_round(vector float __a) {
}
#ifdef __VSX__
+#ifdef __XL_COMPAT_ALTIVEC__
+static __inline__ vector double __ATTRS_o_ai vec_rint(vector double __a);
+static __inline__ vector double __ATTRS_o_ai vec_round(vector double __a) {
+ double __fpscr = __builtin_readflm();
+ __builtin_setrnd(0);
+ vector double __rounded = vec_rint(__a);
+ __builtin_setflm(__fpscr);
+ return __rounded;
+}
+#else
static __inline__ vector double __ATTRS_o_ai vec_round(vector double __a) {
return __builtin_vsx_xvrdpi(__a);
}
+#endif
/* vec_rint */
@@ -19026,6 +19041,51 @@ vec_sra(vector signed __int128 __a, vector unsigned __int128 __b) {
#endif /* __SIZEOF_INT128__ */
#endif /* __POWER10_VECTOR__ */
+#ifdef __POWER8_VECTOR__
+#define __bcdadd(__a, __b, __ps) __builtin_ppc_bcdadd((__a), (__b), (__ps))
+#define __bcdsub(__a, __b, __ps) __builtin_ppc_bcdsub((__a), (__b), (__ps))
+
+static __inline__ long __bcdadd_ofl(vector unsigned char __a,
+ vector unsigned char __b) {
+ return __builtin_ppc_bcdadd_p(__CR6_SO, __a, __b);
+}
+
+static __inline__ long __bcdsub_ofl(vector unsigned char __a,
+ vector unsigned char __b) {
+ return __builtin_ppc_bcdsub_p(__CR6_SO, __a, __b);
+}
+
+static __inline__ long __bcd_invalid(vector unsigned char __a) {
+ return __builtin_ppc_bcdsub_p(__CR6_SO, __a, __a);
+}
+
+static __inline__ long __bcdcmpeq(vector unsigned char __a,
+ vector unsigned char __b) {
+ return __builtin_ppc_bcdsub_p(__CR6_EQ, __a, __b);
+}
+
+static __inline__ long __bcdcmplt(vector unsigned char __a,
+ vector unsigned char __b) {
+ return __builtin_ppc_bcdsub_p(__CR6_LT, __a, __b);
+}
+
+static __inline__ long __bcdcmpgt(vector unsigned char __a,
+ vector unsigned char __b) {
+ return __builtin_ppc_bcdsub_p(__CR6_GT, __a, __b);
+}
+
+static __inline__ long __bcdcmple(vector unsigned char __a,
+ vector unsigned char __b) {
+ return __builtin_ppc_bcdsub_p(__CR6_GT_REV, __a, __b);
+}
+
+static __inline__ long __bcdcmpge(vector unsigned char __a,
+ vector unsigned char __b) {
+ return __builtin_ppc_bcdsub_p(__CR6_LT_REV, __a, __b);
+}
+
+#endif // __POWER8_VECTOR__
+
#undef __ATTRS_o_ai
#endif /* __ALTIVEC_H */
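
A brief usage sketch for the new BCD helpers (hypothetical function and values; assumes a target with __POWER8_VECTOR__):

    // Add two packed-BCD values, checking the CR6 overflow bit.
    long bcd_demo(vector unsigned char a, vector unsigned char b) {
      vector unsigned char sum = __bcdadd(a, b, 0); // ps selects the sign code
      (void)sum;
      if (__bcdadd_ofl(a, b))
        return -1;             // the addition overflowed
      return __bcdcmpeq(a, b); // nonzero if the BCD values compare equal
    }
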
diff --git a/clang/lib/Headers/ppc_wrappers/emmintrin.h b/clang/lib/Headers/ppc_wrappers/emmintrin.h
index 4dcb8485e2e9..82a71788b27a 100644
--- a/clang/lib/Headers/ppc_wrappers/emmintrin.h
+++ b/clang/lib/Headers/ppc_wrappers/emmintrin.h
@@ -35,7 +35,7 @@
#ifndef EMMINTRIN_H_
#define EMMINTRIN_H_
-#if defined(__linux__) && defined(__ppc64__)
+#if defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__))
#include <altivec.h>
@@ -2319,6 +2319,7 @@ _mm_castsi128_pd(__m128i __A)
#else
#include_next <emmintrin.h>
-#endif /* defined(__linux__) && defined(__ppc64__) */
+#endif /* defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__)) \
+ */
#endif /* EMMINTRIN_H_ */
diff --git a/clang/lib/Headers/ppc_wrappers/mm_malloc.h b/clang/lib/Headers/ppc_wrappers/mm_malloc.h
index 24b14c8e07c0..86cf1a0f7618 100644
--- a/clang/lib/Headers/ppc_wrappers/mm_malloc.h
+++ b/clang/lib/Headers/ppc_wrappers/mm_malloc.h
@@ -10,7 +10,7 @@
#ifndef _MM_MALLOC_H_INCLUDED
#define _MM_MALLOC_H_INCLUDED
-#if defined(__linux__) && defined(__ppc64__)
+#if defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__))
#include <stdlib.h>
diff --git a/clang/lib/Headers/ppc_wrappers/mmintrin.h b/clang/lib/Headers/ppc_wrappers/mmintrin.h
index c55c44726f00..54e4ee9f4468 100644
--- a/clang/lib/Headers/ppc_wrappers/mmintrin.h
+++ b/clang/lib/Headers/ppc_wrappers/mmintrin.h
@@ -35,7 +35,7 @@
#ifndef _MMINTRIN_H_INCLUDED
#define _MMINTRIN_H_INCLUDED
-#if defined(__linux__) && defined(__ppc64__)
+#if defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__))
#include <altivec.h>
/* The Intel API is flexible enough that we must allow aliasing with other
@@ -1445,6 +1445,7 @@ extern __inline __m64
#else
#include_next <mmintrin.h>
-#endif /* defined(__linux__) && defined(__ppc64__) */
+#endif /* defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__)) \
+ */
#endif /* _MMINTRIN_H_INCLUDED */
diff --git a/clang/lib/Headers/ppc_wrappers/pmmintrin.h b/clang/lib/Headers/ppc_wrappers/pmmintrin.h
index 6d93383d5412..8d4046bd43f1 100644
--- a/clang/lib/Headers/ppc_wrappers/pmmintrin.h
+++ b/clang/lib/Headers/ppc_wrappers/pmmintrin.h
@@ -38,7 +38,7 @@
#ifndef PMMINTRIN_H_
#define PMMINTRIN_H_
-#if defined(__linux__) && defined(__ppc64__)
+#if defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__))
/* We need definitions from the SSE2 and SSE header files*/
#include <emmintrin.h>
@@ -145,6 +145,7 @@ _mm_lddqu_si128 (__m128i const *__P)
#else
#include_next <pmmintrin.h>
-#endif /* defined(__linux__) && defined(__ppc64__) */
+#endif /* defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__)) \
+ */
#endif /* PMMINTRIN_H_ */
diff --git a/clang/lib/Headers/ppc_wrappers/smmintrin.h b/clang/lib/Headers/ppc_wrappers/smmintrin.h
index f41264b27584..674703245a69 100644
--- a/clang/lib/Headers/ppc_wrappers/smmintrin.h
+++ b/clang/lib/Headers/ppc_wrappers/smmintrin.h
@@ -29,7 +29,7 @@
#ifndef SMMINTRIN_H_
#define SMMINTRIN_H_
-#if defined(__linux__) && defined(__ppc64__)
+#if defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__))
#include <altivec.h>
#include <tmmintrin.h>
@@ -104,6 +104,7 @@ extern __inline __m128i
#else
#include_next <smmintrin.h>
-#endif /* defined(__linux__) && defined(__ppc64__) */
+#endif /* defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__)) \
+ */
#endif /* _SMMINTRIN_H_ */
diff --git a/clang/lib/Headers/ppc_wrappers/tmmintrin.h b/clang/lib/Headers/ppc_wrappers/tmmintrin.h
index b5a935d5e47e..ebef7b8192d7 100644
--- a/clang/lib/Headers/ppc_wrappers/tmmintrin.h
+++ b/clang/lib/Headers/ppc_wrappers/tmmintrin.h
@@ -25,7 +25,7 @@
#ifndef TMMINTRIN_H_
#define TMMINTRIN_H_
-#if defined(__linux__) && defined(__ppc64__)
+#if defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__))
#include <altivec.h>
@@ -490,6 +490,7 @@ _mm_mulhrs_pi16 (__m64 __A, __m64 __B)
#else
#include_next <tmmintrin.h>
-#endif /* defined(__linux__) && defined(__ppc64__) */
+#endif /* defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__)) \
+ */
#endif /* TMMINTRIN_H_ */
diff --git a/clang/lib/Headers/ppc_wrappers/xmmintrin.h b/clang/lib/Headers/ppc_wrappers/xmmintrin.h
index 0e45b96769f8..956603d36408 100644
--- a/clang/lib/Headers/ppc_wrappers/xmmintrin.h
+++ b/clang/lib/Headers/ppc_wrappers/xmmintrin.h
@@ -34,7 +34,7 @@
#ifndef _XMMINTRIN_H_INCLUDED
#define _XMMINTRIN_H_INCLUDED
-#if defined(__linux__) && defined(__ppc64__)
+#if defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__))
/* Define four value permute mask */
#define _MM_SHUFFLE(w,x,y,z) (((w) << 6) | ((x) << 4) | ((y) << 2) | (z))
@@ -1838,6 +1838,7 @@ do { \
#else
#include_next <xmmintrin.h>
-#endif /* defined(__linux__) && defined(__ppc64__) */
+#endif /* defined(__ppc64__) && (defined(__linux__) || defined(__FreeBSD__)) \
+ */
#endif /* _XMMINTRIN_H_INCLUDED */
diff --git a/clang/lib/Headers/stdatomic.h b/clang/lib/Headers/stdatomic.h
index 665551ea69a4..1e47bcb2bacf 100644
--- a/clang/lib/Headers/stdatomic.h
+++ b/clang/lib/Headers/stdatomic.h
@@ -12,8 +12,12 @@
/* If we're hosted, fall back to the system's stdatomic.h. FreeBSD, for
* example, already has a Clang-compatible stdatomic.h header.
+ *
+ * Exclude the MSVC path as well, as the MSVC header as of 14.31.30818
+ * explicitly disallows `stdatomic.h` in C mode via an `#error`. Fall back
+ * to the clang resource header until that is fully supported.
*/
-#if __STDC_HOSTED__ && __has_include_next(<stdatomic.h>)
+#if __STDC_HOSTED__ && __has_include_next(<stdatomic.h>) && !defined(_MSC_VER)
# include_next <stdatomic.h>
#else
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index a0871062395e..1bdeccc4cbf5 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -6978,13 +6978,13 @@ void Parser::ParseParameterDeclarationClause(
//
// We care about case 1) where the declarator type should be known, and
// the identifier should be null.
- if (!ParmDeclarator.isInvalidType() && !ParmDeclarator.hasName()) {
- if (Tok.getIdentifierInfo() &&
- Tok.getIdentifierInfo()->isKeyword(getLangOpts())) {
- Diag(Tok, diag::err_keyword_as_parameter) << PP.getSpelling(Tok);
- // Consume the keyword.
- ConsumeToken();
- }
+ if (!ParmDeclarator.isInvalidType() && !ParmDeclarator.hasName() &&
+ Tok.isNot(tok::raw_identifier) && !Tok.isAnnotation() &&
+ Tok.getIdentifierInfo() &&
+ Tok.getIdentifierInfo()->isKeyword(getLangOpts())) {
+ Diag(Tok, diag::err_keyword_as_parameter) << PP.getSpelling(Tok);
+ // Consume the keyword.
+ ConsumeToken();
}
// Inform the actions module about the parameter declarator, so it gets
// added to the current scope.
diff --git a/clang/lib/Parse/ParseStmt.cpp b/clang/lib/Parse/ParseStmt.cpp
index bb8718671bb0..292ab03e8614 100644
--- a/clang/lib/Parse/ParseStmt.cpp
+++ b/clang/lib/Parse/ParseStmt.cpp
@@ -2108,6 +2108,9 @@ StmtResult Parser::ParseForStatement(SourceLocation *TrailingElseLoc) {
CoawaitLoc = SourceLocation();
}
+ if (CoawaitLoc.isValid() && getLangOpts().CPlusPlus20)
+ Diag(CoawaitLoc, diag::warn_deprecated_for_co_await);
+
// We need to perform most of the semantic analysis for a C++0x for-range
// statement before parsing the body, in order to be able to deduce the type
// of an auto-typed loop variable.
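
Example of code that now triggers the deprecation warning in C++20 mode ('asyncStream' is a hypothetical generator):

    // warns: 'for co_await' is a Coroutines TS construct, deprecated in C++20
    for co_await (auto x : asyncStream()) { /* ... */ }
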
diff --git a/clang/lib/Sema/AnalysisBasedWarnings.cpp b/clang/lib/Sema/AnalysisBasedWarnings.cpp
index 8544a4fccf4c..b4dcc9759b99 100644
--- a/clang/lib/Sema/AnalysisBasedWarnings.cpp
+++ b/clang/lib/Sema/AnalysisBasedWarnings.cpp
@@ -464,7 +464,7 @@ static ControlFlowKind CheckFallThrough(AnalysisDeclContext &AC) {
// No more CFGElements in the block?
if (ri == re) {
const Stmt *Term = B.getTerminatorStmt();
- if (Term && isa<CXXTryStmt>(Term)) {
+ if (Term && (isa<CXXTryStmt>(Term) || isa<ObjCAtTryStmt>(Term))) {
HasAbnormalEdge = true;
continue;
}
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index a0f6702a5f82..33e2b3b5027d 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -5297,6 +5297,7 @@ static bool isValidOrderingForOp(int64_t Ordering, AtomicExpr::AtomicOp Op) {
case AtomicExpr::AO__c11_atomic_load:
case AtomicExpr::AO__opencl_atomic_load:
+ case AtomicExpr::AO__hip_atomic_load:
case AtomicExpr::AO__atomic_load_n:
case AtomicExpr::AO__atomic_load:
return OrderingCABI != llvm::AtomicOrderingCABI::release &&
@@ -5304,6 +5305,7 @@ static bool isValidOrderingForOp(int64_t Ordering, AtomicExpr::AtomicOp Op) {
case AtomicExpr::AO__c11_atomic_store:
case AtomicExpr::AO__opencl_atomic_store:
+ case AtomicExpr::AO__hip_atomic_store:
case AtomicExpr::AO__atomic_store:
case AtomicExpr::AO__atomic_store_n:
return OrderingCABI != llvm::AtomicOrderingCABI::consume &&
@@ -5380,6 +5382,8 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
"need to update code for modified C11 atomics");
bool IsOpenCL = Op >= AtomicExpr::AO__opencl_atomic_init &&
Op <= AtomicExpr::AO__opencl_atomic_fetch_max;
+ bool IsHIP = Op >= AtomicExpr::AO__hip_atomic_load &&
+ Op <= AtomicExpr::AO__hip_atomic_fetch_max;
bool IsC11 = (Op >= AtomicExpr::AO__c11_atomic_init &&
Op <= AtomicExpr::AO__c11_atomic_fetch_min) ||
IsOpenCL;
@@ -5397,6 +5401,7 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
case AtomicExpr::AO__c11_atomic_load:
case AtomicExpr::AO__opencl_atomic_load:
+ case AtomicExpr::AO__hip_atomic_load:
case AtomicExpr::AO__atomic_load_n:
Form = Load;
break;
@@ -5407,11 +5412,14 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
case AtomicExpr::AO__c11_atomic_store:
case AtomicExpr::AO__opencl_atomic_store:
+ case AtomicExpr::AO__hip_atomic_store:
case AtomicExpr::AO__atomic_store:
case AtomicExpr::AO__atomic_store_n:
Form = Copy;
break;
-
+ case AtomicExpr::AO__hip_atomic_fetch_add:
+ case AtomicExpr::AO__hip_atomic_fetch_min:
+ case AtomicExpr::AO__hip_atomic_fetch_max:
case AtomicExpr::AO__c11_atomic_fetch_add:
case AtomicExpr::AO__c11_atomic_fetch_sub:
case AtomicExpr::AO__opencl_atomic_fetch_add:
@@ -5426,6 +5434,9 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
case AtomicExpr::AO__c11_atomic_fetch_and:
case AtomicExpr::AO__c11_atomic_fetch_or:
case AtomicExpr::AO__c11_atomic_fetch_xor:
+ case AtomicExpr::AO__hip_atomic_fetch_and:
+ case AtomicExpr::AO__hip_atomic_fetch_or:
+ case AtomicExpr::AO__hip_atomic_fetch_xor:
case AtomicExpr::AO__c11_atomic_fetch_nand:
case AtomicExpr::AO__opencl_atomic_fetch_and:
case AtomicExpr::AO__opencl_atomic_fetch_or:
@@ -5452,6 +5463,7 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
break;
case AtomicExpr::AO__c11_atomic_exchange:
+ case AtomicExpr::AO__hip_atomic_exchange:
case AtomicExpr::AO__opencl_atomic_exchange:
case AtomicExpr::AO__atomic_exchange_n:
Form = Xchg;
@@ -5463,8 +5475,10 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
+ case AtomicExpr::AO__hip_atomic_compare_exchange_strong:
case AtomicExpr::AO__opencl_atomic_compare_exchange_strong:
case AtomicExpr::AO__opencl_atomic_compare_exchange_weak:
+ case AtomicExpr::AO__hip_atomic_compare_exchange_weak:
Form = C11CmpXchg;
break;
@@ -5475,7 +5489,7 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
}
unsigned AdjustedNumArgs = NumArgs[Form];
- if (IsOpenCL && Op != AtomicExpr::AO__opencl_atomic_init)
+ if ((IsOpenCL || IsHIP) && Op != AtomicExpr::AO__opencl_atomic_init)
++AdjustedNumArgs;
// Check we have the right number of arguments.
if (Args.size() < AdjustedNumArgs) {
@@ -5532,8 +5546,8 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
// For an arithmetic operation, the implied arithmetic must be well-formed.
if (Form == Arithmetic) {
- // GCC does not enforce these rules for GNU atomics, but we do, because if
- // we didn't it would be very confusing. FIXME: For whom? How so?
+ // GCC does not enforce these rules for GNU atomics, but we do to help catch
+ // trivial type errors.
auto IsAllowedValueType = [&](QualType ValType) {
if (ValType->isIntegerType())
return true;
@@ -5574,8 +5588,9 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
if (!IsC11 && !AtomTy.isTriviallyCopyableType(Context) &&
!AtomTy->isScalarType()) {
// For GNU atomics, require a trivially-copyable type. This is not part of
- // the GNU atomics specification, but we enforce it, because if we didn't it
- // would be very confusing. FIXME: For whom? How so?
+ // the GNU atomics specification but we enforce it for consistency with
+ // other atomics which generally all require a trivially-copyable type. This
+ // is because atomics just copy bits.
Diag(ExprRange.getBegin(), diag::err_atomic_op_needs_trivial_copy)
<< Ptr->getType() << Ptr->getSourceRange();
return ExprError();
@@ -5614,7 +5629,7 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
// arguments are actually passed as pointers.
QualType ByValType = ValType; // 'CP'
bool IsPassedByAddress = false;
- if (!IsC11 && !IsN) {
+ if (!IsC11 && !IsHIP && !IsN) {
ByValType = Ptr->getType();
IsPassedByAddress = true;
}
@@ -5793,11 +5808,14 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange,
if ((Op == AtomicExpr::AO__c11_atomic_load ||
Op == AtomicExpr::AO__c11_atomic_store ||
Op == AtomicExpr::AO__opencl_atomic_load ||
- Op == AtomicExpr::AO__opencl_atomic_store ) &&
+ Op == AtomicExpr::AO__hip_atomic_load ||
+ Op == AtomicExpr::AO__opencl_atomic_store ||
+ Op == AtomicExpr::AO__hip_atomic_store) &&
Context.AtomicUsesUnsupportedLibcall(AE))
Diag(AE->getBeginLoc(), diag::err_atomic_load_store_uses_lib)
<< ((Op == AtomicExpr::AO__c11_atomic_load ||
- Op == AtomicExpr::AO__opencl_atomic_load)
+ Op == AtomicExpr::AO__opencl_atomic_load ||
+ Op == AtomicExpr::AO__hip_atomic_load)
? 0
: 1);
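
A usage sketch of the HIP atomics checked above (illustrative; the __HIP_MEMORY_SCOPE_* macros are the ones defined in the InitPreprocessor change in this same patch):

    int fetch_inc(int *p) {
      // HIP atomics take an extra synchronization-scope argument.
      return __hip_atomic_fetch_add(p, 1, __ATOMIC_RELAXED,
                                    __HIP_MEMORY_SCOPE_WORKGROUP);
    }
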
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index af174ac1ca1a..7be71ca49ea2 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -10268,13 +10268,9 @@ static bool checkNonMultiVersionCompatAttributes(Sema &S,
const FunctionDecl *FD,
const FunctionDecl *CausedFD,
MultiVersionKind MVType) {
- bool IsCPUSpecificCPUDispatchMVType =
- MVType == MultiVersionKind::CPUDispatch ||
- MVType == MultiVersionKind::CPUSpecific;
- const auto Diagnose = [FD, CausedFD, IsCPUSpecificCPUDispatchMVType](
- Sema &S, const Attr *A) {
+ const auto Diagnose = [FD, CausedFD, MVType](Sema &S, const Attr *A) {
S.Diag(FD->getLocation(), diag::err_multiversion_disallowed_other_attr)
- << IsCPUSpecificCPUDispatchMVType << A;
+ << static_cast<unsigned>(MVType) << A;
if (CausedFD)
S.Diag(CausedFD->getLocation(), diag::note_multiversioning_caused_here);
return true;
@@ -10292,6 +10288,10 @@ static bool checkNonMultiVersionCompatAttributes(Sema &S,
if (MVType != MultiVersionKind::Target)
return Diagnose(S, A);
break;
+ case attr::TargetClones:
+ if (MVType != MultiVersionKind::TargetClones)
+ return Diagnose(S, A);
+ break;
default:
if (!AttrCompatibleWithMultiVersion(A->getKind(), MVType))
return Diagnose(S, A);
@@ -10318,6 +10318,7 @@ bool Sema::areMultiversionVariantFunctionsCompatible(
DefaultedFuncs = 6,
ConstexprFuncs = 7,
ConstevalFuncs = 8,
+ Lambda = 9,
};
enum Different {
CallingConv = 0,
@@ -10445,7 +10446,7 @@ static bool CheckMultiVersionAdditionalRules(Sema &S, const FunctionDecl *OldFD,
S.PDiag(diag::note_multiversioning_caused_here)),
PartialDiagnosticAt(NewFD->getLocation(),
S.PDiag(diag::err_multiversion_doesnt_support)
- << IsCPUSpecificCPUDispatchMVType),
+ << static_cast<unsigned>(MVType)),
PartialDiagnosticAt(NewFD->getLocation(),
S.PDiag(diag::err_multiversion_diff)),
/*TemplatesSupported=*/false,
@@ -10574,21 +10575,30 @@ static bool CheckTargetCausesMultiVersioning(
return false;
}
+static bool MultiVersionTypesCompatible(MultiVersionKind Old,
+ MultiVersionKind New) {
+ if (Old == New || Old == MultiVersionKind::None ||
+ New == MultiVersionKind::None)
+ return true;
+
+ return (Old == MultiVersionKind::CPUDispatch &&
+ New == MultiVersionKind::CPUSpecific) ||
+ (Old == MultiVersionKind::CPUSpecific &&
+ New == MultiVersionKind::CPUDispatch);
+}
+
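Illustrative declarations (not from the patch) of the one mixed pairing that MultiVersionTypesCompatible accepts, cpu_dispatch together with cpu_specific:

    __attribute__((cpu_dispatch(ivybridge, generic))) void f(void);
    __attribute__((cpu_specific(ivybridge))) void f(void) { /* ... */ }
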
/// Check the validity of a new function declaration being added to an existing
/// multiversioned declaration collection.
static bool CheckMultiVersionAdditionalDecl(
Sema &S, FunctionDecl *OldFD, FunctionDecl *NewFD,
MultiVersionKind NewMVType, const TargetAttr *NewTA,
const CPUDispatchAttr *NewCPUDisp, const CPUSpecificAttr *NewCPUSpec,
- bool &Redeclaration, NamedDecl *&OldDecl, bool &MergeTypeWithPrevious,
- LookupResult &Previous) {
+ const TargetClonesAttr *NewClones, bool &Redeclaration, NamedDecl *&OldDecl,
+ bool &MergeTypeWithPrevious, LookupResult &Previous) {
MultiVersionKind OldMVType = OldFD->getMultiVersionKind();
// Disallow mixing of multiversioning types.
- if ((OldMVType == MultiVersionKind::Target &&
- NewMVType != MultiVersionKind::Target) ||
- (NewMVType == MultiVersionKind::Target &&
- OldMVType != MultiVersionKind::Target)) {
+ if (!MultiVersionTypesCompatible(OldMVType, NewMVType)) {
S.Diag(NewFD->getLocation(), diag::err_multiversion_types_mixed);
S.Diag(OldFD->getLocation(), diag::note_previous_declaration);
NewFD->setInvalidDecl();
@@ -10613,7 +10623,12 @@ static bool CheckMultiVersionAdditionalDecl(
if (S.IsOverload(NewFD, CurFD, UseMemberUsingDeclRules))
continue;
- if (NewMVType == MultiVersionKind::Target) {
+ switch (NewMVType) {
+ case MultiVersionKind::None:
+ assert(OldMVType == MultiVersionKind::TargetClones &&
+ "Only target_clones can be omitted in subsequent declarations");
+ break;
+ case MultiVersionKind::Target: {
const auto *CurTA = CurFD->getAttr<TargetAttr>();
if (CurTA->getFeaturesStr() == NewTA->getFeaturesStr()) {
NewFD->setIsMultiVersion();
@@ -10629,7 +10644,30 @@ static bool CheckMultiVersionAdditionalDecl(
NewFD->setInvalidDecl();
return true;
}
- } else {
+ break;
+ }
+ case MultiVersionKind::TargetClones: {
+ const auto *CurClones = CurFD->getAttr<TargetClonesAttr>();
+ Redeclaration = true;
+ OldDecl = CurFD;
+ MergeTypeWithPrevious = true;
+ NewFD->setIsMultiVersion();
+
+ if (CurClones && NewClones &&
+ (CurClones->featuresStrs_size() != NewClones->featuresStrs_size() ||
+ !std::equal(CurClones->featuresStrs_begin(),
+ CurClones->featuresStrs_end(),
+ NewClones->featuresStrs_begin()))) {
+ S.Diag(NewFD->getLocation(), diag::err_target_clone_doesnt_match);
+ S.Diag(CurFD->getLocation(), diag::note_previous_declaration);
+ NewFD->setInvalidDecl();
+ return true;
+ }
+
+ return false;
+ }
+ case MultiVersionKind::CPUSpecific:
+ case MultiVersionKind::CPUDispatch: {
const auto *CurCPUSpec = CurFD->getAttr<CPUSpecificAttr>();
const auto *CurCPUDisp = CurFD->getAttr<CPUDispatchAttr>();
// Handle CPUDispatch/CPUSpecific versions.
@@ -10684,8 +10722,8 @@ static bool CheckMultiVersionAdditionalDecl(
}
}
}
- // If the two decls aren't the same MVType, there is no possible error
- // condition.
+ break;
+ }
}
}
@@ -10721,7 +10759,6 @@ static bool CheckMultiVersionAdditionalDecl(
return false;
}
-
/// Check the validity of a multiversion function declaration.
/// Also sets the multiversion state of the function itself.
///
@@ -10735,23 +10772,14 @@ static bool CheckMultiVersionFunction(Sema &S, FunctionDecl *NewFD,
const auto *NewTA = NewFD->getAttr<TargetAttr>();
const auto *NewCPUDisp = NewFD->getAttr<CPUDispatchAttr>();
const auto *NewCPUSpec = NewFD->getAttr<CPUSpecificAttr>();
-
- // Mixing Multiversioning types is prohibited.
- if ((NewTA && NewCPUDisp) || (NewTA && NewCPUSpec) ||
- (NewCPUDisp && NewCPUSpec)) {
- S.Diag(NewFD->getLocation(), diag::err_multiversion_types_mixed);
- NewFD->setInvalidDecl();
- return true;
- }
-
- MultiVersionKind MVType = NewFD->getMultiVersionKind();
+ const auto *NewClones = NewFD->getAttr<TargetClonesAttr>();
+ MultiVersionKind MVType = NewFD->getMultiVersionKind();
// Main isn't allowed to become a multiversion function, however it IS
// permitted to have 'main' be marked with the 'target' optimization hint.
if (NewFD->isMain()) {
- if ((MVType == MultiVersionKind::Target && NewTA->isDefaultVersion()) ||
- MVType == MultiVersionKind::CPUDispatch ||
- MVType == MultiVersionKind::CPUSpecific) {
+ if (MVType != MultiVersionKind::None &&
+ !(MVType == MultiVersionKind::Target && !NewTA->isDefaultVersion())) {
S.Diag(NewFD->getLocation(), diag::err_multiversion_not_allowed_on_main);
NewFD->setInvalidDecl();
return true;
@@ -10774,13 +10802,35 @@ static bool CheckMultiVersionFunction(Sema &S, FunctionDecl *NewFD,
if (!OldFD->isMultiVersion() && MVType == MultiVersionKind::None)
return false;
- if (OldFD->isMultiVersion() && MVType == MultiVersionKind::None) {
+ // Multiversioned redeclarations aren't allowed to omit the attribute, except
+ // for target_clones.
+ if (OldFD->isMultiVersion() && MVType == MultiVersionKind::None &&
+ OldFD->getMultiVersionKind() != MultiVersionKind::TargetClones) {
S.Diag(NewFD->getLocation(), diag::err_multiversion_required_in_redecl)
<< (OldFD->getMultiVersionKind() != MultiVersionKind::Target);
NewFD->setInvalidDecl();
return true;
}
+ if (!OldFD->isMultiVersion()) {
+ switch (MVType) {
+ case MultiVersionKind::Target:
+ return CheckTargetCausesMultiVersioning(S, OldFD, NewFD, NewTA,
+ Redeclaration, OldDecl,
+ MergeTypeWithPrevious, Previous);
+ case MultiVersionKind::TargetClones:
+ if (OldFD->isUsed(false)) {
+ NewFD->setInvalidDecl();
+ return S.Diag(NewFD->getLocation(), diag::err_multiversion_after_used);
+ }
+ OldFD->setIsMultiVersion();
+ break;
+ case MultiVersionKind::CPUDispatch:
+ case MultiVersionKind::CPUSpecific:
+ case MultiVersionKind::None:
+ break;
+ }
+ }
// Handle the target potentially causes multiversioning case.
if (!OldFD->isMultiVersion() && MVType == MultiVersionKind::Target)
return CheckTargetCausesMultiVersioning(S, OldFD, NewFD, NewTA,
@@ -10791,8 +10841,8 @@ static bool CheckMultiVersionFunction(Sema &S, FunctionDecl *NewFD,
// appropriate attribute in the current function decl. Resolve that these are
// still compatible with previous declarations.
return CheckMultiVersionAdditionalDecl(
- S, OldFD, NewFD, MVType, NewTA, NewCPUDisp, NewCPUSpec, Redeclaration,
- OldDecl, MergeTypeWithPrevious, Previous);
+ S, OldFD, NewFD, MVType, NewTA, NewCPUDisp, NewCPUSpec, NewClones,
+ Redeclaration, OldDecl, MergeTypeWithPrevious, Previous);
}
/// Perform semantic checking of a new function declaration.
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index ef889a36bd55..4df8687aff89 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -1965,6 +1965,28 @@ static void handleRestrictAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
}
static void handleCPUSpecificAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+ // Ensure we don't combine these with themselves, since that causes some
+ // confusing behavior.
+ if (AL.getParsedKind() == ParsedAttr::AT_CPUDispatch) {
+ if (checkAttrMutualExclusion<CPUSpecificAttr>(S, D, AL))
+ return;
+
+ if (const auto *Other = D->getAttr<CPUDispatchAttr>()) {
+ S.Diag(AL.getLoc(), diag::err_disallowed_duplicate_attribute) << AL;
+ S.Diag(Other->getLocation(), diag::note_conflicting_attribute);
+ return;
+ }
+ } else if (AL.getParsedKind() == ParsedAttr::AT_CPUSpecific) {
+ if (checkAttrMutualExclusion<CPUDispatchAttr>(S, D, AL))
+ return;
+
+ if (const auto *Other = D->getAttr<CPUSpecificAttr>()) {
+ S.Diag(AL.getLoc(), diag::err_disallowed_duplicate_attribute) << AL;
+ S.Diag(Other->getLocation(), diag::note_conflicting_attribute);
+ return;
+ }
+ }
+
FunctionDecl *FD = cast<FunctionDecl>(D);
if (const auto *MD = dyn_cast<CXXMethodDecl>(D)) {
@@ -3211,54 +3233,57 @@ static void handleCodeSegAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
bool Sema::checkTargetAttr(SourceLocation LiteralLoc, StringRef AttrStr) {
enum FirstParam { Unsupported, Duplicate, Unknown };
enum SecondParam { None, Architecture, Tune };
+ enum ThirdParam { Target, TargetClones };
if (AttrStr.contains("fpmath="))
return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
- << Unsupported << None << "fpmath=";
+ << Unsupported << None << "fpmath=" << Target;
// Diagnose use of tune if target doesn't support it.
if (!Context.getTargetInfo().supportsTargetAttributeTune() &&
AttrStr.contains("tune="))
return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
- << Unsupported << None << "tune=";
+ << Unsupported << None << "tune=" << Target;
ParsedTargetAttr ParsedAttrs = TargetAttr::parse(AttrStr);
if (!ParsedAttrs.Architecture.empty() &&
!Context.getTargetInfo().isValidCPUName(ParsedAttrs.Architecture))
return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
- << Unknown << Architecture << ParsedAttrs.Architecture;
+ << Unknown << Architecture << ParsedAttrs.Architecture << Target;
if (!ParsedAttrs.Tune.empty() &&
!Context.getTargetInfo().isValidCPUName(ParsedAttrs.Tune))
return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
- << Unknown << Tune << ParsedAttrs.Tune;
+ << Unknown << Tune << ParsedAttrs.Tune << Target;
if (ParsedAttrs.DuplicateArchitecture)
return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
- << Duplicate << None << "arch=";
+ << Duplicate << None << "arch=" << Target;
if (ParsedAttrs.DuplicateTune)
return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
- << Duplicate << None << "tune=";
+ << Duplicate << None << "tune=" << Target;
for (const auto &Feature : ParsedAttrs.Features) {
auto CurFeature = StringRef(Feature).drop_front(); // remove + or -.
if (!Context.getTargetInfo().isValidFeatureName(CurFeature))
return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
- << Unsupported << None << CurFeature;
+ << Unsupported << None << CurFeature << Target;
}
TargetInfo::BranchProtectionInfo BPI;
- StringRef Error;
- if (!ParsedAttrs.BranchProtection.empty() &&
- !Context.getTargetInfo().validateBranchProtection(
- ParsedAttrs.BranchProtection, BPI, Error)) {
- if (Error.empty())
+ StringRef DiagMsg;
+ if (ParsedAttrs.BranchProtection.empty())
+ return false;
+ if (!Context.getTargetInfo().validateBranchProtection(
+ ParsedAttrs.BranchProtection, BPI, DiagMsg)) {
+ if (DiagMsg.empty())
return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
- << Unsupported << None << "branch-protection";
- else
- return Diag(LiteralLoc, diag::err_invalid_branch_protection_spec)
- << Error;
+ << Unsupported << None << "branch-protection" << Target;
+ return Diag(LiteralLoc, diag::err_invalid_branch_protection_spec)
+ << DiagMsg;
}
+ if (!DiagMsg.empty())
+ Diag(LiteralLoc, diag::warn_unsupported_branch_protection_spec) << DiagMsg;
return false;
}
@@ -3274,6 +3299,107 @@ static void handleTargetAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
D->addAttr(NewAttr);
}
+bool Sema::checkTargetClonesAttrString(SourceLocation LiteralLoc, StringRef Str,
+ const StringLiteral *Literal,
+ bool &HasDefault, bool &HasCommas,
+ SmallVectorImpl<StringRef> &Strings) {
+ enum FirstParam { Unsupported, Duplicate, Unknown };
+ enum SecondParam { None, Architecture, Tune };
+ enum ThirdParam { Target, TargetClones };
+ HasCommas = HasCommas || Str.contains(',');
+ // Warn on empty at the beginning of a string.
+ if (Str.size() == 0)
+ return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
+ << Unsupported << None << "" << TargetClones;
+
+ std::pair<StringRef, StringRef> Parts = {{}, Str};
+ while (!Parts.second.empty()) {
+ Parts = Parts.second.split(',');
+ StringRef Cur = Parts.first.trim();
+ SourceLocation CurLoc = Literal->getLocationOfByte(
+ Cur.data() - Literal->getString().data(), getSourceManager(),
+ getLangOpts(), Context.getTargetInfo());
+
+ bool DefaultIsDupe = false;
+ if (Cur.empty())
+ return Diag(CurLoc, diag::warn_unsupported_target_attribute)
+ << Unsupported << None << "" << TargetClones;
+
+ if (Cur.startswith("arch=")) {
+ if (!Context.getTargetInfo().isValidCPUName(
+ Cur.drop_front(sizeof("arch=") - 1)))
+ return Diag(CurLoc, diag::warn_unsupported_target_attribute)
+ << Unsupported << Architecture
+ << Cur.drop_front(sizeof("arch=") - 1) << TargetClones;
+ } else if (Cur == "default") {
+ DefaultIsDupe = HasDefault;
+ HasDefault = true;
+ } else if (!Context.getTargetInfo().isValidFeatureName(Cur))
+ return Diag(CurLoc, diag::warn_unsupported_target_attribute)
+ << Unsupported << None << Cur << TargetClones;
+
+ if (llvm::find(Strings, Cur) != Strings.end() || DefaultIsDupe)
+ Diag(CurLoc, diag::warn_target_clone_duplicate_options);
+ // Note: Add even if there are duplicates, since it changes name mangling.
+ Strings.push_back(Cur);
+ }
+
+ if (Str.rtrim().endswith(","))
+ return Diag(LiteralLoc, diag::warn_unsupported_target_attribute)
+ << Unsupported << None << "" << TargetClones;
+ return false;
+}
+
+static void handleTargetClonesAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
+ // Ensure we don't combine these with themselves, since that causes some
+ // confusing behavior.
+ if (const auto *Other = D->getAttr<TargetClonesAttr>()) {
+ S.Diag(AL.getLoc(), diag::err_disallowed_duplicate_attribute) << AL;
+ S.Diag(Other->getLocation(), diag::note_conflicting_attribute);
+ return;
+ }
+ if (checkAttrMutualExclusion<TargetClonesAttr>(S, D, AL))
+ return;
+
+ SmallVector<StringRef, 2> Strings;
+ bool HasCommas = false, HasDefault = false;
+
+ for (unsigned I = 0, E = AL.getNumArgs(); I != E; ++I) {
+ StringRef CurStr;
+ SourceLocation LiteralLoc;
+ if (!S.checkStringLiteralArgumentAttr(AL, I, CurStr, &LiteralLoc) ||
+ S.checkTargetClonesAttrString(
+ LiteralLoc, CurStr,
+ cast<StringLiteral>(AL.getArgAsExpr(I)->IgnoreParenCasts()),
+ HasDefault, HasCommas, Strings))
+ return;
+ }
+
+ if (HasCommas && AL.getNumArgs() > 1)
+ S.Diag(AL.getLoc(), diag::warn_target_clone_mixed_values);
+
+ if (!HasDefault) {
+ S.Diag(AL.getLoc(), diag::err_target_clone_must_have_default);
+ return;
+ }
+
+ // FIXME: We could probably figure out how to get this to work for lambdas
+ // someday.
+ if (const auto *MD = dyn_cast<CXXMethodDecl>(D)) {
+ if (MD->getParent()->isLambda()) {
+ S.Diag(D->getLocation(), diag::err_multiversion_doesnt_support)
+ << static_cast<unsigned>(MultiVersionKind::TargetClones)
+ << /*Lambda*/ 9;
+ return;
+ }
+ }
+
+ cast<FunctionDecl>(D)->setIsMultiVersion();
+ TargetClonesAttr *NewAttr = ::new (S.Context)
+ TargetClonesAttr(S.Context, AL, Strings.data(), Strings.size());
+ D->addAttr(NewAttr);
+}
+
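Example of the attribute this handler accepts (illustrative; note that a "default" entry is mandatory, and duplicate entries only warn):

    __attribute__((target_clones("avx2", "arch=atom", "default")))
    int foo(void) { return 1; }
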
static void handleMinVectorWidthAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
Expr *E = AL.getArgAsExpr(0);
uint32_t VecWidth;
@@ -8217,6 +8343,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
case ParsedAttr::AT_Target:
handleTargetAttr(S, D, AL);
break;
+ case ParsedAttr::AT_TargetClones:
+ handleTargetClonesAttr(S, D, AL);
+ break;
case ParsedAttr::AT_MinVectorWidth:
handleMinVectorWidthAttr(S, D, AL);
break;
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 8592335e20d3..b305d4e5b92f 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -16566,6 +16566,17 @@ Sema::PushExpressionEvaluationContext(
ExpressionEvaluationContextRecord::ExpressionKind ExprContext) {
ExprEvalContexts.emplace_back(NewContext, ExprCleanupObjects.size(), Cleanup,
LambdaContextDecl, ExprContext);
+
+ // Discarded statements and immediate contexts nested in other
+ // discarded statements or immediate context are themselves
+ // a discarded statement or an immediate context, respectively.
+ ExprEvalContexts.back().InDiscardedStatement =
+ ExprEvalContexts[ExprEvalContexts.size() - 2]
+ .isDiscardedStatementContext();
+ ExprEvalContexts.back().InImmediateFunctionContext =
+ ExprEvalContexts[ExprEvalContexts.size() - 2]
+ .isImmediateFunctionContext();
+
Cleanup.reset();
if (!MaybeODRUseExprs.empty())
std::swap(MaybeODRUseExprs, ExprEvalContexts.back().SavedMaybeODRUseExprs);
@@ -18965,6 +18976,10 @@ bool Sema::DiagIfReachable(SourceLocation Loc, ArrayRef<const Stmt *> Stmts,
/// during overload resolution or within sizeof/alignof/typeof/typeid.
bool Sema::DiagRuntimeBehavior(SourceLocation Loc, ArrayRef<const Stmt*> Stmts,
const PartialDiagnostic &PD) {
+
+ if (ExprEvalContexts.back().isDiscardedStatementContext())
+ return false;
+
switch (ExprEvalContexts.back().Context) {
case ExpressionEvaluationContext::Unevaluated:
case ExpressionEvaluationContext::UnevaluatedList:
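
A sketch of the observable effect (hypothetical example): runtime-behavior diagnostics are suppressed in discarded statements, including nested ones:

    template <typename T> int f() {
      if constexpr (false) {
        if constexpr (false) {
          return 1 / 0; // no runtime-behavior warning: this context is
                        // known to be discarded
        }
      }
      return 0;
    }
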
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
index 635252584562..d25f329f85e4 100644
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -1508,8 +1508,9 @@ Sema::BuildCXXTypeConstructExpr(TypeSourceInfo *TInfo,
}
// Only construct objects with object types.
- // There doesn't seem to be an explicit rule for this but functions are
- // not objects, so they cannot take initializers.
+ // The standard doesn't explicitly forbid function types here, but that's an
+ // obvious oversight, as there's no way to dynamically construct a function
+ // in general.
if (Ty->isFunctionType())
return ExprError(Diag(TyBeginLoc, diag::err_init_for_function_type)
<< Ty << FullRange);
diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp
index 3c820829864d..1d90759f2406 100644
--- a/clang/lib/Sema/SemaStmt.cpp
+++ b/clang/lib/Sema/SemaStmt.cpp
@@ -3563,8 +3563,7 @@ StmtResult Sema::ActOnCapScopeReturnStmt(SourceLocation ReturnLoc,
bool HasDeducedReturnType =
CurLambda && hasDeducedReturnType(CurLambda->CallOperator);
- if (ExprEvalContexts.back().Context ==
- ExpressionEvaluationContext::DiscardedStatement &&
+ if (ExprEvalContexts.back().isDiscardedStatementContext() &&
(HasDeducedReturnType || CurCap->HasImplicitReturnType)) {
if (RetValExp) {
ExprResult ER =
@@ -3880,8 +3879,7 @@ Sema::ActOnReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp,
if (RetVal.isInvalid())
return StmtError();
StmtResult R = BuildReturnStmt(ReturnLoc, RetVal.get());
- if (R.isInvalid() || ExprEvalContexts.back().Context ==
- ExpressionEvaluationContext::DiscardedStatement)
+ if (R.isInvalid() || ExprEvalContexts.back().isDiscardedStatementContext())
return R;
if (VarDecl *VD =
@@ -3966,8 +3964,7 @@ StmtResult Sema::BuildReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp) {
// C++1z: discarded return statements are not considered when deducing a
// return type.
- if (ExprEvalContexts.back().Context ==
- ExpressionEvaluationContext::DiscardedStatement &&
+ if (ExprEvalContexts.back().isDiscardedStatementContext() &&
FnRetType->getContainedAutoType()) {
if (RetValExp) {
ExprResult ER =
diff --git a/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp b/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp
index bc939d252800..d57bab154b61 100644
--- a/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp
@@ -686,8 +686,8 @@ SwitchNodeBuilder::generateDefaultCaseNode(ProgramStateRef St,
assert(Src->succ_rbegin() != Src->succ_rend());
CFGBlock *DefaultBlock = *Src->succ_rbegin();
- // Sanity check for default blocks that are unreachable and not caught
- // by earlier stages.
+ // Basic correctness check for default blocks that are unreachable and not
+ // caught by earlier stages.
if (!DefaultBlock)
return nullptr;
diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
index 74403a160b8e..23c67c64f975 100644
--- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
+++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp
@@ -2191,6 +2191,42 @@ LLVM_NODISCARD ProgramStateRef reAssume(ProgramStateRef State,
Constraint->getMaxValue(), true);
}
+// Simplify the given symbol with the help of the SValBuilder. In
+// SValBuilder::simplifySVal, we traverse the symbol tree, query the
+// constraint values for the sub-trees, and constant-fold any value that is
+// known to be a constant. A compound symbol might collapse to a simpler
+// symbol tree that can be simplified further. Thus, we repeat the
+// simplification on the new symbol tree until we reach the simplest form,
+// i.e. the fixpoint.
+//
+// Consider the following symbol `(b * b) * b * b` which has this tree:
+// *
+// / \
+// * b
+// / \
+// / b
+// (b * b)
+// Now, if the `b * b == 1` new constraint is added then during the first
+// iteration we have the following transformations:
+// * *
+// / \ / \
+// * b --> b b
+// / \
+// / b
+// 1
+// We need another iteration to reach the final result `1`.
+LLVM_NODISCARD
+static SVal simplifyUntilFixpoint(SValBuilder &SVB, ProgramStateRef State,
+ const SymbolRef Sym) {
+ SVal Val = SVB.makeSymbolVal(Sym);
+ SVal SimplifiedVal = SVB.simplifySVal(State, Val);
+ // Repeat the simplification until a fixpoint is reached.
+ while (SimplifiedVal != Val) {
+ Val = SimplifiedVal;
+ SimplifiedVal = SVB.simplifySVal(State, Val);
+ }
+ return SimplifiedVal;
+}
+
// Iterate over all symbols and try to simplify them. Once a symbol is
// simplified then we check if we can merge the simplified symbol's equivalence
// class to this class. This way, we simplify not just the symbols but the
@@ -2202,7 +2238,8 @@ EquivalenceClass::simplify(SValBuilder &SVB, RangeSet::Factory &F,
SymbolSet ClassMembers = Class.getClassMembers(State);
for (const SymbolRef &MemberSym : ClassMembers) {
- const SVal SimplifiedMemberVal = simplifyToSVal(State, MemberSym);
+ const SVal SimplifiedMemberVal =
+ simplifyUntilFixpoint(SVB, State, MemberSym);
const SymbolRef SimplifiedMemberSym = SimplifiedMemberVal.getAsSymbol();
// The symbol is collapsed to a constant, check if the current State is
diff --git a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
index 681a1f64eadc..4ca35dd06ae5 100644
--- a/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
+++ b/clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
@@ -372,6 +372,15 @@ SVal SimpleSValBuilder::evalBinOpNN(ProgramStateRef state,
NonLoc InputLHS = lhs;
NonLoc InputRHS = rhs;
+ // Constraints may have changed since the creation of a bound SVal. Check if
+ // the values can be simplified based on those new constraints.
+ SVal simplifiedLhs = simplifySVal(state, lhs);
+ SVal simplifiedRhs = simplifySVal(state, rhs);
+ if (auto simplifiedLhsAsNonLoc = simplifiedLhs.getAs<NonLoc>())
+ lhs = *simplifiedLhsAsNonLoc;
+ if (auto simplifiedRhsAsNonLoc = simplifiedRhs.getAs<NonLoc>())
+ rhs = *simplifiedRhsAsNonLoc;
+
// Handle trivial case where left-side and right-side are the same.
if (lhs == rhs)
switch (op) {
@@ -619,16 +628,6 @@ SVal SimpleSValBuilder::evalBinOpNN(ProgramStateRef state,
}
}
- // Does the symbolic expression simplify to a constant?
- // If so, "fold" the constant by setting 'lhs' to a ConcreteInt
- // and try again.
- SVal simplifiedLhs = simplifySVal(state, lhs);
- if (simplifiedLhs != lhs)
- if (auto simplifiedLhsAsNonLoc = simplifiedLhs.getAs<NonLoc>()) {
- lhs = *simplifiedLhsAsNonLoc;
- continue;
- }
-
// Is the RHS a constant?
if (const llvm::APSInt *RHSValue = getKnownValue(state, rhs))
return MakeSymIntVal(Sym, op, *RHSValue, resultTy);
@@ -1103,7 +1102,6 @@ const llvm::APSInt *SimpleSValBuilder::getKnownValue(ProgramStateRef state,
if (SymbolRef Sym = V.getAsSymbol())
return state->getConstraintManager().getSymVal(state, Sym);
- // FIXME: Add support for SymExprs.
return nullptr;
}
@@ -1135,6 +1133,24 @@ SVal SimpleSValBuilder::simplifySVal(ProgramStateRef State, SVal V) {
return cache(Sym, SVB.makeSymbolVal(Sym));
}
+ // Return the known const value for the Sym if available, or return Undef
+ // otherwise.
+ SVal getConst(SymbolRef Sym) {
+ const llvm::APSInt *Const =
+ State->getConstraintManager().getSymVal(State, Sym);
+ if (Const)
+ return Loc::isLocType(Sym->getType()) ? (SVal)SVB.makeIntLocVal(*Const)
+ : (SVal)SVB.makeIntVal(*Const);
+ return UndefinedVal();
+ }
+
+ SVal getConstOrVisit(SymbolRef Sym) {
+ const SVal Ret = getConst(Sym);
+ if (Ret.isUndef())
+ return Visit(Sym);
+ return Ret;
+ }
+
public:
Simplifier(ProgramStateRef State)
: State(State), SVB(State->getStateManager().getSValBuilder()) {}
@@ -1148,15 +1164,14 @@ SVal SimpleSValBuilder::simplifySVal(ProgramStateRef State, SVal V) {
return SVB.makeSymbolVal(S);
}
- // TODO: Support SymbolCast. Support IntSymExpr when/if we actually
- // start producing them.
+ // TODO: Support SymbolCast.
SVal VisitSymIntExpr(const SymIntExpr *S) {
auto I = Cached.find(S);
if (I != Cached.end())
return I->second;
- SVal LHS = Visit(S->getLHS());
+ SVal LHS = getConstOrVisit(S->getLHS());
if (isUnchanged(S->getLHS(), LHS))
return skip(S);
@@ -1183,6 +1198,20 @@ SVal SimpleSValBuilder::simplifySVal(ProgramStateRef State, SVal V) {
S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType()));
}
+ SVal VisitIntSymExpr(const IntSymExpr *S) {
+ auto I = Cached.find(S);
+ if (I != Cached.end())
+ return I->second;
+
+ SVal RHS = getConstOrVisit(S->getRHS());
+ if (isUnchanged(S->getRHS(), RHS))
+ return skip(S);
+
+ SVal LHS = SVB.makeIntVal(S->getLHS());
+ return cache(
+ S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType()));
+ }
+
SVal VisitSymSymExpr(const SymSymExpr *S) {
auto I = Cached.find(S);
if (I != Cached.end())
@@ -1196,8 +1225,9 @@ SVal SimpleSValBuilder::simplifySVal(ProgramStateRef State, SVal V) {
Loc::isLocType(S->getRHS()->getType()))
return skip(S);
- SVal LHS = Visit(S->getLHS());
- SVal RHS = Visit(S->getRHS());
+ SVal LHS = getConstOrVisit(S->getLHS());
+ SVal RHS = getConstOrVisit(S->getRHS());
+
if (isUnchanged(S->getLHS(), LHS) && isUnchanged(S->getRHS(), RHS))
return skip(S);
diff --git a/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp b/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp
index 31de49033ac2..f692c68045ee 100644
--- a/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp
+++ b/clang/lib/StaticAnalyzer/Frontend/AnalysisConsumer.cpp
@@ -591,16 +591,24 @@ AnalysisConsumer::getModeForDecl(Decl *D, AnalysisMode Mode) {
// - Main source file: run both path-sensitive and non-path-sensitive checks.
// - Header files: run non-path-sensitive checks only.
// - System headers: don't run any checks.
- SourceManager &SM = Ctx->getSourceManager();
- const Stmt *Body = D->getBody();
- SourceLocation SL = Body ? Body->getBeginLoc() : D->getLocation();
- SL = SM.getExpansionLoc(SL);
-
- if (!Opts->AnalyzeAll && !Mgr->isInCodeFile(SL)) {
- if (SL.isInvalid() || SM.isInSystemHeader(SL))
- return AM_None;
+ if (Opts->AnalyzeAll)
+ return Mode;
+
+ const SourceManager &SM = Ctx->getSourceManager();
+
+ const SourceLocation Loc = [&SM](Decl *D) -> SourceLocation {
+ const Stmt *Body = D->getBody();
+ SourceLocation SL = Body ? Body->getBeginLoc() : D->getLocation();
+ return SM.getExpansionLoc(SL);
+ }(D);
+
+ // Ignore system headers.
+ if (Loc.isInvalid() || SM.isInSystemHeader(Loc))
+ return AM_None;
+
+ // Disable path sensitive analysis in user-headers.
+ if (!Mgr->isInCodeFile(Loc))
return Mode & ~AM_Path;
- }
return Mode;
}
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
index 40e8bd2b8776..f7c711690d7e 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
@@ -129,7 +129,7 @@ DependencyScanningFilesystemSharedCache::get(StringRef Key, bool Minimized) {
///
/// This is kinda hacky, it would be better if we knew what kind of file Clang
/// was expecting instead.
-static bool shouldMinimize(StringRef Filename) {
+static bool shouldMinimizeBasedOnExtension(StringRef Filename) {
StringRef Ext = llvm::sys::path::extension(Filename);
if (Ext.empty())
return true; // C++ standard library
@@ -147,26 +147,43 @@ static bool shouldCacheStatFailures(StringRef Filename) {
StringRef Ext = llvm::sys::path::extension(Filename);
if (Ext.empty())
return false; // This may be the module cache directory.
- return shouldMinimize(Filename); // Only cache stat failures on source files.
+ // Only cache stat failures on source files.
+ return shouldMinimizeBasedOnExtension(Filename);
}
-void DependencyScanningWorkerFilesystem::ignoreFile(StringRef RawFilename) {
+void DependencyScanningWorkerFilesystem::disableMinimization(
+ StringRef RawFilename) {
llvm::SmallString<256> Filename;
llvm::sys::path::native(RawFilename, Filename);
- IgnoredFiles.insert(Filename);
+ NotToBeMinimized.insert(Filename);
}
-bool DependencyScanningWorkerFilesystem::shouldIgnoreFile(
- StringRef RawFilename) {
+bool DependencyScanningWorkerFilesystem::shouldMinimize(StringRef RawFilename) {
+ if (!shouldMinimizeBasedOnExtension(RawFilename))
+ return false;
+
llvm::SmallString<256> Filename;
llvm::sys::path::native(RawFilename, Filename);
- return IgnoredFiles.contains(Filename);
+ return !NotToBeMinimized.contains(Filename);
+}
+
+CachedFileSystemEntry DependencyScanningWorkerFilesystem::createFileSystemEntry(
+ llvm::ErrorOr<llvm::vfs::Status> &&MaybeStatus, StringRef Filename,
+ bool ShouldMinimize) {
+ if (!MaybeStatus)
+ return CachedFileSystemEntry(MaybeStatus.getError());
+
+ if (MaybeStatus->isDirectory())
+ return CachedFileSystemEntry::createDirectoryEntry(std::move(*MaybeStatus));
+
+ return CachedFileSystemEntry::createFileEntry(Filename, getUnderlyingFS(),
+ ShouldMinimize);
}
llvm::ErrorOr<const CachedFileSystemEntry *>
DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
const StringRef Filename) {
- bool ShouldMinimize = !shouldIgnoreFile(Filename) && shouldMinimize(Filename);
+ bool ShouldMinimize = shouldMinimize(Filename);
if (const auto *Entry = Cache.getCachedEntry(Filename, ShouldMinimize))
return Entry;
@@ -182,23 +199,15 @@ DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
CachedFileSystemEntry &CacheEntry = SharedCacheEntry.Value;
if (!CacheEntry.isValid()) {
- llvm::vfs::FileSystem &FS = getUnderlyingFS();
- auto MaybeStatus = FS.status(Filename);
- if (!MaybeStatus) {
- if (!shouldCacheStatFailures(Filename))
- // HACK: We need to always restat non source files if the stat fails.
- // This is because Clang first looks up the module cache and module
- // files before building them, and then looks for them again. If we
- // cache the stat failure, it won't see them the second time.
- return MaybeStatus.getError();
- else
- CacheEntry = CachedFileSystemEntry(MaybeStatus.getError());
- } else if (MaybeStatus->isDirectory())
- CacheEntry = CachedFileSystemEntry::createDirectoryEntry(
- std::move(*MaybeStatus));
- else
- CacheEntry = CachedFileSystemEntry::createFileEntry(Filename, FS,
- ShouldMinimize);
+ auto MaybeStatus = getUnderlyingFS().status(Filename);
+ if (!MaybeStatus && !shouldCacheStatFailures(Filename))
+ // HACK: We need to always restat non-source files if the stat fails.
+ // This is because Clang first looks up the module cache and module
+ // files before building them, and then looks for them again. If we
+ // cache the stat failure, it won't see them the second time.
+ return MaybeStatus.getError();
+ CacheEntry = createFileSystemEntry(std::move(MaybeStatus), Filename,
+ ShouldMinimize);
}
Result = &CacheEntry;
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
index 7fdc49271791..70bb6c5caf87 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
@@ -193,20 +193,19 @@ public:
// Use the dependency scanning optimized file system if requested to do so.
if (DepFS) {
- DepFS->clearIgnoredFiles();
- // Ignore any files that contributed to prebuilt modules. The implicit
- // build validates the modules by comparing the reported sizes of their
- // inputs to the current state of the filesystem. Minimization would throw
- // this mechanism off.
+ DepFS->enableMinimizationOfAllFiles();
+ // Don't minimize any files that contributed to prebuilt modules. The
+ // implicit build validates the modules by comparing the reported sizes of
+ // their inputs to the current state of the filesystem. Minimization would
+ // throw this mechanism off.
for (const auto &File : PrebuiltModulesInputFiles)
- DepFS->ignoreFile(File.getKey());
- // Add any filenames that were explicity passed in the build settings and
- // that might be opened, as we want to ensure we don't run source
- // minimization on them.
+ DepFS->disableMinimization(File.getKey());
+ // Don't minimize any files that were explicitly passed in the build
+ // settings and that might be opened.
for (const auto &E : ScanInstance.getHeaderSearchOpts().UserEntries)
- DepFS->ignoreFile(E.Path);
+ DepFS->disableMinimization(E.Path);
for (const auto &F : ScanInstance.getHeaderSearchOpts().VFSOverlayFiles)
- DepFS->ignoreFile(F);
+ DepFS->disableMinimization(F);
// Support for virtual file system overlays on top of the caching
// filesystem.
diff --git a/clang/utils/TableGen/ASTTableGen.cpp b/clang/utils/TableGen/ASTTableGen.cpp
index 3f6da40964e0..6aa8b28a942f 100644
--- a/clang/utils/TableGen/ASTTableGen.cpp
+++ b/clang/utils/TableGen/ASTTableGen.cpp
@@ -107,7 +107,7 @@ static void visitASTNodeRecursive(ASTNode node, ASTNode base,
static void visitHierarchy(RecordKeeper &records,
StringRef nodeClassName,
ASTNodeHierarchyVisitor<ASTNode> visit) {
- // Check for the node class, just as a sanity check.
+ // Check for the node class, just as a basic correctness check.
if (!records.getClass(nodeClassName)) {
PrintFatalError(Twine("cannot find definition for node class ")
+ nodeClassName);
diff --git a/compiler-rt/include/profile/MemProfData.inc b/compiler-rt/include/profile/MemProfData.inc
new file mode 100644
index 000000000000..d64227e4ba31
--- /dev/null
+++ b/compiler-rt/include/profile/MemProfData.inc
@@ -0,0 +1,61 @@
+#ifndef MEMPROF_DATA_INC
+#define MEMPROF_DATA_INC
+/*===-- MemProfData.inc - MemProf profiling runtime structures -*- C++ -*-=== *\
+|*
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+|* See https://llvm.org/LICENSE.txt for license information.
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+|*
+\*===----------------------------------------------------------------------===*/
+/*
+ * This is the main file that defines all the data structures, signatures, and
+ * constant literals that are shared across the profiling runtime library and
+ * the host tools (reader/writer).
+ *
+ * This file has two identical copies. The primary copy lives in LLVM and the
+ * other one sits in the compiler-rt/include/profile directory. To make changes
+ * to this file, first modify the primary copy and then copy it over to
+ * compiler-rt. Testing of any change in this file can start only after the two
+ * copies are synced up.
+ *
+\*===----------------------------------------------------------------------===*/
+
+
+#ifdef _MSC_VER
+#define PACKED(__decl__) __pragma(pack(push,1)) __decl__ __pragma(pack(pop))
+#else
+#define PACKED(__decl__) __decl__ __attribute__((__packed__))
+#endif
+
+// A 64-bit magic number to uniquely identify the raw binary memprof profile file.
+#define MEMPROF_RAW_MAGIC_64 \
+ ((uint64_t)255 << 56 | (uint64_t)'m' << 48 | (uint64_t)'p' << 40 | (uint64_t)'r' << 32 | \
+ (uint64_t)'o' << 24 | (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129)
+
+// The version number of the raw binary format.
+#define MEMPROF_RAW_VERSION 1ULL
+
+namespace llvm {
+namespace memprof {
+// A struct describing the header used for the raw binary memprof profile format.
+PACKED(struct Header {
+ uint64_t Magic;
+ uint64_t Version;
+ uint64_t TotalSize;
+ uint64_t SegmentOffset;
+ uint64_t MIBOffset;
+ uint64_t StackOffset;
+});
+
+// A struct describing a /proc/maps segment entry for a particular
+// binary/library, identified by its build id.
+PACKED(struct SegmentEntry {
+ uint64_t Start;
+ uint64_t End;
+ uint64_t Offset;
+ uint8_t BuildId[32];
+});
+} // namespace memprof
+} // namespace llvm
+
+#endif
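
As an aside on consuming this format: below is a minimal sketch of how a
host-side reader might validate the header of a raw memprof profile buffer.
It is an illustration only, not part of either copy of the file; the function
name ValidateRawProfileHeader and the buffer-handling convention are
assumptions.

    #include <cstdint>
    #include <cstring>

    #include "profile/MemProfData.inc"

    // Minimal sketch: check the magic, version, and declared total size of a
    // raw memprof profile held in Buffer (BufferSize bytes). The magic packs
    // the bytes 0xff 'm' 'p' 'r' 'o' 'f' 'r' 0x81 into one u64, so a plain
    // equality comparison suffices.
    bool ValidateRawProfileHeader(const char *Buffer, uint64_t BufferSize) {
      if (BufferSize < sizeof(llvm::memprof::Header))
        return false;
      llvm::memprof::Header H;
      memcpy(&H, Buffer, sizeof(H));  // Header is packed; copy it out safely.
      return H.Magic == MEMPROF_RAW_MAGIC_64 &&
             H.Version == MEMPROF_RAW_VERSION && H.TotalSize <= BufferSize;
    }
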
diff --git a/compiler-rt/lib/asan/asan_interceptors.cpp b/compiler-rt/lib/asan/asan_interceptors.cpp
index b28909152e20..2ff314a5a9cb 100644
--- a/compiler-rt/lib/asan/asan_interceptors.cpp
+++ b/compiler-rt/lib/asan/asan_interceptors.cpp
@@ -130,23 +130,24 @@ DECLARE_REAL_AND_INTERCEPTOR(void, free, void *)
#define COMMON_INTERCEPTOR_BLOCK_REAL(name) REAL(name)
// Strict init-order checking is dlopen-hostile:
// https://github.com/google/sanitizers/issues/178
-#define COMMON_INTERCEPTOR_ON_DLOPEN(filename, flag) \
- do { \
- if (flags()->strict_init_order) \
- StopInitOrderChecking(); \
- CheckNoDeepBind(filename, flag); \
- } while (false)
-#define COMMON_INTERCEPTOR_ON_EXIT(ctx) OnExit()
-#define COMMON_INTERCEPTOR_LIBRARY_LOADED(filename, handle)
-#define COMMON_INTERCEPTOR_LIBRARY_UNLOADED()
-#define COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED (!asan_inited)
-#define COMMON_INTERCEPTOR_GET_TLS_RANGE(begin, end) \
- if (AsanThread *t = GetCurrentThread()) { \
- *begin = t->tls_begin(); \
- *end = t->tls_end(); \
- } else { \
- *begin = *end = 0; \
- }
+# define COMMON_INTERCEPTOR_DLOPEN(filename, flag) \
+ ({ \
+ if (flags()->strict_init_order) \
+ StopInitOrderChecking(); \
+ CheckNoDeepBind(filename, flag); \
+ REAL(dlopen)(filename, flag); \
+ })
+# define COMMON_INTERCEPTOR_ON_EXIT(ctx) OnExit()
+# define COMMON_INTERCEPTOR_LIBRARY_LOADED(filename, handle)
+# define COMMON_INTERCEPTOR_LIBRARY_UNLOADED()
+# define COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED (!asan_inited)
+# define COMMON_INTERCEPTOR_GET_TLS_RANGE(begin, end) \
+ if (AsanThread *t = GetCurrentThread()) { \
+ *begin = t->tls_begin(); \
+ *end = t->tls_end(); \
+ } else { \
+ *begin = *end = 0; \
+ }
#define COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, to, from, size) \
do { \
diff --git a/compiler-rt/lib/asan/asan_report.cpp b/compiler-rt/lib/asan/asan_report.cpp
index 1f266334b311..2a38fabaf220 100644
--- a/compiler-rt/lib/asan/asan_report.cpp
+++ b/compiler-rt/lib/asan/asan_report.cpp
@@ -460,6 +460,10 @@ static bool SuppressErrorReport(uptr pc) {
void ReportGenericError(uptr pc, uptr bp, uptr sp, uptr addr, bool is_write,
uptr access_size, u32 exp, bool fatal) {
+ if (__asan_test_only_reported_buggy_pointer) {
+ *__asan_test_only_reported_buggy_pointer = addr;
+ return;
+ }
if (!fatal && SuppressErrorReport(pc)) return;
ENABLE_FRAME_POINTER;
diff --git a/compiler-rt/lib/asan/asan_rtl.cpp b/compiler-rt/lib/asan/asan_rtl.cpp
index 1b150b393cfe..5be8ef0f6d1c 100644
--- a/compiler-rt/lib/asan/asan_rtl.cpp
+++ b/compiler-rt/lib/asan/asan_rtl.cpp
@@ -85,12 +85,8 @@ void ShowStatsAndAbort() {
NOINLINE
static void ReportGenericErrorWrapper(uptr addr, bool is_write, int size,
int exp_arg, bool fatal) {
- if (__asan_test_only_reported_buggy_pointer) {
- *__asan_test_only_reported_buggy_pointer = addr;
- } else {
- GET_CALLER_PC_BP_SP;
- ReportGenericError(pc, bp, sp, addr, is_write, size, exp_arg, fatal);
- }
+ GET_CALLER_PC_BP_SP;
+ ReportGenericError(pc, bp, sp, addr, is_write, size, exp_arg, fatal);
}
// --------------- LowLevelAllocateCallbac ---------- {{{1
diff --git a/compiler-rt/lib/cfi/cfi.cpp b/compiler-rt/lib/cfi/cfi.cpp
index 95853208f951..65a10c999cc6 100644
--- a/compiler-rt/lib/cfi/cfi.cpp
+++ b/compiler-rt/lib/cfi/cfi.cpp
@@ -230,7 +230,7 @@ uptr find_cfi_check_in_dso(dl_phdr_info *info) {
}
if (symtab > strtab) {
- VReport(1, "Can not handle: symtab > strtab (%p > %zx)\n", symtab, strtab);
+ VReport(1, "Can not handle: symtab > strtab (%zx > %zx)\n", symtab, strtab);
return 0;
}
@@ -250,7 +250,7 @@ uptr find_cfi_check_in_dso(dl_phdr_info *info) {
if (phdr_idx == info->dlpi_phnum) {
// Nope, either different segments or just bogus pointers.
// Can not handle this.
- VReport(1, "Can not handle: symtab %p, strtab %zx\n", symtab, strtab);
+ VReport(1, "Can not handle: symtab %zx, strtab %zx\n", symtab, strtab);
return 0;
}
diff --git a/compiler-rt/lib/memprof/memprof_allocator.cpp b/compiler-rt/lib/memprof/memprof_allocator.cpp
index 696f64d8c324..059ce283b8c9 100644
--- a/compiler-rt/lib/memprof/memprof_allocator.cpp
+++ b/compiler-rt/lib/memprof/memprof_allocator.cpp
@@ -252,6 +252,8 @@ struct Allocator {
InsertLiveBlocks();
if (print_text) {
+ if (!flags()->print_terse)
+ Printf("Recorded MIBs (incl. live on exit):\n");
MIBMap.ForEach(PrintCallback,
reinterpret_cast<void *>(flags()->print_terse));
StackDepotPrintAll();
@@ -271,9 +273,6 @@ struct Allocator {
// Inserts any blocks which have been allocated but not yet deallocated.
void InsertLiveBlocks() {
- if (print_text && !flags()->print_terse)
- Printf("Live on exit:\n");
-
allocator.ForEachChunk(
[](uptr chunk, void *alloc) {
u64 user_requested_size;
diff --git a/compiler-rt/lib/memprof/memprof_interceptors.cpp b/compiler-rt/lib/memprof/memprof_interceptors.cpp
index 5575ae2fe444..459ad03e8dfe 100644
--- a/compiler-rt/lib/memprof/memprof_interceptors.cpp
+++ b/compiler-rt/lib/memprof/memprof_interceptors.cpp
@@ -93,10 +93,6 @@ DECLARE_REAL_AND_INTERCEPTOR(void, free, void *)
do { \
} while (false)
#define COMMON_INTERCEPTOR_BLOCK_REAL(name) REAL(name)
-#define COMMON_INTERCEPTOR_ON_DLOPEN(filename, flag) \
- do { \
- CheckNoDeepBind(filename, flag); \
- } while (false)
#define COMMON_INTERCEPTOR_ON_EXIT(ctx) OnExit()
#define COMMON_INTERCEPTOR_LIBRARY_LOADED(filename, handle)
#define COMMON_INTERCEPTOR_LIBRARY_UNLOADED()
diff --git a/compiler-rt/lib/memprof/memprof_rawprofile.cpp b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
index 96f315f95b24..c4800a6df34c 100644
--- a/compiler-rt/lib/memprof/memprof_rawprofile.cpp
+++ b/compiler-rt/lib/memprof/memprof_rawprofile.cpp
@@ -1,6 +1,12 @@
-#include "memprof_rawprofile.h"
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
#include "memprof_meminfoblock.h"
+#include "memprof_rawprofile.h"
+#include "profile/MemProfData.inc"
#include "sanitizer_common/sanitizer_allocator_internal.h"
+#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_linux.h"
#include "sanitizer_common/sanitizer_procmaps.h"
#include "sanitizer_common/sanitizer_stackdepot.h"
@@ -8,29 +14,12 @@
#include "sanitizer_common/sanitizer_stacktrace.h"
#include "sanitizer_common/sanitizer_vector.h"
-#include <stdlib.h>
-#include <string.h>
-
namespace __memprof {
using ::__sanitizer::Vector;
+using SegmentEntry = ::llvm::memprof::SegmentEntry;
+using Header = ::llvm::memprof::Header;
namespace {
-typedef struct __attribute__((__packed__)) {
- u64 start;
- u64 end;
- u64 offset;
- u8 buildId[32];
-} SegmentEntry;
-
-typedef struct __attribute__((__packed__)) {
- u64 magic;
- u64 version;
- u64 total_size;
- u64 segment_offset;
- u64 mib_offset;
- u64 stack_offset;
-} Header;
-
template <class T> char *WriteBytes(T Pod, char *&Buffer) {
*(T *)Buffer = Pod;
return Buffer + sizeof(T);
@@ -76,12 +65,12 @@ void SerializeSegmentsToBuffer(MemoryMappingLayoutBase &Layout,
for (Layout.Reset(); Layout.Next(&segment);) {
if (segment.IsReadable() && segment.IsExecutable()) {
- SegmentEntry entry{};
- entry.start = segment.start;
- entry.end = segment.end;
- entry.offset = segment.offset;
- memcpy(entry.buildId, segment.uuid, sizeof(segment.uuid));
- memcpy(Ptr, &entry, sizeof(SegmentEntry));
+ SegmentEntry Entry{};
+ Entry.Start = segment.start;
+ Entry.End = segment.end;
+ Entry.Offset = segment.offset;
+ memcpy(Entry.BuildId, segment.uuid, sizeof(segment.uuid));
+ memcpy(Ptr, &Entry, sizeof(SegmentEntry));
Ptr += sizeof(SegmentEntry);
NumSegmentsRecorded++;
}
@@ -89,7 +78,7 @@ void SerializeSegmentsToBuffer(MemoryMappingLayoutBase &Layout,
// Store the number of segments we recorded in the space we reserved.
*((u64 *)Buffer) = NumSegmentsRecorded;
- CHECK(ExpectedNumBytes == static_cast<u64>(Ptr - Buffer) &&
+ CHECK(ExpectedNumBytes >= static_cast<u64>(Ptr - Buffer) &&
"Expected num bytes != actual bytes written");
}
@@ -144,7 +133,7 @@ void SerializeStackToBuffer(const Vector<u64> &StackIds,
*(u64 *)(Ptr - (Count + 1) * sizeof(u64)) = Count;
}
- CHECK(ExpectedNumBytes == static_cast<u64>(Ptr - Buffer) &&
+ CHECK(ExpectedNumBytes >= static_cast<u64>(Ptr - Buffer) &&
"Expected num bytes != actual bytes written");
}
@@ -172,7 +161,7 @@ void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const Vector<u64> &StackIds,
Ptr = WriteBytes((*h)->mib, Ptr);
}
- CHECK(ExpectedNumBytes == static_cast<u64>(Ptr - Buffer) &&
+ CHECK(ExpectedNumBytes >= static_cast<u64>(Ptr - Buffer) &&
"Expected num bytes != actual bytes written");
}
@@ -193,11 +182,15 @@ void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const Vector<u64> &StackIds,
// BuildID 32B
// ----------
// ...
+// ----------
+// Optional Padding Bytes
// ---------- MIB Info
// Num Entries
// ---------- MIB Entry
// Alloc Count
// ...
+// ----------
+// Optional Padding Bytes
// ---------- Stack Info
// Num Entries
// ---------- Stack Entry
@@ -206,23 +199,29 @@ void SerializeMIBInfoToBuffer(MIBMapTy &MIBMap, const Vector<u64> &StackIds,
// PC2
// ...
// ----------
+// Optional Padding Bytes
// ...
u64 SerializeToRawProfile(MIBMapTy &MIBMap, MemoryMappingLayoutBase &Layout,
char *&Buffer) {
- const u64 NumSegmentBytes = SegmentSizeBytes(Layout);
+ // Each section size is rounded up to 8b since, by convention, the first
+ // entry in each section is a u64 which holds the number of entries in the
+ // section.
+ const u64 NumSegmentBytes = RoundUpTo(SegmentSizeBytes(Layout), 8);
Vector<u64> StackIds;
MIBMap.ForEach(RecordStackId, reinterpret_cast<void *>(&StackIds));
// The first 8b are for the total number of MIB records. Each MIB record is
// preceded by a 8b stack id which is associated with stack frames in the next
// section.
- const u64 NumMIBInfoBytes =
- sizeof(u64) + StackIds.Size() * (sizeof(u64) + sizeof(MemInfoBlock));
+ const u64 NumMIBInfoBytes = RoundUpTo(
+ sizeof(u64) + StackIds.Size() * (sizeof(u64) + sizeof(MemInfoBlock)), 8);
- const u64 NumStackBytes = StackSizeBytes(StackIds);
+ const u64 NumStackBytes = RoundUpTo(StackSizeBytes(StackIds), 8);
- const u64 TotalSizeBytes =
- sizeof(Header) + NumSegmentBytes + NumStackBytes + NumMIBInfoBytes;
+ // Ensure that the profile is 8b aligned. We allow for some optional padding
+ // at the end so that any subsequent profile serialized to the same file does
+ // not incur unaligned accesses.
+ const u64 TotalSizeBytes = RoundUpTo(
+ sizeof(Header) + NumSegmentBytes + NumStackBytes + NumMIBInfoBytes, 8);
// Allocate the memory for the entire buffer incl. info blocks.
Buffer = (char *)InternalAlloc(TotalSizeBytes);
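
The 8-byte rounding used for each section is the standard power-of-two
round-up. A small standalone sketch (RoundUpTo is reimplemented here for
illustration; the runtime uses the sanitizer_common helper of the same name):

    #include <cstdint>

    // Round Size up to the next multiple of Alignment, a power of two; this
    // mirrors the RoundUpTo calls applied to each profile section above.
    constexpr uint64_t RoundUpTo(uint64_t Size, uint64_t Alignment) {
      return (Size + Alignment - 1) & ~(Alignment - 1);
    }

    static_assert(RoundUpTo(48, 8) == 48, "already aligned, no padding");
    static_assert(RoundUpTo(52, 8) == 56, "rounded up by 4 padding bytes");
    static_assert(RoundUpTo(0, 8) == 0, "empty sections stay empty");
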
diff --git a/compiler-rt/lib/memprof/memprof_rawprofile.h b/compiler-rt/lib/memprof/memprof_rawprofile.h
index 052bac3267f1..575104e7e34e 100644
--- a/compiler-rt/lib/memprof/memprof_rawprofile.h
+++ b/compiler-rt/lib/memprof/memprof_rawprofile.h
@@ -5,17 +5,10 @@
#include "sanitizer_common/sanitizer_procmaps.h"
namespace __memprof {
-
-// TODO: pull these in from MemProfData.inc
-#define MEMPROF_RAW_MAGIC_64 \
- (u64)255 << 56 | (u64)'m' << 48 | (u64)'p' << 40 | (u64)'r' << 32 | \
- (u64)'o' << 24 | (u64)'f' << 16 | (u64)'r' << 8 | (u64)129
-
-#define MEMPROF_RAW_VERSION 1ULL
-
+// Serialize the in-memory representation of the memprof profile to the raw
+// binary format. The format itself is documented in memprof_rawprofile.cpp.
u64 SerializeToRawProfile(MIBMapTy &BlockCache, MemoryMappingLayoutBase &Layout,
char *&Buffer);
-
} // namespace __memprof
#endif // MEMPROF_RAWPROFILE_H_
diff --git a/compiler-rt/lib/memprof/tests/rawprofile.cpp b/compiler-rt/lib/memprof/tests/rawprofile.cpp
index 4404ab86092e..829e18370737 100644
--- a/compiler-rt/lib/memprof/tests/rawprofile.cpp
+++ b/compiler-rt/lib/memprof/tests/rawprofile.cpp
@@ -1,6 +1,10 @@
#include "memprof/memprof_rawprofile.h"
+#include <cstdint>
+#include <memory>
+
#include "memprof/memprof_meminfoblock.h"
+#include "profile/MemProfData.inc"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_procmaps.h"
#include "sanitizer_common/sanitizer_stackdepot.h"
@@ -8,8 +12,6 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
-#include <memory>
-
namespace {
using ::__memprof::MemInfoBlock;
@@ -47,6 +49,8 @@ u64 PopulateFakeMap(const MemInfoBlock &FakeMIB, uptr StackPCBegin,
template <class T = u64> T Read(char *&Buffer) {
static_assert(std::is_pod<T>::value, "Must be a POD type.");
+ assert(reinterpret_cast<size_t>(Buffer) % sizeof(T) == 0 &&
+ "Unaligned read!");
T t = *reinterpret_cast<T *>(Buffer);
Buffer += sizeof(T);
return t;
@@ -101,8 +105,9 @@ TEST(MemProf, Basic) {
const u64 MIBOffset = Read(Ptr);
const u64 StackOffset = Read(Ptr);
- // ============= Check sizes.
+ // ============= Check sizes and padding.
EXPECT_EQ(TotalSize, NumBytes);
+ EXPECT_EQ(TotalSize % 8, 0ULL);
// Should be equal to the size of the raw profile header.
EXPECT_EQ(SegmentOffset, 48ULL);
@@ -118,8 +123,10 @@ TEST(MemProf, Basic) {
EXPECT_EQ(StackOffset, 336ULL);
// We expect 2 stack entries, with 5 frames - 8b for total count,
- // 2 * (8b for id, 8b for frame count and 5*8b for fake frames)
- EXPECT_EQ(TotalSize - StackOffset, 8ULL + 2 * (8 + 8 + 5 * 8));
+ // 2 * (8b for id, 8b for frame count and 5*8b for fake frames).
+ // Since this is the last section, there may be additional padding at the end
+ // to make the total profile size 8b aligned.
+ EXPECT_GE(TotalSize - StackOffset, 8ULL + 2 * (8 + 8 + 5 * 8));
// ============= Check contents.
unsigned char ExpectedSegmentBytes[64] = {
diff --git a/compiler-rt/lib/profile/InstrProfilingMerge.c b/compiler-rt/lib/profile/InstrProfilingMerge.c
index 674b1898b046..80db2527461e 100644
--- a/compiler-rt/lib/profile/InstrProfilingMerge.c
+++ b/compiler-rt/lib/profile/InstrProfilingMerge.c
@@ -34,7 +34,8 @@ uint64_t lprofGetLoadModuleSignature() {
const __llvm_profile_data *FirstD = __llvm_profile_begin_data();
return (NamesSize << 40) + (CounterSize << 30) + (DataSize << 20) +
- (NumVnodes << 10) + (DataSize > 0 ? FirstD->NameRef : 0) + Version;
+ (NumVnodes << 10) + (DataSize > 0 ? FirstD->NameRef : 0) + Version +
+ __llvm_profile_get_magic();
}
/* Returns 1 if profile is not structurally compatible. */
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_allocator.cpp
index bcb7370a7906..af0b0949a88e 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator.cpp
@@ -24,66 +24,6 @@ namespace __sanitizer {
const char *PrimaryAllocatorName = "SizeClassAllocator";
const char *SecondaryAllocatorName = "LargeMmapAllocator";
-// ThreadSanitizer for Go uses libc malloc/free.
-#if defined(SANITIZER_USE_MALLOC)
-# if SANITIZER_LINUX && !SANITIZER_ANDROID
-extern "C" void *__libc_malloc(uptr size);
-# if !SANITIZER_GO
-extern "C" void *__libc_memalign(uptr alignment, uptr size);
-# endif
-extern "C" void *__libc_realloc(void *ptr, uptr size);
-extern "C" void __libc_free(void *ptr);
-# else
-# include <stdlib.h>
-# define __libc_malloc malloc
-# if !SANITIZER_GO
-static void *__libc_memalign(uptr alignment, uptr size) {
- void *p;
- uptr error = posix_memalign(&p, alignment, size);
- if (error) return nullptr;
- return p;
-}
-# endif
-# define __libc_realloc realloc
-# define __libc_free free
-# endif
-
-static void *RawInternalAlloc(uptr size, InternalAllocatorCache *cache,
- uptr alignment) {
- (void)cache;
-#if !SANITIZER_GO
- if (alignment == 0)
- return __libc_malloc(size);
- else
- return __libc_memalign(alignment, size);
-#else
- // Windows does not provide __libc_memalign/posix_memalign. It provides
- // __aligned_malloc, but the allocated blocks can't be passed to free,
- // they need to be passed to __aligned_free. InternalAlloc interface does
- // not account for such requirement. Alignemnt does not seem to be used
- // anywhere in runtime, so just call __libc_malloc for now.
- DCHECK_EQ(alignment, 0);
- return __libc_malloc(size);
-#endif
-}
-
-static void *RawInternalRealloc(void *ptr, uptr size,
- InternalAllocatorCache *cache) {
- (void)cache;
- return __libc_realloc(ptr, size);
-}
-
-static void RawInternalFree(void *ptr, InternalAllocatorCache *cache) {
- (void)cache;
- __libc_free(ptr);
-}
-
-InternalAllocator *internal_allocator() {
- return 0;
-}
-
-#else // SANITIZER_GO || defined(SANITIZER_USE_MALLOC)
-
static ALIGNED(64) char internal_alloc_placeholder[sizeof(InternalAllocator)];
static atomic_uint8_t internal_allocator_initialized;
static StaticSpinMutex internal_alloc_init_mu;
@@ -135,8 +75,6 @@ static void RawInternalFree(void *ptr, InternalAllocatorCache *cache) {
internal_allocator()->Deallocate(cache, ptr);
}
-#endif // SANITIZER_GO || defined(SANITIZER_USE_MALLOC)
-
static void NORETURN ReportInternalAllocatorOutOfMemory(uptr requested_size) {
SetAllocatorOutOfMemory();
Report("FATAL: %s: internal allocator is out of memory trying to allocate "
@@ -187,6 +125,16 @@ void InternalFree(void *addr, InternalAllocatorCache *cache) {
RawInternalFree(addr, cache);
}
+void InternalAllocatorLock() NO_THREAD_SAFETY_ANALYSIS {
+ internal_allocator_cache_mu.Lock();
+ internal_allocator()->ForceLock();
+}
+
+void InternalAllocatorUnlock() NO_THREAD_SAFETY_ANALYSIS {
+ internal_allocator()->ForceUnlock();
+ internal_allocator_cache_mu.Unlock();
+}
+
// LowLevelAllocator
constexpr uptr kLowLevelAllocatorDefaultAlignment = 8;
static uptr low_level_alloc_min_alignment = kLowLevelAllocatorDefaultAlignment;
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_internal.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_internal.h
index 32849036fd04..38994736877a 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_internal.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_internal.h
@@ -48,6 +48,8 @@ void *InternalReallocArray(void *p, uptr count, uptr size,
void *InternalCalloc(uptr count, uptr size,
InternalAllocatorCache *cache = nullptr);
void InternalFree(void *p, InternalAllocatorCache *cache = nullptr);
+void InternalAllocatorLock();
+void InternalAllocatorUnlock();
InternalAllocator *internal_allocator();
} // namespace __sanitizer
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common.h b/compiler-rt/lib/sanitizer_common/sanitizer_common.h
index 065154496eb5..6ec6bb4bd856 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common.h
@@ -460,6 +460,10 @@ template <class T>
constexpr T Max(T a, T b) {
return a > b ? a : b;
}
+template <class T>
+constexpr T Abs(T a) {
+ return a < 0 ? -a : a;
+}
template<class T> void Swap(T& a, T& b) {
T tmp = a;
a = b;
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
index abb38ccfa15d..d219734fa0a3 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
@@ -21,7 +21,7 @@
// COMMON_INTERCEPTOR_FD_RELEASE
// COMMON_INTERCEPTOR_FD_ACCESS
// COMMON_INTERCEPTOR_SET_THREAD_NAME
-// COMMON_INTERCEPTOR_ON_DLOPEN
+// COMMON_INTERCEPTOR_DLOPEN
// COMMON_INTERCEPTOR_ON_EXIT
// COMMON_INTERCEPTOR_MUTEX_PRE_LOCK
// COMMON_INTERCEPTOR_MUTEX_POST_LOCK
@@ -206,9 +206,9 @@ extern const short *_tolower_tab_;
COMMON_INTERCEPTOR_READ_RANGE((ctx), (s), \
common_flags()->strict_string_checks ? (internal_strlen(s)) + 1 : (n) )
-#ifndef COMMON_INTERCEPTOR_ON_DLOPEN
-#define COMMON_INTERCEPTOR_ON_DLOPEN(filename, flag) \
- CheckNoDeepBind(filename, flag);
+#ifndef COMMON_INTERCEPTOR_DLOPEN
+#define COMMON_INTERCEPTOR_DLOPEN(filename, flag) \
+ ({ CheckNoDeepBind(filename, flag); REAL(dlopen)(filename, flag); })
#endif
#ifndef COMMON_INTERCEPTOR_GET_TLS_RANGE
@@ -6380,8 +6380,7 @@ INTERCEPTOR(void*, dlopen, const char *filename, int flag) {
void *ctx;
COMMON_INTERCEPTOR_ENTER_NOIGNORE(ctx, dlopen, filename, flag);
if (filename) COMMON_INTERCEPTOR_READ_STRING(ctx, filename, 0);
- COMMON_INTERCEPTOR_ON_DLOPEN(filename, flag);
- void *res = REAL(dlopen)(filename, flag);
+ void *res = COMMON_INTERCEPTOR_DLOPEN(filename, flag);
Symbolizer::GetOrInit()->InvalidateModuleList();
COMMON_INTERCEPTOR_LIBRARY_LOADED(filename, res);
return res;
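
The reworked macro relies on the GNU statement-expression extension supported
by GCC and Clang: a ({ ... }) block is an expression whose value is that of
its last statement, so the macro can run pre-dlopen checks and still yield the
result of the real call. A minimal standalone sketch of the pattern (the macro
name and the direct dlopen call are stand-ins for the interceptor machinery):

    #include <dlfcn.h>

    // Statement expression: run checks first, then evaluate to the result of
    // the final call. In the sanitizer code, REAL(dlopen) replaces the direct
    // dlopen below.
    #define SKETCH_DLOPEN(filename, flag) \
      ({                                  \
        /* pre-call checks go here */     \
        dlopen((filename), (flag));       \
      })

    void *LoadLibrary(const char *path) {
      return SKETCH_DLOPEN(path, RTLD_NOW);
    }
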
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_dense_map.h b/compiler-rt/lib/sanitizer_common/sanitizer_dense_map.h
index 3fa6af76ce29..046d77dddc9c 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_dense_map.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_dense_map.h
@@ -226,28 +226,26 @@ class DenseMapBase {
return FindAndConstruct(__sanitizer::move(Key)).second;
}
- /// Equality comparison for DenseMap.
+ /// Iterate over active entries of the container.
///
- /// Iterates over elements of LHS confirming that each (key, value) pair in
- /// LHS is also in RHS, and that no additional pairs are in RHS. Equivalent to
- /// N calls to RHS.find and N value comparisons. Amortized complexity is
- /// linear, worst case is O(N^2) (if every hash collides).
- bool operator==(const DenseMapBase &RHS) const {
- if (size() != RHS.size())
- return false;
-
+ /// The callback can return false to stop the iteration early.
+ template <class Fn>
+ void forEach(Fn fn) {
const KeyT EmptyKey = getEmptyKey(), TombstoneKey = getTombstoneKey();
for (auto *P = getBuckets(), *E = getBucketsEnd(); P != E; ++P) {
const KeyT K = P->getFirst();
if (!KeyInfoT::isEqual(K, EmptyKey) &&
!KeyInfoT::isEqual(K, TombstoneKey)) {
- const auto *I = RHS.find(K);
- if (!I || P->getSecond() != I->getSecond())
- return false;
+ if (!fn(*P))
+ return;
}
}
+ }
- return true;
+ template <class Fn>
+ void forEach(Fn fn) const {
+ const_cast<DenseMapBase *>(this)->forEach(
+ [&](const value_type &KV) { return fn(KV); });
}
protected:
@@ -524,6 +522,35 @@ class DenseMapBase {
}
};
+/// Equality comparison for DenseMap.
+///
+/// Iterates over elements of LHS confirming that each (key, value) pair in LHS
+/// is also in RHS, and that no additional pairs are in RHS.
+/// Equivalent to N calls to RHS.find and N value comparisons. Amortized
+/// complexity is linear, worst case is O(N^2) (if every hash collides).
+template <typename DerivedT, typename KeyT, typename ValueT, typename KeyInfoT,
+ typename BucketT>
+bool operator==(
+ const DenseMapBase<DerivedT, KeyT, ValueT, KeyInfoT, BucketT> &LHS,
+ const DenseMapBase<DerivedT, KeyT, ValueT, KeyInfoT, BucketT> &RHS) {
+ if (LHS.size() != RHS.size())
+ return false;
+
+ bool R = true;
+ LHS.forEach(
+ [&](const typename DenseMapBase<DerivedT, KeyT, ValueT, KeyInfoT,
+ BucketT>::value_type &KV) -> bool {
+ const auto *I = RHS.find(KV.first);
+ if (!I || I->second != KV.second) {
+ R = false;
+ return false;
+ }
+ return true;
+ });
+
+ return R;
+}
+
/// Inequality comparison for DenseMap.
///
/// Equivalent to !(LHS == RHS). See operator== for performance notes.
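
A minimal usage sketch of the forEach contract introduced above (assuming a
__sanitizer::DenseMap<int, int> instantiation; the helper name is
illustrative): the callback returns true to continue and false to stop early,
which is exactly how the out-of-line operator== bails out on the first
mismatch.

    #include "sanitizer_common/sanitizer_dense_map.h"

    // Scan a map for a value, stopping at the first hit.
    bool ContainsValue(const __sanitizer::DenseMap<int, int> &M, int Needle) {
      bool Found = false;
      M.forEach([&](const __sanitizer::detail::DenseMapPair<int, int> &KV) {
        if (KV.second == Needle) {
          Found = true;
          return false;  // Stop iterating early.
        }
        return true;  // Keep scanning.
      });
      return Found;
    }
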
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_dense_map_info.h b/compiler-rt/lib/sanitizer_common/sanitizer_dense_map_info.h
index 85c6427906c1..f4640369ae58 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_dense_map_info.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_dense_map_info.h
@@ -18,7 +18,7 @@ namespace __sanitizer {
namespace detail {
/// Simplistic combination of 32-bit hash values into 32-bit hash values.
-static inline unsigned combineHashValue(unsigned a, unsigned b) {
+static constexpr unsigned combineHashValue(unsigned a, unsigned b) {
u64 key = (u64)a << 32 | (u64)b;
key += ~(key << 32);
key ^= (key >> 22);
@@ -37,18 +37,19 @@ template <typename KeyT, typename ValueT>
struct DenseMapPair {
KeyT first = {};
ValueT second = {};
- DenseMapPair() = default;
- DenseMapPair(const KeyT &f, const ValueT &s) : first(f), second(s) {}
+ constexpr DenseMapPair() = default;
+ constexpr DenseMapPair(const KeyT &f, const ValueT &s)
+ : first(f), second(s) {}
template <typename KeyT2, typename ValueT2>
- DenseMapPair(KeyT2 &&f, ValueT2 &&s)
+ constexpr DenseMapPair(KeyT2 &&f, ValueT2 &&s)
: first(__sanitizer::forward<KeyT2>(f)),
second(__sanitizer::forward<ValueT2>(s)) {}
- DenseMapPair(const DenseMapPair &other) = default;
- DenseMapPair &operator=(const DenseMapPair &other) = default;
- DenseMapPair(DenseMapPair &&other) = default;
- DenseMapPair &operator=(DenseMapPair &&other) = default;
+ constexpr DenseMapPair(const DenseMapPair &other) = default;
+ constexpr DenseMapPair &operator=(const DenseMapPair &other) = default;
+ constexpr DenseMapPair(DenseMapPair &&other) = default;
+ constexpr DenseMapPair &operator=(DenseMapPair &&other) = default;
KeyT &getFirst() { return first; }
const KeyT &getFirst() const { return first; }
@@ -60,8 +61,8 @@ struct DenseMapPair {
template <typename T>
struct DenseMapInfo {
- // static inline T getEmptyKey();
- // static inline T getTombstoneKey();
+ // static T getEmptyKey();
+ // static T getTombstoneKey();
// static unsigned getHashValue(const T &Val);
// static bool isEqual(const T &LHS, const T &RHS);
};
@@ -79,43 +80,50 @@ struct DenseMapInfo<T *> {
// "Log2MaxAlign bits of alignment");
static constexpr uptr Log2MaxAlign = 12;
- static inline T *getEmptyKey() {
+ static constexpr T *getEmptyKey() {
uptr Val = static_cast<uptr>(-1);
Val <<= Log2MaxAlign;
return reinterpret_cast<T *>(Val);
}
- static inline T *getTombstoneKey() {
+ static constexpr T *getTombstoneKey() {
uptr Val = static_cast<uptr>(-2);
Val <<= Log2MaxAlign;
return reinterpret_cast<T *>(Val);
}
- static unsigned getHashValue(const T *PtrVal) {
+ static constexpr unsigned getHashValue(const T *PtrVal) {
return (unsigned((uptr)PtrVal) >> 4) ^ (unsigned((uptr)PtrVal) >> 9);
}
- static bool isEqual(const T *LHS, const T *RHS) { return LHS == RHS; }
+ static constexpr bool isEqual(const T *LHS, const T *RHS) {
+ return LHS == RHS;
+ }
};
// Provide DenseMapInfo for chars.
template <>
struct DenseMapInfo<char> {
- static inline char getEmptyKey() { return ~0; }
- static inline char getTombstoneKey() { return ~0 - 1; }
- static unsigned getHashValue(const char &Val) { return Val * 37U; }
+ static constexpr char getEmptyKey() { return ~0; }
+ static constexpr char getTombstoneKey() { return ~0 - 1; }
+ static constexpr unsigned getHashValue(const char &Val) { return Val * 37U; }
- static bool isEqual(const char &LHS, const char &RHS) { return LHS == RHS; }
+ static constexpr bool isEqual(const char &LHS, const char &RHS) {
+ return LHS == RHS;
+ }
};
// Provide DenseMapInfo for unsigned chars.
template <>
struct DenseMapInfo<unsigned char> {
- static inline unsigned char getEmptyKey() { return ~0; }
- static inline unsigned char getTombstoneKey() { return ~0 - 1; }
- static unsigned getHashValue(const unsigned char &Val) { return Val * 37U; }
+ static constexpr unsigned char getEmptyKey() { return ~0; }
+ static constexpr unsigned char getTombstoneKey() { return ~0 - 1; }
+ static constexpr unsigned getHashValue(const unsigned char &Val) {
+ return Val * 37U;
+ }
- static bool isEqual(const unsigned char &LHS, const unsigned char &RHS) {
+ static constexpr bool isEqual(const unsigned char &LHS,
+ const unsigned char &RHS) {
return LHS == RHS;
}
};
@@ -123,11 +131,14 @@ struct DenseMapInfo<unsigned char> {
// Provide DenseMapInfo for unsigned shorts.
template <>
struct DenseMapInfo<unsigned short> {
- static inline unsigned short getEmptyKey() { return 0xFFFF; }
- static inline unsigned short getTombstoneKey() { return 0xFFFF - 1; }
- static unsigned getHashValue(const unsigned short &Val) { return Val * 37U; }
+ static constexpr unsigned short getEmptyKey() { return 0xFFFF; }
+ static constexpr unsigned short getTombstoneKey() { return 0xFFFF - 1; }
+ static constexpr unsigned getHashValue(const unsigned short &Val) {
+ return Val * 37U;
+ }
- static bool isEqual(const unsigned short &LHS, const unsigned short &RHS) {
+ static constexpr bool isEqual(const unsigned short &LHS,
+ const unsigned short &RHS) {
return LHS == RHS;
}
};
@@ -135,11 +146,13 @@ struct DenseMapInfo<unsigned short> {
// Provide DenseMapInfo for unsigned ints.
template <>
struct DenseMapInfo<unsigned> {
- static inline unsigned getEmptyKey() { return ~0U; }
- static inline unsigned getTombstoneKey() { return ~0U - 1; }
- static unsigned getHashValue(const unsigned &Val) { return Val * 37U; }
+ static constexpr unsigned getEmptyKey() { return ~0U; }
+ static constexpr unsigned getTombstoneKey() { return ~0U - 1; }
+ static constexpr unsigned getHashValue(const unsigned &Val) {
+ return Val * 37U;
+ }
- static bool isEqual(const unsigned &LHS, const unsigned &RHS) {
+ static constexpr bool isEqual(const unsigned &LHS, const unsigned &RHS) {
return LHS == RHS;
}
};
@@ -147,14 +160,15 @@ struct DenseMapInfo<unsigned> {
// Provide DenseMapInfo for unsigned longs.
template <>
struct DenseMapInfo<unsigned long> {
- static inline unsigned long getEmptyKey() { return ~0UL; }
- static inline unsigned long getTombstoneKey() { return ~0UL - 1L; }
+ static constexpr unsigned long getEmptyKey() { return ~0UL; }
+ static constexpr unsigned long getTombstoneKey() { return ~0UL - 1L; }
- static unsigned getHashValue(const unsigned long &Val) {
+ static constexpr unsigned getHashValue(const unsigned long &Val) {
return (unsigned)(Val * 37UL);
}
- static bool isEqual(const unsigned long &LHS, const unsigned long &RHS) {
+ static constexpr bool isEqual(const unsigned long &LHS,
+ const unsigned long &RHS) {
return LHS == RHS;
}
};
@@ -162,15 +176,15 @@ struct DenseMapInfo<unsigned long> {
// Provide DenseMapInfo for unsigned long longs.
template <>
struct DenseMapInfo<unsigned long long> {
- static inline unsigned long long getEmptyKey() { return ~0ULL; }
- static inline unsigned long long getTombstoneKey() { return ~0ULL - 1ULL; }
+ static constexpr unsigned long long getEmptyKey() { return ~0ULL; }
+ static constexpr unsigned long long getTombstoneKey() { return ~0ULL - 1ULL; }
- static unsigned getHashValue(const unsigned long long &Val) {
+ static constexpr unsigned getHashValue(const unsigned long long &Val) {
return (unsigned)(Val * 37ULL);
}
- static bool isEqual(const unsigned long long &LHS,
- const unsigned long long &RHS) {
+ static constexpr bool isEqual(const unsigned long long &LHS,
+ const unsigned long long &RHS) {
return LHS == RHS;
}
};
@@ -178,51 +192,59 @@ struct DenseMapInfo<unsigned long long> {
// Provide DenseMapInfo for shorts.
template <>
struct DenseMapInfo<short> {
- static inline short getEmptyKey() { return 0x7FFF; }
- static inline short getTombstoneKey() { return -0x7FFF - 1; }
- static unsigned getHashValue(const short &Val) { return Val * 37U; }
- static bool isEqual(const short &LHS, const short &RHS) { return LHS == RHS; }
+ static constexpr short getEmptyKey() { return 0x7FFF; }
+ static constexpr short getTombstoneKey() { return -0x7FFF - 1; }
+ static constexpr unsigned getHashValue(const short &Val) { return Val * 37U; }
+ static constexpr bool isEqual(const short &LHS, const short &RHS) {
+ return LHS == RHS;
+ }
};
// Provide DenseMapInfo for ints.
template <>
struct DenseMapInfo<int> {
- static inline int getEmptyKey() { return 0x7fffffff; }
- static inline int getTombstoneKey() { return -0x7fffffff - 1; }
- static unsigned getHashValue(const int &Val) { return (unsigned)(Val * 37U); }
+ static constexpr int getEmptyKey() { return 0x7fffffff; }
+ static constexpr int getTombstoneKey() { return -0x7fffffff - 1; }
+ static constexpr unsigned getHashValue(const int &Val) {
+ return (unsigned)(Val * 37U);
+ }
- static bool isEqual(const int &LHS, const int &RHS) { return LHS == RHS; }
+ static constexpr bool isEqual(const int &LHS, const int &RHS) {
+ return LHS == RHS;
+ }
};
// Provide DenseMapInfo for longs.
template <>
struct DenseMapInfo<long> {
- static inline long getEmptyKey() {
+ static constexpr long getEmptyKey() {
return (1UL << (sizeof(long) * 8 - 1)) - 1UL;
}
- static inline long getTombstoneKey() { return getEmptyKey() - 1L; }
+ static constexpr long getTombstoneKey() { return getEmptyKey() - 1L; }
- static unsigned getHashValue(const long &Val) {
+ static constexpr unsigned getHashValue(const long &Val) {
return (unsigned)(Val * 37UL);
}
- static bool isEqual(const long &LHS, const long &RHS) { return LHS == RHS; }
+ static constexpr bool isEqual(const long &LHS, const long &RHS) {
+ return LHS == RHS;
+ }
};
// Provide DenseMapInfo for long longs.
template <>
struct DenseMapInfo<long long> {
- static inline long long getEmptyKey() { return 0x7fffffffffffffffLL; }
- static inline long long getTombstoneKey() {
+ static constexpr long long getEmptyKey() { return 0x7fffffffffffffffLL; }
+ static constexpr long long getTombstoneKey() {
return -0x7fffffffffffffffLL - 1;
}
- static unsigned getHashValue(const long long &Val) {
+ static constexpr unsigned getHashValue(const long long &Val) {
return (unsigned)(Val * 37ULL);
}
- static bool isEqual(const long long &LHS, const long long &RHS) {
+ static constexpr bool isEqual(const long long &LHS, const long long &RHS) {
return LHS == RHS;
}
};
@@ -234,22 +256,22 @@ struct DenseMapInfo<detail::DenseMapPair<T, U>> {
using FirstInfo = DenseMapInfo<T>;
using SecondInfo = DenseMapInfo<U>;
- static inline Pair getEmptyKey() {
+ static constexpr Pair getEmptyKey() {
return detail::DenseMapPair<T, U>(FirstInfo::getEmptyKey(),
SecondInfo::getEmptyKey());
}
- static inline Pair getTombstoneKey() {
+ static constexpr Pair getTombstoneKey() {
return detail::DenseMapPair<T, U>(FirstInfo::getTombstoneKey(),
SecondInfo::getTombstoneKey());
}
- static unsigned getHashValue(const Pair &PairVal) {
+ static constexpr unsigned getHashValue(const Pair &PairVal) {
return detail::combineHashValue(FirstInfo::getHashValue(PairVal.first),
SecondInfo::getHashValue(PairVal.second));
}
- static bool isEqual(const Pair &LHS, const Pair &RHS) {
+ static constexpr bool isEqual(const Pair &LHS, const Pair &RHS) {
return FirstInfo::isEqual(LHS.first, RHS.first) &&
SecondInfo::isEqual(LHS.second, RHS.second);
}
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp
index c7b30d988365..9b5f6f1da1a1 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp
@@ -274,6 +274,15 @@ void *MmapFixedNoAccess(uptr fixed_addr, uptr size, const char *name) {
UNIMPLEMENTED();
}
+bool MprotectNoAccess(uptr addr, uptr size) {
+ return _zx_vmar_protect(_zx_vmar_root_self(), 0, addr, size) == ZX_OK;
+}
+
+bool MprotectReadOnly(uptr addr, uptr size) {
+ return _zx_vmar_protect(_zx_vmar_root_self(), ZX_VM_PERM_READ, addr, size) ==
+        ZX_OK;
+}
+
void *MmapAlignedOrDieOnFatalError(uptr size, uptr alignment,
const char *mem_type) {
CHECK_GE(size, GetPageSize());
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_leb128.h b/compiler-rt/lib/sanitizer_common/sanitizer_leb128.h
new file mode 100644
index 000000000000..553550d29552
--- /dev/null
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_leb128.h
@@ -0,0 +1,87 @@
+//===-- sanitizer_leb128.h --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SANITIZER_LEB128_H
+#define SANITIZER_LEB128_H
+
+#include "sanitizer_common.h"
+#include "sanitizer_internal_defs.h"
+
+namespace __sanitizer {
+
+template <typename T, typename It>
+It EncodeSLEB128(T value, It begin, It end) {
+ bool more;
+ do {
+ u8 byte = value & 0x7f;
+ // NOTE: this assumes the signed shift below is an arithmetic right shift.
+ value >>= 7;
+ more = !((((value == 0) && ((byte & 0x40) == 0)) ||
+ ((value == -1) && ((byte & 0x40) != 0))));
+ if (more)
+ byte |= 0x80;
+ if (UNLIKELY(begin == end))
+ break;
+ *(begin++) = byte;
+ } while (more);
+ return begin;
+}
+
+template <typename T, typename It>
+It DecodeSLEB128(It begin, It end, T* v) {
+ T value = 0;
+ unsigned shift = 0;
+ u8 byte;
+ do {
+ if (UNLIKELY(begin == end))
+ return begin;
+ byte = *(begin++);
+ T slice = byte & 0x7f;
+ value |= slice << shift;
+ shift += 7;
+ } while (byte >= 128);
+ if (shift < 64 && (byte & 0x40))
+ value |= (-1ULL) << shift;
+ *v = value;
+ return begin;
+}
+
+template <typename T, typename It>
+It EncodeULEB128(T value, It begin, It end) {
+ do {
+ u8 byte = value & 0x7f;
+ value >>= 7;
+ if (value)
+ byte |= 0x80;
+ if (UNLIKELY(begin == end))
+ break;
+ *(begin++) = byte;
+ } while (value);
+ return begin;
+}
+
+template <typename T, typename It>
+It DecodeULEB128(It begin, It end, T* v) {
+ T value = 0;
+ unsigned shift = 0;
+ u8 byte;
+ do {
+ if (UNLIKELY(begin == end))
+ return begin;
+ byte = *(begin++);
+ T slice = byte & 0x7f;
+ value += slice << shift;
+ shift += 7;
+ } while (byte >= 128);
+ *v = value;
+ return begin;
+}
+
+} // namespace __sanitizer
+
+#endif // SANITIZER_LEB128_H
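
A round-trip usage sketch for these helpers (the value and buffer size are
arbitrary; u8 and s64 come from sanitizer_internal_defs.h). The encoder
returns an iterator one past the last byte written and the decoder one past
the last byte consumed, so both can be range-checked against end:

    #include "sanitizer_common/sanitizer_leb128.h"

    // Encode -300 as SLEB128 (two bytes: 0xd4 0x7d) and decode it back.
    // EncodeULEB128/DecodeULEB128 follow the same shape for unsigned values.
    bool RoundTripSLEB128() {
      using namespace __sanitizer;
      u8 buffer[16] = {};
      u8 *end = EncodeSLEB128<s64>(-300, buffer, buffer + sizeof(buffer));
      s64 decoded = 0;
      u8 *next = DecodeSLEB128<s64>(buffer, end, &decoded);
      return next == end && decoded == -300;
    }
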
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
index 596037d77222..2d787332a445 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
@@ -1760,6 +1760,8 @@ HandleSignalMode GetHandleSignalMode(int signum) {
#if !SANITIZER_GO
void *internal_start_thread(void *(*func)(void *arg), void *arg) {
+ if (&real_pthread_create == 0)
+ return nullptr;
// Start the thread with signals blocked, otherwise it can steal user signals.
ScopedBlockSignals block(nullptr);
void *th;
@@ -1768,7 +1770,8 @@ void *internal_start_thread(void *(*func)(void *arg), void *arg) {
}
void internal_join_thread(void *th) {
- real_pthread_join(th, nullptr);
+ if (&real_pthread_join)
+ real_pthread_join(th, nullptr);
}
#else
void *internal_start_thread(void *(*func)(void *), void *arg) { return 0; }
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp
index b67203d4c10e..f9b5c531aeee 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_mac.cpp
@@ -265,30 +265,32 @@ int internal_sysctlbyname(const char *sname, void *oldp, uptr *oldlenp,
static fd_t internal_spawn_impl(const char *argv[], const char *envp[],
pid_t *pid) {
- fd_t master_fd = kInvalidFd;
- fd_t slave_fd = kInvalidFd;
+ fd_t primary_fd = kInvalidFd;
+ fd_t secondary_fd = kInvalidFd;
auto fd_closer = at_scope_exit([&] {
- internal_close(master_fd);
- internal_close(slave_fd);
+ internal_close(primary_fd);
+ internal_close(secondary_fd);
});
// We need a new pseudoterminal to avoid buffering problems. The 'atos' tool
// in particular detects when it's talking to a pipe and forgets to flush the
// output stream after sending a response.
- master_fd = posix_openpt(O_RDWR);
- if (master_fd == kInvalidFd) return kInvalidFd;
+ primary_fd = posix_openpt(O_RDWR);
+ if (primary_fd == kInvalidFd)
+ return kInvalidFd;
- int res = grantpt(master_fd) || unlockpt(master_fd);
+ int res = grantpt(primary_fd) || unlockpt(primary_fd);
if (res != 0) return kInvalidFd;
// Use TIOCPTYGNAME instead of ptsname() to avoid threading problems.
- char slave_pty_name[128];
- res = ioctl(master_fd, TIOCPTYGNAME, slave_pty_name);
+ char secondary_pty_name[128];
+ res = ioctl(primary_fd, TIOCPTYGNAME, secondary_pty_name);
if (res == -1) return kInvalidFd;
- slave_fd = internal_open(slave_pty_name, O_RDWR);
- if (slave_fd == kInvalidFd) return kInvalidFd;
+ secondary_fd = internal_open(secondary_pty_name, O_RDWR);
+ if (secondary_fd == kInvalidFd)
+ return kInvalidFd;
// File descriptor actions
posix_spawn_file_actions_t acts;
@@ -299,9 +301,9 @@ static fd_t internal_spawn_impl(const char *argv[], const char *envp[],
posix_spawn_file_actions_destroy(&acts);
});
- res = posix_spawn_file_actions_adddup2(&acts, slave_fd, STDIN_FILENO) ||
- posix_spawn_file_actions_adddup2(&acts, slave_fd, STDOUT_FILENO) ||
- posix_spawn_file_actions_addclose(&acts, slave_fd);
+ res = posix_spawn_file_actions_adddup2(&acts, secondary_fd, STDIN_FILENO) ||
+ posix_spawn_file_actions_adddup2(&acts, secondary_fd, STDOUT_FILENO) ||
+ posix_spawn_file_actions_addclose(&acts, secondary_fd);
if (res != 0) return kInvalidFd;
// Spawn attributes
@@ -326,14 +328,14 @@ static fd_t internal_spawn_impl(const char *argv[], const char *envp[],
// Disable echo in the new terminal, disable CR.
struct termios termflags;
- tcgetattr(master_fd, &termflags);
+ tcgetattr(primary_fd, &termflags);
termflags.c_oflag &= ~ONLCR;
termflags.c_lflag &= ~ECHO;
- tcsetattr(master_fd, TCSANOW, &termflags);
+ tcsetattr(primary_fd, TCSANOW, &termflags);
- // On success, do not close master_fd on scope exit.
- fd_t fd = master_fd;
- master_fd = kInvalidFd;
+ // On success, do not close primary_fd on scope exit.
+ fd_t fd = primary_fd;
+ primary_fd = kInvalidFd;
return fd;
}
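For readers unfamiliar with the pty dance above: posix_openpt(), grantpt(), and unlockpt() must all succeed on the primary side before the secondary side can be opened. A portable sketch of the same sequence, using ptsname() where the code above deliberately uses TIOCPTYGNAME to avoid threading problems:

#include <cstdlib>   // posix_openpt, grantpt, unlockpt, ptsname
#include <fcntl.h>   // O_RDWR, open
#include <unistd.h>  // close

// Returns the primary-side fd and stores the secondary-side fd, or -1.
int OpenPtyPair(int *secondary_fd) {
  int primary_fd = posix_openpt(O_RDWR);
  if (primary_fd < 0)
    return -1;
  // Both must succeed before the secondary side becomes openable.
  if (grantpt(primary_fd) != 0 || unlockpt(primary_fd) != 0) {
    close(primary_fd);
    return -1;
  }
  const char *name = ptsname(primary_fd);  // not thread-safe, see above
  if (!name || (*secondary_fd = open(name, O_RDWR)) < 0) {
    close(primary_fd);
    return -1;
  }
  return primary_fd;
}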
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stack_store.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stack_store.cpp
index ad88e2bbbefc..b1c15d8c2834 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_stack_store.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_stack_store.cpp
@@ -14,78 +14,187 @@
namespace __sanitizer {
-static constexpr u32 kStackSizeBits = 16;
-
-StackStore::Id StackStore::Store(const StackTrace &trace) {
- uptr *stack_trace = Alloc(trace.size + 1);
- CHECK_LT(trace.size, 1 << kStackSizeBits);
- *stack_trace = trace.size + (trace.tag << kStackSizeBits);
- internal_memcpy(stack_trace + 1, trace.trace, trace.size * sizeof(uptr));
- return reinterpret_cast<StackStore::Id>(stack_trace);
+namespace {
+struct StackTraceHeader {
+ static constexpr u32 kStackSizeBits = 8;
+
+ u8 size;
+ u8 tag;
+ explicit StackTraceHeader(const StackTrace &trace)
+ : size(Min<uptr>(trace.size, (1u << 8) - 1)), tag(trace.tag) {
+ CHECK_EQ(trace.tag, static_cast<uptr>(tag));
+ }
+ explicit StackTraceHeader(uptr h)
+ : size(h & ((1 << kStackSizeBits) - 1)), tag(h >> kStackSizeBits) {}
+
+ uptr ToUptr() const {
+ return static_cast<uptr>(size) | (static_cast<uptr>(tag) << kStackSizeBits);
+ }
+};
+} // namespace
+
+StackStore::Id StackStore::Store(const StackTrace &trace, uptr *pack) {
+ if (!trace.size && !trace.tag)
+ return 0;
+ StackTraceHeader h(trace);
+ uptr idx = 0;
+ *pack = 0;
+ uptr *stack_trace = Alloc(h.size + 1, &idx, pack);
+ *stack_trace = h.ToUptr();
+ internal_memcpy(stack_trace + 1, trace.trace, h.size * sizeof(uptr));
+ *pack += blocks_[GetBlockIdx(idx)].Stored(h.size + 1);
+ return OffsetToId(idx);
}
StackTrace StackStore::Load(Id id) {
- const uptr *stack_trace = reinterpret_cast<const uptr *>(id);
- uptr size = *stack_trace & ((1 << kStackSizeBits) - 1);
- uptr tag = *stack_trace >> kStackSizeBits;
- return StackTrace(stack_trace + 1, size, tag);
+ if (!id)
+ return {};
+ uptr idx = IdToOffset(id);
+ uptr block_idx = GetBlockIdx(idx);
+ CHECK_LT(block_idx, ARRAY_SIZE(blocks_));
+ const uptr *stack_trace = blocks_[block_idx].GetOrUnpack();
+ if (!stack_trace)
+ return {};
+ stack_trace += GetInBlockIdx(idx);
+ StackTraceHeader h(*stack_trace);
+ return StackTrace(stack_trace + 1, h.size, h.tag);
}
-uptr *StackStore::TryAlloc(uptr count) {
- // Optimisic lock-free allocation, essentially try to bump the region ptr.
+uptr StackStore::Allocated() const {
+ uptr next_block = GetBlockIdx(
+ RoundUpTo(atomic_load_relaxed(&total_frames_), kBlockSizeFrames));
+ uptr res = 0;
+ for (uptr i = 0; i < next_block; ++i) res += blocks_[i].Allocated();
+ return res + sizeof(*this);
+}
+
+uptr *StackStore::Alloc(uptr count, uptr *idx, uptr *pack) {
for (;;) {
- uptr cmp = atomic_load(&region_pos_, memory_order_acquire);
- uptr end = atomic_load(&region_end_, memory_order_acquire);
- uptr size = count * sizeof(uptr);
- if (cmp == 0 || cmp + size > end)
- return nullptr;
- if (atomic_compare_exchange_weak(&region_pos_, &cmp, cmp + size,
- memory_order_acquire))
- return reinterpret_cast<uptr *>(cmp);
+ // Optimistic lock-free allocation, essentially try to bump the
+ // total_frames_.
+ uptr start = atomic_fetch_add(&total_frames_, count, memory_order_relaxed);
+ uptr block_idx = GetBlockIdx(start);
+ uptr last_idx = GetBlockIdx(start + count - 1);
+ if (LIKELY(block_idx == last_idx)) {
+ // Fits into a single block.
+ CHECK_LT(block_idx, ARRAY_SIZE(blocks_));
+ *idx = start;
+ return blocks_[block_idx].GetOrCreate() + GetInBlockIdx(start);
+ }
+
+ // Retry. We can't use a range allocated across two different blocks.
+ CHECK_LE(count, kBlockSizeFrames);
+ uptr in_first = kBlockSizeFrames - GetInBlockIdx(start);
+ // Mark the tail/head of these blocks as "stored" to avoid waiting before we
+ // can Pack().
+ *pack += blocks_[block_idx].Stored(in_first);
+ *pack += blocks_[last_idx].Stored(count - in_first);
}
}
-uptr *StackStore::Alloc(uptr count) {
- // First, try to allocate optimisitically.
- uptr *s = TryAlloc(count);
- if (LIKELY(s))
- return s;
- return RefillAndAlloc(count);
+uptr StackStore::Pack(Compression type) {
+ uptr res = 0;
+ for (BlockInfo &b : blocks_) res += b.Pack(type);
+ return res;
+}
+
+void StackStore::TestOnlyUnmap() {
+ for (BlockInfo &b : blocks_) b.TestOnlyUnmap();
+ internal_memset(this, 0, sizeof(*this));
}
-uptr *StackStore::RefillAndAlloc(uptr count) {
- // If failed, lock, retry and alloc new superblock.
+uptr *StackStore::BlockInfo::Get() const {
+ // Idiomatic double-checked locking uses memory_order_acquire here, but
+ // relaxed is fine for us; the justification is similar to
+ // TwoLevelMap::GetOrCreate.
+ return reinterpret_cast<uptr *>(atomic_load_relaxed(&data_));
+}
+
+uptr *StackStore::BlockInfo::Create() {
SpinMutexLock l(&mtx_);
- for (;;) {
- uptr *s = TryAlloc(count);
- if (s)
- return s;
- atomic_store(&region_pos_, 0, memory_order_relaxed);
- uptr size = count * sizeof(uptr) + sizeof(BlockInfo);
- uptr allocsz = RoundUpTo(Max<uptr>(size, 64u * 1024u), GetPageSizeCached());
- uptr mem = (uptr)MmapOrDie(allocsz, "stack depot");
- BlockInfo *new_block = (BlockInfo *)(mem + allocsz) - 1;
- new_block->next = curr_;
- new_block->ptr = mem;
- new_block->size = allocsz;
- curr_ = new_block;
-
- atomic_fetch_add(&mapped_size_, allocsz, memory_order_relaxed);
-
- allocsz -= sizeof(BlockInfo);
- atomic_store(&region_end_, mem + allocsz, memory_order_release);
- atomic_store(&region_pos_, mem, memory_order_release);
+ uptr *ptr = Get();
+ if (!ptr) {
+ ptr = reinterpret_cast<uptr *>(
+ MmapNoReserveOrDie(kBlockSizeBytes, "StackStore"));
+ atomic_store(&data_, reinterpret_cast<uptr>(ptr), memory_order_release);
}
+ return ptr;
}
-void StackStore::TestOnlyUnmap() {
- while (curr_) {
- uptr mem = curr_->ptr;
- uptr allocsz = curr_->size;
- curr_ = curr_->next;
- UnmapOrDie((void *)mem, allocsz);
+uptr *StackStore::BlockInfo::GetOrCreate() {
+ uptr *ptr = Get();
+ if (LIKELY(ptr))
+ return ptr;
+ return Create();
+}
+
+uptr *StackStore::BlockInfo::GetOrUnpack() {
+ SpinMutexLock l(&mtx_);
+ switch (state) {
+ case State::Storing:
+ state = State::Unpacked;
+ FALLTHROUGH;
+ case State::Unpacked:
+ return Get();
+ case State::Packed:
+ break;
}
- internal_memset(this, 0, sizeof(*this));
+
+ uptr *ptr = Get();
+ CHECK_NE(nullptr, ptr);
+ // Fake unpacking.
+ for (uptr i = 0; i < kBlockSizeFrames; ++i) ptr[i] = ~ptr[i];
+ state = State::Unpacked;
+ return Get();
+}
+
+uptr StackStore::BlockInfo::Pack(Compression type) {
+ if (type == Compression::None)
+ return 0;
+
+ SpinMutexLock l(&mtx_);
+ switch (state) {
+ case State::Unpacked:
+ case State::Packed:
+ return 0;
+ case State::Storing:
+ break;
+ }
+
+ uptr *ptr = Get();
+ if (!ptr || !Stored(0))
+ return 0;
+
+ // Fake packing.
+ for (uptr i = 0; i < kBlockSizeFrames; ++i) ptr[i] = ~ptr[i];
+ state = State::Packed;
+ return kBlockSizeBytes - kBlockSizeBytes / 10;
+}
+
+uptr StackStore::BlockInfo::Allocated() const {
+ SpinMutexLock l(&mtx_);
+ switch (state) {
+ case State::Packed:
+ return kBlockSizeBytes / 10;
+ case State::Unpacked:
+ case State::Storing:
+ return kBlockSizeBytes;
+ }
+}
+
+void StackStore::BlockInfo::TestOnlyUnmap() {
+ if (uptr *ptr = Get())
+ UnmapOrDie(ptr, StackStore::kBlockSizeBytes);
+}
+
+bool StackStore::BlockInfo::Stored(uptr n) {
+ return n + atomic_fetch_add(&stored_, n, memory_order_release) ==
+ kBlockSizeFrames;
+}
+
+bool StackStore::BlockInfo::IsPacked() const {
+ SpinMutexLock l(&mtx_);
+ return state == State::Packed;
}
} // namespace __sanitizer
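The StackTraceHeader round trip introduced above is plain bit packing: 8 bits of frame count and 8 bits of tag sharing a single word. A standalone restatement for illustration:

#include <cassert>
#include <cstdint>

int main() {
  using uptr = uintptr_t;
  constexpr unsigned kStackSizeBits = 8;
  uint8_t size = 200, tag = 3;
  // Pack: low 8 bits hold the size, the next 8 bits hold the tag.
  uptr h = static_cast<uptr>(size) | (static_cast<uptr>(tag) << kStackSizeBits);
  // Unpack: mask the size back out, shift the tag back down.
  assert((h & ((1 << kStackSizeBits) - 1)) == size);
  assert((h >> kStackSizeBits) == tag);
}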
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stack_store.h b/compiler-rt/lib/sanitizer_common/sanitizer_stack_store.h
index b5bbdccc20b1..e0bc4e9c4a45 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_stack_store.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_stack_store.h
@@ -10,6 +10,7 @@
#define SANITIZER_STACK_STORE_H
#include "sanitizer_atomic.h"
+#include "sanitizer_common.h"
#include "sanitizer_internal_defs.h"
#include "sanitizer_mutex.h"
#include "sanitizer_stacktrace.h"
@@ -17,32 +18,91 @@
namespace __sanitizer {
class StackStore {
+ static constexpr uptr kBlockSizeFrames = 0x100000;
+ static constexpr uptr kBlockCount = 0x1000;
+ static constexpr uptr kBlockSizeBytes = kBlockSizeFrames * sizeof(uptr);
+
public:
+ enum class Compression : u8 {
+ None = 0,
+ Test,
+ };
+
constexpr StackStore() = default;
- using Id = uptr;
+ using Id = u32; // Enough for 2^32 * sizeof(uptr) bytes of traces.
+ static_assert(u64(kBlockCount) * kBlockSizeFrames == 1ull << (sizeof(Id) * 8),
+ "");
- Id Store(const StackTrace &trace);
+ Id Store(const StackTrace &trace,
+ uptr *pack /* number of blocks completed by this call */);
StackTrace Load(Id id);
- uptr Allocated() const { return atomic_load_relaxed(&mapped_size_); }
+ uptr Allocated() const;
+
+ // Packs all blocks which don't expect any more writes. A block is only
+ // ever packed once. As soon as a trace from that block is requested, it is
+ // unpacked and stays unpacked after that.
+ // Returns the number of released bytes.
+ uptr Pack(Compression type);
void TestOnlyUnmap();
private:
- uptr *Alloc(uptr count = 1);
- uptr *TryAlloc(uptr count);
- uptr *RefillAndAlloc(uptr count);
- mutable StaticSpinMutex mtx_ = {}; // Protects alloc of new blocks.
- atomic_uintptr_t region_pos_ = {}; // Region allocator for Node's.
- atomic_uintptr_t region_end_ = {};
- atomic_uintptr_t mapped_size_ = {};
-
- struct BlockInfo {
- const BlockInfo *next;
- uptr ptr;
- uptr size;
+ friend class StackStoreTest;
+ static constexpr uptr GetBlockIdx(uptr frame_idx) {
+ return frame_idx / kBlockSizeFrames;
+ }
+
+ static constexpr uptr GetInBlockIdx(uptr frame_idx) {
+ return frame_idx % kBlockSizeFrames;
+ }
+
+ static constexpr uptr IdToOffset(Id id) {
+ CHECK_NE(id, 0);
+ return id - 1; // Avoid zero as id.
+ }
+
+ static constexpr uptr OffsetToId(Id id) {
+ // This maps UINT32_MAX to 0, which will be retrieved as an empty stack.
+ // But this is not a problem, as we will not be able to store anything after
+ // that point anyway.
+ return id + 1; // Avoid zero as id.
+ }
+
+ uptr *Alloc(uptr count, uptr *idx, uptr *pack);
+
+ // Total number of allocated frames.
+ atomic_uintptr_t total_frames_ = {};
+
+ // Each block holds a pointer to storage for exactly kBlockSizeFrames frames.
+ class BlockInfo {
+ atomic_uintptr_t data_;
+ // Counter to track store progress to know when we can Pack() the block.
+ atomic_uint32_t stored_;
+ // Protects alloc of new blocks.
+ mutable StaticSpinMutex mtx_;
+
+ enum class State : u8 {
+ Storing = 0,
+ Packed,
+ Unpacked,
+ };
+ State state GUARDED_BY(mtx_);
+
+ uptr *Create();
+
+ public:
+ uptr *Get() const;
+ uptr *GetOrCreate();
+ uptr *GetOrUnpack();
+ uptr Pack(Compression type);
+ uptr Allocated() const;
+ void TestOnlyUnmap();
+ bool Stored(uptr n);
+ bool IsPacked() const;
};
- const BlockInfo *curr_ = nullptr;
+
+ BlockInfo blocks_[kBlockCount] = {};
};
} // namespace __sanitizer
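The Id <-> frame-offset mapping above is worth restating in the small: ids are offsets shifted by one so that 0 can mean "empty", and an offset splits into a block index (high bits) and an in-block index (low bits). A sketch using the same constants:

#include <cassert>
#include <cstdint>

int main() {
  constexpr uint64_t kBlockSizeFrames = 0x100000;  // 2^20 frames per block
  constexpr uint64_t kBlockCount = 0x1000;         // 2^12 blocks
  static_assert(kBlockCount * kBlockSizeFrames == (1ull << 32),
                "the blocks cover the whole 32-bit Id space");

  uint64_t offset = 5 * kBlockSizeFrames + 42;      // frame 42 of block 5
  uint32_t id = static_cast<uint32_t>(offset + 1);  // OffsetToId
  uint64_t back = id - 1;                           // IdToOffset
  assert(back / kBlockSizeFrames == 5);             // GetBlockIdx
  assert(back % kBlockSizeFrames == 42);            // GetInBlockIdx
}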
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.cpp
index e203b2cc4c89..527221b0c85c 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_stackdepot.cpp
@@ -23,6 +23,7 @@ struct StackDepotNode {
using hash_type = u64;
hash_type stack_hash;
u32 link;
+ StackStore::Id store_id;
static const u32 kTabSizeLog = SANITIZER_ANDROID ? 16 : 20;
@@ -53,11 +54,6 @@ static StackStore stackStore;
typedef StackDepotBase<StackDepotNode, 1, StackDepotNode::kTabSizeLog>
StackDepot;
static StackDepot theDepot;
-// Keep rarely accessed stack traces out of frequently access nodes to improve
-// caching efficiency.
-static TwoLevelMap<StackStore::Id, StackDepot::kNodesSize1,
- StackDepot::kNodesSize2>
- storeIds;
// Keep mutable data out of frequently access nodes to improve caching
// efficiency.
static TwoLevelMap<atomic_uint32_t, StackDepot::kNodesSize1,
@@ -73,17 +69,18 @@ void StackDepotHandle::inc_use_count_unsafe() {
}
uptr StackDepotNode::allocated() {
- return stackStore.Allocated() + storeIds.MemoryUsage() +
- useCounts.MemoryUsage();
+ return stackStore.Allocated() + useCounts.MemoryUsage();
}
void StackDepotNode::store(u32 id, const args_type &args, hash_type hash) {
stack_hash = hash;
- storeIds[id] = stackStore.Store(args);
+ uptr pack = 0;
+ store_id = stackStore.Store(args, &pack);
+ if (pack)
+ stackStore.Pack(StackStore::Compression::None);
}
StackDepotNode::args_type StackDepotNode::load(u32 id) const {
- StackStore::Id store_id = storeIds[id];
if (!store_id)
return {};
return stackStore.Load(store_id);
@@ -121,7 +118,6 @@ StackDepotHandle StackDepotNode::get_handle(u32 id) {
void StackDepotTestOnlyUnmap() {
theDepot.TestOnlyUnmap();
- storeIds.TestOnlyUnmap();
stackStore.TestOnlyUnmap();
}
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h
index 11c6154b09ea..aebd504669d2 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_stacktrace.h
@@ -20,7 +20,7 @@ namespace __sanitizer {
struct BufferedStackTrace;
-static const u32 kStackTraceMax = 256;
+static const u32 kStackTraceMax = 255;
#if SANITIZER_LINUX && defined(__mips__)
# define SANITIZER_CAN_FAST_UNWIND 0
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp
index c3607dbed23e..1a31ce02af4c 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_win.cpp
@@ -337,6 +337,11 @@ bool MprotectNoAccess(uptr addr, uptr size) {
return VirtualProtect((LPVOID)addr, size, PAGE_NOACCESS, &old_protection);
}
+bool MprotectReadOnly(uptr addr, uptr size) {
+ DWORD old_protection;
+ return VirtualProtect((LPVOID)addr, size, PAGE_READONLY, &old_protection);
+}
+
void ReleaseMemoryPagesToOS(uptr beg, uptr end) {
uptr beg_aligned = RoundDownTo(beg, GetPageSizeCached()),
end_aligned = RoundDownTo(end, GetPageSizeCached());
diff --git a/compiler-rt/lib/tsan/rtl/tsan_defs.h b/compiler-rt/lib/tsan/rtl/tsan_defs.h
index fe0c1da31599..4712c2be1813 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_defs.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_defs.h
@@ -228,6 +228,7 @@ enum MutexType {
MutexTypeFired,
MutexTypeRacy,
MutexTypeGlobalProc,
+ MutexTypeInternalAlloc,
};
} // namespace __tsan
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
index 25dbe487b280..cf3dc90d96a1 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
@@ -177,6 +177,7 @@ struct ThreadSignalContext {
struct AtExitCtx {
void (*f)();
void *arg;
+ uptr pc;
};
// InterceptorContext holds all global data required for interceptors.
@@ -367,7 +368,10 @@ TSAN_INTERCEPTOR(int, pause, int fake) {
return BLOCK_REAL(pause)(fake);
}
-static void at_exit_wrapper() {
+// Note: we deliberately give the function this strange name ending in
+// "installed_at" because in reports it will appear between the callback
+// frames and the frame that installed the callback.
+static void at_exit_callback_installed_at() {
AtExitCtx *ctx;
{
// Ensure thread-safety.
@@ -379,15 +383,21 @@ static void at_exit_wrapper() {
interceptor_ctx()->AtExitStack.PopBack();
}
- Acquire(cur_thread(), (uptr)0, (uptr)ctx);
+ ThreadState *thr = cur_thread();
+ Acquire(thr, ctx->pc, (uptr)ctx);
+ FuncEntry(thr, ctx->pc);
((void(*)())ctx->f)();
+ FuncExit(thr);
Free(ctx);
}
-static void cxa_at_exit_wrapper(void *arg) {
- Acquire(cur_thread(), 0, (uptr)arg);
+static void cxa_at_exit_callback_installed_at(void *arg) {
+ ThreadState *thr = cur_thread();
AtExitCtx *ctx = (AtExitCtx*)arg;
+ Acquire(thr, ctx->pc, (uptr)arg);
+ FuncEntry(thr, ctx->pc);
((void(*)(void *arg))ctx->f)(ctx->arg);
+ FuncExit(thr);
Free(ctx);
}
@@ -401,7 +411,7 @@ TSAN_INTERCEPTOR(int, atexit, void (*f)()) {
// We want to setup the atexit callback even if we are in ignored lib
// or after fork.
SCOPED_INTERCEPTOR_RAW(atexit, f);
- return setup_at_exit_wrapper(thr, pc, (void(*)())f, 0, 0);
+ return setup_at_exit_wrapper(thr, GET_CALLER_PC(), (void (*)())f, 0, 0);
}
#endif
@@ -409,7 +419,7 @@ TSAN_INTERCEPTOR(int, __cxa_atexit, void (*f)(void *a), void *arg, void *dso) {
if (in_symbolizer())
return 0;
SCOPED_TSAN_INTERCEPTOR(__cxa_atexit, f, arg, dso);
- return setup_at_exit_wrapper(thr, pc, (void(*)())f, arg, dso);
+ return setup_at_exit_wrapper(thr, GET_CALLER_PC(), (void (*)())f, arg, dso);
}
static int setup_at_exit_wrapper(ThreadState *thr, uptr pc, void(*f)(),
@@ -417,6 +427,7 @@ static int setup_at_exit_wrapper(ThreadState *thr, uptr pc, void(*f)(),
auto *ctx = New<AtExitCtx>();
ctx->f = f;
ctx->arg = arg;
+ ctx->pc = pc;
Release(thr, pc, (uptr)ctx);
// Memory allocation in __cxa_atexit will race with free during exit,
// because we do not see synchronization around atexit callback list.
@@ -432,25 +443,27 @@ static int setup_at_exit_wrapper(ThreadState *thr, uptr pc, void(*f)(),
// due to atexit_mu held on exit from the calloc interceptor.
ScopedIgnoreInterceptors ignore;
- res = REAL(__cxa_atexit)((void (*)(void *a))at_exit_wrapper, 0, 0);
+ res = REAL(__cxa_atexit)((void (*)(void *a))at_exit_callback_installed_at,
+ 0, 0);
// Push AtExitCtx on the top of the stack of callback functions
if (!res) {
interceptor_ctx()->AtExitStack.PushBack(ctx);
}
} else {
- res = REAL(__cxa_atexit)(cxa_at_exit_wrapper, ctx, dso);
+ res = REAL(__cxa_atexit)(cxa_at_exit_callback_installed_at, ctx, dso);
}
ThreadIgnoreEnd(thr);
return res;
}
#if !SANITIZER_MAC && !SANITIZER_NETBSD
-static void on_exit_wrapper(int status, void *arg) {
+static void on_exit_callback_installed_at(int status, void *arg) {
ThreadState *thr = cur_thread();
- uptr pc = 0;
- Acquire(thr, pc, (uptr)arg);
AtExitCtx *ctx = (AtExitCtx*)arg;
+ Acquire(thr, ctx->pc, (uptr)arg);
+ FuncEntry(thr, ctx->pc);
((void(*)(int status, void *arg))ctx->f)(status, ctx->arg);
+ FuncExit(thr);
Free(ctx);
}
@@ -461,11 +474,12 @@ TSAN_INTERCEPTOR(int, on_exit, void(*f)(int, void*), void *arg) {
auto *ctx = New<AtExitCtx>();
ctx->f = (void(*)())f;
ctx->arg = arg;
+ ctx->pc = GET_CALLER_PC();
Release(thr, pc, (uptr)ctx);
// Memory allocation in __cxa_atexit will race with free during exit,
// because we do not see synchronization around atexit callback list.
ThreadIgnoreBegin(thr, pc);
- int res = REAL(on_exit)(on_exit_wrapper, ctx);
+ int res = REAL(on_exit)(on_exit_callback_installed_at, ctx);
ThreadIgnoreEnd(thr);
return res;
}
@@ -2363,6 +2377,15 @@ static void HandleRecvmsg(ThreadState *thr, uptr pc,
if (fd >= 0) FdClose(thr, pc, fd); \
}
+#define COMMON_INTERCEPTOR_DLOPEN(filename, flag) \
+ ({ \
+ CheckNoDeepBind(filename, flag); \
+ ThreadIgnoreBegin(thr, 0); \
+ void *res = REAL(dlopen)(filename, flag); \
+ ThreadIgnoreEnd(thr); \
+ res; \
+ })
+
#define COMMON_INTERCEPTOR_LIBRARY_LOADED(filename, handle) \
libignore()->OnLibraryLoaded(filename)
diff --git a/compiler-rt/lib/tsan/rtl/tsan_mman.cpp b/compiler-rt/lib/tsan/rtl/tsan_mman.cpp
index ef97ad0bc94e..a31bebcb6ba9 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_mman.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_mman.cpp
@@ -69,8 +69,17 @@ Allocator *allocator() {
struct GlobalProc {
Mutex mtx;
Processor *proc;
-
- GlobalProc() : mtx(MutexTypeGlobalProc), proc(ProcCreate()) {}
+ // This mutex stands in for all of the internal allocator's mutexes
+ // for the purposes of deadlock detection. The internal allocator
+ // uses multiple mutexes; moreover, they are locked only occasionally,
+ // and they are spin mutexes which don't support deadlock detection.
+ // So we use this fake mutex as a substitute for them.
+ CheckedMutex internal_alloc_mtx;
+
+ GlobalProc()
+ : mtx(MutexTypeGlobalProc),
+ proc(ProcCreate()),
+ internal_alloc_mtx(MutexTypeInternalAlloc) {}
};
static char global_proc_placeholder[sizeof(GlobalProc)] ALIGNED(64);
@@ -78,6 +87,11 @@ GlobalProc *global_proc() {
return reinterpret_cast<GlobalProc*>(&global_proc_placeholder);
}
+static void InternalAllocAccess() {
+ global_proc()->internal_alloc_mtx.Lock();
+ global_proc()->internal_alloc_mtx.Unlock();
+}
+
ScopedGlobalProcessor::ScopedGlobalProcessor() {
GlobalProc *gp = global_proc();
ThreadState *thr = cur_thread();
@@ -110,6 +124,18 @@ ScopedGlobalProcessor::~ScopedGlobalProcessor() {
gp->mtx.Unlock();
}
+void AllocatorLock() NO_THREAD_SAFETY_ANALYSIS {
+ global_proc()->mtx.Lock();
+ global_proc()->internal_alloc_mtx.Lock();
+ InternalAllocatorLock();
+}
+
+void AllocatorUnlock() NO_THREAD_SAFETY_ANALYSIS {
+ InternalAllocatorUnlock();
+ global_proc()->internal_alloc_mtx.Unlock();
+ global_proc()->mtx.Unlock();
+}
+
static constexpr uptr kMaxAllowedMallocSize = 1ull << 40;
static uptr max_user_defined_malloc_size;
@@ -342,6 +368,7 @@ void *Alloc(uptr sz) {
thr->nomalloc = 0; // CHECK calls internal_malloc().
CHECK(0);
}
+ InternalAllocAccess();
return InternalAlloc(sz, &thr->proc()->internal_alloc_cache);
}
@@ -351,6 +378,7 @@ void FreeImpl(void *p) {
thr->nomalloc = 0; // CHECK calls internal_malloc().
CHECK(0);
}
+ InternalAllocAccess();
InternalFree(p, &thr->proc()->internal_alloc_cache);
}
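The InternalAllocAccess() pattern above deserves a note: locking and immediately unlocking a dedicated CheckedMutex on every allocator entry records the edge "caller's current locks -> internal allocator" for the deadlock detector without actually protecting any data. A minimal sketch of the idiom, with std::mutex standing in for CheckedMutex:

#include <mutex>

// Models all of the (undetectable) internal allocator spin locks.
std::mutex internal_alloc_model;

void TouchAllocatorForDeadlockDetection() {
  // Lock and immediately unlock: nothing is guarded, but the lock-order
  // edge becomes visible to a deadlock detector watching this mutex.
  internal_alloc_model.lock();
  internal_alloc_model.unlock();
}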
diff --git a/compiler-rt/lib/tsan/rtl/tsan_mman.h b/compiler-rt/lib/tsan/rtl/tsan_mman.h
index efea5e5abdec..db8488eabbe2 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_mman.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_mman.h
@@ -24,6 +24,8 @@ void ReplaceSystemMalloc();
void AllocatorProcStart(Processor *proc);
void AllocatorProcFinish(Processor *proc);
void AllocatorPrintStats();
+void AllocatorLock();
+void AllocatorUnlock();
// For user allocations.
void *user_alloc_internal(ThreadState *thr, uptr pc, uptr sz,
diff --git a/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp b/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp
index 3faa2d0c6192..1465f9953c19 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cpp
@@ -25,6 +25,7 @@
#include "tsan_rtl.h"
#include "tsan_flags.h"
+#include <limits.h>
#include <mach/mach.h>
#include <pthread.h>
#include <signal.h>
@@ -45,70 +46,83 @@
namespace __tsan {
#if !SANITIZER_GO
-static void *SignalSafeGetOrAllocate(uptr *dst, uptr size) {
- atomic_uintptr_t *a = (atomic_uintptr_t *)dst;
- void *val = (void *)atomic_load_relaxed(a);
- atomic_signal_fence(memory_order_acquire); // Turns the previous load into
- // acquire wrt signals.
- if (UNLIKELY(val == nullptr)) {
- val = (void *)internal_mmap(nullptr, size, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANON, -1, 0);
- CHECK(val);
- void *cmp = nullptr;
- if (!atomic_compare_exchange_strong(a, (uintptr_t *)&cmp, (uintptr_t)val,
- memory_order_acq_rel)) {
- internal_munmap(val, size);
- val = cmp;
- }
- }
- return val;
+static char main_thread_state[sizeof(ThreadState)] ALIGNED(
+ SANITIZER_CACHE_LINE_SIZE);
+static ThreadState *dead_thread_state;
+static pthread_key_t thread_state_key;
+
+// We rely on the following documented, but Darwin-specific behavior to keep the
+// reference to the ThreadState object alive in TLS:
+// pthread_key_create man page:
+// If, after all the destructors have been called for all non-NULL values with
+// associated destructors, there are still some non-NULL values with
+// associated destructors, then the process is repeated. If, after at least
+// [PTHREAD_DESTRUCTOR_ITERATIONS] iterations of destructor calls for
+// outstanding non-NULL values, there are still some non-NULL values with
+// associated destructors, the implementation stops calling destructors.
+static_assert(PTHREAD_DESTRUCTOR_ITERATIONS == 4, "Small number of iterations");
+static void ThreadStateDestructor(void *thr) {
+ int res = pthread_setspecific(thread_state_key, thr);
+ CHECK_EQ(res, 0);
}
-// On OS X, accessing TLVs via __thread or manually by using pthread_key_* is
-// problematic, because there are several places where interceptors are called
-// when TLVs are not accessible (early process startup, thread cleanup, ...).
-// The following provides a "poor man's TLV" implementation, where we use the
-// shadow memory of the pointer returned by pthread_self() to store a pointer to
-// the ThreadState object. The main thread's ThreadState is stored separately
-// in a static variable, because we need to access it even before the
-// shadow memory is set up.
-static uptr main_thread_identity = 0;
-ALIGNED(64) static char main_thread_state[sizeof(ThreadState)];
-static ThreadState *main_thread_state_loc = (ThreadState *)main_thread_state;
-
-// We cannot use pthread_self() before libpthread has been initialized. Our
-// current heuristic for guarding this is checking `main_thread_identity` which
-// is only assigned in `__tsan::InitializePlatform`.
-static ThreadState **cur_thread_location() {
- if (main_thread_identity == 0)
- return &main_thread_state_loc;
- uptr thread_identity = (uptr)pthread_self();
- if (thread_identity == main_thread_identity)
- return &main_thread_state_loc;
- return (ThreadState **)MemToShadow(thread_identity);
+static void InitializeThreadStateStorage() {
+ int res;
+ CHECK_EQ(thread_state_key, 0);
+ res = pthread_key_create(&thread_state_key, ThreadStateDestructor);
+ CHECK_EQ(res, 0);
+ res = pthread_setspecific(thread_state_key, main_thread_state);
+ CHECK_EQ(res, 0);
+
+ auto dts = (ThreadState *)MmapOrDie(sizeof(ThreadState), "ThreadState");
+ dts->fast_state.SetIgnoreBit();
+ dts->ignore_interceptors = 1;
+ dts->is_dead = true;
+ const_cast<Tid &>(dts->tid) = kInvalidTid;
+ res = internal_mprotect(dts, sizeof(ThreadState), PROT_READ); // immutable
+ CHECK_EQ(res, 0);
+ dead_thread_state = dts;
}
ThreadState *cur_thread() {
- return (ThreadState *)SignalSafeGetOrAllocate(
- (uptr *)cur_thread_location(), sizeof(ThreadState));
+ // Some interceptors get called before libpthread has been initialized and in
+ // these cases we must avoid calling any pthread APIs.
+ if (UNLIKELY(!thread_state_key)) {
+ return (ThreadState *)main_thread_state;
+ }
+
+ // We only reach this line after InitializeThreadStateStorage() ran, i.e.,
+ // after TSan (and therefore libpthread) has been initialized.
+ ThreadState *thr = (ThreadState *)pthread_getspecific(thread_state_key);
+ if (UNLIKELY(!thr)) {
+ thr = (ThreadState *)MmapOrDie(sizeof(ThreadState), "ThreadState");
+ int res = pthread_setspecific(thread_state_key, thr);
+ CHECK_EQ(res, 0);
+ }
+ return thr;
}
void set_cur_thread(ThreadState *thr) {
- *cur_thread_location() = thr;
+ int res = pthread_setspecific(thread_state_key, thr);
+ CHECK_EQ(res, 0);
}
-// TODO(kuba.brecka): This is not async-signal-safe. In particular, we call
-// munmap first and then clear `fake_tls`; if we receive a signal in between,
-// handler will try to access the unmapped ThreadState.
void cur_thread_finalize() {
- ThreadState **thr_state_loc = cur_thread_location();
- if (thr_state_loc == &main_thread_state_loc) {
+ ThreadState *thr = (ThreadState *)pthread_getspecific(thread_state_key);
+ CHECK(thr);
+ if (thr == (ThreadState *)main_thread_state) {
// Calling dispatch_main() or xpc_main() actually invokes pthread_exit to
// exit the main thread. Let's keep the main thread's ThreadState.
return;
}
- internal_munmap(*thr_state_loc, sizeof(ThreadState));
- *thr_state_loc = nullptr;
+ // Intercepted functions can still get called after cur_thread_finalize()
+ // (called from DestroyThreadState()), so install a fake thread state for
+ // "dead" threads. An alternative solution would be to release the ThreadState
+ // object from THREAD_DESTROY (which is delivered later and on the parent
+ // thread) instead of THREAD_TERMINATE.
+ int res = pthread_setspecific(thread_state_key, dead_thread_state);
+ CHECK_EQ(res, 0);
+ UnmapOrDie(thr, sizeof(ThreadState));
}
#endif
@@ -222,11 +236,10 @@ static void my_pthread_introspection_hook(unsigned int event, pthread_t thread,
ThreadStart(thr, tid, GetTid(), ThreadType::Worker);
}
} else if (event == PTHREAD_INTROSPECTION_THREAD_TERMINATE) {
- if (thread == pthread_self()) {
- ThreadState *thr = cur_thread();
- if (thr->tctx) {
- DestroyThreadState();
- }
+ CHECK_EQ(thread, pthread_self());
+ ThreadState *thr = cur_thread();
+ if (thr->tctx) {
+ DestroyThreadState();
}
}
@@ -253,8 +266,7 @@ void InitializePlatform() {
#if !SANITIZER_GO
CheckAndProtect();
- CHECK_EQ(main_thread_identity, 0);
- main_thread_identity = (uptr)pthread_self();
+ InitializeThreadStateStorage();
prev_pthread_introspection_hook =
pthread_introspection_hook_install(&my_pthread_introspection_hook);
@@ -286,24 +298,11 @@ uptr ExtractLongJmpSp(uptr *env) {
extern "C" void __tsan_tls_initialization() {}
void ImitateTlsWrite(ThreadState *thr, uptr tls_addr, uptr tls_size) {
- // The pointer to the ThreadState object is stored in the shadow memory
- // of the tls.
- uptr tls_end = tls_addr + tls_size;
- uptr thread_identity = (uptr)pthread_self();
const uptr pc = StackTrace::GetNextInstructionPc(
reinterpret_cast<uptr>(__tsan_tls_initialization));
- if (thread_identity == main_thread_identity) {
- MemoryRangeImitateWrite(thr, pc, tls_addr, tls_size);
- } else {
- uptr thr_state_start = thread_identity;
- uptr thr_state_end = thr_state_start + sizeof(uptr);
- CHECK_GE(thr_state_start, tls_addr);
- CHECK_LE(thr_state_start, tls_addr + tls_size);
- CHECK_GE(thr_state_end, tls_addr);
- CHECK_LE(thr_state_end, tls_addr + tls_size);
- MemoryRangeImitateWrite(thr, pc, tls_addr, thr_state_start - tls_addr);
- MemoryRangeImitateWrite(thr, pc, thr_state_end, tls_end - thr_state_end);
- }
+ // Unlike Linux, we only store a pointer to the ThreadState object in TLS;
+ // just mark the entire range as written to.
+ MemoryRangeImitateWrite(thr, pc, tls_addr, tls_size);
}
#endif
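The destructor trick in ThreadStateDestructor() above bears restating: a pthread key destructor that re-installs its own value keeps the TLS slot non-null across the (Darwin-documented, up to PTHREAD_DESTRUCTOR_ITERATIONS) rounds of destructor calls, so late interceptor calls can still find their state. A minimal sketch of the keepalive pattern:

#include <pthread.h>

static pthread_key_t key;

static void KeepAliveDestructor(void *value) {
  // Re-setting the value makes pthread run this destructor again on the
  // next iteration, keeping the slot alive through thread teardown.
  pthread_setspecific(key, value);
}

void InitKey() {
  pthread_key_create(&key, KeepAliveDestructor);
}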
diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp
index ff7726ef0608..c14af9788e32 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp
@@ -34,6 +34,9 @@ extern "C" void __tsan_resume() {
__tsan_resumed = 1;
}
+SANITIZER_WEAK_DEFAULT_IMPL
+void __tsan_test_only_on_fork() {}
+
namespace __tsan {
#if !SANITIZER_GO
@@ -271,8 +274,39 @@ void DontNeedShadowFor(uptr addr, uptr size) {
}
#if !SANITIZER_GO
+// We call UnmapShadow before the actual munmap, so at that point we don't
+// yet know if the provided address/size are sane. We can't call UnmapShadow
+// after the actual munmap because at that point the memory range can
+// already be reused for something else, so we can't rely on the munmap
+// return value to tell whether the values were sane.
+// While calling munmap with insane values (non-canonical address, negative
+// size, etc.) is an error, the kernel won't crash. We must also try not to
+// crash, as the failure mode is very confusing (a page fault inside the
+// runtime on some derived shadow address).
+static bool IsValidMmapRange(uptr addr, uptr size) {
+ if (size == 0)
+ return true;
+ if (static_cast<sptr>(size) < 0)
+ return false;
+ if (!IsAppMem(addr) || !IsAppMem(addr + size - 1))
+ return false;
+ // Check that if the start of the region belongs to one of the app ranges,
+ // the end of the region belongs to the same range.
+ const uptr ranges[][2] = {
+ {LoAppMemBeg(), LoAppMemEnd()},
+ {MidAppMemBeg(), MidAppMemEnd()},
+ {HiAppMemBeg(), HiAppMemEnd()},
+ };
+ for (auto range : ranges) {
+ if (addr >= range[0] && addr < range[1])
+ return addr + size <= range[1];
+ }
+ return false;
+}
+
void UnmapShadow(ThreadState *thr, uptr addr, uptr size) {
- if (size == 0) return;
+ if (size == 0 || !IsValidMmapRange(addr, size))
+ return;
DontNeedShadowFor(addr, size);
ScopedGlobalProcessor sgp;
ctx->metamap.ResetRange(thr->proc(), addr, size);
@@ -491,6 +525,7 @@ void ForkBefore(ThreadState *thr, uptr pc) NO_THREAD_SAFETY_ANALYSIS {
ctx->thread_registry.Lock();
ctx->report_mtx.Lock();
ScopedErrorReportLock::Lock();
+ AllocatorLock();
// Suppress all reports in the pthread_atfork callbacks.
// Reports will deadlock on the report_mtx.
// We could ignore sync operations as well,
@@ -499,12 +534,20 @@ void ForkBefore(ThreadState *thr, uptr pc) NO_THREAD_SAFETY_ANALYSIS {
thr->suppress_reports++;
// On OS X, REAL(fork) can call intercepted functions (OSSpinLockLock), and
// we'll assert in CheckNoLocks() unless we ignore interceptors.
+ // On OS X libSystem_atfork_prepare/parent/child callbacks are called
+ // after/before our callbacks and they call free.
thr->ignore_interceptors++;
+ // Disables memory writes in OnUserAlloc/Free.
+ thr->ignore_reads_and_writes++;
+
+ __tsan_test_only_on_fork();
}
void ForkParentAfter(ThreadState *thr, uptr pc) NO_THREAD_SAFETY_ANALYSIS {
thr->suppress_reports--; // Enabled in ForkBefore.
thr->ignore_interceptors--;
+ thr->ignore_reads_and_writes--;
+ AllocatorUnlock();
ScopedErrorReportLock::Unlock();
ctx->report_mtx.Unlock();
ctx->thread_registry.Unlock();
@@ -514,6 +557,8 @@ void ForkChildAfter(ThreadState *thr, uptr pc,
bool start_thread) NO_THREAD_SAFETY_ANALYSIS {
thr->suppress_reports--; // Enabled in ForkBefore.
thr->ignore_interceptors--;
+ thr->ignore_reads_and_writes--;
+ AllocatorUnlock();
ScopedErrorReportLock::Unlock();
ctx->report_mtx.Unlock();
ctx->thread_registry.Unlock();
@@ -747,14 +792,17 @@ using namespace __tsan;
MutexMeta mutex_meta[] = {
{MutexInvalid, "Invalid", {}},
{MutexThreadRegistry, "ThreadRegistry", {}},
- {MutexTypeTrace, "Trace", {MutexLeaf}},
- {MutexTypeReport, "Report", {MutexTypeSyncVar}},
- {MutexTypeSyncVar, "SyncVar", {}},
+ {MutexTypeTrace, "Trace", {}},
+ {MutexTypeReport,
+ "Report",
+ {MutexTypeSyncVar, MutexTypeGlobalProc, MutexTypeTrace}},
+ {MutexTypeSyncVar, "SyncVar", {MutexTypeTrace}},
{MutexTypeAnnotations, "Annotations", {}},
{MutexTypeAtExit, "AtExit", {MutexTypeSyncVar}},
{MutexTypeFired, "Fired", {MutexLeaf}},
{MutexTypeRacy, "Racy", {MutexLeaf}},
{MutexTypeGlobalProc, "GlobalProc", {}},
+ {MutexTypeInternalAlloc, "InternalAlloc", {MutexLeaf}},
{},
};
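The IsValidMmapRange() check added earlier in this file reduces to a simple range-membership test: a candidate [addr, addr+size) is valid only if it lies entirely within one of a fixed set of application ranges. A standalone restatement with made-up bounds (the real ones come from the platform layout):

#include <cstdint>

bool IsWithinOneRange(uint64_t addr, uint64_t size) {
  if (size == 0) return true;
  if (static_cast<int64_t>(size) < 0) return false;  // "negative" size
  // Hypothetical app ranges, for illustration only.
  const uint64_t ranges[][2] = {
      {0x000000001000ull, 0x200000000000ull},
      {0x550000000000ull, 0x568000000000ull},
      {0x7e8000000000ull, 0x800000000000ull},
  };
  for (auto &range : ranges)
    if (addr >= range[0] && addr < range[1])
      return addr + size <= range[1];  // end must stay in the same range
  return false;
}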
diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp
index 811695d144c5..f332a6a8d1d8 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp
@@ -346,7 +346,7 @@ void ScopedReportBase::AddLocation(uptr addr, uptr size) {
ThreadContext *tctx = FindThreadByTidLocked(b->tid);
auto *loc = New<ReportLocation>();
loc->type = ReportLocationHeap;
- loc->heap_chunk_start = (uptr)allocator()->GetBlockBegin((void *)addr);
+ loc->heap_chunk_start = block_begin;
loc->heap_chunk_size = b->siz;
loc->external_tag = b->tag;
loc->tid = tctx ? tctx->tid : b->tid;
diff --git a/libcxx/CREDITS.TXT b/libcxx/CREDITS.TXT
index 597c5fcb7cf4..fc442f4db1a1 100644
--- a/libcxx/CREDITS.TXT
+++ b/libcxx/CREDITS.TXT
@@ -149,6 +149,10 @@ N: Klaas de Vries
E: klaas at klaasgaaf dot nl
D: Minor bug fix.
+N: Mark de Wever
+E: koraq at xs4all dot nl
+D: Format library support.
+
N: Zhang Xiongpang
E: zhangxiongpang@gmail.com
D: Minor patches and bug fixes.
diff --git a/libcxx/include/__bit/byteswap.h b/libcxx/include/__bit/byteswap.h
new file mode 100644
index 000000000000..970074ed98ce
--- /dev/null
+++ b/libcxx/include/__bit/byteswap.h
@@ -0,0 +1,55 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___BIT_BYTESWAP_H
+#define _LIBCPP___BIT_BYTESWAP_H
+
+#include <__concepts/arithmetic.h>
+#include <__config>
+#include <cstdint>
+#include <cstdlib>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 20 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+template <integral _Tp>
+_LIBCPP_HIDE_FROM_ABI constexpr _Tp byteswap(_Tp __val) noexcept {
+
+ if constexpr (sizeof(_Tp) == 1) {
+ return __val;
+ } else if constexpr (sizeof(_Tp) == 2) {
+ return __builtin_bswap16(__val);
+ } else if constexpr (sizeof(_Tp) == 4) {
+ return __builtin_bswap32(__val);
+ } else if constexpr (sizeof(_Tp) == 8) {
+ return __builtin_bswap64(__val);
+#ifndef _LIBCPP_HAS_NO_INT128
+ } else if constexpr (sizeof(_Tp) == 16) {
+#if __has_builtin(__builtin_bswap128)
+ return __builtin_bswap128(__val);
+#else
+ return static_cast<_Tp>(byteswap(static_cast<uint64_t>(__val))) << 64 |
+ static_cast<_Tp>(byteswap(static_cast<uint64_t>(__val >> 64)));
+#endif // __has_builtin(__builtin_bswap128)
+#endif // _LIBCPP_HAS_NO_INT128
+ } else {
+ static_assert(sizeof(_Tp) == 0, "byteswap is unimplemented for integral types of this size");
+ }
+}
+
+#endif // _LIBCPP_STD_VER > 20 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___BIT_BYTESWAP_H
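A quick usage sketch for the new header, assuming a C++23 library that exposes std::byteswap (the guard above requires _LIBCPP_STD_VER > 20):

#include <bit>      // std::byteswap
#include <cassert>
#include <cstdint>

int main() {
  // byteswap reverses the byte order of any integral value; it is constexpr,
  // so it can be checked at compile time.
  static_assert(std::byteswap<std::uint16_t>(0x1234) == 0x3412);
  static_assert(std::byteswap<std::uint32_t>(0x12345678u) == 0x78563412u);
  // Swapping twice is the identity.
  assert(std::byteswap(std::byteswap(0xCAFEBABEu)) == 0xCAFEBABEu);
}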
diff --git a/libcxx/include/__bsd_locale_fallbacks.h b/libcxx/include/__bsd_locale_fallbacks.h
index 2d5c2eca4679..a5788d9777b5 100644
--- a/libcxx/include/__bsd_locale_fallbacks.h
+++ b/libcxx/include/__bsd_locale_fallbacks.h
@@ -108,7 +108,7 @@ size_t __libcpp_mbsrtowcs_l(wchar_t *__dest, const char **__src, size_t __len,
}
#endif
-inline
+inline _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 4, 5)
int __libcpp_snprintf_l(char *__s, size_t __n, locale_t __l, const char *__format, ...) {
va_list __va;
va_start(__va, __format);
@@ -118,7 +118,7 @@ int __libcpp_snprintf_l(char *__s, size_t __n, locale_t __l, const char *__forma
return __res;
}
-inline
+inline _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 3, 4)
int __libcpp_asprintf_l(char **__s, locale_t __l, const char *__format, ...) {
va_list __va;
va_start(__va, __format);
@@ -128,7 +128,7 @@ int __libcpp_asprintf_l(char **__s, locale_t __l, const char *__format, ...) {
return __res;
}
-inline
+inline _LIBCPP_ATTRIBUTE_FORMAT(__scanf__, 3, 4)
int __libcpp_sscanf_l(const char *__s, locale_t __l, const char *__format, ...) {
va_list __va;
va_start(__va, __format);
diff --git a/libcxx/include/__compare/partial_order.h b/libcxx/include/__compare/partial_order.h
new file mode 100644
index 000000000000..ac8b405a4090
--- /dev/null
+++ b/libcxx/include/__compare/partial_order.h
@@ -0,0 +1,71 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___COMPARE_PARTIAL_ORDER
+#define _LIBCPP___COMPARE_PARTIAL_ORDER
+
+#include <__compare/compare_three_way.h>
+#include <__compare/ordering.h>
+#include <__compare/weak_order.h>
+#include <__config>
+#include <__utility/forward.h>
+#include <__utility/priority_tag.h>
+#include <type_traits>
+
+#ifndef _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+// [cmp.alg]
+namespace __partial_order {
+ struct __fn {
+ template<class _Tp, class _Up>
+ requires is_same_v<decay_t<_Tp>, decay_t<_Up>>
+ _LIBCPP_HIDE_FROM_ABI static constexpr auto
+ __go(_Tp&& __t, _Up&& __u, __priority_tag<2>)
+ noexcept(noexcept(partial_ordering(partial_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u)))))
+ -> decltype( partial_ordering(partial_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))))
+ { return partial_ordering(partial_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))); }
+
+ template<class _Tp, class _Up>
+ requires is_same_v<decay_t<_Tp>, decay_t<_Up>>
+ _LIBCPP_HIDE_FROM_ABI static constexpr auto
+ __go(_Tp&& __t, _Up&& __u, __priority_tag<1>)
+ noexcept(noexcept(partial_ordering(compare_three_way()(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u)))))
+ -> decltype( partial_ordering(compare_three_way()(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))))
+ { return partial_ordering(compare_three_way()(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))); }
+
+ template<class _Tp, class _Up>
+ requires is_same_v<decay_t<_Tp>, decay_t<_Up>>
+ _LIBCPP_HIDE_FROM_ABI static constexpr auto
+ __go(_Tp&& __t, _Up&& __u, __priority_tag<0>)
+ noexcept(noexcept(partial_ordering(_VSTD::weak_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u)))))
+ -> decltype( partial_ordering(_VSTD::weak_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))))
+ { return partial_ordering(_VSTD::weak_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))); }
+
+ template<class _Tp, class _Up>
+ _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp&& __t, _Up&& __u) const
+ noexcept(noexcept(__go(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u), __priority_tag<2>())))
+ -> decltype( __go(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u), __priority_tag<2>()))
+ { return __go(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u), __priority_tag<2>()); }
+ };
+} // namespace __partial_order
+
+inline namespace __cpo {
+ inline constexpr auto partial_order = __partial_order::__fn{};
+} // namespace __cpo
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___COMPARE_PARTIAL_ORDER
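The __priority_tag dispatch used by this CPO (and the ones that follow) is a general overload-ranking technique: overloads take tags of decreasing rank, the call site passes the highest tag, and the first overload to survive SFINAE wins. A self-contained sketch of the pattern with hypothetical names:

#include <cstddef>
#include <iostream>

template <std::size_t N> struct priority_tag : priority_tag<N - 1> {};
template <> struct priority_tag<0> {};

// Preferred overload: only viable if T has a member name().
template <class T>
auto describe(T t, priority_tag<1>) -> decltype(t.name()) {
  return t.name();
}
// Fallback overload: always viable via derived-to-base tag conversion.
template <class T>
const char *describe(T, priority_tag<0>) {
  return "<unnamed>";
}

struct Named { const char *name() const { return "Named"; } };

int main() {
  std::cout << describe(Named{}, priority_tag<1>{}) << '\n';  // "Named"
  std::cout << describe(42, priority_tag<1>{}) << '\n';       // "<unnamed>"
}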
diff --git a/libcxx/include/__compare/strong_order.h b/libcxx/include/__compare/strong_order.h
new file mode 100644
index 000000000000..e49b2d45de45
--- /dev/null
+++ b/libcxx/include/__compare/strong_order.h
@@ -0,0 +1,136 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___COMPARE_STRONG_ORDER
+#define _LIBCPP___COMPARE_STRONG_ORDER
+
+#include <__bit/bit_cast.h>
+#include <__compare/compare_three_way.h>
+#include <__compare/ordering.h>
+#include <__config>
+#include <__utility/forward.h>
+#include <__utility/priority_tag.h>
+#include <cmath>
+#include <cstdint>
+#include <limits>
+#include <type_traits>
+
+#ifndef _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+// [cmp.alg]
+namespace __strong_order {
+ struct __fn {
+ template<class _Tp, class _Up>
+ requires is_same_v<decay_t<_Tp>, decay_t<_Up>>
+ _LIBCPP_HIDE_FROM_ABI static constexpr auto
+ __go(_Tp&& __t, _Up&& __u, __priority_tag<2>)
+ noexcept(noexcept(strong_ordering(strong_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u)))))
+ -> decltype( strong_ordering(strong_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))))
+ { return strong_ordering(strong_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))); }
+
+ template<class _Tp, class _Up, class _Dp = decay_t<_Tp>>
+ requires is_same_v<_Dp, decay_t<_Up>> && is_floating_point_v<_Dp>
+ _LIBCPP_HIDE_FROM_ABI static constexpr strong_ordering
+ __go(_Tp&& __t, _Up&& __u, __priority_tag<1>) noexcept
+ {
+ if constexpr (numeric_limits<_Dp>::is_iec559 && sizeof(_Dp) == sizeof(int32_t)) {
+ int32_t __rx = _VSTD::bit_cast<int32_t>(__t);
+ int32_t __ry = _VSTD::bit_cast<int32_t>(__u);
+ __rx = (__rx < 0) ? (numeric_limits<int32_t>::min() - __rx - 1) : __rx;
+ __ry = (__ry < 0) ? (numeric_limits<int32_t>::min() - __ry - 1) : __ry;
+ return (__rx <=> __ry);
+ } else if constexpr (numeric_limits<_Dp>::is_iec559 && sizeof(_Dp) == sizeof(int64_t)) {
+ int64_t __rx = _VSTD::bit_cast<int64_t>(__t);
+ int64_t __ry = _VSTD::bit_cast<int64_t>(__u);
+ __rx = (__rx < 0) ? (numeric_limits<int64_t>::min() - __rx - 1) : __rx;
+ __ry = (__ry < 0) ? (numeric_limits<int64_t>::min() - __ry - 1) : __ry;
+ return (__rx <=> __ry);
+ } else if (__t < __u) {
+ return strong_ordering::less;
+ } else if (__t > __u) {
+ return strong_ordering::greater;
+ } else if (__t == __u) {
+ if constexpr (numeric_limits<_Dp>::radix == 2) {
+ return _VSTD::signbit(__u) <=> _VSTD::signbit(__t);
+ } else {
+ // This is bullet 3 of the IEEE754 algorithm, relevant
+ // only for decimal floating-point;
+ // see https://stackoverflow.com/questions/69068075/
+ if (__t == 0 || _VSTD::isinf(__t)) {
+ return _VSTD::signbit(__u) <=> _VSTD::signbit(__t);
+ } else {
+ int __texp, __uexp;
+ (void)_VSTD::frexp(__t, &__texp);
+ (void)_VSTD::frexp(__u, &__uexp);
+ return (__t < 0) ? (__texp <=> __uexp) : (__uexp <=> __texp);
+ }
+ }
+ } else {
+ // They're unordered, so one of them must be a NAN.
+ // The order is -QNAN, -SNAN, numbers, +SNAN, +QNAN.
+ bool __t_is_nan = _VSTD::isnan(__t);
+ bool __u_is_nan = _VSTD::isnan(__u);
+ bool __t_is_negative = _VSTD::signbit(__t);
+ bool __u_is_negative = _VSTD::signbit(__u);
+ using _IntType = std::conditional_t<
+ sizeof(__t) == sizeof(int32_t), int32_t, std::conditional_t<
+ sizeof(__t) == sizeof(int64_t), int64_t, void>
+ >;
+ if constexpr (std::is_same_v<_IntType, void>) {
+ static_assert(sizeof(_Dp) == 0, "std::strong_order is unimplemented for this floating-point type");
+ } else if (__t_is_nan && __u_is_nan) {
+ // Order by sign bit, then by "payload bits" (we'll just use bit_cast).
+ if (__t_is_negative != __u_is_negative) {
+ return (__u_is_negative <=> __t_is_negative);
+ } else {
+ return _VSTD::bit_cast<_IntType>(__t) <=> _VSTD::bit_cast<_IntType>(__u);
+ }
+ } else if (__t_is_nan) {
+ return __t_is_negative ? strong_ordering::less : strong_ordering::greater;
+ } else {
+ return __u_is_negative ? strong_ordering::greater : strong_ordering::less;
+ }
+ }
+ }
+
+ template<class _Tp, class _Up>
+ requires is_same_v<decay_t<_Tp>, decay_t<_Up>>
+ _LIBCPP_HIDE_FROM_ABI static constexpr auto
+ __go(_Tp&& __t, _Up&& __u, __priority_tag<0>)
+ noexcept(noexcept(strong_ordering(compare_three_way()(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u)))))
+ -> decltype( strong_ordering(compare_three_way()(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))))
+ { return strong_ordering(compare_three_way()(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))); }
+
+ template<class _Tp, class _Up>
+ _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp&& __t, _Up&& __u) const
+ noexcept(noexcept(__go(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u), __priority_tag<2>())))
+ -> decltype( __go(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u), __priority_tag<2>()))
+ { return __go(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u), __priority_tag<2>()); }
+ };
+} // namespace __strong_order
+
+inline namespace __cpo {
+ inline constexpr auto strong_order = __strong_order::__fn{};
+} // namespace __cpo
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___COMPARE_STRONG_ORDER
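The sign-magnitude fixup in the floating-point branch above (the `numeric_limits<_Dp>::min() - __rx - 1` transform) maps IEEE bit patterns into keys whose integer order matches the IEEE totalOrder of the floats. A worked sketch using std::bit_cast (C++20):

#include <bit>       // std::bit_cast
#include <cassert>
#include <cstdint>
#include <limits>

int32_t TotalOrderKey(float f) {
  int32_t r = std::bit_cast<int32_t>(f);
  // Negative floats are sign-magnitude, so their raw bit patterns sort
  // backwards; flip them so more-negative floats get smaller keys.
  return (r < 0) ? (std::numeric_limits<int32_t>::min() - r - 1) : r;
}

int main() {
  assert(TotalOrderKey(-2.0f) < TotalOrderKey(-1.0f));
  assert(TotalOrderKey(-1.0f) < TotalOrderKey(-0.0f));
  assert(TotalOrderKey(-0.0f) < TotalOrderKey(0.0f));  // -0 orders below +0
  assert(TotalOrderKey(0.0f) < TotalOrderKey(1.0f));
}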
diff --git a/libcxx/include/__compare/weak_order.h b/libcxx/include/__compare/weak_order.h
new file mode 100644
index 000000000000..f67416ed3ebe
--- /dev/null
+++ b/libcxx/include/__compare/weak_order.h
@@ -0,0 +1,100 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___COMPARE_WEAK_ORDER
+#define _LIBCPP___COMPARE_WEAK_ORDER
+
+#include <__compare/compare_three_way.h>
+#include <__compare/ordering.h>
+#include <__compare/strong_order.h>
+#include <__config>
+#include <__utility/forward.h>
+#include <__utility/priority_tag.h>
+#include <cmath>
+#include <type_traits>
+
+#ifndef _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+// [cmp.alg]
+namespace __weak_order {
+ struct __fn {
+ template<class _Tp, class _Up>
+ requires is_same_v<decay_t<_Tp>, decay_t<_Up>>
+ _LIBCPP_HIDE_FROM_ABI static constexpr auto
+ __go(_Tp&& __t, _Up&& __u, __priority_tag<3>)
+ noexcept(noexcept(weak_ordering(weak_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u)))))
+ -> decltype( weak_ordering(weak_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))))
+ { return weak_ordering(weak_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))); }
+
+ template<class _Tp, class _Up, class _Dp = decay_t<_Tp>>
+ requires is_same_v<_Dp, decay_t<_Up>> && is_floating_point_v<_Dp>
+ _LIBCPP_HIDE_FROM_ABI static constexpr weak_ordering
+ __go(_Tp&& __t, _Up&& __u, __priority_tag<2>) noexcept
+ {
+ std::partial_ordering __po = (__t <=> __u);
+ if (__po == std::partial_ordering::less) {
+ return std::weak_ordering::less;
+ } else if (__po == std::partial_ordering::equivalent) {
+ return std::weak_ordering::equivalent;
+ } else if (__po == std::partial_ordering::greater) {
+ return std::weak_ordering::greater;
+ } else {
+ // Otherwise, at least one of them is a NaN.
+ bool __t_is_nan = _VSTD::isnan(__t);
+ bool __u_is_nan = _VSTD::isnan(__u);
+ bool __t_is_negative = _VSTD::signbit(__t);
+ bool __u_is_negative = _VSTD::signbit(__u);
+ if (__t_is_nan && __u_is_nan) {
+ return (__u_is_negative <=> __t_is_negative);
+ } else if (__t_is_nan) {
+ return __t_is_negative ? weak_ordering::less : weak_ordering::greater;
+ } else {
+ return __u_is_negative ? weak_ordering::greater : weak_ordering::less;
+ }
+ }
+ }
+
+ template<class _Tp, class _Up>
+ requires is_same_v<decay_t<_Tp>, decay_t<_Up>>
+ _LIBCPP_HIDE_FROM_ABI static constexpr auto
+ __go(_Tp&& __t, _Up&& __u, __priority_tag<1>)
+ noexcept(noexcept(weak_ordering(compare_three_way()(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u)))))
+ -> decltype( weak_ordering(compare_three_way()(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))))
+ { return weak_ordering(compare_three_way()(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))); }
+
+ template<class _Tp, class _Up>
+ requires is_same_v<decay_t<_Tp>, decay_t<_Up>>
+ _LIBCPP_HIDE_FROM_ABI static constexpr auto
+ __go(_Tp&& __t, _Up&& __u, __priority_tag<0>)
+ noexcept(noexcept(weak_ordering(_VSTD::strong_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u)))))
+ -> decltype( weak_ordering(_VSTD::strong_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))))
+ { return weak_ordering(_VSTD::strong_order(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u))); }
+
+ template<class _Tp, class _Up>
+ _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp&& __t, _Up&& __u) const
+ noexcept(noexcept(__go(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u), __priority_tag<3>())))
+ -> decltype( __go(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u), __priority_tag<3>()))
+ { return __go(_VSTD::forward<_Tp>(__t), _VSTD::forward<_Up>(__u), __priority_tag<3>()); }
+ };
+} // namespace __weak_order
+
+inline namespace __cpo {
+ inline constexpr auto weak_order = __weak_order::__fn{};
+} // namespace __cpo
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___COMPARE_WEAK_ORDER
diff --git a/libcxx/include/__config b/libcxx/include/__config
index dbf4383cd6e3..da03e877f753 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -74,10 +74,6 @@
# define _LIBCPP_ABI_FIX_UNORDERED_NODE_POINTER_UB
# define _LIBCPP_ABI_FORWARD_LIST_REMOVE_NODE_POINTER_UB
# define _LIBCPP_ABI_FIX_UNORDERED_CONTAINER_SIZE_TYPE
-// Don't use a nullptr_t simulation type in C++03 instead using C++11 nullptr
-// provided under the alternate keyword __nullptr, which changes the mangling
-// of nullptr_t. This option is ABI incompatible with GCC in C++03 mode.
-# define _LIBCPP_ABI_ALWAYS_USE_CXX11_NULLPTR
// Define a key function for `bad_function_call` in the library, to centralize
// its vtable and typeinfo to libc++ rather than having all other libraries
// using that class define their own copies.
@@ -127,6 +123,23 @@
# endif
#endif
+// By default, don't use a nullptr_t emulation type in C++03.
+//
+// This is technically an ABI break from previous releases; however, it is
+// very unlikely to impact anyone. If a user is impacted by this break,
+// they can return to using the C++03 nullptr emulation by defining
+// _LIBCPP_ABI_USE_CXX03_NULLPTR_EMULATION.
+//
+// This switch will be removed entirely in favour of never providing a
+// C++03 emulation after one release.
+//
+// IMPORTANT: IF YOU ARE READING THIS AND YOU TURN THIS MACRO ON, PLEASE LEAVE
+// A COMMENT ON https://reviews.llvm.org/D109459 OR YOU WILL BE BROKEN
+// IN THE FUTURE WHEN WE REMOVE THE ABILITY TO USE THE C++03 EMULATION.
+#ifndef _LIBCPP_ABI_USE_CXX03_NULLPTR_EMULATION
+# define _LIBCPP_ABI_ALWAYS_USE_CXX11_NULLPTR
+#endif
+
#if defined(_LIBCPP_BUILDING_LIBRARY) || defined(_LIBCPP_ABI_UNSTABLE) || _LIBCPP_ABI_VERSION >= 2
// Enable additional explicit instantiations of iostreams components. This
// reduces the number of weak definitions generated in programs that use
@@ -1056,12 +1069,6 @@ typedef unsigned int char32_t;
# define _LIBCPP_NODISCARD_AFTER_CXX17
#endif
-#if !defined(_LIBCPP_DEBUG) && _LIBCPP_STD_VER > 11
-# define _LIBCPP_CONSTEXPR_IF_NODEBUG constexpr
-#else
-# define _LIBCPP_CONSTEXPR_IF_NODEBUG
-#endif
-
#if __has_attribute(no_destroy)
# define _LIBCPP_NO_DESTROY __attribute__((__no_destroy__))
#else
@@ -1376,10 +1383,12 @@ extern "C" _LIBCPP_FUNC_VIS void __sanitizer_annotate_contiguous_container(
#endif
#if defined(__GNUC__) || defined(__clang__)
-#define _LIBCPP_FORMAT_PRINTF(a, b) \
- __attribute__((__format__(__printf__, a, b)))
+ // The attribute uses 1-based parameter indices for ordinary and static
+ // member functions. For non-static member functions, the implicit `this`
+ // parameter occupies index 1, so explicit parameters start at index 2.
+# define _LIBCPP_ATTRIBUTE_FORMAT(archetype, format_string_index, first_format_arg_index) \
+ __attribute__((__format__(archetype, format_string_index, first_format_arg_index)))
#else
-#define _LIBCPP_FORMAT_PRINTF(a, b)
+# define _LIBCPP_ATTRIBUTE_FORMAT(archetype, format_string_index, first_format_arg_index) /* nothing */
#endif
#endif // __cplusplus
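To make the index convention above concrete, here is a usage sketch with hypothetical declarations (the macro expands to GCC/Clang's __attribute__((__format__(...))) where __config is included):

// Free function: fmt is parameter 2, variadic arguments start at 3.
_LIBCPP_ATTRIBUTE_FORMAT(__printf__, 2, 3)
int log_to(int fd, const char *fmt, ...);

struct Logger {
  // Non-static member function: `this` is parameter 1, so fmt is still
  // index 2 and the variadic arguments still start at 3.
  _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 2, 3)
  int log(const char *fmt, ...);
};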
diff --git a/libcxx/include/__iterator/reverse_iterator.h b/libcxx/include/__iterator/reverse_iterator.h
index f7a948950df2..d06859ee5f39 100644
--- a/libcxx/include/__iterator/reverse_iterator.h
+++ b/libcxx/include/__iterator/reverse_iterator.h
@@ -11,6 +11,8 @@
#define _LIBCPP___ITERATOR_REVERSE_ITERATOR_H
#include <__config>
+#include <__compare/compare_three_way_result.h>
+#include <__compare/three_way_comparable.h>
#include <__iterator/iterator.h>
#include <__iterator/iterator_traits.h>
#include <__memory/addressof.h>
@@ -193,6 +195,16 @@ operator<=(const reverse_iterator<_Iter1>& __x, const reverse_iterator<_Iter2>&
return __x.base() >= __y.base();
}
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+template <class _Iter1, three_way_comparable_with<_Iter1> _Iter2>
+_LIBCPP_HIDE_FROM_ABI constexpr
+compare_three_way_result_t<_Iter1, _Iter2>
+operator<=>(const reverse_iterator<_Iter1>& __x, const reverse_iterator<_Iter2>& __y)
+{
+ return __y.base() <=> __x.base();
+}
+#endif
+
#ifndef _LIBCPP_CXX03_LANG
template <class _Iter1, class _Iter2>
inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14
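
The new operator<=> deliberately compares the bases in reverse, matching the relational operators above. A minimal sketch of the resulting ordering, using raw-pointer iterators (values illustrative; requires C++20):

    #include <compare>
    #include <iterator>

    int main() {
        int a[] = {1, 2, 3};
        std::reverse_iterator<int*> r1(a + 3); // *r1 == 3: first in reverse order
        std::reverse_iterator<int*> r2(a + 2); // *r2 == 2: second in reverse order
        // r1 precedes r2 even though r1.base() > r2.base(), which is why
        // the implementation returns __y.base() <=> __x.base().
        return (r1 <=> r2) == std::strong_ordering::less ? 0 : 1;
    }
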
diff --git a/libcxx/include/__iterator/wrap_iter.h b/libcxx/include/__iterator/wrap_iter.h
index 28872f9fa41a..cfcc9857b3fc 100644
--- a/libcxx/include/__iterator/wrap_iter.h
+++ b/libcxx/include/__iterator/wrap_iter.h
@@ -40,120 +40,129 @@ public:
private:
iterator_type __i;
public:
- _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter() _NOEXCEPT
-#if _LIBCPP_STD_VER > 11
- : __i{}
-#endif
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 __wrap_iter() _NOEXCEPT
+ : __i()
{
#if _LIBCPP_DEBUG_LEVEL == 2
+ if (!__libcpp_is_constant_evaluated())
__get_db()->__insert_i(this);
#endif
}
- template <class _Up> _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG
+ template <class _Up> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11
__wrap_iter(const __wrap_iter<_Up>& __u,
typename enable_if<is_convertible<_Up, iterator_type>::value>::type* = nullptr) _NOEXCEPT
: __i(__u.base())
{
#if _LIBCPP_DEBUG_LEVEL == 2
+ if (!__libcpp_is_constant_evaluated())
__get_db()->__iterator_copy(this, _VSTD::addressof(__u));
#endif
}
#if _LIBCPP_DEBUG_LEVEL == 2
- _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11
__wrap_iter(const __wrap_iter& __x)
: __i(__x.base())
{
+ if (!__libcpp_is_constant_evaluated())
__get_db()->__iterator_copy(this, _VSTD::addressof(__x));
}
- _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11
__wrap_iter& operator=(const __wrap_iter& __x)
{
- if (this != _VSTD::addressof(__x))
+ if (this != _VSTD::addressof(__x) && !__libcpp_is_constant_evaluated())
{
__get_db()->__iterator_copy(this, _VSTD::addressof(__x));
__i = __x.__i;
}
return *this;
}
- _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17
~__wrap_iter()
{
+ if (!__libcpp_is_constant_evaluated())
__get_db()->__erase_i(this);
}
#endif
- _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG reference operator*() const _NOEXCEPT
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 reference operator*() const _NOEXCEPT
{
#if _LIBCPP_DEBUG_LEVEL == 2
+ if (!__libcpp_is_constant_evaluated())
_LIBCPP_ASSERT(__get_const_db()->__dereferenceable(this),
"Attempted to dereference a non-dereferenceable iterator");
#endif
return *__i;
}
- _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG pointer operator->() const _NOEXCEPT
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 pointer operator->() const _NOEXCEPT
{
#if _LIBCPP_DEBUG_LEVEL == 2
+ if (!__libcpp_is_constant_evaluated())
_LIBCPP_ASSERT(__get_const_db()->__dereferenceable(this),
"Attempted to dereference a non-dereferenceable iterator");
#endif
return _VSTD::__to_address(__i);
}
- _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter& operator++() _NOEXCEPT
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 __wrap_iter& operator++() _NOEXCEPT
{
#if _LIBCPP_DEBUG_LEVEL == 2
+ if (!__libcpp_is_constant_evaluated())
_LIBCPP_ASSERT(__get_const_db()->__dereferenceable(this),
"Attempted to increment a non-incrementable iterator");
#endif
++__i;
return *this;
}
- _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter operator++(int) _NOEXCEPT
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 __wrap_iter operator++(int) _NOEXCEPT
{__wrap_iter __tmp(*this); ++(*this); return __tmp;}
- _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter& operator--() _NOEXCEPT
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 __wrap_iter& operator--() _NOEXCEPT
{
#if _LIBCPP_DEBUG_LEVEL == 2
+ if (!__libcpp_is_constant_evaluated())
_LIBCPP_ASSERT(__get_const_db()->__decrementable(this),
"Attempted to decrement a non-decrementable iterator");
#endif
--__i;
return *this;
}
- _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter operator--(int) _NOEXCEPT
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 __wrap_iter operator--(int) _NOEXCEPT
{__wrap_iter __tmp(*this); --(*this); return __tmp;}
- _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter operator+ (difference_type __n) const _NOEXCEPT
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 __wrap_iter operator+ (difference_type __n) const _NOEXCEPT
{__wrap_iter __w(*this); __w += __n; return __w;}
- _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter& operator+=(difference_type __n) _NOEXCEPT
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 __wrap_iter& operator+=(difference_type __n) _NOEXCEPT
{
#if _LIBCPP_DEBUG_LEVEL == 2
+ if (!__libcpp_is_constant_evaluated())
_LIBCPP_ASSERT(__get_const_db()->__addable(this, __n),
"Attempted to add/subtract an iterator outside its valid range");
#endif
__i += __n;
return *this;
}
- _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter operator- (difference_type __n) const _NOEXCEPT
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 __wrap_iter operator- (difference_type __n) const _NOEXCEPT
{return *this + (-__n);}
- _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter& operator-=(difference_type __n) _NOEXCEPT
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 __wrap_iter& operator-=(difference_type __n) _NOEXCEPT
{*this += -__n; return *this;}
- _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG reference operator[](difference_type __n) const _NOEXCEPT
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 reference operator[](difference_type __n) const _NOEXCEPT
{
#if _LIBCPP_DEBUG_LEVEL == 2
+ if (!__libcpp_is_constant_evaluated())
_LIBCPP_ASSERT(__get_const_db()->__subscriptable(this, __n),
"Attempted to subscript an iterator outside its valid range");
#endif
return __i[__n];
}
- _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG iterator_type base() const _NOEXCEPT {return __i;}
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 iterator_type base() const _NOEXCEPT {return __i;}
private:
#if _LIBCPP_DEBUG_LEVEL == 2
- _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter(const void* __p, iterator_type __x) : __i(__x)
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 __wrap_iter(const void* __p, iterator_type __x) : __i(__x)
{
+ if (!__libcpp_is_constant_evaluated())
__get_db()->__insert_ic(this, __p);
}
#else
- _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG __wrap_iter(iterator_type __x) _NOEXCEPT : __i(__x) {}
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11 __wrap_iter(iterator_type __x) _NOEXCEPT : __i(__x) {}
#endif
template <class _Up> friend class __wrap_iter;
@@ -163,24 +172,25 @@ private:
};
template <class _Iter1>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR
bool operator==(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPT
{
return __x.base() == __y.base();
}
template <class _Iter1, class _Iter2>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR
bool operator==(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT
{
return __x.base() == __y.base();
}
template <class _Iter1>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11
bool operator<(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPT
{
#if _LIBCPP_DEBUG_LEVEL == 2
+ if (!__libcpp_is_constant_evaluated())
_LIBCPP_ASSERT(__get_const_db()->__less_than_comparable(_VSTD::addressof(__x), _VSTD::addressof(__y)),
"Attempted to compare incomparable iterators");
#endif
@@ -188,10 +198,11 @@ bool operator<(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _
}
template <class _Iter1, class _Iter2>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11
bool operator<(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT
{
#if _LIBCPP_DEBUG_LEVEL == 2
+ if (!__libcpp_is_constant_evaluated())
_LIBCPP_ASSERT(__get_const_db()->__less_than_comparable(&__x, &__y),
"Attempted to compare incomparable iterators");
#endif
@@ -199,63 +210,63 @@ bool operator<(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _
}
template <class _Iter1>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR
bool operator!=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPT
{
return !(__x == __y);
}
template <class _Iter1, class _Iter2>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR
bool operator!=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT
{
return !(__x == __y);
}
template <class _Iter1>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR
bool operator>(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPT
{
return __y < __x;
}
template <class _Iter1, class _Iter2>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR
bool operator>(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT
{
return __y < __x;
}
template <class _Iter1>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR
bool operator>=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPT
{
return !(__x < __y);
}
template <class _Iter1, class _Iter2>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR
bool operator>=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT
{
return !(__x < __y);
}
template <class _Iter1>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR
bool operator<=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPT
{
return !(__y < __x);
}
template <class _Iter1, class _Iter2>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR
bool operator<=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT
{
return !(__y < __x);
}
template <class _Iter1, class _Iter2>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11
#ifndef _LIBCPP_CXX03_LANG
auto operator-(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT
-> decltype(__x.base() - __y.base())
@@ -265,6 +276,7 @@ operator-(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXC
#endif // C++03
{
#if _LIBCPP_DEBUG_LEVEL == 2
+ if (!__libcpp_is_constant_evaluated())
_LIBCPP_ASSERT(__get_const_db()->__less_than_comparable(_VSTD::addressof(__x), _VSTD::addressof(__y)),
"Attempted to subtract incompatible iterators");
#endif
@@ -272,7 +284,7 @@ operator-(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXC
}
template <class _Iter1>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_IF_NODEBUG
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX11
__wrap_iter<_Iter1> operator+(typename __wrap_iter<_Iter1>::difference_type __n, __wrap_iter<_Iter1> __x) _NOEXCEPT
{
__x += __n;
diff --git a/libcxx/include/__memory/allocator_traits.h b/libcxx/include/__memory/allocator_traits.h
index cc32352ae11c..f4c8fa02d650 100644
--- a/libcxx/include/__memory/allocator_traits.h
+++ b/libcxx/include/__memory/allocator_traits.h
@@ -349,14 +349,6 @@ struct _LIBCPP_TEMPLATE_VIS allocator_traits
}
};
-// A version of `allocator_traits` for internal usage that SFINAEs away if the
-// given allocator doesn't have a nested `value_type`. This helps avoid hard
-// errors when forming implicit deduction guides for a container that has an
-// invalid Allocator type. See https://wg21.link/LWGXXXXX.
-// TODO(varconst): use the actual link once available.
-template <class _Alloc, class _ValueType = typename _Alloc::value_type>
-struct _LIBCPP_TEMPLATE_VIS __allocator_traits : allocator_traits<_Alloc> {};
-
template <class _Traits, class _Tp>
struct __rebind_alloc_helper {
#ifndef _LIBCPP_CXX03_LANG
diff --git a/libcxx/include/__memory/unique_ptr.h b/libcxx/include/__memory/unique_ptr.h
index 838960269c97..433120394269 100644
--- a/libcxx/include/__memory/unique_ptr.h
+++ b/libcxx/include/__memory/unique_ptr.h
@@ -174,17 +174,17 @@ public:
template <bool _Dummy = true,
class = _EnableIfDeleterDefaultConstructible<_Dummy> >
_LIBCPP_INLINE_VISIBILITY
- _LIBCPP_CONSTEXPR unique_ptr() _NOEXCEPT : __ptr_(pointer(), __default_init_tag()) {}
+ _LIBCPP_CONSTEXPR unique_ptr() _NOEXCEPT : __ptr_(__value_init_tag(), __value_init_tag()) {}
template <bool _Dummy = true,
class = _EnableIfDeleterDefaultConstructible<_Dummy> >
_LIBCPP_INLINE_VISIBILITY
- _LIBCPP_CONSTEXPR unique_ptr(nullptr_t) _NOEXCEPT : __ptr_(pointer(), __default_init_tag()) {}
+ _LIBCPP_CONSTEXPR unique_ptr(nullptr_t) _NOEXCEPT : __ptr_(__value_init_tag(), __value_init_tag()) {}
template <bool _Dummy = true,
class = _EnableIfDeleterDefaultConstructible<_Dummy> >
_LIBCPP_INLINE_VISIBILITY
- explicit unique_ptr(pointer __p) _NOEXCEPT : __ptr_(__p, __default_init_tag()) {}
+ explicit unique_ptr(pointer __p) _NOEXCEPT : __ptr_(__p, __value_init_tag()) {}
template <bool _Dummy = true,
class = _EnableIfDeleterConstructible<_LValRefType<_Dummy> > >
@@ -226,7 +226,7 @@ public:
typename enable_if<is_convertible<_Up*, _Tp*>::value &&
is_same<_Dp, default_delete<_Tp> >::value,
__nat>::type = __nat()) _NOEXCEPT
- : __ptr_(__p.release(), __default_init_tag()) {}
+ : __ptr_(__p.release(), __value_init_tag()) {}
#endif
_LIBCPP_INLINE_VISIBILITY
@@ -397,19 +397,19 @@ public:
template <bool _Dummy = true,
class = _EnableIfDeleterDefaultConstructible<_Dummy> >
_LIBCPP_INLINE_VISIBILITY
- _LIBCPP_CONSTEXPR unique_ptr() _NOEXCEPT : __ptr_(pointer(), __default_init_tag()) {}
+ _LIBCPP_CONSTEXPR unique_ptr() _NOEXCEPT : __ptr_(__value_init_tag(), __value_init_tag()) {}
template <bool _Dummy = true,
class = _EnableIfDeleterDefaultConstructible<_Dummy> >
_LIBCPP_INLINE_VISIBILITY
- _LIBCPP_CONSTEXPR unique_ptr(nullptr_t) _NOEXCEPT : __ptr_(pointer(), __default_init_tag()) {}
+ _LIBCPP_CONSTEXPR unique_ptr(nullptr_t) _NOEXCEPT : __ptr_(__value_init_tag(), __value_init_tag()) {}
template <class _Pp, bool _Dummy = true,
class = _EnableIfDeleterDefaultConstructible<_Dummy>,
class = _EnableIfPointerConvertible<_Pp> >
_LIBCPP_INLINE_VISIBILITY
explicit unique_ptr(_Pp __p) _NOEXCEPT
- : __ptr_(__p, __default_init_tag()) {}
+ : __ptr_(__p, __value_init_tag()) {}
template <class _Pp, bool _Dummy = true,
class = _EnableIfDeleterConstructible<_LValRefType<_Dummy> >,
diff --git a/libcxx/include/__numeric/accumulate.h b/libcxx/include/__numeric/accumulate.h
new file mode 100644
index 000000000000..fcdad58df141
--- /dev/null
+++ b/libcxx/include/__numeric/accumulate.h
@@ -0,0 +1,52 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___NUMERIC_ACCUMULATE_H
+#define _LIBCPP___NUMERIC_ACCUMULATE_H
+
+#include <__config>
+#include <__utility/move.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template <class _InputIterator, class _Tp>
+_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
+_Tp
+accumulate(_InputIterator __first, _InputIterator __last, _Tp __init)
+{
+ for (; __first != __last; ++__first)
+#if _LIBCPP_STD_VER > 17
+ __init = _VSTD::move(__init) + *__first;
+#else
+ __init = __init + *__first;
+#endif
+ return __init;
+}
+
+template <class _InputIterator, class _Tp, class _BinaryOperation>
+_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
+_Tp
+accumulate(_InputIterator __first, _InputIterator __last, _Tp __init, _BinaryOperation __binary_op)
+{
+ for (; __first != __last; ++__first)
+#if _LIBCPP_STD_VER > 17
+ __init = __binary_op(_VSTD::move(__init), *__first);
+#else
+ __init = __binary_op(__init, *__first);
+#endif
+ return __init;
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___NUMERIC_ACCUMULATE_H
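
As a usage sketch of this header's contents (values illustrative): std::accumulate is a strict left-to-right fold, and the C++20 branch above moves the accumulator rather than copying it on every step:

    #include <numeric>
    #include <string>
    #include <vector>

    int main() {
        std::vector<int> v{1, 2, 3, 4};
        int sum  = std::accumulate(v.begin(), v.end(), 0); // 0+1+2+3+4 == 10
        int prod = std::accumulate(v.begin(), v.end(), 1,
                                   [](int a, int b) { return a * b; }); // 24
        // Under C++20 the accumulator is moved into each step, so folding
        // strings no longer copies the partial result every iteration.
        std::vector<std::string> w{"a", "b", "c"};
        std::string joined = std::accumulate(w.begin(), w.end(), std::string());
        return (sum == 10 && prod == 24 && joined == "abc") ? 0 : 1;
    }
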
diff --git a/libcxx/include/__numeric/adjacent_difference.h b/libcxx/include/__numeric/adjacent_difference.h
new file mode 100644
index 000000000000..5c712ecdf77d
--- /dev/null
+++ b/libcxx/include/__numeric/adjacent_difference.h
@@ -0,0 +1,72 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___NUMERIC_ADJACENT_DIFFERENCE_H
+#define _LIBCPP___NUMERIC_ADJACENT_DIFFERENCE_H
+
+#include <__config>
+#include <__iterator/iterator_traits.h>
+#include <__utility/move.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template <class _InputIterator, class _OutputIterator>
+_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
+_OutputIterator
+adjacent_difference(_InputIterator __first, _InputIterator __last, _OutputIterator __result)
+{
+ if (__first != __last)
+ {
+ typename iterator_traits<_InputIterator>::value_type __acc(*__first);
+ *__result = __acc;
+ for (++__first, (void) ++__result; __first != __last; ++__first, (void) ++__result)
+ {
+ typename iterator_traits<_InputIterator>::value_type __val(*__first);
+#if _LIBCPP_STD_VER > 17
+ *__result = __val - _VSTD::move(__acc);
+#else
+ *__result = __val - __acc;
+#endif
+ __acc = _VSTD::move(__val);
+ }
+ }
+ return __result;
+}
+
+template <class _InputIterator, class _OutputIterator, class _BinaryOperation>
+_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
+_OutputIterator
+adjacent_difference(_InputIterator __first, _InputIterator __last, _OutputIterator __result,
+ _BinaryOperation __binary_op)
+{
+ if (__first != __last)
+ {
+ typename iterator_traits<_InputIterator>::value_type __acc(*__first);
+ *__result = __acc;
+ for (++__first, (void) ++__result; __first != __last; ++__first, (void) ++__result)
+ {
+ typename iterator_traits<_InputIterator>::value_type __val(*__first);
+#if _LIBCPP_STD_VER > 17
+ *__result = __binary_op(__val, _VSTD::move(__acc));
+#else
+ *__result = __binary_op(__val, __acc);
+#endif
+ __acc = _VSTD::move(__val);
+ }
+ }
+ return __result;
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___NUMERIC_ADJACENT_DIFFERENCE_H
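
A sketch of the semantics (values illustrative). The `__acc`/`__val` staging above, which copies each element before writing the output, is also what makes overlapping input and output ranges well-defined:

    #include <numeric>
    #include <vector>

    int main() {
        std::vector<int> in{2, 4, 7, 11};
        std::vector<int> out(in.size());
        // First output is in[0]; each later output is in[i] - in[i-1]:
        std::adjacent_difference(in.begin(), in.end(), out.begin());
        // out == {2, 2, 3, 4}
        return out == std::vector<int>{2, 2, 3, 4} ? 0 : 1;
    }
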
diff --git a/libcxx/include/__numeric/exclusive_scan.h b/libcxx/include/__numeric/exclusive_scan.h
new file mode 100644
index 000000000000..c0c89b38805d
--- /dev/null
+++ b/libcxx/include/__numeric/exclusive_scan.h
@@ -0,0 +1,53 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___NUMERIC_EXCLUSIVE_SCAN_H
+#define _LIBCPP___NUMERIC_EXCLUSIVE_SCAN_H
+
+#include <__config>
+#include <__functional/operations.h>
+#include <__utility/move.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 14
+
+template <class _InputIterator, class _OutputIterator, class _Tp, class _BinaryOp>
+_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _OutputIterator
+exclusive_scan(_InputIterator __first, _InputIterator __last, _OutputIterator __result, _Tp __init, _BinaryOp __b) {
+ if (__first != __last) {
+ _Tp __tmp(__b(__init, *__first));
+ while (true) {
+ *__result = _VSTD::move(__init);
+ ++__result;
+ ++__first;
+ if (__first == __last)
+ break;
+ __init = _VSTD::move(__tmp);
+ __tmp = __b(__init, *__first);
+ }
+ }
+ return __result;
+}
+
+template <class _InputIterator, class _OutputIterator, class _Tp>
+_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _OutputIterator
+exclusive_scan(_InputIterator __first, _InputIterator __last, _OutputIterator __result, _Tp __init) {
+ return _VSTD::exclusive_scan(__first, __last, __result, __init, _VSTD::plus<>());
+}
+
+#endif // _LIBCPP_STD_VER > 14
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___NUMERIC_EXCLUSIVE_SCAN_H
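
A sketch of the semantics (values illustrative): each output position excludes its own input element, and the one-element lookahead through `__tmp` above makes in-place scans valid:

    #include <numeric>
    #include <vector>

    int main() {
        std::vector<int> v{1, 2, 3, 4};
        std::vector<int> out(v.size());
        // out[i] sums the init value and every element *before* v[i]:
        std::exclusive_scan(v.begin(), v.end(), out.begin(), 0); // {0, 1, 3, 6}
        // Each element is read before its slot is overwritten, so scanning
        // in place is fine too:
        std::exclusive_scan(v.begin(), v.end(), v.begin(), 0);   // v == {0, 1, 3, 6}
        return v == out ? 0 : 1;
    }
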
diff --git a/libcxx/include/__numeric/gcd_lcm.h b/libcxx/include/__numeric/gcd_lcm.h
new file mode 100644
index 000000000000..34c0e533c928
--- /dev/null
+++ b/libcxx/include/__numeric/gcd_lcm.h
@@ -0,0 +1,96 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___NUMERIC_GCD_LCM_H
+#define _LIBCPP___NUMERIC_GCD_LCM_H
+
+#include <__config>
+#include <__debug>
+#include <limits>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 14
+
+template <typename _Result, typename _Source, bool _IsSigned = is_signed<_Source>::value> struct __ct_abs;
+
+template <typename _Result, typename _Source>
+struct __ct_abs<_Result, _Source, true> {
+ _LIBCPP_CONSTEXPR _LIBCPP_INLINE_VISIBILITY
+ _Result operator()(_Source __t) const noexcept
+ {
+ if (__t >= 0) return __t;
+ if (__t == numeric_limits<_Source>::min()) return -static_cast<_Result>(__t);
+ return -__t;
+ }
+};
+
+template <typename _Result, typename _Source>
+struct __ct_abs<_Result, _Source, false> {
+ _LIBCPP_CONSTEXPR _LIBCPP_INLINE_VISIBILITY
+ _Result operator()(_Source __t) const noexcept { return __t; }
+};
+
+
+template<class _Tp>
+_LIBCPP_CONSTEXPR _LIBCPP_HIDDEN
+_Tp __gcd(_Tp __m, _Tp __n)
+{
+ static_assert((!is_signed<_Tp>::value), "");
+ return __n == 0 ? __m : _VSTD::__gcd<_Tp>(__n, __m % __n);
+}
+
+template<class _Tp, class _Up>
+_LIBCPP_CONSTEXPR _LIBCPP_INLINE_VISIBILITY
+common_type_t<_Tp,_Up>
+gcd(_Tp __m, _Up __n)
+{
+ static_assert((is_integral<_Tp>::value && is_integral<_Up>::value), "Arguments to gcd must be integer types");
+ static_assert((!is_same<typename remove_cv<_Tp>::type, bool>::value), "First argument to gcd cannot be bool" );
+ static_assert((!is_same<typename remove_cv<_Up>::type, bool>::value), "Second argument to gcd cannot be bool" );
+ using _Rp = common_type_t<_Tp,_Up>;
+ using _Wp = make_unsigned_t<_Rp>;
+ return static_cast<_Rp>(_VSTD::__gcd(
+ static_cast<_Wp>(__ct_abs<_Rp, _Tp>()(__m)),
+ static_cast<_Wp>(__ct_abs<_Rp, _Up>()(__n))));
+}
+
+template<class _Tp, class _Up>
+_LIBCPP_CONSTEXPR _LIBCPP_INLINE_VISIBILITY
+common_type_t<_Tp,_Up>
+lcm(_Tp __m, _Up __n)
+{
+ static_assert((is_integral<_Tp>::value && is_integral<_Up>::value), "Arguments to lcm must be integer types");
+ static_assert((!is_same<typename remove_cv<_Tp>::type, bool>::value), "First argument to lcm cannot be bool" );
+ static_assert((!is_same<typename remove_cv<_Up>::type, bool>::value), "Second argument to lcm cannot be bool" );
+ if (__m == 0 || __n == 0)
+ return 0;
+
+ using _Rp = common_type_t<_Tp,_Up>;
+ _Rp __val1 = __ct_abs<_Rp, _Tp>()(__m) / _VSTD::gcd(__m, __n);
+ _Rp __val2 = __ct_abs<_Rp, _Up>()(__n);
+ _LIBCPP_ASSERT((numeric_limits<_Rp>::max() / __val1 > __val2), "Overflow in lcm");
+ return __val1 * __val2;
+}
+
+#endif // _LIBCPP_STD_VER > 14
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___NUMERIC_GCD_LCM_H
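
The `__ct_abs` helper exists so that the most negative value of a narrower source type can be negated in the wider result type without overflow, and `__gcd` itself runs on the unsigned common type. A usage sketch (values illustrative; requires C++17):

    #include <numeric>

    int main() {
        static_assert(std::gcd(12, 18) == 6);
        static_assert(std::lcm(4, 6) == 12);
        // Signs are stripped and mixed integer types are allowed; the
        // result has the common type of the two arguments.
        static_assert(std::gcd(-12L, 18) == 6L);
        static_assert(std::lcm(-4, 6) == 12);
        return 0;
    }
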
diff --git a/libcxx/include/__numeric/inclusive_scan.h b/libcxx/include/__numeric/inclusive_scan.h
new file mode 100644
index 000000000000..a6b005075835
--- /dev/null
+++ b/libcxx/include/__numeric/inclusive_scan.h
@@ -0,0 +1,60 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___NUMERIC_INCLUSIVE_SCAN_H
+#define _LIBCPP___NUMERIC_INCLUSIVE_SCAN_H
+
+#include <__config>
+#include <__functional/operations.h>
+#include <__iterator/iterator_traits.h>
+#include <__utility/move.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 14
+
+template <class _InputIterator, class _OutputIterator, class _Tp, class _BinaryOp>
+_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _OutputIterator
+inclusive_scan(_InputIterator __first, _InputIterator __last, _OutputIterator __result, _BinaryOp __b, _Tp __init) {
+ for (; __first != __last; ++__first, (void)++__result) {
+ __init = __b(__init, *__first);
+ *__result = __init;
+ }
+ return __result;
+}
+
+template <class _InputIterator, class _OutputIterator, class _BinaryOp>
+_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _OutputIterator
+inclusive_scan(_InputIterator __first, _InputIterator __last, _OutputIterator __result, _BinaryOp __b) {
+ if (__first != __last) {
+ typename iterator_traits<_InputIterator>::value_type __init = *__first;
+ *__result++ = __init;
+ if (++__first != __last)
+ return _VSTD::inclusive_scan(__first, __last, __result, __b, __init);
+ }
+
+ return __result;
+}
+
+template <class _InputIterator, class _OutputIterator>
+_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _OutputIterator inclusive_scan(_InputIterator __first,
+ _InputIterator __last,
+ _OutputIterator __result) {
+ return _VSTD::inclusive_scan(__first, __last, __result, _VSTD::plus<>());
+}
+
+#endif // _LIBCPP_STD_VER > 14
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___NUMERIC_INCLUSIVE_SCAN_H
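
A sketch of the semantics (values illustrative). Note the parameter order of the seeded overload above: the initial value comes after the binary operation, the reverse of exclusive_scan:

    #include <functional>
    #include <numeric>
    #include <vector>

    int main() {
        std::vector<int> v{1, 2, 3, 4};
        std::vector<int> out(v.size());
        // out[i] sums everything up to *and including* v[i]: {1, 3, 6, 10}
        std::inclusive_scan(v.begin(), v.end(), out.begin());
        // With an explicit operation and a seed, which comes last:
        std::inclusive_scan(v.begin(), v.end(), out.begin(), std::plus<>(), 100);
        // out == {101, 103, 106, 110}
        return out[3] == 110 ? 0 : 1;
    }
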
diff --git a/libcxx/include/__numeric/inner_product.h b/libcxx/include/__numeric/inner_product.h
new file mode 100644
index 000000000000..004acdde6a0c
--- /dev/null
+++ b/libcxx/include/__numeric/inner_product.h
@@ -0,0 +1,53 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___NUMERIC_INNER_PRODUCT_H
+#define _LIBCPP___NUMERIC_INNER_PRODUCT_H
+
+#include <__config>
+#include <__utility/move.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template <class _InputIterator1, class _InputIterator2, class _Tp>
+_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
+_Tp
+inner_product(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _Tp __init)
+{
+ for (; __first1 != __last1; ++__first1, (void) ++__first2)
+#if _LIBCPP_STD_VER > 17
+ __init = _VSTD::move(__init) + *__first1 * *__first2;
+#else
+ __init = __init + *__first1 * *__first2;
+#endif
+ return __init;
+}
+
+template <class _InputIterator1, class _InputIterator2, class _Tp, class _BinaryOperation1, class _BinaryOperation2>
+_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
+_Tp
+inner_product(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2,
+ _Tp __init, _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2)
+{
+ for (; __first1 != __last1; ++__first1, (void) ++__first2)
+#if _LIBCPP_STD_VER > 17
+ __init = __binary_op1(_VSTD::move(__init), __binary_op2(*__first1, *__first2));
+#else
+ __init = __binary_op1(__init, __binary_op2(*__first1, *__first2));
+#endif
+ return __init;
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___NUMERIC_INNER_PRODUCT_H
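
A usage sketch (values illustrative): the default form is a dot product, and the generalized form replaces both the addition and the multiplication:

    #include <functional>
    #include <numeric>
    #include <vector>

    int main() {
        std::vector<int> a{1, 2, 3};
        std::vector<int> b{4, 5, 6};
        // 0 + 1*4 + 2*5 + 3*6 == 32
        int dot = std::inner_product(a.begin(), a.end(), b.begin(), 0);
        // With custom operations: count positions where the sequences differ.
        int mismatches = std::inner_product(
            a.begin(), a.end(), b.begin(), 0,
            std::plus<int>(),
            [](int x, int y) { return x != y ? 1 : 0; });
        return (dot == 32 && mismatches == 3) ? 0 : 1;
    }
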
diff --git a/libcxx/include/__numeric/iota.h b/libcxx/include/__numeric/iota.h
new file mode 100644
index 000000000000..b30e0e0a5484
--- /dev/null
+++ b/libcxx/include/__numeric/iota.h
@@ -0,0 +1,32 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___NUMERIC_IOTA_H
+#define _LIBCPP___NUMERIC_IOTA_H
+
+#include <__config>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template <class _ForwardIterator, class _Tp>
+_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
+void
+iota(_ForwardIterator __first, _ForwardIterator __last, _Tp __value_)
+{
+ for (; __first != __last; ++__first, (void) ++__value_)
+ *__first = __value_;
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___NUMERIC_IOTA_H
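
A usage sketch (values illustrative):

    #include <numeric>
    #include <vector>

    int main() {
        std::vector<int> v(5);
        std::iota(v.begin(), v.end(), 10); // v == {10, 11, 12, 13, 14}
        return v[4] == 14 ? 0 : 1;
    }
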
diff --git a/libcxx/include/__numeric/midpoint.h b/libcxx/include/__numeric/midpoint.h
new file mode 100644
index 000000000000..668030c46bcb
--- /dev/null
+++ b/libcxx/include/__numeric/midpoint.h
@@ -0,0 +1,85 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___NUMERIC_MIDPOINT_H
+#define _LIBCPP___NUMERIC_MIDPOINT_H
+
+#include <__config>
+#include <limits>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17
+template <class _Tp>
+_LIBCPP_INLINE_VISIBILITY constexpr
+enable_if_t<is_integral_v<_Tp> && !is_same_v<bool, _Tp> && !is_null_pointer_v<_Tp>, _Tp>
+midpoint(_Tp __a, _Tp __b) noexcept
+_LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK
+{
+ using _Up = make_unsigned_t<_Tp>;
+ constexpr _Up __bitshift = numeric_limits<_Up>::digits - 1;
+
+ _Up __diff = _Up(__b) - _Up(__a);
+ _Up __sign_bit = __b < __a;
+
+ _Up __half_diff = (__diff / 2) + (__sign_bit << __bitshift) + (__sign_bit & __diff);
+
+ return __a + __half_diff;
+}
+
+
+template <class _TPtr>
+_LIBCPP_INLINE_VISIBILITY constexpr
+enable_if_t<is_pointer_v<_TPtr>
+ && is_object_v<remove_pointer_t<_TPtr>>
+ && ! is_void_v<remove_pointer_t<_TPtr>>
+ && (sizeof(remove_pointer_t<_TPtr>) > 0), _TPtr>
+midpoint(_TPtr __a, _TPtr __b) noexcept
+{
+ return __a + _VSTD::midpoint(ptrdiff_t(0), __b - __a);
+}
+
+
+template <typename _Tp>
+constexpr int __sign(_Tp __val) {
+ return (_Tp(0) < __val) - (__val < _Tp(0));
+}
+
+template <typename _Fp>
+constexpr _Fp __fp_abs(_Fp __f) { return __f >= 0 ? __f : -__f; }
+
+template <class _Fp>
+_LIBCPP_INLINE_VISIBILITY constexpr
+enable_if_t<is_floating_point_v<_Fp>, _Fp>
+midpoint(_Fp __a, _Fp __b) noexcept
+{
+ constexpr _Fp __lo = numeric_limits<_Fp>::min()*2;
+ constexpr _Fp __hi = numeric_limits<_Fp>::max()/2;
+ return __fp_abs(__a) <= __hi && __fp_abs(__b) <= __hi ? // typical case: overflow is impossible
+ (__a + __b)/2 : // always correctly rounded
+ __fp_abs(__a) < __lo ? __a + __b/2 : // not safe to halve a
+ __fp_abs(__b) < __lo ? __a/2 + __b : // not safe to halve b
+ __a/2 + __b/2; // otherwise correctly rounded
+}
+
+#endif // _LIBCPP_STD_VER > 17
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___NUMERIC_MIDPOINT_H
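
The integer overload above avoids overflow by working with the unsigned difference `__b - __a` plus a sign correction rather than with the sum, and odd distances round toward the first argument. A sketch of the observable behaviour (values illustrative; requires C++20):

    #include <climits>
    #include <numeric>

    int main() {
        // A naive (a + b) / 2 overflows here; std::midpoint cannot.
        static_assert(std::midpoint(INT_MAX, INT_MAX - 2) == INT_MAX - 1);
        // Odd distances round toward the first argument:
        static_assert(std::midpoint(0, 7) == 3);
        static_assert(std::midpoint(7, 0) == 4);
        // The floating-point overload likewise guards against overflow:
        static_assert(std::midpoint(1e308, 1e308) == 1e308);
        return 0;
    }
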
diff --git a/libcxx/include/__numeric/partial_sum.h b/libcxx/include/__numeric/partial_sum.h
new file mode 100644
index 000000000000..9acee3afc2b0
--- /dev/null
+++ b/libcxx/include/__numeric/partial_sum.h
@@ -0,0 +1,70 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___NUMERIC_PARTIAL_SUM_H
+#define _LIBCPP___NUMERIC_PARTIAL_SUM_H
+
+#include <__config>
+#include <__iterator/iterator_traits.h>
+#include <__utility/move.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template <class _InputIterator, class _OutputIterator>
+_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
+_OutputIterator
+partial_sum(_InputIterator __first, _InputIterator __last, _OutputIterator __result)
+{
+ if (__first != __last)
+ {
+ typename iterator_traits<_InputIterator>::value_type __t(*__first);
+ *__result = __t;
+ for (++__first, (void) ++__result; __first != __last; ++__first, (void) ++__result)
+ {
+#if _LIBCPP_STD_VER > 17
+ __t = _VSTD::move(__t) + *__first;
+#else
+ __t = __t + *__first;
+#endif
+ *__result = __t;
+ }
+ }
+ return __result;
+}
+
+template <class _InputIterator, class _OutputIterator, class _BinaryOperation>
+_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
+_OutputIterator
+partial_sum(_InputIterator __first, _InputIterator __last, _OutputIterator __result,
+ _BinaryOperation __binary_op)
+{
+ if (__first != __last)
+ {
+ typename iterator_traits<_InputIterator>::value_type __t(*__first);
+ *__result = __t;
+ for (++__first, (void) ++__result; __first != __last; ++__first, (void) ++__result)
+ {
+#if _LIBCPP_STD_VER > 17
+ __t = __binary_op(_VSTD::move(__t), *__first);
+#else
+ __t = __binary_op(__t, *__first);
+#endif
+ *__result = __t;
+ }
+ }
+ return __result;
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___NUMERIC_PARTIAL_SUM_H
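
A usage sketch (values illustrative). Unlike reduce and the scans, partial_sum guarantees strict left-to-right application, so the operation need not be associative:

    #include <numeric>
    #include <vector>

    int main() {
        std::vector<int> v{1, 2, 3, 4};
        std::vector<int> out(v.size());
        // Running sums: out == {1, 3, 6, 10}
        std::partial_sum(v.begin(), v.end(), out.begin());
        return out[3] == 10 ? 0 : 1;
    }
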
diff --git a/libcxx/include/__numeric/reduce.h b/libcxx/include/__numeric/reduce.h
new file mode 100644
index 000000000000..90e4d238d868
--- /dev/null
+++ b/libcxx/include/__numeric/reduce.h
@@ -0,0 +1,47 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___NUMERIC_REDUCE_H
+#define _LIBCPP___NUMERIC_REDUCE_H
+
+#include <__config>
+#include <__functional/operations.h>
+#include <__iterator/iterator_traits.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 14
+template <class _InputIterator, class _Tp, class _BinaryOp>
+_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _Tp reduce(_InputIterator __first, _InputIterator __last,
+ _Tp __init, _BinaryOp __b) {
+ for (; __first != __last; ++__first)
+ __init = __b(__init, *__first);
+ return __init;
+}
+
+template <class _InputIterator, class _Tp>
+_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _Tp reduce(_InputIterator __first, _InputIterator __last,
+ _Tp __init) {
+ return _VSTD::reduce(__first, __last, __init, _VSTD::plus<>());
+}
+
+template <class _InputIterator>
+_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 typename iterator_traits<_InputIterator>::value_type
+reduce(_InputIterator __first, _InputIterator __last) {
+ return _VSTD::reduce(__first, __last, typename iterator_traits<_InputIterator>::value_type{});
+}
+#endif
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___NUMERIC_REDUCE_H
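
A usage sketch (values illustrative):

    #include <numeric>
    #include <vector>

    int main() {
        std::vector<int> v{1, 2, 3, 4};
        // reduce may regroup and reorder applications, so the operation
        // should be associative and commutative; the serial implementation
        // above is simply a left fold.
        int sum   = std::reduce(v.begin(), v.end());     // init defaults to int{}
        int sum10 = std::reduce(v.begin(), v.end(), 10); // 20
        return (sum == 10 && sum10 == 20) ? 0 : 1;
    }
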
diff --git a/libcxx/include/__numeric/transform_exclusive_scan.h b/libcxx/include/__numeric/transform_exclusive_scan.h
new file mode 100644
index 000000000000..45b3077f6649
--- /dev/null
+++ b/libcxx/include/__numeric/transform_exclusive_scan.h
@@ -0,0 +1,49 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___NUMERIC_TRANSFORM_EXCLUSIVE_SCAN_H
+#define _LIBCPP___NUMERIC_TRANSFORM_EXCLUSIVE_SCAN_H
+
+#include <__config>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 14
+
+template <class _InputIterator, class _OutputIterator, class _Tp,
+ class _BinaryOp, class _UnaryOp>
+_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
+_OutputIterator
+transform_exclusive_scan(_InputIterator __first, _InputIterator __last,
+ _OutputIterator __result, _Tp __init,
+ _BinaryOp __b, _UnaryOp __u)
+{
+ if (__first != __last)
+ {
+ _Tp __saved = __init;
+ do
+ {
+ __init = __b(__init, __u(*__first));
+ *__result = __saved;
+ __saved = __init;
+ ++__result;
+ } while (++__first != __last);
+ }
+ return __result;
+}
+
+#endif // _LIBCPP_STD_VER > 14
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___NUMERIC_TRANSFORM_EXCLUSIVE_SCAN_H
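
A sketch of the semantics (values illustrative): the unary operation is applied before the scan, but never to the initial value:

    #include <functional>
    #include <numeric>
    #include <vector>

    int main() {
        std::vector<int> v{1, 2, 3};
        std::vector<int> out(v.size());
        // Squares are {1, 4, 9}; their exclusive scan from 0 is {0, 1, 5}.
        std::transform_exclusive_scan(v.begin(), v.end(), out.begin(), 0,
                                      std::plus<>(),
                                      [](int x) { return x * x; });
        return out[2] == 5 ? 0 : 1;
    }
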
diff --git a/libcxx/include/__numeric/transform_inclusive_scan.h b/libcxx/include/__numeric/transform_inclusive_scan.h
new file mode 100644
index 000000000000..b0d4ab5a88fd
--- /dev/null
+++ b/libcxx/include/__numeric/transform_inclusive_scan.h
@@ -0,0 +1,58 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___NUMERIC_TRANSFORM_INCLUSIVE_SCAN_H
+#define _LIBCPP___NUMERIC_TRANSFORM_INCLUSIVE_SCAN_H
+
+#include <__config>
+#include <__iterator/iterator_traits.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 14
+
+template <class _InputIterator, class _OutputIterator, class _Tp, class _BinaryOp, class _UnaryOp>
+_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
+_OutputIterator
+transform_inclusive_scan(_InputIterator __first, _InputIterator __last,
+ _OutputIterator __result, _BinaryOp __b, _UnaryOp __u, _Tp __init)
+{
+ for (; __first != __last; ++__first, (void) ++__result) {
+ __init = __b(__init, __u(*__first));
+ *__result = __init;
+ }
+
+ return __result;
+}
+
+template <class _InputIterator, class _OutputIterator, class _BinaryOp, class _UnaryOp>
+_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
+_OutputIterator
+transform_inclusive_scan(_InputIterator __first, _InputIterator __last,
+ _OutputIterator __result, _BinaryOp __b, _UnaryOp __u)
+{
+ if (__first != __last) {
+ typename iterator_traits<_InputIterator>::value_type __init = __u(*__first);
+ *__result++ = __init;
+ if (++__first != __last)
+ return _VSTD::transform_inclusive_scan(__first, __last, __result, __b, __u, __init);
+ }
+
+ return __result;
+}
+
+#endif // _LIBCPP_STD_VER > 14
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___NUMERIC_TRANSFORM_INCLUSIVE_SCAN_H
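
A sketch of the semantics (values illustrative):

    #include <functional>
    #include <numeric>
    #include <vector>

    int main() {
        std::vector<int> v{1, 2, 3};
        std::vector<int> out(v.size());
        // Squares are {1, 4, 9}; scanned inclusively: out == {1, 5, 14}.
        std::transform_inclusive_scan(v.begin(), v.end(), out.begin(),
                                      std::plus<>(),
                                      [](int x) { return x * x; });
        return out[2] == 14 ? 0 : 1;
    }
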
diff --git a/libcxx/include/__numeric/transform_reduce.h b/libcxx/include/__numeric/transform_reduce.h
new file mode 100644
index 000000000000..da5a77988c38
--- /dev/null
+++ b/libcxx/include/__numeric/transform_reduce.h
@@ -0,0 +1,54 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___NUMERIC_TRANSFORM_REDUCE_H
+#define _LIBCPP___NUMERIC_TRANSFORM_REDUCE_H
+
+#include <__config>
+#include <__functional/operations.h>
+#include <__utility/move.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 14
+template <class _InputIterator, class _Tp, class _BinaryOp, class _UnaryOp>
+_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _Tp transform_reduce(_InputIterator __first,
+ _InputIterator __last, _Tp __init,
+ _BinaryOp __b, _UnaryOp __u) {
+ for (; __first != __last; ++__first)
+ __init = __b(__init, __u(*__first));
+ return __init;
+}
+
+template <class _InputIterator1, class _InputIterator2, class _Tp, class _BinaryOp1, class _BinaryOp2>
+_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _Tp transform_reduce(_InputIterator1 __first1,
+ _InputIterator1 __last1,
+ _InputIterator2 __first2, _Tp __init,
+ _BinaryOp1 __b1, _BinaryOp2 __b2) {
+ for (; __first1 != __last1; ++__first1, (void)++__first2)
+ __init = __b1(__init, __b2(*__first1, *__first2));
+ return __init;
+}
+
+template <class _InputIterator1, class _InputIterator2, class _Tp>
+_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 _Tp transform_reduce(_InputIterator1 __first1,
+ _InputIterator1 __last1,
+ _InputIterator2 __first2, _Tp __init) {
+ return _VSTD::transform_reduce(__first1, __last1, __first2, _VSTD::move(__init), _VSTD::plus<>(),
+ _VSTD::multiplies<>());
+}
+#endif
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___NUMERIC_TRANSFORM_REDUCE_H
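
A usage sketch (values illustrative): the two-range default is the reduce-flavored counterpart of inner_product, and the one-range form maps and then folds:

    #include <functional>
    #include <numeric>
    #include <vector>

    int main() {
        std::vector<int> a{1, 2, 3};
        std::vector<int> b{4, 5, 6};
        // Two-range default: the dot product 1*4 + 2*5 + 3*6 == 32.
        int dot = std::transform_reduce(a.begin(), a.end(), b.begin(), 0);
        // One-range form: sum of squares, 1 + 4 + 9 == 14.
        int sq = std::transform_reduce(a.begin(), a.end(), 0, std::plus<>(),
                                       [](int x) { return x * x; });
        return (dot == 32 && sq == 14) ? 0 : 1;
    }
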
diff --git a/libcxx/include/__random/bernoulli_distribution.h b/libcxx/include/__random/bernoulli_distribution.h
new file mode 100644
index 000000000000..60ae5eae7033
--- /dev/null
+++ b/libcxx/include/__random/bernoulli_distribution.h
@@ -0,0 +1,143 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANDOM_BERNOULLI_DISTRIBUTION_H
+#define _LIBCPP___RANDOM_BERNOULLI_DISTRIBUTION_H
+
+#include <__config>
+#include <__random/uniform_real_distribution.h>
+#include <iosfwd>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+class _LIBCPP_TEMPLATE_VIS bernoulli_distribution
+{
+public:
+ // types
+ typedef bool result_type;
+
+ class _LIBCPP_TEMPLATE_VIS param_type
+ {
+ double __p_;
+ public:
+ typedef bernoulli_distribution distribution_type;
+
+ _LIBCPP_INLINE_VISIBILITY
+ explicit param_type(double __p = 0.5) : __p_(__p) {}
+
+ _LIBCPP_INLINE_VISIBILITY
+ double p() const {return __p_;}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const param_type& __x, const param_type& __y)
+ {return __x.__p_ == __y.__p_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const param_type& __x, const param_type& __y)
+ {return !(__x == __y);}
+ };
+
+private:
+ param_type __p_;
+
+public:
+ // constructors and reset functions
+#ifndef _LIBCPP_CXX03_LANG
+ _LIBCPP_INLINE_VISIBILITY
+ bernoulli_distribution() : bernoulli_distribution(0.5) {}
+ _LIBCPP_INLINE_VISIBILITY
+ explicit bernoulli_distribution(double __p) : __p_(param_type(__p)) {}
+#else
+ _LIBCPP_INLINE_VISIBILITY
+ explicit bernoulli_distribution(double __p = 0.5) : __p_(param_type(__p)) {}
+#endif
+ _LIBCPP_INLINE_VISIBILITY
+ explicit bernoulli_distribution(const param_type& __p) : __p_(__p) {}
+ _LIBCPP_INLINE_VISIBILITY
+ void reset() {}
+
+ // generating functions
+ template<class _URNG>
+ _LIBCPP_INLINE_VISIBILITY
+ result_type operator()(_URNG& __g)
+ {return (*this)(__g, __p_);}
+ template<class _URNG> _LIBCPP_INLINE_VISIBILITY result_type operator()(_URNG& __g, const param_type& __p);
+
+ // property functions
+ _LIBCPP_INLINE_VISIBILITY
+ double p() const {return __p_.p();}
+
+ _LIBCPP_INLINE_VISIBILITY
+ param_type param() const {return __p_;}
+ _LIBCPP_INLINE_VISIBILITY
+ void param(const param_type& __p) {__p_ = __p;}
+
+ _LIBCPP_INLINE_VISIBILITY
+ result_type min() const {return false;}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type max() const {return true;}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const bernoulli_distribution& __x,
+ const bernoulli_distribution& __y)
+ {return __x.__p_ == __y.__p_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const bernoulli_distribution& __x,
+ const bernoulli_distribution& __y)
+ {return !(__x == __y);}
+};
+
+template<class _URNG>
+inline
+bernoulli_distribution::result_type
+bernoulli_distribution::operator()(_URNG& __g, const param_type& __p)
+{
+ uniform_real_distribution<double> __gen;
+ return __gen(__g) < __p.p();
+}
+
+template <class _CharT, class _Traits>
+basic_ostream<_CharT, _Traits>&
+operator<<(basic_ostream<_CharT, _Traits>& __os, const bernoulli_distribution& __x)
+{
+ __save_flags<_CharT, _Traits> __lx(__os);
+ typedef basic_ostream<_CharT, _Traits> _OStream;
+ __os.flags(_OStream::dec | _OStream::left | _OStream::fixed |
+ _OStream::scientific);
+ _CharT __sp = __os.widen(' ');
+ __os.fill(__sp);
+ return __os << __x.p();
+}
+
+template <class _CharT, class _Traits>
+basic_istream<_CharT, _Traits>&
+operator>>(basic_istream<_CharT, _Traits>& __is, bernoulli_distribution& __x)
+{
+ typedef bernoulli_distribution _Eng;
+ typedef typename _Eng::param_type param_type;
+ __save_flags<_CharT, _Traits> __lx(__is);
+ typedef basic_istream<_CharT, _Traits> _Istream;
+ __is.flags(_Istream::dec | _Istream::skipws);
+ double __p;
+ __is >> __p;
+ if (!__is.fail())
+ __x.param(param_type(__p));
+ return __is;
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___RANDOM_BERNOULLI_DISTRIBUTION_H
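
A usage sketch (the seed and probability are illustrative): operator() above draws a uniform double in [0, 1) and compares it against p:

    #include <random>

    int main() {
        std::mt19937 gen(42); // seed chosen arbitrarily
        std::bernoulli_distribution coin(0.25);
        int heads = 0;
        for (int i = 0; i < 10000; ++i)
            heads += coin(gen);
        return heads > 0 ? 0 : 1; // expect roughly 2500 successes
    }
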
diff --git a/libcxx/include/__random/binomial_distribution.h b/libcxx/include/__random/binomial_distribution.h
new file mode 100644
index 000000000000..9662de8befd9
--- /dev/null
+++ b/libcxx/include/__random/binomial_distribution.h
@@ -0,0 +1,225 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANDOM_BINOMIAL_DISTRIBUTION_H
+#define _LIBCPP___RANDOM_BINOMIAL_DISTRIBUTION_H
+
+#include <__config>
+#include <__random/uniform_real_distribution.h>
+#include <cmath>
+#include <iosfwd>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template<class _IntType = int>
+class _LIBCPP_TEMPLATE_VIS binomial_distribution
+{
+public:
+ // types
+ typedef _IntType result_type;
+
+ class _LIBCPP_TEMPLATE_VIS param_type
+ {
+ result_type __t_;
+ double __p_;
+ double __pr_;
+ double __odds_ratio_;
+ result_type __r0_;
+ public:
+ typedef binomial_distribution distribution_type;
+
+ explicit param_type(result_type __t = 1, double __p = 0.5);
+
+ _LIBCPP_INLINE_VISIBILITY
+ result_type t() const {return __t_;}
+ _LIBCPP_INLINE_VISIBILITY
+ double p() const {return __p_;}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const param_type& __x, const param_type& __y)
+ {return __x.__t_ == __y.__t_ && __x.__p_ == __y.__p_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const param_type& __x, const param_type& __y)
+ {return !(__x == __y);}
+
+ friend class binomial_distribution;
+ };
+
+private:
+ param_type __p_;
+
+public:
+ // constructors and reset functions
+#ifndef _LIBCPP_CXX03_LANG
+ _LIBCPP_INLINE_VISIBILITY
+ binomial_distribution() : binomial_distribution(1) {}
+ _LIBCPP_INLINE_VISIBILITY
+ explicit binomial_distribution(result_type __t, double __p = 0.5)
+ : __p_(param_type(__t, __p)) {}
+#else
+ _LIBCPP_INLINE_VISIBILITY
+ explicit binomial_distribution(result_type __t = 1, double __p = 0.5)
+ : __p_(param_type(__t, __p)) {}
+#endif
+ _LIBCPP_INLINE_VISIBILITY
+ explicit binomial_distribution(const param_type& __p) : __p_(__p) {}
+ _LIBCPP_INLINE_VISIBILITY
+ void reset() {}
+
+ // generating functions
+ template<class _URNG>
+ _LIBCPP_INLINE_VISIBILITY
+ result_type operator()(_URNG& __g)
+ {return (*this)(__g, __p_);}
+ template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p);
+
+ // property functions
+ _LIBCPP_INLINE_VISIBILITY
+ result_type t() const {return __p_.t();}
+ _LIBCPP_INLINE_VISIBILITY
+ double p() const {return __p_.p();}
+
+ _LIBCPP_INLINE_VISIBILITY
+ param_type param() const {return __p_;}
+ _LIBCPP_INLINE_VISIBILITY
+ void param(const param_type& __p) {__p_ = __p;}
+
+ _LIBCPP_INLINE_VISIBILITY
+ result_type min() const {return 0;}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type max() const {return t();}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const binomial_distribution& __x,
+ const binomial_distribution& __y)
+ {return __x.__p_ == __y.__p_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const binomial_distribution& __x,
+ const binomial_distribution& __y)
+ {return !(__x == __y);}
+};
+
+#ifndef _LIBCPP_MSVCRT_LIKE
+extern "C" double lgamma_r(double, int *);
+#endif
+
+inline _LIBCPP_INLINE_VISIBILITY double __libcpp_lgamma(double __d) {
+#if defined(_LIBCPP_MSVCRT_LIKE)
+ return lgamma(__d);
+#else
+ int __sign;
+ return lgamma_r(__d, &__sign);
+#endif
+}
+
+template<class _IntType>
+binomial_distribution<_IntType>::param_type::param_type(result_type __t, double __p)
+ : __t_(__t), __p_(__p)
+{
+ if (0 < __p_ && __p_ < 1)
+ {
+ __r0_ = static_cast<result_type>((__t_ + 1) * __p_);
+ __pr_ = _VSTD::exp(__libcpp_lgamma(__t_ + 1.) -
+ __libcpp_lgamma(__r0_ + 1.) -
+ __libcpp_lgamma(__t_ - __r0_ + 1.) + __r0_ * _VSTD::log(__p_) +
+ (__t_ - __r0_) * _VSTD::log(1 - __p_));
+ __odds_ratio_ = __p_ / (1 - __p_);
+ }
+}
+
+// Reference: Kemp, C.D. (1986). `A modal method for generating binomial
+// variables', Commun. Statist. - Theor. Meth. 15(3), 805-813.
+template<class _IntType>
+template<class _URNG>
+_IntType
+binomial_distribution<_IntType>::operator()(_URNG& __g, const param_type& __pr)
+{
+ if (__pr.__t_ == 0 || __pr.__p_ == 0)
+ return 0;
+ if (__pr.__p_ == 1)
+ return __pr.__t_;
+ uniform_real_distribution<double> __gen;
+ double __u = __gen(__g) - __pr.__pr_;
+ if (__u < 0)
+ return __pr.__r0_;
+ double __pu = __pr.__pr_;
+ double __pd = __pu;
+ result_type __ru = __pr.__r0_;
+ result_type __rd = __ru;
+ while (true)
+ {
+ bool __break = true;
+ if (__rd >= 1)
+ {
+ __pd *= __rd / (__pr.__odds_ratio_ * (__pr.__t_ - __rd + 1));
+ __u -= __pd;
+ __break = false;
+ if (__u < 0)
+ return __rd - 1;
+ }
+ if ( __rd != 0 )
+ --__rd;
+ ++__ru;
+ if (__ru <= __pr.__t_)
+ {
+ __pu *= (__pr.__t_ - __ru + 1) * __pr.__odds_ratio_ / __ru;
+ __u -= __pu;
+ __break = false;
+ if (__u < 0)
+ return __ru;
+ }
+ if (__break)
+ return 0;
+ }
+}
+
+template <class _CharT, class _Traits, class _IntType>
+basic_ostream<_CharT, _Traits>&
+operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const binomial_distribution<_IntType>& __x)
+{
+ __save_flags<_CharT, _Traits> __lx(__os);
+ typedef basic_ostream<_CharT, _Traits> _OStream;
+ __os.flags(_OStream::dec | _OStream::left | _OStream::fixed |
+ _OStream::scientific);
+ _CharT __sp = __os.widen(' ');
+ __os.fill(__sp);
+ return __os << __x.t() << __sp << __x.p();
+}
+
+template <class _CharT, class _Traits, class _IntType>
+basic_istream<_CharT, _Traits>&
+operator>>(basic_istream<_CharT, _Traits>& __is,
+ binomial_distribution<_IntType>& __x)
+{
+ typedef binomial_distribution<_IntType> _Eng;
+ typedef typename _Eng::result_type result_type;
+ typedef typename _Eng::param_type param_type;
+ __save_flags<_CharT, _Traits> __lx(__is);
+ typedef basic_istream<_CharT, _Traits> _Istream;
+ __is.flags(_Istream::dec | _Istream::skipws);
+ result_type __t;
+ double __p;
+ __is >> __t >> __p;
+ if (!__is.fail())
+ __x.param(param_type(__t, __p));
+ return __is;
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___RANDOM_BINOMIAL_DISTRIBUTION_H
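
A usage sketch (seed and parameters illustrative). Per the Kemp reference above, sampling starts at the modal value __r0_ and walks outward in both directions, keeping the expected number of loop iterations small:

    #include <random>

    int main() {
        std::mt19937 gen(42); // seed chosen arbitrarily
        // Number of successes in t = 20 trials at p = 0.3.
        std::binomial_distribution<int> trials(20, 0.3);
        long long total = 0;
        for (int i = 0; i < 10000; ++i)
            total += trials(gen);
        return total > 0 ? 0 : 1; // sample mean should sit near t * p == 6
    }
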
diff --git a/libcxx/include/__random/cauchy_distribution.h b/libcxx/include/__random/cauchy_distribution.h
new file mode 100644
index 000000000000..6661e00bf939
--- /dev/null
+++ b/libcxx/include/__random/cauchy_distribution.h
@@ -0,0 +1,162 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANDOM_CAUCHY_DISTRIBUTION_H
+#define _LIBCPP___RANDOM_CAUCHY_DISTRIBUTION_H
+
+#include <__config>
+#include <__random/uniform_real_distribution.h>
+#include <cmath>
+#include <iosfwd>
+#include <limits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template<class _RealType = double>
+class _LIBCPP_TEMPLATE_VIS cauchy_distribution
+{
+public:
+ // types
+ typedef _RealType result_type;
+
+ class _LIBCPP_TEMPLATE_VIS param_type
+ {
+ result_type __a_;
+ result_type __b_;
+ public:
+ typedef cauchy_distribution distribution_type;
+
+ _LIBCPP_INLINE_VISIBILITY
+ explicit param_type(result_type __a = 0, result_type __b = 1)
+ : __a_(__a), __b_(__b) {}
+
+ _LIBCPP_INLINE_VISIBILITY
+ result_type a() const {return __a_;}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type b() const {return __b_;}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const param_type& __x, const param_type& __y)
+ {return __x.__a_ == __y.__a_ && __x.__b_ == __y.__b_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const param_type& __x, const param_type& __y)
+ {return !(__x == __y);}
+ };
+
+private:
+ param_type __p_;
+
+public:
+ // constructor and reset functions
+#ifndef _LIBCPP_CXX03_LANG
+ _LIBCPP_INLINE_VISIBILITY
+ cauchy_distribution() : cauchy_distribution(0) {}
+ _LIBCPP_INLINE_VISIBILITY
+ explicit cauchy_distribution(result_type __a, result_type __b = 1)
+ : __p_(param_type(__a, __b)) {}
+#else
+ _LIBCPP_INLINE_VISIBILITY
+ explicit cauchy_distribution(result_type __a = 0, result_type __b = 1)
+ : __p_(param_type(__a, __b)) {}
+#endif
+ _LIBCPP_INLINE_VISIBILITY
+ explicit cauchy_distribution(const param_type& __p)
+ : __p_(__p) {}
+ _LIBCPP_INLINE_VISIBILITY
+ void reset() {}
+
+ // generating functions
+ template<class _URNG>
+ _LIBCPP_INLINE_VISIBILITY
+ result_type operator()(_URNG& __g)
+ {return (*this)(__g, __p_);}
+ template<class _URNG> _LIBCPP_INLINE_VISIBILITY result_type operator()(_URNG& __g, const param_type& __p);
+
+ // property functions
+ _LIBCPP_INLINE_VISIBILITY
+ result_type a() const {return __p_.a();}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type b() const {return __p_.b();}
+
+ _LIBCPP_INLINE_VISIBILITY
+ param_type param() const {return __p_;}
+ _LIBCPP_INLINE_VISIBILITY
+ void param(const param_type& __p) {__p_ = __p;}
+
+ _LIBCPP_INLINE_VISIBILITY
+ result_type min() const {return -numeric_limits<result_type>::infinity();}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type max() const {return numeric_limits<result_type>::infinity();}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const cauchy_distribution& __x,
+ const cauchy_distribution& __y)
+ {return __x.__p_ == __y.__p_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const cauchy_distribution& __x,
+ const cauchy_distribution& __y)
+ {return !(__x == __y);}
+};
+
+template <class _RealType>
+template<class _URNG>
+inline
+_RealType
+cauchy_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p)
+{
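+    // Inverse-CDF sampling: the Cauchy quantile is a + b*tan(pi*(u - 1/2)).
+    // Since u and (u + 1/2 mod 1) are identically distributed and tan has
+    // period pi, the 1/2 shift can be dropped and tan(pi * u) used directly.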
+ uniform_real_distribution<result_type> __gen;
+    // Purposefully let the tan argument get as close to pi/2 as it wants;
+    // tan returns a (large but) finite value there.
+ return __p.a() + __p.b() * _VSTD::tan(3.1415926535897932384626433832795 * __gen(__g));
+}
+
+template <class _CharT, class _Traits, class _RT>
+basic_ostream<_CharT, _Traits>&
+operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const cauchy_distribution<_RT>& __x)
+{
+ __save_flags<_CharT, _Traits> __lx(__os);
+ typedef basic_ostream<_CharT, _Traits> _OStream;
+ __os.flags(_OStream::dec | _OStream::left | _OStream::fixed |
+ _OStream::scientific);
+ _CharT __sp = __os.widen(' ');
+ __os.fill(__sp);
+ __os << __x.a() << __sp << __x.b();
+ return __os;
+}
+
+template <class _CharT, class _Traits, class _RT>
+basic_istream<_CharT, _Traits>&
+operator>>(basic_istream<_CharT, _Traits>& __is,
+ cauchy_distribution<_RT>& __x)
+{
+ typedef cauchy_distribution<_RT> _Eng;
+ typedef typename _Eng::result_type result_type;
+ typedef typename _Eng::param_type param_type;
+ __save_flags<_CharT, _Traits> __lx(__is);
+ typedef basic_istream<_CharT, _Traits> _Istream;
+ __is.flags(_Istream::dec | _Istream::skipws);
+ result_type __a;
+ result_type __b;
+ __is >> __a >> __b;
+ if (!__is.fail())
+ __x.param(param_type(__a, __b));
+ return __is;
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___RANDOM_CAUCHY_DISTRIBUTION_H
diff --git a/libcxx/include/__random/chi_squared_distribution.h b/libcxx/include/__random/chi_squared_distribution.h
new file mode 100644
index 000000000000..9cf38971bdde
--- /dev/null
+++ b/libcxx/include/__random/chi_squared_distribution.h
@@ -0,0 +1,144 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANDOM_CHI_SQUARED_DISTRIBUTION_H
+#define _LIBCPP___RANDOM_CHI_SQUARED_DISTRIBUTION_H
+
+#include <__config>
+#include <__random/gamma_distribution.h>
+#include <iosfwd>
+#include <limits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template<class _RealType = double>
+class _LIBCPP_TEMPLATE_VIS chi_squared_distribution
+{
+public:
+ // types
+ typedef _RealType result_type;
+
+ class _LIBCPP_TEMPLATE_VIS param_type
+ {
+ result_type __n_;
+ public:
+ typedef chi_squared_distribution distribution_type;
+
+ _LIBCPP_INLINE_VISIBILITY
+ explicit param_type(result_type __n = 1) : __n_(__n) {}
+
+ _LIBCPP_INLINE_VISIBILITY
+ result_type n() const {return __n_;}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const param_type& __x, const param_type& __y)
+ {return __x.__n_ == __y.__n_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const param_type& __x, const param_type& __y)
+ {return !(__x == __y);}
+ };
+
+private:
+ param_type __p_;
+
+public:
+ // constructor and reset functions
+#ifndef _LIBCPP_CXX03_LANG
+ _LIBCPP_INLINE_VISIBILITY
+ chi_squared_distribution() : chi_squared_distribution(1) {}
+ _LIBCPP_INLINE_VISIBILITY
+ explicit chi_squared_distribution(result_type __n)
+ : __p_(param_type(__n)) {}
+#else
+ _LIBCPP_INLINE_VISIBILITY
+ explicit chi_squared_distribution(result_type __n = 1)
+ : __p_(param_type(__n)) {}
+#endif
+ _LIBCPP_INLINE_VISIBILITY
+ explicit chi_squared_distribution(const param_type& __p)
+ : __p_(__p) {}
+ _LIBCPP_INLINE_VISIBILITY
+ void reset() {}
+
+ // generating functions
+ template<class _URNG>
+ _LIBCPP_INLINE_VISIBILITY
+ result_type operator()(_URNG& __g)
+ {return (*this)(__g, __p_);}
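+    // A chi-squared variate with n degrees of freedom is a gamma variate
+    // with shape n/2 and scale 2, so the draw is delegated to
+    // gamma_distribution.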
+ template<class _URNG>
+ _LIBCPP_INLINE_VISIBILITY
+ result_type operator()(_URNG& __g, const param_type& __p)
+ {return gamma_distribution<result_type>(__p.n() / 2, 2)(__g);}
+
+ // property functions
+ _LIBCPP_INLINE_VISIBILITY
+ result_type n() const {return __p_.n();}
+
+ _LIBCPP_INLINE_VISIBILITY
+ param_type param() const {return __p_;}
+ _LIBCPP_INLINE_VISIBILITY
+ void param(const param_type& __p) {__p_ = __p;}
+
+ _LIBCPP_INLINE_VISIBILITY
+ result_type min() const {return 0;}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type max() const {return numeric_limits<result_type>::infinity();}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const chi_squared_distribution& __x,
+ const chi_squared_distribution& __y)
+ {return __x.__p_ == __y.__p_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const chi_squared_distribution& __x,
+ const chi_squared_distribution& __y)
+ {return !(__x == __y);}
+};
+
+template <class _CharT, class _Traits, class _RT>
+basic_ostream<_CharT, _Traits>&
+operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const chi_squared_distribution<_RT>& __x)
+{
+ __save_flags<_CharT, _Traits> __lx(__os);
+ typedef basic_ostream<_CharT, _Traits> _OStream;
+ __os.flags(_OStream::dec | _OStream::left | _OStream::fixed |
+ _OStream::scientific);
+ __os << __x.n();
+ return __os;
+}
+
+template <class _CharT, class _Traits, class _RT>
+basic_istream<_CharT, _Traits>&
+operator>>(basic_istream<_CharT, _Traits>& __is,
+ chi_squared_distribution<_RT>& __x)
+{
+ typedef chi_squared_distribution<_RT> _Eng;
+ typedef typename _Eng::result_type result_type;
+ typedef typename _Eng::param_type param_type;
+ __save_flags<_CharT, _Traits> __lx(__is);
+ typedef basic_istream<_CharT, _Traits> _Istream;
+ __is.flags(_Istream::dec | _Istream::skipws);
+ result_type __n;
+ __is >> __n;
+ if (!__is.fail())
+ __x.param(param_type(__n));
+ return __is;
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___RANDOM_CHI_SQUARED_DISTRIBUTION_H
diff --git a/libcxx/include/__random/default_random_engine.h b/libcxx/include/__random/default_random_engine.h
new file mode 100644
index 000000000000..61c5cf9c7142
--- /dev/null
+++ b/libcxx/include/__random/default_random_engine.h
@@ -0,0 +1,25 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANDOM_DEFAULT_RANDOM_ENGINE_H
+#define _LIBCPP___RANDOM_DEFAULT_RANDOM_ENGINE_H
+
+#include <__config>
+#include <__random/linear_congruential_engine.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+typedef minstd_rand default_random_engine;
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___RANDOM_DEFAULT_RANDOM_ENGINE_H
diff --git a/libcxx/include/__random/discard_block_engine.h b/libcxx/include/__random/discard_block_engine.h
new file mode 100644
index 000000000000..335715211884
--- /dev/null
+++ b/libcxx/include/__random/discard_block_engine.h
@@ -0,0 +1,203 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANDOM_DISCARD_BLOCK_ENGINE_H
+#define _LIBCPP___RANDOM_DISCARD_BLOCK_ENGINE_H
+
+#include <__config>
+#include <__random/is_seed_sequence.h>
+#include <__utility/move.h>
+#include <climits>
+#include <iosfwd>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template<class _Engine, size_t __p, size_t __r>
+class _LIBCPP_TEMPLATE_VIS discard_block_engine
+{
+ _Engine __e_;
+ int __n_;
+
+ static_assert( 0 < __r, "discard_block_engine invalid parameters");
+ static_assert(__r <= __p, "discard_block_engine invalid parameters");
+ static_assert(__r <= INT_MAX, "discard_block_engine invalid parameters");
+public:
+ // types
+ typedef typename _Engine::result_type result_type;
+
+ // engine characteristics
+ static _LIBCPP_CONSTEXPR const size_t block_size = __p;
+ static _LIBCPP_CONSTEXPR const size_t used_block = __r;
+
+#ifdef _LIBCPP_CXX03_LANG
+ static const result_type _Min = _Engine::_Min;
+ static const result_type _Max = _Engine::_Max;
+#else
+ static _LIBCPP_CONSTEXPR const result_type _Min = _Engine::min();
+ static _LIBCPP_CONSTEXPR const result_type _Max = _Engine::max();
+#endif
+
+ _LIBCPP_INLINE_VISIBILITY
+ static _LIBCPP_CONSTEXPR result_type min() { return _Engine::min(); }
+ _LIBCPP_INLINE_VISIBILITY
+ static _LIBCPP_CONSTEXPR result_type max() { return _Engine::max(); }
+
+ // constructors and seeding functions
+ _LIBCPP_INLINE_VISIBILITY
+ discard_block_engine() : __n_(0) {}
+ _LIBCPP_INLINE_VISIBILITY
+ explicit discard_block_engine(const _Engine& __e)
+ : __e_(__e), __n_(0) {}
+#ifndef _LIBCPP_CXX03_LANG
+ _LIBCPP_INLINE_VISIBILITY
+ explicit discard_block_engine(_Engine&& __e)
+ : __e_(_VSTD::move(__e)), __n_(0) {}
+#endif // _LIBCPP_CXX03_LANG
+ _LIBCPP_INLINE_VISIBILITY
+ explicit discard_block_engine(result_type __sd) : __e_(__sd), __n_(0) {}
+ template<class _Sseq>
+ _LIBCPP_INLINE_VISIBILITY
+ explicit discard_block_engine(_Sseq& __q,
+ typename enable_if<__is_seed_sequence<_Sseq, discard_block_engine>::value &&
+ !is_convertible<_Sseq, _Engine>::value>::type* = 0)
+ : __e_(__q), __n_(0) {}
+ _LIBCPP_INLINE_VISIBILITY
+ void seed() {__e_.seed(); __n_ = 0;}
+ _LIBCPP_INLINE_VISIBILITY
+ void seed(result_type __sd) {__e_.seed(__sd); __n_ = 0;}
+ template<class _Sseq>
+ _LIBCPP_INLINE_VISIBILITY
+ typename enable_if
+ <
+ __is_seed_sequence<_Sseq, discard_block_engine>::value,
+ void
+ >::type
+ seed(_Sseq& __q) {__e_.seed(__q); __n_ = 0;}
+
+ // generating functions
+ result_type operator()();
+ _LIBCPP_INLINE_VISIBILITY
+ void discard(unsigned long long __z) {for (; __z; --__z) operator()();}
+
+ // property functions
+ _LIBCPP_INLINE_VISIBILITY
+ const _Engine& base() const _NOEXCEPT {return __e_;}
+
+ template<class _Eng, size_t _Pp, size_t _Rp>
+ friend
+ bool
+ operator==(
+ const discard_block_engine<_Eng, _Pp, _Rp>& __x,
+ const discard_block_engine<_Eng, _Pp, _Rp>& __y);
+
+ template<class _Eng, size_t _Pp, size_t _Rp>
+ friend
+ bool
+ operator!=(
+ const discard_block_engine<_Eng, _Pp, _Rp>& __x,
+ const discard_block_engine<_Eng, _Pp, _Rp>& __y);
+
+ template <class _CharT, class _Traits,
+ class _Eng, size_t _Pp, size_t _Rp>
+ friend
+ basic_ostream<_CharT, _Traits>&
+ operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const discard_block_engine<_Eng, _Pp, _Rp>& __x);
+
+ template <class _CharT, class _Traits,
+ class _Eng, size_t _Pp, size_t _Rp>
+ friend
+ basic_istream<_CharT, _Traits>&
+ operator>>(basic_istream<_CharT, _Traits>& __is,
+ discard_block_engine<_Eng, _Pp, _Rp>& __x);
+};
+
+template<class _Engine, size_t __p, size_t __r>
+ _LIBCPP_CONSTEXPR const size_t discard_block_engine<_Engine, __p, __r>::block_size;
+
+template<class _Engine, size_t __p, size_t __r>
+ _LIBCPP_CONSTEXPR const size_t discard_block_engine<_Engine, __p, __r>::used_block;
+
+template<class _Engine, size_t __p, size_t __r>
+typename discard_block_engine<_Engine, __p, __r>::result_type
+discard_block_engine<_Engine, __p, __r>::operator()()
+{
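+    // Deliver the first r outputs of each block of p raw engine outputs;
+    // once r values have been handed out, skip the remaining p - r before
+    // starting the next block.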
+ if (__n_ >= static_cast<int>(__r))
+ {
+ __e_.discard(__p - __r);
+ __n_ = 0;
+ }
+ ++__n_;
+ return __e_();
+}
+
+template<class _Eng, size_t _Pp, size_t _Rp>
+inline _LIBCPP_INLINE_VISIBILITY
+bool
+operator==(const discard_block_engine<_Eng, _Pp, _Rp>& __x,
+ const discard_block_engine<_Eng, _Pp, _Rp>& __y)
+{
+ return __x.__n_ == __y.__n_ && __x.__e_ == __y.__e_;
+}
+
+template<class _Eng, size_t _Pp, size_t _Rp>
+inline _LIBCPP_INLINE_VISIBILITY
+bool
+operator!=(const discard_block_engine<_Eng, _Pp, _Rp>& __x,
+ const discard_block_engine<_Eng, _Pp, _Rp>& __y)
+{
+ return !(__x == __y);
+}
+
+template <class _CharT, class _Traits,
+ class _Eng, size_t _Pp, size_t _Rp>
+basic_ostream<_CharT, _Traits>&
+operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const discard_block_engine<_Eng, _Pp, _Rp>& __x)
+{
+ __save_flags<_CharT, _Traits> __lx(__os);
+ typedef basic_ostream<_CharT, _Traits> _Ostream;
+ __os.flags(_Ostream::dec | _Ostream::left);
+ _CharT __sp = __os.widen(' ');
+ __os.fill(__sp);
+ return __os << __x.__e_ << __sp << __x.__n_;
+}
+
+template <class _CharT, class _Traits,
+ class _Eng, size_t _Pp, size_t _Rp>
+basic_istream<_CharT, _Traits>&
+operator>>(basic_istream<_CharT, _Traits>& __is,
+ discard_block_engine<_Eng, _Pp, _Rp>& __x)
+{
+ __save_flags<_CharT, _Traits> __lx(__is);
+ typedef basic_istream<_CharT, _Traits> _Istream;
+ __is.flags(_Istream::dec | _Istream::skipws);
+ _Eng __e;
+ int __n;
+ __is >> __e >> __n;
+ if (!__is.fail())
+ {
+ __x.__e_ = __e;
+ __x.__n_ = __n;
+ }
+ return __is;
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___RANDOM_DISCARD_BLOCK_ENGINE_H
diff --git a/libcxx/include/__random/discrete_distribution.h b/libcxx/include/__random/discrete_distribution.h
new file mode 100644
index 000000000000..dc9881a92c38
--- /dev/null
+++ b/libcxx/include/__random/discrete_distribution.h
@@ -0,0 +1,260 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANDOM_DISCRETE_DISTRIBUTION_H
+#define _LIBCPP___RANDOM_DISCRETE_DISTRIBUTION_H
+
+#include <__algorithm/upper_bound.h>
+#include <__config>
+#include <__random/uniform_real_distribution.h>
+#include <cstddef>
+#include <iosfwd>
+#include <numeric>
+#include <vector>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template<class _IntType = int>
+class _LIBCPP_TEMPLATE_VIS discrete_distribution
+{
+public:
+ // types
+ typedef _IntType result_type;
+
+ class _LIBCPP_TEMPLATE_VIS param_type
+ {
+ vector<double> __p_;
+ public:
+ typedef discrete_distribution distribution_type;
+
+ _LIBCPP_INLINE_VISIBILITY
+ param_type() {}
+ template<class _InputIterator>
+ _LIBCPP_INLINE_VISIBILITY
+ param_type(_InputIterator __f, _InputIterator __l)
+ : __p_(__f, __l) {__init();}
+#ifndef _LIBCPP_CXX03_LANG
+ _LIBCPP_INLINE_VISIBILITY
+ param_type(initializer_list<double> __wl)
+ : __p_(__wl.begin(), __wl.end()) {__init();}
+#endif // _LIBCPP_CXX03_LANG
+ template<class _UnaryOperation>
+ param_type(size_t __nw, double __xmin, double __xmax,
+ _UnaryOperation __fw);
+
+ vector<double> probabilities() const;
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const param_type& __x, const param_type& __y)
+ {return __x.__p_ == __y.__p_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const param_type& __x, const param_type& __y)
+ {return !(__x == __y);}
+
+ private:
+ void __init();
+
+ friend class discrete_distribution;
+
+ template <class _CharT, class _Traits, class _IT>
+ friend
+ basic_ostream<_CharT, _Traits>&
+ operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const discrete_distribution<_IT>& __x);
+
+ template <class _CharT, class _Traits, class _IT>
+ friend
+ basic_istream<_CharT, _Traits>&
+ operator>>(basic_istream<_CharT, _Traits>& __is,
+ discrete_distribution<_IT>& __x);
+ };
+
+private:
+ param_type __p_;
+
+public:
+ // constructor and reset functions
+ _LIBCPP_INLINE_VISIBILITY
+ discrete_distribution() {}
+ template<class _InputIterator>
+ _LIBCPP_INLINE_VISIBILITY
+ discrete_distribution(_InputIterator __f, _InputIterator __l)
+ : __p_(__f, __l) {}
+#ifndef _LIBCPP_CXX03_LANG
+ _LIBCPP_INLINE_VISIBILITY
+ discrete_distribution(initializer_list<double> __wl)
+ : __p_(__wl) {}
+#endif // _LIBCPP_CXX03_LANG
+ template<class _UnaryOperation>
+ _LIBCPP_INLINE_VISIBILITY
+ discrete_distribution(size_t __nw, double __xmin, double __xmax,
+ _UnaryOperation __fw)
+ : __p_(__nw, __xmin, __xmax, __fw) {}
+ _LIBCPP_INLINE_VISIBILITY
+ explicit discrete_distribution(const param_type& __p)
+ : __p_(__p) {}
+ _LIBCPP_INLINE_VISIBILITY
+ void reset() {}
+
+ // generating functions
+ template<class _URNG>
+ _LIBCPP_INLINE_VISIBILITY
+ result_type operator()(_URNG& __g)
+ {return (*this)(__g, __p_);}
+ template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p);
+
+ // property functions
+ _LIBCPP_INLINE_VISIBILITY
+ vector<double> probabilities() const {return __p_.probabilities();}
+
+ _LIBCPP_INLINE_VISIBILITY
+ param_type param() const {return __p_;}
+ _LIBCPP_INLINE_VISIBILITY
+ void param(const param_type& __p) {__p_ = __p;}
+
+ _LIBCPP_INLINE_VISIBILITY
+ result_type min() const {return 0;}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type max() const {return __p_.__p_.size();}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const discrete_distribution& __x,
+ const discrete_distribution& __y)
+ {return __x.__p_ == __y.__p_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const discrete_distribution& __x,
+ const discrete_distribution& __y)
+ {return !(__x == __y);}
+
+ template <class _CharT, class _Traits, class _IT>
+ friend
+ basic_ostream<_CharT, _Traits>&
+ operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const discrete_distribution<_IT>& __x);
+
+ template <class _CharT, class _Traits, class _IT>
+ friend
+ basic_istream<_CharT, _Traits>&
+ operator>>(basic_istream<_CharT, _Traits>& __is,
+ discrete_distribution<_IT>& __x);
+};
+
+template<class _IntType>
+template<class _UnaryOperation>
+discrete_distribution<_IntType>::param_type::param_type(size_t __nw,
+ double __xmin,
+ double __xmax,
+ _UnaryOperation __fw)
+{
+ if (__nw > 1)
+ {
+ __p_.reserve(__nw - 1);
+ double __d = (__xmax - __xmin) / __nw;
+ double __d2 = __d / 2;
+ for (size_t __k = 0; __k < __nw; ++__k)
+ __p_.push_back(__fw(__xmin + __k * __d + __d2));
+ __init();
+ }
+}
+
+template<class _IntType>
+void
+discrete_distribution<_IntType>::param_type::__init()
+{
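+    // Normalize the weights to probabilities, then replace the vector with
+    // the running partial sums of all but the last probability; operator()
+    // maps a uniform deviate to an index via upper_bound over these sums.
+    // A single weight degenerates to an empty vector (the result is then
+    // always 0).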
+ if (!__p_.empty())
+ {
+ if (__p_.size() > 1)
+ {
+ double __s = _VSTD::accumulate(__p_.begin(), __p_.end(), 0.0);
+ for (vector<double>::iterator __i = __p_.begin(), __e = __p_.end(); __i < __e; ++__i)
+ *__i /= __s;
+ vector<double> __t(__p_.size() - 1);
+ _VSTD::partial_sum(__p_.begin(), __p_.end() - 1, __t.begin());
+ swap(__p_, __t);
+ }
+ else
+ {
+ __p_.clear();
+ __p_.shrink_to_fit();
+ }
+ }
+}
+
+template<class _IntType>
+vector<double>
+discrete_distribution<_IntType>::param_type::probabilities() const
+{
+ size_t __n = __p_.size();
+ vector<double> __p(__n+1);
+ _VSTD::adjacent_difference(__p_.begin(), __p_.end(), __p.begin());
+ if (__n > 0)
+ __p[__n] = 1 - __p_[__n-1];
+ else
+ __p[0] = 1;
+ return __p;
+}
+
+template<class _IntType>
+template<class _URNG>
+_IntType
+discrete_distribution<_IntType>::operator()(_URNG& __g, const param_type& __p)
+{
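+    // Draw u uniformly from [0, 1) and find its bucket by binary search
+    // over the cumulative probabilities stored in the parameter set.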
+ uniform_real_distribution<double> __gen;
+ return static_cast<_IntType>(
+ _VSTD::upper_bound(__p.__p_.begin(), __p.__p_.end(), __gen(__g)) -
+ __p.__p_.begin());
+}
+
+template <class _CharT, class _Traits, class _IT>
+basic_ostream<_CharT, _Traits>&
+operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const discrete_distribution<_IT>& __x)
+{
+ __save_flags<_CharT, _Traits> __lx(__os);
+ typedef basic_ostream<_CharT, _Traits> _OStream;
+ __os.flags(_OStream::dec | _OStream::left | _OStream::fixed |
+ _OStream::scientific);
+ _CharT __sp = __os.widen(' ');
+ __os.fill(__sp);
+ size_t __n = __x.__p_.__p_.size();
+ __os << __n;
+ for (size_t __i = 0; __i < __n; ++__i)
+ __os << __sp << __x.__p_.__p_[__i];
+ return __os;
+}
+
+template <class _CharT, class _Traits, class _IT>
+basic_istream<_CharT, _Traits>&
+operator>>(basic_istream<_CharT, _Traits>& __is,
+ discrete_distribution<_IT>& __x)
+{
+ __save_flags<_CharT, _Traits> __lx(__is);
+ typedef basic_istream<_CharT, _Traits> _Istream;
+ __is.flags(_Istream::dec | _Istream::skipws);
+ size_t __n;
+ __is >> __n;
+ vector<double> __p(__n);
+ for (size_t __i = 0; __i < __n; ++__i)
+ __is >> __p[__i];
+ if (!__is.fail())
+ swap(__x.__p_.__p_, __p);
+ return __is;
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___RANDOM_DISCRETE_DISTRIBUTION_H
diff --git a/libcxx/include/__random/exponential_distribution.h b/libcxx/include/__random/exponential_distribution.h
new file mode 100644
index 000000000000..9e555f0c1075
--- /dev/null
+++ b/libcxx/include/__random/exponential_distribution.h
@@ -0,0 +1,155 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANDOM_EXPONENTIAL_DISTRIBUTION_H
+#define _LIBCPP___RANDOM_EXPONENTIAL_DISTRIBUTION_H
+
+#include <__config>
+#include <__random/generate_canonical.h>
+#include <__random/uniform_real_distribution.h>
+#include <cmath>
+#include <iosfwd>
+#include <limits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template<class _RealType = double>
+class _LIBCPP_TEMPLATE_VIS exponential_distribution
+{
+public:
+ // types
+ typedef _RealType result_type;
+
+ class _LIBCPP_TEMPLATE_VIS param_type
+ {
+ result_type __lambda_;
+ public:
+ typedef exponential_distribution distribution_type;
+
+ _LIBCPP_INLINE_VISIBILITY
+ explicit param_type(result_type __lambda = 1) : __lambda_(__lambda) {}
+
+ _LIBCPP_INLINE_VISIBILITY
+ result_type lambda() const {return __lambda_;}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const param_type& __x, const param_type& __y)
+ {return __x.__lambda_ == __y.__lambda_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const param_type& __x, const param_type& __y)
+ {return !(__x == __y);}
+ };
+
+private:
+ param_type __p_;
+
+public:
+ // constructors and reset functions
+#ifndef _LIBCPP_CXX03_LANG
+ _LIBCPP_INLINE_VISIBILITY
+ exponential_distribution() : exponential_distribution(1) {}
+ _LIBCPP_INLINE_VISIBILITY
+ explicit exponential_distribution(result_type __lambda)
+ : __p_(param_type(__lambda)) {}
+#else
+ _LIBCPP_INLINE_VISIBILITY
+ explicit exponential_distribution(result_type __lambda = 1)
+ : __p_(param_type(__lambda)) {}
+#endif
+ _LIBCPP_INLINE_VISIBILITY
+ explicit exponential_distribution(const param_type& __p) : __p_(__p) {}
+ _LIBCPP_INLINE_VISIBILITY
+ void reset() {}
+
+ // generating functions
+ template<class _URNG>
+ _LIBCPP_INLINE_VISIBILITY
+ result_type operator()(_URNG& __g)
+ {return (*this)(__g, __p_);}
+ template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p);
+
+ // property functions
+ _LIBCPP_INLINE_VISIBILITY
+ result_type lambda() const {return __p_.lambda();}
+
+ _LIBCPP_INLINE_VISIBILITY
+ param_type param() const {return __p_;}
+ _LIBCPP_INLINE_VISIBILITY
+ void param(const param_type& __p) {__p_ = __p;}
+
+ _LIBCPP_INLINE_VISIBILITY
+ result_type min() const {return 0;}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type max() const {return numeric_limits<result_type>::infinity();}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const exponential_distribution& __x,
+ const exponential_distribution& __y)
+ {return __x.__p_ == __y.__p_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const exponential_distribution& __x,
+ const exponential_distribution& __y)
+ {return !(__x == __y);}
+};
+
+template <class _RealType>
+template<class _URNG>
+_RealType
+exponential_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p)
+{
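+    // Inverse-CDF sampling: X = -ln(1 - U) / lambda, with U drawn via
+    // generate_canonical at the full mantissa width of result_type.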
+ return -_VSTD::log
+ (
+ result_type(1) -
+ _VSTD::generate_canonical<result_type,
+ numeric_limits<result_type>::digits>(__g)
+ )
+ / __p.lambda();
+}
+
+template <class _CharT, class _Traits, class _RealType>
+basic_ostream<_CharT, _Traits>&
+operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const exponential_distribution<_RealType>& __x)
+{
+ __save_flags<_CharT, _Traits> __lx(__os);
+ typedef basic_ostream<_CharT, _Traits> _OStream;
+ __os.flags(_OStream::dec | _OStream::left | _OStream::fixed |
+ _OStream::scientific);
+ return __os << __x.lambda();
+}
+
+template <class _CharT, class _Traits, class _RealType>
+basic_istream<_CharT, _Traits>&
+operator>>(basic_istream<_CharT, _Traits>& __is,
+ exponential_distribution<_RealType>& __x)
+{
+ typedef exponential_distribution<_RealType> _Eng;
+ typedef typename _Eng::result_type result_type;
+ typedef typename _Eng::param_type param_type;
+ __save_flags<_CharT, _Traits> __lx(__is);
+ typedef basic_istream<_CharT, _Traits> _Istream;
+ __is.flags(_Istream::dec | _Istream::skipws);
+ result_type __lambda;
+ __is >> __lambda;
+ if (!__is.fail())
+ __x.param(param_type(__lambda));
+ return __is;
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___RANDOM_EXPONENTIAL_DISTRIBUTION_H
diff --git a/libcxx/include/__random/extreme_value_distribution.h b/libcxx/include/__random/extreme_value_distribution.h
new file mode 100644
index 000000000000..0e200f91d7ff
--- /dev/null
+++ b/libcxx/include/__random/extreme_value_distribution.h
@@ -0,0 +1,161 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANDOM_EXTREME_VALUE_DISTRIBUTION_H
+#define _LIBCPP___RANDOM_EXTREME_VALUE_DISTRIBUTION_H
+
+#include <__config>
+#include <__random/uniform_real_distribution.h>
+#include <cmath>
+#include <iosfwd>
+#include <limits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template<class _RealType = double>
+class _LIBCPP_TEMPLATE_VIS extreme_value_distribution
+{
+public:
+ // types
+ typedef _RealType result_type;
+
+ class _LIBCPP_TEMPLATE_VIS param_type
+ {
+ result_type __a_;
+ result_type __b_;
+ public:
+ typedef extreme_value_distribution distribution_type;
+
+ _LIBCPP_INLINE_VISIBILITY
+ explicit param_type(result_type __a = 0, result_type __b = 1)
+ : __a_(__a), __b_(__b) {}
+
+ _LIBCPP_INLINE_VISIBILITY
+ result_type a() const {return __a_;}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type b() const {return __b_;}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const param_type& __x, const param_type& __y)
+ {return __x.__a_ == __y.__a_ && __x.__b_ == __y.__b_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const param_type& __x, const param_type& __y)
+ {return !(__x == __y);}
+ };
+
+private:
+ param_type __p_;
+
+public:
+ // constructor and reset functions
+#ifndef _LIBCPP_CXX03_LANG
+ _LIBCPP_INLINE_VISIBILITY
+ extreme_value_distribution() : extreme_value_distribution(0) {}
+ _LIBCPP_INLINE_VISIBILITY
+ explicit extreme_value_distribution(result_type __a, result_type __b = 1)
+ : __p_(param_type(__a, __b)) {}
+#else
+ _LIBCPP_INLINE_VISIBILITY
+ explicit extreme_value_distribution(result_type __a = 0,
+ result_type __b = 1)
+ : __p_(param_type(__a, __b)) {}
+#endif
+ _LIBCPP_INLINE_VISIBILITY
+ explicit extreme_value_distribution(const param_type& __p)
+ : __p_(__p) {}
+ _LIBCPP_INLINE_VISIBILITY
+ void reset() {}
+
+ // generating functions
+ template<class _URNG>
+ _LIBCPP_INLINE_VISIBILITY
+ result_type operator()(_URNG& __g)
+ {return (*this)(__g, __p_);}
+ template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p);
+
+ // property functions
+ _LIBCPP_INLINE_VISIBILITY
+ result_type a() const {return __p_.a();}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type b() const {return __p_.b();}
+
+ _LIBCPP_INLINE_VISIBILITY
+ param_type param() const {return __p_;}
+ _LIBCPP_INLINE_VISIBILITY
+ void param(const param_type& __p) {__p_ = __p;}
+
+ _LIBCPP_INLINE_VISIBILITY
+ result_type min() const {return -numeric_limits<result_type>::infinity();}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type max() const {return numeric_limits<result_type>::infinity();}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const extreme_value_distribution& __x,
+ const extreme_value_distribution& __y)
+ {return __x.__p_ == __y.__p_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const extreme_value_distribution& __x,
+ const extreme_value_distribution& __y)
+ {return !(__x == __y);}
+};
+
+template<class _RealType>
+template<class _URNG>
+_RealType
+extreme_value_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p)
+{
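+    // Inverse-CDF sampling for the Gumbel (extreme value) distribution:
+    // X = a - b * ln(-ln(U)) with U uniform in (0, 1).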
+ return __p.a() - __p.b() *
+ _VSTD::log(-_VSTD::log(1-uniform_real_distribution<result_type>()(__g)));
+}
+
+template <class _CharT, class _Traits, class _RT>
+basic_ostream<_CharT, _Traits>&
+operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const extreme_value_distribution<_RT>& __x)
+{
+ __save_flags<_CharT, _Traits> __lx(__os);
+ typedef basic_ostream<_CharT, _Traits> _OStream;
+ __os.flags(_OStream::dec | _OStream::left | _OStream::fixed |
+ _OStream::scientific);
+ _CharT __sp = __os.widen(' ');
+ __os.fill(__sp);
+ __os << __x.a() << __sp << __x.b();
+ return __os;
+}
+
+template <class _CharT, class _Traits, class _RT>
+basic_istream<_CharT, _Traits>&
+operator>>(basic_istream<_CharT, _Traits>& __is,
+ extreme_value_distribution<_RT>& __x)
+{
+ typedef extreme_value_distribution<_RT> _Eng;
+ typedef typename _Eng::result_type result_type;
+ typedef typename _Eng::param_type param_type;
+ __save_flags<_CharT, _Traits> __lx(__is);
+ typedef basic_istream<_CharT, _Traits> _Istream;
+ __is.flags(_Istream::dec | _Istream::skipws);
+ result_type __a;
+ result_type __b;
+ __is >> __a >> __b;
+ if (!__is.fail())
+ __x.param(param_type(__a, __b));
+ return __is;
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___RANDOM_EXTREME_VALUE_DISTRIBUTION_H
diff --git a/libcxx/include/__random/fisher_f_distribution.h b/libcxx/include/__random/fisher_f_distribution.h
new file mode 100644
index 000000000000..bf64d33a645a
--- /dev/null
+++ b/libcxx/include/__random/fisher_f_distribution.h
@@ -0,0 +1,160 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANDOM_FISHER_F_DISTRIBUTION_H
+#define _LIBCPP___RANDOM_FISHER_F_DISTRIBUTION_H
+
+#include <__config>
+#include <__random/gamma_distribution.h>
+#include <iosfwd>
+#include <limits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template<class _RealType = double>
+class _LIBCPP_TEMPLATE_VIS fisher_f_distribution
+{
+public:
+ // types
+ typedef _RealType result_type;
+
+ class _LIBCPP_TEMPLATE_VIS param_type
+ {
+ result_type __m_;
+ result_type __n_;
+ public:
+ typedef fisher_f_distribution distribution_type;
+
+ _LIBCPP_INLINE_VISIBILITY
+ explicit param_type(result_type __m = 1, result_type __n = 1)
+ : __m_(__m), __n_(__n) {}
+
+ _LIBCPP_INLINE_VISIBILITY
+ result_type m() const {return __m_;}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type n() const {return __n_;}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const param_type& __x, const param_type& __y)
+ {return __x.__m_ == __y.__m_ && __x.__n_ == __y.__n_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const param_type& __x, const param_type& __y)
+ {return !(__x == __y);}
+ };
+
+private:
+ param_type __p_;
+
+public:
+ // constructor and reset functions
+#ifndef _LIBCPP_CXX03_LANG
+ _LIBCPP_INLINE_VISIBILITY
+ fisher_f_distribution() : fisher_f_distribution(1) {}
+ _LIBCPP_INLINE_VISIBILITY
+ explicit fisher_f_distribution(result_type __m, result_type __n = 1)
+ : __p_(param_type(__m, __n)) {}
+#else
+ _LIBCPP_INLINE_VISIBILITY
+ explicit fisher_f_distribution(result_type __m = 1, result_type __n = 1)
+ : __p_(param_type(__m, __n)) {}
+#endif
+ _LIBCPP_INLINE_VISIBILITY
+ explicit fisher_f_distribution(const param_type& __p)
+ : __p_(__p) {}
+ _LIBCPP_INLINE_VISIBILITY
+ void reset() {}
+
+ // generating functions
+ template<class _URNG>
+ _LIBCPP_INLINE_VISIBILITY
+ result_type operator()(_URNG& __g)
+ {return (*this)(__g, __p_);}
+ template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p);
+
+ // property functions
+ _LIBCPP_INLINE_VISIBILITY
+ result_type m() const {return __p_.m();}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type n() const {return __p_.n();}
+
+ _LIBCPP_INLINE_VISIBILITY
+ param_type param() const {return __p_;}
+ _LIBCPP_INLINE_VISIBILITY
+ void param(const param_type& __p) {__p_ = __p;}
+
+ _LIBCPP_INLINE_VISIBILITY
+ result_type min() const {return 0;}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type max() const {return numeric_limits<result_type>::infinity();}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const fisher_f_distribution& __x,
+ const fisher_f_distribution& __y)
+ {return __x.__p_ == __y.__p_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const fisher_f_distribution& __x,
+ const fisher_f_distribution& __y)
+ {return !(__x == __y);}
+};
+
+template <class _RealType>
+template<class _URNG>
+_RealType
+fisher_f_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p)
+{
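+    // F(m, n) is (X/m) / (Y/n) with X ~ chi-squared(m) and Y ~ chi-squared(n).
+    // Each chi-squared draw is a gamma variate of shape df/2; the scale
+    // factors of 2 cancel in the ratio.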
+ gamma_distribution<result_type> __gdm(__p.m() * result_type(.5));
+ gamma_distribution<result_type> __gdn(__p.n() * result_type(.5));
+ return __p.n() * __gdm(__g) / (__p.m() * __gdn(__g));
+}
+
+template <class _CharT, class _Traits, class _RT>
+basic_ostream<_CharT, _Traits>&
+operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const fisher_f_distribution<_RT>& __x)
+{
+ __save_flags<_CharT, _Traits> __lx(__os);
+ typedef basic_ostream<_CharT, _Traits> _OStream;
+ __os.flags(_OStream::dec | _OStream::left | _OStream::fixed |
+ _OStream::scientific);
+ _CharT __sp = __os.widen(' ');
+ __os.fill(__sp);
+ __os << __x.m() << __sp << __x.n();
+ return __os;
+}
+
+template <class _CharT, class _Traits, class _RT>
+basic_istream<_CharT, _Traits>&
+operator>>(basic_istream<_CharT, _Traits>& __is,
+ fisher_f_distribution<_RT>& __x)
+{
+ typedef fisher_f_distribution<_RT> _Eng;
+ typedef typename _Eng::result_type result_type;
+ typedef typename _Eng::param_type param_type;
+ __save_flags<_CharT, _Traits> __lx(__is);
+ typedef basic_istream<_CharT, _Traits> _Istream;
+ __is.flags(_Istream::dec | _Istream::skipws);
+ result_type __m;
+ result_type __n;
+ __is >> __m >> __n;
+ if (!__is.fail())
+ __x.param(param_type(__m, __n));
+ return __is;
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___RANDOM_FISHER_F_DISTRIBUTION_H
diff --git a/libcxx/include/__random/gamma_distribution.h b/libcxx/include/__random/gamma_distribution.h
new file mode 100644
index 000000000000..49d024eafea2
--- /dev/null
+++ b/libcxx/include/__random/gamma_distribution.h
@@ -0,0 +1,213 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANDOM_GAMMA_DISTRIBUTION_H
+#define _LIBCPP___RANDOM_GAMMA_DISTRIBUTION_H
+
+#include <__config>
+#include <__random/exponential_distribution.h>
+#include <__random/uniform_real_distribution.h>
+#include <cmath>
+#include <iosfwd>
+#include <limits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template<class _RealType = double>
+class _LIBCPP_TEMPLATE_VIS gamma_distribution
+{
+public:
+ // types
+ typedef _RealType result_type;
+
+ class _LIBCPP_TEMPLATE_VIS param_type
+ {
+ result_type __alpha_;
+ result_type __beta_;
+ public:
+ typedef gamma_distribution distribution_type;
+
+ _LIBCPP_INLINE_VISIBILITY
+ explicit param_type(result_type __alpha = 1, result_type __beta = 1)
+ : __alpha_(__alpha), __beta_(__beta) {}
+
+ _LIBCPP_INLINE_VISIBILITY
+ result_type alpha() const {return __alpha_;}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type beta() const {return __beta_;}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const param_type& __x, const param_type& __y)
+ {return __x.__alpha_ == __y.__alpha_ && __x.__beta_ == __y.__beta_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const param_type& __x, const param_type& __y)
+ {return !(__x == __y);}
+ };
+
+private:
+ param_type __p_;
+
+public:
+ // constructors and reset functions
+#ifndef _LIBCPP_CXX03_LANG
+ _LIBCPP_INLINE_VISIBILITY
+ gamma_distribution() : gamma_distribution(1) {}
+ _LIBCPP_INLINE_VISIBILITY
+ explicit gamma_distribution(result_type __alpha, result_type __beta = 1)
+ : __p_(param_type(__alpha, __beta)) {}
+#else
+ _LIBCPP_INLINE_VISIBILITY
+ explicit gamma_distribution(result_type __alpha = 1,
+ result_type __beta = 1)
+ : __p_(param_type(__alpha, __beta)) {}
+#endif
+ _LIBCPP_INLINE_VISIBILITY
+ explicit gamma_distribution(const param_type& __p)
+ : __p_(__p) {}
+ _LIBCPP_INLINE_VISIBILITY
+ void reset() {}
+
+ // generating functions
+ template<class _URNG>
+ _LIBCPP_INLINE_VISIBILITY
+ result_type operator()(_URNG& __g)
+ {return (*this)(__g, __p_);}
+ template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p);
+
+ // property functions
+ _LIBCPP_INLINE_VISIBILITY
+ result_type alpha() const {return __p_.alpha();}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type beta() const {return __p_.beta();}
+
+ _LIBCPP_INLINE_VISIBILITY
+ param_type param() const {return __p_;}
+ _LIBCPP_INLINE_VISIBILITY
+ void param(const param_type& __p) {__p_ = __p;}
+
+ _LIBCPP_INLINE_VISIBILITY
+ result_type min() const {return 0;}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type max() const {return numeric_limits<result_type>::infinity();}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const gamma_distribution& __x,
+ const gamma_distribution& __y)
+ {return __x.__p_ == __y.__p_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const gamma_distribution& __x,
+ const gamma_distribution& __y)
+ {return !(__x == __y);}
+};
+
+template <class _RealType>
+template<class _URNG>
+_RealType
+gamma_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p)
+{
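+    // Three regimes: alpha == 1 reduces exactly to the exponential
+    // distribution; alpha > 1 uses a squeeze-and-reject scheme (seemingly
+    // Best's 1978 rejection algorithm, though that attribution is an
+    // editorial reading); alpha < 1 mixes a power of a uniform with an
+    // exponential deviate in the style of Ahrens-Dieter. The unit-scale
+    // result is multiplied by beta on return.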
+ result_type __a = __p.alpha();
+ uniform_real_distribution<result_type> __gen(0, 1);
+ exponential_distribution<result_type> __egen;
+ result_type __x;
+ if (__a == 1)
+ __x = __egen(__g);
+ else if (__a > 1)
+ {
+ const result_type __b = __a - 1;
+ const result_type __c = 3 * __a - result_type(0.75);
+ while (true)
+ {
+ const result_type __u = __gen(__g);
+ const result_type __v = __gen(__g);
+ const result_type __w = __u * (1 - __u);
+ if (__w != 0)
+ {
+ const result_type __y = _VSTD::sqrt(__c / __w) *
+ (__u - result_type(0.5));
+ __x = __b + __y;
+ if (__x >= 0)
+ {
+ const result_type __z = 64 * __w * __w * __w * __v * __v;
+ if (__z <= 1 - 2 * __y * __y / __x)
+ break;
+ if (_VSTD::log(__z) <= 2 * (__b * _VSTD::log(__x / __b) - __y))
+ break;
+ }
+ }
+ }
+ }
+ else // __a < 1
+ {
+ while (true)
+ {
+ const result_type __u = __gen(__g);
+ const result_type __es = __egen(__g);
+ if (__u <= 1 - __a)
+ {
+ __x = _VSTD::pow(__u, 1 / __a);
+ if (__x <= __es)
+ break;
+ }
+ else
+ {
+ const result_type __e = -_VSTD::log((1-__u)/__a);
+ __x = _VSTD::pow(1 - __a + __a * __e, 1 / __a);
+ if (__x <= __e + __es)
+ break;
+ }
+ }
+ }
+ return __x * __p.beta();
+}
+
+template <class _CharT, class _Traits, class _RT>
+basic_ostream<_CharT, _Traits>&
+operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const gamma_distribution<_RT>& __x)
+{
+ __save_flags<_CharT, _Traits> __lx(__os);
+ typedef basic_ostream<_CharT, _Traits> _OStream;
+ __os.flags(_OStream::dec | _OStream::left | _OStream::fixed |
+ _OStream::scientific);
+ _CharT __sp = __os.widen(' ');
+ __os.fill(__sp);
+ __os << __x.alpha() << __sp << __x.beta();
+ return __os;
+}
+
+template <class _CharT, class _Traits, class _RT>
+basic_istream<_CharT, _Traits>&
+operator>>(basic_istream<_CharT, _Traits>& __is,
+ gamma_distribution<_RT>& __x)
+{
+ typedef gamma_distribution<_RT> _Eng;
+ typedef typename _Eng::result_type result_type;
+ typedef typename _Eng::param_type param_type;
+ __save_flags<_CharT, _Traits> __lx(__is);
+ typedef basic_istream<_CharT, _Traits> _Istream;
+ __is.flags(_Istream::dec | _Istream::skipws);
+ result_type __alpha;
+ result_type __beta;
+ __is >> __alpha >> __beta;
+ if (!__is.fail())
+ __x.param(param_type(__alpha, __beta));
+ return __is;
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___RANDOM_GAMMA_DISTRIBUTION_H
diff --git a/libcxx/include/__random/generate_canonical.h b/libcxx/include/__random/generate_canonical.h
new file mode 100644
index 000000000000..46c3b2980952
--- /dev/null
+++ b/libcxx/include/__random/generate_canonical.h
@@ -0,0 +1,53 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANDOM_GENERATE_CANONICAL_H
+#define _LIBCPP___RANDOM_GENERATE_CANONICAL_H
+
+#include <__config>
+#include <__random/log2.h>
+#include <cstdint>
+#include <initializer_list>
+#include <limits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+// generate_canonical
+
+template<class _RealType, size_t __bits, class _URNG>
+_RealType
+generate_canonical(_URNG& __g)
+{
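+    // Concatenate k successive engine outputs, treated as digits in base
+    // R = max - min + 1, until at least b = min(bits, digits of _RealType)
+    // bits of randomness have been accumulated, then scale into [0, 1).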
+ const size_t _Dt = numeric_limits<_RealType>::digits;
+ const size_t __b = _Dt < __bits ? _Dt : __bits;
+#ifdef _LIBCPP_CXX03_LANG
+ const size_t __logR = __log2<uint64_t, _URNG::_Max - _URNG::_Min + uint64_t(1)>::value;
+#else
+ const size_t __logR = __log2<uint64_t, _URNG::max() - _URNG::min() + uint64_t(1)>::value;
+#endif
+ const size_t __k = __b / __logR + (__b % __logR != 0) + (__b == 0);
+ const _RealType _Rp = static_cast<_RealType>(_URNG::max() - _URNG::min()) + _RealType(1);
+ _RealType __base = _Rp;
+ _RealType _Sp = __g() - _URNG::min();
+ for (size_t __i = 1; __i < __k; ++__i, __base *= _Rp)
+ _Sp += (__g() - _URNG::min()) * __base;
+ return _Sp / __base;
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___RANDOM_GENERATE_CANONICAL_H
diff --git a/libcxx/include/__random/geometric_distribution.h b/libcxx/include/__random/geometric_distribution.h
new file mode 100644
index 000000000000..174914eaed2e
--- /dev/null
+++ b/libcxx/include/__random/geometric_distribution.h
@@ -0,0 +1,141 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANDOM_GEOMETRIC_DISTRIBUTION_H
+#define _LIBCPP___RANDOM_GEOMETRIC_DISTRIBUTION_H
+
+#include <__config>
+#include <__random/negative_binomial_distribution.h>
+#include <iosfwd>
+#include <limits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template<class _IntType = int>
+class _LIBCPP_TEMPLATE_VIS geometric_distribution
+{
+public:
+ // types
+ typedef _IntType result_type;
+
+ class _LIBCPP_TEMPLATE_VIS param_type
+ {
+ double __p_;
+ public:
+ typedef geometric_distribution distribution_type;
+
+ _LIBCPP_INLINE_VISIBILITY
+ explicit param_type(double __p = 0.5) : __p_(__p) {}
+
+ _LIBCPP_INLINE_VISIBILITY
+ double p() const {return __p_;}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const param_type& __x, const param_type& __y)
+ {return __x.__p_ == __y.__p_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const param_type& __x, const param_type& __y)
+ {return !(__x == __y);}
+ };
+
+private:
+ param_type __p_;
+
+public:
+ // constructors and reset functions
+#ifndef _LIBCPP_CXX03_LANG
+ _LIBCPP_INLINE_VISIBILITY
+ geometric_distribution() : geometric_distribution(0.5) {}
+ _LIBCPP_INLINE_VISIBILITY
+ explicit geometric_distribution(double __p)
+ : __p_(__p) {}
+#else
+ _LIBCPP_INLINE_VISIBILITY
+ explicit geometric_distribution(double __p = 0.5)
+ : __p_(__p) {}
+#endif
+ _LIBCPP_INLINE_VISIBILITY
+ explicit geometric_distribution(const param_type& __p) : __p_(__p) {}
+ _LIBCPP_INLINE_VISIBILITY
+ void reset() {}
+
+ // generating functions
+ template<class _URNG>
+ _LIBCPP_INLINE_VISIBILITY
+ result_type operator()(_URNG& __g)
+ {return (*this)(__g, __p_);}
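+    // A geometric variate counts failures before the first success, i.e.
+    // it is a negative binomial variate with k = 1, so the draw is
+    // delegated there.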
+ template<class _URNG>
+ _LIBCPP_INLINE_VISIBILITY
+ result_type operator()(_URNG& __g, const param_type& __p)
+ {return negative_binomial_distribution<result_type>(1, __p.p())(__g);}
+
+ // property functions
+ _LIBCPP_INLINE_VISIBILITY
+ double p() const {return __p_.p();}
+
+ _LIBCPP_INLINE_VISIBILITY
+ param_type param() const {return __p_;}
+ _LIBCPP_INLINE_VISIBILITY
+ void param(const param_type& __p) {__p_ = __p;}
+
+ _LIBCPP_INLINE_VISIBILITY
+ result_type min() const {return 0;}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type max() const {return numeric_limits<result_type>::max();}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const geometric_distribution& __x,
+ const geometric_distribution& __y)
+ {return __x.__p_ == __y.__p_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const geometric_distribution& __x,
+ const geometric_distribution& __y)
+ {return !(__x == __y);}
+};
+
+template <class _CharT, class _Traits, class _IntType>
+basic_ostream<_CharT, _Traits>&
+operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const geometric_distribution<_IntType>& __x)
+{
+ __save_flags<_CharT, _Traits> __lx(__os);
+ typedef basic_ostream<_CharT, _Traits> _OStream;
+ __os.flags(_OStream::dec | _OStream::left | _OStream::fixed |
+ _OStream::scientific);
+ return __os << __x.p();
+}
+
+template <class _CharT, class _Traits, class _IntType>
+basic_istream<_CharT, _Traits>&
+operator>>(basic_istream<_CharT, _Traits>& __is,
+ geometric_distribution<_IntType>& __x)
+{
+ typedef geometric_distribution<_IntType> _Eng;
+ typedef typename _Eng::param_type param_type;
+ __save_flags<_CharT, _Traits> __lx(__is);
+ typedef basic_istream<_CharT, _Traits> _Istream;
+ __is.flags(_Istream::dec | _Istream::skipws);
+ double __p;
+ __is >> __p;
+ if (!__is.fail())
+ __x.param(param_type(__p));
+ return __is;
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___RANDOM_GEOMETRIC_DISTRIBUTION_H
diff --git a/libcxx/include/__random/independent_bits_engine.h b/libcxx/include/__random/independent_bits_engine.h
new file mode 100644
index 000000000000..f0e8c654246b
--- /dev/null
+++ b/libcxx/include/__random/independent_bits_engine.h
@@ -0,0 +1,271 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANDOM_INDEPENDENT_BITS_ENGINE_H
+#define _LIBCPP___RANDOM_INDEPENDENT_BITS_ENGINE_H
+
+#include <__config>
+#include <__random/is_seed_sequence.h>
+#include <__random/log2.h>
+#include <__utility/move.h>
+#include <iosfwd>
+#include <limits>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template<class _Engine, size_t __w, class _UIntType>
+class _LIBCPP_TEMPLATE_VIS independent_bits_engine
+{
+ template <class _UInt, _UInt _R0, size_t _Wp, size_t _Mp>
+ class __get_n
+ {
+ static _LIBCPP_CONSTEXPR const size_t _Dt = numeric_limits<_UInt>::digits;
+ static _LIBCPP_CONSTEXPR const size_t _Np = _Wp / _Mp + (_Wp % _Mp != 0);
+ static _LIBCPP_CONSTEXPR const size_t _W0 = _Wp / _Np;
+ static _LIBCPP_CONSTEXPR const _UInt _Y0 = _W0 >= _Dt ? 0 : (_R0 >> _W0) << _W0;
+ public:
+ static _LIBCPP_CONSTEXPR const size_t value = _R0 - _Y0 > _Y0 / _Np ? _Np + 1 : _Np;
+ };
+public:
+ // types
+ typedef _UIntType result_type;
+
+private:
+ _Engine __e_;
+
+ static _LIBCPP_CONSTEXPR const result_type _Dt = numeric_limits<result_type>::digits;
+ static_assert( 0 < __w, "independent_bits_engine invalid parameters");
+ static_assert(__w <= _Dt, "independent_bits_engine invalid parameters");
+
+ typedef typename _Engine::result_type _Engine_result_type;
+ typedef typename conditional
+ <
+ sizeof(_Engine_result_type) <= sizeof(result_type),
+ result_type,
+ _Engine_result_type
+ >::type _Working_result_type;
+#ifdef _LIBCPP_CXX03_LANG
+ static const _Working_result_type _Rp = _Engine::_Max - _Engine::_Min
+ + _Working_result_type(1);
+#else
+ static _LIBCPP_CONSTEXPR const _Working_result_type _Rp = _Engine::max() - _Engine::min()
+ + _Working_result_type(1);
+#endif
+ static _LIBCPP_CONSTEXPR const size_t __m = __log2<_Working_result_type, _Rp>::value;
+ static _LIBCPP_CONSTEXPR const size_t __n = __get_n<_Working_result_type, _Rp, __w, __m>::value;
+ static _LIBCPP_CONSTEXPR const size_t __w0 = __w / __n;
+ static _LIBCPP_CONSTEXPR const size_t __n0 = __n - __w % __n;
+ static _LIBCPP_CONSTEXPR const size_t _WDt = numeric_limits<_Working_result_type>::digits;
+ static _LIBCPP_CONSTEXPR const size_t _EDt = numeric_limits<_Engine_result_type>::digits;
+ static _LIBCPP_CONSTEXPR const _Working_result_type __y0 = __w0 >= _WDt ? 0 :
+ (_Rp >> __w0) << __w0;
+ static _LIBCPP_CONSTEXPR const _Working_result_type __y1 = __w0 >= _WDt - 1 ? 0 :
+ (_Rp >> (__w0+1)) << (__w0+1);
+ static _LIBCPP_CONSTEXPR const _Engine_result_type __mask0 = __w0 > 0 ?
+ _Engine_result_type(~0) >> (_EDt - __w0) :
+ _Engine_result_type(0);
+ static _LIBCPP_CONSTEXPR const _Engine_result_type __mask1 = __w0 < _EDt - 1 ?
+ _Engine_result_type(~0) >> (_EDt - (__w0 + 1)) :
+ _Engine_result_type(~0);
+public:
+ static _LIBCPP_CONSTEXPR const result_type _Min = 0;
+ static _LIBCPP_CONSTEXPR const result_type _Max = __w == _Dt ? result_type(~0) :
+ (result_type(1) << __w) - result_type(1);
+ static_assert(_Min < _Max, "independent_bits_engine invalid parameters");
+
+ // engine characteristics
+ _LIBCPP_INLINE_VISIBILITY
+ static _LIBCPP_CONSTEXPR result_type min() { return _Min; }
+ _LIBCPP_INLINE_VISIBILITY
+ static _LIBCPP_CONSTEXPR result_type max() { return _Max; }
+
+ // constructors and seeding functions
+ _LIBCPP_INLINE_VISIBILITY
+ independent_bits_engine() {}
+ _LIBCPP_INLINE_VISIBILITY
+ explicit independent_bits_engine(const _Engine& __e)
+ : __e_(__e) {}
+#ifndef _LIBCPP_CXX03_LANG
+ _LIBCPP_INLINE_VISIBILITY
+ explicit independent_bits_engine(_Engine&& __e)
+ : __e_(_VSTD::move(__e)) {}
+#endif // _LIBCPP_CXX03_LANG
+ _LIBCPP_INLINE_VISIBILITY
+ explicit independent_bits_engine(result_type __sd) : __e_(__sd) {}
+ template<class _Sseq>
+ _LIBCPP_INLINE_VISIBILITY
+ explicit independent_bits_engine(_Sseq& __q,
+ typename enable_if<__is_seed_sequence<_Sseq, independent_bits_engine>::value &&
+ !is_convertible<_Sseq, _Engine>::value>::type* = 0)
+ : __e_(__q) {}
+ _LIBCPP_INLINE_VISIBILITY
+ void seed() {__e_.seed();}
+ _LIBCPP_INLINE_VISIBILITY
+ void seed(result_type __sd) {__e_.seed(__sd);}
+ template<class _Sseq>
+ _LIBCPP_INLINE_VISIBILITY
+ typename enable_if
+ <
+ __is_seed_sequence<_Sseq, independent_bits_engine>::value,
+ void
+ >::type
+ seed(_Sseq& __q) {__e_.seed(__q);}
+
+ // generating functions
+ _LIBCPP_INLINE_VISIBILITY
+ result_type operator()() {return __eval(integral_constant<bool, _Rp != 0>());}
+ _LIBCPP_INLINE_VISIBILITY
+ void discard(unsigned long long __z) {for (; __z; --__z) operator()();}
+
+ // property functions
+ _LIBCPP_INLINE_VISIBILITY
+ const _Engine& base() const _NOEXCEPT {return __e_;}
+
+ template<class _Eng, size_t _Wp, class _UInt>
+ friend
+ bool
+ operator==(
+ const independent_bits_engine<_Eng, _Wp, _UInt>& __x,
+ const independent_bits_engine<_Eng, _Wp, _UInt>& __y);
+
+ template<class _Eng, size_t _Wp, class _UInt>
+ friend
+ bool
+ operator!=(
+ const independent_bits_engine<_Eng, _Wp, _UInt>& __x,
+ const independent_bits_engine<_Eng, _Wp, _UInt>& __y);
+
+ template <class _CharT, class _Traits,
+ class _Eng, size_t _Wp, class _UInt>
+ friend
+ basic_ostream<_CharT, _Traits>&
+ operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const independent_bits_engine<_Eng, _Wp, _UInt>& __x);
+
+ template <class _CharT, class _Traits,
+ class _Eng, size_t _Wp, class _UInt>
+ friend
+ basic_istream<_CharT, _Traits>&
+ operator>>(basic_istream<_CharT, _Traits>& __is,
+ independent_bits_engine<_Eng, _Wp, _UInt>& __x);
+
+private:
+ _LIBCPP_INLINE_VISIBILITY
+ result_type __eval(false_type);
+ result_type __eval(true_type);
+
+ template <size_t __count>
+ _LIBCPP_INLINE_VISIBILITY
+ static
+ typename enable_if
+ <
+ __count < _Dt,
+ result_type
+ >::type
+ __lshift(result_type __x) {return __x << __count;}
+
+ template <size_t __count>
+ _LIBCPP_INLINE_VISIBILITY
+ static
+ typename enable_if
+ <
+ (__count >= _Dt),
+ result_type
+ >::type
+ __lshift(result_type) {return result_type(0);}
+};
+
+template<class _Engine, size_t __w, class _UIntType>
+inline
+_UIntType
+independent_bits_engine<_Engine, __w, _UIntType>::__eval(false_type)
+{
+ return static_cast<result_type>(__e_() & __mask0);
+}
+
+template<class _Engine, size_t __w, class _UIntType>
+_UIntType
+independent_bits_engine<_Engine, __w, _UIntType>::__eval(true_type)
+{
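+    // Assemble the w-bit result from n draws: the first n0 contribute w0
+    // bits each and the rest contribute w0 + 1. Each raw draw is rejected
+    // until it falls below the largest multiple of 2^w0 (resp. 2^(w0+1))
+    // not exceeding the engine's range, which keeps the retained low bits
+    // uniformly distributed.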
+ result_type _Sp = 0;
+ for (size_t __k = 0; __k < __n0; ++__k)
+ {
+ _Engine_result_type __u;
+ do
+ {
+ __u = __e_() - _Engine::min();
+ } while (__u >= __y0);
+ _Sp = static_cast<result_type>(__lshift<__w0>(_Sp) + (__u & __mask0));
+ }
+ for (size_t __k = __n0; __k < __n; ++__k)
+ {
+ _Engine_result_type __u;
+ do
+ {
+ __u = __e_() - _Engine::min();
+ } while (__u >= __y1);
+ _Sp = static_cast<result_type>(__lshift<__w0+1>(_Sp) + (__u & __mask1));
+ }
+ return _Sp;
+}
+
+template<class _Eng, size_t _Wp, class _UInt>
+inline _LIBCPP_INLINE_VISIBILITY
+bool
+operator==(
+ const independent_bits_engine<_Eng, _Wp, _UInt>& __x,
+ const independent_bits_engine<_Eng, _Wp, _UInt>& __y)
+{
+ return __x.base() == __y.base();
+}
+
+template<class _Eng, size_t _Wp, class _UInt>
+inline _LIBCPP_INLINE_VISIBILITY
+bool
+operator!=(
+ const independent_bits_engine<_Eng, _Wp, _UInt>& __x,
+ const independent_bits_engine<_Eng, _Wp, _UInt>& __y)
+{
+ return !(__x == __y);
+}
+
+template <class _CharT, class _Traits,
+ class _Eng, size_t _Wp, class _UInt>
+basic_ostream<_CharT, _Traits>&
+operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const independent_bits_engine<_Eng, _Wp, _UInt>& __x)
+{
+ return __os << __x.base();
+}
+
+template <class _CharT, class _Traits,
+ class _Eng, size_t _Wp, class _UInt>
+basic_istream<_CharT, _Traits>&
+operator>>(basic_istream<_CharT, _Traits>& __is,
+ independent_bits_engine<_Eng, _Wp, _UInt>& __x)
+{
+ _Eng __e;
+ __is >> __e;
+ if (!__is.fail())
+ __x.__e_ = __e;
+ return __is;
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___RANDOM_INDEPENDENT_BITS_ENGINE_H
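
Illustrative usage sketch (not part of the imported header): the adaptor above
repackages draws from a base engine into exactly __w independent bits per call.

    #include <cstdint>
    #include <iostream>
    #include <random>

    int main() {
        // 24 independent bits per call, assembled from mt19937 output.
        std::independent_bits_engine<std::mt19937, 24, std::uint_fast32_t> eng(42);
        for (int i = 0; i < 3; ++i)
            std::cout << eng() << '\n'; // each value lies in [0, 2^24)
    }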
diff --git a/libcxx/include/__random/is_seed_sequence.h b/libcxx/include/__random/is_seed_sequence.h
new file mode 100644
index 000000000000..46b1d719ddfb
--- /dev/null
+++ b/libcxx/include/__random/is_seed_sequence.h
@@ -0,0 +1,31 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANDOM_IS_SEED_SEQUENCE_H
+#define _LIBCPP___RANDOM_IS_SEED_SEQUENCE_H
+
+#include <__config>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template <class _Sseq, class _Engine>
+struct __is_seed_sequence
+{
+ static _LIBCPP_CONSTEXPR const bool value =
+ !is_convertible<_Sseq, typename _Engine::result_type>::value &&
+ !is_same<typename remove_cv<_Sseq>::type, _Engine>::value;
+};
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___RANDOM_IS_SEED_SEQUENCE_H
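
The trait above only steers overload resolution: an engine's seed-sequence
constructor participates only for types that are neither convertible to the
engine's result_type nor the engine type itself. A sketch of the observable
effect (illustrative only):

    #include <random>

    int main() {
        std::seed_seq sq{1, 2, 3};
        std::mt19937 from_seq(sq);       // seed-sequence overload: seed_seq is
                                         // not convertible to result_type
        std::mt19937 from_value(12345u); // unsigned converts to result_type, so
                                         // the result_type constructor is chosen
        (void)from_seq; (void)from_value;
    }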
diff --git a/libcxx/include/__random/knuth_b.h b/libcxx/include/__random/knuth_b.h
new file mode 100644
index 000000000000..ade853884dd3
--- /dev/null
+++ b/libcxx/include/__random/knuth_b.h
@@ -0,0 +1,26 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANDOM_KNUTH_B_H
+#define _LIBCPP___RANDOM_KNUTH_B_H
+
+#include <__config>
+#include <__random/linear_congruential_engine.h>
+#include <__random/shuffle_order_engine.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+typedef shuffle_order_engine<minstd_rand0, 256> knuth_b;
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___RANDOM_KNUTH_B_H
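
Usage sketch (illustrative): knuth_b is minstd_rand0 run through a 256-entry
shuffle table, which breaks up short-range correlations in the base sequence.

    #include <iostream>
    #include <random>

    int main() {
        std::knuth_b eng(7);
        std::cout << eng() << '\n'; // values in [1, 2147483646], as for minstd_rand0
    }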
diff --git a/libcxx/include/__random/linear_congruential_engine.h b/libcxx/include/__random/linear_congruential_engine.h
new file mode 100644
index 000000000000..64c9f584114c
--- /dev/null
+++ b/libcxx/include/__random/linear_congruential_engine.h
@@ -0,0 +1,398 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANDOM_LINEAR_CONGRUENTIAL_ENGINE_H
+#define _LIBCPP___RANDOM_LINEAR_CONGRUENTIAL_ENGINE_H
+
+#include <__config>
+#include <__random/is_seed_sequence.h>
+#include <cstdint>
+#include <iosfwd>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template <unsigned long long __a, unsigned long long __c,
+ unsigned long long __m, unsigned long long _Mp,
+ bool _MightOverflow = (__a != 0 && __m != 0 && __m-1 > (_Mp-__c)/__a),
+ bool _OverflowOK = ((__m | (__m-1)) > __m), // m = 2^n
+ bool _SchrageOK = (__a != 0 && __m != 0 && __m % __a <= __m / __a)> // r <= q
+struct __lce_alg_picker
+{
+ static_assert(__a != 0 || __m != 0 || !_MightOverflow || _OverflowOK || _SchrageOK,
+ "The current values of a, c, and m cannot generate a number "
+ "within bounds of linear_congruential_engine.");
+
+ static _LIBCPP_CONSTEXPR const bool __use_schrage = _MightOverflow &&
+ !_OverflowOK &&
+ _SchrageOK;
+};
+
+template <unsigned long long __a, unsigned long long __c,
+ unsigned long long __m, unsigned long long _Mp,
+ bool _UseSchrage = __lce_alg_picker<__a, __c, __m, _Mp>::__use_schrage>
+struct __lce_ta;
+
+// 64
+
+template <unsigned long long __a, unsigned long long __c, unsigned long long __m>
+struct __lce_ta<__a, __c, __m, (unsigned long long)(~0), true>
+{
+ typedef unsigned long long result_type;
+ _LIBCPP_INLINE_VISIBILITY
+ static result_type next(result_type __x)
+ {
+        // Schrage's algorithm: write __m = __a * __q + __r with __r <= __q;
+        // then __a * (__x % __q) and __r * (__x / __q) both stay below __m,
+        // so (__a * __x + __c) % __m is computed without overflow.
+ const result_type __q = __m / __a;
+ const result_type __r = __m % __a;
+ const result_type __t0 = __a * (__x % __q);
+ const result_type __t1 = __r * (__x / __q);
+ __x = __t0 + (__t0 < __t1) * __m - __t1;
+ __x += __c - (__x >= __m - __c) * __m;
+ return __x;
+ }
+};
+
+template <unsigned long long __a, unsigned long long __m>
+struct __lce_ta<__a, 0, __m, (unsigned long long)(~0), true>
+{
+ typedef unsigned long long result_type;
+ _LIBCPP_INLINE_VISIBILITY
+ static result_type next(result_type __x)
+ {
+ // Schrage's algorithm
+ const result_type __q = __m / __a;
+ const result_type __r = __m % __a;
+ const result_type __t0 = __a * (__x % __q);
+ const result_type __t1 = __r * (__x / __q);
+ __x = __t0 + (__t0 < __t1) * __m - __t1;
+ return __x;
+ }
+};
+
+template <unsigned long long __a, unsigned long long __c, unsigned long long __m>
+struct __lce_ta<__a, __c, __m, (unsigned long long)(~0), false>
+{
+ typedef unsigned long long result_type;
+ _LIBCPP_INLINE_VISIBILITY
+ static result_type next(result_type __x)
+ {
+ return (__a * __x + __c) % __m;
+ }
+};
+
+template <unsigned long long __a, unsigned long long __c>
+struct __lce_ta<__a, __c, 0, (unsigned long long)(~0), false>
+{
+ typedef unsigned long long result_type;
+ _LIBCPP_INLINE_VISIBILITY
+ static result_type next(result_type __x)
+ {
+ return __a * __x + __c;
+ }
+};
+
+// 32
+
+template <unsigned long long _Ap, unsigned long long _Cp, unsigned long long _Mp>
+struct __lce_ta<_Ap, _Cp, _Mp, unsigned(~0), true>
+{
+ typedef unsigned result_type;
+ _LIBCPP_INLINE_VISIBILITY
+ static result_type next(result_type __x)
+ {
+ const result_type __a = static_cast<result_type>(_Ap);
+ const result_type __c = static_cast<result_type>(_Cp);
+ const result_type __m = static_cast<result_type>(_Mp);
+ // Schrage's algorithm
+ const result_type __q = __m / __a;
+ const result_type __r = __m % __a;
+ const result_type __t0 = __a * (__x % __q);
+ const result_type __t1 = __r * (__x / __q);
+ __x = __t0 + (__t0 < __t1) * __m - __t1;
+ __x += __c - (__x >= __m - __c) * __m;
+ return __x;
+ }
+};
+
+template <unsigned long long _Ap, unsigned long long _Mp>
+struct __lce_ta<_Ap, 0, _Mp, unsigned(~0), true>
+{
+ typedef unsigned result_type;
+ _LIBCPP_INLINE_VISIBILITY
+ static result_type next(result_type __x)
+ {
+ const result_type __a = static_cast<result_type>(_Ap);
+ const result_type __m = static_cast<result_type>(_Mp);
+ // Schrage's algorithm
+ const result_type __q = __m / __a;
+ const result_type __r = __m % __a;
+ const result_type __t0 = __a * (__x % __q);
+ const result_type __t1 = __r * (__x / __q);
+ __x = __t0 + (__t0 < __t1) * __m - __t1;
+ return __x;
+ }
+};
+
+template <unsigned long long _Ap, unsigned long long _Cp, unsigned long long _Mp>
+struct __lce_ta<_Ap, _Cp, _Mp, unsigned(~0), false>
+{
+ typedef unsigned result_type;
+ _LIBCPP_INLINE_VISIBILITY
+ static result_type next(result_type __x)
+ {
+ const result_type __a = static_cast<result_type>(_Ap);
+ const result_type __c = static_cast<result_type>(_Cp);
+ const result_type __m = static_cast<result_type>(_Mp);
+ return (__a * __x + __c) % __m;
+ }
+};
+
+template <unsigned long long _Ap, unsigned long long _Cp>
+struct __lce_ta<_Ap, _Cp, 0, unsigned(~0), false>
+{
+ typedef unsigned result_type;
+ _LIBCPP_INLINE_VISIBILITY
+ static result_type next(result_type __x)
+ {
+ const result_type __a = static_cast<result_type>(_Ap);
+ const result_type __c = static_cast<result_type>(_Cp);
+ return __a * __x + __c;
+ }
+};
+
+// 16
+
+template <unsigned long long __a, unsigned long long __c, unsigned long long __m, bool __b>
+struct __lce_ta<__a, __c, __m, (unsigned short)(~0), __b>
+{
+ typedef unsigned short result_type;
+ _LIBCPP_INLINE_VISIBILITY
+ static result_type next(result_type __x)
+ {
+ return static_cast<result_type>(__lce_ta<__a, __c, __m, unsigned(~0)>::next(__x));
+ }
+};
+
+template <class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m>
+class _LIBCPP_TEMPLATE_VIS linear_congruential_engine;
+
+template <class _CharT, class _Traits,
+ class _Up, _Up _Ap, _Up _Cp, _Up _Np>
+_LIBCPP_INLINE_VISIBILITY
+basic_ostream<_CharT, _Traits>&
+operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const linear_congruential_engine<_Up, _Ap, _Cp, _Np>&);
+
+template <class _CharT, class _Traits,
+ class _Up, _Up _Ap, _Up _Cp, _Up _Np>
+basic_istream<_CharT, _Traits>&
+operator>>(basic_istream<_CharT, _Traits>& __is,
+ linear_congruential_engine<_Up, _Ap, _Cp, _Np>& __x);
+
+template <class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m>
+class _LIBCPP_TEMPLATE_VIS linear_congruential_engine
+{
+public:
+ // types
+ typedef _UIntType result_type;
+
+private:
+ result_type __x_;
+
+ static _LIBCPP_CONSTEXPR const result_type _Mp = result_type(~0);
+
+ static_assert(__m == 0 || __a < __m, "linear_congruential_engine invalid parameters");
+ static_assert(__m == 0 || __c < __m, "linear_congruential_engine invalid parameters");
+ static_assert(is_unsigned<_UIntType>::value, "_UIntType must be unsigned type");
+public:
+ static _LIBCPP_CONSTEXPR const result_type _Min = __c == 0u ? 1u: 0u;
+ static _LIBCPP_CONSTEXPR const result_type _Max = __m - 1u;
+ static_assert(_Min < _Max, "linear_congruential_engine invalid parameters");
+
+ // engine characteristics
+ static _LIBCPP_CONSTEXPR const result_type multiplier = __a;
+ static _LIBCPP_CONSTEXPR const result_type increment = __c;
+ static _LIBCPP_CONSTEXPR const result_type modulus = __m;
+ _LIBCPP_INLINE_VISIBILITY
+ static _LIBCPP_CONSTEXPR result_type min() {return _Min;}
+ _LIBCPP_INLINE_VISIBILITY
+ static _LIBCPP_CONSTEXPR result_type max() {return _Max;}
+ static _LIBCPP_CONSTEXPR const result_type default_seed = 1u;
+
+ // constructors and seeding functions
+#ifndef _LIBCPP_CXX03_LANG
+ _LIBCPP_INLINE_VISIBILITY
+ linear_congruential_engine() : linear_congruential_engine(default_seed) {}
+ _LIBCPP_INLINE_VISIBILITY
+ explicit linear_congruential_engine(result_type __s) { seed(__s); }
+#else
+ _LIBCPP_INLINE_VISIBILITY
+ explicit linear_congruential_engine(result_type __s = default_seed) {
+ seed(__s);
+ }
+#endif
+ template<class _Sseq>
+ _LIBCPP_INLINE_VISIBILITY
+ explicit linear_congruential_engine(_Sseq& __q,
+ typename enable_if<__is_seed_sequence<_Sseq, linear_congruential_engine>::value>::type* = 0)
+ {seed(__q);}
+ _LIBCPP_INLINE_VISIBILITY
+ void seed(result_type __s = default_seed)
+ {seed(integral_constant<bool, __m == 0>(),
+ integral_constant<bool, __c == 0>(), __s);}
+ template<class _Sseq>
+ _LIBCPP_INLINE_VISIBILITY
+ typename enable_if
+ <
+ __is_seed_sequence<_Sseq, linear_congruential_engine>::value,
+ void
+ >::type
+ seed(_Sseq& __q)
+ {__seed(__q, integral_constant<unsigned,
+ 1 + (__m == 0 ? (sizeof(result_type) * __CHAR_BIT__ - 1)/32
+ : (__m > 0x100000000ull))>());}
+
+ // generating functions
+ _LIBCPP_INLINE_VISIBILITY
+ result_type operator()()
+ {return __x_ = static_cast<result_type>(__lce_ta<__a, __c, __m, _Mp>::next(__x_));}
+ _LIBCPP_INLINE_VISIBILITY
+ void discard(unsigned long long __z) {for (; __z; --__z) operator()();}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const linear_congruential_engine& __x,
+ const linear_congruential_engine& __y)
+ {return __x.__x_ == __y.__x_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const linear_congruential_engine& __x,
+ const linear_congruential_engine& __y)
+ {return !(__x == __y);}
+
+private:
+
+ _LIBCPP_INLINE_VISIBILITY
+ void seed(true_type, true_type, result_type __s) {__x_ = __s == 0 ? 1 : __s;}
+ _LIBCPP_INLINE_VISIBILITY
+ void seed(true_type, false_type, result_type __s) {__x_ = __s;}
+ _LIBCPP_INLINE_VISIBILITY
+ void seed(false_type, true_type, result_type __s) {__x_ = __s % __m == 0 ?
+ 1 : __s % __m;}
+ _LIBCPP_INLINE_VISIBILITY
+ void seed(false_type, false_type, result_type __s) {__x_ = __s % __m;}
+
+ template<class _Sseq>
+ void __seed(_Sseq& __q, integral_constant<unsigned, 1>);
+ template<class _Sseq>
+ void __seed(_Sseq& __q, integral_constant<unsigned, 2>);
+
+ template <class _CharT, class _Traits,
+ class _Up, _Up _Ap, _Up _Cp, _Up _Np>
+ friend
+ basic_ostream<_CharT, _Traits>&
+ operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const linear_congruential_engine<_Up, _Ap, _Cp, _Np>&);
+
+ template <class _CharT, class _Traits,
+ class _Up, _Up _Ap, _Up _Cp, _Up _Np>
+ friend
+ basic_istream<_CharT, _Traits>&
+ operator>>(basic_istream<_CharT, _Traits>& __is,
+ linear_congruential_engine<_Up, _Ap, _Cp, _Np>& __x);
+};
+
+template <class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m>
+ _LIBCPP_CONSTEXPR const typename linear_congruential_engine<_UIntType, __a, __c, __m>::result_type
+ linear_congruential_engine<_UIntType, __a, __c, __m>::multiplier;
+
+template <class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m>
+ _LIBCPP_CONSTEXPR const typename linear_congruential_engine<_UIntType, __a, __c, __m>::result_type
+ linear_congruential_engine<_UIntType, __a, __c, __m>::increment;
+
+template <class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m>
+ _LIBCPP_CONSTEXPR const typename linear_congruential_engine<_UIntType, __a, __c, __m>::result_type
+ linear_congruential_engine<_UIntType, __a, __c, __m>::modulus;
+
+template <class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m>
+ _LIBCPP_CONSTEXPR const typename linear_congruential_engine<_UIntType, __a, __c, __m>::result_type
+ linear_congruential_engine<_UIntType, __a, __c, __m>::default_seed;
+
+template <class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m>
+template<class _Sseq>
+void
+linear_congruential_engine<_UIntType, __a, __c, __m>::__seed(_Sseq& __q,
+ integral_constant<unsigned, 1>)
+{
+ const unsigned __k = 1;
+ uint32_t __ar[__k+3];
+ __q.generate(__ar, __ar + __k + 3);
+ result_type __s = static_cast<result_type>(__ar[3] % __m);
+ __x_ = __c == 0 && __s == 0 ? result_type(1) : __s;
+}
+
+template <class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m>
+template<class _Sseq>
+void
+linear_congruential_engine<_UIntType, __a, __c, __m>::__seed(_Sseq& __q,
+ integral_constant<unsigned, 2>)
+{
+ const unsigned __k = 2;
+ uint32_t __ar[__k+3];
+ __q.generate(__ar, __ar + __k + 3);
+ result_type __s = static_cast<result_type>((__ar[3] +
+ ((uint64_t)__ar[4] << 32)) % __m);
+ __x_ = __c == 0 && __s == 0 ? result_type(1) : __s;
+}
+
+template <class _CharT, class _Traits,
+ class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m>
+inline _LIBCPP_INLINE_VISIBILITY
+basic_ostream<_CharT, _Traits>&
+operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const linear_congruential_engine<_UIntType, __a, __c, __m>& __x)
+{
+ __save_flags<_CharT, _Traits> __lx(__os);
+ typedef basic_ostream<_CharT, _Traits> _Ostream;
+ __os.flags(_Ostream::dec | _Ostream::left);
+ __os.fill(__os.widen(' '));
+ return __os << __x.__x_;
+}
+
+template <class _CharT, class _Traits,
+ class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m>
+basic_istream<_CharT, _Traits>&
+operator>>(basic_istream<_CharT, _Traits>& __is,
+ linear_congruential_engine<_UIntType, __a, __c, __m>& __x)
+{
+ __save_flags<_CharT, _Traits> __lx(__is);
+ typedef basic_istream<_CharT, _Traits> _Istream;
+ __is.flags(_Istream::dec | _Istream::skipws);
+ _UIntType __t;
+ __is >> __t;
+ if (!__is.fail())
+ __x.__x_ = __t;
+ return __is;
+}
+
+typedef linear_congruential_engine<uint_fast32_t, 16807, 0, 2147483647>
+ minstd_rand0;
+typedef linear_congruential_engine<uint_fast32_t, 48271, 0, 2147483647>
+ minstd_rand;
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___RANDOM_LINEAR_CONGRUENTIAL_ENGINE_H
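
Illustrative check (not part of the header): minstd_rand is the recurrence
x(n+1) = 48271 * x(n) mod (2^31 - 1), and operator() returns the advanced
state, so the engine can be mirrored by hand:

    #include <cstdint>
    #include <iostream>
    #include <random>

    int main() {
        std::minstd_rand eng(1);
        std::uint_fast64_t x = 1;
        for (int i = 0; i < 5; ++i) {
            x = (48271u * x) % 2147483647u;
            // Both values printed on each line are equal.
            std::cout << eng() << " == " << x << '\n';
        }
    }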
diff --git a/libcxx/include/__random/log2.h b/libcxx/include/__random/log2.h
new file mode 100644
index 000000000000..3d9640c1f787
--- /dev/null
+++ b/libcxx/include/__random/log2.h
@@ -0,0 +1,74 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANDOM_LOG2_H
+#define _LIBCPP___RANDOM_LOG2_H
+
+#include <__config>
+#include <cstddef>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template <class _UIntType, _UIntType _Xp, size_t _Rp>
+struct __log2_imp;
+
+template <unsigned long long _Xp, size_t _Rp>
+struct __log2_imp<unsigned long long, _Xp, _Rp>
+{
+ static const size_t value = _Xp & ((unsigned long long)(1) << _Rp) ? _Rp
+ : __log2_imp<unsigned long long, _Xp, _Rp - 1>::value;
+};
+
+template <unsigned long long _Xp>
+struct __log2_imp<unsigned long long, _Xp, 0>
+{
+ static const size_t value = 0;
+};
+
+template <size_t _Rp>
+struct __log2_imp<unsigned long long, 0, _Rp>
+{
+ static const size_t value = _Rp + 1;
+};
+
+#ifndef _LIBCPP_HAS_NO_INT128
+
+template <__uint128_t _Xp, size_t _Rp>
+struct __log2_imp<__uint128_t, _Xp, _Rp>
+{
+ static const size_t value = (_Xp >> 64)
+ ? (64 + __log2_imp<unsigned long long, (_Xp >> 64), 63>::value)
+ : __log2_imp<unsigned long long, _Xp, 63>::value;
+};
+
+#endif // _LIBCPP_HAS_NO_INT128
+
+template <class _UIntType, _UIntType _Xp>
+struct __log2
+{
+ static const size_t value = __log2_imp<
+#ifndef _LIBCPP_HAS_NO_INT128
+ typename conditional<
+ sizeof(_UIntType) <= sizeof(unsigned long long),
+ unsigned long long,
+ __uint128_t
+ >::type,
+#else
+ unsigned long long,
+#endif // _LIBCPP_HAS_NO_INT128
+ _Xp, sizeof(_UIntType) * __CHAR_BIT__ - 1>::value;
+};
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___RANDOM_LOG2_H
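
The recursion above computes floor(log2(x)) at compile time by scanning from
the top bit down (the x == 0 specialization returns the full bit width); it is
used elsewhere in <random>, e.g. by uniform_int_distribution. A standalone
re-statement with hypothetical, non-reserved names (assumes x > 0):

    #include <cstddef>

    // Scan from bit R downward; the first set bit gives floor(log2(X)).
    template <unsigned long long X, std::size_t R>
    struct log2_imp {
        static const std::size_t value =
            X & (1ull << R) ? R : log2_imp<X, R - 1>::value;
    };
    template <unsigned long long X>
    struct log2_imp<X, 0> { static const std::size_t value = 0; };

    static_assert(log2_imp<1, 63>::value == 0, "floor(log2(1)) == 0");
    static_assert(log2_imp<5, 63>::value == 2, "floor(log2(5)) == 2");
    static_assert(log2_imp<1ull << 40, 63>::value == 40, "");

    int main() {}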
diff --git a/libcxx/include/__random/lognormal_distribution.h b/libcxx/include/__random/lognormal_distribution.h
new file mode 100644
index 000000000000..752861c3de0c
--- /dev/null
+++ b/libcxx/include/__random/lognormal_distribution.h
@@ -0,0 +1,163 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANDOM_LOGNORMAL_DISTRIBUTION_H
+#define _LIBCPP___RANDOM_LOGNORMAL_DISTRIBUTION_H
+
+#include <__config>
+#include <__random/normal_distribution.h>
+#include <cmath>
+#include <iosfwd>
+#include <limits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template<class _RealType = double>
+class _LIBCPP_TEMPLATE_VIS lognormal_distribution
+{
+public:
+ // types
+ typedef _RealType result_type;
+
+ class _LIBCPP_TEMPLATE_VIS param_type
+ {
+ normal_distribution<result_type> __nd_;
+ public:
+ typedef lognormal_distribution distribution_type;
+
+ _LIBCPP_INLINE_VISIBILITY
+ explicit param_type(result_type __m = 0, result_type __s = 1)
+ : __nd_(__m, __s) {}
+
+ _LIBCPP_INLINE_VISIBILITY
+ result_type m() const {return __nd_.mean();}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type s() const {return __nd_.stddev();}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const param_type& __x, const param_type& __y)
+ {return __x.__nd_ == __y.__nd_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const param_type& __x, const param_type& __y)
+ {return !(__x == __y);}
+ friend class lognormal_distribution;
+
+ template <class _CharT, class _Traits, class _RT>
+ friend
+ basic_ostream<_CharT, _Traits>&
+ operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const lognormal_distribution<_RT>& __x);
+
+ template <class _CharT, class _Traits, class _RT>
+ friend
+ basic_istream<_CharT, _Traits>&
+ operator>>(basic_istream<_CharT, _Traits>& __is,
+ lognormal_distribution<_RT>& __x);
+ };
+
+private:
+ param_type __p_;
+
+public:
+ // constructor and reset functions
+#ifndef _LIBCPP_CXX03_LANG
+ _LIBCPP_INLINE_VISIBILITY
+ lognormal_distribution() : lognormal_distribution(0) {}
+ _LIBCPP_INLINE_VISIBILITY
+ explicit lognormal_distribution(result_type __m, result_type __s = 1)
+ : __p_(param_type(__m, __s)) {}
+#else
+ _LIBCPP_INLINE_VISIBILITY
+ explicit lognormal_distribution(result_type __m = 0,
+ result_type __s = 1)
+ : __p_(param_type(__m, __s)) {}
+#endif
+ _LIBCPP_INLINE_VISIBILITY
+ explicit lognormal_distribution(const param_type& __p)
+ : __p_(__p) {}
+ _LIBCPP_INLINE_VISIBILITY
+ void reset() {__p_.__nd_.reset();}
+
+ // generating functions
+ template<class _URNG>
+ _LIBCPP_INLINE_VISIBILITY
+ result_type operator()(_URNG& __g)
+ {return (*this)(__g, __p_);}
+ template<class _URNG>
+ _LIBCPP_INLINE_VISIBILITY
+ result_type operator()(_URNG& __g, const param_type& __p)
+ {return _VSTD::exp(const_cast<normal_distribution<result_type>&>(__p.__nd_)(__g));}
+
+ // property functions
+ _LIBCPP_INLINE_VISIBILITY
+ result_type m() const {return __p_.m();}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type s() const {return __p_.s();}
+
+ _LIBCPP_INLINE_VISIBILITY
+ param_type param() const {return __p_;}
+ _LIBCPP_INLINE_VISIBILITY
+ void param(const param_type& __p) {__p_ = __p;}
+
+ _LIBCPP_INLINE_VISIBILITY
+ result_type min() const {return 0;}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type max() const {return numeric_limits<result_type>::infinity();}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const lognormal_distribution& __x,
+ const lognormal_distribution& __y)
+ {return __x.__p_ == __y.__p_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const lognormal_distribution& __x,
+ const lognormal_distribution& __y)
+ {return !(__x == __y);}
+
+ template <class _CharT, class _Traits, class _RT>
+ friend
+ basic_ostream<_CharT, _Traits>&
+ operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const lognormal_distribution<_RT>& __x);
+
+ template <class _CharT, class _Traits, class _RT>
+ friend
+ basic_istream<_CharT, _Traits>&
+ operator>>(basic_istream<_CharT, _Traits>& __is,
+ lognormal_distribution<_RT>& __x);
+};
+
+template <class _CharT, class _Traits, class _RT>
+inline _LIBCPP_INLINE_VISIBILITY
+basic_ostream<_CharT, _Traits>&
+operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const lognormal_distribution<_RT>& __x)
+{
+ return __os << __x.__p_.__nd_;
+}
+
+template <class _CharT, class _Traits, class _RT>
+inline _LIBCPP_INLINE_VISIBILITY
+basic_istream<_CharT, _Traits>&
+operator>>(basic_istream<_CharT, _Traits>& __is,
+ lognormal_distribution<_RT>& __x)
+{
+ return __is >> __x.__p_.__nd_;
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___RANDOM_LOGNORMAL_DISTRIBUTION_H
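
Usage sketch (illustrative): the distribution simply exponentiates draws from
its embedded normal_distribution, so m and s are the mean and standard
deviation of the underlying normal, not of the lognormal samples themselves.

    #include <iostream>
    #include <random>

    int main() {
        std::mt19937 gen(42);
        std::lognormal_distribution<double> dist(/*m*/ 0.0, /*s*/ 0.25);
        for (int i = 0; i < 3; ++i)
            std::cout << dist(gen) << '\n'; // strictly positive samples
    }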
diff --git a/libcxx/include/__random/mersenne_twister_engine.h b/libcxx/include/__random/mersenne_twister_engine.h
new file mode 100644
index 000000000000..121ffae37ec0
--- /dev/null
+++ b/libcxx/include/__random/mersenne_twister_engine.h
@@ -0,0 +1,534 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANDOM_MERSENNE_TWISTER_ENGINE_H
+#define _LIBCPP___RANDOM_MERSENNE_TWISTER_ENGINE_H
+
+#include <__algorithm/equal.h>
+#include <__algorithm/min.h>
+#include <__config>
+#include <__random/is_seed_sequence.h>
+#include <cstddef>
+#include <cstdint>
+#include <iosfwd>
+#include <limits>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
+ _UIntType __a, size_t __u, _UIntType __d, size_t __s,
+ _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
+class _LIBCPP_TEMPLATE_VIS mersenne_twister_engine;
+
+template <class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp,
+ _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp,
+ _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp>
+bool
+operator==(const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp,
+ _Bp, _Tp, _Cp, _Lp, _Fp>& __x,
+ const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp,
+ _Bp, _Tp, _Cp, _Lp, _Fp>& __y);
+
+template <class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp,
+ _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp,
+ _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp>
+_LIBCPP_INLINE_VISIBILITY
+bool
+operator!=(const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp,
+ _Bp, _Tp, _Cp, _Lp, _Fp>& __x,
+ const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp,
+ _Bp, _Tp, _Cp, _Lp, _Fp>& __y);
+
+template <class _CharT, class _Traits,
+ class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp,
+ _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp,
+ _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp>
+basic_ostream<_CharT, _Traits>&
+operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp,
+ _Bp, _Tp, _Cp, _Lp, _Fp>& __x);
+
+template <class _CharT, class _Traits,
+ class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp,
+ _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp,
+ _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp>
+basic_istream<_CharT, _Traits>&
+operator>>(basic_istream<_CharT, _Traits>& __is,
+ mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp,
+ _Bp, _Tp, _Cp, _Lp, _Fp>& __x);
+
+template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
+ _UIntType __a, size_t __u, _UIntType __d, size_t __s,
+ _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
+class _LIBCPP_TEMPLATE_VIS mersenne_twister_engine
+{
+public:
+ // types
+ typedef _UIntType result_type;
+
+private:
+ result_type __x_[__n];
+ size_t __i_;
+
+ static_assert( 0 < __m, "mersenne_twister_engine invalid parameters");
+ static_assert(__m <= __n, "mersenne_twister_engine invalid parameters");
+ static _LIBCPP_CONSTEXPR const result_type _Dt = numeric_limits<result_type>::digits;
+ static_assert(__w <= _Dt, "mersenne_twister_engine invalid parameters");
+ static_assert( 2 <= __w, "mersenne_twister_engine invalid parameters");
+ static_assert(__r <= __w, "mersenne_twister_engine invalid parameters");
+ static_assert(__u <= __w, "mersenne_twister_engine invalid parameters");
+ static_assert(__s <= __w, "mersenne_twister_engine invalid parameters");
+ static_assert(__t <= __w, "mersenne_twister_engine invalid parameters");
+ static_assert(__l <= __w, "mersenne_twister_engine invalid parameters");
+public:
+ static _LIBCPP_CONSTEXPR const result_type _Min = 0;
+ static _LIBCPP_CONSTEXPR const result_type _Max = __w == _Dt ? result_type(~0) :
+ (result_type(1) << __w) - result_type(1);
+ static_assert(_Min < _Max, "mersenne_twister_engine invalid parameters");
+ static_assert(__a <= _Max, "mersenne_twister_engine invalid parameters");
+ static_assert(__b <= _Max, "mersenne_twister_engine invalid parameters");
+ static_assert(__c <= _Max, "mersenne_twister_engine invalid parameters");
+ static_assert(__d <= _Max, "mersenne_twister_engine invalid parameters");
+ static_assert(__f <= _Max, "mersenne_twister_engine invalid parameters");
+
+ // engine characteristics
+ static _LIBCPP_CONSTEXPR const size_t word_size = __w;
+ static _LIBCPP_CONSTEXPR const size_t state_size = __n;
+ static _LIBCPP_CONSTEXPR const size_t shift_size = __m;
+ static _LIBCPP_CONSTEXPR const size_t mask_bits = __r;
+ static _LIBCPP_CONSTEXPR const result_type xor_mask = __a;
+ static _LIBCPP_CONSTEXPR const size_t tempering_u = __u;
+ static _LIBCPP_CONSTEXPR const result_type tempering_d = __d;
+ static _LIBCPP_CONSTEXPR const size_t tempering_s = __s;
+ static _LIBCPP_CONSTEXPR const result_type tempering_b = __b;
+ static _LIBCPP_CONSTEXPR const size_t tempering_t = __t;
+ static _LIBCPP_CONSTEXPR const result_type tempering_c = __c;
+ static _LIBCPP_CONSTEXPR const size_t tempering_l = __l;
+ static _LIBCPP_CONSTEXPR const result_type initialization_multiplier = __f;
+ _LIBCPP_INLINE_VISIBILITY
+ static _LIBCPP_CONSTEXPR result_type min() { return _Min; }
+ _LIBCPP_INLINE_VISIBILITY
+ static _LIBCPP_CONSTEXPR result_type max() { return _Max; }
+ static _LIBCPP_CONSTEXPR const result_type default_seed = 5489u;
+
+ // constructors and seeding functions
+#ifndef _LIBCPP_CXX03_LANG
+ _LIBCPP_INLINE_VISIBILITY
+ mersenne_twister_engine() : mersenne_twister_engine(default_seed) {}
+ _LIBCPP_INLINE_VISIBILITY
+ explicit mersenne_twister_engine(result_type __sd) { seed(__sd); }
+#else
+ _LIBCPP_INLINE_VISIBILITY
+ explicit mersenne_twister_engine(result_type __sd = default_seed) {
+ seed(__sd);
+ }
+#endif
+ template<class _Sseq>
+ _LIBCPP_INLINE_VISIBILITY
+ explicit mersenne_twister_engine(_Sseq& __q,
+ typename enable_if<__is_seed_sequence<_Sseq, mersenne_twister_engine>::value>::type* = 0)
+ {seed(__q);}
+ void seed(result_type __sd = default_seed);
+ template<class _Sseq>
+ _LIBCPP_INLINE_VISIBILITY
+ typename enable_if
+ <
+ __is_seed_sequence<_Sseq, mersenne_twister_engine>::value,
+ void
+ >::type
+ seed(_Sseq& __q)
+ {__seed(__q, integral_constant<unsigned, 1 + (__w - 1) / 32>());}
+
+ // generating functions
+ result_type operator()();
+ _LIBCPP_INLINE_VISIBILITY
+ void discard(unsigned long long __z) {for (; __z; --__z) operator()();}
+
+ template <class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp,
+ _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp,
+ _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp>
+ friend
+ bool
+ operator==(const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp,
+ _Bp, _Tp, _Cp, _Lp, _Fp>& __x,
+ const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp,
+ _Bp, _Tp, _Cp, _Lp, _Fp>& __y);
+
+ template <class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp,
+ _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp,
+ _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp>
+ friend
+ bool
+ operator!=(const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp,
+ _Bp, _Tp, _Cp, _Lp, _Fp>& __x,
+ const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp,
+ _Bp, _Tp, _Cp, _Lp, _Fp>& __y);
+
+ template <class _CharT, class _Traits,
+ class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp,
+ _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp,
+ _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp>
+ friend
+ basic_ostream<_CharT, _Traits>&
+ operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp,
+ _Bp, _Tp, _Cp, _Lp, _Fp>& __x);
+
+ template <class _CharT, class _Traits,
+ class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp,
+ _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp,
+ _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp>
+ friend
+ basic_istream<_CharT, _Traits>&
+ operator>>(basic_istream<_CharT, _Traits>& __is,
+ mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp,
+ _Bp, _Tp, _Cp, _Lp, _Fp>& __x);
+private:
+
+ template<class _Sseq>
+ void __seed(_Sseq& __q, integral_constant<unsigned, 1>);
+ template<class _Sseq>
+ void __seed(_Sseq& __q, integral_constant<unsigned, 2>);
+
+ template <size_t __count>
+ _LIBCPP_INLINE_VISIBILITY
+ static
+ typename enable_if
+ <
+ __count < __w,
+ result_type
+ >::type
+ __lshift(result_type __x) {return (__x << __count) & _Max;}
+
+ template <size_t __count>
+ _LIBCPP_INLINE_VISIBILITY
+ static
+ typename enable_if
+ <
+ (__count >= __w),
+ result_type
+ >::type
+ __lshift(result_type) {return result_type(0);}
+
+ template <size_t __count>
+ _LIBCPP_INLINE_VISIBILITY
+ static
+ typename enable_if
+ <
+ __count < _Dt,
+ result_type
+ >::type
+ __rshift(result_type __x) {return __x >> __count;}
+
+ template <size_t __count>
+ _LIBCPP_INLINE_VISIBILITY
+ static
+ typename enable_if
+ <
+ (__count >= _Dt),
+ result_type
+ >::type
+ __rshift(result_type) {return result_type(0);}
+};
+
+template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
+ _UIntType __a, size_t __u, _UIntType __d, size_t __s,
+ _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
+ _LIBCPP_CONSTEXPR const size_t
+ mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::word_size;
+
+template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
+ _UIntType __a, size_t __u, _UIntType __d, size_t __s,
+ _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
+ _LIBCPP_CONSTEXPR const size_t
+ mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::state_size;
+
+template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
+ _UIntType __a, size_t __u, _UIntType __d, size_t __s,
+ _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
+ _LIBCPP_CONSTEXPR const size_t
+ mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::shift_size;
+
+template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
+ _UIntType __a, size_t __u, _UIntType __d, size_t __s,
+ _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
+ _LIBCPP_CONSTEXPR const size_t
+ mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::mask_bits;
+
+template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
+ _UIntType __a, size_t __u, _UIntType __d, size_t __s,
+ _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
+ _LIBCPP_CONSTEXPR const typename mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::result_type
+ mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::xor_mask;
+
+template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
+ _UIntType __a, size_t __u, _UIntType __d, size_t __s,
+ _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
+ _LIBCPP_CONSTEXPR const size_t
+ mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::tempering_u;
+
+template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
+ _UIntType __a, size_t __u, _UIntType __d, size_t __s,
+ _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
+ _LIBCPP_CONSTEXPR const typename mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::result_type
+ mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::tempering_d;
+
+template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
+ _UIntType __a, size_t __u, _UIntType __d, size_t __s,
+ _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
+ _LIBCPP_CONSTEXPR const size_t
+ mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::tempering_s;
+
+template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
+ _UIntType __a, size_t __u, _UIntType __d, size_t __s,
+ _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
+ _LIBCPP_CONSTEXPR const typename mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::result_type
+ mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::tempering_b;
+
+template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
+ _UIntType __a, size_t __u, _UIntType __d, size_t __s,
+ _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
+ _LIBCPP_CONSTEXPR const size_t
+ mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::tempering_t;
+
+template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
+ _UIntType __a, size_t __u, _UIntType __d, size_t __s,
+ _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
+ _LIBCPP_CONSTEXPR const typename mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::result_type
+ mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::tempering_c;
+
+template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
+ _UIntType __a, size_t __u, _UIntType __d, size_t __s,
+ _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
+ _LIBCPP_CONSTEXPR const size_t
+ mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::tempering_l;
+
+template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
+ _UIntType __a, size_t __u, _UIntType __d, size_t __s,
+ _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
+ _LIBCPP_CONSTEXPR const typename mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::result_type
+ mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::initialization_multiplier;
+
+template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
+ _UIntType __a, size_t __u, _UIntType __d, size_t __s,
+ _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
+ _LIBCPP_CONSTEXPR const typename mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::result_type
+ mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::default_seed;
+
+template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
+ _UIntType __a, size_t __u, _UIntType __d, size_t __s,
+ _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
+void
+mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b,
+ __t, __c, __l, __f>::seed(result_type __sd)
+ _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK
+{ // __w >= 2
+ __x_[0] = __sd & _Max;
+ for (size_t __i = 1; __i < __n; ++__i)
+ __x_[__i] = (__f * (__x_[__i-1] ^ __rshift<__w - 2>(__x_[__i-1])) + __i) & _Max;
+ __i_ = 0;
+}
+
+template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
+ _UIntType __a, size_t __u, _UIntType __d, size_t __s,
+ _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
+template<class _Sseq>
+void
+mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b,
+ __t, __c, __l, __f>::__seed(_Sseq& __q, integral_constant<unsigned, 1>)
+{
+ const unsigned __k = 1;
+ uint32_t __ar[__n * __k];
+ __q.generate(__ar, __ar + __n * __k);
+ for (size_t __i = 0; __i < __n; ++__i)
+ __x_[__i] = static_cast<result_type>(__ar[__i] & _Max);
+ const result_type __mask = __r == _Dt ? result_type(~0) :
+ (result_type(1) << __r) - result_type(1);
+ __i_ = 0;
+ if ((__x_[0] & ~__mask) == 0)
+ {
+ for (size_t __i = 1; __i < __n; ++__i)
+ if (__x_[__i] != 0)
+ return;
+ __x_[0] = result_type(1) << (__w - 1);
+ }
+}
+
+template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
+ _UIntType __a, size_t __u, _UIntType __d, size_t __s,
+ _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
+template<class _Sseq>
+void
+mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b,
+ __t, __c, __l, __f>::__seed(_Sseq& __q, integral_constant<unsigned, 2>)
+{
+ const unsigned __k = 2;
+ uint32_t __ar[__n * __k];
+ __q.generate(__ar, __ar + __n * __k);
+ for (size_t __i = 0; __i < __n; ++__i)
+ __x_[__i] = static_cast<result_type>(
+ (__ar[2 * __i] + ((uint64_t)__ar[2 * __i + 1] << 32)) & _Max);
+ const result_type __mask = __r == _Dt ? result_type(~0) :
+ (result_type(1) << __r) - result_type(1);
+ __i_ = 0;
+ if ((__x_[0] & ~__mask) == 0)
+ {
+ for (size_t __i = 1; __i < __n; ++__i)
+ if (__x_[__i] != 0)
+ return;
+ __x_[0] = result_type(1) << (__w - 1);
+ }
+}
+
+template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
+ _UIntType __a, size_t __u, _UIntType __d, size_t __s,
+ _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
+_UIntType
+mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b,
+ __t, __c, __l, __f>::operator()()
+{
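+    // One twist step: splice the upper bits of __x_[__i_] with the low __r
+    // bits of __x_[__j], advance that state word, then temper the output
+    // with the u/d, s/b, t/c and l parameters.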
+ const size_t __j = (__i_ + 1) % __n;
+ const result_type __mask = __r == _Dt ? result_type(~0) :
+ (result_type(1) << __r) - result_type(1);
+ const result_type _Yp = (__x_[__i_] & ~__mask) | (__x_[__j] & __mask);
+ const size_t __k = (__i_ + __m) % __n;
+ __x_[__i_] = __x_[__k] ^ __rshift<1>(_Yp) ^ (__a * (_Yp & 1));
+ result_type __z = __x_[__i_] ^ (__rshift<__u>(__x_[__i_]) & __d);
+ __i_ = __j;
+ __z ^= __lshift<__s>(__z) & __b;
+ __z ^= __lshift<__t>(__z) & __c;
+ return __z ^ __rshift<__l>(__z);
+}
+
+template <class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp,
+ _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp,
+ _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp>
+bool
+operator==(const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp,
+ _Bp, _Tp, _Cp, _Lp, _Fp>& __x,
+ const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp,
+ _Bp, _Tp, _Cp, _Lp, _Fp>& __y)
+{
+ if (__x.__i_ == __y.__i_)
+ return _VSTD::equal(__x.__x_, __x.__x_ + _Np, __y.__x_);
+ if (__x.__i_ == 0 || __y.__i_ == 0)
+ {
+ size_t __j = _VSTD::min(_Np - __x.__i_, _Np - __y.__i_);
+ if (!_VSTD::equal(__x.__x_ + __x.__i_, __x.__x_ + __x.__i_ + __j,
+ __y.__x_ + __y.__i_))
+ return false;
+ if (__x.__i_ == 0)
+ return _VSTD::equal(__x.__x_ + __j, __x.__x_ + _Np, __y.__x_);
+ return _VSTD::equal(__x.__x_, __x.__x_ + (_Np - __j), __y.__x_ + __j);
+ }
+ if (__x.__i_ < __y.__i_)
+ {
+ size_t __j = _Np - __y.__i_;
+ if (!_VSTD::equal(__x.__x_ + __x.__i_, __x.__x_ + (__x.__i_ + __j),
+ __y.__x_ + __y.__i_))
+ return false;
+ if (!_VSTD::equal(__x.__x_ + (__x.__i_ + __j), __x.__x_ + _Np,
+ __y.__x_))
+ return false;
+ return _VSTD::equal(__x.__x_, __x.__x_ + __x.__i_,
+ __y.__x_ + (_Np - (__x.__i_ + __j)));
+ }
+ size_t __j = _Np - __x.__i_;
+ if (!_VSTD::equal(__y.__x_ + __y.__i_, __y.__x_ + (__y.__i_ + __j),
+ __x.__x_ + __x.__i_))
+ return false;
+ if (!_VSTD::equal(__y.__x_ + (__y.__i_ + __j), __y.__x_ + _Np,
+ __x.__x_))
+ return false;
+ return _VSTD::equal(__y.__x_, __y.__x_ + __y.__i_,
+ __x.__x_ + (_Np - (__y.__i_ + __j)));
+}
+
+template <class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp,
+ _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp,
+ _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp>
+inline _LIBCPP_INLINE_VISIBILITY
+bool
+operator!=(const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp,
+ _Bp, _Tp, _Cp, _Lp, _Fp>& __x,
+ const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp,
+ _Bp, _Tp, _Cp, _Lp, _Fp>& __y)
+{
+ return !(__x == __y);
+}
+
+template <class _CharT, class _Traits,
+ class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp,
+ _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp,
+ _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp>
+basic_ostream<_CharT, _Traits>&
+operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp,
+ _Bp, _Tp, _Cp, _Lp, _Fp>& __x)
+{
+ __save_flags<_CharT, _Traits> __lx(__os);
+ typedef basic_ostream<_CharT, _Traits> _Ostream;
+ __os.flags(_Ostream::dec | _Ostream::left);
+ _CharT __sp = __os.widen(' ');
+ __os.fill(__sp);
+ __os << __x.__x_[__x.__i_];
+ for (size_t __j = __x.__i_ + 1; __j < _Np; ++__j)
+ __os << __sp << __x.__x_[__j];
+ for (size_t __j = 0; __j < __x.__i_; ++__j)
+ __os << __sp << __x.__x_[__j];
+ return __os;
+}
+
+template <class _CharT, class _Traits,
+ class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp,
+ _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp,
+ _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp>
+basic_istream<_CharT, _Traits>&
+operator>>(basic_istream<_CharT, _Traits>& __is,
+ mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp,
+ _Bp, _Tp, _Cp, _Lp, _Fp>& __x)
+{
+ __save_flags<_CharT, _Traits> __lx(__is);
+ typedef basic_istream<_CharT, _Traits> _Istream;
+ __is.flags(_Istream::dec | _Istream::skipws);
+ _UInt __t[_Np];
+ for (size_t __i = 0; __i < _Np; ++__i)
+ __is >> __t[__i];
+ if (!__is.fail())
+ {
+ for (size_t __i = 0; __i < _Np; ++__i)
+ __x.__x_[__i] = __t[__i];
+ __x.__i_ = 0;
+ }
+ return __is;
+}
+
+typedef mersenne_twister_engine<uint_fast32_t, 32, 624, 397, 31,
+ 0x9908b0df, 11, 0xffffffff,
+ 7, 0x9d2c5680,
+ 15, 0xefc60000,
+ 18, 1812433253> mt19937;
+typedef mersenne_twister_engine<uint_fast64_t, 64, 312, 156, 31,
+ 0xb5026f5aa96619e9ULL, 29, 0x5555555555555555ULL,
+ 17, 0x71d67fffeda60000ULL,
+ 37, 0xfff7eee000000000ULL,
+ 43, 6364136223846793005ULL> mt19937_64;
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___RANDOM_MERSENNE_TWISTER_ENGINE_H
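
A quick conformance probe (illustrative): [rand.predef] pins down the 10000th
output of a default-seeded mt19937, which exercises the seeding loop, the
twist, and the tempering above in one go.

    #include <iostream>
    #include <random>

    int main() {
        std::mt19937 gen; // default_seed == 5489u
        gen.discard(9999);
        std::cout << (gen() == 4123659995u ? "ok" : "mismatch") << '\n';
    }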
diff --git a/libcxx/include/__random/negative_binomial_distribution.h b/libcxx/include/__random/negative_binomial_distribution.h
new file mode 100644
index 000000000000..7329bac2ff85
--- /dev/null
+++ b/libcxx/include/__random/negative_binomial_distribution.h
@@ -0,0 +1,176 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANDOM_NEGATIVE_BINOMIAL_DISTRIBUTION_H
+#define _LIBCPP___RANDOM_NEGATIVE_BINOMIAL_DISTRIBUTION_H
+
+#include <__config>
+#include <__random/bernoulli_distribution.h>
+#include <__random/gamma_distribution.h>
+#include <__random/poisson_distribution.h>
+#include <iosfwd>
+#include <limits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template<class _IntType = int>
+class _LIBCPP_TEMPLATE_VIS negative_binomial_distribution
+{
+public:
+ // types
+ typedef _IntType result_type;
+
+ class _LIBCPP_TEMPLATE_VIS param_type
+ {
+ result_type __k_;
+ double __p_;
+ public:
+ typedef negative_binomial_distribution distribution_type;
+
+ _LIBCPP_INLINE_VISIBILITY
+ explicit param_type(result_type __k = 1, double __p = 0.5)
+ : __k_(__k), __p_(__p) {}
+
+ _LIBCPP_INLINE_VISIBILITY
+ result_type k() const {return __k_;}
+ _LIBCPP_INLINE_VISIBILITY
+ double p() const {return __p_;}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const param_type& __x, const param_type& __y)
+ {return __x.__k_ == __y.__k_ && __x.__p_ == __y.__p_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const param_type& __x, const param_type& __y)
+ {return !(__x == __y);}
+ };
+
+private:
+ param_type __p_;
+
+public:
+ // constructor and reset functions
+#ifndef _LIBCPP_CXX03_LANG
+ _LIBCPP_INLINE_VISIBILITY
+ negative_binomial_distribution() : negative_binomial_distribution(1) {}
+ _LIBCPP_INLINE_VISIBILITY
+ explicit negative_binomial_distribution(result_type __k, double __p = 0.5)
+ : __p_(__k, __p) {}
+#else
+ _LIBCPP_INLINE_VISIBILITY
+ explicit negative_binomial_distribution(result_type __k = 1,
+ double __p = 0.5)
+ : __p_(__k, __p) {}
+#endif
+ _LIBCPP_INLINE_VISIBILITY
+ explicit negative_binomial_distribution(const param_type& __p) : __p_(__p) {}
+ _LIBCPP_INLINE_VISIBILITY
+ void reset() {}
+
+ // generating functions
+ template<class _URNG>
+ _LIBCPP_INLINE_VISIBILITY
+ result_type operator()(_URNG& __g)
+ {return (*this)(__g, __p_);}
+ template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p);
+
+ // property functions
+ _LIBCPP_INLINE_VISIBILITY
+ result_type k() const {return __p_.k();}
+ _LIBCPP_INLINE_VISIBILITY
+ double p() const {return __p_.p();}
+
+ _LIBCPP_INLINE_VISIBILITY
+ param_type param() const {return __p_;}
+ _LIBCPP_INLINE_VISIBILITY
+ void param(const param_type& __p) {__p_ = __p;}
+
+ _LIBCPP_INLINE_VISIBILITY
+ result_type min() const {return 0;}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type max() const {return numeric_limits<result_type>::max();}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const negative_binomial_distribution& __x,
+ const negative_binomial_distribution& __y)
+ {return __x.__p_ == __y.__p_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const negative_binomial_distribution& __x,
+ const negative_binomial_distribution& __y)
+ {return !(__x == __y);}
+};
+
+template <class _IntType>
+template<class _URNG>
+_IntType
+negative_binomial_distribution<_IntType>::operator()(_URNG& __urng, const param_type& __pr)
+{
+ result_type __k = __pr.k();
+ double __p = __pr.p();
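+    // Heuristic split: for small __k relative to __p, count Bernoulli
+    // failures directly; otherwise draw via the gamma-Poisson mixture
+    // representation of the negative binomial.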
+ if (__k <= 21 * __p)
+ {
+ bernoulli_distribution __gen(__p);
+ result_type __f = 0;
+ result_type __s = 0;
+ while (__s < __k)
+ {
+ if (__gen(__urng))
+ ++__s;
+ else
+ ++__f;
+ }
+ return __f;
+ }
+ return poisson_distribution<result_type>(gamma_distribution<double>
+ (__k, (1-__p)/__p)(__urng))(__urng);
+}
+
+template <class _CharT, class _Traits, class _IntType>
+basic_ostream<_CharT, _Traits>&
+operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const negative_binomial_distribution<_IntType>& __x)
+{
+ __save_flags<_CharT, _Traits> __lx(__os);
+ typedef basic_ostream<_CharT, _Traits> _OStream;
+ __os.flags(_OStream::dec | _OStream::left | _OStream::fixed |
+ _OStream::scientific);
+ _CharT __sp = __os.widen(' ');
+ __os.fill(__sp);
+ return __os << __x.k() << __sp << __x.p();
+}
+
+template <class _CharT, class _Traits, class _IntType>
+basic_istream<_CharT, _Traits>&
+operator>>(basic_istream<_CharT, _Traits>& __is,
+ negative_binomial_distribution<_IntType>& __x)
+{
+ typedef negative_binomial_distribution<_IntType> _Eng;
+ typedef typename _Eng::result_type result_type;
+ typedef typename _Eng::param_type param_type;
+ __save_flags<_CharT, _Traits> __lx(__is);
+ typedef basic_istream<_CharT, _Traits> _Istream;
+ __is.flags(_Istream::dec | _Istream::skipws);
+ result_type __k;
+ double __p;
+ __is >> __k >> __p;
+ if (!__is.fail())
+ __x.param(param_type(__k, __p));
+ return __is;
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___RANDOM_NEGATIVE_BINOMIAL_DISTRIBUTION_H
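
Usage sketch (illustrative): samples count the failures observed before the
k-th success in independent trials with success probability p.

    #include <iostream>
    #include <random>

    int main() {
        std::mt19937 gen(42);
        std::negative_binomial_distribution<int> dist(/*k*/ 5, /*p*/ 0.3);
        for (int i = 0; i < 3; ++i)
            std::cout << dist(gen) << '\n';
    }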
diff --git a/libcxx/include/__random/normal_distribution.h b/libcxx/include/__random/normal_distribution.h
new file mode 100644
index 000000000000..b460ffb7ea9d
--- /dev/null
+++ b/libcxx/include/__random/normal_distribution.h
@@ -0,0 +1,208 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANDOM_NORMAL_DISTRIBUTION_H
+#define _LIBCPP___RANDOM_NORMAL_DISTRIBUTION_H
+
+#include <__config>
+#include <__random/uniform_real_distribution.h>
+#include <cmath>
+#include <iosfwd>
+#include <limits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template<class _RealType = double>
+class _LIBCPP_TEMPLATE_VIS normal_distribution
+{
+public:
+ // types
+ typedef _RealType result_type;
+
+ class _LIBCPP_TEMPLATE_VIS param_type
+ {
+ result_type __mean_;
+ result_type __stddev_;
+ public:
+ typedef normal_distribution distribution_type;
+
+ _LIBCPP_INLINE_VISIBILITY
+ explicit param_type(result_type __mean = 0, result_type __stddev = 1)
+ : __mean_(__mean), __stddev_(__stddev) {}
+
+ _LIBCPP_INLINE_VISIBILITY
+ result_type mean() const {return __mean_;}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type stddev() const {return __stddev_;}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const param_type& __x, const param_type& __y)
+ {return __x.__mean_ == __y.__mean_ && __x.__stddev_ == __y.__stddev_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const param_type& __x, const param_type& __y)
+ {return !(__x == __y);}
+ };
+
+private:
+ param_type __p_;
+ result_type _V_;
+ bool _V_hot_;
+
+public:
+ // constructors and reset functions
+#ifndef _LIBCPP_CXX03_LANG
+ _LIBCPP_INLINE_VISIBILITY
+ normal_distribution() : normal_distribution(0) {}
+ _LIBCPP_INLINE_VISIBILITY
+ explicit normal_distribution(result_type __mean, result_type __stddev = 1)
+ : __p_(param_type(__mean, __stddev)), _V_hot_(false) {}
+#else
+ _LIBCPP_INLINE_VISIBILITY
+ explicit normal_distribution(result_type __mean = 0,
+ result_type __stddev = 1)
+ : __p_(param_type(__mean, __stddev)), _V_hot_(false) {}
+#endif
+ _LIBCPP_INLINE_VISIBILITY
+ explicit normal_distribution(const param_type& __p)
+ : __p_(__p), _V_hot_(false) {}
+ _LIBCPP_INLINE_VISIBILITY
+ void reset() {_V_hot_ = false;}
+
+ // generating functions
+ template<class _URNG>
+ _LIBCPP_INLINE_VISIBILITY
+ result_type operator()(_URNG& __g)
+ {return (*this)(__g, __p_);}
+ template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p);
+
+ // property functions
+ _LIBCPP_INLINE_VISIBILITY
+ result_type mean() const {return __p_.mean();}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type stddev() const {return __p_.stddev();}
+
+ _LIBCPP_INLINE_VISIBILITY
+ param_type param() const {return __p_;}
+ _LIBCPP_INLINE_VISIBILITY
+ void param(const param_type& __p) {__p_ = __p;}
+
+ _LIBCPP_INLINE_VISIBILITY
+ result_type min() const {return -numeric_limits<result_type>::infinity();}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type max() const {return numeric_limits<result_type>::infinity();}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const normal_distribution& __x,
+ const normal_distribution& __y)
+ {return __x.__p_ == __y.__p_ && __x._V_hot_ == __y._V_hot_ &&
+ (!__x._V_hot_ || __x._V_ == __y._V_);}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const normal_distribution& __x,
+ const normal_distribution& __y)
+ {return !(__x == __y);}
+
+ template <class _CharT, class _Traits, class _RT>
+ friend
+ basic_ostream<_CharT, _Traits>&
+ operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const normal_distribution<_RT>& __x);
+
+ template <class _CharT, class _Traits, class _RT>
+ friend
+ basic_istream<_CharT, _Traits>&
+ operator>>(basic_istream<_CharT, _Traits>& __is,
+ normal_distribution<_RT>& __x);
+};
+
+template <class _RealType>
+template<class _URNG>
+_RealType
+normal_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p)
+{
+ result_type _Up;
+ if (_V_hot_)
+ {
+ _V_hot_ = false;
+ _Up = _V_;
+ }
+ else
+ {
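+        // Marsaglia polar method: draw (__u, __v) uniformly on the unit
+        // disk, map them to two independent standard normals, return one
+        // and cache the other in _V_ for the next call.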
+ uniform_real_distribution<result_type> _Uni(-1, 1);
+ result_type __u;
+ result_type __v;
+ result_type __s;
+ do
+ {
+ __u = _Uni(__g);
+ __v = _Uni(__g);
+ __s = __u * __u + __v * __v;
+ } while (__s > 1 || __s == 0);
+ result_type _Fp = _VSTD::sqrt(-2 * _VSTD::log(__s) / __s);
+ _V_ = __v * _Fp;
+ _V_hot_ = true;
+ _Up = __u * _Fp;
+ }
+ return _Up * __p.stddev() + __p.mean();
+}
+
+template <class _CharT, class _Traits, class _RT>
+basic_ostream<_CharT, _Traits>&
+operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const normal_distribution<_RT>& __x)
+{
+ __save_flags<_CharT, _Traits> __lx(__os);
+ typedef basic_ostream<_CharT, _Traits> _OStream;
+ __os.flags(_OStream::dec | _OStream::left | _OStream::fixed |
+ _OStream::scientific);
+ _CharT __sp = __os.widen(' ');
+ __os.fill(__sp);
+ __os << __x.mean() << __sp << __x.stddev() << __sp << __x._V_hot_;
+ if (__x._V_hot_)
+ __os << __sp << __x._V_;
+ return __os;
+}
+
+template <class _CharT, class _Traits, class _RT>
+basic_istream<_CharT, _Traits>&
+operator>>(basic_istream<_CharT, _Traits>& __is,
+ normal_distribution<_RT>& __x)
+{
+ typedef normal_distribution<_RT> _Eng;
+ typedef typename _Eng::result_type result_type;
+ typedef typename _Eng::param_type param_type;
+ __save_flags<_CharT, _Traits> __lx(__is);
+ typedef basic_istream<_CharT, _Traits> _Istream;
+ __is.flags(_Istream::dec | _Istream::skipws);
+ result_type __mean;
+ result_type __stddev;
+ result_type _Vp = 0;
+ bool _V_hot = false;
+ __is >> __mean >> __stddev >> _V_hot;
+ if (_V_hot)
+ __is >> _Vp;
+ if (!__is.fail())
+ {
+ __x.param(param_type(__mean, __stddev));
+ __x._V_hot_ = _V_hot;
+ __x._V_ = _Vp;
+ }
+ return __is;
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___RANDOM_NORMAL_DISTRIBUTION_H
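
The generating function above is the Marsaglia polar method: it draws points
uniformly from the unit disk, turns each accepted pair into two independent
standard-normal variates, returns one scaled by the parameters, and caches the
other in _V_ (flagged by _V_hot_) for the next call, which is why reset() only
clears the flag. A minimal usage sketch, not part of this patch, using only the
public <random> interface:

#include <iostream>
#include <random>

int main() {
    std::mt19937 gen(42);                            // any uniform random bit generator
    std::normal_distribution<double> dist(5.0, 2.0); // mean 5, stddev 2
    for (int i = 0; i < 4; ++i)
        std::cout << dist(gen) << '\n';              // every other call reuses the cached variate
}
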
diff --git a/libcxx/include/__random/piecewise_constant_distribution.h b/libcxx/include/__random/piecewise_constant_distribution.h
new file mode 100644
index 000000000000..ece20d1a1d6e
--- /dev/null
+++ b/libcxx/include/__random/piecewise_constant_distribution.h
@@ -0,0 +1,356 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANDOM_PIECEWISE_CONSTANT_DISTRIBUTION_H
+#define _LIBCPP___RANDOM_PIECEWISE_CONSTANT_DISTRIBUTION_H
+
+#include <__algorithm/upper_bound.h>
+#include <__config>
+#include <__random/uniform_real_distribution.h>
+#include <iosfwd>
+#include <numeric>
+#include <vector>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template<class _RealType = double>
+class _LIBCPP_TEMPLATE_VIS piecewise_constant_distribution
+{
+public:
+ // types
+ typedef _RealType result_type;
+
+ class _LIBCPP_TEMPLATE_VIS param_type
+ {
+ vector<result_type> __b_;
+ vector<result_type> __densities_;
+ vector<result_type> __areas_;
+ public:
+ typedef piecewise_constant_distribution distribution_type;
+
+ param_type();
+ template<class _InputIteratorB, class _InputIteratorW>
+ param_type(_InputIteratorB __fB, _InputIteratorB __lB,
+ _InputIteratorW __fW);
+#ifndef _LIBCPP_CXX03_LANG
+ template<class _UnaryOperation>
+ param_type(initializer_list<result_type> __bl, _UnaryOperation __fw);
+#endif // _LIBCPP_CXX03_LANG
+ template<class _UnaryOperation>
+ param_type(size_t __nw, result_type __xmin, result_type __xmax,
+ _UnaryOperation __fw);
+ param_type(param_type const&) = default;
+ param_type & operator=(const param_type& __rhs);
+
+ _LIBCPP_INLINE_VISIBILITY
+ vector<result_type> intervals() const {return __b_;}
+ _LIBCPP_INLINE_VISIBILITY
+ vector<result_type> densities() const {return __densities_;}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const param_type& __x, const param_type& __y)
+ {return __x.__densities_ == __y.__densities_ && __x.__b_ == __y.__b_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const param_type& __x, const param_type& __y)
+ {return !(__x == __y);}
+
+ private:
+ void __init();
+
+ friend class piecewise_constant_distribution;
+
+ template <class _CharT, class _Traits, class _RT>
+ friend
+ basic_ostream<_CharT, _Traits>&
+ operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const piecewise_constant_distribution<_RT>& __x);
+
+ template <class _CharT, class _Traits, class _RT>
+ friend
+ basic_istream<_CharT, _Traits>&
+ operator>>(basic_istream<_CharT, _Traits>& __is,
+ piecewise_constant_distribution<_RT>& __x);
+ };
+
+private:
+ param_type __p_;
+
+public:
+    // constructors and reset functions
+ _LIBCPP_INLINE_VISIBILITY
+ piecewise_constant_distribution() {}
+ template<class _InputIteratorB, class _InputIteratorW>
+ _LIBCPP_INLINE_VISIBILITY
+ piecewise_constant_distribution(_InputIteratorB __fB,
+ _InputIteratorB __lB,
+ _InputIteratorW __fW)
+ : __p_(__fB, __lB, __fW) {}
+
+#ifndef _LIBCPP_CXX03_LANG
+ template<class _UnaryOperation>
+ _LIBCPP_INLINE_VISIBILITY
+ piecewise_constant_distribution(initializer_list<result_type> __bl,
+ _UnaryOperation __fw)
+ : __p_(__bl, __fw) {}
+#endif // _LIBCPP_CXX03_LANG
+
+ template<class _UnaryOperation>
+ _LIBCPP_INLINE_VISIBILITY
+ piecewise_constant_distribution(size_t __nw, result_type __xmin,
+ result_type __xmax, _UnaryOperation __fw)
+ : __p_(__nw, __xmin, __xmax, __fw) {}
+
+ _LIBCPP_INLINE_VISIBILITY
+ explicit piecewise_constant_distribution(const param_type& __p)
+ : __p_(__p) {}
+
+ _LIBCPP_INLINE_VISIBILITY
+ void reset() {}
+
+ // generating functions
+ template<class _URNG>
+ _LIBCPP_INLINE_VISIBILITY
+ result_type operator()(_URNG& __g)
+ {return (*this)(__g, __p_);}
+ template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p);
+
+ // property functions
+ _LIBCPP_INLINE_VISIBILITY
+ vector<result_type> intervals() const {return __p_.intervals();}
+ _LIBCPP_INLINE_VISIBILITY
+ vector<result_type> densities() const {return __p_.densities();}
+
+ _LIBCPP_INLINE_VISIBILITY
+ param_type param() const {return __p_;}
+ _LIBCPP_INLINE_VISIBILITY
+ void param(const param_type& __p) {__p_ = __p;}
+
+ _LIBCPP_INLINE_VISIBILITY
+ result_type min() const {return __p_.__b_.front();}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type max() const {return __p_.__b_.back();}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const piecewise_constant_distribution& __x,
+ const piecewise_constant_distribution& __y)
+ {return __x.__p_ == __y.__p_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const piecewise_constant_distribution& __x,
+ const piecewise_constant_distribution& __y)
+ {return !(__x == __y);}
+
+ template <class _CharT, class _Traits, class _RT>
+ friend
+ basic_ostream<_CharT, _Traits>&
+ operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const piecewise_constant_distribution<_RT>& __x);
+
+ template <class _CharT, class _Traits, class _RT>
+ friend
+ basic_istream<_CharT, _Traits>&
+ operator>>(basic_istream<_CharT, _Traits>& __is,
+ piecewise_constant_distribution<_RT>& __x);
+};
+
+template<class _RealType>
+typename piecewise_constant_distribution<_RealType>::param_type &
+piecewise_constant_distribution<_RealType>::param_type::operator=
+ (const param_type& __rhs)
+{
+// These can throw
+ __b_.reserve (__rhs.__b_.size ());
+ __densities_.reserve(__rhs.__densities_.size());
+ __areas_.reserve (__rhs.__areas_.size());
+
+// These cannot throw
+ __b_ = __rhs.__b_;
+ __densities_ = __rhs.__densities_;
+ __areas_ = __rhs.__areas_;
+ return *this;
+}
+
+template<class _RealType>
+void
+piecewise_constant_distribution<_RealType>::param_type::__init()
+{
+ // __densities_ contains non-normalized areas
+ result_type __total_area = _VSTD::accumulate(__densities_.begin(),
+ __densities_.end(),
+ result_type());
+ for (size_t __i = 0; __i < __densities_.size(); ++__i)
+ __densities_[__i] /= __total_area;
+ // __densities_ contains normalized areas
+ __areas_.assign(__densities_.size(), result_type());
+ _VSTD::partial_sum(__densities_.begin(), __densities_.end() - 1,
+ __areas_.begin() + 1);
+    // __areas_ now contains the running sums of the normalized areas:
+    // __areas_[__i] is the sum of __densities_[0] .. __densities_[__i-1]
+    __densities_.back() = 1 - __areas_.back(); // correct round-off error
+    for (size_t __i = 0; __i < __densities_.size(); ++__i)
+        __densities_[__i] /= (__b_[__i+1] - __b_[__i]);
+    // __densities_ now contains the actual densities (interval probability
+    // divided by interval length)
+}
+
+template<class _RealType>
+piecewise_constant_distribution<_RealType>::param_type::param_type()
+ : __b_(2),
+ __densities_(1, 1.0),
+ __areas_(1, 0.0)
+{
+ __b_[1] = 1;
+}
+
+template<class _RealType>
+template<class _InputIteratorB, class _InputIteratorW>
+piecewise_constant_distribution<_RealType>::param_type::param_type(
+ _InputIteratorB __fB, _InputIteratorB __lB, _InputIteratorW __fW)
+ : __b_(__fB, __lB)
+{
+ if (__b_.size() < 2)
+ {
+ __b_.resize(2);
+ __b_[0] = 0;
+ __b_[1] = 1;
+ __densities_.assign(1, 1.0);
+ __areas_.assign(1, 0.0);
+ }
+ else
+ {
+ __densities_.reserve(__b_.size() - 1);
+ for (size_t __i = 0; __i < __b_.size() - 1; ++__i, ++__fW)
+ __densities_.push_back(*__fW);
+ __init();
+ }
+}
+
+#ifndef _LIBCPP_CXX03_LANG
+
+template<class _RealType>
+template<class _UnaryOperation>
+piecewise_constant_distribution<_RealType>::param_type::param_type(
+ initializer_list<result_type> __bl, _UnaryOperation __fw)
+ : __b_(__bl.begin(), __bl.end())
+{
+ if (__b_.size() < 2)
+ {
+ __b_.resize(2);
+ __b_[0] = 0;
+ __b_[1] = 1;
+ __densities_.assign(1, 1.0);
+ __areas_.assign(1, 0.0);
+ }
+ else
+ {
+ __densities_.reserve(__b_.size() - 1);
+ for (size_t __i = 0; __i < __b_.size() - 1; ++__i)
+ __densities_.push_back(__fw((__b_[__i+1] + __b_[__i])*.5));
+ __init();
+ }
+}
+
+#endif // _LIBCPP_CXX03_LANG
+
+template<class _RealType>
+template<class _UnaryOperation>
+piecewise_constant_distribution<_RealType>::param_type::param_type(
+ size_t __nw, result_type __xmin, result_type __xmax, _UnaryOperation __fw)
+ : __b_(__nw == 0 ? 2 : __nw + 1)
+{
+ size_t __n = __b_.size() - 1;
+ result_type __d = (__xmax - __xmin) / __n;
+ __densities_.reserve(__n);
+ for (size_t __i = 0; __i < __n; ++__i)
+ {
+ __b_[__i] = __xmin + __i * __d;
+ __densities_.push_back(__fw(__b_[__i] + __d*.5));
+ }
+ __b_[__n] = __xmax;
+ __init();
+}
+
+template<class _RealType>
+template<class _URNG>
+_RealType
+piecewise_constant_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p)
+{
+ typedef uniform_real_distribution<result_type> _Gen;
+ result_type __u = _Gen()(__g);
+ ptrdiff_t __k = _VSTD::upper_bound(__p.__areas_.begin(), __p.__areas_.end(),
+ __u) - __p.__areas_.begin() - 1;
+ return (__u - __p.__areas_[__k]) / __p.__densities_[__k] + __p.__b_[__k];
+}
+
+template <class _CharT, class _Traits, class _RT>
+basic_ostream<_CharT, _Traits>&
+operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const piecewise_constant_distribution<_RT>& __x)
+{
+ __save_flags<_CharT, _Traits> __lx(__os);
+ typedef basic_ostream<_CharT, _Traits> _OStream;
+ __os.flags(_OStream::dec | _OStream::left | _OStream::fixed |
+ _OStream::scientific);
+ _CharT __sp = __os.widen(' ');
+ __os.fill(__sp);
+ size_t __n = __x.__p_.__b_.size();
+ __os << __n;
+ for (size_t __i = 0; __i < __n; ++__i)
+ __os << __sp << __x.__p_.__b_[__i];
+ __n = __x.__p_.__densities_.size();
+ __os << __sp << __n;
+ for (size_t __i = 0; __i < __n; ++__i)
+ __os << __sp << __x.__p_.__densities_[__i];
+ __n = __x.__p_.__areas_.size();
+ __os << __sp << __n;
+ for (size_t __i = 0; __i < __n; ++__i)
+ __os << __sp << __x.__p_.__areas_[__i];
+ return __os;
+}
+
+template <class _CharT, class _Traits, class _RT>
+basic_istream<_CharT, _Traits>&
+operator>>(basic_istream<_CharT, _Traits>& __is,
+ piecewise_constant_distribution<_RT>& __x)
+{
+ typedef piecewise_constant_distribution<_RT> _Eng;
+ typedef typename _Eng::result_type result_type;
+ __save_flags<_CharT, _Traits> __lx(__is);
+ typedef basic_istream<_CharT, _Traits> _Istream;
+ __is.flags(_Istream::dec | _Istream::skipws);
+ size_t __n;
+ __is >> __n;
+ vector<result_type> __b(__n);
+ for (size_t __i = 0; __i < __n; ++__i)
+ __is >> __b[__i];
+ __is >> __n;
+ vector<result_type> __densities(__n);
+ for (size_t __i = 0; __i < __n; ++__i)
+ __is >> __densities[__i];
+ __is >> __n;
+ vector<result_type> __areas(__n);
+ for (size_t __i = 0; __i < __n; ++__i)
+ __is >> __areas[__i];
+ if (!__is.fail())
+ {
+ swap(__x.__p_.__b_, __b);
+ swap(__x.__p_.__densities_, __densities);
+ swap(__x.__p_.__areas_, __areas);
+ }
+ return __is;
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___RANDOM_PIECEWISE_CONSTANT_DISTRIBUTION_H
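
Here __init() normalizes the raw weights into per-interval probabilities, their
running sums (__areas_), and finally per-interval densities; operator() then
inverts the piecewise-constant CDF by drawing __u in [0, 1), locating the
interval with upper_bound over __areas_, and interpolating linearly within it.
A minimal usage sketch, not part of this patch:

#include <iostream>
#include <iterator>
#include <random>

int main() {
    std::mt19937 gen(42);
    double breaks[]  = {0.0, 1.0, 10.0};  // two intervals: [0,1) and [1,10)
    double weights[] = {9.0, 1.0};        // P([0,1)) = 0.9, P([1,10)) = 0.1
    std::piecewise_constant_distribution<double> dist(
        std::begin(breaks), std::end(breaks), std::begin(weights));
    for (int i = 0; i < 4; ++i)
        std::cout << dist(gen) << '\n';   // about 90% of samples land in [0,1)
}
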
diff --git a/libcxx/include/__random/piecewise_linear_distribution.h b/libcxx/include/__random/piecewise_linear_distribution.h
new file mode 100644
index 000000000000..b2ba164d0707
--- /dev/null
+++ b/libcxx/include/__random/piecewise_linear_distribution.h
@@ -0,0 +1,372 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANDOM_PIECEWISE_LINEAR_DISTRIBUTION_H
+#define _LIBCPP___RANDOM_PIECEWISE_LINEAR_DISTRIBUTION_H
+
+#include <__algorithm/upper_bound.h>
+#include <__config>
+#include <__random/uniform_real_distribution.h>
+#include <iosfwd>
+#include <numeric>
+#include <vector>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template<class _RealType = double>
+class _LIBCPP_TEMPLATE_VIS piecewise_linear_distribution
+{
+public:
+ // types
+ typedef _RealType result_type;
+
+ class _LIBCPP_TEMPLATE_VIS param_type
+ {
+ vector<result_type> __b_;
+ vector<result_type> __densities_;
+ vector<result_type> __areas_;
+ public:
+ typedef piecewise_linear_distribution distribution_type;
+
+ param_type();
+ template<class _InputIteratorB, class _InputIteratorW>
+ param_type(_InputIteratorB __fB, _InputIteratorB __lB,
+ _InputIteratorW __fW);
+#ifndef _LIBCPP_CXX03_LANG
+ template<class _UnaryOperation>
+ param_type(initializer_list<result_type> __bl, _UnaryOperation __fw);
+#endif // _LIBCPP_CXX03_LANG
+ template<class _UnaryOperation>
+ param_type(size_t __nw, result_type __xmin, result_type __xmax,
+ _UnaryOperation __fw);
+ param_type(param_type const&) = default;
+ param_type & operator=(const param_type& __rhs);
+
+ _LIBCPP_INLINE_VISIBILITY
+ vector<result_type> intervals() const {return __b_;}
+ _LIBCPP_INLINE_VISIBILITY
+ vector<result_type> densities() const {return __densities_;}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const param_type& __x, const param_type& __y)
+ {return __x.__densities_ == __y.__densities_ && __x.__b_ == __y.__b_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const param_type& __x, const param_type& __y)
+ {return !(__x == __y);}
+
+ private:
+ void __init();
+
+ friend class piecewise_linear_distribution;
+
+ template <class _CharT, class _Traits, class _RT>
+ friend
+ basic_ostream<_CharT, _Traits>&
+ operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const piecewise_linear_distribution<_RT>& __x);
+
+ template <class _CharT, class _Traits, class _RT>
+ friend
+ basic_istream<_CharT, _Traits>&
+ operator>>(basic_istream<_CharT, _Traits>& __is,
+ piecewise_linear_distribution<_RT>& __x);
+ };
+
+private:
+ param_type __p_;
+
+public:
+    // constructors and reset functions
+ _LIBCPP_INLINE_VISIBILITY
+ piecewise_linear_distribution() {}
+ template<class _InputIteratorB, class _InputIteratorW>
+ _LIBCPP_INLINE_VISIBILITY
+ piecewise_linear_distribution(_InputIteratorB __fB,
+ _InputIteratorB __lB,
+ _InputIteratorW __fW)
+ : __p_(__fB, __lB, __fW) {}
+
+#ifndef _LIBCPP_CXX03_LANG
+ template<class _UnaryOperation>
+ _LIBCPP_INLINE_VISIBILITY
+ piecewise_linear_distribution(initializer_list<result_type> __bl,
+ _UnaryOperation __fw)
+ : __p_(__bl, __fw) {}
+#endif // _LIBCPP_CXX03_LANG
+
+ template<class _UnaryOperation>
+ _LIBCPP_INLINE_VISIBILITY
+ piecewise_linear_distribution(size_t __nw, result_type __xmin,
+ result_type __xmax, _UnaryOperation __fw)
+ : __p_(__nw, __xmin, __xmax, __fw) {}
+
+ _LIBCPP_INLINE_VISIBILITY
+ explicit piecewise_linear_distribution(const param_type& __p)
+ : __p_(__p) {}
+
+ _LIBCPP_INLINE_VISIBILITY
+ void reset() {}
+
+ // generating functions
+ template<class _URNG>
+ _LIBCPP_INLINE_VISIBILITY
+ result_type operator()(_URNG& __g)
+ {return (*this)(__g, __p_);}
+ template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p);
+
+ // property functions
+ _LIBCPP_INLINE_VISIBILITY
+ vector<result_type> intervals() const {return __p_.intervals();}
+ _LIBCPP_INLINE_VISIBILITY
+ vector<result_type> densities() const {return __p_.densities();}
+
+ _LIBCPP_INLINE_VISIBILITY
+ param_type param() const {return __p_;}
+ _LIBCPP_INLINE_VISIBILITY
+ void param(const param_type& __p) {__p_ = __p;}
+
+ _LIBCPP_INLINE_VISIBILITY
+ result_type min() const {return __p_.__b_.front();}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type max() const {return __p_.__b_.back();}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const piecewise_linear_distribution& __x,
+ const piecewise_linear_distribution& __y)
+ {return __x.__p_ == __y.__p_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const piecewise_linear_distribution& __x,
+ const piecewise_linear_distribution& __y)
+ {return !(__x == __y);}
+
+ template <class _CharT, class _Traits, class _RT>
+ friend
+ basic_ostream<_CharT, _Traits>&
+ operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const piecewise_linear_distribution<_RT>& __x);
+
+ template <class _CharT, class _Traits, class _RT>
+ friend
+ basic_istream<_CharT, _Traits>&
+ operator>>(basic_istream<_CharT, _Traits>& __is,
+ piecewise_linear_distribution<_RT>& __x);
+};
+
+template<class _RealType>
+typename piecewise_linear_distribution<_RealType>::param_type &
+piecewise_linear_distribution<_RealType>::param_type::operator=
+ (const param_type& __rhs)
+{
+// These can throw
+ __b_.reserve (__rhs.__b_.size ());
+ __densities_.reserve(__rhs.__densities_.size());
+ __areas_.reserve (__rhs.__areas_.size());
+
+// These cannot throw
+ __b_ = __rhs.__b_;
+ __densities_ = __rhs.__densities_;
+ __areas_ = __rhs.__areas_;
+ return *this;
+}
+
+
+template<class _RealType>
+void
+piecewise_linear_distribution<_RealType>::param_type::__init()
+{
+ __areas_.assign(__densities_.size() - 1, result_type());
+ result_type _Sp = 0;
+ for (size_t __i = 0; __i < __areas_.size(); ++__i)
+ {
+ __areas_[__i] = (__densities_[__i+1] + __densities_[__i]) *
+ (__b_[__i+1] - __b_[__i]) * .5;
+ _Sp += __areas_[__i];
+ }
+ for (size_t __i = __areas_.size(); __i > 1;)
+ {
+ --__i;
+ __areas_[__i] = __areas_[__i-1] / _Sp;
+ }
+ __areas_[0] = 0;
+ for (size_t __i = 1; __i < __areas_.size(); ++__i)
+ __areas_[__i] += __areas_[__i-1];
+ for (size_t __i = 0; __i < __densities_.size(); ++__i)
+ __densities_[__i] /= _Sp;
+}
+
+template<class _RealType>
+piecewise_linear_distribution<_RealType>::param_type::param_type()
+ : __b_(2),
+ __densities_(2, 1.0),
+ __areas_(1, 0.0)
+{
+ __b_[1] = 1;
+}
+
+template<class _RealType>
+template<class _InputIteratorB, class _InputIteratorW>
+piecewise_linear_distribution<_RealType>::param_type::param_type(
+ _InputIteratorB __fB, _InputIteratorB __lB, _InputIteratorW __fW)
+ : __b_(__fB, __lB)
+{
+ if (__b_.size() < 2)
+ {
+ __b_.resize(2);
+ __b_[0] = 0;
+ __b_[1] = 1;
+ __densities_.assign(2, 1.0);
+ __areas_.assign(1, 0.0);
+ }
+ else
+ {
+ __densities_.reserve(__b_.size());
+ for (size_t __i = 0; __i < __b_.size(); ++__i, ++__fW)
+ __densities_.push_back(*__fW);
+ __init();
+ }
+}
+
+#ifndef _LIBCPP_CXX03_LANG
+
+template<class _RealType>
+template<class _UnaryOperation>
+piecewise_linear_distribution<_RealType>::param_type::param_type(
+ initializer_list<result_type> __bl, _UnaryOperation __fw)
+ : __b_(__bl.begin(), __bl.end())
+{
+ if (__b_.size() < 2)
+ {
+ __b_.resize(2);
+ __b_[0] = 0;
+ __b_[1] = 1;
+ __densities_.assign(2, 1.0);
+ __areas_.assign(1, 0.0);
+ }
+ else
+ {
+ __densities_.reserve(__b_.size());
+ for (size_t __i = 0; __i < __b_.size(); ++__i)
+ __densities_.push_back(__fw(__b_[__i]));
+ __init();
+ }
+}
+
+#endif // _LIBCPP_CXX03_LANG
+
+template<class _RealType>
+template<class _UnaryOperation>
+piecewise_linear_distribution<_RealType>::param_type::param_type(
+ size_t __nw, result_type __xmin, result_type __xmax, _UnaryOperation __fw)
+ : __b_(__nw == 0 ? 2 : __nw + 1)
+{
+ size_t __n = __b_.size() - 1;
+ result_type __d = (__xmax - __xmin) / __n;
+ __densities_.reserve(__b_.size());
+ for (size_t __i = 0; __i < __n; ++__i)
+ {
+ __b_[__i] = __xmin + __i * __d;
+ __densities_.push_back(__fw(__b_[__i]));
+ }
+ __b_[__n] = __xmax;
+ __densities_.push_back(__fw(__b_[__n]));
+ __init();
+}
+
+template<class _RealType>
+template<class _URNG>
+_RealType
+piecewise_linear_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p)
+{
+ typedef uniform_real_distribution<result_type> _Gen;
+ result_type __u = _Gen()(__g);
+ ptrdiff_t __k = _VSTD::upper_bound(__p.__areas_.begin(), __p.__areas_.end(),
+ __u) - __p.__areas_.begin() - 1;
+ __u -= __p.__areas_[__k];
+ const result_type __dk = __p.__densities_[__k];
+ const result_type __dk1 = __p.__densities_[__k+1];
+ const result_type __deltad = __dk1 - __dk;
+ const result_type __bk = __p.__b_[__k];
+ if (__deltad == 0)
+ return __u / __dk + __bk;
+ const result_type __bk1 = __p.__b_[__k+1];
+ const result_type __deltab = __bk1 - __bk;
+ return (__bk * __dk1 - __bk1 * __dk +
+ _VSTD::sqrt(__deltab * (__deltab * __dk * __dk + 2 * __deltad * __u))) /
+ __deltad;
+}
+
+template <class _CharT, class _Traits, class _RT>
+basic_ostream<_CharT, _Traits>&
+operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const piecewise_linear_distribution<_RT>& __x)
+{
+ __save_flags<_CharT, _Traits> __lx(__os);
+ typedef basic_ostream<_CharT, _Traits> _OStream;
+ __os.flags(_OStream::dec | _OStream::left | _OStream::fixed |
+ _OStream::scientific);
+ _CharT __sp = __os.widen(' ');
+ __os.fill(__sp);
+ size_t __n = __x.__p_.__b_.size();
+ __os << __n;
+ for (size_t __i = 0; __i < __n; ++__i)
+ __os << __sp << __x.__p_.__b_[__i];
+ __n = __x.__p_.__densities_.size();
+ __os << __sp << __n;
+ for (size_t __i = 0; __i < __n; ++__i)
+ __os << __sp << __x.__p_.__densities_[__i];
+ __n = __x.__p_.__areas_.size();
+ __os << __sp << __n;
+ for (size_t __i = 0; __i < __n; ++__i)
+ __os << __sp << __x.__p_.__areas_[__i];
+ return __os;
+}
+
+template <class _CharT, class _Traits, class _RT>
+basic_istream<_CharT, _Traits>&
+operator>>(basic_istream<_CharT, _Traits>& __is,
+ piecewise_linear_distribution<_RT>& __x)
+{
+ typedef piecewise_linear_distribution<_RT> _Eng;
+ typedef typename _Eng::result_type result_type;
+ __save_flags<_CharT, _Traits> __lx(__is);
+ typedef basic_istream<_CharT, _Traits> _Istream;
+ __is.flags(_Istream::dec | _Istream::skipws);
+ size_t __n;
+ __is >> __n;
+ vector<result_type> __b(__n);
+ for (size_t __i = 0; __i < __n; ++__i)
+ __is >> __b[__i];
+ __is >> __n;
+ vector<result_type> __densities(__n);
+ for (size_t __i = 0; __i < __n; ++__i)
+ __is >> __densities[__i];
+ __is >> __n;
+ vector<result_type> __areas(__n);
+ for (size_t __i = 0; __i < __n; ++__i)
+ __is >> __areas[__i];
+ if (!__is.fail())
+ {
+ swap(__x.__p_.__b_, __b);
+ swap(__x.__p_.__densities_, __densities);
+ swap(__x.__p_.__areas_, __areas);
+ }
+ return __is;
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___RANDOM_PIECEWISE_LINEAR_DISTRIBUTION_H
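
Unlike the piecewise-constant case, __init() here accumulates trapezoid areas,
and operator() has to solve a quadratic to invert the CDF inside the chosen
interval; the __deltad == 0 branch falls back to the constant-density formula so
a flat segment never divides by zero. A minimal usage sketch, not part of this
patch (the weights are density values at the boundaries, so n + 1 weights
describe n intervals):

#include <iostream>
#include <iterator>
#include <random>

int main() {
    std::mt19937 gen(42);
    double breaks[]    = {0.0, 1.0, 2.0};  // interval boundaries
    double densities[] = {0.0, 1.0, 0.0};  // triangular density peaking at 1.0
    std::piecewise_linear_distribution<double> dist(
        std::begin(breaks), std::end(breaks), std::begin(densities));
    for (int i = 0; i < 4; ++i)
        std::cout << dist(gen) << '\n';    // samples cluster around 1.0
}
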
diff --git a/libcxx/include/__random/poisson_distribution.h b/libcxx/include/__random/poisson_distribution.h
new file mode 100644
index 000000000000..fb213b0103ad
--- /dev/null
+++ b/libcxx/include/__random/poisson_distribution.h
@@ -0,0 +1,276 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANDOM_POISSON_DISTRIBUTION_H
+#define _LIBCPP___RANDOM_POISSON_DISTRIBUTION_H
+
+#include <__config>
+#include <__random/exponential_distribution.h>
+#include <__random/normal_distribution.h>
+#include <__random/uniform_real_distribution.h>
+#include <cmath>
+#include <iosfwd>
+#include <limits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template<class _IntType = int>
+class _LIBCPP_TEMPLATE_VIS poisson_distribution
+{
+public:
+ // types
+ typedef _IntType result_type;
+
+ class _LIBCPP_TEMPLATE_VIS param_type
+ {
+ double __mean_;
+ double __s_;
+ double __d_;
+ double __l_;
+ double __omega_;
+ double __c0_;
+ double __c1_;
+ double __c2_;
+ double __c3_;
+ double __c_;
+
+ public:
+ typedef poisson_distribution distribution_type;
+
+ explicit param_type(double __mean = 1.0);
+
+ _LIBCPP_INLINE_VISIBILITY
+ double mean() const {return __mean_;}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const param_type& __x, const param_type& __y)
+ {return __x.__mean_ == __y.__mean_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const param_type& __x, const param_type& __y)
+ {return !(__x == __y);}
+
+ friend class poisson_distribution;
+ };
+
+private:
+ param_type __p_;
+
+public:
+ // constructors and reset functions
+#ifndef _LIBCPP_CXX03_LANG
+ _LIBCPP_INLINE_VISIBILITY
+ poisson_distribution() : poisson_distribution(1.0) {}
+ _LIBCPP_INLINE_VISIBILITY
+ explicit poisson_distribution(double __mean)
+ : __p_(__mean) {}
+#else
+ _LIBCPP_INLINE_VISIBILITY
+ explicit poisson_distribution(double __mean = 1.0)
+ : __p_(__mean) {}
+#endif
+ _LIBCPP_INLINE_VISIBILITY
+ explicit poisson_distribution(const param_type& __p) : __p_(__p) {}
+ _LIBCPP_INLINE_VISIBILITY
+ void reset() {}
+
+ // generating functions
+ template<class _URNG>
+ _LIBCPP_INLINE_VISIBILITY
+ result_type operator()(_URNG& __g)
+ {return (*this)(__g, __p_);}
+ template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p);
+
+ // property functions
+ _LIBCPP_INLINE_VISIBILITY
+ double mean() const {return __p_.mean();}
+
+ _LIBCPP_INLINE_VISIBILITY
+ param_type param() const {return __p_;}
+ _LIBCPP_INLINE_VISIBILITY
+ void param(const param_type& __p) {__p_ = __p;}
+
+ _LIBCPP_INLINE_VISIBILITY
+ result_type min() const {return 0;}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type max() const {return numeric_limits<result_type>::max();}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const poisson_distribution& __x,
+ const poisson_distribution& __y)
+ {return __x.__p_ == __y.__p_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const poisson_distribution& __x,
+ const poisson_distribution& __y)
+ {return !(__x == __y);}
+};
+
+template<class _IntType>
+poisson_distribution<_IntType>::param_type::param_type(double __mean)
+    // According to the standard, `inf` is a valid input, but it causes the
+ // distribution to hang, so we replace it with the maximum representable
+ // mean.
+ : __mean_(isinf(__mean) ? numeric_limits<double>::max() : __mean)
+{
+ if (__mean_ < 10)
+ {
+ __s_ = 0;
+ __d_ = 0;
+ __l_ = _VSTD::exp(-__mean_);
+ __omega_ = 0;
+ __c3_ = 0;
+ __c2_ = 0;
+ __c1_ = 0;
+ __c0_ = 0;
+ __c_ = 0;
+ }
+ else
+ {
+ __s_ = _VSTD::sqrt(__mean_);
+ __d_ = 6 * __mean_ * __mean_;
+ __l_ = _VSTD::trunc(__mean_ - 1.1484);
+ __omega_ = .3989423 / __s_;
+ double __b1_ = .4166667E-1 / __mean_;
+ double __b2_ = .3 * __b1_ * __b1_;
+ __c3_ = .1428571 * __b1_ * __b2_;
+ __c2_ = __b2_ - 15. * __c3_;
+ __c1_ = __b1_ - 6. * __b2_ + 45. * __c3_;
+ __c0_ = 1. - __b1_ + 3. * __b2_ - 15. * __c3_;
+ __c_ = .1069 / __mean_;
+ }
+}
+
+template <class _IntType>
+template<class _URNG>
+_IntType
+poisson_distribution<_IntType>::operator()(_URNG& __urng, const param_type& __pr)
+{
+ double __tx;
+ uniform_real_distribution<double> __urd;
+ if (__pr.__mean_ < 10)
+ {
+ __tx = 0;
+ for (double __p = __urd(__urng); __p > __pr.__l_; ++__tx)
+ __p *= __urd(__urng);
+ }
+ else
+ {
+ double __difmuk;
+ double __g = __pr.__mean_ + __pr.__s_ * normal_distribution<double>()(__urng);
+ double __u;
+ if (__g > 0)
+ {
+ __tx = _VSTD::trunc(__g);
+ if (__tx >= __pr.__l_)
+ return _VSTD::__clamp_to_integral<result_type>(__tx);
+ __difmuk = __pr.__mean_ - __tx;
+ __u = __urd(__urng);
+ if (__pr.__d_ * __u >= __difmuk * __difmuk * __difmuk)
+ return _VSTD::__clamp_to_integral<result_type>(__tx);
+ }
+ exponential_distribution<double> __edist;
+ for (bool __using_exp_dist = false; true; __using_exp_dist = true)
+ {
+ double __e;
+ if (__using_exp_dist || __g <= 0)
+ {
+ double __t;
+ do
+ {
+ __e = __edist(__urng);
+ __u = __urd(__urng);
+ __u += __u - 1;
+ __t = 1.8 + (__u < 0 ? -__e : __e);
+ } while (__t <= -.6744);
+ __tx = _VSTD::trunc(__pr.__mean_ + __pr.__s_ * __t);
+ __difmuk = __pr.__mean_ - __tx;
+ __using_exp_dist = true;
+ }
+ double __px;
+ double __py;
+ if (__tx < 10 && __tx >= 0)
+ {
+ const double __fac[] = {1, 1, 2, 6, 24, 120, 720, 5040,
+ 40320, 362880};
+ __px = -__pr.__mean_;
+ __py = _VSTD::pow(__pr.__mean_, (double)__tx) / __fac[static_cast<int>(__tx)];
+ }
+ else
+ {
+ double __del = .8333333E-1 / __tx;
+ __del -= 4.8 * __del * __del * __del;
+ double __v = __difmuk / __tx;
+ if (_VSTD::abs(__v) > 0.25)
+ __px = __tx * _VSTD::log(1 + __v) - __difmuk - __del;
+ else
+ __px = __tx * __v * __v * (((((((.1250060 * __v + -.1384794) *
+ __v + .1421878) * __v + -.1661269) * __v + .2000118) *
+ __v + -.2500068) * __v + .3333333) * __v + -.5) - __del;
+ __py = .3989423 / _VSTD::sqrt(__tx);
+ }
+ double __r = (0.5 - __difmuk) / __pr.__s_;
+ double __r2 = __r * __r;
+ double __fx = -0.5 * __r2;
+ double __fy = __pr.__omega_ * (((__pr.__c3_ * __r2 + __pr.__c2_) *
+ __r2 + __pr.__c1_) * __r2 + __pr.__c0_);
+ if (__using_exp_dist)
+ {
+ if (__pr.__c_ * _VSTD::abs(__u) <= __py * _VSTD::exp(__px + __e) -
+ __fy * _VSTD::exp(__fx + __e))
+ break;
+ }
+ else
+ {
+ if (__fy - __u * __fy <= __py * _VSTD::exp(__px - __fx))
+ break;
+ }
+ }
+ }
+ return _VSTD::__clamp_to_integral<result_type>(__tx);
+}
+
+template <class _CharT, class _Traits, class _IntType>
+basic_ostream<_CharT, _Traits>&
+operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const poisson_distribution<_IntType>& __x)
+{
+ __save_flags<_CharT, _Traits> __lx(__os);
+ typedef basic_ostream<_CharT, _Traits> _OStream;
+ __os.flags(_OStream::dec | _OStream::left | _OStream::fixed |
+ _OStream::scientific);
+ return __os << __x.mean();
+}
+
+template <class _CharT, class _Traits, class _IntType>
+basic_istream<_CharT, _Traits>&
+operator>>(basic_istream<_CharT, _Traits>& __is,
+ poisson_distribution<_IntType>& __x)
+{
+ typedef poisson_distribution<_IntType> _Eng;
+ typedef typename _Eng::param_type param_type;
+ __save_flags<_CharT, _Traits> __lx(__is);
+ typedef basic_istream<_CharT, _Traits> _Istream;
+ __is.flags(_Istream::dec | _Istream::skipws);
+ double __mean;
+ __is >> __mean;
+ if (!__is.fail())
+ __x.param(param_type(__mean));
+ return __is;
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___RANDOM_POISSON_DISTRIBUTION_H
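
For means below 10, operator() uses the classic multiplication method, counting
how many uniforms can be multiplied together before the product drops below
e^-mean; for larger means it switches to an acceptance-complement scheme driven
by normal and exponential variates, which is why this header includes both of
those distributions (the hard-coded constants appear to come from Ahrens and
Dieter's algorithm PD, though the file does not say so). A minimal usage
sketch, not part of this patch:

#include <iostream>
#include <map>
#include <random>

int main() {
    std::mt19937 gen(42);
    std::poisson_distribution<int> dist(4.0); // mean < 10: small-mean branch
    std::map<int, int> hist;
    for (int i = 0; i < 10000; ++i)
        ++hist[dist(gen)];                    // histogram peaks near 3-4
    for (const auto& kv : hist)
        std::cout << kv.first << ": " << kv.second << '\n';
}
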
diff --git a/libcxx/include/__random/random_device.h b/libcxx/include/__random/random_device.h
new file mode 100644
index 000000000000..f62f7a3d269b
--- /dev/null
+++ b/libcxx/include/__random/random_device.h
@@ -0,0 +1,71 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANDOM_RANDOM_DEVICE_H
+#define _LIBCPP___RANDOM_RANDOM_DEVICE_H
+
+#include <__config>
+#include <string>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if !defined(_LIBCPP_HAS_NO_RANDOM_DEVICE)
+
+class _LIBCPP_TYPE_VIS random_device
+{
+#ifdef _LIBCPP_USING_DEV_RANDOM
+ int __f_;
+#endif // defined(_LIBCPP_USING_DEV_RANDOM)
+public:
+ // types
+ typedef unsigned result_type;
+
+ // generator characteristics
+ static _LIBCPP_CONSTEXPR const result_type _Min = 0;
+ static _LIBCPP_CONSTEXPR const result_type _Max = 0xFFFFFFFFu;
+
+ _LIBCPP_INLINE_VISIBILITY
+ static _LIBCPP_CONSTEXPR result_type min() { return _Min;}
+ _LIBCPP_INLINE_VISIBILITY
+ static _LIBCPP_CONSTEXPR result_type max() { return _Max;}
+
+ // constructors
+#ifndef _LIBCPP_CXX03_LANG
+ random_device() : random_device("/dev/urandom") {}
+ explicit random_device(const string& __token);
+#else
+ explicit random_device(const string& __token = "/dev/urandom");
+#endif
+ ~random_device();
+
+ // generating functions
+ result_type operator()();
+
+ // property functions
+ double entropy() const _NOEXCEPT;
+
+private:
+ // no copy functions
+ random_device(const random_device&); // = delete;
+ random_device& operator=(const random_device&); // = delete;
+};
+
+#endif // !_LIBCPP_HAS_NO_RANDOM_DEVICE
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___RANDOM_RANDOM_DEVICE_H
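
When _LIBCPP_USING_DEV_RANDOM is defined the only state is the file descriptor
opened for the token (by default "/dev/urandom"); other backends keep no state
at all. The usual role of random_device is to seed a cheap deterministic engine
exactly once. A minimal sketch, not part of this patch:

#include <iostream>
#include <random>

int main() {
    std::random_device rd;  // nondeterministic source; may throw if none is available
    std::mt19937 gen(rd()); // seed a fast engine once, then draw from the engine
    std::cout << "entropy estimate: " << rd.entropy() << '\n';
    std::cout << gen() << '\n';
}
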
diff --git a/libcxx/include/__random/ranlux.h b/libcxx/include/__random/ranlux.h
new file mode 100644
index 000000000000..0b415928df4d
--- /dev/null
+++ b/libcxx/include/__random/ranlux.h
@@ -0,0 +1,31 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANDOM_RANLUX_H
+#define _LIBCPP___RANDOM_RANLUX_H
+
+#include <__config>
+#include <__random/discard_block_engine.h>
+#include <__random/subtract_with_carry_engine.h>
+#include <cstdint>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+typedef subtract_with_carry_engine<uint_fast32_t, 24, 10, 24> ranlux24_base;
+typedef subtract_with_carry_engine<uint_fast64_t, 48, 5, 12> ranlux48_base;
+
+typedef discard_block_engine<ranlux24_base, 223, 23> ranlux24;
+typedef discard_block_engine<ranlux48_base, 389, 11> ranlux48;
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___RANDOM_RANLUX_H
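
These typedefs compose the two engines included above: ranlux24, for example,
wraps ranlux24_base (a subtract-with-carry generator) in discard_block_engine
so that only the first 23 results of every block of 223 are delivered, the
luxury-level thinning that gives RANLUX its statistical quality. A minimal
usage sketch, not part of this patch:

#include <iostream>
#include <random>

int main() {
    std::ranlux24 gen;          // default-seeded; the standard fixes its sequence
    gen.discard(9999);
    std::cout << gen() << '\n'; // the 10000th invocation, pinned down by [rand.predef]
}
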
diff --git a/libcxx/include/__random/seed_seq.h b/libcxx/include/__random/seed_seq.h
new file mode 100644
index 000000000000..97bc88d0d4d1
--- /dev/null
+++ b/libcxx/include/__random/seed_seq.h
@@ -0,0 +1,150 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANDOM_SEED_SEQ_H
+#define _LIBCPP___RANDOM_SEED_SEQ_H
+
+#include <__algorithm/copy.h>
+#include <__algorithm/fill.h>
+#include <__algorithm/max.h>
+#include <__config>
+#include <initializer_list>
+#include <vector>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+class _LIBCPP_TEMPLATE_VIS seed_seq
+{
+public:
+ // types
+ typedef uint32_t result_type;
+
+private:
+ vector<result_type> __v_;
+
+ template<class _InputIterator>
+ void init(_InputIterator __first, _InputIterator __last);
+public:
+ // constructors
+ _LIBCPP_INLINE_VISIBILITY
+ seed_seq() _NOEXCEPT {}
+#ifndef _LIBCPP_CXX03_LANG
+ template<class _Tp>
+ _LIBCPP_INLINE_VISIBILITY
+ seed_seq(initializer_list<_Tp> __il) {init(__il.begin(), __il.end());}
+#endif // _LIBCPP_CXX03_LANG
+
+ template<class _InputIterator>
+ _LIBCPP_INLINE_VISIBILITY
+ seed_seq(_InputIterator __first, _InputIterator __last)
+ {init(__first, __last);}
+
+ // generating functions
+ template<class _RandomAccessIterator>
+ void generate(_RandomAccessIterator __first, _RandomAccessIterator __last);
+
+ // property functions
+ _LIBCPP_INLINE_VISIBILITY
+ size_t size() const _NOEXCEPT {return __v_.size();}
+ template<class _OutputIterator>
+ _LIBCPP_INLINE_VISIBILITY
+ void param(_OutputIterator __dest) const
+ {_VSTD::copy(__v_.begin(), __v_.end(), __dest);}
+
+private:
+ // no copy functions
+ seed_seq(const seed_seq&); // = delete;
+ void operator=(const seed_seq&); // = delete;
+
+ _LIBCPP_INLINE_VISIBILITY
+ static result_type _Tp(result_type __x) {return __x ^ (__x >> 27);}
+};
+
+template<class _InputIterator>
+void
+seed_seq::init(_InputIterator __first, _InputIterator __last)
+{
+ for (_InputIterator __s = __first; __s != __last; ++__s)
+ __v_.push_back(*__s & 0xFFFFFFFF);
+}
+
+template<class _RandomAccessIterator>
+void
+seed_seq::generate(_RandomAccessIterator __first, _RandomAccessIterator __last)
+{
+ if (__first != __last)
+ {
+ _VSTD::fill(__first, __last, 0x8b8b8b8b);
+ const size_t __n = static_cast<size_t>(__last - __first);
+ const size_t __s = __v_.size();
+ const size_t __t = (__n >= 623) ? 11
+ : (__n >= 68) ? 7
+ : (__n >= 39) ? 5
+ : (__n >= 7) ? 3
+ : (__n - 1) / 2;
+ const size_t __p = (__n - __t) / 2;
+ const size_t __q = __p + __t;
+ const size_t __m = _VSTD::max(__s + 1, __n);
+ // __k = 0;
+ {
+ result_type __r = 1664525 * _Tp(__first[0] ^ __first[__p]
+ ^ __first[__n - 1]);
+ __first[__p] += __r;
+ __r += __s;
+ __first[__q] += __r;
+ __first[0] = __r;
+ }
+ for (size_t __k = 1; __k <= __s; ++__k)
+ {
+ const size_t __kmodn = __k % __n;
+ const size_t __kpmodn = (__k + __p) % __n;
+ result_type __r = 1664525 * _Tp(__first[__kmodn] ^ __first[__kpmodn]
+ ^ __first[(__k - 1) % __n]);
+ __first[__kpmodn] += __r;
+ __r += __kmodn + __v_[__k-1];
+ __first[(__k + __q) % __n] += __r;
+ __first[__kmodn] = __r;
+ }
+ for (size_t __k = __s + 1; __k < __m; ++__k)
+ {
+ const size_t __kmodn = __k % __n;
+ const size_t __kpmodn = (__k + __p) % __n;
+ result_type __r = 1664525 * _Tp(__first[__kmodn] ^ __first[__kpmodn]
+ ^ __first[(__k - 1) % __n]);
+ __first[__kpmodn] += __r;
+ __r += __kmodn;
+ __first[(__k + __q) % __n] += __r;
+ __first[__kmodn] = __r;
+ }
+ for (size_t __k = __m; __k < __m + __n; ++__k)
+ {
+ const size_t __kmodn = __k % __n;
+ const size_t __kpmodn = (__k + __p) % __n;
+ result_type __r = 1566083941 * _Tp(__first[__kmodn] +
+ __first[__kpmodn] +
+ __first[(__k - 1) % __n]);
+ __first[__kpmodn] ^= __r;
+ __r -= __kmodn;
+ __first[(__k + __q) % __n] ^= __r;
+ __first[__kmodn] = __r;
+ }
+ }
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___RANDOM_SEED_SEQ_H
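
generate() above is the mixing procedure fixed by the standard: the range is
filled with 0x8b8b8b8b and then three multiply-and-xor sweeps (constants
1664525 and 1566083941) fold every stored seed word into every output position,
so even a short seed perturbs the whole range. A minimal usage sketch, not part
of this patch:

#include <cstdint>
#include <iostream>
#include <random>

int main() {
    std::seed_seq seq{1u, 2u, 3u};
    std::uint32_t state[8];
    seq.generate(state, state + 8); // fills all eight words, well mixed
    for (std::uint32_t w : state)
        std::cout << w << '\n';
    std::mt19937 gen(seq);          // engines can also consume a seed_seq directly
}
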
diff --git a/libcxx/include/__random/shuffle_order_engine.h b/libcxx/include/__random/shuffle_order_engine.h
new file mode 100644
index 000000000000..7a5735dd7933
--- /dev/null
+++ b/libcxx/include/__random/shuffle_order_engine.h
@@ -0,0 +1,283 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANDOM_SHUFFLE_ORDER_ENGINE_H
+#define _LIBCPP___RANDOM_SHUFFLE_ORDER_ENGINE_H
+
+#include <__algorithm/equal.h>
+#include <__config>
+#include <__random/is_seed_sequence.h>
+#include <__utility/move.h>
+#include <cstdint>
+#include <iosfwd>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template <uint64_t _Xp, uint64_t _Yp>
+struct __ugcd
+{
+ static _LIBCPP_CONSTEXPR const uint64_t value = __ugcd<_Yp, _Xp % _Yp>::value;
+};
+
+template <uint64_t _Xp>
+struct __ugcd<_Xp, 0>
+{
+ static _LIBCPP_CONSTEXPR const uint64_t value = _Xp;
+};
+
+template <uint64_t _Np, uint64_t _Dp>
+class __uratio
+{
+ static_assert(_Dp != 0, "__uratio divide by 0");
+ static _LIBCPP_CONSTEXPR const uint64_t __gcd = __ugcd<_Np, _Dp>::value;
+public:
+ static _LIBCPP_CONSTEXPR const uint64_t num = _Np / __gcd;
+ static _LIBCPP_CONSTEXPR const uint64_t den = _Dp / __gcd;
+
+ typedef __uratio<num, den> type;
+};
+
+template<class _Engine, size_t __k>
+class _LIBCPP_TEMPLATE_VIS shuffle_order_engine
+{
+ static_assert(0 < __k, "shuffle_order_engine invalid parameters");
+public:
+ // types
+ typedef typename _Engine::result_type result_type;
+
+private:
+ _Engine __e_;
+ result_type _V_[__k];
+ result_type _Y_;
+
+public:
+ // engine characteristics
+ static _LIBCPP_CONSTEXPR const size_t table_size = __k;
+
+#ifdef _LIBCPP_CXX03_LANG
+ static const result_type _Min = _Engine::_Min;
+ static const result_type _Max = _Engine::_Max;
+#else
+ static _LIBCPP_CONSTEXPR const result_type _Min = _Engine::min();
+ static _LIBCPP_CONSTEXPR const result_type _Max = _Engine::max();
+#endif
+ static_assert(_Min < _Max, "shuffle_order_engine invalid parameters");
+ _LIBCPP_INLINE_VISIBILITY
+ static _LIBCPP_CONSTEXPR result_type min() { return _Min; }
+ _LIBCPP_INLINE_VISIBILITY
+ static _LIBCPP_CONSTEXPR result_type max() { return _Max; }
+
+ static _LIBCPP_CONSTEXPR const unsigned long long _Rp = _Max - _Min + 1ull;
+
+ // constructors and seeding functions
+ _LIBCPP_INLINE_VISIBILITY
+ shuffle_order_engine() {__init();}
+ _LIBCPP_INLINE_VISIBILITY
+ explicit shuffle_order_engine(const _Engine& __e)
+ : __e_(__e) {__init();}
+#ifndef _LIBCPP_CXX03_LANG
+ _LIBCPP_INLINE_VISIBILITY
+ explicit shuffle_order_engine(_Engine&& __e)
+ : __e_(_VSTD::move(__e)) {__init();}
+#endif // _LIBCPP_CXX03_LANG
+ _LIBCPP_INLINE_VISIBILITY
+ explicit shuffle_order_engine(result_type __sd) : __e_(__sd) {__init();}
+ template<class _Sseq>
+ _LIBCPP_INLINE_VISIBILITY
+ explicit shuffle_order_engine(_Sseq& __q,
+ typename enable_if<__is_seed_sequence<_Sseq, shuffle_order_engine>::value &&
+ !is_convertible<_Sseq, _Engine>::value>::type* = 0)
+ : __e_(__q) {__init();}
+ _LIBCPP_INLINE_VISIBILITY
+ void seed() {__e_.seed(); __init();}
+ _LIBCPP_INLINE_VISIBILITY
+ void seed(result_type __sd) {__e_.seed(__sd); __init();}
+ template<class _Sseq>
+ _LIBCPP_INLINE_VISIBILITY
+ typename enable_if
+ <
+ __is_seed_sequence<_Sseq, shuffle_order_engine>::value,
+ void
+ >::type
+ seed(_Sseq& __q) {__e_.seed(__q); __init();}
+
+ // generating functions
+ _LIBCPP_INLINE_VISIBILITY
+ result_type operator()() {return __eval(integral_constant<bool, _Rp != 0>());}
+ _LIBCPP_INLINE_VISIBILITY
+ void discard(unsigned long long __z) {for (; __z; --__z) operator()();}
+
+ // property functions
+ _LIBCPP_INLINE_VISIBILITY
+ const _Engine& base() const _NOEXCEPT {return __e_;}
+
+private:
+ template<class _Eng, size_t _Kp>
+ friend
+ bool
+ operator==(
+ const shuffle_order_engine<_Eng, _Kp>& __x,
+ const shuffle_order_engine<_Eng, _Kp>& __y);
+
+ template<class _Eng, size_t _Kp>
+ friend
+ bool
+ operator!=(
+ const shuffle_order_engine<_Eng, _Kp>& __x,
+ const shuffle_order_engine<_Eng, _Kp>& __y);
+
+ template <class _CharT, class _Traits,
+ class _Eng, size_t _Kp>
+ friend
+ basic_ostream<_CharT, _Traits>&
+ operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const shuffle_order_engine<_Eng, _Kp>& __x);
+
+ template <class _CharT, class _Traits,
+ class _Eng, size_t _Kp>
+ friend
+ basic_istream<_CharT, _Traits>&
+ operator>>(basic_istream<_CharT, _Traits>& __is,
+ shuffle_order_engine<_Eng, _Kp>& __x);
+
+ _LIBCPP_INLINE_VISIBILITY
+ void __init()
+ {
+ for (size_t __i = 0; __i < __k; ++__i)
+ _V_[__i] = __e_();
+ _Y_ = __e_();
+ }
+
+ _LIBCPP_INLINE_VISIBILITY
+ result_type __eval(false_type) {return __eval2(integral_constant<bool, __k & 1>());}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type __eval(true_type) {return __eval(__uratio<__k, _Rp>());}
+
+ _LIBCPP_INLINE_VISIBILITY
+ result_type __eval2(false_type) {return __eval(__uratio<__k/2, 0x8000000000000000ull>());}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type __eval2(true_type) {return __evalf<__k, 0>();}
+
+ template <uint64_t _Np, uint64_t _Dp>
+ _LIBCPP_INLINE_VISIBILITY
+ typename enable_if
+ <
+ (__uratio<_Np, _Dp>::num > 0xFFFFFFFFFFFFFFFFull / (_Max - _Min)),
+ result_type
+ >::type
+ __eval(__uratio<_Np, _Dp>)
+ {return __evalf<__uratio<_Np, _Dp>::num, __uratio<_Np, _Dp>::den>();}
+
+ template <uint64_t _Np, uint64_t _Dp>
+ _LIBCPP_INLINE_VISIBILITY
+ typename enable_if
+ <
+ __uratio<_Np, _Dp>::num <= 0xFFFFFFFFFFFFFFFFull / (_Max - _Min),
+ result_type
+ >::type
+ __eval(__uratio<_Np, _Dp>)
+ {
+ const size_t __j = static_cast<size_t>(__uratio<_Np, _Dp>::num * (_Y_ - _Min)
+ / __uratio<_Np, _Dp>::den);
+ _Y_ = _V_[__j];
+ _V_[__j] = __e_();
+ return _Y_;
+ }
+
+ template <uint64_t __n, uint64_t __d>
+ _LIBCPP_INLINE_VISIBILITY
+ result_type __evalf()
+ {
+ const double _Fp = __d == 0 ?
+ __n / (2. * 0x8000000000000000ull) :
+ __n / (double)__d;
+ const size_t __j = static_cast<size_t>(_Fp * (_Y_ - _Min));
+ _Y_ = _V_[__j];
+ _V_[__j] = __e_();
+ return _Y_;
+ }
+};
+
+template<class _Engine, size_t __k>
+ _LIBCPP_CONSTEXPR const size_t shuffle_order_engine<_Engine, __k>::table_size;
+
+template<class _Eng, size_t _Kp>
+bool
+operator==(
+ const shuffle_order_engine<_Eng, _Kp>& __x,
+ const shuffle_order_engine<_Eng, _Kp>& __y)
+{
+ return __x._Y_ == __y._Y_ && _VSTD::equal(__x._V_, __x._V_ + _Kp, __y._V_) &&
+ __x.__e_ == __y.__e_;
+}
+
+template<class _Eng, size_t _Kp>
+inline _LIBCPP_INLINE_VISIBILITY
+bool
+operator!=(
+ const shuffle_order_engine<_Eng, _Kp>& __x,
+ const shuffle_order_engine<_Eng, _Kp>& __y)
+{
+ return !(__x == __y);
+}
+
+template <class _CharT, class _Traits,
+ class _Eng, size_t _Kp>
+basic_ostream<_CharT, _Traits>&
+operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const shuffle_order_engine<_Eng, _Kp>& __x)
+{
+ __save_flags<_CharT, _Traits> __lx(__os);
+ typedef basic_ostream<_CharT, _Traits> _Ostream;
+ __os.flags(_Ostream::dec | _Ostream::left);
+ _CharT __sp = __os.widen(' ');
+ __os.fill(__sp);
+ __os << __x.__e_ << __sp << __x._V_[0];
+ for (size_t __i = 1; __i < _Kp; ++__i)
+ __os << __sp << __x._V_[__i];
+ return __os << __sp << __x._Y_;
+}
+
+template <class _CharT, class _Traits,
+ class _Eng, size_t _Kp>
+basic_istream<_CharT, _Traits>&
+operator>>(basic_istream<_CharT, _Traits>& __is,
+ shuffle_order_engine<_Eng, _Kp>& __x)
+{
+ typedef typename shuffle_order_engine<_Eng, _Kp>::result_type result_type;
+ __save_flags<_CharT, _Traits> __lx(__is);
+ typedef basic_istream<_CharT, _Traits> _Istream;
+ __is.flags(_Istream::dec | _Istream::skipws);
+ _Eng __e;
+ result_type _Vp[_Kp+1];
+ __is >> __e;
+ for (size_t __i = 0; __i < _Kp+1; ++__i)
+ __is >> _Vp[__i];
+ if (!__is.fail())
+ {
+ __x.__e_ = __e;
+ for (size_t __i = 0; __i < _Kp; ++__i)
+ __x._V_[__i] = _Vp[__i];
+ __x._Y_ = _Vp[_Kp];
+ }
+ return __is;
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___RANDOM_SHUFFLE_ORDER_ENGINE_H
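
This is the Bays-Durham shuffle: the engine keeps a table _V_ of __k
pregenerated values plus one pending value _Y_; each call maps _Y_ to a table
index __j (the __uratio machinery selects an overflow-free way to compute
j = k * (Y - min) / range at compile time, falling back to floating point when
integer arithmetic would overflow), returns _V_[__j], and refills that slot
from the base engine. The predefined knuth_b engine is
shuffle_order_engine<minstd_rand0, 256>. A minimal usage sketch, not part of
this patch:

#include <iostream>
#include <random>

int main() {
    std::knuth_b gen;               // shuffle_order_engine<minstd_rand0, 256>
    gen.discard(5);                 // discard() simply calls operator() repeatedly
    for (int i = 0; i < 3; ++i)
        std::cout << gen() << '\n'; // base-engine outputs, delivered in shuffled order
}
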
diff --git a/libcxx/include/__random/student_t_distribution.h b/libcxx/include/__random/student_t_distribution.h
new file mode 100644
index 000000000000..0cf911e4cd76
--- /dev/null
+++ b/libcxx/include/__random/student_t_distribution.h
@@ -0,0 +1,153 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANDOM_STUDENT_T_DISTRIBUTION_H
+#define _LIBCPP___RANDOM_STUDENT_T_DISTRIBUTION_H
+
+#include <__config>
+#include <__random/gamma_distribution.h>
+#include <__random/normal_distribution.h>
+#include <cmath>
+#include <iosfwd>
+#include <limits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template<class _RealType = double>
+class _LIBCPP_TEMPLATE_VIS student_t_distribution
+{
+public:
+ // types
+ typedef _RealType result_type;
+
+ class _LIBCPP_TEMPLATE_VIS param_type
+ {
+ result_type __n_;
+ public:
+ typedef student_t_distribution distribution_type;
+
+ _LIBCPP_INLINE_VISIBILITY
+ explicit param_type(result_type __n = 1) : __n_(__n) {}
+
+ _LIBCPP_INLINE_VISIBILITY
+ result_type n() const {return __n_;}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const param_type& __x, const param_type& __y)
+ {return __x.__n_ == __y.__n_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const param_type& __x, const param_type& __y)
+ {return !(__x == __y);}
+ };
+
+private:
+ param_type __p_;
+ normal_distribution<result_type> __nd_;
+
+public:
+    // constructors and reset functions
+#ifndef _LIBCPP_CXX03_LANG
+ _LIBCPP_INLINE_VISIBILITY
+ student_t_distribution() : student_t_distribution(1) {}
+ _LIBCPP_INLINE_VISIBILITY
+ explicit student_t_distribution(result_type __n)
+ : __p_(param_type(__n)) {}
+#else
+ _LIBCPP_INLINE_VISIBILITY
+ explicit student_t_distribution(result_type __n = 1)
+ : __p_(param_type(__n)) {}
+#endif
+ _LIBCPP_INLINE_VISIBILITY
+ explicit student_t_distribution(const param_type& __p)
+ : __p_(__p) {}
+ _LIBCPP_INLINE_VISIBILITY
+ void reset() {__nd_.reset();}
+
+ // generating functions
+ template<class _URNG>
+ _LIBCPP_INLINE_VISIBILITY
+ result_type operator()(_URNG& __g)
+ {return (*this)(__g, __p_);}
+ template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p);
+
+ // property functions
+ _LIBCPP_INLINE_VISIBILITY
+ result_type n() const {return __p_.n();}
+
+ _LIBCPP_INLINE_VISIBILITY
+ param_type param() const {return __p_;}
+ _LIBCPP_INLINE_VISIBILITY
+ void param(const param_type& __p) {__p_ = __p;}
+
+ _LIBCPP_INLINE_VISIBILITY
+ result_type min() const {return -numeric_limits<result_type>::infinity();}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type max() const {return numeric_limits<result_type>::infinity();}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const student_t_distribution& __x,
+ const student_t_distribution& __y)
+ {return __x.__p_ == __y.__p_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const student_t_distribution& __x,
+ const student_t_distribution& __y)
+ {return !(__x == __y);}
+};
+
+template <class _RealType>
+template<class _URNG>
+_RealType
+student_t_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p)
+{
+ gamma_distribution<result_type> __gd(__p.n() * .5, 2);
+ return __nd_(__g) * _VSTD::sqrt(__p.n()/__gd(__g));
+}
+
+template <class _CharT, class _Traits, class _RT>
+basic_ostream<_CharT, _Traits>&
+operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const student_t_distribution<_RT>& __x)
+{
+ __save_flags<_CharT, _Traits> __lx(__os);
+ typedef basic_ostream<_CharT, _Traits> _OStream;
+ __os.flags(_OStream::dec | _OStream::left | _OStream::fixed |
+ _OStream::scientific);
+ __os << __x.n();
+ return __os;
+}
+
+template <class _CharT, class _Traits, class _RT>
+basic_istream<_CharT, _Traits>&
+operator>>(basic_istream<_CharT, _Traits>& __is,
+ student_t_distribution<_RT>& __x)
+{
+ typedef student_t_distribution<_RT> _Eng;
+ typedef typename _Eng::result_type result_type;
+ typedef typename _Eng::param_type param_type;
+ __save_flags<_CharT, _Traits> __lx(__is);
+ typedef basic_istream<_CharT, _Traits> _Istream;
+ __is.flags(_Istream::dec | _Istream::skipws);
+ result_type __n;
+ __is >> __n;
+ if (!__is.fail())
+ __x.param(param_type(__n));
+ return __is;
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___RANDOM_STUDENT_T_DISTRIBUTION_H
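
The generating function is the textbook construction: if Z is standard normal
and X is chi-squared with n degrees of freedom (obtained here as gamma(n/2, 2)),
then Z * sqrt(n / X) is Student's t with n degrees of freedom; the member __nd_
supplies Z and is the only state reset() needs to clear. A minimal usage
sketch, not part of this patch:

#include <iostream>
#include <random>

int main() {
    std::mt19937 gen(42);
    std::student_t_distribution<double> dist(3.0); // 3 degrees of freedom: heavy tails
    for (int i = 0; i < 4; ++i)
        std::cout << dist(gen) << '\n';
}
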
diff --git a/libcxx/include/__random/subtract_with_carry_engine.h b/libcxx/include/__random/subtract_with_carry_engine.h
new file mode 100644
index 000000000000..073f84dccff6
--- /dev/null
+++ b/libcxx/include/__random/subtract_with_carry_engine.h
@@ -0,0 +1,352 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANDOM_SUBTRACT_WITH_CARRY_ENGINE_H
+#define _LIBCPP___RANDOM_SUBTRACT_WITH_CARRY_ENGINE_H
+
+#include <__algorithm/equal.h>
+#include <__algorithm/min.h>
+#include <__config>
+#include <__random/is_seed_sequence.h>
+#include <__random/linear_congruential_engine.h>
+#include <cstddef>
+#include <cstdint>
+#include <iosfwd>
+#include <limits>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template<class _UIntType, size_t __w, size_t __s, size_t __r>
+class _LIBCPP_TEMPLATE_VIS subtract_with_carry_engine;
+
+template<class _UInt, size_t _Wp, size_t _Sp, size_t _Rp>
+bool
+operator==(
+ const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x,
+ const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __y);
+
+template<class _UInt, size_t _Wp, size_t _Sp, size_t _Rp>
+_LIBCPP_INLINE_VISIBILITY
+bool
+operator!=(
+ const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x,
+ const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __y);
+
+template <class _CharT, class _Traits,
+ class _UInt, size_t _Wp, size_t _Sp, size_t _Rp>
+basic_ostream<_CharT, _Traits>&
+operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x);
+
+template <class _CharT, class _Traits,
+ class _UInt, size_t _Wp, size_t _Sp, size_t _Rp>
+basic_istream<_CharT, _Traits>&
+operator>>(basic_istream<_CharT, _Traits>& __is,
+ subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x);
+
+template<class _UIntType, size_t __w, size_t __s, size_t __r>
+class _LIBCPP_TEMPLATE_VIS subtract_with_carry_engine
+{
+public:
+ // types
+ typedef _UIntType result_type;
+
+private:
+ result_type __x_[__r];
+ result_type __c_;
+ size_t __i_;
+
+ static _LIBCPP_CONSTEXPR const result_type _Dt = numeric_limits<result_type>::digits;
+ static_assert( 0 < __w, "subtract_with_carry_engine invalid parameters");
+ static_assert(__w <= _Dt, "subtract_with_carry_engine invalid parameters");
+ static_assert( 0 < __s, "subtract_with_carry_engine invalid parameters");
+ static_assert(__s < __r, "subtract_with_carry_engine invalid parameters");
+public:
+ static _LIBCPP_CONSTEXPR const result_type _Min = 0;
+ static _LIBCPP_CONSTEXPR const result_type _Max = __w == _Dt ? result_type(~0) :
+ (result_type(1) << __w) - result_type(1);
+ static_assert(_Min < _Max, "subtract_with_carry_engine invalid parameters");
+
+ // engine characteristics
+ static _LIBCPP_CONSTEXPR const size_t word_size = __w;
+ static _LIBCPP_CONSTEXPR const size_t short_lag = __s;
+ static _LIBCPP_CONSTEXPR const size_t long_lag = __r;
+ _LIBCPP_INLINE_VISIBILITY
+ static _LIBCPP_CONSTEXPR result_type min() { return _Min; }
+ _LIBCPP_INLINE_VISIBILITY
+ static _LIBCPP_CONSTEXPR result_type max() { return _Max; }
+ static _LIBCPP_CONSTEXPR const result_type default_seed = 19780503u;
+
+ // constructors and seeding functions
+#ifndef _LIBCPP_CXX03_LANG
+ _LIBCPP_INLINE_VISIBILITY
+ subtract_with_carry_engine() : subtract_with_carry_engine(default_seed) {}
+ _LIBCPP_INLINE_VISIBILITY
+ explicit subtract_with_carry_engine(result_type __sd) { seed(__sd); }
+#else
+ _LIBCPP_INLINE_VISIBILITY
+ explicit subtract_with_carry_engine(result_type __sd = default_seed) {
+ seed(__sd);
+ }
+#endif
+ template<class _Sseq>
+ _LIBCPP_INLINE_VISIBILITY
+ explicit subtract_with_carry_engine(_Sseq& __q,
+ typename enable_if<__is_seed_sequence<_Sseq, subtract_with_carry_engine>::value>::type* = 0)
+ {seed(__q);}
+ _LIBCPP_INLINE_VISIBILITY
+ void seed(result_type __sd = default_seed)
+ {seed(__sd, integral_constant<unsigned, 1 + (__w - 1) / 32>());}
+ template<class _Sseq>
+ _LIBCPP_INLINE_VISIBILITY
+ typename enable_if
+ <
+ __is_seed_sequence<_Sseq, subtract_with_carry_engine>::value,
+ void
+ >::type
+ seed(_Sseq& __q)
+ {__seed(__q, integral_constant<unsigned, 1 + (__w - 1) / 32>());}
+
+ // generating functions
+ result_type operator()();
+ _LIBCPP_INLINE_VISIBILITY
+ void discard(unsigned long long __z) {for (; __z; --__z) operator()();}
+
+ template<class _UInt, size_t _Wp, size_t _Sp, size_t _Rp>
+ friend
+ bool
+ operator==(
+ const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x,
+ const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __y);
+
+ template<class _UInt, size_t _Wp, size_t _Sp, size_t _Rp>
+ friend
+ bool
+ operator!=(
+ const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x,
+ const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __y);
+
+ template <class _CharT, class _Traits,
+ class _UInt, size_t _Wp, size_t _Sp, size_t _Rp>
+ friend
+ basic_ostream<_CharT, _Traits>&
+ operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x);
+
+ template <class _CharT, class _Traits,
+ class _UInt, size_t _Wp, size_t _Sp, size_t _Rp>
+ friend
+ basic_istream<_CharT, _Traits>&
+ operator>>(basic_istream<_CharT, _Traits>& __is,
+ subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x);
+
+private:
+
+ void seed(result_type __sd, integral_constant<unsigned, 1>);
+ void seed(result_type __sd, integral_constant<unsigned, 2>);
+ template<class _Sseq>
+ void __seed(_Sseq& __q, integral_constant<unsigned, 1>);
+ template<class _Sseq>
+ void __seed(_Sseq& __q, integral_constant<unsigned, 2>);
+};
+
+template<class _UIntType, size_t __w, size_t __s, size_t __r>
+ _LIBCPP_CONSTEXPR const size_t subtract_with_carry_engine<_UIntType, __w, __s, __r>::word_size;
+
+template<class _UIntType, size_t __w, size_t __s, size_t __r>
+ _LIBCPP_CONSTEXPR const size_t subtract_with_carry_engine<_UIntType, __w, __s, __r>::short_lag;
+
+template<class _UIntType, size_t __w, size_t __s, size_t __r>
+ _LIBCPP_CONSTEXPR const size_t subtract_with_carry_engine<_UIntType, __w, __s, __r>::long_lag;
+
+template<class _UIntType, size_t __w, size_t __s, size_t __r>
+ _LIBCPP_CONSTEXPR const typename subtract_with_carry_engine<_UIntType, __w, __s, __r>::result_type
+ subtract_with_carry_engine<_UIntType, __w, __s, __r>::default_seed;
+
+template<class _UIntType, size_t __w, size_t __s, size_t __r>
+void
+subtract_with_carry_engine<_UIntType, __w, __s, __r>::seed(result_type __sd,
+ integral_constant<unsigned, 1>)
+{
+ linear_congruential_engine<result_type, 40014u, 0u, 2147483563u>
+ __e(__sd == 0u ? default_seed : __sd);
+ for (size_t __i = 0; __i < __r; ++__i)
+ __x_[__i] = static_cast<result_type>(__e() & _Max);
+ __c_ = __x_[__r-1] == 0;
+ __i_ = 0;
+}
+
+template<class _UIntType, size_t __w, size_t __s, size_t __r>
+void
+subtract_with_carry_engine<_UIntType, __w, __s, __r>::seed(result_type __sd,
+ integral_constant<unsigned, 2>)
+{
+ linear_congruential_engine<result_type, 40014u, 0u, 2147483563u>
+ __e(__sd == 0u ? default_seed : __sd);
+ for (size_t __i = 0; __i < __r; ++__i)
+ {
+ result_type __e0 = __e();
+ __x_[__i] = static_cast<result_type>(
+ (__e0 + ((uint64_t)__e() << 32)) & _Max);
+ }
+ __c_ = __x_[__r-1] == 0;
+ __i_ = 0;
+}
+
+template<class _UIntType, size_t __w, size_t __s, size_t __r>
+template<class _Sseq>
+void
+subtract_with_carry_engine<_UIntType, __w, __s, __r>::__seed(_Sseq& __q,
+ integral_constant<unsigned, 1>)
+{
+ const unsigned __k = 1;
+ uint32_t __ar[__r * __k];
+ __q.generate(__ar, __ar + __r * __k);
+ for (size_t __i = 0; __i < __r; ++__i)
+ __x_[__i] = static_cast<result_type>(__ar[__i] & _Max);
+ __c_ = __x_[__r-1] == 0;
+ __i_ = 0;
+}
+
+template<class _UIntType, size_t __w, size_t __s, size_t __r>
+template<class _Sseq>
+void
+subtract_with_carry_engine<_UIntType, __w, __s, __r>::__seed(_Sseq& __q,
+ integral_constant<unsigned, 2>)
+{
+ const unsigned __k = 2;
+ uint32_t __ar[__r * __k];
+ __q.generate(__ar, __ar + __r * __k);
+ for (size_t __i = 0; __i < __r; ++__i)
+ __x_[__i] = static_cast<result_type>(
+ (__ar[2 * __i] + ((uint64_t)__ar[2 * __i + 1] << 32)) & _Max);
+ __c_ = __x_[__r-1] == 0;
+ __i_ = 0;
+}
+
+template<class _UIntType, size_t __w, size_t __s, size_t __r>
+_UIntType
+subtract_with_carry_engine<_UIntType, __w, __s, __r>::operator()()
+{
+ const result_type& __xs = __x_[(__i_ + (__r - __s)) % __r];
+ result_type& __xr = __x_[__i_];
+ result_type __new_c = __c_ == 0 ? __xs < __xr : __xs != 0 ? __xs <= __xr : 1;
+ __xr = (__xs - __xr - __c_) & _Max;
+ __c_ = __new_c;
+ __i_ = (__i_ + 1) % __r;
+ return __xr;
+}
+
+template<class _UInt, size_t _Wp, size_t _Sp, size_t _Rp>
+bool
+operator==(
+ const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x,
+ const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __y)
+{
+ if (__x.__c_ != __y.__c_)
+ return false;
+ if (__x.__i_ == __y.__i_)
+ return _VSTD::equal(__x.__x_, __x.__x_ + _Rp, __y.__x_);
+ if (__x.__i_ == 0 || __y.__i_ == 0)
+ {
+ size_t __j = _VSTD::min(_Rp - __x.__i_, _Rp - __y.__i_);
+ if (!_VSTD::equal(__x.__x_ + __x.__i_, __x.__x_ + __x.__i_ + __j,
+ __y.__x_ + __y.__i_))
+ return false;
+ if (__x.__i_ == 0)
+ return _VSTD::equal(__x.__x_ + __j, __x.__x_ + _Rp, __y.__x_);
+ return _VSTD::equal(__x.__x_, __x.__x_ + (_Rp - __j), __y.__x_ + __j);
+ }
+ if (__x.__i_ < __y.__i_)
+ {
+ size_t __j = _Rp - __y.__i_;
+ if (!_VSTD::equal(__x.__x_ + __x.__i_, __x.__x_ + (__x.__i_ + __j),
+ __y.__x_ + __y.__i_))
+ return false;
+ if (!_VSTD::equal(__x.__x_ + (__x.__i_ + __j), __x.__x_ + _Rp,
+ __y.__x_))
+ return false;
+ return _VSTD::equal(__x.__x_, __x.__x_ + __x.__i_,
+ __y.__x_ + (_Rp - (__x.__i_ + __j)));
+ }
+ size_t __j = _Rp - __x.__i_;
+ if (!_VSTD::equal(__y.__x_ + __y.__i_, __y.__x_ + (__y.__i_ + __j),
+ __x.__x_ + __x.__i_))
+ return false;
+ if (!_VSTD::equal(__y.__x_ + (__y.__i_ + __j), __y.__x_ + _Rp,
+ __x.__x_))
+ return false;
+ return _VSTD::equal(__y.__x_, __y.__x_ + __y.__i_,
+ __x.__x_ + (_Rp - (__y.__i_ + __j)));
+}
+
+template<class _UInt, size_t _Wp, size_t _Sp, size_t _Rp>
+inline _LIBCPP_INLINE_VISIBILITY
+bool
+operator!=(
+ const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x,
+ const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __y)
+{
+ return !(__x == __y);
+}
+
+template <class _CharT, class _Traits,
+ class _UInt, size_t _Wp, size_t _Sp, size_t _Rp>
+basic_ostream<_CharT, _Traits>&
+operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x)
+{
+ __save_flags<_CharT, _Traits> __lx(__os);
+ typedef basic_ostream<_CharT, _Traits> _Ostream;
+ __os.flags(_Ostream::dec | _Ostream::left);
+ _CharT __sp = __os.widen(' ');
+ __os.fill(__sp);
+ __os << __x.__x_[__x.__i_];
+ for (size_t __j = __x.__i_ + 1; __j < _Rp; ++__j)
+ __os << __sp << __x.__x_[__j];
+ for (size_t __j = 0; __j < __x.__i_; ++__j)
+ __os << __sp << __x.__x_[__j];
+ __os << __sp << __x.__c_;
+ return __os;
+}
+
+template <class _CharT, class _Traits,
+ class _UInt, size_t _Wp, size_t _Sp, size_t _Rp>
+basic_istream<_CharT, _Traits>&
+operator>>(basic_istream<_CharT, _Traits>& __is,
+ subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x)
+{
+ __save_flags<_CharT, _Traits> __lx(__is);
+ typedef basic_istream<_CharT, _Traits> _Istream;
+ __is.flags(_Istream::dec | _Istream::skipws);
+ _UInt __t[_Rp+1];
+ for (size_t __i = 0; __i < _Rp+1; ++__i)
+ __is >> __t[__i];
+ if (!__is.fail())
+ {
+ for (size_t __i = 0; __i < _Rp; ++__i)
+ __x.__x_[__i] = __t[__i];
+ __x.__c_ = __t[_Rp];
+ __x.__i_ = 0;
+ }
+ return __is;
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___RANDOM_SUBTRACT_WITH_CARRY_ENGINE_H
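
The header above implements the subtract-with-carry (lagged-Fibonacci) engine: each step computes x(i) = (x(i-s) - x(i) - c) mod 2^w and records the borrow as the new carry. A minimal usage sketch, relying only on the standard <random> alias built on this class (std::ranlux24_base is subtract_with_carry_engine<uint_fast32_t, 24, 10, 24>):

    #include <iostream>
    #include <random>

    int main() {
        // ranlux24_base is a subtract_with_carry_engine instantiation;
        // 19780503u matches the default_seed defined in the header above.
        std::ranlux24_base eng(19780503u);
        for (int i = 0; i < 3; ++i)
            std::cout << eng() << '\n';   // each value lies in [0, 2^24 - 1]
    }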
diff --git a/libcxx/include/__random/uniform_int_distribution.h b/libcxx/include/__random/uniform_int_distribution.h
index a7cfa1ec7305..55b4761637f0 100644
--- a/libcxx/include/__random/uniform_int_distribution.h
+++ b/libcxx/include/__random/uniform_int_distribution.h
@@ -11,6 +11,8 @@
#include <__bits>
#include <__config>
+#include <__random/log2.h>
+#include <bit>
#include <cstddef>
#include <cstdint>
#include <iosfwd>
@@ -26,34 +28,6 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
-// __independent_bits_engine
-
-template <unsigned long long _Xp, size_t _Rp>
-struct __log2_imp
-{
- static const size_t value = _Xp & ((unsigned long long)(1) << _Rp) ? _Rp
- : __log2_imp<_Xp, _Rp - 1>::value;
-};
-
-template <unsigned long long _Xp>
-struct __log2_imp<_Xp, 0>
-{
- static const size_t value = 0;
-};
-
-template <size_t _Rp>
-struct __log2_imp<0, _Rp>
-{
- static const size_t value = _Rp + 1;
-};
-
-template <class _UIntType, _UIntType _Xp>
-struct __log2
-{
- static const size_t value = __log2_imp<_Xp,
- sizeof(_UIntType) * __CHAR_BIT__ - 1>::value;
-};
-
template<class _Engine, class _UIntType>
class __independent_bits_engine
{
@@ -181,7 +155,7 @@ __independent_bits_engine<_Engine, _UIntType>::__eval(true_type)
return _Sp;
}
-template<class _IntType = int>
+template<class _IntType = int> // __int128_t is also supported as an extension here
class uniform_int_distribution
{
public:
@@ -256,8 +230,8 @@ typename uniform_int_distribution<_IntType>::result_type
uniform_int_distribution<_IntType>::operator()(_URNG& __g, const param_type& __p)
_LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK
{
- typedef typename conditional<sizeof(result_type) <= sizeof(uint32_t),
- uint32_t, uint64_t>::type _UIntType;
+ typedef typename conditional<sizeof(result_type) <= sizeof(uint32_t), uint32_t,
+ typename make_unsigned<result_type>::type>::type _UIntType;
const _UIntType _Rp = _UIntType(__p.b()) - _UIntType(__p.a()) + _UIntType(1);
if (_Rp == 1)
return __p.a();
@@ -265,7 +239,7 @@ _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK
typedef __independent_bits_engine<_URNG, _UIntType> _Eng;
if (_Rp == 0)
return static_cast<result_type>(_Eng(__g, _Dt)());
- size_t __w = _Dt - __libcpp_clz(_Rp) - 1;
+ size_t __w = _Dt - __countl_zero(_Rp) - 1;
if ((_Rp & (numeric_limits<_UIntType>::max() >> (_Dt - __w))) != 0)
++__w;
_Eng __e(__g, __w);
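
The hunk above replaces the fixed uint32_t/uint64_t working type with make_unsigned<result_type>, so result types wider than 64 bits (the __int128_t extension noted in the comment) keep a full-width range computation, and it swaps __libcpp_clz for __countl_zero from <bit> when sizing the bit width __w of the range _Rp. A short usage sketch under the standard API:

    #include <cstdint>
    #include <random>

    int main() {
        std::mt19937_64 gen(42);
        // Internally the range _Rp = 1000 - (-1000) + 1 is computed in an
        // unsigned type at least as wide as the result type.
        std::uniform_int_distribution<std::int64_t> dist(-1000, 1000);
        std::int64_t v = dist(gen);   // uniform over [-1000, 1000]
        return (v >= -1000 && v <= 1000) ? 0 : 1;
    }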
diff --git a/libcxx/include/__random/uniform_random_bit_generator.h b/libcxx/include/__random/uniform_random_bit_generator.h
new file mode 100644
index 000000000000..7b2f0df868d7
--- /dev/null
+++ b/libcxx/include/__random/uniform_random_bit_generator.h
@@ -0,0 +1,45 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANDOM_UNIFORM_RANDOM_BIT_GENERATOR_H
+#define _LIBCPP___RANDOM_UNIFORM_RANDOM_BIT_GENERATOR_H
+
+#include <__concepts/arithmetic.h>
+#include <__concepts/invocable.h>
+#include <__concepts/same_as.h>
+#include <__config>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+// [rand.req.urng]
+template<class _Gen>
+concept uniform_random_bit_generator =
+ invocable<_Gen&> && unsigned_integral<invoke_result_t<_Gen&>> &&
+ requires {
+ { _Gen::min() } -> same_as<invoke_result_t<_Gen&>>;
+ { _Gen::max() } -> same_as<invoke_result_t<_Gen&>>;
+ requires bool_constant<(_Gen::min() < _Gen::max())>::value;
+ };
+
+#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___RANDOM_UNIFORM_RANDOM_BIT_GENERATOR_H
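
The concept above transcribes [rand.req.urng]: the generator must be invocable as _Gen&, produce an unsigned integral type, and expose static min()/max() with min() < max() (checked through bool_constant so the comparison is forced to be a constant expression). A minimal check, assuming C++20 concepts are available:

    #include <random>

    static_assert(std::uniform_random_bit_generator<std::mt19937>);
    static_assert(std::uniform_random_bit_generator<std::random_device>);
    static_assert(!std::uniform_random_bit_generator<int>);  // not even invocable

    int main() {}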
diff --git a/libcxx/include/__random/uniform_real_distribution.h b/libcxx/include/__random/uniform_real_distribution.h
new file mode 100644
index 000000000000..967e4e26fd0c
--- /dev/null
+++ b/libcxx/include/__random/uniform_real_distribution.h
@@ -0,0 +1,160 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANDOM_UNIFORM_REAL_DISTRIBUTION_H
+#define _LIBCPP___RANDOM_UNIFORM_REAL_DISTRIBUTION_H
+
+#include <__config>
+#include <__random/generate_canonical.h>
+#include <iosfwd>
+#include <limits>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template<class _RealType = double>
+class _LIBCPP_TEMPLATE_VIS uniform_real_distribution
+{
+public:
+ // types
+ typedef _RealType result_type;
+
+ class _LIBCPP_TEMPLATE_VIS param_type
+ {
+ result_type __a_;
+ result_type __b_;
+ public:
+ typedef uniform_real_distribution distribution_type;
+
+ _LIBCPP_INLINE_VISIBILITY
+ explicit param_type(result_type __a = 0,
+ result_type __b = 1)
+ : __a_(__a), __b_(__b) {}
+
+ _LIBCPP_INLINE_VISIBILITY
+ result_type a() const {return __a_;}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type b() const {return __b_;}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const param_type& __x, const param_type& __y)
+ {return __x.__a_ == __y.__a_ && __x.__b_ == __y.__b_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const param_type& __x, const param_type& __y)
+ {return !(__x == __y);}
+ };
+
+private:
+ param_type __p_;
+
+public:
+ // constructors and reset functions
+#ifndef _LIBCPP_CXX03_LANG
+ _LIBCPP_INLINE_VISIBILITY
+ uniform_real_distribution() : uniform_real_distribution(0) {}
+ explicit uniform_real_distribution(result_type __a, result_type __b = 1)
+ : __p_(param_type(__a, __b)) {}
+#else
+ _LIBCPP_INLINE_VISIBILITY
+ explicit uniform_real_distribution(result_type __a = 0, result_type __b = 1)
+ : __p_(param_type(__a, __b)) {}
+#endif
+ _LIBCPP_INLINE_VISIBILITY
+ explicit uniform_real_distribution(const param_type& __p) : __p_(__p) {}
+ _LIBCPP_INLINE_VISIBILITY
+ void reset() {}
+
+ // generating functions
+ template<class _URNG>
+ _LIBCPP_INLINE_VISIBILITY
+ result_type operator()(_URNG& __g)
+ {return (*this)(__g, __p_);}
+ template<class _URNG> _LIBCPP_INLINE_VISIBILITY result_type operator()(_URNG& __g, const param_type& __p);
+
+ // property functions
+ _LIBCPP_INLINE_VISIBILITY
+ result_type a() const {return __p_.a();}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type b() const {return __p_.b();}
+
+ _LIBCPP_INLINE_VISIBILITY
+ param_type param() const {return __p_;}
+ _LIBCPP_INLINE_VISIBILITY
+ void param(const param_type& __p) {__p_ = __p;}
+
+ _LIBCPP_INLINE_VISIBILITY
+ result_type min() const {return a();}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type max() const {return b();}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const uniform_real_distribution& __x,
+ const uniform_real_distribution& __y)
+ {return __x.__p_ == __y.__p_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const uniform_real_distribution& __x,
+ const uniform_real_distribution& __y)
+ {return !(__x == __y);}
+};
+
+template<class _RealType>
+template<class _URNG>
+inline
+typename uniform_real_distribution<_RealType>::result_type
+uniform_real_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p)
+{
+ return (__p.b() - __p.a())
+ * _VSTD::generate_canonical<_RealType, numeric_limits<_RealType>::digits>(__g)
+ + __p.a();
+}
+
+template <class _CharT, class _Traits, class _RT>
+basic_ostream<_CharT, _Traits>&
+operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const uniform_real_distribution<_RT>& __x)
+{
+ __save_flags<_CharT, _Traits> __lx(__os);
+ typedef basic_ostream<_CharT, _Traits> _OStream;
+ __os.flags(_OStream::dec | _OStream::left | _OStream::fixed |
+ _OStream::scientific);
+ _CharT __sp = __os.widen(' ');
+ __os.fill(__sp);
+ return __os << __x.a() << __sp << __x.b();
+}
+
+template <class _CharT, class _Traits, class _RT>
+basic_istream<_CharT, _Traits>&
+operator>>(basic_istream<_CharT, _Traits>& __is,
+ uniform_real_distribution<_RT>& __x)
+{
+ typedef uniform_real_distribution<_RT> _Eng;
+ typedef typename _Eng::result_type result_type;
+ typedef typename _Eng::param_type param_type;
+ __save_flags<_CharT, _Traits> __lx(__is);
+ typedef basic_istream<_CharT, _Traits> _Istream;
+ __is.flags(_Istream::dec | _Istream::skipws);
+ result_type __a;
+ result_type __b;
+ __is >> __a >> __b;
+ if (!__is.fail())
+ __x.param(param_type(__a, __b));
+ return __is;
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___RANDOM_UNIFORM_REAL_DISTRIBUTION_H
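
operator() above maps a canonical variate u in [0, 1) affinely onto the parameter interval: (b - a) * u + a. Usage sketch:

    #include <random>

    int main() {
        std::mt19937 gen(123);
        std::uniform_real_distribution<double> dist(0.0, 10.0);
        // Equivalent to (10.0 - 0.0) * generate_canonical<double, 53>(gen) + 0.0,
        // since numeric_limits<double>::digits == 53.
        double u = dist(gen);
        return (u >= 0.0 && u < 10.0) ? 0 : 1;
    }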
diff --git a/libcxx/include/__random/weibull_distribution.h b/libcxx/include/__random/weibull_distribution.h
new file mode 100644
index 000000000000..4c5e4e8fff1c
--- /dev/null
+++ b/libcxx/include/__random/weibull_distribution.h
@@ -0,0 +1,155 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RANDOM_WEIBULL_DISTRIBUTION_H
+#define _LIBCPP___RANDOM_WEIBULL_DISTRIBUTION_H
+
+#include <__config>
+#include <__random/exponential_distribution.h>
+#include <cmath>
+#include <iosfwd>
+#include <limits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template<class _RealType = double>
+class _LIBCPP_TEMPLATE_VIS weibull_distribution
+{
+public:
+ // types
+ typedef _RealType result_type;
+
+ class _LIBCPP_TEMPLATE_VIS param_type
+ {
+ result_type __a_;
+ result_type __b_;
+ public:
+ typedef weibull_distribution distribution_type;
+
+ _LIBCPP_INLINE_VISIBILITY
+ explicit param_type(result_type __a = 1, result_type __b = 1)
+ : __a_(__a), __b_(__b) {}
+
+ _LIBCPP_INLINE_VISIBILITY
+ result_type a() const {return __a_;}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type b() const {return __b_;}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const param_type& __x, const param_type& __y)
+ {return __x.__a_ == __y.__a_ && __x.__b_ == __y.__b_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const param_type& __x, const param_type& __y)
+ {return !(__x == __y);}
+ };
+
+private:
+ param_type __p_;
+
+public:
+ // constructor and reset functions
+#ifndef _LIBCPP_CXX03_LANG
+ _LIBCPP_INLINE_VISIBILITY
+ weibull_distribution() : weibull_distribution(1) {}
+ _LIBCPP_INLINE_VISIBILITY
+ explicit weibull_distribution(result_type __a, result_type __b = 1)
+ : __p_(param_type(__a, __b)) {}
+#else
+ _LIBCPP_INLINE_VISIBILITY
+ explicit weibull_distribution(result_type __a = 1, result_type __b = 1)
+ : __p_(param_type(__a, __b)) {}
+#endif
+ _LIBCPP_INLINE_VISIBILITY
+ explicit weibull_distribution(const param_type& __p)
+ : __p_(__p) {}
+ _LIBCPP_INLINE_VISIBILITY
+ void reset() {}
+
+ // generating functions
+ template<class _URNG>
+ _LIBCPP_INLINE_VISIBILITY
+ result_type operator()(_URNG& __g)
+ {return (*this)(__g, __p_);}
+ template<class _URNG>
+ _LIBCPP_INLINE_VISIBILITY
+ result_type operator()(_URNG& __g, const param_type& __p)
+ {return __p.b() *
+ _VSTD::pow(exponential_distribution<result_type>()(__g), 1/__p.a());}
+
+ // property functions
+ _LIBCPP_INLINE_VISIBILITY
+ result_type a() const {return __p_.a();}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type b() const {return __p_.b();}
+
+ _LIBCPP_INLINE_VISIBILITY
+ param_type param() const {return __p_;}
+ _LIBCPP_INLINE_VISIBILITY
+ void param(const param_type& __p) {__p_ = __p;}
+
+ _LIBCPP_INLINE_VISIBILITY
+ result_type min() const {return 0;}
+ _LIBCPP_INLINE_VISIBILITY
+ result_type max() const {return numeric_limits<result_type>::infinity();}
+
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator==(const weibull_distribution& __x,
+ const weibull_distribution& __y)
+ {return __x.__p_ == __y.__p_;}
+ friend _LIBCPP_INLINE_VISIBILITY
+ bool operator!=(const weibull_distribution& __x,
+ const weibull_distribution& __y)
+ {return !(__x == __y);}
+};
+
+template <class _CharT, class _Traits, class _RT>
+basic_ostream<_CharT, _Traits>&
+operator<<(basic_ostream<_CharT, _Traits>& __os,
+ const weibull_distribution<_RT>& __x)
+{
+ __save_flags<_CharT, _Traits> __lx(__os);
+ typedef basic_ostream<_CharT, _Traits> _OStream;
+ __os.flags(_OStream::dec | _OStream::left | _OStream::fixed |
+ _OStream::scientific);
+ _CharT __sp = __os.widen(' ');
+ __os.fill(__sp);
+ __os << __x.a() << __sp << __x.b();
+ return __os;
+}
+
+template <class _CharT, class _Traits, class _RT>
+basic_istream<_CharT, _Traits>&
+operator>>(basic_istream<_CharT, _Traits>& __is,
+ weibull_distribution<_RT>& __x)
+{
+ typedef weibull_distribution<_RT> _Eng;
+ typedef typename _Eng::result_type result_type;
+ typedef typename _Eng::param_type param_type;
+ __save_flags<_CharT, _Traits> __lx(__is);
+ typedef basic_istream<_CharT, _Traits> _Istream;
+ __is.flags(_Istream::dec | _Istream::skipws);
+ result_type __a;
+ result_type __b;
+ __is >> __a >> __b;
+ if (!__is.fail())
+ __x.param(param_type(__a, __b));
+ return __is;
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___RANDOM_WEIBULL_DISTRIBUTION_H
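
operator() above draws a Weibull variate by inverse transform from a unit exponential, Weibull(a, b) = b * Exp(1)^(1/a), which is why this header only needs <__random/exponential_distribution.h>. Sketch:

    #include <random>

    int main() {
        std::mt19937 gen(7);
        std::weibull_distribution<double> dist(/*shape a=*/2.0, /*scale b=*/1.5);
        // Internally: 1.5 * pow(exponential_distribution<double>()(gen), 1 / 2.0).
        double x = dist(gen);
        return (x >= 0.0) ? 0 : 1;
    }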
diff --git a/libcxx/include/__ranges/concepts.h b/libcxx/include/__ranges/concepts.h
index dc1cece33b8d..6a8364006beb 100644
--- a/libcxx/include/__ranges/concepts.h
+++ b/libcxx/include/__ranges/concepts.h
@@ -16,8 +16,8 @@
#include <__iterator/iterator_traits.h>
#include <__iterator/readable_traits.h>
#include <__ranges/access.h>
-#include <__ranges/enable_borrowed_range.h>
#include <__ranges/data.h>
+#include <__ranges/enable_borrowed_range.h>
#include <__ranges/enable_view.h>
#include <__ranges/size.h>
#include <concepts>
diff --git a/libcxx/include/__utility/priority_tag.h b/libcxx/include/__utility/priority_tag.h
new file mode 100644
index 000000000000..45d9e5ec4c8f
--- /dev/null
+++ b/libcxx/include/__utility/priority_tag.h
@@ -0,0 +1,26 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___UTILITY_PRIORITY_TAG_H
+#define _LIBCPP___UTILITY_PRIORITY_TAG_H
+
+#include <__config>
+#include <cstddef>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template<size_t _Ip> struct __priority_tag : __priority_tag<_Ip - 1> {};
+template<> struct __priority_tag<0> {};
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___UTILITY_PRIORITY_TAG_H
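
__priority_tag<_Ip> derives from __priority_tag<_Ip - 1>, so an overload taking a higher-numbered tag is a strictly better match; callers pass the highest tag and overload resolution falls through, via SFINAE, to the best viable candidate. A self-contained sketch of the same technique (the names and dispatch here are illustrative, not from libc++):

    #include <cstddef>
    #include <string>

    template <std::size_t I> struct priority_tag : priority_tag<I - 1> {};
    template <> struct priority_tag<0> {};

    // Tried first: types exposing a to_string() member.
    template <class T>
    auto stringify(const T& t, priority_tag<1>) -> decltype(t.to_string()) {
        return t.to_string();
    }
    // Fallback for everything else.
    template <class T>
    std::string stringify(const T&, priority_tag<0>) {
        return "<unprintable>";
    }

    template <class T>
    std::string stringify(const T& t) { return stringify(t, priority_tag<1>{}); }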
diff --git a/libcxx/include/bit b/libcxx/include/bit
index 634475b99879..0aab83e7a6eb 100644
--- a/libcxx/include/bit
+++ b/libcxx/include/bit
@@ -14,9 +14,13 @@
bit synopsis
namespace std {
- // [bit.cast], bit_cast
- template<class To, class From>
- constexpr To bit_cast(const From& from) noexcept; // C++20
+ // [bit.cast], bit_cast
+ template<class To, class From>
+ constexpr To bit_cast(const From& from) noexcept; // C++20
+
+ // [bit.byteswap], byteswap
+ template<class T>
+ constexpr T byteswap(T value) noexcept; // C++23
// [bit.pow.two], integral powers of 2
template <class T>
@@ -51,13 +55,14 @@ namespace std {
little = see below, // C++20
big = see below, // C++20
native = see below // C++20
-};
+ };
} // namespace std
*/
#include <__bit/bit_cast.h>
+#include <__bit/byteswap.h>
#include <__bits> // __libcpp_clz
#include <__config>
#include <__debug>
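
The new <__bit/byteswap.h> backs the std::byteswap declaration added to the synopsis above; it reverses the bytes of an integer's object representation (C++23). Its expected behavior, sketched as constant expressions assuming a library that ships it:

    #include <bit>
    #include <cstdint>

    static_assert(std::byteswap(std::uint32_t{0x11223344}) == 0x44332211u);
    static_assert(std::byteswap(std::uint16_t{0xABCD}) == 0xCDAB);

    int main() {}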
diff --git a/libcxx/include/compare b/libcxx/include/compare
index 8a2a82907062..5c4578da0b89 100644
--- a/libcxx/include/compare
+++ b/libcxx/include/compare
@@ -140,25 +140,10 @@ namespace std {
#include <__compare/compare_three_way_result.h>
#include <__compare/is_eq.h>
#include <__compare/ordering.h>
+#include <__compare/partial_order.h>
+#include <__compare/strong_order.h>
#include <__compare/three_way_comparable.h>
+#include <__compare/weak_order.h>
#include <__config>
-#ifndef _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER
-#pragma GCC system_header
-#endif
-
-_LIBCPP_BEGIN_NAMESPACE_STD
-
-#if _LIBCPP_STD_VER > 17
-
-// [cmp.alg], comparison algorithms
-// TODO: unimplemented
-template<class _Tp> constexpr strong_ordering strong_order(const _Tp& __lhs, const _Tp& __rhs);
-template<class _Tp> constexpr weak_ordering weak_order(const _Tp& __lhs, const _Tp& __rhs);
-template<class _Tp> constexpr partial_ordering partial_order(const _Tp& __lhs, const _Tp& __rhs);
-
-#endif // _LIBCPP_STD_VER > 17
-
-_LIBCPP_END_NAMESPACE_STD
-
#endif // _LIBCPP_COMPARE
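
This hunk replaces the previously unimplemented inline declarations of the [cmp.alg] comparison algorithms with real implementations in dedicated __compare/*.h headers. Their observable behavior, sketched under C++20:

    #include <compare>
    #include <limits>

    int main() {
        static_assert(std::strong_order(1, 2) == std::strong_ordering::less);
        constexpr double nan = std::numeric_limits<double>::quiet_NaN();
        // partial_order can report unordered (e.g. for NaN); strong_order on
        // floating-point instead uses the IEEE totalOrder relation.
        bool u = std::partial_order(nan, 1.0) == std::partial_ordering::unordered;
        return u ? 0 : 1;
    }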
diff --git a/libcxx/include/deque b/libcxx/include/deque
index 9ab6ea748d53..e45d780e274f 100644
--- a/libcxx/include/deque
+++ b/libcxx/include/deque
@@ -915,16 +915,16 @@ class __deque_base
__deque_base(const __deque_base& __c);
__deque_base& operator=(const __deque_base& __c);
public:
- typedef _Allocator allocator_type;
- typedef allocator_traits<allocator_type> __alloc_traits;
- typedef typename __alloc_traits::size_type size_type;
+ typedef _Allocator allocator_type;
+ typedef allocator_traits<allocator_type> __alloc_traits;
+ typedef typename __alloc_traits::size_type size_type;
- typedef _Tp value_type;
- typedef value_type& reference;
- typedef const value_type& const_reference;
- typedef typename __alloc_traits::difference_type difference_type;
- typedef typename __alloc_traits::pointer pointer;
- typedef typename __alloc_traits::const_pointer const_pointer;
+ typedef _Tp value_type;
+ typedef value_type& reference;
+ typedef const value_type& const_reference;
+ typedef typename __alloc_traits::difference_type difference_type;
+ typedef typename __alloc_traits::pointer pointer;
+ typedef typename __alloc_traits::const_pointer const_pointer;
static const difference_type __block_size;
@@ -1259,20 +1259,20 @@ public:
static_assert((is_same<typename allocator_type::value_type, value_type>::value),
"Allocator::value_type must be same type as value_type");
- typedef __deque_base<value_type, allocator_type> __base;
+ typedef __deque_base<value_type, allocator_type> __base;
- typedef typename __base::__alloc_traits __alloc_traits;
- typedef typename __base::reference reference;
- typedef typename __base::const_reference const_reference;
- typedef typename __base::iterator iterator;
- typedef typename __base::const_iterator const_iterator;
- typedef typename __allocator_traits<allocator_type>::size_type size_type;
- typedef typename __base::difference_type difference_type;
+ typedef typename __base::__alloc_traits __alloc_traits;
+ typedef typename __base::reference reference;
+ typedef typename __base::const_reference const_reference;
+ typedef typename __base::iterator iterator;
+ typedef typename __base::const_iterator const_iterator;
+ typedef typename __base::size_type size_type;
+ typedef typename __base::difference_type difference_type;
- typedef typename __base::pointer pointer;
- typedef typename __base::const_pointer const_pointer;
- typedef _VSTD::reverse_iterator<iterator> reverse_iterator;
- typedef _VSTD::reverse_iterator<const_iterator> const_reverse_iterator;
+ typedef typename __base::pointer pointer;
+ typedef typename __base::const_pointer const_pointer;
+ typedef _VSTD::reverse_iterator<iterator> reverse_iterator;
+ typedef _VSTD::reverse_iterator<const_iterator> const_reverse_iterator;
using typename __base::__deque_range;
using typename __base::__deque_block_range;
@@ -1289,7 +1289,14 @@ public:
explicit deque(size_type __n, const _Allocator& __a);
#endif
deque(size_type __n, const value_type& __v);
- deque(size_type __n, const value_type& __v, const allocator_type& __a);
+
+ template <class = __enable_if_t<__is_allocator<_Allocator>::value> >
+ deque(size_type __n, const value_type& __v, const allocator_type& __a) : __base(__a)
+ {
+ if (__n > 0)
+ __append(__n, __v);
+ }
+
template <class _InputIter>
deque(_InputIter __f, _InputIter __l,
typename enable_if<__is_cpp17_input_iterator<_InputIter>::value>::type* = 0);
@@ -1609,14 +1616,6 @@ deque<_Tp, _Allocator>::deque(size_type __n, const value_type& __v)
}
template <class _Tp, class _Allocator>
-deque<_Tp, _Allocator>::deque(size_type __n, const value_type& __v, const allocator_type& __a)
- : __base(__a)
-{
- if (__n > 0)
- __append(__n, __v);
-}
-
-template <class _Tp, class _Allocator>
template <class _InputIter>
deque<_Tp, _Allocator>::deque(_InputIter __f, _InputIter __l,
typename enable_if<__is_cpp17_input_iterator<_InputIter>::value>::type*)
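
The deque hunk above turns the (count, value, allocator) constructor into a template constrained on __is_allocator and defines it inline; the out-of-line definition removed below it had the same body. The constraint keeps this constructor out of class template argument deduction unless the third argument really is an allocator, and the same pattern is applied to forward_list and list further down. A sketch of the deduction this supports:

    #include <deque>
    #include <memory>

    int main() {
        // Deduces std::deque<int, std::allocator<int>>: the third argument
        // satisfies the allocator constraint, so this constructor's implicit
        // deduction guide applies.
        std::deque d(3, 42, std::allocator<int>());
        return (d.size() == 3 && d.front() == 42) ? 0 : 1;
    }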
diff --git a/libcxx/include/filesystem b/libcxx/include/filesystem
index dcbdbbae6985..39e8ca2e814b 100644
--- a/libcxx/include/filesystem
+++ b/libcxx/include/filesystem
@@ -1033,7 +1033,7 @@ public:
auto __p_root_name = __p.__root_name();
auto __p_root_name_size = __p_root_name.size();
if (__p.is_absolute() ||
- (!__p_root_name.empty() && __p_root_name != root_name())) {
+ (!__p_root_name.empty() && __p_root_name != __string_view(root_name().__pn_))) {
__pn_ = __p.__pn_;
return *this;
}
@@ -1492,22 +1492,22 @@ public:
#endif // !_LIBCPP_HAS_NO_LOCALIZATION
friend _LIBCPP_INLINE_VISIBILITY bool operator==(const path& __lhs, const path& __rhs) noexcept {
- return __lhs.compare(__rhs) == 0;
+ return __lhs.__compare(__rhs.__pn_) == 0;
}
friend _LIBCPP_INLINE_VISIBILITY bool operator!=(const path& __lhs, const path& __rhs) noexcept {
- return __lhs.compare(__rhs) != 0;
+ return __lhs.__compare(__rhs.__pn_) != 0;
}
friend _LIBCPP_INLINE_VISIBILITY bool operator<(const path& __lhs, const path& __rhs) noexcept {
- return __lhs.compare(__rhs) < 0;
+ return __lhs.__compare(__rhs.__pn_) < 0;
}
friend _LIBCPP_INLINE_VISIBILITY bool operator<=(const path& __lhs, const path& __rhs) noexcept {
- return __lhs.compare(__rhs) <= 0;
+ return __lhs.__compare(__rhs.__pn_) <= 0;
}
friend _LIBCPP_INLINE_VISIBILITY bool operator>(const path& __lhs, const path& __rhs) noexcept {
- return __lhs.compare(__rhs) > 0;
+ return __lhs.__compare(__rhs.__pn_) > 0;
}
friend _LIBCPP_INLINE_VISIBILITY bool operator>=(const path& __lhs, const path& __rhs) noexcept {
- return __lhs.compare(__rhs) >= 0;
+ return __lhs.__compare(__rhs.__pn_) >= 0;
}
friend _LIBCPP_INLINE_VISIBILITY path operator/(const path& __lhs,
@@ -3024,13 +3024,17 @@ _LIBCPP_END_NAMESPACE_FILESYSTEM
#if !defined(_LIBCPP_HAS_NO_RANGES)
template <>
+_LIBCPP_AVAILABILITY_FILESYSTEM
inline constexpr bool _VSTD::ranges::enable_borrowed_range<_VSTD_FS::directory_iterator> = true;
template <>
+_LIBCPP_AVAILABILITY_FILESYSTEM
inline constexpr bool _VSTD::ranges::enable_borrowed_range<_VSTD_FS::recursive_directory_iterator> = true;
template <>
+_LIBCPP_AVAILABILITY_FILESYSTEM
inline constexpr bool _VSTD::ranges::enable_view<_VSTD_FS::directory_iterator> = true;
template <>
+_LIBCPP_AVAILABILITY_FILESYSTEM
inline constexpr bool _VSTD::ranges::enable_view<_VSTD_FS::recursive_directory_iterator> = true;
#endif
diff --git a/libcxx/include/format b/libcxx/include/format
index e1d47c9f84dd..788b9c299abc 100644
--- a/libcxx/include/format
+++ b/libcxx/include/format
@@ -51,9 +51,6 @@ namespace std {
using wformat_args = basic_format_args<wformat_context>;
- template<class Out, class charT>
- using format_args_t = basic_format_args<basic_format_context<Out, charT>>;
-
// [format.functions], formatting functions
template<class... Args>
string format(string_view fmt, const Args&... args);
@@ -79,17 +76,15 @@ namespace std {
Out format_to(Out out, const locale& loc, wstring_view fmt, const Args&... args);
template<class Out>
- Out vformat_to(Out out, string_view fmt,
- format_args_t<type_identity_t<Out>, char> args);
+ Out vformat_to(Out out, string_view fmt, format_args args);
template<class Out>
- Out vformat_to(Out out, wstring_view fmt,
- format_args_t<type_identity_t<Out>, wchar_t> args);
+ Out vformat_to(Out out, wstring_view fmt, wformat_args args);
template<class Out>
Out vformat_to(Out out, const locale& loc, string_view fmt,
- format_args_t<type_identity_t<Out>, char> args);
+ format_args args);
template<class Out>
Out vformat_to(Out out, const locale& loc, wstring_view fmt,
- format_args_t<type_identity_t<Out>, wchar_t> args);
+ wformat_args args);
template<class Out> struct format_to_n_result {
Out out;
@@ -325,9 +320,6 @@ using format_args = basic_format_args<format_context>;
using wformat_args = basic_format_args<wformat_context>;
#endif
-template <class _OutIt, class _CharT>
-using format_args_t = basic_format_args<basic_format_context<_OutIt, _CharT>>;
-
template <class _Context, class... _Args>
struct _LIBCPP_TEMPLATE_VIS __format_arg_store {
// TODO FMT Use a built-in array.
@@ -436,51 +428,55 @@ __vformat_to(_ParseCtx&& __parse_ctx, _Ctx&& __ctx) {
} // namespace __format
-template <class _OutIt, class _CharT>
+template <class _OutIt, class _CharT, class _FormatOutIt>
requires(output_iterator<_OutIt, const _CharT&>) _LIBCPP_HIDE_FROM_ABI _OutIt
- __vformat_to(_OutIt __out_it, basic_string_view<_CharT> __fmt,
- format_args_t<type_identity_t<_OutIt>, _CharT> __args) {
- return __format::__vformat_to(
- basic_format_parse_context{__fmt, __args.__size()},
- _VSTD::__format_context_create(_VSTD::move(__out_it), __args));
+ __vformat_to(
+ _OutIt __out_it, basic_string_view<_CharT> __fmt,
+ basic_format_args<basic_format_context<_FormatOutIt, _CharT>> __args) {
+ if constexpr (same_as<_OutIt, _FormatOutIt>)
+ return _VSTD::__format::__vformat_to(
+ basic_format_parse_context{__fmt, __args.__size()},
+ _VSTD::__format_context_create(_VSTD::move(__out_it), __args));
+ else {
+ basic_string<_CharT> __str;
+ _VSTD::__format::__vformat_to(
+ basic_format_parse_context{__fmt, __args.__size()},
+ _VSTD::__format_context_create(_VSTD::back_inserter(__str), __args));
+ return _VSTD::copy_n(__str.begin(), __str.size(), _VSTD::move(__out_it));
+ }
}
template <output_iterator<const char&> _OutIt>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt
-vformat_to(_OutIt __out_it, string_view __fmt,
- format_args_t<type_identity_t<_OutIt>, char> __args) {
+_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt
+vformat_to(_OutIt __out_it, string_view __fmt, format_args __args) {
return _VSTD::__vformat_to(_VSTD::move(__out_it), __fmt, __args);
}
#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <output_iterator<const wchar_t&> _OutIt>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt
-vformat_to(_OutIt __out_it, wstring_view __fmt,
- format_args_t<type_identity_t<_OutIt>, wchar_t> __args) {
+_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt
+vformat_to(_OutIt __out_it, wstring_view __fmt, wformat_args __args) {
return _VSTD::__vformat_to(_VSTD::move(__out_it), __fmt, __args);
}
#endif
template <output_iterator<const char&> _OutIt, class... _Args>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt
+_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt
format_to(_OutIt __out_it, string_view __fmt, const _Args&... __args) {
- return _VSTD::vformat_to(
- _VSTD::move(__out_it), __fmt,
- _VSTD::make_format_args<basic_format_context<_OutIt, char>>(__args...));
+ return _VSTD::vformat_to(_VSTD::move(__out_it), __fmt,
+ _VSTD::make_format_args(__args...));
}
#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <output_iterator<const wchar_t&> _OutIt, class... _Args>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt
+_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt
format_to(_OutIt __out_it, wstring_view __fmt, const _Args&... __args) {
- return _VSTD::vformat_to(
- _VSTD::move(__out_it), __fmt,
- _VSTD::make_format_args<basic_format_context<_OutIt, wchar_t>>(
- __args...));
+ return _VSTD::vformat_to(_VSTD::move(__out_it), __fmt,
+ _VSTD::make_wformat_args(__args...));
}
#endif
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT string
+_LIBCPP_ALWAYS_INLINE inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT string
vformat(string_view __fmt, format_args __args) {
string __res;
_VSTD::vformat_to(_VSTD::back_inserter(__res), __fmt, __args);
@@ -488,7 +484,7 @@ vformat(string_view __fmt, format_args __args) {
}
#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT wstring
+_LIBCPP_ALWAYS_INLINE inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT wstring
vformat(wstring_view __fmt, wformat_args __args) {
wstring __res;
_VSTD::vformat_to(_VSTD::back_inserter(__res), __fmt, __args);
@@ -497,14 +493,14 @@ vformat(wstring_view __fmt, wformat_args __args) {
#endif
template <class... _Args>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT string
+_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT string
format(string_view __fmt, const _Args&... __args) {
return _VSTD::vformat(__fmt, _VSTD::make_format_args(__args...));
}
#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <class... _Args>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT wstring
+_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT wstring
format(wstring_view __fmt, const _Args&... __args) {
return _VSTD::vformat(__fmt, _VSTD::make_wformat_args(__args...));
}
@@ -556,54 +552,59 @@ formatted_size(wstring_view __fmt, const _Args&... __args) {
#ifndef _LIBCPP_HAS_NO_LOCALIZATION
-template <class _OutIt, class _CharT>
+template <class _OutIt, class _CharT, class _FormatOutIt>
requires(output_iterator<_OutIt, const _CharT&>) _LIBCPP_HIDE_FROM_ABI _OutIt
- __vformat_to(_OutIt __out_it, locale __loc, basic_string_view<_CharT> __fmt,
- format_args_t<type_identity_t<_OutIt>, _CharT> __args) {
- return __format::__vformat_to(
- basic_format_parse_context{__fmt, __args.__size()},
- _VSTD::__format_context_create(_VSTD::move(__out_it), __args,
- _VSTD::move(__loc)));
+ __vformat_to(
+ _OutIt __out_it, locale __loc, basic_string_view<_CharT> __fmt,
+ basic_format_args<basic_format_context<_FormatOutIt, _CharT>> __args) {
+ if constexpr (same_as<_OutIt, _FormatOutIt>)
+ return _VSTD::__format::__vformat_to(
+ basic_format_parse_context{__fmt, __args.__size()},
+ _VSTD::__format_context_create(_VSTD::move(__out_it), __args,
+ _VSTD::move(__loc)));
+ else {
+ basic_string<_CharT> __str;
+ _VSTD::__format::__vformat_to(
+ basic_format_parse_context{__fmt, __args.__size()},
+ _VSTD::__format_context_create(_VSTD::back_inserter(__str), __args,
+ _VSTD::move(__loc)));
+ return _VSTD::copy_n(__str.begin(), __str.size(), _VSTD::move(__out_it));
+ }
}
template <output_iterator<const char&> _OutIt>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt
-vformat_to(_OutIt __out_it, locale __loc, string_view __fmt,
- format_args_t<type_identity_t<_OutIt>, char> __args) {
+_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt vformat_to(
+ _OutIt __out_it, locale __loc, string_view __fmt, format_args __args) {
return _VSTD::__vformat_to(_VSTD::move(__out_it), _VSTD::move(__loc), __fmt,
__args);
}
#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <output_iterator<const wchar_t&> _OutIt>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt
-vformat_to(_OutIt __out_it, locale __loc, wstring_view __fmt,
- format_args_t<type_identity_t<_OutIt>, wchar_t> __args) {
+_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt vformat_to(
+ _OutIt __out_it, locale __loc, wstring_view __fmt, wformat_args __args) {
return _VSTD::__vformat_to(_VSTD::move(__out_it), _VSTD::move(__loc), __fmt,
__args);
}
#endif
template <output_iterator<const char&> _OutIt, class... _Args>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt format_to(
+_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt format_to(
_OutIt __out_it, locale __loc, string_view __fmt, const _Args&... __args) {
- return _VSTD::vformat_to(
- _VSTD::move(__out_it), _VSTD::move(__loc), __fmt,
- _VSTD::make_format_args<basic_format_context<_OutIt, char>>(__args...));
+ return _VSTD::vformat_to(_VSTD::move(__out_it), _VSTD::move(__loc), __fmt,
+ _VSTD::make_format_args(__args...));
}
#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <output_iterator<const wchar_t&> _OutIt, class... _Args>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt format_to(
+_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt format_to(
_OutIt __out_it, locale __loc, wstring_view __fmt, const _Args&... __args) {
- return _VSTD::vformat_to(
- _VSTD::move(__out_it), _VSTD::move(__loc), __fmt,
- _VSTD::make_format_args<basic_format_context<_OutIt, wchar_t>>(
- __args...));
+ return _VSTD::vformat_to(_VSTD::move(__out_it), _VSTD::move(__loc), __fmt,
+ _VSTD::make_wformat_args(__args...));
}
#endif
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT string
+_LIBCPP_ALWAYS_INLINE inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT string
vformat(locale __loc, string_view __fmt, format_args __args) {
string __res;
_VSTD::vformat_to(_VSTD::back_inserter(__res), _VSTD::move(__loc), __fmt,
@@ -612,7 +613,7 @@ vformat(locale __loc, string_view __fmt, format_args __args) {
}
#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT wstring
+_LIBCPP_ALWAYS_INLINE inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT wstring
vformat(locale __loc, wstring_view __fmt, wformat_args __args) {
wstring __res;
_VSTD::vformat_to(_VSTD::back_inserter(__res), _VSTD::move(__loc), __fmt,
@@ -622,7 +623,7 @@ vformat(locale __loc, wstring_view __fmt, wformat_args __args) {
#endif
template <class... _Args>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT string
+_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT string
format(locale __loc, string_view __fmt, const _Args&... __args) {
return _VSTD::vformat(_VSTD::move(__loc), __fmt,
_VSTD::make_format_args(__args...));
@@ -630,7 +631,7 @@ format(locale __loc, string_view __fmt, const _Args&... __args) {
#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <class... _Args>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT wstring
+_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT wstring
format(locale __loc, wstring_view __fmt, const _Args&... __args) {
return _VSTD::vformat(_VSTD::move(__loc), __fmt,
_VSTD::make_wformat_args(__args...));
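
The format changes above give vformat_to its type-erased signatures: the functions now take format_args/wformat_args directly instead of format_args_t keyed on the caller's output iterator. When the caller's iterator type differs from the erased context's, __vformat_to formats into a temporary basic_string and copies the result with copy_n; when they match, it writes straight through. A usage sketch under the resulting API:

    #include <format>
    #include <iterator>
    #include <string>

    int main() {
        std::string out;
        int a = 1, b = 2, c = 3;
        std::vformat_to(std::back_inserter(out), "{} + {} = {}",
                        std::make_format_args(a, b, c));
        return (out == "1 + 2 = 3") ? 0 : 1;
    }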
diff --git a/libcxx/include/forward_list b/libcxx/include/forward_list
index 9d19e741f061..34168e88746e 100644
--- a/libcxx/include/forward_list
+++ b/libcxx/include/forward_list
@@ -186,6 +186,7 @@ template <class T, class Allocator, class Predicate>
#include <iterator>
#include <limits>
#include <memory>
+#include <type_traits>
#include <version>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -643,12 +644,12 @@ public:
static_assert((is_same<typename allocator_type::value_type, value_type>::value),
"Allocator::value_type must be same type as value_type");
- typedef value_type& reference;
- typedef const value_type& const_reference;
- typedef typename allocator_traits<allocator_type>::pointer pointer;
- typedef typename allocator_traits<allocator_type>::const_pointer const_pointer;
- typedef typename __allocator_traits<allocator_type>::size_type size_type;
- typedef typename allocator_traits<allocator_type>::difference_type difference_type;
+ typedef value_type& reference;
+ typedef const value_type& const_reference;
+ typedef typename allocator_traits<allocator_type>::pointer pointer;
+ typedef typename allocator_traits<allocator_type>::const_pointer const_pointer;
+ typedef typename allocator_traits<allocator_type>::size_type size_type;
+ typedef typename allocator_traits<allocator_type>::difference_type difference_type;
typedef typename base::iterator iterator;
typedef typename base::const_iterator const_iterator;
@@ -669,7 +670,13 @@ public:
explicit forward_list(size_type __n, const allocator_type& __a);
#endif
forward_list(size_type __n, const value_type& __v);
- forward_list(size_type __n, const value_type& __v, const allocator_type& __a);
+
+ template <class = __enable_if_t<__is_allocator<_Alloc>::value> >
+ forward_list(size_type __n, const value_type& __v, const allocator_type& __a) : base(__a)
+ {
+ insert_after(cbefore_begin(), __n, __v);
+ }
+
template <class _InputIterator>
forward_list(_InputIterator __f, _InputIterator __l,
typename enable_if<
@@ -944,14 +951,6 @@ forward_list<_Tp, _Alloc>::forward_list(size_type __n, const value_type& __v)
}
template <class _Tp, class _Alloc>
-forward_list<_Tp, _Alloc>::forward_list(size_type __n, const value_type& __v,
- const allocator_type& __a)
- : base(__a)
-{
- insert_after(cbefore_begin(), __n, __v);
-}
-
-template <class _Tp, class _Alloc>
template <class _InputIterator>
forward_list<_Tp, _Alloc>::forward_list(_InputIterator __f, _InputIterator __l,
typename enable_if<
diff --git a/libcxx/include/list b/libcxx/include/list
index 6282983ad20a..c9c050a4f1f0 100644
--- a/libcxx/include/list
+++ b/libcxx/include/list
@@ -845,24 +845,24 @@ class _LIBCPP_TEMPLATE_VIS list
typedef typename base::__link_pointer __link_pointer;
public:
- typedef _Tp value_type;
- typedef _Alloc allocator_type;
+ typedef _Tp value_type;
+ typedef _Alloc allocator_type;
static_assert((is_same<value_type, typename allocator_type::value_type>::value),
"Invalid allocator::value_type");
- typedef value_type& reference;
- typedef const value_type& const_reference;
- typedef typename base::pointer pointer;
- typedef typename base::const_pointer const_pointer;
- typedef typename __allocator_traits<allocator_type>::size_type size_type;
- typedef typename base::difference_type difference_type;
- typedef typename base::iterator iterator;
- typedef typename base::const_iterator const_iterator;
- typedef _VSTD::reverse_iterator<iterator> reverse_iterator;
- typedef _VSTD::reverse_iterator<const_iterator> const_reverse_iterator;
+ typedef value_type& reference;
+ typedef const value_type& const_reference;
+ typedef typename base::pointer pointer;
+ typedef typename base::const_pointer const_pointer;
+ typedef typename base::size_type size_type;
+ typedef typename base::difference_type difference_type;
+ typedef typename base::iterator iterator;
+ typedef typename base::const_iterator const_iterator;
+ typedef _VSTD::reverse_iterator<iterator> reverse_iterator;
+ typedef _VSTD::reverse_iterator<const_iterator> const_reverse_iterator;
#if _LIBCPP_STD_VER > 17
- typedef size_type __remove_return_type;
+ typedef size_type __remove_return_type;
#else
- typedef void __remove_return_type;
+ typedef void __remove_return_type;
#endif
_LIBCPP_INLINE_VISIBILITY
@@ -885,7 +885,16 @@ public:
explicit list(size_type __n, const allocator_type& __a);
#endif
list(size_type __n, const value_type& __x);
- list(size_type __n, const value_type& __x, const allocator_type& __a);
+ template <class = __enable_if_t<__is_allocator<_Alloc>::value> >
+ list(size_type __n, const value_type& __x, const allocator_type& __a) : base(__a)
+ {
+#if _LIBCPP_DEBUG_LEVEL == 2
+ __get_db()->__insert_c(this);
+#endif
+ for (; __n > 0; --__n)
+ push_back(__x);
+ }
+
template <class _InpIter>
list(_InpIter __f, _InpIter __l,
typename enable_if<__is_cpp17_input_iterator<_InpIter>::value>::type* = 0);
@@ -1242,17 +1251,6 @@ list<_Tp, _Alloc>::list(size_type __n, const value_type& __x)
}
template <class _Tp, class _Alloc>
-list<_Tp, _Alloc>::list(size_type __n, const value_type& __x, const allocator_type& __a)
- : base(__a)
-{
-#if _LIBCPP_DEBUG_LEVEL == 2
- __get_db()->__insert_c(this);
-#endif
- for (; __n > 0; --__n)
- push_back(__x);
-}
-
-template <class _Tp, class _Alloc>
template <class _InpIter>
list<_Tp, _Alloc>::list(_InpIter __f, _InpIter __l,
typename enable_if<__is_cpp17_input_iterator<_InpIter>::value>::type*)
diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap
index f34442ed5c9a..a4a264bd9147 100644
--- a/libcxx/include/module.modulemap
+++ b/libcxx/include/module.modulemap
@@ -338,6 +338,7 @@ module std [system] {
module __bit {
module bit_cast { private header "__bit/bit_cast.h" }
+ module byteswap { private header "__bit/byteswap.h" }
}
}
module bitset {
@@ -376,8 +377,11 @@ module std [system] {
module compare_three_way_result { private header "__compare/compare_three_way_result.h" }
module is_eq { private header "__compare/is_eq.h" }
module ordering { private header "__compare/ordering.h" }
+ module partial_order { private header "__compare/partial_order.h" }
+ module strong_order { private header "__compare/strong_order.h" }
module synth_three_way { private header "__compare/synth_three_way.h" }
module three_way_comparable { private header "__compare/three_way_comparable.h" }
+ module weak_order { private header "__compare/weak_order.h" }
}
}
module complex {
@@ -658,6 +662,22 @@ module std [system] {
module numeric {
header "numeric"
export *
+
+ module __numeric {
+ module accumulate { private header "__numeric/accumulate.h" }
+ module adjacent_difference { private header "__numeric/adjacent_difference.h" }
+ module exclusive_scan { private header "__numeric/exclusive_scan.h" }
+ module gcd_lcm { private header "__numeric/gcd_lcm.h" }
+ module inclusive_scan { private header "__numeric/inclusive_scan.h" }
+ module inner_product { private header "__numeric/inner_product.h" }
+ module iota { private header "__numeric/iota.h" }
+ module midpoint { private header "__numeric/midpoint.h" }
+ module partial_sum { private header "__numeric/partial_sum.h" }
+ module reduce { private header "__numeric/reduce.h" }
+ module transform_exclusive_scan { private header "__numeric/transform_exclusive_scan.h" }
+ module transform_inclusive_scan { private header "__numeric/transform_inclusive_scan.h" }
+ module transform_reduce { private header "__numeric/transform_reduce.h" }
+ }
}
module optional {
header "optional"
@@ -679,7 +699,41 @@ module std [system] {
export *
module __random {
- module uniform_int_distribution { private header "__random/uniform_int_distribution.h" }
+ module bernoulli_distribution { private header "__random/bernoulli_distribution.h" }
+ module binomial_distribution { private header "__random/binomial_distribution.h" }
+ module cauchy_distribution { private header "__random/cauchy_distribution.h" }
+ module chi_squared_distribution { private header "__random/chi_squared_distribution.h" }
+ module default_random_engine { private header "__random/default_random_engine.h" }
+ module discard_block_engine { private header "__random/discard_block_engine.h" }
+ module discrete_distribution { private header "__random/discrete_distribution.h" }
+ module exponential_distribution { private header "__random/exponential_distribution.h" }
+ module extreme_value_distribution { private header "__random/extreme_value_distribution.h" }
+ module fisher_f_distribution { private header "__random/fisher_f_distribution.h" }
+ module gamma_distribution { private header "__random/gamma_distribution.h" }
+ module generate_canonical { private header "__random/generate_canonical.h" }
+ module geometric_distribution { private header "__random/geometric_distribution.h" }
+ module independent_bits_engine { private header "__random/independent_bits_engine.h" }
+ module is_seed_sequence { private header "__random/is_seed_sequence.h" }
+ module knuth_b { private header "__random/knuth_b.h" }
+ module linear_congruential_engine { private header "__random/linear_congruential_engine.h" }
+ module log2 { private header "__random/log2.h" }
+ module lognormal_distribution { private header "__random/lognormal_distribution.h" }
+ module mersenne_twister_engine { private header "__random/mersenne_twister_engine.h" }
+ module negative_binomial_distribution { private header "__random/negative_binomial_distribution.h" }
+ module normal_distribution { private header "__random/normal_distribution.h" }
+ module piecewise_constant_distribution { private header "__random/piecewise_constant_distribution.h" }
+ module piecewise_linear_distribution { private header "__random/piecewise_linear_distribution.h" }
+ module poisson_distribution { private header "__random/poisson_distribution.h" }
+ module random_device { private header "__random/random_device.h" }
+ module ranlux { private header "__random/ranlux.h" }
+ module seed_seq { private header "__random/seed_seq.h" }
+ module shuffle_order_engine { private header "__random/shuffle_order_engine.h" }
+ module student_t_distribution { private header "__random/student_t_distribution.h" }
+ module subtract_with_carry_engine { private header "__random/subtract_with_carry_engine.h" }
+ module uniform_int_distribution { private header "__random/uniform_int_distribution.h" }
+ module uniform_random_bit_generator { private header "__random/uniform_random_bit_generator.h" }
+ module uniform_real_distribution { private header "__random/uniform_real_distribution.h" }
+ module weibull_distribution { private header "__random/weibull_distribution.h" }
}
}
module ranges {
@@ -848,6 +902,7 @@ module std [system] {
module move { private header "__utility/move.h" }
module pair { private header "__utility/pair.h" }
module piecewise_construct { private header "__utility/piecewise_construct.h" }
+ module priority_tag { private header "__utility/priority_tag.h" }
module rel_ops { private header "__utility/rel_ops.h" }
module swap { private header "__utility/swap.h" }
module to_underlying { private header "__utility/to_underlying.h" }
diff --git a/libcxx/include/numeric b/libcxx/include/numeric
index fc44efff761d..09d15a6024de 100644
--- a/libcxx/include/numeric
+++ b/libcxx/include/numeric
@@ -145,490 +145,29 @@ template<class T>
*/
#include <__config>
-#include <__debug>
#include <cmath> // for isnormal
#include <functional>
#include <iterator>
-#include <limits> // for numeric_limits
#include <version>
+#include <__numeric/accumulate.h>
+#include <__numeric/adjacent_difference.h>
+#include <__numeric/exclusive_scan.h>
+#include <__numeric/gcd_lcm.h>
+#include <__numeric/inclusive_scan.h>
+#include <__numeric/inner_product.h>
+#include <__numeric/iota.h>
+#include <__numeric/midpoint.h>
+#include <__numeric/partial_sum.h>
+#include <__numeric/reduce.h>
+#include <__numeric/transform_exclusive_scan.h>
+#include <__numeric/transform_inclusive_scan.h>
+#include <__numeric/transform_reduce.h>
+
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
-_LIBCPP_BEGIN_NAMESPACE_STD
-
-template <class _InputIterator, class _Tp>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
-_Tp
-accumulate(_InputIterator __first, _InputIterator __last, _Tp __init)
-{
- for (; __first != __last; ++__first)
-#if _LIBCPP_STD_VER > 17
- __init = _VSTD::move(__init) + *__first;
-#else
- __init = __init + *__first;
-#endif
- return __init;
-}
-
-template <class _InputIterator, class _Tp, class _BinaryOperation>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
-_Tp
-accumulate(_InputIterator __first, _InputIterator __last, _Tp __init, _BinaryOperation __binary_op)
-{
- for (; __first != __last; ++__first)
-#if _LIBCPP_STD_VER > 17
- __init = __binary_op(_VSTD::move(__init), *__first);
-#else
- __init = __binary_op(__init, *__first);
-#endif
- return __init;
-}
-
-#if _LIBCPP_STD_VER > 14
-template <class _InputIterator, class _Tp, class _BinaryOp>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
-_Tp
-reduce(_InputIterator __first, _InputIterator __last, _Tp __init, _BinaryOp __b)
-{
- for (; __first != __last; ++__first)
- __init = __b(__init, *__first);
- return __init;
-}
-
-template <class _InputIterator, class _Tp>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
-_Tp
-reduce(_InputIterator __first, _InputIterator __last, _Tp __init)
-{
- return _VSTD::reduce(__first, __last, __init, _VSTD::plus<>());
-}
-
-template <class _InputIterator>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
-typename iterator_traits<_InputIterator>::value_type
-reduce(_InputIterator __first, _InputIterator __last)
-{
- return _VSTD::reduce(__first, __last,
- typename iterator_traits<_InputIterator>::value_type{});
-}
-#endif
-
-template <class _InputIterator1, class _InputIterator2, class _Tp>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
-_Tp
-inner_product(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _Tp __init)
-{
- for (; __first1 != __last1; ++__first1, (void) ++__first2)
-#if _LIBCPP_STD_VER > 17
- __init = _VSTD::move(__init) + *__first1 * *__first2;
-#else
- __init = __init + *__first1 * *__first2;
-#endif
- return __init;
-}
-
-template <class _InputIterator1, class _InputIterator2, class _Tp, class _BinaryOperation1, class _BinaryOperation2>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
-_Tp
-inner_product(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2,
- _Tp __init, _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2)
-{
- for (; __first1 != __last1; ++__first1, (void) ++__first2)
-#if _LIBCPP_STD_VER > 17
- __init = __binary_op1(_VSTD::move(__init), __binary_op2(*__first1, *__first2));
-#else
- __init = __binary_op1(__init, __binary_op2(*__first1, *__first2));
-#endif
- return __init;
-}
-
-#if _LIBCPP_STD_VER > 14
-template <class _InputIterator, class _Tp, class _BinaryOp, class _UnaryOp>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
-_Tp
-transform_reduce(_InputIterator __first, _InputIterator __last,
- _Tp __init, _BinaryOp __b, _UnaryOp __u)
-{
- for (; __first != __last; ++__first)
- __init = __b(__init, __u(*__first));
- return __init;
-}
-
-template <class _InputIterator1, class _InputIterator2,
- class _Tp, class _BinaryOp1, class _BinaryOp2>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
-_Tp
-transform_reduce(_InputIterator1 __first1, _InputIterator1 __last1,
- _InputIterator2 __first2, _Tp __init, _BinaryOp1 __b1, _BinaryOp2 __b2)
-{
- for (; __first1 != __last1; ++__first1, (void) ++__first2)
- __init = __b1(__init, __b2(*__first1, *__first2));
- return __init;
-}
-
-template <class _InputIterator1, class _InputIterator2, class _Tp>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
-_Tp
-transform_reduce(_InputIterator1 __first1, _InputIterator1 __last1,
- _InputIterator2 __first2, _Tp __init)
-{
- return _VSTD::transform_reduce(__first1, __last1, __first2, _VSTD::move(__init),
- _VSTD::plus<>(), _VSTD::multiplies<>());
-}
-#endif
-
-template <class _InputIterator, class _OutputIterator>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
-_OutputIterator
-partial_sum(_InputIterator __first, _InputIterator __last, _OutputIterator __result)
-{
- if (__first != __last)
- {
- typename iterator_traits<_InputIterator>::value_type __t(*__first);
- *__result = __t;
- for (++__first, (void) ++__result; __first != __last; ++__first, (void) ++__result)
- {
-#if _LIBCPP_STD_VER > 17
- __t = _VSTD::move(__t) + *__first;
-#else
- __t = __t + *__first;
-#endif
- *__result = __t;
- }
- }
- return __result;
-}
-
-template <class _InputIterator, class _OutputIterator, class _BinaryOperation>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
-_OutputIterator
-partial_sum(_InputIterator __first, _InputIterator __last, _OutputIterator __result,
- _BinaryOperation __binary_op)
-{
- if (__first != __last)
- {
- typename iterator_traits<_InputIterator>::value_type __t(*__first);
- *__result = __t;
- for (++__first, (void) ++__result; __first != __last; ++__first, (void) ++__result)
- {
-#if _LIBCPP_STD_VER > 17
- __t = __binary_op(_VSTD::move(__t), *__first);
-#else
- __t = __binary_op(__t, *__first);
-#endif
- *__result = __t;
- }
- }
- return __result;
-}
-
-#if _LIBCPP_STD_VER > 14
-template <class _InputIterator, class _OutputIterator, class _Tp, class _BinaryOp>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
-_OutputIterator
-exclusive_scan(_InputIterator __first, _InputIterator __last,
- _OutputIterator __result, _Tp __init, _BinaryOp __b)
-{
- if (__first != __last)
- {
- _Tp __tmp(__b(__init, *__first));
- while (true)
- {
- *__result = _VSTD::move(__init);
- ++__result;
- ++__first;
- if (__first == __last)
- break;
- __init = _VSTD::move(__tmp);
- __tmp = __b(__init, *__first);
- }
- }
- return __result;
-}
-
-template <class _InputIterator, class _OutputIterator, class _Tp>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
-_OutputIterator
-exclusive_scan(_InputIterator __first, _InputIterator __last,
- _OutputIterator __result, _Tp __init)
-{
- return _VSTD::exclusive_scan(__first, __last, __result, __init, _VSTD::plus<>());
-}
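// ---- Editor's note (illustration, not part of the diff) ----
// exclusive_scan writes the running value *before* folding in the current
// element, and the __tmp staging above keeps that correct even when __result
// aliases __first (an in-place scan). Exclusive vs. inclusive on the same
// input, as a minimal sketch:
#include <cassert>
#include <numeric>
#include <vector>
int main() {
    std::vector<int> v{1, 2, 3, 4}, out(4);
    std::exclusive_scan(v.begin(), v.end(), out.begin(), 0);
    assert((out == std::vector<int>{0, 1, 3, 6}));  // element i excluded
    std::inclusive_scan(v.begin(), v.end(), out.begin());
    assert((out == std::vector<int>{1, 3, 6, 10})); // element i included
}
// ---- end editor's note ----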
-
-template <class _InputIterator, class _OutputIterator, class _Tp, class _BinaryOp>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
-_OutputIterator inclusive_scan(_InputIterator __first, _InputIterator __last,
- _OutputIterator __result, _BinaryOp __b, _Tp __init)
-{
- for (; __first != __last; ++__first, (void) ++__result) {
- __init = __b(__init, *__first);
- *__result = __init;
- }
- return __result;
-}
-
-template <class _InputIterator, class _OutputIterator, class _BinaryOp>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
-_OutputIterator inclusive_scan(_InputIterator __first, _InputIterator __last,
- _OutputIterator __result, _BinaryOp __b)
-{
- if (__first != __last) {
- typename iterator_traits<_InputIterator>::value_type __init = *__first;
- *__result++ = __init;
- if (++__first != __last)
- return _VSTD::inclusive_scan(__first, __last, __result, __b, __init);
- }
-
- return __result;
-}
-
-template <class _InputIterator, class _OutputIterator>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
-_OutputIterator inclusive_scan(_InputIterator __first, _InputIterator __last,
- _OutputIterator __result)
-{
- return _VSTD::inclusive_scan(__first, __last, __result, _VSTD::plus<>());
-}
-
-template <class _InputIterator, class _OutputIterator, class _Tp,
- class _BinaryOp, class _UnaryOp>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
-_OutputIterator
-transform_exclusive_scan(_InputIterator __first, _InputIterator __last,
- _OutputIterator __result, _Tp __init,
- _BinaryOp __b, _UnaryOp __u)
-{
- if (__first != __last)
- {
- _Tp __saved = __init;
- do
- {
- __init = __b(__init, __u(*__first));
- *__result = __saved;
- __saved = __init;
- ++__result;
- } while (++__first != __last);
- }
- return __result;
-}
-
-template <class _InputIterator, class _OutputIterator, class _Tp, class _BinaryOp, class _UnaryOp>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
-_OutputIterator
-transform_inclusive_scan(_InputIterator __first, _InputIterator __last,
- _OutputIterator __result, _BinaryOp __b, _UnaryOp __u, _Tp __init)
-{
- for (; __first != __last; ++__first, (void) ++__result) {
- __init = __b(__init, __u(*__first));
- *__result = __init;
- }
-
- return __result;
-}
-
-template <class _InputIterator, class _OutputIterator, class _BinaryOp, class _UnaryOp>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
-_OutputIterator
-transform_inclusive_scan(_InputIterator __first, _InputIterator __last,
- _OutputIterator __result, _BinaryOp __b, _UnaryOp __u)
-{
- if (__first != __last) {
- typename iterator_traits<_InputIterator>::value_type __init = __u(*__first);
- *__result++ = __init;
- if (++__first != __last)
- return _VSTD::transform_inclusive_scan(__first, __last, __result, __b, __u, __init);
- }
-
- return __result;
-}
-#endif
-
-template <class _InputIterator, class _OutputIterator>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
-_OutputIterator
-adjacent_difference(_InputIterator __first, _InputIterator __last, _OutputIterator __result)
-{
- if (__first != __last)
- {
- typename iterator_traits<_InputIterator>::value_type __acc(*__first);
- *__result = __acc;
- for (++__first, (void) ++__result; __first != __last; ++__first, (void) ++__result)
- {
- typename iterator_traits<_InputIterator>::value_type __val(*__first);
-#if _LIBCPP_STD_VER > 17
- *__result = __val - _VSTD::move(__acc);
-#else
- *__result = __val - __acc;
-#endif
- __acc = _VSTD::move(__val);
- }
- }
- return __result;
-}
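// ---- Editor's note (illustration, not part of the diff) ----
// adjacent_difference copies *__first unchanged and then writes pairwise
// differences, so partial_sum is its exact inverse. A minimal sketch:
#include <cassert>
#include <numeric>
#include <vector>
int main() {
    std::vector<int> v{2, 4, 7, 11}, d(4), s(4);
    std::adjacent_difference(v.begin(), v.end(), d.begin());
    assert((d == std::vector<int>{2, 2, 3, 4}));
    std::partial_sum(d.begin(), d.end(), s.begin());
    assert(s == v); // round-trips back to the original sequence
}
// ---- end editor's note ----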
-
-template <class _InputIterator, class _OutputIterator, class _BinaryOperation>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
-_OutputIterator
-adjacent_difference(_InputIterator __first, _InputIterator __last, _OutputIterator __result,
- _BinaryOperation __binary_op)
-{
- if (__first != __last)
- {
- typename iterator_traits<_InputIterator>::value_type __acc(*__first);
- *__result = __acc;
- for (++__first, (void) ++__result; __first != __last; ++__first, (void) ++__result)
- {
- typename iterator_traits<_InputIterator>::value_type __val(*__first);
-#if _LIBCPP_STD_VER > 17
- *__result = __binary_op(__val, _VSTD::move(__acc));
-#else
- *__result = __binary_op(__val, __acc);
-#endif
- __acc = _VSTD::move(__val);
- }
- }
- return __result;
-}
-
-template <class _ForwardIterator, class _Tp>
-_LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
-void
-iota(_ForwardIterator __first, _ForwardIterator __last, _Tp __value_)
-{
- for (; __first != __last; ++__first, (void) ++__value_)
- *__first = __value_;
-}
-
-
-#if _LIBCPP_STD_VER > 14
-template <typename _Result, typename _Source, bool _IsSigned = is_signed<_Source>::value> struct __ct_abs;
-
-template <typename _Result, typename _Source>
-struct __ct_abs<_Result, _Source, true> {
- _LIBCPP_CONSTEXPR _LIBCPP_INLINE_VISIBILITY
- _Result operator()(_Source __t) const noexcept
- {
- if (__t >= 0) return __t;
- if (__t == numeric_limits<_Source>::min()) return -static_cast<_Result>(__t);
- return -__t;
- }
-};
-
-template <typename _Result, typename _Source>
-struct __ct_abs<_Result, _Source, false> {
- _LIBCPP_CONSTEXPR _LIBCPP_INLINE_VISIBILITY
- _Result operator()(_Source __t) const noexcept { return __t; }
-};
-
-
-template<class _Tp>
-_LIBCPP_CONSTEXPR _LIBCPP_HIDDEN
-_Tp __gcd(_Tp __m, _Tp __n)
-{
- static_assert((!is_signed<_Tp>::value), "");
- return __n == 0 ? __m : _VSTD::__gcd<_Tp>(__n, __m % __n);
-}
-
-
-template<class _Tp, class _Up>
-_LIBCPP_CONSTEXPR _LIBCPP_INLINE_VISIBILITY
-common_type_t<_Tp,_Up>
-gcd(_Tp __m, _Up __n)
-{
- static_assert((is_integral<_Tp>::value && is_integral<_Up>::value), "Arguments to gcd must be integer types");
- static_assert((!is_same<typename remove_cv<_Tp>::type, bool>::value), "First argument to gcd cannot be bool" );
- static_assert((!is_same<typename remove_cv<_Up>::type, bool>::value), "Second argument to gcd cannot be bool" );
- using _Rp = common_type_t<_Tp,_Up>;
- using _Wp = make_unsigned_t<_Rp>;
- return static_cast<_Rp>(_VSTD::__gcd(
- static_cast<_Wp>(__ct_abs<_Rp, _Tp>()(__m)),
- static_cast<_Wp>(__ct_abs<_Rp, _Up>()(__n))));
-}
-
-template<class _Tp, class _Up>
-_LIBCPP_CONSTEXPR _LIBCPP_INLINE_VISIBILITY
-common_type_t<_Tp,_Up>
-lcm(_Tp __m, _Up __n)
-{
- static_assert((is_integral<_Tp>::value && is_integral<_Up>::value), "Arguments to lcm must be integer types");
- static_assert((!is_same<typename remove_cv<_Tp>::type, bool>::value), "First argument to lcm cannot be bool" );
- static_assert((!is_same<typename remove_cv<_Up>::type, bool>::value), "Second argument to lcm cannot be bool" );
- if (__m == 0 || __n == 0)
- return 0;
-
- using _Rp = common_type_t<_Tp,_Up>;
- _Rp __val1 = __ct_abs<_Rp, _Tp>()(__m) / _VSTD::gcd(__m, __n);
- _Rp __val2 = __ct_abs<_Rp, _Up>()(__n);
- _LIBCPP_ASSERT((numeric_limits<_Rp>::max() / __val1 > __val2), "Overflow in lcm");
- return __val1 * __val2;
-}
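// ---- Editor's note (illustration, not part of the diff) ----
// gcd runs Euclid's algorithm on make_unsigned_t of the common type, with
// __ct_abs stripping the sign first; doing the work unsigned is what keeps
// the most negative value (where plain negation would overflow) well defined.
// lcm divides by gcd before multiplying, so the product only overflows when
// the true lcm is unrepresentable, which the _LIBCPP_ASSERT above guards.
// A minimal sketch:
#include <cassert>
#include <numeric>
int main() {
    assert(std::gcd(-12, 18) == 6); // signs are stripped before Euclid
    assert(std::gcd(0, 7) == 7);    // gcd(0, n) == n by convention
    assert(std::lcm(4, 6) == 12);   // (4 / gcd(4, 6)) * 6 == 2 * 6
}
// ---- end editor's note ----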
-
-#endif /* _LIBCPP_STD_VER > 14 */
-
-#if _LIBCPP_STD_VER > 17
-template <class _Tp>
-_LIBCPP_INLINE_VISIBILITY constexpr
-enable_if_t<is_integral_v<_Tp> && !is_same_v<bool, _Tp> && !is_null_pointer_v<_Tp>, _Tp>
-midpoint(_Tp __a, _Tp __b) noexcept
-_LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK
-{
- using _Up = make_unsigned_t<_Tp>;
- constexpr _Up __bitshift = numeric_limits<_Up>::digits - 1;
-
- _Up __diff = _Up(__b) - _Up(__a);
- _Up __sign_bit = __b < __a;
-
- _Up __half_diff = (__diff / 2) + (__sign_bit << __bitshift) + (__sign_bit & __diff);
-
- return __a + __half_diff;
-}
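// ---- Editor's note (illustration, not part of the diff) ----
// The integral midpoint above forms the difference in the unsigned type and
// folds the sign back in via __sign_bit, so no intermediate can overflow and
// the result rounds toward the first argument; the naive (__a + __b) / 2
// overflows for large operands. Sketch:
#include <cassert>
#include <climits>
#include <numeric>
int main() {
    assert(std::midpoint(INT_MAX, INT_MAX - 2) == INT_MAX - 1); // no overflow
    assert(std::midpoint(1, 4) == 2); // rounds toward the first argument...
    assert(std::midpoint(4, 1) == 3); // ...so swapping the arguments differs
}
// ---- end editor's note ----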
-
-
-template <class _TPtr>
-_LIBCPP_INLINE_VISIBILITY constexpr
-enable_if_t<is_pointer_v<_TPtr>
- && is_object_v<remove_pointer_t<_TPtr>>
- && ! is_void_v<remove_pointer_t<_TPtr>>
- && (sizeof(remove_pointer_t<_TPtr>) > 0), _TPtr>
-midpoint(_TPtr __a, _TPtr __b) noexcept
-{
- return __a + _VSTD::midpoint(ptrdiff_t(0), __b - __a);
-}
-
-
-template <typename _Tp>
-constexpr int __sign(_Tp __val) {
- return (_Tp(0) < __val) - (__val < _Tp(0));
-}
-
-template <typename _Fp>
-constexpr _Fp __fp_abs(_Fp __f) { return __f >= 0 ? __f : -__f; }
-
-template <class _Fp>
-_LIBCPP_INLINE_VISIBILITY constexpr
-enable_if_t<is_floating_point_v<_Fp>, _Fp>
-midpoint(_Fp __a, _Fp __b) noexcept
-{
- constexpr _Fp __lo = numeric_limits<_Fp>::min()*2;
- constexpr _Fp __hi = numeric_limits<_Fp>::max()/2;
- return __fp_abs(__a) <= __hi && __fp_abs(__b) <= __hi ? // typical case: overflow is impossible
- (__a + __b)/2 : // always correctly rounded
- __fp_abs(__a) < __lo ? __a + __b/2 : // not safe to halve a
- __fp_abs(__b) < __lo ? __a/2 + __b : // not safe to halve b
- __a/2 + __b/2; // otherwise correctly rounded
-}
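// ---- Editor's note (illustration, not part of the diff) ----
// The floating-point midpoint splits into three regimes: when both magnitudes
// are at most __hi (max/2), (__a + __b)/2 cannot overflow and is correctly
// rounded; when one operand is below __lo (2*min), halving it is unsafe, so
// only the other operand is halved; otherwise __a/2 + __b/2 avoids overflow.
// Sketch of the overflow case:
#include <cassert>
#include <limits>
#include <numeric>
int main() {
    const double big = std::numeric_limits<double>::max();
    assert(std::midpoint(big, big) == big); // naive (big + big)/2 gives inf
    assert(std::midpoint(2.0, 4.0) == 3.0); // typical case, exact
}
// ---- end editor's note ----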
-
-#endif // _LIBCPP_STD_VER > 17
-
-_LIBCPP_END_NAMESPACE_STD
-
-_LIBCPP_POP_MACROS
-
#if defined(_LIBCPP_HAS_PARALLEL_ALGORITHMS) && _LIBCPP_STD_VER >= 17
# include <__pstl_numeric>
#endif
diff --git a/libcxx/include/random b/libcxx/include/random
index 72d9855765f8..9eb70bac00b9 100644
--- a/libcxx/include/random
+++ b/libcxx/include/random
@@ -1678,5330 +1678,56 @@ class piecewise_linear_distribution
*/
#include <__config>
+#include <__random/bernoulli_distribution.h>
+#include <__random/binomial_distribution.h>
+#include <__random/cauchy_distribution.h>
+#include <__random/chi_squared_distribution.h>
+#include <__random/default_random_engine.h>
+#include <__random/discard_block_engine.h>
+#include <__random/discrete_distribution.h>
+#include <__random/exponential_distribution.h>
+#include <__random/extreme_value_distribution.h>
+#include <__random/fisher_f_distribution.h>
+#include <__random/gamma_distribution.h>
+#include <__random/generate_canonical.h>
+#include <__random/geometric_distribution.h>
+#include <__random/independent_bits_engine.h>
+#include <__random/is_seed_sequence.h>
+#include <__random/knuth_b.h>
+#include <__random/linear_congruential_engine.h>
+#include <__random/log2.h>
+#include <__random/lognormal_distribution.h>
+#include <__random/mersenne_twister_engine.h>
+#include <__random/negative_binomial_distribution.h>
+#include <__random/normal_distribution.h>
+#include <__random/piecewise_constant_distribution.h>
+#include <__random/piecewise_linear_distribution.h>
+#include <__random/poisson_distribution.h>
+#include <__random/random_device.h>
+#include <__random/ranlux.h>
+#include <__random/seed_seq.h>
+#include <__random/shuffle_order_engine.h>
+#include <__random/student_t_distribution.h>
+#include <__random/subtract_with_carry_engine.h>
#include <__random/uniform_int_distribution.h>
-#include <algorithm>
-#include <cmath>
-#include <concepts>
-#include <cstddef>
-#include <cstdint>
+#include <__random/uniform_random_bit_generator.h>
+#include <__random/uniform_real_distribution.h>
+#include <__random/weibull_distribution.h>
#include <initializer_list>
-#include <iosfwd>
-#include <limits>
-#include <numeric>
-#include <string>
-#include <type_traits>
-#include <vector>
+
+#include <algorithm> // for backward compatibility; TODO remove it
+#include <cmath> // for backward compatibility; TODO remove it
+#include <cstddef> // for backward compatibility; TODO remove it
+#include <cstdint> // for backward compatibility; TODO remove it
+#include <iosfwd> // for backward compatibility; TODO remove it
+#include <limits> // for backward compatibility; TODO remove it
+#include <numeric> // for backward compatibility; TODO remove it
+#include <string> // for backward compatibility; TODO remove it
+#include <type_traits> // for backward compatibility; TODO remove it
+#include <vector> // for backward compatibility; TODO remove it
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif
-_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
-
-
-_LIBCPP_BEGIN_NAMESPACE_STD
-
-#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
-
-// [rand.req.urng]
-template<class _Gen>
-concept uniform_random_bit_generator =
- invocable<_Gen&> && unsigned_integral<invoke_result_t<_Gen&>> &&
- requires {
- { _Gen::min() } -> same_as<invoke_result_t<_Gen&>>;
- { _Gen::max() } -> same_as<invoke_result_t<_Gen&>>;
- requires bool_constant<(_Gen::min() < _Gen::max())>::value;
- };
-
-#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)
-
-// __is_seed_sequence
-
-template <class _Sseq, class _Engine>
-struct __is_seed_sequence
-{
- static _LIBCPP_CONSTEXPR const bool value =
- !is_convertible<_Sseq, typename _Engine::result_type>::value &&
- !is_same<typename remove_cv<_Sseq>::type, _Engine>::value;
-};
-
-// linear_congruential_engine
-
-template <unsigned long long __a, unsigned long long __c,
- unsigned long long __m, unsigned long long _Mp,
- bool _MightOverflow = (__a != 0 && __m != 0 && __m-1 > (_Mp-__c)/__a),
- bool _OverflowOK = ((__m | (__m-1)) > __m), // m = 2^n
- bool _SchrageOK = (__a != 0 && __m != 0 && __m % __a <= __m / __a)> // r <= q
-struct __lce_alg_picker
-{
- static_assert(__a != 0 || __m != 0 || !_MightOverflow || _OverflowOK || _SchrageOK,
- "The current values of a, c, and m cannot generate a number "
- "within bounds of linear_congruential_engine.");
-
- static _LIBCPP_CONSTEXPR const bool __use_schrage = _MightOverflow &&
- !_OverflowOK &&
- _SchrageOK;
-};
-
-template <unsigned long long __a, unsigned long long __c,
- unsigned long long __m, unsigned long long _Mp,
- bool _UseSchrage = __lce_alg_picker<__a, __c, __m, _Mp>::__use_schrage>
-struct __lce_ta;
-
-// 64
-
-template <unsigned long long __a, unsigned long long __c, unsigned long long __m>
-struct __lce_ta<__a, __c, __m, (unsigned long long)(~0), true>
-{
- typedef unsigned long long result_type;
- _LIBCPP_INLINE_VISIBILITY
- static result_type next(result_type __x)
- {
- // Schrage's algorithm
- const result_type __q = __m / __a;
- const result_type __r = __m % __a;
- const result_type __t0 = __a * (__x % __q);
- const result_type __t1 = __r * (__x / __q);
- __x = __t0 + (__t0 < __t1) * __m - __t1;
- __x += __c - (__x >= __m - __c) * __m;
- return __x;
- }
-};
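// ---- Editor's note (illustration, not part of the diff) ----
// Schrage's decomposition writes __m = __a*__q + __r with __q = __m / __a and
// __r = __m % __a; then a*x mod m == a*(x % q) - r*(x / q), plus m if that
// goes negative, and when r <= q every intermediate fits in the word, so the
// multiply never overflows. Checking the identity with the minstd_rand
// constants (uint64_t is used here only so the reference product is exact;
// Schrage matters precisely when the word cannot hold a*x):
#include <cassert>
#include <cstdint>
int main() {
    const uint64_t a = 48271, m = 2147483647; // minstd_rand: m = 2^31 - 1
    const uint64_t q = m / a, r = m % a;      // r <= q holds for these values
    const uint64_t x = 123456789;
    const uint64_t t0 = a * (x % q), t1 = r * (x / q);
    const uint64_t y = t0 + (t0 < t1) * m - t1; // branch-free "+ m if negative"
    assert(y == (a * x) % m);
}
// ---- end editor's note ----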
-
-template <unsigned long long __a, unsigned long long __m>
-struct __lce_ta<__a, 0, __m, (unsigned long long)(~0), true>
-{
- typedef unsigned long long result_type;
- _LIBCPP_INLINE_VISIBILITY
- static result_type next(result_type __x)
- {
- // Schrage's algorithm
- const result_type __q = __m / __a;
- const result_type __r = __m % __a;
- const result_type __t0 = __a * (__x % __q);
- const result_type __t1 = __r * (__x / __q);
- __x = __t0 + (__t0 < __t1) * __m - __t1;
- return __x;
- }
-};
-
-template <unsigned long long __a, unsigned long long __c, unsigned long long __m>
-struct __lce_ta<__a, __c, __m, (unsigned long long)(~0), false>
-{
- typedef unsigned long long result_type;
- _LIBCPP_INLINE_VISIBILITY
- static result_type next(result_type __x)
- {
- return (__a * __x + __c) % __m;
- }
-};
-
-template <unsigned long long __a, unsigned long long __c>
-struct __lce_ta<__a, __c, 0, (unsigned long long)(~0), false>
-{
- typedef unsigned long long result_type;
- _LIBCPP_INLINE_VISIBILITY
- static result_type next(result_type __x)
- {
- return __a * __x + __c;
- }
-};
-
-// 32
-
-template <unsigned long long _Ap, unsigned long long _Cp, unsigned long long _Mp>
-struct __lce_ta<_Ap, _Cp, _Mp, unsigned(~0), true>
-{
- typedef unsigned result_type;
- _LIBCPP_INLINE_VISIBILITY
- static result_type next(result_type __x)
- {
- const result_type __a = static_cast<result_type>(_Ap);
- const result_type __c = static_cast<result_type>(_Cp);
- const result_type __m = static_cast<result_type>(_Mp);
- // Schrage's algorithm
- const result_type __q = __m / __a;
- const result_type __r = __m % __a;
- const result_type __t0 = __a * (__x % __q);
- const result_type __t1 = __r * (__x / __q);
- __x = __t0 + (__t0 < __t1) * __m - __t1;
- __x += __c - (__x >= __m - __c) * __m;
- return __x;
- }
-};
-
-template <unsigned long long _Ap, unsigned long long _Mp>
-struct __lce_ta<_Ap, 0, _Mp, unsigned(~0), true>
-{
- typedef unsigned result_type;
- _LIBCPP_INLINE_VISIBILITY
- static result_type next(result_type __x)
- {
- const result_type __a = static_cast<result_type>(_Ap);
- const result_type __m = static_cast<result_type>(_Mp);
- // Schrage's algorithm
- const result_type __q = __m / __a;
- const result_type __r = __m % __a;
- const result_type __t0 = __a * (__x % __q);
- const result_type __t1 = __r * (__x / __q);
- __x = __t0 + (__t0 < __t1) * __m - __t1;
- return __x;
- }
-};
-
-template <unsigned long long _Ap, unsigned long long _Cp, unsigned long long _Mp>
-struct __lce_ta<_Ap, _Cp, _Mp, unsigned(~0), false>
-{
- typedef unsigned result_type;
- _LIBCPP_INLINE_VISIBILITY
- static result_type next(result_type __x)
- {
- const result_type __a = static_cast<result_type>(_Ap);
- const result_type __c = static_cast<result_type>(_Cp);
- const result_type __m = static_cast<result_type>(_Mp);
- return (__a * __x + __c) % __m;
- }
-};
-
-template <unsigned long long _Ap, unsigned long long _Cp>
-struct __lce_ta<_Ap, _Cp, 0, unsigned(~0), false>
-{
- typedef unsigned result_type;
- _LIBCPP_INLINE_VISIBILITY
- static result_type next(result_type __x)
- {
- const result_type __a = static_cast<result_type>(_Ap);
- const result_type __c = static_cast<result_type>(_Cp);
- return __a * __x + __c;
- }
-};
-
-// 16
-
-template <unsigned long long __a, unsigned long long __c, unsigned long long __m, bool __b>
-struct __lce_ta<__a, __c, __m, (unsigned short)(~0), __b>
-{
- typedef unsigned short result_type;
- _LIBCPP_INLINE_VISIBILITY
- static result_type next(result_type __x)
- {
- return static_cast<result_type>(__lce_ta<__a, __c, __m, unsigned(~0)>::next(__x));
- }
-};
-
-template <class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m>
-class _LIBCPP_TEMPLATE_VIS linear_congruential_engine;
-
-template <class _CharT, class _Traits,
- class _Up, _Up _Ap, _Up _Cp, _Up _Np>
-_LIBCPP_INLINE_VISIBILITY
-basic_ostream<_CharT, _Traits>&
-operator<<(basic_ostream<_CharT, _Traits>& __os,
- const linear_congruential_engine<_Up, _Ap, _Cp, _Np>&);
-
-template <class _CharT, class _Traits,
- class _Up, _Up _Ap, _Up _Cp, _Up _Np>
-basic_istream<_CharT, _Traits>&
-operator>>(basic_istream<_CharT, _Traits>& __is,
- linear_congruential_engine<_Up, _Ap, _Cp, _Np>& __x);
-
-template <class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m>
-class _LIBCPP_TEMPLATE_VIS linear_congruential_engine
-{
-public:
- // types
- typedef _UIntType result_type;
-
-private:
- result_type __x_;
-
- static _LIBCPP_CONSTEXPR const result_type _Mp = result_type(~0);
-
- static_assert(__m == 0 || __a < __m, "linear_congruential_engine invalid parameters");
- static_assert(__m == 0 || __c < __m, "linear_congruential_engine invalid parameters");
- static_assert(is_unsigned<_UIntType>::value, "_UIntType must be unsigned type");
-public:
- static _LIBCPP_CONSTEXPR const result_type _Min = __c == 0u ? 1u: 0u;
- static _LIBCPP_CONSTEXPR const result_type _Max = __m - 1u;
- static_assert(_Min < _Max, "linear_congruential_engine invalid parameters");
-
- // engine characteristics
- static _LIBCPP_CONSTEXPR const result_type multiplier = __a;
- static _LIBCPP_CONSTEXPR const result_type increment = __c;
- static _LIBCPP_CONSTEXPR const result_type modulus = __m;
- _LIBCPP_INLINE_VISIBILITY
- static _LIBCPP_CONSTEXPR result_type min() {return _Min;}
- _LIBCPP_INLINE_VISIBILITY
- static _LIBCPP_CONSTEXPR result_type max() {return _Max;}
- static _LIBCPP_CONSTEXPR const result_type default_seed = 1u;
-
- // constructors and seeding functions
-#ifndef _LIBCPP_CXX03_LANG
- _LIBCPP_INLINE_VISIBILITY
- linear_congruential_engine() : linear_congruential_engine(default_seed) {}
- _LIBCPP_INLINE_VISIBILITY
- explicit linear_congruential_engine(result_type __s) { seed(__s); }
-#else
- _LIBCPP_INLINE_VISIBILITY
- explicit linear_congruential_engine(result_type __s = default_seed) {
- seed(__s);
- }
-#endif
- template<class _Sseq>
- _LIBCPP_INLINE_VISIBILITY
- explicit linear_congruential_engine(_Sseq& __q,
- typename enable_if<__is_seed_sequence<_Sseq, linear_congruential_engine>::value>::type* = 0)
- {seed(__q);}
- _LIBCPP_INLINE_VISIBILITY
- void seed(result_type __s = default_seed)
- {seed(integral_constant<bool, __m == 0>(),
- integral_constant<bool, __c == 0>(), __s);}
- template<class _Sseq>
- _LIBCPP_INLINE_VISIBILITY
- typename enable_if
- <
- __is_seed_sequence<_Sseq, linear_congruential_engine>::value,
- void
- >::type
- seed(_Sseq& __q)
- {__seed(__q, integral_constant<unsigned,
- 1 + (__m == 0 ? (sizeof(result_type) * __CHAR_BIT__ - 1)/32
- : (__m > 0x100000000ull))>());}
-
- // generating functions
- _LIBCPP_INLINE_VISIBILITY
- result_type operator()()
- {return __x_ = static_cast<result_type>(__lce_ta<__a, __c, __m, _Mp>::next(__x_));}
- _LIBCPP_INLINE_VISIBILITY
- void discard(unsigned long long __z) {for (; __z; --__z) operator()();}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const linear_congruential_engine& __x,
- const linear_congruential_engine& __y)
- {return __x.__x_ == __y.__x_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const linear_congruential_engine& __x,
- const linear_congruential_engine& __y)
- {return !(__x == __y);}
-
-private:
-
- _LIBCPP_INLINE_VISIBILITY
- void seed(true_type, true_type, result_type __s) {__x_ = __s == 0 ? 1 : __s;}
- _LIBCPP_INLINE_VISIBILITY
- void seed(true_type, false_type, result_type __s) {__x_ = __s;}
- _LIBCPP_INLINE_VISIBILITY
- void seed(false_type, true_type, result_type __s) {__x_ = __s % __m == 0 ?
- 1 : __s % __m;}
- _LIBCPP_INLINE_VISIBILITY
- void seed(false_type, false_type, result_type __s) {__x_ = __s % __m;}
-
- template<class _Sseq>
- void __seed(_Sseq& __q, integral_constant<unsigned, 1>);
- template<class _Sseq>
- void __seed(_Sseq& __q, integral_constant<unsigned, 2>);
-
- template <class _CharT, class _Traits,
- class _Up, _Up _Ap, _Up _Cp, _Up _Np>
- friend
- basic_ostream<_CharT, _Traits>&
- operator<<(basic_ostream<_CharT, _Traits>& __os,
- const linear_congruential_engine<_Up, _Ap, _Cp, _Np>&);
-
- template <class _CharT, class _Traits,
- class _Up, _Up _Ap, _Up _Cp, _Up _Np>
- friend
- basic_istream<_CharT, _Traits>&
- operator>>(basic_istream<_CharT, _Traits>& __is,
- linear_congruential_engine<_Up, _Ap, _Cp, _Np>& __x);
-};
-
-template <class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m>
- _LIBCPP_CONSTEXPR const typename linear_congruential_engine<_UIntType, __a, __c, __m>::result_type
- linear_congruential_engine<_UIntType, __a, __c, __m>::multiplier;
-
-template <class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m>
- _LIBCPP_CONSTEXPR const typename linear_congruential_engine<_UIntType, __a, __c, __m>::result_type
- linear_congruential_engine<_UIntType, __a, __c, __m>::increment;
-
-template <class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m>
- _LIBCPP_CONSTEXPR const typename linear_congruential_engine<_UIntType, __a, __c, __m>::result_type
- linear_congruential_engine<_UIntType, __a, __c, __m>::modulus;
-
-template <class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m>
- _LIBCPP_CONSTEXPR const typename linear_congruential_engine<_UIntType, __a, __c, __m>::result_type
- linear_congruential_engine<_UIntType, __a, __c, __m>::default_seed;
-
-template <class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m>
-template<class _Sseq>
-void
-linear_congruential_engine<_UIntType, __a, __c, __m>::__seed(_Sseq& __q,
- integral_constant<unsigned, 1>)
-{
- const unsigned __k = 1;
- uint32_t __ar[__k+3];
- __q.generate(__ar, __ar + __k + 3);
- result_type __s = static_cast<result_type>(__ar[3] % __m);
- __x_ = __c == 0 && __s == 0 ? result_type(1) : __s;
-}
-
-template <class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m>
-template<class _Sseq>
-void
-linear_congruential_engine<_UIntType, __a, __c, __m>::__seed(_Sseq& __q,
- integral_constant<unsigned, 2>)
-{
- const unsigned __k = 2;
- uint32_t __ar[__k+3];
- __q.generate(__ar, __ar + __k + 3);
- result_type __s = static_cast<result_type>((__ar[3] +
- ((uint64_t)__ar[4] << 32)) % __m);
- __x_ = __c == 0 && __s == 0 ? result_type(1) : __s;
-}
-
-template <class _CharT, class _Traits,
- class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m>
-inline _LIBCPP_INLINE_VISIBILITY
-basic_ostream<_CharT, _Traits>&
-operator<<(basic_ostream<_CharT, _Traits>& __os,
- const linear_congruential_engine<_UIntType, __a, __c, __m>& __x)
-{
- __save_flags<_CharT, _Traits> __lx(__os);
- typedef basic_ostream<_CharT, _Traits> _Ostream;
- __os.flags(_Ostream::dec | _Ostream::left);
- __os.fill(__os.widen(' '));
- return __os << __x.__x_;
-}
-
-template <class _CharT, class _Traits,
- class _UIntType, _UIntType __a, _UIntType __c, _UIntType __m>
-basic_istream<_CharT, _Traits>&
-operator>>(basic_istream<_CharT, _Traits>& __is,
- linear_congruential_engine<_UIntType, __a, __c, __m>& __x)
-{
- __save_flags<_CharT, _Traits> __lx(__is);
- typedef basic_istream<_CharT, _Traits> _Istream;
- __is.flags(_Istream::dec | _Istream::skipws);
- _UIntType __t;
- __is >> __t;
- if (!__is.fail())
- __x.__x_ = __t;
- return __is;
-}
-
-typedef linear_congruential_engine<uint_fast32_t, 16807, 0, 2147483647>
- minstd_rand0;
-typedef linear_congruential_engine<uint_fast32_t, 48271, 0, 2147483647>
- minstd_rand;
-typedef minstd_rand default_random_engine;
-
-// mersenne_twister_engine
-
-template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
- _UIntType __a, size_t __u, _UIntType __d, size_t __s,
- _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
-class _LIBCPP_TEMPLATE_VIS mersenne_twister_engine;
-
-template <class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp,
- _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp,
- _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp>
-bool
-operator==(const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp,
- _Bp, _Tp, _Cp, _Lp, _Fp>& __x,
- const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp,
- _Bp, _Tp, _Cp, _Lp, _Fp>& __y);
-
-template <class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp,
- _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp,
- _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp>
-_LIBCPP_INLINE_VISIBILITY
-bool
-operator!=(const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp,
- _Bp, _Tp, _Cp, _Lp, _Fp>& __x,
- const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp,
- _Bp, _Tp, _Cp, _Lp, _Fp>& __y);
-
-template <class _CharT, class _Traits,
- class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp,
- _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp,
- _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp>
-basic_ostream<_CharT, _Traits>&
-operator<<(basic_ostream<_CharT, _Traits>& __os,
- const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp,
- _Bp, _Tp, _Cp, _Lp, _Fp>& __x);
-
-template <class _CharT, class _Traits,
- class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp,
- _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp,
- _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp>
-basic_istream<_CharT, _Traits>&
-operator>>(basic_istream<_CharT, _Traits>& __is,
- mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp,
- _Bp, _Tp, _Cp, _Lp, _Fp>& __x);
-
-template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
- _UIntType __a, size_t __u, _UIntType __d, size_t __s,
- _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
-class _LIBCPP_TEMPLATE_VIS mersenne_twister_engine
-{
-public:
- // types
- typedef _UIntType result_type;
-
-private:
- result_type __x_[__n];
- size_t __i_;
-
- static_assert( 0 < __m, "mersenne_twister_engine invalid parameters");
- static_assert(__m <= __n, "mersenne_twister_engine invalid parameters");
- static _LIBCPP_CONSTEXPR const result_type _Dt = numeric_limits<result_type>::digits;
- static_assert(__w <= _Dt, "mersenne_twister_engine invalid parameters");
- static_assert( 2 <= __w, "mersenne_twister_engine invalid parameters");
- static_assert(__r <= __w, "mersenne_twister_engine invalid parameters");
- static_assert(__u <= __w, "mersenne_twister_engine invalid parameters");
- static_assert(__s <= __w, "mersenne_twister_engine invalid parameters");
- static_assert(__t <= __w, "mersenne_twister_engine invalid parameters");
- static_assert(__l <= __w, "mersenne_twister_engine invalid parameters");
-public:
- static _LIBCPP_CONSTEXPR const result_type _Min = 0;
- static _LIBCPP_CONSTEXPR const result_type _Max = __w == _Dt ? result_type(~0) :
- (result_type(1) << __w) - result_type(1);
- static_assert(_Min < _Max, "mersenne_twister_engine invalid parameters");
- static_assert(__a <= _Max, "mersenne_twister_engine invalid parameters");
- static_assert(__b <= _Max, "mersenne_twister_engine invalid parameters");
- static_assert(__c <= _Max, "mersenne_twister_engine invalid parameters");
- static_assert(__d <= _Max, "mersenne_twister_engine invalid parameters");
- static_assert(__f <= _Max, "mersenne_twister_engine invalid parameters");
-
- // engine characteristics
- static _LIBCPP_CONSTEXPR const size_t word_size = __w;
- static _LIBCPP_CONSTEXPR const size_t state_size = __n;
- static _LIBCPP_CONSTEXPR const size_t shift_size = __m;
- static _LIBCPP_CONSTEXPR const size_t mask_bits = __r;
- static _LIBCPP_CONSTEXPR const result_type xor_mask = __a;
- static _LIBCPP_CONSTEXPR const size_t tempering_u = __u;
- static _LIBCPP_CONSTEXPR const result_type tempering_d = __d;
- static _LIBCPP_CONSTEXPR const size_t tempering_s = __s;
- static _LIBCPP_CONSTEXPR const result_type tempering_b = __b;
- static _LIBCPP_CONSTEXPR const size_t tempering_t = __t;
- static _LIBCPP_CONSTEXPR const result_type tempering_c = __c;
- static _LIBCPP_CONSTEXPR const size_t tempering_l = __l;
- static _LIBCPP_CONSTEXPR const result_type initialization_multiplier = __f;
- _LIBCPP_INLINE_VISIBILITY
- static _LIBCPP_CONSTEXPR result_type min() { return _Min; }
- _LIBCPP_INLINE_VISIBILITY
- static _LIBCPP_CONSTEXPR result_type max() { return _Max; }
- static _LIBCPP_CONSTEXPR const result_type default_seed = 5489u;
-
- // constructors and seeding functions
-#ifndef _LIBCPP_CXX03_LANG
- _LIBCPP_INLINE_VISIBILITY
- mersenne_twister_engine() : mersenne_twister_engine(default_seed) {}
- _LIBCPP_INLINE_VISIBILITY
- explicit mersenne_twister_engine(result_type __sd) { seed(__sd); }
-#else
- _LIBCPP_INLINE_VISIBILITY
- explicit mersenne_twister_engine(result_type __sd = default_seed) {
- seed(__sd);
- }
-#endif
- template<class _Sseq>
- _LIBCPP_INLINE_VISIBILITY
- explicit mersenne_twister_engine(_Sseq& __q,
- typename enable_if<__is_seed_sequence<_Sseq, mersenne_twister_engine>::value>::type* = 0)
- {seed(__q);}
- void seed(result_type __sd = default_seed);
- template<class _Sseq>
- _LIBCPP_INLINE_VISIBILITY
- typename enable_if
- <
- __is_seed_sequence<_Sseq, mersenne_twister_engine>::value,
- void
- >::type
- seed(_Sseq& __q)
- {__seed(__q, integral_constant<unsigned, 1 + (__w - 1) / 32>());}
-
- // generating functions
- result_type operator()();
- _LIBCPP_INLINE_VISIBILITY
- void discard(unsigned long long __z) {for (; __z; --__z) operator()();}
-
- template <class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp,
- _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp,
- _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp>
- friend
- bool
- operator==(const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp,
- _Bp, _Tp, _Cp, _Lp, _Fp>& __x,
- const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp,
- _Bp, _Tp, _Cp, _Lp, _Fp>& __y);
-
- template <class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp,
- _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp,
- _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp>
- friend
- bool
- operator!=(const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp,
- _Bp, _Tp, _Cp, _Lp, _Fp>& __x,
- const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp,
- _Bp, _Tp, _Cp, _Lp, _Fp>& __y);
-
- template <class _CharT, class _Traits,
- class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp,
- _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp,
- _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp>
- friend
- basic_ostream<_CharT, _Traits>&
- operator<<(basic_ostream<_CharT, _Traits>& __os,
- const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp,
- _Bp, _Tp, _Cp, _Lp, _Fp>& __x);
-
- template <class _CharT, class _Traits,
- class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp,
- _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp,
- _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp>
- friend
- basic_istream<_CharT, _Traits>&
- operator>>(basic_istream<_CharT, _Traits>& __is,
- mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp,
- _Bp, _Tp, _Cp, _Lp, _Fp>& __x);
-private:
-
- template<class _Sseq>
- void __seed(_Sseq& __q, integral_constant<unsigned, 1>);
- template<class _Sseq>
- void __seed(_Sseq& __q, integral_constant<unsigned, 2>);
-
- template <size_t __count>
- _LIBCPP_INLINE_VISIBILITY
- static
- typename enable_if
- <
- __count < __w,
- result_type
- >::type
- __lshift(result_type __x) {return (__x << __count) & _Max;}
-
- template <size_t __count>
- _LIBCPP_INLINE_VISIBILITY
- static
- typename enable_if
- <
- (__count >= __w),
- result_type
- >::type
- __lshift(result_type) {return result_type(0);}
-
- template <size_t __count>
- _LIBCPP_INLINE_VISIBILITY
- static
- typename enable_if
- <
- __count < _Dt,
- result_type
- >::type
- __rshift(result_type __x) {return __x >> __count;}
-
- template <size_t __count>
- _LIBCPP_INLINE_VISIBILITY
- static
- typename enable_if
- <
- (__count >= _Dt),
- result_type
- >::type
- __rshift(result_type) {return result_type(0);}
-};
-
-template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
- _UIntType __a, size_t __u, _UIntType __d, size_t __s,
- _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
- _LIBCPP_CONSTEXPR const size_t
- mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::word_size;
-
-template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
- _UIntType __a, size_t __u, _UIntType __d, size_t __s,
- _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
- _LIBCPP_CONSTEXPR const size_t
- mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::state_size;
-
-template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
- _UIntType __a, size_t __u, _UIntType __d, size_t __s,
- _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
- _LIBCPP_CONSTEXPR const size_t
- mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::shift_size;
-
-template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
- _UIntType __a, size_t __u, _UIntType __d, size_t __s,
- _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
- _LIBCPP_CONSTEXPR const size_t
- mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::mask_bits;
-
-template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
- _UIntType __a, size_t __u, _UIntType __d, size_t __s,
- _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
- _LIBCPP_CONSTEXPR const typename mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::result_type
- mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::xor_mask;
-
-template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
- _UIntType __a, size_t __u, _UIntType __d, size_t __s,
- _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
- _LIBCPP_CONSTEXPR const size_t
- mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::tempering_u;
-
-template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
- _UIntType __a, size_t __u, _UIntType __d, size_t __s,
- _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
- _LIBCPP_CONSTEXPR const typename mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::result_type
- mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::tempering_d;
-
-template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
- _UIntType __a, size_t __u, _UIntType __d, size_t __s,
- _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
- _LIBCPP_CONSTEXPR const size_t
- mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::tempering_s;
-
-template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
- _UIntType __a, size_t __u, _UIntType __d, size_t __s,
- _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
- _LIBCPP_CONSTEXPR const typename mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::result_type
- mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::tempering_b;
-
-template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
- _UIntType __a, size_t __u, _UIntType __d, size_t __s,
- _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
- _LIBCPP_CONSTEXPR const size_t
- mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::tempering_t;
-
-template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
- _UIntType __a, size_t __u, _UIntType __d, size_t __s,
- _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
- _LIBCPP_CONSTEXPR const typename mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::result_type
- mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::tempering_c;
-
-template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
- _UIntType __a, size_t __u, _UIntType __d, size_t __s,
- _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
- _LIBCPP_CONSTEXPR const size_t
- mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::tempering_l;
-
-template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
- _UIntType __a, size_t __u, _UIntType __d, size_t __s,
- _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
- _LIBCPP_CONSTEXPR const typename mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::result_type
- mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::initialization_multiplier;
-
-template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
- _UIntType __a, size_t __u, _UIntType __d, size_t __s,
- _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
- _LIBCPP_CONSTEXPR const typename mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::result_type
- mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b, __t, __c, __l, __f>::default_seed;
-
-template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
- _UIntType __a, size_t __u, _UIntType __d, size_t __s,
- _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
-void
-mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b,
- __t, __c, __l, __f>::seed(result_type __sd)
- _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK
-{ // __w >= 2
- __x_[0] = __sd & _Max;
- for (size_t __i = 1; __i < __n; ++__i)
- __x_[__i] = (__f * (__x_[__i-1] ^ __rshift<__w - 2>(__x_[__i-1])) + __i) & _Max;
- __i_ = 0;
-}
-
-template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
- _UIntType __a, size_t __u, _UIntType __d, size_t __s,
- _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
-template<class _Sseq>
-void
-mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b,
- __t, __c, __l, __f>::__seed(_Sseq& __q, integral_constant<unsigned, 1>)
-{
- const unsigned __k = 1;
- uint32_t __ar[__n * __k];
- __q.generate(__ar, __ar + __n * __k);
- for (size_t __i = 0; __i < __n; ++__i)
- __x_[__i] = static_cast<result_type>(__ar[__i] & _Max);
- const result_type __mask = __r == _Dt ? result_type(~0) :
- (result_type(1) << __r) - result_type(1);
- __i_ = 0;
- if ((__x_[0] & ~__mask) == 0)
- {
- for (size_t __i = 1; __i < __n; ++__i)
- if (__x_[__i] != 0)
- return;
- __x_[0] = result_type(1) << (__w - 1);
- }
-}
-
-template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
- _UIntType __a, size_t __u, _UIntType __d, size_t __s,
- _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
-template<class _Sseq>
-void
-mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b,
- __t, __c, __l, __f>::__seed(_Sseq& __q, integral_constant<unsigned, 2>)
-{
- const unsigned __k = 2;
- uint32_t __ar[__n * __k];
- __q.generate(__ar, __ar + __n * __k);
- for (size_t __i = 0; __i < __n; ++__i)
- __x_[__i] = static_cast<result_type>(
- (__ar[2 * __i] + ((uint64_t)__ar[2 * __i + 1] << 32)) & _Max);
- const result_type __mask = __r == _Dt ? result_type(~0) :
- (result_type(1) << __r) - result_type(1);
- __i_ = 0;
- if ((__x_[0] & ~__mask) == 0)
- {
- for (size_t __i = 1; __i < __n; ++__i)
- if (__x_[__i] != 0)
- return;
- __x_[0] = result_type(1) << (__w - 1);
- }
-}
-
-template <class _UIntType, size_t __w, size_t __n, size_t __m, size_t __r,
- _UIntType __a, size_t __u, _UIntType __d, size_t __s,
- _UIntType __b, size_t __t, _UIntType __c, size_t __l, _UIntType __f>
-_UIntType
-mersenne_twister_engine<_UIntType, __w, __n, __m, __r, __a, __u, __d, __s, __b,
- __t, __c, __l, __f>::operator()()
-{
- const size_t __j = (__i_ + 1) % __n;
- const result_type __mask = __r == _Dt ? result_type(~0) :
- (result_type(1) << __r) - result_type(1);
- const result_type _Yp = (__x_[__i_] & ~__mask) | (__x_[__j] & __mask);
- const size_t __k = (__i_ + __m) % __n;
- __x_[__i_] = __x_[__k] ^ __rshift<1>(_Yp) ^ (__a * (_Yp & 1));
- result_type __z = __x_[__i_] ^ (__rshift<__u>(__x_[__i_]) & __d);
- __i_ = __j;
- __z ^= __lshift<__s>(__z) & __b;
- __z ^= __lshift<__t>(__z) & __c;
- return __z ^ __rshift<__l>(__z);
-}
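// ---- Editor's note (illustration, not part of the diff) ----
// One mersenne_twister_engine step has two phases: the "twist" above splices
// the upper bits of __x_[__i_] onto the lower bits of the next state word,
// shifts right by one and conditionally XORs in the matrix constant __a; the
// four XOR/shift lines after it are the "tempering", which improves the
// equidistribution of the raw state words. The tempering in isolation, using
// mt19937's constants (mt19937_temper is a hypothetical standalone helper):
#include <cstdint>
uint32_t mt19937_temper(uint32_t z) {
    z ^= (z >> 11) & 0xffffffffu; // __u = 11, __d = 0xffffffff
    z ^= (z << 7)  & 0x9d2c5680u; // __s = 7,  __b = 0x9d2c5680
    z ^= (z << 15) & 0xefc60000u; // __t = 15, __c = 0xefc60000
    return z ^ (z >> 18);         // __l = 18
}
// ---- end editor's note ----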
-
-template <class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp,
- _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp,
- _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp>
-bool
-operator==(const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp,
- _Bp, _Tp, _Cp, _Lp, _Fp>& __x,
- const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp,
- _Bp, _Tp, _Cp, _Lp, _Fp>& __y)
-{
- if (__x.__i_ == __y.__i_)
- return _VSTD::equal(__x.__x_, __x.__x_ + _Np, __y.__x_);
- if (__x.__i_ == 0 || __y.__i_ == 0)
- {
- size_t __j = _VSTD::min(_Np - __x.__i_, _Np - __y.__i_);
- if (!_VSTD::equal(__x.__x_ + __x.__i_, __x.__x_ + __x.__i_ + __j,
- __y.__x_ + __y.__i_))
- return false;
- if (__x.__i_ == 0)
- return _VSTD::equal(__x.__x_ + __j, __x.__x_ + _Np, __y.__x_);
- return _VSTD::equal(__x.__x_, __x.__x_ + (_Np - __j), __y.__x_ + __j);
- }
- if (__x.__i_ < __y.__i_)
- {
- size_t __j = _Np - __y.__i_;
- if (!_VSTD::equal(__x.__x_ + __x.__i_, __x.__x_ + (__x.__i_ + __j),
- __y.__x_ + __y.__i_))
- return false;
- if (!_VSTD::equal(__x.__x_ + (__x.__i_ + __j), __x.__x_ + _Np,
- __y.__x_))
- return false;
- return _VSTD::equal(__x.__x_, __x.__x_ + __x.__i_,
- __y.__x_ + (_Np - (__x.__i_ + __j)));
- }
- size_t __j = _Np - __x.__i_;
- if (!_VSTD::equal(__y.__x_ + __y.__i_, __y.__x_ + (__y.__i_ + __j),
- __x.__x_ + __x.__i_))
- return false;
- if (!_VSTD::equal(__y.__x_ + (__y.__i_ + __j), __y.__x_ + _Np,
- __x.__x_))
- return false;
- return _VSTD::equal(__y.__x_, __y.__x_ + __y.__i_,
- __x.__x_ + (_Np - (__y.__i_ + __j)));
-}
-
-template <class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp,
- _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp,
- _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp>
-inline _LIBCPP_INLINE_VISIBILITY
-bool
-operator!=(const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp,
- _Bp, _Tp, _Cp, _Lp, _Fp>& __x,
- const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp,
- _Bp, _Tp, _Cp, _Lp, _Fp>& __y)
-{
- return !(__x == __y);
-}
-
-template <class _CharT, class _Traits,
- class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp,
- _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp,
- _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp>
-basic_ostream<_CharT, _Traits>&
-operator<<(basic_ostream<_CharT, _Traits>& __os,
- const mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp,
- _Bp, _Tp, _Cp, _Lp, _Fp>& __x)
-{
- __save_flags<_CharT, _Traits> __lx(__os);
- typedef basic_ostream<_CharT, _Traits> _Ostream;
- __os.flags(_Ostream::dec | _Ostream::left);
- _CharT __sp = __os.widen(' ');
- __os.fill(__sp);
- __os << __x.__x_[__x.__i_];
- for (size_t __j = __x.__i_ + 1; __j < _Np; ++__j)
- __os << __sp << __x.__x_[__j];
- for (size_t __j = 0; __j < __x.__i_; ++__j)
- __os << __sp << __x.__x_[__j];
- return __os;
-}
-
-template <class _CharT, class _Traits,
- class _UInt, size_t _Wp, size_t _Np, size_t _Mp, size_t _Rp,
- _UInt _Ap, size_t _Up, _UInt _Dp, size_t _Sp,
- _UInt _Bp, size_t _Tp, _UInt _Cp, size_t _Lp, _UInt _Fp>
-basic_istream<_CharT, _Traits>&
-operator>>(basic_istream<_CharT, _Traits>& __is,
- mersenne_twister_engine<_UInt, _Wp, _Np, _Mp, _Rp, _Ap, _Up, _Dp, _Sp,
- _Bp, _Tp, _Cp, _Lp, _Fp>& __x)
-{
- __save_flags<_CharT, _Traits> __lx(__is);
- typedef basic_istream<_CharT, _Traits> _Istream;
- __is.flags(_Istream::dec | _Istream::skipws);
- _UInt __t[_Np];
- for (size_t __i = 0; __i < _Np; ++__i)
- __is >> __t[__i];
- if (!__is.fail())
- {
- for (size_t __i = 0; __i < _Np; ++__i)
- __x.__x_[__i] = __t[__i];
- __x.__i_ = 0;
- }
- return __is;
-}
-
-typedef mersenne_twister_engine<uint_fast32_t, 32, 624, 397, 31,
- 0x9908b0df, 11, 0xffffffff,
- 7, 0x9d2c5680,
- 15, 0xefc60000,
- 18, 1812433253> mt19937;
-typedef mersenne_twister_engine<uint_fast64_t, 64, 312, 156, 31,
- 0xb5026f5aa96619e9ULL, 29, 0x5555555555555555ULL,
- 17, 0x71d67fffeda60000ULL,
- 37, 0xfff7eee000000000ULL,
- 43, 6364136223846793005ULL> mt19937_64;
-
-// subtract_with_carry_engine
-
-template<class _UIntType, size_t __w, size_t __s, size_t __r>
-class _LIBCPP_TEMPLATE_VIS subtract_with_carry_engine;
-
-template<class _UInt, size_t _Wp, size_t _Sp, size_t _Rp>
-bool
-operator==(
- const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x,
- const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __y);
-
-template<class _UInt, size_t _Wp, size_t _Sp, size_t _Rp>
-_LIBCPP_INLINE_VISIBILITY
-bool
-operator!=(
- const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x,
- const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __y);
-
-template <class _CharT, class _Traits,
- class _UInt, size_t _Wp, size_t _Sp, size_t _Rp>
-basic_ostream<_CharT, _Traits>&
-operator<<(basic_ostream<_CharT, _Traits>& __os,
- const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x);
-
-template <class _CharT, class _Traits,
- class _UInt, size_t _Wp, size_t _Sp, size_t _Rp>
-basic_istream<_CharT, _Traits>&
-operator>>(basic_istream<_CharT, _Traits>& __is,
- subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x);
-
-template<class _UIntType, size_t __w, size_t __s, size_t __r>
-class _LIBCPP_TEMPLATE_VIS subtract_with_carry_engine
-{
-public:
- // types
- typedef _UIntType result_type;
-
-private:
- result_type __x_[__r];
- result_type __c_;
- size_t __i_;
-
- static _LIBCPP_CONSTEXPR const result_type _Dt = numeric_limits<result_type>::digits;
- static_assert( 0 < __w, "subtract_with_carry_engine invalid parameters");
- static_assert(__w <= _Dt, "subtract_with_carry_engine invalid parameters");
- static_assert( 0 < __s, "subtract_with_carry_engine invalid parameters");
- static_assert(__s < __r, "subtract_with_carry_engine invalid parameters");
-public:
- static _LIBCPP_CONSTEXPR const result_type _Min = 0;
- static _LIBCPP_CONSTEXPR const result_type _Max = __w == _Dt ? result_type(~0) :
- (result_type(1) << __w) - result_type(1);
- static_assert(_Min < _Max, "subtract_with_carry_engine invalid parameters");
-
- // engine characteristics
- static _LIBCPP_CONSTEXPR const size_t word_size = __w;
- static _LIBCPP_CONSTEXPR const size_t short_lag = __s;
- static _LIBCPP_CONSTEXPR const size_t long_lag = __r;
- _LIBCPP_INLINE_VISIBILITY
- static _LIBCPP_CONSTEXPR result_type min() { return _Min; }
- _LIBCPP_INLINE_VISIBILITY
- static _LIBCPP_CONSTEXPR result_type max() { return _Max; }
- static _LIBCPP_CONSTEXPR const result_type default_seed = 19780503u;
-
- // constructors and seeding functions
-#ifndef _LIBCPP_CXX03_LANG
- _LIBCPP_INLINE_VISIBILITY
- subtract_with_carry_engine() : subtract_with_carry_engine(default_seed) {}
- _LIBCPP_INLINE_VISIBILITY
- explicit subtract_with_carry_engine(result_type __sd) { seed(__sd); }
-#else
- _LIBCPP_INLINE_VISIBILITY
- explicit subtract_with_carry_engine(result_type __sd = default_seed) {
- seed(__sd);
- }
-#endif
- template<class _Sseq>
- _LIBCPP_INLINE_VISIBILITY
- explicit subtract_with_carry_engine(_Sseq& __q,
- typename enable_if<__is_seed_sequence<_Sseq, subtract_with_carry_engine>::value>::type* = 0)
- {seed(__q);}
- _LIBCPP_INLINE_VISIBILITY
- void seed(result_type __sd = default_seed)
- {seed(__sd, integral_constant<unsigned, 1 + (__w - 1) / 32>());}
- template<class _Sseq>
- _LIBCPP_INLINE_VISIBILITY
- typename enable_if
- <
- __is_seed_sequence<_Sseq, subtract_with_carry_engine>::value,
- void
- >::type
- seed(_Sseq& __q)
- {__seed(__q, integral_constant<unsigned, 1 + (__w - 1) / 32>());}
-
- // generating functions
- result_type operator()();
- _LIBCPP_INLINE_VISIBILITY
- void discard(unsigned long long __z) {for (; __z; --__z) operator()();}
-
- template<class _UInt, size_t _Wp, size_t _Sp, size_t _Rp>
- friend
- bool
- operator==(
- const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x,
- const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __y);
-
- template<class _UInt, size_t _Wp, size_t _Sp, size_t _Rp>
- friend
- bool
- operator!=(
- const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x,
- const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __y);
-
- template <class _CharT, class _Traits,
- class _UInt, size_t _Wp, size_t _Sp, size_t _Rp>
- friend
- basic_ostream<_CharT, _Traits>&
- operator<<(basic_ostream<_CharT, _Traits>& __os,
- const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x);
-
- template <class _CharT, class _Traits,
- class _UInt, size_t _Wp, size_t _Sp, size_t _Rp>
- friend
- basic_istream<_CharT, _Traits>&
- operator>>(basic_istream<_CharT, _Traits>& __is,
- subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x);
-
-private:
-
- void seed(result_type __sd, integral_constant<unsigned, 1>);
- void seed(result_type __sd, integral_constant<unsigned, 2>);
- template<class _Sseq>
- void __seed(_Sseq& __q, integral_constant<unsigned, 1>);
- template<class _Sseq>
- void __seed(_Sseq& __q, integral_constant<unsigned, 2>);
-};
-
-template<class _UIntType, size_t __w, size_t __s, size_t __r>
- _LIBCPP_CONSTEXPR const size_t subtract_with_carry_engine<_UIntType, __w, __s, __r>::word_size;
-
-template<class _UIntType, size_t __w, size_t __s, size_t __r>
- _LIBCPP_CONSTEXPR const size_t subtract_with_carry_engine<_UIntType, __w, __s, __r>::short_lag;
-
-template<class _UIntType, size_t __w, size_t __s, size_t __r>
- _LIBCPP_CONSTEXPR const size_t subtract_with_carry_engine<_UIntType, __w, __s, __r>::long_lag;
-
-template<class _UIntType, size_t __w, size_t __s, size_t __r>
- _LIBCPP_CONSTEXPR const typename subtract_with_carry_engine<_UIntType, __w, __s, __r>::result_type
- subtract_with_carry_engine<_UIntType, __w, __s, __r>::default_seed;
-
-template<class _UIntType, size_t __w, size_t __s, size_t __r>
-void
-subtract_with_carry_engine<_UIntType, __w, __s, __r>::seed(result_type __sd,
- integral_constant<unsigned, 1>)
-{
- linear_congruential_engine<result_type, 40014u, 0u, 2147483563u>
- __e(__sd == 0u ? default_seed : __sd);
- for (size_t __i = 0; __i < __r; ++__i)
- __x_[__i] = static_cast<result_type>(__e() & _Max);
- __c_ = __x_[__r-1] == 0;
- __i_ = 0;
-}
-
-template<class _UIntType, size_t __w, size_t __s, size_t __r>
-void
-subtract_with_carry_engine<_UIntType, __w, __s, __r>::seed(result_type __sd,
- integral_constant<unsigned, 2>)
-{
- linear_congruential_engine<result_type, 40014u, 0u, 2147483563u>
- __e(__sd == 0u ? default_seed : __sd);
- for (size_t __i = 0; __i < __r; ++__i)
- {
- result_type __e0 = __e();
- __x_[__i] = static_cast<result_type>(
- (__e0 + ((uint64_t)__e() << 32)) & _Max);
- }
- __c_ = __x_[__r-1] == 0;
- __i_ = 0;
-}
-
-template<class _UIntType, size_t __w, size_t __s, size_t __r>
-template<class _Sseq>
-void
-subtract_with_carry_engine<_UIntType, __w, __s, __r>::__seed(_Sseq& __q,
- integral_constant<unsigned, 1>)
-{
- const unsigned __k = 1;
- uint32_t __ar[__r * __k];
- __q.generate(__ar, __ar + __r * __k);
- for (size_t __i = 0; __i < __r; ++__i)
- __x_[__i] = static_cast<result_type>(__ar[__i] & _Max);
- __c_ = __x_[__r-1] == 0;
- __i_ = 0;
-}
-
-template<class _UIntType, size_t __w, size_t __s, size_t __r>
-template<class _Sseq>
-void
-subtract_with_carry_engine<_UIntType, __w, __s, __r>::__seed(_Sseq& __q,
- integral_constant<unsigned, 2>)
-{
- const unsigned __k = 2;
- uint32_t __ar[__r * __k];
- __q.generate(__ar, __ar + __r * __k);
- for (size_t __i = 0; __i < __r; ++__i)
- __x_[__i] = static_cast<result_type>(
- (__ar[2 * __i] + ((uint64_t)__ar[2 * __i + 1] << 32)) & _Max);
- __c_ = __x_[__r-1] == 0;
- __i_ = 0;
-}
-
-template<class _UIntType, size_t __w, size_t __s, size_t __r>
-_UIntType
-subtract_with_carry_engine<_UIntType, __w, __s, __r>::operator()()
-{
- const result_type& __xs = __x_[(__i_ + (__r - __s)) % __r];
- result_type& __xr = __x_[__i_];
- result_type __new_c = __c_ == 0 ? __xs < __xr : __xs != 0 ? __xs <= __xr : 1;
- __xr = (__xs - __xr - __c_) & _Max;
- __c_ = __new_c;
- __i_ = (__i_ + 1) % __r;
- return __xr;
-}
-
-template<class _UInt, size_t _Wp, size_t _Sp, size_t _Rp>
-bool
-operator==(
- const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x,
- const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __y)
-{
- if (__x.__c_ != __y.__c_)
- return false;
- if (__x.__i_ == __y.__i_)
- return _VSTD::equal(__x.__x_, __x.__x_ + _Rp, __y.__x_);
- if (__x.__i_ == 0 || __y.__i_ == 0)
- {
- size_t __j = _VSTD::min(_Rp - __x.__i_, _Rp - __y.__i_);
- if (!_VSTD::equal(__x.__x_ + __x.__i_, __x.__x_ + __x.__i_ + __j,
- __y.__x_ + __y.__i_))
- return false;
- if (__x.__i_ == 0)
- return _VSTD::equal(__x.__x_ + __j, __x.__x_ + _Rp, __y.__x_);
- return _VSTD::equal(__x.__x_, __x.__x_ + (_Rp - __j), __y.__x_ + __j);
- }
- if (__x.__i_ < __y.__i_)
- {
- size_t __j = _Rp - __y.__i_;
- if (!_VSTD::equal(__x.__x_ + __x.__i_, __x.__x_ + (__x.__i_ + __j),
- __y.__x_ + __y.__i_))
- return false;
- if (!_VSTD::equal(__x.__x_ + (__x.__i_ + __j), __x.__x_ + _Rp,
- __y.__x_))
- return false;
- return _VSTD::equal(__x.__x_, __x.__x_ + __x.__i_,
- __y.__x_ + (_Rp - (__x.__i_ + __j)));
- }
- size_t __j = _Rp - __x.__i_;
- if (!_VSTD::equal(__y.__x_ + __y.__i_, __y.__x_ + (__y.__i_ + __j),
- __x.__x_ + __x.__i_))
- return false;
- if (!_VSTD::equal(__y.__x_ + (__y.__i_ + __j), __y.__x_ + _Rp,
- __x.__x_))
- return false;
- return _VSTD::equal(__y.__x_, __y.__x_ + __y.__i_,
- __x.__x_ + (_Rp - (__y.__i_ + __j)));
-}
-
-template<class _UInt, size_t _Wp, size_t _Sp, size_t _Rp>
-inline _LIBCPP_INLINE_VISIBILITY
-bool
-operator!=(
- const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x,
- const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __y)
-{
- return !(__x == __y);
-}
-
-template <class _CharT, class _Traits,
- class _UInt, size_t _Wp, size_t _Sp, size_t _Rp>
-basic_ostream<_CharT, _Traits>&
-operator<<(basic_ostream<_CharT, _Traits>& __os,
- const subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x)
-{
- __save_flags<_CharT, _Traits> __lx(__os);
- typedef basic_ostream<_CharT, _Traits> _Ostream;
- __os.flags(_Ostream::dec | _Ostream::left);
- _CharT __sp = __os.widen(' ');
- __os.fill(__sp);
- __os << __x.__x_[__x.__i_];
- for (size_t __j = __x.__i_ + 1; __j < _Rp; ++__j)
- __os << __sp << __x.__x_[__j];
- for (size_t __j = 0; __j < __x.__i_; ++__j)
- __os << __sp << __x.__x_[__j];
- __os << __sp << __x.__c_;
- return __os;
-}
-
-template <class _CharT, class _Traits,
- class _UInt, size_t _Wp, size_t _Sp, size_t _Rp>
-basic_istream<_CharT, _Traits>&
-operator>>(basic_istream<_CharT, _Traits>& __is,
- subtract_with_carry_engine<_UInt, _Wp, _Sp, _Rp>& __x)
-{
- __save_flags<_CharT, _Traits> __lx(__is);
- typedef basic_istream<_CharT, _Traits> _Istream;
- __is.flags(_Istream::dec | _Istream::skipws);
- _UInt __t[_Rp+1];
- for (size_t __i = 0; __i < _Rp+1; ++__i)
- __is >> __t[__i];
- if (!__is.fail())
- {
- for (size_t __i = 0; __i < _Rp; ++__i)
- __x.__x_[__i] = __t[__i];
- __x.__c_ = __t[_Rp];
- __x.__i_ = 0;
- }
- return __is;
-}
-
-typedef subtract_with_carry_engine<uint_fast32_t, 24, 10, 24> ranlux24_base;
-typedef subtract_with_carry_engine<uint_fast64_t, 48, 5, 12> ranlux48_base;
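As a brief usage sketch (not part of the removed header), the two typedefs above are reachable through their public std:: names; the seed value 12345u below is illustrative.

    #include <iostream>
    #include <random>

    int main() {
        // subtract_with_carry_engine<uint_fast32_t, 24, 10, 24>
        std::ranlux24_base eng(12345u);            // illustrative seed
        for (int i = 0; i < 3; ++i)
            std::cout << eng() << '\n';            // each value lies in [0, 2^24 - 1]
    }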
-
-// discard_block_engine
-
-template<class _Engine, size_t __p, size_t __r>
-class _LIBCPP_TEMPLATE_VIS discard_block_engine
-{
- _Engine __e_;
- int __n_;
-
- static_assert( 0 < __r, "discard_block_engine invalid parameters");
- static_assert(__r <= __p, "discard_block_engine invalid parameters");
- static_assert(__r <= INT_MAX, "discard_block_engine invalid parameters");
-public:
- // types
- typedef typename _Engine::result_type result_type;
-
- // engine characteristics
- static _LIBCPP_CONSTEXPR const size_t block_size = __p;
- static _LIBCPP_CONSTEXPR const size_t used_block = __r;
-
-#ifdef _LIBCPP_CXX03_LANG
- static const result_type _Min = _Engine::_Min;
- static const result_type _Max = _Engine::_Max;
-#else
- static _LIBCPP_CONSTEXPR const result_type _Min = _Engine::min();
- static _LIBCPP_CONSTEXPR const result_type _Max = _Engine::max();
-#endif
-
- _LIBCPP_INLINE_VISIBILITY
- static _LIBCPP_CONSTEXPR result_type min() { return _Engine::min(); }
- _LIBCPP_INLINE_VISIBILITY
- static _LIBCPP_CONSTEXPR result_type max() { return _Engine::max(); }
-
- // constructors and seeding functions
- _LIBCPP_INLINE_VISIBILITY
- discard_block_engine() : __n_(0) {}
- _LIBCPP_INLINE_VISIBILITY
- explicit discard_block_engine(const _Engine& __e)
- : __e_(__e), __n_(0) {}
-#ifndef _LIBCPP_CXX03_LANG
- _LIBCPP_INLINE_VISIBILITY
- explicit discard_block_engine(_Engine&& __e)
- : __e_(_VSTD::move(__e)), __n_(0) {}
-#endif // _LIBCPP_CXX03_LANG
- _LIBCPP_INLINE_VISIBILITY
- explicit discard_block_engine(result_type __sd) : __e_(__sd), __n_(0) {}
- template<class _Sseq>
- _LIBCPP_INLINE_VISIBILITY
- explicit discard_block_engine(_Sseq& __q,
- typename enable_if<__is_seed_sequence<_Sseq, discard_block_engine>::value &&
- !is_convertible<_Sseq, _Engine>::value>::type* = 0)
- : __e_(__q), __n_(0) {}
- _LIBCPP_INLINE_VISIBILITY
- void seed() {__e_.seed(); __n_ = 0;}
- _LIBCPP_INLINE_VISIBILITY
- void seed(result_type __sd) {__e_.seed(__sd); __n_ = 0;}
- template<class _Sseq>
- _LIBCPP_INLINE_VISIBILITY
- typename enable_if
- <
- __is_seed_sequence<_Sseq, discard_block_engine>::value,
- void
- >::type
- seed(_Sseq& __q) {__e_.seed(__q); __n_ = 0;}
-
- // generating functions
- result_type operator()();
- _LIBCPP_INLINE_VISIBILITY
- void discard(unsigned long long __z) {for (; __z; --__z) operator()();}
-
- // property functions
- _LIBCPP_INLINE_VISIBILITY
- const _Engine& base() const _NOEXCEPT {return __e_;}
-
- template<class _Eng, size_t _Pp, size_t _Rp>
- friend
- bool
- operator==(
- const discard_block_engine<_Eng, _Pp, _Rp>& __x,
- const discard_block_engine<_Eng, _Pp, _Rp>& __y);
-
- template<class _Eng, size_t _Pp, size_t _Rp>
- friend
- bool
- operator!=(
- const discard_block_engine<_Eng, _Pp, _Rp>& __x,
- const discard_block_engine<_Eng, _Pp, _Rp>& __y);
-
- template <class _CharT, class _Traits,
- class _Eng, size_t _Pp, size_t _Rp>
- friend
- basic_ostream<_CharT, _Traits>&
- operator<<(basic_ostream<_CharT, _Traits>& __os,
- const discard_block_engine<_Eng, _Pp, _Rp>& __x);
-
- template <class _CharT, class _Traits,
- class _Eng, size_t _Pp, size_t _Rp>
- friend
- basic_istream<_CharT, _Traits>&
- operator>>(basic_istream<_CharT, _Traits>& __is,
- discard_block_engine<_Eng, _Pp, _Rp>& __x);
-};
-
-template<class _Engine, size_t __p, size_t __r>
- _LIBCPP_CONSTEXPR const size_t discard_block_engine<_Engine, __p, __r>::block_size;
-
-template<class _Engine, size_t __p, size_t __r>
- _LIBCPP_CONSTEXPR const size_t discard_block_engine<_Engine, __p, __r>::used_block;
-
-template<class _Engine, size_t __p, size_t __r>
-typename discard_block_engine<_Engine, __p, __r>::result_type
-discard_block_engine<_Engine, __p, __r>::operator()()
-{
- if (__n_ >= static_cast<int>(__r))
- {
- __e_.discard(__p - __r);
- __n_ = 0;
- }
- ++__n_;
- return __e_();
-}
-
-template<class _Eng, size_t _Pp, size_t _Rp>
-inline _LIBCPP_INLINE_VISIBILITY
-bool
-operator==(const discard_block_engine<_Eng, _Pp, _Rp>& __x,
- const discard_block_engine<_Eng, _Pp, _Rp>& __y)
-{
- return __x.__n_ == __y.__n_ && __x.__e_ == __y.__e_;
-}
-
-template<class _Eng, size_t _Pp, size_t _Rp>
-inline _LIBCPP_INLINE_VISIBILITY
-bool
-operator!=(const discard_block_engine<_Eng, _Pp, _Rp>& __x,
- const discard_block_engine<_Eng, _Pp, _Rp>& __y)
-{
- return !(__x == __y);
-}
-
-template <class _CharT, class _Traits,
- class _Eng, size_t _Pp, size_t _Rp>
-basic_ostream<_CharT, _Traits>&
-operator<<(basic_ostream<_CharT, _Traits>& __os,
- const discard_block_engine<_Eng, _Pp, _Rp>& __x)
-{
- __save_flags<_CharT, _Traits> __lx(__os);
- typedef basic_ostream<_CharT, _Traits> _Ostream;
- __os.flags(_Ostream::dec | _Ostream::left);
- _CharT __sp = __os.widen(' ');
- __os.fill(__sp);
- return __os << __x.__e_ << __sp << __x.__n_;
-}
-
-template <class _CharT, class _Traits,
- class _Eng, size_t _Pp, size_t _Rp>
-basic_istream<_CharT, _Traits>&
-operator>>(basic_istream<_CharT, _Traits>& __is,
- discard_block_engine<_Eng, _Pp, _Rp>& __x)
-{
- __save_flags<_CharT, _Traits> __lx(__is);
- typedef basic_istream<_CharT, _Traits> _Istream;
- __is.flags(_Istream::dec | _Istream::skipws);
- _Eng __e;
- int __n;
- __is >> __e >> __n;
- if (!__is.fail())
- {
- __x.__e_ = __e;
- __x.__n_ = __n;
- }
- return __is;
-}
-
-typedef discard_block_engine<ranlux24_base, 223, 23> ranlux24;
-typedef discard_block_engine<ranlux48_base, 389, 11> ranlux48;
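A sketch of the adaptor's observable behavior, derived only from the operator() defined above: ranlux24 forwards the first 23 of every 223 base-engine values and discards the remaining 200. Both objects below are default-seeded, so their base sequences coincide.

    #include <cassert>
    #include <random>

    int main() {
        std::ranlux24 eng;                          // discard_block_engine<ranlux24_base, 223, 23>
        std::ranlux24_base base = eng.base();       // copy of the wrapped engine's state
        for (int i = 0; i < 23; ++i)
            assert(eng() == base());                // used portion of the block matches
        base.discard(223 - 23);                     // skip the discarded tail of the block
        assert(eng() == base());                    // next block starts in lockstep again
    }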
-
-// independent_bits_engine
-
-template<class _Engine, size_t __w, class _UIntType>
-class _LIBCPP_TEMPLATE_VIS independent_bits_engine
-{
- template <class _UInt, _UInt _R0, size_t _Wp, size_t _Mp>
- class __get_n
- {
- static _LIBCPP_CONSTEXPR const size_t _Dt = numeric_limits<_UInt>::digits;
- static _LIBCPP_CONSTEXPR const size_t _Np = _Wp / _Mp + (_Wp % _Mp != 0);
- static _LIBCPP_CONSTEXPR const size_t _W0 = _Wp / _Np;
- static _LIBCPP_CONSTEXPR const _UInt _Y0 = _W0 >= _Dt ? 0 : (_R0 >> _W0) << _W0;
- public:
- static _LIBCPP_CONSTEXPR const size_t value = _R0 - _Y0 > _Y0 / _Np ? _Np + 1 : _Np;
- };
-public:
- // types
- typedef _UIntType result_type;
-
-private:
- _Engine __e_;
-
- static _LIBCPP_CONSTEXPR const result_type _Dt = numeric_limits<result_type>::digits;
- static_assert( 0 < __w, "independent_bits_engine invalid parameters");
- static_assert(__w <= _Dt, "independent_bits_engine invalid parameters");
-
- typedef typename _Engine::result_type _Engine_result_type;
- typedef typename conditional
- <
- sizeof(_Engine_result_type) <= sizeof(result_type),
- result_type,
- _Engine_result_type
- >::type _Working_result_type;
-#ifdef _LIBCPP_CXX03_LANG
- static const _Working_result_type _Rp = _Engine::_Max - _Engine::_Min
- + _Working_result_type(1);
-#else
- static _LIBCPP_CONSTEXPR const _Working_result_type _Rp = _Engine::max() - _Engine::min()
- + _Working_result_type(1);
-#endif
- static _LIBCPP_CONSTEXPR const size_t __m = __log2<_Working_result_type, _Rp>::value;
- static _LIBCPP_CONSTEXPR const size_t __n = __get_n<_Working_result_type, _Rp, __w, __m>::value;
- static _LIBCPP_CONSTEXPR const size_t __w0 = __w / __n;
- static _LIBCPP_CONSTEXPR const size_t __n0 = __n - __w % __n;
- static _LIBCPP_CONSTEXPR const size_t _WDt = numeric_limits<_Working_result_type>::digits;
- static _LIBCPP_CONSTEXPR const size_t _EDt = numeric_limits<_Engine_result_type>::digits;
- static _LIBCPP_CONSTEXPR const _Working_result_type __y0 = __w0 >= _WDt ? 0 :
- (_Rp >> __w0) << __w0;
- static _LIBCPP_CONSTEXPR const _Working_result_type __y1 = __w0 >= _WDt - 1 ? 0 :
- (_Rp >> (__w0+1)) << (__w0+1);
- static _LIBCPP_CONSTEXPR const _Engine_result_type __mask0 = __w0 > 0 ?
- _Engine_result_type(~0) >> (_EDt - __w0) :
- _Engine_result_type(0);
- static _LIBCPP_CONSTEXPR const _Engine_result_type __mask1 = __w0 < _EDt - 1 ?
- _Engine_result_type(~0) >> (_EDt - (__w0 + 1)) :
- _Engine_result_type(~0);
-public:
- static _LIBCPP_CONSTEXPR const result_type _Min = 0;
- static _LIBCPP_CONSTEXPR const result_type _Max = __w == _Dt ? result_type(~0) :
- (result_type(1) << __w) - result_type(1);
- static_assert(_Min < _Max, "independent_bits_engine invalid parameters");
-
- // engine characteristics
- _LIBCPP_INLINE_VISIBILITY
- static _LIBCPP_CONSTEXPR result_type min() { return _Min; }
- _LIBCPP_INLINE_VISIBILITY
- static _LIBCPP_CONSTEXPR result_type max() { return _Max; }
-
- // constructors and seeding functions
- _LIBCPP_INLINE_VISIBILITY
- independent_bits_engine() {}
- _LIBCPP_INLINE_VISIBILITY
- explicit independent_bits_engine(const _Engine& __e)
- : __e_(__e) {}
-#ifndef _LIBCPP_CXX03_LANG
- _LIBCPP_INLINE_VISIBILITY
- explicit independent_bits_engine(_Engine&& __e)
- : __e_(_VSTD::move(__e)) {}
-#endif // _LIBCPP_CXX03_LANG
- _LIBCPP_INLINE_VISIBILITY
- explicit independent_bits_engine(result_type __sd) : __e_(__sd) {}
- template<class _Sseq>
- _LIBCPP_INLINE_VISIBILITY
- explicit independent_bits_engine(_Sseq& __q,
- typename enable_if<__is_seed_sequence<_Sseq, independent_bits_engine>::value &&
- !is_convertible<_Sseq, _Engine>::value>::type* = 0)
- : __e_(__q) {}
- _LIBCPP_INLINE_VISIBILITY
- void seed() {__e_.seed();}
- _LIBCPP_INLINE_VISIBILITY
- void seed(result_type __sd) {__e_.seed(__sd);}
- template<class _Sseq>
- _LIBCPP_INLINE_VISIBILITY
- typename enable_if
- <
- __is_seed_sequence<_Sseq, independent_bits_engine>::value,
- void
- >::type
- seed(_Sseq& __q) {__e_.seed(__q);}
-
- // generating functions
- _LIBCPP_INLINE_VISIBILITY
- result_type operator()() {return __eval(integral_constant<bool, _Rp != 0>());}
- _LIBCPP_INLINE_VISIBILITY
- void discard(unsigned long long __z) {for (; __z; --__z) operator()();}
-
- // property functions
- _LIBCPP_INLINE_VISIBILITY
- const _Engine& base() const _NOEXCEPT {return __e_;}
-
- template<class _Eng, size_t _Wp, class _UInt>
- friend
- bool
- operator==(
- const independent_bits_engine<_Eng, _Wp, _UInt>& __x,
- const independent_bits_engine<_Eng, _Wp, _UInt>& __y);
-
- template<class _Eng, size_t _Wp, class _UInt>
- friend
- bool
- operator!=(
- const independent_bits_engine<_Eng, _Wp, _UInt>& __x,
- const independent_bits_engine<_Eng, _Wp, _UInt>& __y);
-
- template <class _CharT, class _Traits,
- class _Eng, size_t _Wp, class _UInt>
- friend
- basic_ostream<_CharT, _Traits>&
- operator<<(basic_ostream<_CharT, _Traits>& __os,
- const independent_bits_engine<_Eng, _Wp, _UInt>& __x);
-
- template <class _CharT, class _Traits,
- class _Eng, size_t _Wp, class _UInt>
- friend
- basic_istream<_CharT, _Traits>&
- operator>>(basic_istream<_CharT, _Traits>& __is,
- independent_bits_engine<_Eng, _Wp, _UInt>& __x);
-
-private:
- _LIBCPP_INLINE_VISIBILITY
- result_type __eval(false_type);
- result_type __eval(true_type);
-
- template <size_t __count>
- _LIBCPP_INLINE_VISIBILITY
- static
- typename enable_if
- <
- __count < _Dt,
- result_type
- >::type
- __lshift(result_type __x) {return __x << __count;}
-
- template <size_t __count>
- _LIBCPP_INLINE_VISIBILITY
- static
- typename enable_if
- <
- (__count >= _Dt),
- result_type
- >::type
- __lshift(result_type) {return result_type(0);}
-};
-
-template<class _Engine, size_t __w, class _UIntType>
-inline
-_UIntType
-independent_bits_engine<_Engine, __w, _UIntType>::__eval(false_type)
-{
- return static_cast<result_type>(__e_() & __mask0);
-}
-
-template<class _Engine, size_t __w, class _UIntType>
-_UIntType
-independent_bits_engine<_Engine, __w, _UIntType>::__eval(true_type)
-{
- result_type _Sp = 0;
- for (size_t __k = 0; __k < __n0; ++__k)
- {
- _Engine_result_type __u;
- do
- {
- __u = __e_() - _Engine::min();
- } while (__u >= __y0);
- _Sp = static_cast<result_type>(__lshift<__w0>(_Sp) + (__u & __mask0));
- }
- for (size_t __k = __n0; __k < __n; ++__k)
- {
- _Engine_result_type __u;
- do
- {
- __u = __e_() - _Engine::min();
- } while (__u >= __y1);
- _Sp = static_cast<result_type>(__lshift<__w0+1>(_Sp) + (__u & __mask1));
- }
- return _Sp;
-}
-
-template<class _Eng, size_t _Wp, class _UInt>
-inline _LIBCPP_INLINE_VISIBILITY
-bool
-operator==(
- const independent_bits_engine<_Eng, _Wp, _UInt>& __x,
- const independent_bits_engine<_Eng, _Wp, _UInt>& __y)
-{
- return __x.base() == __y.base();
-}
-
-template<class _Eng, size_t _Wp, class _UInt>
-inline _LIBCPP_INLINE_VISIBILITY
-bool
-operator!=(
- const independent_bits_engine<_Eng, _Wp, _UInt>& __x,
- const independent_bits_engine<_Eng, _Wp, _UInt>& __y)
-{
- return !(__x == __y);
-}
-
-template <class _CharT, class _Traits,
- class _Eng, size_t _Wp, class _UInt>
-basic_ostream<_CharT, _Traits>&
-operator<<(basic_ostream<_CharT, _Traits>& __os,
- const independent_bits_engine<_Eng, _Wp, _UInt>& __x)
-{
- return __os << __x.base();
-}
-
-template <class _CharT, class _Traits,
- class _Eng, size_t _Wp, class _UInt>
-basic_istream<_CharT, _Traits>&
-operator>>(basic_istream<_CharT, _Traits>& __is,
- independent_bits_engine<_Eng, _Wp, _UInt>& __x)
-{
- _Eng __e;
- __is >> __e;
- if (!__is.fail())
- __x.__e_ = __e;
- return __is;
-}
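A sketch of the adaptor in use: independent_bits_engine repeatedly draws from its base engine and packs the requested number of bits into each result. The base engine, width, and seed below are illustrative choices.

    #include <cstdint>
    #include <random>

    int main() {
        // Assemble 64-bit results from the 31-bit minstd_rand0 engine.
        std::independent_bits_engine<std::minstd_rand0, 64, std::uint64_t> eng(7u);
        std::uint64_t v = eng();                    // uniform over [0, 2^64 - 1]
        (void)v;
    }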
-
-// shuffle_order_engine
-
-template <uint64_t _Xp, uint64_t _Yp>
-struct __ugcd
-{
- static _LIBCPP_CONSTEXPR const uint64_t value = __ugcd<_Yp, _Xp % _Yp>::value;
-};
-
-template <uint64_t _Xp>
-struct __ugcd<_Xp, 0>
-{
- static _LIBCPP_CONSTEXPR const uint64_t value = _Xp;
-};
-
-template <uint64_t _Np, uint64_t _Dp>
-class __uratio
-{
- static_assert(_Dp != 0, "__uratio divide by 0");
- static _LIBCPP_CONSTEXPR const uint64_t __gcd = __ugcd<_Np, _Dp>::value;
-public:
- static _LIBCPP_CONSTEXPR const uint64_t num = _Np / __gcd;
- static _LIBCPP_CONSTEXPR const uint64_t den = _Dp / __gcd;
-
- typedef __uratio<num, den> type;
-};
-
-template<class _Engine, size_t __k>
-class _LIBCPP_TEMPLATE_VIS shuffle_order_engine
-{
- static_assert(0 < __k, "shuffle_order_engine invalid parameters");
-public:
- // types
- typedef typename _Engine::result_type result_type;
-
-private:
- _Engine __e_;
- result_type _V_[__k];
- result_type _Y_;
-
-public:
- // engine characteristics
- static _LIBCPP_CONSTEXPR const size_t table_size = __k;
-
-#ifdef _LIBCPP_CXX03_LANG
- static const result_type _Min = _Engine::_Min;
- static const result_type _Max = _Engine::_Max;
-#else
- static _LIBCPP_CONSTEXPR const result_type _Min = _Engine::min();
- static _LIBCPP_CONSTEXPR const result_type _Max = _Engine::max();
-#endif
- static_assert(_Min < _Max, "shuffle_order_engine invalid parameters");
- _LIBCPP_INLINE_VISIBILITY
- static _LIBCPP_CONSTEXPR result_type min() { return _Min; }
- _LIBCPP_INLINE_VISIBILITY
- static _LIBCPP_CONSTEXPR result_type max() { return _Max; }
-
- static _LIBCPP_CONSTEXPR const unsigned long long _Rp = _Max - _Min + 1ull;
-
- // constructors and seeding functions
- _LIBCPP_INLINE_VISIBILITY
- shuffle_order_engine() {__init();}
- _LIBCPP_INLINE_VISIBILITY
- explicit shuffle_order_engine(const _Engine& __e)
- : __e_(__e) {__init();}
-#ifndef _LIBCPP_CXX03_LANG
- _LIBCPP_INLINE_VISIBILITY
- explicit shuffle_order_engine(_Engine&& __e)
- : __e_(_VSTD::move(__e)) {__init();}
-#endif // _LIBCPP_CXX03_LANG
- _LIBCPP_INLINE_VISIBILITY
- explicit shuffle_order_engine(result_type __sd) : __e_(__sd) {__init();}
- template<class _Sseq>
- _LIBCPP_INLINE_VISIBILITY
- explicit shuffle_order_engine(_Sseq& __q,
- typename enable_if<__is_seed_sequence<_Sseq, shuffle_order_engine>::value &&
- !is_convertible<_Sseq, _Engine>::value>::type* = 0)
- : __e_(__q) {__init();}
- _LIBCPP_INLINE_VISIBILITY
- void seed() {__e_.seed(); __init();}
- _LIBCPP_INLINE_VISIBILITY
- void seed(result_type __sd) {__e_.seed(__sd); __init();}
- template<class _Sseq>
- _LIBCPP_INLINE_VISIBILITY
- typename enable_if
- <
- __is_seed_sequence<_Sseq, shuffle_order_engine>::value,
- void
- >::type
- seed(_Sseq& __q) {__e_.seed(__q); __init();}
-
- // generating functions
- _LIBCPP_INLINE_VISIBILITY
- result_type operator()() {return __eval(integral_constant<bool, _Rp != 0>());}
- _LIBCPP_INLINE_VISIBILITY
- void discard(unsigned long long __z) {for (; __z; --__z) operator()();}
-
- // property functions
- _LIBCPP_INLINE_VISIBILITY
- const _Engine& base() const _NOEXCEPT {return __e_;}
-
-private:
- template<class _Eng, size_t _Kp>
- friend
- bool
- operator==(
- const shuffle_order_engine<_Eng, _Kp>& __x,
- const shuffle_order_engine<_Eng, _Kp>& __y);
-
- template<class _Eng, size_t _Kp>
- friend
- bool
- operator!=(
- const shuffle_order_engine<_Eng, _Kp>& __x,
- const shuffle_order_engine<_Eng, _Kp>& __y);
-
- template <class _CharT, class _Traits,
- class _Eng, size_t _Kp>
- friend
- basic_ostream<_CharT, _Traits>&
- operator<<(basic_ostream<_CharT, _Traits>& __os,
- const shuffle_order_engine<_Eng, _Kp>& __x);
-
- template <class _CharT, class _Traits,
- class _Eng, size_t _Kp>
- friend
- basic_istream<_CharT, _Traits>&
- operator>>(basic_istream<_CharT, _Traits>& __is,
- shuffle_order_engine<_Eng, _Kp>& __x);
-
- _LIBCPP_INLINE_VISIBILITY
- void __init()
- {
- for (size_t __i = 0; __i < __k; ++__i)
- _V_[__i] = __e_();
- _Y_ = __e_();
- }
-
- _LIBCPP_INLINE_VISIBILITY
- result_type __eval(false_type) {return __eval2(integral_constant<bool, __k & 1>());}
- _LIBCPP_INLINE_VISIBILITY
- result_type __eval(true_type) {return __eval(__uratio<__k, _Rp>());}
-
- _LIBCPP_INLINE_VISIBILITY
- result_type __eval2(false_type) {return __eval(__uratio<__k/2, 0x8000000000000000ull>());}
- _LIBCPP_INLINE_VISIBILITY
- result_type __eval2(true_type) {return __evalf<__k, 0>();}
-
- template <uint64_t _Np, uint64_t _Dp>
- _LIBCPP_INLINE_VISIBILITY
- typename enable_if
- <
- (__uratio<_Np, _Dp>::num > 0xFFFFFFFFFFFFFFFFull / (_Max - _Min)),
- result_type
- >::type
- __eval(__uratio<_Np, _Dp>)
- {return __evalf<__uratio<_Np, _Dp>::num, __uratio<_Np, _Dp>::den>();}
-
- template <uint64_t _Np, uint64_t _Dp>
- _LIBCPP_INLINE_VISIBILITY
- typename enable_if
- <
- __uratio<_Np, _Dp>::num <= 0xFFFFFFFFFFFFFFFFull / (_Max - _Min),
- result_type
- >::type
- __eval(__uratio<_Np, _Dp>)
- {
- const size_t __j = static_cast<size_t>(__uratio<_Np, _Dp>::num * (_Y_ - _Min)
- / __uratio<_Np, _Dp>::den);
- _Y_ = _V_[__j];
- _V_[__j] = __e_();
- return _Y_;
- }
-
- template <uint64_t __n, uint64_t __d>
- _LIBCPP_INLINE_VISIBILITY
- result_type __evalf()
- {
- const double _Fp = __d == 0 ?
- __n / (2. * 0x8000000000000000ull) :
- __n / (double)__d;
- const size_t __j = static_cast<size_t>(_Fp * (_Y_ - _Min));
- _Y_ = _V_[__j];
- _V_[__j] = __e_();
- return _Y_;
- }
-};
-
-template<class _Engine, size_t __k>
- _LIBCPP_CONSTEXPR const size_t shuffle_order_engine<_Engine, __k>::table_size;
-
-template<class _Eng, size_t _Kp>
-bool
-operator==(
- const shuffle_order_engine<_Eng, _Kp>& __x,
- const shuffle_order_engine<_Eng, _Kp>& __y)
-{
- return __x._Y_ == __y._Y_ && _VSTD::equal(__x._V_, __x._V_ + _Kp, __y._V_) &&
- __x.__e_ == __y.__e_;
-}
-
-template<class _Eng, size_t _Kp>
-inline _LIBCPP_INLINE_VISIBILITY
-bool
-operator!=(
- const shuffle_order_engine<_Eng, _Kp>& __x,
- const shuffle_order_engine<_Eng, _Kp>& __y)
-{
- return !(__x == __y);
-}
-
-template <class _CharT, class _Traits,
- class _Eng, size_t _Kp>
-basic_ostream<_CharT, _Traits>&
-operator<<(basic_ostream<_CharT, _Traits>& __os,
- const shuffle_order_engine<_Eng, _Kp>& __x)
-{
- __save_flags<_CharT, _Traits> __lx(__os);
- typedef basic_ostream<_CharT, _Traits> _Ostream;
- __os.flags(_Ostream::dec | _Ostream::left);
- _CharT __sp = __os.widen(' ');
- __os.fill(__sp);
- __os << __x.__e_ << __sp << __x._V_[0];
- for (size_t __i = 1; __i < _Kp; ++__i)
- __os << __sp << __x._V_[__i];
- return __os << __sp << __x._Y_;
-}
-
-template <class _CharT, class _Traits,
- class _Eng, size_t _Kp>
-basic_istream<_CharT, _Traits>&
-operator>>(basic_istream<_CharT, _Traits>& __is,
- shuffle_order_engine<_Eng, _Kp>& __x)
-{
- typedef typename shuffle_order_engine<_Eng, _Kp>::result_type result_type;
- __save_flags<_CharT, _Traits> __lx(__is);
- typedef basic_istream<_CharT, _Traits> _Istream;
- __is.flags(_Istream::dec | _Istream::skipws);
- _Eng __e;
- result_type _Vp[_Kp+1];
- __is >> __e;
- for (size_t __i = 0; __i < _Kp+1; ++__i)
- __is >> _Vp[__i];
- if (!__is.fail())
- {
- __x.__e_ = __e;
- for (size_t __i = 0; __i < _Kp; ++__i)
- __x._V_[__i] = _Vp[__i];
- __x._Y_ = _Vp[_Kp];
- }
- return __is;
-}
-
-typedef shuffle_order_engine<minstd_rand0, 256> knuth_b;
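A sketch using the typedef above: knuth_b permutes the output order of minstd_rand0 through a 256-entry table without changing the value range. The seed is illustrative.

    #include <iostream>
    #include <random>

    int main() {
        std::knuth_b eng(2021u);                    // shuffle_order_engine<minstd_rand0, 256>
        std::cout << eng() << '\n';                 // same range as minstd_rand0: [1, 2147483646]
    }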
-
-// random_device
-
-#if !defined(_LIBCPP_HAS_NO_RANDOM_DEVICE)
-
-class _LIBCPP_TYPE_VIS random_device
-{
-#ifdef _LIBCPP_USING_DEV_RANDOM
- int __f_;
-#endif // defined(_LIBCPP_USING_DEV_RANDOM)
-public:
- // types
- typedef unsigned result_type;
-
- // generator characteristics
- static _LIBCPP_CONSTEXPR const result_type _Min = 0;
- static _LIBCPP_CONSTEXPR const result_type _Max = 0xFFFFFFFFu;
-
- _LIBCPP_INLINE_VISIBILITY
- static _LIBCPP_CONSTEXPR result_type min() { return _Min;}
- _LIBCPP_INLINE_VISIBILITY
- static _LIBCPP_CONSTEXPR result_type max() { return _Max;}
-
- // constructors
-#ifndef _LIBCPP_CXX03_LANG
- random_device() : random_device("/dev/urandom") {}
- explicit random_device(const string& __token);
-#else
- explicit random_device(const string& __token = "/dev/urandom");
-#endif
- ~random_device();
-
- // generating functions
- result_type operator()();
-
- // property functions
- double entropy() const _NOEXCEPT;
-
-private:
- // no copy functions
- random_device(const random_device&); // = delete;
- random_device& operator=(const random_device&); // = delete;
-};
-
-#endif // !_LIBCPP_HAS_NO_RANDOM_DEVICE
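A sketch of the usual idiom: draw one nondeterministic value from random_device (backed by /dev/urandom in the default constructor above) to seed a fast pseudo-random engine. std::mt19937 is defined elsewhere in this header.

    #include <random>

    int main() {
        std::random_device rd;                      // may read /dev/urandom on this implementation
        std::mt19937 eng(rd());                     // one 32-bit nondeterministic seed
        (void)eng();
    }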
-
-// seed_seq
-
-class _LIBCPP_TEMPLATE_VIS seed_seq
-{
-public:
- // types
- typedef uint32_t result_type;
-
-private:
- vector<result_type> __v_;
-
- template<class _InputIterator>
- void init(_InputIterator __first, _InputIterator __last);
-public:
- // constructors
- _LIBCPP_INLINE_VISIBILITY
- seed_seq() _NOEXCEPT {}
-#ifndef _LIBCPP_CXX03_LANG
- template<class _Tp>
- _LIBCPP_INLINE_VISIBILITY
- seed_seq(initializer_list<_Tp> __il) {init(__il.begin(), __il.end());}
-#endif // _LIBCPP_CXX03_LANG
-
- template<class _InputIterator>
- _LIBCPP_INLINE_VISIBILITY
- seed_seq(_InputIterator __first, _InputIterator __last)
- {init(__first, __last);}
-
- // generating functions
- template<class _RandomAccessIterator>
- void generate(_RandomAccessIterator __first, _RandomAccessIterator __last);
-
- // property functions
- _LIBCPP_INLINE_VISIBILITY
- size_t size() const _NOEXCEPT {return __v_.size();}
- template<class _OutputIterator>
- _LIBCPP_INLINE_VISIBILITY
- void param(_OutputIterator __dest) const
- {_VSTD::copy(__v_.begin(), __v_.end(), __dest);}
-
-private:
- // no copy functions
- seed_seq(const seed_seq&); // = delete;
- void operator=(const seed_seq&); // = delete;
-
- _LIBCPP_INLINE_VISIBILITY
- static result_type _Tp(result_type __x) {return __x ^ (__x >> 27);}
-};
-
-template<class _InputIterator>
-void
-seed_seq::init(_InputIterator __first, _InputIterator __last)
-{
- for (_InputIterator __s = __first; __s != __last; ++__s)
- __v_.push_back(*__s & 0xFFFFFFFF);
-}
-
-template<class _RandomAccessIterator>
-void
-seed_seq::generate(_RandomAccessIterator __first, _RandomAccessIterator __last)
-{
- if (__first != __last)
- {
- _VSTD::fill(__first, __last, 0x8b8b8b8b);
- const size_t __n = static_cast<size_t>(__last - __first);
- const size_t __s = __v_.size();
- const size_t __t = (__n >= 623) ? 11
- : (__n >= 68) ? 7
- : (__n >= 39) ? 5
- : (__n >= 7) ? 3
- : (__n - 1) / 2;
- const size_t __p = (__n - __t) / 2;
- const size_t __q = __p + __t;
- const size_t __m = _VSTD::max(__s + 1, __n);
- // __k = 0;
- {
- result_type __r = 1664525 * _Tp(__first[0] ^ __first[__p]
- ^ __first[__n - 1]);
- __first[__p] += __r;
- __r += __s;
- __first[__q] += __r;
- __first[0] = __r;
- }
- for (size_t __k = 1; __k <= __s; ++__k)
- {
- const size_t __kmodn = __k % __n;
- const size_t __kpmodn = (__k + __p) % __n;
- result_type __r = 1664525 * _Tp(__first[__kmodn] ^ __first[__kpmodn]
- ^ __first[(__k - 1) % __n]);
- __first[__kpmodn] += __r;
- __r += __kmodn + __v_[__k-1];
- __first[(__k + __q) % __n] += __r;
- __first[__kmodn] = __r;
- }
- for (size_t __k = __s + 1; __k < __m; ++__k)
- {
- const size_t __kmodn = __k % __n;
- const size_t __kpmodn = (__k + __p) % __n;
- result_type __r = 1664525 * _Tp(__first[__kmodn] ^ __first[__kpmodn]
- ^ __first[(__k - 1) % __n]);
- __first[__kpmodn] += __r;
- __r += __kmodn;
- __first[(__k + __q) % __n] += __r;
- __first[__kmodn] = __r;
- }
- for (size_t __k = __m; __k < __m + __n; ++__k)
- {
- const size_t __kmodn = __k % __n;
- const size_t __kpmodn = (__k + __p) % __n;
- result_type __r = 1566083941 * _Tp(__first[__kmodn] +
- __first[__kpmodn] +
- __first[(__k - 1) % __n]);
- __first[__kpmodn] ^= __r;
- __r -= __kmodn;
- __first[(__k + __q) % __n] ^= __r;
- __first[__kmodn] = __r;
- }
- }
-}
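A sketch of the interface implemented above: seed_seq accepts any number of input values and mixes them into an output buffer of any length, or can be handed directly to an engine's seed-sequence constructor. The inputs below are illustrative.

    #include <cstdint>
    #include <random>

    int main() {
        std::seed_seq seq{1u, 2u, 3u};              // init() keeps the low 32 bits of each input
        std::uint32_t out[8];
        seq.generate(out, out + 8);                 // runs the mixing loops defined above
        std::mt19937 eng(seq);                      // or seed an engine from the sequence
        (void)eng;
    }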
-
-// generate_canonical
-
-template<class _RealType, size_t __bits, class _URNG>
-_RealType
-generate_canonical(_URNG& __g)
-{
- const size_t _Dt = numeric_limits<_RealType>::digits;
- const size_t __b = _Dt < __bits ? _Dt : __bits;
-#ifdef _LIBCPP_CXX03_LANG
- const size_t __logR = __log2<uint64_t, _URNG::_Max - _URNG::_Min + uint64_t(1)>::value;
-#else
- const size_t __logR = __log2<uint64_t, _URNG::max() - _URNG::min() + uint64_t(1)>::value;
-#endif
- const size_t __k = __b / __logR + (__b % __logR != 0) + (__b == 0);
- const _RealType _Rp = static_cast<_RealType>(_URNG::max() - _URNG::min()) + _RealType(1);
- _RealType __base = _Rp;
- _RealType _Sp = __g() - _URNG::min();
- for (size_t __i = 1; __i < __k; ++__i, __base *= _Rp)
- _Sp += (__g() - _URNG::min()) * __base;
- return _Sp / __base;
-}
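A sketch of the function above in its usual role: it draws as many engine values as are needed to fill the mantissa of the result type and scales them into [0, 1). The engine and seed are illustrative.

    #include <iostream>
    #include <limits>
    #include <random>

    int main() {
        std::mt19937 eng(99u);
        double d = std::generate_canonical<double,
            std::numeric_limits<double>::digits>(eng);
        std::cout << d << '\n';                     // d is in [0, 1)
    }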
-
-// uniform_real_distribution
-
-template<class _RealType = double>
-class _LIBCPP_TEMPLATE_VIS uniform_real_distribution
-{
-public:
- // types
- typedef _RealType result_type;
-
- class _LIBCPP_TEMPLATE_VIS param_type
- {
- result_type __a_;
- result_type __b_;
- public:
- typedef uniform_real_distribution distribution_type;
-
- _LIBCPP_INLINE_VISIBILITY
- explicit param_type(result_type __a = 0,
- result_type __b = 1)
- : __a_(__a), __b_(__b) {}
-
- _LIBCPP_INLINE_VISIBILITY
- result_type a() const {return __a_;}
- _LIBCPP_INLINE_VISIBILITY
- result_type b() const {return __b_;}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const param_type& __x, const param_type& __y)
- {return __x.__a_ == __y.__a_ && __x.__b_ == __y.__b_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const param_type& __x, const param_type& __y)
- {return !(__x == __y);}
- };
-
-private:
- param_type __p_;
-
-public:
- // constructors and reset functions
-#ifndef _LIBCPP_CXX03_LANG
- _LIBCPP_INLINE_VISIBILITY
- uniform_real_distribution() : uniform_real_distribution(0) {}
- explicit uniform_real_distribution(result_type __a, result_type __b = 1)
- : __p_(param_type(__a, __b)) {}
-#else
- _LIBCPP_INLINE_VISIBILITY
- explicit uniform_real_distribution(result_type __a = 0, result_type __b = 1)
- : __p_(param_type(__a, __b)) {}
-#endif
- _LIBCPP_INLINE_VISIBILITY
- explicit uniform_real_distribution(const param_type& __p) : __p_(__p) {}
- _LIBCPP_INLINE_VISIBILITY
- void reset() {}
-
- // generating functions
- template<class _URNG>
- _LIBCPP_INLINE_VISIBILITY
- result_type operator()(_URNG& __g)
- {return (*this)(__g, __p_);}
- template<class _URNG> _LIBCPP_INLINE_VISIBILITY result_type operator()(_URNG& __g, const param_type& __p);
-
- // property functions
- _LIBCPP_INLINE_VISIBILITY
- result_type a() const {return __p_.a();}
- _LIBCPP_INLINE_VISIBILITY
- result_type b() const {return __p_.b();}
-
- _LIBCPP_INLINE_VISIBILITY
- param_type param() const {return __p_;}
- _LIBCPP_INLINE_VISIBILITY
- void param(const param_type& __p) {__p_ = __p;}
-
- _LIBCPP_INLINE_VISIBILITY
- result_type min() const {return a();}
- _LIBCPP_INLINE_VISIBILITY
- result_type max() const {return b();}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const uniform_real_distribution& __x,
- const uniform_real_distribution& __y)
- {return __x.__p_ == __y.__p_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const uniform_real_distribution& __x,
- const uniform_real_distribution& __y)
- {return !(__x == __y);}
-};
-
-template<class _RealType>
-template<class _URNG>
-inline
-typename uniform_real_distribution<_RealType>::result_type
-uniform_real_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p)
-{
- return (__p.b() - __p.a())
- * _VSTD::generate_canonical<_RealType, numeric_limits<_RealType>::digits>(__g)
- + __p.a();
-}
-
-template <class _CharT, class _Traits, class _RT>
-basic_ostream<_CharT, _Traits>&
-operator<<(basic_ostream<_CharT, _Traits>& __os,
- const uniform_real_distribution<_RT>& __x)
-{
- __save_flags<_CharT, _Traits> __lx(__os);
- typedef basic_ostream<_CharT, _Traits> _OStream;
- __os.flags(_OStream::dec | _OStream::left | _OStream::fixed |
- _OStream::scientific);
- _CharT __sp = __os.widen(' ');
- __os.fill(__sp);
- return __os << __x.a() << __sp << __x.b();
-}
-
-template <class _CharT, class _Traits, class _RT>
-basic_istream<_CharT, _Traits>&
-operator>>(basic_istream<_CharT, _Traits>& __is,
- uniform_real_distribution<_RT>& __x)
-{
- typedef uniform_real_distribution<_RT> _Eng;
- typedef typename _Eng::result_type result_type;
- typedef typename _Eng::param_type param_type;
- __save_flags<_CharT, _Traits> __lx(__is);
- typedef basic_istream<_CharT, _Traits> _Istream;
- __is.flags(_Istream::dec | _Istream::skipws);
- result_type __a;
- result_type __b;
- __is >> __a >> __b;
- if (!__is.fail())
- __x.param(param_type(__a, __b));
- return __is;
-}
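A sketch of the distribution in use: per the operator() defined above, each variate is (b - a) * generate_canonical(...) + a. Engine, seed, and bounds are illustrative.

    #include <iostream>
    #include <random>

    int main() {
        std::mt19937 eng(1u);
        std::uniform_real_distribution<double> dist(0.0, 10.0);
        std::cout << dist(eng) << '\n';             // value in [0, 10)
    }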
-
-// bernoulli_distribution
-
-class _LIBCPP_TEMPLATE_VIS bernoulli_distribution
-{
-public:
- // types
- typedef bool result_type;
-
- class _LIBCPP_TEMPLATE_VIS param_type
- {
- double __p_;
- public:
- typedef bernoulli_distribution distribution_type;
-
- _LIBCPP_INLINE_VISIBILITY
- explicit param_type(double __p = 0.5) : __p_(__p) {}
-
- _LIBCPP_INLINE_VISIBILITY
- double p() const {return __p_;}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const param_type& __x, const param_type& __y)
- {return __x.__p_ == __y.__p_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const param_type& __x, const param_type& __y)
- {return !(__x == __y);}
- };
-
-private:
- param_type __p_;
-
-public:
- // constructors and reset functions
-#ifndef _LIBCPP_CXX03_LANG
- _LIBCPP_INLINE_VISIBILITY
- bernoulli_distribution() : bernoulli_distribution(0.5) {}
- _LIBCPP_INLINE_VISIBILITY
- explicit bernoulli_distribution(double __p) : __p_(param_type(__p)) {}
-#else
- _LIBCPP_INLINE_VISIBILITY
- explicit bernoulli_distribution(double __p = 0.5) : __p_(param_type(__p)) {}
-#endif
- _LIBCPP_INLINE_VISIBILITY
- explicit bernoulli_distribution(const param_type& __p) : __p_(__p) {}
- _LIBCPP_INLINE_VISIBILITY
- void reset() {}
-
- // generating functions
- template<class _URNG>
- _LIBCPP_INLINE_VISIBILITY
- result_type operator()(_URNG& __g)
- {return (*this)(__g, __p_);}
- template<class _URNG> _LIBCPP_INLINE_VISIBILITY result_type operator()(_URNG& __g, const param_type& __p);
-
- // property functions
- _LIBCPP_INLINE_VISIBILITY
- double p() const {return __p_.p();}
-
- _LIBCPP_INLINE_VISIBILITY
- param_type param() const {return __p_;}
- _LIBCPP_INLINE_VISIBILITY
- void param(const param_type& __p) {__p_ = __p;}
-
- _LIBCPP_INLINE_VISIBILITY
- result_type min() const {return false;}
- _LIBCPP_INLINE_VISIBILITY
- result_type max() const {return true;}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const bernoulli_distribution& __x,
- const bernoulli_distribution& __y)
- {return __x.__p_ == __y.__p_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const bernoulli_distribution& __x,
- const bernoulli_distribution& __y)
- {return !(__x == __y);}
-};
-
-template<class _URNG>
-inline
-bernoulli_distribution::result_type
-bernoulli_distribution::operator()(_URNG& __g, const param_type& __p)
-{
- uniform_real_distribution<double> __gen;
- return __gen(__g) < __p.p();
-}
-
-template <class _CharT, class _Traits>
-basic_ostream<_CharT, _Traits>&
-operator<<(basic_ostream<_CharT, _Traits>& __os, const bernoulli_distribution& __x)
-{
- __save_flags<_CharT, _Traits> __lx(__os);
- typedef basic_ostream<_CharT, _Traits> _OStream;
- __os.flags(_OStream::dec | _OStream::left | _OStream::fixed |
- _OStream::scientific);
- _CharT __sp = __os.widen(' ');
- __os.fill(__sp);
- return __os << __x.p();
-}
-
-template <class _CharT, class _Traits>
-basic_istream<_CharT, _Traits>&
-operator>>(basic_istream<_CharT, _Traits>& __is, bernoulli_distribution& __x)
-{
- typedef bernoulli_distribution _Eng;
- typedef typename _Eng::param_type param_type;
- __save_flags<_CharT, _Traits> __lx(__is);
- typedef basic_istream<_CharT, _Traits> _Istream;
- __is.flags(_Istream::dec | _Istream::skipws);
- double __p;
- __is >> __p;
- if (!__is.fail())
- __x.param(param_type(__p));
- return __is;
-}
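A sketch: per the operator() above, a Bernoulli draw reduces to testing uniform(0,1) < p. Engine, seed, and p are illustrative.

    #include <random>

    int main() {
        std::mt19937 eng(5u);
        std::bernoulli_distribution coin(0.25);     // true with probability 0.25
        bool b = coin(eng);
        (void)b;
    }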
-
-// binomial_distribution
-
-template<class _IntType = int>
-class _LIBCPP_TEMPLATE_VIS binomial_distribution
-{
-public:
- // types
- typedef _IntType result_type;
-
- class _LIBCPP_TEMPLATE_VIS param_type
- {
- result_type __t_;
- double __p_;
- double __pr_;
- double __odds_ratio_;
- result_type __r0_;
- public:
- typedef binomial_distribution distribution_type;
-
- explicit param_type(result_type __t = 1, double __p = 0.5);
-
- _LIBCPP_INLINE_VISIBILITY
- result_type t() const {return __t_;}
- _LIBCPP_INLINE_VISIBILITY
- double p() const {return __p_;}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const param_type& __x, const param_type& __y)
- {return __x.__t_ == __y.__t_ && __x.__p_ == __y.__p_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const param_type& __x, const param_type& __y)
- {return !(__x == __y);}
-
- friend class binomial_distribution;
- };
-
-private:
- param_type __p_;
-
-public:
- // constructors and reset functions
-#ifndef _LIBCPP_CXX03_LANG
- _LIBCPP_INLINE_VISIBILITY
- binomial_distribution() : binomial_distribution(1) {}
- _LIBCPP_INLINE_VISIBILITY
- explicit binomial_distribution(result_type __t, double __p = 0.5)
- : __p_(param_type(__t, __p)) {}
-#else
- _LIBCPP_INLINE_VISIBILITY
- explicit binomial_distribution(result_type __t = 1, double __p = 0.5)
- : __p_(param_type(__t, __p)) {}
-#endif
- _LIBCPP_INLINE_VISIBILITY
- explicit binomial_distribution(const param_type& __p) : __p_(__p) {}
- _LIBCPP_INLINE_VISIBILITY
- void reset() {}
-
- // generating functions
- template<class _URNG>
- _LIBCPP_INLINE_VISIBILITY
- result_type operator()(_URNG& __g)
- {return (*this)(__g, __p_);}
- template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p);
-
- // property functions
- _LIBCPP_INLINE_VISIBILITY
- result_type t() const {return __p_.t();}
- _LIBCPP_INLINE_VISIBILITY
- double p() const {return __p_.p();}
-
- _LIBCPP_INLINE_VISIBILITY
- param_type param() const {return __p_;}
- _LIBCPP_INLINE_VISIBILITY
- void param(const param_type& __p) {__p_ = __p;}
-
- _LIBCPP_INLINE_VISIBILITY
- result_type min() const {return 0;}
- _LIBCPP_INLINE_VISIBILITY
- result_type max() const {return t();}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const binomial_distribution& __x,
- const binomial_distribution& __y)
- {return __x.__p_ == __y.__p_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const binomial_distribution& __x,
- const binomial_distribution& __y)
- {return !(__x == __y);}
-};
-
-#ifndef _LIBCPP_MSVCRT_LIKE
-extern "C" double lgamma_r(double, int *);
-#endif
-
-inline _LIBCPP_INLINE_VISIBILITY double __libcpp_lgamma(double __d) {
-#if defined(_LIBCPP_MSVCRT_LIKE)
- return lgamma(__d);
-#else
- int __sign;
- return lgamma_r(__d, &__sign);
-#endif
-}
-
-template<class _IntType>
-binomial_distribution<_IntType>::param_type::param_type(result_type __t, double __p)
- : __t_(__t), __p_(__p)
-{
- if (0 < __p_ && __p_ < 1)
- {
- __r0_ = static_cast<result_type>((__t_ + 1) * __p_);
- __pr_ = _VSTD::exp(__libcpp_lgamma(__t_ + 1.) -
- __libcpp_lgamma(__r0_ + 1.) -
- __libcpp_lgamma(__t_ - __r0_ + 1.) + __r0_ * _VSTD::log(__p_) +
- (__t_ - __r0_) * _VSTD::log(1 - __p_));
- __odds_ratio_ = __p_ / (1 - __p_);
- }
-}
-
-// Reference: Kemp, C.D. (1986). `A modal method for generating binomial
-// variables', Commun. Statist. - Theor. Meth. 15(3), 805-813.
-template<class _IntType>
-template<class _URNG>
-_IntType
-binomial_distribution<_IntType>::operator()(_URNG& __g, const param_type& __pr)
-{
- if (__pr.__t_ == 0 || __pr.__p_ == 0)
- return 0;
- if (__pr.__p_ == 1)
- return __pr.__t_;
- uniform_real_distribution<double> __gen;
- double __u = __gen(__g) - __pr.__pr_;
- if (__u < 0)
- return __pr.__r0_;
- double __pu = __pr.__pr_;
- double __pd = __pu;
- result_type __ru = __pr.__r0_;
- result_type __rd = __ru;
- while (true)
- {
- bool __break = true;
- if (__rd >= 1)
- {
- __pd *= __rd / (__pr.__odds_ratio_ * (__pr.__t_ - __rd + 1));
- __u -= __pd;
- __break = false;
- if (__u < 0)
- return __rd - 1;
- }
- if ( __rd != 0 )
- --__rd;
- ++__ru;
- if (__ru <= __pr.__t_)
- {
- __pu *= (__pr.__t_ - __ru + 1) * __pr.__odds_ratio_ / __ru;
- __u -= __pu;
- __break = false;
- if (__u < 0)
- return __ru;
- }
- if (__break)
- return 0;
- }
-}
-
-template <class _CharT, class _Traits, class _IntType>
-basic_ostream<_CharT, _Traits>&
-operator<<(basic_ostream<_CharT, _Traits>& __os,
- const binomial_distribution<_IntType>& __x)
-{
- __save_flags<_CharT, _Traits> __lx(__os);
- typedef basic_ostream<_CharT, _Traits> _OStream;
- __os.flags(_OStream::dec | _OStream::left | _OStream::fixed |
- _OStream::scientific);
- _CharT __sp = __os.widen(' ');
- __os.fill(__sp);
- return __os << __x.t() << __sp << __x.p();
-}
-
-template <class _CharT, class _Traits, class _IntType>
-basic_istream<_CharT, _Traits>&
-operator>>(basic_istream<_CharT, _Traits>& __is,
- binomial_distribution<_IntType>& __x)
-{
- typedef binomial_distribution<_IntType> _Eng;
- typedef typename _Eng::result_type result_type;
- typedef typename _Eng::param_type param_type;
- __save_flags<_CharT, _Traits> __lx(__is);
- typedef basic_istream<_CharT, _Traits> _Istream;
- __is.flags(_Istream::dec | _Istream::skipws);
- result_type __t;
- double __p;
- __is >> __t >> __p;
- if (!__is.fail())
- __x.param(param_type(__t, __p));
- return __is;
-}
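A sketch of the distribution in use; per the Kemp reference above, sampling starts at the modal value __r0_ and walks outward. The parameters (t = 10 trials, p = 0.5) and seed are illustrative.

    #include <iostream>
    #include <random>

    int main() {
        std::mt19937 eng(17u);
        std::binomial_distribution<int> dist(10, 0.5);
        std::cout << dist(eng) << '\n';             // integer in [0, 10]
    }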
-
-// exponential_distribution
-
-template<class _RealType = double>
-class _LIBCPP_TEMPLATE_VIS exponential_distribution
-{
-public:
- // types
- typedef _RealType result_type;
-
- class _LIBCPP_TEMPLATE_VIS param_type
- {
- result_type __lambda_;
- public:
- typedef exponential_distribution distribution_type;
-
- _LIBCPP_INLINE_VISIBILITY
- explicit param_type(result_type __lambda = 1) : __lambda_(__lambda) {}
-
- _LIBCPP_INLINE_VISIBILITY
- result_type lambda() const {return __lambda_;}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const param_type& __x, const param_type& __y)
- {return __x.__lambda_ == __y.__lambda_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const param_type& __x, const param_type& __y)
- {return !(__x == __y);}
- };
-
-private:
- param_type __p_;
-
-public:
- // constructors and reset functions
-#ifndef _LIBCPP_CXX03_LANG
- _LIBCPP_INLINE_VISIBILITY
- exponential_distribution() : exponential_distribution(1) {}
- _LIBCPP_INLINE_VISIBILITY
- explicit exponential_distribution(result_type __lambda)
- : __p_(param_type(__lambda)) {}
-#else
- _LIBCPP_INLINE_VISIBILITY
- explicit exponential_distribution(result_type __lambda = 1)
- : __p_(param_type(__lambda)) {}
-#endif
- _LIBCPP_INLINE_VISIBILITY
- explicit exponential_distribution(const param_type& __p) : __p_(__p) {}
- _LIBCPP_INLINE_VISIBILITY
- void reset() {}
-
- // generating functions
- template<class _URNG>
- _LIBCPP_INLINE_VISIBILITY
- result_type operator()(_URNG& __g)
- {return (*this)(__g, __p_);}
- template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p);
-
- // property functions
- _LIBCPP_INLINE_VISIBILITY
- result_type lambda() const {return __p_.lambda();}
-
- _LIBCPP_INLINE_VISIBILITY
- param_type param() const {return __p_;}
- _LIBCPP_INLINE_VISIBILITY
- void param(const param_type& __p) {__p_ = __p;}
-
- _LIBCPP_INLINE_VISIBILITY
- result_type min() const {return 0;}
- _LIBCPP_INLINE_VISIBILITY
- result_type max() const {return numeric_limits<result_type>::infinity();}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const exponential_distribution& __x,
- const exponential_distribution& __y)
- {return __x.__p_ == __y.__p_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const exponential_distribution& __x,
- const exponential_distribution& __y)
- {return !(__x == __y);}
-};
-
-template <class _RealType>
-template<class _URNG>
-_RealType
-exponential_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p)
-{
- return -_VSTD::log
- (
- result_type(1) -
- _VSTD::generate_canonical<result_type,
- numeric_limits<result_type>::digits>(__g)
- )
- / __p.lambda();
-}
-
-template <class _CharT, class _Traits, class _RealType>
-basic_ostream<_CharT, _Traits>&
-operator<<(basic_ostream<_CharT, _Traits>& __os,
- const exponential_distribution<_RealType>& __x)
-{
- __save_flags<_CharT, _Traits> __lx(__os);
- typedef basic_ostream<_CharT, _Traits> _OStream;
- __os.flags(_OStream::dec | _OStream::left | _OStream::fixed |
- _OStream::scientific);
- return __os << __x.lambda();
-}
-
-template <class _CharT, class _Traits, class _RealType>
-basic_istream<_CharT, _Traits>&
-operator>>(basic_istream<_CharT, _Traits>& __is,
- exponential_distribution<_RealType>& __x)
-{
- typedef exponential_distribution<_RealType> _Eng;
- typedef typename _Eng::result_type result_type;
- typedef typename _Eng::param_type param_type;
- __save_flags<_CharT, _Traits> __lx(__is);
- typedef basic_istream<_CharT, _Traits> _Istream;
- __is.flags(_Istream::dec | _Istream::skipws);
- result_type __lambda;
- __is >> __lambda;
- if (!__is.fail())
- __x.param(param_type(__lambda));
- return __is;
-}
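A sketch: the operator() above is inverse-CDF sampling, -log(1 - U) / lambda with U uniform in [0, 1). Lambda and the seed are illustrative.

    #include <iostream>
    #include <random>

    int main() {
        std::mt19937 eng(3u);
        std::exponential_distribution<double> dist(2.0);   // lambda = 2
        std::cout << dist(eng) << '\n';
    }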
-
-// normal_distribution
-
-template<class _RealType = double>
-class _LIBCPP_TEMPLATE_VIS normal_distribution
-{
-public:
- // types
- typedef _RealType result_type;
-
- class _LIBCPP_TEMPLATE_VIS param_type
- {
- result_type __mean_;
- result_type __stddev_;
- public:
- typedef normal_distribution distribution_type;
-
- _LIBCPP_INLINE_VISIBILITY
- explicit param_type(result_type __mean = 0, result_type __stddev = 1)
- : __mean_(__mean), __stddev_(__stddev) {}
-
- _LIBCPP_INLINE_VISIBILITY
- result_type mean() const {return __mean_;}
- _LIBCPP_INLINE_VISIBILITY
- result_type stddev() const {return __stddev_;}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const param_type& __x, const param_type& __y)
- {return __x.__mean_ == __y.__mean_ && __x.__stddev_ == __y.__stddev_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const param_type& __x, const param_type& __y)
- {return !(__x == __y);}
- };
-
-private:
- param_type __p_;
- result_type _V_;
- bool _V_hot_;
-
-public:
- // constructors and reset functions
-#ifndef _LIBCPP_CXX03_LANG
- _LIBCPP_INLINE_VISIBILITY
- normal_distribution() : normal_distribution(0) {}
- _LIBCPP_INLINE_VISIBILITY
- explicit normal_distribution(result_type __mean, result_type __stddev = 1)
- : __p_(param_type(__mean, __stddev)), _V_hot_(false) {}
-#else
- _LIBCPP_INLINE_VISIBILITY
- explicit normal_distribution(result_type __mean = 0,
- result_type __stddev = 1)
- : __p_(param_type(__mean, __stddev)), _V_hot_(false) {}
-#endif
- _LIBCPP_INLINE_VISIBILITY
- explicit normal_distribution(const param_type& __p)
- : __p_(__p), _V_hot_(false) {}
- _LIBCPP_INLINE_VISIBILITY
- void reset() {_V_hot_ = false;}
-
- // generating functions
- template<class _URNG>
- _LIBCPP_INLINE_VISIBILITY
- result_type operator()(_URNG& __g)
- {return (*this)(__g, __p_);}
- template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p);
-
- // property functions
- _LIBCPP_INLINE_VISIBILITY
- result_type mean() const {return __p_.mean();}
- _LIBCPP_INLINE_VISIBILITY
- result_type stddev() const {return __p_.stddev();}
-
- _LIBCPP_INLINE_VISIBILITY
- param_type param() const {return __p_;}
- _LIBCPP_INLINE_VISIBILITY
- void param(const param_type& __p) {__p_ = __p;}
-
- _LIBCPP_INLINE_VISIBILITY
- result_type min() const {return -numeric_limits<result_type>::infinity();}
- _LIBCPP_INLINE_VISIBILITY
- result_type max() const {return numeric_limits<result_type>::infinity();}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const normal_distribution& __x,
- const normal_distribution& __y)
- {return __x.__p_ == __y.__p_ && __x._V_hot_ == __y._V_hot_ &&
- (!__x._V_hot_ || __x._V_ == __y._V_);}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const normal_distribution& __x,
- const normal_distribution& __y)
- {return !(__x == __y);}
-
- template <class _CharT, class _Traits, class _RT>
- friend
- basic_ostream<_CharT, _Traits>&
- operator<<(basic_ostream<_CharT, _Traits>& __os,
- const normal_distribution<_RT>& __x);
-
- template <class _CharT, class _Traits, class _RT>
- friend
- basic_istream<_CharT, _Traits>&
- operator>>(basic_istream<_CharT, _Traits>& __is,
- normal_distribution<_RT>& __x);
-};
-
-template <class _RealType>
-template<class _URNG>
-_RealType
-normal_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p)
-{
- result_type _Up;
- if (_V_hot_)
- {
- _V_hot_ = false;
- _Up = _V_;
- }
- else
- {
- uniform_real_distribution<result_type> _Uni(-1, 1);
- result_type __u;
- result_type __v;
- result_type __s;
- do
- {
- __u = _Uni(__g);
- __v = _Uni(__g);
- __s = __u * __u + __v * __v;
- } while (__s > 1 || __s == 0);
- result_type _Fp = _VSTD::sqrt(-2 * _VSTD::log(__s) / __s);
- _V_ = __v * _Fp;
- _V_hot_ = true;
- _Up = __u * _Fp;
- }
- return _Up * __p.stddev() + __p.mean();
-}
-
-template <class _CharT, class _Traits, class _RT>
-basic_ostream<_CharT, _Traits>&
-operator<<(basic_ostream<_CharT, _Traits>& __os,
- const normal_distribution<_RT>& __x)
-{
- __save_flags<_CharT, _Traits> __lx(__os);
- typedef basic_ostream<_CharT, _Traits> _OStream;
- __os.flags(_OStream::dec | _OStream::left | _OStream::fixed |
- _OStream::scientific);
- _CharT __sp = __os.widen(' ');
- __os.fill(__sp);
- __os << __x.mean() << __sp << __x.stddev() << __sp << __x._V_hot_;
- if (__x._V_hot_)
- __os << __sp << __x._V_;
- return __os;
-}
-
-template <class _CharT, class _Traits, class _RT>
-basic_istream<_CharT, _Traits>&
-operator>>(basic_istream<_CharT, _Traits>& __is,
- normal_distribution<_RT>& __x)
-{
- typedef normal_distribution<_RT> _Eng;
- typedef typename _Eng::result_type result_type;
- typedef typename _Eng::param_type param_type;
- __save_flags<_CharT, _Traits> __lx(__is);
- typedef basic_istream<_CharT, _Traits> _Istream;
- __is.flags(_Istream::dec | _Istream::skipws);
- result_type __mean;
- result_type __stddev;
- result_type _Vp = 0;
- bool _V_hot = false;
- __is >> __mean >> __stddev >> _V_hot;
- if (_V_hot)
- __is >> _Vp;
- if (!__is.fail())
- {
- __x.param(param_type(__mean, __stddev));
- __x._V_hot_ = _V_hot;
- __x._V_ = _Vp;
- }
- return __is;
-}
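A sketch: the operator() above is the Marsaglia polar method, which produces variates in pairs; the second variate is cached in _V_ and the next call returns it without consuming engine output. Parameters and seed are illustrative.

    #include <iostream>
    #include <random>

    int main() {
        std::mt19937 eng(8u);
        std::normal_distribution<double> dist(0.0, 1.0);
        double a = dist(eng);                       // runs the rejection loop, caches the twin
        double b = dist(eng);                       // served from the cache
        std::cout << a << ' ' << b << '\n';
    }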
-
-// lognormal_distribution
-
-template<class _RealType = double>
-class _LIBCPP_TEMPLATE_VIS lognormal_distribution
-{
-public:
- // types
- typedef _RealType result_type;
-
- class _LIBCPP_TEMPLATE_VIS param_type
- {
- normal_distribution<result_type> __nd_;
- public:
- typedef lognormal_distribution distribution_type;
-
- _LIBCPP_INLINE_VISIBILITY
- explicit param_type(result_type __m = 0, result_type __s = 1)
- : __nd_(__m, __s) {}
-
- _LIBCPP_INLINE_VISIBILITY
- result_type m() const {return __nd_.mean();}
- _LIBCPP_INLINE_VISIBILITY
- result_type s() const {return __nd_.stddev();}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const param_type& __x, const param_type& __y)
- {return __x.__nd_ == __y.__nd_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const param_type& __x, const param_type& __y)
- {return !(__x == __y);}
- friend class lognormal_distribution;
-
- template <class _CharT, class _Traits, class _RT>
- friend
- basic_ostream<_CharT, _Traits>&
- operator<<(basic_ostream<_CharT, _Traits>& __os,
- const lognormal_distribution<_RT>& __x);
-
- template <class _CharT, class _Traits, class _RT>
- friend
- basic_istream<_CharT, _Traits>&
- operator>>(basic_istream<_CharT, _Traits>& __is,
- lognormal_distribution<_RT>& __x);
- };
-
-private:
- param_type __p_;
-
-public:
- // constructor and reset functions
-#ifndef _LIBCPP_CXX03_LANG
- _LIBCPP_INLINE_VISIBILITY
- lognormal_distribution() : lognormal_distribution(0) {}
- _LIBCPP_INLINE_VISIBILITY
- explicit lognormal_distribution(result_type __m, result_type __s = 1)
- : __p_(param_type(__m, __s)) {}
-#else
- _LIBCPP_INLINE_VISIBILITY
- explicit lognormal_distribution(result_type __m = 0,
- result_type __s = 1)
- : __p_(param_type(__m, __s)) {}
-#endif
- _LIBCPP_INLINE_VISIBILITY
- explicit lognormal_distribution(const param_type& __p)
- : __p_(__p) {}
- _LIBCPP_INLINE_VISIBILITY
- void reset() {__p_.__nd_.reset();}
-
- // generating functions
- template<class _URNG>
- _LIBCPP_INLINE_VISIBILITY
- result_type operator()(_URNG& __g)
- {return (*this)(__g, __p_);}
- template<class _URNG>
- _LIBCPP_INLINE_VISIBILITY
- result_type operator()(_URNG& __g, const param_type& __p)
- {return _VSTD::exp(const_cast<normal_distribution<result_type>&>(__p.__nd_)(__g));}
-
- // property functions
- _LIBCPP_INLINE_VISIBILITY
- result_type m() const {return __p_.m();}
- _LIBCPP_INLINE_VISIBILITY
- result_type s() const {return __p_.s();}
-
- _LIBCPP_INLINE_VISIBILITY
- param_type param() const {return __p_;}
- _LIBCPP_INLINE_VISIBILITY
- void param(const param_type& __p) {__p_ = __p;}
-
- _LIBCPP_INLINE_VISIBILITY
- result_type min() const {return 0;}
- _LIBCPP_INLINE_VISIBILITY
- result_type max() const {return numeric_limits<result_type>::infinity();}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const lognormal_distribution& __x,
- const lognormal_distribution& __y)
- {return __x.__p_ == __y.__p_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const lognormal_distribution& __x,
- const lognormal_distribution& __y)
- {return !(__x == __y);}
-
- template <class _CharT, class _Traits, class _RT>
- friend
- basic_ostream<_CharT, _Traits>&
- operator<<(basic_ostream<_CharT, _Traits>& __os,
- const lognormal_distribution<_RT>& __x);
-
- template <class _CharT, class _Traits, class _RT>
- friend
- basic_istream<_CharT, _Traits>&
- operator>>(basic_istream<_CharT, _Traits>& __is,
- lognormal_distribution<_RT>& __x);
-};
-
-template <class _CharT, class _Traits, class _RT>
-inline _LIBCPP_INLINE_VISIBILITY
-basic_ostream<_CharT, _Traits>&
-operator<<(basic_ostream<_CharT, _Traits>& __os,
- const lognormal_distribution<_RT>& __x)
-{
- return __os << __x.__p_.__nd_;
-}
-
-template <class _CharT, class _Traits, class _RT>
-inline _LIBCPP_INLINE_VISIBILITY
-basic_istream<_CharT, _Traits>&
-operator>>(basic_istream<_CharT, _Traits>& __is,
- lognormal_distribution<_RT>& __x)
-{
- return __is >> __x.__p_.__nd_;
-}
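A sketch: per the operator() above, a lognormal variate is exp(N) for N drawn from the wrapped normal_distribution(m, s). Parameters and seed are illustrative.

    #include <iostream>
    #include <random>

    int main() {
        std::mt19937 eng(4u);
        std::lognormal_distribution<double> dist(0.0, 1.0);  // m = 0, s = 1
        std::cout << dist(eng) << '\n';                      // always > 0
    }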
-
-// poisson_distribution
-
-template<class _IntType = int>
-class _LIBCPP_TEMPLATE_VIS poisson_distribution
-{
-public:
- // types
- typedef _IntType result_type;
-
- class _LIBCPP_TEMPLATE_VIS param_type
- {
- double __mean_;
- double __s_;
- double __d_;
- double __l_;
- double __omega_;
- double __c0_;
- double __c1_;
- double __c2_;
- double __c3_;
- double __c_;
-
- public:
- typedef poisson_distribution distribution_type;
-
- explicit param_type(double __mean = 1.0);
-
- _LIBCPP_INLINE_VISIBILITY
- double mean() const {return __mean_;}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const param_type& __x, const param_type& __y)
- {return __x.__mean_ == __y.__mean_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const param_type& __x, const param_type& __y)
- {return !(__x == __y);}
-
- friend class poisson_distribution;
- };
-
-private:
- param_type __p_;
-
-public:
- // constructors and reset functions
-#ifndef _LIBCPP_CXX03_LANG
- _LIBCPP_INLINE_VISIBILITY
- poisson_distribution() : poisson_distribution(1.0) {}
- _LIBCPP_INLINE_VISIBILITY
- explicit poisson_distribution(double __mean)
- : __p_(__mean) {}
-#else
- _LIBCPP_INLINE_VISIBILITY
- explicit poisson_distribution(double __mean = 1.0)
- : __p_(__mean) {}
-#endif
- _LIBCPP_INLINE_VISIBILITY
- explicit poisson_distribution(const param_type& __p) : __p_(__p) {}
- _LIBCPP_INLINE_VISIBILITY
- void reset() {}
-
- // generating functions
- template<class _URNG>
- _LIBCPP_INLINE_VISIBILITY
- result_type operator()(_URNG& __g)
- {return (*this)(__g, __p_);}
- template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p);
-
- // property functions
- _LIBCPP_INLINE_VISIBILITY
- double mean() const {return __p_.mean();}
-
- _LIBCPP_INLINE_VISIBILITY
- param_type param() const {return __p_;}
- _LIBCPP_INLINE_VISIBILITY
- void param(const param_type& __p) {__p_ = __p;}
-
- _LIBCPP_INLINE_VISIBILITY
- result_type min() const {return 0;}
- _LIBCPP_INLINE_VISIBILITY
- result_type max() const {return numeric_limits<result_type>::max();}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const poisson_distribution& __x,
- const poisson_distribution& __y)
- {return __x.__p_ == __y.__p_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const poisson_distribution& __x,
- const poisson_distribution& __y)
- {return !(__x == __y);}
-};
-
-template<class _IntType>
-poisson_distribution<_IntType>::param_type::param_type(double __mean)
-    // According to the standard, `inf` is a valid input, but it causes the
-    // distribution to hang, so we replace it with the maximum representable
-    // mean.
- : __mean_(isinf(__mean) ? numeric_limits<double>::max() : __mean)
-{
- if (__mean_ < 10)
- {
- __s_ = 0;
- __d_ = 0;
- __l_ = _VSTD::exp(-__mean_);
- __omega_ = 0;
- __c3_ = 0;
- __c2_ = 0;
- __c1_ = 0;
- __c0_ = 0;
- __c_ = 0;
- }
- else
- {
- __s_ = _VSTD::sqrt(__mean_);
- __d_ = 6 * __mean_ * __mean_;
- __l_ = _VSTD::trunc(__mean_ - 1.1484);
- __omega_ = .3989423 / __s_;
- double __b1_ = .4166667E-1 / __mean_;
- double __b2_ = .3 * __b1_ * __b1_;
- __c3_ = .1428571 * __b1_ * __b2_;
- __c2_ = __b2_ - 15. * __c3_;
- __c1_ = __b1_ - 6. * __b2_ + 45. * __c3_;
- __c0_ = 1. - __b1_ + 3. * __b2_ - 15. * __c3_;
- __c_ = .1069 / __mean_;
- }
-}
-
-template <class _IntType>
-template<class _URNG>
-_IntType
-poisson_distribution<_IntType>::operator()(_URNG& __urng, const param_type& __pr)
-{
- double __tx;
- uniform_real_distribution<double> __urd;
- if (__pr.__mean_ < 10)
- {
- __tx = 0;
- for (double __p = __urd(__urng); __p > __pr.__l_; ++__tx)
- __p *= __urd(__urng);
- }
- else
- {
- double __difmuk;
- double __g = __pr.__mean_ + __pr.__s_ * normal_distribution<double>()(__urng);
- double __u;
- if (__g > 0)
- {
- __tx = _VSTD::trunc(__g);
- if (__tx >= __pr.__l_)
- return _VSTD::__clamp_to_integral<result_type>(__tx);
- __difmuk = __pr.__mean_ - __tx;
- __u = __urd(__urng);
- if (__pr.__d_ * __u >= __difmuk * __difmuk * __difmuk)
- return _VSTD::__clamp_to_integral<result_type>(__tx);
- }
- exponential_distribution<double> __edist;
- for (bool __using_exp_dist = false; true; __using_exp_dist = true)
- {
- double __e;
- if (__using_exp_dist || __g <= 0)
- {
- double __t;
- do
- {
- __e = __edist(__urng);
- __u = __urd(__urng);
- __u += __u - 1;
- __t = 1.8 + (__u < 0 ? -__e : __e);
- } while (__t <= -.6744);
- __tx = _VSTD::trunc(__pr.__mean_ + __pr.__s_ * __t);
- __difmuk = __pr.__mean_ - __tx;
- __using_exp_dist = true;
- }
- double __px;
- double __py;
- if (__tx < 10 && __tx >= 0)
- {
- const double __fac[] = {1, 1, 2, 6, 24, 120, 720, 5040,
- 40320, 362880};
- __px = -__pr.__mean_;
- __py = _VSTD::pow(__pr.__mean_, (double)__tx) / __fac[static_cast<int>(__tx)];
- }
- else
- {
- double __del = .8333333E-1 / __tx;
- __del -= 4.8 * __del * __del * __del;
- double __v = __difmuk / __tx;
- if (_VSTD::abs(__v) > 0.25)
- __px = __tx * _VSTD::log(1 + __v) - __difmuk - __del;
- else
- __px = __tx * __v * __v * (((((((.1250060 * __v + -.1384794) *
- __v + .1421878) * __v + -.1661269) * __v + .2000118) *
- __v + -.2500068) * __v + .3333333) * __v + -.5) - __del;
- __py = .3989423 / _VSTD::sqrt(__tx);
- }
- double __r = (0.5 - __difmuk) / __pr.__s_;
- double __r2 = __r * __r;
- double __fx = -0.5 * __r2;
- double __fy = __pr.__omega_ * (((__pr.__c3_ * __r2 + __pr.__c2_) *
- __r2 + __pr.__c1_) * __r2 + __pr.__c0_);
- if (__using_exp_dist)
- {
- if (__pr.__c_ * _VSTD::abs(__u) <= __py * _VSTD::exp(__px + __e) -
- __fy * _VSTD::exp(__fx + __e))
- break;
- }
- else
- {
- if (__fy - __u * __fy <= __py * _VSTD::exp(__px - __fx))
- break;
- }
- }
- }
- return _VSTD::__clamp_to_integral<result_type>(__tx);
-}
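
// The mean < 10 branch above is the classic product-of-uniforms Poisson
// method: count how many uniform draws can be multiplied together before the
// running product falls below exp(-mean). A minimal standalone sketch of just
// that branch, assuming nothing beyond <random>; the helper name
// sample_poisson_small is illustrative, not part of libc++:

#include <cmath>
#include <random>

template <class URNG>
int sample_poisson_small(URNG& g, double mean) {
    std::uniform_real_distribution<double> u01;
    const double limit = std::exp(-mean); // stop once the product drops below exp(-mean)
    int count = 0;
    for (double p = u01(g); p > limit; ++count)
        p *= u01(g); // fold in one more uniform draw
    return count;
}

// Usage sketch: std::mt19937 g(42); int k = sample_poisson_small(g, 4.0);
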
-
-template <class _CharT, class _Traits, class _IntType>
-basic_ostream<_CharT, _Traits>&
-operator<<(basic_ostream<_CharT, _Traits>& __os,
- const poisson_distribution<_IntType>& __x)
-{
- __save_flags<_CharT, _Traits> __lx(__os);
- typedef basic_ostream<_CharT, _Traits> _OStream;
- __os.flags(_OStream::dec | _OStream::left | _OStream::fixed |
- _OStream::scientific);
- return __os << __x.mean();
-}
-
-template <class _CharT, class _Traits, class _IntType>
-basic_istream<_CharT, _Traits>&
-operator>>(basic_istream<_CharT, _Traits>& __is,
- poisson_distribution<_IntType>& __x)
-{
- typedef poisson_distribution<_IntType> _Eng;
- typedef typename _Eng::param_type param_type;
- __save_flags<_CharT, _Traits> __lx(__is);
- typedef basic_istream<_CharT, _Traits> _Istream;
- __is.flags(_Istream::dec | _Istream::skipws);
- double __mean;
- __is >> __mean;
- if (!__is.fail())
- __x.param(param_type(__mean));
- return __is;
-}
-
-// weibull_distribution
-
-template<class _RealType = double>
-class _LIBCPP_TEMPLATE_VIS weibull_distribution
-{
-public:
- // types
- typedef _RealType result_type;
-
- class _LIBCPP_TEMPLATE_VIS param_type
- {
- result_type __a_;
- result_type __b_;
- public:
- typedef weibull_distribution distribution_type;
-
- _LIBCPP_INLINE_VISIBILITY
- explicit param_type(result_type __a = 1, result_type __b = 1)
- : __a_(__a), __b_(__b) {}
-
- _LIBCPP_INLINE_VISIBILITY
- result_type a() const {return __a_;}
- _LIBCPP_INLINE_VISIBILITY
- result_type b() const {return __b_;}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const param_type& __x, const param_type& __y)
- {return __x.__a_ == __y.__a_ && __x.__b_ == __y.__b_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const param_type& __x, const param_type& __y)
- {return !(__x == __y);}
- };
-
-private:
- param_type __p_;
-
-public:
- // constructor and reset functions
-#ifndef _LIBCPP_CXX03_LANG
- _LIBCPP_INLINE_VISIBILITY
- weibull_distribution() : weibull_distribution(1) {}
- _LIBCPP_INLINE_VISIBILITY
- explicit weibull_distribution(result_type __a, result_type __b = 1)
- : __p_(param_type(__a, __b)) {}
-#else
- _LIBCPP_INLINE_VISIBILITY
- explicit weibull_distribution(result_type __a = 1, result_type __b = 1)
- : __p_(param_type(__a, __b)) {}
-#endif
- _LIBCPP_INLINE_VISIBILITY
- explicit weibull_distribution(const param_type& __p)
- : __p_(__p) {}
- _LIBCPP_INLINE_VISIBILITY
- void reset() {}
-
- // generating functions
- template<class _URNG>
- _LIBCPP_INLINE_VISIBILITY
- result_type operator()(_URNG& __g)
- {return (*this)(__g, __p_);}
- template<class _URNG>
- _LIBCPP_INLINE_VISIBILITY
- result_type operator()(_URNG& __g, const param_type& __p)
- {return __p.b() *
- _VSTD::pow(exponential_distribution<result_type>()(__g), 1/__p.a());}
-
- // property functions
- _LIBCPP_INLINE_VISIBILITY
- result_type a() const {return __p_.a();}
- _LIBCPP_INLINE_VISIBILITY
- result_type b() const {return __p_.b();}
-
- _LIBCPP_INLINE_VISIBILITY
- param_type param() const {return __p_;}
- _LIBCPP_INLINE_VISIBILITY
- void param(const param_type& __p) {__p_ = __p;}
-
- _LIBCPP_INLINE_VISIBILITY
- result_type min() const {return 0;}
- _LIBCPP_INLINE_VISIBILITY
- result_type max() const {return numeric_limits<result_type>::infinity();}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const weibull_distribution& __x,
- const weibull_distribution& __y)
- {return __x.__p_ == __y.__p_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const weibull_distribution& __x,
- const weibull_distribution& __y)
- {return !(__x == __y);}
-};
-
-template <class _CharT, class _Traits, class _RT>
-basic_ostream<_CharT, _Traits>&
-operator<<(basic_ostream<_CharT, _Traits>& __os,
- const weibull_distribution<_RT>& __x)
-{
- __save_flags<_CharT, _Traits> __lx(__os);
- typedef basic_ostream<_CharT, _Traits> _OStream;
- __os.flags(_OStream::dec | _OStream::left | _OStream::fixed |
- _OStream::scientific);
- _CharT __sp = __os.widen(' ');
- __os.fill(__sp);
- __os << __x.a() << __sp << __x.b();
- return __os;
-}
-
-template <class _CharT, class _Traits, class _RT>
-basic_istream<_CharT, _Traits>&
-operator>>(basic_istream<_CharT, _Traits>& __is,
- weibull_distribution<_RT>& __x)
-{
- typedef weibull_distribution<_RT> _Eng;
- typedef typename _Eng::result_type result_type;
- typedef typename _Eng::param_type param_type;
- __save_flags<_CharT, _Traits> __lx(__is);
- typedef basic_istream<_CharT, _Traits> _Istream;
- __is.flags(_Istream::dec | _Istream::skipws);
- result_type __a;
- result_type __b;
- __is >> __a >> __b;
- if (!__is.fail())
- __x.param(param_type(__a, __b));
- return __is;
-}
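
// The weibull generating function above is a one-line inverse transform: if E
// is a standard exponential variate, b * E^(1/a) has CDF 1 - exp(-(x/b)^a).
// The same transform written directly against a uniform draw, as a sketch
// (the helper name sample_weibull is illustrative):

#include <cmath>
#include <random>

template <class URNG>
double sample_weibull(URNG& g, double a, double b) {
    std::uniform_real_distribution<double> u01;
    double e = -std::log(1.0 - u01(g)); // standard exponential variate
    return b * std::pow(e, 1.0 / a);
}
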
-
-template<class _RealType = double>
-class _LIBCPP_TEMPLATE_VIS extreme_value_distribution
-{
-public:
- // types
- typedef _RealType result_type;
-
- class _LIBCPP_TEMPLATE_VIS param_type
- {
- result_type __a_;
- result_type __b_;
- public:
- typedef extreme_value_distribution distribution_type;
-
- _LIBCPP_INLINE_VISIBILITY
- explicit param_type(result_type __a = 0, result_type __b = 1)
- : __a_(__a), __b_(__b) {}
-
- _LIBCPP_INLINE_VISIBILITY
- result_type a() const {return __a_;}
- _LIBCPP_INLINE_VISIBILITY
- result_type b() const {return __b_;}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const param_type& __x, const param_type& __y)
- {return __x.__a_ == __y.__a_ && __x.__b_ == __y.__b_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const param_type& __x, const param_type& __y)
- {return !(__x == __y);}
- };
-
-private:
- param_type __p_;
-
-public:
- // constructor and reset functions
-#ifndef _LIBCPP_CXX03_LANG
- _LIBCPP_INLINE_VISIBILITY
- extreme_value_distribution() : extreme_value_distribution(0) {}
- _LIBCPP_INLINE_VISIBILITY
- explicit extreme_value_distribution(result_type __a, result_type __b = 1)
- : __p_(param_type(__a, __b)) {}
-#else
- _LIBCPP_INLINE_VISIBILITY
- explicit extreme_value_distribution(result_type __a = 0,
- result_type __b = 1)
- : __p_(param_type(__a, __b)) {}
-#endif
- _LIBCPP_INLINE_VISIBILITY
- explicit extreme_value_distribution(const param_type& __p)
- : __p_(__p) {}
- _LIBCPP_INLINE_VISIBILITY
- void reset() {}
-
- // generating functions
- template<class _URNG>
- _LIBCPP_INLINE_VISIBILITY
- result_type operator()(_URNG& __g)
- {return (*this)(__g, __p_);}
- template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p);
-
- // property functions
- _LIBCPP_INLINE_VISIBILITY
- result_type a() const {return __p_.a();}
- _LIBCPP_INLINE_VISIBILITY
- result_type b() const {return __p_.b();}
-
- _LIBCPP_INLINE_VISIBILITY
- param_type param() const {return __p_;}
- _LIBCPP_INLINE_VISIBILITY
- void param(const param_type& __p) {__p_ = __p;}
-
- _LIBCPP_INLINE_VISIBILITY
- result_type min() const {return -numeric_limits<result_type>::infinity();}
- _LIBCPP_INLINE_VISIBILITY
- result_type max() const {return numeric_limits<result_type>::infinity();}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const extreme_value_distribution& __x,
- const extreme_value_distribution& __y)
- {return __x.__p_ == __y.__p_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const extreme_value_distribution& __x,
- const extreme_value_distribution& __y)
- {return !(__x == __y);}
-};
-
-template<class _RealType>
-template<class _URNG>
-_RealType
-extreme_value_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p)
-{
- return __p.a() - __p.b() *
- _VSTD::log(-_VSTD::log(1-uniform_real_distribution<result_type>()(__g)));
-}
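
// This is a direct inversion of the Gumbel CDF; 1 - u is fed to the inner log
// in place of u, which is equally uniform. The derivation, for reference:
//
//   F(x) = \exp\!\left(-e^{-(x-a)/b}\right), \qquad
//   u = F(x) \;\Longrightarrow\; x = a - b\,\log\bigl(-\log u\bigr).
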
-
-template <class _CharT, class _Traits, class _RT>
-basic_ostream<_CharT, _Traits>&
-operator<<(basic_ostream<_CharT, _Traits>& __os,
- const extreme_value_distribution<_RT>& __x)
-{
- __save_flags<_CharT, _Traits> __lx(__os);
- typedef basic_ostream<_CharT, _Traits> _OStream;
- __os.flags(_OStream::dec | _OStream::left | _OStream::fixed |
- _OStream::scientific);
- _CharT __sp = __os.widen(' ');
- __os.fill(__sp);
- __os << __x.a() << __sp << __x.b();
- return __os;
-}
-
-template <class _CharT, class _Traits, class _RT>
-basic_istream<_CharT, _Traits>&
-operator>>(basic_istream<_CharT, _Traits>& __is,
- extreme_value_distribution<_RT>& __x)
-{
- typedef extreme_value_distribution<_RT> _Eng;
- typedef typename _Eng::result_type result_type;
- typedef typename _Eng::param_type param_type;
- __save_flags<_CharT, _Traits> __lx(__is);
- typedef basic_istream<_CharT, _Traits> _Istream;
- __is.flags(_Istream::dec | _Istream::skipws);
- result_type __a;
- result_type __b;
- __is >> __a >> __b;
- if (!__is.fail())
- __x.param(param_type(__a, __b));
- return __is;
-}
-
-// gamma_distribution
-
-template<class _RealType = double>
-class _LIBCPP_TEMPLATE_VIS gamma_distribution
-{
-public:
- // types
- typedef _RealType result_type;
-
- class _LIBCPP_TEMPLATE_VIS param_type
- {
- result_type __alpha_;
- result_type __beta_;
- public:
- typedef gamma_distribution distribution_type;
-
- _LIBCPP_INLINE_VISIBILITY
- explicit param_type(result_type __alpha = 1, result_type __beta = 1)
- : __alpha_(__alpha), __beta_(__beta) {}
-
- _LIBCPP_INLINE_VISIBILITY
- result_type alpha() const {return __alpha_;}
- _LIBCPP_INLINE_VISIBILITY
- result_type beta() const {return __beta_;}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const param_type& __x, const param_type& __y)
- {return __x.__alpha_ == __y.__alpha_ && __x.__beta_ == __y.__beta_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const param_type& __x, const param_type& __y)
- {return !(__x == __y);}
- };
-
-private:
- param_type __p_;
-
-public:
- // constructors and reset functions
-#ifndef _LIBCPP_CXX03_LANG
- _LIBCPP_INLINE_VISIBILITY
- gamma_distribution() : gamma_distribution(1) {}
- _LIBCPP_INLINE_VISIBILITY
- explicit gamma_distribution(result_type __alpha, result_type __beta = 1)
- : __p_(param_type(__alpha, __beta)) {}
-#else
- _LIBCPP_INLINE_VISIBILITY
- explicit gamma_distribution(result_type __alpha = 1,
- result_type __beta = 1)
- : __p_(param_type(__alpha, __beta)) {}
-#endif
- _LIBCPP_INLINE_VISIBILITY
- explicit gamma_distribution(const param_type& __p)
- : __p_(__p) {}
- _LIBCPP_INLINE_VISIBILITY
- void reset() {}
-
- // generating functions
- template<class _URNG>
- _LIBCPP_INLINE_VISIBILITY
- result_type operator()(_URNG& __g)
- {return (*this)(__g, __p_);}
- template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p);
-
- // property functions
- _LIBCPP_INLINE_VISIBILITY
- result_type alpha() const {return __p_.alpha();}
- _LIBCPP_INLINE_VISIBILITY
- result_type beta() const {return __p_.beta();}
-
- _LIBCPP_INLINE_VISIBILITY
- param_type param() const {return __p_;}
- _LIBCPP_INLINE_VISIBILITY
- void param(const param_type& __p) {__p_ = __p;}
-
- _LIBCPP_INLINE_VISIBILITY
- result_type min() const {return 0;}
- _LIBCPP_INLINE_VISIBILITY
- result_type max() const {return numeric_limits<result_type>::infinity();}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const gamma_distribution& __x,
- const gamma_distribution& __y)
- {return __x.__p_ == __y.__p_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const gamma_distribution& __x,
- const gamma_distribution& __y)
- {return !(__x == __y);}
-};
-
-template <class _RealType>
-template<class _URNG>
-_RealType
-gamma_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p)
-{
- result_type __a = __p.alpha();
- uniform_real_distribution<result_type> __gen(0, 1);
- exponential_distribution<result_type> __egen;
- result_type __x;
- if (__a == 1)
- __x = __egen(__g);
- else if (__a > 1)
- {
- const result_type __b = __a - 1;
- const result_type __c = 3 * __a - result_type(0.75);
- while (true)
- {
- const result_type __u = __gen(__g);
- const result_type __v = __gen(__g);
- const result_type __w = __u * (1 - __u);
- if (__w != 0)
- {
- const result_type __y = _VSTD::sqrt(__c / __w) *
- (__u - result_type(0.5));
- __x = __b + __y;
- if (__x >= 0)
- {
- const result_type __z = 64 * __w * __w * __w * __v * __v;
- if (__z <= 1 - 2 * __y * __y / __x)
- break;
- if (_VSTD::log(__z) <= 2 * (__b * _VSTD::log(__x / __b) - __y))
- break;
- }
- }
- }
- }
- else // __a < 1
- {
- while (true)
- {
- const result_type __u = __gen(__g);
- const result_type __es = __egen(__g);
- if (__u <= 1 - __a)
- {
- __x = _VSTD::pow(__u, 1 / __a);
- if (__x <= __es)
- break;
- }
- else
- {
- const result_type __e = -_VSTD::log((1-__u)/__a);
- __x = _VSTD::pow(1 - __a + __a * __e, 1 / __a);
- if (__x <= __e + __es)
- break;
- }
- }
- }
- return __x * __p.beta();
-}
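
// The generator above dispatches on alpha: alpha == 1 falls back to a plain
// exponential draw, alpha > 1 uses a squeeze/rejection scheme around a shifted
// candidate, and alpha < 1 rejects from a combined power-function/exponential
// envelope. The trailing `* __p.beta()` applies the scaling property
// Gamma(alpha, beta) = beta * Gamma(alpha, 1); the sketch below checks that
// empirically (the seed, sample count, and parameter values are illustrative):

#include <iostream>
#include <random>

int main() {
    std::mt19937 g(7);
    std::gamma_distribution<double> direct(2.5, 3.0); // Gamma(alpha, beta)
    std::gamma_distribution<double> unit(2.5, 1.0);   // Gamma(alpha, 1)
    double s1 = 0, s2 = 0;
    const int n = 100000;
    for (int i = 0; i < n; ++i) {
        s1 += direct(g);
        s2 += 3.0 * unit(g); // scale a unit-rate draw by beta
    }
    std::cout << s1 / n << ' ' << s2 / n << '\n'; // both near alpha * beta = 7.5
}
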
-
-template <class _CharT, class _Traits, class _RT>
-basic_ostream<_CharT, _Traits>&
-operator<<(basic_ostream<_CharT, _Traits>& __os,
- const gamma_distribution<_RT>& __x)
-{
- __save_flags<_CharT, _Traits> __lx(__os);
- typedef basic_ostream<_CharT, _Traits> _OStream;
- __os.flags(_OStream::dec | _OStream::left | _OStream::fixed |
- _OStream::scientific);
- _CharT __sp = __os.widen(' ');
- __os.fill(__sp);
- __os << __x.alpha() << __sp << __x.beta();
- return __os;
-}
-
-template <class _CharT, class _Traits, class _RT>
-basic_istream<_CharT, _Traits>&
-operator>>(basic_istream<_CharT, _Traits>& __is,
- gamma_distribution<_RT>& __x)
-{
- typedef gamma_distribution<_RT> _Eng;
- typedef typename _Eng::result_type result_type;
- typedef typename _Eng::param_type param_type;
- __save_flags<_CharT, _Traits> __lx(__is);
- typedef basic_istream<_CharT, _Traits> _Istream;
- __is.flags(_Istream::dec | _Istream::skipws);
- result_type __alpha;
- result_type __beta;
- __is >> __alpha >> __beta;
- if (!__is.fail())
- __x.param(param_type(__alpha, __beta));
- return __is;
-}
-
-// negative_binomial_distribution
-
-template<class _IntType = int>
-class _LIBCPP_TEMPLATE_VIS negative_binomial_distribution
-{
-public:
- // types
- typedef _IntType result_type;
-
- class _LIBCPP_TEMPLATE_VIS param_type
- {
- result_type __k_;
- double __p_;
- public:
- typedef negative_binomial_distribution distribution_type;
-
- _LIBCPP_INLINE_VISIBILITY
- explicit param_type(result_type __k = 1, double __p = 0.5)
- : __k_(__k), __p_(__p) {}
-
- _LIBCPP_INLINE_VISIBILITY
- result_type k() const {return __k_;}
- _LIBCPP_INLINE_VISIBILITY
- double p() const {return __p_;}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const param_type& __x, const param_type& __y)
- {return __x.__k_ == __y.__k_ && __x.__p_ == __y.__p_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const param_type& __x, const param_type& __y)
- {return !(__x == __y);}
- };
-
-private:
- param_type __p_;
-
-public:
- // constructor and reset functions
-#ifndef _LIBCPP_CXX03_LANG
- _LIBCPP_INLINE_VISIBILITY
- negative_binomial_distribution() : negative_binomial_distribution(1) {}
- _LIBCPP_INLINE_VISIBILITY
- explicit negative_binomial_distribution(result_type __k, double __p = 0.5)
- : __p_(__k, __p) {}
-#else
- _LIBCPP_INLINE_VISIBILITY
- explicit negative_binomial_distribution(result_type __k = 1,
- double __p = 0.5)
- : __p_(__k, __p) {}
-#endif
- _LIBCPP_INLINE_VISIBILITY
- explicit negative_binomial_distribution(const param_type& __p) : __p_(__p) {}
- _LIBCPP_INLINE_VISIBILITY
- void reset() {}
-
- // generating functions
- template<class _URNG>
- _LIBCPP_INLINE_VISIBILITY
- result_type operator()(_URNG& __g)
- {return (*this)(__g, __p_);}
- template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p);
-
- // property functions
- _LIBCPP_INLINE_VISIBILITY
- result_type k() const {return __p_.k();}
- _LIBCPP_INLINE_VISIBILITY
- double p() const {return __p_.p();}
-
- _LIBCPP_INLINE_VISIBILITY
- param_type param() const {return __p_;}
- _LIBCPP_INLINE_VISIBILITY
- void param(const param_type& __p) {__p_ = __p;}
-
- _LIBCPP_INLINE_VISIBILITY
- result_type min() const {return 0;}
- _LIBCPP_INLINE_VISIBILITY
- result_type max() const {return numeric_limits<result_type>::max();}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const negative_binomial_distribution& __x,
- const negative_binomial_distribution& __y)
- {return __x.__p_ == __y.__p_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const negative_binomial_distribution& __x,
- const negative_binomial_distribution& __y)
- {return !(__x == __y);}
-};
-
-template <class _IntType>
-template<class _URNG>
-_IntType
-negative_binomial_distribution<_IntType>::operator()(_URNG& __urng, const param_type& __pr)
-{
- result_type __k = __pr.k();
- double __p = __pr.p();
- if (__k <= 21 * __p)
- {
- bernoulli_distribution __gen(__p);
- result_type __f = 0;
- result_type __s = 0;
- while (__s < __k)
- {
- if (__gen(__urng))
- ++__s;
- else
- ++__f;
- }
- return __f;
- }
- return poisson_distribution<result_type>(gamma_distribution<double>
- (__k, (1-__p)/__p)(__urng))(__urng);
-}
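
// The second return path above relies on the gamma-Poisson mixture: if
// Lambda ~ Gamma(k, (1-p)/p) and X | Lambda ~ Poisson(Lambda), then
// X ~ NegativeBinomial(k, p), i.e. the failure count before the k-th success.
// The first branch counts Bernoulli trials directly. A sketch of the mixture
// path in isolation (the helper name sample_nb_mixture is illustrative):

#include <random>

template <class URNG>
int sample_nb_mixture(URNG& g, int k, double p) {
    // Draw a gamma-distributed rate, then a Poisson count with that rate.
    std::gamma_distribution<double> rate(k, (1 - p) / p);
    return std::poisson_distribution<int>(rate(g))(g);
}
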
-
-template <class _CharT, class _Traits, class _IntType>
-basic_ostream<_CharT, _Traits>&
-operator<<(basic_ostream<_CharT, _Traits>& __os,
- const negative_binomial_distribution<_IntType>& __x)
-{
- __save_flags<_CharT, _Traits> __lx(__os);
- typedef basic_ostream<_CharT, _Traits> _OStream;
- __os.flags(_OStream::dec | _OStream::left | _OStream::fixed |
- _OStream::scientific);
- _CharT __sp = __os.widen(' ');
- __os.fill(__sp);
- return __os << __x.k() << __sp << __x.p();
-}
-
-template <class _CharT, class _Traits, class _IntType>
-basic_istream<_CharT, _Traits>&
-operator>>(basic_istream<_CharT, _Traits>& __is,
- negative_binomial_distribution<_IntType>& __x)
-{
- typedef negative_binomial_distribution<_IntType> _Eng;
- typedef typename _Eng::result_type result_type;
- typedef typename _Eng::param_type param_type;
- __save_flags<_CharT, _Traits> __lx(__is);
- typedef basic_istream<_CharT, _Traits> _Istream;
- __is.flags(_Istream::dec | _Istream::skipws);
- result_type __k;
- double __p;
- __is >> __k >> __p;
- if (!__is.fail())
- __x.param(param_type(__k, __p));
- return __is;
-}
-
-// geometric_distribution
-
-template<class _IntType = int>
-class _LIBCPP_TEMPLATE_VIS geometric_distribution
-{
-public:
- // types
- typedef _IntType result_type;
-
- class _LIBCPP_TEMPLATE_VIS param_type
- {
- double __p_;
- public:
- typedef geometric_distribution distribution_type;
-
- _LIBCPP_INLINE_VISIBILITY
- explicit param_type(double __p = 0.5) : __p_(__p) {}
-
- _LIBCPP_INLINE_VISIBILITY
- double p() const {return __p_;}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const param_type& __x, const param_type& __y)
- {return __x.__p_ == __y.__p_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const param_type& __x, const param_type& __y)
- {return !(__x == __y);}
- };
-
-private:
- param_type __p_;
-
-public:
- // constructors and reset functions
-#ifndef _LIBCPP_CXX03_LANG
- _LIBCPP_INLINE_VISIBILITY
- geometric_distribution() : geometric_distribution(0.5) {}
- _LIBCPP_INLINE_VISIBILITY
- explicit geometric_distribution(double __p)
- : __p_(__p) {}
-#else
- _LIBCPP_INLINE_VISIBILITY
- explicit geometric_distribution(double __p = 0.5)
- : __p_(__p) {}
-#endif
- _LIBCPP_INLINE_VISIBILITY
- explicit geometric_distribution(const param_type& __p) : __p_(__p) {}
- _LIBCPP_INLINE_VISIBILITY
- void reset() {}
-
- // generating functions
- template<class _URNG>
- _LIBCPP_INLINE_VISIBILITY
- result_type operator()(_URNG& __g)
- {return (*this)(__g, __p_);}
- template<class _URNG>
- _LIBCPP_INLINE_VISIBILITY
- result_type operator()(_URNG& __g, const param_type& __p)
- {return negative_binomial_distribution<result_type>(1, __p.p())(__g);}
-
- // property functions
- _LIBCPP_INLINE_VISIBILITY
- double p() const {return __p_.p();}
-
- _LIBCPP_INLINE_VISIBILITY
- param_type param() const {return __p_;}
- _LIBCPP_INLINE_VISIBILITY
- void param(const param_type& __p) {__p_ = __p;}
-
- _LIBCPP_INLINE_VISIBILITY
- result_type min() const {return 0;}
- _LIBCPP_INLINE_VISIBILITY
- result_type max() const {return numeric_limits<result_type>::max();}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const geometric_distribution& __x,
- const geometric_distribution& __y)
- {return __x.__p_ == __y.__p_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const geometric_distribution& __x,
- const geometric_distribution& __y)
- {return !(__x == __y);}
-};
-
-template <class _CharT, class _Traits, class _IntType>
-basic_ostream<_CharT, _Traits>&
-operator<<(basic_ostream<_CharT, _Traits>& __os,
- const geometric_distribution<_IntType>& __x)
-{
- __save_flags<_CharT, _Traits> __lx(__os);
- typedef basic_ostream<_CharT, _Traits> _OStream;
- __os.flags(_OStream::dec | _OStream::left | _OStream::fixed |
- _OStream::scientific);
- return __os << __x.p();
-}
-
-template <class _CharT, class _Traits, class _IntType>
-basic_istream<_CharT, _Traits>&
-operator>>(basic_istream<_CharT, _Traits>& __is,
- geometric_distribution<_IntType>& __x)
-{
- typedef geometric_distribution<_IntType> _Eng;
- typedef typename _Eng::param_type param_type;
- __save_flags<_CharT, _Traits> __lx(__is);
- typedef basic_istream<_CharT, _Traits> _Istream;
- __is.flags(_Istream::dec | _Istream::skipws);
- double __p;
- __is >> __p;
- if (!__is.fail())
- __x.param(param_type(__p));
- return __is;
-}
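
// geometric_distribution(p) is negative_binomial_distribution(1, p): the
// number of failures before the first success. An equivalent closed-form
// inversion exists and is shown below purely to illustrate why the two agree;
// it is not what libc++ does (the helper name sample_geometric_inv is
// hypothetical):

#include <cmath>
#include <random>

template <class URNG>
long sample_geometric_inv(URNG& g, double p) {
    std::uniform_real_distribution<double> u01;
    // P(X = k) = (1-p)^k * p; both logs are negative, so the quotient is
    // non-negative and truncation acts as floor.
    return static_cast<long>(std::log(1.0 - u01(g)) / std::log(1.0 - p));
}
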
-
-// chi_squared_distribution
-
-template<class _RealType = double>
-class _LIBCPP_TEMPLATE_VIS chi_squared_distribution
-{
-public:
- // types
- typedef _RealType result_type;
-
- class _LIBCPP_TEMPLATE_VIS param_type
- {
- result_type __n_;
- public:
- typedef chi_squared_distribution distribution_type;
-
- _LIBCPP_INLINE_VISIBILITY
- explicit param_type(result_type __n = 1) : __n_(__n) {}
-
- _LIBCPP_INLINE_VISIBILITY
- result_type n() const {return __n_;}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const param_type& __x, const param_type& __y)
- {return __x.__n_ == __y.__n_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const param_type& __x, const param_type& __y)
- {return !(__x == __y);}
- };
-
-private:
- param_type __p_;
-
-public:
- // constructor and reset functions
-#ifndef _LIBCPP_CXX03_LANG
- _LIBCPP_INLINE_VISIBILITY
- chi_squared_distribution() : chi_squared_distribution(1) {}
- _LIBCPP_INLINE_VISIBILITY
- explicit chi_squared_distribution(result_type __n)
- : __p_(param_type(__n)) {}
-#else
- _LIBCPP_INLINE_VISIBILITY
- explicit chi_squared_distribution(result_type __n = 1)
- : __p_(param_type(__n)) {}
-#endif
- _LIBCPP_INLINE_VISIBILITY
- explicit chi_squared_distribution(const param_type& __p)
- : __p_(__p) {}
- _LIBCPP_INLINE_VISIBILITY
- void reset() {}
-
- // generating functions
- template<class _URNG>
- _LIBCPP_INLINE_VISIBILITY
- result_type operator()(_URNG& __g)
- {return (*this)(__g, __p_);}
- template<class _URNG>
- _LIBCPP_INLINE_VISIBILITY
- result_type operator()(_URNG& __g, const param_type& __p)
- {return gamma_distribution<result_type>(__p.n() / 2, 2)(__g);}
-
- // property functions
- _LIBCPP_INLINE_VISIBILITY
- result_type n() const {return __p_.n();}
-
- _LIBCPP_INLINE_VISIBILITY
- param_type param() const {return __p_;}
- _LIBCPP_INLINE_VISIBILITY
- void param(const param_type& __p) {__p_ = __p;}
-
- _LIBCPP_INLINE_VISIBILITY
- result_type min() const {return 0;}
- _LIBCPP_INLINE_VISIBILITY
- result_type max() const {return numeric_limits<result_type>::infinity();}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const chi_squared_distribution& __x,
- const chi_squared_distribution& __y)
- {return __x.__p_ == __y.__p_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const chi_squared_distribution& __x,
- const chi_squared_distribution& __y)
- {return !(__x == __y);}
-};
-
-template <class _CharT, class _Traits, class _RT>
-basic_ostream<_CharT, _Traits>&
-operator<<(basic_ostream<_CharT, _Traits>& __os,
- const chi_squared_distribution<_RT>& __x)
-{
- __save_flags<_CharT, _Traits> __lx(__os);
- typedef basic_ostream<_CharT, _Traits> _OStream;
- __os.flags(_OStream::dec | _OStream::left | _OStream::fixed |
- _OStream::scientific);
- __os << __x.n();
- return __os;
-}
-
-template <class _CharT, class _Traits, class _RT>
-basic_istream<_CharT, _Traits>&
-operator>>(basic_istream<_CharT, _Traits>& __is,
- chi_squared_distribution<_RT>& __x)
-{
- typedef chi_squared_distribution<_RT> _Eng;
- typedef typename _Eng::result_type result_type;
- typedef typename _Eng::param_type param_type;
- __save_flags<_CharT, _Traits> __lx(__is);
- typedef basic_istream<_CharT, _Traits> _Istream;
- __is.flags(_Istream::dec | _Istream::skipws);
- result_type __n;
- __is >> __n;
- if (!__is.fail())
- __x.param(param_type(__n));
- return __is;
-}
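
// chi_squared_distribution(n) is the identity chi^2(n) = Gamma(n/2, 2)
// applied verbatim in operator() above. A quick empirical check of that
// equivalence (the seed, sample count, and variable names are illustrative):

#include <iostream>
#include <random>

int main() {
    std::mt19937 g(1);
    std::chi_squared_distribution<double> chi2(5.0);
    std::gamma_distribution<double> gam(2.5, 2.0); // same law: Gamma(n/2, 2)
    double a = 0, b = 0;
    const int n = 100000;
    for (int i = 0; i < n; ++i) { a += chi2(g); b += gam(g); }
    std::cout << a / n << ' ' << b / n << '\n'; // both near the 5 degrees of freedom
}
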
-
-// cauchy_distribution
-
-template<class _RealType = double>
-class _LIBCPP_TEMPLATE_VIS cauchy_distribution
-{
-public:
- // types
- typedef _RealType result_type;
-
- class _LIBCPP_TEMPLATE_VIS param_type
- {
- result_type __a_;
- result_type __b_;
- public:
- typedef cauchy_distribution distribution_type;
-
- _LIBCPP_INLINE_VISIBILITY
- explicit param_type(result_type __a = 0, result_type __b = 1)
- : __a_(__a), __b_(__b) {}
-
- _LIBCPP_INLINE_VISIBILITY
- result_type a() const {return __a_;}
- _LIBCPP_INLINE_VISIBILITY
- result_type b() const {return __b_;}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const param_type& __x, const param_type& __y)
- {return __x.__a_ == __y.__a_ && __x.__b_ == __y.__b_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const param_type& __x, const param_type& __y)
- {return !(__x == __y);}
- };
-
-private:
- param_type __p_;
-
-public:
- // constructor and reset functions
-#ifndef _LIBCPP_CXX03_LANG
- _LIBCPP_INLINE_VISIBILITY
- cauchy_distribution() : cauchy_distribution(0) {}
- _LIBCPP_INLINE_VISIBILITY
- explicit cauchy_distribution(result_type __a, result_type __b = 1)
- : __p_(param_type(__a, __b)) {}
-#else
- _LIBCPP_INLINE_VISIBILITY
- explicit cauchy_distribution(result_type __a = 0, result_type __b = 1)
- : __p_(param_type(__a, __b)) {}
-#endif
- _LIBCPP_INLINE_VISIBILITY
- explicit cauchy_distribution(const param_type& __p)
- : __p_(__p) {}
- _LIBCPP_INLINE_VISIBILITY
- void reset() {}
-
- // generating functions
- template<class _URNG>
- _LIBCPP_INLINE_VISIBILITY
- result_type operator()(_URNG& __g)
- {return (*this)(__g, __p_);}
- template<class _URNG> _LIBCPP_INLINE_VISIBILITY result_type operator()(_URNG& __g, const param_type& __p);
-
- // property functions
- _LIBCPP_INLINE_VISIBILITY
- result_type a() const {return __p_.a();}
- _LIBCPP_INLINE_VISIBILITY
- result_type b() const {return __p_.b();}
-
- _LIBCPP_INLINE_VISIBILITY
- param_type param() const {return __p_;}
- _LIBCPP_INLINE_VISIBILITY
- void param(const param_type& __p) {__p_ = __p;}
-
- _LIBCPP_INLINE_VISIBILITY
- result_type min() const {return -numeric_limits<result_type>::infinity();}
- _LIBCPP_INLINE_VISIBILITY
- result_type max() const {return numeric_limits<result_type>::infinity();}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const cauchy_distribution& __x,
- const cauchy_distribution& __y)
- {return __x.__p_ == __y.__p_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const cauchy_distribution& __x,
- const cauchy_distribution& __y)
- {return !(__x == __y);}
-};
-
-template <class _RealType>
-template<class _URNG>
-inline
-_RealType
-cauchy_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p)
-{
- uniform_real_distribution<result_type> __gen;
-    // Purposefully let the tan argument get as close to pi/2 as it wants; tan
-    // will still return a finite value, and the Cauchy range is unbounded anyway.
- return __p.a() + __p.b() * _VSTD::tan(3.1415926535897932384626433832795 * __gen(__g));
-}
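
// This is the inverse-CDF method: the Cauchy CDF is
// F(x) = 1/2 + atan((x - a)/b) / pi, so F^{-1}(u) = a + b * tan(pi * (u - 1/2));
// because tan has period pi, tan(pi * u) yields the same distribution, which is
// the form used above. The textbook form, as a sketch (the helper name
// sample_cauchy is illustrative):

#include <cmath>
#include <random>

template <class URNG>
double sample_cauchy(URNG& g, double a, double b) {
    std::uniform_real_distribution<double> u01;
    const double pi = 3.141592653589793;
    return a + b * std::tan(pi * (u01(g) - 0.5));
}
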
-
-template <class _CharT, class _Traits, class _RT>
-basic_ostream<_CharT, _Traits>&
-operator<<(basic_ostream<_CharT, _Traits>& __os,
- const cauchy_distribution<_RT>& __x)
-{
- __save_flags<_CharT, _Traits> __lx(__os);
- typedef basic_ostream<_CharT, _Traits> _OStream;
- __os.flags(_OStream::dec | _OStream::left | _OStream::fixed |
- _OStream::scientific);
- _CharT __sp = __os.widen(' ');
- __os.fill(__sp);
- __os << __x.a() << __sp << __x.b();
- return __os;
-}
-
-template <class _CharT, class _Traits, class _RT>
-basic_istream<_CharT, _Traits>&
-operator>>(basic_istream<_CharT, _Traits>& __is,
- cauchy_distribution<_RT>& __x)
-{
- typedef cauchy_distribution<_RT> _Eng;
- typedef typename _Eng::result_type result_type;
- typedef typename _Eng::param_type param_type;
- __save_flags<_CharT, _Traits> __lx(__is);
- typedef basic_istream<_CharT, _Traits> _Istream;
- __is.flags(_Istream::dec | _Istream::skipws);
- result_type __a;
- result_type __b;
- __is >> __a >> __b;
- if (!__is.fail())
- __x.param(param_type(__a, __b));
- return __is;
-}
-
-// fisher_f_distribution
-
-template<class _RealType = double>
-class _LIBCPP_TEMPLATE_VIS fisher_f_distribution
-{
-public:
- // types
- typedef _RealType result_type;
-
- class _LIBCPP_TEMPLATE_VIS param_type
- {
- result_type __m_;
- result_type __n_;
- public:
- typedef fisher_f_distribution distribution_type;
-
- _LIBCPP_INLINE_VISIBILITY
- explicit param_type(result_type __m = 1, result_type __n = 1)
- : __m_(__m), __n_(__n) {}
-
- _LIBCPP_INLINE_VISIBILITY
- result_type m() const {return __m_;}
- _LIBCPP_INLINE_VISIBILITY
- result_type n() const {return __n_;}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const param_type& __x, const param_type& __y)
- {return __x.__m_ == __y.__m_ && __x.__n_ == __y.__n_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const param_type& __x, const param_type& __y)
- {return !(__x == __y);}
- };
-
-private:
- param_type __p_;
-
-public:
- // constructor and reset functions
-#ifndef _LIBCPP_CXX03_LANG
- _LIBCPP_INLINE_VISIBILITY
- fisher_f_distribution() : fisher_f_distribution(1) {}
- _LIBCPP_INLINE_VISIBILITY
- explicit fisher_f_distribution(result_type __m, result_type __n = 1)
- : __p_(param_type(__m, __n)) {}
-#else
- _LIBCPP_INLINE_VISIBILITY
- explicit fisher_f_distribution(result_type __m = 1, result_type __n = 1)
- : __p_(param_type(__m, __n)) {}
-#endif
- _LIBCPP_INLINE_VISIBILITY
- explicit fisher_f_distribution(const param_type& __p)
- : __p_(__p) {}
- _LIBCPP_INLINE_VISIBILITY
- void reset() {}
-
- // generating functions
- template<class _URNG>
- _LIBCPP_INLINE_VISIBILITY
- result_type operator()(_URNG& __g)
- {return (*this)(__g, __p_);}
- template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p);
-
- // property functions
- _LIBCPP_INLINE_VISIBILITY
- result_type m() const {return __p_.m();}
- _LIBCPP_INLINE_VISIBILITY
- result_type n() const {return __p_.n();}
-
- _LIBCPP_INLINE_VISIBILITY
- param_type param() const {return __p_;}
- _LIBCPP_INLINE_VISIBILITY
- void param(const param_type& __p) {__p_ = __p;}
-
- _LIBCPP_INLINE_VISIBILITY
- result_type min() const {return 0;}
- _LIBCPP_INLINE_VISIBILITY
- result_type max() const {return numeric_limits<result_type>::infinity();}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const fisher_f_distribution& __x,
- const fisher_f_distribution& __y)
- {return __x.__p_ == __y.__p_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const fisher_f_distribution& __x,
- const fisher_f_distribution& __y)
- {return !(__x == __y);}
-};
-
-template <class _RealType>
-template<class _URNG>
-_RealType
-fisher_f_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p)
-{
- gamma_distribution<result_type> __gdm(__p.m() * result_type(.5));
- gamma_distribution<result_type> __gdn(__p.n() * result_type(.5));
- return __p.n() * __gdm(__g) / (__p.m() * __gdn(__g));
-}
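
// The two gamma draws above stand in for scaled chi-squared variates:
// chi^2_k = 2 * Gamma(k/2, 1), and the factors of 2 cancel in the F ratio,
// which is why no explicit scaling by 2 appears. The identity, for reference
// (a sketch of the math, not library text):
//
//   F = \frac{X_m/m}{X_n/n}, \quad
//   X_k \sim \chi^2_k \stackrel{d}{=} 2\,\Gamma\!\left(\tfrac{k}{2},\,1\right)
//   \;\Longrightarrow\;
//   F \stackrel{d}{=} \frac{n\,\Gamma(m/2,\,1)}{m\,\Gamma(n/2,\,1)}.
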
-
-template <class _CharT, class _Traits, class _RT>
-basic_ostream<_CharT, _Traits>&
-operator<<(basic_ostream<_CharT, _Traits>& __os,
- const fisher_f_distribution<_RT>& __x)
-{
- __save_flags<_CharT, _Traits> __lx(__os);
- typedef basic_ostream<_CharT, _Traits> _OStream;
- __os.flags(_OStream::dec | _OStream::left | _OStream::fixed |
- _OStream::scientific);
- _CharT __sp = __os.widen(' ');
- __os.fill(__sp);
- __os << __x.m() << __sp << __x.n();
- return __os;
-}
-
-template <class _CharT, class _Traits, class _RT>
-basic_istream<_CharT, _Traits>&
-operator>>(basic_istream<_CharT, _Traits>& __is,
- fisher_f_distribution<_RT>& __x)
-{
- typedef fisher_f_distribution<_RT> _Eng;
- typedef typename _Eng::result_type result_type;
- typedef typename _Eng::param_type param_type;
- __save_flags<_CharT, _Traits> __lx(__is);
- typedef basic_istream<_CharT, _Traits> _Istream;
- __is.flags(_Istream::dec | _Istream::skipws);
- result_type __m;
- result_type __n;
- __is >> __m >> __n;
- if (!__is.fail())
- __x.param(param_type(__m, __n));
- return __is;
-}
-
-// student_t_distribution
-
-template<class _RealType = double>
-class _LIBCPP_TEMPLATE_VIS student_t_distribution
-{
-public:
- // types
- typedef _RealType result_type;
-
- class _LIBCPP_TEMPLATE_VIS param_type
- {
- result_type __n_;
- public:
- typedef student_t_distribution distribution_type;
-
- _LIBCPP_INLINE_VISIBILITY
- explicit param_type(result_type __n = 1) : __n_(__n) {}
-
- _LIBCPP_INLINE_VISIBILITY
- result_type n() const {return __n_;}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const param_type& __x, const param_type& __y)
- {return __x.__n_ == __y.__n_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const param_type& __x, const param_type& __y)
- {return !(__x == __y);}
- };
-
-private:
- param_type __p_;
- normal_distribution<result_type> __nd_;
-
-public:
- // constructor and reset functions
-#ifndef _LIBCPP_CXX03_LANG
- _LIBCPP_INLINE_VISIBILITY
- student_t_distribution() : student_t_distribution(1) {}
- _LIBCPP_INLINE_VISIBILITY
- explicit student_t_distribution(result_type __n)
- : __p_(param_type(__n)) {}
-#else
- _LIBCPP_INLINE_VISIBILITY
- explicit student_t_distribution(result_type __n = 1)
- : __p_(param_type(__n)) {}
-#endif
- _LIBCPP_INLINE_VISIBILITY
- explicit student_t_distribution(const param_type& __p)
- : __p_(__p) {}
- _LIBCPP_INLINE_VISIBILITY
- void reset() {__nd_.reset();}
-
- // generating functions
- template<class _URNG>
- _LIBCPP_INLINE_VISIBILITY
- result_type operator()(_URNG& __g)
- {return (*this)(__g, __p_);}
- template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p);
-
- // property functions
- _LIBCPP_INLINE_VISIBILITY
- result_type n() const {return __p_.n();}
-
- _LIBCPP_INLINE_VISIBILITY
- param_type param() const {return __p_;}
- _LIBCPP_INLINE_VISIBILITY
- void param(const param_type& __p) {__p_ = __p;}
-
- _LIBCPP_INLINE_VISIBILITY
- result_type min() const {return -numeric_limits<result_type>::infinity();}
- _LIBCPP_INLINE_VISIBILITY
- result_type max() const {return numeric_limits<result_type>::infinity();}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const student_t_distribution& __x,
- const student_t_distribution& __y)
- {return __x.__p_ == __y.__p_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const student_t_distribution& __x,
- const student_t_distribution& __y)
- {return !(__x == __y);}
-};
-
-template <class _RealType>
-template<class _URNG>
-_RealType
-student_t_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p)
-{
- gamma_distribution<result_type> __gd(__p.n() * .5, 2);
- return __nd_(__g) * _VSTD::sqrt(__p.n()/__gd(__g));
-}
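
// The classical construction is t = Z / sqrt(V/n) with Z standard normal and
// V ~ chi^2(n); gamma_distribution(n/2, 2) is exactly chi^2(n), so the line
// above computes Z * sqrt(n / V). For reference:
//
//   t \;=\; \frac{Z}{\sqrt{V/n}} \;=\; Z\sqrt{\frac{n}{V}}, \qquad
//   Z \sim \mathcal{N}(0,1), \quad
//   V \sim \chi^2_n \stackrel{d}{=} \Gamma\!\left(\tfrac{n}{2},\,2\right).
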
-
-template <class _CharT, class _Traits, class _RT>
-basic_ostream<_CharT, _Traits>&
-operator<<(basic_ostream<_CharT, _Traits>& __os,
- const student_t_distribution<_RT>& __x)
-{
- __save_flags<_CharT, _Traits> __lx(__os);
- typedef basic_ostream<_CharT, _Traits> _OStream;
- __os.flags(_OStream::dec | _OStream::left | _OStream::fixed |
- _OStream::scientific);
- __os << __x.n();
- return __os;
-}
-
-template <class _CharT, class _Traits, class _RT>
-basic_istream<_CharT, _Traits>&
-operator>>(basic_istream<_CharT, _Traits>& __is,
- student_t_distribution<_RT>& __x)
-{
- typedef student_t_distribution<_RT> _Eng;
- typedef typename _Eng::result_type result_type;
- typedef typename _Eng::param_type param_type;
- __save_flags<_CharT, _Traits> __lx(__is);
- typedef basic_istream<_CharT, _Traits> _Istream;
- __is.flags(_Istream::dec | _Istream::skipws);
- result_type __n;
- __is >> __n;
- if (!__is.fail())
- __x.param(param_type(__n));
- return __is;
-}
-
-// discrete_distribution
-
-template<class _IntType = int>
-class _LIBCPP_TEMPLATE_VIS discrete_distribution
-{
-public:
- // types
- typedef _IntType result_type;
-
- class _LIBCPP_TEMPLATE_VIS param_type
- {
- vector<double> __p_;
- public:
- typedef discrete_distribution distribution_type;
-
- _LIBCPP_INLINE_VISIBILITY
- param_type() {}
- template<class _InputIterator>
- _LIBCPP_INLINE_VISIBILITY
- param_type(_InputIterator __f, _InputIterator __l)
- : __p_(__f, __l) {__init();}
-#ifndef _LIBCPP_CXX03_LANG
- _LIBCPP_INLINE_VISIBILITY
- param_type(initializer_list<double> __wl)
- : __p_(__wl.begin(), __wl.end()) {__init();}
-#endif // _LIBCPP_CXX03_LANG
- template<class _UnaryOperation>
- param_type(size_t __nw, double __xmin, double __xmax,
- _UnaryOperation __fw);
-
- vector<double> probabilities() const;
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const param_type& __x, const param_type& __y)
- {return __x.__p_ == __y.__p_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const param_type& __x, const param_type& __y)
- {return !(__x == __y);}
-
- private:
- void __init();
-
- friend class discrete_distribution;
-
- template <class _CharT, class _Traits, class _IT>
- friend
- basic_ostream<_CharT, _Traits>&
- operator<<(basic_ostream<_CharT, _Traits>& __os,
- const discrete_distribution<_IT>& __x);
-
- template <class _CharT, class _Traits, class _IT>
- friend
- basic_istream<_CharT, _Traits>&
- operator>>(basic_istream<_CharT, _Traits>& __is,
- discrete_distribution<_IT>& __x);
- };
-
-private:
- param_type __p_;
-
-public:
- // constructor and reset functions
- _LIBCPP_INLINE_VISIBILITY
- discrete_distribution() {}
- template<class _InputIterator>
- _LIBCPP_INLINE_VISIBILITY
- discrete_distribution(_InputIterator __f, _InputIterator __l)
- : __p_(__f, __l) {}
-#ifndef _LIBCPP_CXX03_LANG
- _LIBCPP_INLINE_VISIBILITY
- discrete_distribution(initializer_list<double> __wl)
- : __p_(__wl) {}
-#endif // _LIBCPP_CXX03_LANG
- template<class _UnaryOperation>
- _LIBCPP_INLINE_VISIBILITY
- discrete_distribution(size_t __nw, double __xmin, double __xmax,
- _UnaryOperation __fw)
- : __p_(__nw, __xmin, __xmax, __fw) {}
- _LIBCPP_INLINE_VISIBILITY
- explicit discrete_distribution(const param_type& __p)
- : __p_(__p) {}
- _LIBCPP_INLINE_VISIBILITY
- void reset() {}
-
- // generating functions
- template<class _URNG>
- _LIBCPP_INLINE_VISIBILITY
- result_type operator()(_URNG& __g)
- {return (*this)(__g, __p_);}
- template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p);
-
- // property functions
- _LIBCPP_INLINE_VISIBILITY
- vector<double> probabilities() const {return __p_.probabilities();}
-
- _LIBCPP_INLINE_VISIBILITY
- param_type param() const {return __p_;}
- _LIBCPP_INLINE_VISIBILITY
- void param(const param_type& __p) {__p_ = __p;}
-
- _LIBCPP_INLINE_VISIBILITY
- result_type min() const {return 0;}
- _LIBCPP_INLINE_VISIBILITY
- result_type max() const {return __p_.__p_.size();}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const discrete_distribution& __x,
- const discrete_distribution& __y)
- {return __x.__p_ == __y.__p_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const discrete_distribution& __x,
- const discrete_distribution& __y)
- {return !(__x == __y);}
-
- template <class _CharT, class _Traits, class _IT>
- friend
- basic_ostream<_CharT, _Traits>&
- operator<<(basic_ostream<_CharT, _Traits>& __os,
- const discrete_distribution<_IT>& __x);
-
- template <class _CharT, class _Traits, class _IT>
- friend
- basic_istream<_CharT, _Traits>&
- operator>>(basic_istream<_CharT, _Traits>& __is,
- discrete_distribution<_IT>& __x);
-};
-
-template<class _IntType>
-template<class _UnaryOperation>
-discrete_distribution<_IntType>::param_type::param_type(size_t __nw,
- double __xmin,
- double __xmax,
- _UnaryOperation __fw)
-{
- if (__nw > 1)
- {
- __p_.reserve(__nw - 1);
- double __d = (__xmax - __xmin) / __nw;
- double __d2 = __d / 2;
- for (size_t __k = 0; __k < __nw; ++__k)
- __p_.push_back(__fw(__xmin + __k * __d + __d2));
- __init();
- }
-}
-
-template<class _IntType>
-void
-discrete_distribution<_IntType>::param_type::__init()
-{
- if (!__p_.empty())
- {
- if (__p_.size() > 1)
- {
- double __s = _VSTD::accumulate(__p_.begin(), __p_.end(), 0.0);
- for (vector<double>::iterator __i = __p_.begin(), __e = __p_.end(); __i < __e; ++__i)
- *__i /= __s;
- vector<double> __t(__p_.size() - 1);
- _VSTD::partial_sum(__p_.begin(), __p_.end() - 1, __t.begin());
- swap(__p_, __t);
- }
- else
- {
- __p_.clear();
- __p_.shrink_to_fit();
- }
- }
-}
-
-template<class _IntType>
-vector<double>
-discrete_distribution<_IntType>::param_type::probabilities() const
-{
- size_t __n = __p_.size();
- vector<double> __p(__n+1);
- _VSTD::adjacent_difference(__p_.begin(), __p_.end(), __p.begin());
- if (__n > 0)
- __p[__n] = 1 - __p_[__n-1];
- else
- __p[0] = 1;
- return __p;
-}
-
-template<class _IntType>
-template<class _URNG>
-_IntType
-discrete_distribution<_IntType>::operator()(_URNG& __g, const param_type& __p)
-{
- uniform_real_distribution<double> __gen;
- return static_cast<_IntType>(
- _VSTD::upper_bound(__p.__p_.begin(), __p.__p_.end(), __gen(__g)) -
- __p.__p_.begin());
-}
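
// param_type::__init() normalizes the weights and stores only the first n-1
// cumulative sums, so sampling reduces to the upper_bound above: the index of
// the first cumulative entry exceeding a uniform draw is the outcome. A
// self-contained sketch under the same table layout (names and the example
// weights are illustrative): for weights {1, 2, 3} the stored table is
// {1/6, 1/2}, and indices 0, 1, 2 come back with probabilities 1/6, 1/3, 1/2.

#include <algorithm>
#include <random>
#include <vector>

template <class URNG>
int sample_discrete(URNG& g, const std::vector<double>& cumulative) {
    std::uniform_real_distribution<double> u01;
    // cumulative holds the first n-1 partial sums of the normalized weights;
    // the final sum (1.0) is implicit.
    return static_cast<int>(
        std::upper_bound(cumulative.begin(), cumulative.end(), u01(g)) -
        cumulative.begin());
}
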
-
-template <class _CharT, class _Traits, class _IT>
-basic_ostream<_CharT, _Traits>&
-operator<<(basic_ostream<_CharT, _Traits>& __os,
- const discrete_distribution<_IT>& __x)
-{
- __save_flags<_CharT, _Traits> __lx(__os);
- typedef basic_ostream<_CharT, _Traits> _OStream;
- __os.flags(_OStream::dec | _OStream::left | _OStream::fixed |
- _OStream::scientific);
- _CharT __sp = __os.widen(' ');
- __os.fill(__sp);
- size_t __n = __x.__p_.__p_.size();
- __os << __n;
- for (size_t __i = 0; __i < __n; ++__i)
- __os << __sp << __x.__p_.__p_[__i];
- return __os;
-}
-
-template <class _CharT, class _Traits, class _IT>
-basic_istream<_CharT, _Traits>&
-operator>>(basic_istream<_CharT, _Traits>& __is,
- discrete_distribution<_IT>& __x)
-{
- __save_flags<_CharT, _Traits> __lx(__is);
- typedef basic_istream<_CharT, _Traits> _Istream;
- __is.flags(_Istream::dec | _Istream::skipws);
- size_t __n;
- __is >> __n;
- vector<double> __p(__n);
- for (size_t __i = 0; __i < __n; ++__i)
- __is >> __p[__i];
- if (!__is.fail())
- swap(__x.__p_.__p_, __p);
- return __is;
-}
-
-// piecewise_constant_distribution
-
-template<class _RealType = double>
-class _LIBCPP_TEMPLATE_VIS piecewise_constant_distribution
-{
-public:
- // types
- typedef _RealType result_type;
-
- class _LIBCPP_TEMPLATE_VIS param_type
- {
- vector<result_type> __b_;
- vector<result_type> __densities_;
- vector<result_type> __areas_;
- public:
- typedef piecewise_constant_distribution distribution_type;
-
- param_type();
- template<class _InputIteratorB, class _InputIteratorW>
- param_type(_InputIteratorB __fB, _InputIteratorB __lB,
- _InputIteratorW __fW);
-#ifndef _LIBCPP_CXX03_LANG
- template<class _UnaryOperation>
- param_type(initializer_list<result_type> __bl, _UnaryOperation __fw);
-#endif // _LIBCPP_CXX03_LANG
- template<class _UnaryOperation>
- param_type(size_t __nw, result_type __xmin, result_type __xmax,
- _UnaryOperation __fw);
- param_type(param_type const&) = default;
- param_type & operator=(const param_type& __rhs);
-
- _LIBCPP_INLINE_VISIBILITY
- vector<result_type> intervals() const {return __b_;}
- _LIBCPP_INLINE_VISIBILITY
- vector<result_type> densities() const {return __densities_;}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const param_type& __x, const param_type& __y)
- {return __x.__densities_ == __y.__densities_ && __x.__b_ == __y.__b_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const param_type& __x, const param_type& __y)
- {return !(__x == __y);}
-
- private:
- void __init();
-
- friend class piecewise_constant_distribution;
-
- template <class _CharT, class _Traits, class _RT>
- friend
- basic_ostream<_CharT, _Traits>&
- operator<<(basic_ostream<_CharT, _Traits>& __os,
- const piecewise_constant_distribution<_RT>& __x);
-
- template <class _CharT, class _Traits, class _RT>
- friend
- basic_istream<_CharT, _Traits>&
- operator>>(basic_istream<_CharT, _Traits>& __is,
- piecewise_constant_distribution<_RT>& __x);
- };
-
-private:
- param_type __p_;
-
-public:
- // constructor and reset functions
- _LIBCPP_INLINE_VISIBILITY
- piecewise_constant_distribution() {}
- template<class _InputIteratorB, class _InputIteratorW>
- _LIBCPP_INLINE_VISIBILITY
- piecewise_constant_distribution(_InputIteratorB __fB,
- _InputIteratorB __lB,
- _InputIteratorW __fW)
- : __p_(__fB, __lB, __fW) {}
-
-#ifndef _LIBCPP_CXX03_LANG
- template<class _UnaryOperation>
- _LIBCPP_INLINE_VISIBILITY
- piecewise_constant_distribution(initializer_list<result_type> __bl,
- _UnaryOperation __fw)
- : __p_(__bl, __fw) {}
-#endif // _LIBCPP_CXX03_LANG
-
- template<class _UnaryOperation>
- _LIBCPP_INLINE_VISIBILITY
- piecewise_constant_distribution(size_t __nw, result_type __xmin,
- result_type __xmax, _UnaryOperation __fw)
- : __p_(__nw, __xmin, __xmax, __fw) {}
-
- _LIBCPP_INLINE_VISIBILITY
- explicit piecewise_constant_distribution(const param_type& __p)
- : __p_(__p) {}
-
- _LIBCPP_INLINE_VISIBILITY
- void reset() {}
-
- // generating functions
- template<class _URNG>
- _LIBCPP_INLINE_VISIBILITY
- result_type operator()(_URNG& __g)
- {return (*this)(__g, __p_);}
- template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p);
-
- // property functions
- _LIBCPP_INLINE_VISIBILITY
- vector<result_type> intervals() const {return __p_.intervals();}
- _LIBCPP_INLINE_VISIBILITY
- vector<result_type> densities() const {return __p_.densities();}
-
- _LIBCPP_INLINE_VISIBILITY
- param_type param() const {return __p_;}
- _LIBCPP_INLINE_VISIBILITY
- void param(const param_type& __p) {__p_ = __p;}
-
- _LIBCPP_INLINE_VISIBILITY
- result_type min() const {return __p_.__b_.front();}
- _LIBCPP_INLINE_VISIBILITY
- result_type max() const {return __p_.__b_.back();}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const piecewise_constant_distribution& __x,
- const piecewise_constant_distribution& __y)
- {return __x.__p_ == __y.__p_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const piecewise_constant_distribution& __x,
- const piecewise_constant_distribution& __y)
- {return !(__x == __y);}
-
- template <class _CharT, class _Traits, class _RT>
- friend
- basic_ostream<_CharT, _Traits>&
- operator<<(basic_ostream<_CharT, _Traits>& __os,
- const piecewise_constant_distribution<_RT>& __x);
-
- template <class _CharT, class _Traits, class _RT>
- friend
- basic_istream<_CharT, _Traits>&
- operator>>(basic_istream<_CharT, _Traits>& __is,
- piecewise_constant_distribution<_RT>& __x);
-};
-
-template<class _RealType>
-typename piecewise_constant_distribution<_RealType>::param_type &
-piecewise_constant_distribution<_RealType>::param_type::operator=
- (const param_type& __rhs)
-{
-// These can throw
- __b_.reserve (__rhs.__b_.size ());
- __densities_.reserve(__rhs.__densities_.size());
- __areas_.reserve (__rhs.__areas_.size());
-
-// These cannot throw
- __b_ = __rhs.__b_;
- __densities_ = __rhs.__densities_;
- __areas_ = __rhs.__areas_;
- return *this;
-}
-
-template<class _RealType>
-void
-piecewise_constant_distribution<_RealType>::param_type::__init()
-{
- // __densities_ contains non-normalized areas
- result_type __total_area = _VSTD::accumulate(__densities_.begin(),
- __densities_.end(),
- result_type());
- for (size_t __i = 0; __i < __densities_.size(); ++__i)
- __densities_[__i] /= __total_area;
- // __densities_ contains normalized areas
- __areas_.assign(__densities_.size(), result_type());
- _VSTD::partial_sum(__densities_.begin(), __densities_.end() - 1,
- __areas_.begin() + 1);
-    // __areas_ contains the exclusive partial sums of the normalized areas:
-    // __areas_[__i] is the total normalized area of intervals [0, __i)
- __densities_.back() = 1 - __areas_.back(); // correct round off error
- for (size_t __i = 0; __i < __densities_.size(); ++__i)
- __densities_[__i] /= (__b_[__i+1] - __b_[__i]);
-    // __densities_ now contains the actual densities: normalized area / interval width
-}
-
-template<class _RealType>
-piecewise_constant_distribution<_RealType>::param_type::param_type()
- : __b_(2),
- __densities_(1, 1.0),
- __areas_(1, 0.0)
-{
- __b_[1] = 1;
-}
-
-template<class _RealType>
-template<class _InputIteratorB, class _InputIteratorW>
-piecewise_constant_distribution<_RealType>::param_type::param_type(
- _InputIteratorB __fB, _InputIteratorB __lB, _InputIteratorW __fW)
- : __b_(__fB, __lB)
-{
- if (__b_.size() < 2)
- {
- __b_.resize(2);
- __b_[0] = 0;
- __b_[1] = 1;
- __densities_.assign(1, 1.0);
- __areas_.assign(1, 0.0);
- }
- else
- {
- __densities_.reserve(__b_.size() - 1);
- for (size_t __i = 0; __i < __b_.size() - 1; ++__i, ++__fW)
- __densities_.push_back(*__fW);
- __init();
- }
-}
-
-#ifndef _LIBCPP_CXX03_LANG
-
-template<class _RealType>
-template<class _UnaryOperation>
-piecewise_constant_distribution<_RealType>::param_type::param_type(
- initializer_list<result_type> __bl, _UnaryOperation __fw)
- : __b_(__bl.begin(), __bl.end())
-{
- if (__b_.size() < 2)
- {
- __b_.resize(2);
- __b_[0] = 0;
- __b_[1] = 1;
- __densities_.assign(1, 1.0);
- __areas_.assign(1, 0.0);
- }
- else
- {
- __densities_.reserve(__b_.size() - 1);
- for (size_t __i = 0; __i < __b_.size() - 1; ++__i)
- __densities_.push_back(__fw((__b_[__i+1] + __b_[__i])*.5));
- __init();
- }
-}
-
-#endif // _LIBCPP_CXX03_LANG
-
-template<class _RealType>
-template<class _UnaryOperation>
-piecewise_constant_distribution<_RealType>::param_type::param_type(
- size_t __nw, result_type __xmin, result_type __xmax, _UnaryOperation __fw)
- : __b_(__nw == 0 ? 2 : __nw + 1)
-{
- size_t __n = __b_.size() - 1;
- result_type __d = (__xmax - __xmin) / __n;
- __densities_.reserve(__n);
- for (size_t __i = 0; __i < __n; ++__i)
- {
- __b_[__i] = __xmin + __i * __d;
- __densities_.push_back(__fw(__b_[__i] + __d*.5));
- }
- __b_[__n] = __xmax;
- __init();
-}
-
-template<class _RealType>
-template<class _URNG>
-_RealType
-piecewise_constant_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p)
-{
- typedef uniform_real_distribution<result_type> _Gen;
- result_type __u = _Gen()(__g);
- ptrdiff_t __k = _VSTD::upper_bound(__p.__areas_.begin(), __p.__areas_.end(),
- __u) - __p.__areas_.begin() - 1;
- return (__u - __p.__areas_[__k]) / __p.__densities_[__k] + __p.__b_[__k];
-}
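
// Sampling inverts the piecewise-constant CDF: upper_bound finds the interval
// k whose cumulative-area bracket contains u, the excess area (u - areas[k])
// is turned back into a length by dividing by that interval's constant
// density, and the interval's left endpoint is added. The same arithmetic as a
// standalone function (a sketch; the table layout matches __init() above):

#include <algorithm>
#include <cstddef>
#include <vector>

// areas[0] == 0 and areas[k] is the cumulative normalized area left of b[k];
// densities[k] is the constant height on [b[k], b[k+1]).
double invert_piecewise_constant(double u,
                                 const std::vector<double>& b,
                                 const std::vector<double>& areas,
                                 const std::vector<double>& densities) {
    std::ptrdiff_t k = std::upper_bound(areas.begin(), areas.end(), u) -
                       areas.begin() - 1;
    return b[k] + (u - areas[k]) / densities[k];
}
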
-
-template <class _CharT, class _Traits, class _RT>
-basic_ostream<_CharT, _Traits>&
-operator<<(basic_ostream<_CharT, _Traits>& __os,
- const piecewise_constant_distribution<_RT>& __x)
-{
- __save_flags<_CharT, _Traits> __lx(__os);
- typedef basic_ostream<_CharT, _Traits> _OStream;
- __os.flags(_OStream::dec | _OStream::left | _OStream::fixed |
- _OStream::scientific);
- _CharT __sp = __os.widen(' ');
- __os.fill(__sp);
- size_t __n = __x.__p_.__b_.size();
- __os << __n;
- for (size_t __i = 0; __i < __n; ++__i)
- __os << __sp << __x.__p_.__b_[__i];
- __n = __x.__p_.__densities_.size();
- __os << __sp << __n;
- for (size_t __i = 0; __i < __n; ++__i)
- __os << __sp << __x.__p_.__densities_[__i];
- __n = __x.__p_.__areas_.size();
- __os << __sp << __n;
- for (size_t __i = 0; __i < __n; ++__i)
- __os << __sp << __x.__p_.__areas_[__i];
- return __os;
-}
-
-template <class _CharT, class _Traits, class _RT>
-basic_istream<_CharT, _Traits>&
-operator>>(basic_istream<_CharT, _Traits>& __is,
- piecewise_constant_distribution<_RT>& __x)
-{
- typedef piecewise_constant_distribution<_RT> _Eng;
- typedef typename _Eng::result_type result_type;
- __save_flags<_CharT, _Traits> __lx(__is);
- typedef basic_istream<_CharT, _Traits> _Istream;
- __is.flags(_Istream::dec | _Istream::skipws);
- size_t __n;
- __is >> __n;
- vector<result_type> __b(__n);
- for (size_t __i = 0; __i < __n; ++__i)
- __is >> __b[__i];
- __is >> __n;
- vector<result_type> __densities(__n);
- for (size_t __i = 0; __i < __n; ++__i)
- __is >> __densities[__i];
- __is >> __n;
- vector<result_type> __areas(__n);
- for (size_t __i = 0; __i < __n; ++__i)
- __is >> __areas[__i];
- if (!__is.fail())
- {
- swap(__x.__p_.__b_, __b);
- swap(__x.__p_.__densities_, __densities);
- swap(__x.__p_.__areas_, __areas);
- }
- return __is;
-}
-
-// piecewise_linear_distribution
-
-template<class _RealType = double>
-class _LIBCPP_TEMPLATE_VIS piecewise_linear_distribution
-{
-public:
- // types
- typedef _RealType result_type;
-
- class _LIBCPP_TEMPLATE_VIS param_type
- {
- vector<result_type> __b_;
- vector<result_type> __densities_;
- vector<result_type> __areas_;
- public:
- typedef piecewise_linear_distribution distribution_type;
-
- param_type();
- template<class _InputIteratorB, class _InputIteratorW>
- param_type(_InputIteratorB __fB, _InputIteratorB __lB,
- _InputIteratorW __fW);
-#ifndef _LIBCPP_CXX03_LANG
- template<class _UnaryOperation>
- param_type(initializer_list<result_type> __bl, _UnaryOperation __fw);
-#endif // _LIBCPP_CXX03_LANG
- template<class _UnaryOperation>
- param_type(size_t __nw, result_type __xmin, result_type __xmax,
- _UnaryOperation __fw);
- param_type(param_type const&) = default;
- param_type & operator=(const param_type& __rhs);
-
- _LIBCPP_INLINE_VISIBILITY
- vector<result_type> intervals() const {return __b_;}
- _LIBCPP_INLINE_VISIBILITY
- vector<result_type> densities() const {return __densities_;}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const param_type& __x, const param_type& __y)
- {return __x.__densities_ == __y.__densities_ && __x.__b_ == __y.__b_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const param_type& __x, const param_type& __y)
- {return !(__x == __y);}
-
- private:
- void __init();
-
- friend class piecewise_linear_distribution;
-
- template <class _CharT, class _Traits, class _RT>
- friend
- basic_ostream<_CharT, _Traits>&
- operator<<(basic_ostream<_CharT, _Traits>& __os,
- const piecewise_linear_distribution<_RT>& __x);
-
- template <class _CharT, class _Traits, class _RT>
- friend
- basic_istream<_CharT, _Traits>&
- operator>>(basic_istream<_CharT, _Traits>& __is,
- piecewise_linear_distribution<_RT>& __x);
- };
-
-private:
- param_type __p_;
-
-public:
- // constructor and reset functions
- _LIBCPP_INLINE_VISIBILITY
- piecewise_linear_distribution() {}
- template<class _InputIteratorB, class _InputIteratorW>
- _LIBCPP_INLINE_VISIBILITY
- piecewise_linear_distribution(_InputIteratorB __fB,
- _InputIteratorB __lB,
- _InputIteratorW __fW)
- : __p_(__fB, __lB, __fW) {}
-
-#ifndef _LIBCPP_CXX03_LANG
- template<class _UnaryOperation>
- _LIBCPP_INLINE_VISIBILITY
- piecewise_linear_distribution(initializer_list<result_type> __bl,
- _UnaryOperation __fw)
- : __p_(__bl, __fw) {}
-#endif // _LIBCPP_CXX03_LANG
-
- template<class _UnaryOperation>
- _LIBCPP_INLINE_VISIBILITY
- piecewise_linear_distribution(size_t __nw, result_type __xmin,
- result_type __xmax, _UnaryOperation __fw)
- : __p_(__nw, __xmin, __xmax, __fw) {}
-
- _LIBCPP_INLINE_VISIBILITY
- explicit piecewise_linear_distribution(const param_type& __p)
- : __p_(__p) {}
-
- _LIBCPP_INLINE_VISIBILITY
- void reset() {}
-
- // generating functions
- template<class _URNG>
- _LIBCPP_INLINE_VISIBILITY
- result_type operator()(_URNG& __g)
- {return (*this)(__g, __p_);}
- template<class _URNG> result_type operator()(_URNG& __g, const param_type& __p);
-
- // property functions
- _LIBCPP_INLINE_VISIBILITY
- vector<result_type> intervals() const {return __p_.intervals();}
- _LIBCPP_INLINE_VISIBILITY
- vector<result_type> densities() const {return __p_.densities();}
-
- _LIBCPP_INLINE_VISIBILITY
- param_type param() const {return __p_;}
- _LIBCPP_INLINE_VISIBILITY
- void param(const param_type& __p) {__p_ = __p;}
-
- _LIBCPP_INLINE_VISIBILITY
- result_type min() const {return __p_.__b_.front();}
- _LIBCPP_INLINE_VISIBILITY
- result_type max() const {return __p_.__b_.back();}
-
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator==(const piecewise_linear_distribution& __x,
- const piecewise_linear_distribution& __y)
- {return __x.__p_ == __y.__p_;}
- friend _LIBCPP_INLINE_VISIBILITY
- bool operator!=(const piecewise_linear_distribution& __x,
- const piecewise_linear_distribution& __y)
- {return !(__x == __y);}
-
- template <class _CharT, class _Traits, class _RT>
- friend
- basic_ostream<_CharT, _Traits>&
- operator<<(basic_ostream<_CharT, _Traits>& __os,
- const piecewise_linear_distribution<_RT>& __x);
-
- template <class _CharT, class _Traits, class _RT>
- friend
- basic_istream<_CharT, _Traits>&
- operator>>(basic_istream<_CharT, _Traits>& __is,
- piecewise_linear_distribution<_RT>& __x);
-};
-
-template<class _RealType>
-typename piecewise_linear_distribution<_RealType>::param_type &
-piecewise_linear_distribution<_RealType>::param_type::operator=
- (const param_type& __rhs)
-{
-// These can throw
- __b_.reserve (__rhs.__b_.size ());
- __densities_.reserve(__rhs.__densities_.size());
- __areas_.reserve (__rhs.__areas_.size());
-
-// These cannot throw
- __b_ = __rhs.__b_;
- __densities_ = __rhs.__densities_;
- __areas_ = __rhs.__areas_;
- return *this;
-}
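
The reserve-then-assign split gives this assignment the strong exception guarantee: all allocation happens before any member is modified, and the subsequent assignments reuse the reserved capacity, so they cannot throw for a trivially copyable result_type. A minimal sketch of the same idiom with a hypothetical Params type (not from this header, and relying on the same capacity-reuse assumption the comments above make):

    #include <vector>

    struct Params {
      std::vector<double> a, b;
      Params& operator=(const Params& rhs) {
        // May throw, but *this has not been modified yet.
        a.reserve(rhs.a.size());
        b.reserve(rhs.b.size());
        // Capacity already suffices and double copies are trivial,
        // so neither assignment can throw: all-or-nothing semantics.
        a = rhs.a;
        b = rhs.b;
        return *this;
      }
    };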
-
-
-template<class _RealType>
-void
-piecewise_linear_distribution<_RealType>::param_type::__init()
-{
- __areas_.assign(__densities_.size() - 1, result_type());
- result_type _Sp = 0;
- for (size_t __i = 0; __i < __areas_.size(); ++__i)
- {
- __areas_[__i] = (__densities_[__i+1] + __densities_[__i]) *
- (__b_[__i+1] - __b_[__i]) * .5;
- _Sp += __areas_[__i];
- }
- for (size_t __i = __areas_.size(); __i > 1;)
- {
- --__i;
- __areas_[__i] = __areas_[__i-1] / _Sp;
- }
- __areas_[0] = 0;
- for (size_t __i = 1; __i < __areas_.size(); ++__i)
- __areas_[__i] += __areas_[__i-1];
- for (size_t __i = 0; __i < __densities_.size(); ++__i)
- __densities_[__i] /= _Sp;
-}
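
In the notation of __init() above, with breakpoints b_i and raw densities d_i, the trapezoid areas and their normalization are

$$A_i = \tfrac{1}{2}\,(d_i + d_{i+1})\,(b_{i+1} - b_i), \qquad S = \sum_i A_i,$$

after which the shift-and-prefix-sum loops leave __areas_[k] = (1/S) \sum_{i<k} A_i, i.e. the CDF evaluated at b_k, and divide every density by S so the piecewise-linear density integrates to one.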
-
-template<class _RealType>
-piecewise_linear_distribution<_RealType>::param_type::param_type()
- : __b_(2),
- __densities_(2, 1.0),
- __areas_(1, 0.0)
-{
- __b_[1] = 1;
-}
-
-template<class _RealType>
-template<class _InputIteratorB, class _InputIteratorW>
-piecewise_linear_distribution<_RealType>::param_type::param_type(
- _InputIteratorB __fB, _InputIteratorB __lB, _InputIteratorW __fW)
- : __b_(__fB, __lB)
-{
- if (__b_.size() < 2)
- {
- __b_.resize(2);
- __b_[0] = 0;
- __b_[1] = 1;
- __densities_.assign(2, 1.0);
- __areas_.assign(1, 0.0);
- }
- else
- {
- __densities_.reserve(__b_.size());
- for (size_t __i = 0; __i < __b_.size(); ++__i, ++__fW)
- __densities_.push_back(*__fW);
- __init();
- }
-}
-
-#ifndef _LIBCPP_CXX03_LANG
-
-template<class _RealType>
-template<class _UnaryOperation>
-piecewise_linear_distribution<_RealType>::param_type::param_type(
- initializer_list<result_type> __bl, _UnaryOperation __fw)
- : __b_(__bl.begin(), __bl.end())
-{
- if (__b_.size() < 2)
- {
- __b_.resize(2);
- __b_[0] = 0;
- __b_[1] = 1;
- __densities_.assign(2, 1.0);
- __areas_.assign(1, 0.0);
- }
- else
- {
- __densities_.reserve(__b_.size());
- for (size_t __i = 0; __i < __b_.size(); ++__i)
- __densities_.push_back(__fw(__b_[__i]));
- __init();
- }
-}
-
-#endif // _LIBCPP_CXX03_LANG
-
-template<class _RealType>
-template<class _UnaryOperation>
-piecewise_linear_distribution<_RealType>::param_type::param_type(
- size_t __nw, result_type __xmin, result_type __xmax, _UnaryOperation __fw)
- : __b_(__nw == 0 ? 2 : __nw + 1)
-{
- size_t __n = __b_.size() - 1;
- result_type __d = (__xmax - __xmin) / __n;
- __densities_.reserve(__b_.size());
- for (size_t __i = 0; __i < __n; ++__i)
- {
- __b_[__i] = __xmin + __i * __d;
- __densities_.push_back(__fw(__b_[__i]));
- }
- __b_[__n] = __xmax;
- __densities_.push_back(__fw(__b_[__n]));
- __init();
-}
-
-template<class _RealType>
-template<class _URNG>
-_RealType
-piecewise_linear_distribution<_RealType>::operator()(_URNG& __g, const param_type& __p)
-{
- typedef uniform_real_distribution<result_type> _Gen;
- result_type __u = _Gen()(__g);
- ptrdiff_t __k = _VSTD::upper_bound(__p.__areas_.begin(), __p.__areas_.end(),
- __u) - __p.__areas_.begin() - 1;
- __u -= __p.__areas_[__k];
- const result_type __dk = __p.__densities_[__k];
- const result_type __dk1 = __p.__densities_[__k+1];
- const result_type __deltad = __dk1 - __dk;
- const result_type __bk = __p.__b_[__k];
- if (__deltad == 0)
- return __u / __dk + __bk;
- const result_type __bk1 = __p.__b_[__k+1];
- const result_type __deltab = __bk1 - __bk;
- return (__bk * __dk1 - __bk1 * __dk +
- _VSTD::sqrt(__deltab * (__deltab * __dk * __dk + 2 * __deltad * __u))) /
- __deltad;
-}
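
The final expression inverts the CDF on the chosen segment. With t = x - b_k, Δb = b_{k+1} - b_k, Δd = d_{k+1} - d_k, and u already reduced by the segment's starting area, the linear density integrates to

$$u = d_k\,t + \frac{\Delta d}{2\,\Delta b}\,t^{2},$$

and solving for the positive root and substituting x = b_k + t gives

$$x = \frac{b_k d_{k+1} - b_{k+1} d_k + \sqrt{\Delta b\,(\Delta b\,d_k^{2} + 2\,\Delta d\,u)}}{\Delta d},$$

which is exactly the value returned; the separate Δd = 0 branch above handles the constant-density segment where the quadratic degenerates.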
-
-template <class _CharT, class _Traits, class _RT>
-basic_ostream<_CharT, _Traits>&
-operator<<(basic_ostream<_CharT, _Traits>& __os,
- const piecewise_linear_distribution<_RT>& __x)
-{
- __save_flags<_CharT, _Traits> __lx(__os);
- typedef basic_ostream<_CharT, _Traits> _OStream;
- __os.flags(_OStream::dec | _OStream::left | _OStream::fixed |
- _OStream::scientific);
- _CharT __sp = __os.widen(' ');
- __os.fill(__sp);
- size_t __n = __x.__p_.__b_.size();
- __os << __n;
- for (size_t __i = 0; __i < __n; ++__i)
- __os << __sp << __x.__p_.__b_[__i];
- __n = __x.__p_.__densities_.size();
- __os << __sp << __n;
- for (size_t __i = 0; __i < __n; ++__i)
- __os << __sp << __x.__p_.__densities_[__i];
- __n = __x.__p_.__areas_.size();
- __os << __sp << __n;
- for (size_t __i = 0; __i < __n; ++__i)
- __os << __sp << __x.__p_.__areas_[__i];
- return __os;
-}
-
-template <class _CharT, class _Traits, class _RT>
-basic_istream<_CharT, _Traits>&
-operator>>(basic_istream<_CharT, _Traits>& __is,
- piecewise_linear_distribution<_RT>& __x)
-{
- typedef piecewise_linear_distribution<_RT> _Eng;
- typedef typename _Eng::result_type result_type;
- __save_flags<_CharT, _Traits> __lx(__is);
- typedef basic_istream<_CharT, _Traits> _Istream;
- __is.flags(_Istream::dec | _Istream::skipws);
- size_t __n;
- __is >> __n;
- vector<result_type> __b(__n);
- for (size_t __i = 0; __i < __n; ++__i)
- __is >> __b[__i];
- __is >> __n;
- vector<result_type> __densities(__n);
- for (size_t __i = 0; __i < __n; ++__i)
- __is >> __densities[__i];
- __is >> __n;
- vector<result_type> __areas(__n);
- for (size_t __i = 0; __i < __n; ++__i)
- __is >> __areas[__i];
- if (!__is.fail())
- {
- swap(__x.__p_.__b_, __b);
- swap(__x.__p_.__densities_, __densities);
- swap(__x.__p_.__areas_, __areas);
- }
- return __is;
-}
-
-_LIBCPP_END_NAMESPACE_STD
-
-_LIBCPP_POP_MACROS
-
#endif // _LIBCPP_RANDOM
diff --git a/libcxx/include/ranges b/libcxx/include/ranges
index 8a99ee64cfc9..dd7decf66fa8 100644
--- a/libcxx/include/ranges
+++ b/libcxx/include/ranges
@@ -36,7 +36,7 @@ namespace std::ranges {
inline constexpr bool enable_borrowed_range = false;
template<class T>
- using iterator_t = decltype(ranges::begin(declval<R&>()));
+ using iterator_t = decltype(ranges::begin(declval<T&>()));
template<range R>
using sentinel_t = decltype(ranges::end(declval<R&>()));
template<range R>
diff --git a/libcxx/include/string_view b/libcxx/include/string_view
index 0ad7dcce9848..a5f85e88b502 100644
--- a/libcxx/include/string_view
+++ b/libcxx/include/string_view
@@ -87,6 +87,8 @@ namespace std {
constexpr basic_string_view(const charT* str, size_type len);
template <class It, class End>
constexpr basic_string_view(It begin, End end); // C++20
+ template <class Range>
+ constexpr basic_string_view(Range&& r); // C++23
// 7.4, basic_string_view iterator support
constexpr const_iterator begin() const noexcept;
@@ -171,6 +173,8 @@ namespace std {
// basic_string_view deduction guides
template<class It, class End>
basic_string_view(It, End) -> basic_string_view<iter_value_t<It>>; // C++20
+ template<class Range>
+ basic_string_view(Range&&) -> basic_string_view<ranges::range_value_t<Range>>; // C++23
// 7.11, Hash support
template <class T> struct hash;
@@ -191,12 +195,13 @@ namespace std {
*/
-#include <__concepts/convertible_to.h>
-#include <__concepts/same_as.h>
#include <__config>
#include <__debug>
+#include <__ranges/concepts.h>
+#include <__ranges/data.h>
#include <__ranges/enable_borrowed_range.h>
#include <__ranges/enable_view.h>
+#include <__ranges/size.h>
#include <__string>
#include <algorithm>
#include <compare>
@@ -204,6 +209,7 @@ namespace std {
#include <iterator>
#include <limits>
#include <stdexcept>
+#include <type_traits>
#include <version>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -282,7 +288,7 @@ public:
#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_RANGES)
template <contiguous_iterator _It, sized_sentinel_for<_It> _End>
- requires (same_as<iter_value_t<_It>, _CharT> && !convertible_to<_End, size_type>)
+ requires (is_same_v<iter_value_t<_It>, _CharT> && !is_convertible_v<_End, size_type>)
constexpr _LIBCPP_HIDE_FROM_ABI basic_string_view(_It __begin, _End __end)
: __data(_VSTD::to_address(__begin)), __size(__end - __begin)
{
@@ -290,6 +296,25 @@ public:
}
#endif
+#if _LIBCPP_STD_VER > 20 && !defined(_LIBCPP_HAS_NO_RANGES)
+ template <class _Range>
+ requires (
+ !is_same_v<remove_cvref_t<_Range>, basic_string_view> &&
+ ranges::contiguous_range<_Range> &&
+ ranges::sized_range<_Range> &&
+ is_same_v<ranges::range_value_t<_Range>, _CharT> &&
+ !is_convertible_v<_Range, const _CharT*> &&
+ (!requires(remove_cvref_t<_Range>& d) {
+ d.operator _VSTD::basic_string_view<_CharT, _Traits>();
+ }) &&
+ (!requires {
+ typename remove_reference_t<_Range>::traits_type;
+ } || is_same_v<typename remove_reference_t<_Range>::traits_type, _Traits>)
+ )
+ constexpr _LIBCPP_HIDE_FROM_ABI
+ basic_string_view(_Range&& __r) : __data(ranges::data(__r)), __size(ranges::size(__r)) {}
+#endif
+
_LIBCPP_CONSTEXPR _LIBCPP_INLINE_VISIBILITY
basic_string_view(const _CharT* __s)
: __data(__s), __size(_VSTD::__char_traits_length_checked<_Traits>(__s)) {}
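
A minimal usage sketch for the range constructor added above (hypothetical example, not part of this patch; assumes a C++23 library with ranges enabled):

    #include <string_view>
    #include <vector>

    int main() {
      std::vector<char> v{'h', 'i'};
      std::string_view sv(v); // sized contiguous range of char: direct construction
      return sv == "hi" ? 0 : 1;
    }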
@@ -697,6 +722,12 @@ template <contiguous_iterator _It, sized_sentinel_for<_It> _End>
basic_string_view(_It, _End) -> basic_string_view<iter_value_t<_It>>;
#endif
+
+#if _LIBCPP_STD_VER > 20 && !defined(_LIBCPP_HAS_NO_RANGES)
+template <ranges::contiguous_range _Range>
+ basic_string_view(_Range) -> basic_string_view<ranges::range_value_t<_Range>>;
+#endif
+
// [string.view.comparison]
// operator ==
template<class _CharT, class _Traits>
@@ -708,7 +739,9 @@ bool operator==(basic_string_view<_CharT, _Traits> __lhs,
return __lhs.compare(__rhs) == 0;
}
-template<class _CharT, class _Traits>
+// The dummy default template parameters are used to work around a MSVC issue with mangling, see VSO-409326 for details.
+// This applies to the other sufficient overloads below for the other comparison operators.
+template<class _CharT, class _Traits, int = 1>
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
bool operator==(basic_string_view<_CharT, _Traits> __lhs,
typename common_type<basic_string_view<_CharT, _Traits> >::type __rhs) _NOEXCEPT
@@ -717,7 +750,7 @@ bool operator==(basic_string_view<_CharT, _Traits> __lhs,
return __lhs.compare(__rhs) == 0;
}
-template<class _CharT, class _Traits>
+template<class _CharT, class _Traits, int = 2>
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
bool operator==(typename common_type<basic_string_view<_CharT, _Traits> >::type __lhs,
basic_string_view<_CharT, _Traits> __rhs) _NOEXCEPT
@@ -737,7 +770,7 @@ bool operator!=(basic_string_view<_CharT, _Traits> __lhs, basic_string_view<_Cha
return __lhs.compare(__rhs) != 0;
}
-template<class _CharT, class _Traits>
+template<class _CharT, class _Traits, int = 1>
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
bool operator!=(basic_string_view<_CharT, _Traits> __lhs,
typename common_type<basic_string_view<_CharT, _Traits> >::type __rhs) _NOEXCEPT
@@ -747,7 +780,7 @@ bool operator!=(basic_string_view<_CharT, _Traits> __lhs,
return __lhs.compare(__rhs) != 0;
}
-template<class _CharT, class _Traits>
+template<class _CharT, class _Traits, int = 2>
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
bool operator!=(typename common_type<basic_string_view<_CharT, _Traits> >::type __lhs,
basic_string_view<_CharT, _Traits> __rhs) _NOEXCEPT
@@ -766,7 +799,7 @@ bool operator<(basic_string_view<_CharT, _Traits> __lhs, basic_string_view<_Char
return __lhs.compare(__rhs) < 0;
}
-template<class _CharT, class _Traits>
+template<class _CharT, class _Traits, int = 1>
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
bool operator<(basic_string_view<_CharT, _Traits> __lhs,
typename common_type<basic_string_view<_CharT, _Traits> >::type __rhs) _NOEXCEPT
@@ -774,7 +807,7 @@ bool operator<(basic_string_view<_CharT, _Traits> __lhs,
return __lhs.compare(__rhs) < 0;
}
-template<class _CharT, class _Traits>
+template<class _CharT, class _Traits, int = 2>
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
bool operator<(typename common_type<basic_string_view<_CharT, _Traits> >::type __lhs,
basic_string_view<_CharT, _Traits> __rhs) _NOEXCEPT
@@ -791,7 +824,7 @@ bool operator> (basic_string_view<_CharT, _Traits> __lhs, basic_string_view<_Cha
return __lhs.compare(__rhs) > 0;
}
-template<class _CharT, class _Traits>
+template<class _CharT, class _Traits, int = 1>
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
bool operator>(basic_string_view<_CharT, _Traits> __lhs,
typename common_type<basic_string_view<_CharT, _Traits> >::type __rhs) _NOEXCEPT
@@ -799,7 +832,7 @@ bool operator>(basic_string_view<_CharT, _Traits> __lhs,
return __lhs.compare(__rhs) > 0;
}
-template<class _CharT, class _Traits>
+template<class _CharT, class _Traits, int = 2>
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
bool operator>(typename common_type<basic_string_view<_CharT, _Traits> >::type __lhs,
basic_string_view<_CharT, _Traits> __rhs) _NOEXCEPT
@@ -816,7 +849,7 @@ bool operator<=(basic_string_view<_CharT, _Traits> __lhs, basic_string_view<_Cha
return __lhs.compare(__rhs) <= 0;
}
-template<class _CharT, class _Traits>
+template<class _CharT, class _Traits, int = 1>
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
bool operator<=(basic_string_view<_CharT, _Traits> __lhs,
typename common_type<basic_string_view<_CharT, _Traits> >::type __rhs) _NOEXCEPT
@@ -824,7 +857,7 @@ bool operator<=(basic_string_view<_CharT, _Traits> __lhs,
return __lhs.compare(__rhs) <= 0;
}
-template<class _CharT, class _Traits>
+template<class _CharT, class _Traits, int = 2>
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
bool operator<=(typename common_type<basic_string_view<_CharT, _Traits> >::type __lhs,
basic_string_view<_CharT, _Traits> __rhs) _NOEXCEPT
@@ -842,7 +875,7 @@ bool operator>=(basic_string_view<_CharT, _Traits> __lhs, basic_string_view<_Cha
}
-template<class _CharT, class _Traits>
+template<class _CharT, class _Traits, int = 1>
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
bool operator>=(basic_string_view<_CharT, _Traits> __lhs,
typename common_type<basic_string_view<_CharT, _Traits> >::type __rhs) _NOEXCEPT
@@ -850,7 +883,7 @@ bool operator>=(basic_string_view<_CharT, _Traits> __lhs,
return __lhs.compare(__rhs) >= 0;
}
-template<class _CharT, class _Traits>
+template<class _CharT, class _Traits, int = 2>
_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_INLINE_VISIBILITY
bool operator>=(typename common_type<basic_string_view<_CharT, _Traits> >::type __lhs,
basic_string_view<_CharT, _Traits> __rhs) _NOEXCEPT
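
For reference, the common_type overloads that the dummy template parameters disambiguate exist because template argument deduction never considers conversions: taking one side as the non-deduced common_type lets convertible types such as std::string or const char* participate. A small sketch (not from the patch):

    #include <string>
    #include <string_view>

    int main() {
      std::string s = "abc";
      std::string_view sv = "abc";
      // _CharT/_Traits deduce from the string_view side; the other side
      // converts through the non-deduced common_type parameter.
      return (sv == s && s == sv) ? 0 : 1;
    }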
diff --git a/libcxx/include/type_traits b/libcxx/include/type_traits
index e9d5e06f36dc..bfb6fcb05134 100644
--- a/libcxx/include/type_traits
+++ b/libcxx/include/type_traits
@@ -1416,9 +1416,7 @@ template<class _Tp> using type_identity_t = typename type_identity<_Tp>::type;
// is_signed
-// Before Clang 10, __is_signed didn't work for floating-point types or enums.
-#if __has_keyword(__is_signed) && \
- !(defined(_LIBCPP_CLANG_VER) && _LIBCPP_CLANG_VER < 1000)
+#if __has_keyword(__is_signed)
template<class _Tp>
struct _LIBCPP_TEMPLATE_VIS is_signed : _BoolConstant<__is_signed(_Tp)> { };
diff --git a/libcxx/include/utility b/libcxx/include/utility
index 2b3c4dfa3f0e..4fa90289a412 100644
--- a/libcxx/include/utility
+++ b/libcxx/include/utility
@@ -227,6 +227,7 @@ template <class T>
#include <__utility/move.h>
#include <__utility/pair.h>
#include <__utility/piecewise_construct.h>
+#include <__utility/priority_tag.h>
#include <__utility/rel_ops.h>
#include <__utility/swap.h>
#include <__utility/to_underlying.h>
diff --git a/libcxx/include/vector b/libcxx/include/vector
index e41afbaca509..9b0092cfdbd9 100644
--- a/libcxx/include/vector
+++ b/libcxx/include/vector
@@ -350,23 +350,23 @@ class _LIBCPP_TEMPLATE_VIS vector
: private __vector_base<_Tp, _Allocator>
{
private:
- typedef __vector_base<_Tp, _Allocator> __base;
- typedef allocator<_Tp> __default_allocator_type;
+ typedef __vector_base<_Tp, _Allocator> __base;
+ typedef allocator<_Tp> __default_allocator_type;
public:
- typedef vector __self;
- typedef _Tp value_type;
- typedef _Allocator allocator_type;
- typedef allocator_traits<allocator_type> __alloc_traits;
- typedef value_type& reference;
- typedef const value_type& const_reference;
- typedef typename __allocator_traits<allocator_type>::size_type size_type;
- typedef typename __alloc_traits::difference_type difference_type;
- typedef typename __alloc_traits::pointer pointer;
- typedef typename __alloc_traits::const_pointer const_pointer;
- typedef __wrap_iter<pointer> iterator;
- typedef __wrap_iter<const_pointer> const_iterator;
- typedef _VSTD::reverse_iterator<iterator> reverse_iterator;
- typedef _VSTD::reverse_iterator<const_iterator> const_reverse_iterator;
+ typedef vector __self;
+ typedef _Tp value_type;
+ typedef _Allocator allocator_type;
+ typedef allocator_traits<allocator_type> __alloc_traits;
+ typedef value_type& reference;
+ typedef const value_type& const_reference;
+ typedef typename __alloc_traits::size_type size_type;
+ typedef typename __alloc_traits::difference_type difference_type;
+ typedef typename __alloc_traits::pointer pointer;
+ typedef typename __alloc_traits::const_pointer const_pointer;
+ typedef __wrap_iter<pointer> iterator;
+ typedef __wrap_iter<const_pointer> const_iterator;
+ typedef _VSTD::reverse_iterator<iterator> reverse_iterator;
+ typedef _VSTD::reverse_iterator<const_iterator> const_reverse_iterator;
static_assert((is_same<typename allocator_type::value_type, value_type>::value),
"Allocator::value_type must be same type as value_type");
@@ -395,7 +395,21 @@ public:
explicit vector(size_type __n, const allocator_type& __a);
#endif
vector(size_type __n, const value_type& __x);
- vector(size_type __n, const value_type& __x, const allocator_type& __a);
+
+ template <class = __enable_if_t<__is_allocator<_Allocator>::value> >
+ vector(size_type __n, const value_type& __x, const allocator_type& __a)
+ : __base(__a)
+ {
+#if _LIBCPP_DEBUG_LEVEL == 2
+ __get_db()->__insert_c(this);
+#endif
+ if (__n > 0)
+ {
+ __vallocate(__n);
+ __construct_at_end(__n, __x);
+ }
+ }
+
template <class _InputIterator>
vector(_InputIterator __first,
typename enable_if<__is_cpp17_input_iterator <_InputIterator>::value &&
@@ -1127,20 +1141,6 @@ vector<_Tp, _Allocator>::vector(size_type __n, const value_type& __x)
}
template <class _Tp, class _Allocator>
-vector<_Tp, _Allocator>::vector(size_type __n, const value_type& __x, const allocator_type& __a)
- : __base(__a)
-{
-#if _LIBCPP_DEBUG_LEVEL == 2
- __get_db()->__insert_c(this);
-#endif
- if (__n > 0)
- {
- __vallocate(__n);
- __construct_at_end(__n, __x);
- }
-}
-
-template <class _Tp, class _Allocator>
template <class _InputIterator>
vector<_Tp, _Allocator>::vector(_InputIterator __first,
typename enable_if<__is_cpp17_input_iterator <_InputIterator>::value &&
diff --git a/libcxx/include/version b/libcxx/include/version
index 7c16ac85e430..9322c3b8c05d 100644
--- a/libcxx/include/version
+++ b/libcxx/include/version
@@ -41,6 +41,7 @@ __cpp_lib_bool_constant 201505L <type_traits>
__cpp_lib_bounded_array_traits 201902L <type_traits>
__cpp_lib_boyer_moore_searcher 201603L <functional>
__cpp_lib_byte 201603L <cstddef>
+__cpp_lib_byteswap 202110L <bit>
__cpp_lib_char8_t 201811L <atomic> <filesystem> <istream>
<limits> <locale> <ostream>
<string> <string_view>
@@ -72,7 +73,7 @@ __cpp_lib_exchange_function 201304L <utility>
__cpp_lib_execution 201902L <execution>
201603L // C++17
__cpp_lib_filesystem 201703L <filesystem>
-__cpp_lib_format 201907L <format>
+__cpp_lib_format 202106L <format>
__cpp_lib_gcd_lcm 201606L <numeric>
__cpp_lib_generic_associative_lookup 201304L <map> <set>
__cpp_lib_generic_unordered_lookup 201811L <unordered_map> <unordered_set>
@@ -300,7 +301,7 @@ __cpp_lib_void_t 201411L <type_traits>
# undef __cpp_lib_execution
// # define __cpp_lib_execution 201902L
# if !defined(_LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_format)
-// # define __cpp_lib_format 201907L
+// # define __cpp_lib_format 202106L
# endif
# define __cpp_lib_generic_unordered_lookup 201811L
# define __cpp_lib_int_pow2 202002L
@@ -344,6 +345,7 @@ __cpp_lib_void_t 201411L <type_traits>
#endif
#if _LIBCPP_STD_VER > 20
+# define __cpp_lib_byteswap 202110L
# define __cpp_lib_is_scoped_enum 202011L
// # define __cpp_lib_stacktrace 202011L
// # define __cpp_lib_stdatomic_h 202011L
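
A minimal sketch of exercising the newly advertised feature-test macro (hypothetical example, not part of the patch):

    #include <bit>
    #include <cstdint>

    int main() {
    #if defined(__cpp_lib_byteswap) && __cpp_lib_byteswap >= 202110L
      // std::byteswap is constexpr, so this is checked at compile time.
      static_assert(std::byteswap(std::uint16_t{0x1234}) == 0x3412);
    #endif
      return 0;
    }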
diff --git a/libcxx/src/filesystem/filesystem_common.h b/libcxx/src/filesystem/filesystem_common.h
index 70092fe4e24d..a2c340e61083 100644
--- a/libcxx/src/filesystem/filesystem_common.h
+++ b/libcxx/src/filesystem/filesystem_common.h
@@ -60,7 +60,7 @@ errc __win_err_to_errc(int err);
namespace {
-static _LIBCPP_FORMAT_PRINTF(1, 0) string
+static _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 1, 0) string
format_string_impl(const char* msg, va_list ap) {
array<char, 256> buf;
@@ -84,7 +84,7 @@ format_string_impl(const char* msg, va_list ap) {
return result;
}
-static _LIBCPP_FORMAT_PRINTF(1, 2) string
+static _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 1, 2) string
format_string(const char* msg, ...) {
string ret;
va_list ap;
@@ -172,7 +172,7 @@ struct ErrorHandler {
_LIBCPP_UNREACHABLE();
}
- _LIBCPP_FORMAT_PRINTF(3, 0)
+ _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 3, 0)
void report_impl(const error_code& ec, const char* msg, va_list ap) const {
if (ec_) {
*ec_ = ec;
@@ -191,7 +191,7 @@ struct ErrorHandler {
_LIBCPP_UNREACHABLE();
}
- _LIBCPP_FORMAT_PRINTF(3, 4)
+ _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 3, 4)
T report(const error_code& ec, const char* msg, ...) const {
va_list ap;
va_start(ap, msg);
@@ -213,7 +213,7 @@ struct ErrorHandler {
return report(make_error_code(err));
}
- _LIBCPP_FORMAT_PRINTF(3, 4)
+ _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 3, 4)
T report(errc const& err, const char* msg, ...) const {
va_list ap;
va_start(ap, msg);
diff --git a/libunwind/src/Unwind-EHABI.cpp b/libunwind/src/Unwind-EHABI.cpp
index d3577c9f7cf8..5959d2a25fea 100644
--- a/libunwind/src/Unwind-EHABI.cpp
+++ b/libunwind/src/Unwind-EHABI.cpp
@@ -187,9 +187,14 @@ static _Unwind_Reason_Code unwindOneFrame(_Unwind_State state,
if (result != _URC_CONTINUE_UNWIND)
return result;
- if (__unw_step(reinterpret_cast<unw_cursor_t *>(context)) != UNW_STEP_SUCCESS)
+ switch (__unw_step(reinterpret_cast<unw_cursor_t *>(context))) {
+ case UNW_STEP_SUCCESS:
+ return _URC_CONTINUE_UNWIND;
+ case UNW_STEP_END:
+ return _URC_END_OF_STACK;
+ default:
return _URC_FAILURE;
- return _URC_CONTINUE_UNWIND;
+ }
}
// Generates mask discriminator for _Unwind_VRS_Pop, e.g. for _UVRSC_CORE /
@@ -678,12 +683,13 @@ static _Unwind_Reason_Code
unwind_phase2_forced(unw_context_t *uc, unw_cursor_t *cursor,
_Unwind_Exception *exception_object, _Unwind_Stop_Fn stop,
void *stop_parameter) {
+ bool endOfStack = false;
// See comment at the start of unwind_phase1 regarding VRS integrity.
__unw_init_local(cursor, uc);
_LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p)",
static_cast<void *>(exception_object));
// Walk each frame until we reach where search phase said to stop
- while (true) {
+ while (!endOfStack) {
// Update info about this frame.
unw_proc_info_t frameInfo;
if (__unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) {
@@ -756,6 +762,14 @@ unwind_phase2_forced(unw_context_t *uc, unw_cursor_t *cursor,
// We may get control back if landing pad calls _Unwind_Resume().
__unw_resume(cursor);
break;
+ case _URC_END_OF_STACK:
+ _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): "
+ "personality returned "
+ "_URC_END_OF_STACK",
+ (void *)exception_object);
+ // Personality routine did the step and it can't step forward.
+ endOfStack = true;
+ break;
default:
// Personality routine returned an unknown result code.
_LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): "
@@ -1133,9 +1147,14 @@ extern "C" _LIBUNWIND_EXPORT _Unwind_Reason_Code
__gnu_unwind_frame(_Unwind_Exception *exception_object,
struct _Unwind_Context *context) {
unw_cursor_t *cursor = (unw_cursor_t *)context;
- if (__unw_step(cursor) != UNW_STEP_SUCCESS)
+ switch (__unw_step(cursor)) {
+ case UNW_STEP_SUCCESS:
+ return _URC_OK;
+ case UNW_STEP_END:
+ return _URC_END_OF_STACK;
+ default:
return _URC_FAILURE;
- return _URC_OK;
+ }
}
#endif // defined(_LIBUNWIND_ARM_EHABI)
diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp
index 9f6dbd172509..0bff11f450d1 100644
--- a/lld/COFF/Chunks.cpp
+++ b/lld/COFF/Chunks.cpp
@@ -214,7 +214,8 @@ void SectionChunk::applyRelARM(uint8_t *off, uint16_t type, OutputSection *os,
// the page offset from the current instruction to the target.
void applyArm64Addr(uint8_t *off, uint64_t s, uint64_t p, int shift) {
uint32_t orig = read32le(off);
- uint64_t imm = ((orig >> 29) & 0x3) | ((orig >> 3) & 0x1FFFFC);
+ int64_t imm =
+ SignExtend64<21>(((orig >> 29) & 0x3) | ((orig >> 3) & 0x1FFFFC));
s += imm;
imm = (s >> shift) - (p >> shift);
uint32_t immLo = (imm & 0x3) << 29;
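
The fix above matters because the 21-bit immediate already present in an ADRP-style instruction is signed; reading it into a uint64_t as the old code did loses the sign of negative page deltas. A standalone sketch of the 21-bit sign extension that llvm::SignExtend64<21> performs (hypothetical helper, not from the patch; relies on arithmetic right shift of negative values, well-defined since C++20):

    #include <cstdint>

    constexpr int64_t signExtend21(uint64_t x) {
      // Shift the 21-bit field to the top, then arithmetic-shift back down.
      return static_cast<int64_t>(x << (64 - 21)) >> (64 - 21);
    }

    static_assert(signExtend21(0x1FFFFF) == -1, "all-ones 21-bit value is -1");
    static_assert(signExtend21(0x0FFFFF) == 0xFFFFF, "positive values unchanged");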
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index 600d14034dea..0788f3519f4e 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -1211,6 +1211,12 @@ void Writer::createSymbolAndStringTable() {
if (!d || d->writtenToSymtab)
continue;
d->writtenToSymtab = true;
+ if (auto *dc = dyn_cast_or_null<DefinedCOFF>(d)) {
+ COFFSymbolRef symRef = dc->getCOFFSymbol();
+ if (symRef.isSectionDefinition() ||
+ symRef.getStorageClass() == COFF::IMAGE_SYM_CLASS_LABEL)
+ continue;
+ }
if (Optional<coff_symbol16> sym = createSymbol(d))
outputSymtab.push_back(*sym);
diff --git a/lld/ELF/AArch64ErrataFix.cpp b/lld/ELF/AArch64ErrataFix.cpp
index b9fd4cdbad69..741ff26a7e6c 100644
--- a/lld/ELF/AArch64ErrataFix.cpp
+++ b/lld/ELF/AArch64ErrataFix.cpp
@@ -630,8 +630,8 @@ bool AArch64Err843419Patcher::createFixes() {
for (OutputSection *os : outputSections) {
if (!(os->flags & SHF_ALLOC) || !(os->flags & SHF_EXECINSTR))
continue;
- for (BaseCommand *bc : os->sectionCommands)
- if (auto *isd = dyn_cast<InputSectionDescription>(bc)) {
+ for (SectionCommand *cmd : os->commands)
+ if (auto *isd = dyn_cast<InputSectionDescription>(cmd)) {
std::vector<Patch843419Section *> patches =
patchInputSectionDescription(*isd);
if (!patches.empty()) {
diff --git a/lld/ELF/ARMErrataFix.cpp b/lld/ELF/ARMErrataFix.cpp
index 77623780ffa5..fe6ec09bd979 100644
--- a/lld/ELF/ARMErrataFix.cpp
+++ b/lld/ELF/ARMErrataFix.cpp
@@ -525,8 +525,8 @@ bool ARMErr657417Patcher::createFixes() {
for (OutputSection *os : outputSections) {
if (!(os->flags & SHF_ALLOC) || !(os->flags & SHF_EXECINSTR))
continue;
- for (BaseCommand *bc : os->sectionCommands)
- if (auto *isd = dyn_cast<InputSectionDescription>(bc)) {
+ for (SectionCommand *cmd : os->commands)
+ if (auto *isd = dyn_cast<InputSectionDescription>(cmd)) {
std::vector<Patch657417Section *> patches =
patchInputSectionDescription(*isd);
if (!patches.empty()) {
diff --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp
index f2e4a2a14ad6..b7c2eb74757c 100644
--- a/lld/ELF/Arch/ARM.cpp
+++ b/lld/ELF/Arch/ARM.cpp
@@ -140,7 +140,16 @@ RelExpr ARM::getRelExpr(RelType type, const Symbol &s,
case R_ARM_THM_MOVT_PREL:
return R_PC;
case R_ARM_ALU_PC_G0:
+ case R_ARM_ALU_PC_G0_NC:
+ case R_ARM_ALU_PC_G1:
+ case R_ARM_ALU_PC_G1_NC:
+ case R_ARM_ALU_PC_G2:
case R_ARM_LDR_PC_G0:
+ case R_ARM_LDR_PC_G1:
+ case R_ARM_LDR_PC_G2:
+ case R_ARM_LDRS_PC_G0:
+ case R_ARM_LDRS_PC_G1:
+ case R_ARM_LDRS_PC_G2:
case R_ARM_THM_ALU_PREL_11_0:
case R_ARM_THM_PC8:
case R_ARM_THM_PC12:
@@ -411,56 +420,83 @@ static void stateChangeWarning(uint8_t *loc, RelType relt, const Symbol &s) {
}
}
-// Utility functions taken from ARMAddressingModes.h, only changes are LLD
-// coding style.
-
// Rotate a 32-bit unsigned value right by a specified amt of bits.
static uint32_t rotr32(uint32_t val, uint32_t amt) {
assert(amt < 32 && "Invalid rotate amount");
return (val >> amt) | (val << ((32 - amt) & 31));
}
-// Rotate a 32-bit unsigned value left by a specified amt of bits.
-static uint32_t rotl32(uint32_t val, uint32_t amt) {
- assert(amt < 32 && "Invalid rotate amount");
- return (val << amt) | (val >> ((32 - amt) & 31));
+static std::pair<uint32_t, uint32_t> getRemAndLZForGroup(unsigned group,
+ uint32_t val) {
+ uint32_t rem, lz;
+ do {
+ lz = llvm::countLeadingZeros(val) & ~1;
+ rem = val;
+ if (lz == 32) // implies rem == 0
+ break;
+ val &= 0xffffff >> lz;
+ } while (group--);
+ return {rem, lz};
}
-// Try to encode a 32-bit unsigned immediate imm with an immediate shifter
-// operand, this form is an 8-bit immediate rotated right by an even number of
-// bits. We compute the rotate amount to use. If this immediate value cannot be
-// handled with a single shifter-op, determine a good rotate amount that will
-// take a maximal chunk of bits out of the immediate.
-static uint32_t getSOImmValRotate(uint32_t imm) {
- // 8-bit (or less) immediates are trivially shifter_operands with a rotate
- // of zero.
- if ((imm & ~255U) == 0)
- return 0;
-
- // Use CTZ to compute the rotate amount.
- unsigned tz = llvm::countTrailingZeros(imm);
-
- // Rotate amount must be even. Something like 0x200 must be rotated 8 bits,
- // not 9.
- unsigned rotAmt = tz & ~1;
-
- // If we can handle this spread, return it.
- if ((rotr32(imm, rotAmt) & ~255U) == 0)
- return (32 - rotAmt) & 31; // HW rotates right, not left.
+static void encodeAluGroup(uint8_t *loc, const Relocation &rel, uint64_t val,
+ int group, bool check) {
+ // ADD/SUB (immediate) add = bit23, sub = bit22
+ // the immediate field carries a 12-bit modified immediate, made up of a 4-bit
+ // even rotate right and an 8-bit immediate.
+ uint32_t opcode = 0x00800000;
+ if (val >> 63) {
+ opcode = 0x00400000;
+ val = -val;
+ }
+ uint32_t imm, lz;
+ std::tie(imm, lz) = getRemAndLZForGroup(group, val);
+ uint32_t rot = 0;
+ if (lz < 24) {
+ imm = rotr32(imm, 24 - lz);
+ rot = (lz + 8) << 7;
+ }
+ if (check && imm > 0xff)
+ error(getErrorLocation(loc) + "unencodeable immediate " + Twine(val).str() +
+ " for relocation " + toString(rel.type));
+ write32le(loc, (read32le(loc) & 0xff3ff000) | opcode | rot | (imm & 0xff));
+}
- // For values like 0xF000000F, we should ignore the low 6 bits, then
- // retry the hunt.
- if (imm & 63U) {
- unsigned tz2 = countTrailingZeros(imm & ~63U);
- unsigned rotAmt2 = tz2 & ~1;
- if ((rotr32(imm, rotAmt2) & ~255U) == 0)
- return (32 - rotAmt2) & 31; // HW rotates right, not left.
+static void encodeLdrGroup(uint8_t *loc, const Relocation &rel, uint64_t val,
+ int group) {
+ // R_ARM_LDR_PC_Gn is S + A - P, we have ((S + A) | T) - P, if S is a
+ // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear
+ // bottom bit to recover S + A - P.
+ if (rel.sym->isFunc())
+ val &= ~0x1;
+ // LDR (literal) u = bit23
+ uint32_t opcode = 0x00800000;
+ if (val >> 63) {
+ opcode = 0x0;
+ val = -val;
}
+ uint32_t imm = getRemAndLZForGroup(group, val).first;
+ checkUInt(loc, imm, 12, rel);
+ write32le(loc, (read32le(loc) & 0xff7ff000) | opcode | imm);
+}
- // Otherwise, we have no way to cover this span of bits with a single
- // shifter_op immediate. Return a chunk of bits that will be useful to
- // handle.
- return (32 - rotAmt) & 31; // HW rotates right, not left.
+static void encodeLdrsGroup(uint8_t *loc, const Relocation &rel, uint64_t val,
+ int group) {
+ // R_ARM_LDRS_PC_Gn is S + A - P, we have ((S + A) | T) - P, if S is a
+ // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear
+ // bottom bit to recover S + A - P.
+ if (rel.sym->isFunc())
+ val &= ~0x1;
+ // LDRD/LDRH/LDRSB/LDRSH (literal) u = bit23
+ uint32_t opcode = 0x00800000;
+ if (val >> 63) {
+ opcode = 0x0;
+ val = -val;
+ }
+ uint32_t imm = getRemAndLZForGroup(group, val).first;
+ checkUInt(loc, imm, 8, rel);
+ write32le(loc, (read32le(loc) & 0xff7ff0f0) | opcode | ((imm & 0xf0) << 4) |
+ (imm & 0xf));
}
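
All of the Gn encoders above share getRemAndLZForGroup: group n receives the value with n most-significant 8-bit chunks already stripped, each chunk aligned to an even bit position because ARM modified immediates rotate by even amounts, and the helper returns that remainder plus its even leading-zero count. A standalone sketch of the computation (hypothetical names, not from the patch; __builtin_clz stands in for llvm::countLeadingZeros, which returns 32 for zero):

    #include <cassert>
    #include <cstdint>
    #include <utility>

    static unsigned clz32(uint32_t v) { return v ? __builtin_clz(v) : 32; }

    static std::pair<uint32_t, unsigned> remAndLZForGroup(unsigned group, uint32_t val) {
      uint32_t rem;
      unsigned lz;
      do {
        lz = clz32(val) & ~1u; // even position => expressible as an even rotation
        rem = val;
        if (lz == 32)          // implies rem == 0
          break;
        val &= 0xffffff >> lz; // strip the most significant 8-bit chunk
      } while (group--);
      return {rem, lz};
    }

    int main() {
      // Each successive group peels one aligned chunk off the top:
      assert(remAndLZForGroup(0, 0x12345678).first == 0x12345678);
      assert(remAndLZForGroup(1, 0x12345678).first == 0x00345678);
      assert(remAndLZForGroup(2, 0x12345678).first == 0x00001678);
      return 0;
    }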
void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
@@ -633,45 +669,39 @@ void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
((val << 4) & 0x7000) | // imm3
(val & 0x00ff)); // imm8
break;
- case R_ARM_ALU_PC_G0: {
- // ADR (literal) add = bit23, sub = bit22
- // literal is a 12-bit modified immediate, made up of a 4-bit even rotate
- // right and an 8-bit immediate. The code-sequence here is derived from
- // ARMAddressingModes.h in llvm/Target/ARM/MCTargetDesc. In our case we
- // want to give an error if we cannot encode the constant.
- uint32_t opcode = 0x00800000;
- if (val >> 63) {
- opcode = 0x00400000;
- val = ~val + 1;
- }
- if ((val & ~255U) != 0) {
- uint32_t rotAmt = getSOImmValRotate(val);
- // Error if we cannot encode this with a single shift
- if (rotr32(~255U, rotAmt) & val)
- error(getErrorLocation(loc) + "unencodeable immediate " +
- Twine(val).str() + " for relocation " + toString(rel.type));
- val = rotl32(val, rotAmt) | ((rotAmt >> 1) << 8);
- }
- write32le(loc, (read32le(loc) & 0xff0ff000) | opcode | val);
+ case R_ARM_ALU_PC_G0:
+ encodeAluGroup(loc, rel, val, 0, true);
break;
- }
- case R_ARM_LDR_PC_G0: {
- // R_ARM_LDR_PC_G0 is S + A - P, we have ((S + A) | T) - P, if S is a
- // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear
- // bottom bit to recover S + A - P.
- if (rel.sym->isFunc())
- val &= ~0x1;
- // LDR (literal) u = bit23
- int64_t imm = val;
- uint32_t u = 0x00800000;
- if (imm < 0) {
- imm = -imm;
- u = 0;
- }
- checkUInt(loc, imm, 12, rel);
- write32le(loc, (read32le(loc) & 0xff7ff000) | u | imm);
+ case R_ARM_ALU_PC_G0_NC:
+ encodeAluGroup(loc, rel, val, 0, false);
+ break;
+ case R_ARM_ALU_PC_G1:
+ encodeAluGroup(loc, rel, val, 1, true);
+ break;
+ case R_ARM_ALU_PC_G1_NC:
+ encodeAluGroup(loc, rel, val, 1, false);
+ break;
+ case R_ARM_ALU_PC_G2:
+ encodeAluGroup(loc, rel, val, 2, true);
+ break;
+ case R_ARM_LDR_PC_G0:
+ encodeLdrGroup(loc, rel, val, 0);
+ break;
+ case R_ARM_LDR_PC_G1:
+ encodeLdrGroup(loc, rel, val, 1);
+ break;
+ case R_ARM_LDR_PC_G2:
+ encodeLdrGroup(loc, rel, val, 2);
+ break;
+ case R_ARM_LDRS_PC_G0:
+ encodeLdrsGroup(loc, rel, val, 0);
+ break;
+ case R_ARM_LDRS_PC_G1:
+ encodeLdrsGroup(loc, rel, val, 1);
+ break;
+ case R_ARM_LDRS_PC_G2:
+ encodeLdrsGroup(loc, rel, val, 2);
break;
- }
case R_ARM_THM_ALU_PREL_11_0: {
// ADR encoding T2 (sub), T3 (add) i:imm3:imm8
int64_t imm = val;
@@ -816,7 +846,11 @@ int64_t ARM::getImplicitAddend(const uint8_t *buf, RelType type) const {
((lo & 0x7000) >> 4) | // imm3
(lo & 0x00ff)); // imm8
}
- case R_ARM_ALU_PC_G0: {
+ case R_ARM_ALU_PC_G0:
+ case R_ARM_ALU_PC_G0_NC:
+ case R_ARM_ALU_PC_G1:
+ case R_ARM_ALU_PC_G1_NC:
+ case R_ARM_ALU_PC_G2: {
// 12-bit immediate is a modified immediate made up of a 4-bit even
// right rotation and 8-bit constant. After the rotation the value
// is zero-extended. When bit 23 is set the instruction is an add, when
@@ -825,13 +859,25 @@ int64_t ARM::getImplicitAddend(const uint8_t *buf, RelType type) const {
uint32_t val = rotr32(instr & 0xff, ((instr & 0xf00) >> 8) * 2);
return (instr & 0x00400000) ? -val : val;
}
- case R_ARM_LDR_PC_G0: {
+ case R_ARM_LDR_PC_G0:
+ case R_ARM_LDR_PC_G1:
+ case R_ARM_LDR_PC_G2: {
// ADR (literal) add = bit23, sub = bit22
// LDR (literal) u = bit23 unsigned imm12
bool u = read32le(buf) & 0x00800000;
uint32_t imm12 = read32le(buf) & 0xfff;
return u ? imm12 : -imm12;
}
+ case R_ARM_LDRS_PC_G0:
+ case R_ARM_LDRS_PC_G1:
+ case R_ARM_LDRS_PC_G2: {
+ // LDRD/LDRH/LDRSB/LDRSH (literal) u = bit23 unsigned imm8
+ uint32_t opcode = read32le(buf);
+ bool u = opcode & 0x00800000;
+ uint32_t imm4l = opcode & 0xf;
+ uint32_t imm4h = (opcode & 0xf00) >> 4;
+ return u ? (imm4h | imm4l) : -(imm4h | imm4l);
+ }
case R_ARM_THM_ALU_PREL_11_0: {
// Thumb2 ADR, which is an alias for a sub or add instruction with an
// unsigned immediate.
diff --git a/lld/ELF/Arch/Hexagon.cpp b/lld/ELF/Arch/Hexagon.cpp
index 300ca675519f..c33bd935f363 100644
--- a/lld/ELF/Arch/Hexagon.cpp
+++ b/lld/ELF/Arch/Hexagon.cpp
@@ -146,7 +146,7 @@ RelExpr Hexagon::getRelExpr(RelType type, const Symbol &s,
case R_HEX_IE_GOT_32_6_X:
case R_HEX_IE_GOT_HI16:
case R_HEX_IE_GOT_LO16:
- config->hasStaticTlsModel = true;
+ config->hasTlsIe = true;
return R_GOTPLT;
case R_HEX_TPREL_11_X:
case R_HEX_TPREL_16:
diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp
index 5ee9e4185f1a..a0ea403e241d 100644
--- a/lld/ELF/Arch/RISCV.cpp
+++ b/lld/ELF/Arch/RISCV.cpp
@@ -261,7 +261,7 @@ RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s,
case R_RISCV_TLS_GD_HI20:
return R_TLSGD_PC;
case R_RISCV_TLS_GOT_HI20:
- config->hasStaticTlsModel = true;
+ config->hasTlsIe = true;
return R_GOT_PC;
case R_RISCV_TPREL_HI20:
case R_RISCV_TPREL_LO12_I:
diff --git a/lld/ELF/Arch/X86.cpp b/lld/ELF/Arch/X86.cpp
index 5d34b769e80e..2560dc883257 100644
--- a/lld/ELF/Arch/X86.cpp
+++ b/lld/ELF/Arch/X86.cpp
@@ -78,13 +78,8 @@ int X86::getTlsGdRelaxSkip(RelType type) const {
RelExpr X86::getRelExpr(RelType type, const Symbol &s,
const uint8_t *loc) const {
- // There are 4 different TLS variable models with varying degrees of
- // flexibility and performance. LocalExec and InitialExec models are fast but
- // less-flexible models. If they are in use, we set DF_STATIC_TLS flag in the
- // dynamic section to let runtime know about that.
- if (type == R_386_TLS_LE || type == R_386_TLS_LE_32 || type == R_386_TLS_IE ||
- type == R_386_TLS_GOTIE)
- config->hasStaticTlsModel = true;
+ if (type == R_386_TLS_IE || type == R_386_TLS_GOTIE)
+ config->hasTlsIe = true;
switch (type) {
case R_386_8:
diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp
index 40436752399b..614b5ed59218 100644
--- a/lld/ELF/Arch/X86_64.cpp
+++ b/lld/ELF/Arch/X86_64.cpp
@@ -99,7 +99,11 @@ X86_64::X86_64() {
defaultImageBase = 0x200000;
}
-int X86_64::getTlsGdRelaxSkip(RelType type) const { return 2; }
+int X86_64::getTlsGdRelaxSkip(RelType type) const {
+ // TLSDESC relocations are processed separately. See relaxTlsGdToLe below.
+ return type == R_X86_64_GOTPC32_TLSDESC || type == R_X86_64_TLSDESC_CALL ? 1
+ : 2;
+}
// Opcodes for the different X86_64 jmp instructions.
enum JmpInsnOpcode : uint32_t {
@@ -314,7 +318,7 @@ bool X86_64::deleteFallThruJmpInsn(InputSection &is, InputFile *file,
RelExpr X86_64::getRelExpr(RelType type, const Symbol &s,
const uint8_t *loc) const {
if (type == R_X86_64_GOTTPOFF)
- config->hasStaticTlsModel = true;
+ config->hasTlsIe = true;
switch (type) {
case R_X86_64_8:
@@ -443,24 +447,24 @@ void X86_64::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
// The original code used a pc relative relocation and so we have to
// compensate for the -4 we had in the addend.
write32le(loc + 8, val + 4);
- } else {
- // Convert
- // lea x@tlsgd(%rip), %rax
- // call *(%rax)
- // to the following two instructions.
- assert(rel.type == R_X86_64_GOTPC32_TLSDESC);
- if (memcmp(loc - 3, "\x48\x8d\x05", 3)) {
- error(getErrorLocation(loc - 3) + "R_X86_64_GOTPC32_TLSDESC must be used "
- "in callq *x@tlsdesc(%rip), %rax");
+ } else if (rel.type == R_X86_64_GOTPC32_TLSDESC) {
+ // Convert leaq x@tlsdesc(%rip), %REG to movq $x@tpoff, %REG.
+ if ((loc[-3] & 0xfb) != 0x48 || loc[-2] != 0x8d ||
+ (loc[-1] & 0xc7) != 0x05) {
+ errorOrWarn(getErrorLocation(loc - 3) +
+ "R_X86_64_GOTPC32_TLSDESC must be used "
+ "in leaq x@tlsdesc(%rip), %REG");
return;
}
- // movq $x@tpoff(%rip),%rax
+ loc[-3] = 0x48 | ((loc[-3] >> 2) & 1);
loc[-2] = 0xc7;
- loc[-1] = 0xc0;
+ loc[-1] = 0xc0 | ((loc[-1] >> 3) & 7);
write32le(loc, val + 4);
- // xchg ax,ax
- loc[4] = 0x66;
- loc[5] = 0x90;
+ } else {
+ // Convert call *x@tlsdesc(%REG) to xchg ax, ax.
+ assert(rel.type == R_X86_64_TLSDESC_CALL);
+ loc[0] = 0x66;
+ loc[1] = 0x90;
}
}
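
The in-place rewrite above turns leaq sym@tlsdesc(%rip), %REG into movq $sym@tpoff, %REG. The destination register moves from the ModRM.reg field (extended by REX.R, bit 2) into ModRM.rm (extended by REX.B, bit 0), which is what the two bit shuffles implement. A standalone sketch (hypothetical helper, not from the patch):

    #include <cassert>
    #include <cstdint>

    void rewriteLeaToMovImm(uint8_t insn[3]) {
      insn[0] = 0x48 | ((insn[0] >> 2) & 1); // move REX.R down into REX.B
      insn[1] = 0xc7;                        // MOV r/m64, imm32
      insn[2] = 0xc0 | ((insn[2] >> 3) & 7); // mod=11, rm = old reg field
    }

    int main() {
      uint8_t insn[3] = {0x4c, 0x8d, 0x3d};  // leaq x(%rip), %r15
      rewriteLeaToMovImm(insn);
      // movq $x, %r15 encodes as 49 c7 c7 imm32.
      assert(insn[0] == 0x49 && insn[1] == 0xc7 && insn[2] == 0xc7);
      return 0;
    }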
@@ -484,23 +488,23 @@ void X86_64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
// Both code sequences are PC relatives, but since we are moving the
// constant forward by 8 bytes we have to subtract the value by 8.
write32le(loc + 8, val - 8);
- } else {
- // Convert
- // lea x@tlsgd(%rip), %rax
- // call *(%rax)
- // to the following two instructions.
+ } else if (rel.type == R_X86_64_GOTPC32_TLSDESC) {
+ // Convert leaq x@tlsdesc(%rip), %REG to movq x@gottpoff(%rip), %REG.
assert(rel.type == R_X86_64_GOTPC32_TLSDESC);
- if (memcmp(loc - 3, "\x48\x8d\x05", 3)) {
- error(getErrorLocation(loc - 3) + "R_X86_64_GOTPC32_TLSDESC must be used "
- "in callq *x@tlsdesc(%rip), %rax");
+ if ((loc[-3] & 0xfb) != 0x48 || loc[-2] != 0x8d ||
+ (loc[-1] & 0xc7) != 0x05) {
+ errorOrWarn(getErrorLocation(loc - 3) +
+ "R_X86_64_GOTPC32_TLSDESC must be used "
+ "in leaq x@tlsdesc(%rip), %REG");
return;
}
- // movq x@gottpoff(%rip),%rax
loc[-2] = 0x8b;
write32le(loc, val);
- // xchg ax,ax
- loc[4] = 0x66;
- loc[5] = 0x90;
+ } else {
+ // Convert call *x@tlsdesc(%rax) to xchg ax, ax.
+ assert(rel.type == R_X86_64_TLSDESC_CALL);
+ loc[0] = 0x66;
+ loc[1] = 0x90;
}
}
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index 79c4fe06d7b2..c660a8e67c21 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -261,7 +261,7 @@ struct Configuration {
UnresolvedPolicy unresolvedSymbols;
UnresolvedPolicy unresolvedSymbolsInShlib;
Target2Policy target2;
- bool Power10Stub;
+ bool power10Stubs;
ARMVFPArgKind armVFPArgs = ARMVFPArgKind::Default;
BuildIdKind buildId = BuildIdKind::None;
SeparateSegmentKind zSeparate;
@@ -309,19 +309,10 @@ struct Configuration {
// if that's true.)
bool isMips64EL;
- // True if we need to set the DF_STATIC_TLS flag to an output file,
- // which works as a hint to the dynamic loader that the file contains
- // code compiled with the static TLS model. The thread-local variable
- // compiled with the static TLS model is faster but less flexible, and
- // it may not be loaded using dlopen().
- //
- // We set this flag to true when we see a relocation for the static TLS
- // model. Once this becomes true, it will never become false.
- //
- // Since the flag is updated by multi-threaded code, we use std::atomic.
- // (Writing to a variable is not considered thread-safe even if the
- // variable is boolean and we always set the same value from all threads.)
- std::atomic<bool> hasStaticTlsModel{false};
+ // True if we need to set the DF_STATIC_TLS flag to an output file, which
+ // works as a hint to the dynamic loader that the shared object contains code
+ // compiled with the initial-exec TLS model.
+ bool hasTlsIe = false;
// Holds set of ELF header flags for the target.
uint32_t eflags = 0;
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 9fac04558c46..1376e6c2c253 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -460,19 +460,21 @@ static bool isKnownZFlag(StringRef s) {
s.startswith("start-stop-visibility=");
}
-// Report an error for an unknown -z option.
+// Report a warning for an unknown -z option.
static void checkZOptions(opt::InputArgList &args) {
for (auto *arg : args.filtered(OPT_z))
if (!isKnownZFlag(arg->getValue()))
- error("unknown -z value: " + StringRef(arg->getValue()));
+ warn("unknown -z value: " + StringRef(arg->getValue()));
}
void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
ELFOptTable parser;
opt::InputArgList args = parser.parse(argsArr.slice(1));
- // Interpret this flag early because error() depends on them.
+ // Interpret the flags early because error()/warn() depend on them.
errorHandler().errorLimit = args::getInteger(args, OPT_error_limit, 20);
+ errorHandler().fatalWarnings =
+ args.hasFlag(OPT_fatal_warnings, OPT_no_fatal_warnings, false);
checkZOptions(args);
// Handle -help
@@ -750,20 +752,6 @@ static OrphanHandlingPolicy getOrphanHandling(opt::InputArgList &args) {
return OrphanHandlingPolicy::Place;
}
-// Parses --power10-stubs= flags, to disable or enable Power 10
-// instructions in stubs.
-static bool getP10StubOpt(opt::InputArgList &args) {
-
- if (args.getLastArgValue(OPT_power10_stubs_eq)== "no")
- return false;
-
- if (!args.hasArg(OPT_power10_stubs_eq) &&
- args.hasArg(OPT_no_power10_stubs))
- return false;
-
- return true;
-}
-
// Parse --build-id or --build-id=<style>. We handle "tree" as a
// synonym for "sha1" because all our hash functions including
// --build-id=sha1 are actually tree hashes for performance reasons.
@@ -985,8 +973,6 @@ static void parseClangOption(StringRef opt, const Twine &msg) {
// Initializes Config members by the command line options.
static void readConfigs(opt::InputArgList &args) {
errorHandler().verbose = args.hasArg(OPT_verbose);
- errorHandler().fatalWarnings =
- args.hasFlag(OPT_fatal_warnings, OPT_no_fatal_warnings, false);
errorHandler().vsDiagnostics =
args.hasArg(OPT_visual_studio_diagnostics_format, false);
@@ -1190,7 +1176,7 @@ static void readConfigs(opt::InputArgList &args) {
config->zText = getZFlag(args, "text", "notext", true);
config->zWxneeded = hasZOption(args, "wxneeded");
setUnresolvedSymbolPolicy(args);
- config->Power10Stub = getP10StubOpt(args);
+ config->power10Stubs = args.getLastArgValue(OPT_power10_stubs_eq) != "no";
if (opt::Arg *arg = args.getLastArg(OPT_eb, OPT_el)) {
if (arg->getOption().matches(OPT_eb))
@@ -1691,7 +1677,7 @@ static void handleUndefined(Symbol *sym, const char *option) {
if (!sym->isLazy())
return;
- sym->fetch();
+ sym->extract();
if (!config->whyExtract.empty())
whyExtract.emplace_back(option, sym->file, *sym);
}
@@ -1706,14 +1692,12 @@ static void handleUndefinedGlob(StringRef arg) {
return;
}
+ // Calling sym->extract() in the loop is not safe because it may add new
+ // symbols to the symbol table, invalidating the current iterator.
std::vector<Symbol *> syms;
- for (Symbol *sym : symtab->symbols()) {
- // Calling Sym->fetch() from here is not safe because it may
- // add new symbols to the symbol table, invalidating the
- // current iterator. So we just keep a note.
+ for (Symbol *sym : symtab->symbols())
if (pat->match(sym->getName()))
syms.push_back(sym);
- }
for (Symbol *sym : syms)
handleUndefined(sym, "--undefined-glob");
@@ -1731,7 +1715,7 @@ static void handleLibcall(StringRef name) {
mb = cast<LazyArchive>(sym)->getMemberBuffer();
if (isBitcode(mb))
- sym->fetch();
+ sym->extract();
}
// Handle --dependency-file=<path>. If that option is given, lld creates a
@@ -2207,7 +2191,7 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) {
symtab->insert(arg->getValue())->traced = true;
// Handle -u/--undefined before input files. If both a.a and b.so define foo,
- // -u foo a.a b.so will fetch a.a.
+ // -u foo a.a b.so will extract a.a.
for (StringRef name : config->undefined)
addUnusedUndefined(name)->referenced = true;
@@ -2297,7 +2281,6 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) {
// Create elfHeader early. We need a dummy section in
// addReservedSymbols to mark the created symbols as not absolute.
Out::elfHeader = make<OutputSection>("", 0, SHF_ALLOC);
- Out::elfHeader->size = sizeof(typename ELFT::Ehdr);
std::vector<WrappedSymbol> wrapped = addWrappedSymbols(args);
@@ -2476,8 +2459,8 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &args) {
// merging MergeInputSections into a single MergeSyntheticSection. From this
// point onwards InputSectionDescription::sections should be used instead of
// sectionBases.
- for (BaseCommand *base : script->sectionCommands)
- if (auto *sec = dyn_cast<OutputSection>(base))
+ for (SectionCommand *cmd : script->sectionCommands)
+ if (auto *sec = dyn_cast<OutputSection>(cmd))
sec->finalizeInputSections();
llvm::erase_if(inputSections, [](InputSectionBase *s) {
return isa<MergeInputSection>(s);
diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp
index c13969806916..0ec748e8f990 100644
--- a/lld/ELF/ICF.cpp
+++ b/lld/ELF/ICF.cpp
@@ -552,10 +552,10 @@ template <class ELFT> void ICF<ELFT>::run() {
// InputSectionDescription::sections is populated by processSectionCommands().
// ICF may fold some input sections assigned to output sections. Remove them.
- for (BaseCommand *base : script->sectionCommands)
- if (auto *sec = dyn_cast<OutputSection>(base))
- for (BaseCommand *sub_base : sec->sectionCommands)
- if (auto *isd = dyn_cast<InputSectionDescription>(sub_base))
+ for (SectionCommand *cmd : script->sectionCommands)
+ if (auto *sec = dyn_cast<OutputSection>(cmd))
+ for (SectionCommand *subCmd : sec->commands)
+ if (auto *isd = dyn_cast<InputSectionDescription>(subCmd))
llvm::erase_if(isd->sections,
[](InputSection *isec) { return !isec->isLive(); });
}
diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index e8a4188ec775..031a8679db41 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -395,16 +395,6 @@ uint32_t ObjFile<ELFT>::getSectionIndex(const Elf_Sym &sym) const {
this);
}
-template <class ELFT> ArrayRef<Symbol *> ObjFile<ELFT>::getLocalSymbols() {
- if (this->symbols.empty())
- return {};
- return makeArrayRef(this->symbols).slice(1, this->firstGlobal - 1);
-}
-
-template <class ELFT> ArrayRef<Symbol *> ObjFile<ELFT>::getGlobalSymbols() {
- return makeArrayRef(this->symbols).slice(this->firstGlobal);
-}
-
template <class ELFT> void ObjFile<ELFT>::parse(bool ignoreComdats) {
// Read a section table. justSymbols is usually false.
if (this->justSymbols)
@@ -966,7 +956,7 @@ InputSectionBase *ObjFile<ELFT>::createInputSection(uint32_t idx,
// `nullptr` for the normal case. However, if -r or --emit-relocs is
// specified, we need to copy them to the output. (Some post link analysis
// tools specify --emit-relocs to obtain the information.)
- if (!config->relocatable && !config->emitRelocs)
+ if (!config->copyRelocs)
return nullptr;
InputSection *relocSec = make<InputSection>(*this, sec, name);
// If the relocated section is discarded (due to /DISCARD/ or
@@ -1035,12 +1025,11 @@ InputSectionBase *ObjFile<ELFT>::createInputSection(uint32_t idx,
name == ".gnu.linkonce.t.__i686.get_pc_thunk.bx")
return &InputSection::discarded;
- // If we are creating a new .build-id section, strip existing .build-id
- // sections so that the output won't have more than one .build-id.
- // This is not usually a problem because input object files normally don't
- // have .build-id sections, but you can create such files by
- // "ld.{bfd,gold,lld} -r --build-id", and we want to guard against it.
- if (name == ".note.gnu.build-id" && config->buildId != BuildIdKind::None)
+ // Strip existing .note.gnu.build-id sections so that the output won't have
+ // more than one build-id. This is not usually a problem because input object
+ // files normally don't have .build-id sections, but you can create such files
+ // by "ld.{bfd,gold,lld} -r --build-id", and we want to guard against it.
+ if (name == ".note.gnu.build-id")
return &InputSection::discarded;
// The linker merges EH (exception handling) frames and creates a
@@ -1147,17 +1136,20 @@ template <class ELFT> void ObjFile<ELFT>::initializeSymbols() {
if (sec == &InputSection::discarded) {
Undefined und{this, name, binding, stOther, type, secIdx};
Symbol *sym = this->symbols[i];
- // !ArchiveFile::parsed or LazyObjFile::fetched means that the file
+ // !ArchiveFile::parsed or LazyObjFile::extracted means that the file
// containing this object has not finished processing, i.e. this symbol is
- // a result of a lazy symbol fetch. We should demote the lazy symbol to an
- // Undefined so that any relocations outside of the group to it will
+ // a result of a lazy symbol extract. We should demote the lazy symbol to
+ // an Undefined so that any relocations outside of the group to it will
// trigger a discarded section error.
if ((sym->symbolKind == Symbol::LazyArchiveKind &&
!cast<ArchiveFile>(sym->file)->parsed) ||
(sym->symbolKind == Symbol::LazyObjectKind &&
- cast<LazyObjFile>(sym->file)->fetched))
+ cast<LazyObjFile>(sym->file)->extracted)) {
sym->replace(und);
- else
+ // Prevent LTO from internalizing the symbol in case there is a
+ // reference to this symbol from this file.
+ sym->isUsedInRegularObj = true;
+ } else
sym->resolve(und);
continue;
}
@@ -1174,7 +1166,7 @@ template <class ELFT> void ObjFile<ELFT>::initializeSymbols() {
}
// Undefined symbols (excluding those defined relative to non-prevailing
- // sections) can trigger recursive fetch. Process defined symbols first so
+ // sections) can trigger recursive extract. Process defined symbols first so
// that the relative order between a defined symbol and an undefined symbol
// does not change the symbol resolution behavior. In addition, a set of
// interconnected symbols will all be resolved to the same file, instead of
@@ -1202,7 +1194,7 @@ void ArchiveFile::parse() {
}
// Returns a buffer pointing to a member file containing a given symbol.
-void ArchiveFile::fetch(const Archive::Symbol &sym) {
+void ArchiveFile::extract(const Archive::Symbol &sym) {
Archive::Child c =
CHECK(sym.getMember(), toString(this) +
": could not get the member for symbol " +
@@ -1291,7 +1283,7 @@ static bool isNonCommonDef(MemoryBufferRef mb, StringRef symName,
}
}
-bool ArchiveFile::shouldFetchForCommon(const Archive::Symbol &sym) {
+bool ArchiveFile::shouldExtractForCommon(const Archive::Symbol &sym) {
Archive::Child c =
CHECK(sym.getMember(), toString(this) +
": could not get the member for symbol " +
@@ -1779,10 +1771,10 @@ InputFile *elf::createObjectFile(MemoryBufferRef mb, StringRef archiveName,
}
}
-void LazyObjFile::fetch() {
- if (fetched)
+void LazyObjFile::extract() {
+ if (extracted)
return;
- fetched = true;
+ extracted = true;
InputFile *file = createObjectFile(mb, archiveName, offsetInArchive);
file->groupId = groupId;
@@ -1835,7 +1827,7 @@ template <class ELFT> void LazyObjFile::parse() {
// Replace existing symbols with LazyObject symbols.
//
- // resolve() may trigger this->fetch() if an existing symbol is an
+ // resolve() may trigger this->extract() if an existing symbol is an
// undefined symbol. If that happens, this LazyObjFile has served
// its purpose, and we can exit from the loop early.
for (Symbol *sym : this->symbols) {
@@ -1843,16 +1835,16 @@ template <class ELFT> void LazyObjFile::parse() {
continue;
sym->resolve(LazyObject{*this, sym->getName()});
- // If fetched, stop iterating because this->symbols has been transferred
+ // If extracted, stop iterating because this->symbols has been transferred
// to the instantiated ObjFile.
- if (fetched)
+ if (extracted)
return;
}
return;
}
}
-bool LazyObjFile::shouldFetchForCommon(const StringRef &name) {
+bool LazyObjFile::shouldExtractForCommon(const StringRef &name) {
if (isBitcode(mb))
return isBitcodeNonCommonDef(mb, name, archiveName);
diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h
index fb4d46b43f35..5bbfb7656e47 100644
--- a/lld/ELF/InputFiles.h
+++ b/lld/ELF/InputFiles.h
@@ -168,6 +168,15 @@ public:
StringRef getStringTable() const { return stringTable; }
+ ArrayRef<Symbol *> getLocalSymbols() {
+ if (symbols.empty())
+ return {};
+ return llvm::makeArrayRef(symbols).slice(1, firstGlobal - 1);
+ }
+ ArrayRef<Symbol *> getGlobalSymbols() {
+ return llvm::makeArrayRef(symbols).slice(firstGlobal);
+ }
+
template <typename ELFT> typename ELFT::SymRange getELFSyms() const {
return typename ELFT::SymRange(
reinterpret_cast<const typename ELFT::Sym *>(elfSyms), numELFSyms);
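The moved-in accessors rely on the ELF convention that symbols[0] is the
reserved null symbol: locals occupy indices [1, firstGlobal) and globals run
from firstGlobal to the end. A minimal standalone sketch of the same slicing,
using std::span in place of llvm::ArrayRef (Symbol and ObjFileSketch are
illustrative stand-ins, not the LLD types):

    #include <span>
    #include <vector>

    struct Symbol { const char *name; };

    struct ObjFileSketch {
      std::vector<Symbol *> symbols; // symbols[0] is the ELF null symbol
      size_t firstGlobal = 1;        // index of the first global symbol

      std::span<Symbol *> getLocalSymbols() {
        if (symbols.empty())
          return {};
        // Skip the null symbol; stop just before the first global.
        return std::span(symbols).subspan(1, firstGlobal - 1);
      }
      std::span<Symbol *> getGlobalSymbols() {
        return std::span(symbols).subspan(firstGlobal);
      }
    };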
@@ -197,9 +206,6 @@ public:
return this->ELFFileBase::getObj<ELFT>();
}
- ArrayRef<Symbol *> getLocalSymbols();
- ArrayRef<Symbol *> getGlobalSymbols();
-
ObjFile(MemoryBufferRef m, StringRef archiveName) : ELFFileBase(ObjKind, m) {
this->archiveName = std::string(archiveName);
}
@@ -306,13 +312,13 @@ public:
static bool classof(const InputFile *f) { return f->kind() == LazyObjKind; }
template <class ELFT> void parse();
- void fetch();
+ void extract();
- // Check if a non-common symbol should be fetched to override a common
+ // Check if a non-common symbol should be extracted to override a common
// definition.
- bool shouldFetchForCommon(const StringRef &name);
+ bool shouldExtractForCommon(const StringRef &name);
- bool fetched = false;
+ bool extracted = false;
private:
uint64_t offsetInArchive;
@@ -329,14 +335,14 @@ public:
// returns it. If the same file was instantiated before, this
// function does nothing (so we don't instantiate the same file
// more than once.)
- void fetch(const Archive::Symbol &sym);
+ void extract(const Archive::Symbol &sym);
- // Check if a non-common symbol should be fetched to override a common
+ // Check if a non-common symbol should be extracted to override a common
// definition.
- bool shouldFetchForCommon(const Archive::Symbol &sym);
+ bool shouldExtractForCommon(const Archive::Symbol &sym);
size_t getMemberCount() const;
- size_t getFetchedMemberCount() const { return seen.size(); }
+ size_t getExtractedMemberCount() const { return seen.size(); }
bool parsed = false;
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 74d4dd309c79..4d5bd1f1e5f2 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -187,7 +187,7 @@ uint64_t SectionBase::getOffset(uint64_t offset) const {
}
case Regular:
case Synthetic:
- return cast<InputSection>(this)->getOffset(offset);
+ return cast<InputSection>(this)->outSecOff + offset;
case EHFrame:
// The file crtbeginT.o has relocations pointing to the start of an empty
// .eh_frame that is known to be the first in the link. It does that to
@@ -196,7 +196,7 @@ uint64_t SectionBase::getOffset(uint64_t offset) const {
case Merge:
const MergeInputSection *ms = cast<MergeInputSection>(this);
if (InputSection *isec = ms->getParent())
- return isec->getOffset(ms->getParentOffset(offset));
+ return isec->outSecOff + ms->getParentOffset(offset);
return ms->getParentOffset(offset);
}
llvm_unreachable("invalid section kind");
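Both hunks inline what InputSection::getOffset used to compute: an input
section's bytes begin outSecOff bytes into its output section, so an offset
within the input section maps to outSecOff + offset, while a merge section
first folds the offset through its parent's piece table. A rough sketch of
the two cases under those assumptions (getParentOffset stands in for LLD's
piece lookup; these are not the LLD signatures):

    #include <cstdint>
    #include <functional>

    // Regular/synthetic case: plain addition.
    uint64_t regularOffset(uint64_t outSecOff, uint64_t offset) {
      return outSecOff + offset;
    }

    // Merge case: translate into the parent synthetic section first, since
    // string merging may move or deduplicate pieces, then shift by the
    // parent's own output-section offset.
    uint64_t mergeOffset(uint64_t parentOutSecOff,
                         const std::function<uint64_t(uint64_t)> &getParentOffset,
                         uint64_t offset) {
      return parentOutSecOff + getParentOffset(offset);
    }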
diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h
index 4bd1f410e388..7ddc43916a0f 100644
--- a/lld/ELF/InputSection.h
+++ b/lld/ELF/InputSection.h
@@ -78,7 +78,7 @@ public:
// These correspond to the fields in Elf_Shdr.
uint32_t alignment;
uint64_t flags;
- uint64_t entsize;
+ uint32_t entsize;
uint32_t type;
uint32_t link;
uint32_t info;
@@ -99,9 +99,9 @@ public:
void markDead() { partition = 0; }
protected:
- SectionBase(Kind sectionKind, StringRef name, uint64_t flags,
- uint64_t entsize, uint64_t alignment, uint32_t type,
- uint32_t info, uint32_t link)
+ constexpr SectionBase(Kind sectionKind, StringRef name, uint64_t flags,
+ uint32_t entsize, uint32_t alignment, uint32_t type,
+ uint32_t info, uint32_t link)
: name(name), repl(this), sectionKind(sectionKind), bss(false),
keepUnique(false), partition(0), alignment(alignment), flags(flags),
entsize(entsize), type(type), link(link), info(info) {}
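Making the constructor constexpr (alongside narrowing entsize to uint32_t)
allows SectionBase-derived objects with static storage duration, such as the
InputSection::discarded sentinel seen earlier, to be constant-initialized
rather than built by a dynamic initializer. That motivation is a reading of
the patch, not stated in it; a tiny sketch of the effect:

    #include <cstdint>

    struct Base {
      uint32_t entsize;
      uint32_t alignment;
      constexpr Base(uint32_t entsize, uint32_t alignment)
          : entsize(entsize), alignment(alignment) {}
    };

    // With a constexpr constructor, C++20 constinit can require (and the
    // compiler can verify) compile-time initialization of a global.
    constinit Base discarded(0, 1);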
@@ -121,14 +121,14 @@ public:
static bool classof(const SectionBase *s) { return s->kind() != Output; }
- // Section index of the relocation section if exists.
- uint32_t relSecIdx = 0;
-
// The file which contains this section. Its dynamic type is always
// ObjFile<ELFT>, but in order to avoid ELFT, we use InputFile as
// its static type.
InputFile *file;
+ // Section index of the relocation section if exists.
+ uint32_t relSecIdx = 0;
+
template <class ELFT> ObjFile<ELFT> *getFile() const {
return cast_or_null<ObjFile<ELFT>>(file);
}
@@ -352,8 +352,6 @@ public:
// beginning of the output section.
template <class ELFT> void writeTo(uint8_t *buf);
- uint64_t getOffset(uint64_t offset) const { return outSecOff + offset; }
-
OutputSection *getParent() const;
// This variable has two usages. Initially, it represents an index in the
diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp
index a42d216e4e77..46dc77a6789c 100644
--- a/lld/ELF/LTO.cpp
+++ b/lld/ELF/LTO.cpp
@@ -279,7 +279,7 @@ void BitcodeCompiler::add(BitcodeFile &f) {
// distributed build system that depends on that behavior.
static void thinLTOCreateEmptyIndexFiles() {
for (LazyObjFile *f : lazyObjFiles) {
- if (f->fetched || !isBitcode(f->mb))
+ if (f->extracted || !isBitcode(f->mb))
continue;
std::string path = replaceThinLTOSuffix(getThinLTOOutputFile(f->getName()));
std::unique_ptr<raw_fd_ostream> os = openFile(path + ".thinlto.bc");
diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp
index f332b03d757d..cf4da7ab54c9 100644
--- a/lld/ELF/LinkerScript.cpp
+++ b/lld/ELF/LinkerScript.cpp
@@ -49,23 +49,76 @@ using namespace lld::elf;
LinkerScript *elf::script;
-static uint64_t getOutputSectionVA(SectionBase *sec) {
- OutputSection *os = sec->getOutputSection();
- assert(os && "input section has no output section assigned");
- return os ? os->addr : 0;
+static bool isSectionPrefix(StringRef prefix, StringRef name) {
+ return name.startswith(prefix) || name == prefix.drop_back();
+}
+
+static StringRef getOutputSectionName(const InputSectionBase *s) {
+ if (config->relocatable)
+ return s->name;
+
+ // This is for --emit-relocs. If .text.foo is emitted as .text.bar, we want
+ // to emit .rela.text.foo as .rela.text.bar for consistency (this is not
+ // technically required, but not doing it is odd). This code guarantees that.
+ if (auto *isec = dyn_cast<InputSection>(s)) {
+ if (InputSectionBase *rel = isec->getRelocatedSection()) {
+ OutputSection *out = rel->getOutputSection();
+ if (s->type == SHT_RELA)
+ return saver.save(".rela" + out->name);
+ return saver.save(".rel" + out->name);
+ }
+ }
+
+ // A BssSection created for a common symbol is identified as "COMMON" in
+ // linker scripts. It should go to .bss section.
+ if (s->name == "COMMON")
+ return ".bss";
+
+ if (script->hasSectionsCommand)
+ return s->name;
+
+ // When no SECTIONS is specified, emulate GNU ld's internal linker scripts
+ // by grouping sections with certain prefixes.
+
+ // GNU ld places text sections with prefix ".text.hot.", ".text.unknown.",
+ // ".text.unlikely.", ".text.startup." or ".text.exit." before others.
+ // We provide an option -z keep-text-section-prefix to group such sections
+ // into separate output sections. This is more flexible. See also
+ // sortISDBySectionOrder().
+ // ".text.unknown" means the hotness of the section is unknown. When
+ // SampleFDO is used, a function without samples could be very cold, or it
+ // could be a new function that was never sampled. Such functions are kept
+ // in the ".text.unknown" section.
+ // ".text.split." holds symbols that are split out from functions in other
+ // input sections. For example, with -fsplit-machine-functions, placing the
+ // cold parts in .text.split instead of .text.unlikely mitigates the impact
+ // of profile inaccuracy. Techniques such as hugepage remapping can make
+ // conservative decisions at the section granularity.
+ if (config->zKeepTextSectionPrefix)
+ for (StringRef v : {".text.hot.", ".text.unknown.", ".text.unlikely.",
+ ".text.startup.", ".text.exit.", ".text.split."})
+ if (isSectionPrefix(v, s->name))
+ return v.drop_back();
+
+ for (StringRef v :
+ {".text.", ".rodata.", ".data.rel.ro.", ".data.", ".bss.rel.ro.",
+ ".bss.", ".init_array.", ".fini_array.", ".ctors.", ".dtors.", ".tbss.",
+ ".gcc_except_table.", ".tdata.", ".ARM.exidx.", ".ARM.extab."})
+ if (isSectionPrefix(v, s->name))
+ return v.drop_back();
+
+ return s->name;
}
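The prefix matching above is what lets -ffunction-sections output such as
.text.foo collapse into .text when no SECTIONS command is given:
isSectionPrefix accepts either the dotted prefix itself or the exact name
with the trailing dot dropped. A condensed standalone sketch with an
abbreviated prefix list (C++20 string_view; not the LLD API):

    #include <cstdio>
    #include <string>
    #include <string_view>

    static bool isSectionPrefix(std::string_view prefix, std::string_view name) {
      // ".text." matches ".text.foo" and also plain ".text".
      return name.starts_with(prefix) ||
             name == prefix.substr(0, prefix.size() - 1);
    }

    static std::string_view outputSectionNameFor(std::string_view name) {
      for (std::string_view v : {".text.", ".rodata.", ".data.", ".bss."})
        if (isSectionPrefix(v, name))
          return v.substr(0, v.size() - 1); // drop the trailing dot
      return name;
    }

    int main() {
      std::printf("%s\n",
                  std::string(outputSectionNameFor(".text.hot.main")).c_str());
      // prints ".text" (with -z keep-text-section-prefix LLD would instead
      // keep ".text.hot", as described above)
    }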
uint64_t ExprValue::getValue() const {
if (sec)
- return alignTo(sec->getOffset(val) + getOutputSectionVA(sec),
+ return alignTo(sec->getOutputSection()->addr + sec->getOffset(val),
alignment);
return alignTo(val, alignment);
}
uint64_t ExprValue::getSecAddr() const {
- if (sec)
- return sec->getOffset(0) + getOutputSectionVA(sec);
- return 0;
+ return sec ? sec->getOutputSection()->addr + sec->getOffset(0) : 0;
}
uint64_t ExprValue::getSectionOffset() const {
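A section-relative expression value is the containing output section's
address plus the offset within it, rounded up to the expression's alignment.
llvm::alignTo also handles non-power-of-two alignments; the sketch below
assumes a power of two, which is what ELF section alignments are in practice:

    #include <cassert>
    #include <cstdint>

    uint64_t alignToPow2(uint64_t value, uint64_t align) {
      assert(align && (align & (align - 1)) == 0 && "power of two expected");
      return (value + align - 1) & ~(align - 1);
    }

    int main() {
      // Output section at 0x1000, value 0x123 bytes into it, ALIGN(16):
      assert(alignToPow2(0x1000 + 0x123, 16) == 0x1130);
    }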
@@ -102,23 +155,22 @@ OutputSection *LinkerScript::getOrCreateOutputSection(StringRef name) {
// Expands the memory region by the specified size.
static void expandMemoryRegion(MemoryRegion *memRegion, uint64_t size,
- StringRef regionName, StringRef secName) {
+ StringRef secName) {
memRegion->curPos += size;
uint64_t newSize = memRegion->curPos - (memRegion->origin)().getValue();
uint64_t length = (memRegion->length)().getValue();
if (newSize > length)
- error("section '" + secName + "' will not fit in region '" + regionName +
- "': overflowed by " + Twine(newSize - length) + " bytes");
+ error("section '" + secName + "' will not fit in region '" +
+ memRegion->name + "': overflowed by " + Twine(newSize - length) +
+ " bytes");
}
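expandMemoryRegion advances the region's cursor and reports an overflow once
curPos - ORIGIN exceeds the declared LENGTH; the refactor drops the separate
regionName parameter because the region already carries its own name. A
stripped-down sketch of the bookkeeping, with plain integers in place of the
lazily evaluated Expr callbacks:

    #include <cstdint>
    #include <cstdio>
    #include <string>

    struct Region {
      std::string name;
      uint64_t origin, length, curPos;
    };

    static void expandRegion(Region &r, uint64_t size, const std::string &sec) {
      r.curPos += size;
      uint64_t newSize = r.curPos - r.origin;
      if (newSize > r.length)
        std::fprintf(stderr,
                     "section '%s' will not fit in region '%s': overflowed "
                     "by %llu bytes\n",
                     sec.c_str(), r.name.c_str(),
                     (unsigned long long)(newSize - r.length));
    }

    int main() {
      Region ram{"RAM", 0x20000000, 0x100, 0x20000000};
      expandRegion(ram, 0x80, ".data"); // fits
      expandRegion(ram, 0x100, ".bss"); // overflows by 0x80 bytes
    }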
void LinkerScript::expandMemoryRegions(uint64_t size) {
if (ctx->memRegion)
- expandMemoryRegion(ctx->memRegion, size, ctx->memRegion->name,
- ctx->outSec->name);
+ expandMemoryRegion(ctx->memRegion, size, ctx->outSec->name);
// Only expand the LMARegion if it is different from memRegion.
if (ctx->lmaRegion && ctx->memRegion != ctx->lmaRegion)
- expandMemoryRegion(ctx->lmaRegion, size, ctx->lmaRegion->name,
- ctx->outSec->name);
+ expandMemoryRegion(ctx->lmaRegion, size, ctx->outSec->name);
}
void LinkerScript::expandOutputSection(uint64_t size) {
@@ -215,21 +267,21 @@ using SymbolAssignmentMap =
// Collect section/value pairs of linker-script-defined symbols. This is used to
// check whether symbol values converge.
-static SymbolAssignmentMap
-getSymbolAssignmentValues(const std::vector<BaseCommand *> &sectionCommands) {
+static SymbolAssignmentMap getSymbolAssignmentValues(
+ const std::vector<SectionCommand *> &sectionCommands) {
SymbolAssignmentMap ret;
- for (BaseCommand *base : sectionCommands) {
- if (auto *cmd = dyn_cast<SymbolAssignment>(base)) {
- if (cmd->sym) // sym is nullptr for dot.
- ret.try_emplace(cmd->sym,
- std::make_pair(cmd->sym->section, cmd->sym->value));
+ for (SectionCommand *cmd : sectionCommands) {
+ if (auto *assign = dyn_cast<SymbolAssignment>(cmd)) {
+ if (assign->sym) // sym is nullptr for dot.
+ ret.try_emplace(assign->sym, std::make_pair(assign->sym->section,
+ assign->sym->value));
continue;
}
- for (BaseCommand *sub_base : cast<OutputSection>(base)->sectionCommands)
- if (auto *cmd = dyn_cast<SymbolAssignment>(sub_base))
- if (cmd->sym)
- ret.try_emplace(cmd->sym,
- std::make_pair(cmd->sym->section, cmd->sym->value));
+ for (SectionCommand *subCmd : cast<OutputSection>(cmd)->commands)
+ if (auto *assign = dyn_cast<SymbolAssignment>(subCmd))
+ if (assign->sym)
+ ret.try_emplace(assign->sym, std::make_pair(assign->sym->section,
+ assign->sym->value));
}
return ret;
}
@@ -256,9 +308,9 @@ void LinkerScript::processInsertCommands() {
for (StringRef name : cmd.names) {
// If base is empty, it may have been discarded by
// adjustSectionsBeforeSorting(). We do not handle such output sections.
- auto from = llvm::find_if(sectionCommands, [&](BaseCommand *base) {
- return isa<OutputSection>(base) &&
- cast<OutputSection>(base)->name == name;
+ auto from = llvm::find_if(sectionCommands, [&](SectionCommand *subCmd) {
+ return isa<OutputSection>(subCmd) &&
+ cast<OutputSection>(subCmd)->name == name;
});
if (from == sectionCommands.end())
continue;
@@ -266,10 +318,11 @@ void LinkerScript::processInsertCommands() {
sectionCommands.erase(from);
}
- auto insertPos = llvm::find_if(sectionCommands, [&cmd](BaseCommand *base) {
- auto *to = dyn_cast<OutputSection>(base);
- return to != nullptr && to->name == cmd.where;
- });
+ auto insertPos =
+ llvm::find_if(sectionCommands, [&cmd](SectionCommand *subCmd) {
+ auto *to = dyn_cast<OutputSection>(subCmd);
+ return to != nullptr && to->name == cmd.where;
+ });
if (insertPos == sectionCommands.end()) {
error("unable to insert " + cmd.names[0] +
(cmd.isAfter ? " after " : " before ") + cmd.where);
@@ -287,9 +340,9 @@ void LinkerScript::processInsertCommands() {
// over symbol assignment commands and create placeholder symbols if needed.
void LinkerScript::declareSymbols() {
assert(!ctx);
- for (BaseCommand *base : sectionCommands) {
- if (auto *cmd = dyn_cast<SymbolAssignment>(base)) {
- declareSymbol(cmd);
+ for (SectionCommand *cmd : sectionCommands) {
+ if (auto *assign = dyn_cast<SymbolAssignment>(cmd)) {
+ declareSymbol(assign);
continue;
}
@@ -297,12 +350,12 @@ void LinkerScript::declareSymbols() {
// we can't say for sure if it is going to be included or not.
// Skip such sections for now. Improve the checks if we ever
// need symbols from those sections to be declared early.
- auto *sec = cast<OutputSection>(base);
+ auto *sec = cast<OutputSection>(cmd);
if (sec->constraint != ConstraintKind::NoConstraint)
continue;
- for (BaseCommand *base2 : sec->sectionCommands)
- if (auto *cmd = dyn_cast<SymbolAssignment>(base2))
- declareSymbol(cmd);
+ for (SectionCommand *cmd : sec->commands)
+ if (auto *assign = dyn_cast<SymbolAssignment>(cmd))
+ declareSymbol(assign);
}
}
@@ -528,10 +581,10 @@ void LinkerScript::discardSynthetic(OutputSection &outCmd) {
continue;
std::vector<InputSectionBase *> secs(part.armExidx->exidxSections.begin(),
part.armExidx->exidxSections.end());
- for (BaseCommand *base : outCmd.sectionCommands)
- if (auto *cmd = dyn_cast<InputSectionDescription>(base)) {
+ for (SectionCommand *cmd : outCmd.commands)
+ if (auto *isd = dyn_cast<InputSectionDescription>(cmd)) {
std::vector<InputSectionBase *> matches =
- computeInputSections(cmd, secs);
+ computeInputSections(isd, secs);
for (InputSectionBase *s : matches)
discard(s);
}
@@ -542,12 +595,12 @@ std::vector<InputSectionBase *>
LinkerScript::createInputSectionList(OutputSection &outCmd) {
std::vector<InputSectionBase *> ret;
- for (BaseCommand *base : outCmd.sectionCommands) {
- if (auto *cmd = dyn_cast<InputSectionDescription>(base)) {
- cmd->sectionBases = computeInputSections(cmd, inputSections);
- for (InputSectionBase *s : cmd->sectionBases)
+ for (SectionCommand *cmd : outCmd.commands) {
+ if (auto *isd = dyn_cast<InputSectionDescription>(cmd)) {
+ isd->sectionBases = computeInputSections(isd, inputSections);
+ for (InputSectionBase *s : isd->sectionBases)
s->parent = &outCmd;
- ret.insert(ret.end(), cmd->sectionBases.begin(), cmd->sectionBases.end());
+ ret.insert(ret.end(), isd->sectionBases.begin(), isd->sectionBases.end());
}
}
return ret;
@@ -564,7 +617,7 @@ void LinkerScript::processSectionCommands() {
for (InputSectionBase *s : v)
discard(s);
discardSynthetic(*osec);
- osec->sectionCommands.clear();
+ osec->commands.clear();
return false;
}
@@ -578,7 +631,7 @@ void LinkerScript::processSectionCommands() {
if (!matchConstraints(v, osec->constraint)) {
for (InputSectionBase *s : v)
s->parent = nullptr;
- osec->sectionCommands.clear();
+ osec->commands.clear();
return false;
}
@@ -605,7 +658,7 @@ void LinkerScript::processSectionCommands() {
for (OutputSection *osec : overwriteSections)
if (process(osec) && !map.try_emplace(osec->name, osec).second)
warn("OVERWRITE_SECTIONS specifies duplicate " + osec->name);
- for (BaseCommand *&base : sectionCommands)
+ for (SectionCommand *&base : sectionCommands)
if (auto *osec = dyn_cast<OutputSection>(base)) {
if (OutputSection *overwrite = map.lookup(osec->name)) {
log(overwrite->location + " overwrites " + osec->name);
@@ -639,22 +692,22 @@ void LinkerScript::processSymbolAssignments() {
ctx = &state;
ctx->outSec = aether;
- for (BaseCommand *base : sectionCommands) {
- if (auto *cmd = dyn_cast<SymbolAssignment>(base))
- addSymbol(cmd);
+ for (SectionCommand *cmd : sectionCommands) {
+ if (auto *assign = dyn_cast<SymbolAssignment>(cmd))
+ addSymbol(assign);
else
- for (BaseCommand *sub_base : cast<OutputSection>(base)->sectionCommands)
- if (auto *cmd = dyn_cast<SymbolAssignment>(sub_base))
- addSymbol(cmd);
+ for (SectionCommand *subCmd : cast<OutputSection>(cmd)->commands)
+ if (auto *assign = dyn_cast<SymbolAssignment>(subCmd))
+ addSymbol(assign);
}
ctx = nullptr;
}
-static OutputSection *findByName(ArrayRef<BaseCommand *> vec,
+static OutputSection *findByName(ArrayRef<SectionCommand *> vec,
StringRef name) {
- for (BaseCommand *base : vec)
- if (auto *sec = dyn_cast<OutputSection>(base))
+ for (SectionCommand *cmd : vec)
+ if (auto *sec = dyn_cast<OutputSection>(cmd))
if (sec->name == name)
return sec;
return nullptr;
@@ -753,8 +806,7 @@ addInputSec(StringMap<TinyPtrVector<OutputSection *>> &map,
// end up being linked to the same output section. The casts are fine
// because everything in the map was created by the orphan placement code.
auto *firstIsec = cast<InputSectionBase>(
- cast<InputSectionDescription>(sec->sectionCommands[0])
- ->sectionBases[0]);
+ cast<InputSectionDescription>(sec->commands[0])->sectionBases[0]);
OutputSection *firstIsecOut =
firstIsec->flags & SHF_LINK_ORDER
? firstIsec->getLinkOrderDep()->getOutputSection()
@@ -848,38 +900,6 @@ void LinkerScript::diagnoseOrphanHandling() const {
}
}
-uint64_t LinkerScript::advance(uint64_t size, unsigned alignment) {
- dot = alignTo(dot, alignment) + size;
- return dot;
-}
-
-void LinkerScript::output(InputSection *s) {
- assert(ctx->outSec == s->getParent());
- uint64_t before = advance(0, 1);
- uint64_t pos = advance(s->getSize(), s->alignment);
- s->outSecOff = pos - s->getSize() - ctx->outSec->addr;
-
- // Update output section size after adding each section. This is so that
- // SIZEOF works correctly in the case below:
- // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) }
- expandOutputSection(pos - before);
-}
-
-void LinkerScript::switchTo(OutputSection *sec) {
- ctx->outSec = sec;
-
- uint64_t pos = advance(0, 1);
- if (sec->addrExpr && script->hasSectionsCommand) {
- // The alignment is ignored.
- ctx->outSec->addr = pos;
- } else {
- // ctx->outSec->alignment is the max of ALIGN and the maximum of input
- // section alignments.
- ctx->outSec->addr = advance(0, ctx->outSec->alignment);
- expandMemoryRegions(ctx->outSec->addr - pos);
- }
-}
-
// This function searches for a memory region to place the given output
// section in. If found, a pointer to the appropriate memory region is
// returned in the first member of the pair. Otherwise, a nullptr is returned.
@@ -917,7 +937,7 @@ LinkerScript::findMemoryRegion(OutputSection *sec, MemoryRegion *hint) {
// See if a region can be found by matching section flags.
for (auto &pair : memoryRegions) {
MemoryRegion *m = pair.second;
- if ((m->flags & sec->flags) && (m->negFlags & sec->flags) == 0)
+ if (m->compatibleWith(sec->flags))
return {m, nullptr};
}
@@ -965,10 +985,21 @@ void LinkerScript::assignOffsets(OutputSection *sec) {
// between the previous section, if any, and the start of this section.
if (ctx->memRegion && ctx->memRegion->curPos < dot)
expandMemoryRegion(ctx->memRegion, dot - ctx->memRegion->curPos,
- ctx->memRegion->name, sec->name);
+ sec->name);
}
- switchTo(sec);
+ ctx->outSec = sec;
+ if (sec->addrExpr && script->hasSectionsCommand) {
+ // The alignment is ignored.
+ sec->addr = dot;
+ } else {
+ // sec->alignment is the max of ALIGN and the maximum of input
+ // section alignments.
+ const uint64_t pos = dot;
+ dot = alignTo(dot, sec->alignment);
+ sec->addr = dot;
+ expandMemoryRegions(dot - pos);
+ }
// ctx->lmaOffset is LMA minus VMA. If LMA is explicitly specified via AT() or
// AT>, recompute ctx->lmaOffset; otherwise, if both previous/current LMA
@@ -981,14 +1012,14 @@ void LinkerScript::assignOffsets(OutputSection *sec) {
} else if (MemoryRegion *mr = sec->lmaRegion) {
uint64_t lmaStart = alignTo(mr->curPos, sec->alignment);
if (mr->curPos < lmaStart)
- expandMemoryRegion(mr, lmaStart - mr->curPos, mr->name, sec->name);
+ expandMemoryRegion(mr, lmaStart - mr->curPos, sec->name);
ctx->lmaOffset = lmaStart - dot;
} else if (!sameMemRegion || !prevLMARegionIsDefault) {
ctx->lmaOffset = 0;
}
// Propagate ctx->lmaOffset to the first "non-header" section.
- if (PhdrEntry *l = ctx->outSec->ptLoad)
+ if (PhdrEntry *l = sec->ptLoad)
if (sec == findFirstSection(l))
l->lmaOffset = ctx->lmaOffset;
@@ -999,28 +1030,38 @@ void LinkerScript::assignOffsets(OutputSection *sec) {
// We visited SectionsCommands from processSectionCommands to
// lay out sections. Now, we visit SectionsCommands again to fix
// section offsets.
- for (BaseCommand *base : sec->sectionCommands) {
+ for (SectionCommand *cmd : sec->commands) {
// This handles the assignments to symbol or to the dot.
- if (auto *cmd = dyn_cast<SymbolAssignment>(base)) {
- cmd->addr = dot;
- assignSymbol(cmd, true);
- cmd->size = dot - cmd->addr;
+ if (auto *assign = dyn_cast<SymbolAssignment>(cmd)) {
+ assign->addr = dot;
+ assignSymbol(assign, true);
+ assign->size = dot - assign->addr;
continue;
}
// Handle BYTE(), SHORT(), LONG(), or QUAD().
- if (auto *cmd = dyn_cast<ByteCommand>(base)) {
- cmd->offset = dot - ctx->outSec->addr;
- dot += cmd->size;
- expandOutputSection(cmd->size);
+ if (auto *data = dyn_cast<ByteCommand>(cmd)) {
+ data->offset = dot - sec->addr;
+ dot += data->size;
+ expandOutputSection(data->size);
continue;
}
// Handle a single input section description command.
// It calculates and assigns the offsets for each section and also
// updates the output section size.
- for (InputSection *sec : cast<InputSectionDescription>(base)->sections)
- output(sec);
+ for (InputSection *isec : cast<InputSectionDescription>(cmd)->sections) {
+ assert(isec->getParent() == sec);
+ const uint64_t pos = dot;
+ dot = alignTo(dot, isec->alignment);
+ isec->outSecOff = dot - sec->addr;
+ dot += isec->getSize();
+
+ // Update output section size after adding each section. This is so that
+ // SIZEOF works correctly in the case below:
+ // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) }
+ expandOutputSection(dot - pos);
+ }
}
// Non-SHF_ALLOC sections do not affect the addresses of other OutputSections
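The inlined loop above is the core of address assignment: for each input
section, align the location counter, record the section's offset from the
start of the output section, then advance past the section's bytes while
growing the output section so SIZEOF stays accurate mid-description. A
standalone sketch of one such pass (dot models the linker script location
counter; the types are illustrative):

    #include <cstdint>
    #include <vector>

    struct InSec { uint64_t align, size, outSecOff; };

    static uint64_t alignToPow2(uint64_t v, uint64_t a) {
      return (v + a - 1) & ~(a - 1);
    }

    // Returns the new location counter, i.e. secAddr plus the section size.
    uint64_t assignOffsets(uint64_t secAddr, std::vector<InSec> &sections) {
      uint64_t dot = secAddr;
      for (InSec &s : sections) {
        dot = alignToPow2(dot, s.align); // honor input section alignment
        s.outSecOff = dot - secAddr;     // offset from output section start
        dot += s.size;                   // advance past the section's bytes
      }
      return dot;
    }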
@@ -1050,14 +1091,14 @@ static bool isDiscardable(const OutputSection &sec) {
if (sec.usedInExpression)
return false;
- for (BaseCommand *base : sec.sectionCommands) {
- if (auto cmd = dyn_cast<SymbolAssignment>(base))
+ for (SectionCommand *cmd : sec.commands) {
+ if (auto assign = dyn_cast<SymbolAssignment>(cmd))
// Don't create empty output sections just for unreferenced PROVIDE
// symbols.
- if (cmd->name != "." && !cmd->sym)
+ if (assign->name != "." && !assign->sym)
continue;
- if (!isa<InputSectionDescription>(*base))
+ if (!isa<InputSectionDescription>(*cmd))
return false;
}
return true;
@@ -1104,7 +1145,7 @@ void LinkerScript::adjustSectionsBeforeSorting() {
uint64_t flags = SHF_ALLOC;
std::vector<StringRef> defPhdrs;
- for (BaseCommand *&cmd : sectionCommands) {
+ for (SectionCommand *&cmd : sectionCommands) {
auto *sec = dyn_cast<OutputSection>(cmd);
if (!sec)
continue;
@@ -1150,14 +1191,14 @@ void LinkerScript::adjustSectionsBeforeSorting() {
// clutter the output.
// We instead remove trivially empty sections. The bfd linker seems even
// more aggressive at removing them.
- llvm::erase_if(sectionCommands, [&](BaseCommand *base) { return !base; });
+ llvm::erase_if(sectionCommands, [&](SectionCommand *cmd) { return !cmd; });
}
void LinkerScript::adjustSectionsAfterSorting() {
// Try and find an appropriate memory region to assign offsets in.
MemoryRegion *hint = nullptr;
- for (BaseCommand *base : sectionCommands) {
- if (auto *sec = dyn_cast<OutputSection>(base)) {
+ for (SectionCommand *cmd : sectionCommands) {
+ if (auto *sec = dyn_cast<OutputSection>(cmd)) {
if (!sec->lmaRegionName.empty()) {
if (MemoryRegion *m = memoryRegions.lookup(sec->lmaRegionName))
sec->lmaRegion = m;
@@ -1183,8 +1224,8 @@ void LinkerScript::adjustSectionsAfterSorting() {
// Walk the commands and propagate the program headers to commands that don't
// explicitly specify them.
- for (BaseCommand *base : sectionCommands)
- if (auto *sec = dyn_cast<OutputSection>(base))
+ for (SectionCommand *cmd : sectionCommands)
+ if (auto *sec = dyn_cast<OutputSection>(cmd))
maybePropagatePhdrs(*sec, defPhdrs);
}
@@ -1267,20 +1308,20 @@ const Defined *LinkerScript::assignAddresses() {
dot += getHeaderSize();
}
- auto deleter = std::make_unique<AddressState>();
- ctx = deleter.get();
+ AddressState state;
+ ctx = &state;
errorOnMissingSection = true;
- switchTo(aether);
+ ctx->outSec = aether;
SymbolAssignmentMap oldValues = getSymbolAssignmentValues(sectionCommands);
- for (BaseCommand *base : sectionCommands) {
- if (auto *cmd = dyn_cast<SymbolAssignment>(base)) {
- cmd->addr = dot;
- assignSymbol(cmd, false);
- cmd->size = dot - cmd->addr;
+ for (SectionCommand *cmd : sectionCommands) {
+ if (auto *assign = dyn_cast<SymbolAssignment>(cmd)) {
+ assign->addr = dot;
+ assignSymbol(assign, false);
+ assign->size = dot - assign->addr;
continue;
}
- assignOffsets(cast<OutputSection>(base));
+ assignOffsets(cast<OutputSection>(cmd));
}
ctx = nullptr;
diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h
index b366da4f274e..badc4d126be8 100644
--- a/lld/ELF/LinkerScript.h
+++ b/lld/ELF/LinkerScript.h
@@ -41,7 +41,7 @@ class ThunkSection;
struct ExprValue {
ExprValue(SectionBase *sec, bool forceAbsolute, uint64_t val,
const Twine &loc)
- : sec(sec), forceAbsolute(forceAbsolute), val(val), loc(loc.str()) {}
+ : sec(sec), val(val), forceAbsolute(forceAbsolute), loc(loc.str()) {}
ExprValue(uint64_t val) : ExprValue(nullptr, false, val, "") {}
@@ -53,10 +53,6 @@ struct ExprValue {
// If a value is relative to a section, it has a non-null Sec.
SectionBase *sec;
- // True if this expression is enclosed in ABSOLUTE().
- // This flag affects the return value of getValue().
- bool forceAbsolute;
-
uint64_t val;
uint64_t alignment = 1;
@@ -64,6 +60,10 @@ struct ExprValue {
// resets type to STT_NOTYPE.
uint8_t type = llvm::ELF::STT_NOTYPE;
+ // True if this expression is enclosed in ABSOLUTE().
+ // This flag affects the return value of getValue().
+ bool forceAbsolute;
+
// Original source location. Used for error messages.
std::string loc;
};
@@ -82,17 +82,18 @@ enum SectionsCommandKind {
ByteKind // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr)
};
-struct BaseCommand {
- BaseCommand(int k) : kind(k) {}
+struct SectionCommand {
+ SectionCommand(int k) : kind(k) {}
int kind;
};
// This represents ". = <expr>" or "<symbol> = <expr>".
-struct SymbolAssignment : BaseCommand {
+struct SymbolAssignment : SectionCommand {
SymbolAssignment(StringRef name, Expr e, std::string loc)
- : BaseCommand(AssignmentKind), name(name), expression(e), location(loc) {}
+ : SectionCommand(AssignmentKind), name(name), expression(e),
+ location(loc) {}
- static bool classof(const BaseCommand *c) {
+ static bool classof(const SectionCommand *c) {
return c->kind == AssignmentKind;
}
@@ -132,16 +133,32 @@ enum class ConstraintKind { NoConstraint, ReadOnly, ReadWrite };
// MEMORY command.
struct MemoryRegion {
MemoryRegion(StringRef name, Expr origin, Expr length, uint32_t flags,
- uint32_t negFlags)
+ uint32_t invFlags, uint32_t negFlags, uint32_t negInvFlags)
: name(std::string(name)), origin(origin), length(length), flags(flags),
- negFlags(negFlags) {}
+ invFlags(invFlags), negFlags(negFlags), negInvFlags(negInvFlags) {}
std::string name;
Expr origin;
Expr length;
+ // A section can be assigned to the region if any of these ELF section flags
+ // are set...
uint32_t flags;
+ // ... or any of these flags are not set.
+ // For example, the memory region attribute "r" maps to SHF_WRITE.
+ uint32_t invFlags;
+ // A section cannot be assigned to the region if any of these ELF section
+ // flags are set...
uint32_t negFlags;
+ // ... or any of these flags are not set.
+ // For example, the memory region attribute "!r" maps to SHF_WRITE.
+ uint32_t negInvFlags;
uint64_t curPos = 0;
+
+ bool compatibleWith(uint32_t secFlags) const {
+ if ((secFlags & negFlags) || (~secFlags & negInvFlags))
+ return false;
+ return (secFlags & flags) || (~secFlags & invFlags);
+ }
};
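compatibleWith evaluates the four attribute sets in two steps: a section is
rejected if it has any forbidden flag (negFlags) or lacks any flag whose
absence is forbidden (negInvFlags), and is otherwise accepted if it has any
required flag (flags) or lacks any flag whose absence suffices (invFlags).
A self-contained sketch using the real SHF_* bit values, with a region
declared as "rw!x" (readable or writable, but not executable):

    #include <cassert>
    #include <cstdint>

    enum : uint32_t { SHF_WRITE = 0x1, SHF_ALLOC = 0x2, SHF_EXECINSTR = 0x4 };

    struct Region {
      uint32_t flags, invFlags, negFlags, negInvFlags;
      bool compatibleWith(uint32_t secFlags) const {
        if ((secFlags & negFlags) || (~secFlags & negInvFlags))
          return false;
        return (secFlags & flags) || (~secFlags & invFlags);
      }
    };

    int main() {
      // "w" -> flags SHF_WRITE, "r" -> invFlags SHF_WRITE,
      // "!x" -> negFlags SHF_EXECINSTR.
      Region rwNotX{SHF_WRITE, SHF_WRITE, SHF_EXECINSTR, 0};
      assert(rwNotX.compatibleWith(SHF_ALLOC | SHF_WRITE));      // .data
      assert(rwNotX.compatibleWith(SHF_ALLOC));                  // .rodata
      assert(!rwNotX.compatibleWith(SHF_ALLOC | SHF_EXECINSTR)); // .text
    }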
// This struct represents one section match pattern in SECTIONS() command.
@@ -166,7 +183,7 @@ public:
SortSectionPolicy sortInner;
};
-class InputSectionDescription : public BaseCommand {
+class InputSectionDescription : public SectionCommand {
SingleStringMatcher filePat;
// Cache of the most recent input argument and result of matchesFile().
@@ -175,10 +192,10 @@ class InputSectionDescription : public BaseCommand {
public:
InputSectionDescription(StringRef filePattern, uint64_t withFlags = 0,
uint64_t withoutFlags = 0)
- : BaseCommand(InputSectionKind), filePat(filePattern),
+ : SectionCommand(InputSectionKind), filePat(filePattern),
withFlags(withFlags), withoutFlags(withoutFlags) {}
- static bool classof(const BaseCommand *c) {
+ static bool classof(const SectionCommand *c) {
return c->kind == InputSectionKind;
}
@@ -207,12 +224,12 @@ public:
};
// Represents BYTE(), SHORT(), LONG(), or QUAD().
-struct ByteCommand : BaseCommand {
+struct ByteCommand : SectionCommand {
ByteCommand(Expr e, unsigned size, std::string commandString)
- : BaseCommand(ByteKind), commandString(commandString), expression(e),
+ : SectionCommand(ByteKind), commandString(commandString), expression(e),
size(size) {}
- static bool classof(const BaseCommand *c) { return c->kind == ByteKind; }
+ static bool classof(const SectionCommand *c) { return c->kind == ByteKind; }
// Keeps the string representation of the command. Used for -Map.
std::string commandString;
@@ -275,10 +292,6 @@ class LinkerScript final {
std::pair<MemoryRegion *, MemoryRegion *>
findMemoryRegion(OutputSection *sec, MemoryRegion *hint);
- void switchTo(OutputSection *sec);
- uint64_t advance(uint64_t size, unsigned align);
- void output(InputSection *sec);
-
void assignOffsets(OutputSection *sec);
// Ctx captures the local AddressState and makes it accessible
@@ -324,7 +337,7 @@ public:
void processInsertCommands();
// SECTIONS command list.
- std::vector<BaseCommand *> sectionCommands;
+ std::vector<SectionCommand *> sectionCommands;
// PHDRS command list.
std::vector<PhdrsCommand> phdrsCommands;
diff --git a/lld/ELF/MapFile.cpp b/lld/ELF/MapFile.cpp
index c4690ae5aefd..06735802f7f1 100644
--- a/lld/ELF/MapFile.cpp
+++ b/lld/ELF/MapFile.cpp
@@ -139,20 +139,7 @@ static void printEhFrame(raw_ostream &os, const EhFrameSection *sec) {
}
}
-void elf::writeMapFile() {
- if (config->mapFile.empty())
- return;
-
- llvm::TimeTraceScope timeScope("Write map file");
-
- // Open a map file for writing.
- std::error_code ec;
- raw_fd_ostream os(config->mapFile, ec, sys::fs::OF_None);
- if (ec) {
- error("cannot open " + config->mapFile + ": " + ec.message());
- return;
- }
-
+static void writeMapFile(raw_fd_ostream &os) {
// Collect symbol info that we want to print out.
std::vector<Defined *> syms = getSymbols();
SymbolMapTy sectionSyms = getSectionSyms(syms);
@@ -164,30 +151,30 @@ void elf::writeMapFile() {
<< " Size Align Out In Symbol\n";
OutputSection* osec = nullptr;
- for (BaseCommand *base : script->sectionCommands) {
- if (auto *cmd = dyn_cast<SymbolAssignment>(base)) {
- if (cmd->provide && !cmd->sym)
+ for (SectionCommand *cmd : script->sectionCommands) {
+ if (auto *assign = dyn_cast<SymbolAssignment>(cmd)) {
+ if (assign->provide && !assign->sym)
continue;
- uint64_t lma = osec ? osec->getLMA() + cmd->addr - osec->getVA(0) : 0;
- writeHeader(os, cmd->addr, lma, cmd->size, 1);
- os << cmd->commandString << '\n';
+ uint64_t lma = osec ? osec->getLMA() + assign->addr - osec->getVA(0) : 0;
+ writeHeader(os, assign->addr, lma, assign->size, 1);
+ os << assign->commandString << '\n';
continue;
}
- osec = cast<OutputSection>(base);
+ osec = cast<OutputSection>(cmd);
writeHeader(os, osec->addr, osec->getLMA(), osec->size, osec->alignment);
os << osec->name << '\n';
// Dump symbols for each input section.
- for (BaseCommand *base : osec->sectionCommands) {
- if (auto *isd = dyn_cast<InputSectionDescription>(base)) {
+ for (SectionCommand *subCmd : osec->commands) {
+ if (auto *isd = dyn_cast<InputSectionDescription>(subCmd)) {
for (InputSection *isec : isd->sections) {
if (auto *ehSec = dyn_cast<EhFrameSection>(isec)) {
printEhFrame(os, ehSec);
continue;
}
- writeHeader(os, isec->getVA(0), osec->getLMA() + isec->getOffset(0),
+ writeHeader(os, isec->getVA(), osec->getLMA() + isec->outSecOff,
isec->getSize(), isec->alignment);
os << indent8 << toString(isec) << '\n';
for (Symbol *sym : sectionSyms[isec])
@@ -196,19 +183,20 @@ void elf::writeMapFile() {
continue;
}
- if (auto *cmd = dyn_cast<ByteCommand>(base)) {
- writeHeader(os, osec->addr + cmd->offset, osec->getLMA() + cmd->offset,
- cmd->size, 1);
- os << indent8 << cmd->commandString << '\n';
+ if (auto *data = dyn_cast<ByteCommand>(subCmd)) {
+ writeHeader(os, osec->addr + data->offset,
+ osec->getLMA() + data->offset, data->size, 1);
+ os << indent8 << data->commandString << '\n';
continue;
}
- if (auto *cmd = dyn_cast<SymbolAssignment>(base)) {
- if (cmd->provide && !cmd->sym)
+ if (auto *assign = dyn_cast<SymbolAssignment>(subCmd)) {
+ if (assign->provide && !assign->sym)
continue;
- writeHeader(os, cmd->addr, osec->getLMA() + cmd->addr - osec->getVA(0),
- cmd->size, 1);
- os << indent8 << cmd->commandString << '\n';
+ writeHeader(os, assign->addr,
+ osec->getLMA() + assign->addr - osec->getVA(0),
+ assign->size, 1);
+ os << indent8 << assign->commandString << '\n';
continue;
}
}
@@ -234,10 +222,6 @@ void elf::writeWhyExtract() {
}
}
-static void print(StringRef a, StringRef b) {
- lld::outs() << left_justify(a, 49) << " " << b << "\n";
-}
-
// Output a cross reference table to stdout. This is for --cref.
//
// For each global symbol, we print out a file that defines the symbol
@@ -249,10 +233,7 @@ static void print(StringRef a, StringRef b) {
//
// In this case, strlen is defined by libc.so.6 and used by other two
// files.
-void elf::writeCrossReferenceTable() {
- if (!config->cref)
- return;
-
+static void writeCref(raw_fd_ostream &os) {
// Collect symbols and files.
MapVector<Symbol *, SetVector<InputFile *>> map;
for (InputFile *file : objectFiles) {
@@ -265,8 +246,12 @@ void elf::writeCrossReferenceTable() {
}
}
- // Print out a header.
- lld::outs() << "Cross Reference Table\n\n";
+ auto print = [&](StringRef a, StringRef b) {
+ os << left_justify(a, 49) << ' ' << b << '\n';
+ };
+
+ // Print a blank line and a header. The format matches GNU ld.
+ os << "\nCross Reference Table\n\n";
print("Symbol", "File");
// Print out a table.
@@ -281,6 +266,27 @@ void elf::writeCrossReferenceTable() {
}
}
+void elf::writeMapAndCref() {
+ if (config->mapFile.empty() && !config->cref)
+ return;
+
+ llvm::TimeTraceScope timeScope("Write map file");
+
+ // Open a map file for writing.
+ std::error_code ec;
+ StringRef mapFile = config->mapFile.empty() ? "-" : config->mapFile;
+ raw_fd_ostream os(mapFile, ec, sys::fs::OF_None);
+ if (ec) {
+ error("cannot open " + mapFile + ": " + ec.message());
+ return;
+ }
+
+ if (!config->mapFile.empty())
+ writeMapFile(os);
+ if (config->cref)
+ writeCref(os);
+}
+
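Folding --cref into the map file writer means both outputs share one stream:
if -Map is given, the cref table is appended to the map file, matching GNU
ld, and otherwise "-" selects stdout (raw_fd_ostream treats the file name
"-" as standard output). A sketch of just the stream selection, using stdio
in place of raw_fd_ostream:

    #include <cstdio>
    #include <string>

    // Returns the stream the map and/or cref output should go to. The
    // caller owns (and must fclose) a returned file other than stdout.
    static std::FILE *openMapOrStdout(const std::string &mapFile) {
      if (mapFile.empty())
        return stdout; // --cref without -Map: print to stdout
      return std::fopen(mapFile.c_str(), "w");
    }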
void elf::writeArchiveStats() {
if (config->printArchiveStats.empty())
return;
@@ -293,8 +299,8 @@ void elf::writeArchiveStats() {
return;
}
- os << "members\tfetched\tarchive\n";
+ os << "members\textracted\tarchive\n";
for (const ArchiveFile *f : archiveFiles)
- os << f->getMemberCount() << '\t' << f->getFetchedMemberCount() << '\t'
+ os << f->getMemberCount() << '\t' << f->getExtractedMemberCount() << '\t'
<< f->getName() << '\n';
}
diff --git a/lld/ELF/MapFile.h b/lld/ELF/MapFile.h
index 1b8c0168c0de..df548988c03b 100644
--- a/lld/ELF/MapFile.h
+++ b/lld/ELF/MapFile.h
@@ -11,9 +11,8 @@
namespace lld {
namespace elf {
-void writeMapFile();
+void writeMapAndCref();
void writeWhyExtract();
-void writeCrossReferenceTable();
void writeArchiveStats();
} // namespace elf
} // namespace lld
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index ce82eb8d2754..f9f9f54a80d8 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -129,7 +129,8 @@ def color_diagnostics_eq: J<"color-diagnostics=">,
HelpText<"Use colors in diagnostics (default: auto)">,
MetaVarName<"[auto,always,never]">;
-def cref: FF<"cref">, HelpText<"Output cross reference table">;
+def cref: FF<"cref">,
+ HelpText<"Output cross reference table. If -Map is specified, print to the map file">;
defm define_common: B<"define-common",
"Assign space to common symbols",
@@ -304,8 +305,8 @@ def no_undefined: F<"no-undefined">,
def o: JoinedOrSeparate<["-"], "o">, MetaVarName<"<path>">,
HelpText<"Path to file to write output">;
-def oformat: Separate<["--"], "oformat">, MetaVarName<"<format>">,
- HelpText<"Specify the binary format for the output object file">;
+defm oformat: EEq<"oformat", "Specify the binary format for the output object file">,
+ MetaVarName<"[elf,binary]">;
def omagic: FF<"omagic">, MetaVarName<"<magic>">,
HelpText<"Set the text and data sections to be readable and writable, do not page align sections, link against static libraries">;
@@ -338,7 +339,7 @@ defm print_icf_sections: B<"print-icf-sections",
def print_archive_stats: J<"print-archive-stats=">,
HelpText<"Write archive usage statistics to the specified file. "
- "Print the numbers of members and fetched members for each archive">;
+ "Print the numbers of members and extracted members for each archive">;
defm print_symbol_order: Eq<"print-symbol-order",
"Print a symbol order specified by --call-graph-ordering-file into the specified file">;
@@ -454,22 +455,19 @@ def verbose: F<"verbose">, HelpText<"Verbose mode">;
def version: F<"version">, HelpText<"Display the version number and exit">;
-def power10_stubs: F<"power10-stubs">, HelpText<"Alias for --power10-stubs=auto">;
-
-def no_power10_stubs: F<"no-power10-stubs">, HelpText<"Alias for --power10-stubs=no">;
-
-def power10_stubs_eq:
- J<"power10-stubs=">, HelpText<
- "Enables Power10 instructions in all stubs without options, "
- "options override previous flags."
- "auto: Allow Power10 instructions in stubs if applicable."
- "no: No Power10 instructions in stubs.">;
+def power10_stubs_eq: JJ<"power10-stubs=">, MetaVarName<"<mode>">,
+ HelpText<"Whether to use Power10 instructions in call stubs for R_PPC64_REL24_NOTOC and TOC/NOTOC "
+ "interworking (yes (default): use; no: don't use). \"auto\" is currently the same as \"yes\"">;
+def power10_stubs: FF<"power10-stubs">, Alias<power10_stubs_eq>, AliasArgs<["yes"]>,
+ HelpText<"Alias for --power10-stubs=auto">;
+def no_power10_stubs: FF<"no-power10-stubs">, Alias<power10_stubs_eq>, AliasArgs<["no"]>,
+ HelpText<"Alias for --power10-stubs=no">;
defm version_script: Eq<"version-script", "Read a version script">;
defm warn_backrefs: BB<"warn-backrefs",
- "Warn about backward symbol references to fetch archive members",
- "Do not warn about backward symbol references to fetch archive members (default)">;
+ "Warn about backward symbol references to extract archive members",
+ "Do not warn about backward symbol references to extract archive members (default)">;
defm warn_backrefs_exclude
: EEq<"warn-backrefs-exclude",
diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp
index cc4f0688701a..a17f713b742a 100644
--- a/lld/ELF/OutputSections.cpp
+++ b/lld/ELF/OutputSections.cpp
@@ -33,7 +33,6 @@ using namespace lld;
using namespace lld::elf;
uint8_t *Out::bufferStart;
-uint8_t Out::first;
PhdrEntry *Out::tlsPhdr;
OutputSection *Out::elfHeader;
OutputSection *Out::programHeaders;
@@ -69,7 +68,7 @@ void OutputSection::writeHeaderTo(typename ELFT::Shdr *shdr) {
}
OutputSection::OutputSection(StringRef name, uint32_t type, uint64_t flags)
- : BaseCommand(OutputSectionKind),
+ : SectionCommand(OutputSectionKind),
SectionBase(Output, name, flags, /*Entsize*/ 0, /*Alignment*/ 1, type,
/*Info*/ 0, /*Link*/ 0) {}
@@ -100,10 +99,9 @@ static bool canMergeToProgbits(unsigned type) {
void OutputSection::recordSection(InputSectionBase *isec) {
partition = isec->partition;
isec->parent = this;
- if (sectionCommands.empty() ||
- !isa<InputSectionDescription>(sectionCommands.back()))
- sectionCommands.push_back(make<InputSectionDescription>(""));
- auto *isd = cast<InputSectionDescription>(sectionCommands.back());
+ if (commands.empty() || !isa<InputSectionDescription>(commands.back()))
+ commands.push_back(make<InputSectionDescription>(""));
+ auto *isd = cast<InputSectionDescription>(commands.back());
isd->sectionBases.push_back(isec);
}
@@ -166,15 +164,15 @@ void OutputSection::commitSection(InputSection *isec) {
// to compute an output offset for each piece of each input section.
void OutputSection::finalizeInputSections() {
std::vector<MergeSyntheticSection *> mergeSections;
- for (BaseCommand *base : sectionCommands) {
- auto *cmd = dyn_cast<InputSectionDescription>(base);
- if (!cmd)
+ for (SectionCommand *cmd : commands) {
+ auto *isd = dyn_cast<InputSectionDescription>(cmd);
+ if (!isd)
continue;
- cmd->sections.reserve(cmd->sectionBases.size());
- for (InputSectionBase *s : cmd->sectionBases) {
+ isd->sections.reserve(isd->sectionBases.size());
+ for (InputSectionBase *s : isd->sectionBases) {
MergeInputSection *ms = dyn_cast<MergeInputSection>(s);
if (!ms) {
- cmd->sections.push_back(cast<InputSection>(s));
+ isd->sections.push_back(cast<InputSection>(s));
continue;
}
@@ -203,17 +201,17 @@ void OutputSection::finalizeInputSections() {
mergeSections.push_back(syn);
i = std::prev(mergeSections.end());
syn->entsize = ms->entsize;
- cmd->sections.push_back(syn);
+ isd->sections.push_back(syn);
}
(*i)->addSection(ms);
}
// sectionBases should not be used from this point onwards. Clear it to
// catch misuses.
- cmd->sectionBases.clear();
+ isd->sectionBases.clear();
// Some input sections may be removed from the list after ICF.
- for (InputSection *s : cmd->sections)
+ for (InputSection *s : isd->sections)
commitSection(s);
}
for (auto *ms : mergeSections)
@@ -237,13 +235,13 @@ uint64_t elf::getHeaderSize() {
return Out::elfHeader->size + Out::programHeaders->size;
}
-bool OutputSection::classof(const BaseCommand *c) {
+bool OutputSection::classof(const SectionCommand *c) {
return c->kind == OutputSectionKind;
}
void OutputSection::sort(llvm::function_ref<int(InputSectionBase *s)> order) {
assert(isLive());
- for (BaseCommand *b : sectionCommands)
+ for (SectionCommand *b : commands)
if (auto *isd = dyn_cast<InputSectionDescription>(b))
sortByOrder(isd->sections, order);
}
@@ -367,8 +365,8 @@ template <class ELFT> void OutputSection::writeTo(uint8_t *buf) {
// Linker scripts may have BYTE()-family commands with which you
// can write arbitrary bytes to the output. Process them if any.
- for (BaseCommand *base : sectionCommands)
- if (auto *data = dyn_cast<ByteCommand>(base))
+ for (SectionCommand *cmd : commands)
+ if (auto *data = dyn_cast<ByteCommand>(cmd))
writeInt(buf + data->offset, data->expression().getValue(), data->size);
}
@@ -485,8 +483,8 @@ static bool compCtors(const InputSection *a, const InputSection *b) {
// Unfortunately, the rules are different from the one for .{init,fini}_array.
// Read the comment above.
void OutputSection::sortCtorsDtors() {
- assert(sectionCommands.size() == 1);
- auto *isd = cast<InputSectionDescription>(sectionCommands[0]);
+ assert(commands.size() == 1);
+ auto *isd = cast<InputSectionDescription>(commands[0]);
llvm::stable_sort(isd->sections, compCtors);
}
@@ -505,8 +503,8 @@ int elf::getPriority(StringRef s) {
}
InputSection *elf::getFirstInputSection(const OutputSection *os) {
- for (BaseCommand *base : os->sectionCommands)
- if (auto *isd = dyn_cast<InputSectionDescription>(base))
+ for (SectionCommand *cmd : os->commands)
+ if (auto *isd = dyn_cast<InputSectionDescription>(cmd))
if (!isd->sections.empty())
return isd->sections[0];
return nullptr;
@@ -514,8 +512,8 @@ InputSection *elf::getFirstInputSection(const OutputSection *os) {
std::vector<InputSection *> elf::getInputSections(const OutputSection *os) {
std::vector<InputSection *> ret;
- for (BaseCommand *base : os->sectionCommands)
- if (auto *isd = dyn_cast<InputSectionDescription>(base))
+ for (SectionCommand *cmd : os->commands)
+ if (auto *isd = dyn_cast<InputSectionDescription>(cmd))
ret.insert(ret.end(), isd->sections.begin(), isd->sections.end());
return ret;
}
diff --git a/lld/ELF/OutputSections.h b/lld/ELF/OutputSections.h
index a0f806614387..a5b05cf28aa8 100644
--- a/lld/ELF/OutputSections.h
+++ b/lld/ELF/OutputSections.h
@@ -29,7 +29,7 @@ class InputSectionBase;
// It is composed of multiple InputSections.
// The writer creates multiple OutputSections and assigns them unique,
// non-overlapping file offsets and VAs.
-class OutputSection final : public BaseCommand, public SectionBase {
+class OutputSection final : public SectionCommand, public SectionBase {
public:
OutputSection(StringRef name, uint32_t type, uint64_t flags);
@@ -37,7 +37,7 @@ public:
return s->kind() == SectionBase::Output;
}
- static bool classof(const BaseCommand *c);
+ static bool classof(const SectionCommand *c);
uint64_t getLMA() const { return ptLoad ? addr + ptLoad->lmaOffset : addr; }
template <typename ELFT> void writeHeaderTo(typename ELFT::Shdr *sHdr);
@@ -82,7 +82,7 @@ public:
Expr alignExpr;
Expr lmaExpr;
Expr subalignExpr;
- std::vector<BaseCommand *> sectionCommands;
+ std::vector<SectionCommand *> commands;
std::vector<StringRef> phdrs;
llvm::Optional<std::array<uint8_t, 4>> filler;
ConstraintKind constraint = ConstraintKind::NoConstraint;
@@ -128,7 +128,6 @@ std::vector<InputSection *> getInputSections(const OutputSection *os);
// until Writer is initialized.
struct Out {
static uint8_t *bufferStart;
- static uint8_t first;
static PhdrEntry *tlsPhdr;
static OutputSection *elfHeader;
static OutputSection *programHeaders;
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index 9c22ce7d6013..5136ba2151a3 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -66,10 +66,10 @@ using namespace lld;
using namespace lld::elf;
static Optional<std::string> getLinkerScriptLocation(const Symbol &sym) {
- for (BaseCommand *base : script->sectionCommands)
- if (auto *cmd = dyn_cast<SymbolAssignment>(base))
- if (cmd->sym == &sym)
- return cmd->location;
+ for (SectionCommand *cmd : script->sectionCommands)
+ if (auto *assign = dyn_cast<SymbolAssignment>(cmd))
+ if (assign->sym == &sym)
+ return assign->location;
return None;
}
@@ -366,10 +366,10 @@ template <class ELFT> static void addCopyRelSymbol(SharedSymbol &ss) {
// At this point, sectionBases has been migrated to sections. Append sec to
// sections.
- if (osec->sectionCommands.empty() ||
- !isa<InputSectionDescription>(osec->sectionCommands.back()))
- osec->sectionCommands.push_back(make<InputSectionDescription>(""));
- auto *isd = cast<InputSectionDescription>(osec->sectionCommands.back());
+ if (osec->commands.empty() ||
+ !isa<InputSectionDescription>(osec->commands.back()))
+ osec->commands.push_back(make<InputSectionDescription>(""));
+ auto *isd = cast<InputSectionDescription>(osec->commands.back());
isd->sections.push_back(sec);
osec->commitSection(sec);
@@ -1358,32 +1358,6 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i,
}
}
- // Relax relocations.
- //
- // If we know that a PLT entry will be resolved within the same ELF module, we
- // can skip PLT access and directly jump to the destination function. For
- // example, if we are linking a main executable, all dynamic symbols that can
- // be resolved within the executable will actually be resolved that way at
- // runtime, because the main executable is always at the beginning of a search
- // list. We can leverage that fact.
- if (!sym.isPreemptible && (!sym.isGnuIFunc() || config->zIfuncNoplt)) {
- if (expr != R_GOT_PC) {
- // The 0x8000 bit of r_addend of R_PPC_PLTREL24 is used to choose call
- // stub type. It should be ignored if optimized to R_PC.
- if (config->emachine == EM_PPC && expr == R_PPC32_PLTREL)
- addend &= ~0x8000;
- // R_HEX_GD_PLT_B22_PCREL (call a@GDPLT) is transformed into
- // call __tls_get_addr even if the symbol is non-preemptible.
- if (!(config->emachine == EM_HEXAGON &&
- (type == R_HEX_GD_PLT_B22_PCREL ||
- type == R_HEX_GD_PLT_B22_PCREL_X ||
- type == R_HEX_GD_PLT_B32_PCREL_X)))
- expr = fromPlt(expr);
- } else if (!isAbsoluteValue(sym)) {
- expr = target->adjustGotPcExpr(type, addend, relocatedAddr);
- }
- }
-
// If the relocation does not emit a GOT or GOTPLT entry but its computation
// uses their addresses, we need GOT or GOTPLT to be created.
//
@@ -1411,6 +1385,32 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i,
return;
}
+ // Relax relocations.
+ //
+ // If we know that a PLT entry will be resolved within the same ELF module, we
+ // can skip PLT access and directly jump to the destination function. For
+ // example, if we are linking a main executable, all dynamic symbols that can
+ // be resolved within the executable will actually be resolved that way at
+ // runtime, because the main executable is always at the beginning of a search
+ // list. We can leverage that fact.
+ if (!sym.isPreemptible && (!sym.isGnuIFunc() || config->zIfuncNoplt)) {
+ if (expr != R_GOT_PC) {
+ // The 0x8000 bit of r_addend of R_PPC_PLTREL24 is used to choose call
+ // stub type. It should be ignored if optimized to R_PC.
+ if (config->emachine == EM_PPC && expr == R_PPC32_PLTREL)
+ addend &= ~0x8000;
+ // R_HEX_GD_PLT_B22_PCREL (call a@GDPLT) is transformed into
+ // call __tls_get_addr even if the symbol is non-preemptible.
+ if (!(config->emachine == EM_HEXAGON &&
+ (type == R_HEX_GD_PLT_B22_PCREL ||
+ type == R_HEX_GD_PLT_B22_PCREL_X ||
+ type == R_HEX_GD_PLT_B32_PCREL_X)))
+ expr = fromPlt(expr);
+ } else if (!isAbsoluteValue(sym)) {
+ expr = target->adjustGotPcExpr(type, addend, relocatedAddr);
+ }
+ }
+
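The moved block implements the comment's reasoning: a call through the PLT
can be rewritten as a direct PC-relative branch when the callee is known to
resolve within the module being linked. Condensed to its guard condition (an
illustrative helper, not the LLD API):

    // A PLT-relative expression may be relaxed to its direct form when the
    // symbol cannot be preempted at run time, and when ifunc PLT entries
    // are either not involved or suppressed via -z ifunc-noplt.
    static bool canBypassPlt(bool isPreemptible, bool isGnuIFunc,
                             bool zIfuncNoplt) {
      return !isPreemptible && (!isGnuIFunc || zIfuncNoplt);
    }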
// We were asked not to generate PLT entries for ifuncs. Instead, pass the
// direct relocation on through.
if (sym.isGnuIFunc() && config->zIfuncNoplt) {
@@ -1640,7 +1640,7 @@ static void forEachInputSectionDescription(
for (OutputSection *os : outputSections) {
if (!(os->flags & SHF_ALLOC) || !(os->flags & SHF_EXECINSTR))
continue;
- for (BaseCommand *bc : os->sectionCommands)
+ for (SectionCommand *bc : os->commands)
if (auto *isd = dyn_cast<InputSectionDescription>(bc))
fn(os, isd);
}
@@ -1817,7 +1817,7 @@ ThunkSection *ThunkCreator::getISThunkSec(InputSection *isec) {
// Find InputSectionRange within Target Output Section (TOS) that the
// InputSection (IS) that we need to precede is in.
OutputSection *tos = isec->getParent();
- for (BaseCommand *bc : tos->sectionCommands) {
+ for (SectionCommand *bc : tos->commands) {
auto *isd = dyn_cast<InputSectionDescription>(bc);
if (!isd || isd->sections.empty())
continue;
diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp
index ad3b3e61ad59..d3b0296acab0 100644
--- a/lld/ELF/ScriptParser.cpp
+++ b/lld/ELF/ScriptParser.cpp
@@ -93,7 +93,7 @@ private:
void readSectionAddressType(OutputSection *cmd);
OutputSection *readOverlaySectionDescription();
OutputSection *readOutputSectionDescription(StringRef outSec);
- std::vector<BaseCommand *> readOverlay();
+ std::vector<SectionCommand *> readOverlay();
std::vector<StringRef> readOutputSectionPhdrs();
std::pair<uint64_t, uint64_t> readInputSectionFlags();
InputSectionDescription *readInputSectionDescription(StringRef tok);
@@ -113,7 +113,8 @@ private:
Expr getPageSize();
Expr readMemoryAssignment(StringRef, StringRef, StringRef);
- std::pair<uint32_t, uint32_t> readMemoryAttributes();
+ void readMemoryAttributes(uint32_t &flags, uint32_t &invFlags,
+ uint32_t &negFlags, uint32_t &negInvFlags);
Expr combine(StringRef op, Expr l, Expr r);
Expr readExpr();
@@ -518,7 +519,7 @@ void ScriptParser::readSearchDir() {
// sections that use the same virtual memory range and normally would trigger
// the linker's section sanity checks.
// https://sourceware.org/binutils/docs/ld/Overlay-Description.html#Overlay-Description
-std::vector<BaseCommand *> ScriptParser::readOverlay() {
+std::vector<SectionCommand *> ScriptParser::readOverlay() {
// VA and LMA expressions are optional, though for simplicity of
// implementation we assume they are not. That is what OVERLAY was designed
// for first of all: to allow sections with overlapping VAs at different LMAs.
@@ -528,7 +529,7 @@ std::vector<BaseCommand *> ScriptParser::readOverlay() {
Expr lmaExpr = readParenExpr();
expect("{");
- std::vector<BaseCommand *> v;
+ std::vector<SectionCommand *> v;
OutputSection *prev = nullptr;
while (!errorCount() && !consume("}")) {
// VA is the same for all sections. The LMAs are consecutive in memory
@@ -549,7 +550,7 @@ std::vector<BaseCommand *> ScriptParser::readOverlay() {
// Here we want to create the Dot assignment command to achieve that.
Expr moveDot = [=] {
uint64_t max = 0;
- for (BaseCommand *cmd : v)
+ for (SectionCommand *cmd : v)
max = std::max(max, cast<OutputSection>(cmd)->size);
return addrExpr().getValue() + max;
};
@@ -565,11 +566,11 @@ void ScriptParser::readOverwriteSections() {
void ScriptParser::readSections() {
expect("{");
- std::vector<BaseCommand *> v;
+ std::vector<SectionCommand *> v;
while (!errorCount() && !consume("}")) {
StringRef tok = next();
if (tok == "OVERLAY") {
- for (BaseCommand *cmd : readOverlay())
+ for (SectionCommand *cmd : readOverlay())
v.push_back(cmd);
continue;
} else if (tok == "INCLUDE") {
@@ -577,7 +578,7 @@ void ScriptParser::readSections() {
continue;
}
- if (BaseCommand *cmd = readAssignment(tok))
+ if (SectionCommand *cmd = readAssignment(tok))
v.push_back(cmd);
else
v.push_back(readOutputSectionDescription(tok));
@@ -597,7 +598,7 @@ void ScriptParser::readSections() {
setError("expected AFTER/BEFORE, but got '" + next() + "'");
StringRef where = next();
std::vector<StringRef> names;
- for (BaseCommand *cmd : v)
+ for (SectionCommand *cmd : v)
if (auto *os = dyn_cast<OutputSection>(cmd))
names.push_back(os->name);
if (!names.empty())
@@ -848,7 +849,7 @@ OutputSection *ScriptParser::readOverlaySectionDescription() {
uint64_t withoutFlags = 0;
if (consume("INPUT_SECTION_FLAGS"))
std::tie(withFlags, withoutFlags) = readInputSectionFlags();
- cmd->sectionCommands.push_back(
+ cmd->commands.push_back(
readInputSectionRules(next(), withFlags, withoutFlags));
}
return cmd;
@@ -884,9 +885,9 @@ OutputSection *ScriptParser::readOutputSectionDescription(StringRef outSec) {
if (tok == ";") {
// Empty commands are allowed. Do nothing here.
} else if (SymbolAssignment *assign = readAssignment(tok)) {
- cmd->sectionCommands.push_back(assign);
+ cmd->commands.push_back(assign);
} else if (ByteCommand *data = readByteCommand(tok)) {
- cmd->sectionCommands.push_back(data);
+ cmd->commands.push_back(data);
} else if (tok == "CONSTRUCTORS") {
// CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors
// by name. This is for very old file formats such as ECOFF/XCOFF.
@@ -903,7 +904,7 @@ OutputSection *ScriptParser::readOutputSectionDescription(StringRef outSec) {
} else if (tok == "INCLUDE") {
readInclude();
} else if (peek() == "(") {
- cmd->sectionCommands.push_back(readInputSectionDescription(tok));
+ cmd->commands.push_back(readInputSectionDescription(tok));
} else {
// We have a file name and no input sections description. It is not a
// commonly used syntax, but still acceptable. In that case, all sections
@@ -913,7 +914,7 @@ OutputSection *ScriptParser::readOutputSectionDescription(StringRef outSec) {
// case above.
auto *isd = make<InputSectionDescription>(tok);
isd->sectionPatterns.push_back({{}, StringMatcher("*")});
- cmd->sectionCommands.push_back(isd);
+ cmd->commands.push_back(isd);
}
}
@@ -1614,9 +1615,11 @@ void ScriptParser::readMemory() {
}
uint32_t flags = 0;
+ uint32_t invFlags = 0;
uint32_t negFlags = 0;
+ uint32_t negInvFlags = 0;
if (consume("(")) {
- std::tie(flags, negFlags) = readMemoryAttributes();
+ readMemoryAttributes(flags, invFlags, negFlags, negInvFlags);
expect(")");
}
expect(":");
@@ -1626,7 +1629,8 @@ void ScriptParser::readMemory() {
Expr length = readMemoryAssignment("LENGTH", "len", "l");
// Add the memory region to the region map.
- MemoryRegion *mr = make<MemoryRegion>(tok, origin, length, flags, negFlags);
+ MemoryRegion *mr = make<MemoryRegion>(tok, origin, length, flags, invFlags,
+ negFlags, negInvFlags);
if (!script->memoryRegions.insert({tok, mr}).second)
setError("region '" + tok + "' already defined");
}
@@ -1635,30 +1639,34 @@ void ScriptParser::readMemory() {
// This function parses the attributes used to match against section
// flags when placing output sections in a memory region. These flags
// are only used when an explicit memory region name is not used.
-std::pair<uint32_t, uint32_t> ScriptParser::readMemoryAttributes() {
- uint32_t flags = 0;
- uint32_t negFlags = 0;
+void ScriptParser::readMemoryAttributes(uint32_t &flags, uint32_t &invFlags,
+ uint32_t &negFlags,
+ uint32_t &negInvFlags) {
bool invert = false;
for (char c : next().lower()) {
- uint32_t flag = 0;
- if (c == '!')
+ if (c == '!') {
invert = !invert;
- else if (c == 'w')
- flag = SHF_WRITE;
+ std::swap(flags, negFlags);
+ std::swap(invFlags, negInvFlags);
+ continue;
+ }
+ if (c == 'w')
+ flags |= SHF_WRITE;
else if (c == 'x')
- flag = SHF_EXECINSTR;
+ flags |= SHF_EXECINSTR;
else if (c == 'a')
- flag = SHF_ALLOC;
- else if (c != 'r')
+ flags |= SHF_ALLOC;
+ else if (c == 'r')
+ invFlags |= SHF_WRITE;
+ else
setError("invalid memory region attribute");
+ }
- if (invert)
- negFlags |= flag;
- else
- flags |= flag;
+ if (invert) {
+ std::swap(flags, negFlags);
+ std::swap(invFlags, negInvFlags);
}
- return {flags, negFlags};
}
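// A standalone sketch (not from the patch itself) of the four-set attribute
// semantics introduced above, with hypothetical names. 'r' has no positive
// SHF_* bit of its own, so it is recorded as the absence of SHF_WRITE in
// invFlags; '!' flips which pair of sets the following letters feed, and a
// trailing '!' is undone after the loop.
#include <cstdint>
#include <utility>

enum : uint32_t { SHF_WRITE = 0x1, SHF_ALLOC = 0x2, SHF_EXECINSTR = 0x4 };

struct MemAttrs {
  uint32_t flags = 0, invFlags = 0, negFlags = 0, negInvFlags = 0;
};

static MemAttrs parseAttrs(const char *s) {
  MemAttrs a;
  bool invert = false;
  for (; *s; ++s) {
    if (*s == '!') {
      invert = !invert;
      std::swap(a.flags, a.negFlags);
      std::swap(a.invFlags, a.negInvFlags);
      continue;
    }
    if (*s == 'w')
      a.flags |= SHF_WRITE;
    else if (*s == 'x')
      a.flags |= SHF_EXECINSTR;
    else if (*s == 'a')
      a.flags |= SHF_ALLOC;
    else if (*s == 'r')
      a.invFlags |= SHF_WRITE; // read-only: match sections lacking SHF_WRITE
  }
  if (invert) { // undo a trailing '!'
    std::swap(a.flags, a.negFlags);
    std::swap(a.invFlags, a.negInvFlags);
  }
  return a;
}
// e.g. parseAttrs("rx") yields flags == SHF_EXECINSTR, invFlags == SHF_WRITE:
// roughly, the region accepts executable or non-writable sections.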
void elf::readLinkerScript(MemoryBufferRef mb) {
diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp
index c309957ee5ba..e615fb70a40f 100644
--- a/lld/ELF/SymbolTable.cpp
+++ b/lld/ELF/SymbolTable.cpp
@@ -113,7 +113,7 @@ Symbol *SymbolTable::find(StringRef name) {
// A version script/dynamic list is only meaningful for a Defined symbol.
// A CommonSymbol will be converted to a Defined in replaceCommonSymbols().
-// A lazy symbol may be made Defined if an LTO libcall fetches it.
+// A lazy symbol may be made Defined if an LTO libcall extracts it.
static bool canBeVersioned(const Symbol &sym) {
return sym.isDefined() || sym.isCommon() || sym.isLazy();
}
diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp
index 5f95a1b3c7ac..8c410b4d5bfb 100644
--- a/lld/ELF/Symbols.cpp
+++ b/lld/ELF/Symbols.cpp
@@ -256,18 +256,11 @@ void Symbol::parseSymbolVersion() {
verstr);
}
-void Symbol::fetch() const {
- if (auto *sym = dyn_cast<LazyArchive>(this)) {
- cast<ArchiveFile>(sym->file)->fetch(sym->sym);
- return;
- }
-
- if (auto *sym = dyn_cast<LazyObject>(this)) {
- dyn_cast<LazyObjFile>(sym->file)->fetch();
- return;
- }
-
- llvm_unreachable("Symbol::fetch() is called on a non-lazy symbol");
+void Symbol::extract() const {
+ if (auto *sym = dyn_cast<LazyArchive>(this))
+ cast<ArchiveFile>(sym->file)->extract(sym->sym);
+ else
+ cast<LazyObjFile>(this->file)->extract();
}
MemoryBufferRef LazyArchive::getMemberBuffer() {
@@ -478,8 +471,8 @@ void Symbol::resolveUndefined(const Undefined &other) {
printTraceSymbol(&other);
if (isLazy()) {
- // An undefined weak will not fetch archive members. See comment on Lazy in
- // Symbols.h for the details.
+ // An undefined weak will not extract archive members. See comment on Lazy
+ // in Symbols.h for the details.
if (other.binding == STB_WEAK) {
binding = STB_WEAK;
type = other.type;
@@ -489,9 +482,9 @@ void Symbol::resolveUndefined(const Undefined &other) {
// Do extra check for --warn-backrefs.
//
// --warn-backrefs is an option to prevent an undefined reference from
- // fetching an archive member written earlier in the command line. It can be
- // used to keep compatibility with GNU linkers to some degree.
- // I'll explain the feature and why you may find it useful in this comment.
+ // extracting an archive member written earlier in the command line. It can
+ // be used to keep compatibility with GNU linkers to some degree. I'll
+ // explain the feature and why you may find it useful in this comment.
//
// lld's symbol resolution semantics is more relaxed than traditional Unix
// linkers. For example,
@@ -538,7 +531,7 @@ void Symbol::resolveUndefined(const Undefined &other) {
// group assignment rule simulates the traditional linker's semantics.
bool backref = config->warnBackrefs && other.file &&
file->groupId < other.file->groupId;
- fetch();
+ extract();
if (!config->whyExtract.empty())
recordWhyExtract(other.file, *file, *this);
@@ -712,23 +705,23 @@ template <class LazyT>
static void replaceCommon(Symbol &oldSym, const LazyT &newSym) {
backwardReferences.erase(&oldSym);
oldSym.replace(newSym);
- newSym.fetch();
+ newSym.extract();
}
template <class LazyT> void Symbol::resolveLazy(const LazyT &other) {
// For common objects, we want to look for global or weak definitions that
- // should be fetched as the canonical definition instead.
+ // should be extracted as the canonical definition instead.
if (isCommon() && elf::config->fortranCommon) {
if (auto *laSym = dyn_cast<LazyArchive>(&other)) {
ArchiveFile *archive = cast<ArchiveFile>(laSym->file);
const Archive::Symbol &archiveSym = laSym->sym;
- if (archive->shouldFetchForCommon(archiveSym)) {
+ if (archive->shouldExtractForCommon(archiveSym)) {
replaceCommon(*this, other);
return;
}
} else if (auto *loSym = dyn_cast<LazyObject>(&other)) {
LazyObjFile *obj = cast<LazyObjFile>(loSym->file);
- if (obj->shouldFetchForCommon(loSym->getName())) {
+ if (obj->shouldExtractForCommon(loSym->getName())) {
replaceCommon(*this, other);
return;
}
@@ -742,7 +735,7 @@ template <class LazyT> void Symbol::resolveLazy(const LazyT &other) {
return;
}
- // An undefined weak will not fetch archive members. See comment on Lazy in
+ // An undefined weak will not extract archive members. See comment on Lazy in
// Symbols.h for the details.
if (isWeak()) {
uint8_t ty = type;
@@ -753,7 +746,7 @@ template <class LazyT> void Symbol::resolveLazy(const LazyT &other) {
}
const InputFile *oldFile = file;
- other.fetch();
+ other.extract();
if (!config->whyExtract.empty())
recordWhyExtract(oldFile, *file, *this);
}
diff --git a/lld/ELF/Symbols.h b/lld/ELF/Symbols.h
index 816d61563021..cc48ef0ab3b7 100644
--- a/lld/ELF/Symbols.h
+++ b/lld/ELF/Symbols.h
@@ -93,7 +93,7 @@ public:
// Symbol binding. This is not overwritten by replace() to track
// changes during resolution. In particular:
// - An undefined weak is still weak when it resolves to a shared library.
- // - An undefined weak will not fetch archive members, but we have to
+ // - An undefined weak will not extract archive members, but we have to
// remember it is weak.
uint8_t binding;
@@ -216,10 +216,10 @@ public:
void mergeProperties(const Symbol &other);
void resolve(const Symbol &other);
- // If this is a lazy symbol, fetch an input file and add the symbol
+ // If this is a lazy symbol, extract an input file and add the symbol
// in the file to the symbol table. Calling this function on
// a non-lazy object causes a runtime error.
- void fetch() const;
+ void extract() const;
static bool isExportDynamic(Kind k, uint8_t visibility) {
if (k == SharedKind)
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index f1594eb8df86..4078f7e01674 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -900,7 +900,7 @@ void MipsGotSection::build() {
got.pagesMap) {
const OutputSection *os = p.first;
uint64_t secSize = 0;
- for (BaseCommand *cmd : os->sectionCommands) {
+ for (SectionCommand *cmd : os->commands) {
if (auto *isd = dyn_cast<InputSectionDescription>(cmd))
for (InputSection *isec : isd->sections) {
uint64_t off = alignTo(secSize, isec->alignment);
@@ -1258,43 +1258,6 @@ DynamicSection<ELFT>::DynamicSection()
this->flags = SHF_ALLOC;
}
-template <class ELFT>
-void DynamicSection<ELFT>::add(int32_t tag, std::function<uint64_t()> fn) {
- entries.push_back({tag, fn});
-}
-
-template <class ELFT>
-void DynamicSection<ELFT>::addInt(int32_t tag, uint64_t val) {
- entries.push_back({tag, [=] { return val; }});
-}
-
-template <class ELFT>
-void DynamicSection<ELFT>::addInSec(int32_t tag, InputSection *sec) {
- entries.push_back({tag, [=] { return sec->getVA(0); }});
-}
-
-template <class ELFT>
-void DynamicSection<ELFT>::addInSecRelative(int32_t tag, InputSection *sec) {
- size_t tagOffset = entries.size() * entsize;
- entries.push_back(
- {tag, [=] { return sec->getVA(0) - (getVA() + tagOffset); }});
-}
-
-template <class ELFT>
-void DynamicSection<ELFT>::addOutSec(int32_t tag, OutputSection *sec) {
- entries.push_back({tag, [=] { return sec->addr; }});
-}
-
-template <class ELFT>
-void DynamicSection<ELFT>::addSize(int32_t tag, OutputSection *sec) {
- entries.push_back({tag, [=] { return sec->size; }});
-}
-
-template <class ELFT>
-void DynamicSection<ELFT>::addSym(int32_t tag, Symbol *sym) {
- entries.push_back({tag, [=] { return sym->getVA(); }});
-}
-
// The output section .rela.dyn may include these synthetic sections:
//
// - part.relaDyn
@@ -1303,15 +1266,13 @@ void DynamicSection<ELFT>::addSym(int32_t tag, Symbol *sym) {
// .rela.dyn
//
// DT_RELASZ is the total size of the included sections.
-static std::function<uint64_t()> addRelaSz(RelocationBaseSection *relaDyn) {
- return [=]() {
- size_t size = relaDyn->getSize();
- if (in.relaIplt->getParent() == relaDyn->getParent())
- size += in.relaIplt->getSize();
- if (in.relaPlt->getParent() == relaDyn->getParent())
- size += in.relaPlt->getSize();
- return size;
- };
+static uint64_t addRelaSz(RelocationBaseSection *relaDyn) {
+ size_t size = relaDyn->getSize();
+ if (in.relaIplt->getParent() == relaDyn->getParent())
+ size += in.relaIplt->getSize();
+ if (in.relaPlt->getParent() == relaDyn->getParent())
+ size += in.relaPlt->getSize();
+ return size;
}
// A Linker script may assign the RELA relocation sections to the same
@@ -1327,9 +1288,19 @@ static uint64_t addPltRelSz() {
}
// Add remaining entries to complete .dynamic contents.
-template <class ELFT> void DynamicSection<ELFT>::finalizeContents() {
+template <class ELFT>
+std::vector<std::pair<int32_t, uint64_t>>
+DynamicSection<ELFT>::computeContents() {
elf::Partition &part = getPartition();
bool isMain = part.name.empty();
+ std::vector<std::pair<int32_t, uint64_t>> entries;
+
+ auto addInt = [&](int32_t tag, uint64_t val) {
+ entries.emplace_back(tag, val);
+ };
+ auto addInSec = [&](int32_t tag, const InputSection *sec) {
+ entries.emplace_back(tag, sec->getVA());
+ };
for (StringRef s : config->filterList)
addInt(DT_FILTER, part.dynStrTab->addString(s));
@@ -1382,7 +1353,7 @@ template <class ELFT> void DynamicSection<ELFT>::finalizeContents() {
}
if (!config->zText)
dtFlags |= DF_TEXTREL;
- if (config->hasStaticTlsModel)
+ if (config->hasTlsIe && config->shared)
dtFlags |= DF_STATIC_TLS;
if (dtFlags)
@@ -1401,14 +1372,11 @@ template <class ELFT> void DynamicSection<ELFT>::finalizeContents() {
if (!config->shared && !config->relocatable && !config->zRodynamic)
addInt(DT_DEBUG, 0);
- if (OutputSection *sec = part.dynStrTab->getParent())
- this->link = sec->sectionIndex;
-
if (part.relaDyn->isNeeded() ||
(in.relaIplt->isNeeded() &&
part.relaDyn->getParent() == in.relaIplt->getParent())) {
addInSec(part.relaDyn->dynamicTag, part.relaDyn);
- entries.push_back({part.relaDyn->sizeDynamicTag, addRelaSz(part.relaDyn)});
+ entries.emplace_back(part.relaDyn->sizeDynamicTag, addRelaSz(part.relaDyn));
bool isRela = config->isRela;
addInt(isRela ? DT_RELAENT : DT_RELENT,
@@ -1426,8 +1394,8 @@ template <class ELFT> void DynamicSection<ELFT>::finalizeContents() {
if (part.relrDyn && !part.relrDyn->relocs.empty()) {
addInSec(config->useAndroidRelrTags ? DT_ANDROID_RELR : DT_RELR,
part.relrDyn);
- addSize(config->useAndroidRelrTags ? DT_ANDROID_RELRSZ : DT_RELRSZ,
- part.relrDyn->getParent());
+ addInt(config->useAndroidRelrTags ? DT_ANDROID_RELRSZ : DT_RELRSZ,
+ part.relrDyn->getParent()->size);
addInt(config->useAndroidRelrTags ? DT_ANDROID_RELRENT : DT_RELRENT,
sizeof(Elf_Relr));
}
@@ -1439,7 +1407,7 @@ template <class ELFT> void DynamicSection<ELFT>::finalizeContents() {
// .rel[a].plt section.
if (isMain && (in.relaPlt->isNeeded() || in.relaIplt->isNeeded())) {
addInSec(DT_JMPREL, in.relaPlt);
- entries.push_back({DT_PLTRELSZ, addPltRelSz});
+ entries.emplace_back(DT_PLTRELSZ, addPltRelSz());
switch (config->emachine) {
case EM_MIPS:
addInSec(DT_MIPS_PLTGOT, in.gotPlt);
@@ -1481,24 +1449,24 @@ template <class ELFT> void DynamicSection<ELFT>::finalizeContents() {
if (isMain) {
if (Out::preinitArray) {
- addOutSec(DT_PREINIT_ARRAY, Out::preinitArray);
- addSize(DT_PREINIT_ARRAYSZ, Out::preinitArray);
+ addInt(DT_PREINIT_ARRAY, Out::preinitArray->addr);
+ addInt(DT_PREINIT_ARRAYSZ, Out::preinitArray->size);
}
if (Out::initArray) {
- addOutSec(DT_INIT_ARRAY, Out::initArray);
- addSize(DT_INIT_ARRAYSZ, Out::initArray);
+ addInt(DT_INIT_ARRAY, Out::initArray->addr);
+ addInt(DT_INIT_ARRAYSZ, Out::initArray->size);
}
if (Out::finiArray) {
- addOutSec(DT_FINI_ARRAY, Out::finiArray);
- addSize(DT_FINI_ARRAYSZ, Out::finiArray);
+ addInt(DT_FINI_ARRAY, Out::finiArray->addr);
+ addInt(DT_FINI_ARRAYSZ, Out::finiArray->size);
}
if (Symbol *b = symtab->find(config->init))
if (b->isDefined())
- addSym(DT_INIT, b);
+ addInt(DT_INIT, b->getVA());
if (Symbol *b = symtab->find(config->fini))
if (b->isDefined())
- addSym(DT_FINI, b);
+ addInt(DT_FINI, b->getVA());
}
if (part.verSym && part.verSym->isNeeded())
@@ -1521,8 +1489,7 @@ template <class ELFT> void DynamicSection<ELFT>::finalizeContents() {
addInt(DT_MIPS_FLAGS, RHF_NOTPOT);
addInt(DT_MIPS_BASE_ADDRESS, target->getImageBase());
addInt(DT_MIPS_SYMTABNO, part.dynSymTab->getNumSymbols());
-
- add(DT_MIPS_LOCAL_GOTNO, [] { return in.mipsGot->getLocalEntriesNum(); });
+ addInt(DT_MIPS_LOCAL_GOTNO, in.mipsGot->getLocalEntriesNum());
if (const Symbol *b = in.mipsGot->getFirstGlobalEntry())
addInt(DT_MIPS_GOTSYM, b->dynsymIndex);
@@ -1534,37 +1501,39 @@ template <class ELFT> void DynamicSection<ELFT>::finalizeContents() {
addInSec(DT_MIPS_RLD_MAP, in.mipsRldMap);
// Store the offset to the .rld_map section
// relative to the address of the tag.
- addInSecRelative(DT_MIPS_RLD_MAP_REL, in.mipsRldMap);
+ addInt(DT_MIPS_RLD_MAP_REL,
+ in.mipsRldMap->getVA() - (getVA() + entries.size() * entsize));
}
}
// DT_PPC_GOT indicates to glibc Secure PLT is used. If DT_PPC_GOT is absent,
// glibc assumes the old-style BSS PLT layout which we don't support.
if (config->emachine == EM_PPC)
- add(DT_PPC_GOT, [] { return in.got->getVA(); });
+ addInSec(DT_PPC_GOT, in.got);
// The Glink dynamic tag is required by the V2 ABI if the plt section isn't empty.
if (config->emachine == EM_PPC64 && in.plt->isNeeded()) {
// The Glink tag points to 32 bytes before the first lazy symbol resolution
// stub, which starts directly after the header.
- entries.push_back({DT_PPC64_GLINK, [=] {
- unsigned offset = target->pltHeaderSize - 32;
- return in.plt->getVA(0) + offset;
- }});
+ addInt(DT_PPC64_GLINK, in.plt->getVA() + target->pltHeaderSize - 32);
}
addInt(DT_NULL, 0);
+ return entries;
+}
- getParent()->link = this->link;
- this->size = entries.size() * this->entsize;
+template <class ELFT> void DynamicSection<ELFT>::finalizeContents() {
+ if (OutputSection *sec = getPartition().dynStrTab->getParent())
+ getParent()->link = sec->sectionIndex;
+ this->size = computeContents().size() * this->entsize;
}
template <class ELFT> void DynamicSection<ELFT>::writeTo(uint8_t *buf) {
auto *p = reinterpret_cast<Elf_Dyn *>(buf);
- for (std::pair<int32_t, std::function<uint64_t()>> &kv : entries) {
+ for (std::pair<int32_t, uint64_t> kv : computeContents()) {
p->d_tag = kv.first;
- p->d_un.d_val = kv.second();
+ p->d_un.d_val = kv.second;
++p;
}
}
@@ -2331,8 +2300,8 @@ bool SymtabShndxSection::isNeeded() const {
// late, and we do not know them here. For simplicity, we just always create
// a .symtab_shndx section when the number of output sections is huge.
size_t size = 0;
- for (BaseCommand *base : script->sectionCommands)
- if (isa<OutputSection>(base))
+ for (SectionCommand *cmd : script->sectionCommands)
+ if (isa<OutputSection>(cmd))
++size;
return size >= SHN_LORESERVE;
}
@@ -2411,21 +2380,8 @@ void GnuHashTableSection::writeTo(uint8_t *buf) {
write32(buf + 12, Shift2);
buf += 16;
- // Write a bloom filter and a hash table.
- writeBloomFilter(buf);
- buf += config->wordsize * maskWords;
- writeHashTable(buf);
-}
-
-// This function writes a 2-bit bloom filter. This bloom filter alone
-// usually filters out 80% or more of all symbol lookups [1].
-// The dynamic linker uses the hash table only when a symbol is not
-// filtered out by a bloom filter.
-//
-// [1] Ulrich Drepper (2011), "How To Write Shared Libraries" (Ver. 4.1.2),
-// p.9, https://www.akkadia.org/drepper/dsohowto.pdf
-void GnuHashTableSection::writeBloomFilter(uint8_t *buf) {
- unsigned c = config->is64 ? 64 : 32;
+ // Write the 2-bit bloom filter.
+ const unsigned c = config->is64 ? 64 : 32;
for (const Entry &sym : symbols) {
// When C = 64, we choose a word with bits [6:...] and set 1 to two bits in
// the word using bits [0:5] and [26:31].
@@ -2435,9 +2391,9 @@ void GnuHashTableSection::writeBloomFilter(uint8_t *buf) {
val |= uint64_t(1) << ((sym.hash >> Shift2) % c);
writeUint(buf + i * config->wordsize, val);
}
-}
+ buf += config->wordsize * maskWords;
-void GnuHashTableSection::writeHashTable(uint8_t *buf) {
+ // Write the hash table.
uint32_t *buckets = reinterpret_cast<uint32_t *>(buf);
uint32_t oldBucket = -1;
uint32_t *values = buckets + nBuckets;
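// A worked example (not from the patch itself) of the two bloom-filter bits
// chosen per symbol in the loop above, for the 64-bit case (c == 64,
// Shift2 == 26), assuming the usual (hash / c) % maskWords word selection.
#include <cstdint>

static uint64_t bloomBits(uint32_t hash) {
  const unsigned c = 64, Shift2 = 26;
  uint64_t val = uint64_t(1) << (hash % c);     // low hash bits pick bit one
  val |= uint64_t(1) << ((hash >> Shift2) % c); // high hash bits pick bit two
  return val;
}
// For hash 0x12345678: 0x12345678 % 64 == 56 and (0x12345678 >> 26) % 64 == 4,
// so bits 56 and 4 are set in the selected mask word.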
@@ -3160,7 +3116,7 @@ size_t VersionTableSection::getSize() const {
void VersionTableSection::writeTo(uint8_t *buf) {
buf += 2;
for (const SymbolTableEntry &s : getPartition().dynSymTab->getSymbols()) {
- // For an unfetched lazy symbol (undefined weak), it must have been
+ // For an unextracted lazy symbol (undefined weak), it must have been
// converted to Undefined and have VER_NDX_GLOBAL version here.
assert(!s.sym->isLazy());
write16(buf, s.sym->versionId);
@@ -3648,8 +3604,8 @@ PPC32Got2Section::PPC32Got2Section()
bool PPC32Got2Section::isNeeded() const {
// See the comment below. This is not needed if there is no other
// InputSection.
- for (BaseCommand *base : getParent()->sectionCommands)
- if (auto *isd = dyn_cast<InputSectionDescription>(base))
+ for (SectionCommand *cmd : getParent()->commands)
+ if (auto *isd = dyn_cast<InputSectionDescription>(cmd))
for (InputSection *isec : isd->sections)
if (isec != this)
return true;
@@ -3662,8 +3618,8 @@ void PPC32Got2Section::finalizeContents() {
// PPC32PltCallStub::writeTo(). The purpose of this empty synthetic section is
// to collect input sections named ".got2".
uint32_t offset = 0;
- for (BaseCommand *base : getParent()->sectionCommands)
- if (auto *isd = dyn_cast<InputSectionDescription>(base)) {
+ for (SectionCommand *cmd : getParent()->commands)
+ if (auto *isd = dyn_cast<InputSectionDescription>(cmd)) {
for (InputSection *isec : isd->sections) {
if (isec == this)
continue;
diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h
index bc24922598fe..3d2e73071d09 100644
--- a/lld/ELF/SyntheticSections.h
+++ b/lld/ELF/SyntheticSections.h
@@ -493,9 +493,6 @@ private:
template <class ELFT> class DynamicSection final : public SyntheticSection {
LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
- // finalizeContents() fills this vector with the section contents.
- std::vector<std::pair<int32_t, std::function<uint64_t()>>> entries;
-
public:
DynamicSection();
void finalizeContents() override;
@@ -503,14 +500,7 @@ public:
size_t getSize() const override { return size; }
private:
- void add(int32_t tag, std::function<uint64_t()> fn);
- void addInt(int32_t tag, uint64_t val);
- void addInSec(int32_t tag, InputSection *sec);
- void addInSecRelative(int32_t tag, InputSection *sec);
- void addOutSec(int32_t tag, OutputSection *sec);
- void addSize(int32_t tag, OutputSection *sec);
- void addSym(int32_t tag, Symbol *sym);
-
+ std::vector<std::pair<int32_t, uint64_t>> computeContents();
uint64_t size = 0;
};
@@ -685,9 +675,6 @@ private:
// See the comment in writeBloomFilter.
enum { Shift2 = 26 };
- void writeBloomFilter(uint8_t *buf);
- void writeHashTable(uint8_t *buf);
-
struct Entry {
Symbol *sym;
size_t strTabOffset;
diff --git a/lld/ELF/Thunks.cpp b/lld/ELF/Thunks.cpp
index dbc476ffeeb7..ffbc8d94a800 100644
--- a/lld/ELF/Thunks.cpp
+++ b/lld/ELF/Thunks.cpp
@@ -399,24 +399,6 @@ public:
}
};
-// A bl instruction uses a signed 24 bit offset, with an implicit 4 byte
-// alignment. This gives a possible 26 bits of 'reach'. If the caller and
-// callee do not use toc and the call offset is larger than 26 bits,
-// we need to emit a pc-rel based long-branch thunk. The target address of
-// the callee is computed with a PC-relative offset.
-class PPC64PCRelLongBranchThunk final : public Thunk {
-public:
- PPC64PCRelLongBranchThunk(Symbol &dest, int64_t addend)
- : Thunk(dest, addend) {
- alignment = 16;
- }
- uint32_t size() override { return 32; }
- void writeTo(uint8_t *buf) override;
- void addSymbols(ThunkSection &isec) override;
- bool isCompatibleWith(const InputSection &isec,
- const Relocation &rel) const override;
-};
-
} // end anonymous namespace
Defined *Thunk::addSymbol(StringRef name, uint8_t type, uint64_t value,
@@ -932,7 +914,7 @@ void PPC64R2SaveStub::writeTo(uint8_t *buf) {
write32(buf + 4, 0x48000000 | (offset & 0x03fffffc)); // b <offset>
} else if (isInt<34>(offset)) {
int nextInstOffset;
- if (!config->Power10Stub) {
+ if (!config->power10Stubs) {
uint64_t tocOffset = destination.getVA() - getPPC64TocBase();
if (tocOffset >> 16 > 0) {
const uint64_t addi = ADDI_R12_TO_R12_NO_DISP | (tocOffset & 0xffff);
@@ -980,7 +962,7 @@ void PPC64R12SetupStub::writeTo(uint8_t *buf) {
reportRangeError(buf, offset, 34, destination, "R12 setup stub offset");
int nextInstOffset;
- if (!config->Power10Stub) {
+ if (!config->power10Stubs) {
uint32_t off = destination.getVA(addend) - getThunkTargetSym()->getVA() - 8;
write32(buf + 0, 0x7c0802a6); // mflr r12
write32(buf + 4, 0x429f0005); // bcl 20,31,.+4
@@ -1013,7 +995,7 @@ void PPC64PCRelPLTStub::writeTo(uint8_t *buf) {
int nextInstOffset = 0;
int64_t offset = destination.getGotPltVA() - getThunkTargetSym()->getVA();
- if (config->Power10Stub) {
+ if (config->power10Stubs) {
if (!isInt<34>(offset))
reportRangeError(buf, offset, 34, destination,
"PC-relative PLT stub offset");
@@ -1061,42 +1043,6 @@ bool PPC64LongBranchThunk::isCompatibleWith(const InputSection &isec,
return rel.type == R_PPC64_REL24 || rel.type == R_PPC64_REL14;
}
-void PPC64PCRelLongBranchThunk::writeTo(uint8_t *buf) {
- int64_t offset = destination.getVA() - getThunkTargetSym()->getVA();
- if (!isInt<34>(offset))
- reportRangeError(buf, offset, 34, destination,
- "PC-relative long branch stub offset");
-
- int nextInstOffset;
- if (!config->Power10Stub) {
- uint32_t off = destination.getVA(addend) - getThunkTargetSym()->getVA() - 8;
- write32(buf + 0, 0x7c0802a6); // mflr r12
- write32(buf + 4, 0x429f0005); // bcl 20,31,.+4
- write32(buf + 8, 0x7d6802a6); // mflr r11
- write32(buf + 12, 0x7d8803a6); // mtlr r12
- write32(buf + 16, 0x3d8b0000 | computeHiBits(off)); // addis r12,r11,off@ha
- write32(buf + 20, 0x398c0000 | (off & 0xffff)); // addi r12,r12,off@l
- nextInstOffset = 24;
- } else {
- uint64_t paddi = PADDI_R12_NO_DISP | (((offset >> 16) & 0x3ffff) << 32) |
- (offset & 0xffff);
- writePrefixedInstruction(buf + 0, paddi); // paddi r12, 0, func@pcrel, 1
- nextInstOffset = 8;
- }
- write32(buf + nextInstOffset, MTCTR_R12); // mtctr r12
- write32(buf + nextInstOffset + 4, BCTR); // bctr
-}
-
-void PPC64PCRelLongBranchThunk::addSymbols(ThunkSection &isec) {
- addSymbol(saver.save("__long_branch_pcrel_" + destination.getName()),
- STT_FUNC, 0, isec);
-}
-
-bool PPC64PCRelLongBranchThunk::isCompatibleWith(const InputSection &isec,
- const Relocation &rel) const {
- return rel.type == R_PPC64_REL24_NOTOC;
-}
-
Thunk::Thunk(Symbol &d, int64_t a) : destination(d), addend(a), offset(0) {}
Thunk::~Thunk() = default;
@@ -1223,9 +1169,7 @@ static Thunk *addThunkPPC64(RelType type, Symbol &s, int64_t a) {
return make<PPC64R2SaveStub>(s, a);
if (type == R_PPC64_REL24_NOTOC)
- return (s.stOther >> 5) > 1
- ? (Thunk *)make<PPC64R12SetupStub>(s)
- : (Thunk *)make<PPC64PCRelLongBranchThunk>(s, a);
+ return make<PPC64R12SetupStub>(s);
if (config->picThunk)
return make<PPC64PILongBranchThunk>(s, a);
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index 6d97852aec43..07c5e2303374 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -91,67 +91,6 @@ private:
};
} // anonymous namespace
-static bool isSectionPrefix(StringRef prefix, StringRef name) {
- return name.startswith(prefix) || name == prefix.drop_back();
-}
-
-StringRef elf::getOutputSectionName(const InputSectionBase *s) {
- if (config->relocatable)
- return s->name;
-
- // This is for --emit-relocs. If .text.foo is emitted as .text.bar, we want
- // to emit .rela.text.foo as .rela.text.bar for consistency (this is not
- // technically required, but not doing it is odd). This code guarantees that.
- if (auto *isec = dyn_cast<InputSection>(s)) {
- if (InputSectionBase *rel = isec->getRelocatedSection()) {
- OutputSection *out = rel->getOutputSection();
- if (s->type == SHT_RELA)
- return saver.save(".rela" + out->name);
- return saver.save(".rel" + out->name);
- }
- }
-
- // A BssSection created for a common symbol is identified as "COMMON" in
- // linker scripts. It should go to .bss section.
- if (s->name == "COMMON")
- return ".bss";
-
- if (script->hasSectionsCommand)
- return s->name;
-
- // When no SECTIONS is specified, emulate GNU ld's internal linker scripts
- // by grouping sections with certain prefixes.
-
- // GNU ld places text sections with prefix ".text.hot.", ".text.unknown.",
- // ".text.unlikely.", ".text.startup." or ".text.exit." before others.
- // We provide an option -z keep-text-section-prefix to group such sections
- // into separate output sections. This is more flexible. See also
- // sortISDBySectionOrder().
- // ".text.unknown" means the hotness of the section is unknown. When
- // SampleFDO is used, if a function doesn't have sample, it could be very
- // cold or it could be a new function never being sampled. Those functions
- // will be kept in the ".text.unknown" section.
- // ".text.split." holds symbols which are split out from functions in other
- // input sections. For example, with -fsplit-machine-functions, placing the
- // cold parts in .text.split instead of .text.unlikely mitigates against poor
- // profile inaccuracy. Techniques such as hugepage remapping can make
- // conservative decisions at the section granularity.
- if (config->zKeepTextSectionPrefix)
- for (StringRef v : {".text.hot.", ".text.unknown.", ".text.unlikely.",
- ".text.startup.", ".text.exit.", ".text.split."})
- if (isSectionPrefix(v, s->name))
- return v.drop_back();
-
- for (StringRef v :
- {".text.", ".rodata.", ".data.rel.ro.", ".data.", ".bss.rel.ro.",
- ".bss.", ".init_array.", ".fini_array.", ".ctors.", ".dtors.", ".tbss.",
- ".gcc_except_table.", ".tdata.", ".ARM.exidx.", ".ARM.extab."})
- if (isSectionPrefix(v, s->name))
- return v.drop_back();
-
- return s->name;
-}
-
static bool needsInterpSection() {
return !config->relocatable && !config->shared &&
!config->dynamicLinker.empty() && script->needsInterpSection();
@@ -332,8 +271,8 @@ void elf::addReservedSymbols() {
}
static OutputSection *findSection(StringRef name, unsigned partition = 1) {
- for (BaseCommand *base : script->sectionCommands)
- if (auto *sec = dyn_cast<OutputSection>(base))
+ for (SectionCommand *cmd : script->sectionCommands)
+ if (auto *sec = dyn_cast<OutputSection>(cmd))
if (sec->name == name && sec->partition == partition)
return sec;
return nullptr;
@@ -342,7 +281,10 @@ static OutputSection *findSection(StringRef name, unsigned partition = 1) {
template <class ELFT> void elf::createSyntheticSections() {
// Initialize all pointers with NULL. This is needed because
// you can call lld::elf::main more than once as a library.
- memset(&Out::first, 0, sizeof(Out));
+ Out::tlsPhdr = nullptr;
+ Out::preinitArray = nullptr;
+ Out::initArray = nullptr;
+ Out::finiArray = nullptr;
// Add the .interp section first because it is not a SyntheticSection.
// The removeUnusedSyntheticSections() function relies on the
@@ -426,7 +368,6 @@ template <class ELFT> void elf::createSyntheticSections() {
make<RelocationSection<ELFT>>(relaDynName, config->zCombreloc);
if (config->hasDynSymTab) {
- part.dynSymTab = make<SymbolTableSection<ELFT>>(*part.dynStrTab);
add(part.dynSymTab);
part.verSym = make<VersionTableSection>();
@@ -624,9 +565,8 @@ template <class ELFT> void Writer<ELFT>::run() {
// --print-archive-stats=. Dump them before checkSections() because the files
// may be useful in case checkSections() or openFile() fails, for example, due
// to an erroneous file size.
- writeMapFile();
+ writeMapAndCref();
writeWhyExtract();
- writeCrossReferenceTable();
writeArchiveStats();
if (config->checkSections)
@@ -787,16 +727,16 @@ template <class ELFT> void Writer<ELFT>::copyLocalSymbols() {
// referring to a section (that happens if the section is a synthetic one), we
// don't create a section symbol for that section.
template <class ELFT> void Writer<ELFT>::addSectionSymbols() {
- for (BaseCommand *base : script->sectionCommands) {
- auto *sec = dyn_cast<OutputSection>(base);
+ for (SectionCommand *cmd : script->sectionCommands) {
+ auto *sec = dyn_cast<OutputSection>(cmd);
if (!sec)
continue;
- auto i = llvm::find_if(sec->sectionCommands, [](BaseCommand *base) {
- if (auto *isd = dyn_cast<InputSectionDescription>(base))
+ auto i = llvm::find_if(sec->commands, [](SectionCommand *cmd) {
+ if (auto *isd = dyn_cast<InputSectionDescription>(cmd))
return !isd->sections.empty();
return false;
});
- if (i == sec->sectionCommands.end())
+ if (i == sec->commands.end())
continue;
InputSectionBase *isec = cast<InputSectionDescription>(*i)->sections[0];
@@ -1053,7 +993,8 @@ static unsigned getSectionRank(const OutputSection *sec) {
return rank;
}
-static bool compareSections(const BaseCommand *aCmd, const BaseCommand *bCmd) {
+static bool compareSections(const SectionCommand *aCmd,
+ const SectionCommand *bCmd) {
const OutputSection *a = cast<OutputSection>(aCmd);
const OutputSection *b = cast<OutputSection>(bCmd);
@@ -1210,7 +1151,7 @@ static int getRankProximityAux(OutputSection *a, OutputSection *b) {
return countLeadingZeros(a->sortRank ^ b->sortRank);
}
-static int getRankProximity(OutputSection *a, BaseCommand *b) {
+static int getRankProximity(OutputSection *a, SectionCommand *b) {
auto *sec = dyn_cast<OutputSection>(b);
return (sec && sec->hasInputSections) ? getRankProximityAux(a, sec) : -1;
}
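// A small illustration (not from the patch itself) of the proximity metric:
// getRankProximityAux() returns countLeadingZeros(a->sortRank ^ b->sortRank),
// so the more high-order rank bits two sections share, the larger the
// proximity. The ranks below are hypothetical.
#include <cstdint>

static int rankProximity(uint32_t a, uint32_t b) {
  if (a == b)
    return 32; // identical ranks: maximal proximity
  return __builtin_clz(a ^ b); // e.g. 0x14 vs 0x10: clz(0x04) == 29
}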
@@ -1229,7 +1170,7 @@ static int getRankProximity(OutputSection *a, BaseCommand *b) {
// /* The RW PT_LOAD starts here*/
// rw_sec : { *(rw_sec) }
// would mean that the RW PT_LOAD would become unaligned.
-static bool shouldSkip(BaseCommand *cmd) {
+static bool shouldSkip(SectionCommand *cmd) {
if (auto *assign = dyn_cast<SymbolAssignment>(cmd))
return assign->name != ".";
return false;
@@ -1238,13 +1179,13 @@ static bool shouldSkip(BaseCommand *cmd) {
// We want to place orphan sections so that they share as much
// characteristics with their neighbors as possible. For example, if
// both are rw, or both are tls.
-static std::vector<BaseCommand *>::iterator
-findOrphanPos(std::vector<BaseCommand *>::iterator b,
- std::vector<BaseCommand *>::iterator e) {
+static std::vector<SectionCommand *>::iterator
+findOrphanPos(std::vector<SectionCommand *>::iterator b,
+ std::vector<SectionCommand *>::iterator e) {
OutputSection *sec = cast<OutputSection>(*e);
// Find the first element that has as close a rank as possible.
- auto i = std::max_element(b, e, [=](BaseCommand *a, BaseCommand *b) {
+ auto i = std::max_element(b, e, [=](SectionCommand *a, SectionCommand *b) {
return getRankProximity(sec, a) < getRankProximity(sec, b);
});
if (i == e)
@@ -1273,7 +1214,7 @@ findOrphanPos(std::vector<BaseCommand *>::iterator b,
break;
}
- auto isOutputSecWithInputSections = [](BaseCommand *cmd) {
+ auto isOutputSecWithInputSections = [](SectionCommand *cmd) {
auto *os = dyn_cast<OutputSection>(cmd);
return os && os->hasInputSections;
};
@@ -1482,7 +1423,7 @@ static void sortSection(OutputSection *sec,
// digit radix sort. The sections may be sorted stably again by a more
// significant key.
if (!order.empty())
- for (BaseCommand *b : sec->sectionCommands)
+ for (SectionCommand *b : sec->commands)
if (auto *isd = dyn_cast<InputSectionDescription>(b))
sortISDBySectionOrder(isd, order);
@@ -1499,8 +1440,8 @@ static void sortSection(OutputSection *sec,
// addressable range of [.got, .got + 0xFFFC] for GOT-relative relocations.
// To reduce the risk of relocation overflow, .toc contents are sorted so
// that sections having smaller relocation offsets are at the beginning of .toc.
- assert(sec->sectionCommands.size() == 1);
- auto *isd = cast<InputSectionDescription>(sec->sectionCommands[0]);
+ assert(sec->commands.size() == 1);
+ auto *isd = cast<InputSectionDescription>(sec->commands[0]);
llvm::stable_sort(isd->sections,
[](const InputSection *a, const InputSection *b) -> bool {
return a->file->ppc64SmallCodeModelTocRelocs &&
@@ -1515,8 +1456,8 @@ template <class ELFT> void Writer<ELFT>::sortInputSections() {
// Build the order once since it is expensive.
DenseMap<const InputSectionBase *, int> order = buildSectionOrder();
maybeShuffle(order);
- for (BaseCommand *base : script->sectionCommands)
- if (auto *sec = dyn_cast<OutputSection>(base))
+ for (SectionCommand *cmd : script->sectionCommands)
+ if (auto *sec = dyn_cast<OutputSection>(cmd))
sortSection(sec, order);
}
@@ -1531,8 +1472,8 @@ template <class ELFT> void Writer<ELFT>::sortSections() {
sortInputSections();
- for (BaseCommand *base : script->sectionCommands) {
- auto *os = dyn_cast<OutputSection>(base);
+ for (SectionCommand *cmd : script->sectionCommands) {
+ auto *os = dyn_cast<OutputSection>(cmd);
if (!os)
continue;
os->sortRank = getSectionRank(os);
@@ -1547,7 +1488,9 @@ template <class ELFT> void Writer<ELFT>::sortSections() {
if (!script->hasSectionsCommand) {
// We know that all the OutputSections are contiguous in this case.
- auto isSection = [](BaseCommand *base) { return isa<OutputSection>(base); };
+ auto isSection = [](SectionCommand *cmd) {
+ return isa<OutputSection>(cmd);
+ };
std::stable_sort(
llvm::find_if(script->sectionCommands, isSection),
llvm::find_if(llvm::reverse(script->sectionCommands), isSection).base(),
@@ -1602,8 +1545,8 @@ template <class ELFT> void Writer<ELFT>::sortSections() {
auto i = script->sectionCommands.begin();
auto e = script->sectionCommands.end();
- auto nonScriptI = std::find_if(i, e, [](BaseCommand *base) {
- if (auto *sec = dyn_cast<OutputSection>(base))
+ auto nonScriptI = std::find_if(i, e, [](SectionCommand *cmd) {
+ if (auto *sec = dyn_cast<OutputSection>(cmd))
return sec->sectionIndex == UINT32_MAX;
return false;
});
@@ -1616,7 +1559,7 @@ template <class ELFT> void Writer<ELFT>::sortSections() {
// the script with ". = 0xabcd" and the expectation is that every section is
// after that.
auto firstSectionOrDotAssignment =
- std::find_if(i, e, [](BaseCommand *cmd) { return !shouldSkip(cmd); });
+ std::find_if(i, e, [](SectionCommand *cmd) { return !shouldSkip(cmd); });
if (firstSectionOrDotAssignment != e &&
isa<SymbolAssignment>(**firstSectionOrDotAssignment))
++firstSectionOrDotAssignment;
@@ -1629,7 +1572,7 @@ template <class ELFT> void Writer<ELFT>::sortSections() {
// As an optimization, find all sections with the same sort rank
// and insert them with one rotate.
unsigned rank = orphan->sortRank;
- auto end = std::find_if(nonScriptI + 1, e, [=](BaseCommand *cmd) {
+ auto end = std::find_if(nonScriptI + 1, e, [=](SectionCommand *cmd) {
return cast<OutputSection>(cmd)->sortRank != rank;
});
std::rotate(pos, nonScriptI, end);
@@ -1670,8 +1613,8 @@ template <class ELFT> void Writer<ELFT>::resolveShfLinkOrder() {
// Sorting is performed separately.
std::vector<InputSection **> scriptSections;
std::vector<InputSection *> sections;
- for (BaseCommand *base : sec->sectionCommands) {
- auto *isd = dyn_cast<InputSectionDescription>(base);
+ for (SectionCommand *cmd : sec->commands) {
+ auto *isd = dyn_cast<InputSectionDescription>(cmd);
if (!isd)
continue;
bool hasLinkOrder = false;
@@ -1774,7 +1717,7 @@ template <class ELFT> void Writer<ELFT>::finalizeAddressDependentContent() {
// If addrExpr is set, the address may not be a multiple of the alignment.
// Warn because this is error-prone.
- for (BaseCommand *cmd : script->sectionCommands)
+ for (SectionCommand *cmd : script->sectionCommands)
if (auto *os = dyn_cast<OutputSection>(cmd))
if (os->addr % os->alignment != 0)
warn("address (0x" + Twine::utohexstr(os->addr) + ") of section " +
@@ -1892,36 +1835,30 @@ static void removeUnusedSyntheticSections() {
})
.base();
- DenseSet<InputSectionDescription *> isdSet;
- // Mark unused synthetic sections for deletion
- auto end = std::stable_partition(
- start, inputSections.end(), [&](InputSectionBase *s) {
- SyntheticSection *ss = dyn_cast<SyntheticSection>(s);
- OutputSection *os = ss->getParent();
- if (!os || ss->isNeeded())
- return true;
-
- // If we reach here, then ss is an unused synthetic section and we want
- // to remove it from the corresponding input section description, and
- // orphanSections.
- for (BaseCommand *b : os->sectionCommands)
- if (auto *isd = dyn_cast<InputSectionDescription>(b))
- isdSet.insert(isd);
-
- llvm::erase_if(
- script->orphanSections,
- [=](const InputSectionBase *isec) { return isec == ss; });
-
- return false;
+ // Remove unused synthetic sections from inputSections.
+ DenseSet<InputSectionBase *> unused;
+ auto end =
+ std::remove_if(start, inputSections.end(), [&](InputSectionBase *s) {
+ auto *sec = cast<SyntheticSection>(s);
+ if (sec->getParent() && sec->isNeeded())
+ return false;
+ unused.insert(sec);
+ return true;
});
-
- DenseSet<InputSectionBase *> unused(end, inputSections.end());
- for (auto *isd : isdSet)
- llvm::erase_if(isd->sections,
- [=](InputSection *isec) { return unused.count(isec); });
-
- // Erase unused synthetic sections.
inputSections.erase(end, inputSections.end());
+
+ // Remove unused synthetic sections from the corresponding input section
+ // description and orphanSections.
+ for (auto *sec : unused)
+ if (OutputSection *osec = cast<SyntheticSection>(sec)->getParent())
+ for (SectionCommand *cmd : osec->commands)
+ if (auto *isd = dyn_cast<InputSectionDescription>(cmd))
+ llvm::erase_if(isd->sections, [&](InputSection *isec) {
+ return unused.count(isec);
+ });
+ llvm::erase_if(script->orphanSections, [&](const InputSectionBase *sec) {
+ return unused.count(sec);
+ });
}
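// A minimal sketch (not from the patch itself) of the erase-remove idiom the
// rewrite above relies on: std::remove_if compacts the kept elements to the
// front and returns the new logical end, and erase() drops the tail. The
// predicate is hypothetical.
#include <algorithm>
#include <vector>

static void eraseNegatives(std::vector<int> &v) {
  v.erase(std::remove_if(v.begin(), v.end(), [](int x) { return x < 0; }),
          v.end());
}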
// Create output section objects and add them to OutputSections.
@@ -1935,8 +1872,8 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() {
// addresses of each section by section name. Add such symbols.
if (!config->relocatable) {
addStartEndSymbols();
- for (BaseCommand *base : script->sectionCommands)
- if (auto *sec = dyn_cast<OutputSection>(base))
+ for (SectionCommand *cmd : script->sectionCommands)
+ if (auto *sec = dyn_cast<OutputSection>(cmd))
addStartStopSymbols(sec);
}
@@ -2087,11 +2024,14 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() {
sortSections();
- // Now that we have the final list, create a list of all the
- // OutputSections for convenience.
- for (BaseCommand *base : script->sectionCommands)
- if (auto *sec = dyn_cast<OutputSection>(base))
- outputSections.push_back(sec);
+ // Create a list of OutputSections, assign sectionIndex, and populate
+ // in.shStrTab.
+ for (SectionCommand *cmd : script->sectionCommands)
+ if (auto *osec = dyn_cast<OutputSection>(cmd)) {
+ outputSections.push_back(osec);
+ osec->sectionIndex = outputSections.size();
+ osec->shName = in.shStrTab->addString(osec->name);
+ }
// Prefer command line supplied address over other constraints.
for (OutputSection *sec : outputSections) {
@@ -2113,12 +2053,7 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() {
// to 1 to make __ehdr_start defined. The section number is not
// particularly relevant.
Out::elfHeader->sectionIndex = 1;
-
- for (size_t i = 0, e = outputSections.size(); i != e; ++i) {
- OutputSection *sec = outputSections[i];
- sec->sectionIndex = i + 1;
- sec->shName = in.shStrTab->addString(sec->name);
- }
+ Out::elfHeader->size = sizeof(typename ELFT::Ehdr);
// Binary and relocatable output does not have PHDRS.
// The headers have to be created before finalize as that can influence the
@@ -2608,17 +2543,6 @@ static uint64_t computeFileOffset(OutputSection *os, uint64_t off) {
return first->offset + os->addr - first->addr;
}
-// Set an in-file position to a given section and returns the end position of
-// the section.
-static uint64_t setFileOffset(OutputSection *os, uint64_t off) {
- off = computeFileOffset(os, off);
- os->offset = off;
-
- if (os->type == SHT_NOBITS)
- return off;
- return off + os->size;
-}
-
template <class ELFT> void Writer<ELFT>::assignFileOffsetsBinary() {
// Compute the minimum LMA of all non-empty non-NOBITS sections as minAddr.
auto needsOffset = [](OutputSection &sec) {
@@ -2646,9 +2570,8 @@ static std::string rangeToString(uint64_t addr, uint64_t len) {
// Assign file offsets to output sections.
template <class ELFT> void Writer<ELFT>::assignFileOffsets() {
- uint64_t off = 0;
- off = setFileOffset(Out::elfHeader, off);
- off = setFileOffset(Out::programHeaders, off);
+ Out::programHeaders->offset = Out::elfHeader->size;
+ uint64_t off = Out::elfHeader->size + Out::programHeaders->size;
PhdrEntry *lastRX = nullptr;
for (Partition &part : partitions)
@@ -2661,18 +2584,23 @@ template <class ELFT> void Writer<ELFT>::assignFileOffsets() {
for (OutputSection *sec : outputSections) {
if (!(sec->flags & SHF_ALLOC))
continue;
- off = setFileOffset(sec, off);
+ off = computeFileOffset(sec, off);
+ sec->offset = off;
+ if (sec->type != SHT_NOBITS)
+ off += sec->size;
// If this is the last section of the last executable segment and that
// segment is the last loadable segment, align the offset of the
// following section to avoid loading non-segment parts of the file.
if (config->zSeparate != SeparateSegmentKind::None && lastRX &&
lastRX->lastSec == sec)
- off = alignTo(off, config->commonPageSize);
+ off = alignTo(off, config->maxPageSize);
}
- for (OutputSection *sec : outputSections)
- if (!(sec->flags & SHF_ALLOC))
- off = setFileOffset(sec, off);
+ for (OutputSection *osec : outputSections)
+ if (!(osec->flags & SHF_ALLOC)) {
+ osec->offset = alignTo(off, osec->alignment);
+ off = osec->offset + osec->size;
+ }
sectionHeaderOff = alignTo(off, config->wordsize);
fileSize = sectionHeaderOff + (outputSections.size() + 1) * sizeof(Elf_Shdr);
@@ -2946,9 +2874,9 @@ template <class ELFT> void Writer<ELFT>::writeTrapInstr() {
for (PhdrEntry *p : part.phdrs)
if (p->p_type == PT_LOAD && (p->p_flags & PF_X))
fillTrap(Out::bufferStart + alignDown(p->firstSec->offset + p->p_filesz,
- config->commonPageSize),
+ config->maxPageSize),
Out::bufferStart + alignTo(p->firstSec->offset + p->p_filesz,
- config->commonPageSize));
+ config->maxPageSize));
// Round up the file size of the last segment to the page boundary iff it is
// an executable segment to ensure that other tools don't accidentally
@@ -2960,7 +2888,7 @@ template <class ELFT> void Writer<ELFT>::writeTrapInstr() {
if (last && (last->p_flags & PF_X))
last->p_memsz = last->p_filesz =
- alignTo(last->p_filesz, config->commonPageSize);
+ alignTo(last->p_filesz, config->maxPageSize);
}
}
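// A hedged sketch (not from the patch itself) of the page-alignment helpers
// the trap-filling code above depends on, assuming page sizes are powers of
// two:
#include <cstdint>

static uint64_t alignDown(uint64_t v, uint64_t align) {
  return v & ~(align - 1);
}
static uint64_t alignTo(uint64_t v, uint64_t align) {
  return (v + align - 1) & ~(align - 1);
}
// With maxPageSize 0x10000 and a segment ending at file offset 0x12345, traps
// fill [alignDown(0x12345, 0x10000), alignTo(0x12345, 0x10000)) ==
// [0x10000, 0x20000).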
diff --git a/lld/ELF/Writer.h b/lld/ELF/Writer.h
index 3698544d977b..9c4a5b98451d 100644
--- a/lld/ELF/Writer.h
+++ b/lld/ELF/Writer.h
@@ -51,7 +51,6 @@ struct PhdrEntry {
};
void addReservedSymbols();
-llvm::StringRef getOutputSectionName(const InputSectionBase *s);
template <class ELFT> uint32_t calcMipsEFlags();
diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp
index 96167b72a724..d42085737dbb 100644
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -26,8 +26,11 @@ using namespace llvm::support;
using namespace lld;
using namespace lld::macho;
-// Verify ConcatInputSection's size on 64-bit builds.
-static_assert(sizeof(void *) != 8 || sizeof(ConcatInputSection) == 120,
+// Verify ConcatInputSection's size on 64-bit builds. The size of std::vector
+// can differ based on STL debug levels (e.g. iterator debugging on MSVC's STL),
+// so account for that.
+static_assert(sizeof(void *) != 8 ||
+ sizeof(ConcatInputSection) == sizeof(std::vector<Reloc>) + 96,
"Try to minimize ConcatInputSection's size, we create many "
"instances of it");
diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
index cc6f51cc5fd3..d1182a0a2d32 100644
--- a/lld/MachO/Symbols.h
+++ b/lld/MachO/Symbols.h
@@ -236,7 +236,12 @@ public:
uint64_t getVA() const override;
bool isWeakDef() const override { return weakDef; }
- bool isWeakRef() const override { return refState == RefState::Weak; }
+
+ // Symbols from weak libraries/frameworks are also weakly referenced.
+ bool isWeakRef() const override {
+ return refState == RefState::Weak ||
+ (file && getFile()->umbrella->forceWeakImport);
+ }
bool isReferenced() const { return refState != RefState::Unreferenced; }
bool isTlv() const override { return tlv; }
bool isDynamicLookup() const { return file == nullptr; }
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 2527389990fa..99a15666c8fa 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -796,16 +796,18 @@ FunctionStartsSection::FunctionStartsSection()
void FunctionStartsSection::finalizeContents() {
raw_svector_ostream os{contents};
std::vector<uint64_t> addrs;
- for (const Symbol *sym : symtab->getSymbols()) {
- if (const auto *defined = dyn_cast<Defined>(sym)) {
- if (!defined->isec || !isCodeSection(defined->isec) || !defined->isLive())
- continue;
- if (const auto *concatIsec = dyn_cast<ConcatInputSection>(defined->isec))
- if (concatIsec->shouldOmitFromOutput())
- continue;
- // TODO: Add support for thumbs, in that case
- // the lowest bit of nextAddr needs to be set to 1.
- addrs.push_back(defined->getVA());
+ for (const InputFile *file : inputFiles) {
+ if (auto *objFile = dyn_cast<ObjFile>(file)) {
+ for (const Symbol *sym : objFile->symbols) {
+ if (const auto *defined = dyn_cast_or_null<Defined>(sym)) {
+ if (!defined->isec || !isCodeSection(defined->isec) ||
+ !defined->isLive())
+ continue;
+ // TODO: Add support for thumbs, in that case
+ // the lowest bit of nextAddr needs to be set to 1.
+ addrs.push_back(defined->getVA());
+ }
+ }
}
}
llvm::sort(addrs);
diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp
index 690098c7a3b7..d28c7a33ff36 100644
--- a/lld/MachO/UnwindInfoSection.cpp
+++ b/lld/MachO/UnwindInfoSection.cpp
@@ -225,8 +225,9 @@ void UnwindInfoSectionImpl<Ptr>::prepareRelocations(ConcatInputSection *isec) {
//
// (See discussions/alternatives already considered on D107533)
if (!defined->isExternal())
- if (const Symbol *sym = symtab->find(defined->getName()))
- r.referent = s = const_cast<Symbol *>(sym);
+ if (Symbol *sym = symtab->find(defined->getName()))
+ if (sym->kind() != Symbol::LazyKind)
+ r.referent = s = sym;
}
if (auto *undefined = dyn_cast<Undefined>(s)) {
treatUndefinedSymbol(*undefined);
diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst
index 1d53177200c3..a2456fc46689 100644
--- a/lld/docs/ReleaseNotes.rst
+++ b/lld/docs/ReleaseNotes.rst
@@ -31,11 +31,16 @@ ELF Improvements
* ``e_entry`` no longer falls back to the address of ``.text`` if the entry symbol does not exist.
Instead, a value of 0 will be written.
(`D110014 <https://reviews.llvm.org/D110014>`_)
+* If ``-Map`` is specified, ``--cref`` will be printed to the specified file.
+ (`D114663 <https://reviews.llvm.org/D114663>`_)
Architecture specific changes:
* The x86-32 port now supports TLSDESC (``-mtls-dialect=gnu2``).
(`D112582 <https://reviews.llvm.org/D112582>`_)
+* The x86-64 port now handles non-RAX/non-adjacent ``R_X86_64_GOTPC32_TLSDESC``
+ and ``R_X86_64_TLSDESC_CALL`` (``-mtls-dialect=gnu2``).
+ (`D114416 <https://reviews.llvm.org/D114416>`_)
* For x86-64, ``--no-relax`` now suppresses ``R_X86_64_GOTPCRELX`` and
``R_X86_64_REX_GOTPCRELX`` GOT optimization
(`D113615 <https://reviews.llvm.org/D113615>`_)
diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1
index 843f4a1cc282..0422231d78b5 100644
--- a/lld/docs/ld.lld.1
+++ b/lld/docs/ld.lld.1
@@ -141,7 +141,9 @@ you can specify
.Fl O2
to set the compression level to 6.
.It Fl -cref
-Output cross reference table.
+Output cross reference table. If
+.Fl Map
+is specified, print to the map file.
.It Fl -define-common , Fl d
Assign space to common symbols.
.It Fl -defsym Ns = Ns Ar symbol Ns = Ns Ar expression
@@ -454,6 +456,20 @@ is specified, use SHT_ANDROID_RELR instead of SHT_RELR.
Always generate position independent thunks.
.It Fl -pie , Fl -pic-executable
Create a position independent executable.
+.It Fl -power10-stubs Ns = Ns Cm mode
+Whether to use Power10 instructions in call stubs for R_PPC64_REL24_NOTOC and TOC/NOTOC interworking.
+.Ar mode
+may be:
+.Pp
+.Bl -tag -width 2n -compact
+.It Cm yes
+(default) Use.
+.It Cm auto
+Currently the same as yes.
+.It Cm no
+Don't use.
+.El
+
.It Fl -print-gc-sections
List removed unused sections.
.It Fl -print-icf-sections
diff --git a/lldb/bindings/interface/SBDebugger.i b/lldb/bindings/interface/SBDebugger.i
index aae72dd51394..f21e60d62873 100644
--- a/lldb/bindings/interface/SBDebugger.i
+++ b/lldb/bindings/interface/SBDebugger.i
@@ -207,6 +207,9 @@ public:
}
SBError
+ SetInputString (const char* data);
+
+ SBError
SetInputFile (SBFile file);
SBError
diff --git a/lldb/bindings/interface/SBTarget.i b/lldb/bindings/interface/SBTarget.i
index 3f9e4cdc6d67..b98aa70849be 100644
--- a/lldb/bindings/interface/SBTarget.i
+++ b/lldb/bindings/interface/SBTarget.i
@@ -412,6 +412,9 @@ public:
uint32_t
GetCodeByteSize ();
+ uint32_t
+ GetMaximumNumberOfChildrenToDisplay() const;
+
lldb::SBError
SetSectionLoadAddress (lldb::SBSection section,
lldb::addr_t section_base_addr);
diff --git a/lldb/bindings/interface/SBValue.i b/lldb/bindings/interface/SBValue.i
index dd012e667a20..bc66a4ae28f8 100644
--- a/lldb/bindings/interface/SBValue.i
+++ b/lldb/bindings/interface/SBValue.i
@@ -410,6 +410,9 @@ public:
bool
SetData (lldb::SBData &data, lldb::SBError& error);
+ lldb::SBValue
+ Clone(const char *new_name);
+
lldb::addr_t
GetLoadAddress();
diff --git a/lldb/bindings/python/python-swigsafecast.swig b/lldb/bindings/python/python-swigsafecast.swig
index aa2bcfb8c8ae..fdd3b4e62c10 100644
--- a/lldb/bindings/python/python-swigsafecast.swig
+++ b/lldb/bindings/python/python-swigsafecast.swig
@@ -1,23 +1,14 @@
+namespace lldb_private {
+namespace python {
+
PyObject *SBTypeToSWIGWrapper(lldb::SBEvent &event_sb) {
return SWIG_NewPointerObj(&event_sb, SWIGTYPE_p_lldb__SBEvent, 0);
}
-PyObject *SBTypeToSWIGWrapper(lldb::SBProcess &process_sb) {
- return SWIG_NewPointerObj(&process_sb, SWIGTYPE_p_lldb__SBProcess, 0);
-}
-
PyObject *SBTypeToSWIGWrapper(lldb::SBThread &thread_sb) {
return SWIG_NewPointerObj(&thread_sb, SWIGTYPE_p_lldb__SBThread, 0);
}
-PyObject *SBTypeToSWIGWrapper(lldb::SBThreadPlan &thread_plan_sb) {
- return SWIG_NewPointerObj(&thread_plan_sb, SWIGTYPE_p_lldb__SBThreadPlan, 0);
-}
-
-PyObject *SBTypeToSWIGWrapper(lldb::SBTarget &target_sb) {
- return SWIG_NewPointerObj(&target_sb, SWIGTYPE_p_lldb__SBTarget, 0);
-}
-
PyObject *SBTypeToSWIGWrapper(lldb::SBFrame &frame_sb) {
return SWIG_NewPointerObj(&frame_sb, SWIGTYPE_p_lldb__SBFrame, 0);
}
@@ -26,10 +17,6 @@ PyObject *SBTypeToSWIGWrapper(lldb::SBDebugger &debugger_sb) {
return SWIG_NewPointerObj(&debugger_sb, SWIGTYPE_p_lldb__SBDebugger, 0);
}
-PyObject *SBTypeToSWIGWrapper(lldb::SBBreakpoint &breakpoint_sb) {
- return SWIG_NewPointerObj(&breakpoint_sb, SWIGTYPE_p_lldb__SBBreakpoint, 0);
-}
-
PyObject *SBTypeToSWIGWrapper(lldb::SBWatchpoint &watchpoint_sb) {
return SWIG_NewPointerObj(&watchpoint_sb, SWIGTYPE_p_lldb__SBWatchpoint, 0);
}
@@ -40,10 +27,6 @@ SBTypeToSWIGWrapper(lldb::SBBreakpointLocation &breakpoint_location_sb) {
SWIGTYPE_p_lldb__SBBreakpointLocation, 0);
}
-PyObject *SBTypeToSWIGWrapper(lldb::SBValue &value_sb) {
- return SWIG_NewPointerObj(&value_sb, SWIGTYPE_p_lldb__SBValue, 0);
-}
-
PyObject *SBTypeToSWIGWrapper(lldb::SBCommandReturnObject &cmd_ret_obj_sb) {
return SWIG_NewPointerObj(&cmd_ret_obj_sb,
SWIGTYPE_p_lldb__SBCommandReturnObject, 0);
@@ -70,3 +53,38 @@ PyObject *SBTypeToSWIGWrapper(lldb::SBSymbolContext &sym_ctx_sb) {
PyObject *SBTypeToSWIGWrapper(lldb::SBStream &stream_sb) {
return SWIG_NewPointerObj(&stream_sb, SWIGTYPE_p_lldb__SBStream, 0);
}
+
+PythonObject ToSWIGHelper(void *obj, swig_type_info *info) {
+ return {PyRefType::Owned, SWIG_NewPointerObj(obj, info, SWIG_POINTER_OWN)};
+}
+
+PythonObject ToSWIGWrapper(std::unique_ptr<lldb::SBValue> value_sb) {
+ return ToSWIGHelper(value_sb.release(), SWIGTYPE_p_lldb__SBValue);
+}
+
+PythonObject ToSWIGWrapper(lldb::ValueObjectSP value_sp) {
+ return ToSWIGWrapper(std::make_unique<lldb::SBValue>(std::move(value_sp)));
+}
+
+PythonObject ToSWIGWrapper(lldb::TargetSP target_sp) {
+ return ToSWIGHelper(new lldb::SBTarget(std::move(target_sp)),
+ SWIGTYPE_p_lldb__SBTarget);
+}
+
+PythonObject ToSWIGWrapper(lldb::ProcessSP process_sp) {
+ return ToSWIGHelper(new lldb::SBProcess(std::move(process_sp)),
+ SWIGTYPE_p_lldb__SBProcess);
+}
+
+PythonObject ToSWIGWrapper(lldb::ThreadPlanSP thread_plan_sp) {
+ return ToSWIGHelper(new lldb::SBThreadPlan(std::move(thread_plan_sp)),
+ SWIGTYPE_p_lldb__SBThreadPlan);
+}
+
+PythonObject ToSWIGWrapper(lldb::BreakpointSP breakpoint_sp) {
+ return ToSWIGHelper(new lldb::SBBreakpoint(std::move(breakpoint_sp)),
+ SWIGTYPE_p_lldb__SBBreakpoint);
+}
+
+} // namespace python
+} // namespace lldb_private
diff --git a/lldb/bindings/python/python-wrapper.swig b/lldb/bindings/python/python-wrapper.swig
index 6dc8ca170390..079f8d12dafa 100644
--- a/lldb/bindings/python/python-wrapper.swig
+++ b/lldb/bindings/python/python-wrapper.swig
@@ -22,32 +22,8 @@ private:
bool m_print;
};
-%}
-
-%wrapper %{
-
-// resolve a dotted Python name in the form
-// foo.bar.baz.Foobar to an actual Python object
-// if pmodule is NULL, the __main__ module will be used
-// as the starting point for the search
-
-
-// This function is called by lldb_private::ScriptInterpreterPython::BreakpointCallbackFunction(...)
-// and is used when a script command is attached to a breakpoint for execution.
-
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wreturn-type-c-linkage"
-
-// Disable warning C4190: 'LLDBSwigPythonBreakpointCallbackFunction' has
-// C-linkage specified, but returns UDT 'llvm::Expected<bool>' which is
-// incompatible with C
-#if _MSC_VER
-#pragma warning (push)
-#pragma warning (disable : 4190)
-#endif
-
-SWIGEXPORT llvm::Expected<bool>
-LLDBSwigPythonBreakpointCallbackFunction
+llvm::Expected<bool>
+lldb_private::LLDBSwigPythonBreakpointCallbackFunction
(
const char *python_function_name,
const char *session_dictionary_name,
@@ -93,17 +69,20 @@ LLDBSwigPythonBreakpointCallbackFunction
return result.get().get() != Py_False;
}
-#if _MSC_VER
-#pragma warning (pop)
-#endif
+// resolve a dotted Python name in the form
+// foo.bar.baz.Foobar to an actual Python object
+// if pmodule is NULL, the __main__ module will be used
+// as the starting point for the search
-#pragma clang diagnostic pop
+
+// This function is called by lldb_private::ScriptInterpreterPython::BreakpointCallbackFunction(...)
+// and is used when a script command is attached to a breakpoint for execution.
// This function is called by lldb_private::ScriptInterpreterPython::WatchpointCallbackFunction(...)
// and is used when a script command is attached to a watchpoint for execution.
-SWIGEXPORT bool
-LLDBSwigPythonWatchpointCallbackFunction
+bool
+lldb_private::LLDBSwigPythonWatchpointCallbackFunction
(
const char *python_function_name,
const char *session_dictionary_name,
@@ -134,8 +113,8 @@ LLDBSwigPythonWatchpointCallbackFunction
return stop_at_watchpoint;
}
-SWIGEXPORT bool
-LLDBSwigPythonCallTypeScript
+bool
+lldb_private::LLDBSwigPythonCallTypeScript
(
const char *python_function_name,
const void *session_dictionary,
@@ -145,7 +124,6 @@ LLDBSwigPythonCallTypeScript
std::string& retval
)
{
- lldb::SBValue sb_value (valobj_sp);
lldb::SBTypeSummaryOptions sb_options(options_sp.get());
retval.clear();
@@ -195,7 +173,7 @@ LLDBSwigPythonCallTypeScript
return false;
}
- PythonObject value_arg(PyRefType::Owned, SBTypeToSWIGWrapper(sb_value));
+ PythonObject value_arg = ToSWIGWrapper(valobj_sp);
PythonObject options_arg(PyRefType::Owned, SBTypeToSWIGWrapper(sb_options));
if (argc.get().max_positional_args < 3)
@@ -208,8 +186,8 @@ LLDBSwigPythonCallTypeScript
return true;
}
-SWIGEXPORT void*
-LLDBSwigPythonCreateSyntheticProvider
+void*
+lldb_private::LLDBSwigPythonCreateSyntheticProvider
(
const char *python_class_name,
const char *session_dictionary_name,
@@ -227,11 +205,10 @@ LLDBSwigPythonCreateSyntheticProvider
if (!pfunc.IsAllocated())
Py_RETURN_NONE;
- // FIXME: SBValue leaked here
- lldb::SBValue *sb_value = new lldb::SBValue(valobj_sp);
+ auto sb_value = std::make_unique<lldb::SBValue>(valobj_sp);
sb_value->SetPreferSyntheticValue(false);
- PythonObject val_arg(PyRefType::Owned, SBTypeToSWIGWrapper(*sb_value));
+ PythonObject val_arg = ToSWIGWrapper(std::move(sb_value));
if (!val_arg.IsAllocated())
Py_RETURN_NONE;
@@ -243,8 +220,8 @@ LLDBSwigPythonCreateSyntheticProvider
Py_RETURN_NONE;
}
-SWIGEXPORT void*
-LLDBSwigPythonCreateCommandObject
+void*
+lldb_private::LLDBSwigPythonCreateCommandObject
(
const char *python_class_name,
const char *session_dictionary_name,
@@ -271,8 +248,8 @@ LLDBSwigPythonCreateCommandObject
Py_RETURN_NONE;
}
-SWIGEXPORT void*
-LLDBSwigPythonCreateScriptedProcess
+void*
+lldb_private::LLDBSwigPythonCreateScriptedProcess
(
const char *python_class_name,
const char *session_dictionary_name,
@@ -295,12 +272,7 @@ LLDBSwigPythonCreateScriptedProcess
return nullptr;
}
- // FIXME: SBTarget leaked here
- PythonObject target_arg(
- PyRefType::Owned, SBTypeToSWIGWrapper(*new lldb::SBTarget(target_sp)));
-
- if (!target_arg.IsAllocated())
- Py_RETURN_NONE;
+ PythonObject target_arg = ToSWIGWrapper(target_sp);
llvm::Expected<PythonCallable::ArgInfo> arg_info = pfunc.GetArgInfo();
if (!arg_info) {
@@ -330,8 +302,8 @@ LLDBSwigPythonCreateScriptedProcess
Py_RETURN_NONE;
}
-SWIGEXPORT void*
-LLDBSwigPythonCreateScriptedThread
+void*
+lldb_private::LLDBSwigPythonCreateScriptedThread
(
const char *python_class_name,
const char *session_dictionary_name,
@@ -354,14 +326,6 @@ LLDBSwigPythonCreateScriptedThread
return nullptr;
}
- // FIXME: This leaks the SBProcess object
- PythonObject process_arg(
- PyRefType::Owned,
- SBTypeToSWIGWrapper(*new lldb::SBProcess(process_sp)));
-
- if (!process_arg.IsAllocated())
- Py_RETURN_NONE;
-
llvm::Expected<PythonCallable::ArgInfo> arg_info = pfunc.GetArgInfo();
if (!arg_info) {
llvm::handleAllErrors(
@@ -379,7 +343,7 @@ LLDBSwigPythonCreateScriptedThread
if (arg_info.get().max_positional_args == 2) {
// FIXME: SBStructuredData leaked here
PythonObject args_arg(PyRefType::Owned, SBTypeToSWIGWrapper(*new lldb::SBStructuredData(args_impl)));
- result = pfunc(process_arg, args_arg);
+ result = pfunc(ToSWIGWrapper(process_sp), args_arg);
} else {
error_string.assign("wrong number of arguments in __init__, should be 2 (not including self)");
Py_RETURN_NONE;
@@ -390,8 +354,8 @@ LLDBSwigPythonCreateScriptedThread
Py_RETURN_NONE;
}
-SWIGEXPORT void*
-LLDBSwigPythonCreateScriptedThreadPlan
+void*
+lldb_private::LLDBSwigPythonCreateScriptedThreadPlan
(
const char *python_class_name,
const char *session_dictionary_name,
@@ -415,13 +379,7 @@ LLDBSwigPythonCreateScriptedThreadPlan
return nullptr;
}
- // FIXME: SBThreadPlan leaked here
- PythonObject tp_arg(
- PyRefType::Owned,
- SBTypeToSWIGWrapper(*new lldb::SBThreadPlan(thread_plan_sp)));
-
- if (!tp_arg.IsAllocated())
- Py_RETURN_NONE;
+ PythonObject tp_arg = ToSWIGWrapper(thread_plan_sp);
llvm::Expected<PythonCallable::ArgInfo> arg_info = pfunc.GetArgInfo();
if (!arg_info) {
@@ -460,8 +418,8 @@ LLDBSwigPythonCreateScriptedThreadPlan
Py_RETURN_NONE;
}
-SWIGEXPORT bool
-LLDBSWIGPythonCallThreadPlan
+bool
+lldb_private::LLDBSWIGPythonCallThreadPlan
(
void *implementor,
const char *method_name,
@@ -507,15 +465,11 @@ LLDBSWIGPythonCallThreadPlan
return false;
}
-SWIGEXPORT void *
-LLDBSwigPythonCreateScriptedBreakpointResolver
-(
- const char *python_class_name,
- const char *session_dictionary_name,
+void *lldb_private::LLDBSwigPythonCreateScriptedBreakpointResolver(
+ const char *python_class_name, const char *session_dictionary_name,
lldb_private::StructuredDataImpl *args_impl,
- lldb::BreakpointSP &breakpoint_sp
-)
-{
+ const lldb::BreakpointSP &breakpoint_sp) {
+
if (python_class_name == NULL || python_class_name[0] == '\0' || !session_dictionary_name)
Py_RETURN_NONE;
@@ -527,16 +481,11 @@ LLDBSwigPythonCreateScriptedBreakpointResolver
if (!pfunc.IsAllocated())
return nullptr;
- // FIXME: SBBreakpoint leaked here
- lldb::SBBreakpoint *bkpt_value = new lldb::SBBreakpoint(breakpoint_sp);
-
- PythonObject bkpt_arg(PyRefType::Owned, SBTypeToSWIGWrapper(*bkpt_value));
-
// FIXME: SBStructuredData leaked here
lldb::SBStructuredData *args_value = new lldb::SBStructuredData(args_impl);
PythonObject args_arg(PyRefType::Owned, SBTypeToSWIGWrapper(*args_value));
- PythonObject result = pfunc(bkpt_arg, args_arg, dict);
+ PythonObject result = pfunc(ToSWIGWrapper(breakpoint_sp), args_arg, dict);
// FIXME: At this point we should check that the class we found supports all the methods
// that we need.
@@ -552,8 +501,8 @@ LLDBSwigPythonCreateScriptedBreakpointResolver
Py_RETURN_NONE;
}
-SWIGEXPORT unsigned int
-LLDBSwigPythonCallBreakpointResolver
+unsigned int
+lldb_private::LLDBSwigPythonCallBreakpointResolver
(
void *implementor,
const char *method_name,
@@ -603,8 +552,8 @@ LLDBSwigPythonCallBreakpointResolver
return ret_val;
}
-SWIGEXPORT void *
-LLDBSwigPythonCreateScriptedStopHook
+void *
+lldb_private::LLDBSwigPythonCreateScriptedStopHook
(
lldb::TargetSP target_sp,
const char *python_class_name,
@@ -637,16 +586,11 @@ LLDBSwigPythonCreateScriptedStopHook
return nullptr;
}
- // FIXME: SBTarget leaked here
- lldb::SBTarget *target_val
- = new lldb::SBTarget(target_sp);
- PythonObject target_arg(PyRefType::Owned, SBTypeToSWIGWrapper(*target_val));
-
// FIXME: SBStructuredData leaked here
lldb::SBStructuredData *args_value = new lldb::SBStructuredData(args_impl);
PythonObject args_arg(PyRefType::Owned, SBTypeToSWIGWrapper(*args_value));
- PythonObject result = pfunc(target_arg, args_arg, dict);
+ PythonObject result = pfunc(ToSWIGWrapper(target_sp), args_arg, dict);
if (result.IsAllocated())
{
@@ -679,8 +623,8 @@ LLDBSwigPythonCreateScriptedStopHook
Py_RETURN_NONE;
}
-SWIGEXPORT bool
-LLDBSwigPythonStopHookCallHandleStop
+bool
+lldb_private::LLDBSwigPythonStopHookCallHandleStop
(
void *implementor,
lldb::ExecutionContextRefSP exc_ctx_sp,
@@ -755,8 +699,8 @@ LLDBSwigPython_CallOptionalMember
return result.release();
}
-SWIGEXPORT size_t
-LLDBSwigPython_CalculateNumChildren
+size_t
+lldb_private::LLDBSwigPython_CalculateNumChildren
(
PyObject *implementor,
uint32_t max
@@ -793,8 +737,8 @@ LLDBSwigPython_CalculateNumChildren
return ret_val;
}
-SWIGEXPORT PyObject*
-LLDBSwigPython_GetChildAtIndex
+PyObject*
+lldb_private::LLDBSwigPython_GetChildAtIndex
(
PyObject *implementor,
uint32_t idx
@@ -823,8 +767,8 @@ LLDBSwigPython_GetChildAtIndex
return result.release();
}
-SWIGEXPORT int
-LLDBSwigPython_GetIndexOfChildWithName
+int
+lldb_private::LLDBSwigPython_GetIndexOfChildWithName
(
PyObject *implementor,
const char* child_name
@@ -853,8 +797,8 @@ LLDBSwigPython_GetIndexOfChildWithName
return UINT32_MAX;
}
-SWIGEXPORT bool
-LLDBSwigPython_UpdateSynthProviderInstance
+bool
+lldb_private::LLDBSwigPython_UpdateSynthProviderInstance
(
PyObject *implementor
)
@@ -873,8 +817,8 @@ LLDBSwigPython_UpdateSynthProviderInstance
return ret_val;
}
-SWIGEXPORT bool
-LLDBSwigPython_MightHaveChildrenSynthProviderInstance
+bool
+lldb_private::LLDBSwigPython_MightHaveChildrenSynthProviderInstance
(
PyObject *implementor
)
@@ -893,8 +837,8 @@ LLDBSwigPython_MightHaveChildrenSynthProviderInstance
return ret_val;
}
-SWIGEXPORT PyObject*
-LLDBSwigPython_GetValueSynthProviderInstance
+PyObject*
+lldb_private::LLDBSwigPython_GetValueSynthProviderInstance
(
PyObject *implementor
)
@@ -921,8 +865,8 @@ LLDBSwigPython_GetValueSynthProviderInstance
return ret_val;
}
-SWIGEXPORT void*
-LLDBSWIGPython_CastPyObjectToSBData
+void*
+lldb_private::LLDBSWIGPython_CastPyObjectToSBData
(
PyObject* data
)
@@ -938,8 +882,8 @@ LLDBSWIGPython_CastPyObjectToSBData
}
-SWIGEXPORT void*
-LLDBSWIGPython_CastPyObjectToSBError
+void*
+lldb_private::LLDBSWIGPython_CastPyObjectToSBError
(
PyObject* data
)
@@ -955,8 +899,8 @@ LLDBSWIGPython_CastPyObjectToSBError
}
-SWIGEXPORT void*
-LLDBSWIGPython_CastPyObjectToSBValue
+void*
+lldb_private::LLDBSWIGPython_CastPyObjectToSBValue
(
PyObject* data
)
@@ -971,8 +915,8 @@ LLDBSWIGPython_CastPyObjectToSBValue
return sb_ptr;
}
-SWIGEXPORT void*
-LLDBSWIGPython_CastPyObjectToSBMemoryRegionInfo
+void*
+lldb_private::LLDBSWIGPython_CastPyObjectToSBMemoryRegionInfo
(
PyObject* data
)
@@ -987,8 +931,8 @@ LLDBSWIGPython_CastPyObjectToSBMemoryRegionInfo
return sb_ptr;
}
-SWIGEXPORT bool
-LLDBSwigPythonCallCommand
+bool
+lldb_private::LLDBSwigPythonCallCommand
(
const char *python_function_name,
const char *session_dictionary_name,
@@ -1026,8 +970,8 @@ LLDBSwigPythonCallCommand
return true;
}
-SWIGEXPORT bool
-LLDBSwigPythonCallCommandObject
+bool
+lldb_private::LLDBSwigPythonCallCommandObject
(
PyObject *implementor,
lldb::DebuggerSP& debugger,
@@ -1057,8 +1001,8 @@ LLDBSwigPythonCallCommandObject
return true;
}
-SWIGEXPORT void*
-LLDBSWIGPythonCreateOSPlugin
+void*
+lldb_private::LLDBSWIGPythonCreateOSPlugin
(
const char *python_class_name,
const char *session_dictionary_name,
@@ -1076,13 +1020,7 @@ LLDBSWIGPythonCreateOSPlugin
if (!pfunc.IsAllocated())
Py_RETURN_NONE;
- // FIXME: This leaks the SBProcess object
- lldb::SBProcess *process_sb = new lldb::SBProcess(process_sp);
- PythonObject process_arg(PyRefType::Owned, SBTypeToSWIGWrapper(*process_sb));
- if (!process_arg.IsAllocated())
- Py_RETURN_NONE;
-
- auto result = pfunc(process_arg);
+ auto result = pfunc(ToSWIGWrapper(process_sp));
if (result.IsAllocated())
return result.release();
@@ -1090,8 +1028,8 @@ LLDBSWIGPythonCreateOSPlugin
Py_RETURN_NONE;
}
-SWIGEXPORT void*
-LLDBSWIGPython_CreateFrameRecognizer
+void*
+lldb_private::LLDBSWIGPython_CreateFrameRecognizer
(
const char *python_class_name,
const char *session_dictionary_name
@@ -1116,8 +1054,8 @@ LLDBSWIGPython_CreateFrameRecognizer
Py_RETURN_NONE;
}
-SWIGEXPORT PyObject*
-LLDBSwigPython_GetRecognizedArguments
+PyObject*
+lldb_private::LLDBSwigPython_GetRecognizedArguments
(
PyObject *implementor,
const lldb::StackFrameSP& frame_sp
@@ -1134,8 +1072,8 @@ LLDBSwigPython_GetRecognizedArguments
return result;
}
-SWIGEXPORT void*
-LLDBSWIGPython_GetDynamicSetting (void* module, const char* setting, const lldb::TargetSP& target_sp)
+void*
+lldb_private::LLDBSWIGPython_GetDynamicSetting (void* module, const char* setting, const lldb::TargetSP& target_sp)
{
if (!module || !setting)
Py_RETURN_NONE;
@@ -1147,21 +1085,15 @@ LLDBSWIGPython_GetDynamicSetting (void* module, const char* setting, const lldb:
if (!pfunc.IsAllocated())
Py_RETURN_NONE;
- lldb::SBTarget target_sb(target_sp);
- PythonObject target_arg(PyRefType::Owned, SBTypeToSWIGWrapper(target_sb));
- auto result = pfunc(target_arg, PythonString(setting));
+ auto result = pfunc(ToSWIGWrapper(target_sp), PythonString(setting));
return result.release();
}
-SWIGEXPORT bool
-LLDBSWIGPythonRunScriptKeywordProcess
-(const char* python_function_name,
-const char* session_dictionary_name,
-lldb::ProcessSP& process,
-std::string& output)
+bool lldb_private::LLDBSWIGPythonRunScriptKeywordProcess(
+ const char *python_function_name, const char *session_dictionary_name,
+ const lldb::ProcessSP &process, std::string &output) {
-{
if (python_function_name == NULL || python_function_name[0] == '\0' || !session_dictionary_name)
return false;
@@ -1173,17 +1105,15 @@ std::string& output)
if (!pfunc.IsAllocated())
return false;
- lldb::SBProcess process_sb(process);
- PythonObject process_arg(PyRefType::Owned, SBTypeToSWIGWrapper(process_sb));
- auto result = pfunc(process_arg, dict);
+ auto result = pfunc(ToSWIGWrapper(process), dict);
output = result.Str().GetString().str();
return true;
}
-SWIGEXPORT bool
-LLDBSWIGPythonRunScriptKeywordThread
+bool
+lldb_private::LLDBSWIGPythonRunScriptKeywordThread
(const char* python_function_name,
const char* session_dictionary_name,
lldb::ThreadSP& thread,
@@ -1210,14 +1140,10 @@ std::string& output)
return true;
}
-SWIGEXPORT bool
-LLDBSWIGPythonRunScriptKeywordTarget
-(const char* python_function_name,
-const char* session_dictionary_name,
-lldb::TargetSP& target,
-std::string& output)
+bool lldb_private::LLDBSWIGPythonRunScriptKeywordTarget(
+ const char *python_function_name, const char *session_dictionary_name,
+ const lldb::TargetSP &target, std::string &output) {
-{
if (python_function_name == NULL || python_function_name[0] == '\0' || !session_dictionary_name)
return false;
@@ -1229,17 +1155,15 @@ std::string& output)
if (!pfunc.IsAllocated())
return false;
- lldb::SBTarget target_sb(target);
- PythonObject target_arg(PyRefType::Owned, SBTypeToSWIGWrapper(target_sb));
- auto result = pfunc(target_arg, dict);
+ auto result = pfunc(ToSWIGWrapper(target), dict);
output = result.Str().GetString().str();
return true;
}
-SWIGEXPORT bool
-LLDBSWIGPythonRunScriptKeywordFrame
+bool
+lldb_private::LLDBSWIGPythonRunScriptKeywordFrame
(const char* python_function_name,
const char* session_dictionary_name,
lldb::StackFrameSP& frame,
@@ -1266,14 +1190,10 @@ std::string& output)
return true;
}
-SWIGEXPORT bool
-LLDBSWIGPythonRunScriptKeywordValue
-(const char* python_function_name,
-const char* session_dictionary_name,
-lldb::ValueObjectSP& value,
-std::string& output)
+bool lldb_private::LLDBSWIGPythonRunScriptKeywordValue(
+ const char *python_function_name, const char *session_dictionary_name,
+ const lldb::ValueObjectSP &value, std::string &output) {
-{
if (python_function_name == NULL || python_function_name[0] == '\0' || !session_dictionary_name)
return false;
@@ -1285,17 +1205,15 @@ std::string& output)
if (!pfunc.IsAllocated())
return false;
- lldb::SBValue value_sb(value);
- PythonObject value_arg(PyRefType::Owned, SBTypeToSWIGWrapper(value_sb));
- auto result = pfunc(value_arg, dict);
+ auto result = pfunc(ToSWIGWrapper(value), dict);
output = result.Str().GetString().str();
return true;
}
-SWIGEXPORT bool
-LLDBSwigPythonCallModuleInit
+bool
+lldb_private::LLDBSwigPythonCallModuleInit
(
const char *python_module_name,
const char *session_dictionary_name,
@@ -1322,16 +1240,9 @@ LLDBSwigPythonCallModuleInit
return true;
}
-%}
-
-
-%runtime %{
-// Forward declaration to be inserted at the start of LLDBWrapPython.h
-#include "lldb/API/SBDebugger.h"
-#include "lldb/API/SBValue.h"
-SWIGEXPORT lldb::ValueObjectSP
-LLDBSWIGPython_GetValueObjectSPFromSBValue (void* data)
+lldb::ValueObjectSP
+lldb_private::LLDBSWIGPython_GetValueObjectSPFromSBValue (void* data)
{
lldb::ValueObjectSP valobj_sp;
if (data)
@@ -1342,22 +1253,8 @@ LLDBSWIGPython_GetValueObjectSPFromSBValue (void* data)
return valobj_sp;
}
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-void LLDBSwigPythonCallPythonLogOutputCallback(const char *str, void *baton);
-
-#ifdef __cplusplus
-}
-#endif
-%}
-
-%wrapper %{
-
-
// For the LogOutputCallback functions
-void LLDBSwigPythonCallPythonLogOutputCallback(const char *str, void *baton) {
+static void LLDBSwigPythonCallPythonLogOutputCallback(const char *str, void *baton) {
if (baton != Py_None) {
SWIG_PYTHON_THREAD_BEGIN_BLOCK;
PyObject *result = PyObject_CallFunction(reinterpret_cast<PyObject*>(baton), const_cast<char*>("s"), str);
diff --git a/lldb/bindings/python/python.swig b/lldb/bindings/python/python.swig
index 9dc4ab87a4bd..5dcbd68d8544 100644
--- a/lldb/bindings/python/python.swig
+++ b/lldb/bindings/python/python.swig
@@ -121,6 +121,7 @@ def lldb_iter(obj, getsize, getelem):
%{
#include "../source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h"
+#include "../source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h"
#include "../bindings/python/python-swigsafecast.swig"
using namespace lldb_private;
using namespace lldb_private::python;
diff --git a/lldb/include/lldb/API/SBDebugger.h b/lldb/include/lldb/API/SBDebugger.h
index 64081f79205d..1c771330cddc 100644
--- a/lldb/include/lldb/API/SBDebugger.h
+++ b/lldb/include/lldb/API/SBDebugger.h
@@ -126,6 +126,8 @@ public:
FILE *GetErrorFileHandle();
+ SBError SetInputString(const char *data);
+
SBError SetInputFile(SBFile file);
SBError SetOutputFile(SBFile file);
diff --git a/lldb/include/lldb/API/SBTarget.h b/lldb/include/lldb/API/SBTarget.h
index 5a6908f040b1..abd9ebf07407 100644
--- a/lldb/include/lldb/API/SBTarget.h
+++ b/lldb/include/lldb/API/SBTarget.h
@@ -336,6 +336,11 @@ public:
/// unit from the Architecture's code bus
uint32_t GetCodeByteSize();
+ /// Gets the target.max-children-count value. It should be used to limit
+ /// the number of children displayed for large data structures.
+ uint32_t GetMaximumNumberOfChildrenToDisplay() const;
+
/// Set the base load address for a module section.
///
/// \param[in] section
diff --git a/lldb/include/lldb/API/SBValue.h b/lldb/include/lldb/API/SBValue.h
index 69be02545b35..a8578abec6b7 100644
--- a/lldb/include/lldb/API/SBValue.h
+++ b/lldb/include/lldb/API/SBValue.h
@@ -246,6 +246,12 @@ public:
bool SetData(lldb::SBData &data, lldb::SBError &error);
+ /// Creates a copy of the SBValue with a new name, setting the current
+ /// SBValue as its parent. It should be used when we want to change the
+ /// name of an SBValue without modifying the actual SBValue itself
+ /// (e.g. in a synthetic child provider).
+ lldb::SBValue Clone(const char *new_name);
+
lldb::SBDeclaration GetDeclaration();
/// Find out if a SBValue might have children.
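
Since the header comment only hints at the intended use, a hypothetical synthetic-child-provider-style snippet shows why Clone exists; the member name "__begin_" and the display name "[0]" below are purely illustrative:

  #include "lldb/API/SBValue.h"

  // Present an existing member under a friendlier display name. Clone() keeps
  // the original value intact and parents the copy to it.
  lldb::SBValue MakeDisplayChild(lldb::SBValue parent) {
    lldb::SBValue raw = parent.GetChildMemberWithName("__begin_");
    if (!raw.IsValid())
      return lldb::SBValue();
    return raw.Clone("[0]"); // same value, new display name
  }
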
diff --git a/lldb/include/lldb/Core/Debugger.h b/lldb/include/lldb/Core/Debugger.h
index f0849c9ac950..1ab21bec54c9 100644
--- a/lldb/include/lldb/Core/Debugger.h
+++ b/lldb/include/lldb/Core/Debugger.h
@@ -176,7 +176,13 @@ public:
repro::DataRecorder *GetInputRecorder();
- void SetInputFile(lldb::FileSP file, repro::DataRecorder *recorder = nullptr);
+ Status SetInputString(const char *data);
+
+ // This method will set up a data recorder if the reproducer is enabled.
+ // In replay mode this method takes its instructions from the reproducer file.
+ Status SetInputFile(lldb::FileSP file);
+
+ void SetInputFile(lldb::FileSP file, repro::DataRecorder *recorder);
void SetOutputFile(lldb::FileSP file);
diff --git a/lldb/include/lldb/Interpreter/OptionGroupFormat.h b/lldb/include/lldb/Interpreter/OptionGroupFormat.h
index 2d445b8a6c20..551688b0d25f 100644
--- a/lldb/include/lldb/Interpreter/OptionGroupFormat.h
+++ b/lldb/include/lldb/Interpreter/OptionGroupFormat.h
@@ -16,6 +16,9 @@
namespace lldb_private {
+typedef std::vector<std::tuple<lldb::CommandArgumentType, const char *>>
+ OptionGroupFormatUsageTextVector;
+
// OptionGroupFormat
class OptionGroupFormat : public OptionGroup {
@@ -30,7 +33,10 @@ public:
uint64_t default_byte_size =
UINT64_MAX, // Pass UINT64_MAX to disable the "--size" option
uint64_t default_count =
- UINT64_MAX); // Pass UINT64_MAX to disable the "--count" option
+ UINT64_MAX, // Pass UINT64_MAX to disable the "--count" option
+ OptionGroupFormatUsageTextVector usage_text_vector = {}
+ // Used to override the default option usage text with a command-specific one
+ );
~OptionGroupFormat() override = default;
@@ -73,6 +79,7 @@ protected:
char m_prev_gdb_format;
char m_prev_gdb_size;
bool m_has_gdb_format;
+ OptionDefinition m_option_definitions[4];
};
} // namespace lldb_private
diff --git a/lldb/include/lldb/Symbol/ObjectFile.h b/lldb/include/lldb/Symbol/ObjectFile.h
index 4ccd7f92064d..0a8b38b2c642 100644
--- a/lldb/include/lldb/Symbol/ObjectFile.h
+++ b/lldb/include/lldb/Symbol/ObjectFile.h
@@ -19,6 +19,7 @@
#include "lldb/Utility/FileSpec.h"
#include "lldb/Utility/UUID.h"
#include "lldb/lldb-private.h"
+#include "llvm/Support/Threading.h"
#include "llvm/Support/VersionTuple.h"
namespace lldb_private {
@@ -322,12 +323,26 @@ public:
/// Gets the symbol table for the currently selected architecture (and
/// object for archives).
///
- /// Symbol table parsing can be deferred by ObjectFile instances until this
- /// accessor is called the first time.
+ /// This function manages when ParseSymtab(...) is called to actually do
+ /// the symbol table parsing in each plug-in. It takes all the necessary
+ /// locks and finalizes the symbol table once it has been parsed.
///
/// \return
/// The symbol table for this object file.
- virtual Symtab *GetSymtab() = 0;
+ Symtab *GetSymtab();
+
+ /// Parse the symbol table into the provided symbol table object.
+
+ /// Symbol table parsing will be done once when this function is called by
+ /// each object file plugin. All of the necessary locks will already be
+ /// acquired before this function is called, and the symbol table object to
+ /// populate is supplied as an argument, so it doesn't need to be created by
+ /// each plug-in.
+ ///
+ /// \param symtab
+ /// The symbol table to populate.
+ virtual void ParseSymtab(Symtab &symtab) = 0;
/// Perform relocations on the section if necessary.
///
@@ -708,7 +723,12 @@ protected:
const lldb::addr_t m_memory_addr;
std::unique_ptr<lldb_private::SectionList> m_sections_up;
std::unique_ptr<lldb_private::Symtab> m_symtab_up;
- uint32_t m_synthetic_symbol_idx;
+ /// We need an llvm::once_flag that we can use to avoid taking the module
+ /// lock and deadlocking LLDB. See comments in ObjectFile::GetSymtab() for
+ /// the full details. We also need to be able to clear the symbol table, so
+ /// we hold the once flag behind a std::unique_ptr: clearing the symbol
+ /// table installs a fresh flag for when the table is created again.
+ std::unique_ptr<llvm::once_flag> m_symtab_once_up;
/// Sets the architecture for a module. At present the architecture can
/// only be set if it is invalid. It is not allowed to switch from one
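
The once flag gates the new non-virtual ObjectFile::GetSymtab() so that ParseSymtab() runs exactly once per object file. A minimal sketch of the control flow this enables, assuming a simplified GetSymtab() (the real implementation, shown for ELF further below, also redirects to the module's main object file and records parse time):

  Symtab *ObjectFile::GetSymtab() {
    if (ModuleSP module_sp = GetModule()) {
      // The unique_ptr indirection lets clearing the symbol table install a
      // fresh flag, so the table can be parsed again after being cleared.
      llvm::call_once(*m_symtab_once_up, [&] {
        std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
        m_symtab_up = std::make_unique<Symtab>(this);
        ParseSymtab(*m_symtab_up); // plug-ins only fill in the table
        m_symtab_up->Finalize();   // sort and shrink-to-fit once, afterwards
      });
    }
    return m_symtab_up.get();
  }
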
diff --git a/lldb/include/lldb/Symbol/Symtab.h b/lldb/include/lldb/Symbol/Symtab.h
index e1ad0dfd2eb8..e5d21c1bf4b3 100644
--- a/lldb/include/lldb/Symbol/Symtab.h
+++ b/lldb/include/lldb/Symbol/Symtab.h
@@ -119,20 +119,13 @@ public:
lldb::addr_t file_addr, std::function<bool(Symbol *)> const &callback);
void FindFunctionSymbols(ConstString name, uint32_t name_type_mask,
SymbolContextList &sc_list);
- void CalculateSymbolSizes();
void SortSymbolIndexesByValue(std::vector<uint32_t> &indexes,
bool remove_duplicates) const;
static void DumpSymbolHeader(Stream *s);
- void Finalize() {
- // Shrink to fit the symbols so we don't waste memory
- if (m_symbols.capacity() > m_symbols.size()) {
- collection new_symbols(m_symbols.begin(), m_symbols.end());
- m_symbols.swap(new_symbols);
- }
- }
+ void Finalize();
void AppendSymbolNamesToMap(const IndexCollection &indexes,
bool add_demangled, bool add_mangled,
diff --git a/lldb/include/lldb/Target/Platform.h b/lldb/include/lldb/Target/Platform.h
index 956b29e45dba..26127359a322 100644
--- a/lldb/include/lldb/Target/Platform.h
+++ b/lldb/include/lldb/Target/Platform.h
@@ -310,25 +310,7 @@ public:
/// Get the platform's supported architectures in the order in which they
/// should be searched.
- ///
- /// \param[in] idx
- /// A zero based architecture index
- ///
- /// \param[out] arch
- /// A copy of the architecture at index if the return value is
- /// \b true.
- ///
- /// \return
- /// \b true if \a arch was filled in and is valid, \b false
- /// otherwise.
- virtual bool GetSupportedArchitectureAtIndex(uint32_t idx,
- ArchSpec &arch);
-
- /// Get the platform's supported architectures in the order in which they
- /// should be searched.
- /// NB: This implementation is mutually recursive with
- /// GetSupportedArchitectureAtIndex. Subclasses should implement one of them.
- virtual std::vector<ArchSpec> GetSupportedArchitectures();
+ virtual std::vector<ArchSpec> GetSupportedArchitectures() = 0;
virtual size_t GetSoftwareBreakpointTrapOpcode(Target &target,
BreakpointSite *bp_site);
@@ -971,10 +953,6 @@ private:
bool GetCachedSharedModule(const ModuleSpec &module_spec,
lldb::ModuleSP &module_sp, bool *did_create_ptr);
- Status LoadCachedExecutable(const ModuleSpec &module_spec,
- lldb::ModuleSP &module_sp,
- const FileSpecList *module_search_paths_ptr);
-
FileSpec GetModuleCacheRoot();
};
diff --git a/lldb/include/lldb/Target/Process.h b/lldb/include/lldb/Target/Process.h
index 4627502abd25..e27cb8cbf2aa 100644
--- a/lldb/include/lldb/Target/Process.h
+++ b/lldb/include/lldb/Target/Process.h
@@ -1762,7 +1762,7 @@ public:
///
/// If load_addr is within the address space the process has mapped
/// range_info will be filled in with the start and end of that range as
- /// well as the permissions for that range and range_info. GetMapped will
+ /// well as the permissions for that range and range_info.GetMapped will
/// return true.
///
/// If load_addr is outside any mapped region then range_info will have its
@@ -1771,21 +1771,23 @@ public:
/// there are no valid mapped ranges between load_addr and the end of the
/// process address space.
///
- /// GetMemoryRegionInfo calls DoGetMemoryRegionInfo. Override that function in
- /// process subclasses.
+ /// GetMemoryRegionInfo will only return an error if it is unimplemented for
+ /// the current process.
///
/// \param[in] load_addr
- /// The load address to query the range_info for. May include non
- /// address bits, these will be removed by the the ABI plugin if there is
- /// one.
+ /// The load address to query the range_info for.
///
/// \param[out] range_info
/// An range_info value containing the details of the range.
///
/// \return
/// An error value.
- Status GetMemoryRegionInfo(lldb::addr_t load_addr,
- MemoryRegionInfo &range_info);
+ virtual Status GetMemoryRegionInfo(lldb::addr_t load_addr,
+ MemoryRegionInfo &range_info) {
+ Status error;
+ error.SetErrorString("Process::GetMemoryRegionInfo() not supported");
+ return error;
+ }
/// Obtain all the mapped memory regions within this process.
///
@@ -2605,26 +2607,6 @@ protected:
virtual size_t DoReadMemory(lldb::addr_t vm_addr, void *buf, size_t size,
Status &error) = 0;
- /// DoGetMemoryRegionInfo is called by GetMemoryRegionInfo after it has
- /// removed non address bits from load_addr. Override this method in
- /// subclasses of Process.
- ///
- /// See GetMemoryRegionInfo for details of the logic.
- ///
- /// \param[in] load_addr
- /// The load address to query the range_info for. (non address bits
- /// removed)
- ///
- /// \param[out] range_info
- /// An range_info value containing the details of the range.
- ///
- /// \return
- /// An error value.
- virtual Status DoGetMemoryRegionInfo(lldb::addr_t load_addr,
- MemoryRegionInfo &range_info) {
- return Status("Process::DoGetMemoryRegionInfo() not supported");
- }
-
lldb::StateType GetPrivateState();
/// The "private" side of resuming a process. This doesn't alter the state
diff --git a/lldb/source/API/SBDebugger.cpp b/lldb/source/API/SBDebugger.cpp
index 4bb23c3e705c..844b91de4cd0 100644
--- a/lldb/source/API/SBDebugger.cpp
+++ b/lldb/source/API/SBDebugger.cpp
@@ -327,12 +327,32 @@ void SBDebugger::SkipAppInitFiles(bool b) {
void SBDebugger::SetInputFileHandle(FILE *fh, bool transfer_ownership) {
LLDB_RECORD_METHOD(void, SBDebugger, SetInputFileHandle, (FILE *, bool), fh,
transfer_ownership);
- SetInputFile((FileSP)std::make_shared<NativeFile>(fh, transfer_ownership));
+ if (m_opaque_sp)
+ m_opaque_sp->SetInputFile(
+ (FileSP)std::make_shared<NativeFile>(fh, transfer_ownership));
}
-SBError SBDebugger::SetInputFile(FileSP file_sp) {
- LLDB_RECORD_METHOD(SBError, SBDebugger, SetInputFile, (FileSP), file_sp);
- return LLDB_RECORD_RESULT(SetInputFile(SBFile(file_sp)));
+SBError SBDebugger::SetInputString(const char *data) {
+ LLDB_RECORD_METHOD(SBError, SBDebugger, SetInputString, (const char *), data);
+ SBError sb_error;
+ if (data == nullptr) {
+ sb_error.SetErrorString("String data is null");
+ return LLDB_RECORD_RESULT(sb_error);
+ }
+
+ size_t size = strlen(data);
+ if (size == 0) {
+ sb_error.SetErrorString("String data is empty");
+ return LLDB_RECORD_RESULT(sb_error);
+ }
+
+ if (!m_opaque_sp) {
+ sb_error.SetErrorString("invalid debugger");
+ return LLDB_RECORD_RESULT(sb_error);
+ }
+
+ sb_error.SetError(m_opaque_sp->SetInputString(data));
+ return LLDB_RECORD_RESULT(sb_error);
}
// Shouldn't really be settable after initialization as this could cause lots
@@ -346,36 +366,15 @@ SBError SBDebugger::SetInputFile(SBFile file) {
error.ref().SetErrorString("invalid debugger");
return LLDB_RECORD_RESULT(error);
}
-
- repro::DataRecorder *recorder = nullptr;
- if (repro::Generator *g = repro::Reproducer::Instance().GetGenerator())
- recorder = g->GetOrCreate<repro::CommandProvider>().GetNewRecorder();
-
- FileSP file_sp = file.m_opaque_sp;
-
- static std::unique_ptr<repro::MultiLoader<repro::CommandProvider>> loader =
- repro::MultiLoader<repro::CommandProvider>::Create(
- repro::Reproducer::Instance().GetLoader());
- if (loader) {
- llvm::Optional<std::string> nextfile = loader->GetNextFile();
- FILE *fh = nextfile ? FileSystem::Instance().Fopen(nextfile->c_str(), "r")
- : nullptr;
- // FIXME Jonas Devlieghere: shouldn't this error be propagated out to the
- // reproducer somehow if fh is NULL?
- if (fh) {
- file_sp = std::make_shared<NativeFile>(fh, true);
- }
- }
-
- if (!file_sp || !file_sp->IsValid()) {
- error.ref().SetErrorString("invalid file");
- return LLDB_RECORD_RESULT(error);
- }
-
- m_opaque_sp->SetInputFile(file_sp, recorder);
+ error.SetError(m_opaque_sp->SetInputFile(file.m_opaque_sp));
return LLDB_RECORD_RESULT(error);
}
+SBError SBDebugger::SetInputFile(FileSP file_sp) {
+ LLDB_RECORD_METHOD(SBError, SBDebugger, SetInputFile, (FileSP), file_sp);
+ return LLDB_RECORD_RESULT(SetInputFile(SBFile(file_sp)));
+}
+
SBError SBDebugger::SetOutputFile(FileSP file_sp) {
LLDB_RECORD_METHOD(SBError, SBDebugger, SetOutputFile, (FileSP), file_sp);
return LLDB_RECORD_RESULT(SetOutputFile(SBFile(file_sp)));
@@ -1771,6 +1770,7 @@ template <> void RegisterMethods<SBDebugger>(Registry &R) {
LLDB_REGISTER_METHOD(bool, SBDebugger, GetAsync, ());
LLDB_REGISTER_METHOD(void, SBDebugger, SkipLLDBInitFiles, (bool));
LLDB_REGISTER_METHOD(void, SBDebugger, SkipAppInitFiles, (bool));
+ LLDB_REGISTER_METHOD(SBError, SBDebugger, SetInputString, (const char *));
LLDB_REGISTER_METHOD(void, SBDebugger, SetInputFileHandle, (FILE *, bool));
LLDB_REGISTER_METHOD(FILE *, SBDebugger, GetInputFileHandle, ());
LLDB_REGISTER_METHOD(FILE *, SBDebugger, GetOutputFileHandle, ());
diff --git a/lldb/source/API/SBTarget.cpp b/lldb/source/API/SBTarget.cpp
index 98158f457a04..dc79c77fee9e 100644
--- a/lldb/source/API/SBTarget.cpp
+++ b/lldb/source/API/SBTarget.cpp
@@ -1745,6 +1745,16 @@ uint32_t SBTarget::GetCodeByteSize() {
return 0;
}
+uint32_t SBTarget::GetMaximumNumberOfChildrenToDisplay() const {
+ LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBTarget, GetMaximumNumberOfChildrenToDisplay);
+
+ TargetSP target_sp(GetSP());
+ if (target_sp) {
+ return target_sp->GetMaximumNumberOfChildrenToDisplay();
+ }
+ return 0;
+}
+
uint32_t SBTarget::GetAddressByteSize() {
LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBTarget, GetAddressByteSize);
@@ -2679,6 +2689,7 @@ void RegisterMethods<SBTarget>(Registry &R) {
LLDB_REGISTER_METHOD(const char *, SBTarget, GetTriple, ());
LLDB_REGISTER_METHOD(uint32_t, SBTarget, GetDataByteSize, ());
LLDB_REGISTER_METHOD(uint32_t, SBTarget, GetCodeByteSize, ());
+ LLDB_REGISTER_METHOD_CONST(uint32_t, SBTarget, GetMaximumNumberOfChildrenToDisplay, ());
LLDB_REGISTER_METHOD(uint32_t, SBTarget, GetAddressByteSize, ());
LLDB_REGISTER_METHOD(lldb::SBModule, SBTarget, GetModuleAtIndex,
(uint32_t));
diff --git a/lldb/source/API/SBValue.cpp b/lldb/source/API/SBValue.cpp
index 9faee102c5e3..e3325b8d36fa 100644
--- a/lldb/source/API/SBValue.cpp
+++ b/lldb/source/API/SBValue.cpp
@@ -1431,6 +1431,18 @@ bool SBValue::SetData(lldb::SBData &data, SBError &error) {
return ret;
}
+lldb::SBValue SBValue::Clone(const char *new_name) {
+ LLDB_RECORD_METHOD(lldb::SBValue, SBValue, Clone, (const char *), new_name);
+
+ ValueLocker locker;
+ lldb::ValueObjectSP value_sp(GetSP(locker));
+
+ if (value_sp)
+ return lldb::SBValue(value_sp->Clone(ConstString(new_name)));
+ else
+ return lldb::SBValue();
+}
+
lldb::SBDeclaration SBValue::GetDeclaration() {
LLDB_RECORD_METHOD_NO_ARGS(lldb::SBDeclaration, SBValue, GetDeclaration);
@@ -1656,6 +1668,7 @@ void RegisterMethods<SBValue>(Registry &R) {
LLDB_REGISTER_METHOD(lldb::SBData, SBValue, GetData, ());
LLDB_REGISTER_METHOD(bool, SBValue, SetData,
(lldb::SBData &, lldb::SBError &));
+ LLDB_REGISTER_METHOD(lldb::SBValue, SBValue, Clone, (const char *));
LLDB_REGISTER_METHOD(lldb::SBDeclaration, SBValue, GetDeclaration, ());
LLDB_REGISTER_METHOD(lldb::SBWatchpoint, SBValue, Watch,
(bool, bool, bool, lldb::SBError &));
diff --git a/lldb/source/Commands/CommandObjectMemory.cpp b/lldb/source/Commands/CommandObjectMemory.cpp
index f27d4bd7e4b2..094ce6f8558f 100644
--- a/lldb/source/Commands/CommandObjectMemory.cpp
+++ b/lldb/source/Commands/CommandObjectMemory.cpp
@@ -1222,7 +1222,15 @@ public:
interpreter, "memory write",
"Write to the memory of the current target process.", nullptr,
eCommandRequiresProcess | eCommandProcessMustBeLaunched),
- m_option_group(), m_format_options(eFormatBytes, 1, UINT64_MAX),
+ m_option_group(),
+ m_format_options(
+ eFormatBytes, 1, UINT64_MAX,
+ {std::make_tuple(
+ eArgTypeFormat,
+ "The format to use for each of the value to be written."),
+ std::make_tuple(
+ eArgTypeByteSize,
+ "The size in bytes to write from input file or each value.")}),
m_memory_options() {
CommandArgumentEntry arg1;
CommandArgumentEntry arg2;
@@ -1240,6 +1248,7 @@ public:
// Define the first (and only) variant of this arg.
value_arg.arg_type = eArgTypeValue;
value_arg.arg_repetition = eArgRepeatPlus;
+ value_arg.arg_opt_set_association = LLDB_OPT_SET_1;
// There is only one variant this argument could be; put it into the
// argument entry.
@@ -1278,6 +1287,12 @@ protected:
m_cmd_name.c_str());
return false;
}
+ if (argc > 1) {
+ result.AppendErrorWithFormat(
+ "%s takes only a destination address when writing file contents.\n",
+ m_cmd_name.c_str());
+ return false;
+ }
} else if (argc < 2) {
result.AppendErrorWithFormat(
"%s takes a destination address and at least one value.\n",
diff --git a/lldb/source/Core/Debugger.cpp b/lldb/source/Core/Debugger.cpp
index 32dcfb1ce17b..ae454fae3322 100644
--- a/lldb/source/Core/Debugger.cpp
+++ b/lldb/source/Core/Debugger.cpp
@@ -45,6 +45,7 @@
#include "lldb/Utility/Listener.h"
#include "lldb/Utility/Log.h"
#include "lldb/Utility/Reproducer.h"
+#include "lldb/Utility/ReproducerProvider.h"
#include "lldb/Utility/State.h"
#include "lldb/Utility/Stream.h"
#include "lldb/Utility/StreamCallback.h"
@@ -75,6 +76,14 @@
#include <string>
#include <system_error>
+// Includes for pipe()
+#if defined(_WIN32)
+#include <fcntl.h>
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
+
namespace lldb_private {
class Address;
}
@@ -810,6 +819,86 @@ void Debugger::SetAsyncExecution(bool async_execution) {
repro::DataRecorder *Debugger::GetInputRecorder() { return m_input_recorder; }
+static inline int OpenPipe(int fds[2], std::size_t size) {
+#ifdef _WIN32
+ return _pipe(fds, size, O_BINARY);
+#else
+ (void)size;
+ return pipe(fds);
+#endif
+}
+
+Status Debugger::SetInputString(const char *data) {
+ Status result;
+ enum PIPES { READ, WRITE }; // Indexes for the read and write fds
+ int fds[2] = {-1, -1};
+
+ if (data == nullptr) {
+ result.SetErrorString("String data is null");
+ return result;
+ }
+
+ size_t size = strlen(data);
+ if (size == 0) {
+ result.SetErrorString("String data is empty");
+ return result;
+ }
+
+ if (OpenPipe(fds, size) != 0) {
+ result.SetErrorString(
+ "can't create pipe file descriptors for LLDB commands");
+ return result;
+ }
+
+ write(fds[WRITE], data, size);
+ // Close the write end of the pipe, so that the command interpreter will exit
+ // when it consumes all the data.
+ llvm::sys::Process::SafelyCloseFileDescriptor(fds[WRITE]);
+
+ // Open the read file descriptor as a FILE * that we can return as an input
+ // handle.
+ FILE *commands_file = fdopen(fds[READ], "rb");
+ if (commands_file == nullptr) {
+ result.SetErrorStringWithFormat("fdopen(%i, \"rb\") failed (errno = %i) "
+ "when trying to open LLDB commands pipe",
+ fds[READ], errno);
+ llvm::sys::Process::SafelyCloseFileDescriptor(fds[READ]);
+ return result;
+ }
+
+ return SetInputFile(
+ (FileSP)std::make_shared<NativeFile>(commands_file, true));
+}
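
SetInputString stages the whole string in an OS pipe and hands the interpreter the read end as an ordinary FILE *; closing the write end up front is what makes the interpreter see EOF after the last command. A hypothetical driver for the new API (the command text here is illustrative):

  #include "lldb/API/SBDebugger.h"
  #include "lldb/API/SBError.h"

  // Feed a canned command script to the interpreter without a temp file.
  int RunCannedCommands() {
    lldb::SBDebugger::Initialize();
    lldb::SBDebugger debugger = lldb::SBDebugger::Create();
    // The interpreter reads commands until the (already closed) write end of
    // the pipe signals EOF.
    lldb::SBError error = debugger.SetInputString("version\nquit\n");
    if (error.Fail())
      return 1;
    debugger.RunCommandInterpreter(/*auto_handle_events=*/true,
                                   /*spawn_thread=*/false);
    lldb::SBDebugger::Destroy(debugger);
    lldb::SBDebugger::Terminate();
    return 0;
  }
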
+
+Status Debugger::SetInputFile(FileSP file_sp) {
+ Status error;
+ repro::DataRecorder *recorder = nullptr;
+ if (repro::Generator *g = repro::Reproducer::Instance().GetGenerator())
+ recorder = g->GetOrCreate<repro::CommandProvider>().GetNewRecorder();
+
+ static std::unique_ptr<repro::MultiLoader<repro::CommandProvider>> loader =
+ repro::MultiLoader<repro::CommandProvider>::Create(
+ repro::Reproducer::Instance().GetLoader());
+ if (loader) {
+ llvm::Optional<std::string> nextfile = loader->GetNextFile();
+ FILE *fh = nextfile ? FileSystem::Instance().Fopen(nextfile->c_str(), "r")
+ : nullptr;
+ // FIXME Jonas Devlieghere: shouldn't this error be propagated out to the
+ // reproducer somehow if fh is NULL?
+ if (fh) {
+ file_sp = std::make_shared<NativeFile>(fh, true);
+ }
+ }
+
+ if (!file_sp || !file_sp->IsValid()) {
+ error.SetErrorString("invalid file");
+ return error;
+ }
+
+ SetInputFile(file_sp, recorder);
+ return error;
+}
+
void Debugger::SetInputFile(FileSP file_sp, repro::DataRecorder *recorder) {
assert(file_sp && file_sp->IsValid());
m_input_recorder = recorder;
diff --git a/lldb/source/Core/Module.cpp b/lldb/source/Core/Module.cpp
index bd0a667171a5..cbecbb9aa5fe 100644
--- a/lldb/source/Core/Module.cpp
+++ b/lldb/source/Core/Module.cpp
@@ -1379,12 +1379,15 @@ void Module::PreloadSymbols() {
if (!sym_file)
return;
- // Prime the symbol file first, since it adds symbols to the symbol table.
- sym_file->PreloadSymbols();
-
- // Now we can prime the symbol table.
+ // Load the object file symbol table and any symbols from the SymbolFile that
+ // get appended using SymbolFile::AddSymbols(...).
if (Symtab *symtab = sym_file->GetSymtab())
symtab->PreloadSymbols();
+
+ // Now let the symbol file preload its data and the symbol table will be
+ // available without needing to take the module lock.
+ sym_file->PreloadSymbols();
+
}
void Module::SetSymbolFileFileSpec(const FileSpec &file) {
diff --git a/lldb/source/Interpreter/CommandObject.cpp b/lldb/source/Interpreter/CommandObject.cpp
index 64b23d04abea..dcae27ff5479 100644
--- a/lldb/source/Interpreter/CommandObject.cpp
+++ b/lldb/source/Interpreter/CommandObject.cpp
@@ -454,6 +454,9 @@ void CommandObject::GetFormattedCommandArguments(Stream &str,
opt_set_mask == LLDB_OPT_SET_ALL
? m_arguments[i]
: OptSetFiltered(opt_set_mask, m_arguments[i]);
+ // This argument is not associated with the current option set, so skip it.
+ if (arg_entry.empty())
+ continue;
int num_alternatives = arg_entry.size();
if ((num_alternatives == 2) && IsPairType(arg_entry[0].arg_repetition)) {
diff --git a/lldb/source/Interpreter/OptionGroupFormat.cpp b/lldb/source/Interpreter/OptionGroupFormat.cpp
index 1cc5e70282c1..a2ca9ff39818 100644
--- a/lldb/source/Interpreter/OptionGroupFormat.cpp
+++ b/lldb/source/Interpreter/OptionGroupFormat.cpp
@@ -16,15 +16,7 @@
using namespace lldb;
using namespace lldb_private;
-OptionGroupFormat::OptionGroupFormat(lldb::Format default_format,
- uint64_t default_byte_size,
- uint64_t default_count)
- : m_format(default_format, default_format),
- m_byte_size(default_byte_size, default_byte_size),
- m_count(default_count, default_count), m_prev_gdb_format('x'),
- m_prev_gdb_size('w') {}
-
-static constexpr OptionDefinition g_option_table[] = {
+static constexpr OptionDefinition g_default_option_definitions[] = {
{LLDB_OPT_SET_1, false, "format", 'f', OptionParser::eRequiredArgument,
nullptr, {}, 0, eArgTypeFormat,
"Specify a format to be used for display."},
@@ -39,8 +31,34 @@ static constexpr OptionDefinition g_option_table[] = {
"The number of total items to display."},
};
+OptionGroupFormat::OptionGroupFormat(
+ lldb::Format default_format, uint64_t default_byte_size,
+ uint64_t default_count, OptionGroupFormatUsageTextVector usage_text_vector)
+ : m_format(default_format, default_format),
+ m_byte_size(default_byte_size, default_byte_size),
+ m_count(default_count, default_count), m_prev_gdb_format('x'),
+ m_prev_gdb_size('w') {
+ // Copy the default option definitions.
+ std::copy(std::begin(g_default_option_definitions),
+ std::end(g_default_option_definitions),
+ std::begin(m_option_definitions));
+
+ for (auto usage_text_tuple : usage_text_vector) {
+ switch (std::get<0>(usage_text_tuple)) {
+ case eArgTypeFormat:
+ m_option_definitions[0].usage_text = std::get<1>(usage_text_tuple);
+ break;
+ case eArgTypeByteSize:
+ m_option_definitions[2].usage_text = std::get<1>(usage_text_tuple);
+ break;
+ default:
+ llvm_unreachable("Unimplemented option");
+ }
+ }
+}
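
For reference, a caller overrides the boilerplate usage strings by passing tuples keyed on the argument type, exactly as the memory write command does earlier in this patch; a condensed sketch (only eArgTypeFormat and eArgTypeByteSize are accepted, anything else hits the llvm_unreachable above):

  OptionGroupFormat format_options(
      eFormatBytes, /*default_byte_size=*/1, /*default_count=*/UINT64_MAX,
      {std::make_tuple(eArgTypeFormat, "The format to use for each value."),
       std::make_tuple(eArgTypeByteSize, "The size in bytes of each value.")});
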
+
llvm::ArrayRef<OptionDefinition> OptionGroupFormat::GetDefinitions() {
- auto result = llvm::makeArrayRef(g_option_table);
+ auto result = llvm::makeArrayRef(m_option_definitions);
if (m_byte_size.GetDefaultValue() < UINT64_MAX) {
if (m_count.GetDefaultValue() < UINT64_MAX)
return result;
@@ -54,7 +72,7 @@ Status OptionGroupFormat::SetOptionValue(uint32_t option_idx,
llvm::StringRef option_arg,
ExecutionContext *execution_context) {
Status error;
- const int short_option = g_option_table[option_idx].short_option;
+ const int short_option = m_option_definitions[option_idx].short_option;
switch (short_option) {
case 'f':
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.cpp
index 50e9f7827838..1437d7b58293 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangUserExpression.cpp
@@ -516,7 +516,7 @@ CppModuleConfiguration GetModuleConfig(lldb::LanguageType language,
// Try to create a configuration from the files. If there is no valid
// configuration possible with the files, this just returns an invalid
// configuration.
- return CppModuleConfiguration(files);
+ return CppModuleConfiguration(files, target->GetArchitecture().GetTriple());
}
bool ClangUserExpression::PrepareForParsing(
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/CppModuleConfiguration.cpp b/lldb/source/Plugins/ExpressionParser/Clang/CppModuleConfiguration.cpp
index ffab16b1682b..befb1f125406 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/CppModuleConfiguration.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/CppModuleConfiguration.cpp
@@ -10,6 +10,7 @@
#include "ClangHost.h"
#include "lldb/Host/FileSystem.h"
+#include "llvm/ADT/Triple.h"
using namespace lldb_private;
@@ -30,7 +31,35 @@ bool CppModuleConfiguration::SetOncePath::TrySet(llvm::StringRef path) {
return false;
}
-bool CppModuleConfiguration::analyzeFile(const FileSpec &f) {
+static llvm::SmallVector<std::string, 2>
+getTargetIncludePaths(const llvm::Triple &triple) {
+ llvm::SmallVector<std::string, 2> paths;
+ if (!triple.str().empty()) {
+ paths.push_back("/usr/include/" + triple.str());
+ if (!triple.getArchName().empty() ||
+ triple.getOSAndEnvironmentName().empty())
+ paths.push_back(("/usr/include/" + triple.getArchName() + "-" +
+ triple.getOSAndEnvironmentName())
+ .str());
+ }
+ return paths;
+}
+
+/// Returns the include path matching the given pattern for the given file
+/// path (or None if the path doesn't match the pattern).
+static llvm::Optional<llvm::StringRef>
+guessIncludePath(llvm::StringRef path_to_file, llvm::StringRef pattern) {
+ if (pattern.empty())
+ return llvm::NoneType();
+ size_t pos = path_to_file.find(pattern);
+ if (pos == llvm::StringRef::npos)
+ return llvm::NoneType();
+
+ return path_to_file.substr(0, pos + pattern.size());
+}
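
A small worked example makes the path derivation concrete: for a typical Linux triple, the two candidates are the verbatim triple directory and the Debian-style multiarch directory (a sketch using llvm::Triple directly; the printed values are what the code above would produce):

  #include "llvm/ADT/Triple.h"
  #include <cstdio>

  int main() {
    llvm::Triple triple("x86_64-pc-linux-gnu");
    // First candidate: the full triple string.
    std::printf("/usr/include/%s\n", triple.str().c_str());
    // Second candidate: arch + "-" + OS-and-environment (multiarch layout).
    std::printf("/usr/include/%s-%s\n", triple.getArchName().str().c_str(),
                triple.getOSAndEnvironmentName().str().c_str());
    // Prints:
    //   /usr/include/x86_64-pc-linux-gnu
    //   /usr/include/x86_64-linux-gnu
  }
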
+
+bool CppModuleConfiguration::analyzeFile(const FileSpec &f,
+ const llvm::Triple &triple) {
using namespace llvm::sys::path;
// Convert to slashes to make following operations simpler.
std::string dir_buffer = convert_to_slash(f.GetDirectory().GetStringRef());
@@ -43,15 +72,25 @@ bool CppModuleConfiguration::analyzeFile(const FileSpec &f) {
// need to be specified in the header search.
if (libcpp_regex.match(f.GetPath()) &&
parent_path(posix_dir, Style::posix).endswith("c++")) {
- return m_std_inc.TrySet(posix_dir);
+ if (!m_std_inc.TrySet(posix_dir))
+ return false;
+ if (triple.str().empty())
+ return true;
+
+ posix_dir.consume_back("c++/v1");
+ // Check if this is a target-specific libc++ include directory.
+ return m_std_target_inc.TrySet(
+ (posix_dir + triple.str() + "/c++/v1").str());
}
- // Check for /usr/include. On Linux this might be /usr/include/bits, so
- // we should remove that '/bits' suffix to get the actual include directory.
- if (posix_dir.endswith("/usr/include/bits"))
- posix_dir.consume_back("/bits");
- if (posix_dir.endswith("/usr/include"))
- return m_c_inc.TrySet(posix_dir);
+ llvm::Optional<llvm::StringRef> inc_path;
+ // Target-specific paths contain /usr/include, so we check them first.
+ for (auto &path : getTargetIncludePaths(triple)) {
+ if ((inc_path = guessIncludePath(posix_dir, path)))
+ return m_c_target_inc.TrySet(*inc_path);
+ }
+ if ((inc_path = guessIncludePath(posix_dir, "/usr/include")))
+ return m_c_inc.TrySet(*inc_path);
// File wasn't interesting, continue analyzing.
return true;
@@ -92,11 +131,11 @@ bool CppModuleConfiguration::hasValidConfig() {
}
CppModuleConfiguration::CppModuleConfiguration(
- const FileSpecList &support_files) {
+ const FileSpecList &support_files, const llvm::Triple &triple) {
// Analyze all files we were given to build the configuration.
bool error = !llvm::all_of(support_files,
std::bind(&CppModuleConfiguration::analyzeFile,
- this, std::placeholders::_1));
+ this, std::placeholders::_1, triple));
// If we have a valid configuration at this point, set the
// include directories and module list that should be used.
if (!error && hasValidConfig()) {
@@ -109,6 +148,10 @@ CppModuleConfiguration::CppModuleConfiguration(
// This order matches the way Clang orders these directories.
m_include_dirs = {m_std_inc.Get().str(), m_resource_inc,
m_c_inc.Get().str()};
+ if (m_c_target_inc.Valid())
+ m_include_dirs.push_back(m_c_target_inc.Get().str());
+ if (m_std_target_inc.Valid())
+ m_include_dirs.push_back(m_std_target_inc.Get().str());
m_imported_modules = {"std"};
}
}
diff --git a/lldb/source/Plugins/ExpressionParser/Clang/CppModuleConfiguration.h b/lldb/source/Plugins/ExpressionParser/Clang/CppModuleConfiguration.h
index 907db5d625dc..5db8abbdbdf3 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/CppModuleConfiguration.h
+++ b/lldb/source/Plugins/ExpressionParser/Clang/CppModuleConfiguration.h
@@ -42,8 +42,15 @@ class CppModuleConfiguration {
/// If valid, the include path used for the std module.
SetOncePath m_std_inc;
+ /// If valid, the per-target include path used for the std module.
+ /// This is an optional path only required on some systems.
+ SetOncePath m_std_target_inc;
/// If valid, the include path to the C library (e.g. /usr/include).
SetOncePath m_c_inc;
+ /// If valid, the include path to target-specific C library files
+ /// (e.g. /usr/include/x86_64-linux-gnu).
+ /// This is an optional path only required on some systems.
+ SetOncePath m_c_target_inc;
/// The Clang resource include path for this configuration.
std::string m_resource_inc;
@@ -53,11 +60,13 @@ class CppModuleConfiguration {
/// Analyze a given source file to build the current configuration.
/// Returns false iff there was a fatal error that makes analyzing any
/// further files pointless as the configuration is now invalid.
- bool analyzeFile(const FileSpec &f);
+ bool analyzeFile(const FileSpec &f, const llvm::Triple &triple);
public:
/// Creates a configuration by analyzing the given list of used source files.
- explicit CppModuleConfiguration(const FileSpecList &support_files);
+ /// The triple (if valid) is used to search for target-specific include paths.
+ explicit CppModuleConfiguration(const FileSpecList &support_files,
+ const llvm::Triple &triple);
/// Creates an empty and invalid configuration.
CppModuleConfiguration() = default;
diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
index 83e8e52b86f2..f1925990e94a 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
@@ -914,11 +914,21 @@ static void LoadLibStdcppFormatters(lldb::TypeCategoryImplSP cpp_category_sp) {
stl_deref_flags,
"lldb.formatters.cpp.gnu_libstdcpp.StdMapLikeSynthProvider")));
cpp_category_sp->GetRegexTypeSyntheticsContainer()->Add(
+ RegularExpression("^std::optional<.+>(( )?&)?$"),
+ SyntheticChildrenSP(new ScriptedSyntheticChildren(
+ stl_synth_flags,
+ "lldb.formatters.cpp.gnu_libstdcpp.StdOptionalSynthProvider")));
+ cpp_category_sp->GetRegexTypeSyntheticsContainer()->Add(
RegularExpression("^std::multiset<.+> >(( )?&)?$"),
SyntheticChildrenSP(new ScriptedSyntheticChildren(
stl_deref_flags,
"lldb.formatters.cpp.gnu_libstdcpp.StdMapLikeSynthProvider")));
cpp_category_sp->GetRegexTypeSyntheticsContainer()->Add(
+ RegularExpression("^std::unordered_(multi)?(map|set)<.+> >$"),
+ SyntheticChildrenSP(new ScriptedSyntheticChildren(
+ stl_deref_flags,
+ "lldb.formatters.cpp.gnu_libstdcpp.StdUnorderedMapSynthProvider")));
+ cpp_category_sp->GetRegexTypeSyntheticsContainer()->Add(
RegularExpression("^std::(__cxx11::)?list<.+>(( )?&)?$"),
SyntheticChildrenSP(new ScriptedSyntheticChildren(
stl_synth_flags,
@@ -928,9 +938,15 @@ static void LoadLibStdcppFormatters(lldb::TypeCategoryImplSP cpp_category_sp) {
SyntheticChildrenSP(new ScriptedSyntheticChildren(
stl_synth_flags,
"lldb.formatters.cpp.gnu_libstdcpp.StdForwardListSynthProvider")));
+
stl_summary_flags.SetDontShowChildren(false);
stl_summary_flags.SetSkipPointers(false);
cpp_category_sp->GetRegexTypeSummariesContainer()->Add(
+ RegularExpression("^std::optional<.+>(( )?&)?$"),
+ TypeSummaryImplSP(new ScriptSummaryFormat(
+ stl_summary_flags,
+ "lldb.formatters.cpp.gnu_libstdcpp.StdOptionalSummaryProvider")));
+ cpp_category_sp->GetRegexTypeSummariesContainer()->Add(
RegularExpression("^std::bitset<.+>(( )?&)?$"),
TypeSummaryImplSP(
new StringSummaryFormat(stl_summary_flags, "size=${svar%#}")));
@@ -955,13 +971,17 @@ static void LoadLibStdcppFormatters(lldb::TypeCategoryImplSP cpp_category_sp) {
TypeSummaryImplSP(
new StringSummaryFormat(stl_summary_flags, "size=${svar%#}")));
cpp_category_sp->GetRegexTypeSummariesContainer()->Add(
+ RegularExpression("^std::unordered_(multi)?(map|set)<.+> >$"),
+ TypeSummaryImplSP(
+ new StringSummaryFormat(stl_summary_flags, "size=${svar%#}")));
+ cpp_category_sp->GetRegexTypeSummariesContainer()->Add(
RegularExpression("^std::(__cxx11::)?list<.+>(( )?&)?$"),
TypeSummaryImplSP(
new StringSummaryFormat(stl_summary_flags, "size=${svar%#}")));
cpp_category_sp->GetRegexTypeSummariesContainer()->Add(
RegularExpression("^std::(__cxx11::)?forward_list<.+>(( )?&)?$"),
TypeSummaryImplSP(
- new StringSummaryFormat(stl_summary_flags, "size=${svar%#}")));
+ new ScriptSummaryFormat(
+ stl_summary_flags,
+ "lldb.formatters.cpp.gnu_libstdcpp.ForwardListSummaryProvider")));
AddCXXSynthetic(
cpp_category_sp,
diff --git a/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp b/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp
index fc8255983436..c8063915b178 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp
@@ -81,12 +81,11 @@ bool GenericBitsetFrontEnd::Update() {
TargetSP target_sp = m_backend.GetTargetSP();
if (!target_sp)
return false;
- size_t capping_size = target_sp->GetMaximumNumberOfChildrenToDisplay();
size_t size = 0;
if (auto arg = m_backend.GetCompilerType().GetIntegralTemplateArgument(0))
- size = arg->value.getLimitedValue(capping_size);
+ size = arg->value.getLimitedValue();
m_elements.assign(size, ValueObjectSP());
m_first = m_backend.GetChildMemberWithName(GetDataContainerMemberName(), true)
diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxOptional.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxOptional.cpp
index c0c819632851..c1b40ba65e7d 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxOptional.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxOptional.cpp
@@ -45,7 +45,7 @@ bool OptionalFrontEnd::Update() {
// __engaged_ is a bool flag and is true if the optional contains a value.
// Converting it to unsigned gives us a size of 1 if it contains a value
// and 0 if not.
- m_has_value = engaged_sp->GetValueAsUnsigned(0) == 1;
+ m_has_value = engaged_sp->GetValueAsUnsigned(0) != 0;
return false;
}
diff --git a/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp b/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp
index 3a441973fc73..57c5ba87c397 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/LibCxxUnorderedMap.cpp
@@ -62,9 +62,7 @@ lldb_private::formatters::LibcxxStdUnorderedMapSyntheticFrontEnd::
size_t lldb_private::formatters::LibcxxStdUnorderedMapSyntheticFrontEnd::
CalculateNumChildren() {
- if (m_num_elements != UINT32_MAX)
- return m_num_elements;
- return 0;
+ return m_num_elements;
}
lldb::ValueObjectSP lldb_private::formatters::
@@ -160,7 +158,7 @@ lldb::ValueObjectSP lldb_private::formatters::
bool lldb_private::formatters::LibcxxStdUnorderedMapSyntheticFrontEnd::
Update() {
- m_num_elements = UINT32_MAX;
+ m_num_elements = 0;
m_next_element = nullptr;
m_elements_cache.clear();
ValueObjectSP table_sp =
@@ -195,8 +193,13 @@ bool lldb_private::formatters::LibcxxStdUnorderedMapSyntheticFrontEnd::
if (!num_elements_sp)
return false;
- m_num_elements = num_elements_sp->GetValueAsUnsigned(0);
+
m_tree = table_sp->GetChildAtNamePath(next_path).get();
+ if (m_tree == nullptr)
+ return false;
+
+ m_num_elements = num_elements_sp->GetValueAsUnsigned(0);
+
if (m_num_elements > 0)
m_next_element =
table_sp->GetChildAtNamePath(next_path).get();
diff --git a/lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.cpp b/lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.cpp
index bad730512ff4..ce701fd823fd 100644
--- a/lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.cpp
+++ b/lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.cpp
@@ -116,9 +116,10 @@ bool ObjectFileBreakpad::ParseHeader() {
return true;
}
-Symtab *ObjectFileBreakpad::GetSymtab() {
- // TODO
- return nullptr;
+void ObjectFileBreakpad::ParseSymtab(Symtab &symtab) {
+ // Nothing to do for breakpad files: all information is parsed as debug
+ // info, which means "lldb_private::Function" objects are used, or symbols
+ // are added by the SymbolFileBreakpad::AddSymbols(...) function in the
+ // symbol file.
}
void ObjectFileBreakpad::CreateSections(SectionList &unified_section_list) {
diff --git a/lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.h b/lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.h
index c320c7ad3e2e..f04e0b4dd7a7 100644
--- a/lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.h
+++ b/lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.h
@@ -71,7 +71,7 @@ public:
return AddressClass::eInvalid;
}
- Symtab *GetSymtab() override;
+ void ParseSymtab(lldb_private::Symtab &symtab) override;
bool IsStripped() override { return false; }
diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
index 8e0f228a988f..96e94ef08a45 100644
--- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
+++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
@@ -2687,155 +2687,131 @@ unsigned ObjectFileELF::RelocateDebugSections(const ELFSectionHeader *rel_hdr,
return 0;
}
-Symtab *ObjectFileELF::GetSymtab() {
+void ObjectFileELF::ParseSymtab(Symtab &lldb_symtab) {
ModuleSP module_sp(GetModule());
if (!module_sp)
- return nullptr;
+ return;
+
+ Progress progress(
+ llvm::formatv("Parsing symbol table for {0}",
+ m_file.GetFilename().AsCString("<Unknown>")));
+ ElapsedTime elapsed(module_sp->GetSymtabParseTime());
// We always want to use the main object file so we (hopefully) only have one
// cached copy of our symtab, dynamic sections, etc.
ObjectFile *module_obj_file = module_sp->GetObjectFile();
if (module_obj_file && module_obj_file != this)
- return module_obj_file->GetSymtab();
-
- if (m_symtab_up == nullptr) {
- Progress progress(
- llvm::formatv("Parsing symbol table for {0}",
- m_file.GetFilename().AsCString("<Unknown>")));
- ElapsedTime elapsed(module_sp->GetSymtabParseTime());
- SectionList *section_list = module_sp->GetSectionList();
- if (!section_list)
- return nullptr;
+ return module_obj_file->ParseSymtab(lldb_symtab);
- uint64_t symbol_id = 0;
- std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
-
- // Sharable objects and dynamic executables usually have 2 distinct symbol
- // tables, one named ".symtab", and the other ".dynsym". The dynsym is a
- // smaller version of the symtab that only contains global symbols. The
- // information found in the dynsym is therefore also found in the symtab,
- // while the reverse is not necessarily true.
- Section *symtab =
- section_list->FindSectionByType(eSectionTypeELFSymbolTable, true).get();
- if (symtab) {
- m_symtab_up = std::make_unique<Symtab>(symtab->GetObjectFile());
- symbol_id += ParseSymbolTable(m_symtab_up.get(), symbol_id, symtab);
- }
-
- // The symtab section is non-allocable and can be stripped, while the
- // .dynsym section which should always be always be there. To support the
- // minidebuginfo case we parse .dynsym when there's a .gnu_debuginfo
- // section, nomatter if .symtab was already parsed or not. This is because
- // minidebuginfo normally removes the .symtab symbols which have their
- // matching .dynsym counterparts.
- if (!symtab ||
- GetSectionList()->FindSectionByName(ConstString(".gnu_debugdata"))) {
- Section *dynsym =
- section_list->FindSectionByType(eSectionTypeELFDynamicSymbols, true)
- .get();
- if (dynsym) {
- if (!m_symtab_up)
- m_symtab_up = std::make_unique<Symtab>(dynsym->GetObjectFile());
- symbol_id += ParseSymbolTable(m_symtab_up.get(), symbol_id, dynsym);
- }
- }
+ SectionList *section_list = module_sp->GetSectionList();
+ if (!section_list)
+ return;
- // DT_JMPREL
- // If present, this entry's d_ptr member holds the address of
- // relocation
- // entries associated solely with the procedure linkage table.
- // Separating
- // these relocation entries lets the dynamic linker ignore them during
- // process initialization, if lazy binding is enabled. If this entry is
- // present, the related entries of types DT_PLTRELSZ and DT_PLTREL must
- // also be present.
- const ELFDynamic *symbol = FindDynamicSymbol(DT_JMPREL);
- if (symbol) {
- // Synthesize trampoline symbols to help navigate the PLT.
- addr_t addr = symbol->d_ptr;
- Section *reloc_section =
- section_list->FindSectionContainingFileAddress(addr).get();
- if (reloc_section) {
- user_id_t reloc_id = reloc_section->GetID();
- const ELFSectionHeaderInfo *reloc_header =
- GetSectionHeaderByIndex(reloc_id);
- if (reloc_header) {
- if (m_symtab_up == nullptr)
- m_symtab_up =
- std::make_unique<Symtab>(reloc_section->GetObjectFile());
-
- ParseTrampolineSymbols(m_symtab_up.get(), symbol_id, reloc_header,
- reloc_id);
- }
- }
- }
+ uint64_t symbol_id = 0;
- if (DWARFCallFrameInfo *eh_frame =
- GetModule()->GetUnwindTable().GetEHFrameInfo()) {
- if (m_symtab_up == nullptr)
- m_symtab_up = std::make_unique<Symtab>(this);
- ParseUnwindSymbols(m_symtab_up.get(), eh_frame);
+  // Shared objects and dynamic executables usually have 2 distinct symbol
+ // tables, one named ".symtab", and the other ".dynsym". The dynsym is a
+ // smaller version of the symtab that only contains global symbols. The
+ // information found in the dynsym is therefore also found in the symtab,
+ // while the reverse is not necessarily true.
+ Section *symtab =
+ section_list->FindSectionByType(eSectionTypeELFSymbolTable, true).get();
+ if (symtab)
+ symbol_id += ParseSymbolTable(&lldb_symtab, symbol_id, symtab);
+
+  // The symtab section is non-allocable and can be stripped, while the
+  // .dynsym section should always be there. To support the minidebuginfo
+  // case we parse .dynsym when there's a .gnu_debugdata section, no matter
+  // if .symtab was already parsed or not. This is because minidebuginfo
+  // normally removes the .symtab symbols which have their matching .dynsym
+  // counterparts.
+ if (!symtab ||
+ GetSectionList()->FindSectionByName(ConstString(".gnu_debugdata"))) {
+ Section *dynsym =
+ section_list->FindSectionByType(eSectionTypeELFDynamicSymbols, true)
+ .get();
+ if (dynsym)
+ symbol_id += ParseSymbolTable(&lldb_symtab, symbol_id, dynsym);
+ }
+
+ // DT_JMPREL
+ // If present, this entry's d_ptr member holds the address of
+ // relocation
+ // entries associated solely with the procedure linkage table.
+ // Separating
+ // these relocation entries lets the dynamic linker ignore them during
+ // process initialization, if lazy binding is enabled. If this entry is
+ // present, the related entries of types DT_PLTRELSZ and DT_PLTREL must
+ // also be present.
+ const ELFDynamic *symbol = FindDynamicSymbol(DT_JMPREL);
+ if (symbol) {
+ // Synthesize trampoline symbols to help navigate the PLT.
+ addr_t addr = symbol->d_ptr;
+ Section *reloc_section =
+ section_list->FindSectionContainingFileAddress(addr).get();
+ if (reloc_section) {
+ user_id_t reloc_id = reloc_section->GetID();
+ const ELFSectionHeaderInfo *reloc_header =
+ GetSectionHeaderByIndex(reloc_id);
+ if (reloc_header)
+ ParseTrampolineSymbols(&lldb_symtab, symbol_id, reloc_header, reloc_id);
}
+ }
- // If we still don't have any symtab then create an empty instance to avoid
- // do the section lookup next time.
- if (m_symtab_up == nullptr)
- m_symtab_up = std::make_unique<Symtab>(this);
-
- // In the event that there's no symbol entry for the entry point we'll
- // artificially create one. We delegate to the symtab object the figuring
- // out of the proper size, this will usually make it span til the next
- // symbol it finds in the section. This means that if there are missing
- // symbols the entry point might span beyond its function definition.
- // We're fine with this as it doesn't make it worse than not having a
- // symbol entry at all.
- if (CalculateType() == eTypeExecutable) {
- ArchSpec arch = GetArchitecture();
- auto entry_point_addr = GetEntryPointAddress();
- bool is_valid_entry_point =
- entry_point_addr.IsValid() && entry_point_addr.IsSectionOffset();
- addr_t entry_point_file_addr = entry_point_addr.GetFileAddress();
- if (is_valid_entry_point && !m_symtab_up->FindSymbolContainingFileAddress(
- entry_point_file_addr)) {
- uint64_t symbol_id = m_symtab_up->GetNumSymbols();
- // Don't set the name for any synthetic symbols, the Symbol
- // object will generate one if needed when the name is accessed
- // via accessors.
- SectionSP section_sp = entry_point_addr.GetSection();
- Symbol symbol(
- /*symID=*/symbol_id,
- /*name=*/llvm::StringRef(), // Name will be auto generated.
- /*type=*/eSymbolTypeCode,
- /*external=*/true,
- /*is_debug=*/false,
- /*is_trampoline=*/false,
- /*is_artificial=*/true,
- /*section_sp=*/section_sp,
- /*offset=*/0,
- /*size=*/0, // FDE can span multiple symbols so don't use its size.
- /*size_is_valid=*/false,
- /*contains_linker_annotations=*/false,
- /*flags=*/0);
- // When the entry point is arm thumb we need to explicitly set its
- // class address to reflect that. This is important because expression
- // evaluation relies on correctly setting a breakpoint at this
- // address.
- if (arch.GetMachine() == llvm::Triple::arm &&
- (entry_point_file_addr & 1)) {
- symbol.GetAddressRef().SetOffset(entry_point_addr.GetOffset() ^ 1);
- m_address_class_map[entry_point_file_addr ^ 1] =
- AddressClass::eCodeAlternateISA;
- } else {
- m_address_class_map[entry_point_file_addr] = AddressClass::eCode;
- }
- m_symtab_up->AddSymbol(symbol);
+ if (DWARFCallFrameInfo *eh_frame =
+ GetModule()->GetUnwindTable().GetEHFrameInfo()) {
+ ParseUnwindSymbols(&lldb_symtab, eh_frame);
+ }
+
+  // In the event that there's no symbol entry for the entry point we'll
+  // artificially create one. We delegate figuring out the proper size to
+  // the symtab object; this will usually make it span until the next
+  // symbol it finds in the section. This means that if there are missing
+  // symbols the entry point might span beyond its function definition.
+  // We're fine with this as it doesn't make it worse than not having a
+  // symbol entry at all.
+ if (CalculateType() == eTypeExecutable) {
+ ArchSpec arch = GetArchitecture();
+ auto entry_point_addr = GetEntryPointAddress();
+ bool is_valid_entry_point =
+ entry_point_addr.IsValid() && entry_point_addr.IsSectionOffset();
+ addr_t entry_point_file_addr = entry_point_addr.GetFileAddress();
+ if (is_valid_entry_point && !lldb_symtab.FindSymbolContainingFileAddress(
+ entry_point_file_addr)) {
+ uint64_t symbol_id = lldb_symtab.GetNumSymbols();
+ // Don't set the name for any synthetic symbols, the Symbol
+ // object will generate one if needed when the name is accessed
+ // via accessors.
+ SectionSP section_sp = entry_point_addr.GetSection();
+ Symbol symbol(
+ /*symID=*/symbol_id,
+ /*name=*/llvm::StringRef(), // Name will be auto generated.
+ /*type=*/eSymbolTypeCode,
+ /*external=*/true,
+ /*is_debug=*/false,
+ /*is_trampoline=*/false,
+ /*is_artificial=*/true,
+ /*section_sp=*/section_sp,
+ /*offset=*/0,
+ /*size=*/0, // FDE can span multiple symbols so don't use its size.
+ /*size_is_valid=*/false,
+ /*contains_linker_annotations=*/false,
+ /*flags=*/0);
+      // When the entry point is ARM Thumb we need to explicitly set its
+      // address class to reflect that. This is important because expression
+      // evaluation relies on correctly setting a breakpoint at this
+      // address.
+ if (arch.GetMachine() == llvm::Triple::arm &&
+ (entry_point_file_addr & 1)) {
+ symbol.GetAddressRef().SetOffset(entry_point_addr.GetOffset() ^ 1);
+ m_address_class_map[entry_point_file_addr ^ 1] =
+ AddressClass::eCodeAlternateISA;
+ } else {
+ m_address_class_map[entry_point_file_addr] = AddressClass::eCode;
}
+ lldb_symtab.AddSymbol(symbol);
}
-
- m_symtab_up->CalculateSymbolSizes();
}
-
- return m_symtab_up.get();
}
void ObjectFileELF::RelocateSection(lldb_private::Section *section)
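Progress and ElapsedTime are RAII objects, so hoisting them to the top of ParseSymtab charges every exit path, including the early returns added above. A standalone sketch of the accumulate-on-destruction idea, with a hypothetical ScopedElapsedTime in place of LLDB's ElapsedTime:

#include <atomic>
#include <chrono>

// Accumulates the lifetime of each scope into a shared total, so every
// return path is measured without explicit bookkeeping.
class ScopedElapsedTime {
public:
  explicit ScopedElapsedTime(std::atomic<double> &total_seconds)
      : m_total(total_seconds),
        m_start(std::chrono::steady_clock::now()) {}
  ~ScopedElapsedTime() {
    std::chrono::duration<double> d =
        std::chrono::steady_clock::now() - m_start;
    // fetch_add on atomic<double> is C++20, so use a CAS loop instead.
    double old = m_total.load(std::memory_order_relaxed);
    while (!m_total.compare_exchange_weak(old, old + d.count(),
                                          std::memory_order_relaxed)) {
    }
  }

private:
  std::atomic<double> &m_total;
  std::chrono::steady_clock::time_point m_start;
};

std::atomic<double> g_symtab_parse_seconds{0.0};

void ParseSomething(bool bail_early) {
  ScopedElapsedTime elapsed(g_symtab_parse_seconds);
  if (bail_early)
    return; // still charged, thanks to RAII
  // ... actual parsing ...
}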
diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
index 5738e5cf60d5..554f623ec8af 100644
--- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
+++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
@@ -110,7 +110,7 @@ public:
lldb_private::AddressClass GetAddressClass(lldb::addr_t file_addr) override;
- lldb_private::Symtab *GetSymtab() override;
+ void ParseSymtab(lldb_private::Symtab &symtab) override;
bool IsStripped() override;
@@ -123,7 +123,7 @@ public:
lldb_private::UUID GetUUID() override;
/// Return the contents of the .gnu_debuglink section, if the object file
- /// contains it.
+ /// contains it.
llvm::Optional<lldb_private::FileSpec> GetDebugLink();
uint32_t GetDependentModules(lldb_private::FileSpecList &files) override;
@@ -278,8 +278,9 @@ private:
/// number of dynamic symbols parsed.
size_t ParseDynamicSymbols();
- /// Populates m_symtab_up will all non-dynamic linker symbols. This method
- /// will parse the symbols only once. Returns the number of symbols parsed.
+ /// Populates the symbol table with all non-dynamic linker symbols. This
+ /// method will parse the symbols only once. Returns the number of symbols
+ /// parsed.
unsigned ParseSymbolTable(lldb_private::Symtab *symbol_table,
lldb::user_id_t start_id,
lldb_private::Section *symtab);
@@ -384,7 +385,7 @@ private:
lldb_private::UUID &uuid);
bool AnySegmentHasPhysicalAddress();
-
+
+  /// Takes the .gnu_debugdata section and returns the decompressed object
+  /// file that is stored within it.
+  ///
diff --git a/lldb/source/Plugins/ObjectFile/JIT/ObjectFileJIT.cpp b/lldb/source/Plugins/ObjectFile/JIT/ObjectFileJIT.cpp
index bec0099517c8..ca9337454889 100644
--- a/lldb/source/Plugins/ObjectFile/JIT/ObjectFileJIT.cpp
+++ b/lldb/source/Plugins/ObjectFile/JIT/ObjectFileJIT.cpp
@@ -106,23 +106,10 @@ uint32_t ObjectFileJIT::GetAddressByteSize() const {
return m_data.GetAddressByteSize();
}
-Symtab *ObjectFileJIT::GetSymtab() {
- ModuleSP module_sp(GetModule());
- if (module_sp) {
- std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
- if (m_symtab_up == nullptr) {
- ElapsedTime elapsed(module_sp->GetSymtabParseTime());
- m_symtab_up = std::make_unique<Symtab>(this);
- std::lock_guard<std::recursive_mutex> symtab_guard(
- m_symtab_up->GetMutex());
- ObjectFileJITDelegateSP delegate_sp(m_delegate_wp.lock());
- if (delegate_sp)
- delegate_sp->PopulateSymtab(this, *m_symtab_up);
- // TODO: get symbols from delegate
- m_symtab_up->Finalize();
- }
- }
- return m_symtab_up.get();
+void ObjectFileJIT::ParseSymtab(Symtab &symtab) {
+ ObjectFileJITDelegateSP delegate_sp(m_delegate_wp.lock());
+ if (delegate_sp)
+ delegate_sp->PopulateSymtab(this, symtab);
}
bool ObjectFileJIT::IsStripped() {
diff --git a/lldb/source/Plugins/ObjectFile/JIT/ObjectFileJIT.h b/lldb/source/Plugins/ObjectFile/JIT/ObjectFileJIT.h
index 03ac001988a0..be31139df549 100644
--- a/lldb/source/Plugins/ObjectFile/JIT/ObjectFileJIT.h
+++ b/lldb/source/Plugins/ObjectFile/JIT/ObjectFileJIT.h
@@ -67,7 +67,7 @@ public:
uint32_t GetAddressByteSize() const override;
- lldb_private::Symtab *GetSymtab() override;
+ void ParseSymtab(lldb_private::Symtab &symtab) override;
bool IsStripped() override;
diff --git a/lldb/source/Plugins/ObjectFile/PDB/ObjectFilePDB.h b/lldb/source/Plugins/ObjectFile/PDB/ObjectFilePDB.h
index 36e71e21332f..da999d2b55a7 100644
--- a/lldb/source/Plugins/ObjectFile/PDB/ObjectFilePDB.h
+++ b/lldb/source/Plugins/ObjectFile/PDB/ObjectFilePDB.h
@@ -68,7 +68,7 @@ public:
bool IsExecutable() const override { return false; }
- Symtab *GetSymtab() override { return nullptr; }
+ void ParseSymtab(lldb_private::Symtab &symtab) override {}
bool IsStripped() override { return false; }
diff --git a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
index 0e6329885528..7445f8311c50 100644
--- a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
+++ b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
@@ -246,7 +246,7 @@ bool ObjectFileWasm::ParseHeader() {
return true;
}
-Symtab *ObjectFileWasm::GetSymtab() { return nullptr; }
+void ObjectFileWasm::ParseSymtab(Symtab &symtab) {}
static SectionType GetSectionTypeFromName(llvm::StringRef Name) {
if (Name.consume_front(".debug_") || Name.consume_front(".zdebug_")) {
diff --git a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.h b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.h
index 44939b6d4ea0..d7b5bc22caad 100644
--- a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.h
+++ b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.h
@@ -78,7 +78,7 @@ public:
return AddressClass::eInvalid;
}
- Symtab *GetSymtab() override;
+ void ParseSymtab(lldb_private::Symtab &symtab) override;
bool IsStripped() override { return !!GetExternalDebugInfoFileSpec(); }
diff --git a/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUser.cpp b/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUser.cpp
new file mode 100644
index 000000000000..90c290b6fbc7
--- /dev/null
+++ b/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUser.cpp
@@ -0,0 +1,148 @@
+//===-- PlatformQemuUser.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Plugins/Platform/QemuUser/PlatformQemuUser.h"
+#include "Plugins/Process/gdb-remote/ProcessGDBRemote.h"
+#include "lldb/Core/PluginManager.h"
+#include "lldb/Host/FileSystem.h"
+#include "lldb/Host/ProcessLaunchInfo.h"
+#include "lldb/Interpreter/OptionValueProperties.h"
+#include "lldb/Target/Process.h"
+#include "lldb/Target/Target.h"
+#include "lldb/Utility/Listener.h"
+#include "lldb/Utility/Log.h"
+
+using namespace lldb;
+using namespace lldb_private;
+
+LLDB_PLUGIN_DEFINE(PlatformQemuUser)
+
+#define LLDB_PROPERTIES_platformqemuuser
+#include "PlatformQemuUserProperties.inc"
+
+enum {
+#define LLDB_PROPERTIES_platformqemuuser
+#include "PlatformQemuUserPropertiesEnum.inc"
+};
+
+class PluginProperties : public Properties {
+public:
+ PluginProperties() {
+ m_collection_sp = std::make_shared<OptionValueProperties>(
+ ConstString(PlatformQemuUser::GetPluginNameStatic()));
+ m_collection_sp->Initialize(g_platformqemuuser_properties);
+ }
+
+ llvm::StringRef GetArchitecture() {
+ return m_collection_sp->GetPropertyAtIndexAsString(
+ nullptr, ePropertyArchitecture, "");
+ }
+
+ FileSpec GetEmulatorPath() {
+ return m_collection_sp->GetPropertyAtIndexAsFileSpec(nullptr,
+ ePropertyEmulatorPath);
+ }
+};
+
+static PluginProperties &GetGlobalProperties() {
+ static PluginProperties g_settings;
+ return g_settings;
+}
+
+llvm::StringRef PlatformQemuUser::GetPluginDescriptionStatic() {
+ return "Platform for debugging binaries under user mode qemu";
+}
+
+void PlatformQemuUser::Initialize() {
+ PluginManager::RegisterPlugin(
+ GetPluginNameStatic(), GetPluginDescriptionStatic(),
+ PlatformQemuUser::CreateInstance, PlatformQemuUser::DebuggerInitialize);
+}
+
+void PlatformQemuUser::Terminate() {
+ PluginManager::UnregisterPlugin(PlatformQemuUser::CreateInstance);
+}
+
+void PlatformQemuUser::DebuggerInitialize(Debugger &debugger) {
+ if (!PluginManager::GetSettingForPlatformPlugin(
+ debugger, ConstString(GetPluginNameStatic()))) {
+ PluginManager::CreateSettingForPlatformPlugin(
+ debugger, GetGlobalProperties().GetValueProperties(),
+ ConstString("Properties for the qemu-user platform plugin."),
+ /*is_global_property=*/true);
+ }
+}
+
+PlatformSP PlatformQemuUser::CreateInstance(bool force, const ArchSpec *arch) {
+ if (force)
+ return PlatformSP(new PlatformQemuUser());
+ return nullptr;
+}
+
+std::vector<ArchSpec> PlatformQemuUser::GetSupportedArchitectures() {
+ llvm::Triple triple = HostInfo::GetArchitecture().GetTriple();
+ triple.setEnvironment(llvm::Triple::UnknownEnvironment);
+ triple.setArchName(GetGlobalProperties().GetArchitecture());
+ if (triple.getArch() != llvm::Triple::UnknownArch)
+ return {ArchSpec(triple)};
+ return {};
+}
+
+static auto get_arg_range(const Args &args) {
+ return llvm::make_range(args.GetArgumentArrayRef().begin(),
+ args.GetArgumentArrayRef().end());
+}
+
+lldb::ProcessSP PlatformQemuUser::DebugProcess(ProcessLaunchInfo &launch_info,
+ Debugger &debugger,
+ Target &target, Status &error) {
+ Log *log = GetLogIfAnyCategoriesSet(LIBLLDB_LOG_PLATFORM);
+
+ std::string qemu = GetGlobalProperties().GetEmulatorPath().GetPath();
+
+ llvm::SmallString<0> socket_model, socket_path;
+ HostInfo::GetProcessTempDir().GetPath(socket_model);
+ llvm::sys::path::append(socket_model, "qemu-%%%%%%%%.socket");
+ do {
+ llvm::sys::fs::createUniquePath(socket_model, socket_path, false);
+ } while (FileSystem::Instance().Exists(socket_path));
+
+ Args args(
+ {qemu, "-g", socket_path, launch_info.GetExecutableFile().GetPath()});
+ for (size_t i = 1; i < launch_info.GetArguments().size(); ++i)
+ args.AppendArgument(launch_info.GetArguments()[i].ref());
+
+ LLDB_LOG(log, "{0} -> {1}", get_arg_range(launch_info.GetArguments()),
+ get_arg_range(args));
+
+ launch_info.SetArguments(args, true);
+ launch_info.SetLaunchInSeparateProcessGroup(true);
+ launch_info.GetFlags().Clear(eLaunchFlagDebug);
+ launch_info.SetMonitorProcessCallback(ProcessLaunchInfo::NoOpMonitorCallback,
+ false);
+
+ error = Host::LaunchProcess(launch_info);
+ if (error.Fail())
+ return nullptr;
+
+ ProcessSP process_sp = target.CreateProcess(
+ launch_info.GetListener(),
+ process_gdb_remote::ProcessGDBRemote::GetPluginNameStatic(), nullptr,
+ true);
+ ListenerSP listener_sp =
+ Listener::MakeListener("lldb.platform_qemu_user.debugprocess");
+ launch_info.SetHijackListener(listener_sp);
+ Process::ProcessEventHijacker hijacker(*process_sp, listener_sp);
+
+ error = process_sp->ConnectRemote(("unix-connect://" + socket_path).str());
+ if (error.Fail())
+ return nullptr;
+
+ process_sp->WaitForProcessToStop(llvm::None, nullptr, false, listener_sp);
+ return process_sp;
+}
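DebugProcess above launches qemu with "-g <socket>" and then attaches a gdb-remote process to that socket. The socket name is probed in a loop because createUniquePath only expands the %%%% pattern without touching the filesystem; the file itself is created later by qemu. A self-contained sketch of that probing loop, using llvm::sys::fs::exists in place of LLDB's FileSystem wrapper:

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"

// Expand a %%%%-style model until the resulting name is unused. This is
// inherently racy (nothing reserves the name), which is acceptable here
// since qemu binds the socket immediately afterwards.
llvm::SmallString<128> MakeQemuSocketPath(llvm::StringRef temp_dir) {
  llvm::SmallString<128> model(temp_dir);
  llvm::sys::path::append(model, "qemu-%%%%%%%%.socket");
  llvm::SmallString<128> path;
  do {
    llvm::sys::fs::createUniquePath(model, path, /*MakeAbsolute=*/false);
  } while (llvm::sys::fs::exists(path));
  return path;
}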
diff --git a/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUser.h b/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUser.h
new file mode 100644
index 000000000000..f4f5d224a8cd
--- /dev/null
+++ b/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUser.h
@@ -0,0 +1,57 @@
+//===-- PlatformQemuUser.h ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "lldb/Host/Host.h"
+#include "lldb/Host/HostInfo.h"
+#include "lldb/Target/Platform.h"
+
+namespace lldb_private {
+
+class PlatformQemuUser : public Platform {
+public:
+ static void Initialize();
+ static void Terminate();
+
+ static llvm::StringRef GetPluginNameStatic() { return "qemu-user"; }
+ static llvm::StringRef GetPluginDescriptionStatic();
+
+ llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
+ llvm::StringRef GetDescription() override {
+ return GetPluginDescriptionStatic();
+ }
+
+ UserIDResolver &GetUserIDResolver() override {
+ return HostInfo::GetUserIDResolver();
+ }
+
+ std::vector<ArchSpec> GetSupportedArchitectures() override;
+
+ lldb::ProcessSP DebugProcess(ProcessLaunchInfo &launch_info,
+ Debugger &debugger, Target &target,
+ Status &error) override;
+
+ lldb::ProcessSP Attach(ProcessAttachInfo &attach_info, Debugger &debugger,
+ Target *target, Status &status) override {
+ status.SetErrorString("Not supported");
+ return nullptr;
+ }
+
+ bool IsConnected() const override { return true; }
+
+ void CalculateTrapHandlerSymbolNames() override {}
+
+ Environment GetEnvironment() override { return Host::GetEnvironment(); }
+
+private:
+ static lldb::PlatformSP CreateInstance(bool force, const ArchSpec *arch);
+ static void DebuggerInitialize(Debugger &debugger);
+
+ PlatformQemuUser() : Platform(/*is_host=*/false) {}
+};
+
+} // namespace lldb_private
diff --git a/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUserProperties.td b/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUserProperties.td
new file mode 100644
index 000000000000..abfab7f59de4
--- /dev/null
+++ b/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUserProperties.td
@@ -0,0 +1,12 @@
+include "../../../../include/lldb/Core/PropertiesBase.td"
+
+let Definition = "platformqemuuser" in {
+ def Architecture: Property<"architecture", "String">,
+ Global,
+ DefaultStringValue<"">,
+ Desc<"Architecture to emulate.">;
+ def EmulatorPath: Property<"emulator-path", "FileSpec">,
+ Global,
+ DefaultStringValue<"">,
+ Desc<"Path to the emulator binary.">;
+}
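These tablegen'd properties surface as debugger settings. Assuming the generated names follow LLDB's usual platform.plugin.<plugin-name>.<property> convention, selecting and configuring the new platform would look like:

(lldb) settings set platform.plugin.qemu-user.architecture arm
(lldb) settings set platform.plugin.qemu-user.emulator-path /usr/bin/qemu-arm
(lldb) platform select qemu-user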
diff --git a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp
index 23b346d5c17f..b852a0164375 100644
--- a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp
+++ b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp
@@ -281,8 +281,8 @@ size_t ProcessElfCore::ReadMemory(lldb::addr_t addr, void *buf, size_t size,
return DoReadMemory(addr, buf, size, error);
}
-Status ProcessElfCore::DoGetMemoryRegionInfo(lldb::addr_t load_addr,
- MemoryRegionInfo &region_info) {
+Status ProcessElfCore::GetMemoryRegionInfo(lldb::addr_t load_addr,
+ MemoryRegionInfo &region_info) {
region_info.Clear();
const VMRangeToPermissions::Entry *permission_entry =
m_core_range_infos.FindEntryThatContainsOrFollows(load_addr);
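The same rename from the protected DoGetMemoryRegionInfo() hook to the public virtual GetMemoryRegionInfo() recurs below in the gdb-remote, minidump, and scripted process plugins. A schematic sketch of the template-method layer being dropped, with hypothetical class names:

#include <cstdint>

struct MemoryRegionInfo {};
struct Status {};

// Before: template-method pattern. The public entry point performed the
// shared bookkeeping, then deferred to a protected per-plugin hook.
class ProcessBefore {
public:
  virtual ~ProcessBefore() = default;
  Status GetMemoryRegionInfo(uint64_t addr, MemoryRegionInfo &info) {
    // shared validation/caching would live here
    return DoGetMemoryRegionInfo(addr, info);
  }

protected:
  virtual Status DoGetMemoryRegionInfo(uint64_t addr,
                                       MemoryRegionInfo &info) = 0;
};

// After: plugins override the public virtual directly, removing one
// layer of indirection where no shared pre/post work is needed.
class ProcessAfter {
public:
  virtual ~ProcessAfter() = default;
  virtual Status GetMemoryRegionInfo(uint64_t addr,
                                     MemoryRegionInfo &info) = 0;
};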
diff --git a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h
index fd36e5027816..67df3c5fac76 100644
--- a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h
+++ b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h
@@ -86,6 +86,10 @@ public:
size_t DoReadMemory(lldb::addr_t addr, void *buf, size_t size,
lldb_private::Status &error) override;
+ lldb_private::Status
+ GetMemoryRegionInfo(lldb::addr_t load_addr,
+ lldb_private::MemoryRegionInfo &region_info) override;
+
lldb::addr_t GetImageInfoAddress() override;
lldb_private::ArchSpec GetArchitecture();
@@ -101,10 +105,6 @@ protected:
bool DoUpdateThreadList(lldb_private::ThreadList &old_thread_list,
lldb_private::ThreadList &new_thread_list) override;
- lldb_private::Status
- DoGetMemoryRegionInfo(lldb::addr_t load_addr,
- lldb_private::MemoryRegionInfo &region_info) override;
-
private:
struct NT_FILE_Entry {
lldb::addr_t start;
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp
index 4ce79da48f07..25ae08838bf8 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.cpp
@@ -81,11 +81,6 @@ GDBRemoteCommunication::~GDBRemoteCommunication() {
if (m_decompression_scratch)
free (m_decompression_scratch);
#endif
-
- // Stop the communications read thread which is used to parse all incoming
- // packets. This function will block until the read thread returns.
- if (m_read_thread_enabled)
- StopReadThread();
}
char GDBRemoteCommunication::CalculcateChecksum(llvm::StringRef payload) {
@@ -193,7 +188,7 @@ GDBRemoteCommunication::SendRawPacketNoLock(llvm::StringRef packet,
GDBRemoteCommunication::PacketResult GDBRemoteCommunication::GetAck() {
StringExtractorGDBRemote packet;
- PacketResult result = ReadPacket(packet, GetPacketTimeout(), false);
+ PacketResult result = WaitForPacketNoLock(packet, GetPacketTimeout(), false);
if (result == PacketResult::Success) {
if (packet.GetResponseType() ==
StringExtractorGDBRemote::ResponseType::eAck)
@@ -225,40 +220,18 @@ GDBRemoteCommunication::PacketResult
GDBRemoteCommunication::ReadPacket(StringExtractorGDBRemote &response,
Timeout<std::micro> timeout,
bool sync_on_timeout) {
- if (m_read_thread_enabled)
- return PopPacketFromQueue(response, timeout);
- else
- return WaitForPacketNoLock(response, timeout, sync_on_timeout);
-}
+ using ResponseType = StringExtractorGDBRemote::ResponseType;
-// This function is called when a packet is requested.
-// A whole packet is popped from the packet queue and returned to the caller.
-// Packets are placed into this queue from the communication read thread. See
-// GDBRemoteCommunication::AppendBytesToCache.
-GDBRemoteCommunication::PacketResult
-GDBRemoteCommunication::PopPacketFromQueue(StringExtractorGDBRemote &response,
- Timeout<std::micro> timeout) {
- auto pred = [&] { return !m_packet_queue.empty() && IsConnected(); };
- // lock down the packet queue
- std::unique_lock<std::mutex> lock(m_packet_queue_mutex);
-
- if (!timeout)
- m_condition_queue_not_empty.wait(lock, pred);
- else {
- if (!m_condition_queue_not_empty.wait_for(lock, *timeout, pred))
- return PacketResult::ErrorReplyTimeout;
- if (!IsConnected())
- return PacketResult::ErrorDisconnected;
+ Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PACKETS));
+ for (;;) {
+ PacketResult result =
+ WaitForPacketNoLock(response, timeout, sync_on_timeout);
+ if (result != PacketResult::Success ||
+ (response.GetResponseType() != ResponseType::eAck &&
+ response.GetResponseType() != ResponseType::eNack))
+ return result;
+ LLDB_LOG(log, "discarding spurious `{0}` packet", response.GetStringRef());
}
-
- // get the front element of the queue
- response = m_packet_queue.front();
-
- // remove the front element
- m_packet_queue.pop();
-
- // we got a packet
- return PacketResult::Success;
}
GDBRemoteCommunication::PacketResult
@@ -1287,53 +1260,6 @@ GDBRemoteCommunication::ScopedTimeout::~ScopedTimeout() {
m_gdb_comm.SetPacketTimeout(m_saved_timeout);
}
-// This function is called via the Communications class read thread when bytes
-// become available for this connection. This function will consume all
-// incoming bytes and try to parse whole packets as they become available. Full
-// packets are placed in a queue, so that all packet requests can simply pop
-// from this queue. Async notification packets will be dispatched immediately
-// to the ProcessGDBRemote Async thread via an event.
-void GDBRemoteCommunication::AppendBytesToCache(const uint8_t *bytes,
- size_t len, bool broadcast,
- lldb::ConnectionStatus status) {
- StringExtractorGDBRemote packet;
-
- while (true) {
- PacketType type = CheckForPacket(bytes, len, packet);
-
- // scrub the data so we do not pass it back to CheckForPacket on future
- // passes of the loop
- bytes = nullptr;
- len = 0;
-
- // we may have received no packet so lets bail out
- if (type == PacketType::Invalid)
- break;
-
- if (type == PacketType::Standard) {
- // scope for the mutex
- {
- // lock down the packet queue
- std::lock_guard<std::mutex> guard(m_packet_queue_mutex);
- // push a new packet into the queue
- m_packet_queue.push(packet);
- // Signal condition variable that we have a packet
- m_condition_queue_not_empty.notify_one();
- }
- }
-
- if (type == PacketType::Notify) {
- // put this packet into an event
- const char *pdata = packet.GetStringRef().data();
-
- // as the communication class, we are a broadcaster and the async thread
- // is tuned to listen to us
- BroadcastEvent(eBroadcastBitGdbReadThreadGotNotify,
- new EventDataBytes(pdata));
- }
- }
-}
-
void llvm::format_provider<GDBRemoteCommunication::PacketResult>::format(
const GDBRemoteCommunication::PacketResult &result, raw_ostream &Stream,
StringRef Style) {
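The deleted AppendBytesToCache/PopPacketFromQueue pair implemented a classic condition-variable producer/consumer queue; with the read thread gone, ReadPacket now calls WaitForPacketNoLock synchronously. For reference, a generic standalone sketch of the removed pattern, not the actual GDB-remote types:

#include <chrono>
#include <condition_variable>
#include <mutex>
#include <optional>
#include <queue>
#include <string>

// Producer thread pushes parsed packets; consumers pop with a timeout.
class PacketQueue {
public:
  void Push(std::string packet) {
    {
      std::lock_guard<std::mutex> guard(m_mutex);
      m_queue.push(std::move(packet));
    }
    m_not_empty.notify_one(); // wake one waiting consumer
  }

  std::optional<std::string> Pop(std::chrono::milliseconds timeout) {
    std::unique_lock<std::mutex> lock(m_mutex);
    if (!m_not_empty.wait_for(lock, timeout,
                              [this] { return !m_queue.empty(); }))
      return std::nullopt; // timed out, akin to ErrorReplyTimeout
    std::string packet = std::move(m_queue.front());
    m_queue.pop();
    return packet;
  }

private:
  std::mutex m_mutex;
  std::condition_variable m_not_empty;
  std::queue<std::string> m_queue;
};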
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.h b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.h
index 5da568e9b4d4..afc7e740d4c9 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.h
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.h
@@ -84,8 +84,6 @@ class GDBRemoteCommunication : public Communication {
public:
enum {
eBroadcastBitRunPacketSent = kLoUserBroadcastBit,
- eBroadcastBitGdbReadThreadGotNotify =
- kLoUserBroadcastBit << 1 // Sent when we received a notify packet.
};
enum class PacketType { Invalid = 0, Standard, Notify };
@@ -196,10 +194,6 @@ protected:
bool sync_on_timeout,
llvm::function_ref<void(llvm::StringRef)> output_callback);
- // Pop a packet from the queue in a thread safe manner
- PacketResult PopPacketFromQueue(StringExtractorGDBRemote &response,
- Timeout<std::micro> timeout);
-
PacketResult WaitForPacketNoLock(StringExtractorGDBRemote &response,
Timeout<std::micro> timeout,
bool sync_on_timeout);
@@ -226,24 +220,7 @@ protected:
static lldb::thread_result_t ListenThread(lldb::thread_arg_t arg);
- // GDB-Remote read thread
- // . this thread constantly tries to read from the communication
- // class and stores all packets received in a queue. The usual
- // threads read requests simply pop packets off the queue in the
- // usual order.
- // This setup allows us to intercept and handle async packets, such
- // as the notify packet.
-
- // This method is defined as part of communication.h
- // when the read thread gets any bytes it will pass them on to this function
- void AppendBytesToCache(const uint8_t *bytes, size_t len, bool broadcast,
- lldb::ConnectionStatus status) override;
-
private:
- std::queue<StringExtractorGDBRemote> m_packet_queue; // The packet queue
- std::mutex m_packet_queue_mutex; // Mutex for accessing queue
- std::condition_variable
- m_condition_queue_not_empty; // Condition variable to wait for packets
// Promise used to grab the port number from listening thread
std::promise<uint16_t> m_port_promise;
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp
index 78e722eee080..07dfa5e04ee5 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp
@@ -86,13 +86,6 @@ bool GDBRemoteCommunicationClient::HandshakeWithServer(Status *error_ptr) {
std::chrono::steady_clock::time_point start_of_handshake =
std::chrono::steady_clock::now();
if (SendAck()) {
- // Wait for any responses that might have been queued up in the remote
- // GDB server and flush them all
- StringExtractorGDBRemote response;
- PacketResult packet_result = PacketResult::Success;
- while (packet_result == PacketResult::Success)
- packet_result = ReadPacket(response, milliseconds(10), false);
-
// The return value from QueryNoAckModeSupported() is true if the packet
// was sent and _any_ response (including UNIMPLEMENTED) was received), or
// false if no response was received. This quickly tells us if we have a
@@ -106,17 +99,15 @@ bool GDBRemoteCommunicationClient::HandshakeWithServer(Status *error_ptr) {
std::chrono::duration<double>(end_of_handshake - start_of_handshake)
.count();
if (error_ptr) {
- if (packet_result == PacketResult::ErrorDisconnected)
+ if (!IsConnected())
error_ptr->SetErrorString("Connection shut down by remote side "
"while waiting for reply to initial "
"handshake packet");
- else if (packet_result == PacketResult::ErrorReplyTimeout)
+ else
error_ptr->SetErrorStringWithFormat(
"failed to get reply to handshake packet within timeout of "
"%.1f seconds",
handshake_timeout);
- else
- error_ptr->SetErrorString("failed to get reply to handshake packet");
}
}
} else {
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.cpp
index 11cac9fa3a4d..49d88b72b01b 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.cpp
@@ -46,7 +46,7 @@ GDBRemoteCommunicationServer::GetPacketAndSendResponse(
Timeout<std::micro> timeout, Status &error, bool &interrupt, bool &quit) {
StringExtractorGDBRemote packet;
- PacketResult packet_result = WaitForPacketNoLock(packet, timeout, false);
+ PacketResult packet_result = ReadPacket(packet, timeout, false);
if (packet_result == PacketResult::Success) {
const StringExtractorGDBRemote::ServerPacketType packet_type =
packet.GetServerPacketType();
@@ -150,10 +150,6 @@ GDBRemoteCommunicationServer::SendOKResponse() {
return SendPacketNoLock("OK");
}
-bool GDBRemoteCommunicationServer::HandshakeWithClient() {
- return GetAck() == PacketResult::Success;
-}
-
GDBRemoteCommunication::PacketResult
GDBRemoteCommunicationServer::SendJSONResponse(const json::Value &value) {
std::string json_string;
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.h b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.h
index 68448eae2b9f..5de344061ec9 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.h
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.h
@@ -44,10 +44,6 @@ public:
Status &error, bool &interrupt,
bool &quit);
- // After connecting, do a little handshake with the client to make sure
- // we are at least communicating
- bool HandshakeWithClient();
-
protected:
std::map<StringExtractorGDBRemote::ServerPacketType, PacketHandler>
m_packet_handlers;
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp
index 5360db3d8462..30f14a52dfb5 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp
@@ -1088,18 +1088,6 @@ void GDBRemoteCommunicationServerLLGS::NewSubprocess(
void GDBRemoteCommunicationServerLLGS::DataAvailableCallback() {
Log *log(GetLogIfAnyCategoriesSet(GDBR_LOG_COMM));
- if (!m_handshake_completed) {
- if (!HandshakeWithClient()) {
- LLDB_LOGF(log,
- "GDBRemoteCommunicationServerLLGS::%s handshake with "
- "client failed, exiting",
- __FUNCTION__);
- m_mainloop.RequestTermination();
- return;
- }
- m_handshake_completed = true;
- }
-
bool interrupt = false;
bool done = false;
Status error;
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.h b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.h
index 6c75771f6427..17ee4130dc34 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.h
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.h
@@ -104,7 +104,6 @@ protected:
std::mutex m_saved_registers_mutex;
std::unordered_map<uint32_t, lldb::DataBufferSP> m_saved_registers_map;
uint32_t m_next_saved_registers_id = 1;
- bool m_handshake_completed = false;
bool m_thread_suffix_supported = false;
bool m_list_threads_in_stop_reply = false;
diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
index 2233bf675819..3ade8c815feb 100644
--- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
@@ -282,9 +282,7 @@ ProcessGDBRemote::ProcessGDBRemote(lldb::TargetSP target_sp,
__FUNCTION__);
}
- const uint32_t gdb_event_mask =
- Communication::eBroadcastBitReadThreadDidExit |
- GDBRemoteCommunication::eBroadcastBitGdbReadThreadGotNotify;
+ const uint32_t gdb_event_mask = Communication::eBroadcastBitReadThreadDidExit;
if (m_async_listener_sp->StartListeningForEvents(
&m_gdb_comm, gdb_event_mask) != gdb_event_mask) {
LLDB_LOGF(log,
@@ -1324,24 +1322,6 @@ Status ProcessGDBRemote::DoResume() {
return error;
}
-void ProcessGDBRemote::HandleStopReplySequence() {
- while (true) {
- // Send vStopped
- StringExtractorGDBRemote response;
- m_gdb_comm.SendPacketAndWaitForResponse("vStopped", response);
-
- // OK represents end of signal list
- if (response.IsOKResponse())
- break;
-
- // If not OK or a normal packet we have a problem
- if (!response.IsNormalResponse())
- break;
-
- SetLastStopPacket(response);
- }
-}
-
void ProcessGDBRemote::ClearThreadIDList() {
std::lock_guard<std::recursive_mutex> guard(m_thread_list_real.GetMutex());
m_thread_ids.clear();
@@ -2897,8 +2877,8 @@ lldb::addr_t ProcessGDBRemote::DoAllocateMemory(size_t size,
return allocated_addr;
}
-Status ProcessGDBRemote::DoGetMemoryRegionInfo(addr_t load_addr,
- MemoryRegionInfo &region_info) {
+Status ProcessGDBRemote::GetMemoryRegionInfo(addr_t load_addr,
+ MemoryRegionInfo &region_info) {
Status error(m_gdb_comm.GetMemoryRegionInfo(load_addr, region_info));
return error;
@@ -3539,31 +3519,6 @@ void ProcessGDBRemote::StopAsyncThread() {
__FUNCTION__);
}
-bool ProcessGDBRemote::HandleNotifyPacket(StringExtractorGDBRemote &packet) {
- // get the packet at a string
- const std::string &pkt = std::string(packet.GetStringRef());
- // skip %stop:
- StringExtractorGDBRemote stop_info(pkt.c_str() + 5);
-
- // pass as a thread stop info packet
- SetLastStopPacket(stop_info);
-
- // check for more stop reasons
- HandleStopReplySequence();
-
- // if the process is stopped then we need to fake a resume so that we can
- // stop properly with the new break. This is possible due to
- // SetPrivateState() broadcasting the state change as a side effect.
- if (GetPrivateState() == lldb::StateType::eStateStopped) {
- SetPrivateState(lldb::StateType::eStateRunning);
- }
-
- // since we have some stopped packets we can halt the process
- SetPrivateState(lldb::StateType::eStateStopped);
-
- return true;
-}
-
thread_result_t ProcessGDBRemote::AsyncThread(void *arg) {
ProcessGDBRemote *process = (ProcessGDBRemote *)arg;
@@ -3712,17 +3667,6 @@ thread_result_t ProcessGDBRemote::AsyncThread(void *arg) {
done = true;
break;
- case GDBRemoteCommunication::eBroadcastBitGdbReadThreadGotNotify: {
- lldb_private::Event *event = event_sp.get();
- const EventDataBytes *continue_packet =
- EventDataBytes::GetEventDataFromEvent(event);
- StringExtractorGDBRemote notify(
- (const char *)continue_packet->GetBytes());
- // Hand this over to the process to handle
- process->HandleNotifyPacket(notify);
- break;
- }
-
default:
LLDB_LOGF(log,
"ProcessGDBRemote::%s (arg = %p, pid = %" PRIu64
diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h
index 8134bc6b530d..488336b8c1b8 100644
--- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h
+++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h
@@ -144,6 +144,9 @@ public:
lldb::addr_t DoAllocateMemory(size_t size, uint32_t permissions,
Status &error) override;
+ Status GetMemoryRegionInfo(lldb::addr_t load_addr,
+ MemoryRegionInfo &region_info) override;
+
Status DoDeallocateMemory(lldb::addr_t ptr) override;
// Process STDIO
@@ -343,8 +346,6 @@ protected:
size_t UpdateThreadIDsFromStopReplyThreadsValue(llvm::StringRef value);
- bool HandleNotifyPacket(StringExtractorGDBRemote &packet);
-
bool StartAsyncThread();
void StopAsyncThread();
@@ -375,8 +376,6 @@ protected:
lldb::addr_t dispatch_queue_t, std::string &queue_name,
lldb::QueueKind queue_kind, uint64_t queue_serial);
- void HandleStopReplySequence();
-
void ClearThreadIDList();
bool UpdateThreadIDList();
@@ -421,9 +420,6 @@ protected:
Status DoWriteMemoryTags(lldb::addr_t addr, size_t len, int32_t type,
const std::vector<uint8_t> &tags) override;
- Status DoGetMemoryRegionInfo(lldb::addr_t load_addr,
- MemoryRegionInfo &region_info) override;
-
private:
// For ProcessGDBRemote only
std::string m_partial_profile_data;
diff --git a/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp b/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp
index 736cfa070088..37ee5466c5b9 100644
--- a/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp
+++ b/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp
@@ -73,7 +73,7 @@ public:
bool IsExecutable() const override { return false; }
ArchSpec GetArchitecture() override { return m_arch; }
UUID GetUUID() override { return m_uuid; }
- Symtab *GetSymtab() override { return m_symtab_up.get(); }
+ void ParseSymtab(lldb_private::Symtab &symtab) override {}
bool IsStripped() override { return true; }
ByteOrder GetByteOrder() const override { return m_arch.GetByteOrder(); }
@@ -439,8 +439,8 @@ void ProcessMinidump::BuildMemoryRegions() {
llvm::sort(*m_memory_regions);
}
-Status ProcessMinidump::DoGetMemoryRegionInfo(lldb::addr_t load_addr,
- MemoryRegionInfo &region) {
+Status ProcessMinidump::GetMemoryRegionInfo(lldb::addr_t load_addr,
+ MemoryRegionInfo &region) {
BuildMemoryRegions();
region = MinidumpParser::GetMemoryRegionInfo(*m_memory_regions, load_addr);
return Status();
diff --git a/lldb/source/Plugins/Process/minidump/ProcessMinidump.h b/lldb/source/Plugins/Process/minidump/ProcessMinidump.h
index 5360269199cd..3501d38a0f27 100644
--- a/lldb/source/Plugins/Process/minidump/ProcessMinidump.h
+++ b/lldb/source/Plugins/Process/minidump/ProcessMinidump.h
@@ -75,6 +75,9 @@ public:
ArchSpec GetArchitecture();
+ Status GetMemoryRegionInfo(lldb::addr_t load_addr,
+ MemoryRegionInfo &range_info) override;
+
Status GetMemoryRegions(
lldb_private::MemoryRegionInfos &region_list) override;
@@ -95,9 +98,6 @@ protected:
bool DoUpdateThreadList(ThreadList &old_thread_list,
ThreadList &new_thread_list) override;
- Status DoGetMemoryRegionInfo(lldb::addr_t load_addr,
- MemoryRegionInfo &range_info) override;
-
void ReadModuleList();
lldb::ModuleSP GetOrCreateModule(lldb_private::UUID minidump_uuid,
diff --git a/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp b/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp
index 15d3d43d9993..c1b7294a7f58 100644
--- a/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp
+++ b/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp
@@ -248,8 +248,8 @@ ArchSpec ScriptedProcess::GetArchitecture() {
return GetTarget().GetArchitecture();
}
-Status ScriptedProcess::DoGetMemoryRegionInfo(lldb::addr_t load_addr,
- MemoryRegionInfo &region) {
+Status ScriptedProcess::GetMemoryRegionInfo(lldb::addr_t load_addr,
+ MemoryRegionInfo &region) {
CheckInterpreterAndScriptObject();
Status error;
diff --git a/lldb/source/Plugins/Process/scripted/ScriptedProcess.h b/lldb/source/Plugins/Process/scripted/ScriptedProcess.h
index c8355f35548a..d56658a2e48a 100644
--- a/lldb/source/Plugins/Process/scripted/ScriptedProcess.h
+++ b/lldb/source/Plugins/Process/scripted/ScriptedProcess.h
@@ -84,6 +84,9 @@ public:
ArchSpec GetArchitecture();
+ Status GetMemoryRegionInfo(lldb::addr_t load_addr,
+ MemoryRegionInfo &range_info) override;
+
Status
GetMemoryRegions(lldb_private::MemoryRegionInfos &region_list) override;
@@ -97,9 +100,6 @@ protected:
bool DoUpdateThreadList(ThreadList &old_thread_list,
ThreadList &new_thread_list) override;
- Status DoGetMemoryRegionInfo(lldb::addr_t load_addr,
- MemoryRegionInfo &range_info) override;
-
private:
friend class ScriptedThread;
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h b/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h
index 798d947a0a7d..c7af13598843 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h
@@ -15,8 +15,12 @@
#if LLDB_ENABLE_PYTHON
+// LLDB Python header must be included first
+#include "lldb-python.h"
+
#include "lldb/lldb-forward.h"
#include "lldb/lldb-types.h"
+#include "llvm/Support/Error.h"
namespace lldb_private {
@@ -41,20 +45,148 @@ template <> const char *GetPythonValueFormatString(unsigned long long);
template <> const char *GetPythonValueFormatString(float t);
template <> const char *GetPythonValueFormatString(double t);
-extern "C" void *LLDBSwigPythonCreateScriptedProcess(
+void *LLDBSWIGPython_CastPyObjectToSBData(PyObject *data);
+void *LLDBSWIGPython_CastPyObjectToSBError(PyObject *data);
+void *LLDBSWIGPython_CastPyObjectToSBValue(PyObject *data);
+void *LLDBSWIGPython_CastPyObjectToSBMemoryRegionInfo(PyObject *data);
+
+// These prototypes are the Pythonic implementations of the required callbacks.
+// Although these are scripting-language specific, their definition depends on
+// the public API.
+
+void *LLDBSwigPythonCreateScriptedProcess(const char *python_class_name,
+ const char *session_dictionary_name,
+ const lldb::TargetSP &target_sp,
+ StructuredDataImpl *args_impl,
+ std::string &error_string);
+
+void *LLDBSwigPythonCreateScriptedThread(const char *python_class_name,
+ const char *session_dictionary_name,
+ const lldb::ProcessSP &process_sp,
+ StructuredDataImpl *args_impl,
+ std::string &error_string);
+
+llvm::Expected<bool> LLDBSwigPythonBreakpointCallbackFunction(
+ const char *python_function_name, const char *session_dictionary_name,
+ const lldb::StackFrameSP &sb_frame,
+ const lldb::BreakpointLocationSP &sb_bp_loc,
+ lldb_private::StructuredDataImpl *args_impl);
+
+bool LLDBSwigPythonWatchpointCallbackFunction(
+ const char *python_function_name, const char *session_dictionary_name,
+ const lldb::StackFrameSP &sb_frame, const lldb::WatchpointSP &sb_wp);
+
+bool LLDBSwigPythonCallTypeScript(const char *python_function_name,
+ const void *session_dictionary,
+ const lldb::ValueObjectSP &valobj_sp,
+ void **pyfunct_wrapper,
+ const lldb::TypeSummaryOptionsSP &options_sp,
+ std::string &retval);
+
+void *
+LLDBSwigPythonCreateSyntheticProvider(const char *python_class_name,
+ const char *session_dictionary_name,
+ const lldb::ValueObjectSP &valobj_sp);
+
+void *LLDBSwigPythonCreateCommandObject(const char *python_class_name,
+ const char *session_dictionary_name,
+ const lldb::DebuggerSP debugger_sp);
+
+void *LLDBSwigPythonCreateScriptedThreadPlan(
const char *python_class_name, const char *session_dictionary_name,
- const lldb::TargetSP &target_sp, StructuredDataImpl *args_impl,
- std::string &error_string);
+ lldb_private::StructuredDataImpl *args_data, std::string &error_string,
+ const lldb::ThreadPlanSP &thread_plan_sp);
-extern "C" void *LLDBSwigPythonCreateScriptedThread(
+bool LLDBSWIGPythonCallThreadPlan(void *implementor, const char *method_name,
+ lldb_private::Event *event_sp,
+ bool &got_error);
+
+void *LLDBSwigPythonCreateScriptedBreakpointResolver(
const char *python_class_name, const char *session_dictionary_name,
- const lldb::ProcessSP &process_sp, StructuredDataImpl *args_impl,
- std::string &error_string);
+ lldb_private::StructuredDataImpl *args, const lldb::BreakpointSP &bkpt_sp);
+
+unsigned int
+LLDBSwigPythonCallBreakpointResolver(void *implementor, const char *method_name,
+ lldb_private::SymbolContext *sym_ctx);
+
+void *LLDBSwigPythonCreateScriptedStopHook(
+ lldb::TargetSP target_sp, const char *python_class_name,
+ const char *session_dictionary_name, lldb_private::StructuredDataImpl *args,
+ lldb_private::Status &error);
+
+bool LLDBSwigPythonStopHookCallHandleStop(void *implementor,
+ lldb::ExecutionContextRefSP exc_ctx,
+ lldb::StreamSP stream);
+
+size_t LLDBSwigPython_CalculateNumChildren(PyObject *implementor, uint32_t max);
+
+PyObject *LLDBSwigPython_GetChildAtIndex(PyObject *implementor, uint32_t idx);
+
+int LLDBSwigPython_GetIndexOfChildWithName(PyObject *implementor,
+ const char *child_name);
+
+lldb::ValueObjectSP LLDBSWIGPython_GetValueObjectSPFromSBValue(void *data);
+
+bool LLDBSwigPython_UpdateSynthProviderInstance(PyObject *implementor);
+
+bool LLDBSwigPython_MightHaveChildrenSynthProviderInstance(
+ PyObject *implementor);
+
+PyObject *LLDBSwigPython_GetValueSynthProviderInstance(PyObject *implementor);
+
+bool LLDBSwigPythonCallCommand(const char *python_function_name,
+ const char *session_dictionary_name,
+ lldb::DebuggerSP &debugger, const char *args,
+ lldb_private::CommandReturnObject &cmd_retobj,
+ lldb::ExecutionContextRefSP exe_ctx_ref_sp);
+
+bool LLDBSwigPythonCallCommandObject(
+ PyObject *implementor, lldb::DebuggerSP &debugger, const char *args,
+ lldb_private::CommandReturnObject &cmd_retobj,
+ lldb::ExecutionContextRefSP exe_ctx_ref_sp);
+
+bool LLDBSwigPythonCallModuleInit(const char *python_module_name,
+ const char *session_dictionary_name,
+ lldb::DebuggerSP &debugger);
+
+void *LLDBSWIGPythonCreateOSPlugin(const char *python_class_name,
+ const char *session_dictionary_name,
+ const lldb::ProcessSP &process_sp);
+
+void *LLDBSWIGPython_CreateFrameRecognizer(const char *python_class_name,
+ const char *session_dictionary_name);
+
+PyObject *
+LLDBSwigPython_GetRecognizedArguments(PyObject *implementor,
+ const lldb::StackFrameSP &frame_sp);
+
+bool LLDBSWIGPythonRunScriptKeywordProcess(const char *python_function_name,
+ const char *session_dictionary_name,
+ const lldb::ProcessSP &process,
+ std::string &output);
+
+bool LLDBSWIGPythonRunScriptKeywordThread(const char *python_function_name,
+ const char *session_dictionary_name,
+ lldb::ThreadSP &thread,
+ std::string &output);
+
+bool LLDBSWIGPythonRunScriptKeywordTarget(const char *python_function_name,
+ const char *session_dictionary_name,
+ const lldb::TargetSP &target,
+ std::string &output);
+
+bool LLDBSWIGPythonRunScriptKeywordFrame(const char *python_function_name,
+ const char *session_dictionary_name,
+ lldb::StackFrameSP &frame,
+ std::string &output);
+
+bool LLDBSWIGPythonRunScriptKeywordValue(const char *python_function_name,
+ const char *session_dictionary_name,
+ const lldb::ValueObjectSP &value,
+ std::string &output);
-extern "C" void *LLDBSWIGPython_CastPyObjectToSBData(void *data);
-extern "C" void *LLDBSWIGPython_CastPyObjectToSBError(void *data);
-extern "C" void *LLDBSWIGPython_CastPyObjectToSBValue(void *data);
-extern "C" void *LLDBSWIGPython_CastPyObjectToSBMemoryRegionInfo(void *data);
+void *LLDBSWIGPython_GetDynamicSetting(void *module, const char *setting,
+ const lldb::TargetSP &target_sp);
} // namespace lldb_private
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp
index c1f4c2d3b4d3..5f282d74e364 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp
@@ -70,153 +70,6 @@ extern "C" void init_lldb(void);
#define LLDBSwigPyInit init_lldb
#endif
-// These prototypes are the Pythonic implementations of the required callbacks.
-// Although these are scripting-language specific, their definition depends on
-// the public API.
-
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wreturn-type-c-linkage"
-
-// Disable warning C4190: 'LLDBSwigPythonBreakpointCallbackFunction' has
-// C-linkage specified, but returns UDT 'llvm::Expected<bool>' which is
-// incompatible with C
-#if _MSC_VER
-#pragma warning (push)
-#pragma warning (disable : 4190)
-#endif
-
-extern "C" llvm::Expected<bool> LLDBSwigPythonBreakpointCallbackFunction(
- const char *python_function_name, const char *session_dictionary_name,
- const lldb::StackFrameSP &sb_frame,
- const lldb::BreakpointLocationSP &sb_bp_loc, StructuredDataImpl *args_impl);
-
-#if _MSC_VER
-#pragma warning (pop)
-#endif
-
-#pragma clang diagnostic pop
-
-extern "C" bool LLDBSwigPythonWatchpointCallbackFunction(
- const char *python_function_name, const char *session_dictionary_name,
- const lldb::StackFrameSP &sb_frame, const lldb::WatchpointSP &sb_wp);
-
-extern "C" bool LLDBSwigPythonCallTypeScript(
- const char *python_function_name, void *session_dictionary,
- const lldb::ValueObjectSP &valobj_sp, void **pyfunct_wrapper,
- const lldb::TypeSummaryOptionsSP &options_sp, std::string &retval);
-
-extern "C" void *
-LLDBSwigPythonCreateSyntheticProvider(const char *python_class_name,
- const char *session_dictionary_name,
- const lldb::ValueObjectSP &valobj_sp);
-
-extern "C" void *
-LLDBSwigPythonCreateCommandObject(const char *python_class_name,
- const char *session_dictionary_name,
- const lldb::DebuggerSP debugger_sp);
-
-extern "C" void *LLDBSwigPythonCreateScriptedThreadPlan(
- const char *python_class_name, const char *session_dictionary_name,
- StructuredDataImpl *args_data,
- std::string &error_string,
- const lldb::ThreadPlanSP &thread_plan_sp);
-
-extern "C" bool LLDBSWIGPythonCallThreadPlan(void *implementor,
- const char *method_name,
- Event *event_sp, bool &got_error);
-
-extern "C" void *LLDBSwigPythonCreateScriptedBreakpointResolver(
- const char *python_class_name, const char *session_dictionary_name,
- lldb_private::StructuredDataImpl *args, lldb::BreakpointSP &bkpt_sp);
-
-extern "C" unsigned int
-LLDBSwigPythonCallBreakpointResolver(void *implementor, const char *method_name,
- lldb_private::SymbolContext *sym_ctx);
-
-extern "C" void *LLDBSwigPythonCreateScriptedStopHook(
- TargetSP target_sp, const char *python_class_name,
- const char *session_dictionary_name, lldb_private::StructuredDataImpl *args,
- lldb_private::Status &error);
-
-extern "C" bool
-LLDBSwigPythonStopHookCallHandleStop(void *implementor,
- lldb::ExecutionContextRefSP exc_ctx,
- lldb::StreamSP stream);
-
-extern "C" size_t LLDBSwigPython_CalculateNumChildren(void *implementor,
- uint32_t max);
-
-extern "C" void *LLDBSwigPython_GetChildAtIndex(void *implementor,
- uint32_t idx);
-
-extern "C" int LLDBSwigPython_GetIndexOfChildWithName(void *implementor,
- const char *child_name);
-
-extern lldb::ValueObjectSP
-LLDBSWIGPython_GetValueObjectSPFromSBValue(void *data);
-
-extern "C" bool LLDBSwigPython_UpdateSynthProviderInstance(void *implementor);
-
-extern "C" bool
-LLDBSwigPython_MightHaveChildrenSynthProviderInstance(void *implementor);
-
-extern "C" void *
-LLDBSwigPython_GetValueSynthProviderInstance(void *implementor);
-
-extern "C" bool
-LLDBSwigPythonCallCommand(const char *python_function_name,
- const char *session_dictionary_name,
- lldb::DebuggerSP &debugger, const char *args,
- lldb_private::CommandReturnObject &cmd_retobj,
- lldb::ExecutionContextRefSP exe_ctx_ref_sp);
-
-extern "C" bool
-LLDBSwigPythonCallCommandObject(void *implementor, lldb::DebuggerSP &debugger,
- const char *args,
- lldb_private::CommandReturnObject &cmd_retobj,
- lldb::ExecutionContextRefSP exe_ctx_ref_sp);
-
-extern "C" bool
-LLDBSwigPythonCallModuleInit(const char *python_module_name,
- const char *session_dictionary_name,
- lldb::DebuggerSP &debugger);
-
-extern "C" void *
-LLDBSWIGPythonCreateOSPlugin(const char *python_class_name,
- const char *session_dictionary_name,
- const lldb::ProcessSP &process_sp);
-
-extern "C" void *
-LLDBSWIGPython_CreateFrameRecognizer(const char *python_class_name,
- const char *session_dictionary_name);
-
-extern "C" void *
-LLDBSwigPython_GetRecognizedArguments(void *implementor,
- const lldb::StackFrameSP &frame_sp);
-
-extern "C" bool LLDBSWIGPythonRunScriptKeywordProcess(
- const char *python_function_name, const char *session_dictionary_name,
- lldb::ProcessSP &process, std::string &output);
-
-extern "C" bool LLDBSWIGPythonRunScriptKeywordThread(
- const char *python_function_name, const char *session_dictionary_name,
- lldb::ThreadSP &thread, std::string &output);
-
-extern "C" bool LLDBSWIGPythonRunScriptKeywordTarget(
- const char *python_function_name, const char *session_dictionary_name,
- lldb::TargetSP &target, std::string &output);
-
-extern "C" bool LLDBSWIGPythonRunScriptKeywordFrame(
- const char *python_function_name, const char *session_dictionary_name,
- lldb::StackFrameSP &frame, std::string &output);
-
-extern "C" bool LLDBSWIGPythonRunScriptKeywordValue(
- const char *python_function_name, const char *session_dictionary_name,
- lldb::ValueObjectSP &value, std::string &output);
-
-extern "C" void *
-LLDBSWIGPython_GetDynamicSetting(void *module, const char *setting,
- const lldb::TargetSP &target_sp);
static ScriptInterpreterPythonImpl *GetPythonInterpreter(Debugger &debugger) {
ScriptInterpreter *script_interpreter =
@@ -1591,9 +1444,9 @@ lldb::ValueObjectListSP ScriptInterpreterPythonImpl::GetRecognizedArguments(
if (!implementor.IsAllocated())
return ValueObjectListSP();
- PythonObject py_return(PyRefType::Owned,
- (PyObject *)LLDBSwigPython_GetRecognizedArguments(
- implementor.get(), frame_sp));
+ PythonObject py_return(
+ PyRefType::Owned,
+ LLDBSwigPython_GetRecognizedArguments(implementor.get(), frame_sp));
// if it fails, print the error but otherwise go on
if (PyErr_Occurred()) {
@@ -2423,7 +2276,7 @@ size_t ScriptInterpreterPythonImpl::CalculateNumChildren(
StructuredData::Generic *generic = implementor_sp->GetAsGeneric();
if (!generic)
return 0;
- void *implementor = generic->GetValue();
+ auto *implementor = static_cast<PyObject *>(generic->GetValue());
if (!implementor)
return 0;
@@ -2446,7 +2299,7 @@ lldb::ValueObjectSP ScriptInterpreterPythonImpl::GetChildAtIndex(
StructuredData::Generic *generic = implementor_sp->GetAsGeneric();
if (!generic)
return lldb::ValueObjectSP();
- void *implementor = generic->GetValue();
+ auto *implementor = static_cast<PyObject *>(generic->GetValue());
if (!implementor)
return lldb::ValueObjectSP();
@@ -2454,7 +2307,7 @@ lldb::ValueObjectSP ScriptInterpreterPythonImpl::GetChildAtIndex(
{
Locker py_lock(this,
Locker::AcquireLock | Locker::InitSession | Locker::NoSTDIN);
- void *child_ptr = LLDBSwigPython_GetChildAtIndex(implementor, idx);
+ PyObject *child_ptr = LLDBSwigPython_GetChildAtIndex(implementor, idx);
if (child_ptr != nullptr && child_ptr != Py_None) {
lldb::SBValue *sb_value_ptr =
(lldb::SBValue *)LLDBSWIGPython_CastPyObjectToSBValue(child_ptr);
@@ -2478,7 +2331,7 @@ int ScriptInterpreterPythonImpl::GetIndexOfChildWithName(
StructuredData::Generic *generic = implementor_sp->GetAsGeneric();
if (!generic)
return UINT32_MAX;
- void *implementor = generic->GetValue();
+ auto *implementor = static_cast<PyObject *>(generic->GetValue());
if (!implementor)
return UINT32_MAX;
@@ -2503,7 +2356,7 @@ bool ScriptInterpreterPythonImpl::UpdateSynthProviderInstance(
StructuredData::Generic *generic = implementor_sp->GetAsGeneric();
if (!generic)
return ret_val;
- void *implementor = generic->GetValue();
+ auto *implementor = static_cast<PyObject *>(generic->GetValue());
if (!implementor)
return ret_val;
@@ -2526,7 +2379,7 @@ bool ScriptInterpreterPythonImpl::MightHaveChildrenSynthProviderInstance(
StructuredData::Generic *generic = implementor_sp->GetAsGeneric();
if (!generic)
return ret_val;
- void *implementor = generic->GetValue();
+ auto *implementor = static_cast<PyObject *>(generic->GetValue());
if (!implementor)
return ret_val;
@@ -2550,14 +2403,15 @@ lldb::ValueObjectSP ScriptInterpreterPythonImpl::GetSyntheticValue(
StructuredData::Generic *generic = implementor_sp->GetAsGeneric();
if (!generic)
return ret_val;
- void *implementor = generic->GetValue();
+ auto *implementor = static_cast<PyObject *>(generic->GetValue());
if (!implementor)
return ret_val;
{
Locker py_lock(this,
Locker::AcquireLock | Locker::InitSession | Locker::NoSTDIN);
- void *child_ptr = LLDBSwigPython_GetValueSynthProviderInstance(implementor);
+ PyObject *child_ptr =
+ LLDBSwigPython_GetValueSynthProviderInstance(implementor);
if (child_ptr != nullptr && child_ptr != Py_None) {
lldb::SBValue *sb_value_ptr =
(lldb::SBValue *)LLDBSWIGPython_CastPyObjectToSBValue(child_ptr);
@@ -2653,11 +2507,11 @@ bool ScriptInterpreterPythonImpl::RunScriptFormatKeyword(
}
{
- ProcessSP process_sp(process->shared_from_this());
Locker py_lock(this,
Locker::AcquireLock | Locker::InitSession | Locker::NoSTDIN);
ret_val = LLDBSWIGPythonRunScriptKeywordProcess(
- impl_function, m_dictionary_name.c_str(), process_sp, output);
+ impl_function, m_dictionary_name.c_str(), process->shared_from_this(),
+ output);
if (!ret_val)
error.SetErrorString("python script evaluation failed");
}
@@ -2753,11 +2607,10 @@ bool ScriptInterpreterPythonImpl::RunScriptFormatKeyword(
}
{
- ValueObjectSP value_sp(value->GetSP());
Locker py_lock(this,
Locker::AcquireLock | Locker::InitSession | Locker::NoSTDIN);
ret_val = LLDBSWIGPythonRunScriptKeywordValue(
- impl_function, m_dictionary_name.c_str(), value_sp, output);
+ impl_function, m_dictionary_name.c_str(), value->GetSP(), output);
if (!ret_val)
error.SetErrorString("python script evaluation failed");
}
@@ -3076,9 +2929,9 @@ bool ScriptInterpreterPythonImpl::RunScriptBasedCommand(
SynchronicityHandler synch_handler(debugger_sp, synchronicity);
std::string args_str = args.str();
- ret_val = LLDBSwigPythonCallCommandObject(impl_obj_sp->GetValue(),
- debugger_sp, args_str.c_str(),
- cmd_retobj, exe_ctx_ref_sp);
+ ret_val = LLDBSwigPythonCallCommandObject(
+ static_cast<PyObject *>(impl_obj_sp->GetValue()), debugger_sp,
+ args_str.c_str(), cmd_retobj, exe_ctx_ref_sp);
}
if (!ret_val)
diff --git a/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp b/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp
index b07674af3bd9..9d23f1baf931 100644
--- a/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp
+++ b/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp
@@ -500,7 +500,7 @@ void SymbolFileBreakpad::AddSymbols(Symtab &symtab) {
for (Symbol &symbol : symbols)
symtab.AddSymbol(std::move(symbol));
- symtab.CalculateSymbolSizes();
+ symtab.Finalize();
}
llvm::Expected<lldb::addr_t>
@@ -927,4 +927,3 @@ uint64_t SymbolFileBreakpad::GetDebugInfoSize() {
// Breakpad files are all debug info.
return m_objfile_sp->GetByteSize();
}
-
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
index 2dd7ae60b231..8c20244a6c44 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
@@ -2067,6 +2067,13 @@ uint32_t SymbolFileDWARF::ResolveSymbolContext(
}
void SymbolFileDWARF::PreloadSymbols() {
+ // Get the symbol table for the symbol file prior to taking the module lock
+ // so that it is available without needing to take the module lock. The DWARF
+ // indexing might end up needing to relocate items when DWARF sections are
+ // loaded as they might end up getting the section contents which can call
+ // ObjectFileELF::RelocateSection() which in turn will ask for the symbol
+ // table and can cause deadlocks.
+ GetSymtab();
std::lock_guard<std::recursive_mutex> guard(GetModuleMutex());
m_index->Preload();
}
@@ -3271,15 +3278,14 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc,
}
const DWARFDIE parent_context_die = GetDeclContextDIEContainingDIE(die);
- const dw_tag_t parent_tag = die.GetParent().Tag();
+ const DWARFDIE sc_parent_die = GetParentSymbolContextDIE(die);
+ const dw_tag_t parent_tag = sc_parent_die.Tag();
bool is_static_member = (parent_tag == DW_TAG_compile_unit ||
parent_tag == DW_TAG_partial_unit) &&
(parent_context_die.Tag() == DW_TAG_class_type ||
parent_context_die.Tag() == DW_TAG_structure_type);
ValueType scope = eValueTypeInvalid;
-
- const DWARFDIE sc_parent_die = GetParentSymbolContextDIE(die);
SymbolContextScope *symbol_context_scope = nullptr;
bool has_explicit_mangled = mangled != nullptr;
diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp
index 8af90cb66e87..bf101ac1acf1 100644
--- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp
+++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp
@@ -1182,8 +1182,9 @@ void SymbolFileNativePDB::FindFunctions(
FunctionNameType name_type_mask, bool include_inlines,
SymbolContextList &sc_list) {
std::lock_guard<std::recursive_mutex> guard(GetModuleMutex());
- // For now we only support lookup by method name.
- if (!(name_type_mask & eFunctionNameTypeMethod))
+ // For now we only support lookup by method name or full name.
+ if (!(name_type_mask & eFunctionNameTypeFull ||
+ name_type_mask & eFunctionNameTypeMethod))
return;
using SymbolAndOffset = std::pair<uint32_t, llvm::codeview::CVSymbol>;
diff --git a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp
index 45dfc4b9a152..db0ae241be7e 100644
--- a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp
+++ b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp
@@ -1421,7 +1421,6 @@ void SymbolFilePDB::AddSymbols(lldb_private::Symtab &symtab) {
));
}
- symtab.CalculateSymbolSizes();
symtab.Finalize();
}
diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
index b20ae32a08ac..b1dbc382ff04 100644
--- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
+++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
@@ -91,7 +91,7 @@ static void VerifyDecl(clang::Decl *decl) {
assert(decl && "VerifyDecl called with nullptr?");
#ifndef NDEBUG
// We don't care about the actual access value here but only want to trigger
- // that Clang calls its internal Decl::AccessDeclContextSanity check.
+ // that Clang calls its internal Decl::AccessDeclContextCheck validation.
decl->getAccess();
#endif
}
diff --git a/lldb/source/Symbol/ObjectFile.cpp b/lldb/source/Symbol/ObjectFile.cpp
index 101af01341a2..bfab741b0d66 100644
--- a/lldb/source/Symbol/ObjectFile.cpp
+++ b/lldb/source/Symbol/ObjectFile.cpp
@@ -244,7 +244,7 @@ ObjectFile::ObjectFile(const lldb::ModuleSP &module_sp,
m_type(eTypeInvalid), m_strata(eStrataInvalid),
m_file_offset(file_offset), m_length(length), m_data(), m_process_wp(),
m_memory_addr(LLDB_INVALID_ADDRESS), m_sections_up(), m_symtab_up(),
- m_synthetic_symbol_idx(0) {
+ m_symtab_once_up(new llvm::once_flag()) {
if (file_spec_ptr)
m_file = *file_spec_ptr;
if (data_sp)
@@ -265,7 +265,7 @@ ObjectFile::ObjectFile(const lldb::ModuleSP &module_sp,
: ModuleChild(module_sp), m_file(), m_type(eTypeInvalid),
m_strata(eStrataInvalid), m_file_offset(0), m_length(0), m_data(),
m_process_wp(process_sp), m_memory_addr(header_addr), m_sections_up(),
- m_symtab_up(), m_synthetic_symbol_idx(0) {
+ m_symtab_up(), m_symtab_once_up(new llvm::once_flag()) {
if (header_data_sp)
m_data.SetData(header_data_sp, 0, header_data_sp->GetByteSize());
Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_OBJECT));
@@ -571,11 +571,13 @@ bool ObjectFile::SplitArchivePathWithObject(llvm::StringRef path_with_object,
void ObjectFile::ClearSymtab() {
ModuleSP module_sp(GetModule());
if (module_sp) {
- std::lock_guard<std::recursive_mutex> guard(module_sp->GetMutex());
Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_OBJECT));
LLDB_LOGF(log, "%p ObjectFile::ClearSymtab () symtab = %p",
static_cast<void *>(this),
static_cast<void *>(m_symtab_up.get()));
+ // Since we are clearing the symbol table, we need a fresh llvm::once_flag
+ // instance so that a symbol table can safely be created again later.
+ m_symtab_once_up.reset(new llvm::once_flag());
m_symtab_up.reset();
}
}
@@ -715,3 +717,33 @@ void llvm::format_provider<ObjectFile::Strata>::format(
break;
}
}
+
+Symtab *ObjectFile::GetSymtab() {
+ ModuleSP module_sp(GetModule());
+ if (module_sp) {
+ // We can't take the module lock in ObjectFile::GetSymtab(), or we can
+ // deadlock in DWARF indexing when any file asks for the symbol table from
+ // an object file. This currently happens during symbol preloading in
+ // SymbolFileDWARF::PreloadSymbols(): the main thread takes the module
+ // lock and then spins up threads to index the DWARF, and any of those
+ // threads might end up trying to relocate items in the DWARF sections.
+ // That causes ObjectFile::GetSectionData(...) to relocate section data,
+ // which in turn requires the symbol table.
+ //
+ // To work around this, we create the symbol table exactly once using
+ // llvm::call_once, lock it, and then set the unique pointer. Any other
+ // thread that gets hold of the symbol table before parsing is done will
+ // not be able to access its contents, since all APIs in Symtab are
+ // protected by a mutex in the Symtab object itself.
+ llvm::call_once(*m_symtab_once_up, [&]() {
+ ElapsedTime elapsed(module_sp->GetSymtabParseTime());
+ Symtab *symtab = new Symtab(this);
+ std::lock_guard<std::recursive_mutex> symtab_guard(symtab->GetMutex());
+ m_symtab_up.reset(symtab);
+ ParseSymtab(*m_symtab_up);
+ m_symtab_up->Finalize();
+ });
+ }
+ return m_symtab_up.get();
+}
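The once-flag-plus-mutex idiom above is easy to get subtly wrong, so here is a minimal standalone sketch of the same pattern using only the standard library (std::call_once mirrors llvm::call_once); the Table and File names are hypothetical stand-ins for Symtab and ObjectFile.

#include <memory>
#include <mutex>
#include <vector>

// Hypothetical stand-in for Symtab: every accessor takes its internal mutex.
struct Table {
  std::recursive_mutex Mutex;
  std::vector<int> Entries;
  size_t size() {
    std::lock_guard<std::recursive_mutex> guard(Mutex);
    return Entries.size();
  }
};

class File {
  std::unique_ptr<std::once_flag> TableOnce{new std::once_flag()};
  std::unique_ptr<Table> Tab;

public:
  // Mirrors ObjectFile::GetSymtab(): the table is built exactly once, and it
  // is published (Tab is set) while its own mutex is held, so a thread that
  // obtains the pointer early still blocks until parsing has finished.
  Table *getTable() {
    std::call_once(*TableOnce, [&] {
      auto *T = new Table();
      std::lock_guard<std::recursive_mutex> guard(T->Mutex);
      Tab.reset(T);
      T->Entries = {1, 2, 3}; // stand-in for ParseSymtab()
    });
    return Tab.get();
  }

  // Mirrors ObjectFile::ClearSymtab(): a fresh once_flag allows a rebuild.
  void clearTable() {
    TableOnce.reset(new std::once_flag());
    Tab.reset();
  }
};

int main() {
  File F;
  return F.getTable()->size() == 3 ? 0 : 1;
}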
diff --git a/lldb/source/Symbol/Symtab.cpp b/lldb/source/Symbol/Symtab.cpp
index 19c1fee2bb38..c67955523bfb 100644
--- a/lldb/source/Symbol/Symtab.cpp
+++ b/lldb/source/Symbol/Symtab.cpp
@@ -997,10 +997,15 @@ void Symtab::InitAddressIndexes() {
}
}
-void Symtab::CalculateSymbolSizes() {
+void Symtab::Finalize() {
std::lock_guard<std::recursive_mutex> guard(m_mutex);
- // Size computation happens inside InitAddressIndexes.
+ // Calculate the size of symbols inside InitAddressIndexes.
InitAddressIndexes();
+ // Shrink the symbol storage to fit so we don't waste memory.
+ if (m_symbols.capacity() > m_symbols.size()) {
+ collection new_symbols(m_symbols.begin(), m_symbols.end());
+ m_symbols.swap(new_symbols);
+ }
}
Symbol *Symtab::FindSymbolAtFileAddress(addr_t file_addr) {
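The capacity check added to Finalize() is the classic copy-and-swap shrink idiom; a minimal sketch with std::vector, assuming nothing beyond the standard library:

#include <iostream>
#include <vector>

int main() {
  std::vector<int> symbols;
  symbols.reserve(1024); // over-reserved during parsing
  for (int i = 0; i < 10; ++i)
    symbols.push_back(i);

  // Same idiom as Symtab::Finalize(): copying into a right-sized vector and
  // swapping releases the excess capacity.
  if (symbols.capacity() > symbols.size()) {
    std::vector<int> tmp(symbols.begin(), symbols.end());
    symbols.swap(tmp);
  }
  std::cout << symbols.capacity() << "\n"; // typically now equals size()
}

The explicit swap is a deliberate choice: std::vector::shrink_to_fit is only a non-binding request, while swapping with a right-sized copy reliably releases the slack.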
diff --git a/lldb/source/Target/Platform.cpp b/lldb/source/Target/Platform.cpp
index bd455310f08e..af5ca0225169 100644
--- a/lldb/source/Target/Platform.cpp
+++ b/lldb/source/Target/Platform.cpp
@@ -1222,22 +1222,6 @@ Platform::CreateArchList(llvm::ArrayRef<llvm::Triple::ArchType> archs,
return list;
}
-bool Platform::GetSupportedArchitectureAtIndex(uint32_t idx, ArchSpec &arch) {
- const auto &archs = GetSupportedArchitectures();
- if (idx >= archs.size())
- return false;
- arch = archs[idx];
- return true;
-}
-
-std::vector<ArchSpec> Platform::GetSupportedArchitectures() {
- std::vector<ArchSpec> result;
- ArchSpec arch;
- for (uint32_t idx = 0; GetSupportedArchitectureAtIndex(idx, arch); ++idx)
- result.push_back(arch);
- return result;
-}
-
/// Lets a platform answer if it is compatible with a given
/// architecture and the target triple contained within.
bool Platform::IsCompatibleArchitecture(const ArchSpec &arch,
@@ -1563,28 +1547,20 @@ Status
Platform::GetCachedExecutable(ModuleSpec &module_spec,
lldb::ModuleSP &module_sp,
const FileSpecList *module_search_paths_ptr) {
- const auto platform_spec = module_spec.GetFileSpec();
- const auto error =
- LoadCachedExecutable(module_spec, module_sp, module_search_paths_ptr);
- if (error.Success()) {
- module_spec.GetFileSpec() = module_sp->GetFileSpec();
- module_spec.GetPlatformFileSpec() = platform_spec;
- }
-
- return error;
-}
-
-Status
-Platform::LoadCachedExecutable(const ModuleSpec &module_spec,
- lldb::ModuleSP &module_sp,
- const FileSpecList *module_search_paths_ptr) {
- return GetRemoteSharedModule(
+ FileSpec platform_spec = module_spec.GetFileSpec();
+ Status error = GetRemoteSharedModule(
module_spec, nullptr, module_sp,
[&](const ModuleSpec &spec) {
return ResolveRemoteExecutable(spec, module_sp,
module_search_paths_ptr);
},
nullptr);
+ if (error.Success()) {
+ module_spec.GetFileSpec() = module_sp->GetFileSpec();
+ module_spec.GetPlatformFileSpec() = platform_spec;
+ }
+
+ return error;
}
Status Platform::GetRemoteSharedModule(const ModuleSpec &module_spec,
diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp
index 84dc2b94a0eb..94f378886e50 100644
--- a/lldb/source/Target/Process.cpp
+++ b/lldb/source/Target/Process.cpp
@@ -5853,13 +5853,6 @@ Process::AdvanceAddressToNextBranchInstruction(Address default_stop_addr,
return retval;
}
-Status Process::GetMemoryRegionInfo(lldb::addr_t load_addr,
- MemoryRegionInfo &range_info) {
- if (auto abi = GetABI())
- load_addr = abi->FixDataAddress(load_addr);
- return DoGetMemoryRegionInfo(load_addr, range_info);
-}
-
Status
Process::GetMemoryRegions(lldb_private::MemoryRegionInfos &region_list) {
diff --git a/lldb/source/Target/RemoteAwarePlatform.cpp b/lldb/source/Target/RemoteAwarePlatform.cpp
index eb39fc6db304..b92d4d5fcaa7 100644
--- a/lldb/source/Target/RemoteAwarePlatform.cpp
+++ b/lldb/source/Target/RemoteAwarePlatform.cpp
@@ -131,9 +131,9 @@ Status RemoteAwarePlatform::ResolveExecutable(
// architectures that we should be using (in the correct order) and see
// if we can find a match that way
StreamString arch_names;
- for (uint32_t idx = 0; GetSupportedArchitectureAtIndex(
- idx, resolved_module_spec.GetArchitecture());
- ++idx) {
+ llvm::ListSeparator LS;
+ for (const ArchSpec &arch : GetSupportedArchitectures()) {
+ resolved_module_spec.GetArchitecture() = arch;
error = ModuleList::GetSharedModule(resolved_module_spec, exe_module_sp,
module_search_paths_ptr, nullptr, nullptr);
// Did we find an executable using one of the
@@ -144,10 +144,7 @@ Status RemoteAwarePlatform::ResolveExecutable(
error.SetErrorToGenericError();
}
- if (idx > 0)
- arch_names.PutCString(", ");
- arch_names.PutCString(
- resolved_module_spec.GetArchitecture().GetArchitectureName());
+ arch_names << LS << arch.GetArchitectureName();
}
if (error.Fail() || !exe_module_sp) {
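llvm::ListSeparator, used in the rewritten loop above, prints nothing the first time it is streamed and the separator on every subsequent use. A minimal standalone equivalent, assuming only the standard library (the real class lives in llvm/ADT/StringExtras.h and streams into raw_ostream):

#include <iostream>
#include <string>
#include <vector>

// Minimal stand-in for llvm::ListSeparator: empty on first use, then ", ".
struct ListSeparator {
  bool First = true;
  const char *Sep;
  explicit ListSeparator(const char *S = ", ") : Sep(S) {}
};

std::ostream &operator<<(std::ostream &OS, ListSeparator &LS) {
  if (LS.First)
    LS.First = false;
  else
    OS << LS.Sep;
  return OS;
}

int main() {
  std::vector<std::string> archs = {"x86_64", "arm64", "i386"};
  ListSeparator LS;
  for (const auto &A : archs)
    std::cout << LS << A; // prints "x86_64, arm64, i386"
  std::cout << "\n";
}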
diff --git a/lldb/tools/driver/Driver.cpp b/lldb/tools/driver/Driver.cpp
index a51c124f9615..977cc306bb4e 100644
--- a/lldb/tools/driver/Driver.cpp
+++ b/lldb/tools/driver/Driver.cpp
@@ -24,7 +24,6 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/Path.h"
-#include "llvm/Support/Process.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
@@ -44,14 +43,6 @@
#include <cstring>
#include <fcntl.h>
-// Includes for pipe()
-#if defined(_WIN32)
-#include <fcntl.h>
-#include <io.h>
-#else
-#include <unistd.h>
-#endif
-
#if !defined(__APPLE__)
#include "llvm/Support/DataTypes.h"
#endif
@@ -421,60 +412,6 @@ SBError Driver::ProcessArgs(const opt::InputArgList &args, bool &exiting) {
return error;
}
-static inline int OpenPipe(int fds[2], std::size_t size) {
-#ifdef _WIN32
- return _pipe(fds, size, O_BINARY);
-#else
- (void)size;
- return pipe(fds);
-#endif
-}
-
-static ::FILE *PrepareCommandsForSourcing(const char *commands_data,
- size_t commands_size) {
- enum PIPES { READ, WRITE }; // Indexes for the read and write fds
- int fds[2] = {-1, -1};
-
- if (OpenPipe(fds, commands_size) != 0) {
- WithColor::error()
- << "can't create pipe file descriptors for LLDB commands\n";
- return nullptr;
- }
-
- ssize_t nrwr = write(fds[WRITE], commands_data, commands_size);
- if (size_t(nrwr) != commands_size) {
- WithColor::error()
- << format(
- "write(%i, %p, %" PRIu64
- ") failed (errno = %i) when trying to open LLDB commands pipe",
- fds[WRITE], static_cast<const void *>(commands_data),
- static_cast<uint64_t>(commands_size), errno)
- << '\n';
- llvm::sys::Process::SafelyCloseFileDescriptor(fds[READ]);
- llvm::sys::Process::SafelyCloseFileDescriptor(fds[WRITE]);
- return nullptr;
- }
-
- // Close the write end of the pipe, so that the command interpreter will exit
- // when it consumes all the data.
- llvm::sys::Process::SafelyCloseFileDescriptor(fds[WRITE]);
-
- // Open the read file descriptor as a FILE * that we can return as an input
- // handle.
- ::FILE *commands_file = fdopen(fds[READ], "rb");
- if (commands_file == nullptr) {
- WithColor::error() << format("fdopen(%i, \"rb\") failed (errno = %i) "
- "when trying to open LLDB commands pipe",
- fds[READ], errno)
- << '\n';
- llvm::sys::Process::SafelyCloseFileDescriptor(fds[READ]);
- return nullptr;
- }
-
- // 'commands_file' now owns the read descriptor.
- return commands_file;
-}
-
std::string EscapeString(std::string arg) {
std::string::size_type pos = 0;
while ((pos = arg.find_first_of("\"\\", pos)) != std::string::npos) {
@@ -604,21 +541,15 @@ int Driver::MainLoop() {
// Check if we have any data in the commands stream, and if so, save it to a
// temp file
// so we can then run the command interpreter using the file contents.
- const char *commands_data = commands_stream.GetData();
- const size_t commands_size = commands_stream.GetSize();
-
bool go_interactive = true;
- if ((commands_data != nullptr) && (commands_size != 0u)) {
- FILE *commands_file =
- PrepareCommandsForSourcing(commands_data, commands_size);
-
- if (commands_file == nullptr) {
- // We should have already printed an error in PrepareCommandsForSourcing.
+ if ((commands_stream.GetData() != nullptr) &&
+ (commands_stream.GetSize() != 0u)) {
+ SBError error = m_debugger.SetInputString(commands_stream.GetData());
+ if (error.Fail()) {
+ WithColor::error() << error.GetCString() << '\n';
return 1;
}
- m_debugger.SetInputFileHandle(commands_file, true);
-
// Set the debugger into Sync mode when running the command file. Otherwise
// command files that run the target won't run in a sensible way.
bool old_async = m_debugger.GetAsync();
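For context on the change above, here is a hedged sketch of how a standalone client might now source commands from memory via the SB API instead of a pipe; the function name and command text are illustrative, and error handling is abbreviated.

#include "lldb/API/SBCommandInterpreterRunOptions.h"
#include "lldb/API/SBDebugger.h"
#include "lldb/API/SBError.h"

// Sketch: feed commands to the interpreter from an in-memory string.
int RunCannedCommands() {
  lldb::SBDebugger::Initialize();
  lldb::SBDebugger debugger = lldb::SBDebugger::Create();

  lldb::SBError error = debugger.SetInputString("version\nhelp breakpoint\n");
  if (error.Fail())
    return 1;

  lldb::SBCommandInterpreterRunOptions options;
  debugger.RunCommandInterpreter(options);

  lldb::SBDebugger::Destroy(debugger);
  lldb::SBDebugger::Terminate();
  return 0;
}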
@@ -651,12 +582,9 @@ int Driver::MainLoop() {
SBStream crash_commands_stream;
WriteCommandsForSourcing(eCommandPlacementAfterCrash,
crash_commands_stream);
- const char *crash_commands_data = crash_commands_stream.GetData();
- const size_t crash_commands_size = crash_commands_stream.GetSize();
- commands_file =
- PrepareCommandsForSourcing(crash_commands_data, crash_commands_size);
- if (commands_file != nullptr) {
- m_debugger.SetInputFileHandle(commands_file, true);
+ SBError error =
+ m_debugger.SetInputString(crash_commands_stream.GetData());
+ if (error.Success()) {
SBCommandInterpreterRunResult local_results =
m_debugger.RunCommandInterpreter(options);
if (local_results.GetResult() ==
diff --git a/lldb/tools/lldb-server/lldb-platform.cpp b/lldb/tools/lldb-server/lldb-platform.cpp
index d4b54362bb46..9e07f4c8debd 100644
--- a/lldb/tools/lldb-server/lldb-platform.cpp
+++ b/lldb/tools/lldb-server/lldb-platform.cpp
@@ -364,23 +364,17 @@ int main_platform(int argc, char *argv[]) {
fprintf(stderr, "failed to start gdbserver: %s\n", error.AsCString());
}
- // After we connected, we need to get an initial ack from...
- if (platform.HandshakeWithClient()) {
- bool interrupt = false;
- bool done = false;
- while (!interrupt && !done) {
- if (platform.GetPacketAndSendResponse(llvm::None, error, interrupt,
- done) !=
- GDBRemoteCommunication::PacketResult::Success)
- break;
- }
-
- if (error.Fail()) {
- WithColor::error() << error.AsCString() << '\n';
- }
- } else {
- WithColor::error() << "handshake with client failed\n";
+ bool interrupt = false;
+ bool done = false;
+ while (!interrupt && !done) {
+ if (platform.GetPacketAndSendResponse(llvm::None, error, interrupt,
+ done) !=
+ GDBRemoteCommunication::PacketResult::Success)
+ break;
}
+
+ if (error.Fail())
+ WithColor::error() << error.AsCString() << '\n';
}
} while (g_server);
diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h
index d170eff17951..f2183ff52bfb 100644
--- a/llvm/include/llvm-c/Core.h
+++ b/llvm/include/llvm-c/Core.h
@@ -2377,10 +2377,21 @@ void LLVMSetExternallyInitialized(LLVMValueRef GlobalVar, LLVMBool IsExtInit);
*
* @{
*/
+
+/** Deprecated: Use LLVMAddAlias2 instead. */
LLVMValueRef LLVMAddAlias(LLVMModuleRef M, LLVMTypeRef Ty, LLVMValueRef Aliasee,
const char *Name);
/**
+ * Add a GlobalAlias with the given value type, address space and aliasee.
+ *
+ * @see llvm::GlobalAlias::create()
+ */
+LLVMValueRef LLVMAddAlias2(LLVMModuleRef M, LLVMTypeRef ValueTy,
+ unsigned AddrSpace, LLVMValueRef Aliasee,
+ const char *Name);
+
+/**
* Obtain a GlobalAlias value from a Module by its name.
*
* The returned value corresponds to a llvm::GlobalAlias value.
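A short sketch of the new entry point: LLVMAddAlias2 takes the alias's value type and address space explicitly, whereas the deprecated LLVMAddAlias derived them from the aliasee's pointer type. Everything here uses documented C API calls; the module and symbol names are arbitrary.

#include <llvm-c/Core.h>

int main(void) {
  LLVMContextRef Ctx = LLVMContextCreate();
  LLVMModuleRef M = LLVMModuleCreateWithNameInContext("m", Ctx);

  // Create a global and then an alias to it in address space 0.
  LLVMTypeRef I32 = LLVMInt32TypeInContext(Ctx);
  LLVMValueRef G = LLVMAddGlobal(M, I32, "g");
  LLVMSetInitializer(G, LLVMConstInt(I32, 42, 0));

  LLVMAddAlias2(M, I32, /*AddrSpace=*/0, G, "g_alias");

  LLVMDumpModule(M);
  LLVMDisposeModule(M);
  LLVMContextDispose(Ctx);
  return 0;
}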
diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h
index 595cd94b6b8f..c2660502a419 100644
--- a/llvm/include/llvm/ADT/APInt.h
+++ b/llvm/include/llvm/ADT/APInt.h
@@ -1458,10 +1458,8 @@ public:
/// uint64_t. The bitwidth must be <= 64 or the value must fit within a
/// uint64_t. Otherwise an assertion will result.
uint64_t getZExtValue() const {
- if (isSingleWord()) {
- assert(BitWidth && "zero width values not allowed");
+ if (isSingleWord())
return U.VAL;
- }
assert(getActiveBits() <= 64 && "Too many bits for uint64_t");
return U.pVal[0];
}
diff --git a/llvm/include/llvm/ADT/SCCIterator.h b/llvm/include/llvm/ADT/SCCIterator.h
index 8a7c0a78a0fc..ad35e09f0f74 100644
--- a/llvm/include/llvm/ADT/SCCIterator.h
+++ b/llvm/include/llvm/ADT/SCCIterator.h
@@ -28,6 +28,10 @@
#include <cassert>
#include <cstddef>
#include <iterator>
+#include <queue>
+#include <set>
+#include <unordered_map>
+#include <unordered_set>
#include <vector>
namespace llvm {
@@ -234,6 +238,135 @@ template <class T> scc_iterator<T> scc_end(const T &G) {
return scc_iterator<T>::end(G);
}
+/// Sort the nodes of a directed SCC in the decreasing order of the edge
+/// weights. The instantiating GraphT type should have a weighted edge type
+/// declared in its graph traits in order to use this iterator.
+///
+/// This is implemented with Kruskal's spanning tree algorithm followed by a
+/// BFS walk. First a maximum spanning tree (forest) is built from all edges
+/// within the SCC collection. Then a BFS walk is initiated on the tree nodes
+/// that have no predecessor. Finally, the computed BFS order is used as the
+/// traversal order of the nodes of the SCC. Such an order ensures that
+/// high-weighted edges are visited first during the traversal.
+template <class GraphT, class GT = GraphTraits<GraphT>>
+class scc_member_iterator {
+ using NodeType = typename GT::NodeType;
+ using EdgeType = typename GT::EdgeType;
+ using NodesType = std::vector<NodeType *>;
+
+ // Auxiliary node information used during the MST calculation.
+ struct NodeInfo {
+ NodeInfo *Group = this; // Union-find parent; a root points to itself.
+ uint32_t Rank = 0; // Union-by-rank heuristic.
+ bool Visited = true; // Cleared for MST edge targets; nodes left true
+ // seed the BFS as the forest roots.
+ };
+
+ // Find the root group of the node and compress the path from node to the
+ // root.
+ NodeInfo *find(NodeInfo *Node) {
+ if (Node->Group != Node)
+ Node->Group = find(Node->Group);
+ return Node->Group;
+ }
+
+ // Union the source and target node into the same group and return true.
+ // Returns false if they are already in the same group.
+ bool unionGroups(const EdgeType *Edge) {
+ NodeInfo *G1 = find(&NodeInfoMap[Edge->Source]);
+ NodeInfo *G2 = find(&NodeInfoMap[Edge->Target]);
+
+ // If the edge forms a cycle, do not add it to MST
+ if (G1 == G2)
+ return false;
+
+ // Make the root of the smaller-rank tree a direct child of the root of the
+ // higher-rank tree.
+ if (G1->Rank < G2->Rank)
+ G1->Group = G2;
+ else {
+ G2->Group = G1;
+ // If the ranks are the same, bump the rank of the surviving root by one.
+ if (G1->Rank == G2->Rank)
+ G1->Rank++;
+ }
+ return true;
+ }
+
+ std::unordered_map<NodeType *, NodeInfo> NodeInfoMap;
+ NodesType Nodes;
+
+public:
+ scc_member_iterator(const NodesType &InputNodes);
+
+ NodesType &operator*() { return Nodes; }
+};
+
+template <class GraphT, class GT>
+scc_member_iterator<GraphT, GT>::scc_member_iterator(
+ const NodesType &InputNodes) {
+ if (InputNodes.size() <= 1) {
+ Nodes = InputNodes;
+ return;
+ }
+
+ // Initialize auxiliary node information.
+ NodeInfoMap.clear();
+ for (auto *Node : InputNodes) {
+ // This subscript access is used specifically to construct a `NodeInfo`
+ // object in place. An insert operation would involve a copy construction,
+ // which would invalidate the initial value of the `Group` field (it must
+ // point to the object itself).
+ (void)NodeInfoMap[Node].Group;
+ }
+
+ // Sort edges by weights.
+ struct EdgeComparer {
+ bool operator()(const EdgeType *L, const EdgeType *R) const {
+ return L->Weight > R->Weight;
+ }
+ };
+
+ std::multiset<const EdgeType *, EdgeComparer> SortedEdges;
+ for (auto *Node : InputNodes) {
+ for (auto &Edge : Node->Edges) {
+ if (NodeInfoMap.count(Edge.Target))
+ SortedEdges.insert(&Edge);
+ }
+ }
+
+ // Traverse all the edges and compute the Maximum Weight Spanning Tree
+ // using Kruskal's algorithm.
+ std::unordered_set<const EdgeType *> MSTEdges;
+ for (auto *Edge : SortedEdges) {
+ if (unionGroups(Edge))
+ MSTEdges.insert(Edge);
+ }
+
+ // Do a BFS on the MST, starting from nodes that have no incoming edge.
+ // These nodes are the "roots" of the MST forest. This ensures that nodes
+ // are visited before their descendants are, and thus that hot edges are
+ // processed before cold edges, given how the MST is computed.
+ for (const auto *Edge : MSTEdges)
+ NodeInfoMap[Edge->Target].Visited = false;
+
+ std::queue<NodeType *> Queue;
+ for (auto &Node : NodeInfoMap)
+ if (Node.second.Visited)
+ Queue.push(Node.first);
+
+ while (!Queue.empty()) {
+ auto *Node = Queue.front();
+ Queue.pop();
+ Nodes.push_back(Node);
+ for (auto &Edge : Node->Edges) {
+ if (MSTEdges.count(&Edge) && !NodeInfoMap[Edge.Target].Visited) {
+ NodeInfoMap[Edge.Target].Visited = true;
+ Queue.push(Edge.Target);
+ }
+ }
+ }
+
+ assert(InputNodes.size() == Nodes.size() && "missing nodes in MST");
+ std::reverse(Nodes.begin(), Nodes.end());
+}
} // end namespace llvm
#endif // LLVM_ADT_SCCITERATOR_H
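The find()/unionGroups() pair above is a textbook union-find. A self-contained sketch of the same structure (path compression plus union-by-rank) that also mirrors how Kruskal's loop filters out cycle-forming edges; the UnionFind name and integer node IDs are illustrative:

#include <cassert>
#include <utility>
#include <vector>

struct UnionFind {
  std::vector<int> Parent, Rank;
  explicit UnionFind(int N) : Parent(N), Rank(N, 0) {
    for (int I = 0; I < N; ++I)
      Parent[I] = I; // every node starts as its own root
  }
  int find(int X) {
    if (Parent[X] != X)
      Parent[X] = find(Parent[X]); // compress the path to the root
    return Parent[X];
  }
  // Returns false if X and Y are already connected, i.e. the edge would
  // close a cycle, mirroring unionGroups() above.
  bool unite(int X, int Y) {
    int RX = find(X), RY = find(Y);
    if (RX == RY)
      return false;
    if (Rank[RX] < Rank[RY])
      std::swap(RX, RY); // ensure RX is the higher-rank root
    Parent[RY] = RX;
    if (Rank[RX] == Rank[RY])
      Rank[RX]++;
    return true;
  }
};

int main() {
  UnionFind UF(4);
  assert(UF.unite(0, 1));
  assert(UF.unite(1, 2));
  assert(!UF.unite(0, 2)); // would form a cycle, so Kruskal skips this edge
}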
diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h
index 48f15b02283a..f9b658ca960a 100644
--- a/llvm/include/llvm/ADT/STLExtras.h
+++ b/llvm/include/llvm/ADT/STLExtras.h
@@ -1016,20 +1016,39 @@ public:
private:
std::tuple<RangeTs...> Ranges;
- template <size_t... Ns> iterator begin_impl(std::index_sequence<Ns...>) {
+ template <size_t... Ns>
+ iterator begin_impl(std::index_sequence<Ns...>) {
+ return iterator(std::get<Ns>(Ranges)...);
+ }
+ template <size_t... Ns>
+ iterator begin_impl(std::index_sequence<Ns...>) const {
return iterator(std::get<Ns>(Ranges)...);
}
template <size_t... Ns> iterator end_impl(std::index_sequence<Ns...>) {
return iterator(make_range(std::end(std::get<Ns>(Ranges)),
std::end(std::get<Ns>(Ranges)))...);
}
+ template <size_t... Ns> iterator end_impl(std::index_sequence<Ns...>) const {
+ return iterator(make_range(std::end(std::get<Ns>(Ranges)),
+ std::end(std::get<Ns>(Ranges)))...);
+ }
public:
concat_range(RangeTs &&... Ranges)
: Ranges(std::forward<RangeTs>(Ranges)...) {}
- iterator begin() { return begin_impl(std::index_sequence_for<RangeTs...>{}); }
- iterator end() { return end_impl(std::index_sequence_for<RangeTs...>{}); }
+ iterator begin() {
+ return begin_impl(std::index_sequence_for<RangeTs...>{});
+ }
+ iterator begin() const {
+ return begin_impl(std::index_sequence_for<RangeTs...>{});
+ }
+ iterator end() {
+ return end_impl(std::index_sequence_for<RangeTs...>{});
+ }
+ iterator end() const {
+ return end_impl(std::index_sequence_for<RangeTs...>{});
+ }
};
} // end namespace detail
@@ -1977,10 +1996,16 @@ public:
enumerator_iter<R> begin() {
return enumerator_iter<R>(0, std::begin(TheRange));
}
+ enumerator_iter<R> begin() const {
+ return enumerator_iter<R>(0, std::begin(TheRange));
+ }
enumerator_iter<R> end() {
return enumerator_iter<R>(std::end(TheRange));
}
+ enumerator_iter<R> end() const {
+ return enumerator_iter<R>(std::end(TheRange));
+ }
private:
R TheRange;
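A hedged sketch of what the new const overloads enable: the range objects returned by enumerate() and concat() can now be stored in const variables and still iterated. The printIndexed helper is hypothetical.

#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>

void printIndexed(const std::vector<int> &A, const std::vector<int> &B) {
  const auto Enumerated = llvm::enumerate(A); // const range object
  for (const auto &En : Enumerated)           // needs the new const begin()/end()
    llvm::outs() << En.index() << ": " << En.value() << "\n";

  const auto Both = llvm::concat<const int>(A, B);
  for (int V : Both)
    llvm::outs() << V << " ";
  llvm::outs() << "\n";
}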
diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index c26dbc457949..ea4c0312e073 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -20,6 +20,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
@@ -50,6 +51,7 @@ enum class RecurKind {
FMul, ///< Product of floats.
FMin, ///< FP min implemented in terms of select(cmp()).
FMax, ///< FP max implemented in terms of select(cmp()).
+ FMulAdd, ///< Fused multiply-add of floats (a * b + c).
SelectICmp, ///< Integer select(icmp(),x,y) where one of (x,y) is loop
///< invariant
SelectFCmp ///< Integer select(fcmp(),x,y) where one of (x,y) is loop
@@ -260,6 +262,12 @@ public:
SmallVector<Instruction *, 4> getReductionOpChain(PHINode *Phi,
Loop *L) const;
+ /// Returns true if the instruction is a call to the llvm.fmuladd intrinsic.
+ static bool isFMulAddIntrinsic(Instruction *I) {
+ return isa<IntrinsicInst>(I) &&
+ cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::fmuladd;
+ }
+
private:
// The starting value of the recurrence.
// It does not have to be zero!
diff --git a/llvm/include/llvm/Analysis/Lint.h b/llvm/include/llvm/Analysis/Lint.h
index 6eb637e72782..4ceae2d29f16 100644
--- a/llvm/include/llvm/Analysis/Lint.h
+++ b/llvm/include/llvm/Analysis/Lint.h
@@ -6,11 +6,10 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines lint interfaces that can be used for some sanity checking
-// of input to the system, and for checking that transformations
-// haven't done something bad. In contrast to the Verifier, the Lint checker
-// checks for undefined behavior or constructions with likely unintended
-// behavior.
+// This file defines lint interfaces that can be used for some validation of
+// input to the system, and for checking that transformations haven't done
+// something bad. In contrast to the Verifier, the Lint checker checks for
+// undefined behavior or constructions with likely unintended behavior.
//
// To see what specifically is checked, look at Lint.cpp
//
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index a2260688e3d6..df50611832ce 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1378,6 +1378,8 @@ private:
/// includes an exact count and a maximum count.
///
class BackedgeTakenInfo {
+ friend class ScalarEvolution;
+
/// A list of computable exits and their not-taken counts. Loops almost
/// never have more than one computable exit.
SmallVector<ExitNotTakenInfo, 1> ExitNotTaken;
@@ -1398,9 +1400,6 @@ private:
/// True iff the backedge is taken either exactly Max or zero times.
bool MaxOrZero = false;
- /// SCEV expressions used in any of the ExitNotTakenInfo counts.
- SmallPtrSet<const SCEV *, 4> Operands;
-
bool isComplete() const { return IsComplete; }
const SCEV *getConstantMax() const { return ConstantMax; }
@@ -1466,10 +1465,6 @@ private:
/// Return true if the number of times this backedge is taken is either the
/// value returned by getConstantMax or zero.
bool isConstantMaxOrZero(ScalarEvolution *SE) const;
-
- /// Return true if any backedge taken count expressions refer to the given
- /// subexpression.
- bool hasOperand(const SCEV *S) const;
};
/// Cache the backedge-taken count of the loops for this function as they
@@ -1480,6 +1475,10 @@ private:
/// function as they are computed.
DenseMap<const Loop *, BackedgeTakenInfo> PredicatedBackedgeTakenCounts;
+ /// Loops whose backedge taken counts directly use this non-constant SCEV.
+ DenseMap<const SCEV *, SmallPtrSet<PointerIntPair<const Loop *, 1, bool>, 4>>
+ BECountUsers;
+
/// This map contains entries for all of the PHI instructions that we
/// attempt to compute constant evolutions for. This allows us to avoid
/// potentially expensive recomputation of these properties. An instruction
@@ -1492,6 +1491,11 @@ private:
DenseMap<const SCEV *, SmallVector<std::pair<const Loop *, const SCEV *>, 2>>
ValuesAtScopes;
+ /// Reverse map for invalidation purposes: Stores of which SCEV and which
+ /// loop this is the value-at-scope of.
+ DenseMap<const SCEV *, SmallVector<std::pair<const Loop *, const SCEV *>, 2>>
+ ValuesAtScopesUsers;
+
/// Memoized computeLoopDisposition results.
DenseMap<const SCEV *,
SmallVector<PointerIntPair<const Loop *, 2, LoopDisposition>, 2>>
@@ -1616,11 +1620,6 @@ private:
/// SCEV+Loop pair.
const SCEV *computeSCEVAtScope(const SCEV *S, const Loop *L);
- /// This looks up computed SCEV values for all instructions that depend on
- /// the given instruction and removes them from the ValueExprMap map if they
- /// reference SymName. This is used during PHI resolution.
- void forgetSymbolicName(Instruction *I, const SCEV *SymName);
-
/// Return the BackedgeTakenInfo for the given loop, lazily computing new
/// values if the loop hasn't been analyzed yet. The returned result is
/// guaranteed not to be predicated.
@@ -1911,6 +1910,9 @@ private:
bool splitBinaryAdd(const SCEV *Expr, const SCEV *&L, const SCEV *&R,
SCEV::NoWrapFlags &Flags);
+ /// Forget predicated/non-predicated backedge taken counts for the given loop.
+ void forgetBackedgeTakenCounts(const Loop *L, bool Predicated);
+
/// Drop memoized information for all \p SCEVs.
void forgetMemoizedResults(ArrayRef<const SCEV *> SCEVs);
@@ -1923,6 +1925,9 @@ private:
/// Erase Value from ValueExprMap and ExprValueMap.
void eraseValueFromMap(Value *V);
+ /// Insert V to S mapping into ValueExprMap and ExprValueMap.
+ void insertValueToMap(Value *V, const SCEV *S);
+
/// Return false iff given SCEV contains a SCEVUnknown with NULL value-
/// pointer.
bool checkValidity(const SCEV *S) const;
diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def
index ded53617b304..9c1abef33b28 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def
@@ -1048,6 +1048,12 @@ TLI_DEFINE_STRING_INTERNAL("memset")
/// void memset_pattern16(void *b, const void *pattern16, size_t len);
TLI_DEFINE_ENUM_INTERNAL(memset_pattern16)
TLI_DEFINE_STRING_INTERNAL("memset_pattern16")
+/// void memset_pattern4(void *b, const void *pattern4, size_t len);
+TLI_DEFINE_ENUM_INTERNAL(memset_pattern4)
+TLI_DEFINE_STRING_INTERNAL("memset_pattern4")
+/// void memset_pattern8(void *b, const void *pattern8, size_t len);
+TLI_DEFINE_ENUM_INTERNAL(memset_pattern8)
+TLI_DEFINE_STRING_INTERNAL("memset_pattern8")
/// int mkdir(const char *path, mode_t mode);
TLI_DEFINE_ENUM_INTERNAL(mkdir)
TLI_DEFINE_STRING_INTERNAL("mkdir")
diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h
index 24e2318de48b..751c88a4ecbb 100644
--- a/llvm/include/llvm/Analysis/VectorUtils.h
+++ b/llvm/include/llvm/Analysis/VectorUtils.h
@@ -115,7 +115,7 @@ struct VFShape {
return {EC, Parameters};
}
- /// Sanity check on the Parameters in the VFShape.
+ /// Validation check on the Parameters in the VFShape.
bool hasValidParameterList() const;
};
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index a270fd399aeb..c199e933116a 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -1602,6 +1602,13 @@ enum {
NT_FREEBSD_PROCSTAT_AUXV = 16,
};
+// NetBSD core note types.
+enum {
+ NT_NETBSDCORE_PROCINFO = 1,
+ NT_NETBSDCORE_AUXV = 2,
+ NT_NETBSDCORE_LWPSTATUS = 24,
+};
+
// OpenBSD core note types.
enum {
NT_OPENBSD_PROCINFO = 10,
diff --git a/llvm/include/llvm/CodeGen/CommandFlags.h b/llvm/include/llvm/CodeGen/CommandFlags.h
index ed3cd54df272..73d39fecc268 100644
--- a/llvm/include/llvm/CodeGen/CommandFlags.h
+++ b/llvm/include/llvm/CodeGen/CommandFlags.h
@@ -130,6 +130,7 @@ bool getEnableMachineFunctionSplitter();
bool getEnableDebugEntryValues();
bool getValueTrackingVariableLocations();
+Optional<bool> getExplicitValueTrackingVariableLocations();
bool getForceDwarfFrameSection();
@@ -170,6 +171,10 @@ void setFunctionAttributes(StringRef CPU, StringRef Features, Function &F);
/// Set function attributes of functions in Module M based on CPU,
/// Features, and command line flags.
void setFunctionAttributes(StringRef CPU, StringRef Features, Module &M);
+
+/// Should value-tracking variable locations / instruction referencing be
+/// enabled by default for this triple?
+bool getDefaultValueTrackingVariableLocations(const llvm::Triple &T);
} // namespace codegen
} // namespace llvm
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index ff4ad4b72636..f3fa652b0175 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -564,6 +564,7 @@ public:
/// This variant does not erase \p MI after calling the build function.
void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo);
+ bool matchOrShiftToFunnelShift(MachineInstr &MI, BuildFnTy &MatchInfo);
bool matchFunnelShiftToRotate(MachineInstr &MI);
void applyFunnelShiftToRotate(MachineInstr &MI);
bool matchRotateOutOfRange(MachineInstr &MI);
@@ -648,6 +649,54 @@ public:
/// (fma fneg(x), fneg(y), z) -> (fma x, y, z)
bool matchRedundantNegOperands(MachineInstr &MI, BuildFnTy &MatchInfo);
+ bool canCombineFMadOrFMA(MachineInstr &MI, bool &AllowFusionGlobally,
+ bool &HasFMAD, bool &Aggressive,
+ bool CanReassociate = false);
+
+ /// Transform (fadd (fmul x, y), z) -> (fma x, y, z)
+ /// (fadd (fmul x, y), z) -> (fmad x, y, z)
+ bool matchCombineFAddFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo);
+
+ /// Transform (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
+ /// (fadd (fpext (fmul x, y)), z) -> (fmad (fpext x), (fpext y), z)
+ bool matchCombineFAddFpExtFMulToFMadOrFMA(MachineInstr &MI,
+ BuildFnTy &MatchInfo);
+
+ /// Transform (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
+ /// (fadd (fmad x, y, (fmul u, v)), z) -> (fmad x, y, (fmad u, v, z))
+ bool matchCombineFAddFMAFMulToFMadOrFMA(MachineInstr &MI,
+ BuildFnTy &MatchInfo);
+
+ // Transform (fadd (fma x, y, (fpext (fmul u, v))), z)
+ // -> (fma x, y, (fma (fpext u), (fpext v), z))
+ // (fadd (fmad x, y, (fpext (fmul u, v))), z)
+ // -> (fmad x, y, (fmad (fpext u), (fpext v), z))
+ bool matchCombineFAddFpExtFMulToFMadOrFMAAggressive(MachineInstr &MI,
+ BuildFnTy &MatchInfo);
+
+ /// Transform (fsub (fmul x, y), z) -> (fma x, y, -z)
+ /// (fsub (fmul x, y), z) -> (fmad x, y, -z)
+ bool matchCombineFSubFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo);
+
+ /// Transform (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
+ /// (fsub (fneg (fmul, x, y)), z) -> (fmad (fneg x), y, (fneg z))
+ bool matchCombineFSubFNegFMulToFMadOrFMA(MachineInstr &MI,
+ BuildFnTy &MatchInfo);
+
+ /// Transform (fsub (fpext (fmul x, y)), z)
+ /// -> (fma (fpext x), (fpext y), (fneg z))
+ /// (fsub (fpext (fmul x, y)), z)
+ /// -> (fmad (fpext x), (fpext y), (fneg z))
+ bool matchCombineFSubFpExtFMulToFMadOrFMA(MachineInstr &MI,
+ BuildFnTy &MatchInfo);
+
+ /// Transform (fsub (fpext (fneg (fmul x, y))), z)
+ /// -> (fneg (fma (fpext x), (fpext y), z))
+ /// (fsub (fpext (fneg (fmul x, y))), z)
+ /// -> (fneg (fmad (fpext x), (fpext y), z))
+ bool matchCombineFSubFpExtFNegFMulToFMadOrFMA(MachineInstr &MI,
+ BuildFnTy &MatchInfo);
+
private:
/// Given a non-indexed load or store instruction \p MI, find an offset that
/// can be usefully and legally folded into it as a post-indexing operation.
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
index e813d030eec3..a41166bb4c6b 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
@@ -129,6 +129,43 @@ inline SpecificConstantMatch m_SpecificICst(int64_t RequestedValue) {
return SpecificConstantMatch(RequestedValue);
}
+/// Matcher for a specific constant splat.
+struct SpecificConstantSplatMatch {
+ int64_t RequestedVal;
+ SpecificConstantSplatMatch(int64_t RequestedVal)
+ : RequestedVal(RequestedVal) {}
+ bool match(const MachineRegisterInfo &MRI, Register Reg) {
+ return isBuildVectorConstantSplat(Reg, MRI, RequestedVal,
+ /* AllowUndef */ false);
+ }
+};
+
+/// Matches a constant splat of \p RequestedValue.
+inline SpecificConstantSplatMatch m_SpecificICstSplat(int64_t RequestedValue) {
+ return SpecificConstantSplatMatch(RequestedValue);
+}
+
+/// Matcher for a specific constant or constant splat.
+struct SpecificConstantOrSplatMatch {
+ int64_t RequestedVal;
+ SpecificConstantOrSplatMatch(int64_t RequestedVal)
+ : RequestedVal(RequestedVal) {}
+ bool match(const MachineRegisterInfo &MRI, Register Reg) {
+ int64_t MatchedVal;
+ if (mi_match(Reg, MRI, m_ICst(MatchedVal)) && MatchedVal == RequestedVal)
+ return true;
+ return isBuildVectorConstantSplat(Reg, MRI, RequestedVal,
+ /* AllowUndef */ false);
+ }
+};
+
+/// Matches a \p RequestedValue constant or a constant splat of \p
+/// RequestedValue.
+inline SpecificConstantOrSplatMatch
+m_SpecificICstOrSplat(int64_t RequestedValue) {
+ return SpecificConstantOrSplatMatch(RequestedValue);
+}
+
///{
/// Convenience matchers for specific integer values.
inline SpecificConstantMatch m_ZeroInt() { return SpecificConstantMatch(0); }
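A hedged sketch of how a combine routine might use the new splat-aware matcher; the function is hypothetical and assumes the usual combiner context (MI is the instruction whose def is being inspected, MRI its register info).

#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"

// Returns true for both scalar G_SHL %x, 1 and vector G_SHL %x, (splat 1).
bool isShiftByOne(llvm::MachineInstr &MI, llvm::MachineRegisterInfo &MRI) {
  using namespace llvm::MIPatternMatch;
  llvm::Register Src;
  return mi_match(MI.getOperand(0).getReg(), MRI,
                  m_GShl(m_Reg(Src), m_SpecificICstOrSplat(1)));
}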
@@ -489,6 +526,11 @@ inline UnaryOp_match<SrcTy, TargetOpcode::COPY> m_Copy(SrcTy &&Src) {
return UnaryOp_match<SrcTy, TargetOpcode::COPY>(std::forward<SrcTy>(Src));
}
+template <typename SrcTy>
+inline UnaryOp_match<SrcTy, TargetOpcode::G_FSQRT> m_GFSqrt(const SrcTy &Src) {
+ return UnaryOp_match<SrcTy, TargetOpcode::G_FSQRT>(Src);
+}
+
// General helper for generic MI compares, i.e. G_ICMP and G_FCMP
// TODO: Allow checking a specific predicate.
template <typename Pred_P, typename LHS_P, typename RHS_P, unsigned Opcode>
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index 86545b976b8d..4126e2ac7b8f 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -378,6 +378,18 @@ Optional<FPValueAndVReg> getFConstantSplat(Register VReg,
const MachineRegisterInfo &MRI,
bool AllowUndef = true);
+/// Return true if the specified register is defined by G_BUILD_VECTOR or
+/// G_BUILD_VECTOR_TRUNC where all of the elements are \p SplatValue or undef.
+bool isBuildVectorConstantSplat(const Register Reg,
+ const MachineRegisterInfo &MRI,
+ int64_t SplatValue, bool AllowUndef);
+
+/// Return true if the specified instruction is a G_BUILD_VECTOR or
+/// G_BUILD_VECTOR_TRUNC where all of the elements are \p SplatValue or undef.
+bool isBuildVectorConstantSplat(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ int64_t SplatValue, bool AllowUndef);
+
/// Return true if the specified instruction is a G_BUILD_VECTOR or
/// G_BUILD_VECTOR_TRUNC where all of the elements are 0 or undef.
bool isBuildVectorAllZeros(const MachineInstr &MI,
diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h
index dcbd19ac6b5a..ec23dde0c6c0 100644
--- a/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -938,7 +938,8 @@ public:
int64_t Offset, LLT Ty);
MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO,
int64_t Offset, uint64_t Size) {
- return getMachineMemOperand(MMO, Offset, LLT::scalar(8 * Size));
+ return getMachineMemOperand(
+ MMO, Offset, Size == ~UINT64_C(0) ? LLT() : LLT::scalar(8 * Size));
}
/// getMachineMemOperand - Allocate a new MachineMemOperand by copying
diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
index fa22ca6a98ac..a855a0797723 100644
--- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
@@ -139,10 +139,13 @@ public:
///
int getOffsetOfLocalArea() const { return LocalAreaOffset; }
- /// isFPCloseToIncomingSP - Return true if the frame pointer is close to
- /// the incoming stack pointer, false if it is close to the post-prologue
- /// stack pointer.
- virtual bool isFPCloseToIncomingSP() const { return true; }
+ /// Control the placement of special register scavenging spill slots when
+ /// allocating a stack frame.
+ ///
+ /// If this returns true, the frame indexes used by the RegScavenger will be
+ /// allocated closest to the incoming stack pointer.
+ virtual bool allocateScavengingFrameIndexesNearIncomingSP(
+ const MachineFunction &MF) const;
/// assignCalleeSavedSpillSlots - Allows target to override spill slot
/// assignment logic. If implemented, assignCalleeSavedSpillSlots() should
@@ -220,6 +223,9 @@ public:
virtual void inlineStackProbe(MachineFunction &MF,
MachineBasicBlock &PrologueMBB) const {}
+ /// Does the stack probe function call return with a modified stack pointer?
+ virtual bool stackProbeFunctionModifiesSP() const { return false; }
+
/// Adjust the prologue to have the function use segmented stacks. This works
/// by adding a check even before the "normal" function prologue.
virtual void adjustForSegmentedStacks(MachineFunction &MF,
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 8bc730a3eda5..d43dd9fac85d 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1913,6 +1913,12 @@ public:
"Target didn't implement TargetInstrInfo::getOutliningCandidateInfo!");
}
+ /// Optional target hook to create the LLVM IR attributes for the outlined
+ /// function. If overridden, the overriding function must call the default
+ /// implementation.
+ virtual void mergeOutliningCandidateAttributes(
+ Function &F, std::vector<outliner::Candidate> &Candidates) const;
+
/// Returns how or if \p MI should be outlined.
virtual outliner::InstrType
getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags) const {
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 87f5168ec48f..d862701c37d7 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -425,6 +425,12 @@ public:
return true;
}
+ /// Return true if the @llvm.get.active.lane.mask intrinsic should be expanded
+ /// using generic code in SelectionDAGBuilder.
+ virtual bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const {
+ return true;
+ }
+
/// Return true if it is profitable to convert a select of FP constants into
/// a constant pool load whose address depends on the select condition. The
/// parameter may be used to differentiate a select with FP compare from
@@ -806,9 +812,12 @@ public:
/// Return true if target always benefits from combining into FMA for a
/// given value type. This must typically return false on targets where FMA
/// takes more cycles to execute than FADD.
- virtual bool enableAggressiveFMAFusion(EVT VT) const {
- return false;
- }
+ virtual bool enableAggressiveFMAFusion(EVT VT) const { return false; }
+
+ /// Return true if target always benefits from combining into FMA for a
+ /// given value type. This must typically return false on targets where FMA
+ /// takes more cycles to execute than FADD.
+ virtual bool enableAggressiveFMAFusion(LLT Ty) const { return false; }
/// Return the ValueType of the result of SETCC operations.
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
@@ -2710,6 +2719,14 @@ public:
/// Return true if an fpext operation input to an \p Opcode operation is free
/// (for instance, because half-precision floating-point numbers are
/// implicitly extended to float-precision) for an FMA instruction.
+ virtual bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode,
+ LLT DestTy, LLT SrcTy) const {
+ return false;
+ }
+
+ /// Return true if an fpext operation input to an \p Opcode operation is free
+ /// (for instance, because half-precision floating-point numbers are
+ /// implicitly extended to float-precision) for an FMA instruction.
virtual bool isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode,
EVT DestVT, EVT SrcVT) const {
assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
@@ -2748,11 +2765,47 @@ public:
return false;
}
+ /// Return true if an FMA operation is faster than a pair of fmul and fadd
+ /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
+ /// returns true, otherwise fmuladd is expanded to fmul + fadd.
+ ///
+ /// NOTE: This may be called before legalization on types for which FMAs are
+ /// not legal, but should return true if those types will eventually legalize
+ /// to types that support FMAs. After legalization, it will only be called on
+ /// types that support FMAs (via Legal or Custom actions)
+ virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+ LLT) const {
+ return false;
+ }
+
/// IR version
virtual bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *) const {
return false;
}
+ /// Returns true if \p MI can be combined with another instruction to
+ /// form TargetOpcode::G_FMAD. \p N may be an TargetOpcode::G_FADD,
+ /// TargetOpcode::G_FSUB, or an TargetOpcode::G_FMUL which will be
+ /// distributed into an fadd/fsub.
+ virtual bool isFMADLegal(const MachineInstr &MI, LLT Ty) const {
+ assert((MI.getOpcode() == TargetOpcode::G_FADD ||
+ MI.getOpcode() == TargetOpcode::G_FSUB ||
+ MI.getOpcode() == TargetOpcode::G_FMUL) &&
+ "unexpected node in FMAD forming combine");
+ switch (Ty.getScalarSizeInBits()) {
+ case 16:
+ return isOperationLegal(TargetOpcode::G_FMAD, MVT::f16);
+ case 32:
+ return isOperationLegal(TargetOpcode::G_FMAD, MVT::f32);
+ case 64:
+ return isOperationLegal(TargetOpcode::G_FMAD, MVT::f64);
+ default:
+ break;
+ }
+
+ return false;
+ }
+
/// Returns true if \p N can be combined with another node to form an
/// ISD::FMAD. \p N may be an ISD::FADD, ISD::FSUB, or an ISD::FMUL which
/// will be distributed into an fadd/fsub.
@@ -2852,6 +2905,12 @@ public:
/// passed to the fp16 to fp conversion library function.
virtual bool shouldKeepZExtForFP16Conv() const { return false; }
+ /// Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT
+ /// from min(max(fptoi)) saturation patterns.
+ virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const {
+ return isOperationLegalOrCustom(Op, VT);
+ }
+
//===--------------------------------------------------------------------===//
// Runtime Library hooks
//
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
index 902973ff5722..ae1afeb668be 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
@@ -156,6 +156,11 @@ public:
NormalUnits.getNumInfoUnits());
}
+ const DWARFUnitVector &getNormalUnitsVector() {
+ parseNormalUnits();
+ return NormalUnits;
+ }
+
/// Get units from .debug_types in this context.
unit_iterator_range types_section_units() {
parseNormalUnits();
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
index d471b80c7fe1..505686bfbf59 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
@@ -14,6 +14,7 @@
#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
#include "llvm/DebugInfo/DWARF/DWARFDie.h"
#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include <cstdint>
#include <map>
#include <set>
@@ -153,8 +154,8 @@ private:
/// \param SectionKind The object-file section kind that S comes from.
///
/// \returns The number of errors that occurred during verification.
- unsigned verifyUnitSection(const DWARFSection &S,
- DWARFSectionKind SectionKind);
+ unsigned verifyUnitSection(const DWARFSection &S);
+ unsigned verifyUnits(const DWARFUnitVector &Units);
/// Verifies that a call site entry is nested within a subprogram with a
/// DW_AT_call attribute.
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h
index 362e8ab8e296..2180be3341e1 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h
@@ -519,6 +519,7 @@ private:
/// symbols of an error.
class MaterializationResponsibility {
friend class ExecutionSession;
+ friend class JITDylib;
public:
MaterializationResponsibility(MaterializationResponsibility &&) = delete;
@@ -535,10 +536,10 @@ public:
/// Returns the target JITDylib that these symbols are being materialized
/// into.
- JITDylib &getTargetJITDylib() const { return *JD; }
+ JITDylib &getTargetJITDylib() const { return JD; }
/// Returns the ExecutionSession for this instance.
- ExecutionSession &getExecutionSession();
+ ExecutionSession &getExecutionSession() const;
/// Returns the symbol flags map for this responsibility instance.
/// Note: The returned flags may have transient flags (Lazy, Materializing)
@@ -640,15 +641,16 @@ public:
private:
/// Create a MaterializationResponsibility for the given JITDylib and
/// initial symbols.
- MaterializationResponsibility(JITDylibSP JD, SymbolFlagsMap SymbolFlags,
+ MaterializationResponsibility(ResourceTrackerSP RT,
+ SymbolFlagsMap SymbolFlags,
SymbolStringPtr InitSymbol)
- : JD(std::move(JD)), SymbolFlags(std::move(SymbolFlags)),
- InitSymbol(std::move(InitSymbol)) {
- assert(this->JD && "Cannot initialize with null JITDylib");
+ : JD(RT->getJITDylib()), RT(std::move(RT)),
+ SymbolFlags(std::move(SymbolFlags)), InitSymbol(std::move(InitSymbol)) {
assert(!this->SymbolFlags.empty() && "Materializing nothing?");
}
- JITDylibSP JD;
+ JITDylib &JD;
+ ResourceTrackerSP RT;
SymbolFlagsMap SymbolFlags;
SymbolStringPtr InitSymbol;
};
@@ -913,12 +915,26 @@ public:
const SymbolLookupSet &LookupSet) = 0;
};
-/// A symbol table that supports asynchoronous symbol queries.
+/// Represents a JIT'd dynamic library.
+///
+/// This class aims to mimic the behavior of a regular dylib or shared object,
+/// but without requiring the contained program representations to be compiled
+/// up-front. The JITDylib's content is defined by adding MaterializationUnits,
+/// and contained MaterializationUnits will typically rely on the JITDylib's
+/// links-against order to resolve external references (similar to a regular
+/// dylib).
+///
+/// The JITDylib object is a thin wrapper that references state held by the
+/// ExecutionSession. JITDylibs can be removed, clearing this underlying state
+/// and leaving the JITDylib object in a defunct state. In this state the
+/// JITDylib's name is guaranteed to remain accessible. If the ExecutionSession
+/// is still alive then other operations are callable but will return an Error
+/// or null result (depending on the API). It is illegal to call any operation
+/// other than getName on a JITDylib after the ExecutionSession has been torn
+/// down.
///
-/// Represents a virtual shared object. Instances can not be copied or moved, so
-/// their addresses may be used as keys for resource management.
-/// JITDylib state changes must be made via an ExecutionSession to guarantee
-/// that they are synchronized with respect to other JITDylib operations.
+/// JITDylibs cannot be moved or copied. Their address is stable, and useful as
+/// a key in some JIT data structures.
class JITDylib : public ThreadSafeRefCountedBase<JITDylib>,
public jitlink::JITLinkDylib {
friend class AsynchronousSymbolQuery;
@@ -931,10 +947,21 @@ public:
JITDylib &operator=(const JITDylib &) = delete;
JITDylib(JITDylib &&) = delete;
JITDylib &operator=(JITDylib &&) = delete;
+ ~JITDylib();
/// Get a reference to the ExecutionSession for this JITDylib.
+ ///
+ /// It is legal to call this method on a defunct JITDylib; however, the result
+ /// will only be usable if the ExecutionSession is still alive. If this JITDylib
+ /// is held by an error that may have torn down the JIT then the result
+ /// should not be used.
ExecutionSession &getExecutionSession() const { return ES; }
+ /// Dump current JITDylib state to OS.
+ ///
+ /// It is legal to call this method on a defunct JITDylib.
+ void dump(raw_ostream &OS);
+
/// Calls remove on all trackers currently associated with this JITDylib.
/// Does not run static deinits.
///
@@ -942,12 +969,21 @@ public:
/// added concurrently while the clear is underway, and the newly added
/// code will *not* be cleared. Adding new code concurrently with a clear
/// is usually a bug and should be avoided.
+ ///
+ /// It is illegal to call this method on a defunct JITDylib and the client
+ /// is responsible for ensuring that they do not do so.
Error clear();
/// Get the default resource tracker for this JITDylib.
+ ///
+ /// It is illegal to call this method on a defunct JITDylib and the client
+ /// is responsible for ensuring that they do not do so.
ResourceTrackerSP getDefaultResourceTracker();
/// Create a resource tracker for this JITDylib.
+ ///
+ /// It is illegal to call this method on a defunct JITDylib and the client
+ /// is responsible for ensuring that they do not do so.
ResourceTrackerSP createResourceTracker();
/// Adds a definition generator to this JITDylib and returns a reference to
@@ -956,6 +992,9 @@ public:
/// When JITDylibs are searched during lookup, if no existing definition of
/// a symbol is found, then any generators that have been added are run (in
/// the order that they were added) to potentially generate a definition.
+ ///
+ /// It is illegal to call this method on a defunct JITDylib and the client
+ /// is responsible for ensuring that they do not do so.
template <typename GeneratorT>
GeneratorT &addGenerator(std::unique_ptr<GeneratorT> DefGenerator);
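A typical use, sketched under the assumption that a DataLayout DL and a JITDylib JD are in scope, attaches the standard DynamicLibrarySearchGenerator so lookups fall back to symbols in the current process:

    JD.addGenerator(cantFail(
        orc::DynamicLibrarySearchGenerator::GetForCurrentProcess(
            DL.getGlobalPrefix())));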
@@ -963,6 +1002,9 @@ public:
///
/// The given generator must exist in this JITDylib's generators list (i.e.
/// have been added and not yet removed).
+ ///
+ /// It is illegal to call this method on a defunct JITDylib and the client
+ /// is responsible for ensuring that they do not do so.
void removeGenerator(DefinitionGenerator &G);
/// Set the link order to be used when fixing up definitions in JITDylib.
@@ -983,26 +1025,41 @@ public:
/// as the first in the link order (instead of this dylib) ensures that
/// definitions within this dylib resolve to the lazy-compiling stubs,
/// rather than immediately materializing the definitions in this dylib.
+ ///
+ /// It is illegal to call this method on a defunct JITDylib and the client
+ /// is responsible for ensuring that they do not do so.
void setLinkOrder(JITDylibSearchOrder NewSearchOrder,
bool LinkAgainstThisJITDylibFirst = true);
/// Add the given JITDylib to the link order for definitions in this
/// JITDylib.
+ ///
+ /// It is illegal to call this method on a defunct JITDylib and the client
+ /// is responsible for ensuring that they do not do so.
void addToLinkOrder(JITDylib &JD,
JITDylibLookupFlags JDLookupFlags =
JITDylibLookupFlags::MatchExportedSymbolsOnly);
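Illustrative only (the JITDylib names are placeholders): making definitions in App resolve their external references against Support and then Platform, after App itself:

    App.addToLinkOrder(Support);
    App.addToLinkOrder(Platform);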
/// Replace OldJD with NewJD in the link order if OldJD is present.
/// Otherwise this operation is a no-op.
+ ///
+ /// It is illegal to call this method on a defunct JITDylib and the client
+ /// is responsible for ensuring that they do not do so.
void replaceInLinkOrder(JITDylib &OldJD, JITDylib &NewJD,
JITDylibLookupFlags JDLookupFlags =
JITDylibLookupFlags::MatchExportedSymbolsOnly);
/// Remove the given JITDylib from the link order for this JITDylib if it is
/// present. Otherwise this operation is a no-op.
+ ///
+ /// It is illegal to call this method on a defunct JITDylib and the client
+ /// is responsible for ensuring that they do not do so.
void removeFromLinkOrder(JITDylib &JD);
/// Do something with the link order (run under the session lock).
+ ///
+ /// It is illegal to call this method on a defunct JITDylib and the client
+ /// is responsible for ensuring that they do not do so.
template <typename Func>
auto withLinkOrderDo(Func &&F)
-> decltype(F(std::declval<const JITDylibSearchOrder &>()));
@@ -1014,6 +1071,9 @@ public:
///
/// This overload always takes ownership of the MaterializationUnit. If any
/// errors occur, the MaterializationUnit is consumed.
+ ///
+ /// It is illegal to call this method on a defunct JITDylib and the client
+ /// is responsible for ensuring that they do not do so.
template <typename MaterializationUnitType>
Error define(std::unique_ptr<MaterializationUnitType> &&MU,
ResourceTrackerSP RT = nullptr);
@@ -1025,6 +1085,9 @@ public:
/// generated. If an error occurs, ownership remains with the caller. This
/// may allow the caller to modify the MaterializationUnit to correct the
/// issue, then re-call define.
+ ///
+ /// It is illegal to call this method on a defunct JITDylib and the client
+ /// is responsible for ensuring that they do not do so.
template <typename MaterializationUnitType>
Error define(std::unique_ptr<MaterializationUnitType> &MU,
ResourceTrackerSP RT = nullptr);
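A minimal sketch of the owning overload, using the standard absoluteSymbols unit; the symbol name and address are placeholders:

    auto Name = ES.intern("answer");
    if (auto Err = JD.define(orc::absoluteSymbols(
            {{Name, JITEvaluatedSymbol(0x1000, JITSymbolFlags::Exported)}})))
      ES.reportError(std::move(Err));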
@@ -1039,28 +1102,40 @@ public:
///
/// On success, all symbols are removed. On failure, the JITDylib state is
/// left unmodified (no symbols are removed).
+ ///
+ /// It is illegal to call this method on a defunct JITDylib and the client
+ /// is responsible for ensuring that they do not do so.
Error remove(const SymbolNameSet &Names);
- /// Dump current JITDylib state to OS.
- void dump(raw_ostream &OS);
-
/// Returns the given JITDylibs and all of their transitive dependencies in
/// DFS order (based on linkage relationships). Each JITDylib will appear
/// only once.
+ ///
+ /// It is illegal to call this method on a defunct JITDylib and the client
+ /// is responsible for ensuring that they do not do so.
static std::vector<JITDylibSP> getDFSLinkOrder(ArrayRef<JITDylibSP> JDs);
/// Returns the given JITDylibs and all of their transitive dependencies in
/// reverse DFS order (based on linkage relationships). Each JITDylib will
/// appear only once.
+ ///
+ /// It is illegal to call this method on a defunct JITDylib and the client
+ /// is responsible for ensuring that they do not do so.
static std::vector<JITDylibSP>
getReverseDFSLinkOrder(ArrayRef<JITDylibSP> JDs);
/// Return this JITDylib and its transitive dependencies in DFS order
/// based on linkage relationships.
+ ///
+ /// It is illegal to call this method on a defunct JITDylib and the client
+ /// is responsible for ensuring that they do not do so.
std::vector<JITDylibSP> getDFSLinkOrder();
/// Return this JITDylib and its transitive dependencies in reverse DFS order
/// based on linkage relationships.
+ ///
+ /// It is illegal to call this method on a defunct JITDylib and the client
+ /// is responsible for ensuring that they do not do so.
std::vector<JITDylibSP> getReverseDFSLinkOrder();
private:
@@ -1151,7 +1226,6 @@ private:
JITDylib(ExecutionSession &ES, std::string Name);
- ResourceTrackerSP getTracker(MaterializationResponsibility &MR);
std::pair<AsynchronousSymbolQuerySet, std::shared_ptr<SymbolDependenceMap>>
removeTracker(ResourceTracker &RT);
@@ -1197,8 +1271,8 @@ private:
failSymbols(FailedSymbolsWorklist);
ExecutionSession &ES;
+ enum { Open, Closing, Closed } State = Open;
std::mutex GeneratorsMutex;
- bool Open = true;
SymbolTable Symbols;
UnmaterializedInfosMap UnmaterializedInfos;
MaterializingInfosMap MaterializingInfos;
@@ -1208,7 +1282,8 @@ private:
// Map trackers to sets of symbols tracked.
DenseMap<ResourceTracker *, SymbolNameVector> TrackerSymbols;
- DenseMap<MaterializationResponsibility *, ResourceTracker *> MRTrackers;
+ DenseMap<ResourceTracker *, DenseSet<MaterializationResponsibility *>>
+ TrackerMRs;
};
/// Platforms set up standard symbols and mediate interactions between dynamic
@@ -1363,6 +1438,18 @@ public:
/// If no Platform is attached this call is equivalent to createBareJITDylib.
Expected<JITDylib &> createJITDylib(std::string Name);
+ /// Closes the given JITDylib.
+ ///
+ /// This method clears all resources held for the JITDylib, puts it in the
+ /// closed state, and clears all references held by the ExecutionSession and
+ /// other JITDylibs. No further code can be added to the JITDylib, and the
+ /// object will be freed once any remaining JITDylibSPs to it are destroyed.
+ ///
+ /// This method does *not* run static destructors.
+ ///
+ /// This method can only be called once for each JITDylib.
+ Error removeJITDylib(JITDylib &JD);
+
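A sketch of the removal path, assuming an ExecutionSession ES and abbreviated error handling:

    auto JD = ES.createJITDylib("plugin");
    if (!JD)
      return JD.takeError();
    // ... add code to *JD, run it ...
    if (auto Err = ES.removeJITDylib(*JD))  // *JD becomes defunct here
      return Err;
    StringRef Name = JD->getName();  // still legal on a defunct JITDylib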
/// Set the error reporter function.
ExecutionSession &setErrorReporter(ErrorReporter ReportError) {
this->ReportError = std::move(ReportError);
@@ -1574,9 +1661,9 @@ private:
SymbolStringPtr InitSymbol) {
auto &JD = RT.getJITDylib();
std::unique_ptr<MaterializationResponsibility> MR(
- new MaterializationResponsibility(&JD, std::move(Symbols),
+ new MaterializationResponsibility(&RT, std::move(Symbols),
std::move(InitSymbol)));
- JD.MRTrackers[MR.get()] = &RT;
+ JD.TrackerMRs[&RT].insert(MR.get());
return MR;
}
@@ -1660,18 +1747,17 @@ private:
JITDispatchHandlers;
};
-inline ExecutionSession &MaterializationResponsibility::getExecutionSession() {
- return JD->getExecutionSession();
+inline ExecutionSession &
+MaterializationResponsibility::getExecutionSession() const {
+ return JD.getExecutionSession();
}
template <typename Func>
Error MaterializationResponsibility::withResourceKeyDo(Func &&F) const {
- return JD->getExecutionSession().runSessionLocked([&]() -> Error {
- auto I = JD->MRTrackers.find(this);
- assert(I != JD->MRTrackers.end() && "No tracker for this MR");
- if (I->second->isDefunct())
- return make_error<ResourceTrackerDefunct>(I->second);
- F(I->second->getKeyUnsafe());
+ return JD.getExecutionSession().runSessionLocked([&]() -> Error {
+ if (RT->isDefunct())
+ return make_error<ResourceTrackerDefunct>(RT);
+ F(RT->getKeyUnsafe());
return Error::success();
});
}
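The usual pattern, sketched with a hypothetical Allocs map, is for a resource manager to record allocations against the MR's key so they can be reclaimed when the tracker is removed:

    if (auto Err = MR.withResourceKeyDo([&](orc::ResourceKey K) {
          Allocs[K].push_back(std::move(Alloc));  // hypothetical container
        }))
      return Err;  // the tracker was already defunct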
@@ -1679,14 +1765,17 @@ Error MaterializationResponsibility::withResourceKeyDo(Func &&F) const {
template <typename GeneratorT>
GeneratorT &JITDylib::addGenerator(std::unique_ptr<GeneratorT> DefGenerator) {
auto &G = *DefGenerator;
- std::lock_guard<std::mutex> Lock(GeneratorsMutex);
- DefGenerators.push_back(std::move(DefGenerator));
+ ES.runSessionLocked([&] {
+ assert(State == Open && "Cannot add generator to closed JITDylib");
+ DefGenerators.push_back(std::move(DefGenerator));
+ });
return G;
}
template <typename Func>
auto JITDylib::withLinkOrderDo(Func &&F)
-> decltype(F(std::declval<const JITDylibSearchOrder &>())) {
+ assert(State == Open && "Cannot use link order of closed JITDylib");
return ES.runSessionLocked([&]() { return F(LinkOrder); });
}
@@ -1715,6 +1804,8 @@ Error JITDylib::define(std::unique_ptr<MaterializationUnitType> &&MU,
});
return ES.runSessionLocked([&, this]() -> Error {
+ assert(State == Open && "JD is defunct");
+
if (auto Err = defineImpl(*MU))
return Err;
@@ -1756,6 +1847,8 @@ Error JITDylib::define(std::unique_ptr<MaterializationUnitType> &MU,
});
return ES.runSessionLocked([&, this]() -> Error {
+ assert(State == Open && "JD is defunct");
+
if (auto Err = defineImpl(*MU))
return Err;
@@ -1800,50 +1893,50 @@ private:
// ---------------------------------------------
inline MaterializationResponsibility::~MaterializationResponsibility() {
- JD->getExecutionSession().OL_destroyMaterializationResponsibility(*this);
+ getExecutionSession().OL_destroyMaterializationResponsibility(*this);
}
inline SymbolNameSet MaterializationResponsibility::getRequestedSymbols() const {
- return JD->getExecutionSession().OL_getRequestedSymbols(*this);
+ return getExecutionSession().OL_getRequestedSymbols(*this);
}
inline Error MaterializationResponsibility::notifyResolved(
const SymbolMap &Symbols) {
- return JD->getExecutionSession().OL_notifyResolved(*this, Symbols);
+ return getExecutionSession().OL_notifyResolved(*this, Symbols);
}
inline Error MaterializationResponsibility::notifyEmitted() {
- return JD->getExecutionSession().OL_notifyEmitted(*this);
+ return getExecutionSession().OL_notifyEmitted(*this);
}
inline Error MaterializationResponsibility::defineMaterializing(
SymbolFlagsMap SymbolFlags) {
- return JD->getExecutionSession().OL_defineMaterializing(
- *this, std::move(SymbolFlags));
+ return getExecutionSession().OL_defineMaterializing(*this,
+ std::move(SymbolFlags));
}
inline void MaterializationResponsibility::failMaterialization() {
- JD->getExecutionSession().OL_notifyFailed(*this);
+ getExecutionSession().OL_notifyFailed(*this);
}
inline Error MaterializationResponsibility::replace(
std::unique_ptr<MaterializationUnit> MU) {
- return JD->getExecutionSession().OL_replace(*this, std::move(MU));
+ return getExecutionSession().OL_replace(*this, std::move(MU));
}
inline Expected<std::unique_ptr<MaterializationResponsibility>>
MaterializationResponsibility::delegate(const SymbolNameSet &Symbols) {
- return JD->getExecutionSession().OL_delegate(*this, Symbols);
+ return getExecutionSession().OL_delegate(*this, Symbols);
}
inline void MaterializationResponsibility::addDependencies(
const SymbolStringPtr &Name, const SymbolDependenceMap &Dependencies) {
- JD->getExecutionSession().OL_addDependencies(*this, Name, Dependencies);
+ getExecutionSession().OL_addDependencies(*this, Name, Dependencies);
}
inline void MaterializationResponsibility::addDependenciesForAll(
const SymbolDependenceMap &Dependencies) {
- JD->getExecutionSession().OL_addDependenciesForAll(*this, Dependencies);
+ getExecutionSession().OL_addDependenciesForAll(*this, Dependencies);
}
} // End namespace orc
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
index 2fec3e7e4230..d2f9bac16e5a 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
@@ -120,6 +120,10 @@ enum class OMPScheduleType {
Runtime = 37,
Auto = 38, // auto
+ StaticBalancedChunked = 45, // static with chunk adjustment (e.g., simd)
+ GuidedSimd = 46, // guided with chunk adjustment
+ RuntimeSimd = 47, // runtime with chunk adjustment
+
ModifierMonotonic =
(1 << 29), // Set if the monotonic schedule modifier was present
ModifierNonmonotonic =
diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
index b4e099e4ec20..bcf52278ccbb 100644
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -1670,32 +1670,6 @@ public:
return CreateAlignedLoad(Ty, Ptr, MaybeAlign(), isVolatile, Name);
}
- // Deprecated [opaque pointer types]
- LLVM_ATTRIBUTE_DEPRECATED(LoadInst *CreateLoad(Value *Ptr,
- const char *Name),
- "Use the version that explicitly specifies the "
- "loaded type instead") {
- return CreateLoad(Ptr->getType()->getPointerElementType(), Ptr, Name);
- }
-
- // Deprecated [opaque pointer types]
- LLVM_ATTRIBUTE_DEPRECATED(LoadInst *CreateLoad(Value *Ptr,
- const Twine &Name = ""),
- "Use the version that explicitly specifies the "
- "loaded type instead") {
- return CreateLoad(Ptr->getType()->getPointerElementType(), Ptr, Name);
- }
-
- // Deprecated [opaque pointer types]
- LLVM_ATTRIBUTE_DEPRECATED(LoadInst *CreateLoad(Value *Ptr,
- bool isVolatile,
- const Twine &Name = ""),
- "Use the version that explicitly specifies the "
- "loaded type instead") {
- return CreateLoad(Ptr->getType()->getPointerElementType(), Ptr, isVolatile,
- Name);
- }
-
StoreInst *CreateStore(Value *Val, Value *Ptr, bool isVolatile = false) {
return CreateAlignedStore(Val, Ptr, MaybeAlign(), isVolatile);
}
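With the pointee-derived overloads removed throughout this header, callers must always spell out the element type; a small sketch of the surviving, opaque-pointer-ready style:

    llvm::Value *loadElt(llvm::IRBuilder<> &B, llvm::Type *EltTy,
                         llvm::Value *BasePtr, uint64_t Idx) {
      // Both the GEP and the load name their element type explicitly.
      llvm::Value *Slot = B.CreateConstInBoundsGEP1_64(EltTy, BasePtr, Idx);
      return B.CreateLoad(EltTy, Slot);
    }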
@@ -1719,35 +1693,6 @@ public:
return Insert(new LoadInst(Ty, Ptr, Twine(), isVolatile, *Align), Name);
}
- // Deprecated [opaque pointer types]
- LLVM_ATTRIBUTE_DEPRECATED(LoadInst *CreateAlignedLoad(Value *Ptr,
- MaybeAlign Align,
- const char *Name),
- "Use the version that explicitly specifies the "
- "loaded type instead") {
- return CreateAlignedLoad(Ptr->getType()->getPointerElementType(), Ptr,
- Align, Name);
- }
- // Deprecated [opaque pointer types]
- LLVM_ATTRIBUTE_DEPRECATED(LoadInst *CreateAlignedLoad(Value *Ptr,
- MaybeAlign Align,
- const Twine &Name = ""),
- "Use the version that explicitly specifies the "
- "loaded type instead") {
- return CreateAlignedLoad(Ptr->getType()->getPointerElementType(), Ptr,
- Align, Name);
- }
- // Deprecated [opaque pointer types]
- LLVM_ATTRIBUTE_DEPRECATED(LoadInst *CreateAlignedLoad(Value *Ptr,
- MaybeAlign Align,
- bool isVolatile,
- const Twine &Name = ""),
- "Use the version that explicitly specifies the "
- "loaded type instead") {
- return CreateAlignedLoad(Ptr->getType()->getPointerElementType(), Ptr,
- Align, isVolatile, Name);
- }
-
StoreInst *CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align,
bool isVolatile = false) {
if (!Align) {
@@ -1788,14 +1733,6 @@ public:
return Insert(new AtomicRMWInst(Op, Ptr, Val, *Align, Ordering, SSID));
}
- LLVM_ATTRIBUTE_DEPRECATED(
- Value *CreateGEP(Value *Ptr, ArrayRef<Value *> IdxList,
- const Twine &Name = ""),
- "Use the version with explicit element type instead") {
- return CreateGEP(Ptr->getType()->getScalarType()->getPointerElementType(),
- Ptr, IdxList, Name);
- }
-
Value *CreateGEP(Type *Ty, Value *Ptr, ArrayRef<Value *> IdxList,
const Twine &Name = "") {
if (auto *PC = dyn_cast<Constant>(Ptr)) {
@@ -1810,15 +1747,6 @@ public:
return Insert(GetElementPtrInst::Create(Ty, Ptr, IdxList), Name);
}
- LLVM_ATTRIBUTE_DEPRECATED(
- Value *CreateInBoundsGEP(Value *Ptr, ArrayRef<Value *> IdxList,
- const Twine &Name = ""),
- "Use the version with explicit element type instead") {
- return CreateInBoundsGEP(
- Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, IdxList,
- Name);
- }
-
Value *CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef<Value *> IdxList,
const Twine &Name = "") {
if (auto *PC = dyn_cast<Constant>(Ptr)) {
@@ -1849,15 +1777,6 @@ public:
return Insert(GetElementPtrInst::CreateInBounds(Ty, Ptr, Idx), Name);
}
- LLVM_ATTRIBUTE_DEPRECATED(
- Value *CreateConstGEP1_32(Value *Ptr, unsigned Idx0,
- const Twine &Name = ""),
- "Use the version with explicit element type instead") {
- return CreateConstGEP1_32(
- Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, Idx0,
- Name);
- }
-
Value *CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0,
const Twine &Name = "") {
Value *Idx = ConstantInt::get(Type::getInt32Ty(Context), Idx0);
@@ -1914,15 +1833,6 @@ public:
return Insert(GetElementPtrInst::Create(Ty, Ptr, Idx), Name);
}
- LLVM_ATTRIBUTE_DEPRECATED(
- Value *CreateConstGEP1_64(Value *Ptr, uint64_t Idx0,
- const Twine &Name = ""),
- "Use the version with explicit element type instead") {
- return CreateConstGEP1_64(
- Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, Idx0,
- Name);
- }
-
Value *CreateConstInBoundsGEP1_64(Type *Ty, Value *Ptr, uint64_t Idx0,
const Twine &Name = "") {
Value *Idx = ConstantInt::get(Type::getInt64Ty(Context), Idx0);
@@ -1933,15 +1843,6 @@ public:
return Insert(GetElementPtrInst::CreateInBounds(Ty, Ptr, Idx), Name);
}
- LLVM_ATTRIBUTE_DEPRECATED(
- Value *CreateConstInBoundsGEP1_64(Value *Ptr, uint64_t Idx0,
- const Twine &Name = ""),
- "Use the version with explicit element type instead") {
- return CreateConstInBoundsGEP1_64(
- Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, Idx0,
- Name);
- }
-
Value *CreateConstGEP2_64(Type *Ty, Value *Ptr, uint64_t Idx0, uint64_t Idx1,
const Twine &Name = "") {
Value *Idxs[] = {
@@ -1955,15 +1856,6 @@ public:
return Insert(GetElementPtrInst::Create(Ty, Ptr, Idxs), Name);
}
- LLVM_ATTRIBUTE_DEPRECATED(
- Value *CreateConstGEP2_64(Value *Ptr, uint64_t Idx0, uint64_t Idx1,
- const Twine &Name = ""),
- "Use the version with explicit element type instead") {
- return CreateConstGEP2_64(
- Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, Idx0,
- Idx1, Name);
- }
-
Value *CreateConstInBoundsGEP2_64(Type *Ty, Value *Ptr, uint64_t Idx0,
uint64_t Idx1, const Twine &Name = "") {
Value *Idxs[] = {
@@ -1977,28 +1869,11 @@ public:
return Insert(GetElementPtrInst::CreateInBounds(Ty, Ptr, Idxs), Name);
}
- LLVM_ATTRIBUTE_DEPRECATED(
- Value *CreateConstInBoundsGEP2_64(Value *Ptr, uint64_t Idx0,
- uint64_t Idx1, const Twine &Name = ""),
- "Use the version with explicit element type instead") {
- return CreateConstInBoundsGEP2_64(
- Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, Idx0,
- Idx1, Name);
- }
-
Value *CreateStructGEP(Type *Ty, Value *Ptr, unsigned Idx,
const Twine &Name = "") {
return CreateConstInBoundsGEP2_32(Ty, Ptr, 0, Idx, Name);
}
- LLVM_ATTRIBUTE_DEPRECATED(
- Value *CreateStructGEP(Value *Ptr, unsigned Idx, const Twine &Name = ""),
- "Use the version with explicit element type instead") {
- return CreateConstInBoundsGEP2_32(
- Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, 0, Idx,
- Name);
- }
-
/// Same as CreateGlobalString, but return a pointer with "i8*" type
/// instead of a pointer to array of i8.
///
diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
index 6d32a898b668..046e9b5e809e 100644
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -975,15 +975,6 @@ public:
NameStr, InsertAtEnd);
}
- LLVM_ATTRIBUTE_DEPRECATED(static GetElementPtrInst *CreateInBounds(
- Value *Ptr, ArrayRef<Value *> IdxList, const Twine &NameStr = "",
- Instruction *InsertBefore = nullptr),
- "Use the version with explicit element type instead") {
- return CreateInBounds(
- Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, IdxList,
- NameStr, InsertBefore);
- }
-
/// Create an "inbounds" getelementptr. See the documentation for the
/// "inbounds" flag in LangRef.html for details.
static GetElementPtrInst *
@@ -996,15 +987,6 @@ public:
return GEP;
}
- LLVM_ATTRIBUTE_DEPRECATED(static GetElementPtrInst *CreateInBounds(
- Value *Ptr, ArrayRef<Value *> IdxList, const Twine &NameStr,
- BasicBlock *InsertAtEnd),
- "Use the version with explicit element type instead") {
- return CreateInBounds(
- Ptr->getType()->getScalarType()->getPointerElementType(), Ptr, IdxList,
- NameStr, InsertAtEnd);
- }
-
static GetElementPtrInst *CreateInBounds(Type *PointeeType, Value *Ptr,
ArrayRef<Value *> IdxList,
const Twine &NameStr,
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 8290342c0d51..b01fa10763b8 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -524,6 +524,20 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
def int_ppc_altivec_vprtybq : GCCBuiltin<"__builtin_altivec_vprtybq">,
Intrinsic<[llvm_v1i128_ty],[llvm_v1i128_ty],[IntrNoMem]>;
+ // BCD intrinsics.
+ def int_ppc_bcdadd : GCCBuiltin<"__builtin_ppc_bcdadd">, Intrinsic<
+ [llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+ def int_ppc_bcdadd_p : GCCBuiltin<"__builtin_ppc_bcdadd_p">, Intrinsic<
+ [llvm_i32_ty], [llvm_i32_ty, llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem, ImmArg<ArgIndex<0>>]>;
+ def int_ppc_bcdsub : GCCBuiltin<"__builtin_ppc_bcdsub">, Intrinsic<
+ [llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+ def int_ppc_bcdsub_p : GCCBuiltin<"__builtin_ppc_bcdsub_p">, Intrinsic<
+ [llvm_i32_ty], [llvm_i32_ty, llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem, ImmArg<ArgIndex<0>>]>;
+
// P10 Vector Extract with Mask
def int_ppc_altivec_vextractbm : GCCBuiltin<"__builtin_altivec_vextractbm">,
Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
@@ -1073,6 +1087,10 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
GCCBuiltin<"__builtin_altivec_crypto_vpermxor">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+ def int_ppc_altivec_crypto_vpermxor_be :
+ GCCBuiltin<"__builtin_altivec_crypto_vpermxor_be">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
+ llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
def int_ppc_altivec_crypto_vshasigmad :
GCCBuiltin<"__builtin_altivec_crypto_vshasigmad">,
diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h
index b83d83f0d0ab..7d232bba0864 100644
--- a/llvm/include/llvm/IR/Operator.h
+++ b/llvm/include/llvm/IR/Operator.h
@@ -250,8 +250,16 @@ public:
bool operator!=(const FastMathFlags &OtherFlags) const {
return Flags != OtherFlags.Flags;
}
+
+ /// Print fast-math flags to \p O.
+ void print(raw_ostream &O) const;
};
+inline raw_ostream &operator<<(raw_ostream &O, FastMathFlags FMF) {
+ FMF.print(O);
+ return O;
+}
+
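A small usage sketch of the new streaming support:

    llvm::FastMathFlags FMF;
    FMF.setNoNaNs();
    FMF.setAllowReassoc();
    llvm::errs() << "flags:" << FMF << "\n";  // via the operator<< above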
/// Utility class for floating point operations which can have
/// information about relaxed accuracy requirements attached to them.
class FPMathOperator : public Operator {
diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
index b858733530e3..320deb80bb1f 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -2285,6 +2285,31 @@ m_Not(const ValTy &V) {
return m_c_Xor(V, m_AllOnes());
}
+template <typename ValTy> struct NotForbidUndef_match {
+ ValTy Val;
+ NotForbidUndef_match(const ValTy &V) : Val(V) {}
+
+ template <typename OpTy> bool match(OpTy *V) {
+ // We do not use m_c_Xor because that could match an arbitrary APInt that is
+ // not -1 as C and then fail to match the other operand if it is -1.
+ // This code should still work even when both operands are constants.
+ Value *X;
+ const APInt *C;
+ if (m_Xor(m_Value(X), m_APIntForbidUndef(C)).match(V) && C->isAllOnes())
+ return Val.match(X);
+ if (m_Xor(m_APIntForbidUndef(C), m_Value(X)).match(V) && C->isAllOnes())
+ return Val.match(X);
+ return false;
+ }
+};
+
+/// Matches a bitwise 'not' as 'xor V, -1' or 'xor -1, V'. For vectors, the
+/// constant value must be composed of only -1 scalar elements.
+template <typename ValTy>
+inline NotForbidUndef_match<ValTy> m_NotForbidUndef(const ValTy &V) {
+ return NotForbidUndef_match<ValTy>(V);
+}
+
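Usage mirrors the plain m_Not matcher; a sketch, assuming V is a Value* under inspection:

    using namespace llvm::PatternMatch;
    llvm::Value *X;
    // Matches (xor X, -1) or (xor -1, X) only when the all-ones constant
    // contains no undef elements.
    if (match(V, m_NotForbidUndef(m_Value(X)))) {
      // ... X is the inverted operand ...
    }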
/// Matches an SMin with LHS and RHS in either order.
template <typename LHS, typename RHS>
inline MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true>
diff --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h
index 47431adc6fac..c899c46d4055 100644
--- a/llvm/include/llvm/IR/Type.h
+++ b/llvm/include/llvm/IR/Type.h
@@ -368,6 +368,8 @@ public:
Type *getPointerElementType() const {
assert(getTypeID() == PointerTyID);
+ assert(NumContainedTys &&
+ "Attempting to get element type of opaque pointer");
return ContainedTys[0];
}
diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
index 361d6357b303..a3c6b4e70bf5 100644
--- a/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -38,7 +38,7 @@
// is one VP intrinsic that maps directly to one SDNode that goes by the
// same name. Since the operands are also the same, we open the property
// scopes for both the VPIntrinsic and the SDNode at once.
-// \p SDOPC The SelectionDAG Node id (eg VP_ADD).
+// \p VPSD The SelectionDAG Node id (e.g. VP_ADD).
// \p LEGALPOS The operand position of the SDNode that is used for legalizing
// this SDNode. This can be `-1`, in which case the return type of
// the SDNode is used.
@@ -46,12 +46,12 @@
// \p MASKPOS The mask operand position.
// \p EVLPOS The explicit vector length operand position.
#ifndef BEGIN_REGISTER_VP_SDNODE
-#define BEGIN_REGISTER_VP_SDNODE(SDOPC, LEGALPOS, TDNAME, MASKPOS, EVLPOS)
+#define BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, TDNAME, MASKPOS, EVLPOS)
#endif
// End the property scope of a new VP SDNode.
#ifndef END_REGISTER_VP_SDNODE
-#define END_REGISTER_VP_SDNODE(SDOPC)
+#define END_REGISTER_VP_SDNODE(VPSD)
#endif
// Helper macros for the common "1:1 - Intrinsic : SDNode" case.
@@ -60,22 +60,21 @@
// same name. Since the operands are also the same, we open the property
// scopes for both the VPIntrinsic and the SDNode at once.
//
-// \p INTRIN The canonical name (eg `vp_add`, which at the same time is the
+// \p VPID The canonical name (e.g. `vp_add`, which at the same time is the
// name of the intrinsic and the TableGen def of the SDNode).
// \p MASKPOS The mask operand position.
// \p EVLPOS The explicit vector length operand position.
-// \p SDOPC The SelectionDAG Node id (eg VP_ADD).
+// \p VPSD The SelectionDAG Node id (e.g. VP_ADD).
// \p LEGALPOS The operand position of the SDNode that is used for legalizing
// this SDNode. This can be `-1`, in which case the return type of
// the SDNode is used.
-#define BEGIN_REGISTER_VP(INTRIN, MASKPOS, EVLPOS, SDOPC, LEGALPOS) \
-BEGIN_REGISTER_VP_INTRINSIC(INTRIN, MASKPOS, EVLPOS) \
-BEGIN_REGISTER_VP_SDNODE(SDOPC, LEGALPOS, INTRIN, MASKPOS, EVLPOS)
-
-#define END_REGISTER_VP(INTRIN, SDOPC) \
-END_REGISTER_VP_INTRINSIC(INTRIN) \
-END_REGISTER_VP_SDNODE(SDOPC)
+#define BEGIN_REGISTER_VP(VPID, MASKPOS, EVLPOS, VPSD, LEGALPOS) \
+ BEGIN_REGISTER_VP_INTRINSIC(VPID, MASKPOS, EVLPOS) \
+ BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, VPID, MASKPOS, EVLPOS)
+#define END_REGISTER_VP(VPID, VPSD) \
+ END_REGISTER_VP_INTRINSIC(VPID) \
+ END_REGISTER_VP_SDNODE(VPSD)
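For orientation, consumers include this .def after defining just the macros they care about; a sketch of mapping a VP intrinsic to its functional IR opcode under the renamed scheme (the helper name is hypothetical):

    static unsigned functionalOpcodeForVP(llvm::Intrinsic::ID ID) {
      switch (ID) {
      default:
        break;
    #define BEGIN_REGISTER_VP_INTRINSIC(VPID, MASKPOS, EVLPOS)                \
      case llvm::Intrinsic::VPID:
    #define VP_PROPERTY_FUNCTIONAL_OPC(OPC) return llvm::Instruction::OPC;
    #define END_REGISTER_VP_INTRINSIC(VPID) break;
    #include "llvm/IR/VPIntrinsics.def"
      }
      return 0;  // no functional opcode
    }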
// The following macros attach properties to the scope they are placed in. This
// assigns the property to the VP Intrinsic and/or SDNode that belongs to the
@@ -84,9 +83,9 @@ END_REGISTER_VP_SDNODE(SDOPC)
// Property Macros {
// The intrinsic and/or SDNode has the same function as this LLVM IR Opcode.
-// \p OPC The standard IR opcode.
-#ifndef HANDLE_VP_TO_OPC
-#define HANDLE_VP_TO_OPC(OPC)
+// \p OPC The opcode of the instruction with the same function.
+#ifndef VP_PROPERTY_FUNCTIONAL_OPC
+#define VP_PROPERTY_FUNCTIONAL_OPC(OPC)
#endif
// Whether the intrinsic may have a rounding mode or exception behavior operand
@@ -96,34 +95,30 @@ END_REGISTER_VP_SDNODE(SDOPC)
// \p HASEXCEPT '1' if the intrinsic can have an exception behavior operand
// bundle, '0' otherwise.
// \p INTRINID The constrained fp intrinsic this VP intrinsic corresponds to.
-#ifndef HANDLE_VP_TO_CONSTRAINEDFP
-#define HANDLE_VP_TO_CONSTRAINEDFP(HASROUND, HASEXCEPT, INTRINID)
+#ifndef VP_PROPERTY_CONSTRAINEDFP
+#define VP_PROPERTY_CONSTRAINEDFP(HASROUND, HASEXCEPT, INTRINID)
#endif
// Map this VP intrinsic to its canonical functional intrinsic.
-#ifndef HANDLE_VP_TO_INTRIN
-#define HANDLE_VP_TO_INTRIN(ID)
+// \p INTRIN The non-VP intrinsic with the same function.
+#ifndef VP_PROPERTY_FUNCTIONAL_INTRINSIC
+#define VP_PROPERTY_FUNCTIONAL_INTRINSIC(INTRIN)
#endif
// This VP Intrinsic is a memory operation
// The pointer arg is at POINTERPOS and the data arg is at DATAPOS.
-#ifndef HANDLE_VP_IS_MEMOP
-#define HANDLE_VP_IS_MEMOP(VPID, POINTERPOS, DATAPOS)
+#ifndef VP_PROPERTY_MEMOP
+#define VP_PROPERTY_MEMOP(POINTERPOS, DATAPOS)
#endif
// Map this VP reduction intrinsic to its reduction operand positions.
-#ifndef HANDLE_VP_REDUCTION
-#define HANDLE_VP_REDUCTION(ID, STARTPOS, VECTORPOS)
+#ifndef VP_PROPERTY_REDUCTION
+#define VP_PROPERTY_REDUCTION(STARTPOS, VECTORPOS)
#endif
// A property to infer VP binary-op SDNode opcodes automatically.
-#ifndef PROPERTY_VP_BINARYOP_SDNODE
-#define PROPERTY_VP_BINARYOP_SDNODE(ID)
-#endif
-
-// A property to infer VP reduction SDNode opcodes automatically.
-#ifndef PROPERTY_VP_REDUCTION_SDNODE
-#define PROPERTY_VP_REDUCTION_SDNODE(ID)
+#ifndef VP_PROPERTY_BINARYOP
+#define VP_PROPERTY_BINARYOP
#endif
/// } Property Macros
@@ -132,15 +127,14 @@ END_REGISTER_VP_SDNODE(SDOPC)
// Specialized helper macro for integer binary operators (%x, %y, %mask, %evl).
#ifdef HELPER_REGISTER_BINARY_INT_VP
-#error "The internal helper macro HELPER_REGISTER_BINARY_INT_VP is already defined!"
+#error \
+ "The internal helper macro HELPER_REGISTER_BINARY_INT_VP is already defined!"
#endif
-#define HELPER_REGISTER_BINARY_INT_VP(INTRIN, SDOPC, OPC) \
-BEGIN_REGISTER_VP(INTRIN, 2, 3, SDOPC, -1) \
-HANDLE_VP_TO_OPC(OPC) \
-PROPERTY_VP_BINARYOP_SDNODE(SDOPC) \
-END_REGISTER_VP(INTRIN, SDOPC)
-
-
+#define HELPER_REGISTER_BINARY_INT_VP(VPID, VPSD, IROPC) \
+ BEGIN_REGISTER_VP(VPID, 2, 3, VPSD, -1) \
+ VP_PROPERTY_FUNCTIONAL_OPC(IROPC) \
+ VP_PROPERTY_BINARYOP \
+ END_REGISTER_VP(VPID, VPSD)
// llvm.vp.add(x,y,mask,vlen)
HELPER_REGISTER_BINARY_INT_VP(vp_add, VP_ADD, Add)
@@ -193,12 +187,12 @@ HELPER_REGISTER_BINARY_INT_VP(vp_xor, VP_XOR, Xor)
#error \
"The internal helper macro HELPER_REGISTER_BINARY_FP_VP is already defined!"
#endif
-#define HELPER_REGISTER_BINARY_FP_VP(OPSUFFIX, SDOPC, OPC) \
- BEGIN_REGISTER_VP(vp_##OPSUFFIX, 2, 3, SDOPC, -1) \
- HANDLE_VP_TO_OPC(OPC) \
- HANDLE_VP_TO_CONSTRAINEDFP(1, 1, experimental_constrained_##OPSUFFIX) \
- PROPERTY_VP_BINARYOP_SDNODE(SDOPC) \
- END_REGISTER_VP(vp_##OPSUFFIX, SDOPC)
+#define HELPER_REGISTER_BINARY_FP_VP(OPSUFFIX, VPSD, IROPC) \
+ BEGIN_REGISTER_VP(vp_##OPSUFFIX, 2, 3, VPSD, -1) \
+ VP_PROPERTY_FUNCTIONAL_OPC(IROPC) \
+ VP_PROPERTY_CONSTRAINEDFP(1, 1, experimental_constrained_##OPSUFFIX) \
+ VP_PROPERTY_BINARYOP \
+ END_REGISTER_VP(vp_##OPSUFFIX, VPSD)
// llvm.vp.fadd(x,y,mask,vlen)
HELPER_REGISTER_BINARY_FP_VP(fadd, VP_FADD, FAdd)
@@ -224,34 +218,34 @@ HELPER_REGISTER_BINARY_FP_VP(frem, VP_FREM, FRem)
BEGIN_REGISTER_VP_INTRINSIC(vp_store, 2, 3)
// chain = VP_STORE chain,val,base,offset,mask,evl
BEGIN_REGISTER_VP_SDNODE(VP_STORE, 0, vp_store, 4, 5)
-HANDLE_VP_TO_OPC(Store)
-HANDLE_VP_TO_INTRIN(masked_store)
-HANDLE_VP_IS_MEMOP(vp_store, 1, 0)
+VP_PROPERTY_FUNCTIONAL_OPC(Store)
+VP_PROPERTY_FUNCTIONAL_INTRINSIC(masked_store)
+VP_PROPERTY_MEMOP(1, 0)
END_REGISTER_VP(vp_store, VP_STORE)
// llvm.vp.scatter(ptr,val,mask,vlen)
BEGIN_REGISTER_VP_INTRINSIC(vp_scatter, 2, 3)
// chain = VP_SCATTER chain,val,base,indices,scale,mask,evl
BEGIN_REGISTER_VP_SDNODE(VP_SCATTER, -1, vp_scatter, 5, 6)
-HANDLE_VP_TO_INTRIN(masked_scatter)
-HANDLE_VP_IS_MEMOP(vp_scatter, 1, 0)
+VP_PROPERTY_FUNCTIONAL_INTRINSIC(masked_scatter)
+VP_PROPERTY_MEMOP(1, 0)
END_REGISTER_VP(vp_scatter, VP_SCATTER)
// llvm.vp.load(ptr,mask,vlen)
BEGIN_REGISTER_VP_INTRINSIC(vp_load, 1, 2)
// val,chain = VP_LOAD chain,base,offset,mask,evl
BEGIN_REGISTER_VP_SDNODE(VP_LOAD, -1, vp_load, 3, 4)
-HANDLE_VP_TO_OPC(Load)
-HANDLE_VP_TO_INTRIN(masked_load)
-HANDLE_VP_IS_MEMOP(vp_load, 0, None)
+VP_PROPERTY_FUNCTIONAL_OPC(Load)
+VP_PROPERTY_FUNCTIONAL_INTRINSIC(masked_load)
+VP_PROPERTY_MEMOP(0, None)
END_REGISTER_VP(vp_load, VP_LOAD)
// llvm.vp.gather(ptr,mask,vlen)
BEGIN_REGISTER_VP_INTRINSIC(vp_gather, 1, 2)
// val,chain = VP_GATHER chain,base,indices,scale,mask,evl
BEGIN_REGISTER_VP_SDNODE(VP_GATHER, -1, vp_gather, 4, 5)
-HANDLE_VP_TO_INTRIN(masked_gather)
-HANDLE_VP_IS_MEMOP(vp_gather, 0, None)
+VP_PROPERTY_FUNCTIONAL_INTRINSIC(masked_gather)
+VP_PROPERTY_MEMOP(0, None)
END_REGISTER_VP(vp_gather, VP_GATHER)
///// } Memory Operations
@@ -260,14 +254,14 @@ END_REGISTER_VP(vp_gather, VP_GATHER)
// Specialized helper macro for VP reductions (%start, %x, %mask, %evl).
#ifdef HELPER_REGISTER_REDUCTION_VP
-#error "The internal helper macro HELPER_REGISTER_REDUCTION_VP is already defined!"
+#error \
+ "The internal helper macro HELPER_REGISTER_REDUCTION_VP is already defined!"
#endif
-#define HELPER_REGISTER_REDUCTION_VP(VPINTRIN, SDOPC, INTRIN) \
-BEGIN_REGISTER_VP(VPINTRIN, 2, 3, SDOPC, -1) \
-HANDLE_VP_TO_INTRIN(INTRIN) \
-HANDLE_VP_REDUCTION(VPINTRIN, 0, 1) \
-PROPERTY_VP_REDUCTION_SDNODE(SDOPC) \
-END_REGISTER_VP(VPINTRIN, SDOPC)
+#define HELPER_REGISTER_REDUCTION_VP(VPID, VPSD, INTRIN) \
+ BEGIN_REGISTER_VP(VPID, 2, 3, VPSD, -1) \
+ VP_PROPERTY_FUNCTIONAL_INTRINSIC(INTRIN) \
+ VP_PROPERTY_REDUCTION(0, 1) \
+ END_REGISTER_VP(VPID, VPSD)
// llvm.vp.reduce.add(start,x,mask,vlen)
HELPER_REGISTER_REDUCTION_VP(vp_reduce_add, VP_REDUCE_ADD,
@@ -320,19 +314,19 @@ HELPER_REGISTER_REDUCTION_VP(vp_reduce_fmin, VP_REDUCE_FMIN,
// fast-math flags in the IR and as two distinct ISD opcodes in the
// SelectionDAG.
#ifdef HELPER_REGISTER_REDUCTION_SEQ_VP
-#error "The internal helper macro HELPER_REGISTER_REDUCTION_SEQ_VP is already defined!"
+#error \
+ "The internal helper macro HELPER_REGISTER_REDUCTION_SEQ_VP is already defined!"
#endif
-#define HELPER_REGISTER_REDUCTION_SEQ_VP(VPINTRIN, SDOPC, SEQ_SDOPC, INTRIN) \
-BEGIN_REGISTER_VP_INTRINSIC(VPINTRIN, 2, 3) \
-BEGIN_REGISTER_VP_SDNODE(SDOPC, -1, VPINTRIN, 2, 3) \
-END_REGISTER_VP_SDNODE(SDOPC) \
-BEGIN_REGISTER_VP_SDNODE(SEQ_SDOPC, -1, VPINTRIN, 2, 3) \
-END_REGISTER_VP_SDNODE(SEQ_SDOPC) \
-HANDLE_VP_TO_INTRIN(INTRIN) \
-HANDLE_VP_REDUCTION(VPINTRIN, 0, 1) \
-PROPERTY_VP_REDUCTION_SDNODE(SDOPC) \
-PROPERTY_VP_REDUCTION_SDNODE(SEQ_SDOPC) \
-END_REGISTER_VP_INTRINSIC(VPINTRIN)
+#define HELPER_REGISTER_REDUCTION_SEQ_VP(VPID, VPSD, SEQ_VPSD, INTRIN) \
+ BEGIN_REGISTER_VP_INTRINSIC(VPID, 2, 3) \
+ BEGIN_REGISTER_VP_SDNODE(VPSD, -1, VPID, 2, 3) \
+ VP_PROPERTY_REDUCTION(0, 1) \
+ END_REGISTER_VP_SDNODE(VPSD) \
+ BEGIN_REGISTER_VP_SDNODE(SEQ_VPSD, -1, VPID, 2, 3) \
+ VP_PROPERTY_REDUCTION(0, 1) \
+ END_REGISTER_VP_SDNODE(SEQ_VPSD) \
+ VP_PROPERTY_FUNCTIONAL_INTRINSIC(INTRIN) \
+ END_REGISTER_VP_INTRINSIC(VPID)
// llvm.vp.reduce.fadd(start,x,mask,vlen)
HELPER_REGISTER_REDUCTION_SEQ_VP(vp_reduce_fadd, VP_REDUCE_FADD,
@@ -356,8 +350,7 @@ BEGIN_REGISTER_VP_INTRINSIC(vp_select, 0, 3)
// END_REGISTER_CASES(vp_select, VP_SELECT)
END_REGISTER_VP_INTRINSIC(vp_select)
-BEGIN_REGISTER_VP(experimental_vp_splice, 3, 5,
- EXPERIMENTAL_VP_SPLICE, -1)
+BEGIN_REGISTER_VP(experimental_vp_splice, 3, 5, EXPERIMENTAL_VP_SPLICE, -1)
END_REGISTER_VP(experimental_vp_splice, EXPERIMENTAL_VP_SPLICE)
///// } Shuffles
@@ -368,10 +361,9 @@ END_REGISTER_VP(experimental_vp_splice, EXPERIMENTAL_VP_SPLICE)
#undef END_REGISTER_VP
#undef END_REGISTER_VP_INTRINSIC
#undef END_REGISTER_VP_SDNODE
-#undef HANDLE_VP_TO_OPC
-#undef HANDLE_VP_TO_CONSTRAINEDFP
-#undef HANDLE_VP_TO_INTRIN
-#undef HANDLE_VP_IS_MEMOP
-#undef HANDLE_VP_REDUCTION
-#undef PROPERTY_VP_BINARYOP_SDNODE
-#undef PROPERTY_VP_REDUCTION_SDNODE
+#undef VP_PROPERTY_BINARYOP
+#undef VP_PROPERTY_CONSTRAINEDFP
+#undef VP_PROPERTY_FUNCTIONAL_INTRINSIC
+#undef VP_PROPERTY_FUNCTIONAL_OPC
+#undef VP_PROPERTY_MEMOP
+#undef VP_PROPERTY_REDUCTION
diff --git a/llvm/include/llvm/IR/Verifier.h b/llvm/include/llvm/IR/Verifier.h
index f4381d2ae4a9..52a4c7b4301f 100644
--- a/llvm/include/llvm/IR/Verifier.h
+++ b/llvm/include/llvm/IR/Verifier.h
@@ -6,9 +6,9 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines the function verifier interface, that can be used for some
-// sanity checking of input to the system, and for checking that transformations
-// haven't done something bad.
+// This file defines the function verifier interface, which can be used to
+// validate input to the system and to check that transformations have not
+// done something bad.
//
// Note that this does not provide full 'java style' security and verifications;
// instead it just tries to ensure that code is well formed.
diff --git a/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h b/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h
index 2b0f391570cd..8c0ad2699b8d 100644
--- a/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h
+++ b/llvm/include/llvm/MCA/HardwareUnits/Scheduler.h
@@ -264,9 +264,10 @@ public:
// Update the ready queues.
void dump() const;
- // This routine performs a sanity check. This routine should only be called
- // when we know that 'IR' is not in the scheduler's instruction queues.
- void sanityCheck(const InstRef &IR) const {
+ // This routine performs a basic correctness check. It should only be
+ // called when we know that 'IR' is not in the scheduler's instruction
+ // queues.
+ void instructionCheck(const InstRef &IR) const {
assert(!is_contained(WaitSet, IR) && "Already in the wait set!");
assert(!is_contained(ReadySet, IR) && "Already in the ready set!");
assert(!is_contained(IssuedSet, IR) && "Already executing!");
diff --git a/llvm/include/llvm/ObjectYAML/MachOYAML.h b/llvm/include/llvm/ObjectYAML/MachOYAML.h
index ee89f4eac61f..38a7de3d6131 100644
--- a/llvm/include/llvm/ObjectYAML/MachOYAML.h
+++ b/llvm/include/llvm/ObjectYAML/MachOYAML.h
@@ -121,6 +121,7 @@ struct LinkEditData {
MachOYAML::ExportEntry ExportTrie;
std::vector<NListEntry> NameList;
std::vector<StringRef> StringTable;
+ std::vector<yaml::Hex32> IndirectSymbols;
bool isEmpty() const;
};
diff --git a/llvm/include/llvm/ProfileData/MemProfData.inc b/llvm/include/llvm/ProfileData/MemProfData.inc
new file mode 100644
index 000000000000..d64227e4ba31
--- /dev/null
+++ b/llvm/include/llvm/ProfileData/MemProfData.inc
@@ -0,0 +1,61 @@
+#ifndef MEMPROF_DATA_INC
+#define MEMPROF_DATA_INC
+/*===-- MemProfData.inc - MemProf profiling runtime structures -*- C++ -*-=== *\
+|*
+|* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+|* See https://llvm.org/LICENSE.txt for license information.
+|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+|*
+\*===----------------------------------------------------------------------===*/
+/*
+ * This is the main file that defines all the data structures, signatures, and
+ * constant literals that are shared across the profiling runtime library and
+ * host tools (reader/writer).
+ *
+ * This file has two identical copies. The primary copy lives in LLVM and
+ * the other one sits in the compiler-rt/include/profile directory. To make changes
+ * in this file, first modify the primary copy and copy it over to compiler-rt.
+ * Testing of any change in this file can start only after the two copies are
+ * synced up.
+ *
+\*===----------------------------------------------------------------------===*/
+
+
+#ifdef _MSC_VER
+#define PACKED(__decl__) __pragma(pack(push,1)) __decl__ __pragma(pack(pop))
+#else
+#define PACKED(__decl__) __decl__ __attribute__((__packed__))
+#endif
+
+// A 64-bit magic number to uniquely identify the raw binary memprof profile file.
+#define MEMPROF_RAW_MAGIC_64 \
+ ((uint64_t)255 << 56 | (uint64_t)'m' << 48 | (uint64_t)'p' << 40 | (uint64_t)'r' << 32 | \
+ (uint64_t)'o' << 24 | (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129)
+
+// The version number of the raw binary format.
+#define MEMPROF_RAW_VERSION 1ULL
+
+namespace llvm {
+namespace memprof {
+// A struct describing the header used for the raw binary memprof profile format.
+PACKED(struct Header {
+ uint64_t Magic;
+ uint64_t Version;
+ uint64_t TotalSize;
+ uint64_t SegmentOffset;
+ uint64_t MIBOffset;
+ uint64_t StackOffset;
+});
+
+// A struct describing the information necessary to describe a /proc/maps
+// segment entry for a particular binary/library identified by its build id.
+PACKED(struct SegmentEntry {
+ uint64_t Start;
+ uint64_t End;
+ uint64_t Offset;
+ uint8_t BuildId[32];
+});
+} // namespace memprof
+} // namespace llvm
+
+#endif
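A sketch of how a consumer might use these definitions, assuming MemProfData.inc has been included (the helper is illustrative, not part of the header):

    #include <cstdint>
    #include <cstring>

    static bool looksLikeRawMemProf(const char *Data, std::size_t Size) {
      if (Size < sizeof(llvm::memprof::Header))
        return false;
      uint64_t Magic = 0;
      std::memcpy(&Magic, Data, sizeof(Magic));  // header starts with Magic
      return Magic == MEMPROF_RAW_MAGIC_64;
    }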
diff --git a/llvm/include/llvm/ProfileData/RawMemProfReader.h b/llvm/include/llvm/ProfileData/RawMemProfReader.h
new file mode 100644
index 000000000000..45544927a86f
--- /dev/null
+++ b/llvm/include/llvm/ProfileData/RawMemProfReader.h
@@ -0,0 +1,43 @@
+#ifndef LLVM_PROFILEDATA_RAWMEMPROFREADER_H_
+#define LLVM_PROFILEDATA_RAWMEMPROFREADER_H_
+//===- MemProfReader.h - Instrumented memory profiling reader ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for reading MemProf profiling data.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+namespace llvm {
+namespace memprof {
+
+class RawMemProfReader {
+public:
+ RawMemProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
+ : DataBuffer(std::move(DataBuffer)) {}
+ // Prints aggregate counts for each raw profile parsed from the DataBuffer.
+ void printSummaries(raw_ostream &OS) const;
+
+ // Return true if the \p DataBuffer starts with magic bytes indicating it is
+ // a raw binary memprof profile.
+ static bool hasFormat(const MemoryBuffer &DataBuffer);
+
+ // Create a RawMemProfReader after sanity checking the contents of the file at
+ // \p Path.
+ static Expected<std::unique_ptr<RawMemProfReader>> create(const Twine &Path);
+
+private:
+ std::unique_ptr<MemoryBuffer> DataBuffer;
+};
+
+} // namespace memprof
+} // namespace llvm
+
+#endif // LLVM_PROFILEDATA_RAWMEMPROFREADER_H_
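Hypothetical driver code for the factory (the path is a placeholder):

    auto ReaderOr = llvm::memprof::RawMemProfReader::create("memprof.profraw");
    if (!ReaderOr)
      llvm::logAllUnhandledErrors(ReaderOr.takeError(), llvm::errs(),
                                  "memprof: ");
    else
      (*ReaderOr)->printSummaries(llvm::outs());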
diff --git a/llvm/include/llvm/Support/AArch64TargetParser.def b/llvm/include/llvm/Support/AArch64TargetParser.def
index b3cfb71601f1..48e82fa55a0f 100644
--- a/llvm/include/llvm/Support/AArch64TargetParser.def
+++ b/llvm/include/llvm/Support/AArch64TargetParser.def
@@ -181,7 +181,8 @@ AARCH64_CPU_NAME("cortex-a78c", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
AARCH64_CPU_NAME("cortex-a710", ARMV9A, FK_NEON_FP_ARMV8, false,
(AArch64::AEK_MTE | AArch64::AEK_PAUTH | AArch64::AEK_FLAGM |
AArch64::AEK_SB | AArch64::AEK_I8MM | AArch64::AEK_FP16FML |
- AArch64::AEK_SVE2BITPERM | AArch64::AEK_BF16))
+ AArch64::AEK_SVE | AArch64::AEK_SVE2 | AArch64::AEK_SVE2BITPERM |
+ AArch64::AEK_BF16))
AARCH64_CPU_NAME("cortex-r82", ARMV8R, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_LSE))
AARCH64_CPU_NAME("cortex-x1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
diff --git a/llvm/include/llvm/Support/AArch64TargetParser.h b/llvm/include/llvm/Support/AArch64TargetParser.h
index 131a58412db6..15bb428f19bc 100644
--- a/llvm/include/llvm/Support/AArch64TargetParser.h
+++ b/llvm/include/llvm/Support/AArch64TargetParser.h
@@ -137,15 +137,6 @@ void fillValidCPUArchList(SmallVectorImpl<StringRef> &Values);
bool isX18ReservedByDefault(const Triple &TT);
-struct ParsedBranchProtection {
- StringRef Scope;
- StringRef Key;
- bool BranchTargetEnforcement;
-};
-
-bool parseBranchProtection(StringRef Spec, ParsedBranchProtection &PBP,
- StringRef &Err);
-
} // namespace AArch64
} // namespace llvm
diff --git a/llvm/include/llvm/Support/ARMAttributeParser.h b/llvm/include/llvm/Support/ARMAttributeParser.h
index 5d12b7e08d58..b46a4d9f690f 100644
--- a/llvm/include/llvm/Support/ARMAttributeParser.h
+++ b/llvm/include/llvm/Support/ARMAttributeParser.h
@@ -67,6 +67,10 @@ class ARMAttributeParser : public ELFAttributeParser {
Error DSP_extension(ARMBuildAttrs::AttrType tag);
Error T2EE_use(ARMBuildAttrs::AttrType tag);
Error Virtualization_use(ARMBuildAttrs::AttrType tag);
+ Error PAC_extension(ARMBuildAttrs::AttrType tag);
+ Error BTI_extension(ARMBuildAttrs::AttrType tag);
+ Error PACRET_use(ARMBuildAttrs::AttrType tag);
+ Error BTI_use(ARMBuildAttrs::AttrType tag);
Error nodefaults(ARMBuildAttrs::AttrType tag);
public:
diff --git a/llvm/include/llvm/Support/ARMBuildAttributes.h b/llvm/include/llvm/Support/ARMBuildAttributes.h
index 37c37522fd26..b4405e7d4908 100644
--- a/llvm/include/llvm/Support/ARMBuildAttributes.h
+++ b/llvm/include/llvm/Support/ARMBuildAttributes.h
@@ -70,9 +70,13 @@ enum AttrType : unsigned {
DIV_use = 44,
DSP_extension = 46,
MVE_arch = 48,
+ PAC_extension = 50,
+ BTI_extension = 52,
also_compatible_with = 65,
conformance = 67,
Virtualization_use = 68,
+ BTI_use = 74,
+ PACRET_use = 76,
/// Legacy Tags
Section = 2, // deprecated (ABI r2.09)
@@ -237,7 +241,25 @@ enum {
// Tag_Virtualization_use, (=68), uleb128
AllowTZ = 1,
AllowVirtualization = 2,
- AllowTZVirtualization = 3
+ AllowTZVirtualization = 3,
+
+ // Tag_PAC_extension, (=50), uleb128
+ DisallowPAC = 0,
+ AllowPACInNOPSpace = 1,
+ AllowPAC = 2,
+
+ // Tag_BTI_extension, (=52), uleb128
+ DisallowBTI = 0,
+ AllowBTIInNOPSpace = 1,
+ AllowBTI = 2,
+
+ // Tag_BTI_use, (=74), uleb128
+ BTINotUsed = 0,
+ BTIUsed = 1,
+
+ // Tag_PACRET_use, (=76), uleb128
+ PACRETNotUsed = 0,
+ PACRETUsed = 1
};
} // namespace ARMBuildAttrs
diff --git a/llvm/include/llvm/Support/ARMTargetParser.def b/llvm/include/llvm/Support/ARMTargetParser.def
index fd08f3e6960c..7d29808f0501 100644
--- a/llvm/include/llvm/Support/ARMTargetParser.def
+++ b/llvm/include/llvm/Support/ARMTargetParser.def
@@ -201,6 +201,7 @@ ARM_ARCH_EXT_NAME("cdecp4", ARM::AEK_CDECP4, "+cdecp4", "-cdecp4")
ARM_ARCH_EXT_NAME("cdecp5", ARM::AEK_CDECP5, "+cdecp5", "-cdecp5")
ARM_ARCH_EXT_NAME("cdecp6", ARM::AEK_CDECP6, "+cdecp6", "-cdecp6")
ARM_ARCH_EXT_NAME("cdecp7", ARM::AEK_CDECP7, "+cdecp7", "-cdecp7")
+ARM_ARCH_EXT_NAME("pacbti", ARM::AEK_PACBTI, "+pacbti", "-pacbti")
#undef ARM_ARCH_EXT_NAME
#ifndef ARM_HW_DIV_NAME
diff --git a/llvm/include/llvm/Support/ARMTargetParser.h b/llvm/include/llvm/Support/ARMTargetParser.h
index b1ffcfb34552..b40704c24e87 100644
--- a/llvm/include/llvm/Support/ARMTargetParser.h
+++ b/llvm/include/llvm/Support/ARMTargetParser.h
@@ -59,7 +59,7 @@ enum ArchExtKind : uint64_t {
AEK_CDECP5 = 1 << 27,
AEK_CDECP6 = 1 << 28,
AEK_CDECP7 = 1 << 29,
-
+ AEK_PACBTI = 1 << 30,
// Unsupported extensions.
AEK_OS = 1ULL << 59,
AEK_IWMMXT = 1ULL << 60,
diff --git a/llvm/include/llvm/Support/GenericDomTree.h b/llvm/include/llvm/Support/GenericDomTree.h
index 21fd50763b1f..f39400c26eab 100644
--- a/llvm/include/llvm/Support/GenericDomTree.h
+++ b/llvm/include/llvm/Support/GenericDomTree.h
@@ -528,9 +528,9 @@ protected:
/// of CFG edges must not delete the CFG nodes before calling this function.
///
/// The applyUpdates function can reorder the updates and remove redundant
- /// ones internally. The batch updater is also able to detect sequences of
- /// zero and exactly one update -- it's optimized to do less work in these
- /// cases.
+ /// ones internally (as long as it is done in a deterministic fashion). The
+ /// batch updater is also able to detect sequences of zero and exactly one
+ /// update -- it's optimized to do less work in these cases.
///
/// Note that for postdominators it automatically takes care of applying
/// updates on reverse edges internally (so there's no need to swap the
@@ -538,8 +538,8 @@ protected:
/// The type of updates is the same for DomTreeBase<T> and PostDomTreeBase<T>
/// with the same template parameter T.
///
- /// \param Updates An unordered sequence of updates to perform. The current
- /// CFG and the reverse of these updates provides the pre-view of the CFG.
+ /// \param Updates An ordered sequence of updates to perform. The current CFG
+ /// and the reverse of these updates provide the pre-view of the CFG.
///
void applyUpdates(ArrayRef<UpdateType> Updates) {
GraphDiff<NodePtr, IsPostDominator> PreViewCFG(
@@ -547,9 +547,9 @@ protected:
DomTreeBuilder::ApplyUpdates(*this, PreViewCFG, nullptr);
}
- /// \param Updates An unordered sequence of updates to perform. The current
- /// CFG and the reverse of these updates provides the pre-view of the CFG.
- /// \param PostViewUpdates An unordered sequence of update to perform in order
+ /// \param Updates An ordered sequence of updates to perform. The current CFG
+ /// and the reverse of these updates provide the pre-view of the CFG.
+ /// \param PostViewUpdates An ordered sequence of updates to perform in order
/// to obtain a post-view of the CFG. The DT will be updated assuming the
/// obtained PostViewCFG is the desired end state.
void applyUpdates(ArrayRef<UpdateType> Updates,
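A sketch of batch updates on a DominatorTree; F and the basic-block pointers are placeholders:

    llvm::DominatorTree DT(F);
    llvm::SmallVector<llvm::DominatorTree::UpdateType, 4> Updates;
    Updates.push_back({llvm::DominatorTree::Insert, Pred, NewBB});
    Updates.push_back({llvm::DominatorTree::Delete, Pred, OldSucc});
    DT.applyUpdates(Updates);  // ordered; redundant updates pruned internally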
diff --git a/llvm/include/llvm/Support/HTTPClient.h b/llvm/include/llvm/Support/HTTPClient.h
new file mode 100644
index 000000000000..3172610c2d8b
--- /dev/null
+++ b/llvm/include/llvm/Support/HTTPClient.h
@@ -0,0 +1,113 @@
+//===-- llvm/Support/HTTPClient.h - HTTP client library ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the declarations of the HTTPClient, HTTPMethod,
+/// HTTPResponseHandler, and BufferedHTTPResponseHandler classes, as well as
+/// the HTTPResponseBuffer and HTTPRequest structs.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_HTTP_CLIENT_H
+#define LLVM_SUPPORT_HTTP_CLIENT_H
+
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+namespace llvm {
+
+enum class HTTPMethod { GET };
+
+/// A stateless description of an outbound HTTP request.
+struct HTTPRequest {
+ SmallString<128> Url;
+ HTTPMethod Method = HTTPMethod::GET;
+ bool FollowRedirects = true;
+ HTTPRequest(StringRef Url);
+};
+
+bool operator==(const HTTPRequest &A, const HTTPRequest &B);
+
+/// A handler for state updates occurring while an HTTPRequest is performed.
+/// Can trigger the client to abort the request by returning an Error from any
+/// of its methods.
+class HTTPResponseHandler {
+public:
+ /// Processes one line of HTTP response headers.
+ virtual Error handleHeaderLine(StringRef HeaderLine) = 0;
+
+ /// Processes an additional chunk of bytes of the HTTP response body.
+ virtual Error handleBodyChunk(StringRef BodyChunk) = 0;
+
+ /// Processes the HTTP response status code.
+ virtual Error handleStatusCode(unsigned Code) = 0;
+
+protected:
+ ~HTTPResponseHandler();
+};
+
+/// An HTTP response status code bundled with a buffer to store the body.
+struct HTTPResponseBuffer {
+ unsigned Code = 0;
+ std::unique_ptr<WritableMemoryBuffer> Body;
+};
+
+/// A simple handler which writes returned data to an HTTPResponseBuffer.
+/// Ignores all headers except the Content-Length, which it uses to
+/// allocate an appropriately-sized Body buffer.
+class BufferedHTTPResponseHandler final : public HTTPResponseHandler {
+ size_t Offset = 0;
+
+public:
+ /// Stores the data received from the HTTP server.
+ HTTPResponseBuffer ResponseBuffer;
+
+ /// These callbacks store the body and status code in an HTTPResponseBuffer
+ /// allocated based on Content-Length. The Content-Length header must be
+ /// handled by handleHeaderLine before any calls to handleBodyChunk.
+ Error handleHeaderLine(StringRef HeaderLine) override;
+ Error handleBodyChunk(StringRef BodyChunk) override;
+ Error handleStatusCode(unsigned Code) override;
+};
+
+/// A reusable client that can perform HTTPRequests through a network socket.
+class HTTPClient {
+public:
+ HTTPClient();
+ ~HTTPClient();
+
+ /// Returns true only if LLVM has been compiled with a working HTTPClient.
+ static bool isAvailable();
+
+ /// Must be called at the beginning of a program, while it is single-threaded.
+ static void initialize();
+
+ /// Must be called at the end of a program, while it is single-threaded.
+ static void cleanup();
+
+ /// Sets the timeout for the entire request, in milliseconds. A zero or
+ /// negative value means the request never times out.
+ void setTimeout(std::chrono::milliseconds Timeout);
+
+ /// Performs the Request, passing response data to the Handler. Returns all
+ /// errors which occur during the request. Aborts if an error is returned by a
+ /// Handler method.
+ Error perform(const HTTPRequest &Request, HTTPResponseHandler &Handler);
+
+ /// Performs the Request with the default BufferedHTTPResponseHandler, and
+ /// returns its HTTPResponseBuffer or an Error.
+ Expected<HTTPResponseBuffer> perform(const HTTPRequest &Request);
+
+ /// Performs an HTTPRequest with the default configuration to make a GET
+ /// request to the given Url. Returns an HTTPResponseBuffer or an Error.
+ Expected<HTTPResponseBuffer> get(StringRef Url);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_HTTP_CLIENT_H
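For orientation, a minimal usage sketch of the interface declared above. The
sketch assumes a build where HTTPClient::isAvailable() returns true; error
handling is abbreviated.

    #include "llvm/Support/HTTPClient.h"
    #include "llvm/Support/raw_ostream.h"
    #include <chrono>
    using namespace llvm;

    int main() {
      HTTPClient::initialize(); // once, while still single-threaded
      if (HTTPClient::isAvailable()) {
        HTTPClient Client;
        Client.setTimeout(std::chrono::milliseconds(5000));
        Expected<HTTPResponseBuffer> Res = Client.get("https://example.org/");
        if (Res)
          outs() << "status " << Res->Code << "\n";
        else
          logAllUnhandledErrors(Res.takeError(), errs(), "http: ");
      }
      HTTPClient::cleanup(); // once, at shutdown
      return 0;
    }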
diff --git a/llvm/include/llvm/Support/Mutex.h b/llvm/include/llvm/Support/Mutex.h
index 1d8a0d3c87cb..d73bb8ef1120 100644
--- a/llvm/include/llvm/Support/Mutex.h
+++ b/llvm/include/llvm/Support/Mutex.h
@@ -36,7 +36,7 @@ namespace llvm
return true;
} else {
// Single-threaded debugging code. This would be racy in
- // multithreaded mode, but provides not sanity checks in single
+ // multithreaded mode, but provides basic checks in single
// threaded mode.
++acquired;
return true;
@@ -49,7 +49,7 @@ namespace llvm
return true;
} else {
// Single-threaded debugging code. This would be racy in
- // multithreaded mode, but provides not sanity checks in single
+ // multithreaded mode, but provides basic checks in single
// threaded mode.
assert(acquired && "Lock not acquired before release!");
--acquired;
diff --git a/llvm/include/llvm/Support/RWMutex.h b/llvm/include/llvm/Support/RWMutex.h
index 150bc7dbbce1..33a5d3efffee 100644
--- a/llvm/include/llvm/Support/RWMutex.h
+++ b/llvm/include/llvm/Support/RWMutex.h
@@ -114,7 +114,7 @@ public:
}
// Single-threaded debugging code. This would be racy in multithreaded
- // mode, but provides not sanity checks in single threaded mode.
+ // mode, but provides basic checks in single threaded mode.
++readers;
return true;
}
@@ -126,7 +126,7 @@ public:
}
// Single-threaded debugging code. This would be racy in multithreaded
- // mode, but provides not sanity checks in single threaded mode.
+ // mode, but provides basic checks in single threaded mode.
assert(readers > 0 && "Reader lock not acquired before release!");
--readers;
return true;
@@ -139,7 +139,7 @@ public:
}
// Single-threaded debugging code. This would be racy in multithreaded
- // mode, but provides not sanity checks in single threaded mode.
+ // mode, but provides basic checks in single threaded mode.
assert(writers == 0 && "Writer lock already acquired!");
++writers;
return true;
@@ -152,7 +152,7 @@ public:
}
// Single-threaded debugging code. This would be racy in multithreaded
- // mode, but provides not sanity checks in single threaded mode.
+ // mode, but provides basic checks in single threaded mode.
assert(writers == 1 && "Writer lock not acquired before release!");
--writers;
return true;
diff --git a/llvm/include/llvm/Support/TargetParser.h b/llvm/include/llvm/Support/TargetParser.h
index 366dd3cf55c6..b11467dcce28 100644
--- a/llvm/include/llvm/Support/TargetParser.h
+++ b/llvm/include/llvm/Support/TargetParser.h
@@ -177,6 +177,18 @@ StringRef resolveTuneCPUAlias(StringRef TuneCPU, bool IsRV64);
} // namespace RISCV
+namespace ARM {
+struct ParsedBranchProtection {
+ StringRef Scope;
+ StringRef Key;
+ bool BranchTargetEnforcement;
+};
+
+bool parseBranchProtection(StringRef Spec, ParsedBranchProtection &PBP,
+ StringRef &Err);
+
+} // namespace ARM
+
} // namespace llvm
#endif
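A brief usage sketch of the new ARM::parseBranchProtection entry point. Only
the signature above is taken from the hunk; the "standard" spec string is an
assumption here, mirroring the values accepted by the -mbranch-protection=
option family.

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/TargetParser.h"

    bool checkSpec() {
      llvm::ARM::ParsedBranchProtection PBP;
      llvm::StringRef Err;
      // On failure, Err is set to the offending part of the spec.
      if (!llvm::ARM::parseBranchProtection("standard", PBP, Err))
        return false;
      return PBP.BranchTargetEnforcement; // PBP.Scope / PBP.Key describe PAC
    }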
diff --git a/llvm/include/llvm/Support/ThreadPool.h b/llvm/include/llvm/Support/ThreadPool.h
index 4c41b88d6043..8d30e8e92755 100644
--- a/llvm/include/llvm/Support/ThreadPool.h
+++ b/llvm/include/llvm/Support/ThreadPool.h
@@ -36,9 +36,6 @@ namespace llvm {
/// for some work to become available.
class ThreadPool {
public:
- using TaskTy = std::function<void()>;
- using PackagedTaskTy = std::packaged_task<void()>;
-
/// Construct a pool using the hardware strategy \p S for mapping hardware
/// execution resources (threads, cores, CPUs)
/// Defaults to using the maximum execution resources in the system, but
@@ -51,17 +48,17 @@ public:
/// Asynchronous submission of a task to the pool. The returned future can be
/// used to wait for the task to finish and is *non-blocking* on destruction.
template <typename Function, typename... Args>
- inline std::shared_future<void> async(Function &&F, Args &&... ArgList) {
+ inline auto async(Function &&F, Args &&...ArgList) {
auto Task =
std::bind(std::forward<Function>(F), std::forward<Args>(ArgList)...);
- return asyncImpl(std::move(Task));
+ return async(std::move(Task));
}
/// Asynchronous submission of a task to the pool. The returned future can be
/// used to wait for the task to finish and is *non-blocking* on destruction.
- template <typename Function>
- inline std::shared_future<void> async(Function &&F) {
- return asyncImpl(std::forward<Function>(F));
+ template <typename Func>
+ auto async(Func &&F) -> std::shared_future<decltype(F())> {
+ return asyncImpl(std::function<decltype(F())()>(std::forward<Func>(F)));
}
/// Blocking wait for all the threads to complete and the queue to be empty.
@@ -74,17 +71,70 @@ public:
bool isWorkerThread() const;
private:
+ /// Helpers to create a promise and a callable wrapper of \p Task that sets
+ /// the result of the promise. Returns the callable and a future to access the
+ /// result.
+ template <typename ResTy>
+ static std::pair<std::function<void()>, std::future<ResTy>>
+ createTaskAndFuture(std::function<ResTy()> Task) {
+ std::shared_ptr<std::promise<ResTy>> Promise =
+ std::make_shared<std::promise<ResTy>>();
+ auto F = Promise->get_future();
+ return {
+ [Promise = std::move(Promise), Task]() { Promise->set_value(Task()); },
+ std::move(F)};
+ }
+ static std::pair<std::function<void()>, std::future<void>>
+ createTaskAndFuture(std::function<void()> Task) {
+ std::shared_ptr<std::promise<void>> Promise =
+ std::make_shared<std::promise<void>>();
+ auto F = Promise->get_future();
+ return {[Promise = std::move(Promise), Task]() {
+ Task();
+ Promise->set_value();
+ },
+ std::move(F)};
+ }
+
bool workCompletedUnlocked() { return !ActiveThreads && Tasks.empty(); }
/// Asynchronous submission of a task to the pool. The returned future can be
/// used to wait for the task to finish and is *non-blocking* on destruction.
- std::shared_future<void> asyncImpl(TaskTy F);
+ template <typename ResTy>
+ std::shared_future<ResTy> asyncImpl(std::function<ResTy()> Task) {
+
+#if LLVM_ENABLE_THREADS
+ /// Wrap the Task in a std::function<void()> that sets the result of the
+ /// corresponding future.
+ auto R = createTaskAndFuture(Task);
+
+ {
+ // Lock the queue and push the new task
+ std::unique_lock<std::mutex> LockGuard(QueueLock);
+
+ // Don't allow enqueueing after disabling the pool
+ assert(EnableFlag && "Queuing a thread during ThreadPool destruction");
+ Tasks.push(std::move(R.first));
+ }
+ QueueCondition.notify_one();
+ return R.second.share();
+
+#else // LLVM_ENABLE_THREADS Disabled
+
+ // Get a Future with launch::deferred execution using std::async
+ auto Future = std::async(std::launch::deferred, std::move(Task)).share();
+ // Wrap the future so that both ThreadPool::wait() can operate and the
+ // returned future can be sync'ed on.
+ Tasks.push([Future]() { Future.get(); });
+ return Future;
+#endif
+ }
/// Threads in flight
std::vector<llvm::thread> Threads;
/// Tasks waiting for execution in the pool.
- std::queue<PackagedTaskTy> Tasks;
+ std::queue<std::function<void()>> Tasks;
/// Locking and signaling for accessing the Tasks queue.
std::mutex QueueLock;
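The user-visible effect of this change is that ThreadPool::async now deduces
the task's result type instead of always yielding std::shared_future<void>. A
minimal sketch, assuming a default-constructed pool:

    #include "llvm/Support/ThreadPool.h"

    int sum() {
      llvm::ThreadPool Pool; // default hardware strategy
      // Each call now returns std::shared_future<int>.
      auto FA = Pool.async([] { return 20; });
      auto FB = Pool.async([] { return 22; });
      return FA.get() + FB.get(); // 42; Pool.wait() would also block
    }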
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index e2d3dbdda88a..1d189c6dea6d 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -645,6 +645,13 @@ def extract_vec_elt_combines : GICombineGroup<[
extract_vec_elt_build_vec,
extract_all_elts_from_build_vector]>;
+def funnel_shift_from_or_shift : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$info),
+ (match (wip_match_opcode G_OR):$root,
+ [{ return Helper.matchOrShiftToFunnelShift(*${root}, ${info}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])
+>;
+
def funnel_shift_to_rotate : GICombineRule<
(defs root:$root),
(match (wip_match_opcode G_FSHL, G_FSHR):$root,
@@ -683,7 +690,8 @@ def bitfield_extract_from_and : GICombineRule<
[{ return Helper.matchBitfieldExtractFromAnd(*${root}, ${info}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
-def funnel_shift_combines : GICombineGroup<[funnel_shift_to_rotate]>;
+def funnel_shift_combines : GICombineGroup<[funnel_shift_from_or_shift,
+ funnel_shift_to_rotate]>;
def bitfield_extract_from_sext_inreg : GICombineRule<
(defs root:$root, build_fn_matchinfo:$info),
@@ -751,6 +759,84 @@ def redundant_neg_operands: GICombineRule<
[{ return Helper.matchRedundantNegOperands(*${root}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
+// Transform (fadd x, (fmul y, z)) -> (fma y, z, x)
+// (fadd x, (fmul y, z)) -> (fmad y, z, x)
+// Transform (fadd (fmul x, y), z) -> (fma x, y, z)
+// (fadd (fmul x, y), z) -> (fmad x, y, z)
+def combine_fadd_fmul_to_fmad_or_fma: GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$info),
+ (match (wip_match_opcode G_FADD):$root,
+ [{ return Helper.matchCombineFAddFMulToFMadOrFMA(*${root},
+ ${info}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+
+// Transform (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
+// -> (fmad (fpext x), (fpext y), z)
+// Transform (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
+// -> (fmad (fpext y), (fpext z), x)
+def combine_fadd_fpext_fmul_to_fmad_or_fma: GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$info),
+ (match (wip_match_opcode G_FADD):$root,
+ [{ return Helper.matchCombineFAddFpExtFMulToFMadOrFMA(*${root},
+ ${info}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+
+// Transform (fadd (fma x, y, (fmul z, u)), v) -> (fma x, y, (fma z, u, v))
+// (fadd (fmad x, y, (fmul z, u)), v) -> (fmad x, y, (fmad z, u, v))
+// Transform (fadd v, (fma x, y, (fmul z, u))) -> (fma x, y, (fma z, u, v))
+// (fadd v, (fmad x, y, (fmul z, u))) -> (fmad x, y, (fmad z, u, v))
+def combine_fadd_fma_fmul_to_fmad_or_fma: GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$info),
+ (match (wip_match_opcode G_FADD):$root,
+ [{ return Helper.matchCombineFAddFMAFMulToFMadOrFMA(*${root},
+ ${info}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+
+// Transform (fadd (fma x, y, (fpext (fmul u, v))), z) ->
+// (fma x, y, (fma (fpext u), (fpext v), z))
+def combine_fadd_fpext_fma_fmul_to_fmad_or_fma: GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$info),
+ (match (wip_match_opcode G_FADD):$root,
+ [{ return Helper.matchCombineFAddFpExtFMulToFMadOrFMAAggressive(
+ *${root}, ${info}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+
+// Transform (fsub (fmul x, y), z) -> (fma x, y, -z)
+// -> (fmad x, y, -z)
+def combine_fsub_fmul_to_fmad_or_fma: GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$info),
+ (match (wip_match_opcode G_FSUB):$root,
+ [{ return Helper.matchCombineFSubFMulToFMadOrFMA(*${root},
+ ${info}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+
+// Transform (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
+// (fsub x, (fneg (fmul, y, z))) -> (fma y, z, x)
+def combine_fsub_fneg_fmul_to_fmad_or_fma: GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$info),
+ (match (wip_match_opcode G_FSUB):$root,
+ [{ return Helper.matchCombineFSubFNegFMulToFMadOrFMA(*${root},
+ ${info}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+
+// Transform (fsub (fpext (fmul x, y)), z) ->
+// (fma (fpext x), (fpext y), (fneg z))
+def combine_fsub_fpext_fmul_to_fmad_or_fma: GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$info),
+ (match (wip_match_opcode G_FSUB):$root,
+ [{ return Helper.matchCombineFSubFpExtFMulToFMadOrFMA(*${root},
+ ${info}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+
+// Transform (fsub (fneg (fpext (fmul x, y))), z) ->
+// (fneg (fma (fpext x), (fpext y), z))
+def combine_fsub_fpext_fneg_fmul_to_fmad_or_fma: GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$info),
+ (match (wip_match_opcode G_FSUB):$root,
+ [{ return Helper.matchCombineFSubFpExtFNegFMulToFMadOrFMA(
+ *${root}, ${info}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+
// FIXME: These should use the custom predicate feature once it lands.
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
undef_to_negative_one,
@@ -783,6 +869,12 @@ def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp]>;
def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd,
mul_by_neg_one]>;
+def fma_combines : GICombineGroup<[combine_fadd_fmul_to_fmad_or_fma,
+ combine_fadd_fpext_fmul_to_fmad_or_fma, combine_fadd_fma_fmul_to_fmad_or_fma,
+ combine_fadd_fpext_fma_fmul_to_fmad_or_fma, combine_fsub_fmul_to_fmad_or_fma,
+ combine_fsub_fneg_fmul_to_fmad_or_fma, combine_fsub_fpext_fmul_to_fmad_or_fma,
+ combine_fsub_fpext_fneg_fmul_to_fmad_or_fma]>;
+
def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
extract_vec_elt_combines, combines_for_extload,
combine_indexed_load_store, undef_combines, identity_combines, phi_combines,
@@ -799,7 +891,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
truncstore_merge, div_rem_to_divrem, funnel_shift_combines,
form_bitfield_extract, constant_fold, fabs_fneg_fold,
intdiv_combines, mulh_combines, redundant_neg_operands,
- and_or_disjoint_mask ]>;
+ and_or_disjoint_mask, fma_combines]>;
// A combine group used for prelegalizer combiners at -O0. The combines in
// this group have been selected based on experiments to balance code size and
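As a source-level illustration of the new funnel_shift_from_or_shift rule: a
G_OR whose operands are complementary shifts of the same value pair forms a
funnel shift. The exact patterns accepted live in matchOrShiftToFunnelShift,
so this C++ shape is only a sketch; it assumes 0 < S < 32 to keep both shift
amounts in range.

    #include <cstdint>

    uint32_t funnel(uint32_t Hi, uint32_t Lo, unsigned S) {
      // G_OR(G_SHL, G_LSHR) over complementary amounts -> G_FSHL Hi, Lo, S
      return (Hi << S) | (Lo >> (32 - S));
    }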
diff --git a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h
index 6e45f8f6fb05..429fcbd81b45 100644
--- a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h
+++ b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h
@@ -24,22 +24,47 @@ using namespace sampleprof;
namespace llvm {
namespace sampleprof {
+struct ProfiledCallGraphNode;
+
+struct ProfiledCallGraphEdge {
+ ProfiledCallGraphEdge(ProfiledCallGraphNode *Source,
+ ProfiledCallGraphNode *Target, uint64_t Weight)
+ : Source(Source), Target(Target), Weight(Weight) {}
+ ProfiledCallGraphNode *Source;
+ ProfiledCallGraphNode *Target;
+ uint64_t Weight;
+
+ // The call destination is the only important data here, so allow
+ // transparently unwrapping into it.
+ operator ProfiledCallGraphNode *() const { return Target; }
+};
+
struct ProfiledCallGraphNode {
- ProfiledCallGraphNode(StringRef FName = StringRef()) : Name(FName) {}
- StringRef Name;
- struct ProfiledCallGraphNodeComparer {
- bool operator()(const ProfiledCallGraphNode *L,
- const ProfiledCallGraphNode *R) const {
- return L->Name < R->Name;
+ // Sort edges by callee names only, since all edges to be compared are from
+ // the same caller. Edge weights are not considered either because for the same
+ // callee only the edge with the largest weight is added to the edge set.
+ struct ProfiledCallGraphEdgeComparer {
+ bool operator()(const ProfiledCallGraphEdge &L,
+ const ProfiledCallGraphEdge &R) const {
+ return L.Target->Name < R.Target->Name;
}
};
- std::set<ProfiledCallGraphNode *, ProfiledCallGraphNodeComparer> Callees;
+
+ using iterator = std::set<ProfiledCallGraphEdge>::iterator;
+ using const_iterator = std::set<ProfiledCallGraphEdge>::const_iterator;
+ using edge = ProfiledCallGraphEdge;
+ using edges = std::set<ProfiledCallGraphEdge, ProfiledCallGraphEdgeComparer>;
+
+ ProfiledCallGraphNode(StringRef FName = StringRef()) : Name(FName) {}
+
+ StringRef Name;
+ edges Edges;
};
class ProfiledCallGraph {
public:
- using iterator = std::set<ProfiledCallGraphNode *>::iterator;
+ using iterator = std::set<ProfiledCallGraphEdge>::iterator;
// Constructor for non-CS profile.
ProfiledCallGraph(SampleProfileMap &ProfileMap) {
@@ -63,8 +88,9 @@ public:
while (!Queue.empty()) {
ContextTrieNode *Caller = Queue.front();
Queue.pop();
- // Add calls for context. When AddNodeWithSamplesOnly is true, both caller
- // and callee need to have context profile.
+ FunctionSamples *CallerSamples = Caller->getFunctionSamples();
+
+ // Add calls for context.
// Note that callsite target samples are completely ignored since they can
// conflict with the context edges, which are formed by context
// compression during profile generation, for cyclic SCCs. This may
@@ -74,31 +100,61 @@ public:
ContextTrieNode *Callee = &Child.second;
addProfiledFunction(ContextTracker.getFuncNameFor(Callee));
Queue.push(Callee);
+
+ // Fetch edge weight from the profile.
+ uint64_t Weight;
+ FunctionSamples *CalleeSamples = Callee->getFunctionSamples();
+ if (!CalleeSamples || !CallerSamples) {
+ Weight = 0;
+ } else {
+ uint64_t CalleeEntryCount = CalleeSamples->getEntrySamples();
+ uint64_t CallsiteCount = 0;
+ LineLocation Callsite = Callee->getCallSiteLoc();
+ if (auto CallTargets = CallerSamples->findCallTargetMapAt(Callsite)) {
+ SampleRecord::CallTargetMap &TargetCounts = CallTargets.get();
+ auto It = TargetCounts.find(CalleeSamples->getName());
+ if (It != TargetCounts.end())
+ CallsiteCount = It->second;
+ }
+ Weight = std::max(CallsiteCount, CalleeEntryCount);
+ }
+
addProfiledCall(ContextTracker.getFuncNameFor(Caller),
- ContextTracker.getFuncNameFor(Callee));
+ ContextTracker.getFuncNameFor(Callee), Weight);
}
}
}
- iterator begin() { return Root.Callees.begin(); }
- iterator end() { return Root.Callees.end(); }
+ iterator begin() { return Root.Edges.begin(); }
+ iterator end() { return Root.Edges.end(); }
ProfiledCallGraphNode *getEntryNode() { return &Root; }
void addProfiledFunction(StringRef Name) {
if (!ProfiledFunctions.count(Name)) {
// Link to synthetic root to make sure every node is reachable
// from root. This does not affect SCC order.
ProfiledFunctions[Name] = ProfiledCallGraphNode(Name);
- Root.Callees.insert(&ProfiledFunctions[Name]);
+ Root.Edges.emplace(&Root, &ProfiledFunctions[Name], 0);
}
}
- void addProfiledCall(StringRef CallerName, StringRef CalleeName) {
+private:
+ void addProfiledCall(StringRef CallerName, StringRef CalleeName,
+ uint64_t Weight = 0) {
assert(ProfiledFunctions.count(CallerName));
auto CalleeIt = ProfiledFunctions.find(CalleeName);
- if (CalleeIt == ProfiledFunctions.end()) {
+ if (CalleeIt == ProfiledFunctions.end())
return;
+ ProfiledCallGraphEdge Edge(&ProfiledFunctions[CallerName],
+ &CalleeIt->second, Weight);
+ auto &Edges = ProfiledFunctions[CallerName].Edges;
+ auto EdgeIt = Edges.find(Edge);
+ if (EdgeIt == Edges.end()) {
+ Edges.insert(Edge);
+ } else if (EdgeIt->Weight < Edge.Weight) {
+ // Replace an existing call edge with the same target but a smaller weight.
+ Edges.erase(EdgeIt);
+ Edges.insert(Edge);
}
- ProfiledFunctions[CallerName].Callees.insert(&CalleeIt->second);
}
void addProfiledCalls(const FunctionSamples &Samples) {
@@ -107,20 +163,20 @@ public:
for (const auto &Sample : Samples.getBodySamples()) {
for (const auto &Target : Sample.second.getCallTargets()) {
addProfiledFunction(Target.first());
- addProfiledCall(Samples.getFuncName(), Target.first());
+ addProfiledCall(Samples.getFuncName(), Target.first(), Target.second);
}
}
for (const auto &CallsiteSamples : Samples.getCallsiteSamples()) {
for (const auto &InlinedSamples : CallsiteSamples.second) {
addProfiledFunction(InlinedSamples.first);
- addProfiledCall(Samples.getFuncName(), InlinedSamples.first);
+ addProfiledCall(Samples.getFuncName(), InlinedSamples.first,
+ InlinedSamples.second.getEntrySamples());
addProfiledCalls(InlinedSamples.second);
}
}
}
-private:
ProfiledCallGraphNode Root;
StringMap<ProfiledCallGraphNode> ProfiledFunctions;
};
@@ -128,12 +184,14 @@ private:
} // end namespace sampleprof
template <> struct GraphTraits<ProfiledCallGraphNode *> {
+ using NodeType = ProfiledCallGraphNode;
using NodeRef = ProfiledCallGraphNode *;
- using ChildIteratorType = std::set<ProfiledCallGraphNode *>::iterator;
+ using EdgeType = NodeType::edge;
+ using ChildIteratorType = NodeType::const_iterator;
static NodeRef getEntryNode(NodeRef PCGN) { return PCGN; }
- static ChildIteratorType child_begin(NodeRef N) { return N->Callees.begin(); }
- static ChildIteratorType child_end(NodeRef N) { return N->Callees.end(); }
+ static ChildIteratorType child_begin(NodeRef N) { return N->Edges.begin(); }
+ static ChildIteratorType child_end(NodeRef N) { return N->Edges.end(); }
};
template <>
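A standalone sketch of the edge-set behavior introduced above: edges are
ordered by callee name only, and re-adding an edge to the same callee keeps
whichever weight is larger, mirroring addProfiledCall. Names here are
illustrative, not part of the header.

    #include <cassert>
    #include <cstdint>
    #include <set>
    #include <string>

    struct Edge {
      std::string Callee;
      uint64_t Weight;
    };
    struct ByCallee {
      bool operator()(const Edge &L, const Edge &R) const {
        return L.Callee < R.Callee;
      }
    };

    int main() {
      std::set<Edge, ByCallee> Edges;
      auto Add = [&](Edge E) {
        auto It = Edges.find(E);
        if (It == Edges.end()) {
          Edges.insert(E);
        } else if (It->Weight < E.Weight) {
          // Replace the existing edge with the heavier one.
          Edges.erase(It);
          Edges.insert(E);
        }
      };
      Add({"foo", 10});
      Add({"foo", 25}); // replaces the weight-10 edge
      Add({"bar", 5});
      assert(Edges.size() == 2 && Edges.find({"foo", 0})->Weight == 25);
      return 0;
    }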
diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h
index c13407a44091..6002f0270083 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h
@@ -141,7 +141,7 @@ private:
AsanDtorKind DestructorKind;
};
-// Insert AddressSanitizer (address sanity checking) instrumentation
+// Insert AddressSanitizer (address basic correctness checking) instrumentation
FunctionPass *createAddressSanitizerFunctionPass(
bool CompileKernel = false, bool Recover = false,
bool UseAfterScope = false,
diff --git a/llvm/include/llvm/Transforms/Scalar/AnnotationRemarks.h b/llvm/include/llvm/Transforms/Scalar/AnnotationRemarks.h
index d76b55babc74..45983ad9d571 100644
--- a/llvm/include/llvm/Transforms/Scalar/AnnotationRemarks.h
+++ b/llvm/include/llvm/Transforms/Scalar/AnnotationRemarks.h
@@ -22,6 +22,7 @@ class Function;
struct AnnotationRemarksPass : public PassInfoMixin<AnnotationRemarksPass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ static bool isRequired() { return true; }
};
} // namespace llvm
diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h
index 72cb606eb51a..3c529abce85a 100644
--- a/llvm/include/llvm/Transforms/Utils/Local.h
+++ b/llvm/include/llvm/Transforms/Utils/Local.h
@@ -55,7 +55,6 @@ class MDNode;
class MemorySSAUpdater;
class PHINode;
class StoreInst;
-class SwitchInst;
class TargetLibraryInfo;
class TargetTransformInfo;
@@ -238,10 +237,6 @@ CallInst *createCallMatchingInvoke(InvokeInst *II);
/// This function converts the specified invoke into a normal call.
void changeToCall(InvokeInst *II, DomTreeUpdater *DTU = nullptr);
-/// This function removes the default destination from the specified switch.
-void createUnreachableSwitchDefault(SwitchInst *Switch,
- DomTreeUpdater *DTU = nullptr);
-
///===---------------------------------------------------------------------===//
/// Dbg Intrinsic utilities
///
diff --git a/llvm/include/llvm/Transforms/Utils/SSAUpdater.h b/llvm/include/llvm/Transforms/Utils/SSAUpdater.h
index 22b2295cc9d7..c233e3dc168e 100644
--- a/llvm/include/llvm/Transforms/Utils/SSAUpdater.h
+++ b/llvm/include/llvm/Transforms/Utils/SSAUpdater.h
@@ -169,6 +169,10 @@ public:
/// Called to update debug info associated with the instruction.
virtual void updateDebugInfo(Instruction *I) const {}
+
+ /// Return false if a sub-class wants to keep one of the loads/stores
+ /// after the SSA construction.
+ virtual bool shouldDelete(Instruction *I) const { return true; }
};
} // end namespace llvm
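A sketch of how a client might use the new hook, assuming it lands on
LoadAndStorePromoter (whose updateDebugInfo appears in the context above):
keep volatile accesses in place instead of deleting them after SSA
construction.

    #include "llvm/IR/Instructions.h"
    #include "llvm/Transforms/Utils/SSAUpdater.h"
    using namespace llvm;

    class KeepVolatilePromoter : public LoadAndStorePromoter {
    public:
      using LoadAndStorePromoter::LoadAndStorePromoter;
      bool shouldDelete(Instruction *I) const override {
        if (auto *LI = dyn_cast<LoadInst>(I))
          return !LI->isVolatile();
        if (auto *SI = dyn_cast<StoreInst>(I))
          return !SI->isVolatile();
        return true; // delete everything else, as before
      }
    };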
diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
new file mode 100644
index 000000000000..e1f681bbd367
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
@@ -0,0 +1,284 @@
+//===- Transforms/Utils/SampleProfileInference.h ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file provides the interface for the profile inference algorithm, profi.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_SAMPLEPROFILEINFERENCE_H
+#define LLVM_TRANSFORMS_UTILS_SAMPLEPROFILEINFERENCE_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallVector.h"
+
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+
+namespace llvm {
+
+class BasicBlock;
+class Function;
+class MachineBasicBlock;
+class MachineFunction;
+
+namespace afdo_detail {
+
+template <class BlockT> struct TypeMap {};
+template <> struct TypeMap<BasicBlock> {
+ using BasicBlockT = BasicBlock;
+ using FunctionT = Function;
+};
+template <> struct TypeMap<MachineBasicBlock> {
+ using BasicBlockT = MachineBasicBlock;
+ using FunctionT = MachineFunction;
+};
+
+} // end namespace afdo_detail
+
+struct FlowJump;
+
+/// A wrapper of a binary basic block.
+struct FlowBlock {
+ uint64_t Index;
+ uint64_t Weight{0};
+ bool UnknownWeight{false};
+ uint64_t Flow{0};
+ bool HasSelfEdge{false};
+ std::vector<FlowJump *> SuccJumps;
+ std::vector<FlowJump *> PredJumps;
+
+ /// Check if it is the entry block in the function.
+ bool isEntry() const { return PredJumps.empty(); }
+
+ /// Check if it is an exit block in the function.
+ bool isExit() const { return SuccJumps.empty(); }
+};
+
+/// A wrapper of a jump between two basic blocks.
+struct FlowJump {
+ uint64_t Source;
+ uint64_t Target;
+ uint64_t Flow{0};
+ bool IsUnlikely{false};
+};
+
+/// A wrapper of a binary function with basic blocks and jumps.
+struct FlowFunction {
+ std::vector<FlowBlock> Blocks;
+ std::vector<FlowJump> Jumps;
+ /// The index of the entry block.
+ uint64_t Entry;
+};
+
+void applyFlowInference(FlowFunction &Func);
+
+/// Sample profile inference pass.
+template <typename BT> class SampleProfileInference {
+public:
+ using BasicBlockT = typename afdo_detail::TypeMap<BT>::BasicBlockT;
+ using FunctionT = typename afdo_detail::TypeMap<BT>::FunctionT;
+ using Edge = std::pair<const BasicBlockT *, const BasicBlockT *>;
+ using BlockWeightMap = DenseMap<const BasicBlockT *, uint64_t>;
+ using EdgeWeightMap = DenseMap<Edge, uint64_t>;
+ using BlockEdgeMap =
+ DenseMap<const BasicBlockT *, SmallVector<const BasicBlockT *, 8>>;
+
+ SampleProfileInference(FunctionT &F, BlockEdgeMap &Successors,
+ BlockWeightMap &SampleBlockWeights)
+ : F(F), Successors(Successors), SampleBlockWeights(SampleBlockWeights) {}
+
+ /// Apply the profile inference algorithm for a given function
+ void apply(BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights);
+
+private:
+ /// Try to infer branch probabilities, mimicking the implementation of
+ /// BranchProbabilityInfo. Branches unlikely to be taken are marked so that
+ /// the inference algorithm can avoid sending flow along corresponding edges.
+ void findUnlikelyJumps(const std::vector<const BasicBlockT *> &BasicBlocks,
+ BlockEdgeMap &Successors, FlowFunction &Func);
+
+ /// Determine whether the block is an exit in the CFG.
+ bool isExit(const BasicBlockT *BB);
+
+ /// Function.
+ const FunctionT &F;
+
+ /// Successors for each basic block in the CFG.
+ BlockEdgeMap &Successors;
+
+ /// Map basic blocks to their sampled weights.
+ BlockWeightMap &SampleBlockWeights;
+};
+
+template <typename BT>
+void SampleProfileInference<BT>::apply(BlockWeightMap &BlockWeights,
+ EdgeWeightMap &EdgeWeights) {
+ // Find all forward-reachable blocks, to which the inference algorithm will
+ // be applied.
+ df_iterator_default_set<const BasicBlockT *> Reachable;
+ for (auto *BB : depth_first_ext(&F, Reachable))
+ (void)BB /* Mark all reachable blocks */;
+
+ // Find all backward-reachable blocks, to which the inference algorithm will
+ // be applied.
+ df_iterator_default_set<const BasicBlockT *> InverseReachable;
+ for (const auto &BB : F) {
+ // An exit block is a block without any successors.
+ if (isExit(&BB)) {
+ for (auto *RBB : inverse_depth_first_ext(&BB, InverseReachable))
+ (void)RBB;
+ }
+ }
+
+ // Keep a stable order for reachable blocks
+ DenseMap<const BasicBlockT *, uint64_t> BlockIndex;
+ std::vector<const BasicBlockT *> BasicBlocks;
+ BlockIndex.reserve(Reachable.size());
+ BasicBlocks.reserve(Reachable.size());
+ for (const auto &BB : F) {
+ if (Reachable.count(&BB) && InverseReachable.count(&BB)) {
+ BlockIndex[&BB] = BasicBlocks.size();
+ BasicBlocks.push_back(&BB);
+ }
+ }
+
+ BlockWeights.clear();
+ EdgeWeights.clear();
+ bool HasSamples = false;
+ for (const auto *BB : BasicBlocks) {
+ auto It = SampleBlockWeights.find(BB);
+ if (It != SampleBlockWeights.end() && It->second > 0) {
+ HasSamples = true;
+ BlockWeights[BB] = It->second;
+ }
+ }
+ // Quit early for functions with a single block or ones w/o samples
+ if (BasicBlocks.size() <= 1 || !HasSamples) {
+ return;
+ }
+
+ // Create necessary objects
+ FlowFunction Func;
+ Func.Blocks.reserve(BasicBlocks.size());
+ // Create FlowBlocks
+ for (const auto *BB : BasicBlocks) {
+ FlowBlock Block;
+ if (SampleBlockWeights.find(BB) != SampleBlockWeights.end()) {
+ Block.UnknownWeight = false;
+ Block.Weight = SampleBlockWeights[BB];
+ } else {
+ Block.UnknownWeight = true;
+ Block.Weight = 0;
+ }
+ Block.Index = Func.Blocks.size();
+ Func.Blocks.push_back(Block);
+ }
+ // Create FlowJumps
+ for (const auto *BB : BasicBlocks) {
+ for (auto *Succ : Successors[BB]) {
+ if (!BlockIndex.count(Succ))
+ continue;
+ FlowJump Jump;
+ Jump.Source = BlockIndex[BB];
+ Jump.Target = BlockIndex[Succ];
+ Func.Jumps.push_back(Jump);
+ if (BB == Succ) {
+ Func.Blocks[BlockIndex[BB]].HasSelfEdge = true;
+ }
+ }
+ }
+ for (auto &Jump : Func.Jumps) {
+ Func.Blocks[Jump.Source].SuccJumps.push_back(&Jump);
+ Func.Blocks[Jump.Target].PredJumps.push_back(&Jump);
+ }
+
+ // Try to infer probabilities of jumps based on the content of basic blocks
+ findUnlikelyJumps(BasicBlocks, Successors, Func);
+
+ // Find the entry block
+ for (size_t I = 0; I < Func.Blocks.size(); I++) {
+ if (Func.Blocks[I].isEntry()) {
+ Func.Entry = I;
+ break;
+ }
+ }
+
+ // Create and apply the inference network model.
+ applyFlowInference(Func);
+
+ // Extract the resulting weights from the control flow
+ // All weights are increased by one to avoid propagation errors introduced by
+ // zero weights.
+ for (const auto *BB : BasicBlocks) {
+ BlockWeights[BB] = Func.Blocks[BlockIndex[BB]].Flow;
+ }
+ for (auto &Jump : Func.Jumps) {
+ Edge E = std::make_pair(BasicBlocks[Jump.Source], BasicBlocks[Jump.Target]);
+ EdgeWeights[E] = Jump.Flow;
+ }
+
+#ifndef NDEBUG
+ // Unreachable blocks and edges should not have a weight.
+ for (auto &I : BlockWeights) {
+ assert(Reachable.contains(I.first));
+ assert(InverseReachable.contains(I.first));
+ }
+ for (auto &I : EdgeWeights) {
+ assert(Reachable.contains(I.first.first) &&
+ Reachable.contains(I.first.second));
+ assert(InverseReachable.contains(I.first.first) &&
+ InverseReachable.contains(I.first.second));
+ }
+#endif
+}
+
+template <typename BT>
+inline void SampleProfileInference<BT>::findUnlikelyJumps(
+ const std::vector<const BasicBlockT *> &BasicBlocks,
+ BlockEdgeMap &Successors, FlowFunction &Func) {}
+
+template <>
+inline void SampleProfileInference<BasicBlock>::findUnlikelyJumps(
+ const std::vector<const BasicBlockT *> &BasicBlocks,
+ BlockEdgeMap &Successors, FlowFunction &Func) {
+ for (auto &Jump : Func.Jumps) {
+ const auto *BB = BasicBlocks[Jump.Source];
+ const auto *Succ = BasicBlocks[Jump.Target];
+ const Instruction *TI = BB->getTerminator();
+ // Check if the block ends with an InvokeInst and mark the non-taken branch
+ // unlikely. In that case block Succ should be a landing pad.
+ if (Successors[BB].size() == 2 && Successors[BB].back() == Succ) {
+ if (isa<InvokeInst>(TI)) {
+ Jump.IsUnlikely = true;
+ }
+ }
+ const Instruction *SuccTI = Succ->getTerminator();
+ // Check if the target block contains an UnreachableInst and mark it unlikely.
+ if (SuccTI->getNumSuccessors() == 0) {
+ if (isa<UnreachableInst>(SuccTI)) {
+ Jump.IsUnlikely = true;
+ }
+ }
+ }
+}
+
+template <typename BT>
+inline bool SampleProfileInference<BT>::isExit(const BasicBlockT *BB) {
+ return BB->succ_empty();
+}
+
+template <>
+inline bool SampleProfileInference<BasicBlock>::isExit(const BasicBlock *BB) {
+ return succ_empty(BB);
+}
+
+} // end namespace llvm
+#endif // LLVM_TRANSFORMS_UTILS_SAMPLEPROFILEINFERENCE_H
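A hand-built FlowFunction for a two-block CFG (entry -> exit), sketching the
interface above: sampled counts go into Weight, blocks without samples are
marked UnknownWeight, and applyFlowInference is expected to fill in the Flow
fields.

    #include "llvm/Transforms/Utils/SampleProfileInference.h"
    using namespace llvm;

    void tiny() {
      FlowFunction Func;
      Func.Blocks.resize(2);
      Func.Blocks[0].Index = 0;
      Func.Blocks[0].Weight = 100;         // sampled count for the entry
      Func.Blocks[1].Index = 1;
      Func.Blocks[1].UnknownWeight = true; // no samples for the exit block
      FlowJump J;
      J.Source = 0;
      J.Target = 1;
      Func.Jumps.push_back(J);
      Func.Blocks[0].SuccJumps.push_back(&Func.Jumps[0]);
      Func.Blocks[1].PredJumps.push_back(&Func.Jumps[0]);
      Func.Entry = 0;
      applyFlowInference(Func); // computes Blocks[i].Flow and Jumps[i].Flow
    }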
diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
index 6a2f0acf46f3..175bdde7fd05 100644
--- a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
+++ b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
@@ -38,6 +38,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/GenericDomTree.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/SampleProfileInference.h"
#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
namespace llvm {
@@ -74,6 +75,8 @@ template <> struct IRTraits<BasicBlock> {
} // end namespace afdo_detail
+extern cl::opt<bool> SampleProfileUseProfi;
+
template <typename BT> class SampleProfileLoaderBaseImpl {
public:
SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName)
@@ -142,6 +145,9 @@ protected:
ArrayRef<BasicBlockT *> Descendants,
PostDominatorTreeT *DomTree);
void propagateWeights(FunctionT &F);
+ void applyProfi(FunctionT &F, BlockEdgeMap &Successors,
+ BlockWeightMap &SampleBlockWeights,
+ BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights);
uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
void buildEdges(FunctionT &F);
bool propagateThroughEdges(FunctionT &F, bool UpdateBlockCount);
@@ -150,6 +156,11 @@ protected:
bool
computeAndPropagateWeights(FunctionT &F,
const DenseSet<GlobalValue::GUID> &InlinedGUIDs);
+ void initWeightPropagation(FunctionT &F,
+ const DenseSet<GlobalValue::GUID> &InlinedGUIDs);
+ void
+ finalizeWeightPropagation(FunctionT &F,
+ const DenseSet<GlobalValue::GUID> &InlinedGUIDs);
void emitCoverageRemarks(FunctionT &F);
/// Map basic blocks to their computed weights.
@@ -741,50 +752,65 @@ void SampleProfileLoaderBaseImpl<BT>::buildEdges(FunctionT &F) {
/// known).
template <typename BT>
void SampleProfileLoaderBaseImpl<BT>::propagateWeights(FunctionT &F) {
- bool Changed = true;
- unsigned I = 0;
-
- // If BB weight is larger than its corresponding loop's header BB weight,
- // use the BB weight to replace the loop header BB weight.
- for (auto &BI : F) {
- BasicBlockT *BB = &BI;
- LoopT *L = LI->getLoopFor(BB);
- if (!L) {
- continue;
+ // Flow-based profile inference is only usable with BasicBlock instantiation
+ // of SampleProfileLoaderBaseImpl.
+ if (SampleProfileUseProfi) {
+ // Prepare block sample counts for inference.
+ BlockWeightMap SampleBlockWeights;
+ for (const auto &BI : F) {
+ ErrorOr<uint64_t> Weight = getBlockWeight(&BI);
+ if (Weight)
+ SampleBlockWeights[&BI] = Weight.get();
}
- BasicBlockT *Header = L->getHeader();
- if (Header && BlockWeights[BB] > BlockWeights[Header]) {
- BlockWeights[Header] = BlockWeights[BB];
+ // Fill in BlockWeights and EdgeWeights using an inference algorithm.
+ applyProfi(F, Successors, SampleBlockWeights, BlockWeights, EdgeWeights);
+ } else {
+ bool Changed = true;
+ unsigned I = 0;
+
+ // If BB weight is larger than its corresponding loop's header BB weight,
+ // use the BB weight to replace the loop header BB weight.
+ for (auto &BI : F) {
+ BasicBlockT *BB = &BI;
+ LoopT *L = LI->getLoopFor(BB);
+ if (!L) {
+ continue;
+ }
+ BasicBlockT *Header = L->getHeader();
+ if (Header && BlockWeights[BB] > BlockWeights[Header]) {
+ BlockWeights[Header] = BlockWeights[BB];
+ }
}
- }
- // Before propagation starts, build, for each block, a list of
- // unique predecessors and successors. This is necessary to handle
- // identical edges in multiway branches. Since we visit all blocks and all
- // edges of the CFG, it is cleaner to build these lists once at the start
- // of the pass.
- buildEdges(F);
+ // Propagate until we converge or we go past the iteration limit.
+ while (Changed && I++ < SampleProfileMaxPropagateIterations) {
+ Changed = propagateThroughEdges(F, false);
+ }
- // Propagate until we converge or we go past the iteration limit.
- while (Changed && I++ < SampleProfileMaxPropagateIterations) {
- Changed = propagateThroughEdges(F, false);
- }
+ // The first propagation propagates BB counts from annotated BBs to unknown
+ // BBs. The 2nd propagation pass resets edge weights and uses all BB
+ // weights to propagate edge weights.
+ VisitedEdges.clear();
+ Changed = true;
+ while (Changed && I++ < SampleProfileMaxPropagateIterations) {
+ Changed = propagateThroughEdges(F, false);
+ }
- // The first propagation propagates BB counts from annotated BBs to unknown
- // BBs. The 2nd propagation pass resets edges weights, and use all BB weights
- // to propagate edge weights.
- VisitedEdges.clear();
- Changed = true;
- while (Changed && I++ < SampleProfileMaxPropagateIterations) {
- Changed = propagateThroughEdges(F, false);
+ // The 3rd propagation pass allows adjusting annotated BB weights that are
+ // obviously wrong.
+ Changed = true;
+ while (Changed && I++ < SampleProfileMaxPropagateIterations) {
+ Changed = propagateThroughEdges(F, true);
+ }
}
+}
- // The 3rd propagation pass allows adjust annotated BB weights that are
- // obviously wrong.
- Changed = true;
- while (Changed && I++ < SampleProfileMaxPropagateIterations) {
- Changed = propagateThroughEdges(F, true);
- }
+template <typename BT>
+void SampleProfileLoaderBaseImpl<BT>::applyProfi(
+ FunctionT &F, BlockEdgeMap &Successors, BlockWeightMap &SampleBlockWeights,
+ BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights) {
+ auto Infer = SampleProfileInference<BT>(F, Successors, SampleBlockWeights);
+ Infer.apply(BlockWeights, EdgeWeights);
}
/// Generate branch weight metadata for all branches in \p F.
@@ -842,26 +868,64 @@ bool SampleProfileLoaderBaseImpl<BT>::computeAndPropagateWeights(
Changed |= computeBlockWeights(F);
if (Changed) {
- // Add an entry count to the function using the samples gathered at the
- // function entry.
- // Sets the GUIDs that are inlined in the profiled binary. This is used
- // for ThinLink to make correct liveness analysis, and also make the IR
- // match the profiled binary before annotation.
- getFunction(F).setEntryCount(
- ProfileCount(Samples->getHeadSamples() + 1, Function::PCT_Real),
- &InlinedGUIDs);
+ // Initialize propagation.
+ initWeightPropagation(F, InlinedGUIDs);
+ // Propagate weights to all edges.
+ propagateWeights(F);
+
+ // Post-process propagated weights.
+ finalizeWeightPropagation(F, InlinedGUIDs);
+ }
+
+ return Changed;
+}
+
+template <typename BT>
+void SampleProfileLoaderBaseImpl<BT>::initWeightPropagation(
+ FunctionT &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
+ // Add an entry count to the function using the samples gathered at the
+ // function entry.
+ // Sets the GUIDs that are inlined in the profiled binary. This is used
+ // by ThinLink to perform correct liveness analysis, and also makes the IR
+ // match the profiled binary before annotation.
+ getFunction(F).setEntryCount(
+ ProfileCount(Samples->getHeadSamples() + 1, Function::PCT_Real),
+ &InlinedGUIDs);
+
+ if (!SampleProfileUseProfi) {
// Compute dominance and loop info needed for propagation.
computeDominanceAndLoopInfo(F);
// Find equivalence classes.
findEquivalenceClasses(F);
-
- // Propagate weights to all edges.
- propagateWeights(F);
}
- return Changed;
+ // Before propagation starts, build, for each block, a list of
+ // unique predecessors and successors. This is necessary to handle
+ // identical edges in multiway branches. Since we visit all blocks and all
+ // edges of the CFG, it is cleaner to build these lists once at the start
+ // of the pass.
+ buildEdges(F);
+}
+
+template <typename BT>
+void SampleProfileLoaderBaseImpl<BT>::finalizeWeightPropagation(
+ FunctionT &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
+ // If we utilize a flow-based count inference, then we trust the computed
+ // counts and set the entry count as computed by the algorithm. This is
+ // primarily done to sync the counts produced by profi and BFI inference,
+ // which uses the entry count for mass propagation.
+ // If profi produces a zero value for the entry count, we fall back to
+ // Samples->getHeadSamples() + 1 to avoid functions with zero count.
+ if (SampleProfileUseProfi) {
+ const BasicBlockT *EntryBB = getEntryBB(&F);
+ if (BlockWeights[EntryBB] > 0) {
+ getFunction(F).setEntryCount(
+ ProfileCount(BlockWeights[EntryBB], Function::PCT_Real),
+ &InlinedGUIDs);
+ }
+ }
}
template <typename BT>
diff --git a/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/llvm/lib/Analysis/BranchProbabilityInfo.cpp
index 33fdc8b628c5..856d7e90acb2 100644
--- a/llvm/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/llvm/lib/Analysis/BranchProbabilityInfo.cpp
@@ -104,12 +104,67 @@ static const uint32_t LBH_NONTAKEN_WEIGHT = 4;
/// All reachable probability will proportionally share the remaining part.
static const BranchProbability UR_TAKEN_PROB = BranchProbability::getRaw(1);
+/// Heuristics and lookup tables for non-loop branches:
+/// Pointer Heuristics (PH)
static const uint32_t PH_TAKEN_WEIGHT = 20;
static const uint32_t PH_NONTAKEN_WEIGHT = 12;
+static const BranchProbability
+ PtrTakenProb(PH_TAKEN_WEIGHT, PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT);
+static const BranchProbability
+ PtrUntakenProb(PH_NONTAKEN_WEIGHT, PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT);
+
+using ProbabilityList = SmallVector<BranchProbability>;
+using ProbabilityTable = std::map<CmpInst::Predicate, ProbabilityList>;
+
+/// Pointer comparisons:
+static const ProbabilityTable PointerTable{
+ {ICmpInst::ICMP_NE, {PtrTakenProb, PtrUntakenProb}}, /// p != q -> Likely
+ {ICmpInst::ICMP_EQ, {PtrUntakenProb, PtrTakenProb}}, /// p == q -> Unlikely
+};
+/// Zero Heuristics (ZH)
static const uint32_t ZH_TAKEN_WEIGHT = 20;
static const uint32_t ZH_NONTAKEN_WEIGHT = 12;
+static const BranchProbability
+ ZeroTakenProb(ZH_TAKEN_WEIGHT, ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT);
+static const BranchProbability
+ ZeroUntakenProb(ZH_NONTAKEN_WEIGHT, ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT);
+
+/// Integer compares with 0:
+static const ProbabilityTable ICmpWithZeroTable{
+ {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}}, /// X == 0 -> Unlikely
+ {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}}, /// X != 0 -> Likely
+ {CmpInst::ICMP_SLT, {ZeroUntakenProb, ZeroTakenProb}}, /// X < 0 -> Unlikely
+ {CmpInst::ICMP_SGT, {ZeroTakenProb, ZeroUntakenProb}}, /// X > 0 -> Likely
+};
+
+/// Integer compares with -1:
+static const ProbabilityTable ICmpWithMinusOneTable{
+ {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}}, /// X == -1 -> Unlikely
+ {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}}, /// X != -1 -> Likely
+ // InstCombine canonicalizes X >= 0 into X > -1
+ {CmpInst::ICMP_SGT, {ZeroTakenProb, ZeroUntakenProb}}, /// X >= 0 -> Likely
+};
+
+/// Integer compares with 1:
+static const ProbabilityTable ICmpWithOneTable{
+ // InstCombine canonicalizes X <= 0 into X < 1
+ {CmpInst::ICMP_SLT, {ZeroUntakenProb, ZeroTakenProb}}, /// X <= 0 -> Unlikely
+};
+
+/// strcmp and similar functions return zero, negative, or positive, if the
+/// first string is equal, less, or greater than the second. We consider it
+/// likely that the strings are not equal, so a comparison with zero is
+/// probably false, and a comparison with any other number is also probably
+/// false, given that what exactly is returned for nonzero values is not
+/// specified. We know nothing about any kind of comparison other than
+/// equality.
+static const ProbabilityTable ICmpWithLibCallTable{
+ {CmpInst::ICMP_EQ, {ZeroUntakenProb, ZeroTakenProb}},
+ {CmpInst::ICMP_NE, {ZeroTakenProb, ZeroUntakenProb}},
+};
+// Floating-Point Heuristics (FPH)
static const uint32_t FPH_TAKEN_WEIGHT = 20;
static const uint32_t FPH_NONTAKEN_WEIGHT = 12;
@@ -120,6 +175,21 @@ static const uint32_t FPH_ORD_WEIGHT = 1024 * 1024 - 1;
/// exceptional case, so the result is unlikely.
static const uint32_t FPH_UNO_WEIGHT = 1;
+static const BranchProbability FPOrdTakenProb(FPH_ORD_WEIGHT,
+ FPH_ORD_WEIGHT + FPH_UNO_WEIGHT);
+static const BranchProbability
+ FPOrdUntakenProb(FPH_UNO_WEIGHT, FPH_ORD_WEIGHT + FPH_UNO_WEIGHT);
+static const BranchProbability
+ FPTakenProb(FPH_TAKEN_WEIGHT, FPH_TAKEN_WEIGHT + FPH_NONTAKEN_WEIGHT);
+static const BranchProbability
+ FPUntakenProb(FPH_NONTAKEN_WEIGHT, FPH_TAKEN_WEIGHT + FPH_NONTAKEN_WEIGHT);
+
+/// Floating-Point compares:
+static const ProbabilityTable FCmpTable{
+ {FCmpInst::FCMP_ORD, {FPOrdTakenProb, FPOrdUntakenProb}}, /// !isnan -> Likely
+ {FCmpInst::FCMP_UNO, {FPOrdUntakenProb, FPOrdTakenProb}}, /// isnan -> Unlikely
+};
+
/// Set of dedicated "absolute" execution weights for a block. These weights are
/// meaningful relative to each other and their derivatives only.
enum class BlockExecWeight : std::uint32_t {
@@ -468,21 +538,10 @@ bool BranchProbabilityInfo::calcPointerHeuristics(const BasicBlock *BB) {
assert(CI->getOperand(1)->getType()->isPointerTy());
- BranchProbability TakenProb(PH_TAKEN_WEIGHT,
- PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT);
- BranchProbability UntakenProb(PH_NONTAKEN_WEIGHT,
- PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT);
-
- // p != 0 -> isProb = true
- // p == 0 -> isProb = false
- // p != q -> isProb = true
- // p == q -> isProb = false;
- bool isProb = CI->getPredicate() == ICmpInst::ICMP_NE;
- if (!isProb)
- std::swap(TakenProb, UntakenProb);
-
- setEdgeProbability(
- BB, SmallVector<BranchProbability, 2>({TakenProb, UntakenProb}));
+ auto Search = PointerTable.find(CI->getPredicate());
+ if (Search == PointerTable.end())
+ return false;
+ setEdgeProbability(BB, Search->second);
return true;
}
@@ -949,86 +1008,33 @@ bool BranchProbabilityInfo::calcZeroHeuristics(const BasicBlock *BB,
if (Function *CalledFn = Call->getCalledFunction())
TLI->getLibFunc(*CalledFn, Func);
- bool isProb;
+ ProbabilityTable::const_iterator Search;
if (Func == LibFunc_strcasecmp ||
Func == LibFunc_strcmp ||
Func == LibFunc_strncasecmp ||
Func == LibFunc_strncmp ||
Func == LibFunc_memcmp ||
Func == LibFunc_bcmp) {
- // strcmp and similar functions return zero, negative, or positive, if the
- // first string is equal, less, or greater than the second. We consider it
- // likely that the strings are not equal, so a comparison with zero is
- // probably false, but also a comparison with any other number is also
- // probably false given that what exactly is returned for nonzero values is
- // not specified. Any kind of comparison other than equality we know
- // nothing about.
- switch (CI->getPredicate()) {
- case CmpInst::ICMP_EQ:
- isProb = false;
- break;
- case CmpInst::ICMP_NE:
- isProb = true;
- break;
- default:
+ Search = ICmpWithLibCallTable.find(CI->getPredicate());
+ if (Search == ICmpWithLibCallTable.end())
return false;
- }
} else if (CV->isZero()) {
- switch (CI->getPredicate()) {
- case CmpInst::ICMP_EQ:
- // X == 0 -> Unlikely
- isProb = false;
- break;
- case CmpInst::ICMP_NE:
- // X != 0 -> Likely
- isProb = true;
- break;
- case CmpInst::ICMP_SLT:
- // X < 0 -> Unlikely
- isProb = false;
- break;
- case CmpInst::ICMP_SGT:
- // X > 0 -> Likely
- isProb = true;
- break;
- default:
+ Search = ICmpWithZeroTable.find(CI->getPredicate());
+ if (Search == ICmpWithZeroTable.end())
+ return false;
+ } else if (CV->isOne()) {
+ Search = ICmpWithOneTable.find(CI->getPredicate());
+ if (Search == ICmpWithOneTable.end())
return false;
- }
- } else if (CV->isOne() && CI->getPredicate() == CmpInst::ICMP_SLT) {
- // InstCombine canonicalizes X <= 0 into X < 1.
- // X <= 0 -> Unlikely
- isProb = false;
} else if (CV->isMinusOne()) {
- switch (CI->getPredicate()) {
- case CmpInst::ICMP_EQ:
- // X == -1 -> Unlikely
- isProb = false;
- break;
- case CmpInst::ICMP_NE:
- // X != -1 -> Likely
- isProb = true;
- break;
- case CmpInst::ICMP_SGT:
- // InstCombine canonicalizes X >= 0 into X > -1.
- // X >= 0 -> Likely
- isProb = true;
- break;
- default:
+ Search = ICmpWithMinusOneTable.find(CI->getPredicate());
+ if (Search == ICmpWithMinusOneTable.end())
return false;
- }
} else {
return false;
}
- BranchProbability TakenProb(ZH_TAKEN_WEIGHT,
- ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT);
- BranchProbability UntakenProb(ZH_NONTAKEN_WEIGHT,
- ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT);
- if (!isProb)
- std::swap(TakenProb, UntakenProb);
-
- setEdgeProbability(
- BB, SmallVector<BranchProbability, 2>({TakenProb, UntakenProb}));
+ setEdgeProbability(BB, Search->second);
return true;
}
@@ -1042,34 +1048,21 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(const BasicBlock *BB) {
if (!FCmp)
return false;
- uint32_t TakenWeight = FPH_TAKEN_WEIGHT;
- uint32_t NontakenWeight = FPH_NONTAKEN_WEIGHT;
- bool isProb;
+ ProbabilityList ProbList;
if (FCmp->isEquality()) {
- // f1 == f2 -> Unlikely
- // f1 != f2 -> Likely
- isProb = !FCmp->isTrueWhenEqual();
- } else if (FCmp->getPredicate() == FCmpInst::FCMP_ORD) {
- // !isnan -> Likely
- isProb = true;
- TakenWeight = FPH_ORD_WEIGHT;
- NontakenWeight = FPH_UNO_WEIGHT;
- } else if (FCmp->getPredicate() == FCmpInst::FCMP_UNO) {
- // isnan -> Unlikely
- isProb = false;
- TakenWeight = FPH_ORD_WEIGHT;
- NontakenWeight = FPH_UNO_WEIGHT;
+ ProbList = !FCmp->isTrueWhenEqual() ?
+ // f1 != f2 -> Likely
+ ProbabilityList({FPTakenProb, FPUntakenProb}) :
+ // f1 == f2 -> Unlikely
+ ProbabilityList({FPUntakenProb, FPTakenProb});
} else {
- return false;
+ auto Search = FCmpTable.find(FCmp->getPredicate());
+ if (Search == FCmpTable.end())
+ return false;
+ ProbList = Search->second;
}
- BranchProbability TakenProb(TakenWeight, TakenWeight + NontakenWeight);
- BranchProbability UntakenProb(NontakenWeight, TakenWeight + NontakenWeight);
- if (!isProb)
- std::swap(TakenProb, UntakenProb);
-
- setEdgeProbability(
- BB, SmallVector<BranchProbability, 2>({TakenProb, UntakenProb}));
+ setEdgeProbability(BB, ProbList);
return true;
}
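A standalone sketch of the table-driven pattern adopted in this file: each
predicate maps to an ordered {taken, not-taken} probability list, replacing
per-predicate switches followed by a conditional swap. The fractions reuse
ZH_TAKEN_WEIGHT = 20 and ZH_NONTAKEN_WEIGHT = 12 from the hunk; the types are
simplified stand-ins.

    #include <map>
    #include <vector>

    enum class Pred { EQ, NE, SLT, SGT };
    using ProbList = std::vector<double>;

    static const std::map<Pred, ProbList> ICmpWithZero{
        {Pred::EQ, {12.0 / 32, 20.0 / 32}},  // X == 0 -> Unlikely
        {Pred::NE, {20.0 / 32, 12.0 / 32}},  // X != 0 -> Likely
        {Pred::SLT, {12.0 / 32, 20.0 / 32}}, // X < 0 -> Unlikely
        {Pred::SGT, {20.0 / 32, 12.0 / 32}}, // X > 0 -> Likely
    };

    bool lookupProbabilities(Pred P, ProbList &Out) {
      auto It = ICmpWithZero.find(P);
      if (It == ICmpWithZero.end())
        return false; // uncovered predicate: fall back to other heuristics
      Out = It->second;
      return true;
    }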
diff --git a/llvm/lib/Analysis/DivergenceAnalysis.cpp b/llvm/lib/Analysis/DivergenceAnalysis.cpp
index 3634526370f5..7426d0c07592 100644
--- a/llvm/lib/Analysis/DivergenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DivergenceAnalysis.cpp
@@ -24,12 +24,12 @@
// divergent can help the compiler to selectively run these optimizations.
//
// This implementation is derived from the Vectorization Analysis of the
-// Region Vectorizer (RV). That implementation in turn is based on the approach
-// described in
+// Region Vectorizer (RV). The analysis is based on the approach described in
//
-// Improving Performance of OpenCL on CPUs
-// Ralf Karrenberg and Sebastian Hack
-// CC '12
+// An abstract interpretation for SPMD divergence
+// on reducible control flow graphs.
+// Julian Rosemann, Simon Moll and Sebastian Hack
+// POPL '21
//
// This implementation is generic in the sense that it does
// not itself identify original sources of divergence.
diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
index f22c6aa04f5e..2ec6cbeabda2 100644
--- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
+++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
@@ -820,7 +820,7 @@ void IRSimilarityIdentifier::populateMapper(
/// subsequence from the \p InstrList, and create an IRSimilarityCandidate from
/// the IRInstructionData in subsequence.
///
-/// \param [in] Mapper - The instruction mapper for sanity checks.
+/// \param [in] Mapper - The instruction mapper for basic correctness checks.
/// \param [in] InstrList - The vector that holds the instruction data.
/// \param [in] IntegerMapping - The vector that holds the mapped integers.
/// \param [out] CandsForRepSubstring - The vector to store the generated
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index c4b7239b43ab..cfe910df4e91 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -81,6 +81,7 @@ bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurKind Kind) {
case RecurKind::Mul:
case RecurKind::FAdd:
case RecurKind::FMul:
+ case RecurKind::FMulAdd:
return true;
}
return false;
@@ -194,21 +195,28 @@ static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit,
// vectorizing floating point operations without unsafe math.
static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst,
Instruction *Exit, PHINode *Phi) {
- // Currently only FAdd is supported
- if (Kind != RecurKind::FAdd)
+ // Currently only FAdd and FMulAdd are supported.
+ if (Kind != RecurKind::FAdd && Kind != RecurKind::FMulAdd)
return false;
- // Ensure the exit instruction is an FAdd, and that it only has one user
- // other than the reduction PHI
- if (Exit->getOpcode() != Instruction::FAdd || Exit->hasNUsesOrMore(3) ||
- Exit != ExactFPMathInst)
+ if (Kind == RecurKind::FAdd && Exit->getOpcode() != Instruction::FAdd)
+ return false;
+
+ if (Kind == RecurKind::FMulAdd &&
+ !RecurrenceDescriptor::isFMulAddIntrinsic(Exit))
+ return false;
+
+ // Ensure the exit instruction has only one user other than the reduction PHI
+ if (Exit != ExactFPMathInst || Exit->hasNUsesOrMore(3))
return false;
// The only pattern accepted is the one in which the reduction PHI
// is used as one of the operands of the exit instruction
- auto *LHS = Exit->getOperand(0);
- auto *RHS = Exit->getOperand(1);
- if (LHS != Phi && RHS != Phi)
+ auto *Op0 = Exit->getOperand(0);
+ auto *Op1 = Exit->getOperand(1);
+ if (Kind == RecurKind::FAdd && Op0 != Phi && Op1 != Phi)
+ return false;
+ if (Kind == RecurKind::FMulAdd && Exit->getOperand(2) != Phi)
return false;
LLVM_DEBUG(dbgs() << "LV: Found an ordered reduction: Phi: " << *Phi
@@ -389,6 +397,12 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
for (User *U : Cur->users()) {
Instruction *UI = cast<Instruction>(U);
+ // If the user is a call to llvm.fmuladd then Cur can only appear as the
+ // final operand.
+ if (isFMulAddIntrinsic(UI))
+ if (Cur == UI->getOperand(0) || Cur == UI->getOperand(1))
+ return false;
+
// Check if we found the exit user.
BasicBlock *Parent = UI->getParent();
if (!TheLoop->contains(Parent)) {
@@ -710,6 +724,9 @@ RecurrenceDescriptor::isRecurrenceInstr(Loop *L, PHINode *OrigPhi,
I->hasNoSignedZeros())) &&
isFPMinMaxRecurrenceKind(Kind)))
return isMinMaxPattern(I, Kind, Prev);
+ else if (isFMulAddIntrinsic(I))
+ return InstDesc(Kind == RecurKind::FMulAdd, I,
+ I->hasAllowReassoc() ? nullptr : I);
return InstDesc(false, I);
}
}
@@ -804,6 +821,11 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
<< " PHI." << *Phi << "\n");
return true;
}
+ if (AddReductionVar(Phi, RecurKind::FMulAdd, TheLoop, FMF, RedDes, DB, AC,
+ DT)) {
+ LLVM_DEBUG(dbgs() << "Found an FMulAdd reduction PHI." << *Phi << "\n");
+ return true;
+ }
// Not a reduction of known type.
return false;
}
@@ -927,6 +949,7 @@ Value *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp,
case RecurKind::FMul:
// Multiplying a number by 1 does not change it.
return ConstantFP::get(Tp, 1.0L);
+ case RecurKind::FMulAdd:
case RecurKind::FAdd:
// Adding zero to a number does not change it.
// FIXME: Ideally we should not need to check FMF for FAdd and should always
@@ -974,6 +997,7 @@ unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) {
return Instruction::Xor;
case RecurKind::FMul:
return Instruction::FMul;
+ case RecurKind::FMulAdd:
case RecurKind::FAdd:
return Instruction::FAdd;
case RecurKind::SMax:
@@ -1032,6 +1056,10 @@ RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const {
return SelectPatternResult::isMinOrMax(
matchSelectPattern(Cur, LHS, RHS).Flavor);
}
+ // Recognize a call to the llvm.fmuladd intrinsic.
+ if (isFMulAddIntrinsic(Cur))
+ return true;
+
return Cur->getOpcode() == RedOp;
};
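A sketch of C++ source that clang typically contracts to an llvm.fmuladd
reduction (e.g. under -ffp-contract=on): the accumulator becomes the
intrinsic's third operand, which is exactly the getOperand(2) == Phi
requirement in checkOrderedReduction above. The compilation flags are an
assumption here, not part of the change.

    float dot(const float *A, const float *B, int N) {
      float Sum = 0.0f;
      for (int I = 0; I < N; ++I)
        Sum += A[I] * B[I]; // contracts to Sum = llvm.fmuladd(A[I], B[I], Sum)
      return Sum;
    }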
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 864eeea4f8bf..22d2ce11cc90 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -2180,6 +2180,55 @@ Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q) {
return ::SimplifyAndInst(Op0, Op1, Q, RecursionLimit);
}
+static Value *simplifyOrLogic(Value *X, Value *Y) {
+ assert(X->getType() == Y->getType() && "Expected same type for 'or' ops");
+ Type *Ty = X->getType();
+
+ // X | ~X --> -1
+ if (match(Y, m_Not(m_Specific(X))))
+ return ConstantInt::getAllOnesValue(Ty);
+
+ // X | ~(X & ?) = -1
+ if (match(Y, m_Not(m_c_And(m_Specific(X), m_Value()))))
+ return ConstantInt::getAllOnesValue(Ty);
+
+ // X | (X & ?) --> X
+ if (match(Y, m_c_And(m_Specific(X), m_Value())))
+ return X;
+
+ Value *A, *B;
+
+ // (A & ~B) | (A ^ B) --> A ^ B
+ // (~B & A) | (A ^ B) --> A ^ B
+ // (A & ~B) | (B ^ A) --> B ^ A
+ // (~B & A) | (B ^ A) --> B ^ A
+ if (match(X, m_c_And(m_Value(A), m_Not(m_Value(B)))) &&
+ match(Y, m_c_Xor(m_Specific(A), m_Specific(B))))
+ return Y;
+
+ // (~A ^ B) | (A & B) --> ~A ^ B
+ // (B ^ ~A) | (A & B) --> B ^ ~A
+ // (~A ^ B) | (B & A) --> ~A ^ B
+ // (B ^ ~A) | (B & A) --> B ^ ~A
+ if (match(X, m_c_Xor(m_Not(m_Value(A)), m_Value(B))) &&
+ match(Y, m_c_And(m_Specific(A), m_Specific(B))))
+ return X;
+
+ // (A ^ B) | (A | B) --> A | B
+ // (A ^ B) | (B | A) --> B | A
+ if (match(X, m_Xor(m_Value(A), m_Value(B))) &&
+ match(Y, m_c_Or(m_Specific(A), m_Specific(B))))
+ return Y;
+
+ // ~(A ^ B) | (A | B) --> -1
+ // ~(A ^ B) | (B | A) --> -1
+ if (match(X, m_Not(m_Xor(m_Value(A), m_Value(B)))) &&
+ match(Y, m_c_Or(m_Specific(A), m_Specific(B))))
+ return ConstantInt::getAllOnesValue(Ty);
+
+ return nullptr;
+}
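As a quick sketch (hypothetical test function, not part of the patch), the last fold above lets instsimplify reduce the following to a constant:

define i8 @not_xor_or(i8 %a, i8 %b) {
  %xor = xor i8 %a, %b
  %not = xor i8 %xor, -1  ; ~(A ^ B)
  %or = or i8 %a, %b      ; A | B
  %r = or i8 %not, %or    ; ~(A ^ B) | (A | B) --> -1
  ret i8 %r
}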
+
/// Given operands for an Or, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
@@ -2202,81 +2251,15 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
if (Op0 == Op1 || match(Op1, m_Zero()))
return Op0;
- // A | ~A = ~A | A = -1
- if (match(Op0, m_Not(m_Specific(Op1))) ||
- match(Op1, m_Not(m_Specific(Op0))))
- return Constant::getAllOnesValue(Op0->getType());
-
- // (A & ?) | A = A
- if (match(Op0, m_c_And(m_Specific(Op1), m_Value())))
- return Op1;
-
- // A | (A & ?) = A
- if (match(Op1, m_c_And(m_Specific(Op0), m_Value())))
- return Op0;
-
- // ~(A & ?) | A = -1
- if (match(Op0, m_Not(m_c_And(m_Specific(Op1), m_Value()))))
- return Constant::getAllOnesValue(Op1->getType());
-
- // A | ~(A & ?) = -1
- if (match(Op1, m_Not(m_c_And(m_Specific(Op0), m_Value()))))
- return Constant::getAllOnesValue(Op0->getType());
+ if (Value *R = simplifyOrLogic(Op0, Op1))
+ return R;
+ if (Value *R = simplifyOrLogic(Op1, Op0))
+ return R;
if (Value *V = simplifyLogicOfAddSub(Op0, Op1, Instruction::Or))
return V;
Value *A, *B, *NotA;
- // (A & ~B) | (A ^ B) -> (A ^ B)
- // (~B & A) | (A ^ B) -> (A ^ B)
- // (A & ~B) | (B ^ A) -> (B ^ A)
- // (~B & A) | (B ^ A) -> (B ^ A)
- if (match(Op1, m_Xor(m_Value(A), m_Value(B))) &&
- (match(Op0, m_c_And(m_Specific(A), m_Not(m_Specific(B)))) ||
- match(Op0, m_c_And(m_Not(m_Specific(A)), m_Specific(B)))))
- return Op1;
-
- // Commute the 'or' operands.
- // (A ^ B) | (A & ~B) -> (A ^ B)
- // (A ^ B) | (~B & A) -> (A ^ B)
- // (B ^ A) | (A & ~B) -> (B ^ A)
- // (B ^ A) | (~B & A) -> (B ^ A)
- if (match(Op0, m_Xor(m_Value(A), m_Value(B))) &&
- (match(Op1, m_c_And(m_Specific(A), m_Not(m_Specific(B)))) ||
- match(Op1, m_c_And(m_Not(m_Specific(A)), m_Specific(B)))))
- return Op0;
-
- // (A & B) | (~A ^ B) -> (~A ^ B)
- // (B & A) | (~A ^ B) -> (~A ^ B)
- // (A & B) | (B ^ ~A) -> (B ^ ~A)
- // (B & A) | (B ^ ~A) -> (B ^ ~A)
- if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
- (match(Op1, m_c_Xor(m_Specific(A), m_Not(m_Specific(B)))) ||
- match(Op1, m_c_Xor(m_Not(m_Specific(A)), m_Specific(B)))))
- return Op1;
-
- // Commute the 'or' operands.
- // (~A ^ B) | (A & B) -> (~A ^ B)
- // (~A ^ B) | (B & A) -> (~A ^ B)
- // (B ^ ~A) | (A & B) -> (B ^ ~A)
- // (B ^ ~A) | (B & A) -> (B ^ ~A)
- if (match(Op1, m_And(m_Value(A), m_Value(B))) &&
- (match(Op0, m_c_Xor(m_Specific(A), m_Not(m_Specific(B)))) ||
- match(Op0, m_c_Xor(m_Not(m_Specific(A)), m_Specific(B)))))
- return Op0;
-
- // (A | B) | (A ^ B) --> A | B
- // (B | A) | (A ^ B) --> B | A
- if (match(Op1, m_Xor(m_Value(A), m_Value(B))) &&
- match(Op0, m_c_Or(m_Specific(A), m_Specific(B))))
- return Op0;
-
- // Commute the outer 'or' operands.
- // (A ^ B) | (A | B) --> A | B
- // (A ^ B) | (B | A) --> B | A
- if (match(Op0, m_Xor(m_Value(A), m_Value(B))) &&
- match(Op1, m_c_Or(m_Specific(A), m_Specific(B))))
- return Op1;
// (~A & B) | ~(A | B) --> ~A
// (~A & B) | ~(B | A) --> ~A
@@ -2414,6 +2397,30 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
match(Op1, m_Not(m_Specific(Op0))))
return Constant::getAllOnesValue(Op0->getType());
+ auto foldAndOrNot = [](Value *X, Value *Y) -> Value * {
+ Value *A, *B;
+ // (~A & B) ^ (A | B) --> A -- There are 8 commuted variants.
+ if (match(X, m_c_And(m_Not(m_Value(A)), m_Value(B))) &&
+ match(Y, m_c_Or(m_Specific(A), m_Specific(B))))
+ return A;
+
+ // (~A | B) ^ (A & B) --> ~A -- There are 8 commuted variants.
+ // The 'not' op must contain a complete -1 operand (no undef elements for
+ // vector) for the transform to be safe.
+ Value *NotA;
+ if (match(X,
+ m_c_Or(m_CombineAnd(m_NotForbidUndef(m_Value(A)), m_Value(NotA)),
+ m_Value(B))) &&
+ match(Y, m_c_And(m_Specific(A), m_Specific(B))))
+ return NotA;
+
+ return nullptr;
+ };
+ if (Value *R = foldAndOrNot(Op0, Op1))
+ return R;
+ if (Value *R = foldAndOrNot(Op1, Op0))
+ return R;
+
if (Value *V = simplifyLogicOfAddSub(Op0, Op1, Instruction::Xor))
return V;
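A sketch of the first new xor fold, with hypothetical names; every result bit equals the corresponding bit of %a, so instsimplify can return %a directly:

define i8 @and_xor_or(i8 %a, i8 %b) {
  %nota = xor i8 %a, -1
  %and = and i8 %nota, %b  ; ~A & B
  %or = or i8 %a, %b       ; A | B
  %r = xor i8 %and, %or    ; (~A & B) ^ (A | B) --> A
  ret i8 %r
}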
@@ -2935,8 +2942,10 @@ static Value *simplifyICmpWithBinOpOnLHS(
return getFalse(ITy);
}
- // x >> y <=u x
- // x udiv y <=u x.
+ // x >>u y <=u x --> true.
+ // x >>u y >u x --> false.
+ // x udiv y <=u x --> true.
+ // x udiv y >u x --> false.
if (match(LBO, m_LShr(m_Specific(RHS), m_Value())) ||
match(LBO, m_UDiv(m_Specific(RHS), m_Value()))) {
// icmp pred (X op Y), X
@@ -2946,6 +2955,37 @@ static Value *simplifyICmpWithBinOpOnLHS(
return getTrue(ITy);
}
+ // If x is nonzero:
+ // x >>u C <u x --> true for C != 0.
+ // x >>u C != x --> true for C != 0.
+ // x >>u C >=u x --> false for C != 0.
+ // x >>u C == x --> false for C != 0.
+ // x udiv C <u x --> true for C != 1.
+ // x udiv C != x --> true for C != 1.
+ // x udiv C >=u x --> false for C != 1.
+ // x udiv C == x --> false for C != 1.
+ // TODO: allow non-constant shift amount/divisor
+ const APInt *C;
+ if ((match(LBO, m_LShr(m_Specific(RHS), m_APInt(C))) && *C != 0) ||
+ (match(LBO, m_UDiv(m_Specific(RHS), m_APInt(C))) && *C != 1)) {
+ if (isKnownNonZero(RHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) {
+ switch (Pred) {
+ default:
+ break;
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_UGE:
+ return getFalse(ITy);
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_ULT:
+ return getTrue(ITy);
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_ULE:
+ // UGT/ULE are handled by the more general case just above
+ llvm_unreachable("Unexpected UGT/ULE, should have been handled");
+ }
+ }
+ }
+
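A sketch of one of the eight new cases (hypothetical test): %x is provably non-zero and the shift amount is a non-zero constant, so the compare folds to true:

define i1 @lshr_ult_nonzero(i8 %v) {
  %x = or i8 %v, 1        ; known non-zero
  %s = lshr i8 %x, 3      ; C != 0
  %c = icmp ult i8 %s, %x ; x >>u C <u x --> true
  ret i1 %c
}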
// (x*C1)/C2 <= x for C1 <= C2.
// This holds even if the multiplication overflows: Assume that x != 0 and
// arithmetic is modulo M. For overflow to occur we must have C1 >= M/x and
diff --git a/llvm/lib/Analysis/IntervalPartition.cpp b/llvm/lib/Analysis/IntervalPartition.cpp
index 23ff4fd6f85e..d9620fd405bc 100644
--- a/llvm/lib/Analysis/IntervalPartition.cpp
+++ b/llvm/lib/Analysis/IntervalPartition.cpp
@@ -36,16 +36,16 @@ INITIALIZE_PASS(IntervalPartition, "intervals",
// releaseMemory - Reset state back to before function was analyzed
void IntervalPartition::releaseMemory() {
- for (unsigned i = 0, e = Intervals.size(); i != e; ++i)
- delete Intervals[i];
+ for (Interval *I : Intervals)
+ delete I;
IntervalMap.clear();
Intervals.clear();
RootInterval = nullptr;
}
void IntervalPartition::print(raw_ostream &O, const Module*) const {
- for(unsigned i = 0, e = Intervals.size(); i != e; ++i)
- Intervals[i]->print(O);
+ for (const Interval *I : Intervals)
+ I->print(O);
}
// addIntervalToPartition - Add an interval to the internal list of intervals,
@@ -87,8 +87,8 @@ bool IntervalPartition::runOnFunction(Function &F) {
// Now that we know all of the successor information, propagate this to the
// predecessors for each block.
- for (unsigned i = 0, e = Intervals.size(); i != e; ++i)
- updatePredecessors(Intervals[i]);
+ for (Interval *I : Intervals)
+ updatePredecessors(I);
return false;
}
@@ -113,6 +113,6 @@ IntervalPartition::IntervalPartition(IntervalPartition &IP, bool)
// Now that we know all of the successor information, propagate this to the
// predecessors for each block.
- for (unsigned i = 0, e = Intervals.size(); i != e; ++i)
- updatePredecessors(Intervals[i]);
+ for (Interval *I : Intervals)
+ updatePredecessors(I);
}
diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp
index 50fa169c2081..5b5d48bf6fe5 100644
--- a/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -1095,7 +1095,8 @@ static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI,
if (!Ty->isIntegerTy())
return ValueLatticeElement::getOverdefined();
- APInt Offset(Ty->getScalarSizeInBits(), 0);
+ unsigned BitWidth = Ty->getScalarSizeInBits();
+ APInt Offset(BitWidth, 0);
if (matchICmpOperand(Offset, LHS, Val, EdgePred))
return getValueFromSimpleICmpCondition(EdgePred, RHS, Offset);
@@ -1118,13 +1119,23 @@ static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI,
// If (Val & Mask) != 0 then the value must be larger than the lowest set
// bit of Mask.
if (EdgePred == ICmpInst::ICMP_NE && !Mask->isZero() && C->isZero()) {
- unsigned BitWidth = Ty->getIntegerBitWidth();
return ValueLatticeElement::getRange(ConstantRange::getNonEmpty(
APInt::getOneBitSet(BitWidth, Mask->countTrailingZeros()),
APInt::getZero(BitWidth)));
}
}
+ // If (X urem Modulus) >= C, then X >= C.
+ // TODO: An upper bound could be computed as well.
+ if (match(LHS, m_URem(m_Specific(Val), m_Value())) &&
+ match(RHS, m_APInt(C))) {
+ // Use the icmp region so we don't have to deal with different predicates.
+ ConstantRange CR = ConstantRange::makeExactICmpRegion(EdgePred, *C);
+ if (!CR.isEmptySet())
+ return ValueLatticeElement::getRange(ConstantRange::getNonEmpty(
+ CR.getUnsignedMin(), APInt(BitWidth, 0)));
+ }
+
return ValueLatticeElement::getOverdefined();
}
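A sketch (hypothetical names) of the new urem bound: on the %taken edge (%x urem %m) >= 42 holds, and since urem never increases its first operand, %x >= 42 follows on that edge:

define i8 @urem_lower_bound(i8 %x, i8 %m) {
entry:
  %rem = urem i8 %x, %m
  %cmp = icmp uge i8 %rem, 42
  br i1 %cmp, label %taken, label %exit
taken:
  ret i8 %x  ; here LVI knows %x is in [42, 255]
exit:
  ret i8 0
}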
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index f9bd7167317f..19a24ac6a484 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -666,6 +666,29 @@ static bool isNoWrap(PredicatedScalarEvolution &PSE,
return false;
}
+static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
+ function_ref<void(Value *)> AddPointer) {
+ SmallPtrSet<Value *, 8> Visited;
+ SmallVector<Value *> WorkList;
+ WorkList.push_back(StartPtr);
+
+ while (!WorkList.empty()) {
+ Value *Ptr = WorkList.pop_back_val();
+ if (!Visited.insert(Ptr).second)
+ continue;
+ auto *PN = dyn_cast<PHINode>(Ptr);
+ // SCEV does not look through non-header PHIs inside the loop. Such phis
+ // can be analyzed by adding separate accesses for each incoming pointer
+ // value.
+ if (PN && InnermostLoop.contains(PN->getParent()) &&
+ PN->getParent() != InnermostLoop.getHeader()) {
+ for (const Use &Inc : PN->incoming_values())
+ WorkList.push_back(Inc);
+ } else
+ AddPointer(Ptr);
+ }
+}
+
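A sketch of the kind of forked pointer this walks through (hypothetical test): %p is a non-header PHI that SCEV cannot analyze directly, but each incoming pointer can be added as its own access:

define void @forked_ptr(i8* %a, i8* %b, i64 %n) {
entry:
  br label %loop
loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %latch ]
  %c = icmp ult i64 %i, 16
  br i1 %c, label %then, label %else
then:
  br label %latch
else:
  br label %latch
latch:
  %p = phi i8* [ %a, %then ], [ %b, %else ] ; non-header PHI inside the loop
  store i8 0, i8* %p
  %i.next = add nuw i64 %i, 1
  %cmp = icmp ult i64 %i.next, %n
  br i1 %cmp, label %loop, label %exit
exit:
  ret void
}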
bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
MemAccessInfo Access,
const ValueToValueMap &StridesMap,
@@ -1032,13 +1055,11 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy,
bool ShouldCheckWrap) {
Type *Ty = Ptr->getType();
assert(Ty->isPointerTy() && "Unexpected non-ptr");
- unsigned AddrSpace = Ty->getPointerAddressSpace();
+ assert(!AccessTy->isAggregateType() &&
+ "Bad stride - Not a pointer to a scalar type");
- // Make sure we're not accessing an aggregate type.
- // TODO: Why? This doesn't make any sense.
- if (AccessTy->isAggregateType()) {
- LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type"
- << *Ptr << "\n");
+ if (isa<ScalableVectorType>(AccessTy)) {
+ LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy
+ << "\n");
return 0;
}
@@ -1068,6 +1089,7 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy,
// An getelementptr without an inbounds attribute and unit stride would have
// to access the pointer value "0" which is undefined behavior in address
// space 0, therefore we can also vectorize this case.
+ unsigned AddrSpace = Ty->getPointerAddressSpace();
bool IsInBoundsGEP = isInBoundsGep(Ptr);
bool IsNoWrapAddRec = !ShouldCheckWrap ||
PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
@@ -1101,7 +1123,8 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy,
}
auto &DL = Lp->getHeader()->getModule()->getDataLayout();
- int64_t Size = DL.getTypeAllocSize(AccessTy);
+ TypeSize AllocSize = DL.getTypeAllocSize(AccessTy);
+ int64_t Size = AllocSize.getFixedSize();
const APInt &APStepVal = C->getAPInt();
// Huge step value - give up.
@@ -1263,29 +1286,6 @@ bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
return Diff && *Diff == 1;
}
-static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
- function_ref<void(Value *)> AddPointer) {
- SmallPtrSet<Value *, 8> Visited;
- SmallVector<Value *> WorkList;
- WorkList.push_back(StartPtr);
-
- while (!WorkList.empty()) {
- Value *Ptr = WorkList.pop_back_val();
- if (!Visited.insert(Ptr).second)
- continue;
- auto *PN = dyn_cast<PHINode>(Ptr);
- // SCEV does not look through non-header PHIs inside the loop. Such phis
- // can be analyzed by adding separate accesses for each incoming pointer
- // value.
- if (PN && InnermostLoop.contains(PN->getParent()) &&
- PN->getParent() != InnermostLoop.getHeader()) {
- for (const Use &Inc : PN->incoming_values())
- WorkList.push_back(Inc);
- } else
- AddPointer(Ptr);
- }
-}
-
void MemoryDepChecker::addAccess(StoreInst *SI) {
visitPointers(SI->getPointerOperand(), *InnermostLoop,
[this, SI](Value *Ptr) {
diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index b44d15e71556..da6bb4c49cba 100644
--- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -1481,11 +1481,11 @@ void MemoryDependenceResults::removeCachedNonLocalPointerDependencies(
// instructions from the reverse map.
NonLocalDepInfo &PInfo = It->second.NonLocalDeps;
- for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
- Instruction *Target = PInfo[i].getResult().getInst();
+ for (const NonLocalDepEntry &DE : PInfo) {
+ Instruction *Target = DE.getResult().getInst();
if (!Target)
continue; // Ignore non-local dep results.
- assert(Target->getParent() == PInfo[i].getBB());
+ assert(Target->getParent() == DE.getBB());
// Eliminating the dirty entry from 'Cache', so update the reverse info.
RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
diff --git a/llvm/lib/Analysis/MemoryLocation.cpp b/llvm/lib/Analysis/MemoryLocation.cpp
index 7f2d04c49565..854ba83bd34a 100644
--- a/llvm/lib/Analysis/MemoryLocation.cpp
+++ b/llvm/lib/Analysis/MemoryLocation.cpp
@@ -213,6 +213,28 @@ MemoryLocation MemoryLocation::getForArgument(const CallBase *Call,
LibFunc F;
if (TLI && TLI->getLibFunc(*Call, F) && TLI->has(F)) {
switch (F) {
+ case LibFunc_memset_chk: {
+ assert(ArgIdx == 0 && "Invalid argument index for memset_chk");
+ LocationSize Size = LocationSize::afterPointer();
+ if (const auto *Len = dyn_cast<ConstantInt>(Call->getArgOperand(2))) {
+ // memset_chk writes at most Len bytes. It may write fewer if Len
+ // exceeds the specified max size, in which case it aborts.
+ Size = LocationSize::upperBound(Len->getZExtValue());
+ }
+ return MemoryLocation(Arg, Size, AATags);
+ }
+ case LibFunc_strncpy: {
+ assert((ArgIdx == 0 || ArgIdx == 1) &&
+ "Invalid argument index for strncpy");
+ LocationSize Size = LocationSize::afterPointer();
+ if (const auto *Len = dyn_cast<ConstantInt>(Call->getArgOperand(2))) {
+ // strncpy is guaranteed to write Len bytes, but only reads up to Len
+ // bytes.
+ Size = ArgIdx == 0 ? LocationSize::precise(Len->getZExtValue())
+ : LocationSize::upperBound(Len->getZExtValue());
+ }
+ return MemoryLocation(Arg, Size, AATags);
+ }
case LibFunc_memset_pattern16:
assert((ArgIdx == 0 || ArgIdx == 1) &&
"Invalid argument index for memset_pattern16");
diff --git a/llvm/lib/Analysis/PHITransAddr.cpp b/llvm/lib/Analysis/PHITransAddr.cpp
index c73e1fd82915..4c80f6743411 100644
--- a/llvm/lib/Analysis/PHITransAddr.cpp
+++ b/llvm/lib/Analysis/PHITransAddr.cpp
@@ -69,7 +69,7 @@ static bool VerifySubExpr(Value *Expr,
}
// If it isn't in the InstInputs list it is a subexpr incorporated into the
- // address. Sanity check that it is phi translatable.
+ // address. Validate that it is phi translatable.
if (!CanPHITrans(I)) {
errs() << "Instruction in PHITransAddr is not phi-translatable:\n";
errs() << *I << '\n';
diff --git a/llvm/lib/Analysis/RegionPass.cpp b/llvm/lib/Analysis/RegionPass.cpp
index a73607dbef61..c20ecff5f912 100644
--- a/llvm/lib/Analysis/RegionPass.cpp
+++ b/llvm/lib/Analysis/RegionPass.cpp
@@ -15,6 +15,7 @@
#include "llvm/Analysis/RegionPass.h"
#include "llvm/IR/OptBisect.h"
#include "llvm/IR/PassTimingInfo.h"
+#include "llvm/IR/PrintPasses.h"
#include "llvm/IR/StructuralHash.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
@@ -187,6 +188,8 @@ public:
}
bool runOnRegion(Region *R, RGPassManager &RGM) override {
+ if (!isFunctionInPrintList(R->getEntry()->getParent()->getName()))
+ return false;
Out << Banner;
for (const auto *BB : R->blocks()) {
if (BB)
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index f7c22cfb0310..7dc7f9904c70 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -2915,8 +2915,8 @@ ScalarEvolution::getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops,
const Loop *L, SCEV::NoWrapFlags Flags) {
FoldingSetNodeID ID;
ID.AddInteger(scAddRecExpr);
- for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- ID.AddPointer(Ops[i]);
+ for (const SCEV *Op : Ops)
+ ID.AddPointer(Op);
ID.AddPointer(L);
void *IP = nullptr;
SCEVAddRecExpr *S =
@@ -2939,8 +2939,8 @@ ScalarEvolution::getOrCreateMulExpr(ArrayRef<const SCEV *> Ops,
SCEV::NoWrapFlags Flags) {
FoldingSetNodeID ID;
ID.AddInteger(scMulExpr);
- for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- ID.AddPointer(Ops[i]);
+ for (const SCEV *Op : Ops)
+ ID.AddPointer(Op);
void *IP = nullptr;
SCEVMulExpr *S =
static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
@@ -3708,8 +3708,8 @@ SCEV *ScalarEvolution::findExistingSCEVInCache(SCEVTypes SCEVType,
ArrayRef<const SCEV *> Ops) {
FoldingSetNodeID ID;
ID.AddInteger(SCEVType);
- for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- ID.AddPointer(Ops[i]);
+ for (const SCEV *Op : Ops)
+ ID.AddPointer(Op);
void *IP = nullptr;
return UniqueSCEVs.FindNodeOrInsertPos(ID, IP);
}
@@ -4094,6 +4094,17 @@ void ScalarEvolution::eraseValueFromMap(Value *V) {
}
}
+void ScalarEvolution::insertValueToMap(Value *V, const SCEV *S) {
+ // A recursive query may have already computed the SCEV. It should be
+ // equivalent, but may not necessarily be exactly the same, e.g. due to lazily
+ // inferred nowrap flags.
+ auto It = ValueExprMap.find_as(V);
+ if (It == ValueExprMap.end()) {
+ ValueExprMap.insert({SCEVCallbackVH(V, this), S});
+ ExprValueMap[S].insert({V, nullptr});
+ }
+}
+
/// Return an existing SCEV if it exists, otherwise analyze the expression and
/// create a new one.
const SCEV *ScalarEvolution::getSCEV(Value *V) {
@@ -4134,10 +4145,9 @@ const SCEV *ScalarEvolution::getExistingSCEV(Value *V) {
ValueExprMapType::iterator I = ValueExprMap.find_as(V);
if (I != ValueExprMap.end()) {
const SCEV *S = I->second;
- if (checkValidity(S))
- return S;
- eraseValueFromMap(V);
- forgetMemoizedResults(S);
+ assert(checkValidity(S) &&
+ "existing SCEV has not been properly invalidated");
+ return S;
}
return nullptr;
}
@@ -4430,44 +4440,6 @@ static void PushDefUseChildren(Instruction *I,
}
}
-void ScalarEvolution::forgetSymbolicName(Instruction *PN, const SCEV *SymName) {
- SmallVector<Instruction *, 16> Worklist;
- SmallPtrSet<Instruction *, 8> Visited;
- SmallVector<const SCEV *, 8> ToForget;
- Visited.insert(PN);
- Worklist.push_back(PN);
- while (!Worklist.empty()) {
- Instruction *I = Worklist.pop_back_val();
-
- auto It = ValueExprMap.find_as(static_cast<Value *>(I));
- if (It != ValueExprMap.end()) {
- const SCEV *Old = It->second;
-
- // Short-circuit the def-use traversal if the symbolic name
- // ceases to appear in expressions.
- if (Old != SymName && !hasOperand(Old, SymName))
- continue;
-
- // SCEVUnknown for a PHI either means that it has an unrecognized
- // structure, it's a PHI that's in the progress of being computed
- // by createNodeForPHI, or it's a single-value PHI. In the first case,
- // additional loop trip count information isn't going to change anything.
- // In the second case, createNodeForPHI will perform the necessary
- // updates on its own when it gets to that point. In the third, we do
- // want to forget the SCEVUnknown.
- if (!isa<PHINode>(I) ||
- !isa<SCEVUnknown>(Old) ||
- (I != PN && Old == SymName)) {
- eraseValueFromMap(It->first);
- ToForget.push_back(Old);
- }
- }
-
- PushDefUseChildren(I, Worklist, Visited);
- }
- forgetMemoizedResults(ToForget);
-}
-
namespace {
/// Takes SCEV S and Loop L. For each AddRec sub-expression, use its start
@@ -5335,15 +5307,17 @@ const SCEV *ScalarEvolution::createSimpleAffineAddRec(PHINode *PN,
const SCEV *StartVal = getSCEV(StartValueV);
const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);
-
- ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
+ insertValueToMap(PN, PHISCEV);
// We can add Flags to the post-inc expression only if we
// know that it is *undefined behavior* for BEValueV to
// overflow.
- if (auto *BEInst = dyn_cast<Instruction>(BEValueV))
- if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L))
+ if (auto *BEInst = dyn_cast<Instruction>(BEValueV)) {
+ assert(isLoopInvariant(Accum, L) &&
+ "Accum is defined outside L, but is not invariant?");
+ if (isAddRecNeverPoison(BEInst, L))
(void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags);
+ }
return PHISCEV;
}
@@ -5386,7 +5360,7 @@ const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
// Handle PHI node value symbolically.
const SCEV *SymbolicName = getUnknown(PN);
- ValueExprMap.insert({SCEVCallbackVH(PN, this), SymbolicName});
+ insertValueToMap(PN, SymbolicName);
// Using this symbolic name for the PHI, analyze the value coming around
// the back-edge.
@@ -5457,8 +5431,8 @@ const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
// Okay, for the entire analysis of this edge we assumed the PHI
// to be symbolic. We now need to go back and purge all of the
// entries for the scalars that use the symbolic expression.
- forgetSymbolicName(PN, SymbolicName);
- ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
+ forgetMemoizedResults(SymbolicName);
+ insertValueToMap(PN, PHISCEV);
// We can add Flags to the post-inc expression only if we
// know that it is *undefined behavior* for BEValueV to
@@ -5489,8 +5463,8 @@ const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
// Okay, for the entire analysis of this edge we assumed the PHI
// to be symbolic. We now need to go back and purge all of the
// entries for the scalars that use the symbolic expression.
- forgetSymbolicName(PN, SymbolicName);
- ValueExprMap[SCEVCallbackVH(PN, this)] = Shifted;
+ forgetMemoizedResults(SymbolicName);
+ insertValueToMap(PN, Shifted);
return Shifted;
}
}
@@ -7598,62 +7572,19 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
// Now that we know more about the trip count for this loop, forget any
// existing SCEV values for PHI nodes in this loop since they are only
// conservative estimates made without the benefit of trip count
- // information. This is similar to the code in forgetLoop, except that
- // it handles SCEVUnknown PHI nodes specially.
+ // information. This invalidation is not necessary for correctness, and is
+ // only done to produce more precise results.
if (Result.hasAnyInfo()) {
- SmallVector<Instruction *, 16> Worklist;
- SmallPtrSet<Instruction *, 8> Discovered;
+ // Invalidate any expression using an addrec in this loop.
SmallVector<const SCEV *, 8> ToForget;
- PushLoopPHIs(L, Worklist, Discovered);
- while (!Worklist.empty()) {
- Instruction *I = Worklist.pop_back_val();
-
- ValueExprMapType::iterator It =
- ValueExprMap.find_as(static_cast<Value *>(I));
- if (It != ValueExprMap.end()) {
- const SCEV *Old = It->second;
-
- // SCEVUnknown for a PHI either means that it has an unrecognized
- // structure, or it's a PHI that's in the progress of being computed
- // by createNodeForPHI. In the former case, additional loop trip
- // count information isn't going to change anything. In the later
- // case, createNodeForPHI will perform the necessary updates on its
- // own when it gets to that point.
- if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old)) {
- eraseValueFromMap(It->first);
- ToForget.push_back(Old);
- }
- if (PHINode *PN = dyn_cast<PHINode>(I))
- ConstantEvolutionLoopExitValue.erase(PN);
- }
-
- // Since we don't need to invalidate anything for correctness and we're
- // only invalidating to make SCEV's results more precise, we get to stop
- // early to avoid invalidating too much. This is especially important in
- // cases like:
- //
- // %v = f(pn0, pn1) // pn0 and pn1 used through some other phi node
- // loop0:
- // %pn0 = phi
- // ...
- // loop1:
- // %pn1 = phi
- // ...
- //
- // where both loop0 and loop1's backedge taken count uses the SCEV
- // expression for %v. If we don't have the early stop below then in cases
- // like the above, getBackedgeTakenInfo(loop1) will clear out the trip
- // count for loop0 and getBackedgeTakenInfo(loop0) will clear out the trip
- // count for loop1, effectively nullifying SCEV's trip count cache.
- for (auto *U : I->users())
- if (auto *I = dyn_cast<Instruction>(U)) {
- auto *LoopForUser = LI.getLoopFor(I->getParent());
- if (LoopForUser && L->contains(LoopForUser) &&
- Discovered.insert(I).second)
- Worklist.push_back(I);
- }
- }
+ auto LoopUsersIt = LoopUsers.find(L);
+ if (LoopUsersIt != LoopUsers.end())
+ append_range(ToForget, LoopUsersIt->second);
forgetMemoizedResults(ToForget);
+
+ // Invalidate constant-evolved loop header phis.
+ for (PHINode &PN : L->getHeader()->phis())
+ ConstantEvolutionLoopExitValue.erase(&PN);
}
// Re-lookup the insert position, since the call to
@@ -7672,10 +7603,12 @@ void ScalarEvolution::forgetAllLoops() {
// result.
BackedgeTakenCounts.clear();
PredicatedBackedgeTakenCounts.clear();
+ BECountUsers.clear();
LoopPropertiesCache.clear();
ConstantEvolutionLoopExitValue.clear();
ValueExprMap.clear();
ValuesAtScopes.clear();
+ ValuesAtScopesUsers.clear();
LoopDispositions.clear();
BlockDispositions.clear();
UnsignedRanges.clear();
@@ -7697,8 +7630,8 @@ void ScalarEvolution::forgetLoop(const Loop *L) {
auto *CurrL = LoopWorklist.pop_back_val();
// Drop any stored trip count value.
- BackedgeTakenCounts.erase(CurrL);
- PredicatedBackedgeTakenCounts.erase(CurrL);
+ forgetBackedgeTakenCounts(CurrL, /* Predicated */ false);
+ forgetBackedgeTakenCounts(CurrL, /* Predicated */ true);
// Drop information about predicated SCEV rewrites for this loop.
for (auto I = PredicatedSCEVRewrites.begin();
@@ -7872,10 +7805,6 @@ bool ScalarEvolution::BackedgeTakenInfo::isConstantMaxOrZero(
return MaxOrZero && !any_of(ExitNotTaken, PredicateNotAlwaysTrue);
}
-bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S) const {
- return Operands.contains(S);
-}
-
ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E)
: ExitLimit(E, E, false, None) {
}
@@ -7916,19 +7845,6 @@ ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E, const SCEV *M,
: ExitLimit(E, M, MaxOrZero, None) {
}
-class SCEVRecordOperands {
- SmallPtrSetImpl<const SCEV *> &Operands;
-
-public:
- SCEVRecordOperands(SmallPtrSetImpl<const SCEV *> &Operands)
- : Operands(Operands) {}
- bool follow(const SCEV *S) {
- Operands.insert(S);
- return true;
- }
- bool isDone() { return false; }
-};
-
/// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each
/// computable exit into a persistent ExitNotTakenInfo array.
ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
@@ -7957,14 +7873,6 @@ ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
assert((isa<SCEVCouldNotCompute>(ConstantMax) ||
isa<SCEVConstant>(ConstantMax)) &&
"No point in having a non-constant max backedge taken count!");
-
- SCEVRecordOperands RecordOperands(Operands);
- SCEVTraversal<SCEVRecordOperands> ST(RecordOperands);
- if (!isa<SCEVCouldNotCompute>(ConstantMax))
- ST.visitAll(ConstantMax);
- for (auto &ENT : ExitNotTaken)
- if (!isa<SCEVCouldNotCompute>(ENT.ExactNotTaken))
- ST.visitAll(ENT.ExactNotTaken);
}
/// Compute the number of times the backedge of the specified loop will execute.
@@ -8046,6 +7954,13 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L,
// The loop backedge will be taken the maximum or zero times if there's
// a single exit that must be taken the maximum or zero times.
bool MaxOrZero = (MustExitMaxOrZero && ExitingBlocks.size() == 1);
+
+ // Remember which SCEVs are used in exit limits for invalidation purposes.
+ // We only care about non-constant SCEVs here, so we can ignore EL.MaxNotTaken
+ // and MaxBECount, which must be SCEVConstant.
+ for (const auto &Pair : ExitCounts)
+ if (!isa<SCEVConstant>(Pair.second.ExactNotTaken))
+ BECountUsers[Pair.second.ExactNotTaken].insert({L, AllowPredicates});
return BackedgeTakenInfo(std::move(ExitCounts), CouldComputeBECount,
MaxBECount, MaxOrZero);
}
@@ -8916,6 +8831,9 @@ const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
LS.second = C;
break;
}
+
+ if (!isa<SCEVConstant>(C))
+ ValuesAtScopesUsers[C].push_back({L, V});
return C;
}
@@ -12387,7 +12305,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range,
if (Range.contains(Val->getValue()))
return SE.getCouldNotCompute(); // Something strange happened
- // Ensure that the previous value is in the range. This is a sanity check.
+ // Ensure that the previous value is in the range.
assert(Range.contains(
EvaluateConstantChrecAtConstant(this,
ConstantInt::get(SE.getContext(), ExitVal - 1), SE)->getValue()) &&
@@ -12531,9 +12449,11 @@ ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg)
BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)),
PredicatedBackedgeTakenCounts(
std::move(Arg.PredicatedBackedgeTakenCounts)),
+ BECountUsers(std::move(Arg.BECountUsers)),
ConstantEvolutionLoopExitValue(
std::move(Arg.ConstantEvolutionLoopExitValue)),
ValuesAtScopes(std::move(Arg.ValuesAtScopes)),
+ ValuesAtScopesUsers(std::move(Arg.ValuesAtScopesUsers)),
LoopDispositions(std::move(Arg.LoopDispositions)),
LoopPropertiesCache(std::move(Arg.LoopPropertiesCache)),
BlockDispositions(std::move(Arg.BlockDispositions)),
@@ -12946,6 +12866,23 @@ bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
return SCEVExprContains(S, [&](const SCEV *Expr) { return Expr == Op; });
}
+void ScalarEvolution::forgetBackedgeTakenCounts(const Loop *L,
+ bool Predicated) {
+ auto &BECounts =
+ Predicated ? PredicatedBackedgeTakenCounts : BackedgeTakenCounts;
+ auto It = BECounts.find(L);
+ if (It != BECounts.end()) {
+ for (const ExitNotTakenInfo &ENT : It->second.ExitNotTaken) {
+ if (!isa<SCEVConstant>(ENT.ExactNotTaken)) {
+ auto UserIt = BECountUsers.find(ENT.ExactNotTaken);
+ assert(UserIt != BECountUsers.end());
+ UserIt->second.erase({L, Predicated});
+ }
+ }
+ BECounts.erase(It);
+ }
+}
+
void ScalarEvolution::forgetMemoizedResults(ArrayRef<const SCEV *> SCEVs) {
SmallPtrSet<const SCEV *, 8> ToForget(SCEVs.begin(), SCEVs.end());
SmallVector<const SCEV *, 8> Worklist(ToForget.begin(), ToForget.end());
@@ -12970,32 +12907,52 @@ void ScalarEvolution::forgetMemoizedResults(ArrayRef<const SCEV *> SCEVs) {
else
++I;
}
-
- auto RemoveSCEVFromBackedgeMap = [&ToForget](
- DenseMap<const Loop *, BackedgeTakenInfo> &Map) {
- for (auto I = Map.begin(), E = Map.end(); I != E;) {
- BackedgeTakenInfo &BEInfo = I->second;
- if (any_of(ToForget,
- [&BEInfo](const SCEV *S) { return BEInfo.hasOperand(S); }))
- Map.erase(I++);
- else
- ++I;
- }
- };
-
- RemoveSCEVFromBackedgeMap(BackedgeTakenCounts);
- RemoveSCEVFromBackedgeMap(PredicatedBackedgeTakenCounts);
}
void ScalarEvolution::forgetMemoizedResultsImpl(const SCEV *S) {
- ValuesAtScopes.erase(S);
LoopDispositions.erase(S);
BlockDispositions.erase(S);
UnsignedRanges.erase(S);
SignedRanges.erase(S);
- ExprValueMap.erase(S);
HasRecMap.erase(S);
MinTrailingZerosCache.erase(S);
+
+ auto ExprIt = ExprValueMap.find(S);
+ if (ExprIt != ExprValueMap.end()) {
+ for (auto &ValueAndOffset : ExprIt->second) {
+ if (ValueAndOffset.second == nullptr) {
+ auto ValueIt = ValueExprMap.find_as(ValueAndOffset.first);
+ if (ValueIt != ValueExprMap.end())
+ ValueExprMap.erase(ValueIt);
+ }
+ }
+ ExprValueMap.erase(ExprIt);
+ }
+
+ auto ScopeIt = ValuesAtScopes.find(S);
+ if (ScopeIt != ValuesAtScopes.end()) {
+ for (const auto &Pair : ScopeIt->second)
+ if (!isa_and_nonnull<SCEVConstant>(Pair.second))
+ erase_value(ValuesAtScopesUsers[Pair.second],
+ std::make_pair(Pair.first, S));
+ ValuesAtScopes.erase(ScopeIt);
+ }
+
+ auto ScopeUserIt = ValuesAtScopesUsers.find(S);
+ if (ScopeUserIt != ValuesAtScopesUsers.end()) {
+ for (const auto &Pair : ScopeUserIt->second)
+ erase_value(ValuesAtScopes[Pair.second], std::make_pair(Pair.first, S));
+ ValuesAtScopesUsers.erase(ScopeUserIt);
+ }
+
+ auto BEUsersIt = BECountUsers.find(S);
+ if (BEUsersIt != BECountUsers.end()) {
+ // Work on a copy, as forgetBackedgeTakenCounts() will modify the original.
+ auto Copy = BEUsersIt->second;
+ for (const auto &Pair : Copy)
+ forgetBackedgeTakenCounts(Pair.getPointer(), Pair.getInt());
+ BECountUsers.erase(BEUsersIt);
+ }
}
void
@@ -13100,16 +13057,43 @@ void ScalarEvolution::verify() const {
ValidLoops.insert(L);
Worklist.append(L->begin(), L->end());
}
- // Check for SCEV expressions referencing invalid/deleted loops.
for (auto &KV : ValueExprMap) {
- auto *AR = dyn_cast<SCEVAddRecExpr>(KV.second);
- if (!AR)
- continue;
- assert(ValidLoops.contains(AR->getLoop()) &&
- "AddRec references invalid loop");
+ // Check for SCEV expressions referencing invalid/deleted loops.
+ if (auto *AR = dyn_cast<SCEVAddRecExpr>(KV.second)) {
+ assert(ValidLoops.contains(AR->getLoop()) &&
+ "AddRec references invalid loop");
+ }
+
+ // Check that the value is also part of the reverse map.
+ auto It = ExprValueMap.find(KV.second);
+ if (It == ExprValueMap.end() || !It->second.contains({KV.first, nullptr})) {
+ dbgs() << "Value " << *KV.first
+ << " is in ValueExprMap but not in ExprValueMap\n";
+ std::abort();
+ }
+ }
+
+ for (const auto &KV : ExprValueMap) {
+ for (const auto &ValueAndOffset : KV.second) {
+ if (ValueAndOffset.second != nullptr)
+ continue;
+
+ auto It = ValueExprMap.find_as(ValueAndOffset.first);
+ if (It == ValueExprMap.end()) {
+ dbgs() << "Value " << *ValueAndOffset.first
+ << " is in ExprValueMap but not in ValueExprMap\n";
+ std::abort();
+ }
+ if (It->second != KV.first) {
+ dbgs() << "Value " << *ValueAndOffset.first
+ << " mapped to " << *It->second
+ << " rather than " << *KV.first << "\n";
+ std::abort();
+ }
+ }
}
- // Verify intergity of SCEV users.
+ // Verify integrity of SCEV users.
for (const auto &S : UniqueSCEVs) {
SmallVector<const SCEV *, 4> Ops;
collectUniqueOps(&S, Ops);
@@ -13125,6 +13109,61 @@ void ScalarEvolution::verify() const {
std::abort();
}
}
+
+ // Verify integrity of ValuesAtScopes users.
+ for (const auto &ValueAndVec : ValuesAtScopes) {
+ const SCEV *Value = ValueAndVec.first;
+ for (const auto &LoopAndValueAtScope : ValueAndVec.second) {
+ const Loop *L = LoopAndValueAtScope.first;
+ const SCEV *ValueAtScope = LoopAndValueAtScope.second;
+ if (!isa<SCEVConstant>(ValueAtScope)) {
+ auto It = ValuesAtScopesUsers.find(ValueAtScope);
+ if (It != ValuesAtScopesUsers.end() &&
+ is_contained(It->second, std::make_pair(L, Value)))
+ continue;
+ dbgs() << "Value: " << *Value << ", Loop: " << *L << ", ValueAtScope: "
+ << *ValueAtScope << " missing in ValuesAtScopesUsers\n";
+ std::abort();
+ }
+ }
+ }
+
+ for (const auto &ValueAtScopeAndVec : ValuesAtScopesUsers) {
+ const SCEV *ValueAtScope = ValueAtScopeAndVec.first;
+ for (const auto &LoopAndValue : ValueAtScopeAndVec.second) {
+ const Loop *L = LoopAndValue.first;
+ const SCEV *Value = LoopAndValue.second;
+ assert(!isa<SCEVConstant>(Value));
+ auto It = ValuesAtScopes.find(Value);
+ if (It != ValuesAtScopes.end() &&
+ is_contained(It->second, std::make_pair(L, ValueAtScope)))
+ continue;
+ dbgs() << "Value: " << *Value << ", Loop: " << *L << ", ValueAtScope: "
+ << *ValueAtScope << " missing in ValuesAtScopes\n";
+ std::abort();
+ }
+ }
+
+ // Verify integrity of BECountUsers.
+ auto VerifyBECountUsers = [&](bool Predicated) {
+ auto &BECounts =
+ Predicated ? PredicatedBackedgeTakenCounts : BackedgeTakenCounts;
+ for (const auto &LoopAndBEInfo : BECounts) {
+ for (const ExitNotTakenInfo &ENT : LoopAndBEInfo.second.ExitNotTaken) {
+ if (!isa<SCEVConstant>(ENT.ExactNotTaken)) {
+ auto UserIt = BECountUsers.find(ENT.ExactNotTaken);
+ if (UserIt != BECountUsers.end() &&
+ UserIt->second.contains({LoopAndBEInfo.first, Predicated}))
+ continue;
+ dbgs() << "Value " << *ENT.ExactNotTaken << " for loop "
+ << *LoopAndBEInfo.first << " missing from BECountUsers\n";
+ std::abort();
+ }
+ }
+ }
+ };
+ VerifyBECountUsers(/* Predicated */ false);
+ VerifyBECountUsers(/* Predicated */ true);
}
bool ScalarEvolution::invalidate(
diff --git a/llvm/lib/Analysis/StackSafetyAnalysis.cpp b/llvm/lib/Analysis/StackSafetyAnalysis.cpp
index 74cc39b7f2c0..54f3605ee033 100644
--- a/llvm/lib/Analysis/StackSafetyAnalysis.cpp
+++ b/llvm/lib/Analysis/StackSafetyAnalysis.cpp
@@ -14,12 +14,14 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/StackLifetime.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/ModuleSummaryIndex.h"
@@ -117,7 +119,7 @@ template <typename CalleeTy> struct UseInfo {
// Access range of the address (alloca or parameter).
// It is allowed to be empty-set when there are no known accesses.
ConstantRange Range;
- std::map<const Instruction *, ConstantRange> Accesses;
+ std::set<const Instruction *> UnsafeAccesses;
// List of calls which pass address as an argument.
// Value is offset range of address from base address (alloca or calling
@@ -131,10 +133,9 @@ template <typename CalleeTy> struct UseInfo {
UseInfo(unsigned PointerSize) : Range{PointerSize, false} {}
void updateRange(const ConstantRange &R) { Range = unionNoWrap(Range, R); }
- void addRange(const Instruction *I, const ConstantRange &R) {
- auto Ins = Accesses.emplace(I, R);
- if (!Ins.second)
- Ins.first->second = unionNoWrap(Ins.first->second, R);
+ void addRange(const Instruction *I, const ConstantRange &R, bool IsSafe) {
+ if (!IsSafe)
+ UnsafeAccesses.insert(I);
updateRange(R);
}
};
@@ -230,7 +231,7 @@ struct StackSafetyInfo::InfoTy {
struct StackSafetyGlobalInfo::InfoTy {
GVToSSI Info;
SmallPtrSet<const AllocaInst *, 8> SafeAllocas;
- std::map<const Instruction *, bool> AccessIsUnsafe;
+ std::set<const Instruction *> UnsafeAccesses;
};
namespace {
@@ -253,6 +254,11 @@ class StackSafetyLocalAnalysis {
void analyzeAllUses(Value *Ptr, UseInfo<GlobalValue> &AS,
const StackLifetime &SL);
+
+ bool isSafeAccess(const Use &U, AllocaInst *AI, const SCEV *AccessSize);
+ bool isSafeAccess(const Use &U, AllocaInst *AI, Value *V);
+ bool isSafeAccess(const Use &U, AllocaInst *AI, TypeSize AccessSize);
+
public:
StackSafetyLocalAnalysis(Function &F, ScalarEvolution &SE)
: F(F), DL(F.getParent()->getDataLayout()), SE(SE),
@@ -333,6 +339,56 @@ ConstantRange StackSafetyLocalAnalysis::getMemIntrinsicAccessRange(
return getAccessRange(U, Base, SizeRange);
}
+bool StackSafetyLocalAnalysis::isSafeAccess(const Use &U, AllocaInst *AI,
+ Value *V) {
+ return isSafeAccess(U, AI, SE.getSCEV(V));
+}
+
+bool StackSafetyLocalAnalysis::isSafeAccess(const Use &U, AllocaInst *AI,
+ TypeSize TS) {
+ if (TS.isScalable())
+ return false;
+ auto *CalculationTy = IntegerType::getIntNTy(SE.getContext(), PointerSize);
+ const SCEV *SV = SE.getConstant(CalculationTy, TS.getFixedSize());
+ return isSafeAccess(U, AI, SV);
+}
+
+bool StackSafetyLocalAnalysis::isSafeAccess(const Use &U, AllocaInst *AI,
+ const SCEV *AccessSize) {
+
+ if (!AI)
+ return true;
+ if (isa<SCEVCouldNotCompute>(AccessSize))
+ return false;
+
+ const auto *I = cast<Instruction>(U.getUser());
+
+ auto ToCharPtr = [&](const SCEV *V) {
+ auto *PtrTy = IntegerType::getInt8PtrTy(SE.getContext());
+ return SE.getTruncateOrZeroExtend(V, PtrTy);
+ };
+
+ const SCEV *AddrExp = ToCharPtr(SE.getSCEV(U.get()));
+ const SCEV *BaseExp = ToCharPtr(SE.getSCEV(AI));
+ const SCEV *Diff = SE.getMinusSCEV(AddrExp, BaseExp);
+ if (isa<SCEVCouldNotCompute>(Diff))
+ return false;
+
+ auto Size = getStaticAllocaSizeRange(*AI);
+
+ auto *CalculationTy = IntegerType::getIntNTy(SE.getContext(), PointerSize);
+ auto ToDiffTy = [&](const SCEV *V) {
+ return SE.getTruncateOrZeroExtend(V, CalculationTy);
+ };
+ const SCEV *Min = ToDiffTy(SE.getConstant(Size.getLower()));
+ const SCEV *Max = SE.getMinusSCEV(ToDiffTy(SE.getConstant(Size.getUpper())),
+ ToDiffTy(AccessSize));
+ return SE.evaluatePredicateAt(ICmpInst::Predicate::ICMP_SGE, Diff, Min, I)
+ .getValueOr(false) &&
+ SE.evaluatePredicateAt(ICmpInst::Predicate::ICMP_SLE, Diff, Max, I)
+ .getValueOr(false);
+}
+
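A minimal sketch of an access the SCEV check above can prove safe: Diff = 8, Min = 0, and Max = 16 - 4 = 12, so both predicates hold:

define void @in_bounds() {
  %buf = alloca [16 x i8]
  %p = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i64 0, i64 8
  %q = bitcast i8* %p to i32*
  store i32 0, i32* %q  ; 4-byte store at offset 8 into a 16-byte alloca
  ret void
}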
/// The function analyzes all local uses of Ptr (alloca or argument) and
/// calculates local access range and all function calls where it was used.
void StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
@@ -341,7 +397,7 @@ void StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
SmallPtrSet<const Value *, 16> Visited;
SmallVector<const Value *, 8> WorkList;
WorkList.push_back(Ptr);
- const AllocaInst *AI = dyn_cast<AllocaInst>(Ptr);
+ AllocaInst *AI = dyn_cast<AllocaInst>(Ptr);
// A DFS search through all uses of the alloca in bitcasts/PHI/GEPs/etc.
while (!WorkList.empty()) {
@@ -356,11 +412,13 @@ void StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
switch (I->getOpcode()) {
case Instruction::Load: {
if (AI && !SL.isAliveAfter(AI, I)) {
- US.addRange(I, UnknownRange);
+ US.addRange(I, UnknownRange, /*IsSafe=*/false);
break;
}
- US.addRange(I,
- getAccessRange(UI, Ptr, DL.getTypeStoreSize(I->getType())));
+ auto TypeSize = DL.getTypeStoreSize(I->getType());
+ auto AccessRange = getAccessRange(UI, Ptr, TypeSize);
+ bool Safe = isSafeAccess(UI, AI, TypeSize);
+ US.addRange(I, AccessRange, Safe);
break;
}
@@ -370,16 +428,17 @@ void StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
case Instruction::Store: {
if (V == I->getOperand(0)) {
// Stored the pointer - conservatively assume it may be unsafe.
- US.addRange(I, UnknownRange);
+ US.addRange(I, UnknownRange, /*IsSafe=*/false);
break;
}
if (AI && !SL.isAliveAfter(AI, I)) {
- US.addRange(I, UnknownRange);
+ US.addRange(I, UnknownRange, /*IsSafe=*/false);
break;
}
- US.addRange(
- I, getAccessRange(
- UI, Ptr, DL.getTypeStoreSize(I->getOperand(0)->getType())));
+ auto TypeSize = DL.getTypeStoreSize(I->getOperand(0)->getType());
+ auto AccessRange = getAccessRange(UI, Ptr, TypeSize);
+ bool Safe = isSafeAccess(UI, AI, TypeSize);
+ US.addRange(I, AccessRange, Safe);
break;
}
@@ -387,7 +446,7 @@ void StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
// Information leak.
// FIXME: Process parameters correctly. This is a leak only if we return
// alloca.
- US.addRange(I, UnknownRange);
+ US.addRange(I, UnknownRange, /*IsSafe=*/false);
break;
case Instruction::Call:
@@ -396,12 +455,20 @@ void StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
break;
if (AI && !SL.isAliveAfter(AI, I)) {
- US.addRange(I, UnknownRange);
+ US.addRange(I, UnknownRange, /*IsSafe=*/false);
break;
}
-
if (const MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
- US.addRange(I, getMemIntrinsicAccessRange(MI, UI, Ptr));
+ auto AccessRange = getMemIntrinsicAccessRange(MI, UI, Ptr);
+ bool Safe = false;
+ if (const auto *MTI = dyn_cast<MemTransferInst>(MI)) {
+ if (MTI->getRawSource() != UI && MTI->getRawDest() != UI)
+ Safe = true;
+ } else if (MI->getRawDest() != UI) {
+ Safe = true;
+ }
+ Safe = Safe || isSafeAccess(UI, AI, MI->getLength());
+ US.addRange(I, AccessRange, Safe);
break;
}
@@ -412,15 +479,16 @@ void StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
}
if (!CB.isArgOperand(&UI)) {
- US.addRange(I, UnknownRange);
+ US.addRange(I, UnknownRange, /*IsSafe=*/false);
break;
}
unsigned ArgNo = CB.getArgOperandNo(&UI);
if (CB.isByValArgument(ArgNo)) {
- US.addRange(I, getAccessRange(
- UI, Ptr,
- DL.getTypeStoreSize(CB.getParamByValType(ArgNo))));
+ auto TypeSize = DL.getTypeStoreSize(CB.getParamByValType(ArgNo));
+ auto AccessRange = getAccessRange(UI, Ptr, TypeSize);
+ bool Safe = isSafeAccess(UI, AI, TypeSize);
+ US.addRange(I, AccessRange, Safe);
break;
}
@@ -430,7 +498,7 @@ void StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
const GlobalValue *Callee =
dyn_cast<GlobalValue>(CB.getCalledOperand()->stripPointerCasts());
if (!Callee) {
- US.addRange(I, UnknownRange);
+ US.addRange(I, UnknownRange, /*IsSafe=*/false);
break;
}
@@ -827,8 +895,8 @@ const StackSafetyGlobalInfo::InfoTy &StackSafetyGlobalInfo::getInfo() const {
Info->SafeAllocas.insert(AI);
++NumAllocaStackSafe;
}
- for (const auto &A : KV.second.Accesses)
- Info->AccessIsUnsafe[A.first] |= !AIRange.contains(A.second);
+ Info->UnsafeAccesses.insert(KV.second.UnsafeAccesses.begin(),
+ KV.second.UnsafeAccesses.end());
}
}
@@ -903,11 +971,7 @@ bool StackSafetyGlobalInfo::isSafe(const AllocaInst &AI) const {
bool StackSafetyGlobalInfo::stackAccessIsSafe(const Instruction &I) const {
const auto &Info = getInfo();
- auto It = Info.AccessIsUnsafe.find(&I);
- if (It == Info.AccessIsUnsafe.end()) {
- return true;
- }
- return !It->second;
+ return Info.UnsafeAccesses.find(&I) == Info.UnsafeAccesses.end();
}
void StackSafetyGlobalInfo::print(raw_ostream &O) const {
diff --git a/llvm/lib/Analysis/SyncDependenceAnalysis.cpp b/llvm/lib/Analysis/SyncDependenceAnalysis.cpp
index 59582cd3a198..ff833b55bbce 100644
--- a/llvm/lib/Analysis/SyncDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/SyncDependenceAnalysis.cpp
@@ -15,21 +15,18 @@
// The SyncDependenceAnalysis is used in the DivergenceAnalysis to model
// control-induced divergence in phi nodes.
//
-// -- Summary --
-// The SyncDependenceAnalysis lazily computes sync dependences [3].
-// The analysis evaluates the disjoint path criterion [2] by a reduction
-// to SSA construction. The SSA construction algorithm is implemented as
-// a simple data-flow analysis [1].
//
-// [1] "A Simple, Fast Dominance Algorithm", SPI '01, Cooper, Harvey and Kennedy
-// [2] "Efficiently Computing Static Single Assignment Form
-// and the Control Dependence Graph", TOPLAS '91,
-// Cytron, Ferrante, Rosen, Wegman and Zadeck
-// [3] "Improving Performance of OpenCL on CPUs", CC '12, Karrenberg and Hack
-// [4] "Divergence Analysis", TOPLAS '13, Sampaio, Souza, Collange and Pereira
+// -- Reference --
+// The algorithm is presented in Section 5 of
+//
+// An abstract interpretation for SPMD divergence
+// on reducible control flow graphs.
+// Julian Rosemann, Simon Moll and Sebastian Hack
+// POPL '21
+//
//
// -- Sync dependence --
-// Sync dependence [4] characterizes the control flow aspect of the
+// Sync dependence characterizes the control flow aspect of the
// propagation of branch divergence. For example,
//
// %cond = icmp slt i32 %tid, 10
@@ -46,9 +43,10 @@
// because the branch "br i1 %cond" depends on %tid and affects which value %a
// is assigned to.
//
+//
// -- Reduction to SSA construction --
// There are two disjoint paths from A to X, if a certain variant of SSA
-// construction places a phi node in X under the following set-up scheme [2].
+// construction places a phi node in X under the following set-up scheme.
//
// This variant of SSA construction ignores incoming undef values.
// That is paths from the entry without a definition do not result in
@@ -63,6 +61,7 @@
// D E
// \ /
// F
+//
// Assume that A contains a divergent branch. We are interested
// in the set of all blocks where each block is reachable from A
// via two disjoint paths. This would be the set {D, F} in this
@@ -70,6 +69,7 @@
// To generally reduce this query to SSA construction we introduce
// a virtual variable x and assign to x different values in each
// successor block of A.
+//
// entry
// / \
// A \
@@ -79,23 +79,41 @@
// D E
// \ /
// F
+//
// Our flavor of SSA construction for x will construct the following
+//
// entry
// / \
// A \
// / \ Y
// x0 = 0 x1 = 1 /
// \ / \ /
-// x2=phi E
+// x2 = phi E
// \ /
-// x3=phi
+// x3 = phi
+//
// The blocks D and F contain phi nodes and are thus each reachable
// by two disjoint paths from A.
//
// -- Remarks --
-// In case of loop exits we need to check the disjoint path criterion for loops
-// [2]. To this end, we check whether the definition of x differs between the
-// loop exit and the loop header (_after_ SSA construction).
+// * In case of loop exits we need to check the disjoint path criterion for loops.
+// To this end, we check whether the definition of x differs between the
+// loop exit and the loop header (_after_ SSA construction).
+//
+// -- Known Limitations & Future Work --
+// * The algorithm requires reducible loops because the implementation
+// implicitly performs a single iteration of the underlying data flow analysis.
+// This was done for pragmatism, simplicity and speed.
+//
+// Relevant related work for extending the algorithm to irreducible control:
+// A simple algorithm for global data flow analysis problems.
+// Matthew S. Hecht and Jeffrey D. Ullman.
+// SIAM Journal on Computing, 4(4):519–532, December 1975.
+//
+// * Another reason for requiring reducible loops is that points of
+// synchronization in irreducible loops aren't 'obvious' - there is no unique
+// header where threads 'should' synchronize when entering or coming back
+// around from the latch.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/SyncDependenceAnalysis.h"
@@ -128,8 +146,9 @@ using namespace llvm;
//
// We cannot use the vanilla (R)PO computation of LLVM because:
// * We (virtually) modify the CFG.
-// * We want a loop-compact block enumeration, that is the numbers assigned by
-// the traveral to the blocks of a loop are an interval.
+// * We want a loop-compact block enumeration, that is, the numbers assigned
+// to the blocks of a loop form an interval.
+//
using POCB = std::function<void(const BasicBlock &)>;
using VisitedSet = std::set<const BasicBlock *>;
using BlockStack = std::vector<const BasicBlock *>;
diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp
index 7326ba74c071..72fbd5ad3f68 100644
--- a/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -166,8 +166,8 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
return;
}
- // memset_pattern16 is only available on iOS 3.0 and Mac OS X 10.5 and later.
- // All versions of watchOS support it.
+ // memset_pattern{4,8,16} are only available on iOS 3.0 and Mac OS X 10.5
+ // and later. All versions of watchOS support them.
if (T.isMacOSX()) {
// available IO unlocked variants on Mac OS X
TLI.setAvailable(LibFunc_getc_unlocked);
@@ -175,12 +175,20 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setAvailable(LibFunc_putc_unlocked);
TLI.setAvailable(LibFunc_putchar_unlocked);
- if (T.isMacOSXVersionLT(10, 5))
+ if (T.isMacOSXVersionLT(10, 5)) {
+ TLI.setUnavailable(LibFunc_memset_pattern4);
+ TLI.setUnavailable(LibFunc_memset_pattern8);
TLI.setUnavailable(LibFunc_memset_pattern16);
+ }
} else if (T.isiOS()) {
- if (T.isOSVersionLT(3, 0))
+ if (T.isOSVersionLT(3, 0)) {
+ TLI.setUnavailable(LibFunc_memset_pattern4);
+ TLI.setUnavailable(LibFunc_memset_pattern8);
TLI.setUnavailable(LibFunc_memset_pattern16);
+ }
} else if (!T.isWatchOS()) {
+ TLI.setUnavailable(LibFunc_memset_pattern4);
+ TLI.setUnavailable(LibFunc_memset_pattern8);
TLI.setUnavailable(LibFunc_memset_pattern16);
}
@@ -684,7 +692,6 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setUnavailable(LibFunc_strcat_chk);
TLI.setUnavailable(LibFunc_strcpy_chk);
TLI.setUnavailable(LibFunc_strlcat_chk);
- TLI.setUnavailable(LibFunc_strlcat_chk);
TLI.setUnavailable(LibFunc_strlcpy_chk);
TLI.setUnavailable(LibFunc_strlen_chk);
TLI.setUnavailable(LibFunc_strncat_chk);
@@ -1523,6 +1530,8 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
FTy.getParamType(2)->isPointerTy() &&
FTy.getParamType(3)->isIntegerTy());
+ case LibFunc_memset_pattern4:
+ case LibFunc_memset_pattern8:
case LibFunc_memset_pattern16:
return (!FTy.isVarArg() && NumParams == 3 &&
FTy.getParamType(0)->isPointerTy() &&
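For reference, the shape being validated here corresponds to declarations like the following (a sketch; size_t assumed to be i64):

declare void @memset_pattern4(i8*, i8*, i64)
declare void @memset_pattern8(i8*, i8*, i64)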
diff --git a/llvm/lib/Analysis/VFABIDemangling.cpp b/llvm/lib/Analysis/VFABIDemangling.cpp
index 8a34a34eb307..7573975a3dd3 100644
--- a/llvm/lib/Analysis/VFABIDemangling.cpp
+++ b/llvm/lib/Analysis/VFABIDemangling.cpp
@@ -445,7 +445,6 @@ Optional<VFInfo> VFABI::tryDemangleForVFABI(StringRef MangledName,
VF = EC.getKnownMinValue();
}
- // Sanity checks.
// 1. We don't accept a zero lanes vectorization factor.
// 2. We don't accept the demangling if the vector function is not
// present in the module.
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 5bce1eaa59a0..5feabd876e3a 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -124,8 +124,8 @@ void LLParser::restoreParsingState(const SlotMapping *Slots) {
std::make_pair(I.first, std::make_pair(I.second, LocTy())));
}
-/// validateEndOfModule - Do final validity and sanity checks at the end of the
-/// module.
+/// validateEndOfModule - Do final validity and basic correctness checks at the
+/// end of the module.
bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
if (!M)
return false;
@@ -271,7 +271,7 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
return false;
}
-/// Do final validity and sanity checks at the end of the index.
+/// Do final validity and basic correctness checks at the end of the index.
bool LLParser::validateEndOfIndex() {
if (!Index)
return false;
@@ -2989,9 +2989,10 @@ BasicBlock *LLParser::PerFunctionState::defineBB(const std::string &Name,
/// parseValID - parse an abstract value that doesn't necessarily have a
/// type implied. For example, if we parse "4" we don't know what integer type
/// it has. The value will later be combined with its type and checked for
-/// sanity. PFS is used to convert function-local operands of metadata (since
-/// metadata operands are not just parsed here but also converted to values).
-/// PFS can be null when we are not parsing metadata values inside a function.
+/// basic correctness. PFS is used to convert function-local operands of
+/// metadata (since metadata operands are not just parsed here but also
+/// converted to values). PFS can be null when we are not parsing metadata
+/// values inside a function.
bool LLParser::parseValID(ValID &ID, PerFunctionState *PFS, Type *ExpectedTy) {
ID.Loc = Lex.getLoc();
switch (Lex.getKind()) {
diff --git a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
index 2723105b092f..d7bcb0d7f575 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
@@ -957,8 +957,8 @@ Error BitcodeAnalyzer::parseBlock(unsigned BlockID, unsigned IndentLevel,
O->OS.write_escaped(Blob, /*hex=*/true) << "'";
} else {
bool BlobIsPrintable = true;
- for (unsigned i = 0, e = Blob.size(); i != e; ++i)
- if (!isPrint(static_cast<unsigned char>(Blob[i]))) {
+ for (char C : Blob)
+ if (!isPrint(static_cast<unsigned char>(C))) {
BlobIsPrintable = false;
break;
}
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index c568461e62b0..993cb1de8c02 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -3996,8 +3996,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
// See if anything took the address of blocks in this function.
auto BBFRI = BasicBlockFwdRefs.find(F);
if (BBFRI == BasicBlockFwdRefs.end()) {
- for (unsigned i = 0, e = FunctionBBs.size(); i != e; ++i)
- FunctionBBs[i] = BasicBlock::Create(Context, "", F);
+ for (BasicBlock *&BB : FunctionBBs)
+ BB = BasicBlock::Create(Context, "", F);
} else {
auto &BBRefs = BBFRI->second;
// Check for invalid basic block references.
@@ -4605,9 +4605,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
CaseVals.push_back(ConstantInt::get(Context, Low));
}
BasicBlock *DestBB = getBasicBlock(Record[CurIdx++]);
- for (SmallVector<ConstantInt*, 1>::iterator cvi = CaseVals.begin(),
- cve = CaseVals.end(); cvi != cve; ++cvi)
- SI->addCase(*cvi, DestBB);
+ for (ConstantInt *Cst : CaseVals)
+ SI->addCase(Cst, DestBB);
}
I = SI;
break;
diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
index 6df5a4a64d51..60530d7f7a00 100644
--- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -444,7 +444,8 @@ class MetadataLoader::MetadataLoaderImpl {
uint64_t GlobalDeclAttachmentPos = 0;
#ifndef NDEBUG
- /// Sanity check that we end up parsing all of the global decl attachments.
+ /// Basic correctness check that we end up parsing all of the global decl
+ /// attachments.
unsigned NumGlobalDeclAttachSkipped = 0;
unsigned NumGlobalDeclAttachParsed = 0;
#endif
@@ -917,7 +918,7 @@ Expected<bool> MetadataLoader::MetadataLoaderImpl::loadGlobalDeclAttachments() {
case BitstreamEntry::Error:
return error("Malformed block");
case BitstreamEntry::EndBlock:
- // Sanity check that we parsed them all.
+ // Check that we parsed them all.
assert(NumGlobalDeclAttachSkipped == NumGlobalDeclAttachParsed);
return true;
case BitstreamEntry::Record:
@@ -929,7 +930,7 @@ Expected<bool> MetadataLoader::MetadataLoaderImpl::loadGlobalDeclAttachments() {
return MaybeCode.takeError();
if (MaybeCode.get() != bitc::METADATA_GLOBAL_DECL_ATTACHMENT) {
// Anything other than a global decl attachment signals the end of
- // these records. sanity check that we parsed them all.
+ // these records. Check that we parsed them all.
assert(NumGlobalDeclAttachSkipped == NumGlobalDeclAttachParsed);
return true;
}
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 1e9a9197aed7..e2354c40844a 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -596,10 +596,10 @@ static void writeStringRecord(BitstreamWriter &Stream, unsigned Code,
SmallVector<unsigned, 64> Vals;
// Code: [strchar x N]
- for (unsigned i = 0, e = Str.size(); i != e; ++i) {
- if (AbbrevToUse && !BitCodeAbbrevOp::isChar6(Str[i]))
+ for (char C : Str) {
+ if (AbbrevToUse && !BitCodeAbbrevOp::isChar6(C))
AbbrevToUse = 0;
- Vals.push_back(Str[i]);
+ Vals.push_back(C);
}
// Emit the finished record.
@@ -914,8 +914,7 @@ void ModuleBitcodeWriter::writeTypeTable() {
TypeVals.clear();
// Loop over all of the types, emitting each in turn.
- for (unsigned i = 0, e = TypeList.size(); i != e; ++i) {
- Type *T = TypeList[i];
+ for (Type *T : TypeList) {
int AbbrevToUse = 0;
unsigned Code = 0;
@@ -3343,19 +3342,18 @@ void ModuleBitcodeWriter::writeFunction(
DILocation *LastDL = nullptr;
// Finally, emit all the instructions, in order.
- for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
- I != E; ++I) {
- writeInstruction(*I, InstID, Vals);
+ for (const BasicBlock &BB : F)
+ for (const Instruction &I : BB) {
+ writeInstruction(I, InstID, Vals);
- if (!I->getType()->isVoidTy())
+ if (!I.getType()->isVoidTy())
++InstID;
// If the instruction has metadata, write a metadata attachment later.
- NeedsMetadataAttachment |= I->hasMetadataOtherThanDebugLoc();
+ NeedsMetadataAttachment |= I.hasMetadataOtherThanDebugLoc();
// If the instruction has a debug location, emit it.
- DILocation *DL = I->getDebugLoc();
+ DILocation *DL = I.getDebugLoc();
if (!DL)
continue;
@@ -4429,9 +4427,9 @@ void ModuleBitcodeWriter::write() {
// Emit function bodies.
DenseMap<const Function *, uint64_t> FunctionToBitcodeIndex;
- for (Module::const_iterator F = M.begin(), E = M.end(); F != E; ++F)
- if (!F->isDeclaration())
- writeFunction(*F, FunctionToBitcodeIndex);
+ for (const Function &F : M)
+ if (!F.isDeclaration())
+ writeFunction(F, FunctionToBitcodeIndex);
// Need to write after the above call to WriteFunction which populates
// the summary information in the index.
diff --git a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
index 9465a3b11c8f..07e0708e68c3 100644
--- a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -1148,8 +1148,8 @@ void ValueEnumerator::purgeFunction() {
ValueMap.erase(Values[i].first);
for (unsigned i = NumModuleMDs, e = MDs.size(); i != e; ++i)
MetadataMap.erase(MDs[i]);
- for (unsigned i = 0, e = BasicBlocks.size(); i != e; ++i)
- ValueMap.erase(BasicBlocks[i]);
+ for (const BasicBlock *BB : BasicBlocks)
+ ValueMap.erase(BB);
Values.resize(NumModuleValues);
MDs.resize(NumModuleMDs);
diff --git a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 87a3cede601b..5984063627b0 100644
--- a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -354,8 +354,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(
// dead, or because only a subregister is live at the def. If we
// don't do this the dead def will be incorrectly merged into the
// previous def.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.isDef()) continue;
Register Reg = MO.getReg();
if (Reg == 0) continue;
@@ -407,8 +406,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(
// Scan the register defs for this instruction and update
// live-ranges.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.isDef()) continue;
Register Reg = MO.getReg();
if (Reg == 0) continue;
@@ -495,8 +493,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI,
LLVM_DEBUG(dbgs() << "\tKill Group:");
unsigned FirstReg = 0;
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg()) continue;
Register Reg = MO.getReg();
if (Reg == 0) continue;
@@ -762,11 +759,8 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
// ...need a map from MI to SUnit.
std::map<MachineInstr *, const SUnit *> MISUnitMap;
- for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
- const SUnit *SU = &SUnits[i];
- MISUnitMap.insert(std::pair<MachineInstr *, const SUnit *>(SU->getInstr(),
- SU));
- }
+ for (const SUnit &SU : SUnits)
+ MISUnitMap.insert(std::make_pair(SU.getInstr(), &SU));
// Track progress along the critical path through the SUnit graph as
// we walk the instructions. This is needed for regclasses that only
@@ -774,12 +768,11 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
const SUnit *CriticalPathSU = nullptr;
MachineInstr *CriticalPathMI = nullptr;
if (CriticalPathSet.any()) {
- for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
- const SUnit *SU = &SUnits[i];
+ for (const SUnit &SU : SUnits) {
if (!CriticalPathSU ||
- ((SU->getDepth() + SU->Latency) >
+ ((SU.getDepth() + SU.Latency) >
(CriticalPathSU->getDepth() + CriticalPathSU->Latency))) {
- CriticalPathSU = SU;
+ CriticalPathSU = &SU;
}
}
assert(CriticalPathSU && "Failed to find SUnit critical path");
@@ -839,8 +832,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
// but don't cause any anti-dependence breaking themselves)
if (!MI.isKill()) {
// Attempt to break each anti-dependency...
- for (unsigned i = 0, e = Edges.size(); i != e; ++i) {
- const SDep *Edge = Edges[i];
+ for (const SDep *Edge : Edges) {
SUnit *NextSU = Edge->getSUnit();
if ((Edge->getKind() != SDep::Anti) &&
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index cc848d28a9a7..828cb760b82e 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -809,9 +809,9 @@ void AsmPrinter::emitFunctionHeader() {
// so that we don't get references to undefined symbols.
std::vector<MCSymbol*> DeadBlockSyms;
MMI->takeDeletedSymbolsForFunction(&F, DeadBlockSyms);
- for (unsigned i = 0, e = DeadBlockSyms.size(); i != e; ++i) {
+ for (MCSymbol *DeadBlockSym : DeadBlockSyms) {
OutStreamer->AddComment("Address taken block that was later removed");
- OutStreamer->emitLabel(DeadBlockSyms[i]);
+ OutStreamer->emitLabel(DeadBlockSym);
}
if (CurrentFnBegin) {
@@ -910,8 +910,7 @@ static void emitKill(const MachineInstr *MI, AsmPrinter &AP) {
std::string Str;
raw_string_ostream OS(Str);
OS << "kill:";
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &Op = MI->getOperand(i);
+ for (const MachineOperand &Op : MI->operands()) {
assert(Op.isReg() && "KILL instruction must have only register operands");
OS << ' ' << (Op.isDef() ? "def " : "killed ")
<< printReg(Op.getReg(), AP.MF->getSubtarget().getRegisterInfo());
@@ -2150,8 +2149,7 @@ void AsmPrinter::emitJumpTableInfo() {
SmallPtrSet<const MachineBasicBlock*, 16> EmittedSets;
const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF,JTI,OutContext);
- for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) {
- const MachineBasicBlock *MBB = JTBBs[ii];
+ for (const MachineBasicBlock *MBB : JTBBs) {
if (!EmittedSets.insert(MBB).second)
continue;
@@ -2177,8 +2175,8 @@ void AsmPrinter::emitJumpTableInfo() {
MCSymbol* JTISymbol = GetJTISymbol(JTI);
OutStreamer->emitLabel(JTISymbol);
- for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii)
- emitJumpTableEntry(MJTI, JTBBs[ii], JTI);
+ for (const MachineBasicBlock *MBB : JTBBs)
+ emitJumpTableEntry(MJTI, MBB, JTI);
}
if (!JTInDiffSection)
OutStreamer->emitDataRegion(MCDR_DataRegionEnd);
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index ef1abc47701a..5d0cadefdbf7 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -128,191 +128,29 @@ void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
emitInlineAsmEnd(STI, &TAP->getSTI());
}
-static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
- MachineModuleInfo *MMI, const MCAsmInfo *MAI,
- AsmPrinter *AP, uint64_t LocCookie,
- raw_ostream &OS) {
- // Switch to the inline assembly variant.
- OS << "\t.intel_syntax\n\t";
-
- int CurVariant = -1; // The number of the {.|.|.} region we are in.
- const char *LastEmitted = AsmStr; // One past the last character emitted.
- unsigned NumOperands = MI->getNumOperands();
- int AsmPrinterVariant = 1; // X86MCAsmInfo.cpp's AsmWriterFlavorTy::Intel.
-
- while (*LastEmitted) {
- switch (*LastEmitted) {
- default: {
- // Not a special case, emit the string section literally.
- const char *LiteralEnd = LastEmitted+1;
- while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' &&
- *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n')
- ++LiteralEnd;
- if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
- OS.write(LastEmitted, LiteralEnd - LastEmitted);
- LastEmitted = LiteralEnd;
- break;
- }
- case '\n':
- ++LastEmitted; // Consume newline character.
- OS << '\n'; // Indent code with newline.
- break;
- case '$': {
- ++LastEmitted; // Consume '$' character.
- bool Done = true;
-
- // Handle escapes.
- switch (*LastEmitted) {
- default: Done = false; break;
- case '$':
- ++LastEmitted; // Consume second '$' character.
- break;
- case '(': // $( -> same as GCC's { character.
- ++LastEmitted; // Consume '(' character.
- if (CurVariant != -1)
- report_fatal_error("Nested variants found in inline asm string: '" +
- Twine(AsmStr) + "'");
- CurVariant = 0; // We're in the first variant now.
- break;
- case '|':
- ++LastEmitted; // Consume '|' character.
- if (CurVariant == -1)
- OS << '|'; // This is gcc's behavior for | outside a variant.
- else
- ++CurVariant; // We're in the next variant.
- break;
- case ')': // $) -> same as GCC's } char.
- ++LastEmitted; // Consume ')' character.
- if (CurVariant == -1)
- OS << '}'; // This is gcc's behavior for } outside a variant.
- else
- CurVariant = -1;
- break;
- }
- if (Done) break;
-
- bool HasCurlyBraces = false;
- if (*LastEmitted == '{') { // ${variable}
- ++LastEmitted; // Consume '{' character.
- HasCurlyBraces = true;
- }
-
- // If we have ${:foo}, then this is not a real operand reference, it is a
- // "magic" string reference, just like in .td files. Arrange to call
- // PrintSpecial.
- if (HasCurlyBraces && *LastEmitted == ':') {
- ++LastEmitted;
- const char *StrStart = LastEmitted;
- const char *StrEnd = strchr(StrStart, '}');
- if (!StrEnd)
- report_fatal_error("Unterminated ${:foo} operand in inline asm"
- " string: '" + Twine(AsmStr) + "'");
- if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
- AP->PrintSpecial(MI, OS, StringRef(StrStart, StrEnd - StrStart));
- LastEmitted = StrEnd+1;
- break;
- }
-
- const char *IDStart = LastEmitted;
- const char *IDEnd = IDStart;
- while (isDigit(*IDEnd))
- ++IDEnd;
-
- unsigned Val;
- if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val))
- report_fatal_error("Bad $ operand number in inline asm string: '" +
- Twine(AsmStr) + "'");
- LastEmitted = IDEnd;
-
- if (Val >= NumOperands - 1)
- report_fatal_error("Invalid $ operand number in inline asm string: '" +
- Twine(AsmStr) + "'");
-
- char Modifier[2] = { 0, 0 };
-
- if (HasCurlyBraces) {
- // If we have curly braces, check for a modifier character. This
- // supports syntax like ${0:u}, which correspond to "%u0" in GCC asm.
- if (*LastEmitted == ':') {
- ++LastEmitted; // Consume ':' character.
- if (*LastEmitted == 0)
- report_fatal_error("Bad ${:} expression in inline asm string: '" +
- Twine(AsmStr) + "'");
-
- Modifier[0] = *LastEmitted;
- ++LastEmitted; // Consume modifier character.
- }
-
- if (*LastEmitted != '}')
- report_fatal_error("Bad ${} expression in inline asm string: '" +
- Twine(AsmStr) + "'");
- ++LastEmitted; // Consume '}' character.
- }
-
- // Okay, we finally have a value number. Ask the target to print this
- // operand!
- if (CurVariant == -1 || CurVariant == AsmPrinterVariant) {
- unsigned OpNo = InlineAsm::MIOp_FirstOperand;
-
- bool Error = false;
-
- // Scan to find the machine operand number for the operand.
- for (; Val; --Val) {
- if (OpNo >= MI->getNumOperands())
- break;
- unsigned OpFlags = MI->getOperand(OpNo).getImm();
- OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
- }
-
- // We may have a location metadata attached to the end of the
- // instruction, and at no point should see metadata at any
- // other point while processing. It's an error if so.
- if (OpNo >= MI->getNumOperands() || MI->getOperand(OpNo).isMetadata()) {
- Error = true;
- } else {
- unsigned OpFlags = MI->getOperand(OpNo).getImm();
- ++OpNo; // Skip over the ID number.
-
- // FIXME: Shouldn't arch-independent output template handling go into
- // PrintAsmOperand?
- // Labels are target independent.
- if (MI->getOperand(OpNo).isBlockAddress()) {
- const BlockAddress *BA = MI->getOperand(OpNo).getBlockAddress();
- MCSymbol *Sym = AP->GetBlockAddressSymbol(BA);
- Sym->print(OS, AP->MAI);
- MMI->getContext().registerInlineAsmLabel(Sym);
- } else if (InlineAsm::isMemKind(OpFlags)) {
- Error = AP->PrintAsmMemoryOperand(
- MI, OpNo, Modifier[0] ? Modifier : nullptr, OS);
- } else {
- Error = AP->PrintAsmOperand(MI, OpNo,
- Modifier[0] ? Modifier : nullptr, OS);
- }
- }
- if (Error) {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "invalid operand in inline asm: '" << AsmStr << "'";
- MMI->getModule()->getContext().emitError(LocCookie, Msg.str());
- }
- }
- break;
- }
- }
+static void EmitInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
+ MachineModuleInfo *MMI, const MCAsmInfo *MAI,
+ AsmPrinter *AP, uint64_t LocCookie,
+ raw_ostream &OS) {
+ bool InputIsIntelDialect = MI->getInlineAsmDialect() == InlineAsm::AD_Intel;
+
+ if (InputIsIntelDialect) {
+ // Switch to the inline assembly variant.
+ OS << "\t.intel_syntax\n\t";
}
- OS << "\n\t.att_syntax\n" << (char)0; // null terminate string.
-}
-static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
- MachineModuleInfo *MMI, const MCAsmInfo *MAI,
- AsmPrinter *AP, uint64_t LocCookie,
- raw_ostream &OS) {
int CurVariant = -1; // The number of the {.|.|.} region we are in.
const char *LastEmitted = AsmStr; // One past the last character emitted.
unsigned NumOperands = MI->getNumOperands();
- int AsmPrinterVariant = MMI->getTarget().unqualifiedInlineAsmVariant();
- if (MAI->getEmitGNUAsmStartIndentationMarker())
+ int AsmPrinterVariant;
+ if (InputIsIntelDialect)
+ AsmPrinterVariant = 1; // X86MCAsmInfo.cpp's AsmWriterFlavorTy::Intel.
+ else
+ AsmPrinterVariant = MMI->getTarget().unqualifiedInlineAsmVariant();
+
+ // FIXME: Should this happen for `asm inteldialect` as well?
+ if (!InputIsIntelDialect && MAI->getEmitGNUAsmStartIndentationMarker())
OS << '\t';
while (*LastEmitted) {
@@ -340,8 +178,9 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
switch (*LastEmitted) {
default: Done = false; break;
case '$': // $$ -> $
- if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
- OS << '$';
+ if (!InputIsIntelDialect)
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+ OS << '$';
++LastEmitted; // Consume second '$' character.
break;
case '(': // $( -> same as GCC's { character.
@@ -480,6 +319,8 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
}
}
}
+ if (InputIsIntelDialect)
+ OS << "\n\t.att_syntax";
OS << '\n' << (char)0; // null terminate string.
}
@@ -515,9 +356,8 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const {
// it.
uint64_t LocCookie = 0;
const MDNode *LocMD = nullptr;
- for (unsigned i = MI->getNumOperands(); i != 0; --i) {
- if (MI->getOperand(i-1).isMetadata() &&
- (LocMD = MI->getOperand(i-1).getMetadata()) &&
+ for (const MachineOperand &MO : llvm::reverse(MI->operands())) {
+ if (MO.isMetadata() && (LocMD = MO.getMetadata()) &&
LocMD->getNumOperands() != 0) {
if (const ConstantInt *CI =
mdconst::dyn_extract<ConstantInt>(LocMD->getOperand(0))) {
@@ -533,10 +373,7 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const {
raw_svector_ostream OS(StringData);
AsmPrinter *AP = const_cast<AsmPrinter*>(this);
- if (MI->getInlineAsmDialect() == InlineAsm::AD_ATT)
- EmitGCCInlineAsmStr(AsmStr, MI, MMI, MAI, AP, LocCookie, OS);
- else
- EmitMSInlineAsmStr(AsmStr, MI, MMI, MAI, AP, LocCookie, OS);
+ EmitInlineAsmStr(AsmStr, MI, MMI, MAI, AP, LocCookie, OS);
// Emit warnings if we use reserved registers on the clobber list, as
// that might lead to undefined behaviour.
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 922c91840520..0d2736178f0f 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -521,8 +521,8 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
}
// Construct a DIE for this scope.
-void DwarfCompileUnit::constructScopeDIE(
- LexicalScope *Scope, SmallVectorImpl<DIE *> &FinalChildren) {
+void DwarfCompileUnit::constructScopeDIE(LexicalScope *Scope,
+ DIE &ParentScopeDIE) {
if (!Scope || !Scope->getScopeNode())
return;
@@ -533,46 +533,27 @@ void DwarfCompileUnit::constructScopeDIE(
"constructSubprogramScopeDIE for non-inlined "
"subprograms");
- SmallVector<DIE *, 8> Children;
-
- // We try to create the scope DIE first, then the children DIEs. This will
- // avoid creating un-used children then removing them later when we find out
- // the scope DIE is null.
- DIE *ScopeDIE;
+ // Emit inlined subprograms.
if (Scope->getParent() && isa<DISubprogram>(DS)) {
- ScopeDIE = constructInlinedScopeDIE(Scope);
+ DIE *ScopeDIE = constructInlinedScopeDIE(Scope);
if (!ScopeDIE)
return;
- // We create children when the scope DIE is not null.
- createScopeChildrenDIE(Scope, Children);
- } else {
- // Early exit when we know the scope DIE is going to be null.
- if (DD->isLexicalScopeDIENull(Scope))
- return;
-
- bool HasNonScopeChildren = false;
- // We create children here when we know the scope DIE is not going to be
- // null and the children will be added to the scope DIE.
- createScopeChildrenDIE(Scope, Children, &HasNonScopeChildren);
-
- // If there are only other scopes as children, put them directly in the
- // parent instead, as this scope would serve no purpose.
- if (!HasNonScopeChildren) {
- FinalChildren.insert(FinalChildren.end(),
- std::make_move_iterator(Children.begin()),
- std::make_move_iterator(Children.end()));
- return;
- }
- ScopeDIE = constructLexicalScopeDIE(Scope);
- assert(ScopeDIE && "Scope DIE should not be null.");
+ ParentScopeDIE.addChild(ScopeDIE);
+ createAndAddScopeChildren(Scope, *ScopeDIE);
+ return;
}
- // Add children
- for (auto &I : Children)
- ScopeDIE->addChild(std::move(I));
+ // Early exit when we know the scope DIE is going to be null.
+ if (DD->isLexicalScopeDIENull(Scope))
+ return;
+
+ // Emit lexical blocks.
+ DIE *ScopeDIE = constructLexicalScopeDIE(Scope);
+ assert(ScopeDIE && "Scope DIE should not be null.");
- FinalChildren.push_back(std::move(ScopeDIE));
+ ParentScopeDIE.addChild(ScopeDIE);
+ createAndAddScopeChildren(Scope, *ScopeDIE);
}
void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE,
@@ -1013,42 +994,6 @@ sortLocalVars(SmallVectorImpl<DbgVariable *> &Input) {
return Result;
}
-DIE *DwarfCompileUnit::createScopeChildrenDIE(LexicalScope *Scope,
- SmallVectorImpl<DIE *> &Children,
- bool *HasNonScopeChildren) {
- assert(Children.empty());
- DIE *ObjectPointer = nullptr;
-
- // Emit function arguments (order is significant).
- auto Vars = DU->getScopeVariables().lookup(Scope);
- for (auto &DV : Vars.Args)
- Children.push_back(constructVariableDIE(*DV.second, *Scope, ObjectPointer));
-
- // Emit local variables.
- auto Locals = sortLocalVars(Vars.Locals);
- for (DbgVariable *DV : Locals)
- Children.push_back(constructVariableDIE(*DV, *Scope, ObjectPointer));
-
- // Skip imported directives in gmlt-like data.
- if (!includeMinimalInlineScopes()) {
- // There is no need to emit empty lexical block DIE.
- for (const auto *IE : ImportedEntities[Scope->getScopeNode()])
- Children.push_back(
- constructImportedEntityDIE(cast<DIImportedEntity>(IE)));
- }
-
- if (HasNonScopeChildren)
- *HasNonScopeChildren = !Children.empty();
-
- for (DbgLabel *DL : DU->getScopeLabels().lookup(Scope))
- Children.push_back(constructLabelDIE(*DL, *Scope));
-
- for (LexicalScope *LS : Scope->getChildren())
- constructScopeDIE(LS, Children);
-
- return ObjectPointer;
-}
-
DIE &DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub,
LexicalScope *Scope) {
DIE &ScopeDIE = updateSubprogramScopeDIE(Sub);
@@ -1079,13 +1024,48 @@ DIE &DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub,
DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope,
DIE &ScopeDIE) {
- // We create children when the scope DIE is not null.
- SmallVector<DIE *, 8> Children;
- DIE *ObjectPointer = createScopeChildrenDIE(Scope, Children);
+ DIE *ObjectPointer = nullptr;
+
+ // Emit function arguments (order is significant).
+ auto Vars = DU->getScopeVariables().lookup(Scope);
+ for (auto &DV : Vars.Args)
+ ScopeDIE.addChild(constructVariableDIE(*DV.second, *Scope, ObjectPointer));
+
+ // Emit local variables.
+ auto Locals = sortLocalVars(Vars.Locals);
+ for (DbgVariable *DV : Locals)
+ ScopeDIE.addChild(constructVariableDIE(*DV, *Scope, ObjectPointer));
+
+ // Emit imported entities (skipped in gmlt-like data).
+ if (!includeMinimalInlineScopes()) {
+ for (const auto *IE : ImportedEntities[Scope->getScopeNode()])
+ ScopeDIE.addChild(constructImportedEntityDIE(cast<DIImportedEntity>(IE)));
+ }
+
+ // Emit labels.
+ for (DbgLabel *DL : DU->getScopeLabels().lookup(Scope))
+ ScopeDIE.addChild(constructLabelDIE(*DL, *Scope));
- // Add children
- for (auto &I : Children)
- ScopeDIE.addChild(std::move(I));
+ // Emit inner lexical scopes.
+ auto needToEmitLexicalScope = [this](LexicalScope *LS) {
+ if (isa<DISubprogram>(LS->getScopeNode()))
+ return true;
+ auto Vars = DU->getScopeVariables().lookup(LS);
+ if (!Vars.Args.empty() || !Vars.Locals.empty())
+ return true;
+ if (!includeMinimalInlineScopes() &&
+ !ImportedEntities[LS->getScopeNode()].empty())
+ return true;
+ return false;
+ };
+ for (LexicalScope *LS : Scope->getChildren()) {
+ // If the lexical block doesn't have non-scope children, skip
+ // its emission and put its children directly to the parent scope.
+ if (needToEmitLexicalScope(LS))
+ constructScopeDIE(LS, ScopeDIE);
+ else
+ createAndAddScopeChildren(LS, ScopeDIE);
+ }
return ObjectPointer;
}
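
The net effect of this rewrite: children are attached to their parent DIE as
they are created, instead of being buffered in a SmallVector and spliced in
afterwards, and the old only-scope-children flattening rule survives as the
needToEmitLexicalScope predicate, so a lexical block that owns no variables and
no imported entities still gets no DW_TAG_lexical_block of its own. Illustrative
source (ours, not from the patch):

    void f(void) {
      int a = 1;
      {              // owns nothing itself: needToEmitLexicalScope is false,
        {            // so no DW_TAG_lexical_block; its child attaches to f.
          int b = 2; // owns a local, so this block does get its own
        }            // lexical-block DIE, nested directly under f's DIE.
      }
    }
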
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 6e9261087686..fb03982b5e4a 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -191,8 +191,7 @@ public:
/// variables.
DIE &updateSubprogramScopeDIE(const DISubprogram *SP);
- void constructScopeDIE(LexicalScope *Scope,
- SmallVectorImpl<DIE *> &FinalChildren);
+ void constructScopeDIE(LexicalScope *Scope, DIE &ParentScopeDIE);
/// A helper function to construct a RangeSpanList for a given
/// lexical scope.
@@ -220,11 +219,6 @@ public:
/// Construct a DIE for the given DbgLabel.
DIE *constructLabelDIE(DbgLabel &DL, const LexicalScope &Scope);
- /// A helper function to create children of a Scope DIE.
- DIE *createScopeChildrenDIE(LexicalScope *Scope,
- SmallVectorImpl<DIE *> &Children,
- bool *HasNonScopeChildren = nullptr);
-
void createBaseTypeDIEs();
/// Construct a DIE for this subprogram scope.
diff --git a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 150f19324834..39f40b172c1b 100644
--- a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -162,9 +162,7 @@ bool EHStreamer::callToNoUnwindFunction(const MachineInstr *MI) {
bool MarkedNoUnwind = false;
bool SawFunc = false;
- for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
- const MachineOperand &MO = MI->getOperand(I);
-
+ for (const MachineOperand &MO : MI->operands()) {
if (!MO.isGlobal()) continue;
const Function *F = dyn_cast<Function>(MO.getGlobal());
@@ -386,8 +384,8 @@ MCSymbol *EHStreamer::emitExceptionTable() {
SmallVector<const LandingPadInfo *, 64> LandingPads;
LandingPads.reserve(PadInfos.size());
- for (unsigned i = 0, N = PadInfos.size(); i != N; ++i)
- LandingPads.push_back(&PadInfos[i]);
+ for (const LandingPadInfo &LPI : PadInfos)
+ LandingPads.push_back(&LPI);
// Order landing pads lexicographically by type id.
llvm::sort(LandingPads, [](const LandingPadInfo *L, const LandingPadInfo *R) {
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index 5ac8f49a9522..64dadc82b48b 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -1013,8 +1013,8 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// If this is a large problem, avoid visiting the same basic blocks
// multiple times.
if (MergePotentials.size() == TailMergeThreshold)
- for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
- TriedMerging.insert(MergePotentials[i].getBlock());
+ for (const MergePotentialsElt &Elt : MergePotentials)
+ TriedMerging.insert(Elt.getBlock());
// See if we can do any tail merging on those.
if (MergePotentials.size() >= 2)
diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp
index 50825ccf9bac..eda0f37fdeb7 100644
--- a/llvm/lib/CodeGen/BranchRelaxation.cpp
+++ b/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -513,9 +513,7 @@ bool BranchRelaxation::relaxBranchInstructions() {
// Relaxing branches involves creating new basic blocks, so re-eval
// end() for termination.
- for (MachineFunction::iterator I = MF->begin(); I != MF->end(); ++I) {
- MachineBasicBlock &MBB = *I;
-
+ for (MachineBasicBlock &MBB : *MF) {
// Empty block?
MachineBasicBlock::iterator Last = MBB.getLastNonDebugInstr();
if (Last == MBB.end())
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index e0e2db9f4725..bbdd8aab502e 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -58,8 +58,10 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeLiveVariablesPass(Registry);
initializeLocalStackSlotPassPass(Registry);
initializeLowerIntrinsicsPass(Registry);
+ initializeMIRAddFSDiscriminatorsPass(Registry);
initializeMIRCanonicalizerPass(Registry);
initializeMIRNamerPass(Registry);
+ initializeMIRProfileLoaderPassPass(Registry);
initializeMachineBlockFrequencyInfoPass(Registry);
initializeMachineBlockPlacementPass(Registry);
initializeMachineBlockPlacementStatsPass(Registry);
diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp
index a1ff02178ffa..3bed81d5841d 100644
--- a/llvm/lib/CodeGen/CommandFlags.cpp
+++ b/llvm/lib/CodeGen/CommandFlags.cpp
@@ -90,7 +90,7 @@ CGOPT(bool, EnableAddrsig)
CGOPT(bool, EmitCallSiteInfo)
CGOPT(bool, EnableMachineFunctionSplitter)
CGOPT(bool, EnableDebugEntryValues)
-CGOPT(bool, ValueTrackingVariableLocations)
+CGOPT_EXP(bool, ValueTrackingVariableLocations)
CGOPT(bool, ForceDwarfFrameSection)
CGOPT(bool, XRayOmitFunctionIndex)
CGOPT(bool, DebugStrictDwarf)
@@ -534,12 +534,17 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
Options.EmitAddrsig = getEnableAddrsig();
Options.EmitCallSiteInfo = getEmitCallSiteInfo();
Options.EnableDebugEntryValues = getEnableDebugEntryValues();
- Options.ValueTrackingVariableLocations = getValueTrackingVariableLocations();
Options.ForceDwarfFrameSection = getForceDwarfFrameSection();
Options.XRayOmitFunctionIndex = getXRayOmitFunctionIndex();
Options.DebugStrictDwarf = getDebugStrictDwarf();
Options.LoopAlignment = getAlignLoops();
+ if (auto Opt = getExplicitValueTrackingVariableLocations())
+ Options.ValueTrackingVariableLocations = *Opt;
+ else
+ Options.ValueTrackingVariableLocations =
+ getDefaultValueTrackingVariableLocations(TheTriple);
+
Options.MCOptions = mc::InitMCTargetOptionsFromFlags();
Options.ThreadModel = getThreadModel();
@@ -692,3 +697,9 @@ void codegen::setFunctionAttributes(StringRef CPU, StringRef Features,
for (Function &F : M)
setFunctionAttributes(CPU, Features, F);
}
+
+bool codegen::getDefaultValueTrackingVariableLocations(const llvm::Triple &T) {
+ if (T.getArch() == llvm::Triple::x86_64)
+ return true;
+ return false;
+}
diff --git a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 981f5973fee8..4e98d49206b5 100644
--- a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -370,9 +370,7 @@ CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin,
// Handle cases in which this instruction defines NewReg.
MachineInstr *MI = RefOper->getParent();
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &CheckOper = MI->getOperand(i);
-
+ for (const MachineOperand &CheckOper : MI->operands()) {
if (CheckOper.isRegMask() && CheckOper.clobbersPhysReg(NewReg))
return true;
@@ -462,11 +460,10 @@ BreakAntiDependencies(const std::vector<SUnit> &SUnits,
// Find the node at the bottom of the critical path.
const SUnit *Max = nullptr;
- for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
- const SUnit *SU = &SUnits[i];
- MISUnitMap[SU->getInstr()] = SU;
- if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
- Max = SU;
+ for (const SUnit &SU : SUnits) {
+ MISUnitMap[SU.getInstr()] = &SU;
+ if (!Max || SU.getDepth() + SU.Latency > Max->getDepth() + Max->Latency)
+ Max = &SU;
}
assert(Max && "Failed to find bottom of the critical path");
@@ -621,8 +618,7 @@ BreakAntiDependencies(const std::vector<SUnit> &SUnits,
// is invalid. If the instruction defines other registers,
// save a list of them so that we don't pick a new register
// that overlaps any of them.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg()) continue;
Register Reg = MO.getReg();
if (Reg == 0) continue;
diff --git a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index c6c0b79cd7e7..0bb186a02416 100644
--- a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -76,8 +76,7 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
return false;
// Examine each operand.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (MO.isReg() && MO.isDef()) {
Register Reg = MO.getReg();
if (Register::isPhysicalRegister(Reg)) {
@@ -87,7 +86,7 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
} else {
if (MO.isDead()) {
#ifndef NDEBUG
- // Sanity check on uses of this dead register. All of them should be
+ // Basic check on uses of this dead register. All of them should be
// 'undef'.
for (auto &U : MRI->use_nodbg_operands(Reg))
assert(U.isUndef() && "'Undef' use on a 'dead' register is found!");
@@ -152,8 +151,7 @@ bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) {
}
// Record the physreg defs.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.isDef()) {
Register Reg = MO.getReg();
if (Register::isPhysicalRegister(Reg)) {
@@ -171,8 +169,7 @@ bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) {
}
// Record the physreg uses, after the defs, in case a physreg is
// both defined and used in the same instruction.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.isUse()) {
Register Reg = MO.getReg();
if (Register::isPhysicalRegister(Reg)) {
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 3a52959d54bf..755b3b844570 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Casting.h"
@@ -3732,8 +3733,7 @@ void CombinerHelper::applyExtendThroughPhis(MachineInstr &MI,
Builder.setInstrAndDebugLoc(MI);
auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
NewPhi.addDef(DstReg);
- for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); ++SrcIdx) {
- auto &MO = MI.getOperand(SrcIdx);
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
if (!MO.isReg()) {
NewPhi.addMBB(MO.getMBB());
continue;
@@ -3825,8 +3825,7 @@ bool CombinerHelper::matchExtractAllEltsFromBuildVector(
unsigned NumElts = DstTy.getNumElements();
SmallBitVector ExtractedElts(NumElts);
- for (auto &II : make_range(MRI.use_instr_nodbg_begin(DstReg),
- MRI.use_instr_nodbg_end())) {
+ for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
return false;
auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
@@ -3868,6 +3867,51 @@ void CombinerHelper::applyBuildFnNoErase(
MatchInfo(Builder);
}
+bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_OR);
+
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+ unsigned BitWidth = Ty.getScalarSizeInBits();
+
+ Register ShlSrc, ShlAmt, LShrSrc, LShrAmt;
+ unsigned FshOpc = 0;
+
+ // Match (or (shl x, amt), (lshr y, sub(bw, amt))).
+ if (mi_match(
+ Dst, MRI,
+ // m_GOr() handles the commuted version as well.
+ m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
+ m_GLShr(m_Reg(LShrSrc), m_GSub(m_SpecificICstOrSplat(BitWidth),
+ m_Reg(LShrAmt)))))) {
+ FshOpc = TargetOpcode::G_FSHL;
+
+ // Match (or (shl x, sub(bw, amt)), (lshr y, amt)).
+ } else if (mi_match(Dst, MRI,
+ m_GOr(m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)),
+ m_GShl(m_Reg(ShlSrc),
+ m_GSub(m_SpecificICstOrSplat(BitWidth),
+ m_Reg(ShlAmt)))))) {
+ FshOpc = TargetOpcode::G_FSHR;
+
+ } else {
+ return false;
+ }
+
+ if (ShlAmt != LShrAmt)
+ return false;
+
+ LLT AmtTy = MRI.getType(ShlAmt);
+ if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}))
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, ShlAmt});
+ };
+ return true;
+}
+
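
matchOrShiftToFunnelShift recognizes the classic shift-pair expansion of a
funnel shift and rebuilds the intrinsic form: (or (shl x, amt), (lshr y,
bw - amt)) becomes G_FSHL x, y, amt, and the mirrored (or (shl x, bw - amt),
(lshr y, amt)) becomes G_FSHR x, y, amt. A scalar sketch of the identity for a
32-bit type (ours; the amt != 0 guard stands in for G_FSHL's defined result at
a zero amount, where the shl/lshr form would shift by the full bit width):

    #include <cstdint>

    // (x << amt) | (y >> (32 - amt))  ==  fshl(x, y, amt)  for 0 < amt < 32
    uint32_t fshl32(uint32_t x, uint32_t y, uint32_t amt) {
      return amt ? (x << amt) | (y >> (32u - amt)) : x; // fshl(x, y, 0) == x
    }
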
/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) {
unsigned Opc = MI.getOpcode();
@@ -4499,20 +4543,9 @@ bool CombinerHelper::matchNarrowBinopFeedingAnd(
bool CombinerHelper::matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) {
unsigned Opc = MI.getOpcode();
assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
- // Check for a constant 2 or a splat of 2 on the RHS.
- auto RHS = MI.getOperand(3).getReg();
- bool IsVector = MRI.getType(RHS).isVector();
- if (!IsVector && !mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(2)))
- return false;
- if (IsVector) {
- // FIXME: There's no mi_match pattern for this yet.
- auto *RHSDef = getDefIgnoringCopies(RHS, MRI);
- if (!RHSDef)
- return false;
- auto Splat = getBuildVectorConstantSplat(*RHSDef, MRI);
- if (!Splat || *Splat != 2)
- return false;
- }
+
+ if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2)))
+ return false;
MatchInfo = [=, &MI](MachineIRBuilder &B) {
Observer.changingInstr(MI);
@@ -4760,6 +4793,556 @@ bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI,
return true;
}
+/// Checks if \p MI is TargetOpcode::G_FMUL and contractable either
+/// due to global flags or MachineInstr flags.
+static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) {
+ if (MI.getOpcode() != TargetOpcode::G_FMUL)
+ return false;
+ return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract);
+}
+
+static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1,
+ const MachineRegisterInfo &MRI) {
+ return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()),
+ MRI.use_instr_nodbg_end()) >
+ std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()),
+ MRI.use_instr_nodbg_end());
+}
+
+bool CombinerHelper::canCombineFMadOrFMA(MachineInstr &MI,
+ bool &AllowFusionGlobally,
+ bool &HasFMAD, bool &Aggressive,
+ bool CanReassociate) {
+
+ auto *MF = MI.getMF();
+ const auto &TLI = *MF->getSubtarget().getTargetLowering();
+ const TargetOptions &Options = MF->getTarget().Options;
+ LLT DstType = MRI.getType(MI.getOperand(0).getReg());
+
+ if (CanReassociate &&
+ !(Options.UnsafeFPMath || MI.getFlag(MachineInstr::MIFlag::FmReassoc)))
+ return false;
+
+ // Floating-point multiply-add with intermediate rounding.
+ HasFMAD = (LI && TLI.isFMADLegal(MI, DstType));
+ // Floating-point multiply-add without intermediate rounding.
+ bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) &&
+ isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}});
+ // No valid opcode, do not combine.
+ if (!HasFMAD && !HasFMA)
+ return false;
+
+ AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ Options.UnsafeFPMath || HasFMAD;
+ // If the addition is not contractable, do not combine.
+ if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract))
+ return false;
+
+ Aggressive = TLI.enableAggressiveFMAFusion(DstType);
+ return true;
+}
+
+bool CombinerHelper::matchCombineFAddFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FADD);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg());
+ MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg());
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) &&
+ isContractableFMul(*RHS, AllowFusionGlobally)) {
+ if (hasMoreUses(*LHS, *RHS, MRI))
+ std::swap(LHS, RHS);
+ }
+
+ // fold (fadd (fmul x, y), z) -> (fma x, y, z)
+ if (isContractableFMul(*LHS, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(LHS->getOperand(0).getReg()))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {LHS->getOperand(1).getReg(), LHS->getOperand(2).getReg(),
+ RHS->getOperand(0).getReg()});
+ };
+ return true;
+ }
+
+ // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
+ if (isContractableFMul(*RHS, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(RHS->getOperand(0).getReg()))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {RHS->getOperand(1).getReg(), RHS->getOperand(2).getReg(),
+ LHS->getOperand(0).getReg()});
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FADD);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
+ MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg());
+ MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg());
+ LLT DstType = MRI.getType(MI.getOperand(0).getReg());
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) &&
+ isContractableFMul(*RHS, AllowFusionGlobally)) {
+ if (hasMoreUses(*LHS, *RHS, MRI))
+ std::swap(LHS, RHS);
+ }
+
+ // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
+ MachineInstr *FpExtSrc;
+ if (mi_match(LHS->getOperand(0).getReg(), MRI,
+ m_GFPExt(m_MInstr(FpExtSrc))) &&
+ isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
+ MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
+ auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
+ B.buildInstr(
+ PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {FpExtX.getReg(0), FpExtY.getReg(0), RHS->getOperand(0).getReg()});
+ };
+ return true;
+ }
+
+ // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z)
+ // Note: Commutes FADD operands.
+ if (mi_match(RHS->getOperand(0).getReg(), MRI,
+ m_GFPExt(m_MInstr(FpExtSrc))) &&
+ isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
+ MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
+ auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
+ B.buildInstr(
+ PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {FpExtX.getReg(0), FpExtY.getReg(0), LHS->getOperand(0).getReg()});
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFAddFMAFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FADD);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true))
+ return false;
+
+ MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg());
+ MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg());
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) &&
+ isContractableFMul(*RHS, AllowFusionGlobally)) {
+ if (hasMoreUses(*LHS, *RHS, MRI))
+ std::swap(LHS, RHS);
+ }
+
+ MachineInstr *FMA = nullptr;
+ Register Z;
+ // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
+ if (LHS->getOpcode() == PreferredFusedOpcode &&
+ (MRI.getVRegDef(LHS->getOperand(3).getReg())->getOpcode() ==
+ TargetOpcode::G_FMUL) &&
+ MRI.hasOneNonDBGUse(LHS->getOperand(0).getReg()) &&
+ MRI.hasOneNonDBGUse(LHS->getOperand(3).getReg())) {
+ FMA = LHS;
+ Z = RHS->getOperand(0).getReg();
+ }
+ // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z))
+ else if (RHS->getOpcode() == PreferredFusedOpcode &&
+ (MRI.getVRegDef(RHS->getOperand(3).getReg())->getOpcode() ==
+ TargetOpcode::G_FMUL) &&
+ MRI.hasOneNonDBGUse(RHS->getOperand(0).getReg()) &&
+ MRI.hasOneNonDBGUse(RHS->getOperand(3).getReg())) {
+ Z = LHS->getOperand(0).getReg();
+ FMA = RHS;
+ }
+
+ if (FMA) {
+ MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg());
+ Register X = FMA->getOperand(1).getReg();
+ Register Y = FMA->getOperand(2).getReg();
+ Register U = FMulMI->getOperand(1).getReg();
+ Register V = FMulMI->getOperand(2).getReg();
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register InnerFMA = MRI.createGenericVirtualRegister(DstTy);
+ B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z});
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {X, Y, InnerFMA});
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMAAggressive(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FADD);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ if (!Aggressive)
+ return false;
+
+ const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
+ LLT DstType = MRI.getType(MI.getOperand(0).getReg());
+ MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg());
+ MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg());
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ if (Aggressive && isContractableFMul(*LHS, AllowFusionGlobally) &&
+ isContractableFMul(*RHS, AllowFusionGlobally)) {
+ if (hasMoreUses(*LHS, *RHS, MRI))
+ std::swap(LHS, RHS);
+ }
+
+ // Builds: (fma x, y, (fma (fpext u), (fpext v), z))
+ auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X,
+ Register Y, MachineIRBuilder &B) {
+ Register FpExtU = B.buildFPExt(DstType, U).getReg(0);
+ Register FpExtV = B.buildFPExt(DstType, V).getReg(0);
+ Register InnerFMA =
+ B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z})
+ .getReg(0);
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {X, Y, InnerFMA});
+ };
+
+ MachineInstr *FMulMI, *FMAMI;
+ // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
+ // -> (fma x, y, (fma (fpext u), (fpext v), z))
+ if (LHS->getOpcode() == PreferredFusedOpcode &&
+ mi_match(LHS->getOperand(3).getReg(), MRI, m_GFPExt(m_MInstr(FMulMI))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
+ MRI.getType(FMulMI->getOperand(0).getReg()))) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ buildMatchInfo(FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(),
+ RHS->getOperand(0).getReg(), LHS->getOperand(1).getReg(),
+ LHS->getOperand(2).getReg(), B);
+ };
+ return true;
+ }
+
+ // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
+ // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (mi_match(LHS->getOperand(0).getReg(), MRI, m_GFPExt(m_MInstr(FMAMI))) &&
+ FMAMI->getOpcode() == PreferredFusedOpcode) {
+ MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
+ if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
+ MRI.getType(FMAMI->getOperand(0).getReg()))) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ Register X = FMAMI->getOperand(1).getReg();
+ Register Y = FMAMI->getOperand(2).getReg();
+ X = B.buildFPExt(DstType, X).getReg(0);
+ Y = B.buildFPExt(DstType, Y).getReg(0);
+ buildMatchInfo(FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(),
+ RHS->getOperand(0).getReg(), X, Y, B);
+ };
+
+ return true;
+ }
+ }
+
+ // fold (fadd z, (fma x, y, (fpext (fmul u, v)))
+ // -> (fma x, y, (fma (fpext u), (fpext v), z))
+ if (RHS->getOpcode() == PreferredFusedOpcode &&
+ mi_match(RHS->getOperand(3).getReg(), MRI, m_GFPExt(m_MInstr(FMulMI))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
+ MRI.getType(FMulMI->getOperand(0).getReg()))) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ buildMatchInfo(FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(),
+ LHS->getOperand(0).getReg(), RHS->getOperand(1).getReg(),
+ RHS->getOperand(2).getReg(), B);
+ };
+ return true;
+ }
+
+ // fold (fadd z, (fpext (fma x, y, (fmul u, v)))
+ // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (mi_match(RHS->getOperand(0).getReg(), MRI, m_GFPExt(m_MInstr(FMAMI))) &&
+ FMAMI->getOpcode() == PreferredFusedOpcode) {
+ MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
+ if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
+ MRI.getType(FMAMI->getOperand(0).getReg()))) {
+ MatchInfo = [=](MachineIRBuilder &B) {
+ Register X = FMAMI->getOperand(1).getReg();
+ Register Y = FMAMI->getOperand(2).getReg();
+ X = B.buildFPExt(DstType, X).getReg(0);
+ Y = B.buildFPExt(DstType, Y).getReg(0);
+ buildMatchInfo(FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(),
+ LHS->getOperand(0).getReg(), X, Y, B);
+ };
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFSubFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FSUB);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ MachineInstr *LHS = MRI.getVRegDef(MI.getOperand(1).getReg());
+ MachineInstr *RHS = MRI.getVRegDef(MI.getOperand(2).getReg());
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
+ // prefer to fold the multiply with fewer uses.
+ bool FirstMulHasFewerUses = true;
+ if (isContractableFMul(*LHS, AllowFusionGlobally) &&
+ isContractableFMul(*RHS, AllowFusionGlobally) &&
+ hasMoreUses(*LHS, *RHS, MRI))
+ FirstMulHasFewerUses = false;
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ // fold (fsub (fmul x, y), z) -> (fma x, y, -z)
+ if (FirstMulHasFewerUses &&
+ (isContractableFMul(*LHS, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(LHS->getOperand(0).getReg())))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register NegZ = B.buildFNeg(DstTy, RHS->getOperand(0).getReg()).getReg(0);
+ B.buildInstr(
+ PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {LHS->getOperand(1).getReg(), LHS->getOperand(2).getReg(), NegZ});
+ };
+ return true;
+ }
+ // fold (fsub x, (fmul y, z)) -> (fma -y, z, x)
+ else if ((isContractableFMul(*RHS, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(RHS->getOperand(0).getReg())))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register NegY = B.buildFNeg(DstTy, RHS->getOperand(1).getReg()).getReg(0);
+ B.buildInstr(
+ PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {NegY, RHS->getOperand(2).getReg(), LHS->getOperand(0).getReg()});
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFSubFNegFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FSUB);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ Register LHSReg = MI.getOperand(1).getReg();
+ Register RHSReg = MI.getOperand(2).getReg();
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ MachineInstr *FMulMI;
+ // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
+ if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
+ (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) &&
+ MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally)) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register NegX =
+ B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
+ Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {NegX, FMulMI->getOperand(2).getReg(), NegZ});
+ };
+ return true;
+ }
+
+ // fold (fsub x, (fneg (fmul, y, z))) -> (fma y, z, x)
+ if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
+ (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) &&
+ MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally)) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(), LHSReg});
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFSubFpExtFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FSUB);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ Register LHSReg = MI.getOperand(1).getReg();
+ Register RHSReg = MI.getOperand(2).getReg();
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ MachineInstr *FMulMI;
+ // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
+ if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register FpExtX =
+ B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
+ Register FpExtY =
+ B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
+ Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {FpExtX, FpExtY, NegZ});
+ };
+ return true;
+ }
+
+ // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x)
+ if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register FpExtY =
+ B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
+ Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0);
+ Register FpExtZ =
+ B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
+ B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
+ {NegY, FpExtZ, LHSReg});
+ };
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchCombineFSubFpExtFNegFMulToFMadOrFMA(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FSUB);
+
+ bool AllowFusionGlobally, HasFMAD, Aggressive;
+ if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
+ return false;
+
+ const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ Register LHSReg = MI.getOperand(1).getReg();
+ Register RHSReg = MI.getOperand(2).getReg();
+
+ unsigned PreferredFusedOpcode =
+ HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
+
+ auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z,
+ MachineIRBuilder &B) {
+ Register FpExtX = B.buildFPExt(DstTy, X).getReg(0);
+ Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0);
+ B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z});
+ };
+
+ MachineInstr *FMulMI;
+ // fold (fsub (fpext (fneg (fmul x, y))), z) ->
+ // (fneg (fma (fpext x), (fpext y), z))
+ // fold (fsub (fneg (fpext (fmul x, y))), z) ->
+ // (fneg (fma (fpext x), (fpext y), z))
+ if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
+ mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
+ MRI.getType(FMulMI->getOperand(0).getReg()))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Register FMAReg = MRI.createGenericVirtualRegister(DstTy);
+ buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(), RHSReg, B);
+ B.buildFNeg(MI.getOperand(0).getReg(), FMAReg);
+ };
+ return true;
+ }
+
+ // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
+ // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
+ if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
+ mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
+ isContractableFMul(*FMulMI, AllowFusionGlobally) &&
+ TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
+ MRI.getType(FMulMI->getOperand(0).getReg()))) {
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(),
+ FMulMI->getOperand(2).getReg(), LHSReg, B);
+ };
+ return true;
+ }
+
+ return false;
+}
+
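For readers checking the algebra, the folds added above are the standard FMA contraction identities. The following standalone C++ sketch (not part of the patch, and unrelated to the LLVM API) verifies three of them with std::fma, using values whose products and sums are exactly representable so that fused and unfused evaluation round identically; in general the two differ in the last bit, which is precisely why these combines are gated on contraction being allowed.

#include <cassert>
#include <cmath>

int main() {
  double x = 1.5, y = -2.25, z = 0.75;
  // fsub (fmul x, y), z  ->  fma x, y, (fneg z)
  assert(std::fma(x, y, -z) == x * y - z);
  // fsub x, (fmul y, z)  ->  fma (fneg y), z, x
  assert(std::fma(-y, z, x) == x - y * z);
  // fsub (fneg (fmul x, y)), z  ->  fma (fneg x), y, (fneg z)
  assert(std::fma(-x, y, -z) == -(x * y) - z);
  return 0;
}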
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index c74bec7dfc0d..e09cd26eb0c1 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -585,8 +585,8 @@ simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
// FIXME: What does the original arg index mean here?
SmallVector<CallLowering::ArgInfo, 3> Args;
- for (unsigned i = 1; i < MI.getNumOperands(); i++)
- Args.push_back({MI.getOperand(i).getReg(), OpType, 0});
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
+ Args.push_back({MO.getReg(), OpType, 0});
return createLibcall(MIRBuilder, Libcall,
{MI.getOperand(0).getReg(), OpType, 0}, Args);
}
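llvm::drop_begin (from llvm/ADT/STLExtras.h) yields a view of a range minus its first N elements, N defaulting to 1, which is what makes the rewritten range-for loops in these hunks equivalent to the old index-from-1 form. A minimal standalone equivalent, assuming nothing beyond the standard library:

#include <cstddef>
#include <cstdio>
#include <iterator>
#include <vector>

template <typename It> struct IterRange {
  It B, E;
  It begin() const { return B; }
  It end() const { return E; }
};

// Stand-in for llvm::drop_begin: skip the first N elements of a range.
template <typename Range> auto dropBegin(Range &R, std::size_t N = 1) {
  auto B = std::begin(R);
  std::advance(B, N);
  return IterRange<decltype(B)>{B, std::end(R)};
}

int main() {
  std::vector<int> Operands = {100, 1, 2, 3}; // operand 0 is the def
  for (int Use : dropBegin(Operands))         // visits 1, 2, 3 only
    std::printf("%d\n", Use);
  return 0;
}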
@@ -1500,8 +1500,8 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
// Decompose the original operands if they don't evenly divide.
- for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
- Register SrcReg = MI.getOperand(I).getReg();
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
+ Register SrcReg = MO.getReg();
if (GCD == SrcSize) {
Unmerges.push_back(SrcReg);
} else {
@@ -4037,8 +4037,8 @@ LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
// Break into a common type
SmallVector<Register, 16> Parts;
- for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
- extractGCDType(Parts, GCDTy, MI.getOperand(I).getReg());
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
+ extractGCDType(Parts, GCDTy, MO.getReg());
// Build the requested new merge, padding with undef.
LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
@@ -7782,7 +7782,6 @@ LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
// of that value loaded. This can result in a sequence of loads and stores of
// mixed types, depending on what the target specifies as good types to use.
unsigned CurrOffset = 0;
- LLT PtrTy = MRI.getType(Src);
unsigned Size = KnownLen;
for (auto CopyTy : MemOps) {
// Issuing an unaligned load / store pair that overlaps with the previous
@@ -7800,15 +7799,19 @@ LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
Register LoadPtr = Src;
Register Offset;
if (CurrOffset != 0) {
- Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
+ LLT SrcTy = MRI.getType(Src);
+ Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
.getReg(0);
- LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
+ LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
}
auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
// Create the store.
- Register StorePtr =
- CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
+ Register StorePtr = Dst;
+ if (CurrOffset != 0) {
+ LLT DstTy = MRI.getType(Dst);
+ StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
+ }
MIB.buildStore(LdVal, StorePtr, *StoreMMO);
CurrOffset += CopyTy.getSizeInBytes();
Size -= CopyTy.getSizeInBytes();
@@ -7885,7 +7888,6 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
// Apart from that, this loop is pretty much doing the same thing as the
// memcpy codegen function.
unsigned CurrOffset = 0;
- LLT PtrTy = MRI.getType(Src);
SmallVector<Register, 16> LoadVals;
for (auto CopyTy : MemOps) {
// Construct MMO for the load.
@@ -7895,9 +7897,10 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
// Create the load.
Register LoadPtr = Src;
if (CurrOffset != 0) {
+ LLT SrcTy = MRI.getType(Src);
auto Offset =
- MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
- LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
+ MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
+ LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
}
LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
CurrOffset += CopyTy.getSizeInBytes();
@@ -7912,9 +7915,10 @@ LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
Register StorePtr = Dst;
if (CurrOffset != 0) {
+ LLT DstTy = MRI.getType(Dst);
auto Offset =
- MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
- StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
+ MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
+ StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
}
MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
CurrOffset += CopyTy.getSizeInBytes();
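The memcpy/memmove hunks above stop caching a single PtrTy computed from Src, presumably because Dst and Src may live in different address spaces whose pointers have different widths; each G_PTR_ADD and its offset constant must then be built with the type of the pointer it actually applies to. A toy illustration of why reusing the source width would be wrong (the 64/32-bit widths and names here are hypothetical, not from the patch):

#include <cassert>
#include <cstdint>

// Hypothetical address spaces with different pointer widths.
struct PtrInfo { unsigned AddrSpace; unsigned Bits; };

// Model of materializing an offset constant at a given pointer width.
uint64_t offsetAtWidth(uint64_t Offset, unsigned Bits) {
  return Bits >= 64 ? Offset : Offset & ((uint64_t(1) << Bits) - 1);
}

int main() {
  PtrInfo Src{0, 64}; // e.g. a flat 64-bit space
  PtrInfo Dst{1, 32}; // e.g. a 32-bit segmented space
  uint64_t CurrOffset = uint64_t(1) << 32;
  // An offset built at Src's width does not even fit in Dst's pointers,
  // so the store-side pointer add must use Dst's own type.
  assert(offsetAtWidth(CurrOffset, Src.Bits) !=
         offsetAtWidth(CurrOffset, Dst.Bits));
  return 0;
}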
diff --git a/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
index 1a2102e3ef21..650500c7eb31 100644
--- a/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -123,7 +123,7 @@ const RegisterBank *RegisterBankInfo::getRegBankFromConstraints(
Register Reg = MI.getOperand(OpIdx).getReg();
const RegisterBank &RegBank = getRegBankFromRegClass(*RC, MRI.getType(Reg));
- // Sanity check that the target properly implemented getRegBankFromRegClass.
+ // Check that the target properly implemented getRegBankFromRegClass.
assert(RegBank.covers(*RC) &&
"The mapping of the register bank does not make sense");
return &RegBank;
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 1a440c064a59..b0b84763e922 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -834,10 +834,9 @@ bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
case TargetOpcode::G_BUILD_VECTOR: {
// TODO: Probably should have a recursion depth guard since you could have
// bitcasted vector elements.
- for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
- if (!isKnownToBeAPowerOfTwo(MI.getOperand(I).getReg(), MRI, KB))
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
+ if (!isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB))
return false;
- }
return true;
}
@@ -845,8 +844,8 @@ bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
// Only handle constants since we would need to know if the number of leading
// zeros is greater than the truncation amount.
const unsigned BitWidth = Ty.getScalarSizeInBits();
- for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
- auto Const = getIConstantVRegVal(MI.getOperand(I).getReg(), MRI);
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
+ auto Const = getIConstantVRegVal(MO.getReg(), MRI);
if (!Const || !Const->zextOrTrunc(BitWidth).isPowerOf2())
return false;
}
@@ -1031,16 +1030,22 @@ Optional<ValueAndVReg> getAnyConstantSplat(Register VReg,
return SplatValAndReg;
}
-bool isBuildVectorConstantSplat(const MachineInstr &MI,
- const MachineRegisterInfo &MRI,
- int64_t SplatValue, bool AllowUndef) {
- if (auto SplatValAndReg =
- getAnyConstantSplat(MI.getOperand(0).getReg(), MRI, AllowUndef))
+} // end anonymous namespace
+
+bool llvm::isBuildVectorConstantSplat(const Register Reg,
+ const MachineRegisterInfo &MRI,
+ int64_t SplatValue, bool AllowUndef) {
+ if (auto SplatValAndReg = getAnyConstantSplat(Reg, MRI, AllowUndef))
return mi_match(SplatValAndReg->VReg, MRI, m_SpecificICst(SplatValue));
return false;
}
-} // end anonymous namespace
+bool llvm::isBuildVectorConstantSplat(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ int64_t SplatValue, bool AllowUndef) {
+ return isBuildVectorConstantSplat(MI.getOperand(0).getReg(), MRI, SplatValue,
+ AllowUndef);
+}
Optional<int64_t>
llvm::getBuildVectorConstantSplat(const MachineInstr &MI,
diff --git a/llvm/lib/CodeGen/GlobalMerge.cpp b/llvm/lib/CodeGen/GlobalMerge.cpp
index 6c1ce4c1efb0..bbd9006a5d8c 100644
--- a/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -399,8 +399,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
// having a single global, but is aggressive enough for any other case.
if (GlobalMergeIgnoreSingleUse) {
BitVector AllGlobals(Globals.size());
- for (size_t i = 0, e = UsedGlobalSets.size(); i != e; ++i) {
- const UsedGlobalSet &UGS = UsedGlobalSets[e - i - 1];
+ for (const UsedGlobalSet &UGS : llvm::reverse(UsedGlobalSets)) {
if (UGS.UsageCount == 0)
continue;
if (UGS.Globals.count() > 1)
@@ -418,8 +417,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
BitVector PickedGlobals(Globals.size());
bool Changed = false;
- for (size_t i = 0, e = UsedGlobalSets.size(); i != e; ++i) {
- const UsedGlobalSet &UGS = UsedGlobalSets[e - i - 1];
+ for (const UsedGlobalSet &UGS : llvm::reverse(UsedGlobalSets)) {
if (UGS.UsageCount == 0)
continue;
if (PickedGlobals.anyCommon(UGS.Globals))
diff --git a/llvm/lib/CodeGen/IndirectBrExpandPass.cpp b/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
index e4606daba352..2d38a44d5a33 100644
--- a/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
+++ b/llvm/lib/CodeGen/IndirectBrExpandPass.cpp
@@ -260,10 +260,12 @@ bool IndirectBrExpandPass::runOnFunction(Function &F) {
if (DTU) {
// If there were multiple indirectbr's, they may have common successors,
// but in the dominator tree, we only track unique edges.
- SmallPtrSet<BasicBlock *, 8> UniqueSuccessors(BBs.begin(), BBs.end());
- Updates.reserve(Updates.size() + UniqueSuccessors.size());
- for (BasicBlock *BB : UniqueSuccessors)
- Updates.push_back({DominatorTree::Insert, SwitchBB, BB});
+ SmallPtrSet<BasicBlock *, 8> UniqueSuccessors;
+ Updates.reserve(Updates.size() + BBs.size());
+ for (BasicBlock *BB : BBs) {
+ if (UniqueSuccessors.insert(BB).second)
+ Updates.push_back({DominatorTree::Insert, SwitchBB, BB});
+ }
DTU->applyUpdates(Updates);
}
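The rewrite above trades construct-set-then-iterate for iterate-then-insert: walking BBs in their original order and gating on insert(...).second still deduplicates, but keeps a deterministic first-seen order, whereas iterating a SmallPtrSet visits elements in unspecified order. The same pattern in miniature, with ints standing in for basic blocks:

#include <cstdio>
#include <unordered_set>
#include <vector>

int main() {
  std::vector<int> BBs = {3, 1, 3, 2, 1};
  std::unordered_set<int> Seen;
  std::vector<int> Updates;
  for (int BB : BBs)
    if (Seen.insert(BB).second) // true only on the first occurrence
      Updates.push_back(BB);
  for (int BB : Updates)
    std::printf("%d ", BB);     // prints: 3 1 2
  return 0;
}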
diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp
index 64e1f4351456..fc5ac45752ca 100644
--- a/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -274,11 +274,9 @@ static Register isFullCopyOf(const MachineInstr &MI, Register Reg) {
}
static void getVDefInterval(const MachineInstr &MI, LiveIntervals &LIS) {
- for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
- const MachineOperand &MO = MI.getOperand(I);
+ for (const MachineOperand &MO : MI.operands())
if (MO.isReg() && MO.isDef() && Register::isVirtualRegister(MO.getReg()))
LIS.getInterval(MO.getReg());
- }
}
/// isSnippet - Identify if a live interval is a snippet that should be spilled.
@@ -583,11 +581,9 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
if (!ParentVNI) {
LLVM_DEBUG(dbgs() << "\tadding <undef> flags: ");
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
+ for (MachineOperand &MO : MI.operands())
if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg())
MO.setIsUndef();
- }
LLVM_DEBUG(dbgs() << UseIdx << '\t' << MI);
return true;
}
diff --git a/llvm/lib/CodeGen/LatencyPriorityQueue.cpp b/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
index c3e0553418a5..fab6b8d10a33 100644
--- a/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -73,11 +73,9 @@ void LatencyPriorityQueue::push(SUnit *SU) {
// Look at all of the successors of this node. Count the number of nodes that
// this node is the sole unscheduled node for.
unsigned NumNodesBlocking = 0;
- for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- if (getSingleUnscheduledPred(I->getSUnit()) == SU)
+ for (const SDep &Succ : SU->Succs)
+ if (getSingleUnscheduledPred(Succ.getSUnit()) == SU)
++NumNodesBlocking;
- }
NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
Queue.push_back(SU);
diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
index a4eb3094612b..cf62b0e5d7e8 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
@@ -544,8 +544,7 @@ public:
// Re-state the variable location: if there's no replacement then NewLoc
// is None and a $noreg DBG_VALUE will be created. Otherwise, a DBG_VALUE
// identifying the alternative location will be emitted.
- const DIExpression *Expr = ActiveVLocIt->second.Properties.DIExpr;
- DbgValueProperties Properties(Expr, false);
+ const DbgValueProperties &Properties = ActiveVLocIt->second.Properties;
PendingDbgValues.push_back(MTracker->emitLoc(NewLoc, Var, Properties));
// Update machine locations <=> variable locations maps. Defer updating
@@ -836,6 +835,15 @@ MachineInstrBuilder MLocTracker::emitLoc(Optional<LocIdx> MLoc,
unsigned Base = Spill.SpillBase;
MIB.addReg(Base);
MIB.addImm(0);
+
+ // Being on the stack makes this location indirect; if it was _already_
+ // indirect though, we need to add extra indirection. See this test for
+ // a scenario where this happens:
+ // llvm/test/DebugInfo/X86/spill-nontrivial-param.ll
+ if (Properties.Indirect) {
+ std::vector<uint64_t> Elts = {dwarf::DW_OP_deref};
+ Expr = DIExpression::append(Expr, Elts);
+ }
} else {
// This is a stack location with a weird subregister offset: emit an undef
// DBG_VALUE instead.
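The rationale of the added lines: a spill-slot location is implicitly one level indirect (the value sits at [Base + 0]), so a variable whose DBG_VALUE was already indirect needs one extra explicit dereference in its DWARF expression. A toy model of that edit follows; DW_OP_deref's 0x06 encoding is from the DWARF standard, while the helper is a stand-in rather than LLVM's DIExpression API.

#include <cstdint>
#include <vector>

constexpr uint64_t DW_OP_deref = 0x06; // DWARF opcode: load from address

// Append one explicit dereference to a toy expression (list of opcodes).
std::vector<uint64_t> appendDeref(std::vector<uint64_t> Expr) {
  Expr.push_back(DW_OP_deref);
  return Expr;
}

int main() {
  std::vector<uint64_t> Expr;   // empty: value was directly in a register
  bool Indirect = true;         // the DBG_VALUE was already indirect
  if (Indirect)
    Expr = appendDeref(std::move(Expr));
  return Expr.size() == 1 ? 0 : 1;
}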
@@ -1288,6 +1296,24 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
} else if (MI.isMetaInstruction())
return;
+ // We always ignore SP defines on call instructions; they don't actually
+ // change the value of the stack pointer... except for win32's _chkstk. This
+ // is rare: filter quickly for the common case (no stack adjustments, not a
+ // call, etc). If it is a call that modifies SP, recognise the SP register
+ // defs.
+ bool CallChangesSP = false;
+ if (AdjustsStackInCalls && MI.isCall() && MI.getOperand(0).isSymbol() &&
+ !strcmp(MI.getOperand(0).getSymbolName(), StackProbeSymbolName.data()))
+ CallChangesSP = true;
+
+ // Test whether we should ignore a def of this register due to it being part
+ // of the stack pointer.
+ auto IgnoreSPAlias = [this, &MI, CallChangesSP](Register R) -> bool {
+ if (CallChangesSP)
+ return false;
+ return MI.isCall() && MTracker->SPAliases.count(R);
+ };
+
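The decision the IgnoreSPAlias lambda encodes can be restated as a small truth table; this standalone sketch (a hypothetical helper, not part of the patch) enumerates the three relevant cases:

#include <cassert>

// SP defs on ordinary calls are ignored (calls don't really move SP),
// unless the call is the stack-probe symbol, which genuinely modifies SP.
bool ignoreSPDef(bool IsCall, bool CallChangesSP, bool IsSPAlias) {
  if (CallChangesSP)
    return false;               // stack probe: honour the SP def
  return IsCall && IsSPAlias;   // normal call: ignore SP aliases
}

int main() {
  assert(!ignoreSPDef(true, true, true));   // _chkstk-style call
  assert(ignoreSPDef(true, false, true));   // ordinary call "clobbering" SP
  assert(!ignoreSPDef(false, false, true)); // non-call def of SP
  return 0;
}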
// Find the regs killed by MI, and find regmasks of preserved regs.
// Max out the number of statically allocated elements in `DeadRegs`, as this
// prevents fallback to std::set::count() operations.
@@ -1298,7 +1324,7 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
// Determine whether the operand is a register def.
if (MO.isReg() && MO.isDef() && MO.getReg() &&
Register::isPhysicalRegister(MO.getReg()) &&
- !(MI.isCall() && MTracker->SPAliases.count(MO.getReg()))) {
+ !IgnoreSPAlias(MO.getReg())) {
// Remove ranges of all aliased registers.
for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
// FIXME: Can we break out of this loop early if no insertion occurs?
@@ -1347,6 +1373,9 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
continue;
Register Reg = MTracker->LocIdxToLocID[L.Idx];
+ if (IgnoreSPAlias(Reg))
+ continue;
+
for (auto *MO : RegMaskPtrs)
if (MO->clobbersPhysReg(Reg))
TTracker->clobberMloc(L.Idx, MI.getIterator(), false);
@@ -1628,9 +1657,10 @@ bool InstrRefBasedLDV::transferRegisterCopy(MachineInstr &MI) {
/// fragments of that DILocalVariable which overlap. This reduces work during
/// the data-flow stage from "Find any overlapping fragments" to "Check if the
/// known-to-overlap fragments are present".
-/// \param MI A previously unprocessed DEBUG_VALUE instruction to analyze for
+/// \param MI A previously unprocessed debug instruction to analyze for
/// fragment usage.
void InstrRefBasedLDV::accumulateFragmentMap(MachineInstr &MI) {
+ assert(MI.isDebugValue() || MI.isDebugRef());
DebugVariable MIVar(MI.getDebugVariable(), MI.getDebugExpression(),
MI.getDebugLoc()->getInlinedAt());
FragmentInfo ThisFragment = MIVar.getFragmentOrDefault();
@@ -1732,7 +1762,7 @@ void InstrRefBasedLDV::produceMLocTransferFunction(
for (auto &MI : MBB) {
process(MI);
// Also accumulate fragment map.
- if (MI.isDebugValue())
+ if (MI.isDebugValue() || MI.isDebugRef())
accumulateFragmentMap(MI);
// Create a map from the instruction number (if present) to the
@@ -2322,15 +2352,8 @@ Optional<ValueIDNum> InstrRefBasedLDV::pickVPHILoc(
bool InstrRefBasedLDV::vlocJoin(
MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs,
- SmallPtrSet<const MachineBasicBlock *, 8> &InScopeBlocks,
SmallPtrSet<const MachineBasicBlock *, 8> &BlocksToExplore,
DbgValue &LiveIn) {
- // To emulate VarLocBasedImpl, process this block if it's not in scope but
- // _does_ assign a variable value. No live-ins for this scope are transferred
- // in though, so we can return immediately.
- if (InScopeBlocks.count(&MBB) == 0 && !ArtificialBlocks.count(&MBB))
- return false;
-
LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n");
bool Changed = false;
@@ -2466,11 +2489,10 @@ void InstrRefBasedLDV::buildVLocValueMap(const DILocation *DILoc,
// "blocks that are potentially in scope. See comment at start of vlocJoin.
SmallPtrSet<const MachineBasicBlock *, 8> InScopeBlocks = BlocksToExplore;
- // Old LiveDebugValues tracks variable locations that come out of blocks
- // not in scope, where DBG_VALUEs occur. This is something we could
- // legitimately ignore, but lets allow it for now.
- if (EmulateOldLDV)
- BlocksToExplore.insert(AssignBlocks.begin(), AssignBlocks.end());
+ // VarLoc LiveDebugValues tracks variable locations that are defined in
+ // blocks not in scope. This is something we could legitimately ignore, but
+ // let's allow it for now for the sake of coverage.
+ BlocksToExplore.insert(AssignBlocks.begin(), AssignBlocks.end());
// We also need to propagate variable values through any artificial blocks
// that immediately follow blocks in scope.
@@ -2635,7 +2657,7 @@ void InstrRefBasedLDV::buildVLocValueMap(const DILocation *DILoc,
// Join values from predecessors. Updates LiveInIdx, and writes output
// into JoinedInLocs.
bool InLocsChanged =
- vlocJoin(*MBB, LiveOutIdx, InScopeBlocks, BlocksToExplore, *LiveIn);
+ vlocJoin(*MBB, LiveOutIdx, BlocksToExplore, *LiveIn);
SmallVector<const MachineBasicBlock *, 8> Preds;
for (const auto *Pred : MBB->predecessors())
@@ -2730,6 +2752,8 @@ void InstrRefBasedLDV::buildVLocValueMap(const DILocation *DILoc,
continue;
if (BlockLiveIn->Kind == DbgValue::VPHI)
BlockLiveIn->Kind = DbgValue::Def;
+ assert(BlockLiveIn->Properties.DIExpr->getFragmentInfo() ==
+ Var.getFragment() && "Fragment info missing during value prop");
Output[MBB->getNumber()].push_back(std::make_pair(Var, *BlockLiveIn));
}
} // Per-variable loop.
@@ -2879,6 +2903,12 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
MFI = &MF.getFrameInfo();
LS.initialize(MF);
+ const auto &STI = MF.getSubtarget();
+ AdjustsStackInCalls = MFI->adjustsStack() &&
+ STI.getFrameLowering()->stackProbeFunctionModifiesSP();
+ if (AdjustsStackInCalls)
+ StackProbeSymbolName = STI.getTargetLowering()->getStackProbeSymbolName(MF);
+
MTracker =
new MLocTracker(MF, *TII, *TRI, *MF.getSubtarget().getTargetLowering());
VTracker = nullptr;
@@ -2895,7 +2925,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
++MaxNumBlocks;
MLocTransfer.resize(MaxNumBlocks);
- vlocs.resize(MaxNumBlocks);
+ vlocs.resize(MaxNumBlocks, VLocTracker(OverlapFragments, EmptyExpr));
SavedLiveIns.resize(MaxNumBlocks);
initialSetup(MF);
@@ -3040,6 +3070,8 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
BBNumToRPO.clear();
DebugInstrNumToInstr.clear();
DebugPHINumToValue.clear();
+ OverlapFragments.clear();
+ SeenFragments.clear();
return Changed;
}
diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
index d96ef6d4f6e5..789205e61cdb 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
+++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
@@ -655,6 +655,14 @@ public:
const DbgValueProperties &Properties);
};
+/// Types for recording sets of variable fragments that overlap. For a given
+/// local variable, we record all other fragments of that variable that could
+/// overlap it, to reduce search time.
+using FragmentOfVar =
+ std::pair<const DILocalVariable *, DIExpression::FragmentInfo>;
+using OverlapMap =
+ DenseMap<FragmentOfVar, SmallVector<DIExpression::FragmentInfo, 1>>;
+
/// Collection of DBG_VALUEs observed when traversing a block. Records each
/// variable and the value the DBG_VALUE refers to. Requires the machine value
/// location dataflow algorithm to have run already, so that values can be
@@ -672,9 +680,12 @@ public:
MapVector<DebugVariable, DbgValue> Vars;
DenseMap<DebugVariable, const DILocation *> Scopes;
MachineBasicBlock *MBB = nullptr;
+ const OverlapMap &OverlappingFragments;
+ DbgValueProperties EmptyProperties;
public:
- VLocTracker() {}
+ VLocTracker(const OverlapMap &O, const DIExpression *EmptyExpr)
+ : OverlappingFragments(O), EmptyProperties(EmptyExpr, false) {}
void defVar(const MachineInstr &MI, const DbgValueProperties &Properties,
Optional<ValueIDNum> ID) {
@@ -689,6 +700,8 @@ public:
if (!Result.second)
Result.first->second = Rec;
Scopes[Var] = MI.getDebugLoc().get();
+
+ considerOverlaps(Var, MI.getDebugLoc().get());
}
void defVar(const MachineInstr &MI, const MachineOperand &MO) {
@@ -704,16 +717,37 @@ public:
if (!Result.second)
Result.first->second = Rec;
Scopes[Var] = MI.getDebugLoc().get();
+
+ considerOverlaps(Var, MI.getDebugLoc().get());
}
-};
-/// Types for recording sets of variable fragments that overlap. For a given
-/// local variable, we record all other fragments of that variable that could
-/// overlap it, to reduce search time.
-using FragmentOfVar =
- std::pair<const DILocalVariable *, DIExpression::FragmentInfo>;
-using OverlapMap =
- DenseMap<FragmentOfVar, SmallVector<DIExpression::FragmentInfo, 1>>;
+ void considerOverlaps(const DebugVariable &Var, const DILocation *Loc) {
+ auto Overlaps = OverlappingFragments.find(
+ {Var.getVariable(), Var.getFragmentOrDefault()});
+ if (Overlaps == OverlappingFragments.end())
+ return;
+
+ // Otherwise: terminate any overlapped variable locations.
+ for (auto FragmentInfo : Overlaps->second) {
+ // The "empty" fragment is stored as DebugVariable::DefaultFragment, so
+ // that it overlaps with everything; however, its canonical representation
+ // in a DebugVariable is as "None".
+ Optional<DIExpression::FragmentInfo> OptFragmentInfo = FragmentInfo;
+ if (DebugVariable::isDefaultFragment(FragmentInfo))
+ OptFragmentInfo = None;
+
+ DebugVariable Overlapped(Var.getVariable(), OptFragmentInfo,
+ Var.getInlinedAt());
+ DbgValue Rec = DbgValue(EmptyProperties, DbgValue::Undef);
+
+ // Attempt insertion; overwrite if it's already mapped.
+ auto Result = Vars.insert(std::make_pair(Overlapped, Rec));
+ if (!Result.second)
+ Result.first->second = Rec;
+ Scopes[Overlapped] = Loc;
+ }
+ }
+};
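considerOverlaps above terminates the locations of any other fragment of the same variable that overlaps the fragment just defined. The underlying overlap relation, which OverlapMap precomputes, is just half-open bit-interval intersection; a standalone restatement with a hypothetical Fragment type:

#include <cassert>

// Fragments are bit ranges [OffsetInBits, OffsetInBits + SizeInBits) of
// the source variable; two fragments overlap when the intervals intersect.
struct Fragment {
  unsigned OffsetInBits;
  unsigned SizeInBits;
};

bool fragmentsOverlap(const Fragment &A, const Fragment &B) {
  return A.OffsetInBits < B.OffsetInBits + B.SizeInBits &&
         B.OffsetInBits < A.OffsetInBits + A.SizeInBits;
}

int main() {
  assert(fragmentsOverlap({0, 32}, {16, 32}));  // [0,32) vs [16,48)
  assert(!fragmentsOverlap({0, 32}, {32, 32})); // adjacent, disjoint
  return 0;
}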
// XXX XXX docs
class InstrRefBasedLDV : public LDVImpl {
@@ -817,6 +851,16 @@ private:
OverlapMap OverlapFragments;
VarToFragments SeenFragments;
+ /// True if we need to examine call instructions for stack clobbers. We
+ /// normally assume that they don't clobber SP, but stack probes on Windows
+ /// do.
+ bool AdjustsStackInCalls = false;
+
+ /// If AdjustsStackInCalls is true, this holds the name of the target's stack
+ /// probe function, which is the function we expect will alter the stack
+ /// pointer.
+ StringRef StackProbeSymbolName;
+
/// Tests whether this instruction is a spill to a stack slot.
bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF);
@@ -962,7 +1006,6 @@ private:
/// \returns true if any live-ins change value, either from value propagation
/// or PHI elimination.
bool vlocJoin(MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs,
- SmallPtrSet<const MachineBasicBlock *, 8> &InScopeBlocks,
SmallPtrSet<const MachineBasicBlock *, 8> &BlocksToExplore,
DbgValue &LiveIn);
diff --git a/llvm/lib/CodeGen/LiveDebugVariables.cpp b/llvm/lib/CodeGen/LiveDebugVariables.cpp
index dcd546f9c6db..5f976bf43c5b 100644
--- a/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -1875,34 +1875,57 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
LLVM_DEBUG(dbgs() << "********** EMITTING INSTR REFERENCES **********\n");
- // Re-insert any debug instrs back in the position they were. Ordering
- // is preserved by vector. We must re-insert in the same order to ensure that
- // debug instructions don't swap, which could re-order assignments.
- for (auto &P : StashedDebugInstrs) {
- SlotIndex Idx = P.Idx;
+ // Re-insert any debug instrs back into the positions they came from. We must
+ // re-insert in the same order to ensure that debug instructions don't swap,
+ // which could re-order assignments. Do so in a batch -- once we find the
+ // insert position, insert all instructions at the same SlotIdx. They are
+ // guaranteed to appear in-sequence in StashedDebugInstrs because we insert
+ // them in order.
+ for (auto StashIt = StashedDebugInstrs.begin();
+ StashIt != StashedDebugInstrs.end(); ++StashIt) {
+ SlotIndex Idx = StashIt->Idx;
+ MachineBasicBlock *MBB = StashIt->MBB;
+ MachineInstr *MI = StashIt->MI;
+
+ auto EmitInstsHere = [this, &StashIt, MBB, Idx,
+ MI](MachineBasicBlock::iterator InsertPos) {
+ // Insert this debug instruction.
+ MBB->insert(InsertPos, MI);
+
+ // Look at subsequent stashed debug instructions: if they're at the same
+ // index, insert those too.
+ auto NextItem = std::next(StashIt);
+ while (NextItem != StashedDebugInstrs.end() && NextItem->Idx == Idx) {
+ assert(NextItem->MBB == MBB && "Instrs with same slot index should be "
+ "in the same block");
+ MBB->insert(InsertPos, NextItem->MI);
+ StashIt = NextItem;
+ NextItem = std::next(StashIt);
+ }
+ };
// Start block index: find the first non-debug instr in the block, and
// insert before it.
- if (Idx == Slots->getMBBStartIdx(P.MBB)) {
+ if (Idx == Slots->getMBBStartIdx(MBB)) {
MachineBasicBlock::iterator InsertPos =
- findInsertLocation(P.MBB, Idx, *LIS, BBSkipInstsMap);
- P.MBB->insert(InsertPos, P.MI);
+ findInsertLocation(MBB, Idx, *LIS, BBSkipInstsMap);
+ EmitInstsHere(InsertPos);
continue;
}
if (MachineInstr *Pos = Slots->getInstructionFromIndex(Idx)) {
// Insert at the end of any debug instructions.
auto PostDebug = std::next(Pos->getIterator());
- PostDebug = skipDebugInstructionsForward(PostDebug, P.MBB->instr_end());
- P.MBB->insert(PostDebug, P.MI);
+ PostDebug = skipDebugInstructionsForward(PostDebug, MBB->instr_end());
+ EmitInstsHere(PostDebug);
} else {
// Insert position disappeared; walk forwards through slots until we
// find a new one.
- SlotIndex End = Slots->getMBBEndIdx(P.MBB);
+ SlotIndex End = Slots->getMBBEndIdx(MBB);
for (; Idx < End; Idx = Slots->getNextNonNullIndex(Idx)) {
Pos = Slots->getInstructionFromIndex(Idx);
if (Pos) {
- P.MBB->insert(Pos->getIterator(), P.MI);
+ EmitInstsHere(Pos->getIterator());
break;
}
}
@@ -1911,8 +1934,8 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
// insert! It's not safe to discard any debug instructions; place them
// in front of the first terminator, or in front of end().
if (Idx >= End) {
- auto TermIt = P.MBB->getFirstTerminator();
- P.MBB->insert(TermIt, P.MI);
+ auto TermIt = MBB->getFirstTerminator();
+ EmitInstsHere(TermIt);
}
}
}
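The batching in EmitInstsHere relies on StashedDebugInstrs being ordered, so every entry sharing a SlotIndex is contiguous and can be drained in one pass once the insert position has been found. The same run-grouping pattern in miniature, with (index, instruction) pairs mocked as (int, char):

#include <cstdio>
#include <iterator>
#include <utility>
#include <vector>

int main() {
  std::vector<std::pair<int, char>> Stashed = {
      {10, 'a'}, {10, 'b'}, {12, 'c'}, {12, 'd'}, {15, 'e'}};
  for (auto It = Stashed.begin(); It != Stashed.end(); ++It) {
    int Idx = It->first;
    std::printf("insert at %d:", Idx);  // find the position once...
    std::printf(" %c", It->second);
    auto Next = std::next(It);
    while (Next != Stashed.end() && Next->first == Idx) { // ...then batch
      std::printf(" %c", Next->second);
      It = Next;
      Next = std::next(It);
    }
    std::printf("\n");
  }
  return 0;
}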
diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp
index d91ff734ad8f..6380c4bfd6e6 100644
--- a/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -108,8 +108,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
SlotIndex UseIdx) const {
OrigIdx = OrigIdx.getRegSlot(true);
UseIdx = std::max(UseIdx, UseIdx.getRegSlot(true));
- for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = OrigMI->getOperand(i);
+ for (const MachineOperand &MO : OrigMI->operands()) {
if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
continue;
@@ -425,15 +424,8 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead,
// The new intervals would have to be spilled anyway so its not worth it.
// Also they currently aren't spilled so creating them and not spilling
// them results in incorrect code.
- bool BeingSpilled = false;
- for (unsigned i = 0, e = RegsBeingSpilled.size(); i != e; ++i) {
- if (VReg == RegsBeingSpilled[i]) {
- BeingSpilled = true;
- break;
- }
- }
-
- if (BeingSpilled) continue;
+ if (llvm::is_contained(RegsBeingSpilled, VReg))
+ continue;
// LI may have been separated, create new intervals.
LI->RenumberValues();
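llvm::is_contained(Range, Element) is the std::find idiom the deleted loop spelled out by hand; a generic stand-in, assuming only the standard library:

#include <algorithm>
#include <cassert>
#include <iterator>
#include <vector>

template <typename Range, typename T>
bool isContained(const Range &R, const T &Value) {
  return std::find(std::begin(R), std::end(R), Value) != std::end(R);
}

int main() {
  std::vector<unsigned> RegsBeingSpilled = {5, 9, 17};
  assert(isContained(RegsBeingSpilled, 9u));
  assert(!isContained(RegsBeingSpilled, 4u));
  return 0;
}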
diff --git a/llvm/lib/CodeGen/LiveRangeUtils.h b/llvm/lib/CodeGen/LiveRangeUtils.h
index dace05f1ad95..ada5c5be484a 100644
--- a/llvm/lib/CodeGen/LiveRangeUtils.h
+++ b/llvm/lib/CodeGen/LiveRangeUtils.h
@@ -18,7 +18,7 @@
namespace llvm {
/// Helper function that distributes live range value numbers and the
-/// corresponding segments of a master live range \p LR to a list of newly
+/// corresponding segments of a primary live range \p LR to a list of newly
/// created live ranges \p SplitLRs. \p VNIClasses maps each value number in \p
/// LR to 0 meaning it should stay or to 1..N meaning it should go to a specific
/// live range in the \p SplitLRs array.
diff --git a/llvm/lib/CodeGen/LiveVariables.cpp b/llvm/lib/CodeGen/LiveVariables.cpp
index 51ba4b7e53eb..e8744797707b 100644
--- a/llvm/lib/CodeGen/LiveVariables.cpp
+++ b/llvm/lib/CodeGen/LiveVariables.cpp
@@ -58,9 +58,9 @@ void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const {
MachineInstr *
LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const {
- for (unsigned i = 0, e = Kills.size(); i != e; ++i)
- if (Kills[i]->getParent() == MBB)
- return Kills[i];
+ for (MachineInstr *MI : Kills)
+ if (MI->getParent() == MBB)
+ return MI;
return nullptr;
}
@@ -811,8 +811,8 @@ bool LiveVariables::isLiveOut(Register Reg, const MachineBasicBlock &MBB) {
LiveVariables::VarInfo &VI = getVarInfo(Reg);
SmallPtrSet<const MachineBasicBlock *, 8> Kills;
- for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
- Kills.insert(VI.Kills[i]->getParent());
+ for (MachineInstr *MI : VI.Kills)
+ Kills.insert(MI->getParent());
// Loop over all of the successors of the basic block, checking to see if
// the value is either live in the block, or if it is killed in the block.
diff --git a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
index 2e99c8595cbd..ee2387d1e8e6 100644
--- a/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -316,14 +316,14 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// than that, but the increased register pressure makes that a
// tricky thing to balance. Investigate if re-materializing these
// becomes an issue.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ for (const MachineOperand &MO : MI.operands()) {
// Consider replacing all frame index operands that reference
// an object allocated in the local block.
- if (MI.getOperand(i).isFI()) {
+ if (MO.isFI()) {
// Don't try this with values not in the local block.
- if (!MFI.isObjectPreAllocated(MI.getOperand(i).getIndex()))
+ if (!MFI.isObjectPreAllocated(MO.getIndex()))
break;
- int Idx = MI.getOperand(i).getIndex();
+ int Idx = MO.getIndex();
int64_t LocalOffset = LocalOffsets[Idx];
if (!TRI->needsFrameBaseReg(&MI, LocalOffset))
break;
diff --git a/llvm/lib/CodeGen/MIRSampleProfile.cpp b/llvm/lib/CodeGen/MIRSampleProfile.cpp
index 90ecc6fc68fc..b742ad9823c9 100644
--- a/llvm/lib/CodeGen/MIRSampleProfile.cpp
+++ b/llvm/lib/CodeGen/MIRSampleProfile.cpp
@@ -314,6 +314,8 @@ bool MIRProfileLoaderPass::runOnMachineFunction(MachineFunction &MF) {
}
bool Changed = MIRSampleLoader->runOnFunction(MF);
+ if (Changed)
+ MBFI->calculate(MF, *MBFI->getMBPI(), getAnalysis<MachineLoopInfo>());
if (ViewBFIAfter && ViewBlockLayoutWithBFI != GVDT_None &&
(ViewBlockFreqFuncName.empty() ||
diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp
index 366d06871245..310c2721c3bd 100644
--- a/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/llvm/lib/CodeGen/MachineFunction.cpp
@@ -1170,9 +1170,10 @@ auto MachineFunction::salvageCopySSA(MachineInstr &MI)
void MachineFunction::finalizeDebugInstrRefs() {
auto *TII = getSubtarget().getInstrInfo();
- auto MakeDbgValue = [&](MachineInstr &MI) {
+ auto MakeUndefDbgValue = [&](MachineInstr &MI) {
const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_VALUE);
MI.setDesc(RefII);
+ MI.getOperand(0).setReg(0);
MI.getOperand(1).ChangeToRegister(0, false);
};
@@ -1187,15 +1188,15 @@ void MachineFunction::finalizeDebugInstrRefs() {
Register Reg = MI.getOperand(0).getReg();
// Some vregs can be deleted as redundant in the meantime. Mark those
- // as DBG_VALUE $noreg.
- if (Reg == 0) {
- MakeDbgValue(MI);
+ // as DBG_VALUE $noreg. Additionally, some normal instructions are
+ // quickly deleted, leaving dangling references to vregs with no def.
+ if (Reg == 0 || !RegInfo->hasOneDef(Reg)) {
+ MakeUndefDbgValue(MI);
continue;
}
assert(Reg.isVirtual());
MachineInstr &DefMI = *RegInfo->def_instr_begin(Reg);
- assert(RegInfo->hasOneDef(Reg));
// If we've found a copy-like instruction, follow it back to the
// instruction that defines the source value, see salvageCopySSA docs
@@ -1327,9 +1328,9 @@ bool MachineJumpTableInfo::ReplaceMBBInJumpTable(unsigned Idx,
assert(Old != New && "Not making a change?");
bool MadeChange = false;
MachineJumpTableEntry &JTE = JumpTables[Idx];
- for (size_t j = 0, e = JTE.MBBs.size(); j != e; ++j)
- if (JTE.MBBs[j] == Old) {
- JTE.MBBs[j] = New;
+ for (MachineBasicBlock *&MBB : JTE.MBBs)
+ if (MBB == Old) {
+ MBB = New;
MadeChange = true;
}
return MadeChange;
@@ -1342,8 +1343,8 @@ void MachineJumpTableInfo::print(raw_ostream &OS) const {
for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) {
OS << printJumpTableEntryReference(i) << ':';
- for (unsigned j = 0, f = JumpTables[i].MBBs.size(); j != f; ++j)
- OS << ' ' << printMBBReference(*JumpTables[i].MBBs[j]);
+ for (const MachineBasicBlock *MBB : JumpTables[i].MBBs)
+ OS << ' ' << printMBBReference(*MBB);
if (i != e)
OS << '\n';
}
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp
index 5c4f75e9ceb9..aaa80432d2f2 100644
--- a/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/llvm/lib/CodeGen/MachineInstr.cpp
@@ -1490,12 +1490,10 @@ bool MachineInstr::allDefsAreDead() const {
/// instruction to this instruction.
void MachineInstr::copyImplicitOps(MachineFunction &MF,
const MachineInstr &MI) {
- for (unsigned i = MI.getDesc().getNumOperands(), e = MI.getNumOperands();
- i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO :
+ llvm::drop_begin(MI.operands(), MI.getDesc().getNumOperands()))
if ((MO.isReg() && MO.isImplicit()) || MO.isRegMask())
addOperand(MF, MO);
- }
}
bool MachineInstr::hasComplexRegisterTies() const {
diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp
index 4d080e1a4f82..680dbe54ffaf 100644
--- a/llvm/lib/CodeGen/MachineOperand.cpp
+++ b/llvm/lib/CodeGen/MachineOperand.cpp
@@ -1071,7 +1071,9 @@ void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) {
// The Value and Offset may differ due to CSE. But the flags and size
// should be the same.
assert(MMO->getFlags() == getFlags() && "Flags mismatch!");
- assert(MMO->getSize() == getSize() && "Size mismatch!");
+ assert((MMO->getSize() == ~UINT64_C(0) || getSize() == ~UINT64_C(0) ||
+ MMO->getSize() == getSize()) &&
+ "Size mismatch!");
if (MMO->getBaseAlign() >= getBaseAlign()) {
// Update the alignment value.
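The relaxed assertion above treats ~UINT64_C(0) as an "unknown size" sentinel and only rejects two known sizes that disagree. Restated as a standalone predicate (the helper name is invented):

#include <cassert>
#include <cstdint>

constexpr uint64_t UnknownSize = ~UINT64_C(0); // sentinel: size not known

bool sizesConsistent(uint64_t A, uint64_t B) {
  return A == UnknownSize || B == UnknownSize || A == B;
}

int main() {
  assert(sizesConsistent(8, 8));
  assert(sizesConsistent(UnknownSize, 8)); // unknown matches anything
  assert(!sizesConsistent(4, 8));          // two known, different: mismatch
  return 0;
}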
diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp
index cfbccebaff3e..7783b5e0d3cc 100644
--- a/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -617,20 +617,11 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
F->addFnAttr(Attribute::OptimizeForSize);
F->addFnAttr(Attribute::MinSize);
- // Include target features from an arbitrary candidate for the outlined
- // function. This makes sure the outlined function knows what kinds of
- // instructions are going into it. This is fine, since all parent functions
- // must necessarily support the instructions that are in the outlined region.
Candidate &FirstCand = OF.Candidates.front();
- const Function &ParentFn = FirstCand.getMF()->getFunction();
- if (ParentFn.hasFnAttribute("target-features"))
- F->addFnAttr(ParentFn.getFnAttribute("target-features"));
+ const TargetInstrInfo &TII =
+ *FirstCand.getMF()->getSubtarget().getInstrInfo();
- // Set nounwind, so we don't generate eh_frame.
- if (llvm::all_of(OF.Candidates, [](const outliner::Candidate &C) {
- return C.getMF()->getFunction().hasFnAttribute(Attribute::NoUnwind);
- }))
- F->addFnAttr(Attribute::NoUnwind);
+ TII.mergeOutliningCandidateAttributes(*F, OF.Candidates);
BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
IRBuilder<> Builder(EntryBB);
@@ -639,8 +630,6 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);
MachineBasicBlock &MBB = *MF.CreateMachineBasicBlock();
- const TargetSubtargetInfo &STI = MF.getSubtarget();
- const TargetInstrInfo &TII = *STI.getInstrInfo();
// Insert the new function into the module.
MF.insert(MF.begin(), &MBB);
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index e18318386def..8d6459a627fa 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -1455,17 +1455,15 @@ void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
int asap = 0;
int zeroLatencyDepth = 0;
SUnit *SU = &SUnits[I];
- for (SUnit::const_pred_iterator IP = SU->Preds.begin(),
- EP = SU->Preds.end();
- IP != EP; ++IP) {
- SUnit *pred = IP->getSUnit();
- if (IP->getLatency() == 0)
+ for (const SDep &P : SU->Preds) {
+ SUnit *pred = P.getSUnit();
+ if (P.getLatency() == 0)
zeroLatencyDepth =
std::max(zeroLatencyDepth, getZeroLatencyDepth(pred) + 1);
- if (ignoreDependence(*IP, true))
+ if (ignoreDependence(P, true))
continue;
- asap = std::max(asap, (int)(getASAP(pred) + IP->getLatency() -
- getDistance(pred, SU, *IP) * MII));
+ asap = std::max(asap, (int)(getASAP(pred) + P.getLatency() -
+ getDistance(pred, SU, P) * MII));
}
maxASAP = std::max(maxASAP, asap);
ScheduleInfo[I].ASAP = asap;
@@ -1521,9 +1519,8 @@ static bool pred_L(SetVector<SUnit *> &NodeOrder,
SmallSetVector<SUnit *, 8> &Preds,
const NodeSet *S = nullptr) {
Preds.clear();
- for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
- I != E; ++I) {
- for (const SDep &Pred : (*I)->Preds) {
+ for (const SUnit *SU : NodeOrder) {
+ for (const SDep &Pred : SU->Preds) {
if (S && S->count(Pred.getSUnit()) == 0)
continue;
if (ignoreDependence(Pred, true))
@@ -1532,7 +1529,7 @@ static bool pred_L(SetVector<SUnit *> &NodeOrder,
Preds.insert(Pred.getSUnit());
}
// Back-edges are predecessors with an anti-dependence.
- for (const SDep &Succ : (*I)->Succs) {
+ for (const SDep &Succ : SU->Succs) {
if (Succ.getKind() != SDep::Anti)
continue;
if (S && S->count(Succ.getSUnit()) == 0)
@@ -2546,8 +2543,7 @@ void SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
unsigned Pos = 0;
for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;
++I, ++Pos) {
- for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
continue;
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index 30745c7a5583..54c478645dcf 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -596,8 +596,7 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
// MI is cheap, we probably don't want to break the critical edge for it.
// However, if this would allow some definitions of its source operands
// to be sunk then it's probably worth it.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.isUse())
continue;
Register Reg = MO.getReg();
@@ -789,8 +788,7 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
// If this instruction is inside a loop and sinking it can shorten the
// live ranges of more registers, it is still profitable.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
// Ignore non-register operands.
if (!MO.isReg())
continue;
@@ -889,8 +887,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
// SuccToSinkTo - This is the successor to sink this instruction to, once we
// decide.
MachineBasicBlock *SuccToSinkTo = nullptr;
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg()) continue; // Ignore non-register operands.
Register Reg = MO.getReg();
@@ -1322,8 +1319,7 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
// If the instruction to move defines a dead physical register which is live
// when leaving the basic block, don't move it because it could turn into a
// "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
- for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
- const MachineOperand &MO = MI.getOperand(I);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || MO.isUse())
continue;
Register Reg = MO.getReg();
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index d6bb3e7c9e58..32078db76cf3 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -1276,11 +1276,9 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
if (DstTy.getNumElements() != MI->getNumOperands() - 1)
report("G_BUILD_VECTOR must have an operand for each elemement", MI);
- for (unsigned i = 2; i < MI->getNumOperands(); ++i) {
- if (MRI->getType(MI->getOperand(1).getReg()) !=
- MRI->getType(MI->getOperand(i).getReg()))
+ for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2))
+ if (MRI->getType(MI->getOperand(1).getReg()) != MRI->getType(MO.getReg()))
report("G_BUILD_VECTOR source operand types are not homogeneous", MI);
- }
break;
}
@@ -1292,12 +1290,10 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
if (!DstTy.isVector() || SrcEltTy.isVector())
report("G_BUILD_VECTOR_TRUNC must produce a vector from scalar operands",
MI);
- for (unsigned i = 2; i < MI->getNumOperands(); ++i) {
- if (MRI->getType(MI->getOperand(1).getReg()) !=
- MRI->getType(MI->getOperand(i).getReg()))
+ for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2))
+ if (MRI->getType(MI->getOperand(1).getReg()) != MRI->getType(MO.getReg()))
report("G_BUILD_VECTOR_TRUNC source operand types are not homogeneous",
MI);
- }
if (SrcEltTy.getSizeInBits() <= DstTy.getElementType().getSizeInBits())
report("G_BUILD_VECTOR_TRUNC source operand types are not larger than "
"dest elt type",
@@ -1316,11 +1312,9 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
if (MI->getNumOperands() < 3)
report("G_CONCAT_VECTOR requires at least 2 source operands", MI);
- for (unsigned i = 2; i < MI->getNumOperands(); ++i) {
- if (MRI->getType(MI->getOperand(1).getReg()) !=
- MRI->getType(MI->getOperand(i).getReg()))
+ for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2))
+ if (MRI->getType(MI->getOperand(1).getReg()) != MRI->getType(MO.getReg()))
report("G_CONCAT_VECTOR source operand types are not homogeneous", MI);
- }
if (DstTy.getNumElements() !=
SrcTy.getNumElements() * (MI->getNumOperands() - 1))
report("G_CONCAT_VECTOR num dest and source elements should match", MI);
@@ -3063,9 +3057,9 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
SlotIndex PEnd = LiveInts->getMBBEndIdx(Pred);
// Predecessor of landing pad live-out on last call.
if (MFI->isEHPad()) {
- for (auto I = Pred->rbegin(), E = Pred->rend(); I != E; ++I) {
- if (I->isCall()) {
- PEnd = Indexes->getInstructionIndex(*I).getBoundaryIndex();
+ for (const MachineInstr &MI : llvm::reverse(*Pred)) {
+ if (MI.isCall()) {
+ PEnd = Indexes->getInstructionIndex(MI).getBoundaryIndex();
break;
}
}
diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp
index 8b3cdfab4d42..aaa6403cc978 100644
--- a/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -73,8 +73,7 @@ void ModuloScheduleExpander::expand() {
// stage difference for each use. Keep the maximum value.
for (MachineInstr *MI : Schedule.getInstructions()) {
int DefStage = Schedule.getStage(MI);
- for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
- MachineOperand &Op = MI->getOperand(i);
+ for (const MachineOperand &Op : MI->operands()) {
if (!Op.isReg() || !Op.isDef())
continue;
@@ -1006,8 +1005,7 @@ void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
unsigned CurStageNum,
unsigned InstrStageNum,
ValueMapTy *VRMap) {
- for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = NewMI->getOperand(i);
+ for (MachineOperand &MO : NewMI->operands()) {
if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
continue;
Register reg = MO.getReg();
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 9a4f70a6070f..29a88480fd9f 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -527,9 +527,9 @@ static void updateLiveness(MachineFunction &MF) {
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ for (const CalleeSavedInfo &I : CSI) {
for (MachineBasicBlock *MBB : Visited) {
- MCPhysReg Reg = CSI[i].getReg();
+ MCPhysReg Reg = I.getReg();
// Add the callee-saved register as live-in.
// It's killed at the spill.
if (!MRI.isReserved(Reg) && !MBB->isLiveIn(Reg))
@@ -540,17 +540,16 @@ static void updateLiveness(MachineFunction &MF) {
// each MBB between the prologue and epilogue so that it is not clobbered
// before it is reloaded in the epilogue. The Visited set contains all
// blocks outside of the region delimited by prologue/epilogue.
- if (CSI[i].isSpilledToReg()) {
+ if (I.isSpilledToReg()) {
for (MachineBasicBlock &MBB : MF) {
if (Visited.count(&MBB))
continue;
- MCPhysReg DstReg = CSI[i].getDstReg();
+ MCPhysReg DstReg = I.getDstReg();
if (!MBB.isLiveIn(DstReg))
MBB.addLiveIn(DstReg);
}
}
}
-
}
/// Insert restore code for the callee-saved registers used in the function.
@@ -902,9 +901,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
// incoming stack pointer if a frame pointer is required and is closer
// to the incoming rather than the final stack pointer.
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
- bool EarlyScavengingSlots = (TFI.hasFP(MF) && TFI.isFPCloseToIncomingSP() &&
- RegInfo->useFPForScavengingIndex(MF) &&
- !RegInfo->hasStackRealignment(MF));
+ bool EarlyScavengingSlots =
+ TFI.allocateScavengingFrameIndexesNearIncomingSP(MF);
if (RS && EarlyScavengingSlots) {
SmallVector<int, 2> SFIs;
RS->getScavengingFrameIndices(SFIs);
diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index 68920e2e50df..6653145d3d2a 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -1258,8 +1258,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
// Free registers occupied by defs.
// Iterate operands in reverse order, so we see the implicit super register
// defs first (we added them earlier in case of <def,read-undef>).
- for (unsigned I = MI.getNumOperands(); I-- > 0;) {
- MachineOperand &MO = MI.getOperand(I);
+ for (MachineOperand &MO : llvm::reverse(MI.operands())) {
if (!MO.isReg() || !MO.isDef())
continue;
@@ -1362,8 +1361,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
// Free early clobbers.
if (HasEarlyClobber) {
- for (unsigned I = MI.getNumOperands(); I-- > 0; ) {
- MachineOperand &MO = MI.getOperand(I);
+ for (MachineOperand &MO : llvm::reverse(MI.operands())) {
if (!MO.isReg() || !MO.isDef() || !MO.isEarlyClobber())
continue;
// subreg defs don't free the full register. We left the subreg number
@@ -1440,8 +1438,7 @@ void RegAllocFast::handleBundle(MachineInstr &MI) {
MachineBasicBlock::instr_iterator BundledMI = MI.getIterator();
++BundledMI;
while (BundledMI->isBundledWithPred()) {
- for (unsigned I = 0; I < BundledMI->getNumOperands(); ++I) {
- MachineOperand &MO = BundledMI->getOperand(I);
+ for (MachineOperand &MO : BundledMI->operands()) {
if (!MO.isReg())
continue;
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 5a93b58e0baf..50411c177007 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -199,7 +199,8 @@ class RAGreedy : public MachineFunctionPass,
struct RegInfo {
LiveRangeStage Stage = RS_New;
- // Cascade - Eviction loop prevention. See canEvictInterference().
+ // Cascade - Eviction loop prevention. See
+ // canEvictInterferenceBasedOnCost().
unsigned Cascade = 0;
RegInfo() = default;
@@ -207,13 +208,51 @@ class RAGreedy : public MachineFunctionPass,
IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo;
+ LiveRangeStage getStage(Register Reg) const {
+ return ExtraRegInfo[Reg].Stage;
+ }
+
LiveRangeStage getStage(const LiveInterval &VirtReg) const {
- return ExtraRegInfo[VirtReg.reg()].Stage;
+ return getStage(VirtReg.reg());
+ }
+
+ void setStage(Register Reg, LiveRangeStage Stage) {
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+ ExtraRegInfo[Reg].Stage = Stage;
}
void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) {
+ setStage(VirtReg.reg(), Stage);
+ }
+
+ /// Return the current stage of the register if present; otherwise
+ /// initialize it and return that.
+ LiveRangeStage getOrInitStage(Register Reg) {
+ ExtraRegInfo.grow(Reg);
+ return getStage(Reg);
+ }
+
+ unsigned getCascade(Register Reg) const { return ExtraRegInfo[Reg].Cascade; }
+
+ void setCascade(Register Reg, unsigned Cascade) {
ExtraRegInfo.resize(MRI->getNumVirtRegs());
- ExtraRegInfo[VirtReg.reg()].Stage = Stage;
+ ExtraRegInfo[Reg].Cascade = Cascade;
+ }
+
+ unsigned getOrAssignNewCascade(Register Reg) {
+ unsigned Cascade = getCascade(Reg);
+ if (!Cascade) {
+ Cascade = NextCascade++;
+ setCascade(Reg, Cascade);
+ }
+ return Cascade;
+ }
+
+ unsigned getCascadeOrCurrentNext(Register Reg) const {
+ unsigned Cascade = getCascade(Reg);
+ if (!Cascade)
+ Cascade = NextCascade;
+ return Cascade;
}
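The cascade helpers added above implement a lazily assigned, monotonically increasing ID: 0 means "no cascade yet", and a register receives its number the first time it evicts interference. A minimal model of that behaviour, with a hypothetical map standing in for ExtraRegInfo:

#include <cassert>
#include <unordered_map>

struct CascadeMap {
  std::unordered_map<unsigned, unsigned> Cascade; // VirtReg -> ID (0 = none)
  unsigned NextCascade = 1;

  unsigned getOrAssign(unsigned Reg) {
    unsigned &C = Cascade[Reg]; // value-initialized to 0 on first access
    if (!C)
      C = NextCascade++;
    return C;
  }
};

int main() {
  CascadeMap M;
  assert(M.getOrAssign(7) == 1); // first assignment
  assert(M.getOrAssign(7) == 1); // stable on re-query
  assert(M.getOrAssign(9) == 2); // next register gets the next number
  return 0;
}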
template<typename Iterator>
@@ -410,8 +449,11 @@ private:
void calcGapWeights(MCRegister, SmallVectorImpl<float> &);
Register canReassign(LiveInterval &VirtReg, Register PrevReg) const;
bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool) const;
- bool canEvictInterference(LiveInterval &, MCRegister, bool, EvictionCost &,
- const SmallVirtRegSet &) const;
+ bool canEvictInterferenceBasedOnCost(LiveInterval &, MCRegister, bool,
+ EvictionCost &,
+ const SmallVirtRegSet &) const;
+ bool canEvictHintInterference(LiveInterval &, MCRegister,
+ const SmallVirtRegSet &) const;
bool canEvictInterferenceInRange(const LiveInterval &VirtReg,
MCRegister PhysReg, SlotIndex Start,
SlotIndex End, EvictionCost &MaxCost) const;
@@ -683,15 +725,16 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
assert(Reg.isVirtual() && "Can only enqueue virtual registers");
unsigned Prio;
- ExtraRegInfo.grow(Reg);
- if (ExtraRegInfo[Reg].Stage == RS_New)
- ExtraRegInfo[Reg].Stage = RS_Assign;
-
- if (ExtraRegInfo[Reg].Stage == RS_Split) {
+ auto Stage = getOrInitStage(Reg);
+ if (Stage == RS_New) {
+ Stage = RS_Assign;
+ setStage(Reg, Stage);
+ }
+ if (Stage == RS_Split) {
// Unsplit ranges that couldn't be allocated immediately are deferred until
// everything else has been allocated.
Prio = Size;
- } else if (ExtraRegInfo[Reg].Stage == RS_Memory) {
+ } else if (Stage == RS_Memory) {
// Memory operand should be considered last.
// Change the priority such that Memory operand are assigned in
// the reverse order that they came in.
@@ -706,7 +749,7 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
bool ForceGlobal = !ReverseLocal &&
(Size / SlotIndex::InstrDist) > (2 * RCI.getNumAllocatableRegs(&RC));
- if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() &&
+ if (Stage == RS_Assign && !ForceGlobal && !LI->empty() &&
LIS->intervalIsInOneMBB(*LI)) {
// Allocate original local ranges in linear instruction order. Since they
// are singly defined, this produces optimal coloring in the absence of
@@ -780,10 +823,8 @@ MCRegister RAGreedy::tryAssign(LiveInterval &VirtReg,
if (Order.isHint(Hint)) {
MCRegister PhysHint = Hint.asMCReg();
LLVM_DEBUG(dbgs() << "missed hint " << printReg(PhysHint, TRI) << '\n');
- EvictionCost MaxCost;
- MaxCost.setBrokenHints(1);
- if (canEvictInterference(VirtReg, PhysHint, true, MaxCost,
- FixedRegisters)) {
+
+ if (canEvictHintInterference(VirtReg, PhysHint, FixedRegisters)) {
evictInterference(VirtReg, PhysHint, NewVRegs);
return PhysHint;
}
@@ -864,8 +905,19 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
return false;
}
-/// canEvictInterference - Return true if all interferences between VirtReg and
-/// PhysReg can be evicted.
+/// canEvictHintInterference - Return true if the interference for VirtReg
+/// on PhysReg, which is VirtReg's hint, can be evicted in favor of VirtReg.
+bool RAGreedy::canEvictHintInterference(
+ LiveInterval &VirtReg, MCRegister PhysReg,
+ const SmallVirtRegSet &FixedRegisters) const {
+ EvictionCost MaxCost;
+ MaxCost.setBrokenHints(1);
+ return canEvictInterferenceBasedOnCost(VirtReg, PhysReg, true, MaxCost,
+ FixedRegisters);
+}
+
+/// canEvictInterferenceBasedOnCost - Return true if all interferences between
+/// VirtReg and PhysReg can be evicted.
///
/// @param VirtReg Live range that is about to be assigned.
/// @param PhysReg Desired register for assignment.
@@ -873,7 +925,7 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
/// @param MaxCost Only look for cheaper candidates and update with new cost
/// when returning true.
/// @returns True when interference can be evicted cheaper than MaxCost.
-bool RAGreedy::canEvictInterference(
+bool RAGreedy::canEvictInterferenceBasedOnCost(
LiveInterval &VirtReg, MCRegister PhysReg, bool IsHint,
EvictionCost &MaxCost, const SmallVirtRegSet &FixedRegisters) const {
// It is only possible to evict virtual register interference.
@@ -1054,9 +1106,7 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg,
// Make sure that VirtReg has a cascade number, and assign that cascade
// number to every evicted register. These live ranges can then only be
// evicted by a newer cascade, preventing infinite loops.
- unsigned Cascade = ExtraRegInfo[VirtReg.reg()].Cascade;
- if (!Cascade)
- Cascade = ExtraRegInfo[VirtReg.reg()].Cascade = NextCascade++;
+ unsigned Cascade = getOrAssignNewCascade(VirtReg.reg());
LLVM_DEBUG(dbgs() << "evicting " << printReg(PhysReg, TRI)
<< " interference: Cascade " << Cascade << '\n');
@@ -1082,10 +1132,10 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg,
LastEvicted.addEviction(PhysReg, VirtReg.reg(), Intf->reg());
Matrix->unassign(*Intf);
- assert((ExtraRegInfo[Intf->reg()].Cascade < Cascade ||
+ assert((getCascade(Intf->reg()) < Cascade ||
VirtReg.isSpillable() < Intf->isSpillable()) &&
"Cannot decrease cascade number, illegal eviction");
- ExtraRegInfo[Intf->reg()].Cascade = Cascade;
+ setCascade(Intf->reg(), Cascade);
++NumEvicted;
NewVRegs.push_back(Intf->reg());
}
@@ -1150,8 +1200,8 @@ MCRegister RAGreedy::tryFindEvictionCandidate(
continue;
}
- if (!canEvictInterference(VirtReg, PhysReg, false, BestCost,
- FixedRegisters))
+ if (!canEvictInterferenceBasedOnCost(VirtReg, PhysReg, false, BestCost,
+ FixedRegisters))
continue;
// Best so far.
@@ -1756,7 +1806,6 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
SE->finish(&IntvMap);
DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);
- ExtraRegInfo.resize(MRI->getNumVirtRegs());
unsigned OrigBlocks = SA->getNumLiveBlocks();
// Sort out the new intervals created by splitting. We get four kinds:
@@ -1765,10 +1814,10 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
// - Block-local splits are candidates for local splitting.
// - DCE leftovers should go back on the queue.
for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
- LiveInterval &Reg = LIS->getInterval(LREdit.get(I));
+ const LiveInterval &Reg = LIS->getInterval(LREdit.get(I));
// Ignore old intervals from DCE.
- if (getStage(Reg) != RS_New)
+ if (getOrInitStage(Reg.reg()) != RS_New)
continue;
// Remainder interval. Don't try splitting again, spill if it doesn't
@@ -2012,13 +2061,11 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// Tell LiveDebugVariables about the new ranges.
DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);
- ExtraRegInfo.resize(MRI->getNumVirtRegs());
-
// Sort out the new intervals created by splitting. The remainder interval
// goes straight to spilling, the new local ranges get to stay RS_New.
for (unsigned I = 0, E = LREdit.size(); I != E; ++I) {
- LiveInterval &LI = LIS->getInterval(LREdit.get(I));
- if (getStage(LI) == RS_New && IntvMap[I] == 0)
+ const LiveInterval &LI = LIS->getInterval(LREdit.get(I));
+ if (getOrInitStage(LI.reg()) == RS_New && IntvMap[I] == 0)
setStage(LI, RS_Spill);
}
@@ -2104,8 +2151,6 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
SmallVector<unsigned, 8> IntvMap;
SE->finish(&IntvMap);
DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
- ExtraRegInfo.resize(MRI->getNumVirtRegs());
-
// Assign all new registers to RS_Spill. This was the last chance.
setStage(LREdit.begin(), LREdit.end(), RS_Spill);
return 0;
@@ -2400,7 +2445,6 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
SmallVector<unsigned, 8> IntvMap;
SE->finish(&IntvMap);
DebugVars->splitRegister(VirtReg.reg(), LREdit.regs(), *LIS);
-
// If the new range has the same number of instructions as before, mark it as
// RS_Split2 so the next split will be forced to make progress. Otherwise,
// leave the new intervals as RS_New so they can compete.
@@ -3021,7 +3065,7 @@ MCRegister RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
LiveRangeStage Stage = getStage(VirtReg);
LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade "
- << ExtraRegInfo[VirtReg.reg()].Cascade << '\n');
+ << getCascade(VirtReg.reg()) << '\n');
// Try to evict a less worthy live range, but only for ranges from the primary
// queue. The RS_Split ranges already failed to do this, and they should not
@@ -3311,7 +3355,6 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI, *VRAI));
ExtraRegInfo.clear();
- ExtraRegInfo.resize(MRI->getNumVirtRegs());
NextCascade = 1;
IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
GlobalCand.resize(32); // This will grow as needed.
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index c847068bca90..4c8534cf2d01 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -3908,20 +3908,20 @@ void RegisterCoalescer::lateLiveIntervalUpdate() {
bool RegisterCoalescer::
copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
bool Progress = false;
- for (unsigned i = 0, e = CurrList.size(); i != e; ++i) {
- if (!CurrList[i])
+ for (MachineInstr *&MI : CurrList) {
+ if (!MI)
continue;
// Skip instruction pointers that have already been erased, for example by
// dead code elimination.
- if (ErasedInstrs.count(CurrList[i])) {
- CurrList[i] = nullptr;
+ if (ErasedInstrs.count(MI)) {
+ MI = nullptr;
continue;
}
bool Again = false;
- bool Success = joinCopy(CurrList[i], Again);
+ bool Success = joinCopy(MI, Again);
Progress |= Success;
if (Success || !Again)
- CurrList[i] = nullptr;
+ MI = nullptr;
}
return Progress;
}
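
The switch to a range-based loop works because the element type is a reference to the pointer, so assigning nullptr clears the worklist slot itself. The same idiom in isolation (plain C++):

    #include <array>
    #include <cassert>

    int main() {
      int A = 1, B = 2;
      std::array<int *, 2> Work = {&A, &B};
      for (int *&P : Work) // note the reference: P aliases the array slot
        P = nullptr;       // clears the slot itself, not a local copy
      assert(Work[0] == nullptr && Work[1] == nullptr);
    }
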
diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 3f013eb6024e..0e8e8338b46d 100644
--- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -406,11 +406,10 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
// register in later operands. The lanes of other defs will now be live
// after this instruction, so these should not be treated as killed by the
// instruction even though they appear to be killed in this one operand.
- for (int I = OperIdx + 1, E = MI->getNumOperands(); I != E; ++I) {
- const MachineOperand &OtherMO = MI->getOperand(I);
+ for (const MachineOperand &OtherMO :
+ llvm::drop_begin(MI->operands(), OperIdx + 1))
if (OtherMO.isReg() && OtherMO.isDef() && OtherMO.getReg() == Reg)
KillLaneMask &= ~getLaneMaskForMO(OtherMO);
- }
}
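
llvm::drop_begin(Range, N) is a view over the range with its first N elements skipped, so the loop still starts at operand OperIdx + 1. A self-contained stand-in for it, assuming a random-access container:

    #include <cstddef>
    #include <iostream>
    #include <vector>

    // Minimal stand-in for llvm::drop_begin: a begin/end pair shifted by N.
    template <typename R> auto dropBegin(R &Range, std::size_t N) {
      struct View {
        typename R::iterator B, E;
        auto begin() { return B; }
        auto end() { return E; }
      };
      return View{Range.begin() + N, Range.end()};
    }

    int main() {
      std::vector<int> Ops = {10, 11, 12, 13};
      for (int Op : dropBegin(Ops, 2)) // visits 12 and 13 only
        std::cout << Op << '\n';
    }
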
// Clear undef flag, we'll re-add it later once we know which subregister
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ce400ea43f29..df5a041b87cd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4436,7 +4436,7 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
SDValue OptimizedDiv =
isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
- if (OptimizedDiv.getNode()) {
+ if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
// If the equivalent Div node also exists, update its users.
unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
@@ -4464,6 +4464,9 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
SDLoc DL(N);
if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
+
// fold (mulhs x, 0) -> 0
// do not return N0/N1, because an undef node may exist.
if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) ||
@@ -4521,6 +4524,9 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
SDLoc DL(N);
if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
+
// fold (mulhu x, 0) -> 0
// do not return N0/N1, because an undef node may exist.
if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) ||
@@ -4779,6 +4785,106 @@ SDValue DAGCombiner::visitMULO(SDNode *N) {
return SDValue();
}
+// Function to calculate whether the Min/Max pair of SDNodes (potentially
+// swapped around) makes a signed saturate pattern, clamping to between
+// -2^(BW-1) and 2^(BW-1)-1. Returns the node being clamped and the bitwidth
+// of the clamp in BW. Should work with both SMIN/SMAX nodes and a
+// setcc/select combo. The operands are the same as SimplifySelectCC:
+// N0 < N1 ? N2 : N3.
+static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
+ SDValue N3, ISD::CondCode CC, unsigned &BW) {
+ auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
+ ISD::CondCode CC) {
+ // The compare and select operand should be the same or the select operands
+ // should be truncated versions of the comparison.
+ if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
+ return 0;
+ // The constants need to be the same or a truncated version of each other.
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+ ConstantSDNode *N3C = isConstOrConstSplat(N3);
+ if (!N1C || !N3C)
+ return 0;
+ const APInt &C1 = N1C->getAPIntValue();
+ const APInt &C2 = N3C->getAPIntValue();
+ if (C1.getBitWidth() < C2.getBitWidth() ||
+ C1 != C2.sextOrSelf(C1.getBitWidth()))
+ return 0;
+ return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
+ };
+
+ // Check the initial value is a SMIN/SMAX equivalent.
+ unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
+ if (!Opcode0)
+ return SDValue();
+
+ SDValue N00, N01, N02, N03;
+ ISD::CondCode N0CC;
+ switch (N0.getOpcode()) {
+ case ISD::SMIN:
+ case ISD::SMAX:
+ N00 = N02 = N0.getOperand(0);
+ N01 = N03 = N0.getOperand(1);
+ N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
+ break;
+ case ISD::SELECT_CC:
+ N00 = N0.getOperand(0);
+ N01 = N0.getOperand(1);
+ N02 = N0.getOperand(2);
+ N03 = N0.getOperand(3);
+ N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
+ break;
+ case ISD::SELECT:
+ case ISD::VSELECT:
+ if (N0.getOperand(0).getOpcode() != ISD::SETCC)
+ return SDValue();
+ N00 = N0.getOperand(0).getOperand(0);
+ N01 = N0.getOperand(0).getOperand(1);
+ N02 = N0.getOperand(1);
+ N03 = N0.getOperand(2);
+ N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
+ break;
+ default:
+ return SDValue();
+ }
+
+ unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
+ if (!Opcode1 || Opcode0 == Opcode1)
+ return SDValue();
+
+ ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
+ ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
+ if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
+ return SDValue();
+
+ const APInt &MinC = MinCOp->getAPIntValue();
+ const APInt &MaxC = MaxCOp->getAPIntValue();
+ APInt MinCPlus1 = MinC + 1;
+ if (-MaxC != MinCPlus1 || !MinCPlus1.isPowerOf2())
+ return SDValue();
+ BW = MinCPlus1.exactLogBase2() + 1;
+ return N02;
+}
+
+static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
+ SDValue N3, ISD::CondCode CC,
+ SelectionDAG &DAG) {
+ unsigned BW;
+ SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW);
+ if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
+ return SDValue();
+ EVT FPVT = Fp.getOperand(0).getValueType();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
+ if (FPVT.isVector())
+ NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
+ FPVT.getVectorElementCount());
+ if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(
+ ISD::FP_TO_SINT_SAT, Fp.getOperand(0).getValueType(), NewVT))
+ return SDValue();
+ SDLoc DL(Fp);
+ SDValue Sat = DAG.getNode(ISD::FP_TO_SINT_SAT, DL, NewVT, Fp.getOperand(0),
+ DAG.getValueType(NewVT.getScalarType()));
+ return DAG.getSExtOrTrunc(Sat, DL, N2->getValueType(0));
+}
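
For intuition, with BW = 8 this is the classic int8 saturate; hedging on node order (the min and max can appear either way around), the transformation reads:

    // MinC = 127, MaxC = -128: MinC + 1 = 128 is a power of two and
    // -MaxC == 128, so BW = log2(128) + 1 = 8, and
    //   smax(smin(fp_to_sint(x), 127), -128)
    // becomes
    //   sext(fp_to_sint_sat(x) : i8)  // saturating fp->int, widened back
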
+
SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -4817,6 +4923,11 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
return DAG.getNode(AltOpcode, DL, VT, N0, N1);
}
+ if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
+ if (SDValue S = PerformMinMaxFpToSatCombine(
+ N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
+ return S;
+
// Simplify the operands using demanded-bits information.
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
@@ -9940,9 +10051,8 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
// If this is a masked store with an all-ones mask, we can use an unmasked
// store.
// FIXME: Can we do this for indexed, compressing, or truncating stores?
- if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) &&
- MST->isUnindexed() && !MST->isCompressingStore() &&
- !MST->isTruncatingStore())
+ if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
+ !MST->isCompressingStore() && !MST->isTruncatingStore())
return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
MST->getBasePtr(), MST->getMemOperand());
@@ -9997,9 +10107,8 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
// If this is a masked load with an all-ones mask, we can use an unmasked
// load.
// FIXME: Can we do this for indexed, expanding, or extending loads?
- if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) &&
- MLD->isUnindexed() && !MLD->isExpandingLoad() &&
- MLD->getExtensionType() == ISD::NON_EXTLOAD) {
+ if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
+ !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
SDValue NewLd = DAG.getLoad(N->getValueType(0), SDLoc(N), MLD->getChain(),
MLD->getBasePtr(), MLD->getMemOperand());
return CombineTo(N, NewLd, NewLd.getValue(1));
@@ -10138,6 +10247,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
return FMinMax;
}
+ if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
+ return S;
+
// If this select has a condition (setcc) with narrower operands than the
// select, try to widen the compare to match the select width.
// TODO: This should be extended to handle any constant.
@@ -15007,7 +15119,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
// fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
- TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
+ TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
LN0->getChain(),
@@ -23034,6 +23146,9 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
}
+ if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
+ return S;
+
return SDValue();
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index c1bb65409282..331e0325aea3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -765,7 +765,7 @@ InstrEmitter::EmitDbgInstrRef(SDDbgValue *SD,
assert(!SD->isVariadic());
SDDbgOperand DbgOperand = SD->getLocationOps()[0];
MDNode *Var = SD->getVariable();
- MDNode *Expr = SD->getExpression();
+ DIExpression *Expr = SD->getExpression();
DebugLoc DL = SD->getDebugLoc();
const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_INSTR_REF);
@@ -775,6 +775,13 @@ InstrEmitter::EmitDbgInstrRef(SDDbgValue *SD,
DbgOperand.getKind() == SDDbgOperand::CONST)
return EmitDbgValueFromSingleOp(SD, VRBaseMap);
+ // Immediately fold any indirectness from the LLVM-IR intrinsic into the
+ // expression:
+ if (SD->isIndirect()) {
+ std::vector<uint64_t> Elts = {dwarf::DW_OP_deref};
+ Expr = DIExpression::append(Expr, Elts);
+ }
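
Folding the indirect flag into the expression keeps a single representation downstream: "the variable lives at this address" becomes an explicit dereference. Schematically (assumed MIR-like syntax, for illustration only):

    ; before: indirect dbg.value with an empty expression
    DBG_VALUE %addr, !var, !DIExpression()            [indirect]
    ; after: indirectness made explicit in the expression
    DBG_INSTR_REF ..., !var, !DIExpression(DW_OP_deref)
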
+
// It may not be immediately possible to identify the MachineInstr that
// defines a VReg; it can depend, for example, on the order blocks are
// emitted in. When this happens, or when further analysis is needed later,
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index eb9d2286aeb4..08598eeded7a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3553,9 +3553,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// Node.
Tmp1 = Node->getOperand(0);
Tmp2 = Node->getOperand(1);
- if (Tmp2.getOpcode() == ISD::SETCC) {
- Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other,
- Tmp1, Tmp2.getOperand(2),
+ if (Tmp2.getOpcode() == ISD::SETCC &&
+ TLI.isOperationLegalOrCustom(ISD::BR_CC,
+ Tmp2.getOperand(0).getValueType())) {
+ Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1, Tmp2.getOperand(2),
Tmp2.getOperand(0), Tmp2.getOperand(1),
Node->getOperand(2));
} else {
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 1f73c9eea104..98312f91d8c0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -28,7 +28,7 @@ using namespace llvm;
static cl::opt<bool>
EnableExpensiveChecks("enable-legalize-types-checking", cl::Hidden);
-/// Do extensive, expensive, sanity checking.
+/// Do extensive, expensive, basic correctness checking.
void DAGTypeLegalizer::PerformExpensiveChecks() {
// If a node is not processed, then none of its values should be mapped by any
// of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues.
@@ -534,7 +534,8 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) {
// The node morphed into a different node. Normally for this to happen
// the original node would have to be marked NewNode. However this can
// in theory momentarily not be the case while ReplaceValueWith is doing
- // its stuff. Mark the original node NewNode to help sanity checking.
+ // its stuff. Mark the original node NewNode to help basic correctness
+ // checking.
N->setNodeId(NewNode);
if (M->getNodeId() != NewNode && M->getNodeId() != Unanalyzed)
// It morphed into a previously analyzed node - nothing more to do.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 539c9cb9c256..7ec2638b1e71 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1820,10 +1820,10 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
else
std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);
- unsigned LoSize = MemoryLocation::getSizeOrUnknown(LoMemVT.getStoreSize());
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- MLD->getPointerInfo(), MachineMemOperand::MOLoad, LoSize, Alignment,
- MLD->getAAInfo(), MLD->getRanges());
+ MLD->getPointerInfo(), MachineMemOperand::MOLoad,
+ MemoryLocation::UnknownSize, Alignment, MLD->getAAInfo(),
+ MLD->getRanges());
Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, Offset, MaskLo, PassThruLo, LoMemVT,
MMO, MLD->getAddressingMode(), ExtType,
@@ -1837,7 +1837,6 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
// Generate hi masked load.
Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG,
MLD->isExpandingLoad());
- unsigned HiSize = MemoryLocation::getSizeOrUnknown(HiMemVT.getStoreSize());
MachinePointerInfo MPI;
if (LoMemVT.isScalableVector())
@@ -1847,8 +1846,8 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
LoMemVT.getStoreSize().getFixedSize());
MMO = DAG.getMachineFunction().getMachineMemOperand(
- MPI, MachineMemOperand::MOLoad, HiSize, Alignment, MLD->getAAInfo(),
- MLD->getRanges());
+ MPI, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize, Alignment,
+ MLD->getAAInfo(), MLD->getRanges());
Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, Offset, MaskHi, PassThruHi,
HiMemVT, MMO, MLD->getAddressingMode(), ExtType,
@@ -2662,10 +2661,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
DAG.GetDependentSplitDestVTs(MemoryVT, DataLo.getValueType(), &HiIsEmpty);
SDValue Lo, Hi, Res;
- unsigned LoSize = MemoryLocation::getSizeOrUnknown(LoMemVT.getStoreSize());
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
- N->getPointerInfo(), MachineMemOperand::MOStore, LoSize, Alignment,
- N->getAAInfo(), N->getRanges());
+ N->getPointerInfo(), MachineMemOperand::MOStore,
+ MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges());
Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, Offset, MaskLo, LoMemVT, MMO,
N->getAddressingMode(), N->isTruncatingStore(),
@@ -2689,10 +2687,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
MPI = N->getPointerInfo().getWithOffset(
LoMemVT.getStoreSize().getFixedSize());
- unsigned HiSize = MemoryLocation::getSizeOrUnknown(HiMemVT.getStoreSize());
MMO = DAG.getMachineFunction().getMachineMemOperand(
- MPI, MachineMemOperand::MOStore, HiSize, Alignment, N->getAAInfo(),
- N->getRanges());
+ MPI, MachineMemOperand::MOStore, MemoryLocation::UnknownSize, Alignment,
+ N->getAAInfo(), N->getRanges());
Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, Offset, MaskHi, HiMemVT, MMO,
N->getAddressingMode(), N->isTruncatingStore(),
diff --git a/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index 55fe26eb64cd..2695ed36991c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -268,8 +268,8 @@ bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) {
// Now see if there are no other dependencies
// to instructions already in the packet.
- for (unsigned i = 0, e = Packet.size(); i != e; ++i)
- for (const SDep &Succ : Packet[i]->Succs) {
+ for (const SUnit *S : Packet)
+ for (const SDep &Succ : S->Succs) {
// Since we do not add pseudos to packets, we might as well
// ignore order deps.
if (Succ.isCtrl())
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 95f7e43b151d..84e6d2a16422 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -706,8 +706,8 @@ void ScheduleDAGSDNodes::dump() const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ScheduleDAGSDNodes::dumpSchedule() const {
- for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
- if (SUnit *SU = Sequence[i])
+ for (const SUnit *SU : Sequence) {
+ if (SU)
dumpNode(*SU);
else
dbgs() << "**** NOOP ****\n";
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 008665d50233..c282e03387dd 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -406,8 +406,8 @@ bool ISD::isVPOpcode(unsigned Opcode) {
switch (Opcode) {
default:
return false;
-#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) \
- case ISD::SDOPC: \
+#define BEGIN_REGISTER_VP_SDNODE(VPSD, ...) \
+ case ISD::VPSD: \
return true;
#include "llvm/IR/VPIntrinsics.def"
}
@@ -416,23 +416,25 @@ bool ISD::isVPOpcode(unsigned Opcode) {
bool ISD::isVPBinaryOp(unsigned Opcode) {
switch (Opcode) {
default:
- return false;
-#define PROPERTY_VP_BINARYOP_SDNODE(SDOPC) \
- case ISD::SDOPC: \
- return true;
+ break;
+#define BEGIN_REGISTER_VP_SDNODE(VPSD, ...) case ISD::VPSD:
+#define VP_PROPERTY_BINARYOP return true;
+#define END_REGISTER_VP_SDNODE(VPSD) break;
#include "llvm/IR/VPIntrinsics.def"
}
+ return false;
}
bool ISD::isVPReduction(unsigned Opcode) {
switch (Opcode) {
default:
- return false;
-#define PROPERTY_VP_REDUCTION_SDNODE(SDOPC) \
- case ISD::SDOPC: \
- return true;
+ break;
+#define BEGIN_REGISTER_VP_SDNODE(VPSD, ...) case ISD::VPSD:
+#define VP_PROPERTY_REDUCTION(STARTPOS, ...) return true;
+#define END_REGISTER_VP_SDNODE(VPSD) break;
#include "llvm/IR/VPIntrinsics.def"
}
+ return false;
}
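
The rewritten predicates lean on the .def file's X-macro layout: BEGIN_REGISTER_VP_SDNODE expands to a case label, the property macro expands to the return placed inside that case, and END_REGISTER_VP_SDNODE closes it. A reduced illustration of the technique (hypothetical opcode list, not VPIntrinsics.def):

    #include <cstdio>

    // Stand-in for the .def file: each opcode registers a BEGIN/END pair and
    // properties expand to code placed between them.
    #define MY_OPCODES                                                        \
      BEGIN_OP(Add) PROP_BINARY END_OP(Add)                                   \
      BEGIN_OP(Load) END_OP(Load)

    enum Opcode { Add, Load };

    static bool isBinaryOp(unsigned Opc) {
      switch (Opc) {
      default:
        break;
    #define BEGIN_OP(OP) case OP:
    #define PROP_BINARY return true;
    #define END_OP(OP) break;
        MY_OPCODES
    #undef BEGIN_OP
    #undef PROP_BINARY
    #undef END_OP
      }
      return false;
    }

    int main() { std::printf("%d %d\n", isBinaryOp(Add), isBinaryOp(Load)); }
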
/// The operand position of the vector mask.
@@ -440,8 +442,8 @@ Optional<unsigned> ISD::getVPMaskIdx(unsigned Opcode) {
switch (Opcode) {
default:
return None;
-#define BEGIN_REGISTER_VP_SDNODE(SDOPC, LEGALPOS, TDNAME, MASKPOS, ...) \
- case ISD::SDOPC: \
+#define BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, TDNAME, MASKPOS, ...) \
+ case ISD::VPSD: \
return MASKPOS;
#include "llvm/IR/VPIntrinsics.def"
}
@@ -452,8 +454,8 @@ Optional<unsigned> ISD::getVPExplicitVectorLengthIdx(unsigned Opcode) {
switch (Opcode) {
default:
return None;
-#define BEGIN_REGISTER_VP_SDNODE(SDOPC, LEGALPOS, TDNAME, MASKPOS, EVLPOS) \
- case ISD::SDOPC: \
+#define BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, TDNAME, MASKPOS, EVLPOS) \
+ case ISD::VPSD: \
return EVLPOS;
#include "llvm/IR/VPIntrinsics.def"
}
@@ -974,7 +976,7 @@ void SelectionDAG::DeallocateNode(SDNode *N) {
}
#ifndef NDEBUG
-/// VerifySDNode - Sanity check the given SDNode. Aborts if it is invalid.
+/// VerifySDNode - Check the given SDNode. Aborts if it is invalid.
static void VerifySDNode(SDNode *N) {
switch (N->getOpcode()) {
default:
@@ -4540,10 +4542,25 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
}
// FIXME: unify with llvm::haveNoCommonBitsSet.
-// FIXME: could also handle masked merge pattern (X & ~M) op (Y & M)
bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
assert(A.getValueType() == B.getValueType() &&
"Values must have the same type");
+ // Match masked merge pattern (X & ~M) op (Y & M)
+ if (A->getOpcode() == ISD::AND && B->getOpcode() == ISD::AND) {
+ auto MatchNoCommonBitsPattern = [&](SDValue NotM, SDValue And) {
+ if (isBitwiseNot(NotM, true)) {
+ SDValue NotOperand = NotM->getOperand(0);
+ return NotOperand == And->getOperand(0) ||
+ NotOperand == And->getOperand(1);
+ }
+ return false;
+ };
+ if (MatchNoCommonBitsPattern(A->getOperand(0), B) ||
+ MatchNoCommonBitsPattern(A->getOperand(1), B) ||
+ MatchNoCommonBitsPattern(B->getOperand(0), A) ||
+ MatchNoCommonBitsPattern(B->getOperand(1), A))
+ return true;
+ }
return KnownBits::haveNoCommonBitsSet(computeKnownBits(A),
computeKnownBits(B));
}
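
The new match is the masked-merge disjointness fact: (X & ~M) and (Y & M) select complementary bit positions, so they can never share a set bit (which is what lets callers treat their add as an or). A quick standalone check of the claim:

    #include <cassert>
    #include <cstdint>

    int main() {
      // (X & ~M) and (Y & M) pick complementary bit positions, so their AND
      // is always zero and add / or / xor all agree on the combined value.
      for (uint32_t M : {0x0u, 0xFFu, 0xA5A5u, ~0x0u}) {
        uint32_t A = 0xDEADBEEFu & ~M, B = 0x12345678u & M;
        assert((A & B) == 0);
        assert((A + B) == (A | B) && (A | B) == (A ^ B));
      }
    }
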
@@ -5070,7 +5087,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return getUNDEF(VT);
break;
case ISD::BITCAST:
- // Basic sanity checking.
assert(VT.getSizeInBits() == Operand.getValueSizeInBits() &&
"Cannot BITCAST between types of different sizes!");
if (VT == Operand.getValueType()) return Operand; // noop conversion.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 5d911c165293..7726a0007e44 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4336,9 +4336,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
- // TODO: Make MachineMemOperands aware of scalable
- // vectors.
- VT.getStoreSize().getKnownMinSize(), *Alignment, I.getAAMetadata());
+ MemoryLocation::UnknownSize, *Alignment, I.getAAMetadata());
SDValue StoreNode =
DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO,
ISD::UNINDEXED, false /* Truncating */, IsCompressing);
@@ -4496,22 +4494,14 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
// Do not serialize masked loads of constant memory with anything.
- MemoryLocation ML;
- if (VT.isScalableVector())
- ML = MemoryLocation::getAfter(PtrOperand);
- else
- ML = MemoryLocation(PtrOperand, LocationSize::precise(
- DAG.getDataLayout().getTypeStoreSize(I.getType())),
- AAInfo);
+ MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
bool AddToChain = !AA || !AA->pointsToConstantMemory(ML);
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
- // TODO: Make MachineMemOperands aware of scalable
- // vectors.
- VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo, Ranges);
+ MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
SDValue Load =
DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Offset, Mask, Src0, VT, MMO,
@@ -5807,8 +5797,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::vscale: {
match(&I, m_VScale(DAG.getDataLayout()));
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
- setValue(&I,
- DAG.getVScale(getCurSDLoc(), VT, APInt(VT.getSizeInBits(), 1)));
+ setValue(&I, DAG.getVScale(sdl, VT, APInt(VT.getSizeInBits(), 1)));
return;
}
case Intrinsic::vastart: visitVAStart(I); return;
@@ -6942,10 +6931,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT);
unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, PtrRC);
- SDValue N =
- DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT);
+ SDValue N = DAG.getCopyFromReg(DAG.getEntryNode(), sdl, VReg, PtrVT);
if (Intrinsic == Intrinsic::eh_exceptioncode)
- N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32);
+ N = DAG.getZExtOrTrunc(N, sdl, MVT::i32);
setValue(&I, N);
return;
}
@@ -6957,7 +6945,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
if (Triple.getArch() != Triple::x86_64)
return;
- SDLoc DL = getCurSDLoc();
SmallVector<SDValue, 8> Ops;
// We want to say that we always want the arguments in registers.
@@ -6974,7 +6961,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// see that some registers may be assumed clobbered and have to preserve
// them across calls to the intrinsic.
MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHABLE_EVENT_CALL,
- DL, NodeTys, Ops);
+ sdl, NodeTys, Ops);
SDValue patchableNode = SDValue(MN, 0);
DAG.setRoot(patchableNode);
setValue(&I, patchableNode);
@@ -6988,7 +6975,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
if (Triple.getArch() != Triple::x86_64)
return;
- SDLoc DL = getCurSDLoc();
SmallVector<SDValue, 8> Ops;
// We want to say that we always want the arguments in registers.
@@ -7009,7 +6995,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// see that some registers may be assumed clobbered and have to preserve
// them across calls to the intrinsic.
MachineSDNode *MN = DAG.getMachineNode(
- TargetOpcode::PATCHABLE_TYPED_EVENT_CALL, DL, NodeTys, Ops);
+ TargetOpcode::PATCHABLE_TYPED_EVENT_CALL, sdl, NodeTys, Ops);
SDValue patchableNode = SDValue(MN, 0);
DAG.setRoot(patchableNode);
setValue(&I, patchableNode);
@@ -7047,7 +7033,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
if (!Base)
report_fatal_error(
"llvm.icall.branch.funnel operand must be a GlobalValue");
- Ops.push_back(DAG.getTargetGlobalAddress(Base, getCurSDLoc(), MVT::i64, 0));
+ Ops.push_back(DAG.getTargetGlobalAddress(Base, sdl, MVT::i64, 0));
struct BranchFunnelTarget {
int64_t Offset;
@@ -7068,8 +7054,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
report_fatal_error(
"llvm.icall.branch.funnel operand must be a GlobalValue");
Targets.push_back({Offset, DAG.getTargetGlobalAddress(
- GA->getGlobal(), getCurSDLoc(),
- Val.getValueType(), GA->getOffset())});
+ GA->getGlobal(), sdl, Val.getValueType(),
+ GA->getOffset())});
}
llvm::sort(Targets,
[](const BranchFunnelTarget &T1, const BranchFunnelTarget &T2) {
@@ -7077,13 +7063,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
});
for (auto &T : Targets) {
- Ops.push_back(DAG.getTargetConstant(T.Offset, getCurSDLoc(), MVT::i32));
+ Ops.push_back(DAG.getTargetConstant(T.Offset, sdl, MVT::i32));
Ops.push_back(T.Target);
}
Ops.push_back(DAG.getRoot()); // Chain
- SDValue N(DAG.getMachineNode(TargetOpcode::ICALL_BRANCH_FUNNEL,
- getCurSDLoc(), MVT::Other, Ops),
+ SDValue N(DAG.getMachineNode(TargetOpcode::ICALL_BRANCH_FUNNEL, sdl,
+ MVT::Other, Ops),
0);
DAG.setRoot(N);
setValue(&I, N);
@@ -7102,7 +7088,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
bool ZeroMemory = Intrinsic == Intrinsic::aarch64_settag_zero;
SDValue Val = TSI.EmitTargetCodeForSetTag(
- DAG, getCurSDLoc(), getRoot(), getValue(I.getArgOperand(0)),
+ DAG, sdl, getRoot(), getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), MachinePointerInfo(I.getArgOperand(0)),
ZeroMemory);
DAG.setRoot(Val);
@@ -7114,46 +7100,42 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
SDValue Const = getValue(I.getOperand(1));
EVT PtrVT = Ptr.getValueType();
- setValue(&I, DAG.getNode(ISD::AND, getCurSDLoc(), PtrVT, Ptr,
- DAG.getZExtOrTrunc(Const, getCurSDLoc(), PtrVT)));
+ setValue(&I, DAG.getNode(ISD::AND, sdl, PtrVT, Ptr,
+ DAG.getZExtOrTrunc(Const, sdl, PtrVT)));
return;
}
case Intrinsic::get_active_lane_mask: {
- auto DL = getCurSDLoc();
+ EVT CCVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
SDValue Index = getValue(I.getOperand(0));
- SDValue TripCount = getValue(I.getOperand(1));
- Type *ElementTy = I.getOperand(0)->getType();
- EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
- unsigned VecWidth = VT.getVectorNumElements();
+ EVT ElementVT = Index.getValueType();
- SmallVector<SDValue, 16> OpsTripCount;
- SmallVector<SDValue, 16> OpsIndex;
- SmallVector<SDValue, 16> OpsStepConstants;
- for (unsigned i = 0; i < VecWidth; i++) {
- OpsTripCount.push_back(TripCount);
- OpsIndex.push_back(Index);
- OpsStepConstants.push_back(
- DAG.getConstant(i, DL, EVT::getEVT(ElementTy)));
+ if (!TLI.shouldExpandGetActiveLaneMask(CCVT, ElementVT)) {
+ visitTargetIntrinsic(I, Intrinsic);
+ return;
}
- EVT CCVT = EVT::getVectorVT(I.getContext(), MVT::i1, VecWidth);
+ SDValue TripCount = getValue(I.getOperand(1));
+ auto VecTy = CCVT.changeVectorElementType(ElementVT);
- auto VecTy = EVT::getEVT(FixedVectorType::get(ElementTy, VecWidth));
- SDValue VectorIndex = DAG.getBuildVector(VecTy, DL, OpsIndex);
- SDValue VectorStep = DAG.getBuildVector(VecTy, DL, OpsStepConstants);
+ SDValue VectorIndex, VectorTripCount;
+ if (VecTy.isScalableVector()) {
+ VectorIndex = DAG.getSplatVector(VecTy, sdl, Index);
+ VectorTripCount = DAG.getSplatVector(VecTy, sdl, TripCount);
+ } else {
+ VectorIndex = DAG.getSplatBuildVector(VecTy, sdl, Index);
+ VectorTripCount = DAG.getSplatBuildVector(VecTy, sdl, TripCount);
+ }
+ SDValue VectorStep = DAG.getStepVector(sdl, VecTy);
SDValue VectorInduction = DAG.getNode(
- ISD::UADDO, DL, DAG.getVTList(VecTy, CCVT), VectorIndex, VectorStep);
- SDValue VectorTripCount = DAG.getBuildVector(VecTy, DL, OpsTripCount);
- SDValue SetCC = DAG.getSetCC(DL, CCVT, VectorInduction.getValue(0),
+ ISD::UADDO, sdl, DAG.getVTList(VecTy, CCVT), VectorIndex, VectorStep);
+ SDValue SetCC = DAG.getSetCC(sdl, CCVT, VectorInduction.getValue(0),
VectorTripCount, ISD::CondCode::SETULT);
- setValue(&I, DAG.getNode(ISD::AND, DL, CCVT,
- DAG.getNOT(DL, VectorInduction.getValue(1), CCVT),
+ setValue(&I, DAG.getNode(ISD::AND, sdl, CCVT,
+ DAG.getNOT(sdl, VectorInduction.getValue(1), CCVT),
SetCC));
return;
}
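
When the target opts into expansion, the lowering computes lane i as (Index + i) <u TripCount, using UADDO's overflow bit so that a wrapped Index + i never counts as in range. A scalar model of the emitted computation (hypothetical helper, not the DAG code):

    #include <cstdint>
    #include <vector>

    // Scalar model: lane I is active iff Index + I < TripCount, treating a
    // wrapped (overflowed) Index + I as out of range.
    std::vector<bool> activeLaneMask(uint64_t Index, uint64_t TripCount,
                                     unsigned NumLanes) {
      std::vector<bool> Mask(NumLanes);
      for (unsigned I = 0; I != NumLanes; ++I) {
        uint64_t Sum = Index + I;
        bool Overflow = Sum < Index;            // UADDO's second result
        Mask[I] = !Overflow && Sum < TripCount; // AND(NOT(ov), setult)
      }
      return Mask;
    }
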
case Intrinsic::experimental_vector_insert: {
- auto DL = getCurSDLoc();
-
SDValue Vec = getValue(I.getOperand(0));
SDValue SubVec = getValue(I.getOperand(1));
SDValue Index = getValue(I.getOperand(2));
@@ -7163,16 +7145,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
if (Index.getValueType() != VectorIdxTy)
Index = DAG.getVectorIdxConstant(
- cast<ConstantSDNode>(Index)->getZExtValue(), DL);
+ cast<ConstantSDNode>(Index)->getZExtValue(), sdl);
EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
- setValue(&I, DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ResultVT, Vec, SubVec,
+ setValue(&I, DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, ResultVT, Vec, SubVec,
Index));
return;
}
case Intrinsic::experimental_vector_extract: {
- auto DL = getCurSDLoc();
-
SDValue Vec = getValue(I.getOperand(0));
SDValue Index = getValue(I.getOperand(1));
EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
@@ -7182,9 +7162,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
if (Index.getValueType() != VectorIdxTy)
Index = DAG.getVectorIdxConstant(
- cast<ConstantSDNode>(Index)->getZExtValue(), DL);
+ cast<ConstantSDNode>(Index)->getZExtValue(), sdl);
- setValue(&I, DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, Index));
+ setValue(&I,
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, ResultVT, Vec, Index));
return;
}
case Intrinsic::experimental_vector_reverse:
@@ -7314,9 +7295,9 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {
Optional<unsigned> ResOPC;
switch (VPIntrin.getIntrinsicID()) {
-#define BEGIN_REGISTER_VP_INTRINSIC(INTRIN, ...) case Intrinsic::INTRIN:
-#define BEGIN_REGISTER_VP_SDNODE(VPSDID, ...) ResOPC = ISD::VPSDID;
-#define END_REGISTER_VP_INTRINSIC(...) break;
+#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
+#define BEGIN_REGISTER_VP_SDNODE(VPSD, ...) ResOPC = ISD::VPSD;
+#define END_REGISTER_VP_INTRINSIC(VPID) break;
#include "llvm/IR/VPIntrinsics.def"
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index e4a69adff05b..737695b5eabe 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -645,6 +645,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
if (DemandedBits == 0 || DemandedElts == 0)
return DAG.getUNDEF(Op.getValueType());
+ bool IsLE = DAG.getDataLayout().isLittleEndian();
unsigned NumElts = DemandedElts.getBitWidth();
unsigned BitWidth = DemandedBits.getBitWidth();
KnownBits LHSKnown, RHSKnown;
@@ -663,16 +664,15 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
Src, DemandedBits, DemandedElts, DAG, Depth + 1))
return DAG.getBitcast(DstVT, V);
- // TODO - bigendian once we have test coverage.
- if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0 &&
- DAG.getDataLayout().isLittleEndian()) {
+ if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
unsigned Scale = NumDstEltBits / NumSrcEltBits;
unsigned NumSrcElts = SrcVT.getVectorNumElements();
APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
for (unsigned i = 0; i != Scale; ++i) {
- unsigned Offset = i * NumSrcEltBits;
- APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
+ unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
+ unsigned BitOffset = EltOffset * NumSrcEltBits;
+ APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
if (!Sub.isZero()) {
DemandedSrcBits |= Sub;
for (unsigned j = 0; j != NumElts; ++j)
@@ -687,8 +687,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
}
// TODO - bigendian once we have test coverage.
- if ((NumSrcEltBits % NumDstEltBits) == 0 &&
- DAG.getDataLayout().isLittleEndian()) {
+ if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
unsigned Scale = NumSrcEltBits / NumDstEltBits;
unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
@@ -802,8 +801,8 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
EVT DstVT = Op.getValueType();
- if (DemandedElts == 1 && DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
- DAG.getDataLayout().isLittleEndian() &&
+ if (IsLE && DemandedElts == 1 &&
+ DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
return DAG.getBitcast(DstVT, Src);
}
@@ -913,6 +912,7 @@ bool TargetLowering::SimplifyDemandedBits(
if (Op.getValueType().isScalableVector())
return false;
+ bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
unsigned NumElts = OriginalDemandedElts.getBitWidth();
assert((!Op.getValueType().isVector() ||
NumElts == Op.getValueType().getVectorNumElements()) &&
@@ -1725,11 +1725,40 @@ bool TargetLowering::SimplifyDemandedBits(
case ISD::ROTR: {
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
+ bool IsROTL = (Op.getOpcode() == ISD::ROTL);
// If we're rotating an 0/-1 value, then it stays an 0/-1 value.
if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
return TLO.CombineTo(Op, Op0);
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
+ unsigned Amt = SA->getAPIntValue().urem(BitWidth);
+ unsigned RevAmt = BitWidth - Amt;
+
+ // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
+ // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
+ APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
+ if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
+ Depth + 1))
+ return true;
+
+ // rot*(x, 0) --> x
+ if (Amt == 0)
+ return TLO.CombineTo(Op, Op0);
+
+ // If either half of the rotated bits is completely undemanded, the
+ // rotate can be reduced to a single shift.
+ if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
+ DemandedBits.countTrailingZeros() >= (IsROTL ? Amt : RevAmt)) {
+ Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
+ }
+ if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
+ DemandedBits.countLeadingZeros() >= (IsROTL ? RevAmt : Amt)) {
+ Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
+ }
+ }
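
Both new folds follow from the rotate decomposition rotl(x, Amt) = (x << Amt) | (x >> (BW - Amt)): if the demanded bits fall entirely within one half, the other shift contributes nothing. A worked 8-bit instance:

    // BW = 8: rotl(x, 3) == (x << 3) | (x >> 5).
    // Demanding only bits [7:3] (countTrailingZeros >= 3) kills the srl
    // half, so rotl(x, 3) may become x << 3.
    // Demanding only bits [2:0] (countLeadingZeros >= 5) kills the shl
    // half, so rotl(x, 3) may become x >> 5.
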
+
// For pow-2 bitwidths we only demand the bottom modulo amt bits.
if (isPowerOf2_32(BitWidth)) {
APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
@@ -1887,9 +1916,8 @@ bool TargetLowering::SimplifyDemandedBits(
if (DemandedBits.getActiveBits() <= InBits) {
// If we only need the non-extended bits of the bottom element
// then we can just bitcast to the result.
- if (IsVecInReg && DemandedElts == 1 &&
- VT.getSizeInBits() == SrcVT.getSizeInBits() &&
- TLO.DAG.getDataLayout().isLittleEndian())
+ if (IsLE && IsVecInReg && DemandedElts == 1 &&
+ VT.getSizeInBits() == SrcVT.getSizeInBits())
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
unsigned Opc =
@@ -1925,9 +1953,8 @@ bool TargetLowering::SimplifyDemandedBits(
if (DemandedBits.getActiveBits() <= InBits) {
// If we only need the non-extended bits of the bottom element
// then we can just bitcast to the result.
- if (IsVecInReg && DemandedElts == 1 &&
- VT.getSizeInBits() == SrcVT.getSizeInBits() &&
- TLO.DAG.getDataLayout().isLittleEndian())
+ if (IsLE && IsVecInReg && DemandedElts == 1 &&
+ VT.getSizeInBits() == SrcVT.getSizeInBits())
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
unsigned Opc =
@@ -1976,9 +2003,8 @@ bool TargetLowering::SimplifyDemandedBits(
// If we only need the bottom element then we can just bitcast.
// TODO: Handle ANY_EXTEND?
- if (IsVecInReg && DemandedElts == 1 &&
- VT.getSizeInBits() == SrcVT.getSizeInBits() &&
- TLO.DAG.getDataLayout().isLittleEndian())
+ if (IsLE && IsVecInReg && DemandedElts == 1 &&
+ VT.getSizeInBits() == SrcVT.getSizeInBits())
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
APInt InDemandedBits = DemandedBits.trunc(InBits);
@@ -2140,16 +2166,15 @@ bool TargetLowering::SimplifyDemandedBits(
// Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
// Demand the elt/bit if any of the original elts/bits are demanded.
- // TODO - bigendian once we have test coverage.
- if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0 &&
- TLO.DAG.getDataLayout().isLittleEndian()) {
+ if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
unsigned Scale = BitWidth / NumSrcEltBits;
unsigned NumSrcElts = SrcVT.getVectorNumElements();
APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
for (unsigned i = 0; i != Scale; ++i) {
- unsigned Offset = i * NumSrcEltBits;
- APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
+ unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
+ unsigned BitOffset = EltOffset * NumSrcEltBits;
+ APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
if (!Sub.isZero()) {
DemandedSrcBits |= Sub;
for (unsigned j = 0; j != NumElts; ++j)
@@ -2167,8 +2192,8 @@ bool TargetLowering::SimplifyDemandedBits(
if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
KnownSrcBits, TLO, Depth + 1))
return true;
- } else if ((NumSrcEltBits % BitWidth) == 0 &&
- TLO.DAG.getDataLayout().isLittleEndian()) {
+ } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
+ // TODO - bigendian once we have test coverage.
unsigned Scale = NumSrcEltBits / BitWidth;
unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
@@ -2409,6 +2434,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
SDLoc DL(Op);
unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
// Helper for demanding the specified elements and all the bits of both binary
// operands.
@@ -2484,7 +2510,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// Try calling SimplifyDemandedBits, converting demanded elts to the bits
// of the large element.
// TODO - bigendian once we have test coverage.
- if (TLO.DAG.getDataLayout().isLittleEndian()) {
+ if (IsLE) {
unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
for (unsigned i = 0; i != NumElts; ++i)
@@ -2797,9 +2823,9 @@ bool TargetLowering::SimplifyDemandedVectorElts(
KnownZero = SrcZero.zextOrTrunc(NumElts);
KnownUndef = SrcUndef.zextOrTrunc(NumElts);
- if (Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
+ if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
- DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian()) {
+ DemandedSrcElts == 1) {
// aext - if we just need the bottom element then we can bitcast.
return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
}
@@ -2812,8 +2838,8 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// zext - if we just need the bottom element then we can mask:
// zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
- if (DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian() &&
- Src.getOpcode() == ISD::AND && Op->isOnlyUserOf(Src.getNode()) &&
+ if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
+ Op->isOnlyUserOf(Src.getNode()) &&
Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
SDLoc DL(Op);
EVT SrcVT = Src.getValueType();
@@ -2834,9 +2860,19 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// TODO: There are more binop opcodes that could be handled here - MIN,
// MAX, saturated math, etc.
+ case ISD::ADD: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
+ APInt UndefLHS, ZeroLHS;
+ if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
+ Depth + 1, /*AssumeSingleUse*/ true))
+ return true;
+ }
+ LLVM_FALLTHROUGH;
+ }
case ISD::OR:
case ISD::XOR:
- case ISD::ADD:
case ISD::SUB:
case ISD::FADD:
case ISD::FSUB:
@@ -5586,7 +5622,7 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
.multiplicativeInverse(APInt::getSignedMinValue(W + 1))
.trunc(W);
assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
- assert((D0 * P).isOne() && "Multiplicative inverse sanity check.");
+ assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
// Q = floor((2^W - 1) u/ D)
// R = ((2^W - 1) u% D)
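
The assertion guards the standard Granlund-Montgomery rewrite: for odd D0, N is a multiple of D exactly when N * P (mod 2^W) <= Q. A tiny 8-bit check of that arithmetic (hedged: the real fold also handles even divisors by rotating):

    #include <cassert>
    #include <cstdint>

    int main() {
      // W = 8, D = 3: P = 3^-1 mod 256 = 171 (3 * 171 = 513 = 2*256 + 1),
      // Q = floor(255 / 3) = 85.
      const uint8_t P = 171, Q = 85;
      for (unsigned N = 0; N < 256; ++N)
        assert(((N % 3) == 0) == (uint8_t(N * P) <= Q));
    }
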
@@ -5832,7 +5868,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
.multiplicativeInverse(APInt::getSignedMinValue(W + 1))
.trunc(W);
assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
- assert((D0 * P).isOne() && "Multiplicative inverse sanity check.");
+ assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
// A = floor((2^(W - 1) - 1) / D0) & -2^K
APInt A = APInt::getSignedMaxValue(W).udiv(D0);
diff --git a/llvm/lib/CodeGen/StackSlotColoring.cpp b/llvm/lib/CodeGen/StackSlotColoring.cpp
index 9aea5a7a8853..f49ba5ccd447 100644
--- a/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -159,8 +159,7 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) {
// FIXME: Need the equivalent of MachineRegisterInfo for frameindex operands.
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isFI())
continue;
int FI = MO.getIndex();
@@ -394,8 +393,7 @@ void StackSlotColoring::RewriteInstruction(MachineInstr &MI,
SmallVectorImpl<int> &SlotMapping,
MachineFunction &MF) {
// Update the operands.
- for (unsigned i = 0, ee = MI.getNumOperands(); i != ee; ++i) {
- MachineOperand &MO = MI.getOperand(i);
+ for (MachineOperand &MO : MI.operands()) {
if (!MO.isFI())
continue;
int OldFI = MO.getIndex();
diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp
index 943bd18c6c8b..54fc6ee45d00 100644
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -70,12 +70,6 @@ static cl::opt<unsigned> TailDupIndirectBranchSize(
"end with indirect branches."), cl::init(20),
cl::Hidden);
-static cl::opt<unsigned> TailDupJmpTableLoopSize(
- "tail-dup-jmptable-loop-size",
- cl::desc("Maximum loop latches to consider tail duplication that are "
- "successors of loop header."),
- cl::init(128), cl::Hidden);
-
static cl::opt<bool>
TailDupVerify("tail-dup-verify",
cl::desc("Verify sanity of PHI instructions during taildup"),
@@ -569,29 +563,6 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
if (TailBB.isSuccessor(&TailBB))
return false;
- // When doing tail-duplication with jumptable loops like:
- // 1 -> 2 <-> 3 |
- // \ <-> 4 |
- // \ <-> 5 |
- // \ <-> ... |
- // \---> rest |
- // quadratic number of edges and much more loops are added to CFG. This
- // may cause compile time regression when jumptable is quiet large.
- // So set the limit on jumptable cases.
- auto isLargeJumpTableLoop = [](const MachineBasicBlock &TailBB) {
- const SmallPtrSet<const MachineBasicBlock *, 8> Preds(TailBB.pred_begin(),
- TailBB.pred_end());
- // Check the basic block has large number of successors, all of them only
- // have one successor which is the basic block itself.
- return llvm::count_if(
- TailBB.successors(), [&](const MachineBasicBlock *SuccBB) {
- return Preds.count(SuccBB) && SuccBB->succ_size() == 1;
- }) > TailDupJmpTableLoopSize;
- };
-
- if (isLargeJumpTableLoop(TailBB))
- return false;
-
// Set the limit on the cost to duplicate. When optimizing for size,
// duplicate only one, because one branch instruction can be eliminated to
// compensate for the duplication.
diff --git a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
index b0594ec086b2..fbf190a52585 100644
--- a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -136,6 +136,16 @@ unsigned TargetFrameLowering::getStackAlignmentSkew(
return 0;
}
+bool TargetFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
+ const MachineFunction &MF) const {
+ if (!hasFP(MF))
+ return false;
+
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+ return RegInfo->useFPForScavengingIndex(MF) &&
+ !RegInfo->hasStackRealignment(MF);
+}
+
bool TargetFrameLowering::isSafeForNoCSROpt(const Function &F) {
if (!F.hasLocalLinkage() || F.hasAddressTaken() ||
!F.hasFnAttribute(Attribute::NoRecurse))
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index e74b3195a130..5119dac36713 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -957,8 +957,7 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
// If any of the registers accessed are non-constant, conservatively assume
// the instruction is not rematerializable.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg()) continue;
Register Reg = MO.getReg();
if (Reg == 0)
@@ -1401,3 +1400,21 @@ std::string TargetInstrInfo::createMIROperandComment(
}
TargetInstrInfo::PipelinerLoopInfo::~PipelinerLoopInfo() {}
+
+void TargetInstrInfo::mergeOutliningCandidateAttributes(
+ Function &F, std::vector<outliner::Candidate> &Candidates) const {
+ // Include target features from an arbitrary candidate for the outlined
+ // function. This makes sure the outlined function knows what kinds of
+ // instructions are going into it. This is fine, since all parent functions
+ // must necessarily support the instructions that are in the outlined region.
+ outliner::Candidate &FirstCand = Candidates.front();
+ const Function &ParentFn = FirstCand.getMF()->getFunction();
+ if (ParentFn.hasFnAttribute("target-features"))
+ F.addFnAttr(ParentFn.getFnAttribute("target-features"));
+
+ // Set nounwind, so we don't generate eh_frame.
+ if (llvm::all_of(Candidates, [](const outliner::Candidate &C) {
+ return C.getMF()->getFunction().hasFnAttribute(Attribute::NoUnwind);
+ }))
+ F.addFnAttr(Attribute::NoUnwind);
+}
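
The nounwind merge is an "all candidates agree" reduction; the same shape in miniature (hypothetical types, not the LLVM API):

    #include <algorithm>
    #include <vector>

    struct Candidate { bool ParentIsNoUnwind; };

    // The outlined function may be marked nounwind only when every
    // candidate's parent function already is; one throwing parent
    // poisons the whole set.
    bool outlinedIsNoUnwind(const std::vector<Candidate> &Cands) {
      return std::all_of(Cands.begin(), Cands.end(), [](const Candidate &C) {
        return C.ParentIsNoUnwind;
      });
    }
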
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 1d3bb286c882..d1c2cdeb133b 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -1082,7 +1082,7 @@ const MCExpr *TargetLoweringObjectFileELF::lowerRelativeReference(
if (!LHS->hasGlobalUnnamedAddr() || !LHS->getValueType()->isFunctionTy())
return nullptr;
- // Basic sanity checks.
+ // Basic correctness checks.
if (LHS->getType()->getPointerAddressSpace() != 0 ||
RHS->getType()->getPointerAddressSpace() != 0 || LHS->isThreadLocal() ||
RHS->isThreadLocal())
@@ -2135,7 +2135,7 @@ const MCExpr *TargetLoweringObjectFileWasm::lowerRelativeReference(
if (!LHS->hasGlobalUnnamedAddr() || !LHS->getValueType()->isFunctionTy())
return nullptr;
- // Basic sanity checks.
+ // Basic correctness checks.
if (LHS->getType()->getPointerAddressSpace() != 0 ||
RHS->getType()->getPointerAddressSpace() != 0 || LHS->isThreadLocal() ||
RHS->isThreadLocal())
diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 46cec5407565..dfd962be2882 100644
--- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -373,19 +373,25 @@ static bool isTwoAddrUse(MachineInstr &MI, Register Reg, Register &DstReg) {
return false;
}
-/// Given a register, if has a single in-basic block use, return the use
-/// instruction if it's a copy or a two-address use.
+/// Given a register, if all its uses are in the same basic block, return the
+/// last use instruction if it's a copy or a two-address use.
static MachineInstr *
findOnlyInterestingUse(Register Reg, MachineBasicBlock *MBB,
MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
- bool &IsCopy, Register &DstReg, bool &IsDstPhys) {
- if (!MRI->hasOneNonDBGUse(Reg))
- // None or more than one use.
- return nullptr;
- MachineOperand &UseOp = *MRI->use_nodbg_begin(Reg);
- MachineInstr &UseMI = *UseOp.getParent();
- if (UseMI.getParent() != MBB)
+ bool &IsCopy, Register &DstReg, bool &IsDstPhys,
+ LiveIntervals *LIS) {
+ MachineOperand *UseOp = nullptr;
+ for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
+ MachineInstr *MI = MO.getParent();
+ if (MI->getParent() != MBB)
+ return nullptr;
+ if (isPlainlyKilled(MI, Reg, LIS))
+ UseOp = &MO;
+ }
+ if (!UseOp)
return nullptr;
+ MachineInstr &UseMI = *UseOp->getParent();
+
Register SrcReg;
bool IsSrcPhys;
if (isCopyToReg(UseMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys)) {
@@ -399,7 +405,7 @@ findOnlyInterestingUse(Register Reg, MachineBasicBlock *MBB,
}
if (UseMI.isCommutable()) {
unsigned Src1 = TargetInstrInfo::CommuteAnyOperandIndex;
- unsigned Src2 = UseMI.getOperandNo(&UseOp);
+ unsigned Src2 = UseMI.getOperandNo(UseOp);
if (TII->findCommutedOpIndices(UseMI, Src1, Src2)) {
MachineOperand &MO = UseMI.getOperand(Src1);
if (MO.isReg() && MO.isUse() &&
@@ -492,8 +498,7 @@ void TwoAddressInstructionPass::removeClobberedSrcRegMap(MachineInstr *MI) {
return;
}
- for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (MO.isRegMask()) {
removeMapRegEntry(MO, SrcRegMap, TRI);
continue;
@@ -685,7 +690,6 @@ bool TwoAddressInstructionPass::convertInstTo3Addr(
// If the old instruction is debug value tracked, an update is required.
if (auto OldInstrNum = mi->peekDebugInstrNum()) {
- // Sanity check.
assert(mi->getNumExplicitDefs() == 1);
assert(NewMI->getNumExplicitDefs() == 1);
@@ -724,7 +728,7 @@ void TwoAddressInstructionPass::scanUses(Register DstReg) {
Register NewReg;
Register Reg = DstReg;
while (MachineInstr *UseMI = findOnlyInterestingUse(Reg, MBB, MRI, TII, IsCopy,
- NewReg, IsDstPhys)) {
+ NewReg, IsDstPhys, LIS)) {
if (IsCopy && !Processed.insert(UseMI).second)
break;
@@ -1336,8 +1340,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
// Success, or at least we made an improvement. Keep the unfolded
// instructions and discard the original.
if (LV) {
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.getReg().isVirtual()) {
if (MO.isUse()) {
if (MO.isKill()) {
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
index fb0798f204e1..7673a721c4ea 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
@@ -15,6 +15,7 @@
#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include "llvm/DebugInfo/DWARF/DWARFSection.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
#include "llvm/Support/DJB.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/WithColor.h"
@@ -317,12 +318,33 @@ bool DWARFVerifier::handleDebugAbbrev() {
return NumErrors == 0;
}
-unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S,
- DWARFSectionKind SectionKind) {
+unsigned DWARFVerifier::verifyUnits(const DWARFUnitVector &Units) {
+ unsigned NumDebugInfoErrors = 0;
+ ReferenceMap CrossUnitReferences;
+
+ for (const auto &Unit : Units) {
+ ReferenceMap UnitLocalReferences;
+ NumDebugInfoErrors +=
+ verifyUnitContents(*Unit, UnitLocalReferences, CrossUnitReferences);
+ NumDebugInfoErrors += verifyDebugInfoReferences(
+ UnitLocalReferences, [&](uint64_t Offset) { return Unit.get(); });
+ }
+
+ NumDebugInfoErrors += verifyDebugInfoReferences(
+ CrossUnitReferences, [&](uint64_t Offset) -> DWARFUnit * {
+ if (DWARFUnit *U = Units.getUnitForOffset(Offset))
+ return U;
+ return nullptr;
+ });
+
+ return NumDebugInfoErrors;
+}
+
+unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S) {
const DWARFObject &DObj = DCtx.getDWARFObj();
DWARFDataExtractor DebugInfoData(DObj, S, DCtx.isLittleEndian(), 0);
unsigned NumDebugInfoErrors = 0;
- uint64_t OffsetStart = 0, Offset = 0, UnitIdx = 0;
+ uint64_t Offset = 0, UnitIdx = 0;
uint8_t UnitType = 0;
bool isUnitDWARF64 = false;
bool isHeaderChainValid = true;
@@ -334,48 +356,11 @@ unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S,
/// lies between two valid DIEs.
ReferenceMap CrossUnitReferences;
while (hasDIE) {
- OffsetStart = Offset;
if (!verifyUnitHeader(DebugInfoData, &Offset, UnitIdx, UnitType,
isUnitDWARF64)) {
isHeaderChainValid = false;
if (isUnitDWARF64)
break;
- } else {
- DWARFUnitHeader Header;
- Header.extract(DCtx, DebugInfoData, &OffsetStart, SectionKind);
- ReferenceMap UnitLocalReferences;
- DWARFUnit *Unit;
- switch (UnitType) {
- case dwarf::DW_UT_type:
- case dwarf::DW_UT_split_type: {
- Unit = TypeUnitVector.addUnit(std::make_unique<DWARFTypeUnit>(
- DCtx, S, Header, DCtx.getDebugAbbrev(), &DObj.getRangesSection(),
- &DObj.getLocSection(), DObj.getStrSection(),
- DObj.getStrOffsetsSection(), &DObj.getAddrSection(),
- DObj.getLineSection(), DCtx.isLittleEndian(), false,
- TypeUnitVector));
- break;
- }
- case dwarf::DW_UT_skeleton:
- case dwarf::DW_UT_split_compile:
- case dwarf::DW_UT_compile:
- case dwarf::DW_UT_partial:
- // UnitType = 0 means that we are verifying a compile unit in DWARF v4.
- case 0: {
- Unit = CompileUnitVector.addUnit(std::make_unique<DWARFCompileUnit>(
- DCtx, S, Header, DCtx.getDebugAbbrev(), &DObj.getRangesSection(),
- &DObj.getLocSection(), DObj.getStrSection(),
- DObj.getStrOffsetsSection(), &DObj.getAddrSection(),
- DObj.getLineSection(), DCtx.isLittleEndian(), false,
- CompileUnitVector));
- break;
- }
- default: { llvm_unreachable("Invalid UnitType."); }
- }
- NumDebugInfoErrors +=
- verifyUnitContents(*Unit, UnitLocalReferences, CrossUnitReferences);
- NumDebugInfoErrors += verifyDebugInfoReferences(
- UnitLocalReferences, [&](uint64_t Offset) { return Unit; });
}
hasDIE = DebugInfoData.isValidOffset(Offset);
++UnitIdx;
@@ -386,14 +371,6 @@ unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S,
}
if (!isHeaderChainValid)
++NumDebugInfoErrors;
- NumDebugInfoErrors += verifyDebugInfoReferences(
- CrossUnitReferences, [&](uint64_t Offset) -> DWARFUnit * {
- if (DWARFUnit *U = TypeUnitVector.getUnitForOffset(Offset))
- return U;
- if (DWARFUnit *U = CompileUnitVector.getUnitForOffset(Offset))
- return U;
- return nullptr;
- });
return NumDebugInfoErrors;
}
@@ -403,13 +380,16 @@ bool DWARFVerifier::handleDebugInfo() {
OS << "Verifying .debug_info Unit Header Chain...\n";
DObj.forEachInfoSections([&](const DWARFSection &S) {
- NumErrors += verifyUnitSection(S, DW_SECT_INFO);
+ NumErrors += verifyUnitSection(S);
});
OS << "Verifying .debug_types Unit Header Chain...\n";
DObj.forEachTypesSections([&](const DWARFSection &S) {
- NumErrors += verifyUnitSection(S, DW_SECT_EXT_TYPES);
+ NumErrors += verifyUnitSection(S);
});
+
+ OS << "Verifying non-dwo Units...\n";
+ NumErrors += verifyUnits(DCtx.getNormalUnitsVector());
return NumErrors == 0;
}
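
For context, the verifier reworked above is normally reached through DWARFContext::verify. A hedged usage sketch (error handling abbreviated; assumes a valid object file at Path):

    #include "llvm/DebugInfo/DWARF/DWARFContext.h"
    #include "llvm/Object/ObjectFile.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    // Returns true when the DWARF in the file at Path verifies cleanly.
    static bool verifyDwarf(StringRef Path) {
      auto BinOrErr = object::ObjectFile::createObjectFile(Path);
      if (!BinOrErr) {
        logAllUnhandledErrors(BinOrErr.takeError(), errs(), "verify: ");
        return false;
      }
      std::unique_ptr<DWARFContext> Ctx =
          DWARFContext::create(*BinOrErr->getBinary());
      return Ctx->verify(outs());
    }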
diff --git a/llvm/lib/Demangle/DLangDemangle.cpp b/llvm/lib/Demangle/DLangDemangle.cpp
index d2f1bf4323ee..f380aa90035e 100644
--- a/llvm/lib/Demangle/DLangDemangle.cpp
+++ b/llvm/lib/Demangle/DLangDemangle.cpp
@@ -14,12 +14,250 @@
//===----------------------------------------------------------------------===//
#include "llvm/Demangle/Demangle.h"
+#include "llvm/Demangle/StringView.h"
#include "llvm/Demangle/Utility.h"
+#include <cctype>
#include <cstring>
+#include <limits>
using namespace llvm;
using llvm::itanium_demangle::OutputBuffer;
+using llvm::itanium_demangle::StringView;
+
+namespace {
+
+/// Demangle information structure.
+struct Demangler {
+ /// Initialize the information structure we use to pass around information.
+ ///
+ /// \param Mangled String to demangle.
+ Demangler(const char *Mangled);
+
+ /// Extract and demangle the mangled symbol and append it to the output
+ /// string.
+ ///
+ /// \param Demangled Output buffer to write the demangled name.
+ ///
+ /// \return The remaining string on success or nullptr on failure.
+ ///
+ /// \see https://dlang.org/spec/abi.html#name_mangling .
+ /// \see https://dlang.org/spec/abi.html#MangledName .
+ const char *parseMangle(OutputBuffer *Demangled);
+
+private:
+ /// Extract and demangle a given mangled symbol and append it to the output
+ /// string.
+ ///
+ /// \param Demangled Output buffer to write the demangled name.
+ /// \param Mangled Mangled symbol to be demangled.
+ ///
+ /// \return The remaining string on success or nullptr on failure.
+ ///
+ /// \see https://dlang.org/spec/abi.html#name_mangling .
+ /// \see https://dlang.org/spec/abi.html#MangledName .
+ const char *parseMangle(OutputBuffer *Demangled, const char *Mangled);
+
+ /// Extract the number from a given string.
+ ///
+ /// \param Mangled String to extract the number from.
+ /// \param Ret assigned result value.
+ ///
+ /// \return The remaining string on success or nullptr on failure.
+ ///
+ /// \note A result larger than UINT_MAX is considered a failure.
+ ///
+ /// \see https://dlang.org/spec/abi.html#Number .
+ const char *decodeNumber(const char *Mangled, unsigned long *Ret);
+
+ /// Check whether it is the beginning of a symbol name.
+ ///
+ /// \param Mangled String to check for a symbol name.
+ ///
+ /// \return true on success, false otherwise.
+ ///
+ /// \see https://dlang.org/spec/abi.html#SymbolName .
+ bool isSymbolName(const char *Mangled);
+
+ /// Extract and demangle an identifier from a given mangled symbol and append it
+ /// to the output string.
+ ///
+ /// \param Demangled Output buffer to write the demangled name.
+ /// \param Mangled Mangled symbol to be demangled.
+ ///
+ /// \return The remaining string on success or nullptr on failure.
+ ///
+ /// \see https://dlang.org/spec/abi.html#SymbolName .
+ const char *parseIdentifier(OutputBuffer *Demangled, const char *Mangled);
+
+ /// Extract and demangle the plain identifier from a given mangled symbol and
+ /// prepend/append it to the output string, with special treatment for some
+ /// magic compiler-generated symbols.
+ ///
+ /// \param Demangled Output buffer to write the demangled name.
+ /// \param Mangled Mangled symbol to be demangled.
+ /// \param Len Length of the mangled symbol name.
+ ///
+ /// \return The remaining string on success or nullptr on failure.
+ ///
+ /// \see https://dlang.org/spec/abi.html#LName .
+ const char *parseLName(OutputBuffer *Demangled, const char *Mangled,
+ unsigned long Len);
+
+ /// Extract and demangle the qualified symbol from a given mangled symbol
+ /// and append it to the output string.
+ ///
+ /// \param Demangled Output buffer to write the demangled name.
+ /// \param Mangled Mangled symbol to be demangled.
+ ///
+ /// \return The remaining string on success or nullptr on failure.
+ ///
+ /// \see https://dlang.org/spec/abi.html#QualifiedName .
+ const char *parseQualified(OutputBuffer *Demangled, const char *Mangled);
+
+ /// The string we are demangling.
+ const char *Str;
+};
+
+} // namespace
+
+const char *Demangler::decodeNumber(const char *Mangled, unsigned long *Ret) {
+ // Return nullptr if trying to extract something that isn't a digit.
+ if (Mangled == nullptr || !std::isdigit(*Mangled))
+ return nullptr;
+
+ unsigned long Val = 0;
+
+ do {
+ unsigned long Digit = Mangled[0] - '0';
+
+ // Check for overflow.
+ if (Val > (std::numeric_limits<unsigned int>::max() - Digit) / 10)
+ return nullptr;
+
+ Val = Val * 10 + Digit;
+ ++Mangled;
+ } while (std::isdigit(*Mangled));
+
+ if (*Mangled == '\0')
+ return nullptr;
+
+ *Ret = Val;
+ return Mangled;
+}
+
+bool Demangler::isSymbolName(const char *Mangled) {
+ if (std::isdigit(*Mangled))
+ return true;
+
+ // TODO: Handle symbol back references and template instances.
+ return false;
+}
+
+const char *Demangler::parseMangle(OutputBuffer *Demangled,
+ const char *Mangled) {
+ // A D mangled symbol comprises both scope and type information.
+ // MangleName:
+ // _D QualifiedName Type
+ // _D QualifiedName Z
+ // ^
+ // The caller should have guaranteed that the start pointer is at the
+ // above location.
+ // Note that type is never a function type, but only the return type of
+ // a function or the type of a variable.
+ Mangled += 2;
+
+ Mangled = parseQualified(Demangled, Mangled);
+
+ if (Mangled != nullptr) {
+ // Artificial symbols end with 'Z' and have no type.
+ if (*Mangled == 'Z')
+ ++Mangled;
+ else {
+ // TODO: Implement symbols with types.
+ return nullptr;
+ }
+ }
+
+ return Mangled;
+}
+
+const char *Demangler::parseQualified(OutputBuffer *Demangled,
+ const char *Mangled) {
+ // Qualified names are identifiers separated by their encoded length.
+ // Nested functions also encode their argument types without specifying
+ // what they return.
+ // QualifiedName:
+ // SymbolFunctionName
+ // SymbolFunctionName QualifiedName
+ // ^
+ // SymbolFunctionName:
+ // SymbolName
+ // SymbolName TypeFunctionNoReturn
+ // SymbolName M TypeFunctionNoReturn
+ // SymbolName M TypeModifiers TypeFunctionNoReturn
+ // The start pointer should be at the above location.
+
+ // Whether a symbol has already been emitted (a '.' separator is then needed).
+ bool NotFirst = false;
+ do {
+ // Skip over anonymous symbols.
+ if (*Mangled == '0') {
+ do
+ ++Mangled;
+ while (*Mangled == '0');
+
+ continue;
+ }
+
+ if (NotFirst)
+ *Demangled << '.';
+ NotFirst = true;
+
+ Mangled = parseIdentifier(Demangled, Mangled);
+
+ } while (Mangled && isSymbolName(Mangled));
+
+ return Mangled;
+}
+
+const char *Demangler::parseIdentifier(OutputBuffer *Demangled,
+ const char *Mangled) {
+ unsigned long Len;
+
+ if (Mangled == nullptr || *Mangled == '\0')
+ return nullptr;
+
+ // TODO: Parse back references and lengthless template instances.
+
+ const char *Endptr = decodeNumber(Mangled, &Len);
+
+ if (Endptr == nullptr || Len == 0)
+ return nullptr;
+
+ if (strlen(Endptr) < Len)
+ return nullptr;
+
+ Mangled = Endptr;
+
+ // TODO: Parse template instances with a length prefix.
+
+ return parseLName(Demangled, Mangled, Len);
+}
+
+const char *Demangler::parseLName(OutputBuffer *Demangled, const char *Mangled,
+ unsigned long Len) {
+ *Demangled << StringView(Mangled, Len);
+ Mangled += Len;
+
+ return Mangled;
+}
+
+Demangler::Demangler(const char *Mangled) : Str(Mangled) {}
+
+const char *Demangler::parseMangle(OutputBuffer *Demangled) {
+ return parseMangle(Demangled, this->Str);
+}
char *llvm::dlangDemangle(const char *MangledName) {
if (MangledName == nullptr || strncmp(MangledName, "_D", 2) != 0)
@@ -29,8 +267,19 @@ char *llvm::dlangDemangle(const char *MangledName) {
if (!initializeOutputBuffer(nullptr, nullptr, Demangled, 1024))
return nullptr;
- if (strcmp(MangledName, "_Dmain") == 0)
+ if (strcmp(MangledName, "_Dmain") == 0) {
Demangled << "D main";
+ } else {
+
+ Demangler D = Demangler(MangledName);
+ MangledName = D.parseMangle(&Demangled);
+
+ // Check that the entire symbol was successfully demangled.
+ if (MangledName == nullptr || *MangledName != '\0') {
+ std::free(Demangled.getBuffer());
+ return nullptr;
+ }
+ }
// OutputBuffer's internal buffer is not null terminated, so we need to add
// a terminator to produce a valid C string.
@@ -40,6 +289,6 @@ char *llvm::dlangDemangle(const char *MangledName) {
return Demangled.getBuffer();
}
- free(Demangled.getBuffer());
+ std::free(Demangled.getBuffer());
return nullptr;
}
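
A small usage example of the public entry point, derived from the grammar handled above: "_D" followed by the length-prefixed identifiers "8demangle" and "4test", then the artificial-symbol terminator "Z":

    #include "llvm/Demangle/Demangle.h"
    #include <cstdio>
    #include <cstdlib>

    int main() {
      // Two length-prefixed identifiers joined with '.', no type suffix.
      if (char *S = llvm::dlangDemangle("_D8demangle4testZ")) {
        std::puts(S); // prints "demangle.test"
        std::free(S); // caller owns the malloc'd buffer
      }
      return 0;
    }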
diff --git a/llvm/lib/ExecutionEngine/ExecutionEngine.cpp b/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
index fe3c433bd2c5..a14bd4d2c3fd 100644
--- a/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -1256,8 +1256,7 @@ void ExecutionEngine::emitGlobals() {
// If there are multiple modules, map the non-canonical globals to their
// canonical location.
if (!NonCanonicalGlobals.empty()) {
- for (unsigned i = 0, e = NonCanonicalGlobals.size(); i != e; ++i) {
- const GlobalValue *GV = NonCanonicalGlobals[i];
+ for (const GlobalValue *GV : NonCanonicalGlobals) {
const GlobalValue *CGV = LinkedGlobalsMap[std::make_pair(
std::string(GV->getName()), GV->getType())];
void *Ptr = getPointerToGlobalIfAvailable(CGV);
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h
index fdc987751286..f9101d71dfa8 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h
+++ b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h
@@ -143,6 +143,9 @@ protected:
// Only SHF_ALLOC sections will have graph sections.
DenseMap<ELFSectionIndex, Section *> GraphSections;
DenseMap<ELFSymbolIndex, Symbol *> GraphSymbols;
+ DenseMap<const typename ELFFile::Elf_Shdr *,
+ ArrayRef<typename ELFFile::Elf_Word>>
+ ShndxTables;
};
template <typename ELFT>
@@ -241,7 +244,7 @@ template <typename ELFT> Error ELFLinkGraphBuilder<ELFT>::prepare() {
return SectionStringTabOrErr.takeError();
// Get the SHT_SYMTAB section.
- for (auto &Sec : Sections)
+ for (auto &Sec : Sections) {
if (Sec.sh_type == ELF::SHT_SYMTAB) {
if (!SymTabSec)
SymTabSec = &Sec;
@@ -250,6 +253,20 @@ template <typename ELFT> Error ELFLinkGraphBuilder<ELFT>::prepare() {
G->getName());
}
+ // Extended table.
+ if (Sec.sh_type == ELF::SHT_SYMTAB_SHNDX) {
+ uint32_t SymtabNdx = Sec.sh_link;
+ if (SymtabNdx >= Sections.size())
+ return make_error<JITLinkError>("sh_link is out of bound");
+
+ auto ShndxTable = Obj.getSHNDXTable(Sec);
+ if (!ShndxTable)
+ return ShndxTable.takeError();
+
+ ShndxTables.insert({&Sections[SymtabNdx], *ShndxTable});
+ }
+ }
+
return Error::success();
}
@@ -299,11 +316,6 @@ template <typename ELFT> Error ELFLinkGraphBuilder<ELFT>::graphifySections() {
else
Prot = MemProt::Read | MemProt::Write;
- // For now we just use this to skip the "undefined" section, probably need
- // to revist.
- if (Sec.sh_size == 0)
- continue;
-
auto &GraphSec = G->createSection(*Name, Prot);
if (Sec.sh_type != ELF::SHT_NOBITS) {
auto Data = Obj.template getSectionContentsAsArray<char>(Sec);
@@ -401,9 +413,19 @@ template <typename ELFT> Error ELFLinkGraphBuilder<ELFT>::graphifySymbols() {
(Sym.getType() == ELF::STT_NOTYPE || Sym.getType() == ELF::STT_FUNC ||
Sym.getType() == ELF::STT_OBJECT ||
Sym.getType() == ELF::STT_SECTION || Sym.getType() == ELF::STT_TLS)) {
-
- // FIXME: Handle extended tables.
- if (auto *GraphSec = getGraphSection(Sym.st_shndx)) {
+ // Handle extended tables.
+ unsigned Shndx = Sym.st_shndx;
+ if (Shndx == ELF::SHN_XINDEX) {
+ auto ShndxTable = ShndxTables.find(SymTabSec);
+ if (ShndxTable == ShndxTables.end())
+ continue;
+ auto NdxOrErr = object::getExtendedSymbolTableIndex<ELFT>(
+ Sym, SymIndex, ShndxTable->second);
+ if (!NdxOrErr)
+ return NdxOrErr.takeError();
+ Shndx = *NdxOrErr;
+ }
+ if (auto *GraphSec = getGraphSection(Shndx)) {
Block *B = nullptr;
{
auto Blocks = GraphSec->blocks();
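
The SHN_XINDEX handling added above follows the ELF rule that a 16-bit st_shndx field cannot hold large section indices: the sentinel 0xffff redirects the lookup to a parallel SHT_SYMTAB_SHNDX array indexed by symbol-table position. A simplified sketch of that rule (plain types, not the JITLink templates):

    #include <cstdint>
    #include <vector>

    constexpr uint16_t SHN_XINDEX = 0xffff;

    uint32_t resolveSectionIndex(uint16_t StShndx, uint32_t SymIndex,
                                 const std::vector<uint32_t> &ShndxTable) {
      if (StShndx != SHN_XINDEX)
        return StShndx; // ordinary case: index fits in the symbol itself
      return ShndxTable[SymIndex]; // one entry per symbol table entry
    }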
diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp
index 6b24d6461b63..56a97f83d915 100644
--- a/llvm/lib/ExecutionEngine/Orc/Core.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp
@@ -612,9 +612,14 @@ void LookupState::continueLookup(Error Err) {
DefinitionGenerator::~DefinitionGenerator() {}
+JITDylib::~JITDylib() {
+ LLVM_DEBUG(dbgs() << "Destroying JITDylib " << getName() << "\n");
+}
+
Error JITDylib::clear() {
std::vector<ResourceTrackerSP> TrackersToRemove;
ES.runSessionLocked([&]() {
+ assert(State != Closed && "JD is defunct");
for (auto &KV : TrackerSymbols)
TrackersToRemove.push_back(KV.first);
TrackersToRemove.push_back(getDefaultResourceTracker());
@@ -628,6 +633,7 @@ Error JITDylib::clear() {
ResourceTrackerSP JITDylib::getDefaultResourceTracker() {
return ES.runSessionLocked([this] {
+ assert(State != Closed && "JD is defunct");
if (!DefaultTracker)
DefaultTracker = new ResourceTracker(this);
return DefaultTracker;
@@ -636,19 +642,22 @@ ResourceTrackerSP JITDylib::getDefaultResourceTracker() {
ResourceTrackerSP JITDylib::createResourceTracker() {
return ES.runSessionLocked([this] {
+ assert(State == Open && "JD is defunct");
ResourceTrackerSP RT = new ResourceTracker(this);
return RT;
});
}
void JITDylib::removeGenerator(DefinitionGenerator &G) {
- std::lock_guard<std::mutex> Lock(GeneratorsMutex);
- auto I = llvm::find_if(DefGenerators,
- [&](const std::shared_ptr<DefinitionGenerator> &H) {
- return H.get() == &G;
- });
- assert(I != DefGenerators.end() && "Generator not found");
- DefGenerators.erase(I);
+ ES.runSessionLocked([&] {
+ assert(State == Open && "JD is defunct");
+ auto I = llvm::find_if(DefGenerators,
+ [&](const std::shared_ptr<DefinitionGenerator> &H) {
+ return H.get() == &G;
+ });
+ assert(I != DefGenerators.end() && "Generator not found");
+ DefGenerators.erase(I);
+ });
}
Expected<SymbolFlagsMap>
@@ -708,10 +717,8 @@ Error JITDylib::replace(MaterializationResponsibility &FromMR,
auto Err =
ES.runSessionLocked([&, this]() -> Error {
- auto RT = getTracker(FromMR);
-
- if (RT->isDefunct())
- return make_error<ResourceTrackerDefunct>(std::move(RT));
+ if (FromMR.RT->isDefunct())
+ return make_error<ResourceTrackerDefunct>(std::move(FromMR.RT));
#ifndef NDEBUG
for (auto &KV : MU->getSymbols()) {
@@ -735,7 +742,8 @@ Error JITDylib::replace(MaterializationResponsibility &FromMR,
if (MII != MaterializingInfos.end()) {
if (MII->second.hasQueriesPending()) {
MustRunMR = ES.createMaterializationResponsibility(
- *RT, std::move(MU->SymbolFlags), std::move(MU->InitSymbol));
+ *FromMR.RT, std::move(MU->SymbolFlags),
+ std::move(MU->InitSymbol));
MustRunMU = std::move(MU);
return Error::success();
}
@@ -743,10 +751,8 @@ Error JITDylib::replace(MaterializationResponsibility &FromMR,
}
// Otherwise, make MU responsible for all the symbols.
- auto RTI = MRTrackers.find(&FromMR);
- assert(RTI != MRTrackers.end() && "No tracker for FromMR");
- auto UMI =
- std::make_shared<UnmaterializedInfo>(std::move(MU), RTI->second);
+ auto UMI = std::make_shared<UnmaterializedInfo>(std::move(MU),
+ FromMR.RT.get());
for (auto &KV : UMI->MU->getSymbols()) {
auto SymI = Symbols.find(KV.first);
assert(SymI->second.getState() == SymbolState::Materializing &&
@@ -787,13 +793,11 @@ JITDylib::delegate(MaterializationResponsibility &FromMR,
return ES.runSessionLocked(
[&]() -> Expected<std::unique_ptr<MaterializationResponsibility>> {
- auto RT = getTracker(FromMR);
-
- if (RT->isDefunct())
- return make_error<ResourceTrackerDefunct>(std::move(RT));
+ if (FromMR.RT->isDefunct())
+ return make_error<ResourceTrackerDefunct>(std::move(FromMR.RT));
return ES.createMaterializationResponsibility(
- *RT, std::move(SymbolFlags), std::move(InitSymbol));
+ *FromMR.RT, std::move(SymbolFlags), std::move(InitSymbol));
});
}
@@ -903,10 +907,13 @@ Error JITDylib::resolve(MaterializationResponsibility &MR,
AsynchronousSymbolQuerySet CompletedQueries;
if (auto Err = ES.runSessionLocked([&, this]() -> Error {
- auto RTI = MRTrackers.find(&MR);
- assert(RTI != MRTrackers.end() && "No resource tracker for MR?");
- if (RTI->second->isDefunct())
- return make_error<ResourceTrackerDefunct>(RTI->second);
+ if (MR.RT->isDefunct())
+ return make_error<ResourceTrackerDefunct>(MR.RT);
+
+ if (State != Open)
+ return make_error<StringError>("JITDylib " + getName() +
+ " is defunct",
+ inconvertibleErrorCode());
struct WorklistEntry {
SymbolTable::iterator SymI;
@@ -1001,10 +1008,13 @@ Error JITDylib::emit(MaterializationResponsibility &MR,
DenseMap<JITDylib *, SymbolNameVector> ReadySymbols;
if (auto Err = ES.runSessionLocked([&, this]() -> Error {
- auto RTI = MRTrackers.find(&MR);
- assert(RTI != MRTrackers.end() && "No resource tracker for MR?");
- if (RTI->second->isDefunct())
- return make_error<ResourceTrackerDefunct>(RTI->second);
+ if (MR.RT->isDefunct())
+ return make_error<ResourceTrackerDefunct>(MR.RT);
+
+ if (State != Open)
+ return make_error<StringError>("JITDylib " + getName() +
+ " is defunct",
+ inconvertibleErrorCode());
SymbolNameSet SymbolsInErrorState;
std::vector<SymbolTable::iterator> Worklist;
@@ -1149,9 +1159,12 @@ Error JITDylib::emit(MaterializationResponsibility &MR,
void JITDylib::unlinkMaterializationResponsibility(
MaterializationResponsibility &MR) {
ES.runSessionLocked([&]() {
- auto I = MRTrackers.find(&MR);
- assert(I != MRTrackers.end() && "MaterializationResponsibility not linked");
- MRTrackers.erase(I);
+ auto I = TrackerMRs.find(MR.RT.get());
+ assert(I != TrackerMRs.end() && "No MRs in TrackerMRs list for RT");
+ assert(I->second.count(&MR) && "MR not in TrackerMRs list for RT");
+ I->second.erase(&MR);
+ if (I->second.empty())
+ TrackerMRs.erase(MR.RT.get());
});
}
@@ -1169,8 +1182,16 @@ JITDylib::failSymbols(FailedSymbolsWorklist Worklist) {
(*FailedSymbolsMap)[&JD].insert(Name);
- assert(JD.Symbols.count(Name) && "No symbol table entry for Name");
- auto &Sym = JD.Symbols[Name];
+ // Look up the symbol to fail.
+ auto SymI = JD.Symbols.find(Name);
+
+ // It's possible that this symbol has already been removed, e.g. if a
+ // materialization failure happens concurrently with a ResourceTracker or
+ // JITDylib removal. In that case we can safely skip this symbol and
+ // continue.
+ if (SymI == JD.Symbols.end())
+ continue;
+ auto &Sym = SymI->second;
// Move the symbol into the error state.
// Note that this may be redundant: The symbol might already have been
@@ -1267,6 +1288,7 @@ JITDylib::failSymbols(FailedSymbolsWorklist Worklist) {
void JITDylib::setLinkOrder(JITDylibSearchOrder NewLinkOrder,
bool LinkAgainstThisJITDylibFirst) {
ES.runSessionLocked([&]() {
+ assert(State == Open && "JD is defunct");
if (LinkAgainstThisJITDylibFirst) {
LinkOrder.clear();
if (NewLinkOrder.empty() || NewLinkOrder.front().first != this)
@@ -1285,6 +1307,7 @@ void JITDylib::addToLinkOrder(JITDylib &JD, JITDylibLookupFlags JDLookupFlags) {
void JITDylib::replaceInLinkOrder(JITDylib &OldJD, JITDylib &NewJD,
JITDylibLookupFlags JDLookupFlags) {
ES.runSessionLocked([&]() {
+ assert(State == Open && "JD is defunct");
for (auto &KV : LinkOrder)
if (KV.first == &OldJD) {
KV = {&NewJD, JDLookupFlags};
@@ -1295,6 +1318,7 @@ void JITDylib::replaceInLinkOrder(JITDylib &OldJD, JITDylib &NewJD,
void JITDylib::removeFromLinkOrder(JITDylib &JD) {
ES.runSessionLocked([&]() {
+ assert(State == Open && "JD is defunct");
auto I = llvm::find_if(LinkOrder,
[&](const JITDylibSearchOrder::value_type &KV) {
return KV.first == &JD;
@@ -1306,6 +1330,7 @@ void JITDylib::removeFromLinkOrder(JITDylib &JD) {
Error JITDylib::remove(const SymbolNameSet &Names) {
return ES.runSessionLocked([&]() -> Error {
+ assert(State == Open && "JD is defunct");
using SymbolMaterializerItrPair =
std::pair<SymbolTable::iterator, UnmaterializedInfosMap::iterator>;
std::vector<SymbolMaterializerItrPair> SymbolsToRemove;
@@ -1365,8 +1390,23 @@ Error JITDylib::remove(const SymbolNameSet &Names) {
void JITDylib::dump(raw_ostream &OS) {
ES.runSessionLocked([&, this]() {
OS << "JITDylib \"" << getName() << "\" (ES: "
- << format("0x%016" PRIx64, reinterpret_cast<uintptr_t>(&ES)) << "):\n"
- << "Link order: " << LinkOrder << "\n"
+ << format("0x%016" PRIx64, reinterpret_cast<uintptr_t>(&ES))
+ << ", State = ";
+ switch (State) {
+ case Open:
+ OS << "Open";
+ break;
+ case Closing:
+ OS << "Closing";
+ break;
+ case Closed:
+ OS << "Closed";
+ break;
+ }
+ OS << ")\n";
+ if (State == Closed)
+ return;
+ OS << "Link order: " << LinkOrder << "\n"
<< "Symbol table:\n";
for (auto &KV : Symbols) {
@@ -1454,17 +1494,11 @@ JITDylib::JITDylib(ExecutionSession &ES, std::string Name)
LinkOrder.push_back({this, JITDylibLookupFlags::MatchAllSymbols});
}
-ResourceTrackerSP JITDylib::getTracker(MaterializationResponsibility &MR) {
- auto I = MRTrackers.find(&MR);
- assert(I != MRTrackers.end() && "MR is not linked");
- assert(I->second && "Linked tracker is null");
- return I->second;
-}
-
std::pair<JITDylib::AsynchronousSymbolQuerySet,
std::shared_ptr<SymbolDependenceMap>>
JITDylib::removeTracker(ResourceTracker &RT) {
// Note: Should be called under the session lock.
+ assert(State != Closed && "JD is defunct");
SymbolNameVector SymbolsToRemove;
std::vector<std::pair<JITDylib *, SymbolStringPtr>> SymbolsToFail;
@@ -1525,6 +1559,7 @@ JITDylib::removeTracker(ResourceTracker &RT) {
}
void JITDylib::transferTracker(ResourceTracker &DstRT, ResourceTracker &SrcRT) {
+ assert(State != Closed && "JD is defunct");
assert(&DstRT != &SrcRT && "No-op transfers shouldn't call transferTracker");
assert(&DstRT.getJITDylib() == this && "DstRT is not for this JITDylib");
assert(&SrcRT.getJITDylib() == this && "SrcRT is not for this JITDylib");
@@ -1536,9 +1571,22 @@ void JITDylib::transferTracker(ResourceTracker &DstRT, ResourceTracker &SrcRT) {
}
// Update trackers for any active materialization responsibilities.
- for (auto &KV : MRTrackers) {
- if (KV.second == &SrcRT)
- KV.second = &DstRT;
+ {
+ auto I = TrackerMRs.find(&SrcRT);
+ if (I != TrackerMRs.end()) {
+ auto &SrcMRs = I->second;
+ auto &DstMRs = TrackerMRs[&DstRT];
+ for (auto *MR : SrcMRs)
+ MR->RT = &DstRT;
+ if (DstMRs.empty())
+ DstMRs = std::move(SrcMRs);
+ else
+ for (auto *MR : SrcMRs)
+ DstMRs.insert(MR);
+ // Erase SrcRT entry in TrackerMRs. Use &SrcRT key rather than iterator I
+ // for this, since I may have been invalidated by 'TrackerMRs[&DstRT]'.
+ TrackerMRs.erase(&SrcRT);
+ }
}
// If we're transferring to the default tracker we just need to delete the
@@ -1872,6 +1920,40 @@ Expected<JITDylib &> ExecutionSession::createJITDylib(std::string Name) {
return JD;
}
+Error ExecutionSession::removeJITDylib(JITDylib &JD) {
+ // Keep JD alive throughout this routine, even if all other references
+ // have been dropped.
+ JITDylibSP JDKeepAlive = &JD;
+
+ // Set JD to 'Closing' state and remove JD from the ExecutionSession.
+ runSessionLocked([&] {
+ assert(JD.State == JITDylib::Open && "JD already closed");
+ JD.State = JITDylib::Closing;
+ auto I = llvm::find(JDs, &JD);
+ assert(I != JDs.end() && "JD does not appear in session JDs");
+ JDs.erase(I);
+ });
+
+ // Clear the JITDylib.
+ auto Err = JD.clear();
+
+ // Set JD to closed state. Clear remaining data structures.
+ runSessionLocked([&] {
+ assert(JD.State == JITDylib::Closing && "JD should be closing");
+ JD.State = JITDylib::Closed;
+ assert(JD.Symbols.empty() && "JD.Symbols is not empty after clear");
+ assert(JD.UnmaterializedInfos.empty() &&
+ "JD.UnmaterializedInfos is not empty after clear");
+ assert(JD.MaterializingInfos.empty() &&
+ "JD.MaterializingInfos is not empty after clear");
+ assert(JD.TrackerSymbols.empty() &&
+ "TrackerSymbols is not empty after clear");
+ JD.DefGenerators.clear();
+ JD.LinkOrder.clear();
+ });
+ return Err;
+}
+
std::vector<JITDylibSP> JITDylib::getDFSLinkOrder(ArrayRef<JITDylibSP> JDs) {
if (JDs.empty())
return {};
@@ -1883,6 +1965,8 @@ std::vector<JITDylibSP> JITDylib::getDFSLinkOrder(ArrayRef<JITDylibSP> JDs) {
for (auto &JD : JDs) {
+ assert(JD->State == Open && "JD is defunct");
+
if (Visited.count(JD.get()))
continue;
@@ -2311,8 +2395,11 @@ void ExecutionSession::OL_applyQueryPhase1(
});
// Build the definition generator stack for this JITDylib.
- for (auto &DG : reverse(JD.DefGenerators))
- IPLS->CurDefGeneratorStack.push_back(DG);
+ runSessionLocked([&] {
+ IPLS->CurDefGeneratorStack.reserve(JD.DefGenerators.size());
+ for (auto &DG : reverse(JD.DefGenerators))
+ IPLS->CurDefGeneratorStack.push_back(DG);
+ });
// Flag that we've done our initialization.
IPLS->NewJITDylib = false;
@@ -2629,17 +2716,15 @@ void ExecutionSession::OL_completeLookup(
LLVM_DEBUG(dbgs() << "Adding MUs to dispatch:\n");
for (auto &KV : CollectedUMIs) {
- auto &JD = *KV.first;
LLVM_DEBUG({
+ auto &JD = *KV.first;
dbgs() << " For " << JD.getName() << ": Adding " << KV.second.size()
<< " MUs.\n";
});
for (auto &UMI : KV.second) {
- std::unique_ptr<MaterializationResponsibility> MR(
- new MaterializationResponsibility(
- &JD, std::move(UMI->MU->SymbolFlags),
- std::move(UMI->MU->InitSymbol)));
- JD.MRTrackers[MR.get()] = UMI->RT;
+ auto MR = createMaterializationResponsibility(
+ *UMI->RT, std::move(UMI->MU->SymbolFlags),
+ std::move(UMI->MU->InitSymbol));
OutstandingMUs.push_back(
std::make_pair(std::move(UMI->MU), std::move(MR)));
}
@@ -2757,18 +2842,18 @@ void ExecutionSession::OL_destroyMaterializationResponsibility(
assert(MR.SymbolFlags.empty() &&
"All symbols should have been explicitly materialized or failed");
- MR.JD->unlinkMaterializationResponsibility(MR);
+ MR.JD.unlinkMaterializationResponsibility(MR);
}
SymbolNameSet ExecutionSession::OL_getRequestedSymbols(
const MaterializationResponsibility &MR) {
- return MR.JD->getRequestedSymbols(MR.SymbolFlags);
+ return MR.JD.getRequestedSymbols(MR.SymbolFlags);
}
Error ExecutionSession::OL_notifyResolved(MaterializationResponsibility &MR,
const SymbolMap &Symbols) {
LLVM_DEBUG({
- dbgs() << "In " << MR.JD->getName() << " resolving " << Symbols << "\n";
+ dbgs() << "In " << MR.JD.getName() << " resolving " << Symbols << "\n";
});
#ifndef NDEBUG
for (auto &KV : Symbols) {
@@ -2783,15 +2868,16 @@ Error ExecutionSession::OL_notifyResolved(MaterializationResponsibility &MR,
}
#endif
- return MR.JD->resolve(MR, Symbols);
+ return MR.JD.resolve(MR, Symbols);
}
Error ExecutionSession::OL_notifyEmitted(MaterializationResponsibility &MR) {
LLVM_DEBUG({
- dbgs() << "In " << MR.JD->getName() << " emitting " << MR.SymbolFlags << "\n";
+ dbgs() << "In " << MR.JD.getName() << " emitting " << MR.SymbolFlags
+ << "\n";
});
- if (auto Err = MR.JD->emit(MR, MR.SymbolFlags))
+ if (auto Err = MR.JD.emit(MR, MR.SymbolFlags))
return Err;
MR.SymbolFlags.clear();
@@ -2802,10 +2888,11 @@ Error ExecutionSession::OL_defineMaterializing(
MaterializationResponsibility &MR, SymbolFlagsMap NewSymbolFlags) {
LLVM_DEBUG({
- dbgs() << "In " << MR.JD->getName() << " defining materializing symbols "
+ dbgs() << "In " << MR.JD.getName() << " defining materializing symbols "
<< NewSymbolFlags << "\n";
});
- if (auto AcceptedDefs = MR.JD->defineMaterializing(std::move(NewSymbolFlags))) {
+ if (auto AcceptedDefs =
+ MR.JD.defineMaterializing(std::move(NewSymbolFlags))) {
// Add all newly accepted symbols to this responsibility object.
for (auto &KV : *AcceptedDefs)
MR.SymbolFlags.insert(KV);
@@ -2817,14 +2904,14 @@ Error ExecutionSession::OL_defineMaterializing(
void ExecutionSession::OL_notifyFailed(MaterializationResponsibility &MR) {
LLVM_DEBUG({
- dbgs() << "In " << MR.JD->getName() << " failing materialization for "
+ dbgs() << "In " << MR.JD.getName() << " failing materialization for "
<< MR.SymbolFlags << "\n";
});
JITDylib::FailedSymbolsWorklist Worklist;
for (auto &KV : MR.SymbolFlags)
- Worklist.push_back(std::make_pair(MR.JD.get(), KV.first));
+ Worklist.push_back(std::make_pair(&MR.JD, KV.first));
MR.SymbolFlags.clear();
if (Worklist.empty())
@@ -2834,9 +2921,8 @@ void ExecutionSession::OL_notifyFailed(MaterializationResponsibility &MR) {
std::shared_ptr<SymbolDependenceMap> FailedSymbols;
runSessionLocked([&]() {
- auto RTI = MR.JD->MRTrackers.find(&MR);
- assert(RTI != MR.JD->MRTrackers.end() && "No tracker for this");
- if (RTI->second->isDefunct())
+ // If the tracker is defunct then there's nothing to do here.
+ if (MR.RT->isDefunct())
return;
std::tie(FailedQueries, FailedSymbols) =
@@ -2858,12 +2944,12 @@ Error ExecutionSession::OL_replace(MaterializationResponsibility &MR,
if (MU->getInitializerSymbol() == MR.InitSymbol)
MR.InitSymbol = nullptr;
- LLVM_DEBUG(MR.JD->getExecutionSession().runSessionLocked([&]() {
- dbgs() << "In " << MR.JD->getName() << " replacing symbols with " << *MU
+ LLVM_DEBUG(MR.JD.getExecutionSession().runSessionLocked([&]() {
+ dbgs() << "In " << MR.JD.getName() << " replacing symbols with " << *MU
<< "\n";
}););
- return MR.JD->replace(MR, std::move(MU));
+ return MR.JD.replace(MR, std::move(MU));
}
Expected<std::unique_ptr<MaterializationResponsibility>>
@@ -2886,8 +2972,8 @@ ExecutionSession::OL_delegate(MaterializationResponsibility &MR,
MR.SymbolFlags.erase(I);
}
- return MR.JD->delegate(MR, std::move(DelegatedFlags),
- std::move(DelegatedInitSymbol));
+ return MR.JD.delegate(MR, std::move(DelegatedFlags),
+ std::move(DelegatedInitSymbol));
}
void ExecutionSession::OL_addDependencies(
@@ -2899,7 +2985,7 @@ void ExecutionSession::OL_addDependencies(
});
assert(MR.SymbolFlags.count(Name) &&
"Symbol not covered by this MaterializationResponsibility instance");
- MR.JD->addDependencies(Name, Dependencies);
+ MR.JD.addDependencies(Name, Dependencies);
}
void ExecutionSession::OL_addDependenciesForAll(
@@ -2910,7 +2996,7 @@ void ExecutionSession::OL_addDependenciesForAll(
<< Dependencies << "\n";
});
for (auto &KV : MR.SymbolFlags)
- MR.JD->addDependencies(KV.first, Dependencies);
+ MR.JD.addDependencies(KV.first, Dependencies);
}
#ifndef NDEBUG
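
A hedged sketch of the new removal path from the session side (names other than the two ExecutionSession calls are illustrative):

    #include "llvm/ExecutionEngine/Orc/Core.h"
    #include "llvm/Support/Error.h"

    using namespace llvm;
    using namespace llvm::orc;

    Error useAndRemove(ExecutionSession &ES) {
      auto JD = ES.createJITDylib("scratch");
      if (!JD)
        return JD.takeError();
      // ... define symbols in *JD and run lookups here ...
      // Open -> Closing -> clear() -> Closed, as implemented above.
      return ES.removeJITDylib(*JD);
    }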
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index 1b7fdb588275..0de76ab78e0f 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -1301,7 +1301,7 @@ RuntimeDyldELF::processRelocationRef(
MemMgr.allowStubAllocation()) {
resolveAArch64Branch(SectionID, Value, RelI, Stubs);
} else if (RelType == ELF::R_AARCH64_ADR_GOT_PAGE) {
- // Craete new GOT entry or find existing one. If GOT entry is
+ // Create new GOT entry or find existing one. If GOT entry is
// to be created, then we also emit ABS64 relocation for it.
uint64_t GOTOffset = findOrAllocGOTEntry(Value, ELF::R_AARCH64_ABS64);
resolveGOTOffsetRelocation(SectionID, Offset, GOTOffset + Addend,
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index ce998df757ec..18f1a2314853 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -993,6 +993,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections(
Value *ST = ConstantInt::get(I32Ty, 1);
llvm::CanonicalLoopInfo *LoopInfo = createCanonicalLoop(
Loc, LoopBodyGenCB, LB, UB, ST, true, false, AllocaIP, "section_loop");
+ Builder.SetInsertPoint(AllocaIP.getBlock()->getTerminator());
+ AllocaIP = Builder.saveIP();
InsertPointTy AfterIP =
applyStaticWorkshareLoop(Loc.DL, LoopInfo, AllocaIP, true);
BasicBlock *LoopAfterBB = AfterIP.getBlock();
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 7734c0a8de58..c9748e1387eb 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -353,12 +353,11 @@ void llvm::printLLVMNameWithoutPrefix(raw_ostream &OS, StringRef Name) {
// Scan the name to see if it needs quotes first.
bool NeedsQuotes = isdigit(static_cast<unsigned char>(Name[0]));
if (!NeedsQuotes) {
- for (unsigned i = 0, e = Name.size(); i != e; ++i) {
+ for (unsigned char C : Name) {
// By making this unsigned, the value passed in to isalnum will always be
// in the range 0-255. This is important when building with MSVC because
// its implementation will assert. This situation can arise when dealing
// with UTF-8 multibyte characters.
- unsigned char C = Name[i];
if (!isalnum(static_cast<unsigned char>(C)) && C != '-' && C != '.' &&
C != '_') {
NeedsQuotes = true;
@@ -1309,27 +1308,8 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Metadata *MD,
bool FromValue = false);
static void WriteOptimizationInfo(raw_ostream &Out, const User *U) {
- if (const FPMathOperator *FPO = dyn_cast<const FPMathOperator>(U)) {
- // 'Fast' is an abbreviation for all fast-math-flags.
- if (FPO->isFast())
- Out << " fast";
- else {
- if (FPO->hasAllowReassoc())
- Out << " reassoc";
- if (FPO->hasNoNaNs())
- Out << " nnan";
- if (FPO->hasNoInfs())
- Out << " ninf";
- if (FPO->hasNoSignedZeros())
- Out << " nsz";
- if (FPO->hasAllowReciprocal())
- Out << " arcp";
- if (FPO->hasAllowContract())
- Out << " contract";
- if (FPO->hasApproxFunc())
- Out << " afn";
- }
- }
+ if (const FPMathOperator *FPO = dyn_cast<const FPMathOperator>(U))
+ Out << FPO->getFastMathFlags();
if (const OverflowingBinaryOperator *OBO =
dyn_cast<OverflowingBinaryOperator>(U)) {
diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp
index 905372982dc2..2c396ae97499 100644
--- a/llvm/lib/IR/Core.cpp
+++ b/llvm/lib/IR/Core.cpp
@@ -2266,6 +2266,14 @@ LLVMValueRef LLVMAddAlias(LLVMModuleRef M, LLVMTypeRef Ty, LLVMValueRef Aliasee,
unwrap<Constant>(Aliasee), unwrap(M)));
}
+LLVMValueRef LLVMAddAlias2(LLVMModuleRef M, LLVMTypeRef ValueTy,
+ unsigned AddrSpace, LLVMValueRef Aliasee,
+ const char *Name) {
+ return wrap(GlobalAlias::create(unwrap(ValueTy), AddrSpace,
+ GlobalValue::ExternalLinkage, Name,
+ unwrap<Constant>(Aliasee), unwrap(M)));
+}
+
LLVMValueRef LLVMGetNamedGlobalAlias(LLVMModuleRef M,
const char *Name, size_t NameLen) {
return wrap(unwrap(M)->getNamedAlias(Name));
diff --git a/llvm/lib/IR/DIBuilder.cpp b/llvm/lib/IR/DIBuilder.cpp
index ca7dafc814ce..548962bd6a98 100644
--- a/llvm/lib/IR/DIBuilder.cpp
+++ b/llvm/lib/IR/DIBuilder.cpp
@@ -34,7 +34,20 @@ static cl::opt<bool>
DIBuilder::DIBuilder(Module &m, bool AllowUnresolvedNodes, DICompileUnit *CU)
: M(m), VMContext(M.getContext()), CUNode(CU), DeclareFn(nullptr),
ValueFn(nullptr), LabelFn(nullptr),
- AllowUnresolvedNodes(AllowUnresolvedNodes) {}
+ AllowUnresolvedNodes(AllowUnresolvedNodes) {
+ if (CUNode) {
+ if (const auto &ETs = CUNode->getEnumTypes())
+ AllEnumTypes.assign(ETs.begin(), ETs.end());
+ if (const auto &RTs = CUNode->getRetainedTypes())
+ AllRetainTypes.assign(RTs.begin(), RTs.end());
+ if (const auto &GVs = CUNode->getGlobalVariables())
+ AllGVs.assign(GVs.begin(), GVs.end());
+ if (const auto &IMs = CUNode->getImportedEntities())
+ AllImportedModules.assign(IMs.begin(), IMs.end());
+ if (const auto &MNs = CUNode->getMacros())
+ AllMacrosPerParent.insert({nullptr, {MNs.begin(), MNs.end()}});
+ }
+}
void DIBuilder::trackIfUnresolved(MDNode *N) {
if (!N)
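
With the constructor change above, a DIBuilder handed an existing compile unit now appends to that unit's lists rather than overwriting them. A short sketch (ExistingCU is assumed to come from earlier emission):

    #include "llvm/IR/DIBuilder.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    void appendDebugInfo(Module &M, DICompileUnit *ExistingCU) {
      // Seeds AllEnumTypes/AllRetainTypes/AllGVs/... from ExistingCU.
      DIBuilder DIB(M, /*AllowUnresolvedNodes=*/true, ExistingCU);
      // ... create additional types, globals, imported entities ...
      DIB.finalize(); // re-emits the combined lists into the CU
    }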
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index c42df49d97ea..ad27a6d8c08e 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -2474,7 +2474,7 @@ bool ShuffleVectorInst::isReplicationMask(ArrayRef<int> Mask,
// Additionally, mask size is a replication factor multiplied by vector size,
// which further significantly reduces the search space.
- // Before doing that, let's perform basic sanity check first.
+ // Before doing that, let's perform basic correctness checking first.
int Largest = -1;
for (int MaskElt : Mask) {
if (MaskElt == UndefMaskElem)
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
index 7552906fd07a..9206cd37a6d1 100644
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -358,13 +358,13 @@ Value *VPIntrinsic::getMemoryPointerParam() const {
Optional<unsigned> VPIntrinsic::getMemoryPointerParamPos(Intrinsic::ID VPID) {
switch (VPID) {
default:
- return None;
-
-#define HANDLE_VP_IS_MEMOP(VPID, POINTERPOS, DATAPOS) \
- case Intrinsic::VPID: \
- return POINTERPOS;
+ break;
+#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
+#define VP_PROPERTY_MEMOP(POINTERPOS, ...) return POINTERPOS;
+#define END_REGISTER_VP_INTRINSIC(VPID) break;
#include "llvm/IR/VPIntrinsics.def"
}
+ return None;
}
/// \return The data (payload) operand of this store or scatter.
@@ -378,52 +378,51 @@ Value *VPIntrinsic::getMemoryDataParam() const {
Optional<unsigned> VPIntrinsic::getMemoryDataParamPos(Intrinsic::ID VPID) {
switch (VPID) {
default:
- return None;
-
-#define HANDLE_VP_IS_MEMOP(VPID, POINTERPOS, DATAPOS) \
- case Intrinsic::VPID: \
- return DATAPOS;
+ break;
+#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
+#define VP_PROPERTY_MEMOP(POINTERPOS, DATAPOS) return DATAPOS;
+#define END_REGISTER_VP_INTRINSIC(VPID) break;
#include "llvm/IR/VPIntrinsics.def"
}
+ return None;
}
bool VPIntrinsic::isVPIntrinsic(Intrinsic::ID ID) {
switch (ID) {
default:
- return false;
-
+ break;
#define BEGIN_REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS) \
case Intrinsic::VPID: \
- break;
+ return true;
#include "llvm/IR/VPIntrinsics.def"
}
- return true;
+ return false;
}
// Equivalent non-predicated opcode
Optional<unsigned> VPIntrinsic::getFunctionalOpcodeForVP(Intrinsic::ID ID) {
- Optional<unsigned> FunctionalOC;
switch (ID) {
default:
break;
#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
-#define HANDLE_VP_TO_OPC(OPC) FunctionalOC = Instruction::OPC;
-#define END_REGISTER_VP_INTRINSIC(...) break;
+#define VP_PROPERTY_FUNCTIONAL_OPC(OPC) return Instruction::OPC;
+#define END_REGISTER_VP_INTRINSIC(VPID) break;
#include "llvm/IR/VPIntrinsics.def"
}
-
- return FunctionalOC;
+ return None;
}
Intrinsic::ID VPIntrinsic::getForOpcode(unsigned IROPC) {
switch (IROPC) {
default:
- return Intrinsic::not_intrinsic;
+ break;
-#define HANDLE_VP_TO_OPC(OPC) case Instruction::OPC:
+#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) break;
+#define VP_PROPERTY_FUNCTIONAL_OPC(OPC) case Instruction::OPC:
#define END_REGISTER_VP_INTRINSIC(VPID) return Intrinsic::VPID;
#include "llvm/IR/VPIntrinsics.def"
}
+ return Intrinsic::not_intrinsic;
}
bool VPIntrinsic::canIgnoreVectorLengthParam() const {
@@ -516,13 +515,13 @@ Function *VPIntrinsic::getDeclarationForParams(Module *M, Intrinsic::ID VPID,
bool VPReductionIntrinsic::isVPReduction(Intrinsic::ID ID) {
switch (ID) {
default:
- return false;
-#define HANDLE_VP_REDUCTION(VPID, STARTPOS, VECTORPOS) \
- case Intrinsic::VPID: \
break;
+#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
+#define VP_PROPERTY_REDUCTION(STARTPOS, ...) return true;
+#define END_REGISTER_VP_INTRINSIC(VPID) break;
#include "llvm/IR/VPIntrinsics.def"
}
- return true;
+ return false;
}
unsigned VPReductionIntrinsic::getVectorParamPos() const {
@@ -535,24 +534,26 @@ unsigned VPReductionIntrinsic::getStartParamPos() const {
Optional<unsigned> VPReductionIntrinsic::getVectorParamPos(Intrinsic::ID ID) {
switch (ID) {
-#define HANDLE_VP_REDUCTION(VPID, STARTPOS, VECTORPOS) \
- case Intrinsic::VPID: \
- return VECTORPOS;
+#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
+#define VP_PROPERTY_REDUCTION(STARTPOS, VECTORPOS) return VECTORPOS;
+#define END_REGISTER_VP_INTRINSIC(VPID) break;
#include "llvm/IR/VPIntrinsics.def"
default:
- return None;
+ break;
}
+ return None;
}
Optional<unsigned> VPReductionIntrinsic::getStartParamPos(Intrinsic::ID ID) {
switch (ID) {
-#define HANDLE_VP_REDUCTION(VPID, STARTPOS, VECTORPOS) \
- case Intrinsic::VPID: \
- return STARTPOS;
+#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
+#define VP_PROPERTY_REDUCTION(STARTPOS, VECTORPOS) return STARTPOS;
+#define END_REGISTER_VP_INTRINSIC(VPID) break;
#include "llvm/IR/VPIntrinsics.def"
default:
- return None;
+ break;
}
+ return None;
}
Instruction::BinaryOps BinaryOpIntrinsic::getBinaryOp() const {
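
The rewritten switches rely on VPIntrinsics.def expanding the three macros in sequence for each intrinsic. A hand expansion on mock types shows the shape the preprocessor produces (the positions are assumptions for illustration, not the real .def data):

    #include <optional>

    enum class MockID { vp_store, not_vp };

    std::optional<unsigned> getMemoryPointerParamPos(MockID ID) {
      switch (ID) {
      default:
        break;
      case MockID::vp_store: // BEGIN_REGISTER_VP_INTRINSIC(vp_store, ...)
        return 1;            // VP_PROPERTY_MEMOP(1, 0)
        break;               // END_REGISTER_VP_INTRINSIC(vp_store)
      }
      return std::nullopt;   // intrinsics without the property fall through
    }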
diff --git a/llvm/lib/IR/Operator.cpp b/llvm/lib/IR/Operator.cpp
index cf309ffd6212..d15fcfbc5b9f 100644
--- a/llvm/lib/IR/Operator.cpp
+++ b/llvm/lib/IR/Operator.cpp
@@ -226,4 +226,25 @@ bool GEPOperator::collectOffset(
}
return true;
}
+
+void FastMathFlags::print(raw_ostream &O) const {
+ if (all())
+ O << " fast";
+ else {
+ if (allowReassoc())
+ O << " reassoc";
+ if (noNaNs())
+ O << " nnan";
+ if (noInfs())
+ O << " ninf";
+ if (noSignedZeros())
+ O << " nsz";
+ if (allowReciprocal())
+ O << " arcp";
+ if (allowContract())
+ O << " contract";
+ if (approxFunc())
+ O << " afn";
+ }
+}
} // namespace llvm
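
A small usage sketch of the new printer (assuming the matching declaration was added to llvm/IR/Operator.h, where FastMathFlags lives at this revision):

    #include "llvm/IR/Operator.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    void printFlagsDemo() {
      FastMathFlags FMF;
      FMF.setAllowReassoc();
      FMF.setNoNaNs();
      FMF.print(errs()); // prints " reassoc nnan"
      errs() << "\n";
    }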
diff --git a/llvm/lib/IR/PassTimingInfo.cpp b/llvm/lib/IR/PassTimingInfo.cpp
index d0c1517f480b..a03fafec9fac 100644
--- a/llvm/lib/IR/PassTimingInfo.cpp
+++ b/llvm/lib/IR/PassTimingInfo.cpp
@@ -187,7 +187,7 @@ Timer &TimePassesHandler::getPassTimer(StringRef PassID) {
Timer *T = new Timer(PassID, FullDesc, TG);
Timers.emplace_back(T);
- assert(Count == Timers.size() && "sanity check");
+ assert(Count == Timers.size() && "Timers vector not adjusted correctly.");
return *T;
}
diff --git a/llvm/lib/IR/SafepointIRVerifier.cpp b/llvm/lib/IR/SafepointIRVerifier.cpp
index 9be6de693ee3..2117527a64f0 100644
--- a/llvm/lib/IR/SafepointIRVerifier.cpp
+++ b/llvm/lib/IR/SafepointIRVerifier.cpp
@@ -6,9 +6,9 @@
//
//===----------------------------------------------------------------------===//
//
-// Run a sanity check on the IR to ensure that Safepoints - if they've been
-// inserted - were inserted correctly. In particular, look for use of
-// non-relocated values after a safepoint. It's primary use is to check the
+// Run a basic correctness check on the IR to ensure that Safepoints - if
+// they've been inserted - were inserted correctly. In particular, look for use
+// of non-relocated values after a safepoint. Its primary use is to check the
// correctness of safepoint insertion immediately after insertion, but it can
// also be used to verify that later transforms have not found a way to break
// safepoint semantics.
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index dc4370d4b6ed..154b59835b01 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
//
// This file defines the function verifier interface, that can be used for some
-// sanity checking of input to the system.
+// basic correctness checking of input to the system.
//
// Note that this does not provide full `Java style' security and verifications,
// instead it just tries to ensure that code is well-formed.
@@ -1604,7 +1604,7 @@ Verifier::visitModuleFlag(const MDNode *Op,
Assert(ID, "invalid ID operand in module flag (expected metadata string)",
Op->getOperand(1));
- // Sanity check the values for behaviors with additional requirements.
+ // Check the values for behaviors with additional requirements.
switch (MFB) {
case Module::Error:
case Module::Warning:
@@ -5269,24 +5269,32 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
Op0ElemTy =
cast<VectorType>(Call.getArgOperand(0)->getType())->getElementType();
break;
- case Intrinsic::matrix_column_major_load:
+ case Intrinsic::matrix_column_major_load: {
Stride = dyn_cast<ConstantInt>(Call.getArgOperand(1));
NumRows = cast<ConstantInt>(Call.getArgOperand(3));
NumColumns = cast<ConstantInt>(Call.getArgOperand(4));
ResultTy = cast<VectorType>(Call.getType());
- Op0ElemTy =
- cast<PointerType>(Call.getArgOperand(0)->getType())->getElementType();
+
+ PointerType *Op0PtrTy =
+ cast<PointerType>(Call.getArgOperand(0)->getType());
+ if (!Op0PtrTy->isOpaque())
+ Op0ElemTy = Op0PtrTy->getElementType();
break;
- case Intrinsic::matrix_column_major_store:
+ }
+ case Intrinsic::matrix_column_major_store: {
Stride = dyn_cast<ConstantInt>(Call.getArgOperand(2));
NumRows = cast<ConstantInt>(Call.getArgOperand(4));
NumColumns = cast<ConstantInt>(Call.getArgOperand(5));
ResultTy = cast<VectorType>(Call.getArgOperand(0)->getType());
Op0ElemTy =
cast<VectorType>(Call.getArgOperand(0)->getType())->getElementType();
- Op1ElemTy =
- cast<PointerType>(Call.getArgOperand(1)->getType())->getElementType();
+
+ PointerType *Op1PtrTy =
+ cast<PointerType>(Call.getArgOperand(1)->getType());
+ if (!Op1PtrTy->isOpaque())
+ Op1ElemTy = Op1PtrTy->getElementType();
break;
+ }
default:
llvm_unreachable("unexpected intrinsic");
}
@@ -5295,9 +5303,10 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
ResultTy->getElementType()->isFloatingPointTy(),
"Result type must be an integer or floating-point type!", IF);
- Assert(ResultTy->getElementType() == Op0ElemTy,
- "Vector element type mismatch of the result and first operand "
- "vector!", IF);
+ if (Op0ElemTy)
+ Assert(ResultTy->getElementType() == Op0ElemTy,
+ "Vector element type mismatch of the result and first operand "
+ "vector!", IF);
if (Op1ElemTy)
Assert(ResultTy->getElementType() == Op1ElemTy,
diff --git a/llvm/lib/InterfaceStub/ELFObjHandler.cpp b/llvm/lib/InterfaceStub/ELFObjHandler.cpp
index d41c7d3217d7..0d1a864f31ac 100644
--- a/llvm/lib/InterfaceStub/ELFObjHandler.cpp
+++ b/llvm/lib/InterfaceStub/ELFObjHandler.cpp
@@ -372,7 +372,7 @@ Error appendToError(Error Err, StringRef After) {
/// This function populates a DynamicEntries struct using an ELFT::DynRange.
/// After populating the struct, the members are validated with
-/// some basic sanity checks.
+/// some basic correctness checks.
///
/// @param Dyn Target DynamicEntries struct to populate.
/// @param DynTable Source dynamic table.
diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp
index 154b2d051f34..2ca921017171 100644
--- a/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/llvm/lib/MC/MCAsmStreamer.cpp
@@ -1069,16 +1069,14 @@ void MCAsmStreamer::PrintQuotedString(StringRef Data, raw_ostream &OS) const {
OS << '"';
if (MAI->hasPairedDoubleQuoteStringConstants()) {
- for (unsigned i = 0, e = Data.size(); i != e; ++i) {
- unsigned char C = Data[i];
+ for (unsigned char C : Data) {
if (C == '"')
OS << "\"\"";
else
OS << (char)C;
}
} else {
- for (unsigned i = 0, e = Data.size(); i != e; ++i) {
- unsigned char C = Data[i];
+ for (unsigned char C : Data) {
if (C == '"' || C == '\\') {
OS << '\\' << (char)C;
continue;
diff --git a/llvm/lib/MC/MCELFStreamer.cpp b/llvm/lib/MC/MCELFStreamer.cpp
index 1ba999a63113..fbf3c860368a 100644
--- a/llvm/lib/MC/MCELFStreamer.cpp
+++ b/llvm/lib/MC/MCELFStreamer.cpp
@@ -646,8 +646,6 @@ void MCELFStreamer::emitBundleAlignMode(unsigned AlignPow2) {
void MCELFStreamer::emitBundleLock(bool AlignToEnd) {
MCSection &Sec = *getCurrentSectionOnly();
- // Sanity checks
- //
if (!getAssembler().isBundlingEnabled())
report_fatal_error(".bundle_lock forbidden when bundling is disabled");
@@ -667,7 +665,6 @@ void MCELFStreamer::emitBundleLock(bool AlignToEnd) {
void MCELFStreamer::emitBundleUnlock() {
MCSection &Sec = *getCurrentSectionOnly();
- // Sanity checks
if (!getAssembler().isBundlingEnabled())
report_fatal_error(".bundle_unlock forbidden when bundling is disabled");
else if (!isBundleLocked())
diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index ddc41d0a08ab..e95019c12db7 100644
--- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -676,14 +676,14 @@ EndStmt:
getContext().getELFSection(SectionName, Type, Flags, Size, GroupName,
IsComdat, UniqueID, LinkedToSym);
getStreamer().SwitchSection(Section, Subsection);
- if (Section->getType() != Type &&
+ // Check that flags are used consistently. However, the GNU assembler permits
+ // them to be left out in subsequent uses of the same section; for
+ // compatibility, do likewise.
+ if (!TypeName.empty() && Section->getType() != Type &&
!allowSectionTypeMismatch(getContext().getTargetTriple(), SectionName,
Type))
Error(loc, "changed section type for " + SectionName + ", expected: 0x" +
utohexstr(Section->getType()));
- // Check that flags are used consistently. However, the GNU assembler permits
- // to leave out in subsequent uses of the same sections; for compatibility,
- // do likewise.
if ((extraFlags || Size || !TypeName.empty()) && Section->getFlags() != Flags)
Error(loc, "changed section flags for " + SectionName + ", expected: 0x" +
utohexstr(Section->getFlags()));
diff --git a/llvm/lib/MC/WinCOFFObjectWriter.cpp b/llvm/lib/MC/WinCOFFObjectWriter.cpp
index 646f416821ae..73c687331d30 100644
--- a/llvm/lib/MC/WinCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/WinCOFFObjectWriter.cpp
@@ -56,6 +56,8 @@ using llvm::support::endian::write32le;
namespace {
+constexpr int OffsetLabelIntervalBits = 20;
+
using name = SmallString<COFF::NameSize>;
enum AuxiliaryType {
@@ -120,6 +122,8 @@ public:
relocations Relocations;
COFFSection(StringRef Name) : Name(std::string(Name)) {}
+
+ SmallVector<COFFSymbol *, 1> OffsetSymbols;
};
class WinCOFFObjectWriter : public MCObjectWriter {
@@ -149,6 +153,7 @@ public:
symbol_list WeakDefaults;
bool UseBigObj;
+ bool UseOffsetLabels = false;
bool EmitAddrsigSection = false;
MCSectionCOFF *AddrsigSection;
@@ -174,7 +179,7 @@ public:
COFFSymbol *GetOrCreateCOFFSymbol(const MCSymbol *Symbol);
COFFSection *createSection(StringRef Name);
- void defineSection(MCSectionCOFF const &Sec);
+ void defineSection(MCSectionCOFF const &Sec, const MCAsmLayout &Layout);
COFFSymbol *getLinkedSymbol(const MCSymbol &Symbol);
void DefineSymbol(const MCSymbol &Symbol, MCAssembler &Assembler,
@@ -244,6 +249,11 @@ WinCOFFObjectWriter::WinCOFFObjectWriter(
std::unique_ptr<MCWinCOFFObjectTargetWriter> MOTW, raw_pwrite_stream &OS)
: W(OS, support::little), TargetObjectWriter(std::move(MOTW)) {
Header.Machine = TargetObjectWriter->getMachine();
+ // Some relocations on ARM64 (the 21 bit ADRP relocations) have a slightly
+ // limited range for the immediate offset (+/- 1 MB); create extra offset
+ // label symbols with regular intervals to allow referencing a
+ // non-temporary symbol that is close enough.
+ UseOffsetLabels = Header.Machine == COFF::IMAGE_FILE_MACHINE_ARM64;
}
COFFSymbol *WinCOFFObjectWriter::createSymbol(StringRef Name) {
@@ -299,7 +309,8 @@ static uint32_t getAlignment(const MCSectionCOFF &Sec) {
/// This function takes a section data object from the assembler
/// and creates the associated COFF section staging object.
-void WinCOFFObjectWriter::defineSection(const MCSectionCOFF &MCSec) {
+void WinCOFFObjectWriter::defineSection(const MCSectionCOFF &MCSec,
+ const MCAsmLayout &Layout) {
COFFSection *Section = createSection(MCSec.getName());
COFFSymbol *Symbol = createSymbol(MCSec.getName());
Section->Symbol = Symbol;
@@ -329,6 +340,20 @@ void WinCOFFObjectWriter::defineSection(const MCSectionCOFF &MCSec) {
// Bind internal COFF section to MC section.
Section->MCSection = &MCSec;
SectionMap[&MCSec] = Section;
+
+ if (UseOffsetLabels && !MCSec.getFragmentList().empty()) {
+ const uint32_t Interval = 1 << OffsetLabelIntervalBits;
+ uint32_t N = 1;
+ for (uint32_t Off = Interval, E = Layout.getSectionAddressSize(&MCSec);
+ Off < E; Off += Interval) {
+ auto Name = ("$L" + MCSec.getName() + "_" + Twine(N++)).str();
+ COFFSymbol *Label = createSymbol(Name);
+ Label->Section = Section;
+ Label->Data.StorageClass = COFF::IMAGE_SYM_CLASS_LABEL;
+ Label->Data.Value = Off;
+ Section->OffsetSymbols.push_back(Label);
+ }
+ }
}
static uint64_t getSymbolValue(const MCSymbol &Symbol,
@@ -688,7 +713,7 @@ void WinCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
// "Define" each section & symbol. This creates section & symbol
// entries in the staging area.
for (const auto &Section : Asm)
- defineSection(static_cast<const MCSectionCOFF &>(Section));
+ defineSection(static_cast<const MCSectionCOFF &>(Section), Layout);
for (const MCSymbol &Symbol : Asm.symbols())
if (!Symbol.isTemporary())
@@ -774,8 +799,23 @@ void WinCOFFObjectWriter::recordRelocation(MCAssembler &Asm,
assert(
SectionMap.find(TargetSection) != SectionMap.end() &&
"Section must already have been defined in executePostLayoutBinding!");
- Reloc.Symb = SectionMap[TargetSection]->Symbol;
+ COFFSection *Section = SectionMap[TargetSection];
+ Reloc.Symb = Section->Symbol;
FixedValue += Layout.getSymbolOffset(A);
+ // Technically, we should do the final adjustments of FixedValue (below)
+ // before picking an offset symbol, otherwise we might choose one which
+ // is slightly too far away. The relocations where it really matters
+ // (arm64 adrp relocations) don't get any offset though.
+ if (UseOffsetLabels && !Section->OffsetSymbols.empty()) {
+ uint64_t LabelIndex = FixedValue >> OffsetLabelIntervalBits;
+ if (LabelIndex > 0) {
+ if (LabelIndex <= Section->OffsetSymbols.size())
+ Reloc.Symb = Section->OffsetSymbols[LabelIndex - 1];
+ else
+ Reloc.Symb = Section->OffsetSymbols.back();
+ FixedValue -= Reloc.Symb->Data.Value;
+ }
+ }
} else {
assert(
SymbolMap.find(&A) != SymbolMap.end() &&
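The offset-label scheme above is simple arithmetic: with OffsetLabelIntervalBits = 20, labels are planted every 1 MiB, and a fixup at byte offset FixedValue resolves against the closest preceding label. A minimal standalone sketch of the selection logic, not part of the patch, with hypothetical names:

#include <cstdint>
#include <utility>

constexpr unsigned OffsetLabelIntervalBits = 20; // one label per 1 MiB

// Returns the 1-based label index (0 meaning "use the section symbol") and
// the residual addend, mirroring the clamped lookup in recordRelocation().
std::pair<uint64_t, uint64_t> pickOffsetLabel(uint64_t FixedValue,
                                              uint64_t NumLabels) {
  uint64_t LabelIndex = FixedValue >> OffsetLabelIntervalBits;
  if (LabelIndex == 0 || NumLabels == 0)
    return {0, FixedValue};
  if (LabelIndex > NumLabels)
    LabelIndex = NumLabels; // clamp to the last label, as above
  return {LabelIndex, FixedValue - (LabelIndex << OffsetLabelIntervalBits)};
}

For example, FixedValue = 0x2A0000 with three labels yields label 2 (value 0x200000) and a residual of 0xA0000, comfortably inside the +/- 1 MB range of an ADRP relocation.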
diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp
index 0ab845a4c28f..d8283f8d2682 100644
--- a/llvm/lib/MCA/InstrBuilder.cpp
+++ b/llvm/lib/MCA/InstrBuilder.cpp
@@ -612,7 +612,7 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');
- // Sanity check on the instruction descriptor.
+ // Validation check on the instruction descriptor.
if (Error Err = verifyInstrDesc(*ID, MCI))
return std::move(Err);
diff --git a/llvm/lib/MCA/Stages/ExecuteStage.cpp b/llvm/lib/MCA/Stages/ExecuteStage.cpp
index 6e021d3d9232..2b11f73b19df 100644
--- a/llvm/lib/MCA/Stages/ExecuteStage.cpp
+++ b/llvm/lib/MCA/Stages/ExecuteStage.cpp
@@ -188,7 +188,7 @@ Error ExecuteStage::execute(InstRef &IR) {
#ifndef NDEBUG
// Ensure that the HWS has not stored this instruction in its queues.
- HWS.sanityCheck(IR);
+ HWS.instructionCheck(IR);
#endif
if (IR.getInstruction()->isEliminated())
diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp
index 50035d6c7523..cf1f12d9a9a7 100644
--- a/llvm/lib/Object/ELFObjectFile.cpp
+++ b/llvm/lib/Object/ELFObjectFile.cpp
@@ -682,7 +682,7 @@ readDynsymVersionsImpl(const ELFFile<ELFT> &EF,
std::vector<VersionEntry> Ret;
size_t I = 0;
- for (auto It = Symbols.begin(), E = Symbols.end(); It != E; ++It) {
+ for (const ELFSymbolRef &Sym : Symbols) {
++I;
Expected<const typename ELFT::Versym *> VerEntryOrErr =
EF.template getEntry<typename ELFT::Versym>(*VerSec, I);
@@ -691,7 +691,7 @@ readDynsymVersionsImpl(const ELFFile<ELFT> &EF,
" from " + describe(EF, *VerSec) + ": " +
toString(VerEntryOrErr.takeError()));
- Expected<uint32_t> FlagsOrErr = It->getFlags();
+ Expected<uint32_t> FlagsOrErr = Sym.getFlags();
if (!FlagsOrErr)
return createError("unable to read flags for symbol with index " +
Twine(I) + ": " + toString(FlagsOrErr.takeError()));
diff --git a/llvm/lib/ObjectYAML/COFFEmitter.cpp b/llvm/lib/ObjectYAML/COFFEmitter.cpp
index 5f38ca13cfc2..66ad16db1ba4 100644
--- a/llvm/lib/ObjectYAML/COFFEmitter.cpp
+++ b/llvm/lib/ObjectYAML/COFFEmitter.cpp
@@ -476,29 +476,25 @@ static bool writeCOFF(COFFParser &CP, raw_ostream &OS) {
assert(OS.tell() == CP.SectionTableStart);
// Output section table.
- for (std::vector<COFFYAML::Section>::iterator i = CP.Obj.Sections.begin(),
- e = CP.Obj.Sections.end();
- i != e; ++i) {
- OS.write(i->Header.Name, COFF::NameSize);
- OS << binary_le(i->Header.VirtualSize)
- << binary_le(i->Header.VirtualAddress)
- << binary_le(i->Header.SizeOfRawData)
- << binary_le(i->Header.PointerToRawData)
- << binary_le(i->Header.PointerToRelocations)
- << binary_le(i->Header.PointerToLineNumbers)
- << binary_le(i->Header.NumberOfRelocations)
- << binary_le(i->Header.NumberOfLineNumbers)
- << binary_le(i->Header.Characteristics);
+ for (const COFFYAML::Section &S : CP.Obj.Sections) {
+ OS.write(S.Header.Name, COFF::NameSize);
+ OS << binary_le(S.Header.VirtualSize)
+ << binary_le(S.Header.VirtualAddress)
+ << binary_le(S.Header.SizeOfRawData)
+ << binary_le(S.Header.PointerToRawData)
+ << binary_le(S.Header.PointerToRelocations)
+ << binary_le(S.Header.PointerToLineNumbers)
+ << binary_le(S.Header.NumberOfRelocations)
+ << binary_le(S.Header.NumberOfLineNumbers)
+ << binary_le(S.Header.Characteristics);
}
assert(OS.tell() == CP.SectionTableStart + CP.SectionTableSize);
unsigned CurSymbol = 0;
StringMap<unsigned> SymbolTableIndexMap;
- for (std::vector<COFFYAML::Symbol>::iterator I = CP.Obj.Symbols.begin(),
- E = CP.Obj.Symbols.end();
- I != E; ++I) {
- SymbolTableIndexMap[I->Name] = CurSymbol;
- CurSymbol += 1 + I->Header.NumberOfAuxSymbols;
+ for (const COFFYAML::Symbol &Sym : CP.Obj.Symbols) {
+ SymbolTableIndexMap[Sym.Name] = CurSymbol;
+ CurSymbol += 1 + Sym.Header.NumberOfAuxSymbols;
}
// Output section data.
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index fdf9aeae1622..e0dde4433d24 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -155,6 +155,10 @@ void ScalarEnumerationTraits<ELFYAML::ELF_NT>::enumeration(
ECase(NT_FREEBSD_PROCSTAT_OSREL);
ECase(NT_FREEBSD_PROCSTAT_PSSTRINGS);
ECase(NT_FREEBSD_PROCSTAT_AUXV);
+ // NetBSD core note types.
+ ECase(NT_NETBSDCORE_PROCINFO);
+ ECase(NT_NETBSDCORE_AUXV);
+ ECase(NT_NETBSDCORE_LWPSTATUS);
// OpenBSD core note types.
ECase(NT_OPENBSD_PROCINFO);
ECase(NT_OPENBSD_AUXV);
diff --git a/llvm/lib/ObjectYAML/MachOEmitter.cpp b/llvm/lib/ObjectYAML/MachOEmitter.cpp
index c653c29ec9a7..e5ffb12df434 100644
--- a/llvm/lib/ObjectYAML/MachOEmitter.cpp
+++ b/llvm/lib/ObjectYAML/MachOEmitter.cpp
@@ -54,6 +54,7 @@ private:
void writeNameList(raw_ostream &OS);
void writeStringTable(raw_ostream &OS);
void writeExportTrie(raw_ostream &OS);
+ void writeDynamicSymbolTable(raw_ostream &OS);
void dumpExportEntry(raw_ostream &OS, MachOYAML::ExportEntry &Entry);
void ZeroToOffset(raw_ostream &OS, size_t offset);
@@ -482,6 +483,7 @@ void MachOWriter::writeLinkEditData(raw_ostream &OS) {
MachO::dyld_info_command *DyldInfoOnlyCmd = 0;
MachO::symtab_command *SymtabCmd = 0;
+ MachO::dysymtab_command *DSymtabCmd = 0;
for (auto &LC : Obj.LoadCommands) {
switch (LC.Data.load_command_data.cmd) {
case MachO::LC_SYMTAB:
@@ -504,6 +506,11 @@ void MachOWriter::writeLinkEditData(raw_ostream &OS) {
WriteQueue.push_back(std::make_pair(DyldInfoOnlyCmd->export_off,
&MachOWriter::writeExportTrie));
break;
+ case MachO::LC_DYSYMTAB:
+ DSymtabCmd = &LC.Data.dysymtab_command_data;
+ WriteQueue.push_back(std::make_pair(
+ DSymtabCmd->indirectsymoff, &MachOWriter::writeDynamicSymbolTable));
+ break;
}
}
@@ -556,6 +563,12 @@ void MachOWriter::writeStringTable(raw_ostream &OS) {
}
}
+void MachOWriter::writeDynamicSymbolTable(raw_ostream &OS) {
+ for (auto Data : Obj.LinkEdit.IndirectSymbols)
+ OS.write(reinterpret_cast<const char *>(&Data),
+ sizeof(yaml::Hex32::BaseType));
+}
+
class UniversalWriter {
public:
UniversalWriter(yaml::YamlObjectFile &ObjectFile)
diff --git a/llvm/lib/ObjectYAML/MachOYAML.cpp b/llvm/lib/ObjectYAML/MachOYAML.cpp
index c9562bd72258..f32009458110 100644
--- a/llvm/lib/ObjectYAML/MachOYAML.cpp
+++ b/llvm/lib/ObjectYAML/MachOYAML.cpp
@@ -164,6 +164,7 @@ void MappingTraits<MachOYAML::LinkEditData>::mapping(
IO.mapOptional("ExportTrie", LinkEditData.ExportTrie);
IO.mapOptional("NameList", LinkEditData.NameList);
IO.mapOptional("StringTable", LinkEditData.StringTable);
+ IO.mapOptional("IndirectSymbols", LinkEditData.IndirectSymbols);
}
void MappingTraits<MachOYAML::RebaseOpcode>::mapping(
diff --git a/llvm/lib/Option/OptTable.cpp b/llvm/lib/Option/OptTable.cpp
index 37c2fcbab181..19e05b9272bb 100644
--- a/llvm/lib/Option/OptTable.cpp
+++ b/llvm/lib/Option/OptTable.cpp
@@ -150,10 +150,9 @@ OptTable::OptTable(ArrayRef<Info> OptionInfos, bool IgnoreCase)
for (StringSet<>::const_iterator I = PrefixesUnion.begin(),
E = PrefixesUnion.end(); I != E; ++I) {
StringRef Prefix = I->getKey();
- for (StringRef::const_iterator C = Prefix.begin(), CE = Prefix.end();
- C != CE; ++C)
- if (!is_contained(PrefixChars, *C))
- PrefixChars.push_back(*C);
+ for (char C : Prefix)
+ if (!is_contained(PrefixChars, C))
+ PrefixChars.push_back(C);
}
}
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index ac5dfdbdd540..de1b0ace7876 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1765,6 +1765,8 @@ ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
if (LTOPreLink)
addRequiredLTOPreLinkPasses(MPM);
+ MPM.addPass(createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));
+
return MPM;
}
diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp
index 8e6be6730ea4..27a6c519ff82 100644
--- a/llvm/lib/Passes/StandardInstrumentations.cpp
+++ b/llvm/lib/Passes/StandardInstrumentations.cpp
@@ -225,8 +225,8 @@ std::string doSystemDiff(StringRef Before, StringRef After,
return "Unable to read result.";
// Clean up.
- for (unsigned I = 0; I < NumFiles; ++I) {
- std::error_code EC = sys::fs::remove(FileName[I]);
+ for (const std::string &I : FileName) {
+ std::error_code EC = sys::fs::remove(I);
if (EC)
return "Unable to remove temporary file.";
}
diff --git a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
index 94bd4807041d..c6691e321b3c 100644
--- a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
+++ b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
@@ -83,7 +83,6 @@ Error RawCoverageReader::readIntMax(uint64_t &Result, uint64_t MaxPlus1) {
Error RawCoverageReader::readSize(uint64_t &Result) {
if (auto Err = readULEB128(Result))
return Err;
- // Sanity check the number.
if (Result > Data.size())
return make_error<CoverageMapError>(coveragemap_error::malformed);
return Error::success();
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 1168ad27fe52..ab3487ecffe8 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -657,19 +657,18 @@ void InstrProfValueSiteRecord::merge(InstrProfValueSiteRecord &Input,
Input.sortByTargetValues();
auto I = ValueData.begin();
auto IE = ValueData.end();
- for (auto J = Input.ValueData.begin(), JE = Input.ValueData.end(); J != JE;
- ++J) {
- while (I != IE && I->Value < J->Value)
+ for (const InstrProfValueData &J : Input.ValueData) {
+ while (I != IE && I->Value < J.Value)
++I;
- if (I != IE && I->Value == J->Value) {
+ if (I != IE && I->Value == J.Value) {
bool Overflowed;
- I->Count = SaturatingMultiplyAdd(J->Count, Weight, I->Count, &Overflowed);
+ I->Count = SaturatingMultiplyAdd(J.Count, Weight, I->Count, &Overflowed);
if (Overflowed)
Warn(instrprof_error::counter_overflow);
++I;
continue;
}
- ValueData.insert(I, *J);
+ ValueData.insert(I, J);
}
}
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index b4e8025dbef9..885c1fe49240 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -62,7 +62,6 @@ InstrProfReader::create(const Twine &Path) {
Expected<std::unique_ptr<InstrProfReader>>
InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
- // Sanity check the buffer.
if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max())
return make_error<InstrProfError>(instrprof_error::too_large);
@@ -113,7 +112,6 @@ IndexedInstrProfReader::create(const Twine &Path, const Twine &RemappingPath) {
Expected<std::unique_ptr<IndexedInstrProfReader>>
IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
std::unique_ptr<MemoryBuffer> RemappingBuffer) {
- // Sanity check the buffer.
if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max())
return make_error<InstrProfError>(instrprof_error::too_large);
diff --git a/llvm/lib/ProfileData/RawMemProfReader.cpp b/llvm/lib/ProfileData/RawMemProfReader.cpp
new file mode 100644
index 000000000000..f8d13c74fac3
--- /dev/null
+++ b/llvm/lib/ProfileData/RawMemProfReader.cpp
@@ -0,0 +1,121 @@
+//===- RawMemProfReader.cpp - Instrumented memory profiling reader --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for reading MemProf profiling data.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cstdint>
+#include <type_traits>
+
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/MemProfData.inc"
+#include "llvm/ProfileData/RawMemProfReader.h"
+
+namespace llvm {
+namespace memprof {
+namespace {
+
+struct Summary {
+ uint64_t Version;
+ uint64_t TotalSizeBytes;
+ uint64_t NumSegments;
+ uint64_t NumMIBInfo;
+ uint64_t NumStackOffsets;
+};
+
+template <class T = uint64_t> inline T alignedRead(const char *Ptr) {
+ static_assert(std::is_pod<T>::value, "Not a pod type.");
+ assert(reinterpret_cast<size_t>(Ptr) % sizeof(T) == 0 && "Unaligned Read");
+ return *reinterpret_cast<const T *>(Ptr);
+}
+
+Summary computeSummary(const char *Start) {
+ auto *H = reinterpret_cast<const Header *>(Start);
+
+ // Check alignment while reading the number of items in each section.
+ return Summary{
+ H->Version,
+ H->TotalSize,
+ alignedRead(Start + H->SegmentOffset),
+ alignedRead(Start + H->MIBOffset),
+ alignedRead(Start + H->StackOffset),
+ };
+}
+
+} // namespace
+
+Expected<std::unique_ptr<RawMemProfReader>>
+RawMemProfReader::create(const Twine &Path) {
+ auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path, /*IsText=*/true);
+ if (std::error_code EC = BufferOr.getError())
+ return errorCodeToError(EC);
+
+ std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
+
+ if (Buffer->getBufferSize() == 0)
+ return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
+
+ if (!RawMemProfReader::hasFormat(*Buffer))
+ return make_error<InstrProfError>(instrprof_error::bad_magic);
+
+ if (Buffer->getBufferSize() < sizeof(Header)) {
+ return make_error<InstrProfError>(instrprof_error::truncated);
+ }
+
+ // The size of the buffer can be > header total size since we allow repeated
+ // serialization of memprof profiles to the same file.
+ uint64_t TotalSize = 0;
+ const char *Next = Buffer->getBufferStart();
+ while (Next < Buffer->getBufferEnd()) {
+ auto *H = reinterpret_cast<const Header *>(Next);
+ if (H->Version != MEMPROF_RAW_VERSION) {
+ return make_error<InstrProfError>(instrprof_error::unsupported_version);
+ }
+
+ TotalSize += H->TotalSize;
+ Next += H->TotalSize;
+ }
+
+ if (Buffer->getBufferSize() != TotalSize) {
+ return make_error<InstrProfError>(instrprof_error::malformed);
+ }
+
+ return std::make_unique<RawMemProfReader>(std::move(Buffer));
+}
+
+bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
+ if (Buffer.getBufferSize() < sizeof(uint64_t))
+ return false;
+ // Aligned read to validate that the buffer was allocated with at least
+ // 8-byte alignment.
+ const uint64_t Magic = alignedRead(Buffer.getBufferStart());
+ return Magic == MEMPROF_RAW_MAGIC_64;
+}
+
+void RawMemProfReader::printSummaries(raw_ostream &OS) const {
+ int Count = 0;
+ const char *Next = DataBuffer->getBufferStart();
+ while (Next < DataBuffer->getBufferEnd()) {
+ auto Summary = computeSummary(Next);
+ OS << "MemProf Profile " << ++Count << "\n";
+ OS << " Version: " << Summary.Version << "\n";
+ OS << " TotalSizeBytes: " << Summary.TotalSizeBytes << "\n";
+ OS << " NumSegments: " << Summary.NumSegments << "\n";
+ OS << " NumMIBInfo: " << Summary.NumMIBInfo << "\n";
+ OS << " NumStackOffsets: " << Summary.NumStackOffsets << "\n";
+ // TODO: Print the build ids once we can record them using the
+ // sanitizer_procmaps library for linux.
+
+ auto *H = reinterpret_cast<const Header *>(Next);
+ Next += H->TotalSize;
+ }
+}
+
+} // namespace memprof
+} // namespace llvm
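create() accepts several raw memprof dumps concatenated into one file, so it walks the header chain and requires the TotalSize fields to tile the buffer exactly. A minimal sketch of that invariant, assuming a simplified stand-in for the real Header from MemProfData.inc and adding a guard against a zero TotalSize:

#include <cstddef>
#include <cstdint>

struct RawHeader { // simplified stand-in; the real layout is in MemProfData.inc
  uint64_t Magic, Version, TotalSize;
};

// True if Buf consists of back-to-back profiles whose TotalSize fields
// cover the buffer exactly, the same shape of check as in create() above.
bool isWellFormedChain(const char *Buf, size_t Size, uint64_t ExpectedVersion) {
  size_t Off = 0;
  while (Off < Size) {
    if (Size - Off < sizeof(RawHeader))
      return false; // truncated trailing header
    auto *H = reinterpret_cast<const RawHeader *>(Buf + Off);
    if (H->Version != ExpectedVersion || H->TotalSize == 0)
      return false;
    Off += H->TotalSize;
  }
  return Off == Size;
}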
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index c99a19020511..eefb7c2ba627 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -1709,7 +1709,7 @@ setupMemoryBuffer(const Twine &Filename) {
return EC;
auto Buffer = std::move(BufferOrErr.get());
- // Sanity check the file.
+ // Check the file.
if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint32_t>::max())
return sampleprof_error::too_large;
diff --git a/llvm/lib/Support/AArch64TargetParser.cpp b/llvm/lib/Support/AArch64TargetParser.cpp
index b3136a91e7f5..a3e41ccd199c 100644
--- a/llvm/lib/Support/AArch64TargetParser.cpp
+++ b/llvm/lib/Support/AArch64TargetParser.cpp
@@ -240,52 +240,4 @@ AArch64::ArchKind AArch64::parseCPUArch(StringRef CPU) {
return C.ArchID;
}
return ArchKind::INVALID;
-}
-
-// Parse a branch protection specification, which has the form
-// standard | none | [bti,pac-ret[+b-key,+leaf]*]
-// Returns true on success, with individual elements of the specification
-// returned in `PBP`. Returns false in error, with `Err` containing
-// an erroneous part of the spec.
-bool AArch64::parseBranchProtection(StringRef Spec, ParsedBranchProtection &PBP,
- StringRef &Err) {
- PBP = {"none", "a_key", false};
- if (Spec == "none")
- return true; // defaults are ok
-
- if (Spec == "standard") {
- PBP.Scope = "non-leaf";
- PBP.BranchTargetEnforcement = true;
- return true;
- }
-
- SmallVector<StringRef, 4> Opts;
- Spec.split(Opts, "+");
- for (int I = 0, E = Opts.size(); I != E; ++I) {
- StringRef Opt = Opts[I].trim();
- if (Opt == "bti") {
- PBP.BranchTargetEnforcement = true;
- continue;
- }
- if (Opt == "pac-ret") {
- PBP.Scope = "non-leaf";
- for (; I + 1 != E; ++I) {
- StringRef PACOpt = Opts[I + 1].trim();
- if (PACOpt == "leaf")
- PBP.Scope = "all";
- else if (PACOpt == "b-key")
- PBP.Key = "b_key";
- else
- break;
- }
- continue;
- }
- if (Opt == "")
- Err = "<empty>";
- else
- Err = Opt;
- return false;
- }
-
- return true;
-}
+}
\ No newline at end of file
diff --git a/llvm/lib/Support/ARMAttributeParser.cpp b/llvm/lib/Support/ARMAttributeParser.cpp
index 459691923af8..241cfb1eedbe 100644
--- a/llvm/lib/Support/ARMAttributeParser.cpp
+++ b/llvm/lib/Support/ARMAttributeParser.cpp
@@ -59,6 +59,10 @@ const ARMAttributeParser::DisplayHandler ARMAttributeParser::displayRoutines[] =
ATTRIBUTE_HANDLER(DSP_extension),
ATTRIBUTE_HANDLER(T2EE_use),
ATTRIBUTE_HANDLER(Virtualization_use),
+ ATTRIBUTE_HANDLER(PAC_extension),
+ ATTRIBUTE_HANDLER(BTI_extension),
+ ATTRIBUTE_HANDLER(PACRET_use),
+ ATTRIBUTE_HANDLER(BTI_use),
ATTRIBUTE_HANDLER(nodefaults),
};
@@ -350,6 +354,28 @@ Error ARMAttributeParser::Virtualization_use(AttrType tag) {
return parseStringAttribute("Virtualization_use", tag, makeArrayRef(strings));
}
+Error ARMAttributeParser::PAC_extension(ARMBuildAttrs::AttrType tag) {
+ static const char *strings[] = {"Not Permitted", "Permitted in NOP space",
+ "Permitted"};
+ return parseStringAttribute("PAC_extension", tag, makeArrayRef(strings));
+}
+
+Error ARMAttributeParser::BTI_extension(ARMBuildAttrs::AttrType tag) {
+ static const char *strings[] = {"Not Permitted", "Permitted in NOP space",
+ "Permitted"};
+ return parseStringAttribute("BTI_extension", tag, makeArrayRef(strings));
+}
+
+Error ARMAttributeParser::PACRET_use(ARMBuildAttrs::AttrType tag) {
+ static const char *strings[] = {"Not Used", "Used"};
+ return parseStringAttribute("PACRET_use", tag, makeArrayRef(strings));
+}
+
+Error ARMAttributeParser::BTI_use(ARMBuildAttrs::AttrType tag) {
+ static const char *strings[] = {"Not Used", "Used"};
+ return parseStringAttribute("BTI_use", tag, makeArrayRef(strings));
+}
+
Error ARMAttributeParser::nodefaults(AttrType tag) {
uint64_t value = de.getULEB128(cursor);
printAttribute(tag, value, "Unspecified Tags UNDEFINED");
diff --git a/llvm/lib/Support/ARMBuildAttrs.cpp b/llvm/lib/Support/ARMBuildAttrs.cpp
index f20521f2a2d4..815cfc62a4b0 100644
--- a/llvm/lib/Support/ARMBuildAttrs.cpp
+++ b/llvm/lib/Support/ARMBuildAttrs.cpp
@@ -50,6 +50,10 @@ static const TagNameItem tagData[] = {
{ARMBuildAttrs::MPextension_use, "Tag_MPextension_use"},
{ARMBuildAttrs::DIV_use, "Tag_DIV_use"},
{ARMBuildAttrs::DSP_extension, "Tag_DSP_extension"},
+ {ARMBuildAttrs::PAC_extension, "Tag_PAC_extension"},
+ {ARMBuildAttrs::BTI_extension, "Tag_BTI_extension"},
+ {ARMBuildAttrs::BTI_use, "Tag_BTI_use"},
+ {ARMBuildAttrs::PACRET_use, "Tag_PACRET_use"},
{ARMBuildAttrs::nodefaults, "Tag_nodefaults"},
{ARMBuildAttrs::also_compatible_with, "Tag_also_compatible_with"},
{ARMBuildAttrs::T2EE_use, "Tag_T2EE_use"},
diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp
index e64934aa90cc..5b7004c86f5a 100644
--- a/llvm/lib/Support/CommandLine.cpp
+++ b/llvm/lib/Support/CommandLine.cpp
@@ -2656,10 +2656,13 @@ cl::getRegisteredSubcommands() {
void cl::HideUnrelatedOptions(cl::OptionCategory &Category, SubCommand &Sub) {
initCommonOptions();
for (auto &I : Sub.OptionsMap) {
+ bool Unrelated = true;
for (auto &Cat : I.second->Categories) {
- if (Cat != &Category && Cat != &CommonOptions->GenericCategory)
- I.second->setHiddenFlag(cl::ReallyHidden);
+ if (Cat == &Category || Cat == &CommonOptions->GenericCategory)
+ Unrelated = false;
}
+ if (Unrelated)
+ I.second->setHiddenFlag(cl::ReallyHidden);
}
}
@@ -2667,11 +2670,14 @@ void cl::HideUnrelatedOptions(ArrayRef<const cl::OptionCategory *> Categories,
SubCommand &Sub) {
initCommonOptions();
for (auto &I : Sub.OptionsMap) {
+ bool Unrelated = true;
for (auto &Cat : I.second->Categories) {
- if (!is_contained(Categories, Cat) &&
- Cat != &CommonOptions->GenericCategory)
- I.second->setHiddenFlag(cl::ReallyHidden);
+ if (is_contained(Categories, Cat) ||
+ Cat == &CommonOptions->GenericCategory)
+ Unrelated = false;
}
+ if (Unrelated)
+ I.second->setHiddenFlag(cl::ReallyHidden);
}
}
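The rewritten loops change the predicate from "hide if any of the option's categories is unrelated" to "hide only if all of them are", which matters for options registered in more than one category. A compact model of the corrected predicate, with container types standing in for the real cl:: structures:

#include <algorithm>
#include <vector>

// Pointers stand in for cl::OptionCategory*. An option stays visible if any
// of its categories is requested or is the generic category; it is hidden
// only when none qualify, matching the fixed loops above.
bool shouldHide(const std::vector<const void *> &OptionCats,
                const std::vector<const void *> &KeepCats,
                const void *GenericCat) {
  return std::none_of(OptionCats.begin(), OptionCats.end(),
                      [&](const void *Cat) {
                        return Cat == GenericCat ||
                               std::find(KeepCats.begin(), KeepCats.end(),
                                         Cat) != KeepCats.end();
                      });
}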
diff --git a/llvm/lib/Support/HTTPClient.cpp b/llvm/lib/Support/HTTPClient.cpp
new file mode 100644
index 000000000000..68ba56d1fe50
--- /dev/null
+++ b/llvm/lib/Support/HTTPClient.cpp
@@ -0,0 +1,97 @@
+//===-- llvm/Support/HTTPClient.cpp - HTTP client library -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+///
+/// This file defines the methods of the HTTPRequest, HTTPClient, and
+/// BufferedHTTPResponseHandler classes.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/HTTPClient.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+using namespace llvm;
+
+HTTPRequest::HTTPRequest(StringRef Url) { this->Url = Url.str(); }
+
+bool operator==(const HTTPRequest &A, const HTTPRequest &B) {
+ return A.Url == B.Url && A.Method == B.Method &&
+ A.FollowRedirects == B.FollowRedirects;
+}
+
+HTTPResponseHandler::~HTTPResponseHandler() = default;
+
+static inline bool parseContentLengthHeader(StringRef LineRef,
+ size_t &ContentLength) {
+ // Content-Length is a mandatory header, and the only one we handle.
+ return LineRef.consume_front("Content-Length: ") &&
+ to_integer(LineRef.trim(), ContentLength, 10);
+}
+
+Error BufferedHTTPResponseHandler::handleHeaderLine(StringRef HeaderLine) {
+ if (ResponseBuffer.Body)
+ return Error::success();
+
+ size_t ContentLength;
+ if (parseContentLengthHeader(HeaderLine, ContentLength))
+ ResponseBuffer.Body =
+ WritableMemoryBuffer::getNewUninitMemBuffer(ContentLength);
+
+ return Error::success();
+}
+
+Error BufferedHTTPResponseHandler::handleBodyChunk(StringRef BodyChunk) {
+ if (!ResponseBuffer.Body)
+ return createStringError(errc::io_error,
+ "Unallocated response buffer. HTTP Body data "
+ "received before Content-Length header.");
+ if (Offset + BodyChunk.size() > ResponseBuffer.Body->getBufferSize())
+ return createStringError(errc::io_error,
+ "Content size exceeds buffer size.");
+ memcpy(ResponseBuffer.Body->getBufferStart() + Offset, BodyChunk.data(),
+ BodyChunk.size());
+ Offset += BodyChunk.size();
+ return Error::success();
+}
+
+Error BufferedHTTPResponseHandler::handleStatusCode(unsigned Code) {
+ ResponseBuffer.Code = Code;
+ return Error::success();
+}
+
+Expected<HTTPResponseBuffer> HTTPClient::perform(const HTTPRequest &Request) {
+ BufferedHTTPResponseHandler Handler;
+ if (Error Err = perform(Request, Handler))
+ return std::move(Err);
+ return std::move(Handler.ResponseBuffer);
+}
+
+Expected<HTTPResponseBuffer> HTTPClient::get(StringRef Url) {
+ HTTPRequest Request(Url);
+ return perform(Request);
+}
+
+HTTPClient::HTTPClient() = default;
+
+HTTPClient::~HTTPClient() = default;
+
+bool HTTPClient::isAvailable() { return false; }
+
+void HTTPClient::cleanup() {}
+
+void HTTPClient::setTimeout(std::chrono::milliseconds Timeout) {}
+
+Error HTTPClient::perform(const HTTPRequest &Request,
+ HTTPResponseHandler &Handler) {
+ llvm_unreachable("No HTTP Client implementation available.");
+}
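HTTPClient::get() above drives perform() through a BufferedHTTPResponseHandler, which sizes its body buffer from the Content-Length header and rejects chunks that would overflow it. A usage sketch, assuming isAvailable() is a static member as its definition suggests; with this stub backend it reports unavailability, so a real transport implementation must be linked in:

#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/HTTPClient.h"
using namespace llvm;

// Fetch a URL and return the HTTP status code from the buffered response.
Expected<unsigned> fetchStatus(StringRef Url) {
  if (!HTTPClient::isAvailable())
    return createStringError(errc::io_error, "no HTTP client available");
  HTTPClient Client;
  Expected<HTTPResponseBuffer> Response = Client.get(Url);
  if (!Response)
    return Response.takeError();
  return Response->Code;
}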
diff --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp
index 90483817c302..554e3248524c 100644
--- a/llvm/lib/Support/KnownBits.cpp
+++ b/llvm/lib/Support/KnownBits.cpp
@@ -421,11 +421,10 @@ KnownBits KnownBits::mul(const KnownBits &LHS, const KnownBits &RHS,
"Self multiplication knownbits mismatch");
// Compute a conservative estimate for high known-0 bits.
- unsigned LeadZ =
- std::max(LHS.countMinLeadingZeros() + RHS.countMinLeadingZeros(),
- BitWidth) -
- BitWidth;
- LeadZ = std::min(LeadZ, BitWidth);
+ unsigned LHSLeadZ = LHS.countMinLeadingZeros();
+ unsigned RHSLeadZ = RHS.countMinLeadingZeros();
+ unsigned LeadZ = std::max(LHSLeadZ + RHSLeadZ, BitWidth) - BitWidth;
+ assert(LeadZ <= BitWidth && "More zeros than bits?");
// The result of the bottom bits of an integer multiply can be
// inferred by looking at the bottom bits of both operands and
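The bound holds because operands with M and N known leading zeros fit in BW - M and BW - N bits, so their product fits in 2*BW - M - N bits, leaving at least M + N - BW leading zeros in a BW-bit result (clamped at zero). A tiny model of the computation:

#include <algorithm>
#include <cassert>

// Conservative count of known leading zeros in a BW-bit product whose
// operands have at least M and N leading zeros, as derived above.
unsigned minLeadingZerosOfMul(unsigned BW, unsigned M, unsigned N) {
  unsigned LeadZ = std::max(M + N, BW) - BW;
  assert(LeadZ <= BW && "More zeros than bits?");
  return LeadZ;
}

// e.g. BW = 8, M = 5, N = 4: operands are below 2^3 and 2^4, the product is
// below 2^7, so minLeadingZerosOfMul(8, 5, 4) == 1.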
diff --git a/llvm/lib/Support/Regex.cpp b/llvm/lib/Support/Regex.cpp
index 0d5cc1c00db1..7a804a1a2297 100644
--- a/llvm/lib/Support/Regex.cpp
+++ b/llvm/lib/Support/Regex.cpp
@@ -218,10 +218,10 @@ bool Regex::isLiteralERE(StringRef Str) {
std::string Regex::escape(StringRef String) {
std::string RegexStr;
- for (unsigned i = 0, e = String.size(); i != e; ++i) {
- if (strchr(RegexMetachars, String[i]))
+ for (char C : String) {
+ if (strchr(RegexMetachars, C))
RegexStr += '\\';
- RegexStr += String[i];
+ RegexStr += C;
}
return RegexStr;
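escape() backslash-prefixes every character found in RegexMetachars so the returned pattern matches its input literally. A small usage sketch:

#include "llvm/Support/Regex.h"
#include <cassert>
using namespace llvm;

void escapeExample() {
  // '.' and '+' are ERE metacharacters, so both get a backslash prefix.
  assert(Regex::escape("a.b+c") == "a\\.b\\+c");
  Regex R(Regex::escape("a.b+c"));
  assert(R.match("a.b+c") && !R.match("axb+c")); // matches only literally
}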
diff --git a/llvm/lib/Support/StringExtras.cpp b/llvm/lib/Support/StringExtras.cpp
index 8abf9f7ce0f1..5683d7005584 100644
--- a/llvm/lib/Support/StringExtras.cpp
+++ b/llvm/lib/Support/StringExtras.cpp
@@ -60,8 +60,7 @@ void llvm::SplitString(StringRef Source,
}
void llvm::printEscapedString(StringRef Name, raw_ostream &Out) {
- for (unsigned i = 0, e = Name.size(); i != e; ++i) {
- unsigned char C = Name[i];
+ for (unsigned char C : Name) {
if (C == '\\')
Out << '\\' << C;
else if (isPrint(C) && C != '"')
diff --git a/llvm/lib/Support/StringRef.cpp b/llvm/lib/Support/StringRef.cpp
index c532a1abe906..652303fdb6a0 100644
--- a/llvm/lib/Support/StringRef.cpp
+++ b/llvm/lib/Support/StringRef.cpp
@@ -227,8 +227,8 @@ size_t StringRef::rfind_insensitive(StringRef Str) const {
StringRef::size_type StringRef::find_first_of(StringRef Chars,
size_t From) const {
std::bitset<1 << CHAR_BIT> CharBits;
- for (size_type i = 0; i != Chars.size(); ++i)
- CharBits.set((unsigned char)Chars[i]);
+ for (char C : Chars)
+ CharBits.set((unsigned char)C);
for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
if (CharBits.test((unsigned char)Data[i]))
@@ -252,8 +252,8 @@ StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const {
StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
size_t From) const {
std::bitset<1 << CHAR_BIT> CharBits;
- for (size_type i = 0; i != Chars.size(); ++i)
- CharBits.set((unsigned char)Chars[i]);
+ for (char C : Chars)
+ CharBits.set((unsigned char)C);
for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
if (!CharBits.test((unsigned char)Data[i]))
@@ -268,8 +268,8 @@ StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
StringRef::size_type StringRef::find_last_of(StringRef Chars,
size_t From) const {
std::bitset<1 << CHAR_BIT> CharBits;
- for (size_type i = 0; i != Chars.size(); ++i)
- CharBits.set((unsigned char)Chars[i]);
+ for (char C : Chars)
+ CharBits.set((unsigned char)C);
for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
if (CharBits.test((unsigned char)Data[i]))
@@ -293,8 +293,8 @@ StringRef::size_type StringRef::find_last_not_of(char C, size_t From) const {
StringRef::size_type StringRef::find_last_not_of(StringRef Chars,
size_t From) const {
std::bitset<1 << CHAR_BIT> CharBits;
- for (size_type i = 0, e = Chars.size(); i != e; ++i)
- CharBits.set((unsigned char)Chars[i]);
+ for (char C : Chars)
+ CharBits.set((unsigned char)C);
for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
if (!CharBits.test((unsigned char)Data[i]))
diff --git a/llvm/lib/Support/TargetParser.cpp b/llvm/lib/Support/TargetParser.cpp
index 1dadce4b9040..4acc23dd455b 100644
--- a/llvm/lib/Support/TargetParser.cpp
+++ b/llvm/lib/Support/TargetParser.cpp
@@ -333,3 +333,51 @@ bool getCPUFeaturesExceptStdExt(CPUKind Kind,
} // namespace RISCV
} // namespace llvm
+
+// Parse a branch protection specification, which has the form
+// standard | none | [bti,pac-ret[+b-key,+leaf]*]
+// Returns true on success, with individual elements of the specification
+// returned in `PBP`. Returns false on error, with `Err` containing
+// an erroneous part of the spec.
+bool ARM::parseBranchProtection(StringRef Spec, ParsedBranchProtection &PBP,
+ StringRef &Err) {
+ PBP = {"none", "a_key", false};
+ if (Spec == "none")
+ return true; // defaults are ok
+
+ if (Spec == "standard") {
+ PBP.Scope = "non-leaf";
+ PBP.BranchTargetEnforcement = true;
+ return true;
+ }
+
+ SmallVector<StringRef, 4> Opts;
+ Spec.split(Opts, "+");
+ for (int I = 0, E = Opts.size(); I != E; ++I) {
+ StringRef Opt = Opts[I].trim();
+ if (Opt == "bti") {
+ PBP.BranchTargetEnforcement = true;
+ continue;
+ }
+ if (Opt == "pac-ret") {
+ PBP.Scope = "non-leaf";
+ for (; I + 1 != E; ++I) {
+ StringRef PACOpt = Opts[I + 1].trim();
+ if (PACOpt == "leaf")
+ PBP.Scope = "all";
+ else if (PACOpt == "b-key")
+ PBP.Key = "b_key";
+ else
+ break;
+ }
+ continue;
+ }
+ if (Opt == "")
+ Err = "<empty>";
+ else
+ Err = Opt;
+ return false;
+ }
+
+ return true;
+}
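The accepted grammar is standard | none | [bti,pac-ret[+b-key,+leaf]*], with the pac-ret sub-options consumed greedily by the inner loop. A usage sketch of the relocated entry point; the header carrying the declaration is assumed to be ARMTargetParser.h:

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ARMTargetParser.h"
using namespace llvm;

void branchProtectionExample() {
  ARM::ParsedBranchProtection PBP;
  StringRef Err;
  // "bti+pac-ret+leaf" enables branch target enforcement and widens the
  // pac-ret scope from "non-leaf" to "all"; the key stays "a_key".
  if (ARM::parseBranchProtection("bti+pac-ret+leaf", PBP, Err)) {
    // PBP.BranchTargetEnforcement == true, PBP.Scope == "all",
    // PBP.Key == "a_key".
  } else {
    // Err names the offending component, e.g. "<empty>" for "bti++".
  }
}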
diff --git a/llvm/lib/Support/ThreadPool.cpp b/llvm/lib/Support/ThreadPool.cpp
index 81926d8071b2..c11e16d3cf98 100644
--- a/llvm/lib/Support/ThreadPool.cpp
+++ b/llvm/lib/Support/ThreadPool.cpp
@@ -29,7 +29,7 @@ ThreadPool::ThreadPool(ThreadPoolStrategy S)
Threads.emplace_back([S, ThreadID, this] {
S.apply_thread_strategy(ThreadID);
while (true) {
- PackagedTaskTy Task;
+ std::function<void()> Task;
{
std::unique_lock<std::mutex> LockGuard(QueueLock);
// Wait for tasks to be pushed in the queue
@@ -80,23 +80,6 @@ bool ThreadPool::isWorkerThread() const {
return false;
}
-std::shared_future<void> ThreadPool::asyncImpl(TaskTy Task) {
- /// Wrap the Task in a packaged_task to return a future object.
- PackagedTaskTy PackagedTask(std::move(Task));
- auto Future = PackagedTask.get_future();
- {
- // Lock the queue and push the new task
- std::unique_lock<std::mutex> LockGuard(QueueLock);
-
- // Don't allow enqueueing after disabling the pool
- assert(EnableFlag && "Queuing a thread during ThreadPool destruction");
-
- Tasks.push(std::move(PackagedTask));
- }
- QueueCondition.notify_one();
- return Future.share();
-}
-
// The destructor joins all threads, waiting for completion.
ThreadPool::~ThreadPool() {
{
@@ -128,16 +111,6 @@ void ThreadPool::wait() {
}
}
-std::shared_future<void> ThreadPool::asyncImpl(TaskTy Task) {
- // Get a Future with launch::deferred execution using std::async
- auto Future = std::async(std::launch::deferred, std::move(Task)).share();
- // Wrap the future so that both ThreadPool::wait() can operate and the
- // returned future can be sync'ed on.
- PackagedTaskTy PackagedTask([Future]() { Future.get(); });
- Tasks.push(std::move(PackagedTask));
- return Future;
-}
-
ThreadPool::~ThreadPool() { wait(); }
#endif
diff --git a/llvm/lib/TableGen/TGLexer.cpp b/llvm/lib/TableGen/TGLexer.cpp
index 2acac63ce843..25079fe33edb 100644
--- a/llvm/lib/TableGen/TGLexer.cpp
+++ b/llvm/lib/TableGen/TGLexer.cpp
@@ -1017,12 +1017,10 @@ void TGLexer::prepSkipToLineEnd() {
}
bool TGLexer::prepIsProcessingEnabled() {
- for (auto I = PrepIncludeStack.back()->rbegin(),
- E = PrepIncludeStack.back()->rend();
- I != E; ++I) {
- if (!I->IsDefined)
+ for (const PreprocessorControlDesc &I :
+ llvm::reverse(*PrepIncludeStack.back()))
+ if (!I.IsDefined)
return false;
- }
return true;
}
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 9f527a17d390..aeebb49675b2 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -818,18 +818,9 @@ void AArch64AsmPrinter::emitJumpTableInfo() {
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
if (JT.empty()) return;
- const Function &F = MF->getFunction();
const TargetLoweringObjectFile &TLOF = getObjFileLowering();
- bool JTInDiffSection =
- !STI->isTargetCOFF() ||
- !TLOF.shouldPutJumpTableInFunctionSection(
- MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32,
- F);
- if (JTInDiffSection) {
- // Drop it in the readonly section.
- MCSection *ReadOnlySec = TLOF.getSectionForJumpTable(F, TM);
- OutStreamer->SwitchSection(ReadOnlySec);
- }
+ MCSection *ReadOnlySec = TLOF.getSectionForJumpTable(MF->getFunction(), TM);
+ OutStreamer->SwitchSection(ReadOnlySec);
auto AFI = MF->getInfo<AArch64FunctionInfo>();
for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
diff --git a/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp b/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp
index 533ab3b05de9..ff4a4dfc1b95 100644
--- a/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp
+++ b/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp
@@ -88,12 +88,9 @@ MachineInstr *AArch64CondBrTuning::convertToFlagSetting(MachineInstr &MI,
// If this is already the flag setting version of the instruction (e.g., SUBS)
// just make sure the implicit-def of NZCV isn't marked dead.
if (IsFlagSetting) {
- for (unsigned I = MI.getNumExplicitOperands(), E = MI.getNumOperands();
- I != E; ++I) {
- MachineOperand &MO = MI.getOperand(I);
+ for (MachineOperand &MO : MI.implicit_operands())
if (MO.isReg() && MO.isDead() && MO.getReg() == AArch64::NZCV)
MO.setIsDead(false);
- }
return &MI;
}
bool Is64Bit;
@@ -104,8 +101,8 @@ MachineInstr *AArch64CondBrTuning::convertToFlagSetting(MachineInstr &MI,
MachineInstrBuilder MIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
TII->get(NewOpc), NewDestReg);
- for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
- MIB.add(MI.getOperand(I));
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
+ MIB.add(MO);
return MIB;
}
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 4c04e04a7d3c..ee6e670fe3cd 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -102,9 +102,8 @@ INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
MachineInstrBuilder &DefMI) {
const MCInstrDesc &Desc = OldMI.getDesc();
- for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); i != e;
- ++i) {
- const MachineOperand &MO = OldMI.getOperand(i);
+ for (const MachineOperand &MO :
+ llvm::drop_begin(OldMI.operands(), Desc.getNumOperands())) {
assert(MO.isReg() && MO.getReg());
if (MO.isUse())
UseMI.add(MO);
@@ -733,8 +732,9 @@ bool AArch64ExpandPseudo::expandCALL_RVMARKER(
MOP.getReg(), /*Def=*/false, /*Implicit=*/true));
RegMaskStartIdx++;
}
- for (; RegMaskStartIdx < MI.getNumOperands(); ++RegMaskStartIdx)
- OriginalCall->addOperand(MI.getOperand(RegMaskStartIdx));
+ for (const MachineOperand &MO :
+ llvm::drop_begin(MI.operands(), RegMaskStartIdx))
+ OriginalCall->addOperand(MO);
auto *Marker = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
.addReg(AArch64::FP, RegState::Define)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 6e9e61c8e7ac..72461aa1f772 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -890,7 +890,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::ABS);
setTargetDAGCombine(ISD::SUB);
- setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::XOR);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);
@@ -930,6 +929,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::VECREDUCE_ADD);
setTargetDAGCombine(ISD::STEP_VECTOR);
+ setTargetDAGCombine(ISD::FP_EXTEND);
+
setTargetDAGCombine(ISD::GlobalAddress);
// In case of strict alignment, avoid an excessive number of byte wide stores.
@@ -1323,6 +1324,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
}
setOperationAction(ISD::SPLAT_VECTOR, MVT::nxv8bf16, Custom);
@@ -1504,6 +1506,24 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
}
}
+bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT,
+ EVT OpVT) const {
+ // Only SVE has a 1:1 mapping from intrinsic -> instruction (whilelo).
+ if (!Subtarget->hasSVE())
+ return true;
+
+ // We can only support legal predicate result types.
+ if (ResVT != MVT::nxv2i1 && ResVT != MVT::nxv4i1 && ResVT != MVT::nxv8i1 &&
+ ResVT != MVT::nxv16i1)
+ return true;
+
+ // The whilelo instruction only works with i32 or i64 scalar inputs.
+ if (OpVT != MVT::i32 && OpVT != MVT::i64)
+ return true;
+
+ return false;
+}
+
void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
@@ -1528,7 +1548,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
setCondCodeAction(ISD::SETUNE, VT, Expand);
}
- // Mark integer truncating stores as having custom lowering
+ // Mark integer truncating stores/extending loads as having custom lowering
if (VT.isInteger()) {
MVT InnerVT = VT.changeVectorElementType(MVT::i8);
while (InnerVT != VT) {
@@ -1540,6 +1560,18 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
}
}
+ // Mark floating-point truncating stores/extending loads as having custom
+ // lowering
+ if (VT.isFloatingPoint()) {
+ MVT InnerVT = VT.changeVectorElementType(MVT::f16);
+ while (InnerVT != VT) {
+ setTruncStoreAction(VT, InnerVT, Custom);
+ setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Custom);
+ InnerVT = InnerVT.changeVectorElementType(
+ MVT::getFloatingPointVT(2 * InnerVT.getScalarSizeInBits()));
+ }
+ }
+
// Lower fixed length vector operations to scalable equivalents.
setOperationAction(ISD::ABS, VT, Custom);
setOperationAction(ISD::ADD, VT, Custom);
@@ -1950,6 +1982,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::UDIV_PRED)
MAKE_CASE(AArch64ISD::UMAX_PRED)
MAKE_CASE(AArch64ISD::UMIN_PRED)
+ MAKE_CASE(AArch64ISD::SRAD_MERGE_OP1)
MAKE_CASE(AArch64ISD::FNEG_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU)
@@ -2316,6 +2349,8 @@ static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V);
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V);
static SDValue convertFixedMaskToScalableVector(SDValue Mask,
SelectionDAG &DAG);
+static SDValue getPredicateForScalableVector(SelectionDAG &DAG, SDLoc &DL,
+ EVT VT);
/// isZerosVector - Check whether SDNode N is a zero-filled vector.
static bool isZerosVector(const SDNode *N) {
@@ -4288,6 +4323,12 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
}
+ case Intrinsic::get_active_lane_mask: {
+ SDValue ID =
+ DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, dl, MVT::i64);
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(), ID,
+ Op.getOperand(1), Op.getOperand(2));
+ }
}
}
@@ -4506,7 +4547,7 @@ SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
}
InputVT = DAG.getValueType(MemVT.changeTypeToInteger());
Mask = DAG.getNode(
- ISD::ZERO_EXTEND, DL,
+ ISD::SIGN_EXTEND, DL,
VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask);
}
@@ -4618,7 +4659,7 @@ SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
VT.changeVectorElementType(IndexVT.getVectorElementType()), StoreVal);
StoreVal = convertToScalableVector(DAG, IndexVT, StoreVal);
Mask = DAG.getNode(
- ISD::ZERO_EXTEND, DL,
+ ISD::SIGN_EXTEND, DL,
VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask);
} else if (VT.isFloatingPoint()) {
// Handle FP data by casting the data so an integer scatter can be used.
@@ -10963,8 +11004,40 @@ SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
return SDValue();
}
+static bool isPow2Splat(SDValue Op, uint64_t &SplatVal, bool &Negated) {
+ if (Op.getOpcode() != AArch64ISD::DUP &&
+ Op.getOpcode() != ISD::SPLAT_VECTOR &&
+ Op.getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+
+ if (Op.getOpcode() == ISD::BUILD_VECTOR &&
+ !isAllConstantBuildVector(Op, SplatVal))
+ return false;
+
+ if (Op.getOpcode() != ISD::BUILD_VECTOR &&
+ !isa<ConstantSDNode>(Op->getOperand(0)))
+ return false;
+
+ SplatVal = Op->getConstantOperandVal(0);
+ if (Op.getValueType().getVectorElementType() != MVT::i64)
+ SplatVal = (int32_t)SplatVal;
+
+ Negated = false;
+ if (isPowerOf2_64(SplatVal))
+ return true;
+
+ Negated = true;
+ if (isPowerOf2_64(-SplatVal)) {
+ SplatVal = -SplatVal;
+ return true;
+ }
+
+ return false;
+}
+
SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
+ SDLoc dl(Op);
if (useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
return LowerFixedLengthVectorIntDivideToSVE(Op, DAG);
@@ -10974,6 +11047,19 @@ SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
bool Signed = Op.getOpcode() == ISD::SDIV;
unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
+ bool Negated;
+ uint64_t SplatVal;
+ if (Signed && isPow2Splat(Op.getOperand(1), SplatVal, Negated)) {
+ SDValue Pg = getPredicateForScalableVector(DAG, dl, VT);
+ SDValue Res =
+ DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, dl, VT, Pg, Op->getOperand(0),
+ DAG.getTargetConstant(Log2_64(SplatVal), dl, MVT::i32));
+ if (Negated)
+ Res = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), Res);
+
+ return Res;
+ }
+
if (VT == MVT::nxv4i32 || VT == MVT::nxv2i64)
return LowerToPredicatedOp(Op, DAG, PredOpcode);
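The SRAD_MERGE_OP1 node maps to SVE's asrd, an arithmetic shift right that rounds toward zero, so a signed divide by 2^K becomes a single shift, plus a negate for negated powers of two. A scalar model of that rounding, for illustration only:

#include <cassert>
#include <cstdint>

// Signed division by 2^K with truncation toward zero: bias negative
// dividends by 2^K - 1 before the arithmetic shift, as asrd does.
int64_t sdivPow2(int64_t X, unsigned K) {
  int64_t Bias = (X < 0) ? ((int64_t(1) << K) - 1) : 0;
  return (X + Bias) >> K;
}

void sdivPow2Example() {
  assert(sdivPow2(7, 2) == 1);   // 7 / 4
  assert(sdivPow2(-7, 2) == -1); // truncates toward zero, not down to -2
  // The Negated path above is the same shift followed by a subtraction
  // from zero: -sdivPow2(7, 2) == 7 / -4.
}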
@@ -10987,7 +11073,6 @@ SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
else
llvm_unreachable("Unexpected Custom DIV operation");
- SDLoc dl(Op);
unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
SDValue Op0Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(0));
@@ -11924,6 +12009,12 @@ static bool areOperandsOfVmullHighP64(Value *Op1, Value *Op2) {
return isOperandOfVmullHighP64(Op1) && isOperandOfVmullHighP64(Op2);
}
+static bool isSplatShuffle(Value *V) {
+ if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V))
+ return is_splat(Shuf->getShuffleMask());
+ return false;
+}
+
/// Check if sinking \p I's operands to I's basic block is profitable, because
/// the operands can be folded into a target instruction, e.g.
/// shufflevectors extracts and/or sext/zext can be folded into (u,s)subl(2).
@@ -11934,12 +12025,24 @@ bool AArch64TargetLowering::shouldSinkOperands(
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
+ case Intrinsic::aarch64_neon_smull:
case Intrinsic::aarch64_neon_umull:
- if (!areExtractShuffleVectors(II->getOperand(0), II->getOperand(1)))
- return false;
- Ops.push_back(&II->getOperandUse(0));
- Ops.push_back(&II->getOperandUse(1));
- return true;
+ if (areExtractShuffleVectors(II->getOperand(0), II->getOperand(1))) {
+ Ops.push_back(&II->getOperandUse(0));
+ Ops.push_back(&II->getOperandUse(1));
+ return true;
+ }
+ LLVM_FALLTHROUGH;
+
+ case Intrinsic::aarch64_neon_sqdmull:
+ case Intrinsic::aarch64_neon_sqdmulh:
+ case Intrinsic::aarch64_neon_sqrdmulh:
+ // Sink splats for index lane variants
+ if (isSplatShuffle(II->getOperand(0)))
+ Ops.push_back(&II->getOperandUse(0));
+ if (isSplatShuffle(II->getOperand(1)))
+ Ops.push_back(&II->getOperandUse(1));
+ return !Ops.empty();
case Intrinsic::aarch64_neon_pmull64:
if (!areOperandsOfVmullHighP64(II->getArgOperand(0),
@@ -12961,8 +13064,14 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
if (isIntDivCheap(N->getValueType(0), Attr))
return SDValue(N,0); // Lower SDIV as SDIV
- // fold (sdiv X, pow2)
EVT VT = N->getValueType(0);
+
+ // For scalable and fixed types, mark them as cheap so we can handle it much
+ // later. This allows us to handle larger than legal types.
+ if (VT.isScalableVector() || Subtarget->useSVEForFixedLengthVectors())
+ return SDValue(N, 0);
+
+ // fold (sdiv X, pow2)
if ((VT != MVT::i32 && VT != MVT::i64) ||
!(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
return SDValue();
@@ -13858,34 +13967,6 @@ static SDValue performANDCombine(SDNode *N,
return SDValue();
}
-static SDValue performSRLCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
- SelectionDAG &DAG = DCI.DAG;
- EVT VT = N->getValueType(0);
- if (VT != MVT::i32 && VT != MVT::i64)
- return SDValue();
-
- // Canonicalize (srl (bswap i32 x), 16) to (rotr (bswap i32 x), 16), if the
- // high 16-bits of x are zero. Similarly, canonicalize (srl (bswap i64 x), 32)
- // to (rotr (bswap i64 x), 32), if the high 32-bits of x are zero.
- SDValue N0 = N->getOperand(0);
- if (N0.getOpcode() == ISD::BSWAP) {
- SDLoc DL(N);
- SDValue N1 = N->getOperand(1);
- SDValue N00 = N0.getOperand(0);
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
- uint64_t ShiftAmt = C->getZExtValue();
- if (VT == MVT::i32 && ShiftAmt == 16 &&
- DAG.MaskedValueIsZero(N00, APInt::getHighBitsSet(32, 16)))
- return DAG.getNode(ISD::ROTR, DL, VT, N0, N1);
- if (VT == MVT::i64 && ShiftAmt == 32 &&
- DAG.MaskedValueIsZero(N00, APInt::getHighBitsSet(64, 32)))
- return DAG.getNode(ISD::ROTR, DL, VT, N0, N1);
- }
- }
- return SDValue();
-}
-
// Attempt to form urhadd(OpA, OpB) from
// truncate(vlshr(sub(zext(OpB), xor(zext(OpA), Ones(ElemSizeInBits))), 1))
// or uhadd(OpA, OpB) from truncate(vlshr(add(zext(OpA), zext(OpB)), 1)).
@@ -14031,6 +14112,9 @@ static SDValue performConcatVectorsCombine(SDNode *N,
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
unsigned N0Opc = N0->getOpcode(), N1Opc = N1->getOpcode();
+ if (VT.isScalableVector())
+ return SDValue();
+
// Optimize concat_vectors of truncated vectors, where the intermediate
// type is illegal, to avoid said illegality, e.g.,
// (v4i16 (concat_vectors (v2i16 (truncate (v2i64))),
@@ -15089,6 +15173,9 @@ static SDValue performIntrinsicCombine(SDNode *N,
case Intrinsic::aarch64_sve_uqsub_x:
return DAG.getNode(ISD::USUBSAT, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
+ case Intrinsic::aarch64_sve_asrd:
+ return DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2), N->getOperand(3));
case Intrinsic::aarch64_sve_cmphs:
if (!N->getOperand(2).getValueType().isFloatingPoint())
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
@@ -15883,6 +15970,22 @@ static SDValue performSTORECombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Chain = ST->getChain();
+ SDValue Value = ST->getValue();
+ SDValue Ptr = ST->getBasePtr();
+
+ // If this is an FP_ROUND followed by a store, fold this into a truncating
+ // store. We can do this even if this is already a truncstore.
+ // We purposefully don't care about legality of the nodes here as we know
+ // they can be split down into something legal.
+ if (DCI.isBeforeLegalizeOps() && Value.getOpcode() == ISD::FP_ROUND &&
+ Value.getNode()->hasOneUse() && ST->isUnindexed() &&
+ Subtarget->useSVEForFixedLengthVectors() &&
+ Value.getValueType().isFixedLengthVector())
+ return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
+ ST->getMemoryVT(), ST->getMemOperand());
+
if (SDValue Split = splitStores(N, DCI, DAG, Subtarget))
return Split;
@@ -17225,6 +17328,37 @@ SDValue performSVESpliceCombine(SDNode *N, SelectionDAG &DAG) {
return DAG.getBitcast(Ty, Trunc);
}
+SDValue performFPExtendCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const AArch64Subtarget *Subtarget) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
+ if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::FP_ROUND)
+ return SDValue();
+
+ // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
+ // We purposefully don't care about legality of the nodes here as we know
+ // they can be split down into something legal.
+ if (DCI.isBeforeLegalizeOps() && ISD::isNormalLoad(N0.getNode()) &&
+ N0.hasOneUse() && Subtarget->useSVEForFixedLengthVectors() &&
+ VT.isFixedLengthVector()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ N0.getValueType(), LN0->getMemOperand());
+ DCI.CombineTo(N, ExtLoad);
+ DCI.CombineTo(N0.getNode(),
+ DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(),
+ ExtLoad, DAG.getIntPtrConstant(1, SDLoc(N0))),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ return SDValue();
+}
+
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -17253,8 +17387,6 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performORCombine(N, DCI, Subtarget);
case ISD::AND:
return performANDCombine(N, DCI);
- case ISD::SRL:
- return performSRLCombine(N, DCI);
case ISD::INTRINSIC_WO_CHAIN:
return performIntrinsicCombine(N, DCI, Subtarget);
case ISD::ANY_EXTEND:
@@ -17283,6 +17415,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performSTORECombine(N, DCI, DAG, Subtarget);
case ISD::VECTOR_SPLICE:
return performSVESpliceCombine(N, DAG);
+ case ISD::FP_EXTEND:
+ return performFPExtendCombine(N, DAG, DCI, Subtarget);
case AArch64ISD::BRCOND:
return performBRCONDCombine(N, DCI, DAG);
case AArch64ISD::TBNZ:
@@ -18414,6 +18548,15 @@ bool AArch64TargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
return VT.isScalarInteger();
}
+bool AArch64TargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
+ EVT VT) const {
+ // v8f16 without fp16 need to be extended to v8f32, which is more difficult to
+ // legalize.
+ if (FPVT == MVT::v8f16 && !Subtarget->hasFullFP16())
+ return false;
+ return TargetLowering::shouldConvertFpToSat(Op, FPVT, VT);
+}
+
bool AArch64TargetLowering::enableAggressiveFMAFusion(EVT VT) const {
return Subtarget->hasAggressiveFMA() && VT.isFloatingPoint();
}
@@ -18591,12 +18734,29 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorLoadToSVE(
SDLoc DL(Op);
EVT VT = Op.getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
+ EVT LoadVT = ContainerVT;
+ EVT MemVT = Load->getMemoryVT();
+
+ auto Pg = getPredicateForFixedLengthVector(DAG, DL, VT);
+
+ if (VT.isFloatingPoint() && Load->getExtensionType() == ISD::EXTLOAD) {
+ LoadVT = ContainerVT.changeTypeToInteger();
+ MemVT = MemVT.changeTypeToInteger();
+ }
auto NewLoad = DAG.getMaskedLoad(
- ContainerVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(),
- getPredicateForFixedLengthVector(DAG, DL, VT), DAG.getUNDEF(ContainerVT),
- Load->getMemoryVT(), Load->getMemOperand(), Load->getAddressingMode(),
- Load->getExtensionType());
+ LoadVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(), Pg,
+ DAG.getUNDEF(LoadVT), MemVT, Load->getMemOperand(),
+ Load->getAddressingMode(), Load->getExtensionType());
+
+ if (VT.isFloatingPoint() && Load->getExtensionType() == ISD::EXTLOAD) {
+ EVT ExtendVT = ContainerVT.changeVectorElementType(
+ Load->getMemoryVT().getVectorElementType());
+
+ NewLoad = getSVESafeBitCast(ExtendVT, NewLoad, DAG);
+ NewLoad = DAG.getNode(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU, DL, ContainerVT,
+ Pg, NewLoad, DAG.getUNDEF(ContainerVT));
+ }
auto Result = convertFromScalableVector(DAG, VT, NewLoad);
SDValue MergedValues[2] = {Result, Load->getChain()};
@@ -18609,12 +18769,15 @@ static SDValue convertFixedMaskToScalableVector(SDValue Mask,
EVT InVT = Mask.getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
+ auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT);
+
+ if (ISD::isBuildVectorAllOnes(Mask.getNode()))
+ return Pg;
+
auto Op1 = convertToScalableVector(DAG, ContainerVT, Mask);
auto Op2 = DAG.getConstant(0, DL, ContainerVT);
- auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT);
- EVT CmpVT = Pg.getValueType();
- return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, CmpVT,
+ return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, Pg.getValueType(),
{Pg, Op1, Op2, DAG.getCondCode(ISD::SETNE)});
}
@@ -18668,13 +18831,26 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorStoreToSVE(
SDLoc DL(Op);
EVT VT = Store->getValue().getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
+ EVT MemVT = Store->getMemoryVT();
+ auto Pg = getPredicateForFixedLengthVector(DAG, DL, VT);
auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
- return DAG.getMaskedStore(
- Store->getChain(), DL, NewValue, Store->getBasePtr(), Store->getOffset(),
- getPredicateForFixedLengthVector(DAG, DL, VT), Store->getMemoryVT(),
- Store->getMemOperand(), Store->getAddressingMode(),
- Store->isTruncatingStore());
+
+ if (VT.isFloatingPoint() && Store->isTruncatingStore()) {
+ EVT TruncVT = ContainerVT.changeVectorElementType(
+ Store->getMemoryVT().getVectorElementType());
+ MemVT = MemVT.changeTypeToInteger();
+ NewValue = DAG.getNode(AArch64ISD::FP_ROUND_MERGE_PASSTHRU, DL, TruncVT, Pg,
+ NewValue, DAG.getTargetConstant(0, DL, MVT::i64),
+ DAG.getUNDEF(TruncVT));
+ NewValue =
+ getSVESafeBitCast(ContainerVT.changeTypeToInteger(), NewValue, DAG);
+ }
+
+ return DAG.getMaskedStore(Store->getChain(), DL, NewValue,
+ Store->getBasePtr(), Store->getOffset(), Pg, MemVT,
+ Store->getMemOperand(), Store->getAddressingMode(),
+ Store->isTruncatingStore());
}
SDValue AArch64TargetLowering::LowerFixedLengthVectorMStoreToSVE(
@@ -18706,6 +18882,21 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
bool Signed = Op.getOpcode() == ISD::SDIV;
unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
+ bool Negated;
+ uint64_t SplatVal;
+ if (Signed && isPow2Splat(Op.getOperand(1), SplatVal, Negated)) {
+ EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
+ SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
+ SDValue Op2 = DAG.getTargetConstant(Log2_64(SplatVal), dl, MVT::i32);
+
+ SDValue Pg = getPredicateForFixedLengthVector(DAG, dl, VT);
+ SDValue Res = DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, dl, ContainerVT,
+ Pg, Op1, Op2);
+ if (Negated)
+ Res = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), Res);
+
+ return convertFromScalableVector(DAG, VT, Res);
+ }
+
// Scalable vector i32/i64 DIV is supported.
if (EltVT == MVT::i32 || EltVT == MVT::i64)
return LowerToPredicatedOp(Op, DAG, PredOpcode, /*OverrideNEON=*/true);
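For signed division by a power-of-two splat, the new code above emits AArch64ISD::SRAD_MERGE_OP1 (SVE ASRD), an arithmetic shift right that rounds toward zero, plus a subtraction from zero when the divisor was negated. A scalar sketch of the semantics, with illustrative names:

#include <cstdint>

// sdiv X, 2^Shift as ASRD would compute it: bias negative inputs so the
// arithmetic shift rounds toward zero instead of toward -infinity.
// Assumes Shift in [1, 31] and an arithmetic >> on negative values, as on
// AArch64 targets.
int32_t sradModel(int32_t X, unsigned Shift) {
  int32_t Bias = (X < 0) ? (int32_t)((1u << Shift) - 1) : 0;
  return (X + Bias) >> Shift;           // e.g. -7 / 4: (-7 + 3) >> 2 == -1
}

// The Negated case (divisor -2^Shift) is then 0 - sradModel(X, Shift).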
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 392e22b68366..ea884cdccd28 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -104,6 +104,8 @@ enum NodeType : unsigned {
// Unpredicated vector instructions
BIC,
+ SRAD_MERGE_OP1,
+
// Predicated instructions with the result of inactive lanes provided by the
// last operand.
FABS_MERGE_PASSTHRU,
@@ -774,6 +776,8 @@ public:
bool preferIncOfAddToSubOfNot(EVT VT) const override;
+ bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
+
bool hasBitPreservingFPLogic(EVT VT) const override {
// FIXME: Is this always true? It should be true for vectors at least.
return VT == MVT::f32 || VT == MVT::f64;
@@ -842,6 +846,8 @@ public:
EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
bool AllowUnknown = false) const override;
+ bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override;
+
private:
/// Keep a pointer to the AArch64Subtarget around so that we can
/// make the right decision when generating code for different targets.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index db8e0c5dac4a..decee117d2d5 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -437,6 +437,18 @@ def non_temporal_store :
cast<MaskedStoreSDNode>(N)->isNonTemporal();
}]>;
+// top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise
+def top16Zero: PatLeaf<(i32 GPR32:$src), [{
+ return SDValue(N,0)->getValueType(0) == MVT::i32 &&
+ CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
+ }]>;
+
+// top32Zero - answer true if the upper 32 bits of $src are 0, false otherwise
+def top32Zero: PatLeaf<(i64 GPR64:$src), [{
+ return SDValue(N,0)->getValueType(0) == MVT::i64 &&
+ CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 32));
+ }]>;
+
// Node definitions.
def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
def AArch64adr : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>;
@@ -2046,6 +2058,10 @@ def : InstAlias<"rev64 $Rd, $Rn", (REVXr GPR64:$Rd, GPR64:$Rn), 0>;
def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>;
def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>;
+// Match (srl (bswap x), C) -> revC if the upper bswap bits are known zero.
+def : Pat<(srl (bswap top16Zero:$Rn), (i64 16)), (REV16Wr GPR32:$Rn)>;
+def : Pat<(srl (bswap top32Zero:$Rn), (i64 32)), (REV32Xr GPR64:$Rn)>;
+
//===----------------------------------------------------------------------===//
// Bitfield immediate extraction instruction.
//===----------------------------------------------------------------------===//
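The two patterns above fold a bswap-then-shift into a single REV16/REV32 once MaskedValueIsZero proves the discarded high bits were zero anyway. In C terms, the 32-bit case looks like this (illustrative helper using the GCC/Clang builtin):

#include <cstdint>

// With the top 16 bits of X known zero, byte-swapping all 32 bits and
// shifting the result down is just a 16-bit byte swap -- one REV16Wr.
uint32_t swapLow16(uint32_t X) {        // precondition: X <= 0xFFFF
  return __builtin_bswap32(X) >> 16;    // == ((X & 0xFF) << 8) | (X >> 8)
}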
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 67d8fbb45cf5..25d53f4ab065 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -199,6 +199,13 @@ def AArch64umax_p : SDNode<"AArch64ISD::UMAX_PRED", SDT_AArch64Arith>;
def AArch64umin_p : SDNode<"AArch64ISD::UMIN_PRED", SDT_AArch64Arith>;
def AArch64umulh_p : SDNode<"AArch64ISD::MULHU_PRED", SDT_AArch64Arith>;
+def SDT_AArch64Arith_Imm : SDTypeProfile<1, 3, [
+ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVT<3,i32>,
+ SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>
+]>;
+
+def AArch64asrd_m1 : SDNode<"AArch64ISD::SRAD_MERGE_OP1", SDT_AArch64Arith_Imm>;
+
def SDT_AArch64IntExtend : SDTypeProfile<1, 4, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVT<3, OtherVT>, SDTCisVec<4>,
SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisVTSmallerThanOp<3, 2>, SDTCisSameAs<0,4>
@@ -1575,7 +1582,7 @@ let Predicates = [HasSVEorStreamingSVE] in {
defm ASR_ZPmI : sve_int_bin_pred_shift_imm_right_dup<0b0000, "asr", "ASR_ZPZI", int_aarch64_sve_asr>;
defm LSR_ZPmI : sve_int_bin_pred_shift_imm_right_dup<0b0001, "lsr", "LSR_ZPZI", int_aarch64_sve_lsr>;
defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left_dup< 0b0011, "lsl", "LSL_ZPZI", int_aarch64_sve_lsl>;
- defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right< 0b0100, "asrd", "ASRD_ZPZI", int_aarch64_sve_asrd>;
+ defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right< 0b0100, "asrd", "ASRD_ZPZI", AArch64asrd_m1>;
defm ASR_ZPZI : sve_int_shift_pred_bhsd<AArch64asr_p, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;
defm LSR_ZPZI : sve_int_shift_pred_bhsd<AArch64lsr_p, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;
@@ -1586,7 +1593,7 @@ let Predicates = [HasSVEorStreamingSVE, UseExperimentalZeroingPseudos] in {
defm ASR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_asr>;
defm LSR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsr>;
defm LSL_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsl>;
- defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_asrd>;
+ defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<AArch64asrd_m1>;
} // End HasSVEorStreamingSVE, UseExperimentalZeroingPseudos
let Predicates = [HasSVEorStreamingSVE] in {
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 63d6fa5bbb26..34015d2dbd49 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -833,17 +833,12 @@ static Optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
return match(SplatValue, m_FPOne()) || match(SplatValue, m_One());
};
- // The OpMultiplier variable should always point to the dup (if any), so
- // swap if necessary.
- if (IsUnitDup(OpMultiplicand) || IsUnitSplat(OpMultiplicand))
- std::swap(OpMultiplier, OpMultiplicand);
-
if (IsUnitSplat(OpMultiplier)) {
- // [f]mul pg (dupx 1) %n => %n
+ // [f]mul pg %n, (dupx 1) => %n
OpMultiplicand->takeName(&II);
return IC.replaceInstUsesWith(II, OpMultiplicand);
} else if (IsUnitDup(OpMultiplier)) {
- // [f]mul pg (dup pg 1) %n => %n
+ // [f]mul pg %n, (dup pg 1) => %n
auto *DupInst = cast<IntrinsicInst>(OpMultiplier);
auto *DupPg = DupInst->getOperand(1);
// TODO: this is naive. The optimization is still valid if DupPg
@@ -2142,6 +2137,7 @@ bool AArch64TTIImpl::isLegalToVectorizeReduction(
case RecurKind::FMax:
case RecurKind::SelectICmp:
case RecurKind::SelectFCmp:
+ case RecurKind::FMulAdd:
return true;
default:
return false;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index e090d87d59a2..3d9a626d3ac3 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -1920,35 +1920,6 @@ bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
MachineRegisterInfo &MRI = MF.getRegInfo();
switch (I.getOpcode()) {
- case TargetOpcode::G_SHL:
- case TargetOpcode::G_ASHR:
- case TargetOpcode::G_LSHR: {
- // These shifts are legalized to have 64 bit shift amounts because we want
- // to take advantage of the existing imported selection patterns that assume
- // the immediates are s64s. However, if the shifted type is 32 bits and for
- // some reason we receive input GMIR that has an s64 shift amount that's not
- // a G_CONSTANT, insert a truncate so that we can still select the s32
- // register-register variant.
- Register SrcReg = I.getOperand(1).getReg();
- Register ShiftReg = I.getOperand(2).getReg();
- const LLT ShiftTy = MRI.getType(ShiftReg);
- const LLT SrcTy = MRI.getType(SrcReg);
- if (SrcTy.isVector())
- return false;
- assert(!ShiftTy.isVector() && "unexpected vector shift ty");
- if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
- return false;
- auto *AmtMI = MRI.getVRegDef(ShiftReg);
- assert(AmtMI && "could not find a vreg definition for shift amount");
- if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
- // Insert a subregister copy to implement a 64->32 trunc
- auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
- .addReg(ShiftReg, 0, AArch64::sub_32);
- MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
- I.getOperand(2).setReg(Trunc.getReg(0));
- }
- return true;
- }
case TargetOpcode::G_STORE: {
bool Changed = contractCrossBankCopyIntoStore(I, MRI);
MachineOperand &SrcOp = I.getOperand(0);
@@ -2950,6 +2921,28 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
if (Opcode == TargetOpcode::G_SHL &&
MRI.getType(I.getOperand(0).getReg()).isVector())
return selectVectorSHL(I, MRI);
+
+ // These shifts were legalized to have 64-bit shift amounts because we
+ // want to take advantage of the selection patterns that assume the
+ // immediates are s64s. However, selectBinaryOp will assume both operands
+ // will have the same bit size.
+ {
+ Register SrcReg = I.getOperand(1).getReg();
+ Register ShiftReg = I.getOperand(2).getReg();
+ const LLT ShiftTy = MRI.getType(ShiftReg);
+ const LLT SrcTy = MRI.getType(SrcReg);
+ if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
+ ShiftTy.getSizeInBits() == 64) {
+ assert(!ShiftTy.isVector() && "unexpected vector shift ty");
+ assert(MRI.getVRegDef(ShiftReg) &&
+ "could not find a vreg definition for shift amount");
+ // Insert a subregister copy to implement a 64->32 trunc
+ auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
+ .addReg(ShiftReg, 0, AArch64::sub_32);
+ MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
+ I.getOperand(2).setReg(Trunc.getReg(0));
+ }
+ }
LLVM_FALLTHROUGH;
case TargetOpcode::G_FADD:
case TargetOpcode::G_FSUB:
@@ -6452,8 +6445,7 @@ static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder MIB(MI);
// Go through each operand and ensure it has the same regbank.
- for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
- MachineOperand &MO = MI.getOperand(OpIdx);
+ for (MachineOperand &MO : llvm::drop_begin(MI.operands())) {
if (!MO.isReg())
continue;
Register OpReg = MO.getReg();
@@ -6511,8 +6503,7 @@ void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
// %endbb:
// %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
bool HasGPROp = false, HasFPROp = false;
- for (unsigned OpIdx = 1; OpIdx < MI->getNumOperands(); ++OpIdx) {
- const auto &MO = MI->getOperand(OpIdx);
+ for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
if (!MO.isReg())
continue;
const LLT &Ty = MRI.getType(MO.getReg());
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index f2a470857d21..78c0e90b1384 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -177,8 +177,8 @@ public:
// We can't just use EmitIntValue here, as that will emit a data mapping
// symbol, and swap the endianness on big-endian systems (instructions are
// always little-endian).
- for (unsigned I = 0; I < 4; ++I) {
- Buffer[I] = uint8_t(Inst);
+ for (char &C : Buffer) {
+ C = uint8_t(Inst);
Inst >>= 8;
}
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
index cf1a60643efd..92552c3d41d5 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
@@ -101,8 +101,8 @@ void AArch64TargetStreamer::emitInst(uint32_t Inst) {
// We can't just use EmitIntValue here, as that will swap the
// endianness on big-endian systems (instructions are always
// little-endian).
- for (unsigned I = 0; I < 4; ++I) {
- Buffer[I] = uint8_t(Inst);
+ for (char &C : Buffer) {
+ C = uint8_t(Inst);
Inst >>= 8;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index df2f9a0fa3a9..c7c5ff7bcbe7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -26,6 +26,14 @@ def uchar_to_float : GICombineRule<
[{ return PostLegalizerHelper.matchUCharToFloat(*${itofp}); }]),
(apply [{ PostLegalizerHelper.applyUCharToFloat(*${itofp}); }])>;
+
+def rcp_sqrt_to_rsq : GICombineRule<
+ (defs root:$rcp, build_fn_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_INTRINSIC, G_FSQRT):$rcp,
+ [{ return PostLegalizerHelper.matchRcpSqrtToRsq(*${rcp}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${rcp}, ${matchinfo}); }])>;
+
+
def cvt_f32_ubyteN_matchdata : GIDefMatchData<"AMDGPUPostLegalizerCombinerHelper::CvtF32UByteMatchInfo">;
def cvt_f32_ubyteN : GICombineRule<
@@ -86,7 +94,8 @@ def AMDGPUPreLegalizerCombinerHelper: GICombinerHelper<
def AMDGPUPostLegalizerCombinerHelper: GICombinerHelper<
"AMDGPUGenPostLegalizerCombinerHelper",
[all_combines, gfx6gfx7_combines,
- uchar_to_float, cvt_f32_ubyteN, remove_fcanonicalize, foldable_fneg]> {
+ uchar_to_float, cvt_f32_ubyteN, remove_fcanonicalize, foldable_fneg,
+ rcp_sqrt_to_rsq]> {
let DisableRuleOption = "amdgpupostlegalizercombiner-disable-rule";
let StateClass = "AMDGPUPostLegalizerCombinerHelperState";
let AdditionalArguments = [];
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index cee56ee97294..8236e6672247 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -654,6 +654,9 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
SelectMAD_64_32(N);
return;
}
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI:
+ return SelectMUL_LOHI(N);
case ISD::CopyToReg: {
const SITargetLowering& Lowering =
*static_cast<const SITargetLowering*>(getTargetLowering());
@@ -719,6 +722,18 @@ bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
Term->getMetadata("structurizecfg.uniform");
}
+bool AMDGPUDAGToDAGISel::isUnneededShiftMask(const SDNode *N,
+ unsigned ShAmtBits) const {
+ assert(N->getOpcode() == ISD::AND);
+
+ const APInt &RHS = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
+ if (RHS.countTrailingOnes() >= ShAmtBits)
+ return true;
+
+ const APInt &LHSKnownZeros = CurDAG->computeKnownBits(N->getOperand(0)).Zero;
+ return (LHSKnownZeros | RHS).countTrailingOnes() >= ShAmtBits;
+}
+
static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr,
SDValue &N0, SDValue &N1) {
if (Addr.getValueType() == MVT::i64 && Addr.getOpcode() == ISD::BITCAST &&
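isUnneededShiftMask above encodes the usual constrained-shift argument: the hardware shift only reads the low log2(bit width) bits of the amount, so an explicit AND is dead whenever it cannot change those bits, either because the mask has enough trailing ones or because the bits it would clear are already known zero. A boolean sketch of the same condition (names illustrative):

#include <cstdint>

// Mask is removable iff every one of the low ShAmtBits bits is either kept
// by Mask or already known zero in the shift amount operand.
bool maskIsUnneeded(uint32_t Mask, uint32_t KnownZero, unsigned ShAmtBits) {
  uint32_t Low = (ShAmtBits >= 32) ? ~0u : ((1u << ShAmtBits) - 1);
  return ((Mask | KnownZero) & Low) == Low;
}
// e.g. (x << (amt & 31)) with a 32-bit shift (ShAmtBits == 5): Mask == 31
// covers all five low bits, so the AND can be dropped from the pattern.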
@@ -1001,6 +1016,32 @@ void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}
+// We need to handle this here because tablegen doesn't support matching
+// instructions with multiple outputs.
+void AMDGPUDAGToDAGISel::SelectMUL_LOHI(SDNode *N) {
+ SDLoc SL(N);
+ bool Signed = N->getOpcode() == ISD::SMUL_LOHI;
+ unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
+
+ SDValue Zero = CurDAG->getTargetConstant(0, SL, MVT::i64);
+ SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
+ SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Zero, Clamp};
+ SDNode *Mad = CurDAG->getMachineNode(Opc, SL, N->getVTList(), Ops);
+ if (!SDValue(N, 0).use_empty()) {
+ SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32);
+ SDNode *Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
+ MVT::i32, SDValue(Mad, 0), Sub0);
+ ReplaceUses(SDValue(N, 0), SDValue(Lo, 0));
+ }
+ if (!SDValue(N, 1).use_empty()) {
+ SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32);
+ SDNode *Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
+ MVT::i32, SDValue(Mad, 0), Sub1);
+ ReplaceUses(SDValue(N, 1), SDValue(Hi, 0));
+ }
+ CurDAG->RemoveDeadNode(N);
+}
+
bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset) const {
if (!isUInt<16>(Offset))
return false;
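SelectMUL_LOHI above works because V_MAD_[IU]64_[IU]32 already computes the full 64-bit product (plus an addend, here forced to zero); the two 32-bit results of MUL_LOHI are then just the sub0/sub1 halves of that product. A scalar model of the unsigned case (illustrative function, not LLVM API):

#include <cstdint>

// UMUL_LOHI(A, B) as one 64-bit mad with a zero addend, then split.
void mulLoHiModel(uint32_t A, uint32_t B, uint32_t &Lo, uint32_t &Hi) {
  uint64_t Mad = (uint64_t)A * B + 0;   // V_MAD_U64_U32 with Zero addend
  Lo = (uint32_t)Mad;                   // EXTRACT_SUBREG ... sub0
  Hi = (uint32_t)(Mad >> 32);           // EXTRACT_SUBREG ... sub1
}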
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index c1d9673f067e..d638d9877a9b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -136,6 +136,10 @@ private:
bool isUniformLoad(const SDNode *N) const;
bool isUniformBr(const SDNode *N) const;
+ // Returns true if the masking that ISD::AND SDNode `N` applies to a shift
+ // amount is unneeded, i.e. it cannot change the low `ShAmtBits` bits.
+ bool isUnneededShiftMask(const SDNode *N, unsigned ShAmtBits) const;
+
bool isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
SDValue &RHS) const;
@@ -231,6 +235,7 @@ private:
void SelectUADDO_USUBO(SDNode *N);
void SelectDIV_SCALE(SDNode *N);
void SelectMAD_64_32(SDNode *N);
+ void SelectMUL_LOHI(SDNode *N);
void SelectFMA_W_CHAIN(SDNode *N);
void SelectFMUL_W_CHAIN(SDNode *N);
SDNode *getBFE32(bool IsSigned, const SDLoc &DL, SDValue Val, uint32_t Offset,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 523fa2d3724b..54177564afbc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -594,6 +594,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::MUL);
+ setTargetDAGCombine(ISD::SMUL_LOHI);
+ setTargetDAGCombine(ISD::UMUL_LOHI);
setTargetDAGCombine(ISD::MULHU);
setTargetDAGCombine(ISD::MULHS);
setTargetDAGCombine(ISD::SELECT);
@@ -3462,6 +3464,50 @@ SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N,
return DAG.getSExtOrTrunc(Mul, DL, VT);
}
+SDValue
+AMDGPUTargetLowering::performMulLoHiCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ if (N->getValueType(0) != MVT::i32)
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc DL(N);
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // SimplifyDemandedBits has the annoying habit of turning useful zero_extends
+ // in the source into any_extends if the result of the mul is truncated. Since
+ // we can assume the high bits are whatever we want, use the underlying value
+ // to keep the unknown high bits from interfering.
+ if (N0.getOpcode() == ISD::ANY_EXTEND)
+ N0 = N0.getOperand(0);
+ if (N1.getOpcode() == ISD::ANY_EXTEND)
+ N1 = N1.getOperand(0);
+
+ // Try to use two fast 24-bit multiplies (one for each half of the result)
+ // instead of one slow extending multiply.
+ unsigned LoOpcode, HiOpcode;
+ if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
+ N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
+ N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
+ LoOpcode = AMDGPUISD::MUL_U24;
+ HiOpcode = AMDGPUISD::MULHI_U24;
+ } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
+ N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
+ N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
+ LoOpcode = AMDGPUISD::MUL_I24;
+ HiOpcode = AMDGPUISD::MULHI_I24;
+ } else {
+ return SDValue();
+ }
+
+ SDValue Lo = DAG.getNode(LoOpcode, DL, MVT::i32, N0, N1);
+ SDValue Hi = DAG.getNode(HiOpcode, DL, MVT::i32, N0, N1);
+ DCI.CombineTo(N, Lo, Hi);
+ return SDValue(N, 0);
+}
+
SDValue AMDGPUTargetLowering::performMulhsCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
EVT VT = N->getValueType(0);
@@ -4103,6 +4149,9 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
return performTruncateCombine(N, DCI);
case ISD::MUL:
return performMulCombine(N, DCI);
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI:
+ return performMulLoHiCombine(N, DCI);
case ISD::MULHS:
return performMulhsCombine(N, DCI);
case ISD::MULHU:
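The new performMulLoHiCombine trades one slow extending multiply for two fast 24-bit ones: when both operands are provably representable in 24 bits, the full product has at most 48 significant bits, so MUL_U24 and MULHI_U24 between them recover the complete lo/hi pair. A scalar model of the unsigned path (illustrative):

#include <cstdint>

// Precondition (isU24 on both operands): A < (1u << 24) && B < (1u << 24).
void mulLoHi24Model(uint32_t A, uint32_t B, uint32_t &Lo, uint32_t &Hi) {
  uint64_t Full = (uint64_t)A * B;      // at most 48 significant bits
  Lo = (uint32_t)Full;                  // AMDGPUISD::MUL_U24
  Hi = (uint32_t)(Full >> 32);          // AMDGPUISD::MULHI_U24, <= 0xFFFF
}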
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 03632ac18598..daaca8737c5d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -91,6 +91,7 @@ protected:
SDValue performSrlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performTruncateCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performMulLoHiCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performMulhsCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performMulhuCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performCtlz_CttzCombine(const SDLoc &SL, SDValue Cond, SDValue LHS,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 28cb2fc57ac7..e16bead81b65 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -3880,6 +3880,22 @@ bool AMDGPUInstructionSelector::isDSOffset2Legal(Register Base, int64_t Offset0,
return KnownBits->signBitIsZero(Base);
}
+bool AMDGPUInstructionSelector::isUnneededShiftMask(const MachineInstr &MI,
+ unsigned ShAmtBits) const {
+ assert(MI.getOpcode() == TargetOpcode::G_AND);
+
+ Optional<APInt> RHS = getIConstantVRegVal(MI.getOperand(2).getReg(), *MRI);
+ if (!RHS)
+ return false;
+
+ if (RHS->countTrailingOnes() >= ShAmtBits)
+ return true;
+
+ const APInt &LHSKnownZeros =
+ KnownBits->getKnownZeroes(MI.getOperand(1).getReg());
+ return (LHSKnownZeros | *RHS).countTrailingOnes() >= ShAmtBits;
+}
+
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffset(
MachineOperand &Root) const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index b70e6883bae2..26996e42af53 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -300,6 +300,10 @@ private:
bool isInlineImmediate64(int64_t Imm) const;
bool isInlineImmediate(const APFloat &Imm) const;
+ // Returns true if the masking that TargetOpcode::G_AND MachineInstr `MI`
+ // applies to a shift amount is unneeded, i.e. it cannot change the low
+ // `ShAmtBits` bits.
+ bool isUnneededShiftMask(const MachineInstr &MI, unsigned ShAmtBits) const;
+
const SIInstrInfo &TII;
const SIRegisterInfo &TRI;
const AMDGPURegisterBankInfo &RBI;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index bad9f6265b36..0528b552f475 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -242,25 +242,41 @@ def AMDGPUmul_i24_oneuse : HasOneUseBinOp<AMDGPUmul_i24>;
//===----------------------------------------------------------------------===//
// Constrained shift PatFrags.
+
+def csh_mask_16 : PatFrag<(ops node:$src0), (and node:$src0, imm),
+ [{ return isUnneededShiftMask(N, 4); }]> {
+ let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 4); }];
+ }
+
+def csh_mask_32 : PatFrag<(ops node:$src0), (and node:$src0, imm),
+ [{ return isUnneededShiftMask(N, 5); }]> {
+ let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 5); }];
+ }
+
+def csh_mask_64 : PatFrag<(ops node:$src0), (and node:$src0, imm),
+ [{ return isUnneededShiftMask(N, 6); }]> {
+ let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 6); }];
+ }
+
foreach width = [16, 32, 64] in {
-defvar mask = !sub(width, 1);
+defvar csh_mask = !cast<SDPatternOperator>("csh_mask_"#width);
def cshl_#width : PatFrags<(ops node:$src0, node:$src1),
- [(shl node:$src0, node:$src1), (shl node:$src0, (and node:$src1, mask))]>;
+ [(shl node:$src0, node:$src1), (shl node:$src0, (csh_mask node:$src1))]>;
defvar cshl = !cast<SDPatternOperator>("cshl_"#width);
def cshl_#width#_oneuse : HasOneUseBinOp<cshl>;
def clshl_rev_#width : PatFrag <(ops node:$src0, node:$src1),
(cshl $src1, $src0)>;
def csrl_#width : PatFrags<(ops node:$src0, node:$src1),
- [(srl node:$src0, node:$src1), (srl node:$src0, (and node:$src1, mask))]>;
+ [(srl node:$src0, node:$src1), (srl node:$src0, (csh_mask node:$src1))]>;
defvar csrl = !cast<SDPatternOperator>("csrl_"#width);
def csrl_#width#_oneuse : HasOneUseBinOp<csrl>;
def clshr_rev_#width : PatFrag <(ops node:$src0, node:$src1),
(csrl $src1, $src0)>;
def csra_#width : PatFrags<(ops node:$src0, node:$src1),
- [(sra node:$src0, node:$src1), (sra node:$src0, (and node:$src1, mask))]>;
+ [(sra node:$src0, node:$src1), (sra node:$src0, (csh_mask node:$src1))]>;
defvar csra = !cast<SDPatternOperator>("csra_"#width);
def csra_#width#_oneuse : HasOneUseBinOp<csra>;
def cashr_rev_#width : PatFrag <(ops node:$src0, node:$src1),
@@ -696,11 +712,6 @@ class RcpPat<Instruction RcpInst, ValueType vt> : AMDGPUPat <
(RcpInst $src)
>;
-class RsqPat<Instruction RsqInst, ValueType vt> : AMDGPUPat <
- (AMDGPUrcp (fsqrt vt:$src)),
- (RsqInst $src)
->;
-
// Instructions which select to the same v_min_f*
def fminnum_like : PatFrags<(ops node:$src0, node:$src1),
[(fminnum_ieee node:$src0, node:$src1),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
index fc984d2dda64..1479933a2850 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "amdgpu-postlegalizer-combiner"
@@ -58,6 +59,9 @@ public:
bool matchUCharToFloat(MachineInstr &MI);
void applyUCharToFloat(MachineInstr &MI);
+ bool matchRcpSqrtToRsq(MachineInstr &MI,
+ std::function<void(MachineIRBuilder &)> &MatchInfo);
+
// FIXME: Should be able to have 2 separate matchdatas rather than custom
// struct boilerplate.
struct CvtF32UByteMatchInfo {
@@ -203,6 +207,48 @@ void AMDGPUPostLegalizerCombinerHelper::applyUCharToFloat(MachineInstr &MI) {
MI.eraseFromParent();
}
+bool AMDGPUPostLegalizerCombinerHelper::matchRcpSqrtToRsq(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+
+ auto getRcpSrc = [=](const MachineInstr &MI) {
+ MachineInstr *ResMI = nullptr;
+ if (MI.getOpcode() == TargetOpcode::G_INTRINSIC &&
+ MI.getIntrinsicID() == Intrinsic::amdgcn_rcp)
+ ResMI = MRI.getVRegDef(MI.getOperand(2).getReg());
+
+ return ResMI;
+ };
+
+ auto getSqrtSrc = [=](const MachineInstr &MI) {
+ MachineInstr *SqrtSrcMI = nullptr;
+ mi_match(MI.getOperand(0).getReg(), MRI, m_GFSqrt(m_MInstr(SqrtSrcMI)));
+ return SqrtSrcMI;
+ };
+
+ MachineInstr *RcpSrcMI = nullptr, *SqrtSrcMI = nullptr;
+ // rcp(sqrt(x))
+ if ((RcpSrcMI = getRcpSrc(MI)) && (SqrtSrcMI = getSqrtSrc(*RcpSrcMI))) {
+ MatchInfo = [SqrtSrcMI, &MI](MachineIRBuilder &B) {
+ B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false)
+ .addUse(SqrtSrcMI->getOperand(0).getReg())
+ .setMIFlags(MI.getFlags());
+ };
+ return true;
+ }
+
+ // sqrt(rcp(x))
+ if ((SqrtSrcMI = getSqrtSrc(MI)) && (RcpSrcMI = getRcpSrc(*SqrtSrcMI))) {
+ MatchInfo = [RcpSrcMI, &MI](MachineIRBuilder &B) {
+ B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)}, false)
+ .addUse(RcpSrcMI->getOperand(0).getReg())
+ .setMIFlags(MI.getFlags());
+ };
+ return true;
+ }
+
+ return false;
+}
+
bool AMDGPUPostLegalizerCombinerHelper::matchCvtF32UByteN(
MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) {
Register SrcReg = MI.getOperand(1).getReg();
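Both shapes matched above collapse to one rsq because, for nonnegative x, rcp(sqrt(x)) and sqrt(rcp(x)) denote the same value, 1/sqrt(x); the combine copies over the instruction's FP flags, so the usual fast-math caveats around inf, NaN, and signed zero apply. The identity in scalar form:

#include <cmath>

// rcp(sqrt(x)) == sqrt(rcp(x)) == 1 / sqrt(x) for x >= 0, which is what a
// single amdgcn.rsq computes (up to the intrinsic's precision guarantees).
float rsqModel(float X) { return 1.0f / std::sqrt(X); }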
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
index d560d2043f42..7c4eb71882c7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
@@ -280,10 +280,10 @@ bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) {
}
LLVM_DEBUG(dbgs() << "Printf format string in source = " << Str.str()
<< '\n');
- for (size_t I = 0; I < Str.size(); ++I) {
+ for (char C : Str) {
// Rest of the C escape sequences (e.g. \') are handled correctly
// by the MDParser
- switch (Str[I]) {
+ switch (C) {
case '\a':
Sizes << "\\a";
break;
@@ -308,7 +308,7 @@ bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) {
Sizes << "\\72";
break;
default:
- Sizes << Str[I];
+ Sizes << C;
break;
}
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
index d7dc9ee4117b..12b5830ef930 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
@@ -45,6 +45,7 @@ public:
TRI(*MF.getSubtarget().getRegisterInfo()), Helper(Helper){};
bool isVgprRegBank(Register Reg);
+ Register getAsVgpr(Register Reg);
struct MinMaxMedOpc {
unsigned Min, Max, Med;
@@ -69,6 +70,23 @@ bool AMDGPURegBankCombinerHelper::isVgprRegBank(Register Reg) {
return RBI.getRegBank(Reg, MRI, TRI)->getID() == AMDGPU::VGPRRegBankID;
}
+Register AMDGPURegBankCombinerHelper::getAsVgpr(Register Reg) {
+ if (isVgprRegBank(Reg))
+ return Reg;
+
+ // Search for existing copy of Reg to vgpr.
+ for (MachineInstr &Use : MRI.use_instructions(Reg)) {
+ Register Def = Use.getOperand(0).getReg();
+ if (Use.getOpcode() == AMDGPU::COPY && isVgprRegBank(Def))
+ return Def;
+ }
+
+ // Copy Reg to vgpr.
+ Register VgprReg = B.buildCopy(MRI.getType(Reg), Reg).getReg(0);
+ MRI.setRegBank(VgprReg, RBI.getRegBank(AMDGPU::VGPRRegBankID));
+ return VgprReg;
+}
+
AMDGPURegBankCombinerHelper::MinMaxMedOpc
AMDGPURegBankCombinerHelper::getMinMaxPair(unsigned Opc) {
switch (Opc) {
@@ -134,7 +152,9 @@ void AMDGPURegBankCombinerHelper::applyMed3(MachineInstr &MI,
Med3MatchInfo &MatchInfo) {
B.setInstrAndDebugLoc(MI);
B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)},
- {MatchInfo.Val0, MatchInfo.Val1, MatchInfo.Val2}, MI.getFlags());
+ {getAsVgpr(MatchInfo.Val0), getAsVgpr(MatchInfo.Val1),
+ getAsVgpr(MatchInfo.Val2)},
+ MI.getFlags());
MI.eraseFromParent();
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index ab3ce980c3f6..5988403c0a29 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -3189,10 +3189,10 @@ unsigned AMDGPURegisterBankInfo::getMappingType(const MachineRegisterInfo &MRI,
const MachineInstr &MI) const {
unsigned RegBank = AMDGPU::InvalidRegBankID;
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- if (!MI.getOperand(i).isReg())
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
continue;
- Register Reg = MI.getOperand(i).getReg();
+ Register Reg = MO.getReg();
if (const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI)) {
RegBank = regBankUnion(RegBank, Bank->getID());
if (RegBank == AMDGPU::VGPRRegBankID)
@@ -3206,10 +3206,10 @@ unsigned AMDGPURegisterBankInfo::getMappingType(const MachineRegisterInfo &MRI,
bool AMDGPURegisterBankInfo::isSALUMapping(const MachineInstr &MI) const {
const MachineFunction &MF = *MI.getParent()->getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
- for (unsigned i = 0, e = MI.getNumOperands();i != e; ++i) {
- if (!MI.getOperand(i).isReg())
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
continue;
- Register Reg = MI.getOperand(i).getReg();
+ Register Reg = MO.getReg();
if (const RegisterBank *Bank = getRegBank(Reg, MRI, *TRI)) {
if (Bank->getID() != AMDGPU::SGPRRegBankID)
return false;
diff --git a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
index 1a9255f3240f..712f6dece911 100644
--- a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
@@ -706,9 +706,7 @@ bool AMDGPUCFGStructurizer::prepare() {
// Remove unconditional branch instr.
// Add dummy exit block iff there are multiple returns.
- for (SmallVectorImpl<MachineBasicBlock *>::const_iterator
- It = OrderedBlks.begin(), E = OrderedBlks.end(); It != E; ++It) {
- MachineBasicBlock *MBB = *It;
+ for (MachineBasicBlock *MBB : OrderedBlks) {
removeUnconditionalBranch(MBB);
removeRedundantConditionalBranch(MBB);
if (isReturnBlock(MBB)) {
@@ -851,14 +849,10 @@ bool AMDGPUCFGStructurizer::run() {
void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
int SccNum = 0;
- MachineBasicBlock *MBB;
for (scc_iterator<MachineFunction *> It = scc_begin(MF); !It.isAtEnd();
++It, ++SccNum) {
const std::vector<MachineBasicBlock *> &SccNext = *It;
- for (std::vector<MachineBasicBlock *>::const_iterator
- blockIter = SccNext.begin(), blockEnd = SccNext.end();
- blockIter != blockEnd; ++blockIter) {
- MBB = *blockIter;
+ for (MachineBasicBlock *MBB : SccNext) {
OrderedBlks.push_back(MBB);
recordSccnum(MBB, SccNum);
}
@@ -1601,11 +1595,8 @@ void AMDGPUCFGStructurizer::addDummyExitBlock(
FuncRep->push_back(DummyExitBlk); //insert to function
insertInstrEnd(DummyExitBlk, R600::RETURN);
- for (SmallVectorImpl<MachineBasicBlock *>::iterator It = RetMBB.begin(),
- E = RetMBB.end(); It != E; ++It) {
- MachineBasicBlock *MBB = *It;
- MachineInstr *MI = getReturnInstr(MBB);
- if (MI)
+ for (MachineBasicBlock *MBB : RetMBB) {
+ if (MachineInstr *MI = getReturnInstr(MBB))
MI->eraseFromParent();
MBB->addSuccessor(DummyExitBlk);
LLVM_DEBUG(dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber()
diff --git a/llvm/lib/Target/AMDGPU/CaymanInstructions.td b/llvm/lib/Target/AMDGPU/CaymanInstructions.td
index f4ddbf1131c3..d18dab0554bd 100644
--- a/llvm/lib/Target/AMDGPU/CaymanInstructions.td
+++ b/llvm/lib/Target/AMDGPU/CaymanInstructions.td
@@ -48,8 +48,6 @@ def SIN_cm : SIN_Common<0x8D>;
def COS_cm : COS_Common<0x8E>;
} // End isVector = 1
-def : RsqPat<RECIPSQRT_IEEE_cm, f32>;
-
def : SqrtPat<RECIPSQRT_IEEE_cm, RECIP_IEEE_cm>;
def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>;
diff --git a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
index 12224cb3f797..a9a3421e8192 100644
--- a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
+++ b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
@@ -126,7 +126,6 @@ def EXP_IEEE_eg : EXP_IEEE_Common<0x81>;
def LOG_IEEE_eg : LOG_IEEE_Common<0x83>;
def RECIP_CLAMPED_eg : RECIP_CLAMPED_Common<0x84>;
def RECIPSQRT_IEEE_eg : RECIPSQRT_IEEE_Common<0x89>;
-def : RsqPat<RECIPSQRT_IEEE_eg, f32>;
def : SqrtPat<RECIPSQRT_IEEE_eg, RECIP_IEEE_eg>;
def SIN_eg : SIN_Common<0x8D>;
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index ff5d0b0af6a4..0f8dd0b3bf58 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1442,12 +1442,10 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
bool FullReg;
const MachineInstr *MI1;
- auto IsOverlappedDGEMMorXDLFn = [Reg, &IsMFMAFn, &FullReg, &MI1,
- this](const MachineInstr &MI) {
+ auto IsOverlappedMFMAFn = [Reg, &IsMFMAFn, &FullReg, &MI1,
+ this](const MachineInstr &MI) {
if (!IsMFMAFn(MI))
return false;
- if (!isDGEMM(MI.getOpcode()) && !isXDL(ST, MI))
- return false;
Register DstReg = MI.getOperand(0).getReg();
FullReg = (DstReg == Reg);
MI1 = &MI;
@@ -1458,8 +1456,8 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
getWaitStatesSinceDef(Reg, IsLegacyVALUNotDotFn, MaxWaitStates);
WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
- int NumWaitStates = getWaitStatesSinceDef(Reg, IsOverlappedDGEMMorXDLFn,
- MaxWaitStates);
+ int NumWaitStates =
+ getWaitStatesSinceDef(Reg, IsOverlappedMFMAFn, MaxWaitStates);
if (NumWaitStates == std::numeric_limits<int>::max())
continue;
@@ -1619,12 +1617,9 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
const MachineInstr *MFMA = nullptr;
unsigned Reg;
- auto IsDGEMMorXDLWriteFn = [&Reg, &IsMFMAFn, &MFMA,
- this](const MachineInstr &MI) {
+ auto IsMFMAWriteFn = [&Reg, &IsMFMAFn, &MFMA, this](const MachineInstr &MI) {
if (!IsMFMAFn(MI) || !TRI.regsOverlap(MI.getOperand(0).getReg(), Reg))
return false;
- if (!isDGEMM(MI.getOpcode()) && !isXDL(ST, MI))
- return false;
MFMA = &MI;
return true;
};
@@ -1675,8 +1670,8 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
}
MFMA = nullptr;
- WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsDGEMMorXDLWriteFn,
- MaxWaitStates);
+ WaitStatesSinceDef =
+ getWaitStatesSinceDef(Reg, IsMFMAWriteFn, MaxWaitStates);
if (!MFMA)
continue;
@@ -1750,8 +1745,8 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
WaitStatesSinceDef);
MFMA = nullptr;
- WaitStatesSinceDef = getWaitStatesSinceDef(Reg, IsDGEMMorXDLWriteFn,
- MaxWaitStates);
+ WaitStatesSinceDef =
+ getWaitStatesSinceDef(Reg, IsMFMAWriteFn, MaxWaitStates);
if (MFMA) {
int NeedWaitStates = MaxWaitStates;
switch (TSchedModel.computeInstrLatency(MFMA)) {
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 3456f9a6156c..82c09378acac 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -74,11 +74,11 @@ unsigned GCNRegPressure::getRegKind(Register Reg,
assert(Reg.isVirtual());
const auto RC = MRI.getRegClass(Reg);
auto STI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
- return STI->isSGPRClass(RC) ?
- (STI->getRegSizeInBits(*RC) == 32 ? SGPR32 : SGPR_TUPLE) :
- STI->hasAGPRs(RC) ?
- (STI->getRegSizeInBits(*RC) == 32 ? AGPR32 : AGPR_TUPLE) :
- (STI->getRegSizeInBits(*RC) == 32 ? VGPR32 : VGPR_TUPLE);
+ return STI->isSGPRClass(RC)
+ ? (STI->getRegSizeInBits(*RC) == 32 ? SGPR32 : SGPR_TUPLE)
+ : STI->isAGPRClass(RC)
+ ? (STI->getRegSizeInBits(*RC) == 32 ? AGPR32 : AGPR_TUPLE)
+ : (STI->getRegSizeInBits(*RC) == 32 ? VGPR32 : VGPR_TUPLE);
}
void GCNRegPressure::inc(unsigned Reg,
diff --git a/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp b/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp
index 1d93165f9eec..715fd69fc7ae 100644
--- a/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp
@@ -177,9 +177,7 @@ bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) {
const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
TII = ST.getInstrInfo();
- for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
- BB != BB_E; ++BB) {
- MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock &MBB : MF) {
MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
MachineBasicBlock::iterator LatestCFAlu = E;
while (I != E) {
diff --git a/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp b/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
index d5eaa33ef964..b9ca7f928d56 100644
--- a/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
+++ b/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
@@ -301,9 +301,7 @@ public:
const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
TII = ST.getInstrInfo();
- for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
- BB != BB_E; ++BB) {
- MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock &MBB : MF) {
MachineBasicBlock::iterator I = MBB.begin();
if (I != MBB.end() && I->getOpcode() == R600::CF_ALU)
continue; // BB was already parsed
diff --git a/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp b/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
index 838a497b4df1..194879fef53c 100644
--- a/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
@@ -73,9 +73,7 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
const R600RegisterInfo &TRI = TII->getRegisterInfo();
- for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
- BB != BB_E; ++BB) {
- MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock &MBB : MF) {
MachineBasicBlock::iterator I = MBB.begin();
while (I != MBB.end()) {
MachineInstr &MI = *I;
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index 0215eb9f9fea..bd757e9e3d70 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -285,9 +285,8 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
TII->get(R600::getLDSNoRetOp(MI.getOpcode())));
- for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) {
- NewMI.add(MI.getOperand(i));
- }
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
+ NewMI.add(MO);
} else {
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
}
diff --git a/llvm/lib/Target/AMDGPU/R600Instructions.td b/llvm/lib/Target/AMDGPU/R600Instructions.td
index 4487864888b6..b3da2fdefacc 100644
--- a/llvm/lib/Target/AMDGPU/R600Instructions.td
+++ b/llvm/lib/Target/AMDGPU/R600Instructions.td
@@ -1265,7 +1265,6 @@ let Predicates = [isR600] in {
defm DIV_r600 : DIV_Common<RECIP_IEEE_r600>;
def : POW_Common <LOG_IEEE_r600, EXP_IEEE_r600, MUL>;
- def : RsqPat<RECIPSQRT_IEEE_r600, f32>;
def : SqrtPat<RECIPSQRT_IEEE_r600, RECIP_IEEE_r600>;
def R600_ExportSwz : ExportSwzInst {
diff --git a/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp b/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp
index 36acfafa72aa..6aee2f591b56 100644
--- a/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp
@@ -124,11 +124,9 @@ SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
DAG->dumpNode(*SU);
} else {
dbgs() << "NO NODE \n";
- for (unsigned i = 0; i < DAG->SUnits.size(); i++) {
- const SUnit &S = DAG->SUnits[i];
+ for (const SUnit &S : DAG->SUnits)
if (!S.isScheduled)
DAG->dumpNode(S);
- }
});
return SU;
diff --git a/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp b/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
index 1a723279dc9f..72cf48c04e7f 100644
--- a/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
+++ b/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
@@ -323,14 +323,12 @@ bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) {
TII = ST.getInstrInfo();
MRI = &Fn.getRegInfo();
- for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
- MBB != MBBe; ++MBB) {
- MachineBasicBlock *MB = &*MBB;
+ for (MachineBasicBlock &MB : Fn) {
PreviousRegSeq.clear();
PreviousRegSeqByReg.clear();
PreviousRegSeqByUndefCount.clear();
- for (MachineBasicBlock::iterator MII = MB->begin(), MIIE = MB->end();
+ for (MachineBasicBlock::iterator MII = MB.begin(), MIIE = MB.end();
MII != MIIE; ++MII) {
MachineInstr &MI = *MII;
if (MI.getOpcode() != R600::REG_SEQUENCE) {
diff --git a/llvm/lib/Target/AMDGPU/R600Packetizer.cpp b/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
index e858bba2983c..beb0aad86e89 100644
--- a/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
+++ b/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
@@ -343,20 +343,11 @@ bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) {
// dependence between Insn 0 and Insn 2. This can lead to incorrect
// packetization
//
- for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
- MBB != MBBe; ++MBB) {
- MachineBasicBlock::iterator End = MBB->end();
- MachineBasicBlock::iterator MI = MBB->begin();
- while (MI != End) {
- if (MI->isKill() || MI->getOpcode() == R600::IMPLICIT_DEF ||
- (MI->getOpcode() == R600::CF_ALU && !MI->getOperand(8).getImm())) {
- MachineBasicBlock::iterator DeleteMI = MI;
- ++MI;
- MBB->erase(DeleteMI);
- End = MBB->end();
- continue;
- }
- ++MI;
+ for (MachineBasicBlock &MBB : Fn) {
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
+ if (MI.isKill() || MI.getOpcode() == R600::IMPLICIT_DEF ||
+ (MI.getOpcode() == R600::CF_ALU && !MI.getOperand(8).getImm()))
+ MBB.erase(MI);
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 777744f08cde..580e4bc417a4 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -18,7 +18,8 @@ namespace llvm {
enum SIRCFlags : uint8_t {
// For vector registers.
HasVGPR = 1 << 0,
- HasAGPR = 1 << 1
+ HasAGPR = 1 << 1,
+ HasSGPR = 1 << 2
}; // enum SIRCFlags
namespace SIInstrFlags {
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index cf93a63f26a0..f54778535b7c 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -127,11 +127,11 @@ FunctionPass *llvm::createSIFixSGPRCopiesPass() {
static bool hasVectorOperands(const MachineInstr &MI,
const SIRegisterInfo *TRI) {
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- if (!MI.getOperand(i).isReg() || !MI.getOperand(i).getReg().isVirtual())
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.getReg().isVirtual())
continue;
- if (TRI->hasVectorRegisters(MRI.getRegClass(MI.getOperand(i).getReg())))
+ if (TRI->hasVectorRegisters(MRI.getRegClass(MO.getReg())))
return true;
}
return false;
@@ -259,7 +259,7 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
// VGPRz = REG_SEQUENCE VGPRx, sub0
MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg());
- bool IsAGPR = TRI->hasAGPRs(DstRC);
+ bool IsAGPR = TRI->isAGPRClass(DstRC);
for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
Register SrcReg = MI.getOperand(I).getReg();
@@ -853,7 +853,7 @@ MachineBasicBlock *SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
Register PHIRes = MI.getOperand(0).getReg();
const TargetRegisterClass *RC0 = MRI->getRegClass(PHIRes);
- if (AllAGPRUses && numVGPRUses && !TRI->hasAGPRs(RC0)) {
+ if (AllAGPRUses && numVGPRUses && !TRI->isAGPRClass(RC0)) {
LLVM_DEBUG(dbgs() << "Moving PHI to AGPR: " << MI);
MRI->setRegClass(PHIRes, TRI->getEquivalentAGPRClass(RC0));
for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index a3a0e9c9b9ac..200e00ee5521 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1586,17 +1586,9 @@ bool SIFoldOperands::tryFoldRegSequence(MachineInstr &MI) {
unsigned OpIdx = Op - &UseMI->getOperand(0);
const MCInstrDesc &InstDesc = UseMI->getDesc();
- const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx];
- switch (OpInfo.RegClass) {
- case AMDGPU::AV_32RegClassID: LLVM_FALLTHROUGH;
- case AMDGPU::AV_64RegClassID: LLVM_FALLTHROUGH;
- case AMDGPU::AV_96RegClassID: LLVM_FALLTHROUGH;
- case AMDGPU::AV_128RegClassID: LLVM_FALLTHROUGH;
- case AMDGPU::AV_160RegClassID:
- break;
- default:
+ if (!TRI->isVectorSuperClass(
+ TRI->getRegClass(InstDesc.OpInfo[OpIdx].RegClass)))
return false;
- }
const auto *NewDstRC = TRI->getEquivalentAGPRClass(MRI->getRegClass(Reg));
auto Dst = MRI->createVirtualRegister(NewDstRC);
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 882b9a203755..4706c74be721 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -1364,6 +1364,34 @@ bool SIFrameLowering::assignCalleeSavedSpillSlots(
return false;
}
+bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
+ const MachineFunction &MF) const {
+
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ uint64_t EstStackSize = MFI.estimateStackSize(MF);
+ uint64_t MaxOffset = EstStackSize - 1;
+
+ // We need the emergency stack slots to be allocated in range of the
+ // MUBUF/flat scratch immediate offset from the base register, so assign these
+ // first at the incoming SP position.
+ //
+ // TODO: We could try sorting the objects to find a hole in the first bytes
+ // rather than allocating as close as possible. This could save a lot of space
+ // on frames with alignment requirements.
+ if (ST.enableFlatScratch()) {
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ if (TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
+ SIInstrFlags::FlatScratch))
+ return false;
+ } else {
+ if (SIInstrInfo::isLegalMUBUFImmOffset(MaxOffset))
+ return false;
+ }
+
+ return true;
+}
+
MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
MachineFunction &MF,
MachineBasicBlock &MBB,
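The hook above returns true exactly when the farthest in-frame byte (EstStackSize - 1) would fall outside the addressing mode's immediate range, in which case the emergency scavenging slots are placed near the incoming SP where small offsets still reach them. A numeric sketch, assuming for illustration a 12-bit unsigned MUBUF immediate (the real code queries isLegalMUBUFImmOffset / isLegalFLATOffset rather than hard-coding a range):

#include <cstdint>

// Place scavenging slots near the incoming SP iff the frame is too large
// for the assumed 12-bit unsigned immediate offset (0..4095).
bool needSlotsNearIncomingSP(uint64_t EstStackSize) {
  const uint64_t MaxImmOffset = 4095;   // assumed MUBUF immediate range
  return EstStackSize != 0 && EstStackSize - 1 > MaxImmOffset;
}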
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h
index 951ea79b2809..56fbb875ffd9 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h
@@ -43,6 +43,9 @@ public:
const TargetRegisterInfo *TRI,
std::vector<CalleeSavedInfo> &CSI) const override;
+ bool allocateScavengingFrameIndexesNearIncomingSP(
+ const MachineFunction &MF) const override;
+
bool isSupportedStackID(TargetStackID::Value ID) const override;
void processFunctionBeforeFrameFinalized(
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 519c5b936536..35b72f5d201b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -809,6 +809,11 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SMULO, MVT::i64, Custom);
setOperationAction(ISD::UMULO, MVT::i64, Custom);
+ if (Subtarget->hasMad64_32()) {
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom);
+ }
+
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
@@ -919,6 +924,16 @@ bool SITargetLowering::isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode,
!hasFP32Denormals(DAG.getMachineFunction());
}
+bool SITargetLowering::isFPExtFoldable(const MachineInstr &MI, unsigned Opcode,
+ LLT DestTy, LLT SrcTy) const {
+ return ((Opcode == TargetOpcode::G_FMAD && Subtarget->hasMadMixInsts()) ||
+ (Opcode == TargetOpcode::G_FMA && Subtarget->hasFmaMixInsts())) &&
+ DestTy.getScalarSizeInBits() == 32 &&
+ SrcTy.getScalarSizeInBits() == 16 &&
+ // TODO: This probably only requires no input flushing?
+ !hasFP32Denormals(*MI.getMF());
+}
+
bool SITargetLowering::isShuffleMaskLegal(ArrayRef<int>, EVT) const {
// SI has some legal vector types, but no legal vector operations. Say no
// shuffles are legal in order to prefer scalarizing some vector operations.
@@ -4290,8 +4305,8 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
MachineInstrBuilder MIB;
MIB = BuildMI(*BB, MI, DL, TII->get(AMDGPU::SI_CALL), ReturnAddrReg);
- for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
- MIB.add(MI.getOperand(I));
+ for (const MachineOperand &MO : MI.operands())
+ MIB.add(MO);
MIB.cloneMemRefs(MI);
MI.eraseFromParent();
@@ -4457,6 +4472,8 @@ bool SITargetLowering::enableAggressiveFMAFusion(EVT VT) const {
return true;
}
+bool SITargetLowering::enableAggressiveFMAFusion(LLT Ty) const { return true; }
+
EVT SITargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx,
EVT VT) const {
if (!VT.isVector()) {
@@ -4522,6 +4539,34 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
return false;
}
+bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+ LLT Ty) const {
+ switch (Ty.getScalarSizeInBits()) {
+ case 16:
+ return isFMAFasterThanFMulAndFAdd(MF, MVT::f16);
+ case 32:
+ return isFMAFasterThanFMulAndFAdd(MF, MVT::f32);
+ case 64:
+ return isFMAFasterThanFMulAndFAdd(MF, MVT::f64);
+ default:
+ break;
+ }
+
+ return false;
+}
+
+bool SITargetLowering::isFMADLegal(const MachineInstr &MI, LLT Ty) const {
+ if (!Ty.isScalar())
+ return false;
+
+ if (Ty.getScalarSizeInBits() == 16)
+ return Subtarget->hasMadF16() && !hasFP64FP16Denormals(*MI.getMF());
+ if (Ty.getScalarSizeInBits() == 32)
+ return Subtarget->hasMadMacF32Insts() && !hasFP32Denormals(*MI.getMF());
+
+ return false;
+}
+
bool SITargetLowering::isFMADLegal(const SelectionDAG &DAG,
const SDNode *N) const {
// TODO: Check future ftz flag
@@ -4691,6 +4736,9 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SMULO:
case ISD::UMULO:
return lowerXMULO(Op, DAG);
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI:
+ return lowerXMUL_LOHI(Op, DAG);
case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG);
}
@@ -5304,6 +5352,21 @@ SDValue SITargetLowering::lowerXMULO(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMergeValues({ Result, Overflow }, SL);
}
+SDValue SITargetLowering::lowerXMUL_LOHI(SDValue Op, SelectionDAG &DAG) const {
+ if (Op->isDivergent()) {
+ // Select to V_MAD_[IU]64_[IU]32.
+ return Op;
+ }
+ if (Subtarget->hasSMulHi()) {
+ // Expand to S_MUL_I32 + S_MUL_HI_[IU]32.
+ return SDValue();
+ }
+ // The multiply is uniform but we would have to use V_MUL_HI_[IU]32 to
+ // calculate the high part, so we might as well do the whole thing with
+ // V_MAD_[IU]64_[IU]32.
+ return Op;
+}
+
SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const {
if (!Subtarget->isTrapHandlerEnabled() ||
Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA)
@@ -9790,10 +9853,9 @@ bool SITargetLowering::isCanonicalized(Register Reg, MachineFunction &MF,
if (Subtarget->supportsMinMaxDenormModes() ||
denormalsEnabledForType(MRI.getType(Reg), MF))
return true;
- for (unsigned I = 1, E = MI->getNumOperands(); I != E; ++I) {
- if (!isCanonicalized(MI->getOperand(I).getReg(), MF, MaxDepth - 1))
+ for (const MachineOperand &MO : llvm::drop_begin(MI->operands()))
+ if (!isCanonicalized(MO.getReg(), MF, MaxDepth - 1))
return false;
- }
return true;
}
default:
@@ -11460,15 +11522,15 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
if (I == -1)
break;
MachineOperand &Op = MI.getOperand(I);
- if ((OpInfo[I].RegClass != llvm::AMDGPU::AV_64RegClassID &&
- OpInfo[I].RegClass != llvm::AMDGPU::AV_32RegClassID) ||
- !Op.getReg().isVirtual() || !TRI->isAGPR(MRI, Op.getReg()))
+ if (!Op.isReg() || !Op.getReg().isVirtual())
+ continue;
+ auto *RC = TRI->getRegClassForReg(MRI, Op.getReg());
+ if (!TRI->hasAGPRs(RC))
continue;
auto *Src = MRI.getUniqueVRegDef(Op.getReg());
if (!Src || !Src->isCopy() ||
!TRI->isSGPRReg(MRI, Src->getOperand(1).getReg()))
continue;
- auto *RC = TRI->getRegClassForReg(MRI, Op.getReg());
auto *NewRC = TRI->getEquivalentVGPRClass(RC);
// All uses of agpr64 and agpr32 can also accept vgpr except for
// v_accvgpr_read, but we do not produce agpr reads during selection,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 1e48c96ad3c8..1315cc15dd02 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -135,6 +135,7 @@ private:
SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFMINNUM_FMAXNUM(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerXMULO(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerXMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
SDValue getSegmentAperture(unsigned AS, const SDLoc &DL,
SelectionDAG &DAG) const;
@@ -252,6 +253,9 @@ public:
bool isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode, EVT DestVT,
EVT SrcVT) const override;
+ bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode, LLT DestTy,
+ LLT SrcTy) const override;
+
bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const override;
bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
@@ -377,6 +381,7 @@ public:
bool hasBitPreservingFPLogic(EVT VT) const override;
bool enableAggressiveFMAFusion(EVT VT) const override;
+ bool enableAggressiveFMAFusion(LLT Ty) const override;
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override;
@@ -384,7 +389,10 @@ public:
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
EVT VT) const override;
+ bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+ const LLT Ty) const override;
bool isFMADLegal(const SelectionDAG &DAG, const SDNode *N) const override;
+ bool isFMADLegal(const MachineInstr &MI, const LLT Ty) const override;
SDValue splitUnaryVectorOp(SDValue Op, SelectionDAG &DAG) const;
SDValue splitBinaryVectorOp(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index f4e5771d2a2a..c9d9dd1fb82c 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -150,6 +150,8 @@ enum VmemType {
VMEM_NOSAMPLER,
// MIMG instructions with a sampler.
VMEM_SAMPLER,
+ // BVH instructions
+ VMEM_BVH
};
VmemType getVmemType(const MachineInstr &Inst) {
@@ -157,9 +159,10 @@ VmemType getVmemType(const MachineInstr &Inst) {
if (!SIInstrInfo::isMIMG(Inst))
return VMEM_NOSAMPLER;
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Inst.getOpcode());
- return AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode)->Sampler
- ? VMEM_SAMPLER
- : VMEM_NOSAMPLER;
+ const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo =
+ AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
+ return BaseInfo->BVH ? VMEM_BVH
+ : BaseInfo->Sampler ? VMEM_SAMPLER : VMEM_NOSAMPLER;
}
void addWait(AMDGPU::Waitcnt &Wait, InstCounterType T, unsigned Count) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 4a928123b68f..92f5322b8ad2 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -898,10 +898,10 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
unsigned EltSize = 4;
unsigned Opcode = AMDGPU::V_MOV_B32_e32;
- if (RI.hasAGPRs(RC)) {
+ if (RI.isAGPRClass(RC)) {
Opcode = (RI.hasVGPRs(SrcRC)) ?
AMDGPU::V_ACCVGPR_WRITE_B32_e64 : AMDGPU::INSTRUCTION_LIST_END;
- } else if (RI.hasVGPRs(RC) && RI.hasAGPRs(SrcRC)) {
+ } else if (RI.hasVGPRs(RC) && RI.isAGPRClass(SrcRC)) {
Opcode = AMDGPU::V_ACCVGPR_READ_B32_e64;
} else if ((Size % 64 == 0) && RI.hasVGPRs(RC) &&
(RI.isProperlyAlignedRC(*RC) &&
@@ -1205,7 +1205,7 @@ Register SIInstrInfo::insertNE(MachineBasicBlock *MBB,
unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const {
- if (RI.hasAGPRs(DstRC))
+ if (RI.isAGPRClass(DstRC))
return AMDGPU::COPY;
if (RI.getRegSizeInBits(*DstRC) == 32) {
return RI.isSGPRClass(DstRC) ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
@@ -1435,6 +1435,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
FrameInfo.getObjectAlign(FrameIndex));
unsigned SpillSize = TRI->getSpillSize(*RC);
+ MachineRegisterInfo &MRI = MF->getRegInfo();
if (RI.isSGPRClass(RC)) {
MFI->setHasSpilledSGPRs();
assert(SrcReg != AMDGPU::M0 && "m0 should not be spilled");
@@ -1448,7 +1449,6 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
// The SGPR spill/restore instructions only work on numbered SGPRs, so we need
// to make sure we are using the correct register class.
if (SrcReg.isVirtual() && SpillSize == 4) {
- MachineRegisterInfo &MRI = MF->getRegInfo();
MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0_XEXECRegClass);
}
@@ -1463,10 +1463,21 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
return;
}
- unsigned Opcode = RI.hasAGPRs(RC) ? getAGPRSpillSaveOpcode(SpillSize)
- : getVGPRSpillSaveOpcode(SpillSize);
+ unsigned Opcode = RI.isAGPRClass(RC) ? getAGPRSpillSaveOpcode(SpillSize)
+ : getVGPRSpillSaveOpcode(SpillSize);
MFI->setHasSpilledVGPRs();
+ if (RI.isVectorSuperClass(RC)) {
+ // Convert an AV spill into a VGPR spill. Introduce a copy from the AV to
+ // an equivalent VGPR register beforehand. Regalloc may introduce AV spills
+ // that are only relevant until the rewriter, at which point they become
+ // spills of either VGPRs or AGPRs.
+ Register TmpVReg = MRI.createVirtualRegister(RI.getEquivalentVGPRClass(RC));
+ BuildMI(MBB, MI, DL, get(TargetOpcode::COPY), TmpVReg)
+ .addReg(SrcReg, RegState::Kill);
+ SrcReg = TmpVReg;
+ }
+
BuildMI(MBB, MI, DL, get(Opcode))
.addReg(SrcReg, getKillRegState(isKill)) // data
.addFrameIndex(FrameIndex) // addr
@@ -1598,13 +1609,26 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
return;
}
- unsigned Opcode = RI.hasAGPRs(RC) ? getAGPRSpillRestoreOpcode(SpillSize)
- : getVGPRSpillRestoreOpcode(SpillSize);
+ unsigned Opcode = RI.isAGPRClass(RC) ? getAGPRSpillRestoreOpcode(SpillSize)
+ : getVGPRSpillRestoreOpcode(SpillSize);
+
+ bool IsVectorSuperClass = RI.isVectorSuperClass(RC);
+ Register TmpReg = DestReg;
+ if (IsVectorSuperClass) {
+ // For AV classes, restore the spill into a VGPR first, then copy the
+ // result into an equivalent AV register.
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ DestReg = MRI.createVirtualRegister(RI.getEquivalentVGPRClass(RC));
+ }
BuildMI(MBB, MI, DL, get(Opcode), DestReg)
.addFrameIndex(FrameIndex) // vaddr
.addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
.addImm(0) // offset
.addMemOperand(MMO);
+
+ if (IsVectorSuperClass)
+ BuildMI(MBB, MI, DL, get(TargetOpcode::COPY), TmpReg)
+ .addReg(DestReg, RegState::Kill);
}
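Taken together, the two hunks above bracket spills of the new AV superclasses with copies through a VGPR. A hedged sketch of the resulting machine code shape for a 64-bit AV register (illustrative MIR in comments; the virtual register names are invented):

    // Spill side: copy AV -> VGPR, then store the VGPR.
    //   %tmp:vreg_64 = COPY killed %val:av_64
    //   SI_SPILL_V64_SAVE killed %tmp, %stack.0, ...
    // Restore side: reload into a VGPR, then copy back into the AV register.
    //   %tmp:vreg_64 = SI_SPILL_V64_RESTORE %stack.0, ...
    //   %val:av_64 = COPY killed %tmp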
void SIInstrInfo::insertNoop(MachineBasicBlock &MBB,
@@ -2802,12 +2826,11 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
}
if (Is16Bit) {
- if (isVGPRCopy)
- return false; // Do not clobber vgpr_hi16
+ if (isVGPRCopy)
+ return false; // Do not clobber vgpr_hi16
- if (DstReg.isVirtual() &&
- UseMI.getOperand(0).getSubReg() != AMDGPU::lo16)
- return false;
+ if (DstReg.isVirtual() && UseMI.getOperand(0).getSubReg() != AMDGPU::lo16)
+ return false;
UseMI.getOperand(0).setSubReg(0);
if (DstReg.isPhysical()) {
@@ -3896,9 +3919,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
// verification is broken anyway
if (ST.needsAlignedVGPRs()) {
const TargetRegisterClass *RC = RI.getRegClassForReg(MRI, Reg);
- const bool IsVGPR = RI.hasVGPRs(RC);
- const bool IsAGPR = !IsVGPR && RI.hasAGPRs(RC);
- if ((IsVGPR || IsAGPR) && MO.getSubReg()) {
+ if (RI.hasVectorRegisters(RC) && MO.getSubReg()) {
const TargetRegisterClass *SubRC =
RI.getSubRegClass(RC, MO.getSubReg());
RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.getSubReg());
@@ -5522,13 +5543,13 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
if (getOpRegClass(MI, 0) == &AMDGPU::VReg_1RegClass) {
VRC = &AMDGPU::VReg_1RegClass;
} else
- VRC = RI.hasAGPRs(getOpRegClass(MI, 0))
+ VRC = RI.isAGPRClass(getOpRegClass(MI, 0))
? RI.getEquivalentAGPRClass(SRC)
: RI.getEquivalentVGPRClass(SRC);
} else {
- VRC = RI.hasAGPRs(getOpRegClass(MI, 0))
- ? RI.getEquivalentAGPRClass(VRC)
- : RI.getEquivalentVGPRClass(VRC);
+ VRC = RI.isAGPRClass(getOpRegClass(MI, 0))
+ ? RI.getEquivalentAGPRClass(VRC)
+ : RI.getEquivalentVGPRClass(VRC);
}
RC = VRC;
} else {
@@ -7065,8 +7086,8 @@ const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
case AMDGPU::STRICT_WWM:
case AMDGPU::STRICT_WQM: {
const TargetRegisterClass *SrcRC = getOpRegClass(Inst, 1);
- if (RI.hasAGPRs(SrcRC)) {
- if (RI.hasAGPRs(NewDstRC))
+ if (RI.isAGPRClass(SrcRC)) {
+ if (RI.isAGPRClass(NewDstRC))
return nullptr;
switch (Inst.getOpcode()) {
@@ -7082,7 +7103,7 @@ const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass(
if (!NewDstRC)
return nullptr;
} else {
- if (RI.hasVGPRs(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
+ if (RI.isVGPRClass(NewDstRC) || NewDstRC == &AMDGPU::VReg_1RegClass)
return nullptr;
NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 8c24268e379e..47ee83eb9351 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2246,7 +2246,7 @@ class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> {
let HasExtSDWA9 = 0;
}
-class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.Pattern> : VOPProfile <p.ArgVT> {
+class VOP_PAT_GEN <VOPProfile p, int mode=PatGenMode.NoPattern> : VOPProfile <p.ArgVT> {
let NeedPatGen = mode;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index d5f9cb8ba493..d55d8da8699a 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -827,10 +827,6 @@ def : Pat <
let OtherPredicates = [UnsafeFPMath] in {
-//defm : RsqPat<V_RSQ_F32_e32, f32>;
-
-def : RsqPat<V_RSQ_F32_e32, f32>;
-
// Convert (x - floor(x)) to fract(x)
def : GCNPat <
(f32 (fsub (f32 (VOP3Mods f32:$x, i32:$mods)),
@@ -1372,61 +1368,48 @@ def : GCNPat <
>;
}
+
/********** ================================ **********/
/********** Floating point absolute/negative **********/
/********** ================================ **********/
-// Prevent expanding both fneg and fabs.
-// TODO: Add IgnoredBySelectionDAG bit?
-let AddedComplexity = 1 in { // Prefer SALU to VALU patterns for DAG
-
def : GCNPat <
- (fneg (fabs (f32 SReg_32:$src))),
+ (UniformUnaryFrag<fneg> (fabs (f32 SReg_32:$src))),
(S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80000000))) // Set sign bit
>;
def : GCNPat <
- (fabs (f32 SReg_32:$src)),
+ (UniformUnaryFrag<fabs> (f32 SReg_32:$src)),
(S_AND_B32 SReg_32:$src, (S_MOV_B32 (i32 0x7fffffff)))
>;
def : GCNPat <
- (fneg (f32 SReg_32:$src)),
+ (UniformUnaryFrag<fneg> (f32 SReg_32:$src)),
(S_XOR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80000000)))
>;
def : GCNPat <
- (fneg (f16 SReg_32:$src)),
+ (UniformUnaryFrag<fneg> (f16 SReg_32:$src)),
(S_XOR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x00008000)))
>;
def : GCNPat <
- (fneg (f16 VGPR_32:$src)),
- (V_XOR_B32_e32 (S_MOV_B32 (i32 0x00008000)), VGPR_32:$src)
->;
-
-def : GCNPat <
- (fabs (f16 SReg_32:$src)),
+ (UniformUnaryFrag<fabs> (f16 SReg_32:$src)),
(S_AND_B32 SReg_32:$src, (S_MOV_B32 (i32 0x00007fff)))
>;
def : GCNPat <
- (fneg (fabs (f16 SReg_32:$src))),
+ (UniformUnaryFrag<fneg> (fabs (f16 SReg_32:$src))),
(S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x00008000))) // Set sign bit
>;
def : GCNPat <
- (fneg (fabs (f16 VGPR_32:$src))),
- (V_OR_B32_e32 (S_MOV_B32 (i32 0x00008000)), VGPR_32:$src) // Set sign bit
->;
-
-def : GCNPat <
- (fneg (v2f16 SReg_32:$src)),
+ (UniformUnaryFrag<fneg> (v2f16 SReg_32:$src)),
(S_XOR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80008000)))
>;
def : GCNPat <
- (fabs (v2f16 SReg_32:$src)),
+ (UniformUnaryFrag<fabs> (v2f16 SReg_32:$src)),
(S_AND_B32 SReg_32:$src, (S_MOV_B32 (i32 0x7fff7fff)))
>;
@@ -1435,51 +1418,20 @@ def : GCNPat <
// fabs is not reported as free because there is modifier for it in
// VOP3P instructions, so it is turned into the bit op.
def : GCNPat <
- (fneg (v2f16 (bitconvert (and_oneuse (i32 SReg_32:$src), 0x7fff7fff)))),
+ (UniformUnaryFrag<fneg> (v2f16 (bitconvert (and_oneuse (i32 SReg_32:$src), 0x7fff7fff)))),
(S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80008000))) // Set sign bit
>;
def : GCNPat <
- (fneg (v2f16 (fabs SReg_32:$src))),
+ (UniformUnaryFrag<fneg> (v2f16 (fabs SReg_32:$src))),
(S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80008000))) // Set sign bit
>;
-// FIXME: The implicit-def of scc from S_[X]OR/AND_B32 is mishandled
- // def : GCNPat <
-// (fneg (f64 SReg_64:$src)),
-// (REG_SEQUENCE SReg_64,
-// (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)),
-// sub0,
-// (S_XOR_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)),
-// (i32 (S_MOV_B32 (i32 0x80000000)))),
-// sub1)
-// >;
-
-// def : GCNPat <
-// (fneg (fabs (f64 SReg_64:$src))),
-// (REG_SEQUENCE SReg_64,
-// (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)),
-// sub0,
-// (S_OR_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)),
-// (S_MOV_B32 (i32 0x80000000))), // Set sign bit.
-// sub1)
-// >;
-
-// FIXME: Use S_BITSET0_B32/B64?
-// def : GCNPat <
-// (fabs (f64 SReg_64:$src)),
-// (REG_SEQUENCE SReg_64,
-// (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)),
-// sub0,
-// (S_AND_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)),
-// (i32 (S_MOV_B32 (i32 0x7fffffff)))),
-// sub1)
-// >;
// COPY_TO_REGCLASS is needed to avoid using SCC from S_XOR_B32 instead
// of the real value.
def : GCNPat <
- (fneg (v2f32 SReg_64:$src)),
+ (UniformUnaryFrag<fneg> (v2f32 SReg_64:$src)),
(v2f32 (REG_SEQUENCE SReg_64,
(f32 (COPY_TO_REGCLASS (S_XOR_B32 (i32 (EXTRACT_SUBREG $src, sub0)),
(i32 (S_MOV_B32 (i32 0x80000000)))),
@@ -1489,36 +1441,103 @@ def : GCNPat <
SReg_32)), sub1))
>;
-} // End let AddedComplexity = 1
+def : GCNPat <
+ (UniformUnaryFrag<fabs> (v2f32 SReg_64:$src)),
+ (v2f32 (REG_SEQUENCE SReg_64,
+ (f32 (COPY_TO_REGCLASS (S_AND_B32 (i32 (EXTRACT_SUBREG $src, sub0)),
+ (i32 (S_MOV_B32 (i32 0x7fffffff)))),
+ SReg_32)), sub0,
+ (f32 (COPY_TO_REGCLASS (S_AND_B32 (i32 (EXTRACT_SUBREG $src, sub1)),
+ (i32 (S_MOV_B32 (i32 0x7fffffff)))),
+ SReg_32)), sub1))
+>;
+
+def : GCNPat <
+ (UniformUnaryFrag<fneg> (fabs (v2f32 SReg_64:$src))),
+ (v2f32 (REG_SEQUENCE SReg_64,
+ (f32 (COPY_TO_REGCLASS (S_OR_B32 (i32 (EXTRACT_SUBREG $src, sub0)),
+ (i32 (S_MOV_B32 (i32 0x80000000)))),
+ SReg_32)), sub0,
+ (f32 (COPY_TO_REGCLASS (S_OR_B32 (i32 (EXTRACT_SUBREG $src, sub1)),
+ (i32 (S_MOV_B32 (i32 0x80000000)))),
+ SReg_32)), sub1))
+>;
+
+// FIXME: Use S_BITSET0_B32/B64?
+def : GCNPat <
+ (UniformUnaryFrag<fabs> (f64 SReg_64:$src)),
+ (REG_SEQUENCE SReg_64,
+ (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)),
+ sub0,
+ (i32 (COPY_TO_REGCLASS (S_AND_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)),
+ (S_MOV_B32 (i32 0x7fffffff))), SReg_32)), // Set sign bit.
+ sub1)
+>;
+
+def : GCNPat <
+ (UniformUnaryFrag<fneg> (f64 SReg_64:$src)),
+ (REG_SEQUENCE SReg_64,
+ (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)),
+ sub0,
+ (i32 (COPY_TO_REGCLASS (S_XOR_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)),
+ (i32 (S_MOV_B32 (i32 0x80000000)))), SReg_32)),
+ sub1)
+>;
+
+def : GCNPat <
+ (UniformUnaryFrag<fneg> (fabs (f64 SReg_64:$src))),
+ (REG_SEQUENCE SReg_64,
+ (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)),
+ sub0,
+ (i32 (COPY_TO_REGCLASS (S_OR_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)),
+ (S_MOV_B32 (i32 0x80000000))), SReg_32)),// Set sign bit.
+ sub1)
+>;
+
+
+def : GCNPat <
+ (fneg (fabs (f32 VGPR_32:$src))),
+ (V_OR_B32_e64 (S_MOV_B32 (i32 0x80000000)), VGPR_32:$src) // Set sign bit
+>;
def : GCNPat <
(fabs (f32 VGPR_32:$src)),
- (V_AND_B32_e32 (S_MOV_B32 (i32 0x7fffffff)), VGPR_32:$src)
+ (V_AND_B32_e64 (S_MOV_B32 (i32 0x7fffffff)), VGPR_32:$src)
>;
def : GCNPat <
(fneg (f32 VGPR_32:$src)),
- (V_XOR_B32_e32 (S_MOV_B32 (i32 0x80000000)), VGPR_32:$src)
+ (V_XOR_B32_e64 (S_MOV_B32 (i32 0x80000000)), VGPR_32:$src)
>;
def : GCNPat <
(fabs (f16 VGPR_32:$src)),
- (V_AND_B32_e32 (S_MOV_B32 (i32 0x00007fff)), VGPR_32:$src)
+ (V_AND_B32_e64 (S_MOV_B32 (i32 0x00007fff)), VGPR_32:$src)
+>;
+
+def : GCNPat <
+ (fneg (f16 VGPR_32:$src)),
+ (V_XOR_B32_e64 (S_MOV_B32 (i32 0x00008000)), VGPR_32:$src)
+>;
+
+def : GCNPat <
+ (fneg (fabs (f16 VGPR_32:$src))),
+ (V_OR_B32_e64 (S_MOV_B32 (i32 0x00008000)), VGPR_32:$src) // Set sign bit
>;
def : GCNPat <
(fneg (v2f16 VGPR_32:$src)),
- (V_XOR_B32_e32 (S_MOV_B32 (i32 0x80008000)), VGPR_32:$src)
+ (V_XOR_B32_e64 (S_MOV_B32 (i32 0x80008000)), VGPR_32:$src)
>;
def : GCNPat <
(fabs (v2f16 VGPR_32:$src)),
- (V_AND_B32_e32 (S_MOV_B32 (i32 0x7fff7fff)), VGPR_32:$src)
+ (V_AND_B32_e64 (S_MOV_B32 (i32 0x7fff7fff)), VGPR_32:$src)
>;
def : GCNPat <
(fneg (v2f16 (fabs VGPR_32:$src))),
- (V_OR_B32_e32 (S_MOV_B32 (i32 0x80008000)), VGPR_32:$src) // Set sign bit
+ (V_OR_B32_e64 (S_MOV_B32 (i32 0x80008000)), VGPR_32:$src)
>;
def : GCNPat <
@@ -1526,30 +1545,28 @@ def : GCNPat <
(REG_SEQUENCE VReg_64,
(i32 (EXTRACT_SUBREG VReg_64:$src, sub0)),
sub0,
- (V_AND_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$src, sub1)),
- (V_MOV_B32_e32 (i32 0x7fffffff))), // Set sign bit.
+ (V_AND_B32_e64 (i32 (S_MOV_B32 (i32 0x7fffffff))),
+ (i32 (EXTRACT_SUBREG VReg_64:$src, sub1))),
sub1)
>;
-// TODO: Use SGPR for constant
def : GCNPat <
(fneg (f64 VReg_64:$src)),
(REG_SEQUENCE VReg_64,
(i32 (EXTRACT_SUBREG VReg_64:$src, sub0)),
sub0,
- (V_XOR_B32_e32 (i32 (EXTRACT_SUBREG VReg_64:$src, sub1)),
- (i32 (V_MOV_B32_e32 (i32 0x80000000)))),
+ (V_XOR_B32_e64 (i32 (S_MOV_B32 (i32 0x80000000))),
+ (i32 (EXTRACT_SUBREG VReg_64:$src, sub1))),
sub1)
>;
-// TODO: Use SGPR for constant
def : GCNPat <
(fneg (fabs (f64 VReg_64:$src))),
(REG_SEQUENCE VReg_64,
(i32 (EXTRACT_SUBREG VReg_64:$src, sub0)),
sub0,
- (V_OR_B32_e32 (i32 (EXTRACT_SUBREG VReg_64:$src, sub1)),
- (V_MOV_B32_e32 (i32 0x80000000))), // Set sign bit.
+ (V_OR_B32_e64 (i32 (S_MOV_B32 (i32 0x80000000))),
+ (i32 (EXTRACT_SUBREG VReg_64:$src, sub1))),
sub1)
>;
@@ -1681,14 +1698,9 @@ def : GCNPat <
/********** Intrinsic Patterns **********/
/********** ================== **********/
-let OtherPredicates = [isNotGFX90APlus] in
-// FIXME: Should use _e64 and select source modifiers.
-def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>;
-
-let OtherPredicates = [isGFX90APlus] in
def : GCNPat <
- (fpow f32:$src0, f32:$src1),
- (V_EXP_F32_e32 (V_MUL_LEGACY_F32_e64 0, f32:$src1, SRCMODS.NONE, (V_LOG_F32_e32 f32:$src0), 0, 0))
+ (f32 (fpow (VOP3Mods f32:$src0, i32:$src0_mods), (VOP3Mods f32:$src1, i32:$src1_mods))),
+ (V_EXP_F32_e64 SRCMODS.NONE, (V_MUL_LEGACY_F32_e64 $src1_mods, $src1, SRCMODS.NONE, (V_LOG_F32_e64 $src0_mods, $src0), 0, 0))
>;
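A gloss on the rewritten fpow pattern above: it computes

    pow(x, y) = exp2(y * log2(x))

now through the _e64 encodings so that VOP3 source modifiers survive selection, with V_MUL_LEGACY_F32 supplying the DX9-style multiply (0 * anything = 0) that keeps the x = 0 corner case well defined even though log2(0) is -inf.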
def : GCNPat <
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 34cbb49dcd16..f4d9002e930e 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -1609,7 +1609,7 @@ SILoadStoreOptimizer::getTargetRegisterClass(const CombineInfo &CI,
}
unsigned BitWidth = 32 * (CI.Width + Paired.Width);
- return TRI->hasAGPRs(getDataRegClass(*CI.I))
+ return TRI->isAGPRClass(getDataRegClass(*CI.I))
? TRI->getAGPRClassForBitWidth(BitWidth)
: TRI->getVGPRClassForBitWidth(BitWidth);
}
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 29f072ca1e6c..fff4f6729c99 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -795,6 +795,8 @@ bool SIGfx6CacheControl::enableLoadCacheBypass(
switch (Scope) {
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
+ // Set L1 cache policy to MISS_EVICT.
+ // Note: there is no L2 cache bypass policy at the ISA level.
Changed |= enableGLCBit(MI);
break;
case SIAtomicScope::WORKGROUP:
@@ -837,8 +839,10 @@ bool SIGfx6CacheControl::enableRMWCacheBypass(
assert(MI->mayLoad() && MI->mayStore());
bool Changed = false;
- /// The L1 cache is write through so does not need to be bypassed. There is no
- /// bypass control for the L2 cache at the isa level.
+ /// Do not set GLC for RMW atomic operations as L0/L1 cache is automatically
+ /// bypassed, and the GLC bit is instead used to indicate if they are
+ /// return or no-return.
+ /// Note: there is no L2 cache coherent bypass control at the ISA level.
return Changed;
}
@@ -860,6 +864,9 @@ bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal(
bool Changed = false;
if (IsVolatile) {
+ // Set L1 cache policy to be MISS_EVICT for load instructions
+ // and MISS_LRU for store instructions.
+ // Note: there is no L2 cache bypass policy at the ISA level.
if (Op == SIMemOp::LOAD)
Changed |= enableGLCBit(MI);
@@ -875,7 +882,8 @@ bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal(
}
if (IsNonTemporal) {
- // Request L1 MISS_EVICT and L2 STREAM for load and store instructions.
+ // Setting both GLC and SLC configures L1 cache policy to MISS_EVICT
+ // for both loads and stores, and the L2 cache policy to STREAM.
Changed |= enableGLCBit(MI);
Changed |= enableSLCBit(MI);
return Changed;
@@ -1097,6 +1105,8 @@ bool SIGfx90ACacheControl::enableLoadCacheBypass(
switch (Scope) {
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
+ // Set the L1 cache policy to MISS_LRU.
+ // Note: there is no L2 cache bypass policy at the ISA level.
Changed |= enableGLCBit(MI);
break;
case SIAtomicScope::WORKGROUP:
@@ -1206,6 +1216,9 @@ bool SIGfx90ACacheControl::enableVolatileAndOrNonTemporal(
bool Changed = false;
if (IsVolatile) {
+ // Set L1 cache policy to be MISS_EVICT for load instructions
+ // and MISS_LRU for store instructions.
+ // Note: there is no L2 cache bypass policy at the ISA level.
if (Op == SIMemOp::LOAD)
Changed |= enableGLCBit(MI);
@@ -1221,7 +1234,8 @@ bool SIGfx90ACacheControl::enableVolatileAndOrNonTemporal(
}
if (IsNonTemporal) {
- // Request L1 MISS_EVICT and L2 STREAM for load and store instructions.
+ // Setting both GLC and SLC configures L1 cache policy to MISS_EVICT
+ // for both loads and stores, and the L2 cache policy to STREAM.
Changed |= enableGLCBit(MI);
Changed |= enableSLCBit(MI);
return Changed;
@@ -1380,12 +1394,11 @@ bool SIGfx10CacheControl::enableLoadCacheBypass(
bool Changed = false;
if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
- /// TODO Do not set glc for rmw atomic operations as they
- /// implicitly bypass the L0/L1 caches.
-
switch (Scope) {
case SIAtomicScope::SYSTEM:
case SIAtomicScope::AGENT:
+ // Set the L0 and L1 cache policies to MISS_EVICT.
+ // Note: there is no L2 cache coherent bypass control at the ISA level.
Changed |= enableGLCBit(MI);
Changed |= enableDLCBit(MI);
break;
@@ -1434,6 +1447,9 @@ bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal(
bool Changed = false;
if (IsVolatile) {
+ // Set L0 and L1 cache policy to be MISS_EVICT for load instructions
+ // and MISS_LRU for store instructions.
+ // Note: there is no L2 cache coherent bypass control at the ISA level.
if (Op == SIMemOp::LOAD) {
Changed |= enableGLCBit(MI);
Changed |= enableDLCBit(MI);
@@ -1450,8 +1466,14 @@ bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal(
}
if (IsNonTemporal) {
- // Request L0/L1 HIT_EVICT and L2 STREAM for load and store instructions.
+ // For loads setting SLC configures L0 and L1 cache policy to HIT_EVICT
+ // and L2 cache policy to STREAM.
+ // For stores setting both GLC and SLC configures L0 and L1 cache policy
+ // to MISS_EVICT and the L2 cache policy to STREAM.
+ if (Op == SIMemOp::STORE)
+ Changed |= enableGLCBit(MI);
Changed |= enableSLCBit(MI);
+
return Changed;
}
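The cache-policy comments above are spread across several hunks; the GFX10 bit encodings they describe amount to the following (a summary of those comments, not independent ISA documentation):

    volatile load:       GLC + DLC  -> L0/L1 MISS_EVICT
    volatile store:      (no bits)  -> default MISS_LRU policy
    nontemporal load:    SLC        -> L0/L1 HIT_EVICT, L2 STREAM
    nontemporal store:   GLC + SLC  -> L0/L1 MISS_EVICT, L2 STREAM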
diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index 6a698348d389..da41a5e2478a 100644
--- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -1170,7 +1170,7 @@ void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI,
unsigned I = MI.getOperandNo(&Op);
if (Desc.OpInfo[I].RegClass == -1 ||
- !TRI->hasVGPRs(TRI->getRegClass(Desc.OpInfo[I].RegClass)))
+ !TRI->isVSSuperClass(TRI->getRegClass(Desc.OpInfo[I].RegClass)))
continue;
if (ST.hasSDWAScalar() && ConstantBusCount == 0 && Op.isReg() &&
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index d1b8e217471e..b0e45dd3e3e3 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -291,20 +291,19 @@ bool SIPreEmitPeephole::mustRetainExeczBranch(
MBBI != End && MBBI != ToI; ++MBBI) {
const MachineBasicBlock &MBB = *MBBI;
- for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I) {
+ for (const MachineInstr &MI : MBB) {
// When a uniform loop is inside non-uniform control flow, the branch
// leaving the loop might never be taken when EXEC = 0.
// Hence we should retain the cbranch out of the loop lest it become infinite.
- if (I->isConditionalBranch())
+ if (MI.isConditionalBranch())
return true;
- if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))
+ if (TII->hasUnwantedEffectsWhenEXECEmpty(MI))
return true;
// These instructions are potentially expensive even if EXEC = 0.
- if (TII->isSMRD(*I) || TII->isVMEM(*I) || TII->isFLAT(*I) ||
- TII->isDS(*I) || I->getOpcode() == AMDGPU::S_WAITCNT)
+ if (TII->isSMRD(MI) || TII->isVMEM(MI) || TII->isFLAT(MI) ||
+ TII->isDS(MI) || MI.getOpcode() == AMDGPU::S_WAITCNT)
return true;
++NumInstr;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index bfbe84f696f8..a1d9a23a5084 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -402,6 +402,62 @@ const uint32_t *SIRegisterInfo::getNoPreservedMask() const {
return CSR_AMDGPU_NoRegs_RegMask;
}
+const TargetRegisterClass *
+SIRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const {
+ // FIXME: Should have a helper function like getEquivalentVGPRClass to get
+ // the equivalent AV class. If one were used here, the verifier would crash
+ // after RegBankSelect in the GISel flow, because the aligned regclasses are
+ // not fully assigned until instruction selection.
+ if (MF.getSubtarget<GCNSubtarget>().hasMAIInsts() &&
+ (isVGPRClass(RC) || isAGPRClass(RC))) {
+ if (RC == &AMDGPU::VGPR_32RegClass || RC == &AMDGPU::AGPR_32RegClass)
+ return &AMDGPU::AV_32RegClass;
+ if (RC == &AMDGPU::VReg_64RegClass || RC == &AMDGPU::AReg_64RegClass)
+ return &AMDGPU::AV_64RegClass;
+ if (RC == &AMDGPU::VReg_64_Align2RegClass ||
+ RC == &AMDGPU::AReg_64_Align2RegClass)
+ return &AMDGPU::AV_64_Align2RegClass;
+ if (RC == &AMDGPU::VReg_96RegClass || RC == &AMDGPU::AReg_96RegClass)
+ return &AMDGPU::AV_96RegClass;
+ if (RC == &AMDGPU::VReg_96_Align2RegClass ||
+ RC == &AMDGPU::AReg_96_Align2RegClass)
+ return &AMDGPU::AV_96_Align2RegClass;
+ if (RC == &AMDGPU::VReg_128RegClass || RC == &AMDGPU::AReg_128RegClass)
+ return &AMDGPU::AV_128RegClass;
+ if (RC == &AMDGPU::VReg_128_Align2RegClass ||
+ RC == &AMDGPU::AReg_128_Align2RegClass)
+ return &AMDGPU::AV_128_Align2RegClass;
+ if (RC == &AMDGPU::VReg_160RegClass || RC == &AMDGPU::AReg_160RegClass)
+ return &AMDGPU::AV_160RegClass;
+ if (RC == &AMDGPU::VReg_160_Align2RegClass ||
+ RC == &AMDGPU::AReg_160_Align2RegClass)
+ return &AMDGPU::AV_160_Align2RegClass;
+ if (RC == &AMDGPU::VReg_192RegClass || RC == &AMDGPU::AReg_192RegClass)
+ return &AMDGPU::AV_192RegClass;
+ if (RC == &AMDGPU::VReg_192_Align2RegClass ||
+ RC == &AMDGPU::AReg_192_Align2RegClass)
+ return &AMDGPU::AV_192_Align2RegClass;
+ if (RC == &AMDGPU::VReg_256RegClass || RC == &AMDGPU::AReg_256RegClass)
+ return &AMDGPU::AV_256RegClass;
+ if (RC == &AMDGPU::VReg_256_Align2RegClass ||
+ RC == &AMDGPU::AReg_256_Align2RegClass)
+ return &AMDGPU::AV_256_Align2RegClass;
+ if (RC == &AMDGPU::VReg_512RegClass || RC == &AMDGPU::AReg_512RegClass)
+ return &AMDGPU::AV_512RegClass;
+ if (RC == &AMDGPU::VReg_512_Align2RegClass ||
+ RC == &AMDGPU::AReg_512_Align2RegClass)
+ return &AMDGPU::AV_512_Align2RegClass;
+ if (RC == &AMDGPU::VReg_1024RegClass || RC == &AMDGPU::AReg_1024RegClass)
+ return &AMDGPU::AV_1024RegClass;
+ if (RC == &AMDGPU::VReg_1024_Align2RegClass ||
+ RC == &AMDGPU::AReg_1024_Align2RegClass)
+ return &AMDGPU::AV_1024_Align2RegClass;
+ }
+
+ return TargetRegisterInfo::getLargestLegalSuperClass(RC, MF);
+}
+
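The pairwise if-chain above is long but mechanical, and could in principle be table-driven. A hedged sketch under that assumption (the lookup array is hypothetical; the register-class names are the ones used above):

    static const TargetRegisterClass *const AVMap[][3] = {
        // {VGPR class, AGPR class, common AV superclass}
        {&AMDGPU::VGPR_32RegClass, &AMDGPU::AGPR_32RegClass,
         &AMDGPU::AV_32RegClass},
        {&AMDGPU::VReg_64RegClass, &AMDGPU::AReg_64RegClass,
         &AMDGPU::AV_64RegClass},
        // ... remaining width and Align2 rows elided ...
    };
    for (const auto &Row : AVMap)
      if (RC == Row[0] || RC == Row[1])
        return Row[2];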
Register SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const SIFrameLowering *TFI =
MF.getSubtarget<GCNSubtarget>().getFrameLowering();
@@ -994,10 +1050,22 @@ static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST,
unsigned Dst = IsStore ? Reg : ValueReg;
unsigned Src = IsStore ? ValueReg : Reg;
- unsigned Opc = (IsStore ^ TRI->isVGPR(MRI, Reg)) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64
- : AMDGPU::V_ACCVGPR_READ_B32_e64;
+ bool IsVGPR = TRI->isVGPR(MRI, Reg);
+ DebugLoc DL = MI->getDebugLoc();
+ if (IsVGPR == TRI->isVGPR(MRI, ValueReg)) {
+ // The spiller during regalloc may restore a spilled register to its
+ // superclass. This can result in AGPR spills restored to VGPRs, or the
+ // other way around, leaving src and dst with identical regclasses at this
+ // point. A plain copy suffices in such cases.
+ auto CopyMIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), Dst)
+ .addReg(Src, getKillRegState(IsKill));
+ CopyMIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
+ return CopyMIB;
+ }
+ unsigned Opc = (IsStore ^ IsVGPR) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64
+ : AMDGPU::V_ACCVGPR_READ_B32_e64;
- auto MIB = BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(Opc), Dst)
+ auto MIB = BuildMI(MBB, MI, DL, TII->get(Opc), Dst)
.addReg(Src, getKillRegState(IsKill));
MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
return MIB;
@@ -1099,7 +1167,7 @@ void SIRegisterInfo::buildSpillLoadStore(
const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
// On gfx90a+ AGPR is a regular VGPR acceptable for loads and stores.
- const bool IsAGPR = !ST.hasGFX90AInsts() && hasAGPRs(RC);
+ const bool IsAGPR = !ST.hasGFX90AInsts() && isAGPRClass(RC);
const unsigned RegWidth = AMDGPU::getRegBitWidth(RC->getID()) / 8;
// Always use 4 byte operations for AGPRs because we need to scavenge
@@ -2163,6 +2231,65 @@ SIRegisterInfo::getAGPRClassForBitWidth(unsigned BitWidth) const {
: getAnyAGPRClassForBitWidth(BitWidth);
}
+static const TargetRegisterClass *
+getAnyVectorSuperClassForBitWidth(unsigned BitWidth) {
+ if (BitWidth <= 64)
+ return &AMDGPU::AV_64RegClass;
+ if (BitWidth <= 96)
+ return &AMDGPU::AV_96RegClass;
+ if (BitWidth <= 128)
+ return &AMDGPU::AV_128RegClass;
+ if (BitWidth <= 160)
+ return &AMDGPU::AV_160RegClass;
+ if (BitWidth <= 192)
+ return &AMDGPU::AV_192RegClass;
+ if (BitWidth <= 224)
+ return &AMDGPU::AV_224RegClass;
+ if (BitWidth <= 256)
+ return &AMDGPU::AV_256RegClass;
+ if (BitWidth <= 512)
+ return &AMDGPU::AV_512RegClass;
+ if (BitWidth <= 1024)
+ return &AMDGPU::AV_1024RegClass;
+
+ return nullptr;
+}
+
+static const TargetRegisterClass *
+getAlignedVectorSuperClassForBitWidth(unsigned BitWidth) {
+ if (BitWidth <= 64)
+ return &AMDGPU::AV_64_Align2RegClass;
+ if (BitWidth <= 96)
+ return &AMDGPU::AV_96_Align2RegClass;
+ if (BitWidth <= 128)
+ return &AMDGPU::AV_128_Align2RegClass;
+ if (BitWidth <= 160)
+ return &AMDGPU::AV_160_Align2RegClass;
+ if (BitWidth <= 192)
+ return &AMDGPU::AV_192_Align2RegClass;
+ if (BitWidth <= 224)
+ return &AMDGPU::AV_224_Align2RegClass;
+ if (BitWidth <= 256)
+ return &AMDGPU::AV_256_Align2RegClass;
+ if (BitWidth <= 512)
+ return &AMDGPU::AV_512_Align2RegClass;
+ if (BitWidth <= 1024)
+ return &AMDGPU::AV_1024_Align2RegClass;
+
+ return nullptr;
+}
+
+const TargetRegisterClass *
+SIRegisterInfo::getVectorSuperClassForBitWidth(unsigned BitWidth) const {
+ if (BitWidth <= 16)
+ return &AMDGPU::VGPR_LO16RegClass;
+ if (BitWidth <= 32)
+ return &AMDGPU::AV_32RegClass;
+ return ST.needsAlignedVGPRs()
+ ? getAlignedVectorSuperClassForBitWidth(BitWidth)
+ : getAnyVectorSuperClassForBitWidth(BitWidth);
+}
+
const TargetRegisterClass *
SIRegisterInfo::getSGPRClassForBitWidth(unsigned BitWidth) {
if (BitWidth <= 16)
@@ -2305,15 +2432,14 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
// We can assume that each lane corresponds to one 32-bit register.
unsigned Size = getNumChannelsFromSubReg(SubIdx) * 32;
- if (isSGPRClass(RC)) {
- if (Size == 32)
- RC = &AMDGPU::SGPR_32RegClass;
- else
- RC = getSGPRClassForBitWidth(Size);
- } else if (hasAGPRs(RC)) {
+ if (isAGPRClass(RC)) {
RC = getAGPRClassForBitWidth(Size);
- } else {
+ } else if (isVGPRClass(RC)) {
RC = getVGPRClassForBitWidth(Size);
+ } else if (isVectorSuperClass(RC)) {
+ RC = getVectorSuperClassForBitWidth(Size);
+ } else {
+ RC = getSGPRClassForBitWidth(Size);
}
assert(RC && "Invalid sub-register class size");
return RC;
@@ -2626,10 +2752,13 @@ bool SIRegisterInfo::isProperlyAlignedRC(const TargetRegisterClass &RC) const {
if (!ST.needsAlignedVGPRs())
return true;
- if (hasVGPRs(&RC))
+ if (isVGPRClass(&RC))
return RC.hasSuperClassEq(getVGPRClassForBitWidth(getRegSizeInBits(RC)));
- if (hasAGPRs(&RC))
+ if (isAGPRClass(&RC))
return RC.hasSuperClassEq(getAGPRClassForBitWidth(getRegSizeInBits(RC)));
+ if (isVectorSuperClass(&RC))
+ return RC.hasSuperClassEq(
+ getVectorSuperClassForBitWidth(getRegSizeInBits(RC)));
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 8d90ddb1cf4c..f1fe0a1d9329 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -77,6 +77,10 @@ public:
return 100;
}
+ const TargetRegisterClass *
+ getLargestLegalSuperClass(const TargetRegisterClass *RC,
+ const MachineFunction &MF) const override;
+
Register getFrameRegister(const MachineFunction &MF) const override;
bool hasBasePointer(const MachineFunction &MF) const;
@@ -156,6 +160,10 @@ public:
const TargetRegisterClass *getAGPRClassForBitWidth(unsigned BitWidth) const;
LLVM_READONLY
+ const TargetRegisterClass *
+ getVectorSuperClassForBitWidth(unsigned BitWidth) const;
+
+ LLVM_READONLY
static const TargetRegisterClass *getSGPRClassForBitWidth(unsigned BitWidth);
/// Return the 'base' register class for this register.
@@ -164,7 +172,7 @@ public:
/// \returns true if this class contains only SGPR registers
static bool isSGPRClass(const TargetRegisterClass *RC) {
- return !hasVGPRs(RC) && !hasAGPRs(RC);
+ return hasSGPRs(RC) && !hasVGPRs(RC) && !hasAGPRs(RC);
}
/// \returns true if this class ID contains only SGPR registers
@@ -176,12 +184,22 @@ public:
/// \returns true if this class contains only VGPR registers
static bool isVGPRClass(const TargetRegisterClass *RC) {
- return hasVGPRs(RC) && !hasAGPRs(RC);
+ return hasVGPRs(RC) && !hasAGPRs(RC) && !hasSGPRs(RC);
}
/// \returns true if this class contains only AGPR registers
static bool isAGPRClass(const TargetRegisterClass *RC) {
- return hasAGPRs(RC) && !hasVGPRs(RC);
+ return hasAGPRs(RC) && !hasVGPRs(RC) && !hasSGPRs(RC);
+ }
+
+ /// \returns true only if this class contains both VGPR and AGPR registers
+ bool isVectorSuperClass(const TargetRegisterClass *RC) const {
+ return hasVGPRs(RC) && hasAGPRs(RC) && !hasSGPRs(RC);
+ }
+
+ /// \returns true only if this class contains both VGPR and SGPR registers
+ bool isVSSuperClass(const TargetRegisterClass *RC) const {
+ return hasVGPRs(RC) && hasSGPRs(RC) && !hasAGPRs(RC);
}
/// \returns true if this class contains VGPR registers.
@@ -194,6 +212,11 @@ public:
return RC->TSFlags & SIRCFlags::HasAGPR;
}
+ /// \returns true if this class contains SGPR registers.
+ static bool hasSGPRs(const TargetRegisterClass *RC) {
+ return RC->TSFlags & SIRCFlags::HasSGPR;
+ }
+
/// \returns true if this class contains any vector registers.
static bool hasVectorRegisters(const TargetRegisterClass *RC) {
return hasVGPRs(RC) || hasAGPRs(RC);
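With the new HasSGPR flag, the predicates above partition register classes by three TSFlags bits; the mapping they encode is:

    HasSGPR  HasVGPR  HasAGPR   matching predicate
       1        0        0      isSGPRClass
       0        1        0      isVGPRClass
       0        0        1      isAGPRClass
       0        1        1      isVectorSuperClass (AV)
       1        1        0      isVSSuperClass (VS)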
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index cf1d90484228..340e2b48e5cd 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -133,9 +133,13 @@ class SIRegisterClass <string n, list<ValueType> rTypes, int Align, dag rList>
field bit HasVGPR = 0;
field bit HasAGPR = 0;
+ // For scalar register classes.
+ field bit HasSGPR = 0;
+
// These need to be kept in sync with the enum SIRCFlags.
let TSFlags{0} = HasVGPR;
let TSFlags{1} = HasAGPR;
+ let TSFlags{2} = HasSGPR;
}
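The enum these TSFlags bits must stay in sync with lives on the C++ side and is consulted by the hasVGPRs/hasAGPRs/hasSGPRs predicates earlier in this import. A sketch of its expected shape, inferred from the bit positions above (consult SIDefines.h for the authoritative definition):

    namespace SIRCFlags {
    enum : unsigned {
      HasVGPR = 1 << 0, // TSFlags{0}
      HasAGPR = 1 << 1, // TSFlags{1}
      HasSGPR = 1 << 2  // TSFlags{2}
    };
    } // namespace SIRCFlags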
multiclass SIRegLoHi16 <string n, bits<16> regIdx, bit ArtificialHigh = 1,
@@ -307,45 +311,51 @@ foreach Index = 0...255 in {
// Groupings using register classes and tuples
//===----------------------------------------------------------------------===//
-def SCC_CLASS : RegisterClass<"AMDGPU", [i1], 1, (add SCC)> {
+def SCC_CLASS : SIRegisterClass<"AMDGPU", [i1], 1, (add SCC)> {
let CopyCost = -1;
let isAllocatable = 0;
+ let HasSGPR = 1;
}
-def M0_CLASS : RegisterClass<"AMDGPU", [i32], 32, (add M0)> {
+def M0_CLASS : SIRegisterClass<"AMDGPU", [i32], 32, (add M0)> {
let CopyCost = 1;
let isAllocatable = 0;
+ let HasSGPR = 1;
}
-def M0_CLASS_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16, (add M0_LO16)> {
+def M0_CLASS_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16, (add M0_LO16)> {
let CopyCost = 1;
let Size = 16;
let isAllocatable = 0;
+ let HasSGPR = 1;
}
// TODO: Do we need to set DwarfRegAlias on register tuples?
-def SGPR_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16,
+def SGPR_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
(add (sequence "SGPR%u_LO16", 0, 105))> {
let AllocationPriority = 9;
let Size = 16;
let GeneratePressureSet = 0;
+ let HasSGPR = 1;
}
-def SGPR_HI16 : RegisterClass<"AMDGPU", [i16, f16], 16,
+def SGPR_HI16 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
(add (sequence "SGPR%u_HI16", 0, 105))> {
let isAllocatable = 0;
let Size = 16;
let GeneratePressureSet = 0;
+ let HasSGPR = 1;
}
// SGPR 32-bit registers
-def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+def SGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add (sequence "SGPR%u", 0, 105))> {
// Give all SGPR classes higher priority than VGPR classes, because
// we want to spill SGPRs to VGPRs.
let AllocationPriority = 9;
let GeneratePressureSet = 0;
+ let HasSGPR = 1;
}
// SGPR 64-bit registers
@@ -376,16 +386,18 @@ def SGPR_512Regs : SIRegisterTuples<getSubRegs<16>.ret, SGPR_32, 105, 4, 16, "s"
def SGPR_1024Regs : SIRegisterTuples<getSubRegs<32>.ret, SGPR_32, 105, 4, 32, "s">;
// Trap handler TMP 32-bit registers
-def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32,
+def TTMP_32 : SIRegisterClass<"AMDGPU", [i32, f32, v2i16, v2f16], 32,
(add (sequence "TTMP%u", 0, 15))> {
let isAllocatable = 0;
+ let HasSGPR = 1;
}
// Trap handler TMP 16-bit registers
-def TTMP_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16,
+def TTMP_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
(add (sequence "TTMP%u_LO16", 0, 15))> {
let Size = 16;
let isAllocatable = 0;
+ let HasSGPR = 1;
}
// Trap handler TMP 64-bit registers
@@ -598,16 +610,18 @@ def AGPR_1024 : SIRegisterTuples<getSubRegs<32>.ret, AGPR_32, 255, 1, 32, "a">;
// Register classes used as source and destination
//===----------------------------------------------------------------------===//
-def Pseudo_SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+def Pseudo_SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add FP_REG, SP_REG)> {
let isAllocatable = 0;
let CopyCost = -1;
+ let HasSGPR = 1;
}
-def Pseudo_SReg_128 : RegisterClass<"AMDGPU", [v4i32, v2i64, v2f64], 32,
+def Pseudo_SReg_128 : SIRegisterClass<"AMDGPU", [v4i32, v2i64, v2f64], 32,
(add PRIVATE_RSRC_REG)> {
let isAllocatable = 0;
let CopyCost = -1;
+ let HasSGPR = 1;
}
def LDS_DIRECT_CLASS : RegisterClass<"AMDGPU", [i32], 32,
@@ -616,10 +630,10 @@ def LDS_DIRECT_CLASS : RegisterClass<"AMDGPU", [i32], 32,
let CopyCost = -1;
}
-let GeneratePressureSet = 0 in {
+let GeneratePressureSet = 0, HasSGPR = 1 in {
// Subset of SReg_32 without M0 for SMRD instructions and alike.
// See comments in SIInstructions.td for more info.
-def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
+def SReg_32_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
(add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI, XNACK_MASK_LO, XNACK_MASK_HI,
SGPR_NULL, TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT,
SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT, SRC_POPS_EXITING_WAVE_ID,
@@ -627,7 +641,7 @@ def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f1
let AllocationPriority = 10;
}
-def SReg_LO16_XM0_XEXEC : RegisterClass<"AMDGPU", [i16, f16], 16,
+def SReg_LO16_XM0_XEXEC : SIRegisterClass<"AMDGPU", [i16, f16], 16,
(add SGPR_LO16, VCC_LO_LO16, VCC_HI_LO16, FLAT_SCR_LO_LO16, FLAT_SCR_HI_LO16,
XNACK_MASK_LO_LO16, XNACK_MASK_HI_LO16, SGPR_NULL_LO16, TTMP_LO16, TMA_LO_LO16,
TMA_HI_LO16, TBA_LO_LO16, TBA_HI_LO16, SRC_SHARED_BASE_LO16,
@@ -637,29 +651,29 @@ def SReg_LO16_XM0_XEXEC : RegisterClass<"AMDGPU", [i16, f16], 16,
let AllocationPriority = 10;
}
-def SReg_32_XEXEC_HI : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
+def SReg_32_XEXEC_HI : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
(add SReg_32_XM0_XEXEC, EXEC_LO, M0_CLASS)> {
let AllocationPriority = 10;
}
-def SReg_LO16_XEXEC_HI : RegisterClass<"AMDGPU", [i16, f16], 16,
+def SReg_LO16_XEXEC_HI : SIRegisterClass<"AMDGPU", [i16, f16], 16,
(add SReg_LO16_XM0_XEXEC, EXEC_LO_LO16, M0_CLASS_LO16)> {
let Size = 16;
let AllocationPriority = 10;
}
-def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
+def SReg_32_XM0 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
(add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI)> {
let AllocationPriority = 10;
}
-def SReg_LO16_XM0 : RegisterClass<"AMDGPU", [i16, f16], 16,
+def SReg_LO16_XM0 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
(add SReg_LO16_XM0_XEXEC, EXEC_LO_LO16, EXEC_HI_LO16)> {
let Size = 16;
let AllocationPriority = 10;
}
-def SReg_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16,
+def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
(add SGPR_LO16, SReg_LO16_XM0, M0_CLASS_LO16, EXEC_LO_LO16, EXEC_HI_LO16, SReg_LO16_XEXEC_HI)> {
let Size = 16;
let AllocationPriority = 10;
@@ -667,65 +681,75 @@ def SReg_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16,
} // End GeneratePressureSet = 0
// Register class for all scalar registers (SGPRs + Special Registers)
-def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
+def SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
(add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI, SReg_32_XEXEC_HI)> {
let AllocationPriority = 10;
+ let HasSGPR = 1;
}
let GeneratePressureSet = 0 in {
-def SRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+def SRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add SReg_32, LDS_DIRECT_CLASS)> {
let isAllocatable = 0;
+ let HasSGPR = 1;
}
-def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32,
+def SGPR_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], 32,
(add SGPR_64Regs)> {
let CopyCost = 1;
let AllocationPriority = 11;
+ let HasSGPR = 1;
}
// CCR (call clobbered registers) SGPR 64-bit registers
-def CCR_SGPR_64 : RegisterClass<"AMDGPU", SGPR_64.RegTypes, 32,
+def CCR_SGPR_64 : SIRegisterClass<"AMDGPU", SGPR_64.RegTypes, 32,
(add (trunc SGPR_64, 16))> {
let CopyCost = SGPR_64.CopyCost;
let AllocationPriority = SGPR_64.AllocationPriority;
+ let HasSGPR = 1;
}
// Call clobbered 64-bit SGPRs for AMDGPU_Gfx CC
-def Gfx_CCR_SGPR_64 : RegisterClass<"AMDGPU", SGPR_64.RegTypes, 32,
+def Gfx_CCR_SGPR_64 : SIRegisterClass<"AMDGPU", SGPR_64.RegTypes, 32,
(add (trunc (shl SGPR_64, 15), 1), // s[30:31]
(trunc (shl SGPR_64, 18), 14))> { // s[36:37]-s[s62:63]
let CopyCost = SGPR_64.CopyCost;
let AllocationPriority = SGPR_64.AllocationPriority;
+ let HasSGPR = 1;
}
-def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32,
+def TTMP_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32,
(add TTMP_64Regs)> {
let isAllocatable = 0;
+ let HasSGPR = 1;
}
-def SReg_64_XEXEC : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
+def SReg_64_XEXEC : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
(add SGPR_64, VCC, FLAT_SCR, XNACK_MASK, TTMP_64, TBA, TMA)> {
let CopyCost = 1;
let AllocationPriority = 13;
+ let HasSGPR = 1;
}
-def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
+def SReg_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f16], 32,
(add SReg_64_XEXEC, EXEC)> {
let CopyCost = 1;
let AllocationPriority = 13;
+ let HasSGPR = 1;
}
-def SReg_1_XEXEC : RegisterClass<"AMDGPU", [i1], 32,
+def SReg_1_XEXEC : SIRegisterClass<"AMDGPU", [i1], 32,
(add SReg_64_XEXEC, SReg_32_XM0_XEXEC)> {
let CopyCost = 1;
let isAllocatable = 0;
+ let HasSGPR = 1;
}
-def SReg_1 : RegisterClass<"AMDGPU", [i1], 32,
+def SReg_1 : SIRegisterClass<"AMDGPU", [i1], 32,
(add SReg_1_XEXEC, EXEC, EXEC_LO)> {
let CopyCost = 1;
let isAllocatable = 0;
+ let HasSGPR = 1;
}
multiclass SRegClass<int numRegs, int priority,
@@ -738,18 +762,18 @@ multiclass SRegClass<int numRegs, int priority,
defvar sgprName = !strconcat("SGPR_", suffix);
defvar ttmpName = !strconcat("TTMP_", suffix);
- let AllocationPriority = priority, CopyCost = copyCost in {
- def "" # sgprName : RegisterClass<"AMDGPU", regTypes, 32, (add regList)> {
+ let AllocationPriority = priority, CopyCost = copyCost, HasSGPR = 1 in {
+ def "" # sgprName : SIRegisterClass<"AMDGPU", regTypes, 32, (add regList)> {
}
if hasTTMP then {
- def "" # ttmpName : RegisterClass<"AMDGPU", regTypes, 32, (add ttmpList)> {
+ def "" # ttmpName : SIRegisterClass<"AMDGPU", regTypes, 32, (add ttmpList)> {
let isAllocatable = 0;
}
}
def SReg_ # suffix :
- RegisterClass<"AMDGPU", regTypes, 32,
+ SIRegisterClass<"AMDGPU", regTypes, 32,
!con(!dag(add, [!cast<RegisterClass>(sgprName)], ["sgpr"]),
!if(hasTTMP,
!dag(add, [!cast<RegisterClass>(ttmpName)], ["ttmp"]),
@@ -855,44 +879,45 @@ def VS_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add VGPR_32, SReg_32, LDS_DIRECT_CLASS)> {
let isAllocatable = 0;
let HasVGPR = 1;
+ let HasSGPR = 1;
}
def VS_64 : SIRegisterClass<"AMDGPU", [i64, f64, v2f32], 32, (add VReg_64, SReg_64)> {
let isAllocatable = 0;
let HasVGPR = 1;
+ let HasSGPR = 1;
}
-def AV_32 : SIRegisterClass<"AMDGPU", VGPR_32.RegTypes, 32,
- (add AGPR_32, VGPR_32)> {
- let isAllocatable = 0;
- let HasVGPR = 1;
- let HasAGPR = 1;
-}
-
-def AV_64 : SIRegisterClass<"AMDGPU", VReg_64.RegTypes, 32,
- (add AReg_64, VReg_64)> {
- let isAllocatable = 0;
+def AV_32 : SIRegisterClass<"AMDGPU", VGPR_32.RegTypes, 32, (add VGPR_32, AGPR_32)> {
let HasVGPR = 1;
let HasAGPR = 1;
}
} // End GeneratePressureSet = 0
-let HasVGPR = 1, HasAGPR = 1 in {
-def AV_96 : SIRegisterClass<"AMDGPU", VReg_96.RegTypes, 32,
- (add AReg_96, VReg_96)> {
- let isAllocatable = 0;
-}
+// Define a register tuple class, along with one requiring an even-aligned
+// base register.
+multiclass AVRegClass<int numRegs, list<ValueType> regTypes,
+ dag vregList, dag aregList> {
+ let HasVGPR = 1, HasAGPR = 1 in {
+ // Define the regular class.
+ def "" : VRegClassBase<numRegs, regTypes, (add vregList, aregList)>;
-def AV_128 : SIRegisterClass<"AMDGPU", VReg_128.RegTypes, 32,
- (add AReg_128, VReg_128)> {
- let isAllocatable = 0;
+ // Define 2-aligned variant
+ def _Align2 : VRegClassBase<numRegs, regTypes,
+ (add (decimate vregList, 2),
+ (decimate aregList, 2))>;
+ }
}
-def AV_160 : SIRegisterClass<"AMDGPU", VReg_160.RegTypes, 32,
- (add AReg_160, VReg_160)> {
- let isAllocatable = 0;
-}
-} // End HasVGPR = 1, HasAGPR = 1
+defm AV_64 : AVRegClass<2, VReg_64.RegTypes, (add VGPR_64), (add AGPR_64)>;
+defm AV_96 : AVRegClass<3, VReg_96.RegTypes, (add VGPR_96), (add AGPR_96)>;
+defm AV_128 : AVRegClass<4, VReg_128.RegTypes, (add VGPR_128), (add AGPR_128)>;
+defm AV_160 : AVRegClass<5, VReg_160.RegTypes, (add VGPR_160), (add AGPR_160)>;
+defm AV_192 : AVRegClass<6, VReg_160.RegTypes, (add VGPR_192), (add AGPR_192)>;
+defm AV_224 : AVRegClass<7, VReg_160.RegTypes, (add VGPR_224), (add AGPR_224)>;
+defm AV_256 : AVRegClass<8, VReg_160.RegTypes, (add VGPR_256), (add AGPR_256)>;
+defm AV_512 : AVRegClass<16, VReg_160.RegTypes, (add VGPR_512), (add AGPR_512)>;
+defm AV_1024 : AVRegClass<32, VReg_160.RegTypes, (add VGPR_1024), (add AGPR_1024)>;
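A note on the multiclass above: TableGen's decimate operator keeps every N-th element of a register list, so (decimate vregList, 2) selects the tuples whose base register index is even — exactly the even-aligned bases the _Align2 variants require.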
//===----------------------------------------------------------------------===//
// Register operands
diff --git a/llvm/lib/Target/AMDGPU/SISchedule.td b/llvm/lib/Target/AMDGPU/SISchedule.td
index 0792b303b830..18d424a3bc9f 100644
--- a/llvm/lib/Target/AMDGPU/SISchedule.td
+++ b/llvm/lib/Target/AMDGPU/SISchedule.td
@@ -93,16 +93,16 @@ def HWBranch : ProcResource<1> {
let BufferSize = 1;
}
def HWExport : ProcResource<1> {
- let BufferSize = 7; // Taken from S_WAITCNT
+ let BufferSize = 1;
}
def HWLGKM : ProcResource<1> {
- let BufferSize = 31; // Taken from S_WAITCNT
+ let BufferSize = 1;
}
def HWSALU : ProcResource<1> {
let BufferSize = 1;
}
def HWVMEM : ProcResource<1> {
- let BufferSize = 15; // Taken from S_WAITCNT
+ let BufferSize = 1;
}
def HWVALU : ProcResource<1> {
let BufferSize = 1;
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 6f63f686635a..46012e5d7d97 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -487,6 +487,8 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
bool WQMOutputs = MF.getFunction().hasFnAttribute("amdgpu-ps-wqm-outputs");
SmallVector<MachineInstr *, 4> SetInactiveInstrs;
SmallVector<MachineInstr *, 4> SoftWQMInstrs;
+ bool HasImplicitDerivatives =
+ MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS;
// We need to visit the basic blocks in reverse post-order so that we visit
// defs before uses, in particular so that we don't accidentally mark an
@@ -497,8 +499,7 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
MachineBasicBlock &MBB = **BI;
BlockInfo &BBI = Blocks[&MBB];
- for (auto II = MBB.begin(), IE = MBB.end(); II != IE; ++II) {
- MachineInstr &MI = *II;
+ for (MachineInstr &MI : MBB) {
InstrInfo &III = Instructions[&MI];
unsigned Opcode = MI.getOpcode();
char Flags = 0;
@@ -507,6 +508,11 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
// If LOD is not supported WQM is not needed.
if (!ST->hasExtendedImageInsts())
continue;
+ // Only generate implicit WQM if implicit derivatives are required.
+ // This avoids inserting unintended WQM if a shader type without
+ // implicit derivatives uses an image sampling instruction.
+ if (!HasImplicitDerivatives)
+ continue;
// Sampling instructions don't need to produce results for all pixels
// in a quad, they just require all inputs of a quad to have been
// computed for derivatives.
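A gloss on the new gate above: implicit derivatives for image sampling rely on helper lanes executing at quad granularity, which among the AMDGPU calling conventions only the pixel shader (CallingConv::AMDGPU_PS) guarantees; other shader types that happen to use sampling instructions therefore no longer pull in WQM.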
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 9da7b9f5145d..d20eaaaa65e8 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1626,13 +1626,14 @@ unsigned getRegBitWidth(unsigned RCID) {
return 32;
case AMDGPU::SGPR_64RegClassID:
case AMDGPU::VS_64RegClassID:
- case AMDGPU::AV_64RegClassID:
case AMDGPU::SReg_64RegClassID:
case AMDGPU::VReg_64RegClassID:
case AMDGPU::AReg_64RegClassID:
case AMDGPU::SReg_64_XEXECRegClassID:
case AMDGPU::VReg_64_Align2RegClassID:
case AMDGPU::AReg_64_Align2RegClassID:
+ case AMDGPU::AV_64RegClassID:
+ case AMDGPU::AV_64_Align2RegClassID:
return 64;
case AMDGPU::SGPR_96RegClassID:
case AMDGPU::SReg_96RegClassID:
@@ -1641,6 +1642,7 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::VReg_96_Align2RegClassID:
case AMDGPU::AReg_96_Align2RegClassID:
case AMDGPU::AV_96RegClassID:
+ case AMDGPU::AV_96_Align2RegClassID:
return 96;
case AMDGPU::SGPR_128RegClassID:
case AMDGPU::SReg_128RegClassID:
@@ -1649,6 +1651,7 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::VReg_128_Align2RegClassID:
case AMDGPU::AReg_128_Align2RegClassID:
case AMDGPU::AV_128RegClassID:
+ case AMDGPU::AV_128_Align2RegClassID:
return 128;
case AMDGPU::SGPR_160RegClassID:
case AMDGPU::SReg_160RegClassID:
@@ -1657,6 +1660,7 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::VReg_160_Align2RegClassID:
case AMDGPU::AReg_160_Align2RegClassID:
case AMDGPU::AV_160RegClassID:
+ case AMDGPU::AV_160_Align2RegClassID:
return 160;
case AMDGPU::SGPR_192RegClassID:
case AMDGPU::SReg_192RegClassID:
@@ -1664,6 +1668,8 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::AReg_192RegClassID:
case AMDGPU::VReg_192_Align2RegClassID:
case AMDGPU::AReg_192_Align2RegClassID:
+ case AMDGPU::AV_192RegClassID:
+ case AMDGPU::AV_192_Align2RegClassID:
return 192;
case AMDGPU::SGPR_224RegClassID:
case AMDGPU::SReg_224RegClassID:
@@ -1671,6 +1677,8 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::AReg_224RegClassID:
case AMDGPU::VReg_224_Align2RegClassID:
case AMDGPU::AReg_224_Align2RegClassID:
+ case AMDGPU::AV_224RegClassID:
+ case AMDGPU::AV_224_Align2RegClassID:
return 224;
case AMDGPU::SGPR_256RegClassID:
case AMDGPU::SReg_256RegClassID:
@@ -1678,6 +1686,8 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::AReg_256RegClassID:
case AMDGPU::VReg_256_Align2RegClassID:
case AMDGPU::AReg_256_Align2RegClassID:
+ case AMDGPU::AV_256RegClassID:
+ case AMDGPU::AV_256_Align2RegClassID:
return 256;
case AMDGPU::SGPR_512RegClassID:
case AMDGPU::SReg_512RegClassID:
@@ -1685,6 +1695,8 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::AReg_512RegClassID:
case AMDGPU::VReg_512_Align2RegClassID:
case AMDGPU::AReg_512_Align2RegClassID:
+ case AMDGPU::AV_512RegClassID:
+ case AMDGPU::AV_512_Align2RegClassID:
return 512;
case AMDGPU::SGPR_1024RegClassID:
case AMDGPU::SReg_1024RegClassID:
@@ -1692,6 +1704,8 @@ unsigned getRegBitWidth(unsigned RCID) {
case AMDGPU::AReg_1024RegClassID:
case AMDGPU::VReg_1024_Align2RegClassID:
case AMDGPU::AReg_1024_Align2RegClassID:
+ case AMDGPU::AV_1024RegClassID:
+ case AMDGPU::AV_1024_Align2RegClassID:
return 1024;
default:
llvm_unreachable("Unexpected register class");
diff --git a/llvm/lib/Target/ARC/ARCMCInstLower.cpp b/llvm/lib/Target/ARC/ARCMCInstLower.cpp
index 62462b77eccf..50ba9fe75232 100644
--- a/llvm/lib/Target/ARC/ARCMCInstLower.cpp
+++ b/llvm/lib/Target/ARC/ARCMCInstLower.cpp
@@ -104,8 +104,7 @@ MCOperand ARCMCInstLower::LowerOperand(const MachineOperand &MO,
void ARCMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
MCOperand MCOp = LowerOperand(MO);
if (MCOp.isValid())
diff --git a/llvm/lib/Target/ARM/ARM.h b/llvm/lib/Target/ARM/ARM.h
index 5500783f74db..1d5e45aec06c 100644
--- a/llvm/lib/Target/ARM/ARM.h
+++ b/llvm/lib/Target/ARM/ARM.h
@@ -44,6 +44,7 @@ FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
FunctionPass *createA15SDOptimizerPass();
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMExpandPseudoPass();
+FunctionPass *createARMBranchTargetsPass();
FunctionPass *createARMConstantIslandPass();
FunctionPass *createMLxExpansionPass();
FunctionPass *createThumb2ITBlockPass();
@@ -66,6 +67,7 @@ void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
void initializeARMParallelDSPPass(PassRegistry &);
void initializeARMLoadStoreOptPass(PassRegistry &);
void initializeARMPreAllocLoadStoreOptPass(PassRegistry &);
+void initializeARMBranchTargetsPass(PassRegistry &);
void initializeARMConstantIslandsPass(PassRegistry &);
void initializeARMExpandPseudoPass(PassRegistry &);
void initializeThumb2SizeReducePass(PassRegistry &);
diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td
index 8cbd80f1bf65..e03dd597eb65 100644
--- a/llvm/lib/Target/ARM/ARM.td
+++ b/llvm/lib/Target/ARM/ARM.td
@@ -442,6 +442,10 @@ def FeatureFixCMSE_CVE_2021_35465 : SubtargetFeature<"fix-cmse-cve-2021-35465",
"Mitigate against the cve-2021-35465 "
"security vulnurability">;
+def FeaturePACBTI : SubtargetFeature<"pacbti", "HasPACBTI", "true",
+ "Enable Pointer Authentication and Branch "
+ "Target Identification">;
+
//===----------------------------------------------------------------------===//
// ARM architecture class
//
diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 9901b86b0e87..6a88ac485e69 100644
--- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -763,6 +763,32 @@ void ARMAsmPrinter::emitAttributes() {
int EnumBuildAttr = EnumWidth == 1 ? 1 : 2;
ATS.emitAttribute(ARMBuildAttrs::ABI_enum_size, EnumBuildAttr);
}
+
+ auto *PACValue = mdconst::extract_or_null<ConstantInt>(
+ SourceModule->getModuleFlag("sign-return-address"));
+ if (PACValue && PACValue->getZExtValue() == 1) {
+ // If "+pacbti" is used as an architecture extension,
+ // Tag_PAC_extension is emitted in
+ // ARMTargetStreamer::emitTargetAttributes().
+ if (!STI.hasPACBTI()) {
+ ATS.emitAttribute(ARMBuildAttrs::PAC_extension,
+ ARMBuildAttrs::AllowPACInNOPSpace);
+ }
+ ATS.emitAttribute(ARMBuildAttrs::PACRET_use, ARMBuildAttrs::PACRETUsed);
+ }
+
+ auto *BTIValue = mdconst::extract_or_null<ConstantInt>(
+ SourceModule->getModuleFlag("branch-target-enforcement"));
+ if (BTIValue && BTIValue->getZExtValue() == 1) {
+ // If "+pacbti" is used as an architecture extension,
+ // Tag_BTI_extension is emitted in
+ // ARMTargetStreamer::emitTargetAttributes().
+ if (!STI.hasPACBTI()) {
+ ATS.emitAttribute(ARMBuildAttrs::BTI_extension,
+ ARMBuildAttrs::AllowBTIInNOPSpace);
+ }
+ ATS.emitAttribute(ARMBuildAttrs::BTI_use, ARMBuildAttrs::BTIUsed);
+ }
}
}
@@ -1535,17 +1561,17 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
MCInst.addExpr(BranchTarget);
}
- if (Opc == ARM::t2BFic) {
- const MCExpr *ElseLabel = MCSymbolRefExpr::create(
- getBFLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(),
- MI->getOperand(2).getIndex(), OutContext),
- OutContext);
- MCInst.addExpr(ElseLabel);
- MCInst.addImm(MI->getOperand(3).getImm());
- } else {
- MCInst.addImm(MI->getOperand(2).getImm())
- .addReg(MI->getOperand(3).getReg());
- }
+ if (Opc == ARM::t2BFic) {
+ const MCExpr *ElseLabel = MCSymbolRefExpr::create(
+ getBFLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(),
+ MI->getOperand(2).getIndex(), OutContext),
+ OutContext);
+ MCInst.addExpr(ElseLabel);
+ MCInst.addImm(MI->getOperand(3).getImm());
+ } else {
+ MCInst.addImm(MI->getOperand(2).getImm())
+ .addReg(MI->getOperand(3).getReg());
+ }
EmitToStreamer(*OutStreamer, MCInst);
return;
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 2d981be4cfc1..2a12947d24a8 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -310,8 +310,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
// Transfer LiveVariables states, kill / dead info.
if (LV) {
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) {
Register Reg = MO.getReg();
@@ -634,8 +633,7 @@ bool ARMBaseInstrInfo::ClobbersPredicate(MachineInstr &MI,
std::vector<MachineOperand> &Pred,
bool SkipDead) const {
bool Found = false;
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
bool ClobbersCPSR = MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR);
bool IsCPSR = MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR;
if (ClobbersCPSR || IsCPSR) {
@@ -732,8 +730,7 @@ bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const {
namespace llvm {
template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || MO.isUndef() || MO.isUse())
continue;
if (MO.getReg() != ARM::CPSR)
@@ -1860,15 +1857,11 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0,
const MachineInstr &MI1,
const MachineRegisterInfo *MRI) const {
unsigned Opcode = MI0.getOpcode();
- if (Opcode == ARM::t2LDRpci ||
- Opcode == ARM::t2LDRpci_pic ||
- Opcode == ARM::tLDRpci ||
- Opcode == ARM::tLDRpci_pic ||
- Opcode == ARM::LDRLIT_ga_pcrel ||
- Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
- Opcode == ARM::tLDRLIT_ga_pcrel ||
- Opcode == ARM::MOV_ga_pcrel ||
- Opcode == ARM::MOV_ga_pcrel_ldr ||
+ if (Opcode == ARM::t2LDRpci || Opcode == ARM::t2LDRpci_pic ||
+ Opcode == ARM::tLDRpci || Opcode == ARM::tLDRpci_pic ||
+ Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
+ Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
+ Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
Opcode == ARM::t2MOV_ga_pcrel) {
if (MI1.getOpcode() != Opcode)
return false;
@@ -1880,11 +1873,9 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0,
if (MO0.getOffset() != MO1.getOffset())
return false;
- if (Opcode == ARM::LDRLIT_ga_pcrel ||
- Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
- Opcode == ARM::tLDRLIT_ga_pcrel ||
- Opcode == ARM::MOV_ga_pcrel ||
- Opcode == ARM::MOV_ga_pcrel_ldr ||
+ if (Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
+ Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
+ Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
Opcode == ARM::t2MOV_ga_pcrel)
// Ignore the PC labels.
return MO0.getGlobal() == MO1.getGlobal();
@@ -2312,8 +2303,7 @@ ARMBaseInstrInfo::canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI,
return nullptr;
// Check if MI has any non-dead defs or physreg uses. This also detects
// predicated instructions which will be reading CPSR.
- for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 1)) {
// Reject frame index operands, PEI can't handle the predicated pseudos.
if (MO.isFI() || MO.isCPI() || MO.isJTI())
return nullptr;
@@ -4857,11 +4847,10 @@ bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
if (MI.getOpcode() == ARM::tPUSH ||
MI.getOpcode() == ARM::tPOP ||
MI.getOpcode() == ARM::tPOP_RET) {
- for (int i = 2, e = MI.getNumOperands(); i < e; ++i) {
- if (MI.getOperand(i).isImplicit() ||
- !MI.getOperand(i).isReg())
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands(), 2)) {
+ if (MO.isImplicit() || !MO.isReg())
continue;
- Register Reg = MI.getOperand(i).getReg();
+ Register Reg = MO.getReg();
if (Reg < ARM::R0 || Reg > ARM::R7) {
if (!(MI.getOpcode() == ARM::tPUSH && Reg == ARM::LR) &&
!(MI.getOpcode() == ARM::tPOP_RET && Reg == ARM::PC)) {
@@ -5748,17 +5737,17 @@ enum MachineOutlinerMBBFlags {
};
struct OutlinerCosts {
- const int CallTailCall;
- const int FrameTailCall;
- const int CallThunk;
- const int FrameThunk;
- const int CallNoLRSave;
- const int FrameNoLRSave;
- const int CallRegSave;
- const int FrameRegSave;
- const int CallDefault;
- const int FrameDefault;
- const int SaveRestoreLROnStack;
+ int CallTailCall;
+ int FrameTailCall;
+ int CallThunk;
+ int FrameThunk;
+ int CallNoLRSave;
+ int FrameNoLRSave;
+ int CallRegSave;
+ int FrameRegSave;
+ int CallDefault;
+ int FrameDefault;
+ int SaveRestoreLROnStack;
OutlinerCosts(const ARMSubtarget &target)
: CallTailCall(target.isThumb() ? 4 : 4),
@@ -5879,6 +5868,24 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
return outliner::OutlinedFunction();
}
+ // Partition the candidates into two sets: one with BTI enabled and one with
+ // BTI disabled. Remove the candidates from the smaller set. We expect the
+ // majority of the candidates to agree on branch target enforcement, with
+ // just a few oddballs; but if the two sets are the same size, prefer the
+ // non-BTI ones for outlining, since they have less overhead.
+ auto NoBTI =
+ llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) {
+ const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
+ return AFI.branchTargetEnforcement();
+ });
+ if (std::distance(RepeatedSequenceLocs.begin(), NoBTI) >
+ std::distance(NoBTI, RepeatedSequenceLocs.end()))
+ RepeatedSequenceLocs.erase(NoBTI, RepeatedSequenceLocs.end());
+ else
+ RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoBTI);
+ if (RepeatedSequenceLocs.size() < 2)
+ return outliner::OutlinedFunction();
+
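// A standalone sketch (not part of this patch) of the keep-the-majority
// idiom above, shown on a plain std::vector for clarity: std::partition
// moves elements satisfying the predicate to the front and returns the
// boundary iterator, just as llvm::partition does with the BTI candidates.
#include <algorithm>
#include <iterator>
#include <vector>

static void keepMajority(std::vector<int> &Flags) {
  auto Mid =
      std::partition(Flags.begin(), Flags.end(), [](int F) { return F != 0; });
  // Keep whichever side is larger. On a tie the else branch runs, so the
  // "false" side survives -- mirroring the preference above for the
  // cheaper non-BTI candidates.
  if (std::distance(Flags.begin(), Mid) > std::distance(Mid, Flags.end()))
    Flags.erase(Mid, Flags.end());
  else
    Flags.erase(Flags.begin(), Mid);
}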
// At this point, we have only "safe" candidates to outline. Figure out
// frame + call instruction information.
@@ -5892,6 +5899,16 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
};
OutlinerCosts Costs(Subtarget);
+ const auto &SomeMFI =
+ *RepeatedSequenceLocs.front().getMF()->getInfo<ARMFunctionInfo>();
+ // Adjust costs to account for the BTI instructions.
+ if (SomeMFI.branchTargetEnforcement()) {
+ Costs.FrameDefault += 4;
+ Costs.FrameNoLRSave += 4;
+ Costs.FrameRegSave += 4;
+ Costs.FrameTailCall += 4;
+ Costs.FrameThunk += 4;
+ }
unsigned FrameID = MachineOutlinerDefault;
unsigned NumBytesToCreateFrame = Costs.FrameDefault;
@@ -6004,16 +6021,18 @@ bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
// Stack might be involved but addressing mode doesn't handle any offset.
// Note: AddrModeT1_[1|2|4] don't operate on SP
- if (AddrMode == ARMII::AddrMode1 // Arithmetic instructions
- || AddrMode == ARMII::AddrMode4 // Load/Store Multiple
- || AddrMode == ARMII::AddrMode6 // Neon Load/Store Multiple
- || AddrMode == ARMII::AddrModeT2_so // SP can't be used as based register
- || AddrMode == ARMII::AddrModeT2_pc // PCrel access
- || AddrMode == ARMII::AddrMode2 // Used by PRE and POST indexed LD/ST
- || AddrMode == ARMII::AddrModeT2_i7 // v8.1-M MVE
- || AddrMode == ARMII::AddrModeT2_i7s2 // v8.1-M MVE
- || AddrMode == ARMII::AddrModeT2_i7s4 // v8.1-M sys regs VLDR/VSTR
- || AddrMode == ARMII::AddrModeNone)
+ if (AddrMode == ARMII::AddrMode1 || // Arithmetic instructions
+ AddrMode == ARMII::AddrMode4 || // Load/Store Multiple
+ AddrMode == ARMII::AddrMode6 || // Neon Load/Store Multiple
+ AddrMode == ARMII::AddrModeT2_so || // SP can't be used as base register
+ AddrMode == ARMII::AddrModeT2_pc || // PCrel access
+ AddrMode == ARMII::AddrMode2 || // Used by PRE and POST indexed LD/ST
+ AddrMode == ARMII::AddrModeT2_i7 || // v8.1-M MVE
+ AddrMode == ARMII::AddrModeT2_i7s2 || // v8.1-M MVE
+ AddrMode == ARMII::AddrModeT2_i7s4 || // v8.1-M sys regs VLDR/VSTR
+ AddrMode == ARMII::AddrModeNone ||
+ AddrMode == ARMII::AddrModeT2_i8 || // Pre/Post inc instructions
+ AddrMode == ARMII::AddrModeT2_i8neg) // Always negative imm
return false;
unsigned NumOps = MI->getDesc().getNumOperands();
@@ -6051,7 +6070,7 @@ bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
NumBits = 8;
Scale = 2;
break;
- case ARMII::AddrModeT2_i8:
+ case ARMII::AddrModeT2_i8pos:
NumBits = 8;
break;
case ARMII::AddrModeT2_i8s4:
@@ -6089,7 +6108,18 @@ bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
}
return false;
+}
+
+void ARMBaseInstrInfo::mergeOutliningCandidateAttributes(
+ Function &F, std::vector<outliner::Candidate> &Candidates) const {
+ outliner::Candidate &C = Candidates.front();
+ // branch-target-enforcement is guaranteed to be consistent between all
+ // candidates, so we only need to look at one.
+ const Function &CFn = C.getMF()->getFunction();
+ if (CFn.hasFnAttribute("branch-target-enforcement"))
+ F.addFnAttr(CFn.getFnAttribute("branch-target-enforcement"));
+ ARMGenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
}
bool ARMBaseInstrInfo::isFunctionSafeToOutlineFrom(
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index db9320962e81..5fa912ae35d7 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -349,6 +349,8 @@ public:
bool OutlineFromLinkOnceODRs) const override;
outliner::OutlinedFunction getOutliningCandidateInfo(
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override;
+ void mergeOutliningCandidateAttributes(
+ Function &F, std::vector<outliner::Candidate> &Candidates) const override;
outliner::InstrType getOutliningType(MachineBasicBlock::iterator &MIT,
unsigned Flags) const override;
bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
@@ -877,19 +879,23 @@ inline bool isLegalAddressImm(unsigned Opcode, int Imm,
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
switch (AddrMode) {
case ARMII::AddrModeT2_i7:
- return std::abs(Imm) < (((1 << 7) * 1) - 1);
+ return std::abs(Imm) < ((1 << 7) * 1);
case ARMII::AddrModeT2_i7s2:
- return std::abs(Imm) < (((1 << 7) * 2) - 1) && Imm % 2 == 0;
+ return std::abs(Imm) < ((1 << 7) * 2) && Imm % 2 == 0;
case ARMII::AddrModeT2_i7s4:
- return std::abs(Imm) < (((1 << 7) * 4) - 1) && Imm % 4 == 0;
+ return std::abs(Imm) < ((1 << 7) * 4) && Imm % 4 == 0;
case ARMII::AddrModeT2_i8:
- return std::abs(Imm) < (((1 << 8) * 1) - 1);
- case ARMII::AddrMode2:
- return std::abs(Imm) < (((1 << 12) * 1) - 1);
- case ARMII::AddrModeT2_i12:
- return Imm >= 0 && Imm < (((1 << 12) * 1) - 1);
+ return std::abs(Imm) < ((1 << 8) * 1);
+ case ARMII::AddrModeT2_i8pos:
+ return Imm >= 0 && Imm < ((1 << 8) * 1);
+ case ARMII::AddrModeT2_i8neg:
+ return Imm < 0 && -Imm < ((1 << 8) * 1);
case ARMII::AddrModeT2_i8s4:
- return std::abs(Imm) < (((1 << 8) * 4) - 1) && Imm % 4 == 0;
+ return std::abs(Imm) < ((1 << 8) * 4) && Imm % 4 == 0;
+ case ARMII::AddrModeT2_i12:
+ return Imm >= 0 && Imm < ((1 << 12) * 1);
+ case ARMII::AddrMode2:
+ return std::abs(Imm) < ((1 << 12) * 1);
default:
llvm_unreachable("Unhandled Addressing mode");
}
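// A worked boundary check (not part of this patch, and assuming the usual
// 7-bit-magnitude-plus-U-bit encoding): the old bounds of the form
// `std::abs(Imm) < (((1 << 7) * 1) - 1)` rejected the largest encodable
// offset, e.g. +/-127 for AddrModeT2_i7. The corrected bound
// `std::abs(Imm) < (1 << 7)` accepts +/-127 and still rejects +/-128.
#include <cassert>
#include <cstdlib>

static void checkT2i7Bounds() {
  auto IsLegalT2i7 = [](int Imm) { return std::abs(Imm) < (1 << 7); };
  assert(IsLegalT2i7(127) && IsLegalT2i7(-127));   // extremes now accepted
  assert(!IsLegalT2i7(128) && !IsLegalT2i7(-128)); // still out of range
  (void)IsLegalT2i7;
}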
diff --git a/llvm/lib/Target/ARM/ARMBranchTargets.cpp b/llvm/lib/Target/ARM/ARMBranchTargets.cpp
new file mode 100644
index 000000000000..1091c1f970fa
--- /dev/null
+++ b/llvm/lib/Target/ARM/ARMBranchTargets.cpp
@@ -0,0 +1,135 @@
+//===-- ARMBranchTargets.cpp -- Harden code using v8.1-M BTI extension ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass inserts BTI instructions at the start of every function and basic
+// block which could be indirectly called. The hardware will (when enabled)
+// trap when an indirect branch or call instruction targets an instruction
+// which is not a valid BTI instruction. This is intended to guard against
+// control-flow hijacking attacks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMInstrInfo.h"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-branch-targets"
+#define ARM_BRANCH_TARGETS_NAME "ARM Branch Targets"
+
+namespace {
+class ARMBranchTargets : public MachineFunctionPass {
+public:
+ static char ID;
+ ARMBranchTargets() : MachineFunctionPass(ID) {}
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ StringRef getPassName() const override { return ARM_BRANCH_TARGETS_NAME; }
+
+private:
+ void addBTI(const ARMInstrInfo &TII, MachineBasicBlock &MBB, bool IsFirstBB);
+};
+} // end anonymous namespace
+
+char ARMBranchTargets::ID = 0;
+
+INITIALIZE_PASS(ARMBranchTargets, "arm-branch-targets", ARM_BRANCH_TARGETS_NAME,
+ false, false)
+
+void ARMBranchTargets::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+FunctionPass *llvm::createARMBranchTargetsPass() {
+ return new ARMBranchTargets();
+}
+
+bool ARMBranchTargets::runOnMachineFunction(MachineFunction &MF) {
+ if (!MF.getInfo<ARMFunctionInfo>()->branchTargetEnforcement())
+ return false;
+
+ LLVM_DEBUG(dbgs() << "********** ARM Branch Targets **********\n"
+ << "********** Function: " << MF.getName() << '\n');
+ const ARMInstrInfo &TII =
+ *static_cast<const ARMInstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+ // LLVM does not consider basic blocks which are the targets of jump tables
+ // to be address-taken (the address can't escape anywhere else), but they are
+ // used for indirect branches, so they need BTI instructions.
+ SmallPtrSet<const MachineBasicBlock *, 8> JumpTableTargets;
+ if (const MachineJumpTableInfo *JTI = MF.getJumpTableInfo())
+ for (const MachineJumpTableEntry &JTE : JTI->getJumpTables())
+ for (const MachineBasicBlock *MBB : JTE.MBBs)
+ JumpTableTargets.insert(MBB);
+
+ bool MadeChange = false;
+ for (MachineBasicBlock &MBB : MF) {
+ bool NeedBTI = false;
+ bool IsFirstBB = &MBB == &MF.front();
+
+ // Every function can potentially be called indirectly (even if it has
+ // static linkage, due to linker-generated veneers).
+ if (IsFirstBB)
+ NeedBTI = true;
+
+ // If the block itself is address-taken, or is an exception landing pad, it
+ // could be indirectly branched to.
+ if (MBB.hasAddressTaken() || MBB.isEHPad() || JumpTableTargets.count(&MBB))
+ NeedBTI = true;
+
+ if (NeedBTI) {
+ addBTI(TII, MBB, IsFirstBB);
+ MadeChange = true;
+ }
+ }
+
+ return MadeChange;
+}
+
+/// Insert a BTI/PACBTI instruction into a given basic block \c MBB. If
+/// \c IsFirstBB is true (meaning that this is the first BB in a function) try
+/// to find a PAC instruction and replace it with PACBTI. Otherwise just insert
+/// a BTI instruction.
+/// The point of insertion is at the beginning of the BB, immediately after
+/// meta instructions (such as labels in exception handling landing pads).
+void ARMBranchTargets::addBTI(const ARMInstrInfo &TII, MachineBasicBlock &MBB,
+ bool IsFirstBB) {
+ // Which instruction to insert: BTI or PACBTI
+ unsigned OpCode = ARM::t2BTI;
+
+ // Skip meta instructions, including EH labels
+ auto MBBI = llvm::find_if_not(MBB.instrs(), [](const MachineInstr &MI) {
+ return MI.isMetaInstruction();
+ });
+
+ // If this is the first BB in a function, check whether it starts with a PAC
+ // instruction and, if so, remove it; it will be replaced by PACBTI below.
+ if (IsFirstBB) {
+ if (MBBI != MBB.instr_end() && MBBI->getOpcode() == ARM::t2PAC) {
+ LLVM_DEBUG(dbgs() << "Removing a 'PAC' instr from BB '" << MBB.getName()
+ << "' to replace with PACBTI\n");
+ OpCode = ARM::t2PACBTI;
+ auto NextMBBI = std::next(MBBI);
+ MBBI->eraseFromParent();
+ MBBI = NextMBBI;
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "Inserting a '"
+ << (OpCode == ARM::t2BTI ? "BTI" : "PACBTI")
+ << "' instr into BB '" << MBB.getName() << "'\n");
+ // Finally, insert the new instruction (either BTI or PACBTI)
+ BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII.get(OpCode));
+}
diff --git a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index 121558276c3e..c2ca4708c208 100644
--- a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -184,6 +184,9 @@ namespace {
/// base address.
DenseMap<int, int> JumpTableUserIndices;
+ // Maps a MachineBasicBlock to the number of jump table entries that
+ // reference it.
+ DenseMap<const MachineBasicBlock *, int> BlockJumpTableRefCount;
+
/// ImmBranch - One per immediate branch, keeping the machine instruction
/// pointer, conditional or unconditional, the max displacement,
/// and (if isCond is true) the corresponding unconditional branch
@@ -274,7 +277,10 @@ namespace {
unsigned &DeadSize, bool &CanDeleteLEA,
bool &BaseRegKill);
bool optimizeThumb2JumpTables();
- MachineBasicBlock *adjustJTTargetBlockForward(MachineBasicBlock *BB,
+ void fixupBTI(unsigned JTI, MachineBasicBlock &OldBB,
+ MachineBasicBlock &NewBB);
+ MachineBasicBlock *adjustJTTargetBlockForward(unsigned JTI,
+ MachineBasicBlock *BB,
MachineBasicBlock *JTBB);
unsigned getUserOffset(CPUser&) const;
@@ -518,6 +524,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
CPEntries.clear();
JumpTableEntryIndices.clear();
JumpTableUserIndices.clear();
+ BlockJumpTableRefCount.clear();
ImmBranches.clear();
PushPopMIs.clear();
T2JumpTables.clear();
@@ -720,6 +727,14 @@ Align ARMConstantIslands::getCPEAlign(const MachineInstr *CPEMI) {
return MCP->getConstants()[CPI].getAlign();
}
+// Exception landing pads, blocks that have their address taken, and function
+// entry blocks will always be (potential) indirect jump targets, regardless
+// of whether or not they are referenced by jump tables.
+static bool isAlwaysIndirectTarget(const MachineBasicBlock &MBB) {
+ return MBB.isEHPad() || MBB.hasAddressTaken() ||
+ &MBB == &MBB.getParent()->front();
+}
+
/// scanFunctionJumpTables - Do a scan of the function, building up
/// information about the sizes of each block and the locations of all
/// the jump tables.
@@ -730,6 +745,20 @@ void ARMConstantIslands::scanFunctionJumpTables() {
(I.getOpcode() == ARM::t2BR_JT || I.getOpcode() == ARM::tBR_JTr))
T2JumpTables.push_back(&I);
}
+
+ if (!MF->getInfo<ARMFunctionInfo>()->branchTargetEnforcement())
+ return;
+
+ if (const MachineJumpTableInfo *JTI = MF->getJumpTableInfo())
+ for (const MachineJumpTableEntry &JTE : JTI->getJumpTables())
+ for (const MachineBasicBlock *MBB : JTE.MBBs) {
+ if (isAlwaysIndirectTarget(*MBB))
+ // Set the reference count essentially to infinity; it will never
+ // reach zero, so the BTI instruction will never be removed.
+ BlockJumpTableRefCount[MBB] = std::numeric_limits<int>::max();
+ else
+ ++BlockJumpTableRefCount[MBB];
+ }
}
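// A standalone sketch (not part of this patch) of the reference-counting
// idiom above, with std::map and an opaque pointer standing in for the
// DenseMap keyed by MachineBasicBlock.
#include <climits>
#include <map>

static void noteJumpTableTarget(std::map<const void *, int> &RefCount,
                                const void *Block, bool AlwaysIndirect) {
  if (AlwaysIndirect)
    RefCount[Block] = INT_MAX; // pinned high: decrements never free the BTI
  else
    ++RefCount[Block]; // one more jump-table entry branches here
}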
/// initializeFunctionInfo - Do the initial scan of the function, building up
@@ -1219,9 +1248,9 @@ int ARMConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset) {
// Point the CPUser node to the replacement
U.CPEMI = CPEs[i].CPEMI;
// Change the CPI in the instruction operand to refer to the clone.
- for (unsigned j = 0, e = UserMI->getNumOperands(); j != e; ++j)
- if (UserMI->getOperand(j).isCPI()) {
- UserMI->getOperand(j).setIndex(CPEs[i].CPI);
+ for (MachineOperand &MO : UserMI->operands())
+ if (MO.isCPI()) {
+ MO.setIndex(CPEs[i].CPI);
break;
}
// Adjust the refcount of the clone...
@@ -1601,9 +1630,9 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex,
BBUtils->adjustBBOffsetsAfter(&*--NewIsland->getIterator());
// Finally, change the CPI in the instruction operand to be ID.
- for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
- if (UserMI->getOperand(i).isCPI()) {
- UserMI->getOperand(i).setIndex(ID);
+ for (MachineOperand &MO : UserMI->operands())
+ if (MO.isCPI()) {
+ MO.setIndex(ID);
break;
}
@@ -2211,8 +2240,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
unsigned JTOffset = BBUtils->getOffsetOf(MI) + 4;
const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
BBInfoVector &BBInfo = BBUtils->getBBInfo();
- for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
- MachineBasicBlock *MBB = JTBBs[j];
+ for (MachineBasicBlock *MBB : JTBBs) {
unsigned DstOffset = BBInfo[MBB->getNumber()].Offset;
// Negative offset is not ok. FIXME: We should change BB layout to make
// sure all the branches are forward.
@@ -2405,17 +2433,16 @@ bool ARMConstantIslands::reorderThumb2JumpTables() {
// and try to adjust them such that that's true.
int JTNumber = MI->getParent()->getNumber();
const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
- for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
- MachineBasicBlock *MBB = JTBBs[j];
+ for (MachineBasicBlock *MBB : JTBBs) {
int DTNumber = MBB->getNumber();
if (DTNumber < JTNumber) {
// The destination precedes the switch. Try to move the block forward
// so we have a positive offset.
MachineBasicBlock *NewBB =
- adjustJTTargetBlockForward(MBB, MI->getParent());
+ adjustJTTargetBlockForward(JTI, MBB, MI->getParent());
if (NewBB)
- MJTI->ReplaceMBBInJumpTable(JTI, JTBBs[j], NewBB);
+ MJTI->ReplaceMBBInJumpTable(JTI, MBB, NewBB);
MadeChange = true;
}
}
@@ -2424,8 +2451,40 @@ bool ARMConstantIslands::reorderThumb2JumpTables() {
return MadeChange;
}
-MachineBasicBlock *ARMConstantIslands::
-adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
+void ARMConstantIslands::fixupBTI(unsigned JTI, MachineBasicBlock &OldBB,
+ MachineBasicBlock &NewBB) {
+ assert(isThumb2 && "BTI in Thumb1?");
+
+ // Insert a BTI instruction into NewBB
+ BuildMI(NewBB, NewBB.begin(), DebugLoc(), TII->get(ARM::t2BTI));
+
+ // Update jump table reference counts.
+ const MachineJumpTableInfo &MJTI = *MF->getJumpTableInfo();
+ const MachineJumpTableEntry &JTE = MJTI.getJumpTables()[JTI];
+ for (const MachineBasicBlock *MBB : JTE.MBBs) {
+ if (MBB != &OldBB)
+ continue;
+ --BlockJumpTableRefCount[MBB];
+ ++BlockJumpTableRefCount[&NewBB];
+ }
+
+ // If the old basic block reference count dropped to zero, remove
+ // the BTI instruction at its beginning.
+ if (BlockJumpTableRefCount[&OldBB] > 0)
+ return;
+
+ // Skip meta instructions
+ auto BTIPos = llvm::find_if_not(OldBB.instrs(), [](const MachineInstr &MI) {
+ return MI.isMetaInstruction();
+ });
+ assert(BTIPos->getOpcode() == ARM::t2BTI &&
+ "BasicBlock is mentioned in a jump table but does start with BTI");
+ if (BTIPos->getOpcode() == ARM::t2BTI)
+ BTIPos->eraseFromParent();
+}
+
+MachineBasicBlock *ARMConstantIslands::adjustJTTargetBlockForward(
+ unsigned JTI, MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
// If the destination block is terminated by an unconditional branch,
// try to move it; otherwise, create a new block following the jump
// table that branches back to the actual target. This is a very simple
@@ -2483,6 +2542,9 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
NewBB->addSuccessor(BB);
JTBB->replaceSuccessor(BB, NewBB);
+ if (MF->getInfo<ARMFunctionInfo>()->branchTargetEnforcement())
+ fixupBTI(JTI, *BB, *NewBB);
+
++NumJTInserted;
return NewBB;
}
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index a8f09969e948..7a35f252b22a 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -125,9 +125,8 @@ void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI,
MachineInstrBuilder &UseMI,
MachineInstrBuilder &DefMI) {
const MCInstrDesc &Desc = OldMI.getDesc();
- for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands();
- i != e; ++i) {
- const MachineOperand &MO = OldMI.getOperand(i);
+ for (const MachineOperand &MO :
+ llvm::drop_begin(OldMI.operands(), Desc.getNumOperands())) {
assert(MO.isReg() && MO.getReg());
if (MO.isUse())
UseMI.add(MO);
@@ -2252,8 +2251,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
.add(predOps(ARMCC::AL))
.addReg(JumpReg, RegState::Kill);
- for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
- NewCall->addOperand(MI.getOperand(I));
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
+ NewCall->addOperand(MO);
if (MI.isCandidateForCallSiteEntry())
MI.getMF()->moveCallSiteInfo(&MI, NewCall.getInstr());
@@ -2524,17 +2523,21 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::LDRLIT_ga_pcrel:
case ARM::LDRLIT_ga_pcrel_ldr:
case ARM::tLDRLIT_ga_abs:
+ case ARM::t2LDRLIT_ga_pcrel:
case ARM::tLDRLIT_ga_pcrel: {
Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
const MachineOperand &MO1 = MI.getOperand(1);
auto Flags = MO1.getTargetFlags();
const GlobalValue *GV = MO1.getGlobal();
- bool IsARM =
- Opcode != ARM::tLDRLIT_ga_pcrel && Opcode != ARM::tLDRLIT_ga_abs;
+ bool IsARM = Opcode != ARM::tLDRLIT_ga_pcrel &&
+ Opcode != ARM::tLDRLIT_ga_abs &&
+ Opcode != ARM::t2LDRLIT_ga_pcrel;
bool IsPIC =
Opcode != ARM::LDRLIT_ga_abs && Opcode != ARM::tLDRLIT_ga_abs;
unsigned LDRLITOpc = IsARM ? ARM::LDRi12 : ARM::tLDRpci;
+ if (Opcode == ARM::t2LDRLIT_ga_pcrel)
+ LDRLITOpc = ARM::t2LDRpci;
unsigned PICAddOpc =
IsARM
? (Opcode == ARM::LDRLIT_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD)
@@ -3065,7 +3068,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::BL));
}
MIB.cloneMemRefs(MI);
- for (unsigned i = 1; i < MI.getNumOperands(); ++i) MIB.add(MI.getOperand(i));
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
+ MIB.add(MO);
MI.eraseFromParent();
return true;
}
@@ -3080,8 +3084,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
Opcode == ARM::LOADDUAL ? RegState::Define : 0)
.addReg(TRI->getSubReg(PairReg, ARM::gsub_1),
Opcode == ARM::LOADDUAL ? RegState::Define : 0);
- for (unsigned i = 1; i < MI.getNumOperands(); i++)
- MIB.add(MI.getOperand(i));
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
+ MIB.add(MO);
MIB.add(predOps(ARMCC::AL));
MIB.cloneMemRefs(MI);
MI.eraseFromParent();
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 025e43444f9c..b866cf952ff1 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -523,9 +523,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
}
// Determine spill area sizes.
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- int FI = CSI[i].getFrameIdx();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned Reg = I.getReg();
+ int FI = I.getFrameIdx();
switch (Reg) {
case ARM::R8:
case ARM::R9:
@@ -1317,11 +1317,11 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
// Mark the D-register spill slots as properly aligned. Since MFI computes
// stack slot layout backwards, this can actually mean that the d-reg stack
// slot offsets can be wrong. The offset for d8 will always be correct.
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned DNum = CSI[i].getReg() - ARM::D8;
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned DNum = I.getReg() - ARM::D8;
if (DNum > NumAlignedDPRCS2Regs - 1)
continue;
- int FI = CSI[i].getFrameIdx();
+ int FI = I.getFrameIdx();
// The even-numbered registers will be 16-byte aligned, the odd-numbered
// registers will be 8-byte aligned.
MFI.setObjectAlignment(FI, DNum % 2 ? Align(8) : Align(16));
@@ -1488,9 +1488,9 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
// Find the frame index assigned to d8.
int D8SpillFI = 0;
- for (unsigned i = 0, e = CSI.size(); i != e; ++i)
- if (CSI[i].getReg() == ARM::D8) {
- D8SpillFI = CSI[i].getFrameIdx();
+ for (const CalleeSavedInfo &I : CSI)
+ if (I.getReg() == ARM::D8) {
+ D8SpillFI = I.getFrameIdx();
break;
}
@@ -1693,7 +1693,7 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
// Default 12 bit limit.
break;
case ARMII::AddrMode3:
- case ARMII::AddrModeT2_i8:
+ case ARMII::AddrModeT2_i8neg:
Limit = std::min(Limit, (1U << 8) - 1);
break;
case ARMII::AddrMode5FP16:
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 2b83a292db76..bb2859c766c2 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -3274,7 +3274,8 @@ bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) {
return false;
unsigned int ScalarBits = Type.getScalarSizeInBits();
- bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT;
+ bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT ||
+ N->getOpcode() == ISD::FP_TO_UINT_SAT;
SDNode *Node = N->getOperand(0).getNode();
// floating-point to fixed-point with one fractional bit gets turned into an
@@ -3764,6 +3765,8 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
break;
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT_SAT:
+ case ISD::FP_TO_SINT_SAT:
if (tryFP_TO_INT(N, dl))
return;
break;
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index e7e10ce07a44..33d115945614 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1016,6 +1016,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::FP_EXTEND);
setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::SELECT_CC);
+ setTargetDAGCombine(ISD::SETCC);
+ }
+ if (Subtarget->hasMVEFloatOps()) {
+ setTargetDAGCombine(ISD::FADD);
}
if (!Subtarget->hasFP64()) {
@@ -10587,10 +10591,9 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
LPadList.reserve(CallSiteNumToLPad.size());
for (unsigned I = 1; I <= MaxCSNum; ++I) {
SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
- for (SmallVectorImpl<MachineBasicBlock*>::iterator
- II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) {
- LPadList.push_back(*II);
- InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end());
+ for (MachineBasicBlock *MBB : MBBList) {
+ LPadList.push_back(MBB);
+ InvokeBBs.insert(MBB->pred_begin(), MBB->pred_end());
}
}
@@ -10879,9 +10882,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
// Add the jump table entries as successors to the MBB.
SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs;
- for (std::vector<MachineBasicBlock*>::iterator
- I = LPadList.begin(), E = LPadList.end(); I != E; ++I) {
- MachineBasicBlock *CurMBB = *I;
+ for (MachineBasicBlock *CurMBB : LPadList) {
if (SeenMBBs.insert(CurMBB).second)
DispContBB->addSuccessor(CurMBB);
}
@@ -10943,9 +10944,8 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
// Mark all former landing pads as non-landing pads. The dispatch is the only
// landing pad now.
- for (SmallVectorImpl<MachineBasicBlock*>::iterator
- I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I)
- (*I)->setIsEHPad(false);
+ for (MachineBasicBlock *MBBLPad : MBBLPads)
+ MBBLPad->setIsEHPad(false);
// The instruction is gone now.
MI.eraseFromParent();
@@ -11771,8 +11771,8 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
}
MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
- for (unsigned i = 0; i < MI.getNumOperands(); ++i)
- MIB.add(MI.getOperand(i));
+ for (const MachineOperand &MO : MI.operands())
+ MIB.add(MO);
MI.eraseFromParent();
return BB;
}
@@ -13083,6 +13083,65 @@ static SDValue PerformVSELECTCombine(SDNode *N,
return DCI.DAG.getNode(ISD::VSELECT, SDLoc(N), Type, Cond, RHS, LHS);
}
+// Convert vsetcc([0,1,2,..], splat(n), ult) -> vctp n
+static SDValue PerformVSetCCToVCTPCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ EVT VT = N->getValueType(0);
+
+ if (!Subtarget->hasMVEIntegerOps() ||
+ !DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ if (CC == ISD::SETUGE) {
+ std::swap(Op0, Op1);
+ CC = ISD::SETULT;
+ }
+
+ if (CC != ISD::SETULT || VT.getScalarSizeInBits() != 1 ||
+ Op0.getOpcode() != ISD::BUILD_VECTOR)
+ return SDValue();
+
+ // Check first operand is BuildVector of 0,1,2,...
+ for (unsigned I = 0; I < VT.getVectorNumElements(); I++) {
+ if (!Op0.getOperand(I).isUndef() &&
+ !(isa<ConstantSDNode>(Op0.getOperand(I)) &&
+ Op0.getConstantOperandVal(I) == I))
+ return SDValue();
+ }
+
+ // The second operand must be a splat of a single value, Op1S
+ SDValue Op1S = DCI.DAG.getSplatValue(Op1);
+ if (!Op1S)
+ return SDValue();
+
+ unsigned Opc;
+ switch (VT.getVectorNumElements()) {
+ case 2:
+ Opc = Intrinsic::arm_mve_vctp64;
+ break;
+ case 4:
+ Opc = Intrinsic::arm_mve_vctp32;
+ break;
+ case 8:
+ Opc = Intrinsic::arm_mve_vctp16;
+ break;
+ case 16:
+ Opc = Intrinsic::arm_mve_vctp8;
+ break;
+ default:
+ return SDValue();
+ }
+
+ SDLoc DL(N);
+ return DCI.DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DCI.DAG.getConstant(Opc, DL, MVT::i32),
+ DCI.DAG.getZExtOrTrunc(Op1S, DL, MVT::i32));
+}
+
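// A scalar model (not part of this patch) of why the combine above is
// sound: an MVE VCTP produces a predicate whose lane I is active exactly
// when I < n, which is the same mask that
// setcc([0,1,2,...], splat(n), ult) computes lane by lane.
#include <array>
#include <cstdint>

static std::array<bool, 4> vctp32Model(uint32_t N) {
  std::array<bool, 4> Pred{};
  for (uint32_t I = 0; I < 4; ++I)
    Pred[I] = I < N; // lane I active iff its index is below N
  return Pred;
}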
static SDValue PerformABSCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
@@ -13427,6 +13486,26 @@ bool ARMTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
return VT.isScalarInteger();
}
+bool ARMTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
+ EVT VT) const {
+ if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
+ return false;
+
+ switch (FPVT.getSimpleVT().SimpleTy) {
+ case MVT::f16:
+ return Subtarget->hasVFP2Base();
+ case MVT::f32:
+ return Subtarget->hasVFP2Base();
+ case MVT::f64:
+ return Subtarget->hasFP64();
+ case MVT::v4f32:
+ case MVT::v8f16:
+ return Subtarget->hasMVEFloatOps();
+ default:
+ return false;
+ }
+}
+
static SDValue PerformSHLSimplify(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *ST) {
@@ -14485,6 +14564,52 @@ static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+// Check that N is CMPZ(CSINC(0, 0, CC, X)), return X if valid.
+static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC) {
+ if (Cmp->getOpcode() != ARMISD::CMPZ || !isNullConstant(Cmp->getOperand(1)))
+ return SDValue();
+ SDValue CSInc = Cmp->getOperand(0);
+ if (CSInc.getOpcode() != ARMISD::CSINC ||
+ !isNullConstant(CSInc.getOperand(0)) ||
+ !isNullConstant(CSInc.getOperand(1)) || !CSInc->hasOneUse())
+ return SDValue();
+ CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2);
+ return CSInc.getOperand(3);
+}
+
+static SDValue PerformCMPZCombine(SDNode *N, SelectionDAG &DAG) {
+ // Given CMPZ(CSINC(0, 0, EQ, C), 0), we can just use C directly. As in
+ // t92: glue = ARMISD::CMPZ t74, 0
+ // t93: i32 = ARMISD::CSINC 0, 0, 1, t92
+ // t96: glue = ARMISD::CMPZ t93, 0
+ // t114: i32 = ARMISD::CSINV 0, 0, 0, t96
+ ARMCC::CondCodes Cond;
+ if (SDValue C = IsCMPZCSINC(N, Cond))
+ if (Cond == ARMCC::EQ)
+ return C;
+ return SDValue();
+}
+
+static SDValue PerformCSETCombine(SDNode *N, SelectionDAG &DAG) {
+ // Fold away an unnecessary CMPZ/CSINC
+ // CSXYZ A, B, C1 (CMPZ (CSINC 0, 0, C2, D), 0) ->
+ // if C1==EQ -> CSXYZ A, B, C2, D
+ // if C1==NE -> CSXYZ A, B, NOT(C2), D
+ ARMCC::CondCodes Cond;
+ if (SDValue C = IsCMPZCSINC(N->getOperand(3).getNode(), Cond)) {
+ if (N->getConstantOperandVal(2) == ARMCC::EQ)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
+ N->getOperand(1),
+ DAG.getConstant(Cond, SDLoc(N), MVT::i32), C);
+ if (N->getConstantOperandVal(2) == ARMCC::NE)
+ return DAG.getNode(
+ N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
+ N->getOperand(1),
+ DAG.getConstant(ARMCC::getOppositeCondition(Cond), SDLoc(N), MVT::i32), C);
+ }
+ return SDValue();
+}
+
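// A boolean model (not part of this patch) of the two folds above: with
// both source operands zero, CSINC yields 0 when its condition holds and
// 1 otherwise, so testing that result against zero with EQ reproduces the
// inner condition, and testing with NE yields its inverse.
#include <cassert>

static void checkCSIncFold() {
  for (bool CC : {false, true}) {
    int CSInc = CC ? 0 : 1;      // CSINC Rd, 0, 0, CC: cond ? Rn : Rm + 1
    bool OuterEQ = (CSInc == 0); // CMPZ(CSInc, 0) consumed with EQ
    bool OuterNE = (CSInc != 0); // CMPZ(CSInc, 0) consumed with NE
    assert(OuterEQ == CC && OuterNE == !CC);
    (void)OuterEQ;
    (void)OuterNE;
  }
}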
/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
/// ARMISD::VMOVRRD.
static SDValue PerformVMOVRRDCombine(SDNode *N,
@@ -16411,6 +16536,42 @@ static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG,
return FixConv;
}
+static SDValue PerformFAddVSelectCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ if (!Subtarget->hasMVEFloatOps())
+ return SDValue();
+
+ // Turn (fadd x, (vselect c, y, -0.0)) into (vselect c, (fadd x, y), x)
+ // The second form can be more easily turned into a predicated vadd, and
+ // possibly combined into a fma to become a predicated vfma.
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ // The identity element for a fadd is -0.0, which these VMOVs represent.
+ auto isNegativeZeroSplat = [&](SDValue Op) {
+ if (Op.getOpcode() != ISD::BITCAST ||
+ Op.getOperand(0).getOpcode() != ARMISD::VMOVIMM)
+ return false;
+ if (VT == MVT::v4f32 && Op.getOperand(0).getConstantOperandVal(0) == 1664)
+ return true;
+ if (VT == MVT::v8f16 && Op.getOperand(0).getConstantOperandVal(0) == 2688)
+ return true;
+ return false;
+ };
+
+ if (Op0.getOpcode() == ISD::VSELECT && Op1.getOpcode() != ISD::VSELECT)
+ std::swap(Op0, Op1);
+
+ if (Op1.getOpcode() != ISD::VSELECT ||
+ !isNegativeZeroSplat(Op1.getOperand(2)))
+ return SDValue();
+ SDValue FAdd =
+ DAG.getNode(ISD::FADD, DL, VT, Op0, Op1.getOperand(1), N->getFlags());
+ return DAG.getNode(ISD::VSELECT, DL, VT, Op1.getOperand(0), FAdd, Op0);
+}
+
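// A numeric spot check (not part of this patch) of the identity the
// combine above relies on: -0.0, not +0.0, is the fadd identity for IEEE
// floats, because x + (-0.0) == x for every x, whereas (-0.0) + (+0.0)
// is +0.0 and would lose the sign of zero in the unselected lanes.
#include <cassert>
#include <cmath>

static void checkNegZeroIdentity() {
  double Neg = -0.0;
  assert(!std::signbit(+0.0 + Neg)); // +0.0 + -0.0 == +0.0
  assert(std::signbit(Neg + Neg));   // -0.0 + -0.0 == -0.0
  assert(1.5 + Neg == 1.5);          // ordinary values unchanged
  (void)Neg;
}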
/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
/// can replace combinations of VCVT (integer to floating-point) and VDIV
/// when the VDIV has a constant operand that is a power of 2.
@@ -17049,18 +17210,6 @@ static SDValue PerformShiftCombine(SDNode *N,
const ARMSubtarget *ST) {
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
- if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) {
- // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high
- // 16-bits of x is zero. This optimizes rev + lsr 16 to rev16.
- SDValue N1 = N->getOperand(1);
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
- SDValue N0 = N->getOperand(0);
- if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP &&
- DAG.MaskedValueIsZero(N0.getOperand(0),
- APInt::getHighBitsSet(32, 16)))
- return DAG.getNode(ISD::ROTR, SDLoc(N), VT, N0, N1);
- }
- }
if (ST->isThumb1Only() && N->getOpcode() == ISD::SHL && VT == MVT::i32 &&
N->getOperand(0)->getOpcode() == ISD::AND &&
@@ -18173,6 +18322,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SELECT_CC:
case ISD::SELECT: return PerformSELECTCombine(N, DCI, Subtarget);
case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget);
+ case ISD::SETCC: return PerformVSetCCToVCTPCombine(N, DCI, Subtarget);
case ISD::ABS: return PerformABSCombine(N, DCI, Subtarget);
case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
@@ -18205,6 +18355,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
return PerformVCVTCombine(N, DCI.DAG, Subtarget);
+ case ISD::FADD:
+ return PerformFAddVSelectCombine(N, DCI.DAG, Subtarget);
case ISD::FDIV:
return PerformVDIVCombine(N, DCI.DAG, Subtarget);
case ISD::INTRINSIC_WO_CHAIN:
@@ -18228,6 +18380,12 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
return PerformCMOVCombine(N, DCI.DAG);
case ARMISD::BRCOND:
return PerformBRCONDCombine(N, DCI.DAG);
+ case ARMISD::CMPZ:
+ return PerformCMPZCombine(N, DCI.DAG);
+ case ARMISD::CSINC:
+ case ARMISD::CSINV:
+ case ARMISD::CSNEG:
+ return PerformCSETCombine(N, DCI.DAG);
case ISD::LOAD:
return PerformLOADCombine(N, DCI, Subtarget);
case ARMISD::VLD1DUP:
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 0fddd58e178e..e3b422358cae 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -736,6 +736,8 @@ class VectorType;
bool preferIncOfAddToSubOfNot(EVT VT) const override;
+ bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
+
protected:
std::pair<const TargetRegisterClass *, uint8_t>
findRepresentativeClass(const TargetRegisterInfo *TRI,
diff --git a/llvm/lib/Target/ARM/ARMInstrFormats.td b/llvm/lib/Target/ARM/ARMInstrFormats.td
index de351372abf2..ff5afd787c82 100644
--- a/llvm/lib/Target/ARM/ARMInstrFormats.td
+++ b/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -103,15 +103,17 @@ def AddrModeT1_4 : AddrMode<9>;
def AddrModeT1_s : AddrMode<10>;
def AddrModeT2_i12 : AddrMode<11>;
def AddrModeT2_i8 : AddrMode<12>;
-def AddrModeT2_so : AddrMode<13>;
-def AddrModeT2_pc : AddrMode<14>;
-def AddrModeT2_i8s4 : AddrMode<15>;
-def AddrMode_i12 : AddrMode<16>;
-def AddrMode5FP16 : AddrMode<17>;
-def AddrModeT2_ldrex : AddrMode<18>;
-def AddrModeT2_i7s4 : AddrMode<19>;
-def AddrModeT2_i7s2 : AddrMode<20>;
-def AddrModeT2_i7 : AddrMode<21>;
+def AddrModeT2_i8pos : AddrMode<13>;
+def AddrModeT2_i8neg : AddrMode<14>;
+def AddrModeT2_so : AddrMode<15>;
+def AddrModeT2_pc : AddrMode<16>;
+def AddrModeT2_i8s4 : AddrMode<17>;
+def AddrMode_i12 : AddrMode<18>;
+def AddrMode5FP16 : AddrMode<19>;
+def AddrModeT2_ldrex : AddrMode<20>;
+def AddrModeT2_i7s4 : AddrMode<21>;
+def AddrModeT2_i7s2 : AddrMode<22>;
+def AddrModeT2_i7 : AddrMode<23>;
// Load / store index mode.
class IndexMode<bits<2> val> {
@@ -1392,9 +1394,12 @@ class T2I<dag oops, dag iops, InstrItinClass itin,
class T2Ii12<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: Thumb2I<oops, iops, AddrModeT2_i12, 4, itin, opc, asm, "",pattern>;
-class T2Ii8<dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : Thumb2I<oops, iops, AddrModeT2_i8, 4, itin, opc, asm, "", pattern>;
+class T2Ii8p<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_i8pos, 4, itin, opc, asm, "", pattern>;
+class T2Ii8n<dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : Thumb2I<oops, iops, AddrModeT2_i8neg, 4, itin, opc, asm, "", pattern>;
class T2Iso<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: Thumb2I<oops, iops, AddrModeT2_so, 4, itin, opc, asm, "", pattern>;
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index 7d0bc756e882..1c1db473f866 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -420,6 +420,12 @@ def lo16AllZero : PatLeaf<(i32 imm), [{
return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0;
}], hi16>;
+// top16Zero - return true if the upper 16 bits of $src are 0, false otherwise
+def top16Zero: PatLeaf<(i32 GPR:$src), [{
+ return !SDValue(N,0)->getValueType(0).isVector() &&
+ CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
+ }]>;
+
class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
class UnOpFrag <dag res> : PatFrag<(ops node:$Src), res>;
@@ -4748,6 +4754,8 @@ def : ARMV6Pat<(srl (bswap (extloadi16 addrmode3:$addr)), (i32 16)),
(REV16 (LDRH addrmode3:$addr))>;
def : ARMV6Pat<(truncstorei16 (srl (bswap GPR:$Rn), (i32 16)), addrmode3:$addr),
(STRH (REV16 GPR:$Rn), addrmode3:$addr)>;
+def : ARMV6Pat<(srl (bswap top16Zero:$Rn), (i32 16)),
+ (REV16 GPR:$Rn)>;
let AddedComplexity = 5 in
def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 697730037277..f53814a80e01 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -3621,21 +3621,24 @@ class MVE_VMUL_fp<string iname, string suffix, bits<2> size, list<dag> pattern=[
let validForTailPredication = 1;
}
-multiclass MVE_VMULT_fp_m<string iname, MVEVectorVTInfo VTI,
- SDNode Op, Intrinsic PredInt> {
+multiclass MVE_VMULT_fp_m<string iname, MVEVectorVTInfo VTI, SDNode Op,
+ Intrinsic PredInt, SDPatternOperator IdentityVec> {
def "" : MVE_VMUL_fp<iname, VTI.Suffix, VTI.Size>;
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEFloat] in {
- defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME)>;
+ defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME), IdentityVec>;
}
}
-multiclass MVE_VMUL_fp_m<MVEVectorVTInfo VTI>
- : MVE_VMULT_fp_m<"vmul", VTI, fmul, int_arm_mve_mul_predicated>;
+multiclass MVE_VMUL_fp_m<MVEVectorVTInfo VTI, SDPatternOperator IdentityVec>
+ : MVE_VMULT_fp_m<"vmul", VTI, fmul, int_arm_mve_mul_predicated, IdentityVec>;
+
+def ARMimmOneF: PatLeaf<(bitconvert (v4f32 (ARMvmovFPImm (i32 112))))>; // 1.0 float
+def ARMimmOneH: PatLeaf<(bitconvert (v8i16 (ARMvmovImm (i32 2620))))>; // 1.0 half
-defm MVE_VMULf32 : MVE_VMUL_fp_m<MVE_v4f32>;
-defm MVE_VMULf16 : MVE_VMUL_fp_m<MVE_v8f16>;
+defm MVE_VMULf32 : MVE_VMUL_fp_m<MVE_v4f32, ARMimmOneF>;
+defm MVE_VMULf16 : MVE_VMUL_fp_m<MVE_v8f16, ARMimmOneH>;
class MVE_VCMLA<string suffix, bits<2> size>
: MVEFloatArithNeon<"vcmla", suffix, size{1}, (outs MQPR:$Qd),
@@ -3747,27 +3750,30 @@ defm MVE_VFMSf32 : MVE_VFMA_fp_multi<"vfms", 1, MVE_v4f32>;
defm MVE_VFMSf16 : MVE_VFMA_fp_multi<"vfms", 1, MVE_v8f16>;
multiclass MVE_VADDSUB_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI,
- SDNode Op, Intrinsic PredInt> {
+ SDNode Op, Intrinsic PredInt, SDPatternOperator IdentityVec> {
def "" : MVE_VADDSUBFMA_fp<iname, VTI.Suffix, VTI.Size, 0, 1, bit_21> {
let validForTailPredication = 1;
}
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEFloat] in {
- defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME)>;
+ defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? ), !cast<Instruction>(NAME), IdentityVec>;
}
}
-multiclass MVE_VADD_fp_m<MVEVectorVTInfo VTI>
- : MVE_VADDSUB_fp_m<"vadd", 0, VTI, fadd, int_arm_mve_add_predicated>;
-multiclass MVE_VSUB_fp_m<MVEVectorVTInfo VTI>
- : MVE_VADDSUB_fp_m<"vsub", 1, VTI, fsub, int_arm_mve_sub_predicated>;
+multiclass MVE_VADD_fp_m<MVEVectorVTInfo VTI, SDPatternOperator IdentityVec>
+ : MVE_VADDSUB_fp_m<"vadd", 0, VTI, fadd, int_arm_mve_add_predicated, IdentityVec>;
+multiclass MVE_VSUB_fp_m<MVEVectorVTInfo VTI, SDPatternOperator IdentityVec>
+ : MVE_VADDSUB_fp_m<"vsub", 1, VTI, fsub, int_arm_mve_sub_predicated, IdentityVec>;
-defm MVE_VADDf32 : MVE_VADD_fp_m<MVE_v4f32>;
-defm MVE_VADDf16 : MVE_VADD_fp_m<MVE_v8f16>;
+def ARMimmMinusZeroF: PatLeaf<(bitconvert (v4i32 (ARMvmovImm (i32 1664))))>; // -0.0 float
+def ARMimmMinusZeroH: PatLeaf<(bitconvert (v8i16 (ARMvmovImm (i32 2688))))>; // -0.0 half
-defm MVE_VSUBf32 : MVE_VSUB_fp_m<MVE_v4f32>;
-defm MVE_VSUBf16 : MVE_VSUB_fp_m<MVE_v8f16>;
+defm MVE_VADDf32 : MVE_VADD_fp_m<MVE_v4f32, ARMimmMinusZeroF>;
+defm MVE_VADDf16 : MVE_VADD_fp_m<MVE_v8f16, ARMimmMinusZeroH>;
+
+defm MVE_VSUBf32 : MVE_VSUB_fp_m<MVE_v4f32, ARMimmAllZerosV>;
+defm MVE_VSUBf16 : MVE_VSUB_fp_m<MVE_v8f16, ARMimmAllZerosV>;
class MVE_VCADD<string suffix, bits<2> size, string cstr="">
: MVEFloatArithNeon<"vcadd", suffix, size{1}, (outs MQPR:$Qd),
@@ -5373,22 +5379,22 @@ defm MVE_VHSUB_qr_u16 : MVE_VHSUB_qr_m<MVE_v8u16>;
defm MVE_VHSUB_qr_u32 : MVE_VHSUB_qr_m<MVE_v4u32>;
multiclass MVE_VADDSUB_qr_f<string iname, MVEVectorVTInfo VTI, bit subtract,
- SDNode Op, Intrinsic PredInt> {
+ SDNode Op, Intrinsic PredInt, SDPatternOperator IdentityVec> {
def "" : MVE_VxADDSUB_qr<iname, VTI.Suffix, VTI.Size{0}, 0b11, subtract, VTI.Size>;
defm : MVE_TwoOpPatternDup<VTI, Op, PredInt, (? ),
- !cast<Instruction>(NAME)>;
+ !cast<Instruction>(NAME), IdentityVec>;
}
let Predicates = [HasMVEFloat] in {
defm MVE_VADD_qr_f32 : MVE_VADDSUB_qr_f<"vadd", MVE_v4f32, 0b0, fadd,
- int_arm_mve_add_predicated>;
+ int_arm_mve_add_predicated, ARMimmMinusZeroF>;
defm MVE_VADD_qr_f16 : MVE_VADDSUB_qr_f<"vadd", MVE_v8f16, 0b0, fadd,
- int_arm_mve_add_predicated>;
+ int_arm_mve_add_predicated, ARMimmMinusZeroH>;
defm MVE_VSUB_qr_f32 : MVE_VADDSUB_qr_f<"vsub", MVE_v4f32, 0b1, fsub,
- int_arm_mve_sub_predicated>;
+ int_arm_mve_sub_predicated, ARMimmAllZerosV>;
defm MVE_VSUB_qr_f16 : MVE_VADDSUB_qr_f<"vsub", MVE_v8f16, 0b1, fsub,
- int_arm_mve_sub_predicated>;
+ int_arm_mve_sub_predicated, ARMimmAllZerosV>;
}
class MVE_VxSHL_qr<string iname, string suffix, bit U, bits<2> size,
@@ -5567,16 +5573,16 @@ defm MVE_VQRDMULH_qr_s8 : MVE_VQRDMULH_qr_m<MVE_v16s8>;
defm MVE_VQRDMULH_qr_s16 : MVE_VQRDMULH_qr_m<MVE_v8s16>;
defm MVE_VQRDMULH_qr_s32 : MVE_VQRDMULH_qr_m<MVE_v4s32>;
-multiclass MVE_VxxMUL_qr_f_m<MVEVectorVTInfo VTI> {
+multiclass MVE_VxxMUL_qr_f_m<MVEVectorVTInfo VTI, SDPatternOperator IdentityVec> {
let validForTailPredication = 1 in
def "" : MVE_VxxMUL_qr<"vmul", VTI.Suffix, VTI.Size{0}, 0b11, VTI.Size>;
defm : MVE_TwoOpPatternDup<VTI, fmul, int_arm_mve_mul_predicated, (? ),
- !cast<Instruction>(NAME)>;
+ !cast<Instruction>(NAME), IdentityVec>;
}
let Predicates = [HasMVEFloat] in {
- defm MVE_VMUL_qr_f16 : MVE_VxxMUL_qr_f_m<MVE_v8f16>;
- defm MVE_VMUL_qr_f32 : MVE_VxxMUL_qr_f_m<MVE_v4f32>;
+ defm MVE_VMUL_qr_f16 : MVE_VxxMUL_qr_f_m<MVE_v8f16, ARMimmOneH>;
+ defm MVE_VMUL_qr_f32 : MVE_VxxMUL_qr_f_m<MVE_v4f32, ARMimmOneF>;
}
class MVE_VFMAMLA_qr<string iname, string suffix,
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb.td b/llvm/lib/Target/ARM/ARMInstrThumb.td
index bf717a4056e9..f09ad8167600 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -1576,6 +1576,8 @@ def : T1Pat<(srl (bswap (extloadi16 t_addrmode_is2:$addr)), (i32 16)),
(tREV16 (tLDRHi t_addrmode_is2:$addr))>;
def : T1Pat<(srl (bswap (extloadi16 t_addrmode_rr:$addr)), (i32 16)),
(tREV16 (tLDRHr t_addrmode_rr:$addr))>;
+def : T1Pat<(srl (bswap top16Zero:$Rn), (i32 16)),
+ (tREV16 tGPR:$Rn)>;
def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)),
t_addrmode_is2:$addr),
(tSTRHi(tREV16 tGPR:$Rn), t_addrmode_is2:$addr)>;
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index 783db9dde17f..4471317f4ea4 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -1191,9 +1191,9 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
let DecoderMethod = "DecodeT2LoadImm12";
}
- def i8 : T2Ii8 <(outs target:$Rt), (ins t2addrmode_negimm8:$addr), iii,
- opc, "\t$Rt, $addr",
- [(set target:$Rt, (opnode t2addrmode_negimm8:$addr))]>,
+ def i8 : T2Ii8n <(outs target:$Rt), (ins t2addrmode_negimm8:$addr), iii,
+ opc, "\t$Rt, $addr",
+ [(set target:$Rt, (opnode t2addrmode_negimm8:$addr))]>,
Sched<[WriteLd]> {
bits<4> Rt;
bits<13> addr;
@@ -1284,9 +1284,9 @@ multiclass T2I_st<bits<2> opcod, string opc,
let Inst{23} = addr{12}; // U
let Inst{11-0} = addr{11-0}; // imm
}
- def i8 : T2Ii8 <(outs), (ins target:$Rt, t2addrmode_negimm8:$addr), iii,
- opc, "\t$Rt, $addr",
- [(opnode target:$Rt, t2addrmode_negimm8:$addr)]>,
+ def i8 : T2Ii8n <(outs), (ins target:$Rt, t2addrmode_negimm8:$addr), iii,
+ opc, "\t$Rt, $addr",
+ [(opnode target:$Rt, t2addrmode_negimm8:$addr)]>,
Sched<[WriteST]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0000;
@@ -1580,8 +1580,8 @@ def t2LDR_POST_imm : t2AsmPseudo<"ldr${p}.w $Rt, $Rn, $imm",
// LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110).
// Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4
class T2IldT<bit signed, bits<2> type, string opc, InstrItinClass ii>
- : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_posimm8:$addr), ii, opc,
- "\t$Rt, $addr", []>, Sched<[WriteLd]> {
+ : T2Ii8p<(outs rGPR:$Rt), (ins t2addrmode_posimm8:$addr), ii, opc,
+ "\t$Rt, $addr", []>, Sched<[WriteLd]> {
bits<4> Rt;
bits<13> addr;
let Inst{31-27} = 0b11111;
@@ -1747,8 +1747,8 @@ def t2STR_POST_imm : t2AsmPseudo<"str${p}.w $Rt, $Rn, $imm",
// only.
// Ref: A8.6.193 STR (immediate, Thumb) Encoding T4
class T2IstT<bits<2> type, string opc, InstrItinClass ii>
- : T2Ii8<(outs), (ins rGPR:$Rt, t2addrmode_imm8:$addr), ii, opc,
- "\t$Rt, $addr", []>, Sched<[WriteST]> {
+ : T2Ii8p<(outs), (ins rGPR:$Rt, t2addrmode_posimm8:$addr), ii, opc,
+ "\t$Rt, $addr", []>, Sched<[WriteST]> {
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
let Inst{24} = 0; // not signed
@@ -1851,8 +1851,8 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> {
let DecoderMethod = "DecodeT2LoadImm12";
}
- def i8 : T2Ii8<(outs), (ins t2addrmode_negimm8:$addr), IIC_Preload, opc,
- "\t$addr",
+ def i8 : T2Ii8n<(outs), (ins t2addrmode_negimm8:$addr), IIC_Preload, opc,
+ "\t$addr",
[(ARMPreload t2addrmode_negimm8:$addr, (i32 write), (i32 instr))]>,
Sched<[WritePreLd]> {
let Inst{31-25} = 0b1111100;
@@ -2926,18 +2926,11 @@ let AddedComplexity = 1 in
def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm),
(t2BICri rGPR:$src, t2_so_imm_not:$imm)>;
-// top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise
-def top16Zero: PatLeaf<(i32 rGPR:$src), [{
- return !SDValue(N,0)->getValueType(0).isVector() &&
- CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
- }]>;
-
// so_imm_notSext is needed instead of so_imm_not, as the value of imm
// will match the extended, not the original bitWidth for $src.
def : T2Pat<(and top16Zero:$src, t2_so_imm_notSext:$imm),
(t2BICri rGPR:$src, t2_so_imm_notSext:$imm)>;
-
// FIXME: Disable this pattern on Darwin to workaround an assembler bug.
def : T2Pat<(or rGPR:$src, t2_so_imm_not:$imm),
(t2ORNri rGPR:$src, t2_so_imm_not:$imm)>,
@@ -3283,6 +3276,9 @@ def t2REV16 : T2I_misc<0b01, 0b01, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
[(set rGPR:$Rd, (rotr (bswap rGPR:$Rm), (i32 16)))]>,
Sched<[WriteALU]>;
+def : T2Pat<(srl (bswap top16Zero:$Rn), (i32 16)),
+ (t2REV16 rGPR:$Rn)>;
+
def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
"revsh", ".w\t$Rd, $Rm",
[(set rGPR:$Rd, (sra (bswap rGPR:$Rm), (i32 16)))]>,
@@ -4059,6 +4055,8 @@ def t2HINT : T2I<(outs), (ins imm0_239:$imm), NoItinerary, "hint", ".w\t$imm",
bits<8> imm;
let Inst{31-3} = 0b11110011101011111000000000000;
let Inst{7-0} = imm;
+
+ let DecoderMethod = "DecodeT2HintSpaceInstruction";
}
def : t2InstAlias<"hint$p $imm", (t2HINT imm0_239:$imm, pred:$p), 0>;
@@ -4079,6 +4077,11 @@ def : t2InstAlias<"esb$p", (t2HINT 16, pred:$p), 0> {
def : t2InstAlias<"csdb$p.w", (t2HINT 20, pred:$p), 0>;
def : t2InstAlias<"csdb$p", (t2HINT 20, pred:$p), 1>;
+def : t2InstAlias<"pacbti$p r12,lr,sp", (t2HINT 13, pred:$p), 1>;
+def : t2InstAlias<"bti$p", (t2HINT 15, pred:$p), 1>;
+def : t2InstAlias<"pac$p r12,lr,sp", (t2HINT 29, pred:$p), 1>;
+def : t2InstAlias<"aut$p r12,lr,sp", (t2HINT 45, pred:$p), 1>;
+
def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt",
[(int_arm_dbg imm0_15:$opt)]> {
bits<4> opt;
@@ -4254,6 +4257,19 @@ def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2MOVi32imm tglobaladdr :$dst)>,
def : T2Pat<(ARMWrapperJT tjumptable:$dst), (t2LEApcrelJT tjumptable:$dst)>;
+let hasNoSchedulingInfo = 1 in {
+def t2LDRLIT_ga_pcrel : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr),
+ IIC_iLoadiALU,
+ [(set rGPR:$dst,
+ (ARMWrapperPIC tglobaladdr:$addr))]>,
+ Requires<[IsThumb, HasV8MBaseline, DontUseMovtInPic]>;
+}
+
+// TLS globals
+def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr),
+ (t2LDRLIT_ga_pcrel tglobaltlsaddr:$addr)>,
+ Requires<[IsThumb, HasV8MBaseline, DontUseMovtInPic]>;
+
// Pseudo instruction that combines ldr from constpool and add pc. This should
// be expanded into two instructions late to allow if-conversion and
// scheduling.
@@ -5607,6 +5623,15 @@ let Predicates = [HasV8_1MMainline] in {
defm : CSPats<ARMcsinv, t2CSINV>;
defm : CSPats<ARMcsneg, t2CSNEG>;
+ def : T2Pat<(ARMcmov (i32 1), (i32 0), cmovpred:$imm),
+ (t2CSINC ZR, ZR, imm0_31:$imm)>;
+ def : T2Pat<(ARMcmov (i32 -1), (i32 0), cmovpred:$imm),
+ (t2CSINV ZR, ZR, imm0_31:$imm)>;
+ def : T2Pat<(ARMcmov (i32 0), (i32 1), cmovpred:$imm),
+ (t2CSINC ZR, ZR, (inv_cond_XFORM imm:$imm))>;
+ def : T2Pat<(ARMcmov (i32 0), (i32 -1), cmovpred:$imm),
+ (t2CSINV ZR, ZR, (inv_cond_XFORM imm:$imm))>;
+
multiclass ModifiedV8_1CSEL<Instruction Insn, dag modvalue> {
def : T2Pat<(ARMcmov modvalue, GPRwithZR:$tval, cmovpred:$imm),
(Insn GPRwithZR:$tval, GPRwithZR:$fval, imm0_31:$imm)>;
@@ -5636,3 +5661,78 @@ let Predicates = [HasV8_1MMainline] in {
def : InstAlias<"cneg\t$Rd, $Rn, $fcond",
(t2CSNEG rGPR:$Rd, GPRwithZRnosp:$Rn, GPRwithZRnosp:$Rn, pred_noal_inv:$fcond)>;
}
+
+
+// PACBTI
+let Predicates = [IsThumb2, HasV8_1MMainline, HasPACBTI] in {
+def t2PACG : V8_1MI<(outs rGPR:$Rd),
+ (ins pred:$p, GPRnopc:$Rn, GPRnopc:$Rm),
+ AddrModeNone, NoItinerary, "pacg${p}", "$Rd, $Rn, $Rm", "", []> {
+ bits<4> Rd;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{31-20} = 0b111110110110;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = 0b1111;
+ let Inst{11-8} = Rd;
+ let Inst{7-4} = 0b0000;
+ let Inst{3-0} = Rm;
+}
+
+let hasSideEffects = 1 in {
+class PACBTIAut<dag iops, string asm, bit b>
+ : V8_1MI<(outs), iops,
+ AddrModeNone, NoItinerary, asm, "$Ra, $Rn, $Rm", "", []> {
+ bits<4> Ra;
+ bits<4> Rn;
+ bits<4> Rm;
+ let Inst{31-20} = 0b111110110101;
+ let Inst{19-16} = Rn;
+ let Inst{15-12} = Ra;
+ let Inst{11-5} = 0b1111000;
+ let Inst{4} = b;
+ let Inst{3-0} = Rm;
+}
+}
+
+def t2AUTG : PACBTIAut<(ins pred:$p, GPRnosp:$Ra, GPRnopc:$Rn, GPRnopc:$Rm),
+ "autg${p}", 0>;
+
+let isBranch = 1, isTerminator = 1, isIndirectBranch = 1 in {
+ def t2BXAUT : PACBTIAut<(ins pred:$p, GPRnosp:$Ra, rGPR:$Rn, GPRnopc:$Rm),
+ "bxaut${p}", 1>;
+}
+}
+
+
+class PACBTIHintSpaceInst<string asm, string ops, bits<8> imm>
+ : V8_1MI<(outs), (ins), AddrModeNone, NoItinerary, asm, ops, "", []> {
+ let Inst{31-8} = 0b111100111010111110000000;
+ let Inst{7-0} = imm;
+
+ let Unpredictable{19-16} = 0b1111;
+ let Unpredictable{13-11} = 0b101;
+
+ let DecoderMethod = "DecodeT2HintSpaceInstruction";
+}
+
+class PACBTIHintSpaceNoOpsInst<string asm, bits<8> imm>
+ : PACBTIHintSpaceInst<asm, "", imm>;
+
+class PACBTIHintSpaceDefInst<string asm, bits<8> imm>
+ : PACBTIHintSpaceInst<asm, "r12, lr, sp", imm> {
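+  // pac and pacbti compute an authentication code for LR, with SP as the
+  // modifier, and write the result to R12.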
+ let Defs = [R12];
+ let Uses = [LR, SP];
+}
+
+class PACBTIHintSpaceUseInst<string asm, bits<8> imm>
+ : PACBTIHintSpaceInst<asm, "r12, lr, sp", imm> {
+ let Uses = [R12, LR, SP];
+}
+
+def t2PAC : PACBTIHintSpaceDefInst<"pac", 0b00011101>;
+def t2PACBTI : PACBTIHintSpaceDefInst<"pacbti", 0b00001101>;
+def t2BTI : PACBTIHintSpaceNoOpsInst<"bti", 0b00001111>;
+def t2AUT : PACBTIHintSpaceUseInst<"aut", 0b00101101> {
+ let hasSideEffects = 1;
+}
diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 6e259b1baf97..3b10c60a0654 100644
--- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -1298,8 +1298,8 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
// Can't use an updating ld/st if the base register is also a dest
// register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
- for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
- if (MI->getOperand(i).getReg() == Base)
+ for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2))
+ if (MO.getReg() == Base)
return false;
int Bytes = getLSMultipleTransferSize(MI);
@@ -1326,8 +1326,8 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
return false;
bool HighRegsUsed = false;
- for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
- if (MI->getOperand(i).getReg() >= ARM::R8) {
+ for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2))
+ if (MO.getReg() >= ARM::R8) {
HighRegsUsed = true;
break;
}
@@ -1350,8 +1350,8 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
.addImm(Pred).addReg(PredReg);
// Transfer the rest of operands.
- for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
- MIB.add(MI->getOperand(OpNum));
+ for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 3))
+ MIB.add(MO);
// Transfer memoperands.
MIB.setMemRefs(MI->memoperands());
@@ -2119,9 +2119,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
isThumb1 = AFI->isThumbFunction() && !isThumb2;
bool Modified = false;
- for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
- ++MFI) {
- MachineBasicBlock &MBB = *MFI;
+ for (MachineBasicBlock &MBB : Fn) {
Modified |= LoadStoreMultipleOpti(MBB);
if (STI->hasV5TOps())
Modified |= MergeReturnIntoLDM(MBB);
@@ -2710,13 +2708,13 @@ static bool isLegalOrConvertableAddressImm(unsigned Opcode, int Imm,
if (isLegalAddressImm(Opcode, Imm, TII))
return true;
- // We can convert AddrModeT2_i12 to AddrModeT2_i8.
+ // We can convert AddrModeT2_i12 to AddrModeT2_i8neg.
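+  // AddrModeT2_i8neg encodes only negative offsets, in the range [-255, -1].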
const MCInstrDesc &Desc = TII->get(Opcode);
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
switch (AddrMode) {
case ARMII::AddrModeT2_i12:
CodesizeEstimate += 1;
- return std::abs(Imm) < (((1 << 8) * 1) - 1);
+ return Imm < 0 && -Imm < ((1 << 8) * 1);
}
return false;
}
diff --git a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
index 507c3e69b3a4..308d5e7889f2 100644
--- a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
@@ -13,8 +13,63 @@ using namespace llvm;
void ARMFunctionInfo::anchor() {}
+static bool GetBranchTargetEnforcement(MachineFunction &MF) {
+ const auto &Subtarget = MF.getSubtarget<ARMSubtarget>();
+ if (!Subtarget.isMClass() || !Subtarget.hasV7Ops())
+ return false;
+
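+  // A function-level "branch-target-enforcement" attribute overrides the
+  // module flag; the flag is consulted only when the attribute is absent.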
+ const Function &F = MF.getFunction();
+ if (!F.hasFnAttribute("branch-target-enforcement")) {
+ if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
+ F.getParent()->getModuleFlag("branch-target-enforcement")))
+ return BTE->getZExtValue();
+ return false;
+ }
+
+ const StringRef BTIEnable =
+ F.getFnAttribute("branch-target-enforcement").getValueAsString();
+ assert(BTIEnable.equals_insensitive("true") ||
+ BTIEnable.equals_insensitive("false"));
+ return BTIEnable.equals_insensitive("true");
+}
+
+// The pair returns values for the ARMFunctionInfo members
+// SignReturnAddress and SignReturnAddressAll respectively.
+static std::pair<bool, bool> GetSignReturnAddress(const Function &F) {
+ if (!F.hasFnAttribute("sign-return-address")) {
+ const Module &M = *F.getParent();
+ if (const auto *Sign = mdconst::extract_or_null<ConstantInt>(
+ M.getModuleFlag("sign-return-address"))) {
+ if (Sign->getZExtValue()) {
+ if (const auto *All = mdconst::extract_or_null<ConstantInt>(
+ M.getModuleFlag("sign-return-address-all")))
+ return {true, All->getZExtValue()};
+ return {true, false};
+ }
+ }
+ return {false, false};
+ }
+
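+  // When present, the "sign-return-address" attribute is one of "none",
+  // "non-leaf" or "all".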
+ StringRef Scope = F.getFnAttribute("sign-return-address").getValueAsString();
+ if (Scope.equals("none"))
+ return {false, false};
+
+ if (Scope.equals("all"))
+ return {true, true};
+
+ assert(Scope.equals("non-leaf"));
+ return {true, false};
+}
+
ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF)
: isThumb(MF.getSubtarget<ARMSubtarget>().isThumb()),
hasThumb2(MF.getSubtarget<ARMSubtarget>().hasThumb2()),
IsCmseNSEntry(MF.getFunction().hasFnAttribute("cmse_nonsecure_entry")),
- IsCmseNSCall(MF.getFunction().hasFnAttribute("cmse_nonsecure_call")) {}
+ IsCmseNSCall(MF.getFunction().hasFnAttribute("cmse_nonsecure_call")),
+ BranchTargetEnforcement(GetBranchTargetEnforcement(MF)) {
+
+ const auto &Subtarget = MF.getSubtarget<ARMSubtarget>();
+ if (Subtarget.isMClass() && Subtarget.hasV7Ops())
+ std::tie(SignReturnAddress, SignReturnAddressAll) =
+ GetSignReturnAddress(MF.getFunction());
+}
diff --git a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
index 851655284060..4077fc058217 100644
--- a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -142,6 +142,17 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// con/destructors).
bool PreservesR0 = false;
+ /// True if the function should sign its return address.
+ bool SignReturnAddress = false;
+
+  /// True if the function should sign its return address, even if LR is not
+ /// saved.
+ bool SignReturnAddressAll = false;
+
+ /// True if BTI instructions should be placed at potential indirect jump
+ /// destinations.
+ bool BranchTargetEnforcement = false;
+
public:
ARMFunctionInfo() = default;
@@ -268,6 +279,20 @@ public:
void setPreservesR0() { PreservesR0 = true; }
bool getPreservesR0() const { return PreservesR0; }
+
+ bool shouldSignReturnAddress() const {
+ return shouldSignReturnAddress(LRSpilled);
+ }
+
+ bool shouldSignReturnAddress(bool SpillsLR) const {
+ if (!SignReturnAddress)
+ return false;
+ if (SignReturnAddressAll)
+ return true;
+    return SpillsLR;
+ }
+
+ bool branchTargetEnforcement() const { return BranchTargetEnforcement; }
};
} // end namespace llvm
diff --git a/llvm/lib/Target/ARM/ARMPredicates.td b/llvm/lib/Target/ARM/ARMPredicates.td
index 2dc097566d14..c0dc6a363471 100644
--- a/llvm/lib/Target/ARM/ARMPredicates.td
+++ b/llvm/lib/Target/ARM/ARMPredicates.td
@@ -107,6 +107,8 @@ def HasRAS : Predicate<"Subtarget->hasRAS()">,
AssemblerPredicate<(all_of FeatureRAS), "ras">;
def HasLOB : Predicate<"Subtarget->hasLOB()">,
AssemblerPredicate<(all_of FeatureLOB), "lob">;
+def HasPACBTI : Predicate<"Subtarget->hasPACBTI()">,
+ AssemblerPredicate<(all_of FeaturePACBTI), "pacbti">;
def HasFP16 : Predicate<"Subtarget->hasFP16()">,
AssemblerPredicate<(all_of FeatureFP16),"half-float conversions">;
def HasFullFP16 : Predicate<"Subtarget->hasFullFP16()">,
diff --git a/llvm/lib/Target/ARM/ARMRegisterInfo.td b/llvm/lib/Target/ARM/ARMRegisterInfo.td
index 9752b3166b45..760a5a5a20cf 100644
--- a/llvm/lib/Target/ARM/ARMRegisterInfo.td
+++ b/llvm/lib/Target/ARM/ARMRegisterInfo.td
@@ -277,6 +277,16 @@ def GPRwithAPSR : RegisterClass<"ARM", [i32], 32, (add (sub GPR, PC), APSR_NZCV)
let DiagnosticString = "operand must be a register in range [r0, r14] or apsr_nzcv";
}
+// GPRs without the SP register. Used for BXAUT and AUTG
+def GPRnosp : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12), LR, PC)> {
+ let AltOrders = [(add LR, GPRnosp), (trunc GPRnosp, 8),
+ (add (trunc GPRnosp, 8), R12, LR, (shl GPRnosp, 8))];
+ let AltOrderSelect = [{
+ return MF.getSubtarget<ARMSubtarget>().getGPRAllocationOrder(MF);
+ }];
+ let DiagnosticString = "operand must be a register in range [r0, r12] or LR or PC";
+}
+
// GPRs without the PC and SP registers but with APSR. Used by CLRM instruction.
def GPRwithAPSRnosp : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12), LR, APSR)> {
let isAllocatable = 0;
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 5e1217b6a468..d51a888c951f 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -373,6 +373,8 @@ protected:
/// HasLOB - if true, the processor supports the Low Overhead Branch extension
bool HasLOB = false;
+ bool HasPACBTI = false;
+
/// If true, the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are
/// particularly effective at zeroing a VFP register.
bool HasZeroCycleZeroing = false;
@@ -671,6 +673,7 @@ public:
bool hasCRC() const { return HasCRC; }
bool hasRAS() const { return HasRAS; }
bool hasLOB() const { return HasLOB; }
+ bool hasPACBTI() const { return HasPACBTI; }
bool hasVirtualization() const { return HasVirtualization; }
bool useNEONForSinglePrecisionFP() const {
diff --git a/llvm/lib/Target/ARM/ARMSystemRegister.td b/llvm/lib/Target/ARM/ARMSystemRegister.td
index f21c7f0246f9..c03db15d1041 100644
--- a/llvm/lib/Target/ARM/ARMSystemRegister.td
+++ b/llvm/lib/Target/ARM/ARMSystemRegister.td
@@ -106,6 +106,24 @@ def : MClassSysReg<0, 0, 1, 0x894, "control_ns">;
def : MClassSysReg<0, 0, 1, 0x898, "sp_ns">;
}
+let Requires = [{ {ARM::FeaturePACBTI} }] in {
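+// PAC key system registers; the *_ns variants are the Non-secure banked
+// copies.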
+def : MClassSysReg<0, 0, 1, 0x820, "pac_key_p_0">;
+def : MClassSysReg<0, 0, 1, 0x821, "pac_key_p_1">;
+def : MClassSysReg<0, 0, 1, 0x822, "pac_key_p_2">;
+def : MClassSysReg<0, 0, 1, 0x823, "pac_key_p_3">;
+def : MClassSysReg<0, 0, 1, 0x824, "pac_key_u_0">;
+def : MClassSysReg<0, 0, 1, 0x825, "pac_key_u_1">;
+def : MClassSysReg<0, 0, 1, 0x826, "pac_key_u_2">;
+def : MClassSysReg<0, 0, 1, 0x827, "pac_key_u_3">;
+def : MClassSysReg<0, 0, 1, 0x8a0, "pac_key_p_0_ns">;
+def : MClassSysReg<0, 0, 1, 0x8a1, "pac_key_p_1_ns">;
+def : MClassSysReg<0, 0, 1, 0x8a2, "pac_key_p_2_ns">;
+def : MClassSysReg<0, 0, 1, 0x8a3, "pac_key_p_3_ns">;
+def : MClassSysReg<0, 0, 1, 0x8a4, "pac_key_u_0_ns">;
+def : MClassSysReg<0, 0, 1, 0x8a5, "pac_key_u_1_ns">;
+def : MClassSysReg<0, 0, 1, 0x8a6, "pac_key_u_2_ns">;
+def : MClassSysReg<0, 0, 1, 0x8a7, "pac_key_u_3_ns">;
+}
// Banked Registers
//
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index 833c7effd31c..0b314ac2a41e 100644
--- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -92,6 +92,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeARMTarget() {
initializeARMLoadStoreOptPass(Registry);
initializeARMPreAllocLoadStoreOptPass(Registry);
initializeARMParallelDSPPass(Registry);
+ initializeARMBranchTargetsPass(Registry);
initializeARMConstantIslandsPass(Registry);
initializeARMExecutionDomainFixPass(Registry);
initializeARMExpandPseudoPass(Registry);
@@ -571,6 +572,7 @@ void ARMPassConfig::addPreEmitPass() {
}
void ARMPassConfig::addPreEmitPass2() {
+ addPass(createARMBranchTargetsPass());
addPass(createARMConstantIslandPass());
addPass(createARMLowOverheadLoopsPass());
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 88de84a4fd78..602c6745d310 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -334,8 +334,9 @@ InstructionCost ARMTTIImpl::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
}
// Checks whether Inst is part of a min(max()) or max(min()) pattern
-// that will match to an SSAT instruction
-static bool isSSATMinMaxPattern(Instruction *Inst, const APInt &Imm) {
+// that will match to an SSAT instruction. Returns the instruction being
+// saturated, or null if no saturation pattern was found.
+static Value *isSSATMinMaxPattern(Instruction *Inst, const APInt &Imm) {
Value *LHS, *RHS;
ConstantInt *C;
SelectPatternFlavor InstSPF = matchSelectPattern(Inst, LHS, RHS).Flavor;
@@ -358,12 +359,27 @@ static bool isSSATMinMaxPattern(Instruction *Inst, const APInt &Imm) {
return false;
};
- if (isSSatMin(Inst->getOperand(1)) ||
- (Inst->hasNUses(2) && (isSSatMin(*Inst->user_begin()) ||
- isSSatMin(*(++Inst->user_begin())))))
- return true;
+ if (isSSatMin(Inst->getOperand(1)))
+ return cast<Instruction>(Inst->getOperand(1))->getOperand(1);
+ if (Inst->hasNUses(2) &&
+ (isSSatMin(*Inst->user_begin()) || isSSatMin(*(++Inst->user_begin()))))
+ return Inst->getOperand(1);
}
- return false;
+ return nullptr;
+}
+
+// Look for a FP Saturation pattern, where the instruction can be simplified to
+// a fptosi.sat. max(min(fptosi)). The constant in this case is always free.
+static bool isFPSatMinMaxPattern(Instruction *Inst, const APInt &Imm) {
+ if (Imm.getBitWidth() != 64 ||
+ Imm != APInt::getHighBitsSet(64, 33)) // -2147483648
+ return false;
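+  // The clamp may be matched on Inst itself, or Inst may be the icmp feeding
+  // the select that forms it; in the latter case check the single user.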
+ Value *FP = isSSATMinMaxPattern(Inst, Imm);
+ if (!FP && isa<ICmpInst>(Inst) && Inst->hasOneUse())
+ FP = isSSATMinMaxPattern(cast<Instruction>(*Inst->user_begin()), Imm);
+ if (!FP)
+ return false;
+ return isa<FPToSIInst>(FP);
}
InstructionCost ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
@@ -423,6 +439,9 @@ InstructionCost ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
return 0;
}
+ if (Inst && ST->hasVFP2Base() && isFPSatMinMaxPattern(Inst, Imm))
+ return 0;
+
// We can convert <= -1 to < 0, which is generally quite cheap.
if (Inst && Opcode == Instruction::ICmp && Idx == 1 && Imm.isAllOnesValue()) {
ICmpInst::Predicate Pred = cast<ICmpInst>(Inst)->getPredicate();
diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 64d2e1bfa9b2..39f407ba7149 100644
--- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -6429,15 +6429,17 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic == "hvc" ||
Mnemonic.startswith("vsel") || Mnemonic == "vins" || Mnemonic == "vmovx" ||
Mnemonic == "bxns" || Mnemonic == "blxns" ||
- Mnemonic == "vdot" || Mnemonic == "vmmla" ||
+ Mnemonic == "vdot" || Mnemonic == "vmmla" ||
Mnemonic == "vudot" || Mnemonic == "vsdot" ||
Mnemonic == "vcmla" || Mnemonic == "vcadd" ||
Mnemonic == "vfmal" || Mnemonic == "vfmsl" ||
- Mnemonic == "wls" || Mnemonic == "le" || Mnemonic == "dls" ||
- Mnemonic == "csel" || Mnemonic == "csinc" ||
+ Mnemonic == "wls" || Mnemonic == "le" || Mnemonic == "dls" ||
+ Mnemonic == "csel" || Mnemonic == "csinc" ||
Mnemonic == "csinv" || Mnemonic == "csneg" || Mnemonic == "cinc" ||
- Mnemonic == "cinv" || Mnemonic == "cneg" || Mnemonic == "cset" ||
- Mnemonic == "csetm")
+ Mnemonic == "cinv" || Mnemonic == "cneg" || Mnemonic == "cset" ||
+ Mnemonic == "csetm" ||
+ Mnemonic == "aut" || Mnemonic == "pac" || Mnemonic == "pacbti" ||
+ Mnemonic == "bti")
return Mnemonic;
// First, split out any predication code. Ignore mnemonics we know aren't
@@ -6581,9 +6583,11 @@ void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic,
Mnemonic == "csinc" || Mnemonic == "csinv" || Mnemonic == "csneg" ||
Mnemonic == "cinc" || Mnemonic == "cinv" || Mnemonic == "cneg" ||
Mnemonic == "cset" || Mnemonic == "csetm" ||
- Mnemonic.startswith("vpt") || Mnemonic.startswith("vpst") ||
(hasCDE() && MS.isCDEInstr(Mnemonic) &&
!MS.isITPredicableCDEInstr(Mnemonic)) ||
+ Mnemonic.startswith("vpt") || Mnemonic.startswith("vpst") ||
+ Mnemonic == "pac" || Mnemonic == "pacbti" || Mnemonic == "aut" ||
+ Mnemonic == "bti" ||
(hasMVE() &&
(Mnemonic.startswith("vst2") || Mnemonic.startswith("vld2") ||
Mnemonic.startswith("vst4") || Mnemonic.startswith("vld4") ||
@@ -12272,6 +12276,7 @@ bool ARMAsmParser::enableArchExtFeature(StringRef Name, SMLoc &ExtLoc) {
{ARM::FeatureFPARMv8, ARM::FeatureFullFP16}},
{ARM::AEK_RAS, {Feature_HasV8Bit}, {ARM::FeatureRAS}},
{ARM::AEK_LOB, {Feature_HasV8_1MMainlineBit}, {ARM::FeatureLOB}},
+ {ARM::AEK_PACBTI, {Feature_HasV8_1MMainlineBit}, {ARM::FeaturePACBTI}},
// FIXME: Unsupported extensions.
{ARM::AEK_OS, {}, {}},
{ARM::AEK_IWMMXT, {}, {}},
diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 9caef9f09ea9..c3df7dc88d79 100644
--- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -185,8 +185,11 @@ static DecodeStatus DecodetGPREvenRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus
DecodeGPRwithAPSR_NZCVnospRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeGPRnopcRegisterClass(MCInst &Inst,
- unsigned RegNo, uint64_t Address,
+static DecodeStatus DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeGPRnospRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
const void *Decoder);
static DecodeStatus DecodeGPRwithAPSRRegisterClass(MCInst &Inst,
unsigned RegNo, uint64_t Address,
@@ -287,6 +290,9 @@ static DecodeStatus DecodeSETPANInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeT2HintSpaceInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val,
@@ -1172,6 +1178,19 @@ DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo,
return S;
}
+static DecodeStatus DecodeGPRnospRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
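+  // SP (r13) is excluded from GPRnosp; decode it anyway but flag the
+  // instruction as UNPREDICTABLE via SoftFail.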
+ if (RegNo == 13)
+ S = MCDisassembler::SoftFail;
+
+ Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder));
+
+ return S;
+}
+
static DecodeStatus
DecodeGPRwithAPSRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
@@ -2441,6 +2460,31 @@ static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn,
return S;
}
+static DecodeStatus DecodeT2HintSpaceInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const void *Decoder) {
+ unsigned imm = fieldFromInstruction(Insn, 0, 8);
+
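+  // Immediates 0x0D, 0x1D, 0x2D and 0x0F select pacbti, pac, aut and bti
+  // respectively; any other value stays a generic t2HINT carrying the
+  // immediate as its operand.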
+ unsigned Opcode = ARM::t2HINT;
+
+ if (imm == 0x0D) {
+ Opcode = ARM::t2PACBTI;
+ } else if (imm == 0x1D) {
+ Opcode = ARM::t2PAC;
+ } else if (imm == 0x2D) {
+ Opcode = ARM::t2AUT;
+ } else if (imm == 0x0F) {
+ Opcode = ARM::t2BTI;
+ }
+
+ Inst.setOpcode(Opcode);
+ if (Opcode == ARM::t2HINT) {
+ Inst.addOperand(MCOperand::createImm(imm));
+ }
+
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -4726,6 +4770,25 @@ static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val,
if (!(FeatureBits[ARM::Feature8MSecExt]))
return MCDisassembler::Fail;
break;
+ case 0x20: // pac_key_p_0
+ case 0x21: // pac_key_p_1
+ case 0x22: // pac_key_p_2
+ case 0x23: // pac_key_p_3
+ case 0x24: // pac_key_u_0
+ case 0x25: // pac_key_u_1
+ case 0x26: // pac_key_u_2
+ case 0x27: // pac_key_u_3
+ case 0xa0: // pac_key_p_0_ns
+ case 0xa1: // pac_key_p_1_ns
+ case 0xa2: // pac_key_p_2_ns
+ case 0xa3: // pac_key_p_3_ns
+ case 0xa4: // pac_key_u_0_ns
+ case 0xa5: // pac_key_u_1_ns
+ case 0xa6: // pac_key_u_2_ns
+ case 0xa7: // pac_key_u_3_ns
+ if (!(FeatureBits[ARM::FeaturePACBTI]))
+ return MCDisassembler::Fail;
+ break;
default:
// Architecturally defined as unpredictable
S = MCDisassembler::SoftFail;
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index 43f7575df6db..f8de0320166a 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -195,16 +195,18 @@ namespace ARMII {
AddrModeT1_4 = 9,
AddrModeT1_s = 10, // i8 * 4 for pc and sp relative data
AddrModeT2_i12 = 11,
- AddrModeT2_i8 = 12,
- AddrModeT2_so = 13,
- AddrModeT2_pc = 14, // +/- i12 for pc relative data
- AddrModeT2_i8s4 = 15, // i8 * 4
- AddrMode_i12 = 16,
- AddrMode5FP16 = 17, // i8 * 2
- AddrModeT2_ldrex = 18, // i8 * 4, with unscaled offset in MCInst
- AddrModeT2_i7s4 = 19, // i7 * 4
- AddrModeT2_i7s2 = 20, // i7 * 2
- AddrModeT2_i7 = 21, // i7 * 1
+ AddrModeT2_i8 = 12, // +/- i8
+ AddrModeT2_i8pos = 13, // + i8
+ AddrModeT2_i8neg = 14, // - i8
+ AddrModeT2_so = 15,
+ AddrModeT2_pc = 16, // +/- i12 for pc relative data
+ AddrModeT2_i8s4 = 17, // i8 * 4
+ AddrMode_i12 = 18,
+ AddrMode5FP16 = 19, // i8 * 2
+ AddrModeT2_ldrex = 20, // i8 * 4, with unscaled offset in MCInst
+ AddrModeT2_i7s4 = 21, // i7 * 4
+ AddrModeT2_i7s2 = 22, // i7 * 2
+ AddrModeT2_i7 = 23, // i7 * 1
};
inline static const char *AddrModeToString(AddrMode addrmode) {
@@ -223,6 +225,8 @@ namespace ARMII {
case AddrModeT1_s: return "AddrModeT1_s";
case AddrModeT2_i12: return "AddrModeT2_i12";
case AddrModeT2_i8: return "AddrModeT2_i8";
+ case AddrModeT2_i8pos: return "AddrModeT2_i8pos";
+ case AddrModeT2_i8neg: return "AddrModeT2_i8neg";
case AddrModeT2_so: return "AddrModeT2_so";
case AddrModeT2_pc: return "AddrModeT2_pc";
case AddrModeT2_i8s4: return "AddrModeT2_i8s4";
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index 3e4c97630af6..02a2d01176fc 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -299,4 +299,9 @@ void ARMTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {
else if (STI.hasFeature(ARM::FeatureVirtualization))
emitAttribute(ARMBuildAttrs::Virtualization_use,
ARMBuildAttrs::AllowVirtualization);
+
+ if (STI.hasFeature(ARM::FeaturePACBTI)) {
+ emitAttribute(ARMBuildAttrs::PAC_extension, ARMBuildAttrs::AllowPAC);
+ emitAttribute(ARMBuildAttrs::BTI_extension, ARMBuildAttrs::AllowBTI);
+ }
}
diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index e4e95f63f0a6..224c61b9f065 100644
--- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -205,9 +205,9 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
return;
}
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- int FI = CSI[i].getFrameIdx();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned Reg = I.getReg();
+ int FI = I.getFrameIdx();
switch (Reg) {
case ARM::R8:
case ARM::R9:
@@ -266,10 +266,9 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
}
- for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
- E = CSI.end(); I != E; ++I) {
- unsigned Reg = I->getReg();
- int FI = I->getFrameIdx();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned Reg = I.getReg();
+ int FI = I.getFrameIdx();
switch (Reg) {
case ARM::R8:
case ARM::R9:
diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index bdb167a08e61..ebd139af2219 100644
--- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -261,7 +261,7 @@ void Thumb2InstrInfo::expandLoadStackGuard(
cast<GlobalValue>((*MI->memoperands_begin())->getValue());
if (MF.getSubtarget<ARMSubtarget>().isGVInGOT(GV))
- expandLoadStackGuardBase(MI, ARM::tLDRLIT_ga_pcrel, ARM::t2LDRi12);
+ expandLoadStackGuardBase(MI, ARM::t2LDRLIT_ga_pcrel, ARM::t2LDRi12);
else if (MF.getTarget().isPositionIndependent())
expandLoadStackGuardBase(MI, ARM::t2MOV_ga_pcrel, ARM::t2LDRi12);
else
@@ -634,7 +634,8 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
unsigned NumBits = 0;
unsigned Scale = 1;
- if (AddrMode == ARMII::AddrModeT2_i8 || AddrMode == ARMII::AddrModeT2_i12) {
+ if (AddrMode == ARMII::AddrModeT2_i8neg ||
+ AddrMode == ARMII::AddrModeT2_i12) {
// i8 supports only negative, and i12 supports only positive, so
// based on Offset sign convert Opcode to the appropriate
// instruction
diff --git a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
index 132516694f4e..1164b6ebbac3 100644
--- a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -502,8 +502,8 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
// For the non-writeback version (this one), the base register must be
// one of the registers being loaded.
bool isOK = false;
- for (unsigned i = 3; i < MI->getNumOperands(); ++i) {
- if (MI->getOperand(i).getReg() == BaseReg) {
+ for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 3)) {
+ if (MO.getReg() == BaseReg) {
isOK = true;
break;
}
@@ -527,8 +527,8 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
// numbered register (i.e. it's in operand 4 onwards) then with writeback
// the stored value is unknown, so we can't convert to tSTMIA_UPD.
Register BaseReg = MI->getOperand(0).getReg();
- for (unsigned i = 4; i < MI->getNumOperands(); ++i)
- if (MI->getOperand(i).getReg() == BaseReg)
+ for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 4))
+ if (MO.getReg() == BaseReg)
return false;
break;
@@ -611,8 +611,8 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
}
// Transfer the rest of operands.
- for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum)
- MIB.add(MI->getOperand(OpNum));
+ for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), OpNum))
+ MIB.add(MO);
// Transfer memoperands.
MIB.setMemRefs(MI->memoperands());
diff --git a/llvm/lib/Target/BPF/BPFMCInstLower.cpp b/llvm/lib/Target/BPF/BPFMCInstLower.cpp
index 846798a63cb7..2ce9c386f24c 100644
--- a/llvm/lib/Target/BPF/BPFMCInstLower.cpp
+++ b/llvm/lib/Target/BPF/BPFMCInstLower.cpp
@@ -47,9 +47,7 @@ MCOperand BPFMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
void BPFMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
-
+ for (const MachineOperand &MO : MI->operands()) {
MCOperand MCOp;
switch (MO.getType()) {
default:
diff --git a/llvm/lib/Target/Hexagon/BitTracker.cpp b/llvm/lib/Target/Hexagon/BitTracker.cpp
index 8bced3cec082..685bafd785df 100644
--- a/llvm/lib/Target/Hexagon/BitTracker.cpp
+++ b/llvm/lib/Target/Hexagon/BitTracker.cpp
@@ -214,9 +214,9 @@ bool BT::RegisterCell::meet(const RegisterCell &RC, Register SelfR) {
BT::RegisterCell &BT::RegisterCell::insert(const BT::RegisterCell &RC,
const BitMask &M) {
uint16_t B = M.first(), E = M.last(), W = width();
- // Sanity: M must be a valid mask for *this.
+ // M must be a valid mask for *this.
assert(B < W && E < W);
- // Sanity: the masked part of *this must have the same number of bits
+ // The masked part of *this must have the same number of bits
// as the source.
assert(B > E || E-B+1 == RC.width()); // B <= E => E-B+1 = |RC|.
assert(B <= E || E+(W-B)+1 == RC.width()); // E < B => E+(W-B)+1 = |RC|.
@@ -850,8 +850,7 @@ void BT::visitNonBranch(const MachineInstr &MI) {
bool Eval = ME.evaluate(MI, Map, ResMap);
if (Trace && Eval) {
- for (unsigned i = 0, n = MI.getNumOperands(); i < n; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.isUse())
continue;
RegisterRef RU(MO);
diff --git a/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp b/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
index 0f6dedeb28c3..1938a5c259da 100644
--- a/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
@@ -189,7 +189,7 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
unsigned NumDefs = 0;
- // Sanity verification: there should not be any defs with subregisters.
+ // Basic correctness check: there should not be any defs with subregisters.
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
diff --git a/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp b/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
index 43f0758f6598..8c3b9572201e 100644
--- a/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
@@ -476,10 +476,10 @@ namespace {
} // end anonymous namespace
static const NodeSet *node_class(GepNode *N, NodeSymRel &Rel) {
- for (NodeSymRel::iterator I = Rel.begin(), E = Rel.end(); I != E; ++I)
- if (I->count(N))
- return &*I;
- return nullptr;
+ for (const NodeSet &S : Rel)
+ if (S.count(N))
+ return &S;
+ return nullptr;
}
// Create an ordered pair of GepNode pointers. The pair will be used in
@@ -589,9 +589,8 @@ void HexagonCommonGEP::common() {
dbgs() << "{ " << I->first << ", " << I->second << " }\n";
dbgs() << "Gep equivalence classes:\n";
- for (NodeSymRel::iterator I = EqRel.begin(), E = EqRel.end(); I != E; ++I) {
+ for (const NodeSet &S : EqRel) {
dbgs() << '{';
- const NodeSet &S = *I;
for (NodeSet::const_iterator J = S.begin(), F = S.end(); J != F; ++J) {
if (J != S.begin())
dbgs() << ',';
@@ -604,8 +603,7 @@ void HexagonCommonGEP::common() {
// Create a projection from a NodeSet to the minimal element in it.
using ProjMap = std::map<const NodeSet *, GepNode *>;
ProjMap PM;
- for (NodeSymRel::iterator I = EqRel.begin(), E = EqRel.end(); I != E; ++I) {
- const NodeSet &S = *I;
+ for (const NodeSet &S : EqRel) {
GepNode *Min = *std::min_element(S.begin(), S.end(), NodeOrder);
std::pair<ProjMap::iterator,bool> Ins = PM.insert(std::make_pair(&S, Min));
(void)Ins;
@@ -1280,8 +1278,8 @@ bool HexagonCommonGEP::runOnFunction(Function &F) {
return false;
// For now bail out on C++ exception handling.
- for (Function::iterator A = F.begin(), Z = F.end(); A != Z; ++A)
- for (BasicBlock::iterator I = A->begin(), E = A->end(); I != E; ++I)
+ for (const BasicBlock &BB : F)
+ for (const Instruction &I : BB)
if (isa<InvokeInst>(I) || isa<LandingPadInst>(I))
return false;
diff --git a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
index a774baaa48e6..d3fcdb6ae9a8 100644
--- a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
@@ -1254,7 +1254,7 @@ void HCE::collect(MachineFunction &MF) {
void HCE::assignInits(const ExtRoot &ER, unsigned Begin, unsigned End,
AssignmentMap &IMap) {
- // Sanity check: make sure that all extenders in the range [Begin..End)
+ // Basic correctness: make sure that all extenders in the range [Begin..End)
// share the same root ER.
for (unsigned I = Begin; I != End; ++I)
assert(ER == ExtRoot(Extenders[I].getOp()));
diff --git a/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
index 23d0cc829e52..03b0f75b2dc1 100644
--- a/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
@@ -237,12 +237,9 @@ static bool isEvenReg(unsigned Reg) {
}
static void removeKillInfo(MachineInstr &MI, unsigned RegNotKilled) {
- for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
- MachineOperand &Op = MI.getOperand(I);
- if (!Op.isReg() || Op.getReg() != RegNotKilled || !Op.isKill())
- continue;
- Op.setIsKill(false);
- }
+ for (MachineOperand &Op : MI.operands())
+ if (Op.isReg() && Op.getReg() == RegNotKilled && Op.isKill())
+ Op.setIsKill(false);
}
/// Returns true if it is unsafe to move a copy instruction from \p UseReg to
@@ -403,10 +400,7 @@ HexagonCopyToCombine::findPotentialNewifiableTFRs(MachineBasicBlock &BB) {
// Mark TFRs that feed a potential new value store as such.
if (TII->mayBeNewStore(MI)) {
// Look for uses of TFR instructions.
- for (unsigned OpdIdx = 0, OpdE = MI.getNumOperands(); OpdIdx != OpdE;
- ++OpdIdx) {
- MachineOperand &Op = MI.getOperand(OpdIdx);
-
+ for (const MachineOperand &Op : MI.operands()) {
// Skip over anything except register uses.
if (!Op.isReg() || !Op.isUse() || !Op.getReg())
continue;
@@ -484,14 +478,13 @@ bool HexagonCopyToCombine::runOnMachineFunction(MachineFunction &MF) {
IsConst64Disabled = true;
// Traverse basic blocks.
- for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
- ++BI) {
+ for (MachineBasicBlock &MBB : MF) {
PotentiallyNewifiableTFR.clear();
- findPotentialNewifiableTFRs(*BI);
+ findPotentialNewifiableTFRs(MBB);
// Traverse instructions in basic block.
- for(MachineBasicBlock::iterator MI = BI->begin(), End = BI->end();
- MI != End;) {
+ for (MachineBasicBlock::iterator MI = MBB.begin(), End = MBB.end();
+ MI != End;) {
MachineInstr &I1 = *MI++;
if (I1.isDebugInstr())
diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
index bff596e69efd..12ceac545e9d 100644
--- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -1404,18 +1404,18 @@ bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB,
// Add callee-saved registers as use.
addCalleeSaveRegistersAsImpOperand(SaveRegsCall, CSI, false, true);
// Add live in registers.
- for (unsigned I = 0; I < CSI.size(); ++I)
- MBB.addLiveIn(CSI[I].getReg());
+ for (const CalleeSavedInfo &I : CSI)
+ MBB.addLiveIn(I.getReg());
return true;
}
- for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
- unsigned Reg = CSI[i].getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned Reg = I.getReg();
// Add live in registers. We treat eh_return callee saved register r0 - r3
// specially. They are not really callee saved registers as they are not
// supposed to be killed.
bool IsKill = !HRI.isEHReturnCalleeSaveReg(Reg);
- int FI = CSI[i].getFrameIdx();
+ int FI = I.getFrameIdx();
const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
HII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, &HRI);
if (IsKill)
@@ -1478,10 +1478,10 @@ bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB,
return true;
}
- for (unsigned i = 0; i < CSI.size(); ++i) {
- unsigned Reg = CSI[i].getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned Reg = I.getReg();
const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
- int FI = CSI[i].getFrameIdx();
+ int FI = I.getFrameIdx();
HII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI);
}
@@ -1619,8 +1619,8 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,
// (1) For each callee-saved register, add that register and all of its
// sub-registers to SRegs.
LLVM_DEBUG(dbgs() << "Initial CS registers: {");
- for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
- unsigned R = CSI[i].getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned R = I.getReg();
LLVM_DEBUG(dbgs() << ' ' << printReg(R, TRI));
for (MCSubRegIterator SR(R, TRI, true); SR.isValid(); ++SR)
SRegs[*SR] = true;
@@ -1720,10 +1720,10 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,
LLVM_DEBUG({
dbgs() << "CS information: {";
- for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
- int FI = CSI[i].getFrameIdx();
+ for (const CalleeSavedInfo &I : CSI) {
+ int FI = I.getFrameIdx();
int Off = MFI.getObjectOffset(FI);
- dbgs() << ' ' << printReg(CSI[i].getReg(), TRI) << ":fi#" << FI << ":sp";
+ dbgs() << ' ' << printReg(I.getReg(), TRI) << ":fi#" << FI << ":sp";
if (Off >= 0)
dbgs() << '+';
dbgs() << Off;
@@ -2634,8 +2634,8 @@ bool HexagonFrameLowering::shouldInlineCSR(const MachineFunction &MF,
// Check if CSI only has double registers, and if the registers form
// a contiguous block starting from D8.
BitVector Regs(Hexagon::NUM_TARGET_REGS);
- for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
- unsigned R = CSI[i].getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned R = I.getReg();
if (!Hexagon::DoubleRegsRegClass.contains(R))
return true;
Regs[R] = true;
diff --git a/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp b/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
index 02da2f29591a..46c1fbc6eeb2 100644
--- a/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
@@ -597,19 +597,12 @@ void HexagonGenInsert::dump_map() const {
void HexagonGenInsert::buildOrderingMF(RegisterOrdering &RO) const {
unsigned Index = 0;
- using mf_iterator = MachineFunction::const_iterator;
-
- for (mf_iterator A = MFN->begin(), Z = MFN->end(); A != Z; ++A) {
- const MachineBasicBlock &B = *A;
+ for (const MachineBasicBlock &B : *MFN) {
if (!CMS->BT.reached(&B))
continue;
- using mb_iterator = MachineBasicBlock::const_iterator;
-
- for (mb_iterator I = B.begin(), E = B.end(); I != E; ++I) {
- const MachineInstr *MI = &*I;
- for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineInstr &MI : B) {
+ for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.isDef()) {
Register R = MO.getReg();
assert(MO.getSubReg() == 0 && "Unexpected subregister in definition");
@@ -725,8 +718,7 @@ bool HexagonGenInsert::findNonSelfReference(unsigned VR) const {
void HexagonGenInsert::getInstrDefs(const MachineInstr *MI,
RegisterSet &Defs) const {
- for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
Register R = MO.getReg();
@@ -738,8 +730,7 @@ void HexagonGenInsert::getInstrDefs(const MachineInstr *MI,
void HexagonGenInsert::getInstrUses(const MachineInstr *MI,
RegisterSet &Uses) const {
- for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || !MO.isUse())
continue;
Register R = MO.getReg();
@@ -942,12 +933,11 @@ void HexagonGenInsert::collectInBlock(MachineBasicBlock *B,
// can remove them from the list of available registers once all DT
// successors have been processed.
RegisterSet BlockDefs, InsDefs;
- for (MachineBasicBlock::iterator I = B->begin(), E = B->end(); I != E; ++I) {
- MachineInstr *MI = &*I;
+ for (MachineInstr &MI : *B) {
InsDefs.clear();
- getInstrDefs(MI, InsDefs);
+ getInstrDefs(&MI, InsDefs);
// Leave those alone. They are more transparent than "insert".
- bool Skip = MI->isCopy() || MI->isRegSequence();
+ bool Skip = MI.isCopy() || MI.isRegSequence();
if (!Skip) {
// Visit all defined registers, and attempt to find the corresponding
@@ -1458,8 +1448,7 @@ bool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) {
for (auto I = B->rbegin(), E = B->rend(); I != E; ++I)
Instrs.push_back(&*I);
- for (auto I = Instrs.begin(), E = Instrs.end(); I != E; ++I) {
- MachineInstr *MI = *I;
+ for (MachineInstr *MI : Instrs) {
unsigned Opc = MI->getOpcode();
// Do not touch lifetime markers. This is why the target-independent DCE
// cannot be used.
@@ -1501,7 +1490,7 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) {
bool Timing = OptTiming, TimingDetail = Timing && OptTimingDetail;
bool Changed = false;
- // Sanity check: one, but not both.
+ // Verify: one, but not both.
assert(!OptSelectAll0 || !OptSelectHas0);
IFMap.clear();
diff --git a/llvm/lib/Target/Hexagon/HexagonGenMux.cpp b/llvm/lib/Target/Hexagon/HexagonGenMux.cpp
index cf4f13fb8c0d..55de02816fb8 100644
--- a/llvm/lib/Target/Hexagon/HexagonGenMux.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonGenMux.cpp
@@ -328,7 +328,7 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) {
unsigned MxOpc = getMuxOpcode(*MX.SrcT, *MX.SrcF);
if (!MxOpc)
continue;
- // Basic sanity check: since we are deleting instructions, validate the
+ // Basic correctness check: since we are deleting instructions, validate the
// iterators. There is a possibility that one of Def1 or Def2 is translated
// to "mux" and being considered for other "mux" instructions.
if (!MX.At->getParent() || !MX.Def1->getParent() || !MX.Def2->getParent())
diff --git a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
index d8d2025c5d27..1a66394e9757 100644
--- a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
@@ -205,16 +205,14 @@ bool HexagonGenPredicate::isConvertibleToPredForm(const MachineInstr *MI) {
}
void HexagonGenPredicate::collectPredicateGPR(MachineFunction &MF) {
- for (MachineFunction::iterator A = MF.begin(), Z = MF.end(); A != Z; ++A) {
- MachineBasicBlock &B = *A;
- for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) {
- MachineInstr *MI = &*I;
- unsigned Opc = MI->getOpcode();
+ for (MachineBasicBlock &B : MF) {
+ for (MachineInstr &MI : B) {
+ unsigned Opc = MI.getOpcode();
switch (Opc) {
case Hexagon::C2_tfrpr:
case TargetOpcode::COPY:
- if (isPredReg(MI->getOperand(1).getReg())) {
- RegisterSubReg RD = MI->getOperand(0);
+ if (isPredReg(MI.getOperand(1).getReg())) {
+ RegisterSubReg RD = MI.getOperand(0);
if (RD.R.isVirtual())
PredGPRs.insert(RD);
}
@@ -411,7 +409,7 @@ bool HexagonGenPredicate::convertToPredForm(MachineInstr *MI) {
NumOps = 2;
}
- // Some sanity: check that def is in operand #0.
+ // Check that def is in operand #0.
MachineOperand &Op0 = MI->getOperand(0);
assert(Op0.isDef());
RegisterSubReg OutR(Op0);
@@ -488,8 +486,8 @@ bool HexagonGenPredicate::eliminatePredCopies(MachineFunction &MF) {
}
}
- for (VectOfInst::iterator I = Erase.begin(), E = Erase.end(); I != E; ++I)
- (*I)->eraseFromParent();
+ for (MachineInstr *MI : Erase)
+ MI->eraseFromParent();
return Changed;
}
@@ -515,11 +513,8 @@ bool HexagonGenPredicate::runOnMachineFunction(MachineFunction &MF) {
Again = false;
VectOfInst Processed, Copy;
- using iterator = VectOfInst::iterator;
-
Copy = PUsers;
- for (iterator I = Copy.begin(), E = Copy.end(); I != E; ++I) {
- MachineInstr *MI = *I;
+ for (MachineInstr *MI : Copy) {
bool Done = convertToPredForm(MI);
if (Done) {
Processed.insert(MI);
diff --git a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index a4971ad712eb..5d2e1b259449 100644
--- a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -1014,12 +1014,10 @@ bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L,
LLVM_DEBUG(dbgs() << "\nhw_loop head, "
<< printMBBReference(**L->block_begin()));
for (MachineBasicBlock *MBB : L->getBlocks()) {
- for (MachineBasicBlock::iterator
- MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
- const MachineInstr *MI = &*MII;
- if (isInvalidLoopOperation(MI, IsInnerHWLoop)) {
+ for (const MachineInstr &MI : *MBB) {
+ if (isInvalidLoopOperation(&MI, IsInnerHWLoop)) {
LLVM_DEBUG(dbgs() << "\nCannot convert to hw_loop due to:";
- MI->dump(););
+ MI.dump(););
return true;
}
}
@@ -1034,8 +1032,7 @@ bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L,
bool HexagonHardwareLoops::isDead(const MachineInstr *MI,
SmallVectorImpl<MachineInstr *> &DeadPhis) const {
// Examine each operand.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
@@ -1089,8 +1086,7 @@ void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
// It is possible that some DBG_VALUE instructions refer to this
// instruction. Examine each def operand for such references;
// if found, mark the DBG_VALUE as undef (but don't delete it).
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
Register Reg = MO.getReg();
@@ -1123,7 +1119,7 @@ void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L,
bool &RecL0used,
bool &RecL1used) {
- // This is just for sanity.
+ // This is just to confirm basic correctness.
assert(L->getHeader() && "Loop without a header?");
bool Changed = false;
@@ -1877,8 +1873,7 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
if (TII->analyzeBranch(*ExitingBlock, TB, FB, Tmp1, false))
return nullptr;
- for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
- MachineBasicBlock *PB = *I;
+ for (MachineBasicBlock *PB : Preds) {
bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp1, false);
if (NotAnalyzed)
return nullptr;
@@ -1960,8 +1955,7 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
TB = FB = nullptr;
- for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
- MachineBasicBlock *PB = *I;
+ for (MachineBasicBlock *PB : Preds) {
if (PB != Latch) {
Tmp2.clear();
bool NotAnalyzed = TII->analyzeBranch(*PB, TB, FB, Tmp2, false);
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
index b50a0e29ecae..ed4874baf7c8 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -1006,7 +1006,7 @@ static void packSegmentMask(ArrayRef<int> Mask, ArrayRef<unsigned> OutSegMap,
static bool isPermutation(ArrayRef<int> Mask) {
// Check by adding all numbers only works if there is no overflow.
- assert(Mask.size() < 0x00007FFF && "Sanity failure");
+ assert(Mask.size() < 0x00007FFF && "Overflow failure");
int Sum = 0;
for (int Idx : Mask) {
if (Idx == -1)
@@ -1217,7 +1217,7 @@ OpRef HvxSelector::packs(ShuffleMask SM, OpRef Va, OpRef Vb,
} else if (Seg0 == ~1u) {
Seg0 = SegList[0] != Seg1 ? SegList[0] : SegList[1];
} else {
- assert(Seg1 == ~1u); // Sanity
+ assert(Seg1 == ~1u);
Seg1 = SegList[0] != Seg0 ? SegList[0] : SegList[1];
}
}
@@ -1265,7 +1265,7 @@ OpRef HvxSelector::packs(ShuffleMask SM, OpRef Va, OpRef Vb,
} else {
// BC or DA: this could be done via valign by SegLen.
// Do nothing here, because valign (if possible) will be generated
- // later on (make sure the Seg0 values are as expected, for sanity).
+ // later on (make sure the Seg0 values are as expected).
assert(Seg0 == 1 || Seg0 == 3);
}
}
@@ -1414,7 +1414,7 @@ OpRef HvxSelector::shuffs1(ShuffleMask SM, OpRef Va, ResultStack &Results) {
return OpRef::undef(getSingleVT(MVT::i8));
unsigned HalfLen = HwLen / 2;
- assert(isPowerOf2_32(HalfLen)); // Sanity.
+ assert(isPowerOf2_32(HalfLen));
// Handle special case where the output is the same half of the input
// repeated twice, i.e. if Va = AB, then handle the output of AA or BB.
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 29572e3106d1..88effed9f076 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -442,8 +442,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CLI.IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
IsVarArg, IsStructRet, StructAttrFlag, Outs,
OutVals, Ins, DAG);
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
+ for (const CCValAssign &VA : ArgLocs) {
if (VA.isMemLoc()) {
CLI.IsTailCall = false;
break;
@@ -2549,7 +2548,8 @@ HexagonTargetLowering::extractVector(SDValue VecV, SDValue IdxV,
// Special case for v{8,4,2}i1 (the only boolean vectors legal in Hexagon
// without any coprocessors).
if (ElemWidth == 1) {
- assert(VecWidth == VecTy.getVectorNumElements() && "Sanity failure");
+ assert(VecWidth == VecTy.getVectorNumElements() &&
+ "Vector elements should equal vector width size");
assert(VecWidth == 8 || VecWidth == 4 || VecWidth == 2);
// Check if this is an extract of the lowest bit.
if (IdxN) {
@@ -2863,8 +2863,7 @@ HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
Scale /= 2;
}
- // Another sanity check. At this point there should only be two words
- // left, and Scale should be 2.
+ // At this point there should only be two words left, and Scale should be 2.
assert(Scale == 2 && Words[IdxW].size() == 2);
SDValue WW = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 8900fca8bb78..f7237f496aee 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -9,6 +9,7 @@
#include "HexagonISelLowering.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/Support/CommandLine.h"
@@ -1846,16 +1847,18 @@ HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = MemN->getChain();
SDValue Base0 = MemN->getBasePtr();
SDValue Base1 = DAG.getMemBasePlusOffset(Base0, TypeSize::Fixed(HwLen), dl);
+ unsigned MemOpc = MemN->getOpcode();
MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr;
if (MachineMemOperand *MMO = MemN->getMemOperand()) {
MachineFunction &MF = DAG.getMachineFunction();
- MOp0 = MF.getMachineMemOperand(MMO, 0, HwLen);
- MOp1 = MF.getMachineMemOperand(MMO, HwLen, HwLen);
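+    // For masked loads/stores the number of bytes actually accessed depends
+    // on the mask, so use an unknown size for the split memory operands.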
+ uint64_t MemSize = (MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE)
+ ? (uint64_t)MemoryLocation::UnknownSize
+ : HwLen;
+ MOp0 = MF.getMachineMemOperand(MMO, 0, MemSize);
+ MOp1 = MF.getMachineMemOperand(MMO, HwLen, MemSize);
}
- unsigned MemOpc = MemN->getOpcode();
-
if (MemOpc == ISD::LOAD) {
assert(cast<LoadSDNode>(Op)->isUnindexed());
SDValue Load0 = DAG.getLoad(SingleTy, dl, Chain, Base0, MOp0);
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 76220eff4d51..b6984d40f78e 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -193,9 +193,7 @@ static inline void parseOperands(const MachineInstr &MI,
Defs.clear();
Uses.clear();
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
-
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg())
continue;
@@ -1644,8 +1642,7 @@ bool HexagonInstrInfo::ClobbersPredicate(MachineInstr &MI,
bool SkipDead) const {
const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
- for (unsigned oper = 0; oper < MI.getNumOperands(); ++oper) {
- MachineOperand MO = MI.getOperand(oper);
+ for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg()) {
if (!MO.isDef())
continue;
diff --git a/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp b/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp
index 9507de95231f..987c4a5fa6c4 100644
--- a/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp
@@ -109,8 +109,7 @@ void llvm::HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI,
assert(MCI->getOpcode() == static_cast<unsigned>(MI->getOpcode()) &&
"MCI opcode should have been set on construction");
- for (unsigned i = 0, e = MI->getNumOperands(); i < e; i++) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
MCOperand MCO;
bool MustExtend = MO.getTargetFlags() & HexagonII::HMOTF_ConstExtended;
diff --git a/llvm/lib/Target/Hexagon/HexagonPeephole.cpp b/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
index fc31139e13ce..1ff248200572 100644
--- a/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -120,16 +120,12 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
if (DisableHexagonPeephole) return false;
// Loop over all of the basic blocks.
- for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end();
- MBBb != MBBe; ++MBBb) {
- MachineBasicBlock *MBB = &*MBBb;
+ for (MachineBasicBlock &MBB : MF) {
PeepholeMap.clear();
PeepholeDoubleRegsMap.clear();
// Traverse the basic block.
- for (auto I = MBB->begin(), E = MBB->end(), NextI = I; I != E; I = NextI) {
- NextI = std::next(I);
- MachineInstr &MI = *I;
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
// Look for sign extends:
// %170 = SXTW %166
if (!DisableOptSZExt && MI.getOpcode() == Hexagon::A2_sxtw) {
@@ -274,11 +270,11 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
if (NewOp) {
Register PSrc = MI.getOperand(PR).getReg();
if (unsigned POrig = PeepholeMap.lookup(PSrc)) {
- BuildMI(*MBB, MI.getIterator(), MI.getDebugLoc(),
- QII->get(NewOp), MI.getOperand(0).getReg())
- .addReg(POrig)
- .add(MI.getOperand(S2))
- .add(MI.getOperand(S1));
+ BuildMI(MBB, MI.getIterator(), MI.getDebugLoc(), QII->get(NewOp),
+ MI.getOperand(0).getReg())
+ .addReg(POrig)
+ .add(MI.getOperand(S2))
+ .add(MI.getOperand(S1));
MRI->clearKillFlags(POrig);
MI.eraseFromParent();
}
diff --git a/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp b/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp
index 93ba277b0c9d..2c5c64cfcfc6 100644
--- a/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp
@@ -400,8 +400,7 @@ bool HexagonStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG,
unsigned Acc = 0; // Value accumulator.
unsigned Shift = 0;
- for (InstrGroup::iterator I = OG.begin(), E = OG.end(); I != E; ++I) {
- MachineInstr *MI = *I;
+ for (MachineInstr *MI : OG) {
const MachineMemOperand &MMO = getStoreTarget(MI);
MachineOperand &SO = MI->getOperand(2); // Source.
assert(SO.isImm() && "Expecting an immediate operand");
diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
index 87b1c43961d7..ecb2f88d8096 100644
--- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -305,8 +305,7 @@ void HexagonSubtarget::CallMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
VRegHoldingReg[MI->getOperand(0).getReg()] = MI->getOperand(1).getReg();
LastVRegUse.erase(MI->getOperand(1).getReg());
} else {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg())
continue;
if (MO.isUse() && !MI->isCopy() &&
diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
index 897fb209a8bf..ea2798a3b44e 100644
--- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
@@ -749,7 +749,6 @@ auto AlignVectors::realignGroup(const MoveGroup &Move) const -> bool {
WithMaxAlign.ValTy, Adjust);
int Diff = Start - (OffAtMax + Adjust);
AlignVal = HVC.getConstInt(Diff);
- // Sanity.
assert(Diff >= 0);
assert(static_cast<decltype(MinNeeded.value())>(Diff) < MinNeeded.value());
} else {
diff --git a/llvm/lib/Target/Lanai/LanaiDelaySlotFiller.cpp b/llvm/lib/Target/Lanai/LanaiDelaySlotFiller.cpp
index b9e577d201f9..cafe93bf8f4b 100644
--- a/llvm/lib/Target/Lanai/LanaiDelaySlotFiller.cpp
+++ b/llvm/lib/Target/Lanai/LanaiDelaySlotFiller.cpp
@@ -51,9 +51,8 @@ struct Filler : public MachineFunctionPass {
TRI = Subtarget.getRegisterInfo();
bool Changed = false;
- for (MachineFunction::iterator FI = MF.begin(), FE = MF.end(); FI != FE;
- ++FI)
- Changed |= runOnMachineBasicBlock(*FI);
+ for (MachineBasicBlock &MBB : MF)
+ Changed |= runOnMachineBasicBlock(MBB);
return Changed;
}
@@ -200,8 +199,7 @@ bool Filler::delayHasHazard(MachineBasicBlock::instr_iterator MI, bool &SawLoad,
assert((!MI->isCall() && !MI->isReturn()) &&
"Cannot put calls or returns in delay slot.");
- for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
- const MachineOperand &MO = MI->getOperand(I);
+ for (const MachineOperand &MO : MI->operands()) {
unsigned Reg;
if (!MO.isReg() || !(Reg = MO.getReg()))
diff --git a/llvm/lib/Target/Lanai/LanaiFrameLowering.cpp b/llvm/lib/Target/Lanai/LanaiFrameLowering.cpp
index 3a2d5030775e..3644eafe4353 100644
--- a/llvm/lib/Target/Lanai/LanaiFrameLowering.cpp
+++ b/llvm/lib/Target/Lanai/LanaiFrameLowering.cpp
@@ -65,17 +65,14 @@ void LanaiFrameLowering::replaceAdjDynAllocPseudo(MachineFunction &MF) const {
*static_cast<const LanaiInstrInfo *>(STI.getInstrInfo());
unsigned MaxCallFrameSize = MF.getFrameInfo().getMaxCallFrameSize();
- for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); MBB != E;
- ++MBB) {
- MachineBasicBlock::iterator MBBI = MBB->begin();
- while (MBBI != MBB->end()) {
- MachineInstr &MI = *MBBI++;
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
if (MI.getOpcode() == Lanai::ADJDYNALLOC) {
DebugLoc DL = MI.getDebugLoc();
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
- BuildMI(*MBB, MI, DL, LII.get(Lanai::ADD_I_LO), Dst)
+ BuildMI(MBB, MI, DL, LII.get(Lanai::ADD_I_LO), Dst)
.addReg(Src)
.addImm(MaxCallFrameSize);
MI.eraseFromParent();
diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
index 21d035c7ee9c..4217b8509676 100644
--- a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
+++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
@@ -467,8 +467,7 @@ static MachineInstr *canFoldIntoSelect(Register Reg,
return nullptr;
// Check if MI has any non-dead defs or physreg uses. This also detects
// predicated instructions which will be reading SR.
- for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 1)) {
// Reject frame index operands.
if (MO.isFI() || MO.isCPI() || MO.isJTI())
return nullptr;
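llvm::drop_begin(Range, N) used here is the STLExtras shorthand for make_range(std::next(begin, N), end); N defaults to 1, so drop_begin(MI->operands(), 1) and drop_begin(MI->operands()) are equivalent. A self-contained check of the helper:

    #include "llvm/ADT/STLExtras.h"
    #include <vector>

    int sumTail(const std::vector<int> &V) {
      int Sum = 0;
      for (int X : llvm::drop_begin(V, 1)) // skips V[0]
        Sum += X;
      return Sum; // sumTail({5, 1, 2}) == 3
    }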
diff --git a/llvm/lib/Target/Lanai/LanaiMCInstLower.cpp b/llvm/lib/Target/Lanai/LanaiMCInstLower.cpp
index 743f4f7c6e2f..479c0b1f0358 100644
--- a/llvm/lib/Target/Lanai/LanaiMCInstLower.cpp
+++ b/llvm/lib/Target/Lanai/LanaiMCInstLower.cpp
@@ -93,9 +93,7 @@ MCOperand LanaiMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
void LanaiMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
- for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
- const MachineOperand &MO = MI->getOperand(I);
-
+ for (const MachineOperand &MO : MI->operands()) {
MCOperand MCOp;
switch (MO.getType()) {
case MachineOperand::MO_Register:
diff --git a/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp b/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
index a83a5d2dfcc9..2a77a150f9aa 100644
--- a/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
+++ b/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -211,8 +211,8 @@ bool MSP430FrameLowering::restoreCalleeSavedRegisters(
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- for (unsigned i = 0, e = CSI.size(); i != e; ++i)
- BuildMI(MBB, MI, DL, TII.get(MSP430::POP16r), CSI[i].getReg());
+ for (const CalleeSavedInfo &I : CSI)
+ BuildMI(MBB, MI, DL, TII.get(MSP430::POP16r), I.getReg());
return true;
}
diff --git a/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp b/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp
index 1e57f33386e6..52c037de7660 100644
--- a/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp
+++ b/llvm/lib/Target/MSP430/MSP430MCInstLower.cpp
@@ -115,9 +115,7 @@ LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const {
void MSP430MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
-
+ for (const MachineOperand &MO : MI->operands()) {
MCOperand MCOp;
switch (MO.getType()) {
default:
diff --git a/llvm/lib/Target/Mips/Mips16FrameLowering.cpp b/llvm/lib/Target/Mips/Mips16FrameLowering.cpp
index fefa1134b021..622f2039f9e4 100644
--- a/llvm/lib/Target/Mips/Mips16FrameLowering.cpp
+++ b/llvm/lib/Target/Mips/Mips16FrameLowering.cpp
@@ -72,10 +72,9 @@ void Mips16FrameLowering::emitPrologue(MachineFunction &MF,
if (!CSI.empty()) {
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
- for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
- E = CSI.end(); I != E; ++I) {
- int64_t Offset = MFI.getObjectOffset(I->getFrameIdx());
- unsigned Reg = I->getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
+ unsigned Reg = I.getReg();
unsigned DReg = MRI->getDwarfRegNum(Reg, true);
unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createOffset(nullptr, DReg, Offset));
@@ -119,13 +118,13 @@ bool Mips16FrameLowering::spillCalleeSavedRegisters(
// will be saved with the "save" instruction
// during emitPrologue
//
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ for (const CalleeSavedInfo &I : CSI) {
// Add the callee-saved register as live-in. Do not add if the register is
// RA and return address is taken, because it has already been added in
// method MipsTargetLowering::lowerRETURNADDR.
// It's killed at the spill, unless the register is RA and return address
// is taken.
- unsigned Reg = CSI[i].getReg();
+ unsigned Reg = I.getReg();
bool IsRAAndRetAddrIsTaken = (Reg == Mips::RA)
&& MF->getFrameInfo().isReturnAddressTaken();
if (!IsRAAndRetAddrIsTaken)
diff --git a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
index 6d3f3adb2b7a..5d026785b921 100644
--- a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -163,9 +163,8 @@ static void emitDirectiveRelocJalr(const MachineInstr &MI,
TargetMachine &TM,
MCStreamer &OutStreamer,
const MipsSubtarget &Subtarget) {
- for (unsigned int I = MI.getDesc().getNumOperands(), E = MI.getNumOperands();
- I < E; ++I) {
- MachineOperand MO = MI.getOperand(I);
+ for (const MachineOperand &MO :
+ llvm::drop_begin(MI.operands(), MI.getDesc().getNumOperands())) {
if (MO.isMCSymbol() && (MO.getTargetFlags() & MipsII::MO_JALR)) {
MCSymbol *Callee = MO.getMCSymbol();
if (Callee && !Callee->getName().empty()) {
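Two things change in this hunk: the by-value MachineOperand copy becomes a const reference, and the index arithmetic becomes drop_begin over MI.getDesc().getNumOperands(), so only the operands appended beyond the instruction description's fixed count are scanned. A sketch of the access pattern (hypothetical consumer function):

    // Operands past the MCInstrDesc count are the extras attached after
    // selection -- here, the MCSymbol carrying the JALR relocation target.
    unsigned Fixed = MI.getDesc().getNumOperands();
    for (const MachineOperand &MO : llvm::drop_begin(MI.operands(), Fixed))
      if (MO.isMCSymbol())
        recordJalrTarget(MO.getMCSymbol()); // hypothetical helper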
diff --git a/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp b/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
index 8e619549f01c..491d379bfe0b 100644
--- a/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
+++ b/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
@@ -637,8 +637,8 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
// has any inline assembly in it. If so, we have to be conservative about
// alignment assumptions, as we don't know for sure the size of any
// instructions in the inline assembly.
- for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
- computeBlockSize(&*I);
+ for (MachineBasicBlock &MBB : *MF)
+ computeBlockSize(&MBB);
// Compute block offsets.
adjustBBOffsetsAfter(&MF->front());
@@ -730,8 +730,8 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
continue;
// Scan the instructions for constant pool operands.
- for (unsigned op = 0, e = MI.getNumOperands(); op != e; ++op)
- if (MI.getOperand(op).isCPI()) {
+ for (const MachineOperand &MO : MI.operands())
+ if (MO.isCPI()) {
// We found one. The addressing mode tells us the max displacement
// from the PC that this instruction permits.
@@ -759,7 +759,7 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
break;
}
// Remember that this is a user of a CP entry.
- unsigned CPI = MI.getOperand(op).getIndex();
+ unsigned CPI = MO.getIndex();
MachineInstr *CPEMI = CPEMIs[CPI];
unsigned MaxOffs = ((1 << Bits)-1) * Scale;
unsigned LongFormMaxOffs = ((1 << LongFormBits)-1) * LongFormScale;
@@ -1066,9 +1066,9 @@ int MipsConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset)
// Point the CPUser node to the replacement
U.CPEMI = CPEs[i].CPEMI;
// Change the CPI in the instruction operand to refer to the clone.
- for (unsigned j = 0, e = UserMI->getNumOperands(); j != e; ++j)
- if (UserMI->getOperand(j).isCPI()) {
- UserMI->getOperand(j).setIndex(CPEs[i].CPI);
+ for (MachineOperand &MO : UserMI->operands())
+ if (MO.isCPI()) {
+ MO.setIndex(CPEs[i].CPI);
break;
}
// Adjust the refcount of the clone...
@@ -1122,9 +1122,9 @@ int MipsConstantIslands::findLongFormInRangeCPEntry
// Point the CPUser node to the replacement
U.CPEMI = CPEs[i].CPEMI;
// Change the CPI in the instruction operand to refer to the clone.
- for (unsigned j = 0, e = UserMI->getNumOperands(); j != e; ++j)
- if (UserMI->getOperand(j).isCPI()) {
- UserMI->getOperand(j).setIndex(CPEs[i].CPI);
+ for (MachineOperand &MO : UserMI->operands())
+ if (MO.isCPI()) {
+ MO.setIndex(CPEs[i].CPI);
break;
}
// Adjust the refcount of the clone...
@@ -1392,9 +1392,9 @@ bool MipsConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
adjustBBOffsetsAfter(&*--NewIsland->getIterator());
// Finally, change the CPI in the instruction operand to be ID.
- for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
- if (UserMI->getOperand(i).isCPI()) {
- UserMI->getOperand(i).setIndex(ID);
+ for (MachineOperand &MO : UserMI->operands())
+ if (MO.isCPI()) {
+ MO.setIndex(ID);
break;
}
@@ -1633,10 +1633,10 @@ MipsConstantIslands::fixupConditionalBr(ImmBranch &Br) {
void MipsConstantIslands::prescanForConstants() {
unsigned J = 0;
(void)J;
- for (MachineFunction::iterator B =
- MF->begin(), E = MF->end(); B != E; ++B) {
- for (MachineBasicBlock::instr_iterator I =
- B->instr_begin(), EB = B->instr_end(); I != EB; ++I) {
+ for (MachineBasicBlock &B : *MF) {
+ for (MachineBasicBlock::instr_iterator I = B.instr_begin(),
+ EB = B.instr_end();
+ I != EB; ++I) {
switch(I->getDesc().getOpcode()) {
case Mips::LwConstant32: {
PrescannedForConstants = true;
diff --git a/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
index c2e3d7393a6d..2d27d7553de6 100644
--- a/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -218,9 +218,8 @@ namespace {
bool runOnMachineFunction(MachineFunction &F) override {
TM = &F.getTarget();
bool Changed = false;
- for (MachineFunction::iterator FI = F.begin(), FE = F.end();
- FI != FE; ++FI)
- Changed |= runOnMachineBasicBlock(*FI);
+ for (MachineBasicBlock &MBB : F)
+ Changed |= runOnMachineBasicBlock(MBB);
// This pass invalidates liveness information when it reorders
// instructions to fill delay slot. Without this, -verify-machineinstrs
diff --git a/llvm/lib/Target/Mips/MipsExpandPseudo.cpp b/llvm/lib/Target/Mips/MipsExpandPseudo.cpp
index f72dc1da4131..31180d5a23ef 100644
--- a/llvm/lib/Target/Mips/MipsExpandPseudo.cpp
+++ b/llvm/lib/Target/Mips/MipsExpandPseudo.cpp
@@ -896,9 +896,8 @@ bool MipsExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
TII = STI->getInstrInfo();
bool Modified = false;
- for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
- ++MFI)
- Modified |= expandMBB(*MFI);
+ for (MachineBasicBlock &MBB : MF)
+ Modified |= expandMBB(MBB);
if (Modified)
MF.RenumberBlocks();
diff --git a/llvm/lib/Target/Mips/MipsFrameLowering.h b/llvm/lib/Target/Mips/MipsFrameLowering.h
index 612b2b712fa8..710a3d40c38e 100644
--- a/llvm/lib/Target/Mips/MipsFrameLowering.h
+++ b/llvm/lib/Target/Mips/MipsFrameLowering.h
@@ -34,7 +34,10 @@ public:
bool hasBP(const MachineFunction &MF) const;
- bool isFPCloseToIncomingSP() const override { return false; }
+ bool allocateScavengingFrameIndexesNearIncomingSP(
+ const MachineFunction &MF) const override {
+ return false;
+ }
bool enableShrinkWrapping(const MachineFunction &MF) const override {
return true;
diff --git a/llvm/lib/Target/Mips/MipsMCInstLower.cpp b/llvm/lib/Target/Mips/MipsMCInstLower.cpp
index 66e04bda2af3..7b58cb90ab87 100644
--- a/llvm/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/llvm/lib/Target/Mips/MipsMCInstLower.cpp
@@ -318,8 +318,7 @@ void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
MCOperand MCOp = LowerOperand(MO);
if (MCOp.isValid())
diff --git a/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp b/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
index bb4b9c6fa6a7..193d071447ff 100644
--- a/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -452,10 +452,9 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
// Iterate over list of callee-saved registers and emit .cfi_offset
// directives.
- for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
- E = CSI.end(); I != E; ++I) {
- int64_t Offset = MFI.getObjectOffset(I->getFrameIdx());
- unsigned Reg = I->getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
+ unsigned Reg = I.getReg();
// If Reg is a double precision register, emit two cfa_offsets,
// one for each of the paired single precision registers.
@@ -796,13 +795,13 @@ bool MipsSEFrameLowering::spillCalleeSavedRegisters(
MachineFunction *MF = MBB.getParent();
const TargetInstrInfo &TII = *STI.getInstrInfo();
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ for (const CalleeSavedInfo &I : CSI) {
// Add the callee-saved register as live-in. Do not add if the register is
// RA and return address is taken, because it has already been added in
// method MipsTargetLowering::lowerRETURNADDR.
// It's killed at the spill, unless the register is RA and return address
// is taken.
- unsigned Reg = CSI[i].getReg();
+ unsigned Reg = I.getReg();
bool IsRAAndRetAddrIsTaken = (Reg == Mips::RA || Reg == Mips::RA_64)
&& MF->getFrameInfo().isReturnAddressTaken();
if (!IsRAAndRetAddrIsTaken)
@@ -831,8 +830,7 @@ bool MipsSEFrameLowering::spillCalleeSavedRegisters(
// Insert the spill to the stack frame.
bool IsKill = !IsRAAndRetAddrIsTaken;
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.storeRegToStackSlot(MBB, MI, Reg, IsKill,
- CSI[i].getFrameIdx(), RC, TRI);
+ TII.storeRegToStackSlot(MBB, MI, Reg, IsKill, I.getFrameIdx(), RC, TRI);
}
return true;
diff --git a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
index 1fe6ab09804b..40b215a8204c 100644
--- a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -3581,8 +3581,8 @@ MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI,
MachineInstrBuilder MIB =
BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::LH : Mips::LH64), Rt);
- for (unsigned i = 1; i < MI.getNumOperands(); i++)
- MIB.add(MI.getOperand(i));
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
+ MIB.add(MO);
if(!UsingMips32) {
Register Tmp = RegInfo.createVirtualRegister(&Mips::GPR32RegClass);
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index aab6d2034f11..c35e67d6726f 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -130,10 +130,8 @@ VisitGlobalVariableForEmission(const GlobalVariable *GV,
for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
DiscoverDependentGlobals(GV->getOperand(i), Others);
- for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(),
- E = Others.end();
- I != E; ++I)
- VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);
+ for (const GlobalVariable *GV : Others)
+ VisitGlobalVariableForEmission(GV, Order, Visited, Visiting);
// Now we can visit ourself
Order.push_back(GV);
@@ -699,35 +697,33 @@ static bool useFuncSeen(const Constant *C,
void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
DenseMap<const Function *, bool> seenMap;
- for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
- const Function *F = &*FI;
-
- if (F->getAttributes().hasFnAttr("nvptx-libcall-callee")) {
- emitDeclaration(F, O);
+ for (const Function &F : M) {
+ if (F.getAttributes().hasFnAttr("nvptx-libcall-callee")) {
+ emitDeclaration(&F, O);
continue;
}
- if (F->isDeclaration()) {
- if (F->use_empty())
+ if (F.isDeclaration()) {
+ if (F.use_empty())
continue;
- if (F->getIntrinsicID())
+ if (F.getIntrinsicID())
continue;
- emitDeclaration(F, O);
+ emitDeclaration(&F, O);
continue;
}
- for (const User *U : F->users()) {
+ for (const User *U : F.users()) {
if (const Constant *C = dyn_cast<Constant>(U)) {
if (usedInGlobalVarDef(C)) {
// The use is in the initialization of a global variable
// that is a function pointer, so print a declaration
// for the original function
- emitDeclaration(F, O);
+ emitDeclaration(&F, O);
break;
}
// Emit a declaration of this function if the function that
// uses this constant expr has already been seen.
if (useFuncSeen(C, seenMap)) {
- emitDeclaration(F, O);
+ emitDeclaration(&F, O);
break;
}
}
@@ -746,11 +742,11 @@ void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
// appearing in the module before the callee. so print out
// a declaration for the callee.
if (seenMap.find(caller) != seenMap.end()) {
- emitDeclaration(F, O);
+ emitDeclaration(&F, O);
break;
}
}
- seenMap[F] = true;
+ seenMap[&F] = true;
}
}
@@ -887,33 +883,11 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {
GlobalsEmitted = true;
}
- // XXX Temproarily remove global variables so that doFinalization() will not
- // emit them again (global variables are emitted at beginning).
-
- Module::GlobalListType &global_list = M.getGlobalList();
- int i, n = global_list.size();
- GlobalVariable **gv_array = new GlobalVariable *[n];
-
- // first, back-up GlobalVariable in gv_array
- i = 0;
- for (Module::global_iterator I = global_list.begin(), E = global_list.end();
- I != E; ++I)
- gv_array[i++] = &*I;
-
- // second, empty global_list
- while (!global_list.empty())
- global_list.remove(global_list.begin());
-
// call doFinalization
bool ret = AsmPrinter::doFinalization(M);
- // now we restore global variables
- for (i = 0; i < n; i++)
- global_list.insert(global_list.end(), gv_array[i]);
-
clearAnnotationCache(&M);
- delete[] gv_array;
// Close the last emitted section
if (HasDebugInfo) {
static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer())
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 5d680e731e4a..2a3a38d7b2f1 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -306,6 +306,11 @@ public:
std::string getVirtualRegisterName(unsigned) const;
const MCSymbol *getFunctionFrameSymbol() const override;
+
+  // Make emitGlobalVariable() a no-op for NVPTX.
+  // Global variables have already been emitted by the time the base AsmPrinter
+  // attempts to do so in doFinalization() (see NVPTXAsmPrinter::emitGlobals()).
+ void emitGlobalVariable(const GlobalVariable *GV) override {}
};
} // end namespace llvm
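This override replaces the block deleted from doFinalization() above, which unlinked every GlobalVariable into a temporary array, ran the base finalization, then relinked them all just to stop AsmPrinter from printing globals a second time. Overriding the single virtual hook achieves the same effect with no module surgery; a sketch of the pattern (hypothetical printer class):

    class EarlyGlobalsAsmPrinter : public AsmPrinter {
      // Globals were already streamed before function bodies, so the base
      // class's doFinalization() walk over M.globals() must do nothing.
      void emitGlobalVariable(const GlobalVariable *GV) override {}
    };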
diff --git a/llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp b/llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
index a8a43cee9ab7..34b9dfe87cc2 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
@@ -72,8 +72,7 @@ bool NVPTXAssignValidGlobalNames::runOnModule(Module &M) {
std::string NVPTXAssignValidGlobalNames::cleanUpName(StringRef Name) {
std::string ValidName;
raw_string_ostream ValidNameStream(ValidName);
- for (unsigned I = 0, E = Name.size(); I != E; ++I) {
- char C = Name[I];
+ for (char C : Name) {
if (C == '.' || C == '@') {
ValidNameStream << "_$_";
} else {
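The same transform shown standalone for illustration ('.' and '@' are not legal in PTX identifiers, '$' is); for example, a hypothetical input "kernel.entry@2" becomes "kernel_$_entry_$_2":

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/raw_ostream.h"
    #include <string>

    std::string cleanUpName(llvm::StringRef Name) {
      std::string Valid;
      llvm::raw_string_ostream OS(Valid);
      for (char C : Name)
        if (C == '.' || C == '@')
          OS << "_$_"; // PTX-safe replacement
        else
          OS << C;
      return OS.str();
    }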
diff --git a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
index e404cead344b..f4934f0bc20b 100644
--- a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -56,23 +56,16 @@ bool NVPTXReplaceImageHandles::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
InstrsToRemove.clear();
- for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
- ++BI) {
- for (MachineBasicBlock::iterator I = (*BI).begin(), E = (*BI).end();
- I != E; ++I) {
- MachineInstr &MI = *I;
+ for (MachineBasicBlock &MBB : MF)
+ for (MachineInstr &MI : MBB)
Changed |= processInstr(MI);
- }
- }
// Now clean up any handle-access instructions
// This is needed in debug mode when code cleanup passes are not executed,
// but we need the handle access to be eliminated because they are not
// valid instructions when image handles are disabled.
- for (DenseSet<MachineInstr *>::iterator I = InstrsToRemove.begin(),
- E = InstrsToRemove.end(); I != E; ++I) {
- (*I)->eraseFromParent();
- }
+ for (MachineInstr *MI : InstrsToRemove)
+ MI->eraseFromParent();
return Changed;
}
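Note the erase discipline here, in contrast to make_early_inc_range elsewhere in this patch: the main scan only records handle-access instructions in InstrsToRemove, and the actual eraseFromParent() happens in a second pass, so the MBB iteration never observes a deleted node. A sketch of the deferred-erase idiom (hypothetical predicate):

    llvm::DenseSet<llvm::MachineInstr *> ToRemove;
    for (llvm::MachineBasicBlock &MBB : MF)
      for (llvm::MachineInstr &MI : MBB)
        if (isDeadHandleAccess(MI)) // hypothetical
          ToRemove.insert(&MI);
    for (llvm::MachineInstr *MI : ToRemove) // erase outside the scan
      MI->eraseFromParent();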
diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td
index f43ba00ec373..f3ae0010ad8e 100644
--- a/llvm/lib/Target/PowerPC/P10InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P10InstrResources.td
@@ -626,7 +626,9 @@ def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read, P10DX_Read],
// 5 Cycles Fixed-Point and BCD operations, 3 input operands
def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read, P10DX_Read, P10DX_Read],
(instrs
+ BCDADD_rec,
BCDS_rec,
+ BCDSUB_rec,
BCDTRUNC_rec,
VADDECUQ,
VADDEUQM,
@@ -1974,7 +1976,7 @@ def : InstRW<[P10W_SX, P10W_DISP_ANY],
ICBLQ,
ICBTLS,
ICCCI,
- LA,
+ LA, LA8,
LDMX,
MFDCR,
MFPMR,
@@ -2073,3 +2075,4 @@ def : InstRW<[P10W_vMU_7C, P10W_DISP_ANY, P10vMU_Read, P10vMU_Read, P10vMU_Read]
VMSUMUHM,
VMSUMUHS
)>;
+
diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td
index c4f4a2b3d796..f7c049951c54 100644
--- a/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -151,6 +151,7 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
(instregex "ADD(4|8)(TLS)?(_)?$"),
(instregex "NEG(8)?(O)?$"),
(instregex "ADDI(S)?toc(HA|L)(8)?$"),
+ (instregex "LA(8)?$"),
COPY,
MCRF,
MCRXRX,
@@ -165,7 +166,6 @@ def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
SRADI_32,
RLDIC,
RFEBB,
- LA,
TBEGIN,
TRECHKPT,
NOP,
@@ -624,7 +624,9 @@ def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
BCDS_rec,
BCDTRUNC_rec,
BCDUS_rec,
- BCDUTRUNC_rec
+ BCDUTRUNC_rec,
+ BCDADD_rec,
+ BCDSUB_rec
)>;
// 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index a1ff20bb3612..422bd11dca52 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -203,6 +203,22 @@ def FeatureLogicalFusion :
SubtargetFeature<"fuse-logical", "HasLogicalFusion", "true",
"Target supports Logical Operations fusion",
[FeatureFusion]>;
+def FeatureSha3Fusion :
+ SubtargetFeature<"fuse-sha3", "HasSha3Fusion", "true",
+ "Target supports SHA3 assist fusion",
+ [FeatureFusion]>;
+def FeatureCompareFusion :
+ SubtargetFeature<"fuse-cmp", "HasCompareFusion", "true",
+ "Target supports Comparison Operations fusion",
+ [FeatureFusion]>;
+def FeatureWideImmFusion :
+ SubtargetFeature<"fuse-wideimm", "HasWideImmFusion", "true",
+ "Target supports Wide-Immediate fusion",
+ [FeatureFusion]>;
+def FeatureZeroMoveFusion :
+ SubtargetFeature<"fuse-zeromove", "HasZeroMoveFusion", "true",
+ "Target supports move to SPR with branch fusion",
+ [FeatureFusion]>;
def FeatureUnalignedFloats :
SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess",
"true", "CPU does not trap on unaligned FP access">;
@@ -393,7 +409,7 @@ def ProcessorFeatures {
// still exist with the exception of those we know are Power9 specific.
list<SubtargetFeature> FusionFeatures = [
FeatureStoreFusion, FeatureAddLogicalFusion, FeatureLogicalAddFusion,
- FeatureLogicalFusion, FeatureArithAddFusion
+ FeatureLogicalFusion, FeatureArithAddFusion, FeatureSha3Fusion,
];
list<SubtargetFeature> P10AdditionalFeatures =
!listconcat(FusionFeatures, [
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index a76963abb8e4..16e3b2b85c2e 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -875,18 +875,19 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, TmpInst);
return;
}
- case PPC::ADDItoc: {
+ case PPC::ADDItoc:
+ case PPC::ADDItoc8: {
assert(IsAIX && TM.getCodeModel() == CodeModel::Small &&
- "Operand only valid in AIX 32 bit mode");
+ "PseudoOp only valid for small code model AIX");
- // Transform %rN = ADDItoc @op1, %r2.
+ // Transform %rN = ADDItoc/8 @op1, %r2.
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this);
// Change the opcode to load address.
- TmpInst.setOpcode(PPC::LA);
+ TmpInst.setOpcode((!IsPPC64) ? (PPC::LA) : (PPC::LA8));
const MachineOperand &MO = MI->getOperand(1);
- assert(MO.isGlobal() && "Invalid operand for ADDItoc.");
+ assert(MO.isGlobal() && "Invalid operand for ADDItoc[8].");
// Map the operand to its corresponding MCSymbol.
const MCSymbol *const MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this);
diff --git a/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp b/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
index fa6713dcca80..4cac0e3551f6 100644
--- a/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -120,16 +120,13 @@ unsigned PPCBSel::ComputeBlockSizes(MachineFunction &Fn) {
static_cast<const PPCInstrInfo *>(Fn.getSubtarget().getInstrInfo());
unsigned FuncSize = GetInitialOffset(Fn);
- for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
- ++MFI) {
- MachineBasicBlock *MBB = &*MFI;
-
+ for (MachineBasicBlock &MBB : Fn) {
// The end of the previous block may have extra nops if this block has an
// alignment requirement.
- if (MBB->getNumber() > 0) {
- unsigned AlignExtra = GetAlignmentAdjustment(*MBB, FuncSize);
+ if (MBB.getNumber() > 0) {
+ unsigned AlignExtra = GetAlignmentAdjustment(MBB, FuncSize);
- auto &BS = BlockSizes[MBB->getNumber()-1];
+ auto &BS = BlockSizes[MBB.getNumber()-1];
BS.first += AlignExtra;
BS.second = AlignExtra;
@@ -138,10 +135,10 @@ unsigned PPCBSel::ComputeBlockSizes(MachineFunction &Fn) {
unsigned BlockSize = 0;
unsigned UnalignedBytesRemaining = 0;
- for (MachineInstr &MI : *MBB) {
+ for (MachineInstr &MI : MBB) {
unsigned MINumBytes = TII->getInstSizeInBytes(MI);
if (MI.isInlineAsm() && (FirstImpreciseBlock < 0))
- FirstImpreciseBlock = MBB->getNumber();
+ FirstImpreciseBlock = MBB.getNumber();
if (TII->isPrefixed(MI.getOpcode())) {
NumPrefixed++;
@@ -171,7 +168,7 @@ unsigned PPCBSel::ComputeBlockSizes(MachineFunction &Fn) {
BlockSize += MINumBytes;
}
- BlockSizes[MBB->getNumber()].first = BlockSize;
+ BlockSizes[MBB.getNumber()].first = BlockSize;
FuncSize += BlockSize;
}
@@ -181,16 +178,13 @@ unsigned PPCBSel::ComputeBlockSizes(MachineFunction &Fn) {
/// Modify the basic block align adjustment.
void PPCBSel::modifyAdjustment(MachineFunction &Fn) {
unsigned Offset = GetInitialOffset(Fn);
- for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
- ++MFI) {
- MachineBasicBlock *MBB = &*MFI;
-
- if (MBB->getNumber() > 0) {
- auto &BS = BlockSizes[MBB->getNumber()-1];
+ for (MachineBasicBlock &MBB : Fn) {
+ if (MBB.getNumber() > 0) {
+ auto &BS = BlockSizes[MBB.getNumber()-1];
BS.first -= BS.second;
Offset -= BS.second;
- unsigned AlignExtra = GetAlignmentAdjustment(*MBB, Offset);
+ unsigned AlignExtra = GetAlignmentAdjustment(MBB, Offset);
BS.first += AlignExtra;
BS.second = AlignExtra;
@@ -198,7 +192,7 @@ void PPCBSel::modifyAdjustment(MachineFunction &Fn) {
Offset += AlignExtra;
}
- Offset += BlockSizes[MBB->getNumber()].first;
+ Offset += BlockSizes[MBB.getNumber()].first;
}
}
diff --git a/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
index b9518d6d7064..b1f5bdd885cd 100644
--- a/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -81,8 +81,7 @@ FunctionPass *llvm::createPPCCTRLoopsVerify() {
}
static bool clobbersCTR(const MachineInstr &MI) {
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg()) {
if (MO.isDef() && (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8))
return true;
@@ -167,18 +166,16 @@ bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) {
// Verify that all bdnz/bdz instructions are dominated by a loop mtctr before
// any other instructions that might clobber the ctr register.
- for (MachineFunction::iterator I = MF.begin(), IE = MF.end();
- I != IE; ++I) {
- MachineBasicBlock *MBB = &*I;
- if (!MDT->isReachableFromEntry(MBB))
+ for (MachineBasicBlock &MBB : MF) {
+ if (!MDT->isReachableFromEntry(&MBB))
continue;
- for (MachineBasicBlock::iterator MII = MBB->getFirstTerminator(),
- MIIE = MBB->end(); MII != MIIE; ++MII) {
+ for (MachineBasicBlock::iterator MII = MBB.getFirstTerminator(),
+ MIIE = MBB.end(); MII != MIIE; ++MII) {
unsigned Opc = MII->getOpcode();
if (Opc == PPC::BDNZ8 || Opc == PPC::BDNZ ||
Opc == PPC::BDZ8 || Opc == PPC::BDZ)
- if (!verifyCTRBranch(MBB, MII))
+ if (!verifyCTRBranch(&MBB, MII))
llvm_unreachable("Invalid PPC CTR loop!");
}
}
diff --git a/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp
index be4c9dd60b00..a9794ddd0566 100644
--- a/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp
+++ b/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp
@@ -74,8 +74,7 @@ bool PPCExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
TRI = &TII->getRegisterInfo();
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
- MachineBasicBlock &MBB = *I;
+ for (MachineBasicBlock &MBB : MF) {
for (MachineBasicBlock::iterator MBBI = MBB.begin(), MBBE = MBB.end();
MBBI != MBBE;) {
MachineInstr &MI = *MBBI;
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index fc3c7ec35b8d..3ca563fee970 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -391,9 +391,8 @@ void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
- for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
- BI != BE; ++BI)
- for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
+ for (MachineBasicBlock &MBB : MF)
+ for (MachineBasicBlock::iterator MBBI = MBB.end(); MBBI != MBB.begin();) {
--MBBI;
for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
MachineOperand &MO = MBBI->getOperand(I);
@@ -1172,8 +1171,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
// Describe where callee saved registers were saved, at fixed offsets from
// CFA.
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
- for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
- unsigned Reg = CSI[I].getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned Reg = I.getReg();
if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
// This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
@@ -1204,15 +1203,15 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
continue;
}
- if (CSI[I].isSpilledToReg()) {
- unsigned SpilledReg = CSI[I].getDstReg();
+ if (I.isSpilledToReg()) {
+ unsigned SpilledReg = I.getDstReg();
unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
nullptr, MRI->getDwarfRegNum(Reg, true),
MRI->getDwarfRegNum(SpilledReg, true)));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIRegister);
} else {
- int64_t Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
+ int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
// We have changed the object offset above but we do not want to change
// the actual offsets in the CFI instruction so we have to undo the
// offset change here.
@@ -2085,15 +2084,15 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
SmallVector<CalleeSavedInfo, 18> FPRegs;
SmallVector<CalleeSavedInfo, 18> VRegs;
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned Reg = I.getReg();
assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
(Reg != PPC::X2 && Reg != PPC::R2)) &&
"Not expecting to try to spill R2 in a function that must save TOC");
if (PPC::GPRCRegClass.contains(Reg)) {
HasGPSaveArea = true;
- GPRegs.push_back(CSI[i]);
+ GPRegs.push_back(I);
if (Reg < MinGPR) {
MinGPR = Reg;
@@ -2101,7 +2100,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
} else if (PPC::G8RCRegClass.contains(Reg)) {
HasG8SaveArea = true;
- G8Regs.push_back(CSI[i]);
+ G8Regs.push_back(I);
if (Reg < MinG8R) {
MinG8R = Reg;
@@ -2109,7 +2108,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
} else if (PPC::F8RCRegClass.contains(Reg)) {
HasFPSaveArea = true;
- FPRegs.push_back(CSI[i]);
+ FPRegs.push_back(I);
if (Reg < MinFPR) {
MinFPR = Reg;
@@ -2123,7 +2122,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
// alignment requirements, so overload the save area for both cases.
HasVRSaveArea = true;
- VRegs.push_back(CSI[i]);
+ VRegs.push_back(I);
if (Reg < MinVR) {
MinVR = Reg;
@@ -2395,8 +2394,8 @@ bool PPCFrameLowering::spillCalleeSavedRegisters(
}
});
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned Reg = I.getReg();
// CR2 through CR4 are the nonvolatile CR fields.
bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
@@ -2439,11 +2438,11 @@ bool PPCFrameLowering::spillCalleeSavedRegisters(
MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
.addReg(PPC::R12,
getKillRegState(true)),
- CSI[i].getFrameIdx()));
+ I.getFrameIdx()));
}
} else {
- if (CSI[i].isSpilledToReg()) {
- unsigned Dst = CSI[i].getDstReg();
+ if (I.isSpilledToReg()) {
+ unsigned Dst = I.getDstReg();
if (Spilled[Dst])
continue;
@@ -2478,9 +2477,9 @@ bool PPCFrameLowering::spillCalleeSavedRegisters(
if (Subtarget.needsSwapsForVSXMemOps() &&
!MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
- CSI[i].getFrameIdx(), RC, TRI);
+ I.getFrameIdx(), RC, TRI);
else
- TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, CSI[i].getFrameIdx(),
+ TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, I.getFrameIdx(),
RC, TRI);
}
}
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 0abdf81d0908..a2664bcff4ab 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -510,14 +510,12 @@ static bool hasTocDataAttr(SDValue Val, unsigned PointerSize) {
return false;
// TODO: These asserts should be updated as more support for the toc data
- // transformation is added (64 bit, struct support, etc.).
+ // transformation is added (struct support, etc.).
- assert(PointerSize == 4 && "Only 32 Bit Codegen is currently supported by "
- "the toc data transformation.");
-
- assert(PointerSize >= GV->getAlign().valueOrOne().value() &&
- "GlobalVariables with an alignment requirement stricter then 4-bytes "
- "not supported by the toc data transformation.");
+ assert(
+ PointerSize >= GV->getAlign().valueOrOne().value() &&
+ "GlobalVariables with an alignment requirement stricter than TOC entry "
+ "size not supported by the toc data transformation.");
Type *GVType = GV->getValueType();
@@ -537,7 +535,7 @@ static bool hasTocDataAttr(SDValue Val, unsigned PointerSize) {
"supported by the toc data transformation.");
assert(GVType->getPrimitiveSizeInBits() <= PointerSize * 8 &&
- "A GlobalVariable with size larger than 32 bits is not currently "
+ "A GlobalVariable with size larger than a TOC entry is not currently "
"supported by the toc data transformation.");
if (GV->hasLocalLinkage() || GV->hasPrivateLinkage())
@@ -5049,16 +5047,94 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
// value for the comparison. When selecting through a .td file, a type
// error is raised. Must check this first so we never break on the
// !Subtarget->isISA3_1() check.
- if (N->getConstantOperandVal(0) == Intrinsic::ppc_fsels) {
+ auto IntID = N->getConstantOperandVal(0);
+ if (IntID == Intrinsic::ppc_fsels) {
SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3)};
CurDAG->SelectNodeTo(N, PPC::FSELS, MVT::f32, Ops);
return;
}
+ if (IntID == Intrinsic::ppc_bcdadd_p || IntID == Intrinsic::ppc_bcdsub_p) {
+ auto Pred = N->getConstantOperandVal(1);
+ unsigned Opcode =
+ IntID == Intrinsic::ppc_bcdadd_p ? PPC::BCDADD_rec : PPC::BCDSUB_rec;
+ unsigned SubReg = 0;
+ unsigned ShiftVal = 0;
+ bool Reverse = false;
+ switch (Pred) {
+ case 0:
+ SubReg = PPC::sub_eq;
+ ShiftVal = 1;
+ break;
+ case 1:
+ SubReg = PPC::sub_eq;
+ ShiftVal = 1;
+ Reverse = true;
+ break;
+ case 2:
+ SubReg = PPC::sub_lt;
+ ShiftVal = 3;
+ break;
+ case 3:
+ SubReg = PPC::sub_lt;
+ ShiftVal = 3;
+ Reverse = true;
+ break;
+ case 4:
+ SubReg = PPC::sub_gt;
+ ShiftVal = 2;
+ break;
+ case 5:
+ SubReg = PPC::sub_gt;
+ ShiftVal = 2;
+ Reverse = true;
+ break;
+ case 6:
+ SubReg = PPC::sub_un;
+ break;
+ case 7:
+ SubReg = PPC::sub_un;
+ Reverse = true;
+ break;
+ }
+
+ EVT VTs[] = {MVT::v16i8, MVT::Glue};
+ SDValue Ops[] = {N->getOperand(2), N->getOperand(3),
+ CurDAG->getTargetConstant(0, dl, MVT::i32)};
+ SDValue BCDOp = SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, Ops), 0);
+ SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);
+ // On Power10, we can use SETBC[R]. On prior architectures, we have to use
+ // MFOCRF and shift/negate the value.
+ if (Subtarget->isISA3_1()) {
+ SDValue SubRegIdx = CurDAG->getTargetConstant(SubReg, dl, MVT::i32);
+ SDValue CRBit = SDValue(
+ CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
+ CR6Reg, SubRegIdx, BCDOp.getValue(1)),
+ 0);
+ CurDAG->SelectNodeTo(N, Reverse ? PPC::SETBCR : PPC::SETBC, MVT::i32,
+ CRBit);
+ } else {
+ SDValue Move =
+ SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR6Reg,
+ BCDOp.getValue(1)),
+ 0);
+ SDValue Ops[] = {Move, getI32Imm((32 - (4 + ShiftVal)) & 31, dl),
+ getI32Imm(31, dl), getI32Imm(31, dl)};
+ if (!Reverse)
+ CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
+ else {
+ SDValue Shift = SDValue(
+ CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
+ CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Shift, getI32Imm(1, dl));
+ }
+ }
+ return;
+ }
+
if (!Subtarget->isISA3_1())
break;
unsigned Opcode = 0;
- switch (N->getConstantOperandVal(0)) {
+ switch (IntID) {
default:
break;
case Intrinsic::ppc_altivec_vstribr_p:
@@ -5713,41 +5789,57 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
if (isAIXABI && CModel == CodeModel::Medium)
report_fatal_error("Medium code model is not supported on AIX.");
- // For 64-bit small code model, we allow SelectCodeCommon to handle this,
- // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA.
- if (isPPC64 && CModel == CodeModel::Small)
+ // For 64-bit ELF small code model, we allow SelectCodeCommon to handle
+ // this, selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA. For AIX
+ // small code model, we need to check for a toc-data attribute.
+ if (isPPC64 && !isAIXABI && CModel == CodeModel::Small)
break;
+ auto replaceWith = [this, &dl](unsigned OpCode, SDNode *TocEntry,
+ EVT OperandTy) {
+ SDValue GA = TocEntry->getOperand(0);
+ SDValue TocBase = TocEntry->getOperand(1);
+ SDNode *MN = CurDAG->getMachineNode(OpCode, dl, OperandTy, GA, TocBase);
+ transferMemOperands(TocEntry, MN);
+ ReplaceNode(TocEntry, MN);
+ };
+
// Handle 32-bit small code model.
- if (!isPPC64) {
+ if (!isPPC64 && CModel == CodeModel::Small) {
// Transforms the ISD::TOC_ENTRY node to passed in Opcode, either
// PPC::ADDItoc, or PPC::LWZtoc
- auto replaceWith = [this, &dl](unsigned OpCode, SDNode *TocEntry) {
- SDValue GA = TocEntry->getOperand(0);
- SDValue TocBase = TocEntry->getOperand(1);
- SDNode *MN = CurDAG->getMachineNode(OpCode, dl, MVT::i32, GA, TocBase);
- transferMemOperands(TocEntry, MN);
- ReplaceNode(TocEntry, MN);
- };
-
if (isELFABI) {
assert(TM.isPositionIndependent() &&
"32-bit ELF can only have TOC entries in position independent"
" code.");
// 32-bit ELF always uses a small code model toc access.
- replaceWith(PPC::LWZtoc, N);
+ replaceWith(PPC::LWZtoc, N, MVT::i32);
return;
}
- if (isAIXABI && CModel == CodeModel::Small) {
- if (hasTocDataAttr(N->getOperand(0),
- CurDAG->getDataLayout().getPointerSize()))
- replaceWith(PPC::ADDItoc, N);
- else
- replaceWith(PPC::LWZtoc, N);
+ assert(isAIXABI && "ELF ABI already handled");
+ if (hasTocDataAttr(N->getOperand(0),
+ CurDAG->getDataLayout().getPointerSize())) {
+ replaceWith(PPC::ADDItoc, N, MVT::i32);
return;
}
+
+ replaceWith(PPC::LWZtoc, N, MVT::i32);
+ return;
+ }
+
+ if (isPPC64 && CModel == CodeModel::Small) {
+ assert(isAIXABI && "ELF ABI handled in common SelectCode");
+
+ if (hasTocDataAttr(N->getOperand(0),
+ CurDAG->getDataLayout().getPointerSize())) {
+ replaceWith(PPC::ADDItoc8, N, MVT::i64);
+ return;
+ }
+      // Break if it does not have the toc-data attribute and let the common
+      // SelectCode path handle it.
+ break;
}
assert(CModel != CodeModel::Small && "All small code models handled.");
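A quick check of the pre-Power10 constants in the bcdadd./bcdsub. lowering above (reasoning from the diff, IBM bit numbering with bit 0 as the MSB): MFOCRF leaves the CR6 field in bits 24-27 of the GPR, with lt/gt/eq/un at 24/25/26/27, so the selected bit sits at bit 27 - ShiftVal. RLWINM then rotates left by (32 - (4 + ShiftVal)) & 31 = 28 - ShiftVal, which moves bit (27 - ShiftVal) to position (27 - ShiftVal) - (28 - ShiftVal) mod 32 = 31, the LSB, and the 31,31 mask range clears everything else; the Reverse cases flip that single bit with XORI ..., 1. For eq (ShiftVal = 1), for instance, bit 26 rotated left by 27 lands in bit 31.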
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index ac952b240a48..ec7e30d7e362 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -12116,6 +12116,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineFunction::iterator It = ++BB->getIterator();
MachineFunction *F = BB->getParent();
+ MachineRegisterInfo &MRI = F->getRegInfo();
if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
@@ -12721,7 +12722,10 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
Register OldFPSCRReg = MI.getOperand(0).getReg();
// Save FPSCR value.
- BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
+ if (MRI.use_empty(OldFPSCRReg))
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
+ else
+ BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
// The floating point rounding mode is in the bits 62:63 of FPCSR, and has
// the following settings:
@@ -12854,7 +12858,10 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// Result of setflm is previous FPSCR content, so we need to save it first.
Register OldFPSCRReg = MI.getOperand(0).getReg();
- BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
+ if (MRI.use_empty(OldFPSCRReg))
+ BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
+ else
+ BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
// Put bits in 32:63 to FPSCR.
Register NewFPSCRReg = MI.getOperand(1).getReg();
@@ -15966,8 +15973,11 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
}
break;
case 'v':
- if (Subtarget.hasAltivec())
+ if (Subtarget.hasAltivec() && VT.isVector())
return std::make_pair(0U, &PPC::VRRCRegClass);
+ else if (Subtarget.hasVSX())
+ // Scalars in Altivec registers only make sense with VSX.
+ return std::make_pair(0U, &PPC::VFRCRegClass);
break;
case 'y': // crrc
return std::make_pair(0U, &PPC::CRRCRegClass);
@@ -17664,6 +17674,24 @@ PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
return Mode;
}
+bool PPCTargetLowering::splitValueIntoRegisterParts(
+ SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
+ unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
+ EVT ValVT = Val.getValueType();
+ // If we are splitting a scalar integer into f64 parts (i.e. so they
+ // can be placed into VFRC registers), we need to zero extend and
+ // bitcast the values. This will ensure the value is placed into a
+ // VSR using direct moves or stack operations as needed.
+ if (PartVT == MVT::f64 &&
+ (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
+ Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
+ Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
+ Parts[0] = Val;
+ return true;
+ }
+ return false;
+}
+
// If we happen to match to an aligned D-Form, check if the Frame Index is
// adequately aligned. If it is not, reset the mode to match to X-Form.
static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 34dce2c3172d..87b7f96112ec 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1139,6 +1139,10 @@ namespace llvm {
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base,
SelectionDAG &DAG) const;
+ bool
+ splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
+ SDValue *Parts, unsigned NumParts, MVT PartVT,
+ Optional<CallingConv::ID> CC) const override;
/// Structure that collects some common arguments that get passed around
/// between the functions for call lowering.
struct CallFlags {
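The new splitValueIntoRegisterParts hook pairs with the 'v'-constraint change above: once a scalar i8/i16/i32 may land in a VFRC register, the value is requested as an f64 part, and zero-extend-then-bitcast lets it reach a VSR via a direct move rather than a store/reload round trip. A minimal sketch of the shaping step (names local to the sketch):

    #include "llvm/CodeGen/SelectionDAG.h"

    SDValue shapeIntToF64Part(SelectionDAG &DAG, const SDLoc &DL, SDValue Val) {
      // i8/i16/i32 -> i64 -> f64; the bit pattern, not the numeric value,
      // is what travels to the register.
      SDValue Wide = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
      return DAG.getNode(ISD::BITCAST, DL, MVT::f64, Wide);
    }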
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 417a6ce7e522..58af8037f59c 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -773,6 +773,11 @@ def ADDIS8 : DForm_2<15, (outs g8rc:$rD), (ins g8rc_nox0:$rA, s17imm64:$imm),
"addis $rD, $rA, $imm", IIC_IntSimple,
[(set i64:$rD, (add i64:$rA, imm16ShiftedSExt:$imm))]>;
+def LA8 : DForm_2<14, (outs g8rc:$rD), (ins g8rc_nox0:$rA, s16imm64:$sym),
+ "la $rD, $sym($rA)", IIC_IntGeneral,
+ [(set i64:$rD, (add i64:$rA,
+ (PPClo tglobaladdr:$sym, 0)))]>;
+
let Defs = [CARRY] in {
def SUBFIC8: DForm_2< 8, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm),
"subfic $rD, $rA, $imm", IIC_IntGeneral,
@@ -1435,6 +1440,13 @@ def ADDIStocHA8: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentr
def ADDItocL: PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
"#ADDItocL", []>, isPPC64;
}
+
+// Local Data Transform
+def ADDItoc8 : PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc_nox0:$reg),
+ "#ADDItoc8",
+ [(set i64:$rD,
+ (PPCtoc_entry tglobaladdr:$disp, i64:$reg))]>, isPPC64;
+
let mayLoad = 1 in
def LDtocL: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc_nox0:$reg),
"#LDtocL", []>, isPPC64;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index 1e0e2d88e54b..fe21a164dfab 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -1161,6 +1161,22 @@ def : Pat<(v16i8 (srl (sub v16i8:$vA, (v16i8 (bitconvert(vnot v4i32:$vB)))),
} // end HasAltivec
+// [PO VRT VRA VRB 1 PS XO], "_o" means CR6 is set.
+class VX_VT5_VA5_VB5_PS1_XO9_o<bits<9> xo, string opc, list<dag> pattern>
+ : VX_RD5_RSp5_PS1_XO9<xo,
+ (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, u1imm:$PS),
+ !strconcat(opc, " $vD, $vA, $vB, $PS"), IIC_VecFP, pattern> {
+ let Defs = [CR6];
+}
+
+// [PO VRT VRA VRB 1 / XO]
+class VX_VT5_VA5_VB5_XO9_o<bits<9> xo, string opc, list<dag> pattern>
+ : VX_RD5_RSp5_PS1_XO9<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP, pattern> {
+ let Defs = [CR6];
+ let PS = 0;
+}
+
def HasP8Altivec : Predicate<"Subtarget->hasP8Altivec()">;
def HasP8Crypto : Predicate<"Subtarget->hasP8Crypto()">;
let Predicates = [HasP8Altivec] in {
@@ -1351,6 +1367,13 @@ def VUPKHSW : VX2_Int_Ty2<1614, "vupkhsw", int_ppc_altivec_vupkhsw,
v2i64, v4i32>;
def VUPKLSW : VX2_Int_Ty2<1742, "vupklsw", int_ppc_altivec_vupklsw,
v2i64, v4i32>;
+def BCDADD_rec : VX_VT5_VA5_VB5_PS1_XO9_o<1, "bcdadd." , []>;
+def BCDSUB_rec : VX_VT5_VA5_VB5_PS1_XO9_o<65, "bcdsub." , []>;
+
+def : Pat<(v16i8 (int_ppc_bcdadd v16i8:$vA, v16i8:$vB, timm:$PS)),
+ (BCDADD_rec $vA, $vB, $PS)>;
+def : Pat<(v16i8 (int_ppc_bcdsub v16i8:$vA, v16i8:$vB, timm:$PS)),
+ (BCDSUB_rec $vA, $vB, $PS)>;
// Shuffle patterns for unary and swapped (LE) vector pack modulo.
def:Pat<(vpkudum_unary_shuffle v16i8:$vA, undef),
@@ -1598,22 +1621,6 @@ def BCDCPSGN_rec : VX1_VT5_VA5_VB5<833, "bcdcpsgn.", []>;
def BCDSETSGN_rec : VX_VT5_EO5_VB5_PS1_XO9_o<31, 385, "bcdsetsgn.", []>;
-// [PO VRT VRA VRB 1 PS XO], "_o" means CR6 is set.
-class VX_VT5_VA5_VB5_PS1_XO9_o<bits<9> xo, string opc, list<dag> pattern>
- : VX_RD5_RSp5_PS1_XO9<xo,
- (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, u1imm:$PS),
- !strconcat(opc, " $vD, $vA, $vB, $PS"), IIC_VecFP, pattern> {
- let Defs = [CR6];
-}
-
-// [PO VRT VRA VRB 1 / XO]
-class VX_VT5_VA5_VB5_XO9_o<bits<9> xo, string opc, list<dag> pattern>
- : VX_RD5_RSp5_PS1_XO9<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP, pattern> {
- let Defs = [CR6];
- let PS = 0;
-}
-
// Decimal Shift/Unsigned-Shift/Shift-and-Round
def BCDS_rec : VX_VT5_VA5_VB5_PS1_XO9_o<193, "bcds." , []>;
def BCDUS_rec : VX_VT5_VA5_VB5_XO9_o <129, "bcdus.", []>;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 649a150866b4..a0fd2111de11 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2138,9 +2138,8 @@ bool PPCInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
}
static bool MBBDefinesCTR(MachineBasicBlock &MBB) {
- for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
- I != IE; ++I)
- if (I->definesRegister(PPC::CTR) || I->definesRegister(PPC::CTR8))
+ for (MachineInstr &MI : MBB)
+ if (MI.definesRegister(PPC::CTR) || MI.definesRegister(PPC::CTR8))
return true;
return false;
}
@@ -2331,8 +2330,7 @@ bool PPCInstrInfo::ClobbersPredicate(MachineInstr &MI,
&PPC::CTRRCRegClass, &PPC::CTRRC8RegClass };
bool Found = false;
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
for (unsigned c = 0; c < array_lengthof(RCs) && !Found; ++c) {
const TargetRegisterClass *RC = RCs[c];
if (MO.isReg()) {
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index d2d5ca92ca1c..d92a10c5b208 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2471,6 +2471,7 @@ def DblwdCmp {
// [HasVSX, HasP8Vector, IsLittleEndian]
// [HasVSX, HasP8Vector, NoP9Vector, IsBigEndian, IsPPC64]
// [HasVSX, HasP8Vector, NoP9Vector, IsLittleEndian]
+// [HasVSX, HasP8Altivec]
// [HasVSX, HasDirectMove]
// [HasVSX, HasDirectMove, IsBigEndian]
// [HasVSX, HasDirectMove, IsLittleEndian]
@@ -2500,6 +2501,10 @@ let Predicates = [HasVSX, IsBigEndian, HasP8Altivec] in
def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor v16i8:$a,
v16i8:$b, v16i8:$c)),
(v16i8 (VPERMXOR $a, $b, $c))>;
+let Predicates = [HasVSX, HasP8Altivec] in
+ def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor_be v16i8:$a,
+ v16i8:$b, v16i8:$c)),
+ (v16i8 (VPERMXOR $a, $b, $c))>;
let AddedComplexity = 400 in {
// Valid for any VSX subtarget, regardless of endianness.
diff --git a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
index 5cc180d770b2..22c5b6c11289 100644
--- a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -152,9 +152,9 @@ void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP) {
OutMI.setOpcode(MI->getOpcode());
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ for (const MachineOperand &MO : MI->operands()) {
MCOperand MCOp;
- if (LowerPPCMachineOperandToMCOperand(MI->getOperand(i), MCOp, AP))
+ if (LowerPPCMachineOperandToMCOperand(MO, MCOp, AP))
OutMI.addOperand(MCOp);
}
}
diff --git a/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp b/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
index bdff5109c1e1..9d5206f8fd43 100644
--- a/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
@@ -149,6 +149,79 @@ static bool checkOpConstraints(FusionFeature::FusionKind Kd,
case FusionFeature::FK_SldiAdd:
return (matchingImmOps(FirstMI, 2, 3) && matchingImmOps(FirstMI, 3, 60)) ||
(matchingImmOps(FirstMI, 2, 6) && matchingImmOps(FirstMI, 3, 57));
+
+ // rldicl rx, ra, 1, 0 - xor
+ case FusionFeature::FK_RotateLeftXor:
+ return matchingImmOps(FirstMI, 2, 1) && matchingImmOps(FirstMI, 3, 0);
+
+ // rldicr rx, ra, 1, 63 - xor
+ case FusionFeature::FK_RotateRightXor:
+ return matchingImmOps(FirstMI, 2, 1) && matchingImmOps(FirstMI, 3, 63);
+
+  // We actually use CMPW* and CMPD*; the 'l' field does not exist as a
+  // separate operand in the instruction.
+
+ // { lbz,lbzx,lhz,lhzx,lwz,lwzx } - cmpi 0,1,rx,{ 0,1,-1 }
+ // { lbz,lbzx,lhz,lhzx,lwz,lwzx } - cmpli 0,L,rx,{ 0,1 }
+ case FusionFeature::FK_LoadCmp1:
+ // { ld,ldx } - cmpi 0,1,rx,{ 0,1,-1 }
+ // { ld,ldx } - cmpli 0,1,rx,{ 0,1 }
+ case FusionFeature::FK_LoadCmp2: {
+ const MachineOperand &BT = SecondMI.getOperand(0);
+ if (!BT.isReg() ||
+ (!Register::isVirtualRegister(BT.getReg()) && BT.getReg() != PPC::CR0))
+ return false;
+ if (SecondMI.getOpcode() == PPC::CMPDI &&
+ matchingImmOps(SecondMI, 2, -1, 16))
+ return true;
+ return matchingImmOps(SecondMI, 2, 0) || matchingImmOps(SecondMI, 2, 1);
+ }
+
+ // { lha,lhax,lwa,lwax } - cmpi 0,L,rx,{ 0,1,-1 }
+ case FusionFeature::FK_LoadCmp3: {
+ const MachineOperand &BT = SecondMI.getOperand(0);
+ if (!BT.isReg() ||
+ (!Register::isVirtualRegister(BT.getReg()) && BT.getReg() != PPC::CR0))
+ return false;
+ return matchingImmOps(SecondMI, 2, 0) || matchingImmOps(SecondMI, 2, 1) ||
+ matchingImmOps(SecondMI, 2, -1, 16);
+ }
+
+ // mtctr - { bcctr,bcctrl }
+ case FusionFeature::FK_ZeroMoveCTR:
+    // ( mtctr rx ) is an alias of ( mtspr 9, rx )
+ return (FirstMI.getOpcode() != PPC::MTSPR &&
+ FirstMI.getOpcode() != PPC::MTSPR8) ||
+ matchingImmOps(FirstMI, 0, 9);
+
+ // mtlr - { bclr,bclrl }
+ case FusionFeature::FK_ZeroMoveLR:
+    // ( mtlr rx ) is an alias of ( mtspr 8, rx )
+ return (FirstMI.getOpcode() != PPC::MTSPR &&
+ FirstMI.getOpcode() != PPC::MTSPR8) ||
+ matchingImmOps(FirstMI, 0, 8);
+
+ // addis rx,ra,si - addi rt,rx,SI, SI >= 0
+ case FusionFeature::FK_AddisAddi: {
+ const MachineOperand &RA = FirstMI.getOperand(1);
+ const MachineOperand &SI = SecondMI.getOperand(2);
+ if (!SI.isImm() || !RA.isReg())
+ return false;
+ if (RA.getReg() == PPC::ZERO || RA.getReg() == PPC::ZERO8)
+ return false;
+ return SignExtend64(SI.getImm(), 16) >= 0;
+ }
+
+ // addi rx,ra,si - addis rt,rx,SI, ra > 0, SI >= 2
+ case FusionFeature::FK_AddiAddis: {
+ const MachineOperand &RA = FirstMI.getOperand(1);
+ const MachineOperand &SI = FirstMI.getOperand(2);
+ if (!SI.isImm() || !RA.isReg())
+ return false;
+ if (RA.getReg() == PPC::ZERO || RA.getReg() == PPC::ZERO8)
+ return false;
+ int64_t ExtendedSI = SignExtend64(SI.getImm(), 16);
+ return ExtendedSI >= 2;
+ }
}
llvm_unreachable("All the cases should have been handled");
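The SignExtend64(SI.getImm(), 16) calls matter because the operand may hold the raw 16-bit field the instruction encodes, while the fusion constraint is on its signed interpretation. A small standalone check (mirrors the FK_AddisAddi test; the helper name is hypothetical):

    #include "llvm/Support/MathExtras.h"

    bool addisAddiImmFuses(int64_t RawImm) {
      // SignExtend64(0x7FFF, 16) == 32767 -> fuses;
      // SignExtend64(0xFFFE, 16) == -2    -> does not.
      return llvm::SignExtend64(RawImm, 16) >= 0;
    }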
diff --git a/llvm/lib/Target/PowerPC/PPCMacroFusion.def b/llvm/lib/Target/PowerPC/PPCMacroFusion.def
index 469a24800423..e4954b722fd0 100644
--- a/llvm/lib/Target/PowerPC/PPCMacroFusion.def
+++ b/llvm/lib/Target/PowerPC/PPCMacroFusion.def
@@ -78,5 +78,80 @@ FUSION_FEATURE(VecLogical, hasLogicalFusion, -1,
FUSION_FEATURE(SldiAdd, hasArithAddFusion, -1, FUSION_OP_SET(RLDICR, RLDICR_32),
FUSION_OP_SET(ADD4, ADD8, SUBF, SUBF8))
+// rldicl rx, ra, 1, 0 - xor
+FUSION_FEATURE(RotateLeftXor, hasSha3Fusion, 1,
+ FUSION_OP_SET(RLDICL, RLDICL_32, RLDICL_32_64),
+ FUSION_OP_SET(XOR, XOR8))
+
+// rldicr rx, ra, 1, 63 - xor
+FUSION_FEATURE(RotateRightXor, hasSha3Fusion, 1,
+ FUSION_OP_SET(RLDICR, RLDICR_32), FUSION_OP_SET(XOR, XOR8))
+
+// There are two special cases in the 'load-compare' series, so we have to
+// split them into several pattern groups to fit the current framework. This
+// can become clearer once we switch to a more expressive approach.
+
+// { lbz,lbzx,lhz,lhzx,lwz,lwzx } - cmpi 0,1,rx,{ 0,1,-1 }
+// { lbz,lbzx,lhz,lhzx,lwz,lwzx } - cmpli 0,L,rx,{ 0,1 }
+FUSION_FEATURE(LoadCmp1, hasCompareFusion, 1,
+ FUSION_OP_SET(LBZ, LBZ8, LBZX, LBZX8, LBZXTLS, LBZXTLS_,
+ LBZXTLS_32, LHZ, LHZ8, LHZX, LHZX8, LHZXTLS,
+ LHZXTLS_, LHZXTLS_32, LWZ, LWZ8, LWZX, LWZX8,
+ LWZXTLS, LWZXTLS_, LWZXTLS_32),
+ FUSION_OP_SET(CMPDI, CMPLDI, CMPLWI))
+
+// { ld,ldx } - cmpi 0,1,rx,{ 0,1,-1 }
+// { ld,ldx } - cmpli 0,1,rx,{ 0,1 }
+FUSION_FEATURE(LoadCmp2, hasCompareFusion, 1,
+ FUSION_OP_SET(LD, LDX, LDXTLS, LDXTLS_),
+ FUSION_OP_SET(CMPDI, CMPLDI))
+
+// { lha,lhax,lwa,lwax } - cmpi 0,L,rx,{ 0,1,-1 }
+FUSION_FEATURE(LoadCmp3, hasCompareFusion, 1,
+ FUSION_OP_SET(LHA, LHA8, LHAX, LHAX8, LWA, LWA_32, LWAX,
+ LWAX_32),
+ FUSION_OP_SET(CMPLDI, CMPLWI))
+
+// ori - oris
+FUSION_FEATURE(OriOris, hasWideImmFusion, 1, FUSION_OP_SET(ORI, ORI8),
+ FUSION_OP_SET(ORIS, ORIS8))
+
+// lis - ori
+FUSION_FEATURE(LisOri, hasWideImmFusion, 1, FUSION_OP_SET(LIS, LIS8),
+ FUSION_OP_SET(ORI, ORI8))
+
+// oris - ori
+FUSION_FEATURE(OrisOri, hasWideImmFusion, 1, FUSION_OP_SET(ORIS, ORIS8),
+ FUSION_OP_SET(ORI, ORI8))
+
+// xori - xoris
+FUSION_FEATURE(XoriXoris, hasWideImmFusion, 1, FUSION_OP_SET(XORI, XORI8),
+ FUSION_OP_SET(XORIS, XORIS8))
+
+// xoris - xori
+FUSION_FEATURE(XorisXori, hasWideImmFusion, 1, FUSION_OP_SET(XORIS, XORIS8),
+ FUSION_OP_SET(XORI, XORI8))
+
+// addis rx,ra,si - addi rt,rx,SI, SI >= 0
+FUSION_FEATURE(AddisAddi, hasWideImmFusion, 1,
+ FUSION_OP_SET(ADDIS, ADDIS8, ADDIStocHA8),
+ FUSION_OP_SET(ADDI, ADDI8, ADDItocL))
+
+// addi rx,ra,si - addis rt,rx,SI, ra > 0, SI >= 2
+FUSION_FEATURE(AddiAddis, hasWideImmFusion, 1,
+ FUSION_OP_SET(ADDI, ADDI8, ADDItocL),
+ FUSION_OP_SET(ADDIS, ADDIS8, ADDIStocHA8))
+
+// mtctr - { bcctr,bcctrl }
+FUSION_FEATURE(ZeroMoveCTR, hasZeroMoveFusion, -1,
+ FUSION_OP_SET(MTCTR, MTCTRloop, MTSPR8, MTSPR),
+ FUSION_OP_SET(BCCTR, BCCTRn, BCCTR8, BCCTR8n, BCCTRL, BCCTRLn,
+ BCCTRL8, BCCTRL8n, gBCCTR, gBCCTRL))
+
+// mtlr - { bclr,bclrl }
+FUSION_FEATURE(ZeroMoveLR, hasZeroMoveFusion, -1,
+ FUSION_OP_SET(MTLR8, MTLR, MTSPR8, MTSPR),
+ FUSION_OP_SET(BCLR, BCLRn, gBCLR, BCLRL, BCLRLn, gBCLRL))
+
#undef FUSION_FEATURE
#undef FUSION_OP_SET
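
PPCMacroFusion.def is consumed X-macro style: each includer defines FUSION_FEATURE, includes the file, and the entries expand into whatever table is needed. A minimal sketch of how the FusionFeature::FK_* kinds used by checkOpConstraints could be generated this way (the real includer in PPCMacroFusion.cpp may differ in detail):

    struct FusionFeature {
      enum FusionKind {
    #define FUSION_FEATURE(KIND, HAS_FEATURE, DEP_OP_IDX, OPSET1, OPSET2)      \
      FK_##KIND,
    #include "PPCMacroFusion.def"
        FK_END
      };
    };
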
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index dfc29dbb10f1..1258a1281597 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -131,6 +131,10 @@ void PPCSubtarget::initializeEnvironment() {
HasAddLogicalFusion = false;
HasLogicalAddFusion = false;
HasLogicalFusion = false;
+ HasSha3Fusion = false;
+ HasCompareFusion = false;
+ HasWideImmFusion = false;
+ HasZeroMoveFusion = false;
IsISA2_06 = false;
IsISA2_07 = false;
IsISA3_0 = false;
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h
index 783ea121ccb8..d52833cb1465 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -151,6 +151,10 @@ protected:
bool HasAddLogicalFusion;
bool HasLogicalAddFusion;
bool HasLogicalFusion;
+ bool HasSha3Fusion;
+ bool HasCompareFusion;
+ bool HasWideImmFusion;
+ bool HasZeroMoveFusion;
bool IsISA2_06;
bool IsISA2_07;
bool IsISA3_0;
@@ -340,6 +344,10 @@ public:
bool hasAddLogicalFusion() const { return HasAddLogicalFusion; }
bool hasLogicalAddFusion() const { return HasLogicalAddFusion; }
bool hasLogicalFusion() const { return HasLogicalFusion; }
+ bool hasCompareFusion() const { return HasCompareFusion; }
+ bool hasWideImmFusion() const { return HasWideImmFusion; }
+ bool hasSha3Fusion() const { return HasSha3Fusion; }
+ bool hasZeroMoveFusion() const { return HasZeroMoveFusion; }
bool needsSwapsForVSXMemOps() const {
return hasVSX() && isLittleEndian() && !hasP9Vector();
}
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 77d5a2668b60..5d6f58a77a39 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -318,9 +318,20 @@ InstructionCost PPCTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
return PPCTTIImpl::getIntImmCost(Imm, Ty, CostKind);
}
+// Check if the current Type is an MMA vector type. The only valid MMA types
+// are v256i1 and v512i1.
+static bool isMMAType(Type *Ty) {
+ return Ty->isVectorTy() && (Ty->getScalarSizeInBits() == 1) &&
+ (Ty->getPrimitiveSizeInBits() > 128);
+}
+
InstructionCost PPCTTIImpl::getUserCost(const User *U,
ArrayRef<const Value *> Operands,
TTI::TargetCostKind CostKind) {
+ // Set the max cost if an MMA type is present (v256i1, v512i1).
+ if (isMMAType(U->getType()))
+ return InstructionCost::getMax();
+
// We already implement getCastInstrCost and getMemoryOpCost where we perform
// the vector adjustment there.
if (isa<CastInst>(U) || isa<LoadInst>(U) || isa<StoreInst>(U))
@@ -942,32 +953,39 @@ unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) {
return 2;
}
-// Adjust the cost of vector instructions on targets which there is overlap
-// between the vector and scalar units, thereby reducing the overall throughput
-// of vector code wrt. scalar code.
-InstructionCost PPCTTIImpl::vectorCostAdjustment(InstructionCost Cost,
- unsigned Opcode, Type *Ty1,
- Type *Ty2) {
+// Returns a cost adjustment factor for vector instructions on targets where
+// there is overlap between the vector and scalar units, thereby reducing the
+// overall throughput of vector code wrt. scalar code.
+// An invalid instruction cost is returned if the type is an MMA vector type.
+InstructionCost PPCTTIImpl::vectorCostAdjustmentFactor(unsigned Opcode,
+ Type *Ty1, Type *Ty2) {
+ // If the vector type is an MMA type (v256i1, v512i1), an invalid
+ // instruction cost is returned. This is to signify to other cost computing
+ // functions to return the maximum instruction cost in order to prevent any
+ // opportunities for the optimizer to produce MMA types within the IR.
+ if (isMMAType(Ty1))
+ return InstructionCost::getInvalid();
+
if (!ST->vectorsUseTwoUnits() || !Ty1->isVectorTy())
- return Cost;
+ return InstructionCost(1);
std::pair<InstructionCost, MVT> LT1 = TLI->getTypeLegalizationCost(DL, Ty1);
// If type legalization involves splitting the vector, we don't want to
// double the cost at every step - only the last step.
if (LT1.first != 1 || !LT1.second.isVector())
- return Cost;
+ return InstructionCost(1);
int ISD = TLI->InstructionOpcodeToISD(Opcode);
if (TLI->isOperationExpand(ISD, LT1.second))
- return Cost;
+ return InstructionCost(1);
if (Ty2) {
std::pair<InstructionCost, MVT> LT2 = TLI->getTypeLegalizationCost(DL, Ty2);
if (LT2.first != 1 || !LT2.second.isVector())
- return Cost;
+ return InstructionCost(1);
}
- return Cost * 2;
+ return InstructionCost(2);
}
InstructionCost PPCTTIImpl::getArithmeticInstrCost(
@@ -977,6 +995,11 @@ InstructionCost PPCTTIImpl::getArithmeticInstrCost(
TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
const Instruction *CxtI) {
assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
+
+ InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Ty, nullptr);
+ if (!CostFactor.isValid())
+ return InstructionCost::getMax();
+
// TODO: Handle more cost kinds.
if (CostKind != TTI::TCK_RecipThroughput)
return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
@@ -986,12 +1009,18 @@ InstructionCost PPCTTIImpl::getArithmeticInstrCost(
// Fallback to the default implementation.
InstructionCost Cost = BaseT::getArithmeticInstrCost(
Opcode, Ty, CostKind, Op1Info, Op2Info, Opd1PropInfo, Opd2PropInfo);
- return vectorCostAdjustment(Cost, Opcode, Ty, nullptr);
+ return Cost * CostFactor;
}
InstructionCost PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
ArrayRef<int> Mask, int Index,
Type *SubTp) {
+
+ InstructionCost CostFactor =
+ vectorCostAdjustmentFactor(Instruction::ShuffleVector, Tp, nullptr);
+ if (!CostFactor.isValid())
+ return InstructionCost::getMax();
+
// Legalize the type.
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
@@ -1000,8 +1029,7 @@ InstructionCost PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
// instruction). We need one such shuffle instruction for each actual
// register (this is not true for arbitrary shuffles, but is true for the
// structured types of shuffles covered by TTI::ShuffleKind).
- return vectorCostAdjustment(LT.first, Instruction::ShuffleVector, Tp,
- nullptr);
+ return LT.first * CostFactor;
}
InstructionCost PPCTTIImpl::getCFInstrCost(unsigned Opcode,
@@ -1020,9 +1048,13 @@ InstructionCost PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
const Instruction *I) {
assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
+ InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Dst, Src);
+ if (!CostFactor.isValid())
+ return InstructionCost::getMax();
+
InstructionCost Cost =
BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
- Cost = vectorCostAdjustment(Cost, Opcode, Dst, Src);
+ Cost *= CostFactor;
// TODO: Allow non-throughput costs that aren't binary.
if (CostKind != TTI::TCK_RecipThroughput)
return Cost == 0 ? 0 : 1;
@@ -1034,12 +1066,17 @@ InstructionCost PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const Instruction *I) {
+ InstructionCost CostFactor =
+ vectorCostAdjustmentFactor(Opcode, ValTy, nullptr);
+ if (!CostFactor.isValid())
+ return InstructionCost::getMax();
+
InstructionCost Cost =
BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
// TODO: Handle other cost kinds.
if (CostKind != TTI::TCK_RecipThroughput)
return Cost;
- return vectorCostAdjustment(Cost, Opcode, ValTy, nullptr);
+ return Cost * CostFactor;
}
InstructionCost PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
@@ -1049,8 +1086,12 @@ InstructionCost PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
+ InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Val, nullptr);
+ if (!CostFactor.isValid())
+ return InstructionCost::getMax();
+
InstructionCost Cost = BaseT::getVectorInstrCost(Opcode, Val, Index);
- Cost = vectorCostAdjustment(Cost, Opcode, Val, nullptr);
+ Cost *= CostFactor;
if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {
// Double-precision scalars are already located in index #0 (or #1 if LE).
@@ -1065,7 +1106,7 @@ InstructionCost PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
if (ISD == ISD::INSERT_VECTOR_ELT)
// A move-to VSR and a permute/insert. Assume vector operation cost
// for both (cost will be 2x on P9).
- return vectorCostAdjustment(2, Opcode, Val, nullptr);
+ return 2 * CostFactor;
// It's an extract. Maybe we can do a cheap move-from VSR.
unsigned EltSize = Val->getScalarSizeInBits();
@@ -1082,7 +1123,7 @@ InstructionCost PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
// We need a vector extract (or mfvsrld). Assume vector operation cost.
// The cost of the load constant for a vector extract is disregarded
// (invariant, easily schedulable).
- return vectorCostAdjustment(1, Opcode, Val, nullptr);
+ return CostFactor;
} else if (ST->hasDirectMove())
// Assume permute has standard cost.
@@ -1114,6 +1155,11 @@ InstructionCost PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I) {
+
+ InstructionCost CostFactor = vectorCostAdjustmentFactor(Opcode, Src, nullptr);
+ if (!CostFactor.isValid())
+ return InstructionCost::getMax();
+
if (TLI->getValueType(DL, Src, true) == MVT::Other)
return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
CostKind);
@@ -1128,7 +1174,7 @@ InstructionCost PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
if (CostKind != TTI::TCK_RecipThroughput)
return Cost;
- Cost = vectorCostAdjustment(Cost, Opcode, Src, nullptr);
+ Cost *= CostFactor;
bool IsAltivecType = ST->hasAltivec() &&
(LT.second == MVT::v16i8 || LT.second == MVT::v8i16 ||
@@ -1194,6 +1240,11 @@ InstructionCost PPCTTIImpl::getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond, bool UseMaskForGaps) {
+ InstructionCost CostFactor =
+ vectorCostAdjustmentFactor(Opcode, VecTy, nullptr);
+ if (!CostFactor.isValid())
+ return InstructionCost::getMax();
+
if (UseMaskForCond || UseMaskForGaps)
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
Alignment, AddressSpace, CostKind,
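
After this refactor every cost hook in the file follows one shape: compute the factor first, treat an invalid factor as "MMA type present" and saturate, otherwise scale the base cost. A condensed sketch of the pattern (getBaseCost stands in for the various BaseT:: calls):

    InstructionCost Factor = vectorCostAdjustmentFactor(Opcode, Ty, nullptr);
    if (!Factor.isValid())
      return InstructionCost::getMax(); // keep v256i1/v512i1 out of the IR
    InstructionCost Cost = getBaseCost(Opcode, Ty); // hypothetical base cost
    return Cost * Factor;               // Factor is 1 or 2
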
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index aa84013803af..7aeb0c59d503 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -100,8 +100,8 @@ public:
unsigned getCacheLineSize() const override;
unsigned getPrefetchDistance() const override;
unsigned getMaxInterleaveFactor(unsigned VF);
- InstructionCost vectorCostAdjustment(InstructionCost Cost, unsigned Opcode,
- Type *Ty1, Type *Ty2);
+ InstructionCost vectorCostAdjustmentFactor(unsigned Opcode, Type *Ty1,
+ Type *Ty2);
InstructionCost getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
index d1979b5456ce..f1c3810f4ee5 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
@@ -170,6 +170,14 @@ void RISCVInstPrinter::printAtomicMemOp(const MCInst *MI, unsigned OpNo,
void RISCVInstPrinter::printVTypeI(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNo).getImm();
+ // Print the raw immediate for reserved values: vlmul[2:0]=4, vsew[2:0]=0b1xx,
+ // or non-zero bits 8/9/10.
+ if (RISCVVType::getVLMUL(Imm) == RISCVII::VLMUL::LMUL_RESERVED ||
+ RISCVVType::getSEW(Imm) > 64 || (Imm & 0x700) != 0) {
+ O << Imm;
+ return;
+ }
+ // Print the text form.
RISCVVType::printVType(Imm, O);
}
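
For context: under the RVV 1.0 vtype layout the checked fields sit at vlmul=[2:0] and vsew=[5:3], with vta/vma at bits 6/7 and bits 8-10 reserved. A standalone sketch of the same reservedness test, assuming that layout:

    static bool isReservedVType(unsigned Imm) {
      unsigned VLMul = Imm & 0x7;        // 0b100 is the reserved LMUL encoding
      unsigned VSEW = (Imm >> 3) & 0x7;  // 0b1xx means SEW > 64: reserved
      return VLMul == 4 || VSEW >= 4 || (Imm & 0x700) != 0;
    }
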
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 595c3cdfbb1d..f5d491938050 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -237,7 +237,13 @@ bool RISCVFrameLowering::hasBP(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *TRI = STI.getRegisterInfo();
- return MFI.hasVarSizedObjects() && TRI->hasStackRealignment(MF);
+ // If we do not reserve stack space for outgoing arguments in the prologue,
+ // we adjust the stack pointer before each call instruction. After the
+ // adjustment, SP can no longer be used to access the stack objects for the
+ // arguments; use BP to access these stack objects instead.
+ return (MFI.hasVarSizedObjects() ||
+ (!hasReservedCallFrame(MF) && MFI.getMaxCallFrameSize() != 0)) &&
+ TRI->hasStackRealignment(MF);
}
// Determines the size of the frame and maximum call frame size.
@@ -1065,10 +1071,14 @@ bool RISCVFrameLowering::restoreCalleeSavedRegisters(
if (MI != MBB.end() && !MI->isDebugInstr())
DL = MI->getDebugLoc();
- // Manually restore values not restored by libcall. Insert in reverse order.
+ // Manually restore values not restored by libcall.
+ // Keep the same order as in the prologue. There is no need to reverse the
+ // order in the epilogue. In addition, the return address will be restored
+ // first in the epilogue, which increases the opportunity to avoid the
+ // load-to-use data hazard between loading RA and returning via RA.
// loadRegFromStackSlot can insert multiple instructions.
const auto &NonLibcallCSI = getNonLibcallCSI(*MF, CSI);
- for (auto &CS : reverse(NonLibcallCSI)) {
+ for (auto &CS : NonLibcallCSI) {
Register Reg = CS.getReg();
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI);
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 0f1a6e5f9154..f3331571fc55 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -335,17 +335,29 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
setOperationAction(ISD::SELECT, MVT::f16, Custom);
setOperationAction(ISD::BR_CC, MVT::f16, Expand);
- for (auto Op : FPOpToExpand)
- setOperationAction(Op, MVT::f16, Expand);
setOperationAction(ISD::FREM, MVT::f16, Promote);
- setOperationAction(ISD::FCEIL, MVT::f16, Promote);
- setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
- setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
- setOperationAction(ISD::FRINT, MVT::f16, Promote);
- setOperationAction(ISD::FROUND, MVT::f16, Promote);
- setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote);
- setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
+ setOperationAction(ISD::FCEIL, MVT::f16, Promote);
+ setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
+ setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
+ setOperationAction(ISD::FRINT, MVT::f16, Promote);
+ setOperationAction(ISD::FROUND, MVT::f16, Promote);
+ setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote);
+ setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
+ setOperationAction(ISD::FPOW, MVT::f16, Promote);
+ setOperationAction(ISD::FPOWI, MVT::f16, Promote);
+ setOperationAction(ISD::FCOS, MVT::f16, Promote);
+ setOperationAction(ISD::FSIN, MVT::f16, Promote);
+ setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
+ setOperationAction(ISD::FEXP, MVT::f16, Promote);
+ setOperationAction(ISD::FEXP2, MVT::f16, Promote);
+ setOperationAction(ISD::FLOG, MVT::f16, Promote);
+ setOperationAction(ISD::FLOG2, MVT::f16, Promote);
+ setOperationAction(ISD::FLOG10, MVT::f16, Promote);
+
+ // We need to custom promote this.
+ if (Subtarget.is64Bit())
+ setOperationAction(ISD::FPOWI, MVT::i32, Custom);
}
if (Subtarget.hasStdExtF()) {
@@ -676,6 +688,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FMINNUM, VT, Legal);
setOperationAction(ISD::FMAXNUM, VT, Legal);
+ setOperationAction(ISD::FTRUNC, VT, Custom);
+ setOperationAction(ISD::FCEIL, VT, Custom);
+ setOperationAction(ISD::FFLOOR, VT, Custom);
+
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
@@ -924,6 +940,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP_ROUND, VT, Custom);
setOperationAction(ISD::FP_EXTEND, VT, Custom);
+ setOperationAction(ISD::FTRUNC, VT, Custom);
+ setOperationAction(ISD::FCEIL, VT, Custom);
+ setOperationAction(ISD::FFLOOR, VT, Custom);
+
for (auto CC : VFPCCToExpand)
setCondCodeAction(CC, VT, Expand);
@@ -1165,6 +1185,10 @@ bool RISCVTargetLowering::shouldSinkOperands(
case Instruction::Shl:
case Instruction::LShr:
case Instruction::AShr:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
return Operand == 1;
case Instruction::Call:
if (auto *II = dyn_cast<IntrinsicInst>(I)) {
@@ -1631,6 +1655,66 @@ static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) {
return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
}
+// Expand vector FTRUNC, FCEIL, and FFLOOR by converting to the integer domain
+// and back, taking care not to convert values that are NaN or already
+// integral.
+// TODO: Floor and ceil could be shorter by changing rounding mode, but we don't
+// have FRM dependencies modeled yet.
+static SDValue lowerFTRUNC_FCEIL_FFLOOR(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
+ assert(VT.isVector() && "Unexpected type");
+
+ SDLoc DL(Op);
+
+ // Freeze the source since we are increasing the number of uses.
+ SDValue Src = DAG.getNode(ISD::FREEZE, DL, VT, Op.getOperand(0));
+
+ // Truncate to integer and convert back to FP.
+ MVT IntVT = VT.changeVectorElementTypeToInteger();
+ SDValue Truncated = DAG.getNode(ISD::FP_TO_SINT, DL, IntVT, Src);
+ Truncated = DAG.getNode(ISD::SINT_TO_FP, DL, VT, Truncated);
+
+ MVT SetccVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
+
+ if (Op.getOpcode() == ISD::FCEIL) {
+ // If the truncated value is greater than or equal to the original
+ // value, we've computed the ceil. Otherwise, we went the wrong way and
+ // need to increase by 1.
+ // FIXME: This should use a masked operation. Handle here or in isel?
+ SDValue Adjust = DAG.getNode(ISD::FADD, DL, VT, Truncated,
+ DAG.getConstantFP(1.0, DL, VT));
+ SDValue NeedAdjust = DAG.getSetCC(DL, SetccVT, Truncated, Src, ISD::SETOLT);
+ Truncated = DAG.getSelect(DL, VT, NeedAdjust, Adjust, Truncated);
+ } else if (Op.getOpcode() == ISD::FFLOOR) {
+ // If the truncated value is less than or equal to the original value,
+ // we've computed the floor. Otherwise, we went the wrong way and need to
+ // decrease by 1.
+ // FIXME: This should use a masked operation. Handle here or in isel?
+ SDValue Adjust = DAG.getNode(ISD::FSUB, DL, VT, Truncated,
+ DAG.getConstantFP(1.0, DL, VT));
+ SDValue NeedAdjust = DAG.getSetCC(DL, SetccVT, Truncated, Src, ISD::SETOGT);
+ Truncated = DAG.getSelect(DL, VT, NeedAdjust, Adjust, Truncated);
+ }
+
+ // Restore the original sign so that -0.0 is preserved.
+ Truncated = DAG.getNode(ISD::FCOPYSIGN, DL, VT, Truncated, Src);
+
+ // Determine the largest integer that can be represented exactly. This value
+ // and anything larger have no fractional bits, so they don't need to be
+ // converted.
+ const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
+ unsigned Precision = APFloat::semanticsPrecision(FltSem);
+ APFloat MaxVal = APFloat(FltSem);
+ MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
+ /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
+ SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
+
+ // If abs(Src) was larger than MaxVal or nan, keep it.
+ SDValue Abs = DAG.getNode(ISD::FABS, DL, VT, Src);
+ SDValue Setcc = DAG.getSetCC(DL, SetccVT, Abs, MaxValNode, ISD::SETOLT);
+ return DAG.getSelect(DL, VT, Setcc, Truncated, Src);
+}
+
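
A scalar model may make the control flow easier to follow. This sketch mirrors the FCEIL path for f64 elements (precision 53, so MaxVal is 2^52); the DAG version computes every lane unconditionally and selects at the end, whereas the model can branch early:

    #include <cmath>
    double ceilModel(double X) {
      const double MaxVal = 4503599627370496.0; // 2^52: no fraction bits left
      if (!(std::fabs(X) < MaxVal))             // NaN or already integral
        return X;
      double T = (double)(long long)X;          // FP_TO_SINT then SINT_TO_FP
      if (T < X)                                // truncation went the wrong way
        T += 1.0;
      return std::copysign(T, X);               // preserve -0.0
    }
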
static SDValue lowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
@@ -2670,6 +2754,20 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
DAG.getConstant(3, DL, VT));
return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
}
+ case ISD::FPOWI: {
+ // Custom promote f16 powi with illegal i32 integer type on RV64. Once
+ // promoted this will be legalized into a libcall by LegalizeIntegerTypes.
+ if (Op.getValueType() == MVT::f16 && Subtarget.is64Bit() &&
+ Op.getOperand(1).getValueType() == MVT::i32) {
+ SDLoc DL(Op);
+ SDValue Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op.getOperand(0));
+ SDValue Powi =
+ DAG.getNode(ISD::FPOWI, DL, MVT::f32, Op0, Op.getOperand(1));
+ return DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, Powi,
+ DAG.getIntPtrConstant(0, DL));
+ }
+ return SDValue();
+ }
case ISD::FP_EXTEND: {
// RVV can only do fp_extend to types double the size as the source. We
// custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going
@@ -2858,6 +2956,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
return lowerFP_TO_INT_SAT(Op, DAG);
+ case ISD::FTRUNC:
+ case ISD::FCEIL:
+ case ISD::FFLOOR:
+ return lowerFTRUNC_FCEIL_FFLOOR(Op, DAG);
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_SMAX:
@@ -9834,6 +9936,23 @@ bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
return false;
}
+bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
+ EVT VT) const {
+ if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
+ return false;
+
+ switch (FPVT.getSimpleVT().SimpleTy) {
+ case MVT::f16:
+ return Subtarget.hasStdExtZfh();
+ case MVT::f32:
+ return Subtarget.hasStdExtF();
+ case MVT::f64:
+ return Subtarget.hasStdExtD();
+ default:
+ return false;
+ }
+}
+
bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
EVT VT) const {
VT = VT.getScalarType();
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 8e3d716ae919..849928eb46ae 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -514,6 +514,8 @@ public:
bool isLegalElementTypeForRVV(Type *ScalarTy) const;
+ bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
+
private:
/// RISCVCCAssignFn - This target-specific function extends the default
/// CCValAssign with additional information used to lower RISC-V calling
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index b653928ccea9..6f9cde966132 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -705,6 +705,7 @@ def PseudoLD : PseudoLoad<"ld">;
def PseudoSD : PseudoStore<"sd">;
} // Predicates = [IsRV64]
+def : InstAlias<"li $rd, $imm", (ADDI GPR:$rd, X0, simm12:$imm)>;
def : InstAlias<"mv $rd, $rs", (ADDI GPR:$rd, GPR:$rs, 0)>;
def : InstAlias<"not $rd, $rs", (XORI GPR:$rd, GPR:$rs, -1)>;
def : InstAlias<"neg $rd, $rs", (SUB GPR:$rd, X0, GPR:$rs)>;
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 388cce00bdf3..798532d5bc44 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/Support/ErrorHandling.h"
#define GET_REGINFO_TARGET_DESC
@@ -320,3 +321,30 @@ RISCVRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
return &RISCV::VRRegClass;
return RC;
}
+
+void RISCVRegisterInfo::getOffsetOpcodes(const StackOffset &Offset,
+ SmallVectorImpl<uint64_t> &Ops) const {
+ // VLENB is the length of a vector register in bytes. We use <vscale x 8 x i8>
+ // to represent one vector register. The dwarf offset is
+ // VLENB * scalable_offset / 8.
+ assert(Offset.getScalable() % 8 == 0 && "Invalid frame offset");
+
+ // Add fixed-sized offset using existing DIExpression interface.
+ DIExpression::appendOffset(Ops, Offset.getFixed());
+
+ unsigned VLENB = getDwarfRegNum(RISCV::VLENB, true);
+ int64_t VLENBSized = Offset.getScalable() / 8;
+ if (VLENBSized > 0) {
+ Ops.push_back(dwarf::DW_OP_constu);
+ Ops.push_back(VLENBSized);
+ Ops.append({dwarf::DW_OP_bregx, VLENB, 0ULL});
+ Ops.push_back(dwarf::DW_OP_mul);
+ Ops.push_back(dwarf::DW_OP_plus);
+ } else if (VLENBSized < 0) {
+ Ops.push_back(dwarf::DW_OP_constu);
+ Ops.push_back(-VLENBSized);
+ Ops.append({dwarf::DW_OP_bregx, VLENB, 0ULL});
+ Ops.push_back(dwarf::DW_OP_mul);
+ Ops.push_back(dwarf::DW_OP_minus);
+ }
+}
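
A worked example, assuming DIExpression::appendOffset emits DW_OP_plus_uconst for a positive fixed part: a slot at fixed offset 8 with a scalable part of 16 yields, since 16/8 == 2:

    // getOffsetOpcodes(StackOffset::get(/*Fixed=*/8, /*Scalable=*/16), Ops)
    // leaves Ops describing:
    //   DW_OP_plus_uconst 8           // fixed part
    //   DW_OP_constu 2                // Scalable / 8
    //   DW_OP_bregx <VLENB dwarf#> 0  // read vlenb
    //   DW_OP_mul                     // 2 * VLENB
    //   DW_OP_plus                    // add the scalable part
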
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
index 74a5b83ff6f3..2b2bbdfbdf32 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
@@ -63,6 +63,9 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
const TargetRegisterClass *
getLargestLegalSuperClass(const TargetRegisterClass *RC,
const MachineFunction &) const override;
+
+ void getOffsetOpcodes(const StackOffset &Offset,
+ SmallVectorImpl<uint64_t> &Ops) const override;
};
}
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index a915a572f3b7..a56f992d320e 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -480,6 +480,8 @@ let RegAltNameIndices = [ABIRegAltName] in {
def VL : RISCVReg<0, "vl", ["vl"]>;
def VXSAT : RISCVReg<0, "vxsat", ["vxsat"]>;
def VXRM : RISCVReg<0, "vxrm", ["vxrm"]>;
+ def VLENB : RISCVReg<0, "vlenb", ["vlenb"]>,
+ DwarfRegNum<[!add(4096, SysRegVLENB.Encoding)]>;
}
foreach m = [1, 2, 4] in {
diff --git a/llvm/lib/Target/RISCV/RISCVSystemOperands.td b/llvm/lib/Target/RISCV/RISCVSystemOperands.td
index 41599dd8bb3f..5a4c579dd708 100644
--- a/llvm/lib/Target/RISCV/RISCVSystemOperands.td
+++ b/llvm/lib/Target/RISCV/RISCVSystemOperands.td
@@ -388,4 +388,4 @@ def : SysReg<"vxrm", 0x00A>;
def : SysReg<"vcsr", 0x00F>;
def : SysReg<"vl", 0xC20>;
def : SysReg<"vtype", 0xC21>;
-def : SysReg<"vlenb", 0xC22>;
+def SysRegVLENB: SysReg<"vlenb", 0xC22>;
diff --git a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
index 7319924a24ba..259b37954183 100644
--- a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -53,9 +53,8 @@ namespace {
// instructions to fill delay slot.
F.getRegInfo().invalidateLiveness();
- for (MachineFunction::iterator FI = F.begin(), FE = F.end();
- FI != FE; ++FI)
- Changed |= runOnMachineBasicBlock(*FI);
+ for (MachineBasicBlock &MBB : F)
+ Changed |= runOnMachineBasicBlock(MBB);
return Changed;
}
@@ -319,8 +318,7 @@ void Filler::insertDefsUses(MachineBasicBlock::iterator MI,
SmallSet<unsigned, 32>& RegDefs,
SmallSet<unsigned, 32>& RegUses)
{
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg())
continue;
diff --git a/llvm/lib/Target/Sparc/LeonPasses.cpp b/llvm/lib/Target/Sparc/LeonPasses.cpp
index fa05a41f3127..bd26710fcbab 100644
--- a/llvm/lib/Target/Sparc/LeonPasses.cpp
+++ b/llvm/lib/Target/Sparc/LeonPasses.cpp
@@ -42,8 +42,7 @@ bool InsertNOPLoad::runOnMachineFunction(MachineFunction &MF) {
DebugLoc DL = DebugLoc();
bool Modified = false;
- for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
- MachineBasicBlock &MBB = *MFI;
+ for (MachineBasicBlock &MBB : MF) {
for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) {
MachineInstr &MI = *MBBI;
unsigned Opcode = MI.getOpcode();
@@ -77,10 +76,8 @@ bool DetectRoundChange::runOnMachineFunction(MachineFunction &MF) {
Subtarget = &MF.getSubtarget<SparcSubtarget>();
bool Modified = false;
- for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
- MachineBasicBlock &MBB = *MFI;
- for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) {
- MachineInstr &MI = *MBBI;
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
unsigned Opcode = MI.getOpcode();
if (Opcode == SP::CALL && MI.getNumOperands() > 0) {
MachineOperand &MO = MI.getOperand(0);
@@ -129,8 +126,7 @@ bool FixAllFDIVSQRT::runOnMachineFunction(MachineFunction &MF) {
DebugLoc DL = DebugLoc();
bool Modified = false;
- for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
- MachineBasicBlock &MBB = *MFI;
+ for (MachineBasicBlock &MBB : MF) {
for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) {
MachineInstr &MI = *MBBI;
unsigned Opcode = MI.getOpcode();
diff --git a/llvm/lib/Target/Sparc/SparcFrameLowering.cpp b/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
index d165052ca512..a740de9123c9 100644
--- a/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -343,19 +343,18 @@ void SparcFrameLowering::remapRegsForLeafProc(MachineFunction &MF) const {
}
// Rewrite MBB's Live-ins.
- for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
- MBB != E; ++MBB) {
+ for (MachineBasicBlock &MBB : MF) {
for (unsigned reg = SP::I0_I1; reg <= SP::I6_I7; ++reg) {
- if (!MBB->isLiveIn(reg))
+ if (!MBB.isLiveIn(reg))
continue;
- MBB->removeLiveIn(reg);
- MBB->addLiveIn(reg - SP::I0_I1 + SP::O0_O1);
+ MBB.removeLiveIn(reg);
+ MBB.addLiveIn(reg - SP::I0_I1 + SP::O0_O1);
}
for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) {
- if (!MBB->isLiveIn(reg))
+ if (!MBB.isLiveIn(reg))
continue;
- MBB->removeLiveIn(reg);
- MBB->addLiveIn(reg - SP::I0 + SP::O0);
+ MBB.removeLiveIn(reg);
+ MBB.addLiveIn(reg - SP::I0 + SP::O0);
}
}
diff --git a/llvm/lib/Target/Sparc/SparcMCInstLower.cpp b/llvm/lib/Target/Sparc/SparcMCInstLower.cpp
index 8ea317fdd453..4e7e7bb5c81b 100644
--- a/llvm/lib/Target/Sparc/SparcMCInstLower.cpp
+++ b/llvm/lib/Target/Sparc/SparcMCInstLower.cpp
@@ -97,8 +97,7 @@ void llvm::LowerSparcMachineInstrToMCInst(const MachineInstr *MI,
OutMI.setOpcode(MI->getOpcode());
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
MCOperand MCOp = LowerOperand(MI, MO, AP);
if (MCOp.isValid())
diff --git a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
index ac94570e568f..631cbff303e8 100644
--- a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -144,8 +144,7 @@ Reference SystemZElimCompare::getRegReferences(MachineInstr &MI, unsigned Reg) {
if (MI.isDebugInstr())
return Ref;
- for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
- const MachineOperand &MO = MI.getOperand(I);
+ for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg()) {
if (Register MOReg = MO.getReg()) {
if (TRI->regsOverlap(MOReg, Reg)) {
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index d11d118fb8ee..2f7cdfcf7bde 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -270,8 +270,8 @@ bool SystemZELFFrameLowering::spillCalleeSavedRegisters(
// Make sure all call-saved GPRs are included as operands and are
// marked as live on entry.
- for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
- unsigned Reg = CSI[I].getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned Reg = I.getReg();
if (SystemZ::GR64BitRegClass.contains(Reg))
addSavedGPR(MBB, MIB, Reg, true);
}
@@ -283,16 +283,16 @@ bool SystemZELFFrameLowering::spillCalleeSavedRegisters(
}
// Save FPRs/VRs in the normal TargetInstrInfo way.
- for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
- unsigned Reg = CSI[I].getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned Reg = I.getReg();
if (SystemZ::FP64BitRegClass.contains(Reg)) {
MBB.addLiveIn(Reg);
- TII->storeRegToStackSlot(MBB, MBBI, Reg, true, CSI[I].getFrameIdx(),
+ TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(),
&SystemZ::FP64BitRegClass, TRI);
}
if (SystemZ::VR128BitRegClass.contains(Reg)) {
MBB.addLiveIn(Reg);
- TII->storeRegToStackSlot(MBB, MBBI, Reg, true, CSI[I].getFrameIdx(),
+ TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(),
&SystemZ::VR128BitRegClass, TRI);
}
}
@@ -313,13 +313,13 @@ bool SystemZELFFrameLowering::restoreCalleeSavedRegisters(
DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
// Restore FPRs/VRs in the normal TargetInstrInfo way.
- for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
- unsigned Reg = CSI[I].getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned Reg = I.getReg();
if (SystemZ::FP64BitRegClass.contains(Reg))
- TII->loadRegFromStackSlot(MBB, MBBI, Reg, CSI[I].getFrameIdx(),
+ TII->loadRegFromStackSlot(MBB, MBBI, Reg, I.getFrameIdx(),
&SystemZ::FP64BitRegClass, TRI);
if (SystemZ::VR128BitRegClass.contains(Reg))
- TII->loadRegFromStackSlot(MBB, MBBI, Reg, CSI[I].getFrameIdx(),
+ TII->loadRegFromStackSlot(MBB, MBBI, Reg, I.getFrameIdx(),
&SystemZ::VR128BitRegClass, TRI);
}
@@ -345,8 +345,8 @@ bool SystemZELFFrameLowering::restoreCalleeSavedRegisters(
MIB.addImm(RestoreGPRs.GPROffset);
// Do a second scan adding regs as being defined by instruction
- for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
- unsigned Reg = CSI[I].getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned Reg = I.getReg();
if (Reg != RestoreGPRs.LowGPR && Reg != RestoreGPRs.HighGPR &&
SystemZ::GR64BitRegClass.contains(Reg))
MIB.addReg(Reg, RegState::ImplicitDefine);
@@ -965,24 +965,24 @@ bool SystemZXPLINKFrameLowering::spillCalleeSavedRegisters(
// Make sure all call-saved GPRs are included as operands and are
// marked as live on entry.
auto &GRRegClass = SystemZ::GR64BitRegClass;
- for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
- unsigned Reg = CSI[I].getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned Reg = I.getReg();
if (GRRegClass.contains(Reg))
addSavedGPR(MBB, MIB, Reg, true);
}
}
// Spill FPRs to the stack in the normal TargetInstrInfo way
- for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
- unsigned Reg = CSI[I].getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned Reg = I.getReg();
if (SystemZ::FP64BitRegClass.contains(Reg)) {
MBB.addLiveIn(Reg);
- TII->storeRegToStackSlot(MBB, MBBI, Reg, true, CSI[I].getFrameIdx(),
+ TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(),
&SystemZ::FP64BitRegClass, TRI);
}
if (SystemZ::VR128BitRegClass.contains(Reg)) {
MBB.addLiveIn(Reg);
- TII->storeRegToStackSlot(MBB, MBBI, Reg, true, CSI[I].getFrameIdx(),
+ TII->storeRegToStackSlot(MBB, MBBI, Reg, true, I.getFrameIdx(),
&SystemZ::VR128BitRegClass, TRI);
}
}
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
index 6fddb4f81c41..af219da79c32 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -29,7 +29,18 @@ public:
create(const SystemZSubtarget &STI);
// Override TargetFrameLowering.
- bool isFPCloseToIncomingSP() const override { return false; }
+ bool allocateScavengingFrameIndexesNearIncomingSP(
+ const MachineFunction &MF) const override {
+ // SystemZ wants normal register scavenging slots, as close to the stack or
+ // frame pointer as possible.
+ // The default implementation assumes an x86-like layout, where the frame
+ // pointer is at the opposite end of the frame from the stack pointer.
+ // This means that when frame pointer elimination is disabled, the slots
+ // end up as close as possible to the incoming stack pointer, which is the
+ // opposite of what we want on SystemZ.
+ return false;
+ }
+
bool hasReservedCallFrame(const MachineFunction &MF) const override;
MachineBasicBlock::iterator
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
@@ -43,7 +54,6 @@ public:
SystemZELFFrameLowering();
// Override TargetFrameLowering.
- bool isFPCloseToIncomingSP() const override { return false; }
bool
assignCalleeSavedSpillSlots(MachineFunction &MF,
const TargetRegisterInfo *TRI,
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 2bf80882fa61..e80496e37781 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -203,8 +203,8 @@ void SystemZInstrInfo::expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode,
Size, MI.getOperand(1).isKill(), MI.getOperand(1).isUndef());
// Keep the remaining operands as-is.
- for (unsigned I = 2; I < MI.getNumOperands(); ++I)
- MIB.add(MI.getOperand(I));
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands(), 2))
+ MIB.add(MO);
MI.eraseFromParent();
}
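
llvm::drop_begin(Range, N) simply skips the first N elements, so the rewritten loop still visits operands 2 and up, exactly like the old index loop starting at I = 2. A minimal usage sketch:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/SmallVector.h"
    int sumTail() {
      llvm::SmallVector<int, 4> V{10, 11, 12, 13};
      int Sum = 0;
      for (int X : llvm::drop_begin(V, 2)) // visits 12 and 13 only
        Sum += X;
      return Sum; // 25
    }
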
diff --git a/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp b/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp
index ef39f80a94ef..d2932de5a6ea 100644
--- a/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp
@@ -93,10 +93,8 @@ MCOperand SystemZMCInstLower::lowerOperand(const MachineOperand &MO) const {
void SystemZMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
- for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
- const MachineOperand &MO = MI->getOperand(I);
+ for (const MachineOperand &MO : MI->operands())
// Ignore all implicit register operands.
if (!MO.isReg() || !MO.isImplicit())
OutMI.addOperand(lowerOperand(MO));
- }
}
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.cpp
index 1fe9423e01b8..1d8c3d514bfb 100644
--- a/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.cpp
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.cpp
@@ -23,14 +23,6 @@ using namespace llvm;
#define DEBUG_TYPE "ve-asmprinter"
-// The generated AsmMatcher VEGenAsmWriter uses "VE" as the target
-// namespace.
-namespace llvm {
-namespace VE {
-using namespace VE;
-}
-} // namespace llvm
-
#define GET_INSTRUCTION_NAME
#define PRINT_ALIAS_INSTR
#include "VEGenAsmWriter.inc"
@@ -62,13 +54,10 @@ void VEInstPrinter::printOperand(const MCInst *MI, int OpNum,
}
if (MO.isImm()) {
- switch (MI->getOpcode()) {
- default:
- // Expects signed 32bit literals
- int32_t TruncatedImm = static_cast<int32_t>(MO.getImm());
- O << TruncatedImm;
- return;
- }
+ // Expects signed 32-bit literals.
+ int32_t TruncatedImm = static_cast<int32_t>(MO.getImm());
+ O << TruncatedImm;
+ return;
}
assert(MO.isExpr() && "Unknown operand kind in printOperand");
diff --git a/llvm/lib/Target/VE/VEInstrInfo.cpp b/llvm/lib/Target/VE/VEInstrInfo.cpp
index ddcfb9da8249..46846edfeafb 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.cpp
+++ b/llvm/lib/Target/VE/VEInstrInfo.cpp
@@ -942,11 +942,11 @@ bool VEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MachineInstrBuilder MIB =
BuildMI(*MBB, MI, DL, get(VE::SVMmi), Dest).addReg(VMZ).addImm(Imm);
MachineInstr *Inst = MIB.getInstr();
- MI.eraseFromParent();
if (KillSrc) {
const TargetRegisterInfo *TRI = &getRegisterInfo();
Inst->addRegisterKilled(MI.getOperand(1).getReg(), TRI, true);
}
+ MI.eraseFromParent();
return true;
}
case VE::VFMKyal:
@@ -956,6 +956,7 @@ bool VEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case VE::VFMKSyvl:
case VE::VFMKSyvyl:
expandPseudoVFMK(*this, MI);
+ return true;
}
return false;
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index 80abccd74782..7b70d99b5f52 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -368,8 +368,8 @@ FunctionPass *WebAssemblyPassConfig::createTargetRegisterAllocator(bool) {
return nullptr; // No reg alloc
}
-static void checkSanityForEHAndSjLj(const TargetMachine *TM) {
- // Sanity checking related to -exception-model
+static void basicCheckForEHAndSjLj(const TargetMachine *TM) {
+ // Basic correctness checking related to -exception-model
if (TM->Options.ExceptionModel != ExceptionHandling::None &&
TM->Options.ExceptionModel != ExceptionHandling::Wasm)
report_fatal_error("-exception-model should be either 'none' or 'wasm'");
@@ -431,7 +431,7 @@ void WebAssemblyPassConfig::addIRPasses() {
if (getOptLevel() != CodeGenOpt::None)
addPass(createWebAssemblyOptimizeReturned());
- checkSanityForEHAndSjLj(TM);
+ basicCheckForEHAndSjLj(TM);
// If exception handling is not enabled and setjmp/longjmp handling is
// enabled, we lower invokes into calls and delete unreachable landingpad
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index d4f39b571394..3df48b466d07 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -148,7 +148,7 @@ public:
AlignBranchType.addKind(X86::AlignBranchJcc);
AlignBranchType.addKind(X86::AlignBranchJmp);
}
- // Allow overriding defaults set by master flag
+ // Allow overriding defaults set by main flag
if (X86AlignBranchBoundary.getNumOccurrences())
AlignBoundary = assumeAligned(X86AlignBranchBoundary);
if (X86AlignBranch.getNumOccurrences())
@@ -1452,9 +1452,7 @@ public:
unsigned NumDefCFAOffsets = 0;
int MinAbsOffset = std::numeric_limits<int>::max();
- for (unsigned i = 0, e = Instrs.size(); i != e; ++i) {
- const MCCFIInstruction &Inst = Instrs[i];
-
+ for (const MCCFIInstruction &Inst : Instrs) {
switch (Inst.getOperation()) {
default:
// Any other CFI directives indicate a frame that we aren't prepared
diff --git a/llvm/lib/Target/X86/X86DomainReassignment.cpp b/llvm/lib/Target/X86/X86DomainReassignment.cpp
index a2ae6345c006..9826bf4bf861 100644
--- a/llvm/lib/Target/X86/X86DomainReassignment.cpp
+++ b/llvm/lib/Target/X86/X86DomainReassignment.cpp
@@ -186,8 +186,8 @@ public:
TII->getRegClass(TII->get(DstOpcode), 0, MRI->getTargetRegisterInfo(),
*MBB->getParent()));
MachineInstrBuilder Bld = BuildMI(*MBB, MI, DL, TII->get(DstOpcode), Reg);
- for (unsigned Idx = 1, End = MI->getNumOperands(); Idx < End; ++Idx)
- Bld.add(MI->getOperand(Idx));
+ for (const MachineOperand &MO : llvm::drop_begin(MI->operands()))
+ Bld.add(MO);
BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::COPY))
.add(MI->getOperand(0))
diff --git a/llvm/lib/Target/X86/X86DynAllocaExpander.cpp b/llvm/lib/Target/X86/X86DynAllocaExpander.cpp
index df8df1e3a65d..c8ceebb8b8e6 100644
--- a/llvm/lib/Target/X86/X86DynAllocaExpander.cpp
+++ b/llvm/lib/Target/X86/X86DynAllocaExpander.cpp
@@ -212,6 +212,12 @@ void X86DynAllocaExpander::lower(MachineInstr *MI, Lowering L) {
bool Is64BitAlloca = MI->getOpcode() == X86::DYN_ALLOCA_64;
assert(SlotSize == 4 || SlotSize == 8);
+ Optional<MachineFunction::DebugInstrOperandPair> InstrNum = None;
+ if (unsigned Num = MI->peekDebugInstrNum()) {
+ // Operand 2 of DYN_ALLOCAs contains the stack def.
+ InstrNum = {Num, 2};
+ }
+
switch (L) {
case TouchAndSub: {
assert(Amount >= SlotSize);
@@ -251,7 +257,7 @@ void X86DynAllocaExpander::lower(MachineInstr *MI, Lowering L) {
// Do the probe.
STI->getFrameLowering()->emitStackProbe(*MBB->getParent(), *MBB, MI, DL,
- /*InProlog=*/false);
+ /*InProlog=*/false, InstrNum);
} else {
// Sub
BuildMI(*MBB, I, DL,
diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index 01dc509df795..93bc23006dc4 100644
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -209,10 +209,8 @@ void X86ExpandPseudo::expandCALL_RVMARKER(MachineBasicBlock &MBB,
llvm_unreachable("unexpected opcode");
OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
- unsigned OpStart = 1;
bool RAXImplicitDead = false;
- for (; OpStart < MI.getNumOperands(); ++OpStart) {
- MachineOperand &Op = MI.getOperand(OpStart);
+ for (MachineOperand &Op : llvm::drop_begin(MI.operands())) {
// RAX may be 'implicit dead', if there are no other users of the return
// value. We introduce a new use, so change it to 'implicit def'.
if (Op.isReg() && Op.isImplicit() && Op.isDead() &&
diff --git a/llvm/lib/Target/X86/X86FixupLEAs.cpp b/llvm/lib/Target/X86/X86FixupLEAs.cpp
index 9a63cffe0a09..4730b936ec1f 100644
--- a/llvm/lib/Target/X86/X86FixupLEAs.cpp
+++ b/llvm/lib/Target/X86/X86FixupLEAs.cpp
@@ -278,10 +278,9 @@ FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
RegUsageState RegUsage = RU_NotUsed;
MachineInstr &MI = *I;
- for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
- MachineOperand &opnd = MI.getOperand(i);
- if (opnd.isReg() && opnd.getReg() == p.getReg()) {
- if (opnd.isDef())
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.getReg() == p.getReg()) {
+ if (MO.isDef())
return RU_Write;
RegUsage = RU_Read;
}
diff --git a/llvm/lib/Target/X86/X86FloatingPoint.cpp b/llvm/lib/Target/X86/X86FloatingPoint.cpp
index 60e1b37ed61c..4d9160f35226 100644
--- a/llvm/lib/Target/X86/X86FloatingPoint.cpp
+++ b/llvm/lib/Target/X86/X86FloatingPoint.cpp
@@ -446,11 +446,9 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
// Get dead variables list now because the MI pointer may be deleted as part
// of processing!
SmallVector<unsigned, 8> DeadRegs;
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands())
if (MO.isReg() && MO.isDead())
DeadRegs.push_back(MO.getReg());
- }
switch (FPInstClass) {
case X86II::ZeroArgFP: handleZeroArgFP(I); break;
@@ -1672,8 +1670,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) {
// Collect all FP registers (register operands with constraints "t", "u",
// and "f") to kill afer the instruction.
unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff;
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &Op = MI.getOperand(i);
+ for (const MachineOperand &Op : MI.operands()) {
if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
continue;
unsigned FPReg = getFPReg(Op);
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index bd780273509f..c29ae9f6af4c 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -465,13 +465,11 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(
// Add callee saved registers to move list.
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
- if (CSI.empty()) return;
// Calculate offsets.
- for (std::vector<CalleeSavedInfo>::const_iterator
- I = CSI.begin(), E = CSI.end(); I != E; ++I) {
- int64_t Offset = MFI.getObjectOffset(I->getFrameIdx());
- unsigned Reg = I->getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
+ unsigned Reg = I.getReg();
unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
if (IsPrologue) {
@@ -484,10 +482,10 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(
}
}
-void X86FrameLowering::emitStackProbe(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL, bool InProlog) const {
+void X86FrameLowering::emitStackProbe(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
+ Optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
if (STI.isTargetWindowsCoreCLR()) {
if (InProlog) {
@@ -497,10 +495,14 @@ void X86FrameLowering::emitStackProbe(MachineFunction &MF,
emitStackProbeInline(MF, MBB, MBBI, DL, false);
}
} else {
- emitStackProbeCall(MF, MBB, MBBI, DL, InProlog);
+ emitStackProbeCall(MF, MBB, MBBI, DL, InProlog, InstrNum);
}
}
+bool X86FrameLowering::stackProbeFunctionModifiesSP() const {
+ return STI.isOSWindows() && !STI.isTargetWin64();
+}
+
void X86FrameLowering::inlineStackProbe(MachineFunction &MF,
MachineBasicBlock &PrologMBB) const {
auto Where = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
@@ -971,11 +973,10 @@ void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
}
}
-void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL,
- bool InProlog) const {
+void X86FrameLowering::emitStackProbeCall(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
+ Optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
// FIXME: Add indirect thunk support and remove this.
@@ -1015,6 +1016,7 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
.addReg(SP, RegState::Define | RegState::Implicit)
.addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+ MachineInstr *ModInst = CI;
if (STI.isTargetWin64() || !STI.isOSWindows()) {
// MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
// MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
@@ -1022,9 +1024,27 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
// adjusting %rsp.
// All other platforms do not specify a particular ABI for the stack probe
// function, so we arbitrarily define it to not adjust %esp/%rsp itself.
- BuildMI(MBB, MBBI, DL, TII.get(getSUBrrOpcode(Uses64BitFramePtr)), SP)
- .addReg(SP)
- .addReg(AX);
+ ModInst =
+ BuildMI(MBB, MBBI, DL, TII.get(getSUBrrOpcode(Uses64BitFramePtr)), SP)
+ .addReg(SP)
+ .addReg(AX);
+ }
+
+ // DebugInfo variable locations -- if there's an instruction number for the
+ // allocation (i.e., DYN_ALLOCA_*), substitute it for the instruction that
+ // modifies SP.
+ if (InstrNum) {
+ if (STI.isTargetWin64() || !STI.isOSWindows()) {
+ // Label destination operand of the subtract.
+ MF.makeDebugValueSubstitution(*InstrNum,
+ {ModInst->getDebugInstrNum(), 0});
+ } else {
+ // Label the call. The SP def is the penultimate operand (zero-based).
+ unsigned SPDefOperand = ModInst->getNumOperands() - 2;
+ MF.makeDebugValueSubstitution(
+ *InstrNum, {ModInst->getDebugInstrNum(), SPDefOperand});
+ }
}
if (InProlog) {
@@ -2652,8 +2672,8 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(
DebugLoc DL = MBB.findDebugLoc(MI);
// Reload XMMs from stack frame.
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned Reg = I.getReg();
if (X86::GR64RegClass.contains(Reg) ||
X86::GR32RegClass.contains(Reg))
continue;
@@ -2664,13 +2684,13 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(
VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
- TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI);
+ TII.loadRegFromStackSlot(MBB, MI, Reg, I.getFrameIdx(), RC, TRI);
}
// POP GPRs.
unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned Reg = I.getReg();
if (!X86::GR64RegClass.contains(Reg) &&
!X86::GR32RegClass.contains(Reg))
continue;
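
The thread running through X86DynAllocaExpander and emitStackProbeCall is instruction-referencing debug info: debug users name an (instruction number, operand index) pair, and when the referenced instruction is replaced during expansion, the old pair must be redirected to whatever now defines the value. A condensed sketch of the handshake, using the names from the hunks above:

    // Expander side: remember that debug info refers to operand 2 of the
    // DYN_ALLOCA (its SP def), if the instruction is referenced at all.
    Optional<MachineFunction::DebugInstrOperandPair> InstrNum = None;
    if (unsigned Num = MI->peekDebugInstrNum())
      InstrNum = {Num, 2};
    // emitStackProbeCall side: redirect the pair to the instruction that
    // actually modifies SP (the SUB, or the probe call itself).
    if (InstrNum)
      MF.makeDebugValueSubstitution(*InstrNum,
                                    {ModInst->getDebugInstrNum(), 0});
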
diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h
index 6309b8a066c4..e18be0d26321 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/llvm/lib/Target/X86/X86FrameLowering.h
@@ -13,6 +13,7 @@
#ifndef LLVM_LIB_TARGET_X86_X86FRAMELOWERING_H
#define LLVM_LIB_TARGET_X86_X86FRAMELOWERING_H
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Support/TypeSize.h"
@@ -51,9 +52,14 @@ public:
/// Emit target stack probe code. This is required for all
/// large stack allocations on Windows. The caller is required to materialize
/// the number of bytes to probe in RAX/EAX.
- void emitStackProbe(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
- bool InProlog) const;
+ /// \p InstrNum optionally contains a debug-info instruction number for the
+ /// new stack pointer.
+ void emitStackProbe(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
+ Optional<MachineFunction::DebugInstrOperandPair> InstrNum = None) const;
+
+ bool stackProbeFunctionModifiesSP() const override;
/// Replace a StackProbe inline-stub with the actual probe code inline.
void inlineStackProbe(MachineFunction &MF,
@@ -198,9 +204,10 @@ private:
uint64_t calculateMaxStackAlign(const MachineFunction &MF) const;
/// Emit target stack probe as a call to a helper function
- void emitStackProbeCall(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
- bool InProlog) const;
+ void emitStackProbeCall(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
+ Optional<MachineFunction::DebugInstrOperandPair> InstrNum) const;
/// Emit target stack probe as an inline sequence.
void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 17d14053d804..62b2387396be 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -23190,6 +23190,10 @@ static SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
bool X86TargetLowering::isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
+ // We don't need to replace SQRT with RSQRT for half type.
+ if (VT.getScalarType() == MVT::f16)
+ return true;
+
// We never want to use both SQRT and RSQRT instructions for the same input.
if (DAG.getNodeIfExists(X86ISD::FRSQRT, DAG.getVTList(VT), Op))
return false;
@@ -23228,11 +23232,15 @@ SDValue X86TargetLowering::getSqrtEstimate(SDValue Op,
UseOneConstNR = false;
// There is no FSQRT for 512-bits, but there is RSQRT14.
unsigned Opcode = VT == MVT::v16f32 ? X86ISD::RSQRT14 : X86ISD::FRSQRT;
- return DAG.getNode(Opcode, DL, VT, Op);
+ SDValue Estimate = DAG.getNode(Opcode, DL, VT, Op);
+ if (RefinementSteps == 0 && !Reciprocal)
+ Estimate = DAG.getNode(ISD::FMUL, DL, VT, Op, Estimate);
+ return Estimate;
}
if (VT.getScalarType() == MVT::f16 && isTypeLegal(VT) &&
Subtarget.hasFP16()) {
+ assert(Reciprocal && "Don't replace SQRT with RSQRT for half type");
if (RefinementSteps == ReciprocalEstimate::Unspecified)
RefinementSteps = 0;
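// Editor's sketch (not part of the patch): the refinement-free FSQRT path
// added above recovers sqrt(x) from a reciprocal-square-root estimate via
// the identity sqrt(x) == x * rsqrt(x). Plain 1/sqrt stands in here for the
// hardware RSQRT estimate; a real estimate carries roughly 12 bits of
// accuracy, and the product inherits that error.
#include <cmath>
#include <cstdio>

int main() {
  for (float X : {0.25f, 2.0f, 100.0f}) {
    float Rsqrt = 1.0f / std::sqrt(X); // stand-in for X86ISD::FRSQRT output
    std::printf("x=%-6g  x*rsqrt(x)=%-10g  sqrt(x)=%g\n",
                X, X * Rsqrt, std::sqrt(X));
  }
  return 0;
}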
@@ -45680,7 +45688,7 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
if (is64BitFP && !Subtarget.is64Bit()) {
// On a 32-bit target, we cannot bitcast the 64-bit float to a
// 64-bit integer, since that's not a legal type. Since
- // OnesOrZeroesF is all ones of all zeroes, we don't need all the
+ // OnesOrZeroesF is all ones or all zeroes, we don't need all the
// bits, but can do this little dance to extract the lowest 32 bits
// and work with those going forward.
SDValue Vector64 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64,
@@ -46577,6 +46585,59 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
return Ret;
}
+static SDValue foldMaskedMergeImpl(SDValue And0_L, SDValue And0_R,
+ SDValue And1_L, SDValue And1_R, SDLoc DL,
+ SelectionDAG &DAG) {
+ if (!isBitwiseNot(And0_L, true) || !And0_L->hasOneUse())
+ return SDValue();
+ SDValue NotOp = And0_L->getOperand(0);
+ if (NotOp == And1_R)
+ std::swap(And1_R, And1_L);
+ if (NotOp != And1_L)
+ return SDValue();
+
+ // (~(NotOp) & And0_R) | (NotOp & And1_R)
+ // --> ((And0_R ^ And1_R) & NotOp) ^ And0_R
+ EVT VT = And1_L->getValueType(0);
+ SDValue Freeze_And0_R = DAG.getNode(ISD::FREEZE, SDLoc(), VT, And0_R);
+ SDValue Xor0 = DAG.getNode(ISD::XOR, DL, VT, And1_R, Freeze_And0_R);
+ SDValue And = DAG.getNode(ISD::AND, DL, VT, Xor0, NotOp);
+ SDValue Xor1 = DAG.getNode(ISD::XOR, DL, VT, And, Freeze_And0_R);
+ return Xor1;
+}
+
+/// Fold "masked merge" expressions like `(m & x) | (~m & y)` into the
+/// equivalent `((x ^ y) & m) ^ y` pattern.
+/// This is typically a better representation for targets without a fused
+/// "and-not" operation. This function is intended to be called from a
+/// `TargetLowering::PerformDAGCombine` callback on `ISD::OR` nodes.
+static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG) {
+ // Note that masked-merge variants using XOR or ADD expressions are
+ // normalized to OR by InstCombine so we only check for OR.
+ assert(Node->getOpcode() == ISD::OR && "Must be called with ISD::OR node");
+ SDValue N0 = Node->getOperand(0);
+ if (N0->getOpcode() != ISD::AND || !N0->hasOneUse())
+ return SDValue();
+ SDValue N1 = Node->getOperand(1);
+ if (N1->getOpcode() != ISD::AND || !N1->hasOneUse())
+ return SDValue();
+
+ SDLoc DL(Node);
+ SDValue N00 = N0->getOperand(0);
+ SDValue N01 = N0->getOperand(1);
+ SDValue N10 = N1->getOperand(0);
+ SDValue N11 = N1->getOperand(1);
+ if (SDValue Result = foldMaskedMergeImpl(N00, N01, N10, N11, DL, DAG))
+ return Result;
+ if (SDValue Result = foldMaskedMergeImpl(N01, N00, N10, N11, DL, DAG))
+ return Result;
+ if (SDValue Result = foldMaskedMergeImpl(N10, N11, N00, N01, DL, DAG))
+ return Result;
+ if (SDValue Result = foldMaskedMergeImpl(N11, N10, N00, N01, DL, DAG))
+ return Result;
+ return SDValue();
+}
+
static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -46670,6 +46731,11 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
return Res;
}
+ // We should fold "masked merge" patterns when `andn` is not available.
+ if (!Subtarget.hasBMI() && VT.isScalarInteger() && VT != MVT::i1)
+ if (SDValue R = foldMaskedMerge(N, DAG))
+ return R;
+
return SDValue();
}
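// Editor's sketch (not part of the patch): a sampled check of the
// masked-merge identity that foldMaskedMerge relies on:
//   (m & x) | (~m & y)  ==  ((x ^ y) & m) ^ y
// The right-hand side needs no AND-NOT, which is why the fold is gated on
// the target lacking BMI's ANDN. Helper names below are invented for
// illustration.
#include <cassert>
#include <cstdint>

static uint32_t mergeWithAndNot(uint32_t M, uint32_t X, uint32_t Y) {
  return (M & X) | (~M & Y); // select X-bits where M is set, else Y-bits
}

static uint32_t mergeWithXor(uint32_t M, uint32_t X, uint32_t Y) {
  return ((X ^ Y) & M) ^ Y; // same result: two XORs and one AND
}

int main() {
  for (uint32_t M : {0u, ~0u, 0x0F0F0F0Fu, 0x12345678u})
    for (uint32_t X : {0u, 0xDEADBEEFu, ~0u})
      for (uint32_t Y : {0u, 0xCAFEBABEu, 0x55555555u})
        assert(mergeWithAndNot(M, X, Y) == mergeWithXor(M, X, Y));
  return 0;
}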
@@ -48504,20 +48570,50 @@ static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL,
SDValue LHS = Src.getOperand(0).getOperand(0);
SDValue RHS = Src.getOperand(0).getOperand(1);
- unsigned ExtOpc = LHS.getOpcode();
- if ((ExtOpc != ISD::SIGN_EXTEND && ExtOpc != ISD::ZERO_EXTEND) ||
- RHS.getOpcode() != ExtOpc)
- return SDValue();
-
- // Peek through the extends.
- LHS = LHS.getOperand(0);
- RHS = RHS.getOperand(0);
-
- // Ensure the input types match.
- if (LHS.getValueType() != VT || RHS.getValueType() != VT)
- return SDValue();
+ // Count leading sign/zero bits on both inputs - if there are enough then
+ // truncation back to vXi16 will be cheap - either as a pack/shuffle
+ // sequence or using AVX512 truncations. If the inputs are sext/zext then the
+ // truncations may actually be free by peeking through to the ext source.
+ auto IsSext = [&DAG](SDValue V) {
+ return DAG.ComputeMinSignedBits(V) <= 16;
+ };
+ auto IsZext = [&DAG](SDValue V) {
+ return DAG.computeKnownBits(V).countMaxActiveBits() <= 16;
+ };
- unsigned Opc = ExtOpc == ISD::SIGN_EXTEND ? ISD::MULHS : ISD::MULHU;
+ bool IsSigned = IsSext(LHS) && IsSext(RHS);
+ bool IsUnsigned = IsZext(LHS) && IsZext(RHS);
+ if (!IsSigned && !IsUnsigned)
+ return SDValue();
+
+ // Check if both inputs are extensions, which will be removed by truncation.
+ bool IsTruncateFree = (LHS.getOpcode() == ISD::SIGN_EXTEND ||
+ LHS.getOpcode() == ISD::ZERO_EXTEND) &&
+ (RHS.getOpcode() == ISD::SIGN_EXTEND ||
+ RHS.getOpcode() == ISD::ZERO_EXTEND) &&
+ LHS.getOperand(0).getScalarValueSizeInBits() <= 16 &&
+ RHS.getOperand(0).getScalarValueSizeInBits() <= 16;
+
+ // For AVX2+ targets, with the upper bits known zero, we can perform MULHU on
+ // the (bitcasted) inputs directly, and then cheaply pack/truncate the result
+ // (upper elts will be zero). Don't attempt this with just AVX512F as MULHU
+ // will have to split anyway.
+ unsigned InSizeInBits = InVT.getSizeInBits();
+ if (IsUnsigned && !IsTruncateFree && Subtarget.hasInt256() &&
+ !(Subtarget.hasAVX512() && !Subtarget.hasBWI() && VT.is256BitVector()) &&
+ (InSizeInBits % 16) == 0) {
+ EVT BCVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
+ InVT.getSizeInBits() / 16);
+ SDValue Res = DAG.getNode(ISD::MULHU, DL, BCVT, DAG.getBitcast(BCVT, LHS),
+ DAG.getBitcast(BCVT, RHS));
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getBitcast(InVT, Res));
+ }
+
+ // Truncate back to source type.
+ LHS = DAG.getNode(ISD::TRUNCATE, DL, VT, LHS);
+ RHS = DAG.getNode(ISD::TRUNCATE, DL, VT, RHS);
+
+ unsigned Opc = IsSigned ? ISD::MULHS : ISD::MULHU;
return DAG.getNode(Opc, DL, VT, LHS, RHS);
}
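// Editor's sketch (not part of the patch): per-lane semantics of the MULHU
// node that combinePMULH forms - the high 16 bits of the full 32-bit
// product. That is why operands known to fit in 16 bits (enough leading
// zero/sign bits) can be truncated before the multiply. mulhu16 is an
// illustrative helper, not an LLVM API.
#include <cassert>
#include <cstdint>

static uint16_t mulhu16(uint16_t A, uint16_t B) {
  return static_cast<uint16_t>((static_cast<uint32_t>(A) * B) >> 16);
}

int main() {
  assert(mulhu16(0xFFFF, 0xFFFF) == 0xFFFE); // 0xFFFE0001 >> 16
  assert(mulhu16(0x8000, 2) == 1);           // 0x10000 >> 16
  assert(mulhu16(100, 100) == 0);            // product fits in 16 bits
  return 0;
}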
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 8aee96e1c504..1db83033ba35 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -12937,8 +12937,8 @@ def : Pat<(v16i32 (X86vzmovl
(iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
-def : Pat<(v8i16 (X86vzmovl (v8i16 (scalar_to_vector (i16 (trunc GR32:$src)))))),
- (VMOVW2SHrr GR32:$src)>;
+def : Pat<(v8i16 (X86vzmovl (scalar_to_vector (i16 GR16:$src)))),
+ (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
// AVX 128-bit movw instruction write zeros in the high 128-bit part.
def : Pat<(v8i16 (X86vzload16 addr:$src)),
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 639aa5199ea5..bb5637a31947 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -1163,8 +1163,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
/// True if MI has a condition code def, e.g. EFLAGS, that is not marked dead.
bool X86InstrInfo::hasLiveCondCodeDef(MachineInstr &MI) const {
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.isDef() &&
MO.getReg() == X86::EFLAGS && !MO.isDead()) {
return true;
@@ -5676,10 +5675,8 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
MachineOperand &MO = MI.getOperand(i + 2);
MIB.add(MO);
}
- for (unsigned i = NumOps + 2, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : llvm::drop_begin(MI.operands(), NumOps + 2))
MIB.add(MO);
- }
updateOperandRegConstraints(MF, *NewMI, TII);
diff --git a/llvm/lib/Target/X86/X86RegisterBanks.td b/llvm/lib/Target/X86/X86RegisterBanks.td
index 74c515850ab1..91a497252595 100644
--- a/llvm/lib/Target/X86/X86RegisterBanks.td
+++ b/llvm/lib/Target/X86/X86RegisterBanks.td
@@ -1,4 +1,4 @@
-//=- X86RegisterBank.td - Describe the AArch64 Banks -----*- tablegen -*-=//
+//=- X86RegisterBank.td - Describe the X86 Banks -------------*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 06dacb638d16..869762b35196 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1584,54 +1584,98 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i8, 1 },
// Mask sign extend has an instruction.
- { ISD::SIGN_EXTEND, MVT::v2i8, MVT::v2i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v2i16, MVT::v2i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v4i8, MVT::v4i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v8i8, MVT::v8i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v16i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v32i8, MVT::v32i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v32i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v64i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v64i8, MVT::v64i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v2i8, MVT::v2i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v2i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v2i16, MVT::v2i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v2i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i8, MVT::v4i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v4i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v4i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i8, MVT::v8i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v8i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v16i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v32i8, MVT::v32i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v32i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v64i8, MVT::v64i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v64i1, 1 },
// Mask zero extend is a sext + shift.
- { ISD::ZERO_EXTEND, MVT::v2i8, MVT::v2i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v2i16, MVT::v2i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v4i8, MVT::v4i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v8i8, MVT::v8i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v16i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v32i8, MVT::v32i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v64i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v64i8, MVT::v64i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v2i8, MVT::v2i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v2i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v2i16, MVT::v2i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v2i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i8, MVT::v4i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v4i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v4i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v8i8, MVT::v8i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v8i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v16i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v32i8, MVT::v32i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v64i8, MVT::v64i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v64i1, 2 },
+
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 2 },
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v16i8, 2 },
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 2 },
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v8i16, 2 },
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 2 },
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v16i8, 2 },
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 2 },
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v8i16, 2 },
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 2 },
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v16i8, 2 },
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 2 },
+ { ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 2 },
+ { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 2 },
+ { ISD::TRUNCATE, MVT::v32i1, MVT::v32i8, 2 },
+ { ISD::TRUNCATE, MVT::v32i1, MVT::v32i16, 2 },
+ { ISD::TRUNCATE, MVT::v64i1, MVT::v64i8, 2 },
+ { ISD::TRUNCATE, MVT::v64i1, MVT::v32i16, 2 },
{ ISD::TRUNCATE, MVT::v32i8, MVT::v32i16, 2 },
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 2 }, // widen to zmm
- { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 2 }, // widen to zmm
- { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 2 }, // widen to zmm
{ ISD::TRUNCATE, MVT::v2i8, MVT::v2i16, 2 }, // vpmovwb
- { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 2 }, // widen to zmm
- { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 2 }, // widen to zmm
{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 2 }, // vpmovwb
- { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 2 }, // widen to zmm
- { ISD::TRUNCATE, MVT::v8i1, MVT::v16i8, 2 }, // widen to zmm
- { ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 2 }, // widen to zmm
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2 }, // vpmovwb
- { ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 2 }, // widen to zmm
- { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 2 }, // widen to zmm
- { ISD::TRUNCATE, MVT::v32i1, MVT::v32i8, 2 }, // widen to zmm
- { ISD::TRUNCATE, MVT::v32i1, MVT::v32i16, 2 },
- { ISD::TRUNCATE, MVT::v64i1, MVT::v32i16, 2 },
- { ISD::TRUNCATE, MVT::v64i1, MVT::v64i8, 2 },
};
static const TypeConversionCostTblEntry AVX512DQConversionTbl[] = {
+ // Mask sign extend has an instruction.
+ { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v2i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v16i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i1, 1 },
+
+ // Mask zero extend is a sext + shift.
+ { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v2i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v16i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i1, 2 },
+
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i64, 2 },
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v4i32, 2 },
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i32, 2 },
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i64, 2 },
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 2 },
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v8i64, 2 },
+ { ISD::TRUNCATE, MVT::v16i1, MVT::v16i32, 2 },
+ { ISD::TRUNCATE, MVT::v16i1, MVT::v8i64, 2 },
+
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i64, 1 },
{ ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i64, 1 },
@@ -1786,40 +1830,94 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
static const TypeConversionCostTblEntry AVX512BWVLConversionTbl[] {
// Mask sign extend has an instruction.
- { ISD::SIGN_EXTEND, MVT::v2i8, MVT::v2i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v2i16, MVT::v2i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v4i8, MVT::v4i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v8i8, MVT::v8i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v16i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 1 },
- { ISD::SIGN_EXTEND, MVT::v32i8, MVT::v32i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v2i8, MVT::v2i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v2i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v2i16, MVT::v2i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v2i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v4i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i8, MVT::v4i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v4i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i8, MVT::v8i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v8i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v16i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v32i8, MVT::v32i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v32i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v32i8, MVT::v64i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v64i1, 1 },
// Mask zero extend is a sext + shift.
- { ISD::ZERO_EXTEND, MVT::v2i8, MVT::v2i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v2i16, MVT::v2i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v4i8, MVT::v4i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v8i8, MVT::v8i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v16i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 2 },
- { ISD::ZERO_EXTEND, MVT::v32i8, MVT::v32i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v2i8, MVT::v2i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v2i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v2i16, MVT::v2i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v2i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i8, MVT::v4i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v4i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v4i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v8i8, MVT::v8i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v8i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v16i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v32i8, MVT::v32i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v32i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v32i8, MVT::v64i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v64i1, 2 },
+
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 2 },
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v16i8, 2 },
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 2 },
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v8i16, 2 },
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 2 },
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v16i8, 2 },
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 2 },
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v8i16, 2 },
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 2 },
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v16i8, 2 },
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 2 },
+ { ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 2 },
+ { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 2 },
+ { ISD::TRUNCATE, MVT::v32i1, MVT::v32i8, 2 },
+ { ISD::TRUNCATE, MVT::v32i1, MVT::v16i16, 2 },
+ { ISD::TRUNCATE, MVT::v64i1, MVT::v32i8, 2 },
+ { ISD::TRUNCATE, MVT::v64i1, MVT::v16i16, 2 },
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 2 },
- { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 2 }, // vpsllw+vptestmb
- { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 2 }, // vpsllw+vptestmw
- { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 2 }, // vpsllw+vptestmb
- { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 2 }, // vpsllw+vptestmw
- { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 2 }, // vpsllw+vptestmb
- { ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 2 }, // vpsllw+vptestmw
- { ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 2 }, // vpsllw+vptestmb
- { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 2 }, // vpsllw+vptestmw
- { ISD::TRUNCATE, MVT::v32i1, MVT::v32i8, 2 }, // vpsllw+vptestmb
};
static const TypeConversionCostTblEntry AVX512DQVLConversionTbl[] = {
+ // Mask sign extend has an instruction.
+ { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v2i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v16i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v8i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v16i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 1 },
+
+ // Mask zero extend is a sext + shift.
+ { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v2i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v16i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v8i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v16i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 2 },
+
+ { ISD::TRUNCATE, MVT::v16i1, MVT::v4i64, 2 },
+ { ISD::TRUNCATE, MVT::v16i1, MVT::v8i32, 2 },
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v2i64, 2 },
+ { ISD::TRUNCATE, MVT::v2i1, MVT::v4i32, 2 },
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i32, 2 },
+ { ISD::TRUNCATE, MVT::v4i1, MVT::v4i64, 2 },
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v4i64, 2 },
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 2 },
+
{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 1 },
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i64, 1 },
@@ -3674,6 +3772,10 @@ X86TTIImpl::getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
PromEltTyBits = 16; // promote to i16, AVX512BW.
break;
}
+ if (ST->hasDQI()) {
+ PromEltTyBits = 32; // promote to i32, AVX512F.
+ break;
+ }
return bailout();
default:
return bailout();
@@ -3969,7 +4071,9 @@ InstructionCost X86TTIImpl::getAddressComputationCost(Type *Ty,
// Even in the case of (loop invariant) stride whose value is not known at
// compile time, the address computation will not incur more than one extra
// ADD instruction.
- if (Ty->isVectorTy() && SE) {
+ if (Ty->isVectorTy() && SE && !ST->hasAVX2()) {
+ // TODO: AVX2 is the current cut-off because we don't have correct
+ // interleaving costs for prior ISAs.
if (!BaseT::isStridedAccess(Ptr))
return NumVectorInstToHideOverhead;
if (!BaseT::getConstantStrideStep(SE, Ptr))
@@ -5173,7 +5277,8 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
auto *SingleMemOpTy = FixedVectorType::get(VecTy->getElementType(),
LegalVT.getVectorNumElements());
InstructionCost MemOpCost;
- if (UseMaskForCond || UseMaskForGaps)
+ bool UseMaskedMemOp = UseMaskForCond || UseMaskForGaps;
+ if (UseMaskedMemOp)
MemOpCost = getMaskedMemoryOpCost(Opcode, SingleMemOpTy, Alignment,
AddressSpace, CostKind);
else
@@ -5183,9 +5288,8 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
unsigned VF = VecTy->getNumElements() / Factor;
MVT VT = MVT::getVectorVT(MVT::getVT(VecTy->getScalarType()), VF);
- // FIXME: this is the most conservative estimate for the mask cost.
InstructionCost MaskCost;
- if (UseMaskForCond || UseMaskForGaps) {
+ if (UseMaskedMemOp) {
APInt DemandedLoadStoreElts = APInt::getZero(VecTy->getNumElements());
for (unsigned Index : Indices) {
assert(Index < Factor && "Invalid index for interleaved memory op");
@@ -5193,10 +5297,10 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
DemandedLoadStoreElts.setBit(Index + Elm * Factor);
}
- Type *I8Type = Type::getInt8Ty(VecTy->getContext());
+ Type *I1Type = Type::getInt1Ty(VecTy->getContext());
MaskCost = getReplicationShuffleCost(
- I8Type, Factor, VF,
+ I1Type, Factor, VF,
UseMaskForGaps ? DemandedLoadStoreElts
: APInt::getAllOnes(VecTy->getNumElements()),
CostKind);
@@ -5207,7 +5311,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
// memory access, we need to account for the cost of And-ing the two masks
// inside the loop.
if (UseMaskForGaps) {
- auto *MaskVT = FixedVectorType::get(I8Type, VecTy->getNumElements());
+ auto *MaskVT = FixedVectorType::get(I1Type, VecTy->getNumElements());
MaskCost += getArithmeticInstrCost(BinaryOperator::And, MaskVT, CostKind);
}
}
@@ -5248,9 +5352,10 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
NumOfLoadsInInterleaveGrp;
// About half of the loads may be folded in shuffles when we have only
- // one result. If we have more than one result, we do not fold loads at all.
+ // one result. If we have more than one result, or the loads are masked,
+ // we do not fold loads at all.
unsigned NumOfUnfoldedLoads =
- NumOfResults > 1 ? NumOfMemOps : NumOfMemOps / 2;
+ UseMaskedMemOp || NumOfResults > 1 ? NumOfMemOps : NumOfMemOps / 2;
// Get a number of shuffle operations per result.
unsigned NumOfShufflesPerResult =
diff --git a/llvm/lib/Target/XCore/XCoreFrameLowering.cpp b/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
index 27ac6a4d1439..f2f89f4269ed 100644
--- a/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -427,19 +427,19 @@ bool XCoreFrameLowering::spillCalleeSavedRegisters(
if (MI != MBB.end() && !MI->isDebugInstr())
DL = MI->getDebugLoc();
- for (auto it = CSI.begin(); it != CSI.end(); ++it) {
- unsigned Reg = it->getReg();
+ for (const CalleeSavedInfo &I : CSI) {
+ unsigned Reg = I.getReg();
assert(Reg != XCore::LR && !(Reg == XCore::R10 && hasFP(*MF)) &&
"LR & FP are always handled in emitPrologue");
// Add the callee-saved register as live-in. It's killed at the spill.
MBB.addLiveIn(Reg);
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.storeRegToStackSlot(MBB, MI, Reg, true, it->getFrameIdx(), RC, TRI);
+ TII.storeRegToStackSlot(MBB, MI, Reg, true, I.getFrameIdx(), RC, TRI);
if (emitFrameMoves) {
auto Store = MI;
--Store;
- XFI->getSpillLabels().push_back(std::make_pair(Store, *it));
+ XFI->getSpillLabels().push_back(std::make_pair(Store, I));
}
}
return true;
diff --git a/llvm/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp b/llvm/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp
index b5dbdea98eea..71836133fae6 100644
--- a/llvm/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp
+++ b/llvm/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp
@@ -48,9 +48,7 @@ bool XCoreFTAOElim::runOnMachineFunction(MachineFunction &MF) {
const XCoreInstrInfo &TII =
*static_cast<const XCoreInstrInfo *>(MF.getSubtarget().getInstrInfo());
unsigned StackSize = MF.getFrameInfo().getStackSize();
- for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
- ++MFI) {
- MachineBasicBlock &MBB = *MFI;
+ for (MachineBasicBlock &MBB : MF) {
for (MachineBasicBlock::iterator MBBI = MBB.begin(), EE = MBB.end();
MBBI != EE; ++MBBI) {
if (MBBI->getOpcode() == XCore::FRAME_TO_ARGS_OFFSET) {
diff --git a/llvm/lib/Target/XCore/XCoreMCInstLower.cpp b/llvm/lib/Target/XCore/XCoreMCInstLower.cpp
index cd28fa5cd144..6f5dcb291e6e 100644
--- a/llvm/lib/Target/XCore/XCoreMCInstLower.cpp
+++ b/llvm/lib/Target/XCore/XCoreMCInstLower.cpp
@@ -103,8 +103,7 @@ MCOperand XCoreMCInstLower::LowerOperand(const MachineOperand &MO,
void XCoreMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
MCOperand MCOp = LowerOperand(MO);
if (MCOp.isValid())
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index b2c2efed7db8..ba7589c2bf60 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -25,6 +25,7 @@
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -275,94 +276,64 @@ CleanupPointerRootUsers(GlobalVariable *GV,
/// We just marked GV constant. Loop over all users of the global, cleaning up
/// the obvious ones. This is largely just a quick scan over the use list to
/// clean up the easy and obvious cruft. This returns true if it made a change.
-static bool CleanupConstantGlobalUsers(
- Value *V, Constant *Init, const DataLayout &DL,
- function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
+static bool CleanupConstantGlobalUsers(GlobalVariable *GV,
+ const DataLayout &DL) {
+ Constant *Init = GV->getInitializer();
+ SmallVector<User *, 8> WorkList(GV->users());
+ SmallPtrSet<User *, 8> Visited;
bool Changed = false;
- // Note that we need to use a weak value handle for the worklist items. When
- // we delete a constant array, we may also be holding pointer to one of its
- // elements (or an element of one of its elements if we're dealing with an
- // array of arrays) in the worklist.
- SmallVector<WeakTrackingVH, 8> WorkList(V->users());
+
+ SmallVector<WeakTrackingVH> MaybeDeadInsts;
+ auto EraseFromParent = [&](Instruction *I) {
+ for (Value *Op : I->operands())
+ if (auto *OpI = dyn_cast<Instruction>(Op))
+ MaybeDeadInsts.push_back(OpI);
+ I->eraseFromParent();
+ Changed = true;
+ };
while (!WorkList.empty()) {
- Value *UV = WorkList.pop_back_val();
- if (!UV)
+ User *U = WorkList.pop_back_val();
+ if (!Visited.insert(U).second)
continue;
- User *U = cast<User>(UV);
+ if (auto *BO = dyn_cast<BitCastOperator>(U))
+ append_range(WorkList, BO->users());
+ if (auto *ASC = dyn_cast<AddrSpaceCastOperator>(U))
+ append_range(WorkList, ASC->users());
+ else if (auto *GEP = dyn_cast<GEPOperator>(U))
+ append_range(WorkList, GEP->users());
+ else if (auto *LI = dyn_cast<LoadInst>(U)) {
+ // A load from zeroinitializer is always zeroinitializer, regardless of
+ // any applied offset.
+ if (Init->isNullValue()) {
+ LI->replaceAllUsesWith(Constant::getNullValue(LI->getType()));
+ EraseFromParent(LI);
+ continue;
+ }
- if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
- if (Init) {
- if (auto *Casted =
- ConstantFoldLoadThroughBitcast(Init, LI->getType(), DL)) {
- // Replace the load with the initializer.
- LI->replaceAllUsesWith(Casted);
- LI->eraseFromParent();
- Changed = true;
+ Value *PtrOp = LI->getPointerOperand();
+ APInt Offset(DL.getIndexTypeSizeInBits(PtrOp->getType()), 0);
+ PtrOp = PtrOp->stripAndAccumulateConstantOffsets(
+ DL, Offset, /* AllowNonInbounds */ true);
+ if (PtrOp == GV) {
+ if (auto *Value = ConstantFoldLoadFromConst(Init, LI->getType(),
+ Offset, DL)) {
+ LI->replaceAllUsesWith(Value);
+ EraseFromParent(LI);
}
}
} else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
// Store must be unreachable or must be storing Init into the global.
- SI->eraseFromParent();
- Changed = true;
- } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
- if (CE->getOpcode() == Instruction::GetElementPtr) {
- Constant *SubInit = nullptr;
- if (Init)
- SubInit = ConstantFoldLoadThroughGEPConstantExpr(
- Init, CE, V->getType()->getPointerElementType(), DL);
- Changed |= CleanupConstantGlobalUsers(CE, SubInit, DL, GetTLI);
- } else if ((CE->getOpcode() == Instruction::BitCast &&
- CE->getType()->isPointerTy()) ||
- CE->getOpcode() == Instruction::AddrSpaceCast) {
- // Pointer cast, delete any stores and memsets to the global.
- Changed |= CleanupConstantGlobalUsers(CE, nullptr, DL, GetTLI);
- }
-
- if (CE->use_empty()) {
- CE->destroyConstant();
- Changed = true;
- }
- } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
- // Do not transform "gepinst (gep constexpr (GV))" here, because forming
- // "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold
- // and will invalidate our notion of what Init is.
- Constant *SubInit = nullptr;
- if (!isa<ConstantExpr>(GEP->getOperand(0))) {
- ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(
- ConstantFoldInstruction(GEP, DL, &GetTLI(*GEP->getFunction())));
- if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
- SubInit = ConstantFoldLoadThroughGEPConstantExpr(
- Init, CE, V->getType()->getPointerElementType(), DL);
-
- // If the initializer is an all-null value and we have an inbounds GEP,
- // we already know what the result of any load from that GEP is.
- // TODO: Handle splats.
- if (Init && isa<ConstantAggregateZero>(Init) && GEP->isInBounds())
- SubInit = Constant::getNullValue(GEP->getResultElementType());
- }
- Changed |= CleanupConstantGlobalUsers(GEP, SubInit, DL, GetTLI);
-
- if (GEP->use_empty()) {
- GEP->eraseFromParent();
- Changed = true;
- }
+ EraseFromParent(SI);
} else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv
- if (MI->getRawDest() == V) {
- MI->eraseFromParent();
- Changed = true;
- }
-
- } else if (Constant *C = dyn_cast<Constant>(U)) {
- // If we have a chain of dead constantexprs or other things dangling from
- // us, and if they are all dead, nuke them without remorse.
- if (isSafeToDestroyConstant(C)) {
- C->destroyConstant();
- CleanupConstantGlobalUsers(V, Init, DL, GetTLI);
- return true;
- }
+ if (getUnderlyingObject(MI->getRawDest()) == GV)
+ EraseFromParent(MI);
}
}
+
+ Changed |=
+ RecursivelyDeleteTriviallyDeadInstructionsPermissive(MaybeDeadInsts);
+ GV->removeDeadConstantUsers();
return Changed;
}
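// Editor's sketch (not part of the patch): the control-flow shape of the
// rewritten CleanupConstantGlobalUsers, reduced to a generic walk. A plain
// worklist plus a visited set replaces the old recursion over
// WeakTrackingVH users; transparent users (casts, GEPs) re-feed the
// worklist, and deletions are deferred so operands can be reconsidered.
// walkUsersOnce is an illustrative name, not an LLVM API.
#include <unordered_set>
#include <vector>

template <typename Node, typename Fn>
void walkUsersOnce(Node *Root, Fn Visit) {
  std::vector<Node *> WorkList{Root};
  std::unordered_set<Node *> Visited;
  while (!WorkList.empty()) {
    Node *N = WorkList.back();
    WorkList.pop_back();
    if (!Visited.insert(N).second)
      continue; // each user handled once, even through cast/GEP chains
    Visit(N, WorkList); // Visit pushes N's transparent users (casts, GEPs)
  }
}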
@@ -889,7 +860,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(
Changed |= CleanupPointerRootUsers(GV, GetTLI);
} else {
Changed = true;
- CleanupConstantGlobalUsers(GV, nullptr, DL, GetTLI);
+ CleanupConstantGlobalUsers(GV, DL);
}
if (GV->use_empty()) {
LLVM_DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n");
@@ -1557,8 +1528,7 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
} else {
// Delete any stores we can find to the global. We may not be able to
// make it completely dead though.
- Changed =
- CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI);
+ Changed = CleanupConstantGlobalUsers(GV, DL);
}
// If the global is dead now, delete it.
@@ -1583,7 +1553,7 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
}
// Clean up any obviously simplifiable users now.
- Changed |= CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI);
+ Changed |= CleanupConstantGlobalUsers(GV, DL);
// If the global is dead now, just nuke it.
if (GV->use_empty()) {
@@ -1628,7 +1598,7 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
GV->setInitializer(SOVConstant);
// Clean up any obviously simplifiable users now.
- CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI);
+ CleanupConstantGlobalUsers(GV, DL);
if (GV->use_empty()) {
LLVM_DEBUG(dbgs() << " *** Substituting initializer allowed us to "
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index f342c35fa283..055ee6b50296 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -1885,6 +1885,7 @@ private:
OMPRTL___kmpc_barrier_simple_generic);
ExternalizationRAII ThreadId(OMPInfoCache,
OMPRTL___kmpc_get_hardware_thread_id_in_block);
+ ExternalizationRAII WarpSize(OMPInfoCache, OMPRTL___kmpc_get_warp_size);
registerAAs(IsModulePass);
@@ -3727,12 +3728,37 @@ struct AAKernelInfoFunction : AAKernelInfo {
CheckRWInst, *this, UsedAssumedInformationInCheckRWInst))
SPMDCompatibilityTracker.indicatePessimisticFixpoint();
+ bool UsedAssumedInformationFromReachingKernels = false;
if (!IsKernelEntry) {
- updateReachingKernelEntries(A);
updateParallelLevels(A);
+ bool AllReachingKernelsKnown = true;
+ updateReachingKernelEntries(A, AllReachingKernelsKnown);
+ UsedAssumedInformationFromReachingKernels = !AllReachingKernelsKnown;
+
if (!ParallelLevels.isValidState())
SPMDCompatibilityTracker.indicatePessimisticFixpoint();
+ else if (!ReachingKernelEntries.isValidState())
+ SPMDCompatibilityTracker.indicatePessimisticFixpoint();
+ else if (!SPMDCompatibilityTracker.empty()) {
+ // Check if all reaching kernels agree on the mode as we can otherwise
+ // not guard instructions. We might not be sure about the mode, so we
+ // cannot fix the internal SPMD-ization state either.
+ int SPMD = 0, Generic = 0;
+ for (auto *Kernel : ReachingKernelEntries) {
+ auto &CBAA = A.getAAFor<AAKernelInfo>(
+ *this, IRPosition::function(*Kernel), DepClassTy::OPTIONAL);
+ if (CBAA.SPMDCompatibilityTracker.isValidState() &&
+ CBAA.SPMDCompatibilityTracker.isAssumed())
+ ++SPMD;
+ else
+ ++Generic;
+ if (!CBAA.SPMDCompatibilityTracker.isAtFixpoint())
+ UsedAssumedInformationFromReachingKernels = true;
+ }
+ if (SPMD != 0 && Generic != 0)
+ SPMDCompatibilityTracker.indicatePessimisticFixpoint();
+ }
}
// Callback to check a call instruction.
@@ -3779,7 +3805,8 @@ struct AAKernelInfoFunction : AAKernelInfo {
// If we haven't used any assumed information for the SPMD state we can fix
// it.
if (!UsedAssumedInformationInCheckRWInst &&
- !UsedAssumedInformationInCheckCallInst && AllSPMDStatesWereFixed)
+ !UsedAssumedInformationInCheckCallInst &&
+ !UsedAssumedInformationFromReachingKernels && AllSPMDStatesWereFixed)
SPMDCompatibilityTracker.indicateOptimisticFixpoint();
return StateBefore == getState() ? ChangeStatus::UNCHANGED
@@ -3788,7 +3815,8 @@ struct AAKernelInfoFunction : AAKernelInfo {
private:
/// Update info regarding reaching kernels.
- void updateReachingKernelEntries(Attributor &A) {
+ void updateReachingKernelEntries(Attributor &A,
+ bool &AllReachingKernelsKnown) {
auto PredCallSite = [&](AbstractCallSite ACS) {
Function *Caller = ACS.getInstruction()->getFunction();
@@ -3808,10 +3836,9 @@ private:
return true;
};
- bool AllCallSitesKnown;
if (!A.checkForAllCallSites(PredCallSite, *this,
true /* RequireAllCallSites */,
- AllCallSitesKnown))
+ AllReachingKernelsKnown))
ReachingKernelEntries.indicatePessimisticFixpoint();
}
diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp
index 7402e399a88a..2d717475ce7f 100644
--- a/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -641,8 +641,7 @@ PartialInlinerImpl::computeOutliningInfo(Function &F) const {
if (!CandidateFound)
return std::unique_ptr<FunctionOutliningInfo>();
- // Do sanity check of the entries: threre should not
- // be any successors (not in the entry set) other than
+ // There should not be any successors (not in the entry set) other than
// {ReturnBlock, NonReturnBlock}
assert(OutliningInfo->Entries[0] == &F.front() &&
"Function Entry must be the first in Entries vector");
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index a961c47a7501..b8fac9d47763 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -84,6 +84,7 @@
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/SampleProfileInference.h"
#include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h"
#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
#include <algorithm>
@@ -173,6 +174,9 @@ static cl::opt<bool>
cl::desc("Process functions in a top-down order "
"defined by the profiled call graph when "
"-sample-profile-top-down-load is on."));
+cl::opt<bool>
+ SortProfiledSCC("sort-profiled-scc-member", cl::init(true), cl::Hidden,
+ cl::desc("Sort profiled recursion by edge weights."));
static cl::opt<bool> ProfileSizeInline(
"sample-profile-inline-size", cl::Hidden, cl::init(false),
@@ -1648,6 +1652,19 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) {
SmallVector<uint32_t, 4> Weights;
uint32_t MaxWeight = 0;
Instruction *MaxDestInst;
+ // Since profi treats multiple edges (multiway branches) as a single edge,
+ // we need to distribute the computed weight among the branches. We do
+ // this by evenly splitting the edge weight among destinations.
+ DenseMap<const BasicBlock *, uint64_t> EdgeMultiplicity;
+ std::vector<uint64_t> EdgeIndex;
+ if (SampleProfileUseProfi) {
+ EdgeIndex.resize(TI->getNumSuccessors());
+ for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
+ const BasicBlock *Succ = TI->getSuccessor(I);
+ EdgeIndex[I] = EdgeMultiplicity[Succ];
+ EdgeMultiplicity[Succ]++;
+ }
+ }
for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
BasicBlock *Succ = TI->getSuccessor(I);
Edge E = std::make_pair(BB, Succ);
@@ -1660,9 +1677,19 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) {
LLVM_DEBUG(dbgs() << " (saturated due to uint32_t overflow)");
Weight = std::numeric_limits<uint32_t>::max();
}
- // Weight is added by one to avoid propagation errors introduced by
- // 0 weights.
- Weights.push_back(static_cast<uint32_t>(Weight + 1));
+ if (!SampleProfileUseProfi) {
+ // Weight is added by one to avoid propagation errors introduced by
+ // 0 weights.
+ Weights.push_back(static_cast<uint32_t>(Weight + 1));
+ } else {
+ // Profi creates proper weights that do not require "+1" adjustments, but
+ // we still evenly split the weight among branches with the same destination.
+ uint64_t W = Weight / EdgeMultiplicity[Succ];
+ // Rounding up, if needed, so that first branches are hotter.
+ if (EdgeIndex[I] < Weight % EdgeMultiplicity[Succ])
+ W++;
+ Weights.push_back(static_cast<uint32_t>(W));
+ }
if (Weight != 0) {
if (Weight > MaxWeight) {
MaxWeight = Weight;
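// Editor's sketch (not part of the patch): the EdgeMultiplicity/EdgeIndex
// logic above, isolated. One profi edge weight W is split across the K
// branches that share a successor; the first (W % K) branches are rounded
// up so earlier branches come out hotter and the parts still sum to W.
// splitEdgeWeight is an illustrative helper, not an LLVM API.
#include <cassert>
#include <cstdint>
#include <numeric>
#include <vector>

std::vector<uint64_t> splitEdgeWeight(uint64_t W, unsigned K) {
  std::vector<uint64_t> Parts(K, W / K);
  for (uint64_t I = 0; I < W % K; ++I)
    ++Parts[I]; // hand the remainder to the leading branches
  return Parts;
}

int main() {
  auto Parts = splitEdgeWeight(10, 3); // {4, 3, 3}
  assert(Parts[0] == 4 && Parts[1] == 3 && Parts[2] == 3);
  assert(std::accumulate(Parts.begin(), Parts.end(), uint64_t{0}) == 10);
  return 0;
}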
@@ -1853,7 +1880,13 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
std::unique_ptr<ProfiledCallGraph> ProfiledCG = buildProfiledCallGraph(*CG);
scc_iterator<ProfiledCallGraph *> CGI = scc_begin(ProfiledCG.get());
while (!CGI.isAtEnd()) {
- for (ProfiledCallGraphNode *Node : *CGI) {
+ auto Range = *CGI;
+ if (SortProfiledSCC) {
+ // Sort nodes in one SCC based on callsite hotness.
+ scc_member_iterator<ProfiledCallGraph *> SI(*CGI);
+ Range = *SI;
+ }
+ for (auto *Node : Range) {
Function *F = SymbolMap.lookup(Node->Name);
if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile"))
FunctionOrderList.push_back(F);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 06c9bf650f37..dc55b5a31596 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1727,16 +1727,18 @@ static Instruction *foldComplexAndOrPatterns(BinaryOperator &I,
(Opcode == Instruction::And) ? Instruction::Or : Instruction::And;
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- Value *A, *B, *C;
+ Value *A, *B, *C, *X, *Y;
// (~(A | B) & C) | ... --> ...
// (~(A & B) | C) & ... --> ...
  // TODO: One-use checks are conservative. We just need to check that the
  // total number of multiply-used values does not exceed the reduction
  // in operations.
- if (match(Op0, m_c_BinOp(FlippedOpcode,
- m_Not(m_BinOp(Opcode, m_Value(A), m_Value(B))),
- m_Value(C)))) {
+ if (match(Op0,
+ m_c_BinOp(FlippedOpcode,
+ m_CombineAnd(m_Value(X), m_Not(m_BinOp(Opcode, m_Value(A),
+ m_Value(B)))),
+ m_Value(C)))) {
// (~(A | B) & C) | (~(A | C) & B) --> (B ^ C) & ~A
// (~(A & B) | C) & (~(A & C) | B) --> ~((B ^ C) & A)
if (match(Op1,
@@ -1776,6 +1778,21 @@ static Instruction *foldComplexAndOrPatterns(BinaryOperator &I,
m_c_BinOp(Opcode, m_Specific(B), m_Specific(C)))))))
return BinaryOperator::CreateNot(Builder.CreateBinOp(
Opcode, Builder.CreateBinOp(FlippedOpcode, A, C), B));
+
+ // (~(A | B) & C) | ~(C | (A ^ B)) --> ~((A | B) & (C | (A ^ B)))
+ // Note, the pattern with swapped and/or is not handled because the
+ // result is more undefined than a source:
+ // (~(A & B) | C) & ~(C & (A ^ B)) --> (A ^ B ^ C) | ~(A | C) is invalid.
+ if (Opcode == Instruction::Or && Op0->hasOneUse() &&
+ match(Op1, m_OneUse(m_Not(m_CombineAnd(
+ m_Value(Y),
+ m_c_BinOp(Opcode, m_Specific(C),
+ m_c_Xor(m_Specific(A), m_Specific(B)))))))) {
+ // X = ~(A | B)
+ // Y = (C | (A ^ B))
+ Value *Or = cast<BinaryOperator>(X)->getOperand(0);
+ return BinaryOperator::CreateNot(Builder.CreateAnd(Or, Y));
+ }
}
return nullptr;
@@ -2061,7 +2078,14 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
if (Instruction *CastedAnd = foldCastedBitwiseLogic(I))
return CastedAnd;
+ if (Instruction *Sel = foldBinopOfSextBoolToSelect(I))
+ return Sel;
+
// and(sext(A), B) / and(B, sext(A)) --> A ? B : 0, where A is i1 or <N x i1>.
+ // TODO: Move this into foldBinopOfSextBoolToSelect as a more generalized fold
+ // with binop identity constant. But creating a select with non-constant
+ // arm may not be reversible due to poison semantics. Is that a good
+ // canonicalization?
Value *A;
if (match(Op0, m_OneUse(m_SExt(m_Value(A)))) &&
A->getType()->isIntOrIntVectorTy(1))
@@ -2322,11 +2346,20 @@ Value *InstCombinerImpl::getSelectCondition(Value *A, Value *B) {
Value *Cond;
Value *NotB;
if (match(A, m_SExt(m_Value(Cond))) &&
- Cond->getType()->isIntOrIntVectorTy(1) &&
- match(B, m_OneUse(m_Not(m_Value(NotB))))) {
- NotB = peekThroughBitcast(NotB, true);
- if (match(NotB, m_SExt(m_Specific(Cond))))
+ Cond->getType()->isIntOrIntVectorTy(1)) {
+ // A = sext i1 Cond; B = sext (not (i1 Cond))
+ if (match(B, m_SExt(m_Not(m_Specific(Cond)))))
return Cond;
+
+ // A = sext i1 Cond; B = not ({bitcast} (sext (i1 Cond)))
+ // TODO: The one-use checks are unnecessary or misplaced. If the caller
+ // checked for uses on logic ops/casts, that should be enough to
+ // make this transform worthwhile.
+ if (match(B, m_OneUse(m_Not(m_Value(NotB))))) {
+ NotB = peekThroughBitcast(NotB, true);
+ if (match(NotB, m_SExt(m_Specific(Cond))))
+ return Cond;
+ }
}
// All scalar (and most vector) possibilities should be handled now.
@@ -2569,7 +2602,8 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
return replaceInstUsesWith(I, V);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (I.getType()->isIntOrIntVectorTy(1)) {
+ Type *Ty = I.getType();
+ if (Ty->isIntOrIntVectorTy(1)) {
if (auto *SI0 = dyn_cast<SelectInst>(Op0)) {
if (auto *I =
foldAndOrOfSelectUsingImpliedCond(Op1, *SI0, /* IsAnd */ false))
@@ -2602,7 +2636,16 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
// (X ^ C) | Y -> (X | Y) ^ C iff Y & C == 0
// The check for a 'not' op is for efficiency (if Y is known zero --> ~X).
Value *Or = Builder.CreateOr(X, Y);
- return BinaryOperator::CreateXor(Or, ConstantInt::get(I.getType(), *CV));
+ return BinaryOperator::CreateXor(Or, ConstantInt::get(Ty, *CV));
+ }
+
+ // If the operands have no common bits set:
+ // or (mul X, Y), X --> add (mul X, Y), X --> mul X, (Y + 1)
+ if (match(&I,
+ m_c_Or(m_OneUse(m_Mul(m_Value(X), m_Value(Y))), m_Deferred(X))) &&
+ haveNoCommonBitsSet(Op0, Op1, DL)) {
+ Value *IncrementY = Builder.CreateAdd(Y, ConstantInt::get(Ty, 1));
+ return BinaryOperator::CreateMul(X, IncrementY);
}
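// Editor's sketch (not part of the patch): the two facts behind the new
// or-of-mul fold. When (mul X, Y) and X share no set bits, OR and ADD
// agree, and X*Y + X factors into X*(Y + 1).
#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 5, Y = 16;         // X*Y = 80 and X = 5 have disjoint bits
  uint32_t M = X * Y;
  assert((M & X) == 0);           // the haveNoCommonBitsSet precondition
  assert((M | X) == M + X);       // or == add when no bits overlap
  assert((M | X) == X * (Y + 1)); // the rewritten form
  return 0;
}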
// (A & C) | (B & D)
@@ -2635,14 +2678,14 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
// iff (C0 & C1) == 0 and (X & ~C0) == 0
if (match(A, m_c_Or(m_Value(X), m_Specific(B))) &&
MaskedValueIsZero(X, ~*C0, 0, &I)) {
- Constant *C01 = ConstantInt::get(I.getType(), *C0 | *C1);
+ Constant *C01 = ConstantInt::get(Ty, *C0 | *C1);
return BinaryOperator::CreateAnd(A, C01);
}
// (A & C0) | ((X | A) & C1) --> (X | A) & (C0 | C1)
// iff (C0 & C1) == 0 and (X & ~C1) == 0
if (match(B, m_c_Or(m_Value(X), m_Specific(A))) &&
MaskedValueIsZero(X, ~*C1, 0, &I)) {
- Constant *C01 = ConstantInt::get(I.getType(), *C0 | *C1);
+ Constant *C01 = ConstantInt::get(Ty, *C0 | *C1);
return BinaryOperator::CreateAnd(B, C01);
}
// ((X | C2) & C0) | ((X | C3) & C1) --> (X | C2 | C3) & (C0 | C1)
@@ -2652,7 +2695,7 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
match(B, m_Or(m_Specific(X), m_APInt(C3))) &&
(*C2 & ~*C0).isZero() && (*C3 & ~*C1).isZero()) {
Value *Or = Builder.CreateOr(X, *C2 | *C3, "bitfield");
- Constant *C01 = ConstantInt::get(I.getType(), *C0 | *C1);
+ Constant *C01 = ConstantInt::get(Ty, *C0 | *C1);
return BinaryOperator::CreateAnd(Or, C01);
}
}
@@ -2788,13 +2831,20 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
if (Instruction *CastedOr = foldCastedBitwiseLogic(I))
return CastedOr;
+ if (Instruction *Sel = foldBinopOfSextBoolToSelect(I))
+ return Sel;
+
// or(sext(A), B) / or(B, sext(A)) --> A ? -1 : B, where A is i1 or <N x i1>.
+ // TODO: Move this into foldBinopOfSextBoolToSelect as a more generalized fold
+ // with binop identity constant. But creating a select with non-constant
+ // arm may not be reversible due to poison semantics. Is that a good
+ // canonicalization?
if (match(Op0, m_OneUse(m_SExt(m_Value(A)))) &&
A->getType()->isIntOrIntVectorTy(1))
- return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op1);
+ return SelectInst::Create(A, ConstantInt::getAllOnesValue(Ty), Op1);
if (match(Op1, m_OneUse(m_SExt(m_Value(A)))) &&
A->getType()->isIntOrIntVectorTy(1))
- return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op0);
+ return SelectInst::Create(A, ConstantInt::getAllOnesValue(Ty), Op0);
// Note: If we've gotten to the point of visiting the outer OR, then the
// inner one couldn't be simplified. If it was a constant, then it won't
@@ -2826,7 +2876,6 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
// or(ashr(subNSW(Y, X), ScalarSizeInBits(Y) - 1), X) --> X s> Y ? -1 : X.
{
Value *X, *Y;
- Type *Ty = I.getType();
if (match(&I, m_c_Or(m_OneUse(m_AShr(
m_NSWSub(m_Value(Y), m_Value(X)),
m_SpecificInt(Ty->getScalarSizeInBits() - 1))),
@@ -2876,7 +2925,6 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
if (match(&I, m_c_Or(m_Add(m_Shl(m_One(), m_Value(X)), m_AllOnes()),
m_Shl(m_One(), m_Deferred(X)))) &&
match(&I, m_c_Or(m_OneUse(m_Value()), m_Value()))) {
- Type *Ty = X->getType();
Value *Sub = Builder.CreateSub(
ConstantInt::get(Ty, Ty->getScalarSizeInBits() - 1), X);
return BinaryOperator::CreateLShr(Constant::getAllOnesValue(Ty), Sub);
@@ -3601,6 +3649,14 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
if (match(&I, m_c_Xor(m_c_And(m_Not(m_Value(A)), m_Value(B)), m_Deferred(A))))
return BinaryOperator::CreateOr(A, B);
+ // (~A | B) ^ A --> ~(A & B)
+ if (match(Op0, m_OneUse(m_c_Or(m_Not(m_Specific(Op1)), m_Value(B)))))
+ return BinaryOperator::CreateNot(Builder.CreateAnd(Op1, B));
+
+ // A ^ (~A | B) --> ~(A & B)
+ if (match(Op1, m_OneUse(m_c_Or(m_Not(m_Specific(Op0)), m_Value(B)))))
+ return BinaryOperator::CreateNot(Builder.CreateAnd(Op0, B));
+
// (A | B) ^ (A | C) --> (B ^ C) & ~A -- There are 4 commuted variants.
// TODO: Loosen one-use restriction if common operand is a constant.
Value *D;
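// Editor's sketch (not part of the patch): exhaustive i8 check of the new
// xor folds above; (~A | B) ^ A is De Morgan's ~(A & B) in disguise.
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B) {
      uint8_t Lhs = static_cast<uint8_t>((~A | B) ^ A);
      uint8_t Rhs = static_cast<uint8_t>(~(A & B));
      assert(Lhs == Rhs);
    }
  return 0;
}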
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index bfa7bfa2290a..7da2669e1d13 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2641,7 +2641,7 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
ArgNo++;
}
- assert(ArgNo == Call.arg_size() && "sanity check");
+ assert(ArgNo == Call.arg_size() && "Call arguments not processed correctly.");
if (!ArgNos.empty()) {
AttributeList AS = Call.getAttributes();
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index ca87477c5d81..33f217659c01 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -2771,7 +2771,7 @@ Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
if (match(Src, m_OneUse(m_InsertElt(m_OneUse(m_BitCast(m_Value(X))),
m_Value(Y), m_ConstantInt(IndexC)))) &&
DestTy->isIntegerTy() && X->getType() == DestTy &&
- isDesirableIntType(BitWidth)) {
+ Y->getType()->isIntegerTy() && isDesirableIntType(BitWidth)) {
// Adjust for big endian - the LSBs are at the high index.
if (DL.isBigEndian())
IndexC = SrcVTy->getNumElements() - 1 - IndexC;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 7a9e177f19da..ed53b88aed61 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -14,6 +14,7 @@
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CmpInstAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -1894,23 +1895,6 @@ Instruction *InstCombinerImpl::foldICmpAndConstant(ICmpInst &Cmp,
return new ICmpInst(NewPred, X, SubOne(cast<Constant>(Cmp.getOperand(1))));
}
- // (X & C2) == 0 -> (trunc X) >= 0
- // (X & C2) != 0 -> (trunc X) < 0
- // iff C2 is a power of 2 and it masks the sign bit of a legal integer type.
- const APInt *C2;
- if (And->hasOneUse() && C.isZero() && match(Y, m_APInt(C2))) {
- int32_t ExactLogBase2 = C2->exactLogBase2();
- if (ExactLogBase2 != -1 && DL.isLegalInteger(ExactLogBase2 + 1)) {
- Type *NTy = IntegerType::get(Cmp.getContext(), ExactLogBase2 + 1);
- if (auto *AndVTy = dyn_cast<VectorType>(And->getType()))
- NTy = VectorType::get(NTy, AndVTy->getElementCount());
- Value *Trunc = Builder.CreateTrunc(X, NTy);
- auto NewPred =
- Pred == CmpInst::ICMP_EQ ? CmpInst::ICMP_SGE : CmpInst::ICMP_SLT;
- return new ICmpInst(NewPred, Trunc, Constant::getNullValue(NTy));
- }
- }
-
return nullptr;
}
@@ -2803,7 +2787,8 @@ bool InstCombinerImpl::matchThreeWayIntCompare(SelectInst *SI, Value *&LHS,
PredB, cast<Constant>(RHS2));
if (!FlippedStrictness)
return false;
- assert(FlippedStrictness->first == ICmpInst::ICMP_SGE && "Sanity check");
+ assert(FlippedStrictness->first == ICmpInst::ICMP_SGE &&
+ "basic correctness failure");
RHS2 = FlippedStrictness->second;
// And kind-of perform the result swap.
std::swap(Less, Greater);
@@ -4614,7 +4599,7 @@ Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) {
static Instruction *foldICmpWithTrunc(ICmpInst &ICmp,
InstCombiner::BuilderTy &Builder) {
- const ICmpInst::Predicate Pred = ICmp.getPredicate();
+ ICmpInst::Predicate Pred = ICmp.getPredicate();
Value *Op0 = ICmp.getOperand(0), *Op1 = ICmp.getOperand(1);
// Try to canonicalize trunc + compare-to-constant into a mask + cmp.
@@ -4624,41 +4609,31 @@ static Instruction *foldICmpWithTrunc(ICmpInst &ICmp,
if (!match(Op0, m_OneUse(m_Trunc(m_Value(X)))) || !match(Op1, m_APInt(C)))
return nullptr;
+ // This matches patterns corresponding to tests of the signbit as well as:
+ // (trunc X) u< C --> (X & -C) == 0 (are all masked-high-bits clear?)
+ // (trunc X) u> C --> (X & ~C) != 0 (are any masked-high-bits set?)
+ APInt Mask;
+ if (decomposeBitTestICmp(Op0, Op1, Pred, X, Mask, true /* WithTrunc */)) {
+ Value *And = Builder.CreateAnd(X, Mask);
+ Constant *Zero = ConstantInt::getNullValue(X->getType());
+ return new ICmpInst(Pred, And, Zero);
+ }
+
unsigned SrcBits = X->getType()->getScalarSizeInBits();
- if (Pred == ICmpInst::ICMP_ULT) {
- if (C->isPowerOf2()) {
- // If C is a power-of-2 (one set bit):
- // (trunc X) u< C --> (X & -C) == 0 (are all masked-high-bits clear?)
- Constant *MaskC = ConstantInt::get(X->getType(), (-*C).zext(SrcBits));
- Value *And = Builder.CreateAnd(X, MaskC);
- Constant *Zero = ConstantInt::getNullValue(X->getType());
- return new ICmpInst(ICmpInst::ICMP_EQ, And, Zero);
- }
+ if (Pred == ICmpInst::ICMP_ULT && C->isNegatedPowerOf2()) {
// If C is a negative power-of-2 (high-bit mask):
// (trunc X) u< C --> (X & C) != C (are any masked-high-bits clear?)
- if (C->isNegatedPowerOf2()) {
- Constant *MaskC = ConstantInt::get(X->getType(), C->zext(SrcBits));
- Value *And = Builder.CreateAnd(X, MaskC);
- return new ICmpInst(ICmpInst::ICMP_NE, And, MaskC);
- }
+ Constant *MaskC = ConstantInt::get(X->getType(), C->zext(SrcBits));
+ Value *And = Builder.CreateAnd(X, MaskC);
+ return new ICmpInst(ICmpInst::ICMP_NE, And, MaskC);
}
- if (Pred == ICmpInst::ICMP_UGT) {
- // If C is a low-bit-mask (C+1 is a power-of-2):
- // (trunc X) u> C --> (X & ~C) != 0 (are any masked-high-bits set?)
- if (C->isMask()) {
- Constant *MaskC = ConstantInt::get(X->getType(), (~*C).zext(SrcBits));
- Value *And = Builder.CreateAnd(X, MaskC);
- Constant *Zero = ConstantInt::getNullValue(X->getType());
- return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
- }
+ if (Pred == ICmpInst::ICMP_UGT && (~*C).isPowerOf2()) {
// If C is not-of-power-of-2 (one clear bit):
// (trunc X) u> C --> (X & (C+1)) == C+1 (are all masked-high-bits set?)
- if ((~*C).isPowerOf2()) {
- Constant *MaskC = ConstantInt::get(X->getType(), (*C + 1).zext(SrcBits));
- Value *And = Builder.CreateAnd(X, MaskC);
- return new ICmpInst(ICmpInst::ICMP_EQ, And, MaskC);
- }
+ Constant *MaskC = ConstantInt::get(X->getType(), (*C + 1).zext(SrcBits));
+ Value *And = Builder.CreateAnd(X, MaskC);
+ return new ICmpInst(ICmpInst::ICMP_EQ, And, MaskC);
}
return nullptr;
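Both rewrites above reduce to plain unsigned arithmetic on the truncated bits: for a power-of-2 C, (trunc X) u< C holds exactly when the masked high bits of X are clear, and for a low-bit mask C, (trunc X) u> C holds exactly when any masked high bit is set. A self-contained C++ spot-check of the two identities (illustrative only; the 16-to-8-bit truncation and the constants are arbitrary choices, not taken from the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint8_t C1 = 16;   // power-of-2: (trunc X) u< C1  <=>  (X & -C1) == 0
      const uint8_t C2 = 0x0f; // low-bit mask: (trunc X) u> C2  <=>  (X & ~C2) != 0
      for (uint32_t X = 0; X <= 0xffff; ++X) {
        uint8_t T = static_cast<uint8_t>(X);         // models trunc i16 X to i8
        uint16_t MaskLT = static_cast<uint8_t>(-C1); // zext of -C to the source width
        uint16_t MaskGT = static_cast<uint8_t>(~C2); // zext of ~C to the source width
        assert((T < C1) == ((X & MaskLT) == 0));
        assert((T > C2) == ((X & MaskGT) != 0));
      }
      return 0;
    }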
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 72e1b21e8d49..20c75188ec9f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -319,6 +319,7 @@ private:
Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
Instruction *foldBitcastExtElt(ExtractElementInst &ExtElt);
Instruction *foldCastedBitwiseLogic(BinaryOperator &I);
+ Instruction *foldBinopOfSextBoolToSelect(BinaryOperator &I);
Instruction *narrowBinOp(TruncInst &Trunc);
Instruction *narrowMaskedBinOp(BinaryOperator &And);
Instruction *narrowMathIfNoOverflow(BinaryOperator &I);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp b/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp
index 7dc516c6fdc3..42ba4a34a5a9 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp
@@ -403,7 +403,7 @@ LLVM_NODISCARD Value *Negator::visitImpl(Value *V, unsigned Depth) {
NonNegatedOps.emplace_back(Op); // Just record which operand that was.
}
assert((NegatedOps.size() + NonNegatedOps.size()) == 2 &&
- "Internal consistency sanity check.");
+ "Internal consistency check failed.");
// Did we manage to sink negation into both of the operands?
if (NegatedOps.size() == 2) // Then we get to keep the `add`!
return Builder.CreateAdd(NegatedOps[0], NegatedOps[1],
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 4a1e82ae9c1d..518d3952dce5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -246,12 +246,16 @@ static Value *foldSelectICmpAnd(SelectInst &Sel, ICmpInst *Cmp,
static unsigned getSelectFoldableOperands(BinaryOperator *I) {
switch (I->getOpcode()) {
case Instruction::Add:
+ case Instruction::FAdd:
case Instruction::Mul:
+ case Instruction::FMul:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
return 3; // Can fold through either operand.
case Instruction::Sub: // Can only fold on the amount subtracted.
+ case Instruction::FSub:
+ case Instruction::FDiv: // Can only fold on the divisor amount.
case Instruction::Shl: // Can only fold on the shift amount.
case Instruction::LShr:
case Instruction::AShr:
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 47b6dcb67a78..1f81624f79e7 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -967,6 +967,29 @@ Value *InstCombinerImpl::dyn_castNegVal(Value *V) const {
return nullptr;
}
+/// A binop with a constant operand and a sign-extended boolean operand may be
+/// converted into a select of constants by applying the binary operation to
+/// the constant with the two possible values of the extended boolean (0 or -1).
+Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
+ // TODO: Handle non-commutative binop (constant is operand 0).
+ // TODO: Handle zext.
+ // TODO: Peek through 'not' of cast.
+ Value *BO0 = BO.getOperand(0);
+ Value *BO1 = BO.getOperand(1);
+ Value *X;
+ Constant *C;
+ if (!match(BO0, m_SExt(m_Value(X))) || !match(BO1, m_ImmConstant(C)) ||
+ !X->getType()->isIntOrIntVectorTy(1))
+ return nullptr;
+
+ // bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C)
+ Constant *Ones = ConstantInt::getAllOnesValue(BO.getType());
+ Constant *Zero = ConstantInt::getNullValue(BO.getType());
+ Constant *TVal = ConstantExpr::get(BO.getOpcode(), Ones, C);
+ Constant *FVal = ConstantExpr::get(BO.getOpcode(), Zero, C);
+ return SelectInst::Create(X, TVal, FVal);
+}
+
static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO,
InstCombiner::BuilderTy &Builder) {
if (auto *Cast = dyn_cast<CastInst>(&I))
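The new fold is ordinary constant folding against the only two values a sign-extended i1 can take, 0 and -1. A standalone C++ analogue for an add (a sketch under that assumption; foldedAdd is a made-up name, not an LLVM API):

    #include <cassert>
    #include <cstdint>

    // Models: bo (sext i1 X), C --> select X, (bo -1, C), (bo 0, C)
    int32_t foldedAdd(bool X, int32_t C) {
      int32_t TVal = -1 + C; // the binop applied to all-ones
      int32_t FVal = 0 + C;  // the binop applied to zero
      return X ? TVal : FVal;
    }

    int main() {
      for (int X = 0; X <= 1; ++X)
        for (int32_t C = -3; C <= 3; ++C) {
          int32_t Sext = X ? -1 : 0; // sext i1 X to i32
          assert(Sext + C == foldedAdd(X != 0, C));
        }
      return 0;
    }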
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index b56329ad76ae..bd2dc8d639fc 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -6,7 +6,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file is a part of AddressSanitizer, an address sanity checker.
+// This file is a part of AddressSanitizer, an address basic correctness
+// checker.
// Details of the algorithm:
// https://github.com/google/sanitizers/wiki/AddressSanitizerAlgorithm
//
diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index 62c265e40dab..8d3bc1383e96 100644
--- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -7,8 +7,8 @@
//===----------------------------------------------------------------------===//
//
/// \file
-/// This file is a part of HWAddressSanitizer, an address sanity checker
-/// based on tagged addressing.
+/// This file is a part of HWAddressSanitizer, an address basic correctness
+/// checker based on tagged addressing.
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 36a66e096382..d1d3b8ffdf7a 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -64,10 +64,10 @@ cl::opt<bool> DoHashBasedCounterSplit(
cl::desc("Rename counter variable of a comdat function based on cfg hash"),
cl::init(true));
-cl::opt<bool> RuntimeCounterRelocation(
- "runtime-counter-relocation",
- cl::desc("Enable relocating counters at runtime."),
- cl::init(false));
+cl::opt<bool>
+ RuntimeCounterRelocation("runtime-counter-relocation",
+ cl::desc("Enable relocating counters at runtime."),
+ cl::init(false));
cl::opt<bool> ValueProfileStaticAlloc(
"vp-static-alloc",
@@ -331,8 +331,9 @@ private:
// Check whether the loop satisfies the basic conditions needed to perform
// Counter Promotions.
- bool isPromotionPossible(Loop *LP,
- const SmallVectorImpl<BasicBlock *> &LoopExitBlocks) {
+ bool
+ isPromotionPossible(Loop *LP,
+ const SmallVectorImpl<BasicBlock *> &LoopExitBlocks) {
// We can't insert into a catchswitch.
if (llvm::any_of(LoopExitBlocks, [](BasicBlock *Exit) {
return isa<CatchSwitchInst>(Exit->getTerminator());
@@ -421,13 +422,13 @@ PreservedAnalyses InstrProfiling::run(Module &M, ModuleAnalysisManager &AM) {
}
char InstrProfilingLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(
- InstrProfilingLegacyPass, "instrprof",
- "Frontend instrumentation-based coverage lowering.", false, false)
+INITIALIZE_PASS_BEGIN(InstrProfilingLegacyPass, "instrprof",
+ "Frontend instrumentation-based coverage lowering.",
+ false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(
- InstrProfilingLegacyPass, "instrprof",
- "Frontend instrumentation-based coverage lowering.", false, false)
+INITIALIZE_PASS_END(InstrProfilingLegacyPass, "instrprof",
+ "Frontend instrumentation-based coverage lowering.", false,
+ false)
ModulePass *
llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options,
@@ -634,13 +635,9 @@ void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
GlobalVariable *Name = Ind->getName();
uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
uint64_t Index = Ind->getIndex()->getZExtValue();
- auto It = ProfileDataMap.find(Name);
- if (It == ProfileDataMap.end()) {
- PerFunctionProfileData PD;
- PD.NumValueSites[ValueKind] = Index + 1;
- ProfileDataMap[Name] = PD;
- } else if (It->second.NumValueSites[ValueKind] <= Index)
- It->second.NumValueSites[ValueKind] = Index + 1;
+ auto &PD = ProfileDataMap[Name];
+ PD.NumValueSites[ValueKind] =
+ std::max(PD.NumValueSites[ValueKind], (uint32_t)(Index + 1));
}
void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
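Both map cleanups in this file rely on operator[] value-initializing a missing entry to zero, which lets the old find/insert/update branches collapse into a single std::max. The same idiom with std::map in place of the LLVM DenseMap (an illustrative sketch):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <map>
    #include <string>

    int main() {
      std::map<std::string, uint32_t> NumValueSites;
      auto Record = [&](const std::string &Name, uint64_t Index) {
        // operator[] default-constructs a zero entry on first use, so one
        // std::max covers both the "new entry" and the "update" case.
        auto &Count = NumValueSites[Name];
        Count = std::max(Count, static_cast<uint32_t>(Index + 1));
      };
      Record("f", 3);
      Record("f", 1); // lower index: no change
      assert(NumValueSites["f"] == 4);
      return 0;
    }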
@@ -703,14 +700,15 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
LoadInst *LI = dyn_cast<LoadInst>(&I);
if (!LI) {
IRBuilder<> Builder(&I);
- GlobalVariable *Bias = M->getGlobalVariable(getInstrProfCounterBiasVarName());
+ GlobalVariable *Bias =
+ M->getGlobalVariable(getInstrProfCounterBiasVarName());
if (!Bias) {
// Compiler must define this variable when runtime counter relocation
// is being used. Runtime has a weak external reference that is used
// to check whether that's the case or not.
- Bias = new GlobalVariable(*M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage,
- Constant::getNullValue(Int64Ty),
- getInstrProfCounterBiasVarName());
+ Bias = new GlobalVariable(
+ *M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage,
+ Constant::getNullValue(Int64Ty), getInstrProfCounterBiasVarName());
Bias->setVisibility(GlobalVariable::HiddenVisibility);
// A definition that's weak (linkonce_odr) without being in a COMDAT
// section wouldn't lead to link errors, but it would lead to a dead
@@ -839,8 +837,7 @@ static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) {
return false;
// Use linker script magic to get data/cnts/name start/end.
if (TT.isOSLinux() || TT.isOSFreeBSD() || TT.isOSNetBSD() ||
- TT.isOSSolaris() || TT.isOSFuchsia() || TT.isPS4CPU() ||
- TT.isOSWindows())
+ TT.isOSSolaris() || TT.isOSFuchsia() || TT.isPS4CPU() || TT.isOSWindows())
return false;
return true;
@@ -849,13 +846,9 @@ static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) {
GlobalVariable *
InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
GlobalVariable *NamePtr = Inc->getName();
- auto It = ProfileDataMap.find(NamePtr);
- PerFunctionProfileData PD;
- if (It != ProfileDataMap.end()) {
- if (It->second.RegionCounters)
- return It->second.RegionCounters;
- PD = It->second;
- }
+ auto &PD = ProfileDataMap[NamePtr];
+ if (PD.RegionCounters)
+ return PD.RegionCounters;
// Match the linkage and visibility of the name global.
Function *Fn = Inc->getParent()->getParent();
@@ -922,6 +915,7 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
CounterPtr->setAlignment(Align(8));
MaybeSetComdat(CounterPtr);
CounterPtr->setLinkage(Linkage);
+ PD.RegionCounters = CounterPtr;
auto *Int8PtrTy = Type::getInt8PtrTy(Ctx);
// Allocate statically the array of pointers to value profile nodes for
@@ -1000,9 +994,7 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
MaybeSetComdat(Data);
Data->setLinkage(Linkage);
- PD.RegionCounters = CounterPtr;
PD.DataVar = Data;
- ProfileDataMap[NamePtr] = PD;
// Mark the data variable as used so that it isn't stripped out.
CompilerUsedVars.push_back(Data);
@@ -1013,7 +1005,7 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
// Collect the referenced names to be used by emitNameData.
ReferencedNames.push_back(NamePtr);
- return CounterPtr;
+ return PD.RegionCounters;
}
void InstrProfiling::emitVNodes() {
@@ -1078,8 +1070,8 @@ void InstrProfiling::emitNameData() {
}
auto &Ctx = M->getContext();
- auto *NamesVal = ConstantDataArray::getString(
- Ctx, StringRef(CompressedNameStr), false);
+ auto *NamesVal =
+ ConstantDataArray::getString(Ctx, StringRef(CompressedNameStr), false);
NamesVar = new GlobalVariable(*M, NamesVal->getType(), true,
GlobalValue::PrivateLinkage, NamesVal,
getInstrProfNamesVarName());
diff --git a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index f98e39d751f4..180012198c42 100644
--- a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -110,7 +110,7 @@ namespace {
/// the module.
struct ThreadSanitizer {
ThreadSanitizer() {
- // Sanity check options and warn user.
+ // Check options and warn user.
if (ClInstrumentReadBeforeWrite && ClCompoundReadBeforeWrite) {
errs()
<< "warning: Option -tsan-compound-read-before-write has no effect "
diff --git a/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
index 74e4eb07b219..4921209f041b 100644
--- a/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
+++ b/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
@@ -94,11 +94,9 @@ bool llvm::objcarc::CanUse(const Instruction *Inst, const Value *Ptr,
return false;
} else if (const auto *CS = dyn_cast<CallBase>(Inst)) {
// For calls, just check the arguments (and not the callee operand).
- for (auto OI = CS->arg_begin(), OE = CS->arg_end(); OI != OE; ++OI) {
- const Value *Op = *OI;
+ for (const Value *Op : CS->args())
if (IsPotentialRetainableObjPtr(Op, *PA.getAA()) && PA.related(Ptr, Op))
return true;
- }
return false;
} else if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
// Special-case stores, because we don't care about the stored value, just
diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index ca9567dc7ac8..a3fd97079b1d 100644
--- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -52,6 +52,11 @@ using namespace llvm;
#define DEBUG_TYPE "correlated-value-propagation"
+static cl::opt<bool> CanonicalizeICmpPredicatesToUnsigned(
+ "canonicalize-icmp-predicates-to-unsigned", cl::init(true), cl::Hidden,
+ cl::desc("Enables canonicalization of signed relational predicates to "
+ "unsigned (e.g. sgt => ugt)"));
+
STATISTIC(NumPhis, "Number of phis propagated");
STATISTIC(NumPhiCommon, "Number of phis deleted via common incoming value");
STATISTIC(NumSelects, "Number of selects propagated");
@@ -64,7 +69,8 @@ STATISTIC(NumSDivSRemsNarrowed,
STATISTIC(NumSDivs, "Number of sdiv converted to udiv");
STATISTIC(NumUDivURemsNarrowed,
"Number of udivs/urems whose width was decreased");
-STATISTIC(NumAShrs, "Number of ashr converted to lshr");
+STATISTIC(NumAShrsConverted, "Number of ashr converted to lshr");
+STATISTIC(NumAShrsRemoved, "Number of ashr removed");
STATISTIC(NumSRems, "Number of srem converted to urem");
STATISTIC(NumSExt, "Number of sext converted to zext");
STATISTIC(NumSICmps, "Number of signed icmp preds simplified to unsigned");
@@ -297,6 +303,9 @@ static bool processMemAccess(Instruction *I, LazyValueInfo *LVI) {
}
static bool processICmp(ICmpInst *Cmp, LazyValueInfo *LVI) {
+ if (!CanonicalizeICmpPredicatesToUnsigned)
+ return false;
+
// Only for signed relational comparisons of scalar integers.
if (Cmp->getType()->isVectorTy() ||
!Cmp->getOperand(0)->getType()->isIntegerTy())
@@ -376,13 +385,7 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI,
// ConstantFoldTerminator() as the underlying SwitchInst can be changed.
SwitchInstProfUpdateWrapper SI(*I);
- APInt Low =
- APInt::getSignedMaxValue(Cond->getType()->getScalarSizeInBits());
- APInt High =
- APInt::getSignedMinValue(Cond->getType()->getScalarSizeInBits());
-
- SwitchInst::CaseIt CI = SI->case_begin();
- for (auto CE = SI->case_end(); CI != CE;) {
+ for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) {
ConstantInt *Case = CI->getCaseValue();
LazyValueInfo::Tristate State =
LVI->getPredicateAt(CmpInst::ICMP_EQ, Cond, Case, I,
@@ -415,28 +418,9 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI,
break;
}
- // Get Lower/Upper bound from switch cases.
- Low = APIntOps::smin(Case->getValue(), Low);
- High = APIntOps::smax(Case->getValue(), High);
-
// Increment the case iterator since we didn't delete it.
++CI;
}
-
- // Try to simplify default case as unreachable
- if (CI == SI->case_end() && SI->getNumCases() != 0 &&
- !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg())) {
- const ConstantRange SIRange =
- LVI->getConstantRange(SI->getCondition(), SI);
-
- // If the numbered switch cases cover the entire range of the condition,
- // then the default case is not reachable.
- if (SIRange.getSignedMin() == Low && SIRange.getSignedMax() == High &&
- SI->getNumCases() == High - Low + 1) {
- createUnreachableSwitchDefault(SI, &DTU);
- Changed = true;
- }
- }
}
if (Changed)
@@ -688,7 +672,7 @@ static bool processCallSite(CallBase &CB, LazyValueInfo *LVI) {
ArgNo++;
}
- assert(ArgNo == CB.arg_size() && "sanity check");
+ assert(ArgNo == CB.arg_size() && "Call arguments not processed correctly.");
if (ArgNos.empty())
return Changed;
@@ -954,10 +938,22 @@ static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) {
if (SDI->getType()->isVectorTy())
return false;
+ ConstantRange LRange = LVI->getConstantRange(SDI->getOperand(0), SDI);
+ unsigned OrigWidth = SDI->getType()->getIntegerBitWidth();
+ ConstantRange NegOneOrZero =
+ ConstantRange(APInt(OrigWidth, (uint64_t)-1, true), APInt(OrigWidth, 1));
+ if (NegOneOrZero.contains(LRange)) {
+ // ashr of -1 or 0 never changes the value, so drop the whole instruction
+ ++NumAShrsRemoved;
+ SDI->replaceAllUsesWith(SDI->getOperand(0));
+ SDI->eraseFromParent();
+ return true;
+ }
+
if (!isNonNegative(SDI->getOperand(0), LVI, SDI))
return false;
- ++NumAShrs;
+ ++NumAShrsConverted;
auto *BO = BinaryOperator::CreateLShr(SDI->getOperand(0), SDI->getOperand(1),
SDI->getName(), SDI);
BO->setDebugLoc(SDI->getDebugLoc());
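The new removal case rests on ashr leaving both 0 and -1 unchanged for any in-range shift amount, which is why a [-1, 0] range for the shifted operand lets the instruction be dropped outright. A quick C++ check (illustrative; before C++20 the result of >> on a negative value is implementation-defined, though mainstream compilers shift arithmetically, matching LLVM's ashr):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned Sh = 0; Sh < 32; ++Sh) {
        assert((int32_t(0) >> Sh) == 0);   // ashr 0, Sh  == 0
        assert((int32_t(-1) >> Sh) == -1); // ashr -1, Sh == -1
      }
      return 0;
    }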
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index a8ec8bb97970..e0d3a6accadd 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -159,52 +159,22 @@ static cl::opt<unsigned> MemorySSAPathCheckLimit(
cl::desc("The maximum number of blocks to check when trying to prove that "
"all paths to an exit go through a killing block (default = 50)"));
+// This flag allows or disallows DSE to optimize MemorySSA during its
+// traversal. Note that DSE optimizing MemorySSA may impact other passes
+// downstream of the DSE invocation and can lead to issues not being
+// reproducible in isolation (i.e. when MemorySSA is built from scratch). In
+// those cases, the flag can be used to check if DSE's MemorySSA optimizations
+// impact follow-up passes.
+static cl::opt<bool>
+ OptimizeMemorySSA("dse-optimize-memoryssa", cl::init(true), cl::Hidden,
+ cl::desc("Allow DSE to optimize memory accesses."));
+
//===----------------------------------------------------------------------===//
// Helper functions
//===----------------------------------------------------------------------===//
using OverlapIntervalsTy = std::map<int64_t, int64_t>;
using InstOverlapIntervalsTy = DenseMap<Instruction *, OverlapIntervalsTy>;
-/// Does this instruction write some memory? This only returns true for things
-/// that we can analyze with other helpers below.
-static bool hasAnalyzableMemoryWrite(Instruction *I,
- const TargetLibraryInfo &TLI) {
- if (isa<StoreInst>(I))
- return true;
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
- switch (II->getIntrinsicID()) {
- default:
- return false;
- case Intrinsic::memset:
- case Intrinsic::memmove:
- case Intrinsic::memcpy:
- case Intrinsic::memcpy_inline:
- case Intrinsic::memcpy_element_unordered_atomic:
- case Intrinsic::memmove_element_unordered_atomic:
- case Intrinsic::memset_element_unordered_atomic:
- case Intrinsic::init_trampoline:
- case Intrinsic::lifetime_end:
- case Intrinsic::masked_store:
- return true;
- }
- }
- if (auto *CB = dyn_cast<CallBase>(I)) {
- LibFunc LF;
- if (TLI.getLibFunc(*CB, LF) && TLI.has(LF)) {
- switch (LF) {
- case LibFunc_strcpy:
- case LibFunc_strncpy:
- case LibFunc_strcat:
- case LibFunc_strncat:
- return true;
- default:
- return false;
- }
- }
- }
- return false;
-}
-
/// If the value of this instruction and the memory it writes to is unused, may
/// we delete this instruction?
static bool isRemovable(Instruction *I) {
@@ -214,7 +184,7 @@ static bool isRemovable(Instruction *I) {
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
- default: llvm_unreachable("doesn't pass 'hasAnalyzableMemoryWrite' predicate");
+ default: llvm_unreachable("Does not have LocForWrite");
case Intrinsic::lifetime_end:
// Never remove dead lifetime_end's, e.g. because it is followed by a
// free.
@@ -296,6 +266,7 @@ enum OverwriteResult {
OW_End,
OW_PartialEarlierWithFullLater,
OW_MaybePartial,
+ OW_None,
OW_Unknown
};
@@ -841,7 +812,7 @@ struct DSEState {
/// Keep track of instructions (partly) overlapping with killing MemoryDefs per
/// basic block.
- DenseMap<BasicBlock *, InstOverlapIntervalsTy> IOLs;
+ MapVector<BasicBlock *, InstOverlapIntervalsTy> IOLs;
// Class contains self-reference, make sure it's not copied/moved.
DSEState(const DSEState &) = delete;
@@ -889,6 +860,7 @@ struct DSEState {
/// Return OW_MaybePartial if \p KillingI does not completely overwrite
/// \p DeadI, but they both write to the same underlying object. In that
/// case, use isPartialOverwrite to check if \p KillingI partially overwrites
+ /// \p DeadI. Returns 'OW_None' if \p KillingI is known not to overwrite the
/// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
OverwriteResult isOverwrite(const Instruction *KillingI,
const Instruction *DeadI,
@@ -951,8 +923,16 @@ struct DSEState {
// If we can't resolve the same pointers to the same object, then we can't
// analyze them at all.
- if (DeadUndObj != KillingUndObj)
+ if (DeadUndObj != KillingUndObj) {
+ // Non-aliasing stores to different objects don't overlap. Note that
+ // if the killing store is known to overwrite the whole object (an out-of-
+ // bounds access overwrites the whole object as well), then it is assumed
+ // to completely overwrite any store to the same object, even if they
+ // don't actually alias (see the next check).
+ if (AAR == AliasResult::NoAlias)
+ return OW_None;
return OW_Unknown;
+ }
// If the KillingI store is to a recognizable object, get its size.
uint64_t KillingUndObjSize = getPointerSize(KillingUndObj, DL, TLI, &F);
@@ -1006,9 +986,8 @@ struct DSEState {
return OW_MaybePartial;
}
- // Can reach here only if accesses are known not to overlap. There is no
- // dedicated code to indicate no overlap so signal "unknown".
- return OW_Unknown;
+ // Can reach here only if accesses are known not to overlap.
+ return OW_None;
}
bool isInvisibleToCallerAfterRet(const Value *V) {
@@ -1304,6 +1283,15 @@ struct DSEState {
Instruction *KillingI = KillingDef->getMemoryInst();
LLVM_DEBUG(dbgs() << " trying to get dominating access\n");
+ // Only optimize defining access of KillingDef when directly starting at its
+ // defining access. The defining access also must only access KillingLoc. At
+ // the moment we only support instructions with a single write location, so
+ // it should be sufficient to disable optimizations for instructions that
+ // also read from memory.
+ bool CanOptimize = OptimizeMemorySSA &&
+ KillingDef->getDefiningAccess() == StartAccess &&
+ !KillingI->mayReadFromMemory();
+
// Find the next clobbering Mod access for DefLoc, starting at StartAccess.
Optional<MemoryLocation> CurrentLoc;
for (;; Current = cast<MemoryDef>(Current)->getDefiningAccess()) {
@@ -1345,8 +1333,10 @@ struct DSEState {
Instruction *CurrentI = CurrentDef->getMemoryInst();
if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(KillingUndObj),
- TLI))
+ TLI)) {
+ CanOptimize = false;
continue;
+ }
// Before we try to remove anything, check for any extra throwing
// instructions that block us from DSEing
@@ -1380,15 +1370,13 @@ struct DSEState {
return None;
}
- // If Current cannot be analyzed or is not removable, check the next
- // candidate.
- if (!hasAnalyzableMemoryWrite(CurrentI, TLI) || !isRemovable(CurrentI))
- continue;
-
- // If Current does not have an analyzable write location, skip it
+ // If Current does not have an analyzable write location or is not
+ // removable, skip it.
CurrentLoc = getLocForWriteEx(CurrentI);
- if (!CurrentLoc)
+ if (!CurrentLoc || !isRemovable(CurrentI)) {
+ CanOptimize = false;
continue;
+ }
// AliasAnalysis does not account for loops. Limit elimination to
// candidates for which we can guarantee they always store to the same
@@ -1396,6 +1384,7 @@ struct DSEState {
if (!isGuaranteedLoopIndependent(CurrentI, KillingI, *CurrentLoc)) {
LLVM_DEBUG(dbgs() << " ... not guaranteed loop independent\n");
WalkerStepLimit -= 1;
+ CanOptimize = false;
continue;
}
@@ -1403,16 +1392,32 @@ struct DSEState {
// If the killing def is a memory terminator (e.g. lifetime.end), check
// the next candidate if the current Current does not write the same
// underlying object as the terminator.
- if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI))
+ if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI)) {
+ CanOptimize = false;
continue;
+ }
} else {
int64_t KillingOffset = 0;
int64_t DeadOffset = 0;
auto OR = isOverwrite(KillingI, CurrentI, KillingLoc, *CurrentLoc,
KillingOffset, DeadOffset);
+ if (CanOptimize) {
+ // CurrentDef is the earliest write clobber of KillingDef. Use it as
+ // optimized access. Do not optimize if CurrentDef is already the
+ // defining access of KillingDef.
+ if (CurrentDef != KillingDef->getDefiningAccess() &&
+ (OR == OW_Complete || OR == OW_MaybePartial))
+ KillingDef->setOptimized(CurrentDef);
+
+ // Once a may-aliasing def is encountered do not set an optimized
+ // access.
+ if (OR != OW_None)
+ CanOptimize = false;
+ }
+
// If Current does not write to the same object as KillingDef, check
// the next candidate.
- if (OR == OW_Unknown)
+ if (OR == OW_Unknown || OR == OW_None)
continue;
else if (OR == OW_MaybePartial) {
// If KillingDef only partially overwrites Current, check the next
@@ -1421,6 +1426,7 @@ struct DSEState {
// which are less likely to be removable in the end.
if (PartialLimit <= 1) {
WalkerStepLimit -= 1;
+ LLVM_DEBUG(dbgs() << " ... reached partial limit ... continue with next access\n");
continue;
}
PartialLimit -= 1;
@@ -1922,7 +1928,14 @@ struct DSEState {
if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def) ||
!isRemovable(Def->getMemoryInst()))
continue;
- auto *UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
+ MemoryDef *UpperDef;
+ // To conserve compile-time, we avoid walking to the next clobbering def.
+ // Instead, we just try to get the optimized access, if it exists. DSE
+ // will try to optimize defs during the earlier traversal.
+ if (Def->isOptimized())
+ UpperDef = dyn_cast<MemoryDef>(Def->getOptimized());
+ else
+ UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef))
continue;
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index ae2fe2767074..7001d330fce0 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1951,7 +1951,6 @@ bool IndVarSimplify::run(Loop *L) {
// using it.
if (!DisableLFTR) {
BasicBlock *PreHeader = L->getLoopPreheader();
- BranchInst *PreHeaderBR = cast<BranchInst>(PreHeader->getTerminator());
SmallVector<BasicBlock*, 16> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
@@ -1987,7 +1986,7 @@ bool IndVarSimplify::run(Loop *L) {
// Avoid high cost expansions. Note: This heuristic is questionable in
// that our definition of "high cost" is not exactly principled.
if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
- TTI, PreHeaderBR))
+ TTI, PreHeader->getTerminator()))
continue;
// Check preconditions for proper SCEVExpander operation. SCEV does not
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index bf714d167670..6f97f3e93123 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -486,7 +486,7 @@ bool LoopInvariantCodeMotion::runOnLoop(
// Check that neither this loop nor its parent have had LCSSA broken. LICM is
// specifically moving instructions across the loop boundary and so it is
- // especially in need of sanity checking here.
+ // especially in need of basic functional correctness checking here.
assert(L->isLCSSAForm(*DT) && "Loop not left in LCSSA form after LICM!");
assert((L->isOutermost() || L->getParentLoop()->isLCSSAForm(*DT)) &&
"Parent loop not left in LCSSA form after LICM!");
@@ -1860,6 +1860,7 @@ class LoopPromoter : public LoadAndStorePromoter {
bool UnorderedAtomic;
AAMDNodes AATags;
ICFLoopSafetyInfo &SafetyInfo;
+ bool CanInsertStoresInExitBlocks;
// We're about to add a use of V in a loop exit block. Insert an LCSSA phi
// (if legal) if doing so would add an out-of-loop use to an instruction
@@ -1886,12 +1887,13 @@ public:
SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
MemorySSAUpdater *MSSAU, LoopInfo &li, DebugLoc dl,
Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags,
- ICFLoopSafetyInfo &SafetyInfo)
+ ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks)
: LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP),
PredCache(PIC), MSSAU(MSSAU), LI(li), DL(std::move(dl)),
Alignment(Alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags),
- SafetyInfo(SafetyInfo) {}
+ SafetyInfo(SafetyInfo),
+ CanInsertStoresInExitBlocks(CanInsertStoresInExitBlocks) {}
bool isInstInList(Instruction *I,
const SmallVectorImpl<Instruction *> &) const override {
@@ -1903,7 +1905,7 @@ public:
return PointerMustAliases.count(Ptr);
}
- void doExtraRewritesBeforeFinalDeletion() override {
+ void insertStoresInLoopExitBlocks() {
// Insert stores after in the loop exit blocks. Each exit block gets a
// store of the live-out values that feed them. Since we've already told
// the SSA updater about the defs in the loop and the preheader
@@ -1937,10 +1939,21 @@ public:
}
}
+ void doExtraRewritesBeforeFinalDeletion() override {
+ if (CanInsertStoresInExitBlocks)
+ insertStoresInLoopExitBlocks();
+ }
+
void instructionDeleted(Instruction *I) const override {
SafetyInfo.removeInstruction(I);
MSSAU->removeMemoryAccess(I);
}
+
+ bool shouldDelete(Instruction *I) const override {
+ if (isa<StoreInst>(I))
+ return CanInsertStoresInExitBlocks;
+ return true;
+ }
};
bool isNotCapturedBeforeOrInLoop(const Value *V, const Loop *L,
@@ -2039,6 +2052,7 @@ bool llvm::promoteLoopAccessesToScalars(
bool DereferenceableInPH = false;
bool SafeToInsertStore = false;
+ bool FoundLoadToPromote = false;
SmallVector<Instruction *, 64> LoopUses;
@@ -2067,16 +2081,11 @@ bool llvm::promoteLoopAccessesToScalars(
IsKnownThreadLocalObject = !isa<AllocaInst>(Object);
}
- // Check that all of the pointers in the alias set have the same type. We
- // cannot (yet) promote a memory location that is loaded and stored in
+ // Check that all accesses to pointers in the alias set use the same type.
+ // We cannot (yet) promote a memory location that is loaded and stored in
// different sizes. While we are at it, collect alignment and AA info.
+ Type *AccessTy = nullptr;
for (Value *ASIV : PointerMustAliases) {
- // Check that all of the pointers in the alias set have the same type. We
- // cannot (yet) promote a memory location that is loaded and stored in
- // different sizes.
- if (SomePtr->getType() != ASIV->getType())
- return false;
-
for (User *U : ASIV->users()) {
// Ignore instructions that are outside the loop.
Instruction *UI = dyn_cast<Instruction>(U);
@@ -2091,6 +2100,7 @@ bool llvm::promoteLoopAccessesToScalars(
SawUnorderedAtomic |= Load->isAtomic();
SawNotAtomic |= !Load->isAtomic();
+ FoundLoadToPromote = true;
Align InstAlignment = Load->getAlign();
@@ -2153,6 +2163,11 @@ bool llvm::promoteLoopAccessesToScalars(
} else
return false; // Not a load or store.
+ if (!AccessTy)
+ AccessTy = getLoadStoreType(UI);
+ else if (AccessTy != getLoadStoreType(UI))
+ return false;
+
// Merge the AA tags.
if (LoopUses.empty()) {
// On the first load/store, just take its AA tags.
@@ -2175,9 +2190,7 @@ bool llvm::promoteLoopAccessesToScalars(
// If we're inserting an atomic load in the preheader, we must be able to
// lower it. We're only guaranteed to be able to lower naturally aligned
// atomics.
- auto *SomePtrElemType = SomePtr->getType()->getPointerElementType();
- if (SawUnorderedAtomic &&
- Alignment < MDL.getTypeStoreSize(SomePtrElemType))
+ if (SawUnorderedAtomic && Alignment < MDL.getTypeStoreSize(AccessTy))
return false;
// If we couldn't prove we can hoist the load, bail.
@@ -2199,13 +2212,20 @@ bool llvm::promoteLoopAccessesToScalars(
}
}
- // If we've still failed to prove we can sink the store, give up.
- if (!SafeToInsertStore)
+ // If we've still failed to prove we can sink the store, fall back to
+ // promoting just the load, if possible.
+ if (!SafeToInsertStore && !FoundLoadToPromote)
+ // If we cannot hoist the load either, give up.
return false;
- // Otherwise, this is safe to promote, lets do it!
- LLVM_DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr
- << '\n');
+ // Let's do the promotion!
+ if (SafeToInsertStore)
+ LLVM_DEBUG(dbgs() << "LICM: Promoting load/store of the value: " << *SomePtr
+ << '\n');
+ else
+ LLVM_DEBUG(dbgs() << "LICM: Promoting load of the value: " << *SomePtr
+ << '\n');
+
ORE->emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar",
LoopUses[0])
@@ -2224,13 +2244,14 @@ bool llvm::promoteLoopAccessesToScalars(
SSAUpdater SSA(&NewPHIs);
LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
InsertPts, MSSAInsertPts, PIC, MSSAU, *LI, DL,
- Alignment, SawUnorderedAtomic, AATags, *SafetyInfo);
+ Alignment, SawUnorderedAtomic, AATags, *SafetyInfo,
+ SafeToInsertStore);
// Set up the preheader to have a definition of the value. It is the live-out
// value from the preheader that uses in the loop will use.
LoadInst *PreheaderLoad = new LoadInst(
- SomePtr->getType()->getPointerElementType(), SomePtr,
- SomePtr->getName() + ".promoted", Preheader->getTerminator());
+ AccessTy, SomePtr, SomePtr->getName() + ".promoted",
+ Preheader->getTerminator());
if (SawUnorderedAtomic)
PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
PreheaderLoad->setAlignment(Alignment);
diff --git a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
index 3df4cfe8e4c1..6c783848432b 100644
--- a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
@@ -49,9 +49,17 @@ void PassManager<Loop, LoopAnalysisManager, LoopStandardAnalysisResults &,
LPMUpdater &>::printPipeline(raw_ostream &OS,
function_ref<StringRef(StringRef)>
MapClassName2PassName) {
- for (unsigned Idx = 0, Size = LoopPasses.size(); Idx != Size; ++Idx) {
- auto *P = LoopPasses[Idx].get();
- P->printPipeline(OS, MapClassName2PassName);
+ assert(LoopPasses.size() + LoopNestPasses.size() == IsLoopNestPass.size());
+
+ unsigned IdxLP = 0, IdxLNP = 0;
+ for (unsigned Idx = 0, Size = IsLoopNestPass.size(); Idx != Size; ++Idx) {
+ if (IsLoopNestPass[Idx]) {
+ auto *P = LoopNestPasses[IdxLNP++].get();
+ P->printPipeline(OS, MapClassName2PassName);
+ } else {
+ auto *P = LoopPasses[IdxLP++].get();
+ P->printPipeline(OS, MapClassName2PassName);
+ }
if (Idx + 1 < Size)
OS << ",";
}
diff --git a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index a87843d658a9..728d63fe2847 100644
--- a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -256,8 +256,8 @@ private:
}
}
- // Sanity check: amount of dead and live loop blocks should match the total
- // number of blocks in loop.
+ // The number of dead and live loop blocks should match the total number
+ // of blocks in the loop.
assert(L.getNumBlocks() == LiveLoopBlocks.size() + DeadLoopBlocks.size() &&
"Malformed block sets?");
@@ -305,7 +305,6 @@ private:
BlocksInLoopAfterFolding.insert(BB);
}
- // Sanity check: header must be in loop.
assert(BlocksInLoopAfterFolding.count(L.getHeader()) &&
"Header not in loop?");
assert(BlocksInLoopAfterFolding.size() <= LiveLoopBlocks.size() &&
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 67702520511b..39c8b65968aa 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -806,28 +806,27 @@ static Optional<unsigned> shouldFullUnroll(
ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE,
const TargetTransformInfo::UnrollingPreferences &UP) {
+ assert(FullUnrollTripCount && "should be non-zero!");
- if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
- // When computing the unrolled size, note that BEInsns are not replicated
- // like the rest of the loop body.
- if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) {
- return FullUnrollTripCount;
+ if (FullUnrollTripCount > UP.FullUnrollMaxCount)
+ return None;
- } else {
- // The loop isn't that small, but we still can fully unroll it if that
- // helps to remove a significant number of instructions.
- // To check that, run additional analysis on the loop.
- if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
- L, FullUnrollTripCount, DT, SE, EphValues, TTI,
- UP.Threshold * UP.MaxPercentThresholdBoost / 100,
- UP.MaxIterationsCountToAnalyze)) {
- unsigned Boost =
- getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost);
- if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
- return FullUnrollTripCount;
- }
- }
- }
+ // When computing the unrolled size, note that BEInsns are not replicated
+ // like the rest of the loop body.
+ if (UCE.getUnrolledLoopSize(UP) < UP.Threshold)
+ return FullUnrollTripCount;
+
+ // The loop isn't that small, but we still can fully unroll it if that
+ // helps to remove a significant number of instructions.
+ // To check that, run additional analysis on the loop.
+ if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
+ L, FullUnrollTripCount, DT, SE, EphValues, TTI,
+ UP.Threshold * UP.MaxPercentThresholdBoost / 100,
+ UP.MaxIterationsCountToAnalyze)) {
+ unsigned Boost =
+ getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost);
+ if (Cost->UnrolledCost < UP.Threshold * Boost / 100)
+ return FullUnrollTripCount;
}
return None;
}
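In the boosted path above, the profitability test is simply Cost->UnrolledCost < UP.Threshold * Boost / 100. A worked example with hypothetical numbers (not LLVM defaults):

    #include <algorithm>
    #include <cassert>

    int main() {
      // An unrolled size of 340 fails the plain threshold of 300, but passes
      // once the cost analysis grants a 150% boost (capped here at 200).
      unsigned Threshold = 300, MaxPercentThresholdBoost = 200;
      unsigned UnrolledSize = 340;
      assert(!(UnrolledSize < Threshold));
      unsigned Boost = std::min(150u, MaxPercentThresholdBoost);
      assert(UnrolledSize < Threshold * Boost / 100); // 340 < 450
      return 0;
    }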
@@ -837,51 +836,48 @@ shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount,
const UnrollCostEstimator UCE,
const TargetTransformInfo::UnrollingPreferences &UP) {
+ if (!TripCount)
+ return None;
+
+ if (!UP.Partial) {
+ LLVM_DEBUG(dbgs() << " will not try to unroll partially because "
+ << "-unroll-allow-partial not given\n");
+ return 0;
+ }
unsigned count = UP.Count;
- if (TripCount) {
- if (!UP.Partial) {
- LLVM_DEBUG(dbgs() << " will not try to unroll partially because "
- << "-unroll-allow-partial not given\n");
- count = 0;
- return count;
- }
- if (count == 0)
- count = TripCount;
- if (UP.PartialThreshold != NoThreshold) {
- // Reduce unroll count to be modulo of TripCount for partial unrolling.
- if (UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold)
- count = (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
- (LoopSize - UP.BEInsns);
- if (count > UP.MaxCount)
- count = UP.MaxCount;
- while (count != 0 && TripCount % count != 0)
- count--;
- if (UP.AllowRemainder && count <= 1) {
- // If there is no Count that is modulo of TripCount, set Count to
- // largest power-of-two factor that satisfies the threshold limit.
- // As we'll create fixup loop, do the type of unrolling only if
- // remainder loop is allowed.
- count = UP.DefaultUnrollRuntimeCount;
- while (count != 0 &&
- UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold)
- count >>= 1;
- }
- if (count < 2) {
- count = 0;
- }
- } else {
- count = TripCount;
- }
+ if (count == 0)
+ count = TripCount;
+ if (UP.PartialThreshold != NoThreshold) {
+ // Reduce unroll count to be modulo of TripCount for partial unrolling.
+ if (UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold)
+ count = (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
+ (LoopSize - UP.BEInsns);
if (count > UP.MaxCount)
count = UP.MaxCount;
-
- LLVM_DEBUG(dbgs() << " partially unrolling with count: " << count << "\n");
-
- return count;
+ while (count != 0 && TripCount % count != 0)
+ count--;
+ if (UP.AllowRemainder && count <= 1) {
+ // If there is no Count that is modulo of TripCount, set Count to
+ // largest power-of-two factor that satisfies the threshold limit.
+ // As we'll create fixup loop, do the type of unrolling only if
+ // remainder loop is allowed.
+ count = UP.DefaultUnrollRuntimeCount;
+ while (count != 0 &&
+ UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold)
+ count >>= 1;
+ }
+ if (count < 2) {
+ count = 0;
+ }
+ } else {
+ count = TripCount;
}
+ if (count > UP.MaxCount)
+ count = UP.MaxCount;
- // if didn't return until here, should continue to other priorties
- return None;
+ LLVM_DEBUG(dbgs() << " partially unrolling with count: " << count << "\n");
+
+ return count;
}
// Returns true if unroll count was set explicitly.
// Calculates unroll count and writes it to UP.Count.
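Stripped of the LLVM plumbing, the partial-unroll selection above is: shrink the count to fit the size budget, round down to a divisor of the trip count, and fall back to a power of two when a remainder loop is allowed. A simplified standalone model (pickPartialCount and its parameters are stand-ins for the UP/UCE fields; getUnrolledLoopSize is approximated as (LoopSize - BEInsns) * Count + BEInsns):

    #include <algorithm>
    #include <cassert>

    unsigned pickPartialCount(unsigned TripCount, unsigned LoopSize,
                              unsigned BEInsns, unsigned PartialThreshold,
                              unsigned MaxCount, bool AllowRemainder,
                              unsigned DefaultRuntimeCount) {
      unsigned Count = TripCount;
      auto UnrolledSize = [&](unsigned C) {
        return (LoopSize - BEInsns) * C + BEInsns; // assumes LoopSize > BEInsns
      };
      if (UnrolledSize(Count) > PartialThreshold)
        Count = (std::max(PartialThreshold, BEInsns + 1) - BEInsns) /
                (LoopSize - BEInsns);
      Count = std::min(Count, MaxCount);
      while (Count != 0 && TripCount % Count != 0) // divisor of the trip count
        --Count;
      if (AllowRemainder && Count <= 1) {
        Count = DefaultRuntimeCount; // largest power of two under the budget
        while (Count != 0 && UnrolledSize(Count) > PartialThreshold)
          Count >>= 1;
      }
      return Count < 2 ? 0 : std::min(Count, MaxCount);
    }

    int main() {
      // Trip count 10, loop of size 12 with 2 backedge insns, budget 60:
      // count 5 fits the budget and evenly divides the trip count.
      assert(pickPartialCount(10, 12, 2, 60, 8, true, 8) == 5);
      return 0;
    }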
@@ -900,7 +896,6 @@ bool llvm::computeUnrollCount(
TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) {
UnrollCostEstimator UCE(*L, LoopSize);
- Optional<unsigned> UnrollFactor;
const bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
const bool PragmaFullUnroll = hasUnrollFullPragma(L);
@@ -926,9 +921,8 @@ bool llvm::computeUnrollCount(
// Check for explicit Count.
// 1st priority is unroll count set by "unroll-count" option.
// 2nd priority is unroll count set by pragma.
- UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount, UCE, UP);
-
- if (UnrollFactor) {
+ if (auto UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount,
+ UCE, UP)) {
UP.Count = *UnrollFactor;
if (UserUnrollCount || (PragmaCount > 0)) {
@@ -948,11 +942,20 @@ bool llvm::computeUnrollCount(
}
}
- // 3rd priority is full unroll count.
- // Full unroll makes sense only when TripCount or its upper bound could be
- // statically calculated.
- // Also we need to check if we exceed FullUnrollMaxCount.
+ // 3rd priority is exact full unrolling. This will eliminate all copies
+ // of some exit test.
+ UP.Count = 0;
+ if (TripCount) {
+ UP.Count = TripCount;
+ if (auto UnrollFactor = shouldFullUnroll(L, TTI, DT, SE, EphValues,
+ TripCount, UCE, UP)) {
+ UP.Count = *UnrollFactor;
+ UseUpperBound = false;
+ return ExplicitUnroll;
+ }
+ }
+ // 4th priority is bounded unrolling.
// We can unroll by the upper bound amount if it's generally allowed or if
// we know that the loop is executed either the upper bound or zero times.
// (MaxOrZero unrolling keeps only the first loop test, so the number of
@@ -961,37 +964,21 @@ bool llvm::computeUnrollCount(
// number of loop tests goes up which may end up being worse on targets with
// constrained branch predictor resources so is controlled by an option.)
// In addition we only unroll small upper bounds.
- unsigned FullUnrollMaxTripCount = MaxTripCount;
- if (!(UP.UpperBound || MaxOrZero) ||
- FullUnrollMaxTripCount > UnrollMaxUpperBound)
- FullUnrollMaxTripCount = 0;
-
- // UnrollByMaxCount and ExactTripCount cannot both be non zero since we only
- // compute the former when the latter is zero.
- unsigned ExactTripCount = TripCount;
- assert((ExactTripCount == 0 || FullUnrollMaxTripCount == 0) &&
- "ExtractTripCount and UnrollByMaxCount cannot both be non zero.");
-
- unsigned FullUnrollTripCount =
- ExactTripCount ? ExactTripCount : FullUnrollMaxTripCount;
- UP.Count = FullUnrollTripCount;
-
- UnrollFactor =
- shouldFullUnroll(L, TTI, DT, SE, EphValues, FullUnrollTripCount, UCE, UP);
-
- // if shouldFullUnroll can do the unrolling, some side parameteres should be
- // set
- if (UnrollFactor) {
- UP.Count = *UnrollFactor;
- UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
- TripCount = FullUnrollTripCount;
- TripMultiple = UP.UpperBound ? 1 : TripMultiple;
- return ExplicitUnroll;
- } else {
- UP.Count = FullUnrollTripCount;
+ // Note that the cost of bounded unrolling is always strictly greater than
+ // the cost of exact full unrolling. As such, if we have an exact count and
+ // found it unprofitable, we'll never choose bounded unrolling.
+ if (!TripCount && MaxTripCount && (UP.UpperBound || MaxOrZero) &&
+ MaxTripCount <= UnrollMaxUpperBound) {
+ UP.Count = MaxTripCount;
+ if (auto UnrollFactor = shouldFullUnroll(L, TTI, DT, SE, EphValues,
+ MaxTripCount, UCE, UP)) {
+ UP.Count = *UnrollFactor;
+ UseUpperBound = true;
+ return ExplicitUnroll;
+ }
}
- // 4th priority is loop peeling.
+ // 5th priority is loop peeling.
computePeelCount(L, LoopSize, PP, TripCount, DT, SE, UP.Threshold);
if (PP.PeelCount) {
UP.Runtime = false;
@@ -1004,11 +991,9 @@ bool llvm::computeUnrollCount(
if (TripCount)
UP.Partial |= ExplicitUnroll;
- // 5th priority is partial unrolling.
+ // 6th priority is partial unrolling.
// Try partial unroll only when TripCount could be statically calculated.
- UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP);
-
- if (UnrollFactor) {
+ if (auto UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP)) {
UP.Count = *UnrollFactor;
if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
@@ -1049,7 +1034,7 @@ bool llvm::computeUnrollCount(
"because loop has a runtime trip count.";
});
- // 6th priority is runtime unrolling.
+ // 7th priority is runtime unrolling.
// Don't unroll a runtime trip count loop when it is disabled.
if (hasRuntimeUnrollDisablePragma(L)) {
UP.Count = 0;
diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp
index b0fb8daaba8f..c354fa177a60 100644
--- a/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -494,7 +494,7 @@ static bool LinearizeExprTree(Instruction *I,
SmallVector<Value *, 8> LeafOrder; // Ensure deterministic leaf output order.
#ifndef NDEBUG
- SmallPtrSet<Value *, 8> Visited; // For sanity checking the iteration scheme.
+ SmallPtrSet<Value *, 8> Visited; // For checking the iteration scheme.
#endif
while (!Worklist.empty()) {
std::pair<Instruction*, APInt> P = Worklist.pop_back_val();
@@ -2313,11 +2313,8 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
MadeChange |= LinearizeExprTree(I, Tree);
SmallVector<ValueEntry, 8> Ops;
Ops.reserve(Tree.size());
- for (unsigned i = 0, e = Tree.size(); i != e; ++i) {
- RepeatedValue E = Tree[i];
- Ops.append(E.second.getZExtValue(),
- ValueEntry(getRank(E.first), E.first));
- }
+ for (const RepeatedValue &E : Tree)
+ Ops.append(E.second.getZExtValue(), ValueEntry(getRank(E.first), E.first));
LLVM_DEBUG(dbgs() << "RAIn:\t"; PrintOps(I, Ops); dbgs() << '\n');
diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 86d3620c312e..3799d2dd1cf2 100644
--- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -227,8 +227,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
unsigned IterCnt = 0;
(void)IterCnt;
while (LocalChange) {
- assert(IterCnt++ < 1000 &&
- "Sanity: iterative simplification didn't converge!");
+ assert(IterCnt++ < 1000 && "Iterative simplification didn't converge!");
LocalChange = false;
// Loop over all of the basic blocks and remove them if they are unneeded.
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 6469c899feea..d6d6b1a7fa09 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -235,22 +235,26 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
// These dominator edges will be redirected from Pred.
std::vector<DominatorTree::UpdateType> Updates;
if (DTU) {
- SmallPtrSet<BasicBlock *, 2> SuccsOfBB(succ_begin(BB), succ_end(BB));
+ // To avoid processing the same successor more than once.
+ SmallPtrSet<BasicBlock *, 8> SeenSuccs;
SmallPtrSet<BasicBlock *, 2> SuccsOfPredBB(succ_begin(PredBB),
succ_end(PredBB));
- Updates.reserve(Updates.size() + 2 * SuccsOfBB.size() + 1);
+ Updates.reserve(Updates.size() + 2 * succ_size(BB) + 1);
// Add insert edges first. Experimentally, for the particular case of two
// blocks that can be merged, with a single successor and single predecessor
// respectively, it is beneficial to have all insert updates first. Deleting
// edges first may lead to unreachable blocks, followed by inserting edges
// making the blocks reachable again. Such DT updates lead to high compile
// times. We add inserts before deletes here to reduce compile time.
- for (BasicBlock *SuccOfBB : SuccsOfBB)
+ for (BasicBlock *SuccOfBB : successors(BB))
// This successor of BB may already be a PredBB's successor.
if (!SuccsOfPredBB.contains(SuccOfBB))
- Updates.push_back({DominatorTree::Insert, PredBB, SuccOfBB});
- for (BasicBlock *SuccOfBB : SuccsOfBB)
- Updates.push_back({DominatorTree::Delete, BB, SuccOfBB});
+ if (SeenSuccs.insert(SuccOfBB).second)
+ Updates.push_back({DominatorTree::Insert, PredBB, SuccOfBB});
+ SeenSuccs.clear();
+ for (BasicBlock *SuccOfBB : successors(BB))
+ if (SeenSuccs.insert(SuccOfBB).second)
+ Updates.push_back({DominatorTree::Delete, BB, SuccOfBB});
Updates.push_back({DominatorTree::Delete, PredBB, BB});
}
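The rewritten update collection replaces a pre-populated set with on-the-fly deduplication: successors() can yield the same block more than once (a switch or conditional branch may list one target repeatedly), and insert(...).second is true only on the first insertion. A minimal standalone C++ illustration of the idiom (std::set stands in for SmallPtrSet):

    #include <cassert>
    #include <set>
    #include <vector>

    int main() {
      std::vector<int> Succs = {1, 2, 1, 3, 2}; // duplicate successors
      std::set<int> Seen;
      std::vector<int> Unique;
      for (int S : Succs)
        if (Seen.insert(S).second) // true only the first time S is seen
          Unique.push_back(S);
      assert((Unique == std::vector<int>{1, 2, 3}));
      return 0;
    }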
@@ -804,14 +808,14 @@ static BasicBlock *SplitBlockImpl(BasicBlock *Old, Instruction *SplitPt,
if (DTU) {
SmallVector<DominatorTree::UpdateType, 8> Updates;
// Old dominates New. New node dominates all other nodes dominated by Old.
- SmallPtrSet<BasicBlock *, 8> UniqueSuccessorsOfOld(succ_begin(New),
- succ_end(New));
+ SmallPtrSet<BasicBlock *, 8> UniqueSuccessorsOfOld;
Updates.push_back({DominatorTree::Insert, Old, New});
- Updates.reserve(Updates.size() + 2 * UniqueSuccessorsOfOld.size());
- for (BasicBlock *UniqueSuccessorOfOld : UniqueSuccessorsOfOld) {
- Updates.push_back({DominatorTree::Insert, New, UniqueSuccessorOfOld});
- Updates.push_back({DominatorTree::Delete, Old, UniqueSuccessorOfOld});
- }
+ Updates.reserve(Updates.size() + 2 * succ_size(New));
+ for (BasicBlock *SuccessorOfOld : successors(New))
+ if (UniqueSuccessorsOfOld.insert(SuccessorOfOld).second) {
+ Updates.push_back({DominatorTree::Insert, New, SuccessorOfOld});
+ Updates.push_back({DominatorTree::Delete, Old, SuccessorOfOld});
+ }
DTU->applyUpdates(Updates);
} else if (DT)
@@ -870,14 +874,14 @@ BasicBlock *llvm::splitBlockBefore(BasicBlock *Old, Instruction *SplitPt,
SmallVector<DominatorTree::UpdateType, 8> DTUpdates;
// New dominates Old. The predecessor nodes of the Old node dominate
// New node.
- SmallPtrSet<BasicBlock *, 8> UniquePredecessorsOfOld(pred_begin(New),
- pred_end(New));
+ SmallPtrSet<BasicBlock *, 8> UniquePredecessorsOfOld;
DTUpdates.push_back({DominatorTree::Insert, New, Old});
- DTUpdates.reserve(DTUpdates.size() + 2 * UniquePredecessorsOfOld.size());
- for (BasicBlock *UniquePredecessorOfOld : UniquePredecessorsOfOld) {
- DTUpdates.push_back({DominatorTree::Insert, UniquePredecessorOfOld, New});
- DTUpdates.push_back({DominatorTree::Delete, UniquePredecessorOfOld, Old});
- }
+ DTUpdates.reserve(DTUpdates.size() + 2 * pred_size(New));
+ for (BasicBlock *PredecessorOfOld : predecessors(New))
+ if (UniquePredecessorsOfOld.insert(PredecessorOfOld).second) {
+ DTUpdates.push_back({DominatorTree::Insert, PredecessorOfOld, New});
+ DTUpdates.push_back({DominatorTree::Delete, PredecessorOfOld, Old});
+ }
DTU->applyUpdates(DTUpdates);
@@ -910,13 +914,14 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
} else {
// Split block expects NewBB to have a non-empty set of predecessors.
SmallVector<DominatorTree::UpdateType, 8> Updates;
- SmallPtrSet<BasicBlock *, 8> UniquePreds(Preds.begin(), Preds.end());
+ SmallPtrSet<BasicBlock *, 8> UniquePreds;
Updates.push_back({DominatorTree::Insert, NewBB, OldBB});
- Updates.reserve(Updates.size() + 2 * UniquePreds.size());
- for (auto *UniquePred : UniquePreds) {
- Updates.push_back({DominatorTree::Insert, UniquePred, NewBB});
- Updates.push_back({DominatorTree::Delete, UniquePred, OldBB});
- }
+ Updates.reserve(Updates.size() + 2 * Preds.size());
+ for (auto *Pred : Preds)
+ if (UniquePreds.insert(Pred).second) {
+ Updates.push_back({DominatorTree::Insert, Pred, NewBB});
+ Updates.push_back({DominatorTree::Delete, Pred, OldBB});
+ }
DTU->applyUpdates(Updates);
}
} else if (DT) {
@@ -1376,14 +1381,14 @@ SplitBlockAndInsertIfThenImpl(Value *Cond, Instruction *SplitBefore,
BasicBlock *Head = SplitBefore->getParent();
BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator());
if (DTU) {
- SmallPtrSet<BasicBlock *, 8> UniqueSuccessorsOfHead(succ_begin(Tail),
- succ_end(Tail));
+ SmallPtrSet<BasicBlock *, 8> UniqueSuccessorsOfHead;
Updates.push_back({DominatorTree::Insert, Head, Tail});
- Updates.reserve(Updates.size() + 2 * UniqueSuccessorsOfHead.size());
- for (BasicBlock *UniqueSuccessorOfHead : UniqueSuccessorsOfHead) {
- Updates.push_back({DominatorTree::Insert, Tail, UniqueSuccessorOfHead});
- Updates.push_back({DominatorTree::Delete, Head, UniqueSuccessorOfHead});
- }
+ Updates.reserve(Updates.size() + 2 * succ_size(Tail));
+ for (BasicBlock *SuccessorOfHead : successors(Tail))
+ if (UniqueSuccessorsOfHead.insert(SuccessorOfHead).second) {
+ Updates.push_back({DominatorTree::Insert, Tail, SuccessorOfHead});
+ Updates.push_back({DominatorTree::Delete, Head, SuccessorOfHead});
+ }
}
Instruction *HeadOldTerm = Head->getTerminator();
LLVMContext &C = Head->getContext();
diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index 957935398972..580cfd80141e 100644
--- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -452,18 +452,17 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
return Changed;
case LibFunc_mempcpy:
case LibFunc_memccpy:
+ Changed |= setWillReturn(F);
+ LLVM_FALLTHROUGH;
+ case LibFunc_memcpy_chk:
Changed |= setDoesNotThrow(F);
Changed |= setOnlyAccessesArgMemory(F);
- Changed |= setWillReturn(F);
Changed |= setDoesNotAlias(F, 0);
Changed |= setOnlyWritesMemory(F, 0);
Changed |= setDoesNotAlias(F, 1);
Changed |= setDoesNotCapture(F, 1);
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
- case LibFunc_memcpy_chk:
- Changed |= setDoesNotThrow(F);
- return Changed;
case LibFunc_memalign:
Changed |= setOnlyAccessesInaccessibleMemory(F);
Changed |= setRetNoUndef(F);
@@ -1018,9 +1017,8 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
return Changed;
- // TODO: add LibFunc entries for:
- // case LibFunc_memset_pattern4:
- // case LibFunc_memset_pattern8:
+ case LibFunc_memset_pattern4:
+ case LibFunc_memset_pattern8:
case LibFunc_memset_pattern16:
Changed |= setOnlyAccessesArgMemory(F);
Changed |= setDoesNotCapture(F, 0);
@@ -1029,10 +1027,12 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setOnlyReadsMemory(F, 1);
return Changed;
case LibFunc_memset:
- Changed |= setOnlyAccessesArgMemory(F);
Changed |= setWillReturn(F);
- Changed |= setDoesNotThrow(F);
+ LLVM_FALLTHROUGH;
+ case LibFunc_memset_chk:
+ Changed |= setOnlyAccessesArgMemory(F);
Changed |= setOnlyWritesMemory(F, 0);
+ Changed |= setDoesNotThrow(F);
return Changed;
// int __nvvm_reflect(const char *)
case LibFunc_nvvm_reflect:
diff --git a/llvm/lib/Transforms/Utils/CloneModule.cpp b/llvm/lib/Transforms/Utils/CloneModule.cpp
index 200deca4b317..57c273a0e3c5 100644
--- a/llvm/lib/Transforms/Utils/CloneModule.cpp
+++ b/llvm/lib/Transforms/Utils/CloneModule.cpp
@@ -135,10 +135,18 @@ std::unique_ptr<Module> llvm::CloneModule(
// Similarly, copy over function bodies now...
//
for (const Function &I : M) {
- if (I.isDeclaration())
+ Function *F = cast<Function>(VMap[&I]);
+
+ if (I.isDeclaration()) {
+ // Copy over metadata for declarations since we're not doing it below in
+ // CloneFunctionInto().
+ SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
+ I.getAllMetadata(MDs);
+ for (auto MD : MDs)
+ F->addMetadata(MD.first, *MapMetadata(MD.second, VMap));
continue;
+ }
- Function *F = cast<Function>(VMap[&I]);
if (!ShouldCloneDefinition(&I)) {
// Skip after setting the correct linkage for an external reference.
F->setLinkage(GlobalValue::ExternalLinkage);
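
The CloneModule hunk closes a gap: metadata attached to function declarations was silently dropped, because CloneFunctionInto() only runs for definitions. A self-contained sketch of the copy step in isolation; copyFunctionMetadata is a hypothetical helper, not an LLVM API:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Metadata.h"
    #include "llvm/Transforms/Utils/ValueMapper.h"

    using namespace llvm;

    // Copy every metadata attachment from Src to Dst, remapping any values
    // the metadata references through VMap, as the hunk above now does for
    // declarations.
    static void copyFunctionMetadata(const Function &Src, Function &Dst,
                                     ValueToValueMapTy &VMap) {
      SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
      Src.getAllMetadata(MDs);
      for (const auto &MD : MDs)
        Dst.addMetadata(MD.first, *MapMetadata(MD.second, VMap));
    }
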
diff --git a/llvm/lib/Transforms/Utils/GuardUtils.cpp b/llvm/lib/Transforms/Utils/GuardUtils.cpp
index 4dbcbf80d3da..7c310f16d46e 100644
--- a/llvm/lib/Transforms/Utils/GuardUtils.cpp
+++ b/llvm/lib/Transforms/Utils/GuardUtils.cpp
@@ -74,7 +74,7 @@ void llvm::makeGuardControlFlowExplicit(Function *DeoptIntrinsic,
{}, {}, nullptr, "widenable_cond");
CheckBI->setCondition(B.CreateAnd(CheckBI->getCondition(), WC,
"exiplicit_guard_cond"));
- assert(isWidenableBranch(CheckBI) && "sanity check");
+ assert(isWidenableBranch(CheckBI) && "Branch must be widenable.");
}
}
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index f4776589910f..997667810580 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1218,10 +1218,9 @@ static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap) {
if (!RI || !isa<CallBase>(RI->getOperand(0)))
continue;
auto *RetVal = cast<CallBase>(RI->getOperand(0));
- // Sanity check that the cloned RetVal exists and is a call, otherwise we
- // cannot add the attributes on the cloned RetVal.
- // Simplification during inlining could have transformed the cloned
- // instruction.
+ // Check that the cloned RetVal exists and is a call, otherwise we cannot
+ // add the attributes on the cloned RetVal. Simplification during inlining
+ // could have transformed the cloned instruction.
auto *NewRetVal = dyn_cast_or_null<CallBase>(VMap.lookup(RetVal));
if (!NewRetVal)
continue;
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 74ab37fadf36..ec926b1f5a94 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -529,8 +529,8 @@ bool llvm::RecursivelyDeleteTriviallyDeadInstructionsPermissive(
std::function<void(Value *)> AboutToDeleteCallback) {
unsigned S = 0, E = DeadInsts.size(), Alive = 0;
for (; S != E; ++S) {
- auto *I = cast<Instruction>(DeadInsts[S]);
- if (!isInstructionTriviallyDead(I)) {
+ auto *I = dyn_cast<Instruction>(DeadInsts[S]);
+ if (!I || !isInstructionTriviallyDead(I)) {
DeadInsts[S] = nullptr;
++Alive;
}
@@ -760,15 +760,18 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB,
SmallVector<DominatorTree::UpdateType, 32> Updates;
if (DTU) {
- SmallPtrSet<BasicBlock *, 2> PredsOfPredBB(pred_begin(PredBB),
- pred_end(PredBB));
- Updates.reserve(Updates.size() + 2 * PredsOfPredBB.size() + 1);
- for (BasicBlock *PredOfPredBB : PredsOfPredBB)
+ // To avoid processing the same predecessor more than once.
+ SmallPtrSet<BasicBlock *, 2> SeenPreds;
+ Updates.reserve(Updates.size() + 2 * pred_size(PredBB) + 1);
+ for (BasicBlock *PredOfPredBB : predecessors(PredBB))
// This predecessor of PredBB may already have DestBB as a successor.
if (PredOfPredBB != PredBB)
- Updates.push_back({DominatorTree::Insert, PredOfPredBB, DestBB});
- for (BasicBlock *PredOfPredBB : PredsOfPredBB)
- Updates.push_back({DominatorTree::Delete, PredOfPredBB, PredBB});
+ if (SeenPreds.insert(PredOfPredBB).second)
+ Updates.push_back({DominatorTree::Insert, PredOfPredBB, DestBB});
+ SeenPreds.clear();
+ for (BasicBlock *PredOfPredBB : predecessors(PredBB))
+ if (SeenPreds.insert(PredOfPredBB).second)
+ Updates.push_back({DominatorTree::Delete, PredOfPredBB, PredBB});
Updates.push_back({DominatorTree::Delete, PredBB, DestBB});
}
@@ -1096,16 +1099,20 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
SmallVector<DominatorTree::UpdateType, 32> Updates;
if (DTU) {
+ // To avoid processing the same predecessor more than once.
+ SmallPtrSet<BasicBlock *, 8> SeenPreds;
// All predecessors of BB will be moved to Succ.
- SmallPtrSet<BasicBlock *, 8> PredsOfBB(pred_begin(BB), pred_end(BB));
SmallPtrSet<BasicBlock *, 8> PredsOfSucc(pred_begin(Succ), pred_end(Succ));
- Updates.reserve(Updates.size() + 2 * PredsOfBB.size() + 1);
- for (auto *PredOfBB : PredsOfBB)
+ Updates.reserve(Updates.size() + 2 * pred_size(BB) + 1);
+ for (auto *PredOfBB : predecessors(BB))
// This predecessor of BB may already have Succ as a successor.
if (!PredsOfSucc.contains(PredOfBB))
- Updates.push_back({DominatorTree::Insert, PredOfBB, Succ});
- for (auto *PredOfBB : PredsOfBB)
- Updates.push_back({DominatorTree::Delete, PredOfBB, BB});
+ if (SeenPreds.insert(PredOfBB).second)
+ Updates.push_back({DominatorTree::Insert, PredOfBB, Succ});
+ SeenPreds.clear();
+ for (auto *PredOfBB : predecessors(BB))
+ if (SeenPreds.insert(PredOfBB).second)
+ Updates.push_back({DominatorTree::Delete, PredOfBB, BB});
Updates.push_back({DominatorTree::Delete, BB, Succ});
}
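
Both Local.cpp hunks above replace a pre-built SmallPtrSet of predecessors with on-the-fly deduplication: the blocks are walked in their original order and a SeenPreds set only filters repeats, so the DomTreeUpdater receives each CFG edge at most once while the update order stays deterministic. The shape of the pattern, sketched with standard containers in place of the LLVM ADTs:

    #include <unordered_set>
    #include <utility>
    #include <vector>

    struct BasicBlock {};
    enum class Kind { Insert, Delete };
    using Update = std::pair<Kind, BasicBlock *>;

    // Emit one Insert and one Delete update per *unique* predecessor while
    // iterating Preds in its original order, mirroring the SeenPreds pattern.
    static std::vector<Update>
    makeUpdates(const std::vector<BasicBlock *> &Preds) {
      std::unordered_set<const BasicBlock *> Seen;
      std::vector<Update> Updates;
      Updates.reserve(2 * Preds.size());
      for (BasicBlock *P : Preds)
        if (Seen.insert(P).second) // true only the first time P is seen
          Updates.push_back({Kind::Insert, P});
      Seen.clear(); // reuse the set for the second, independent pass
      for (BasicBlock *P : Preds)
        if (Seen.insert(P).second)
          Updates.push_back({Kind::Delete, P});
      return Updates;
    }
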
@@ -2190,26 +2197,6 @@ void llvm::changeToCall(InvokeInst *II, DomTreeUpdater *DTU) {
DTU->applyUpdates({{DominatorTree::Delete, BB, UnwindDestBB}});
}
-void llvm::createUnreachableSwitchDefault(SwitchInst *Switch,
- DomTreeUpdater *DTU) {
- LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
- auto *BB = Switch->getParent();
- auto *OrigDefaultBlock = Switch->getDefaultDest();
- OrigDefaultBlock->removePredecessor(BB);
- BasicBlock *NewDefaultBlock = BasicBlock::Create(
- BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
- OrigDefaultBlock);
- new UnreachableInst(Switch->getContext(), NewDefaultBlock);
- Switch->setDefaultDest(&*NewDefaultBlock);
- if (DTU) {
- SmallVector<DominatorTree::UpdateType, 2> Updates;
- Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
- if (!is_contained(successors(BB), OrigDefaultBlock))
- Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
- DTU->applyUpdates(Updates);
- }
-}
-
BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI,
BasicBlock *UnwindEdge,
DomTreeUpdater *DTU) {
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index a92cb6a313d3..bb719a499a4c 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -623,15 +623,13 @@ bool llvm::UnrollRuntimeLoopRemainder(
if (!SE)
return false;
- // Only unroll loops with a computable trip count, and the trip count needs
- // to be an int value (allowing a pointer type is a TODO item).
+ // Only unroll loops with a computable trip count.
// We calculate the backedge count by using getExitCount on the Latch block,
// which is proven to be the only exiting block in this loop. This is same as
// calculating getBackedgeTakenCount on the loop (which computes SCEV for all
// exiting blocks).
const SCEV *BECountSC = SE->getExitCount(L, Latch);
- if (isa<SCEVCouldNotCompute>(BECountSC) ||
- !BECountSC->getType()->isIntegerTy()) {
+ if (isa<SCEVCouldNotCompute>(BECountSC)) {
LLVM_DEBUG(dbgs() << "Could not compute exit block SCEV\n");
return false;
}
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 68572d479742..c8e42acdffb3 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -1049,6 +1049,7 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder,
return Builder.CreateOrReduce(Src);
case RecurKind::Xor:
return Builder.CreateXorReduce(Src);
+ case RecurKind::FMulAdd:
case RecurKind::FAdd:
return Builder.CreateFAddReduce(ConstantFP::getNegativeZero(SrcVecEltTy),
Src);
@@ -1091,7 +1092,8 @@ Value *llvm::createTargetReduction(IRBuilderBase &B,
Value *llvm::createOrderedReduction(IRBuilderBase &B,
const RecurrenceDescriptor &Desc,
Value *Src, Value *Start) {
- assert(Desc.getRecurrenceKind() == RecurKind::FAdd &&
+ assert((Desc.getRecurrenceKind() == RecurKind::FAdd ||
+ Desc.getRecurrenceKind() == RecurKind::FMulAdd) &&
"Unexpected reduction kind");
assert(Src->getType()->isVectorTy() && "Expected a vector type");
assert(!Start->getType()->isVectorTy() && "Expected a scalar type");
diff --git a/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/llvm/lib/Transforms/Utils/SSAUpdater.cpp
index 5893ce15b129..7d9992176658 100644
--- a/llvm/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/llvm/lib/Transforms/Utils/SSAUpdater.cpp
@@ -446,6 +446,9 @@ void LoadAndStorePromoter::run(const SmallVectorImpl<Instruction *> &Insts) {
// Now that everything is rewritten, delete the old instructions from the
// function. They should all be dead now.
for (Instruction *User : Insts) {
+ if (!shouldDelete(User))
+ continue;
+
// If this is a load that still has uses, then the load must have been added
// as a live value in the SSAUpdate data structure for a block (e.g. because
// the loaded value was stored later). In this case, we need to recursively
diff --git a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
new file mode 100644
index 000000000000..9495e442e0bf
--- /dev/null
+++ b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
@@ -0,0 +1,462 @@
+//===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a profile inference algorithm. Given incomplete and
+// possibly imprecise block counts, the algorithm reconstructs realistic block
+// and edge counts that satisfy flow conservation rules while minimally
+// modifying the input block counts.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SampleProfileInference.h"
+#include "llvm/Support/Debug.h"
+#include <queue>
+#include <set>
+
+using namespace llvm;
+#define DEBUG_TYPE "sample-profile-inference"
+
+namespace {
+
+/// A value indicating an infinite flow/capacity/weight of a block/edge.
+/// We do not use numeric_limits<int64_t>::max(), as the values can be summed
+/// during execution.
+static constexpr int64_t INF = ((int64_t)1) << 50;
+
+/// The minimum-cost maximum flow algorithm.
+///
+/// The algorithm finds the maximum flow of minimum cost on a given (directed)
+/// network using a modified version of the classical Moore-Bellman-Ford
+/// approach. The algorithm applies a number of augmentation iterations in which
+/// flow is sent along paths of positive capacity from the source to the sink.
+/// The worst-case time complexity of the implementation is O(v(f)*m*n), where
+/// m is the number of edges, n is the number of vertices, and v(f) is the
+/// value of the maximum flow. However, the observed running time on typical
+/// instances is sub-quadratic, that is, o(n^2).
+///
+/// The input is a set of edges with specified costs and capacities, and a pair
+/// of nodes (source and sink). The output is a per-edge flow achieving the
+/// minimum total cost while respecting the given edge capacities.
+class MinCostMaxFlow {
+public:
+ // Initialize algorithm's data structures for a network of a given size.
+ void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
+ Source = SourceNode;
+ Target = SinkNode;
+
+ Nodes = std::vector<Node>(NodeCount);
+ Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
+ }
+
+ // Run the algorithm.
+ int64_t run() {
+ // Find an augmenting path and update the flow along the path
+ size_t AugmentationIters = 0;
+ while (findAugmentingPath()) {
+ augmentFlowAlongPath();
+ AugmentationIters++;
+ }
+
+ // Compute the total flow and its cost
+ int64_t TotalCost = 0;
+ int64_t TotalFlow = 0;
+ for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
+ for (auto &Edge : Edges[Src]) {
+ if (Edge.Flow > 0) {
+ TotalCost += Edge.Cost * Edge.Flow;
+ if (Src == Source)
+ TotalFlow += Edge.Flow;
+ }
+ }
+ }
+ LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters
+ << " iterations with " << TotalFlow << " total flow"
+ << " of " << TotalCost << " cost\n");
+ (void)TotalFlow;
+ return TotalCost;
+ }
+
+  /// Add an edge with a specified capacity and cost to the network.
+  /// Multiple edges between a pair of nodes are allowed, but self-edges
+  /// are not supported.
+ void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) {
+ assert(Capacity > 0 && "adding an edge of zero capacity");
+    assert(Src != Dst && "self-edges are not supported");
+
+ Edge SrcEdge;
+ SrcEdge.Dst = Dst;
+ SrcEdge.Cost = Cost;
+ SrcEdge.Capacity = Capacity;
+ SrcEdge.Flow = 0;
+ SrcEdge.RevEdgeIndex = Edges[Dst].size();
+
+ Edge DstEdge;
+ DstEdge.Dst = Src;
+ DstEdge.Cost = -Cost;
+ DstEdge.Capacity = 0;
+ DstEdge.Flow = 0;
+ DstEdge.RevEdgeIndex = Edges[Src].size();
+
+ Edges[Src].push_back(SrcEdge);
+ Edges[Dst].push_back(DstEdge);
+ }
+
+  /// Add an edge of infinite capacity and a given cost to the network.
+ void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) {
+ addEdge(Src, Dst, INF, Cost);
+ }
+
+ /// Get the total flow from a given source node.
+ /// Returns a list of pairs (target node, amount of flow to the target).
+ const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
+ std::vector<std::pair<uint64_t, int64_t>> Flow;
+ for (auto &Edge : Edges[Src]) {
+ if (Edge.Flow > 0)
+ Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
+ }
+ return Flow;
+ }
+
+ /// Get the total flow between a pair of nodes.
+ int64_t getFlow(uint64_t Src, uint64_t Dst) const {
+ int64_t Flow = 0;
+ for (auto &Edge : Edges[Src]) {
+ if (Edge.Dst == Dst) {
+ Flow += Edge.Flow;
+ }
+ }
+ return Flow;
+ }
+
+  /// The cost of increasing a block's count by one.
+  static constexpr int64_t AuxCostInc = 10;
+  /// The cost of decreasing a block's count by one.
+  static constexpr int64_t AuxCostDec = 20;
+  /// The cost of increasing the count of a zero-weight block by one.
+  static constexpr int64_t AuxCostIncZero = 11;
+  /// The cost of increasing the entry block's count by one.
+  static constexpr int64_t AuxCostIncEntry = 40;
+  /// The cost of decreasing the entry block's count by one.
+  static constexpr int64_t AuxCostDecEntry = 10;
+  /// The cost of taking an unlikely jump.
+ static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 20;
+
+private:
+  /// Check for the existence of an augmenting path with positive capacity.
+ bool findAugmentingPath() {
+ // Initialize data structures
+ for (auto &Node : Nodes) {
+ Node.Distance = INF;
+ Node.ParentNode = uint64_t(-1);
+ Node.ParentEdgeIndex = uint64_t(-1);
+ Node.Taken = false;
+ }
+
+ std::queue<uint64_t> Queue;
+ Queue.push(Source);
+ Nodes[Source].Distance = 0;
+ Nodes[Source].Taken = true;
+ while (!Queue.empty()) {
+ uint64_t Src = Queue.front();
+ Queue.pop();
+ Nodes[Src].Taken = false;
+ // Although the residual network contains edges with negative costs
+ // (in particular, backward edges), it can be shown that there are no
+ // negative-weight cycles and the following two invariants are maintained:
+ // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V,
+ // where Dist is the length of the shortest path between two nodes. This
+      // allows us to prune the search space of the path-finding algorithm
+      // using the following early-stop criteria:
+      // -- If we find a path with zero distance from Source to Target, stop
+      //    the search; the path is the shortest, since Dist[Source, Target] >= 0;
+      // -- If we have Dist[Source, V] > Dist[Source, Target], then do not
+      //    process node V, as it is guaranteed _not_ to be on a shortest path
+      //    from Source to Target; this follows from the inequalities
+ // Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
+ // >= Dist[Source, V]
+ if (Nodes[Target].Distance == 0)
+ break;
+ if (Nodes[Src].Distance > Nodes[Target].Distance)
+ continue;
+
+ // Process adjacent edges
+ for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
+ auto &Edge = Edges[Src][EdgeIdx];
+ if (Edge.Flow < Edge.Capacity) {
+ uint64_t Dst = Edge.Dst;
+ int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
+ if (Nodes[Dst].Distance > NewDistance) {
+ // Update the distance and the parent node/edge
+ Nodes[Dst].Distance = NewDistance;
+ Nodes[Dst].ParentNode = Src;
+ Nodes[Dst].ParentEdgeIndex = EdgeIdx;
+ // Add the node to the queue, if it is not there yet
+ if (!Nodes[Dst].Taken) {
+ Queue.push(Dst);
+ Nodes[Dst].Taken = true;
+ }
+ }
+ }
+ }
+ }
+
+ return Nodes[Target].Distance != INF;
+ }
+
+ /// Update the current flow along the augmenting path.
+ void augmentFlowAlongPath() {
+ // Find path capacity
+ int64_t PathCapacity = INF;
+ uint64_t Now = Target;
+ while (Now != Source) {
+ uint64_t Pred = Nodes[Now].ParentNode;
+ auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
+ PathCapacity = std::min(PathCapacity, Edge.Capacity - Edge.Flow);
+ Now = Pred;
+ }
+
+ assert(PathCapacity > 0 && "found incorrect augmenting path");
+
+ // Update the flow along the path
+ Now = Target;
+ while (Now != Source) {
+ uint64_t Pred = Nodes[Now].ParentNode;
+ auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
+ auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];
+
+ Edge.Flow += PathCapacity;
+ RevEdge.Flow -= PathCapacity;
+
+ Now = Pred;
+ }
+ }
+
+  /// A node in a flow network.
+ struct Node {
+ /// The cost of the cheapest path from the source to the current node.
+ int64_t Distance;
+ /// The node preceding the current one in the path.
+ uint64_t ParentNode;
+ /// The index of the edge between ParentNode and the current node.
+ uint64_t ParentEdgeIndex;
+ /// An indicator of whether the current node is in a queue.
+ bool Taken;
+ };
+ /// An edge in a flow network.
+ struct Edge {
+ /// The cost of the edge.
+ int64_t Cost;
+ /// The capacity of the edge.
+ int64_t Capacity;
+ /// The current flow on the edge.
+ int64_t Flow;
+ /// The destination node of the edge.
+ uint64_t Dst;
+ /// The index of the reverse edge between Dst and the current node.
+ uint64_t RevEdgeIndex;
+ };
+
+ /// The set of network nodes.
+ std::vector<Node> Nodes;
+ /// The set of network edges.
+ std::vector<std::vector<Edge>> Edges;
+ /// Source node of the flow.
+ uint64_t Source;
+ /// Target (sink) node of the flow.
+ uint64_t Target;
+};
+
+/// Initialize the flow network for a given function.
+///
+/// Every block is split into three nodes that are responsible for (i) the
+/// incoming flow, (ii) the outgoing flow, and (iii) penalizing an increase
+/// or reduction of the block weight.
+void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) {
+ uint64_t NumBlocks = Func.Blocks.size();
+ assert(NumBlocks > 1 && "Too few blocks in a function");
+ LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n");
+
+ // Pre-process data: make sure the entry weight is at least 1
+ if (Func.Blocks[Func.Entry].Weight == 0) {
+ Func.Blocks[Func.Entry].Weight = 1;
+ }
+  // Introduce dummy source/sink pairs to allow flow circulation.
+  // The nodes corresponding to blocks of Func have indices in the range
+  // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values.
+ uint64_t S = 3 * NumBlocks;
+ uint64_t T = S + 1;
+ uint64_t S1 = S + 2;
+ uint64_t T1 = S + 3;
+
+ Network.initialize(3 * NumBlocks + 4, S1, T1);
+
+ // Create three nodes for every block of the function
+ for (uint64_t B = 0; B < NumBlocks; B++) {
+ auto &Block = Func.Blocks[B];
+ assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
+ "non-zero weight of a block w/o weight except for an entry");
+
+    // Split every block into three nodes: Bin, Bout, and an auxiliary Baux
+ uint64_t Bin = 3 * B;
+ uint64_t Bout = 3 * B + 1;
+ uint64_t Baux = 3 * B + 2;
+ if (Block.Weight > 0) {
+ Network.addEdge(S1, Bout, Block.Weight, 0);
+ Network.addEdge(Bin, T1, Block.Weight, 0);
+ }
+
+ // Edges from S and to T
+ assert((!Block.isEntry() || !Block.isExit()) &&
+ "a block cannot be an entry and an exit");
+ if (Block.isEntry()) {
+ Network.addEdge(S, Bin, 0);
+ } else if (Block.isExit()) {
+ Network.addEdge(Bout, T, 0);
+ }
+
+    // An auxiliary node to allow increasing or reducing block counts:
+    // We assume that decreasing block counts is more expensive than
+    // increasing, and thus set separate costs here. In the future we may
+    // want to tune the relative costs to maximize the quality of profiles.
+ int64_t AuxCostInc = MinCostMaxFlow::AuxCostInc;
+ int64_t AuxCostDec = MinCostMaxFlow::AuxCostDec;
+ if (Block.UnknownWeight) {
+ // Do not penalize changing weights of blocks w/o known profile count
+ AuxCostInc = 0;
+ AuxCostDec = 0;
+ } else {
+ // Increasing the count for "cold" blocks with zero initial count is more
+ // expensive than for "hot" ones
+ if (Block.Weight == 0) {
+ AuxCostInc = MinCostMaxFlow::AuxCostIncZero;
+ }
+ // Modifying the count of the entry block is expensive
+ if (Block.isEntry()) {
+ AuxCostInc = MinCostMaxFlow::AuxCostIncEntry;
+ AuxCostDec = MinCostMaxFlow::AuxCostDecEntry;
+ }
+ }
+ // For blocks with self-edges, do not penalize a reduction of the count,
+ // as all of the increase can be attributed to the self-edge
+ if (Block.HasSelfEdge) {
+ AuxCostDec = 0;
+ }
+
+ Network.addEdge(Bin, Baux, AuxCostInc);
+ Network.addEdge(Baux, Bout, AuxCostInc);
+ if (Block.Weight > 0) {
+ Network.addEdge(Bout, Baux, AuxCostDec);
+ Network.addEdge(Baux, Bin, AuxCostDec);
+ }
+ }
+
+ // Creating edges for every jump
+ for (auto &Jump : Func.Jumps) {
+ uint64_t Src = Jump.Source;
+ uint64_t Dst = Jump.Target;
+ if (Src != Dst) {
+ uint64_t SrcOut = 3 * Src + 1;
+ uint64_t DstIn = 3 * Dst;
+ uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0;
+ Network.addEdge(SrcOut, DstIn, Cost);
+ }
+ }
+
+ // Make sure we have a valid flow circulation
+ Network.addEdge(T, S, 0);
+}
+
+/// Extract resulting block and edge counts from the flow network.
+void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) {
+ uint64_t NumBlocks = Func.Blocks.size();
+
+ // Extract resulting block counts
+ for (uint64_t Src = 0; Src < NumBlocks; Src++) {
+ auto &Block = Func.Blocks[Src];
+ uint64_t SrcOut = 3 * Src + 1;
+ int64_t Flow = 0;
+ for (auto &Adj : Network.getFlow(SrcOut)) {
+ uint64_t DstIn = Adj.first;
+ int64_t DstFlow = Adj.second;
+ bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2);
+ if (!IsAuxNode || Block.HasSelfEdge) {
+ Flow += DstFlow;
+ }
+ }
+ Block.Flow = Flow;
+ assert(Flow >= 0 && "negative block flow");
+ }
+
+ // Extract resulting jump counts
+ for (auto &Jump : Func.Jumps) {
+ uint64_t Src = Jump.Source;
+ uint64_t Dst = Jump.Target;
+ int64_t Flow = 0;
+ if (Src != Dst) {
+ uint64_t SrcOut = 3 * Src + 1;
+ uint64_t DstIn = 3 * Dst;
+ Flow = Network.getFlow(SrcOut, DstIn);
+ } else {
+ uint64_t SrcOut = 3 * Src + 1;
+ uint64_t SrcAux = 3 * Src + 2;
+ int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux);
+ if (AuxFlow > 0)
+ Flow = AuxFlow;
+ }
+ Jump.Flow = Flow;
+ assert(Flow >= 0 && "negative jump flow");
+ }
+}
+
+#ifndef NDEBUG
+/// Verify that the computed flow values satisfy flow conservation rules
+void verifyWeights(const FlowFunction &Func) {
+ const uint64_t NumBlocks = Func.Blocks.size();
+ auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
+ auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
+ for (auto &Jump : Func.Jumps) {
+ InFlow[Jump.Target] += Jump.Flow;
+ OutFlow[Jump.Source] += Jump.Flow;
+ }
+
+ uint64_t TotalInFlow = 0;
+ uint64_t TotalOutFlow = 0;
+ for (uint64_t I = 0; I < NumBlocks; I++) {
+ auto &Block = Func.Blocks[I];
+ if (Block.isEntry()) {
+ TotalInFlow += Block.Flow;
+ assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
+ } else if (Block.isExit()) {
+ TotalOutFlow += Block.Flow;
+ assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
+ } else {
+ assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
+ assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
+ }
+ }
+ assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");
+}
+#endif
+
+} // end of anonymous namespace
+
+/// Apply the profile inference algorithm for a given flow function
+void llvm::applyFlowInference(FlowFunction &Func) {
+ // Create and apply an inference network model
+ auto InferenceNetwork = MinCostMaxFlow();
+ initializeNetwork(InferenceNetwork, Func);
+ InferenceNetwork.run();
+
+ // Extract flow values for every block and every edge
+ extractWeights(InferenceNetwork, Func);
+
+#ifndef NDEBUG
+ // Verify the result
+ verifyWeights(Func);
+#endif
+}
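
Since MinCostMaxFlow sits in an anonymous namespace, any harness for it has to live in this translation unit (for instance, a unit test compiled into it). Under that assumption, a toy fragment exercising the API defined above: two parallel 0->1 edges of different cost feed a single 1->2 edge of capacity 7, so the cheaper edge saturates first.

    // Route flow from node 0 to node 2; node numbering and costs are
    // arbitrary values chosen for the example.
    MinCostMaxFlow Network;
    Network.initialize(/*NodeCount=*/3, /*SourceNode=*/0, /*SinkNode=*/2);
    Network.addEdge(0, 1, /*Capacity=*/5, /*Cost=*/1);
    Network.addEdge(0, 1, /*Capacity=*/5, /*Cost=*/3); // parallel edge, allowed
    Network.addEdge(1, 2, /*Capacity=*/7, /*Cost=*/0); // bottleneck
    int64_t TotalCost = Network.run();
    // Expected: 5 units on the cost-1 edge plus 2 on the cost-3 edge,
    // so TotalCost == 5 * 1 + 2 * 3 == 11, and all 7 units reach the sink.
    int64_t Routed = Network.getFlow(1, 2); // == 7
    (void)TotalCost;
    (void)Routed;
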
diff --git a/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp b/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp
index 6d995cf4c048..ea0e8343eb88 100644
--- a/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp
+++ b/llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp
@@ -34,6 +34,10 @@ cl::opt<bool> NoWarnSampleUnused(
cl::desc("Use this option to turn off/on warnings about function with "
"samples but without debug information to use those samples. "));
+cl::opt<bool> SampleProfileUseProfi(
+ "sample-profile-use-profi", cl::init(false), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Use profi to infer block and edge counts."));
+
namespace sampleprofutil {
/// Return true if the given callsite is hot wrt to hot cutoff threshold.
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index a042146d7ace..71c15d5c51fc 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -18,6 +18,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -1833,22 +1834,6 @@ Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty, bool Root) {
return V;
}
-/// Check whether value has nuw/nsw/exact set but SCEV does not.
-/// TODO: In reality it is better to check the poison recursively
-/// but this is better than nothing.
-static bool SCEVLostPoisonFlags(const SCEV *S, const Instruction *I) {
- if (isa<OverflowingBinaryOperator>(I)) {
- if (auto *NS = dyn_cast<SCEVNAryExpr>(S)) {
- if (I->hasNoSignedWrap() && !NS->hasNoSignedWrap())
- return true;
- if (I->hasNoUnsignedWrap() && !NS->hasNoUnsignedWrap())
- return true;
- }
- } else if (isa<PossiblyExactOperator>(I) && I->isExact())
- return true;
- return false;
-}
-
ScalarEvolution::ValueOffsetPair
SCEVExpander::FindValueInExprValueMap(const SCEV *S,
const Instruction *InsertPt) {
@@ -1872,8 +1857,7 @@ SCEVExpander::FindValueInExprValueMap(const SCEV *S,
if (S->getType() == V->getType() &&
SE.DT.dominates(EntInst, InsertPt) &&
(SE.LI.getLoopFor(EntInst->getParent()) == nullptr ||
- SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)) &&
- !SCEVLostPoisonFlags(S, EntInst))
+ SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)))
return {V, Offset};
}
}
@@ -1952,26 +1936,36 @@ Value *SCEVExpander::expand(const SCEV *S) {
if (!V)
V = visit(S);
- else if (VO.second) {
- if (PointerType *Vty = dyn_cast<PointerType>(V->getType())) {
- Type *Ety = Vty->getPointerElementType();
- int64_t Offset = VO.second->getSExtValue();
- int64_t ESize = SE.getTypeSizeInBits(Ety);
- if ((Offset * 8) % ESize == 0) {
- ConstantInt *Idx =
+ else {
+ // If we're reusing an existing instruction, we are effectively CSEing two
+ // copies of the instruction (with potentially different flags). As such,
+ // we need to drop any poison generating flags unless we can prove that
+ // said flags must be valid for all new users.
+ if (auto *I = dyn_cast<Instruction>(V))
+ if (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I))
+ I->dropPoisonGeneratingFlags();
+
+ if (VO.second) {
+ if (PointerType *Vty = dyn_cast<PointerType>(V->getType())) {
+ Type *Ety = Vty->getPointerElementType();
+ int64_t Offset = VO.second->getSExtValue();
+ int64_t ESize = SE.getTypeSizeInBits(Ety);
+ if ((Offset * 8) % ESize == 0) {
+ ConstantInt *Idx =
ConstantInt::getSigned(VO.second->getType(), -(Offset * 8) / ESize);
- V = Builder.CreateGEP(Ety, V, Idx, "scevgep");
- } else {
- ConstantInt *Idx =
+ V = Builder.CreateGEP(Ety, V, Idx, "scevgep");
+ } else {
+ ConstantInt *Idx =
ConstantInt::getSigned(VO.second->getType(), -Offset);
- unsigned AS = Vty->getAddressSpace();
- V = Builder.CreateBitCast(V, Type::getInt8PtrTy(SE.getContext(), AS));
- V = Builder.CreateGEP(Type::getInt8Ty(SE.getContext()), V, Idx,
- "uglygep");
- V = Builder.CreateBitCast(V, Vty);
+ unsigned AS = Vty->getAddressSpace();
+ V = Builder.CreateBitCast(V, Type::getInt8PtrTy(SE.getContext(), AS));
+ V = Builder.CreateGEP(Type::getInt8Ty(SE.getContext()), V, Idx,
+ "uglygep");
+ V = Builder.CreateBitCast(V, Vty);
+ }
+ } else {
+ V = Builder.CreateSub(V, VO.second);
}
- } else {
- V = Builder.CreateSub(V, VO.second);
}
}
// Remember the expanded value for this SCEV at this location.
@@ -2180,7 +2174,9 @@ SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At,
}
// Use expand's logic which is used for reusing a previous Value in
- // ExprValueMap.
+  // ExprValueMap. Note that we don't currently model the cost of
+  // needing to drop poison-generating flags on the instruction if we
+  // want to reuse it; we effectively assume that has zero cost.
ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, At);
if (VO.first)
return VO;
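
The expand() change above treats reuse of a cached instruction as CSE of two copies that may carry different nuw/nsw/exact/inbounds flags; since those flags are poison-generating, they must be stripped unless poison at that point already implies undefined behavior. The guard in isolation, restated with the same LLVM APIs the hunk itself uses (makeSafeToReuse is a hypothetical wrapper):

    #include "llvm/Analysis/ValueTracking.h"
    #include "llvm/IR/Instruction.h"

    using namespace llvm;

    // E.g. "%a = add nuw i32 %x, %y" is poison when the add wraps, while a
    // plain "add" just wraps; merging the two must keep the weaker form
    // unless poison here would make the program undefined anyway.
    static void makeSafeToReuse(Instruction *I) {
      if (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I))
        I->dropPoisonGeneratingFlags();
    }
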
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index f467de5f924e..afa3ecde77f9 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -3936,7 +3936,7 @@ bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm,
BasicBlock *KeepEdge1 = TrueBB;
BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
- SmallPtrSet<BasicBlock *, 2> RemovedSuccessors;
+ SmallSetVector<BasicBlock *, 2> RemovedSuccessors;
// Then remove the rest.
for (BasicBlock *Succ : successors(OldTerm)) {
@@ -4782,6 +4782,26 @@ static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) {
return true;
}
+static void createUnreachableSwitchDefault(SwitchInst *Switch,
+ DomTreeUpdater *DTU) {
+ LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
+ auto *BB = Switch->getParent();
+ auto *OrigDefaultBlock = Switch->getDefaultDest();
+ OrigDefaultBlock->removePredecessor(BB);
+ BasicBlock *NewDefaultBlock = BasicBlock::Create(
+ BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
+ OrigDefaultBlock);
+ new UnreachableInst(Switch->getContext(), NewDefaultBlock);
+ Switch->setDefaultDest(&*NewDefaultBlock);
+ if (DTU) {
+ SmallVector<DominatorTree::UpdateType, 2> Updates;
+ Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
+ if (!is_contained(successors(BB), OrigDefaultBlock))
+ Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
+ DTU->applyUpdates(Updates);
+ }
+}
+
/// Turn a switch with two reachable destinations into an integer range
/// comparison and branch.
bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI,
@@ -4927,10 +4947,14 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU,
// Gather dead cases.
SmallVector<ConstantInt *, 8> DeadCases;
SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
+ SmallVector<BasicBlock *, 8> UniqueSuccessors;
for (auto &Case : SI->cases()) {
auto *Successor = Case.getCaseSuccessor();
- if (DTU)
+ if (DTU) {
+ if (!NumPerSuccessorCases.count(Successor))
+ UniqueSuccessors.push_back(Successor);
++NumPerSuccessorCases[Successor];
+ }
const APInt &CaseVal = Case.getCaseValue()->getValue();
if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
(CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) {
@@ -4973,9 +4997,9 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU,
if (DTU) {
std::vector<DominatorTree::UpdateType> Updates;
- for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
- if (I.second == 0)
- Updates.push_back({DominatorTree::Delete, SI->getParent(), I.first});
+ for (auto *Successor : UniqueSuccessors)
+ if (NumPerSuccessorCases[Successor] == 0)
+ Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor});
DTU->applyUpdates(Updates);
}
@@ -6040,15 +6064,13 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
if (Succ == SI->getDefaultDest())
continue;
Succ->removePredecessor(BB);
- RemovedSuccessors.insert(Succ);
+ if (DTU && RemovedSuccessors.insert(Succ).second)
+ Updates.push_back({DominatorTree::Delete, BB, Succ});
}
SI->eraseFromParent();
- if (DTU) {
- for (BasicBlock *RemovedSuccessor : RemovedSuccessors)
- Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
+ if (DTU)
DTU->applyUpdates(Updates);
- }
++NumLookupTables;
if (NeedMask)
@@ -6215,7 +6237,7 @@ bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
// Eliminate redundant destinations.
SmallPtrSet<Value *, 8> Succs;
- SmallPtrSet<BasicBlock *, 8> RemovedSuccs;
+ SmallSetVector<BasicBlock *, 8> RemovedSuccs;
for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
BasicBlock *Dest = IBI->getDestination(i);
if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
@@ -6305,8 +6327,8 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
// We've found an identical block. Update our predecessors to take that
// path instead and make ourselves dead.
- SmallPtrSet<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
- for (BasicBlock *Pred : Preds) {
+ SmallSetVector<BasicBlock *, 16> UniquePreds(pred_begin(BB), pred_end(BB));
+ for (BasicBlock *Pred : UniquePreds) {
InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
"unexpected successor");
@@ -6323,8 +6345,8 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
if (isa<DbgInfoIntrinsic>(Inst))
Inst.eraseFromParent();
- SmallPtrSet<BasicBlock *, 16> Succs(succ_begin(BB), succ_end(BB));
- for (BasicBlock *Succ : Succs) {
+ SmallSetVector<BasicBlock *, 16> UniqueSuccs(succ_begin(BB), succ_end(BB));
+ for (BasicBlock *Succ : UniqueSuccs) {
Succ->removePredecessor(BB);
if (DTU)
Updates.push_back({DominatorTree::Delete, BB, Succ});
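
Several SimplifyCFG hunks above swap SmallPtrSet for SmallSetVector (RemovedSuccessors, RemovedSuccs, UniquePreds, UniqueSuccs). The point is determinism: SmallPtrSet iterates in a pointer-dependent order once it spills its inline buffer, so updates emitted while walking it can vary between runs, while SmallSetVector deduplicates but iterates in insertion order. A small sketch, assuming the LLVM ADT header:

    #include "llvm/ADT/SmallSetVector.h"

    using namespace llvm;

    static void collectInOrder(int *A, int *B, int *C) {
      SmallSetVector<int *, 4> Ordered;
      Ordered.insert(A);
      Ordered.insert(B);
      Ordered.insert(A); // duplicate: ignored, insert returns false
      Ordered.insert(C);
      // Iterates A, B, C in insertion order with duplicates dropped, so
      // anything derived from the walk (e.g. DomTree updates) is reproducible.
      for (int *P : Ordered)
        (void)P;
    }
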
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 23bb6f0860c9..5ca0adb4242c 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -473,18 +473,10 @@ public:
/// handle the more complex control flow around the loops.
virtual BasicBlock *createVectorizedLoopSkeleton();
- /// Widen a single instruction within the innermost loop.
- void widenInstruction(Instruction &I, VPValue *Def, VPUser &Operands,
- VPTransformState &State);
-
/// Widen a single call instruction within the innermost loop.
void widenCallInstruction(CallInst &I, VPValue *Def, VPUser &ArgOperands,
VPTransformState &State);
- /// Widen a single select instruction within the innermost loop.
- void widenSelectInstruction(SelectInst &I, VPValue *VPDef, VPUser &Operands,
- bool InvariantCond, VPTransformState &State);
-
/// Fix the vectorized code, taking care of header phi's, live-outs, and more.
void fixVectorizedLoop(VPTransformState &State);
@@ -496,12 +488,6 @@ public:
/// new unrolled loop, where UF is the unroll factor.
using VectorParts = SmallVector<Value *, 2>;
- /// Vectorize a single GetElementPtrInst based on information gathered and
- /// decisions taken during planning.
- void widenGEP(GetElementPtrInst *GEP, VPValue *VPDef, VPUser &Indices,
- unsigned UF, ElementCount VF, bool IsPtrLoopInvariant,
- SmallBitVector &IsIndexLoopInvariant, VPTransformState &State);
-
/// Vectorize a single first-order recurrence or pointer induction PHINode in
/// a block. This method handles the induction variable canonicalization. It
/// supports both VF = 1 for unrolled loops and arbitrary length vectors.
@@ -511,9 +497,9 @@ public:
/// A helper function to scalarize a single Instruction in the innermost loop.
/// Generates a sequence of scalar instances for each lane between \p MinLane
/// and \p MaxLane, times each part between \p MinPart and \p MaxPart,
- /// inclusive. Uses the VPValue operands from \p Operands instead of \p
+ /// inclusive. Uses the VPValue operands from \p RepRecipe instead of \p
/// Instr's operands.
- void scalarizeInstruction(Instruction *Instr, VPValue *Def, VPUser &Operands,
+ void scalarizeInstruction(Instruction *Instr, VPReplicateRecipe *RepRecipe,
const VPIteration &Instance, bool IfPredicateInstr,
VPTransformState &State);
@@ -538,15 +524,6 @@ public:
ArrayRef<VPValue *> StoredValues,
VPValue *BlockInMask = nullptr);
- /// Vectorize Load and Store instructions with the base address given in \p
- /// Addr, optionally masking the vector operations if \p BlockInMask is
- /// non-null. Use \p State to translate given VPValues to IR values in the
- /// vectorized loop.
- void vectorizeMemoryInstruction(Instruction *Instr, VPTransformState &State,
- VPValue *Def, VPValue *Addr,
- VPValue *StoredValue, VPValue *BlockInMask,
- bool ConsecutiveStride, bool Reverse);
-
/// Set the debug location in the builder \p Ptr using the debug location in
/// \p V. If \p Ptr is None then it uses the class member's Builder.
void setDebugLocFromInst(const Value *V,
@@ -566,6 +543,17 @@ public:
/// element.
virtual Value *getBroadcastInstrs(Value *V);
+ /// Add metadata from one instruction to another.
+ ///
+ /// This includes both the original MDs from \p From and additional ones (\see
+ /// addNewMetadata). Use this for *newly created* instructions in the vector
+ /// loop.
+ void addMetadata(Instruction *To, Instruction *From);
+
+ /// Similar to the previous function but it adds the metadata to a
+ /// vector of instructions.
+ void addMetadata(ArrayRef<Value *> To, Instruction *From);
+
protected:
friend class LoopVectorizationPlanner;
@@ -741,16 +729,16 @@ protected:
/// vector loop.
void addNewMetadata(Instruction *To, const Instruction *Orig);
- /// Add metadata from one instruction to another.
- ///
- /// This includes both the original MDs from \p From and additional ones (\see
- /// addNewMetadata). Use this for *newly created* instructions in the vector
- /// loop.
- void addMetadata(Instruction *To, Instruction *From);
-
- /// Similar to the previous function but it adds the metadata to a
- /// vector of instructions.
- void addMetadata(ArrayRef<Value *> To, Instruction *From);
+ /// Collect poison-generating recipes that may generate a poison value that is
+ /// used after vectorization, even when their operands are not poison. Those
+ /// recipes meet the following conditions:
+ /// * Contribute to the address computation of a recipe generating a widen
+ /// memory load/store (VPWidenMemoryInstructionRecipe or
+ /// VPInterleaveRecipe).
+ /// * Such a widen memory load/store has at least one underlying Instruction
+ /// that is in a basic block that needs predication and after vectorization
+ /// the generated instruction won't be predicated.
+ void collectPoisonGeneratingRecipes(VPTransformState &State);
/// Allow subclasses to override and print debug traces before/after vplan
/// execution, when trace information is requested.
@@ -1173,6 +1161,84 @@ void InnerLoopVectorizer::addNewMetadata(Instruction *To,
LVer->annotateInstWithNoAlias(To, Orig);
}
+void InnerLoopVectorizer::collectPoisonGeneratingRecipes(
+ VPTransformState &State) {
+
+ // Collect recipes in the backward slice of `Root` that may generate a poison
+ // value that is used after vectorization.
+ SmallPtrSet<VPRecipeBase *, 16> Visited;
+ auto collectPoisonGeneratingInstrsInBackwardSlice([&](VPRecipeBase *Root) {
+ SmallVector<VPRecipeBase *, 16> Worklist;
+ Worklist.push_back(Root);
+
+ // Traverse the backward slice of Root through its use-def chain.
+ while (!Worklist.empty()) {
+ VPRecipeBase *CurRec = Worklist.back();
+ Worklist.pop_back();
+
+ if (!Visited.insert(CurRec).second)
+ continue;
+
+ // Prune search if we find another recipe generating a widen memory
+ // instruction. Widen memory instructions involved in address computation
+ // will lead to gather/scatter instructions, which don't need to be
+ // handled.
+ if (isa<VPWidenMemoryInstructionRecipe>(CurRec) ||
+ isa<VPInterleaveRecipe>(CurRec))
+ continue;
+
+ // This recipe contributes to the address computation of a widen
+ // load/store. Collect recipe if its underlying instruction has
+ // poison-generating flags.
+ Instruction *Instr = CurRec->getUnderlyingInstr();
+ if (Instr && Instr->hasPoisonGeneratingFlags())
+ State.MayGeneratePoisonRecipes.insert(CurRec);
+
+ // Add new definitions to the worklist.
+ for (VPValue *operand : CurRec->operands())
+ if (VPDef *OpDef = operand->getDef())
+ Worklist.push_back(cast<VPRecipeBase>(OpDef));
+ }
+ });
+
+ // Traverse all the recipes in the VPlan and collect the poison-generating
+ // recipes in the backward slice starting at the address of a VPWidenRecipe or
+ // VPInterleaveRecipe.
+ auto Iter = depth_first(
+ VPBlockRecursiveTraversalWrapper<VPBlockBase *>(State.Plan->getEntry()));
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
+ for (VPRecipeBase &Recipe : *VPBB) {
+ if (auto *WidenRec = dyn_cast<VPWidenMemoryInstructionRecipe>(&Recipe)) {
+ Instruction *UnderlyingInstr = WidenRec->getUnderlyingInstr();
+ VPDef *AddrDef = WidenRec->getAddr()->getDef();
+ if (AddrDef && WidenRec->isConsecutive() && UnderlyingInstr &&
+ Legal->blockNeedsPredication(UnderlyingInstr->getParent()))
+ collectPoisonGeneratingInstrsInBackwardSlice(
+ cast<VPRecipeBase>(AddrDef));
+ } else if (auto *InterleaveRec = dyn_cast<VPInterleaveRecipe>(&Recipe)) {
+ VPDef *AddrDef = InterleaveRec->getAddr()->getDef();
+ if (AddrDef) {
+ // Check if any member of the interleave group needs predication.
+ const InterleaveGroup<Instruction> *InterGroup =
+ InterleaveRec->getInterleaveGroup();
+ bool NeedPredication = false;
+ for (int I = 0, NumMembers = InterGroup->getNumMembers();
+ I < NumMembers; ++I) {
+ Instruction *Member = InterGroup->getMember(I);
+ if (Member)
+ NeedPredication |=
+ Legal->blockNeedsPredication(Member->getParent());
+ }
+
+ if (NeedPredication)
+ collectPoisonGeneratingInstrsInBackwardSlice(
+ cast<VPRecipeBase>(AddrDef));
+ }
+ }
+ }
+ }
+}
+
void InnerLoopVectorizer::addMetadata(Instruction *To,
Instruction *From) {
propagateMetadata(To, From);
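
collectPoisonGeneratingRecipes above is a standard worklist-driven backward-slice walk over the recipe def-use graph: pop a node, skip it if visited, prune at widen memory recipes, record poison-generating recipes, then push operand definitions. The traversal skeleton in generic form, with the VPlan specifics replaced by caller-supplied callables (a sketch, not an LLVM utility):

    #include <unordered_set>
    #include <vector>

    // Generic backward slice: starting from Root, repeatedly follow def
    // edges, visiting each node at most once. Defs(N) returns N's operand
    // definitions, PruneAt(N) stops exploration past N, and Visit(N) records
    // interesting nodes (e.g. those with poison-generating flags).
    template <typename Node, typename DefsFn, typename PruneFn,
              typename VisitFn>
    void walkBackwardSlice(Node *Root, DefsFn Defs, PruneFn PruneAt,
                           VisitFn Visit) {
      std::unordered_set<Node *> Visited;
      std::vector<Node *> Worklist{Root};
      while (!Worklist.empty()) {
        Node *Cur = Worklist.back();
        Worklist.pop_back();
        if (!Visited.insert(Cur).second)
          continue; // already processed
        if (PruneAt(Cur))
          continue; // e.g. another widen memory recipe
        Visit(Cur);
        for (Node *Def : Defs(Cur))
          Worklist.push_back(Def);
      }
    }
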
@@ -1541,7 +1607,16 @@ public:
// Returns true if \p I is an instruction that will be predicated either
// through scalar predication or masked load/store or masked gather/scatter.
// Superset of instructions that return true for isScalarWithPredication.
- bool isPredicatedInst(Instruction *I) {
+ bool isPredicatedInst(Instruction *I, bool IsKnownUniform = false) {
+    // When we know the load is uniform and the original scalar loop was not
+    // predicated, we don't need to mark it as a predicated instruction. Any
+    // vectorized blocks created when tail-folding are artificial blocks that
+    // we introduced, and we know there is always at least one active lane.
+    // That's why we call Legal->blockNeedsPredication here: it doesn't
+    // query tail-folding.
+ if (IsKnownUniform && isa<LoadInst>(I) &&
+ !Legal->blockNeedsPredication(I->getParent()))
+ return false;
if (!blockNeedsPredicationForAnyReason(I->getParent()))
return false;
// Loads and stores that need some form of masked operation are predicated
@@ -1816,9 +1891,11 @@ private:
/// Collect the instructions that are scalar after vectorization. An
/// instruction is scalar if it is known to be uniform or will be scalarized
- /// during vectorization. Non-uniform scalarized instructions will be
- /// represented by VF values in the vectorized loop, each corresponding to an
- /// iteration of the original scalar loop.
+ /// during vectorization. collectLoopScalars should only add non-uniform nodes
+ /// to the list if they are used by a load/store instruction that is marked as
+ /// CM_Scalarize. Non-uniform scalarized instructions will be represented by
+ /// VF values in the vectorized loop, each corresponding to an iteration of
+ /// the original scalar loop.
void collectLoopScalars(ElementCount VF);
/// Keeps cost model vectorization decision and cost for instructions.
@@ -2918,132 +2995,8 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
}
}
-void InnerLoopVectorizer::vectorizeMemoryInstruction(
- Instruction *Instr, VPTransformState &State, VPValue *Def, VPValue *Addr,
- VPValue *StoredValue, VPValue *BlockInMask, bool ConsecutiveStride,
- bool Reverse) {
- // Attempt to issue a wide load.
- LoadInst *LI = dyn_cast<LoadInst>(Instr);
- StoreInst *SI = dyn_cast<StoreInst>(Instr);
-
- assert((LI || SI) && "Invalid Load/Store instruction");
- assert((!SI || StoredValue) && "No stored value provided for widened store");
- assert((!LI || !StoredValue) && "Stored value provided for widened load");
-
- Type *ScalarDataTy = getLoadStoreType(Instr);
-
- auto *DataTy = VectorType::get(ScalarDataTy, VF);
- const Align Alignment = getLoadStoreAlignment(Instr);
- bool CreateGatherScatter = !ConsecutiveStride;
-
- VectorParts BlockInMaskParts(UF);
- bool isMaskRequired = BlockInMask;
- if (isMaskRequired)
- for (unsigned Part = 0; Part < UF; ++Part)
- BlockInMaskParts[Part] = State.get(BlockInMask, Part);
-
- const auto CreateVecPtr = [&](unsigned Part, Value *Ptr) -> Value * {
- // Calculate the pointer for the specific unroll-part.
- GetElementPtrInst *PartPtr = nullptr;
-
- bool InBounds = false;
- if (auto *gep = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts()))
- InBounds = gep->isInBounds();
- if (Reverse) {
- // If the address is consecutive but reversed, then the
- // wide store needs to start at the last vector element.
- // RunTimeVF = VScale * VF.getKnownMinValue()
- // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
- Value *RunTimeVF = getRuntimeVF(Builder, Builder.getInt32Ty(), VF);
- // NumElt = -Part * RunTimeVF
- Value *NumElt = Builder.CreateMul(Builder.getInt32(-Part), RunTimeVF);
- // LastLane = 1 - RunTimeVF
- Value *LastLane = Builder.CreateSub(Builder.getInt32(1), RunTimeVF);
- PartPtr =
- cast<GetElementPtrInst>(Builder.CreateGEP(ScalarDataTy, Ptr, NumElt));
- PartPtr->setIsInBounds(InBounds);
- PartPtr = cast<GetElementPtrInst>(
- Builder.CreateGEP(ScalarDataTy, PartPtr, LastLane));
- PartPtr->setIsInBounds(InBounds);
- if (isMaskRequired) // Reverse of a null all-one mask is a null mask.
- BlockInMaskParts[Part] = reverseVector(BlockInMaskParts[Part]);
- } else {
- Value *Increment =
- createStepForVF(Builder, Builder.getInt32Ty(), VF, Part);
- PartPtr = cast<GetElementPtrInst>(
- Builder.CreateGEP(ScalarDataTy, Ptr, Increment));
- PartPtr->setIsInBounds(InBounds);
- }
-
- unsigned AddressSpace = Ptr->getType()->getPointerAddressSpace();
- return Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace));
- };
-
- // Handle Stores:
- if (SI) {
- setDebugLocFromInst(SI);
-
- for (unsigned Part = 0; Part < UF; ++Part) {
- Instruction *NewSI = nullptr;
- Value *StoredVal = State.get(StoredValue, Part);
- if (CreateGatherScatter) {
- Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
- Value *VectorGep = State.get(Addr, Part);
- NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment,
- MaskPart);
- } else {
- if (Reverse) {
- // If we store to reverse consecutive memory locations, then we need
- // to reverse the order of elements in the stored value.
- StoredVal = reverseVector(StoredVal);
- // We don't want to update the value in the map as it might be used in
- // another expression. So don't call resetVectorValue(StoredVal).
- }
- auto *VecPtr = CreateVecPtr(Part, State.get(Addr, VPIteration(0, 0)));
- if (isMaskRequired)
- NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
- BlockInMaskParts[Part]);
- else
- NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
- }
- addMetadata(NewSI, SI);
- }
- return;
- }
-
- // Handle loads.
- assert(LI && "Must have a load instruction");
- setDebugLocFromInst(LI);
- for (unsigned Part = 0; Part < UF; ++Part) {
- Value *NewLI;
- if (CreateGatherScatter) {
- Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
- Value *VectorGep = State.get(Addr, Part);
- NewLI = Builder.CreateMaskedGather(DataTy, VectorGep, Alignment, MaskPart,
- nullptr, "wide.masked.gather");
- addMetadata(NewLI, LI);
- } else {
- auto *VecPtr = CreateVecPtr(Part, State.get(Addr, VPIteration(0, 0)));
- if (isMaskRequired)
- NewLI = Builder.CreateMaskedLoad(
- DataTy, VecPtr, Alignment, BlockInMaskParts[Part],
- PoisonValue::get(DataTy), "wide.masked.load");
- else
- NewLI =
- Builder.CreateAlignedLoad(DataTy, VecPtr, Alignment, "wide.load");
-
- // Add metadata to the load, but setVectorValue to the reverse shuffle.
- addMetadata(NewLI, LI);
- if (Reverse)
- NewLI = reverseVector(NewLI);
- }
-
- State.set(Def, NewLI, Part);
- }
-}
-
-void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, VPValue *Def,
- VPUser &User,
+void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
+ VPReplicateRecipe *RepRecipe,
const VPIteration &Instance,
bool IfPredicateInstr,
VPTransformState &State) {
@@ -3064,17 +3017,26 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, VPValue *Def,
if (!IsVoidRetTy)
Cloned->setName(Instr->getName() + ".cloned");
+ // If the scalarized instruction contributes to the address computation of a
+ // widen masked load/store which was in a basic block that needed predication
+ // and is not predicated after vectorization, we can't propagate
+ // poison-generating flags (nuw/nsw, exact, inbounds, etc.). The scalarized
+ // instruction could feed a poison value to the base address of the widen
+ // load/store.
+ if (State.MayGeneratePoisonRecipes.count(RepRecipe) > 0)
+ Cloned->dropPoisonGeneratingFlags();
+
State.Builder.SetInsertPoint(Builder.GetInsertBlock(),
Builder.GetInsertPoint());
// Replace the operands of the cloned instructions with their scalar
// equivalents in the new loop.
- for (unsigned op = 0, e = User.getNumOperands(); op != e; ++op) {
+ for (unsigned op = 0, e = RepRecipe->getNumOperands(); op != e; ++op) {
auto *Operand = dyn_cast<Instruction>(Instr->getOperand(op));
auto InputInstance = Instance;
if (!Operand || !OrigLoop->contains(Operand) ||
(Cost->isUniformAfterVectorization(Operand, State.VF)))
InputInstance.Lane = VPLane::getFirstLane();
- auto *NewOp = State.get(User.getOperand(op), InputInstance);
+ auto *NewOp = State.get(RepRecipe->getOperand(op), InputInstance);
Cloned->setOperand(op, NewOp);
}
addNewMetadata(Cloned, Instr);
@@ -3082,7 +3044,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, VPValue *Def,
// Place the cloned scalar in the new loop.
Builder.Insert(Cloned);
- State.set(Def, Cloned, Instance);
+ State.set(RepRecipe, Cloned, Instance);
// If we just cloned a new assumption, add it the assumption cache.
if (auto *II = dyn_cast<AssumeInst>(Cloned))
@@ -4615,77 +4577,6 @@ bool InnerLoopVectorizer::useOrderedReductions(RecurrenceDescriptor &RdxDesc) {
return Cost->useOrderedReductions(RdxDesc);
}
-void InnerLoopVectorizer::widenGEP(GetElementPtrInst *GEP, VPValue *VPDef,
- VPUser &Operands, unsigned UF,
- ElementCount VF, bool IsPtrLoopInvariant,
- SmallBitVector &IsIndexLoopInvariant,
- VPTransformState &State) {
- // Construct a vector GEP by widening the operands of the scalar GEP as
- // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
- // results in a vector of pointers when at least one operand of the GEP
- // is vector-typed. Thus, to keep the representation compact, we only use
- // vector-typed operands for loop-varying values.
-
- if (VF.isVector() && IsPtrLoopInvariant && IsIndexLoopInvariant.all()) {
- // If we are vectorizing, but the GEP has only loop-invariant operands,
- // the GEP we build (by only using vector-typed operands for
- // loop-varying values) would be a scalar pointer. Thus, to ensure we
- // produce a vector of pointers, we need to either arbitrarily pick an
- // operand to broadcast, or broadcast a clone of the original GEP.
- // Here, we broadcast a clone of the original.
- //
- // TODO: If at some point we decide to scalarize instructions having
- // loop-invariant operands, this special case will no longer be
- // required. We would add the scalarization decision to
- // collectLoopScalars() and teach getVectorValue() to broadcast
- // the lane-zero scalar value.
- auto *Clone = Builder.Insert(GEP->clone());
- for (unsigned Part = 0; Part < UF; ++Part) {
- Value *EntryPart = Builder.CreateVectorSplat(VF, Clone);
- State.set(VPDef, EntryPart, Part);
- addMetadata(EntryPart, GEP);
- }
- } else {
- // If the GEP has at least one loop-varying operand, we are sure to
- // produce a vector of pointers. But if we are only unrolling, we want
- // to produce a scalar GEP for each unroll part. Thus, the GEP we
- // produce with the code below will be scalar (if VF == 1) or vector
- // (otherwise). Note that for the unroll-only case, we still maintain
- // values in the vector mapping with initVector, as we do for other
- // instructions.
- for (unsigned Part = 0; Part < UF; ++Part) {
- // The pointer operand of the new GEP. If it's loop-invariant, we
- // won't broadcast it.
- auto *Ptr = IsPtrLoopInvariant
- ? State.get(Operands.getOperand(0), VPIteration(0, 0))
- : State.get(Operands.getOperand(0), Part);
-
- // Collect all the indices for the new GEP. If any index is
- // loop-invariant, we won't broadcast it.
- SmallVector<Value *, 4> Indices;
- for (unsigned I = 1, E = Operands.getNumOperands(); I < E; I++) {
- VPValue *Operand = Operands.getOperand(I);
- if (IsIndexLoopInvariant[I - 1])
- Indices.push_back(State.get(Operand, VPIteration(0, 0)));
- else
- Indices.push_back(State.get(Operand, Part));
- }
-
- // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
- // but it should be a vector, otherwise.
- auto *NewGEP =
- GEP->isInBounds()
- ? Builder.CreateInBoundsGEP(GEP->getSourceElementType(), Ptr,
- Indices)
- : Builder.CreateGEP(GEP->getSourceElementType(), Ptr, Indices);
- assert((VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
- "NewGEP is not a pointer vector");
- State.set(VPDef, NewGEP, Part);
- addMetadata(NewGEP, GEP);
- }
- }
-}
-
void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
VPWidenPHIRecipe *PhiR,
VPTransformState &State) {
@@ -4745,38 +4636,14 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
// iteration. If the instruction is uniform, we only need to generate the
// first lane. Otherwise, we generate all VF values.
bool IsUniform = Cost->isUniformAfterVectorization(P, State.VF);
- unsigned Lanes = IsUniform ? 1 : State.VF.getKnownMinValue();
-
- bool NeedsVectorIndex = !IsUniform && VF.isScalable();
- Value *UnitStepVec = nullptr, *PtrIndSplat = nullptr;
- if (NeedsVectorIndex) {
- Type *VecIVTy = VectorType::get(PtrInd->getType(), VF);
- UnitStepVec = Builder.CreateStepVector(VecIVTy);
- PtrIndSplat = Builder.CreateVectorSplat(VF, PtrInd);
- }
+ assert((IsUniform || !State.VF.isScalable()) &&
+ "Cannot scalarize a scalable VF");
+ unsigned Lanes = IsUniform ? 1 : State.VF.getFixedValue();
for (unsigned Part = 0; Part < UF; ++Part) {
Value *PartStart =
createStepForVF(Builder, PtrInd->getType(), VF, Part);
- if (NeedsVectorIndex) {
- // Here we cache the whole vector, which means we can support the
- // extraction of any lane. However, in some cases the extractelement
- // instruction that is generated for scalar uses of this vector (e.g.
- // a load instruction) is not folded away. Therefore we still
- // calculate values for the first n lanes to avoid redundant moves
- // (when extracting the 0th element) and to produce scalar code (i.e.
- // additional add/gep instructions instead of expensive extractelement
- // instructions) when extracting higher-order elements.
- Value *PartStartSplat = Builder.CreateVectorSplat(VF, PartStart);
- Value *Indices = Builder.CreateAdd(PartStartSplat, UnitStepVec);
- Value *GlobalIndices = Builder.CreateAdd(PtrIndSplat, Indices);
- Value *SclrGep =
- emitTransformedIndex(Builder, GlobalIndices, PSE.getSE(), DL, II);
- SclrGep->setName("next.gep");
- State.set(PhiR, SclrGep, Part);
- }
-
for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
Value *Idx = Builder.CreateAdd(
PartStart, ConstantInt::get(PtrInd->getType(), Lane));
@@ -4858,114 +4725,6 @@ static bool mayDivideByZero(Instruction &I) {
return !CInt || CInt->isZero();
}
-void InnerLoopVectorizer::widenInstruction(Instruction &I, VPValue *Def,
- VPUser &User,
- VPTransformState &State) {
- switch (I.getOpcode()) {
- case Instruction::Call:
- case Instruction::Br:
- case Instruction::PHI:
- case Instruction::GetElementPtr:
- case Instruction::Select:
- llvm_unreachable("This instruction is handled by a different recipe.");
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::SRem:
- case Instruction::URem:
- case Instruction::Add:
- case Instruction::FAdd:
- case Instruction::Sub:
- case Instruction::FSub:
- case Instruction::FNeg:
- case Instruction::Mul:
- case Instruction::FMul:
- case Instruction::FDiv:
- case Instruction::FRem:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor: {
- // Just widen unops and binops.
- setDebugLocFromInst(&I);
-
- for (unsigned Part = 0; Part < UF; ++Part) {
- SmallVector<Value *, 2> Ops;
- for (VPValue *VPOp : User.operands())
- Ops.push_back(State.get(VPOp, Part));
-
- Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops);
-
- if (auto *VecOp = dyn_cast<Instruction>(V))
- VecOp->copyIRFlags(&I);
-
- // Use this vector value for all users of the original instruction.
- State.set(Def, V, Part);
- addMetadata(V, &I);
- }
-
- break;
- }
- case Instruction::ICmp:
- case Instruction::FCmp: {
- // Widen compares. Generate vector compares.
- bool FCmp = (I.getOpcode() == Instruction::FCmp);
- auto *Cmp = cast<CmpInst>(&I);
- setDebugLocFromInst(Cmp);
- for (unsigned Part = 0; Part < UF; ++Part) {
- Value *A = State.get(User.getOperand(0), Part);
- Value *B = State.get(User.getOperand(1), Part);
- Value *C = nullptr;
- if (FCmp) {
- // Propagate fast math flags.
- IRBuilder<>::FastMathFlagGuard FMFG(Builder);
- Builder.setFastMathFlags(Cmp->getFastMathFlags());
- C = Builder.CreateFCmp(Cmp->getPredicate(), A, B);
- } else {
- C = Builder.CreateICmp(Cmp->getPredicate(), A, B);
- }
- State.set(Def, C, Part);
- addMetadata(C, &I);
- }
-
- break;
- }
-
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::FPExt:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::SIToFP:
- case Instruction::UIToFP:
- case Instruction::Trunc:
- case Instruction::FPTrunc:
- case Instruction::BitCast: {
- auto *CI = cast<CastInst>(&I);
- setDebugLocFromInst(CI);
-
- /// Vectorize casts.
- Type *DestTy =
- (VF.isScalar()) ? CI->getType() : VectorType::get(CI->getType(), VF);
-
- for (unsigned Part = 0; Part < UF; ++Part) {
- Value *A = State.get(User.getOperand(0), Part);
- Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy);
- State.set(Def, Cast, Part);
- addMetadata(Cast, &I);
- }
- break;
- }
- default:
- // This instruction is not vectorized by simple widening.
- LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I);
- llvm_unreachable("Unhandled instruction!");
- } // end of switch.
-}
-
void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def,
VPUser &ArgOperands,
VPTransformState &State) {
@@ -5039,31 +4798,6 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def,
}
}
-void InnerLoopVectorizer::widenSelectInstruction(SelectInst &I, VPValue *VPDef,
- VPUser &Operands,
- bool InvariantCond,
- VPTransformState &State) {
- setDebugLocFromInst(&I);
-
- // The condition can be loop invariant but still defined inside the
- // loop. This means that we can't just use the original 'cond' value.
- // We have to take the 'vectorized' value and pick the first lane.
- // Instcombine will make this a no-op.
- auto *InvarCond = InvariantCond
- ? State.get(Operands.getOperand(0), VPIteration(0, 0))
- : nullptr;
-
- for (unsigned Part = 0; Part < UF; ++Part) {
- Value *Cond =
- InvarCond ? InvarCond : State.get(Operands.getOperand(0), Part);
- Value *Op0 = State.get(Operands.getOperand(1), Part);
- Value *Op1 = State.get(Operands.getOperand(2), Part);
- Value *Sel = Builder.CreateSelect(Cond, Op0, Op1);
- State.set(VPDef, Sel, Part);
- addMetadata(Sel, &I);
- }
-}
-
void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
// We should not collect Scalars more than once per VF. Right now, this
// function is called from collectUniformsAndScalars(), which already does
@@ -5103,38 +4837,11 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
!TheLoop->isLoopInvariant(V);
};
- auto isScalarPtrInduction = [&](Instruction *MemAccess, Value *Ptr) {
- if (!isa<PHINode>(Ptr) ||
- !Legal->getInductionVars().count(cast<PHINode>(Ptr)))
- return false;
- auto &Induction = Legal->getInductionVars()[cast<PHINode>(Ptr)];
- if (Induction.getKind() != InductionDescriptor::IK_PtrInduction)
- return false;
- return isScalarUse(MemAccess, Ptr);
- };
-
- // A helper that evaluates a memory access's use of a pointer. If the
- // pointer is actually the pointer induction of a loop, it is being
- // inserted into Worklist. If the use will be a scalar use, and the
- // pointer is only used by memory accesses, we place the pointer in
- // ScalarPtrs. Otherwise, the pointer is placed in PossibleNonScalarPtrs.
+ // A helper that evaluates a memory access's use of a pointer. If the use will
+ // be a scalar use and the pointer is only used by memory accesses, we place
+ // the pointer in ScalarPtrs. Otherwise, the pointer is placed in
+ // PossibleNonScalarPtrs.
auto evaluatePtrUse = [&](Instruction *MemAccess, Value *Ptr) {
- if (isScalarPtrInduction(MemAccess, Ptr)) {
- Worklist.insert(cast<Instruction>(Ptr));
- LLVM_DEBUG(dbgs() << "LV: Found new scalar instruction: " << *Ptr
- << "\n");
-
- Instruction *Update = cast<Instruction>(
- cast<PHINode>(Ptr)->getIncomingValueForBlock(Latch));
-
- // If there is more than one user of Update (Ptr), we shouldn't assume it
- // will be scalar after vectorisation as other users of the instruction
- // may require widening. Otherwise, add it to ScalarPtrs.
- if (Update->hasOneUse() && cast<Value>(*Update->user_begin()) == Ptr) {
- ScalarPtrs.insert(Update);
- return;
- }
- }
// We only care about bitcast and getelementptr instructions contained in
// the loop.
if (!isLoopVaryingBitCastOrGEP(Ptr))
@@ -5226,11 +4933,22 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
if (Ind == Legal->getPrimaryInduction() && foldTailByMasking())
continue;
+ // Returns true if \p Indvar is a pointer induction that is used directly by
+ // load/store instruction \p I.
+ auto IsDirectLoadStoreFromPtrIndvar = [&](Instruction *Indvar,
+ Instruction *I) {
+ return Induction.second.getKind() ==
+ InductionDescriptor::IK_PtrInduction &&
+ (isa<LoadInst>(I) || isa<StoreInst>(I)) &&
+ Indvar == getLoadStorePointerOperand(I) && isScalarUse(I, Indvar);
+ };
+
// Determine if all users of the induction variable are scalar after
// vectorization.
auto ScalarInd = llvm::all_of(Ind->users(), [&](User *U) -> bool {
auto *I = cast<Instruction>(U);
- return I == IndUpdate || !TheLoop->contains(I) || Worklist.count(I);
+ return I == IndUpdate || !TheLoop->contains(I) || Worklist.count(I) ||
+ IsDirectLoadStoreFromPtrIndvar(Ind, I);
});
if (!ScalarInd)
continue;
@@ -5240,7 +4958,8 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
auto ScalarIndUpdate =
llvm::all_of(IndUpdate->users(), [&](User *U) -> bool {
auto *I = cast<Instruction>(U);
- return I == Ind || !TheLoop->contains(I) || Worklist.count(I);
+ return I == Ind || !TheLoop->contains(I) || Worklist.count(I) ||
+ IsDirectLoadStoreFromPtrIndvar(IndUpdate, I);
});
if (!ScalarIndUpdate)
continue;
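
To illustrate what IsDirectLoadStoreFromPtrIndvar matches, consider a hypothetical loop (a sketch, not taken from the patch or its tests) whose pointer induction is used only as the address of its loads; such an induction can stay scalar after vectorization:

    // Illustrative sketch: the pointer induction 'p' is used directly as the
    // load address, so the new check lets it be treated as scalar.
    int sum(const int *p, int n) {
      int s = 0;
      for (int i = 0; i < n; ++i) {
        s += *p; // load whose pointer operand is the induction itself
        ++p;     // pointer induction update
      }
      return s;
    }
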
@@ -7079,6 +6798,8 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
unsigned AS = getLoadStoreAddressSpace(I);
Value *Ptr = getLoadStorePointerOperand(I);
Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
+ // NOTE: PtrTy is a vector to signal `TTI::getAddressComputationCost`
+ // that it is being called from this specific place.
// Figure out whether the access is strided and get the stride value
// if it's known in compile time
@@ -7286,6 +7007,12 @@ Optional<InstructionCost> LoopVectorizationCostModel::getReductionPatternCost(
InstructionCost BaseCost = TTI.getArithmeticReductionCost(
RdxDesc.getOpcode(), VectorTy, RdxDesc.getFastMathFlags(), CostKind);
+ // For a call to the llvm.fmuladd intrinsic we need to add the cost of a
+ // normal fmul instruction to the cost of the fadd reduction.
+ if (RdxDesc.getRecurrenceKind() == RecurKind::FMulAdd)
+ BaseCost +=
+ TTI.getArithmeticInstrCost(Instruction::FMul, VectorTy, CostKind);
+
// If we're using ordered reductions then we can just return the base cost
// here, since getArithmeticReductionCost calculates the full ordered
// reduction cost when FP reassociation is not allowed.
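
As a scalar-level sketch (illustrative, not code from the patch), the pattern being costed here is a dot-product style reduction whose multiply-add may be contracted into llvm.fmuladd; the extra fmul cost added above accounts for the multiply half of that contraction:

    #include <cstddef>

    // Sketch: each iteration may be contracted to
    // acc = llvm.fmuladd(a[i], b[i], acc), which the cost model prices as
    // one fmul plus the fadd reduction (BaseCost).
    float dot(const float *a, const float *b, std::size_t n) {
      float acc = 0.0f;
      for (std::size_t i = 0; i < n; ++i)
        acc += a[i] * b[i];
      return acc;
    }
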
@@ -7962,6 +7689,9 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
return TTI.getCastInstrCost(Opcode, VectorTy, SrcVecTy, CCH, CostKind, I);
}
case Instruction::Call: {
+ if (RecurrenceDescriptor::isFMulAddIntrinsic(I))
+ if (auto RedCost = getReductionPatternCost(I, VF, VectorTy, CostKind))
+ return *RedCost;
bool NeedToScalarize;
CallInst *CI = cast<CallInst>(I);
InstructionCost CallCost = getVectorCallCost(CI, VF, NeedToScalarize);
@@ -8260,6 +7990,7 @@ void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF,
State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton();
State.TripCount = ILV.getOrCreateTripCount(nullptr);
State.CanonicalIV = ILV.Induction;
+ ILV.collectPoisonGeneratingRecipes(State);
ILV.printDebugTracesAtStart();
@@ -8468,7 +8199,8 @@ void EpilogueVectorizerMainLoop::printDebugTracesAtStart() {
void EpilogueVectorizerMainLoop::printDebugTracesAtEnd() {
DEBUG_WITH_TYPE(VerboseDebug, {
- dbgs() << "intermediate fn:\n" << *Induction->getFunction() << "\n";
+ dbgs() << "intermediate fn:\n"
+ << *OrigLoop->getHeader()->getParent() << "\n";
});
}
@@ -8666,7 +8398,7 @@ void EpilogueVectorizerEpilogueLoop::printDebugTracesAtStart() {
void EpilogueVectorizerEpilogueLoop::printDebugTracesAtEnd() {
DEBUG_WITH_TYPE(VerboseDebug, {
- dbgs() << "final fn:\n" << *Induction->getFunction() << "\n";
+ dbgs() << "final fn:\n" << *OrigLoop->getHeader()->getParent() << "\n";
});
}
@@ -9052,7 +8784,8 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
Range);
bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange(
- [&](ElementCount VF) { return CM.isPredicatedInst(I); }, Range);
+ [&](ElementCount VF) { return CM.isPredicatedInst(I, IsUniform); },
+ Range);
// Even if the instruction is not marked as uniform, there are certain
// intrinsic calls that can be effectively treated as such, so we check for
@@ -9354,7 +9087,9 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
if (VPBB)
VPBlockUtils::insertBlockAfter(FirstVPBBForBB, VPBB);
else {
- Plan->setEntry(FirstVPBBForBB);
+ auto *TopRegion = new VPRegionBlock("vector loop");
+ TopRegion->setEntry(FirstVPBBForBB);
+ Plan->setEntry(TopRegion);
HeaderVPBB = FirstVPBBForBB;
}
VPBB = FirstVPBBForBB;
@@ -9426,9 +9161,11 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
}
}
- assert(isa<VPBasicBlock>(Plan->getEntry()) &&
+ assert(isa<VPRegionBlock>(Plan->getEntry()) &&
!Plan->getEntry()->getEntryBasicBlock()->empty() &&
- "entry block must be set to a non-empty VPBasicBlock");
+ "entry block must be set to a VPRegionBlock having a non-empty entry "
+ "VPBasicBlock");
+ cast<VPRegionBlock>(Plan->getEntry())->setExit(VPBB);
RecipeBuilder.fixHeaderPhis();
// ---------------------------------------------------------------------------
@@ -9653,12 +9390,17 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
unsigned FirstOpId;
assert(!RecurrenceDescriptor::isSelectCmpRecurrenceKind(Kind) &&
"Only min/max recurrences allowed for inloop reductions");
+ // Recognize a call to the llvm.fmuladd intrinsic.
+ bool IsFMulAdd = (Kind == RecurKind::FMulAdd);
+ assert((!IsFMulAdd || RecurrenceDescriptor::isFMulAddIntrinsic(R)) &&
+ "Expected instruction to be a call to the llvm.fmuladd intrinsic");
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
assert(isa<VPWidenSelectRecipe>(WidenRecipe) &&
"Expected to replace a VPWidenSelectSC");
FirstOpId = 1;
} else {
- assert((MinVF.isScalar() || isa<VPWidenRecipe>(WidenRecipe)) &&
+ assert((MinVF.isScalar() || isa<VPWidenRecipe>(WidenRecipe) ||
+ (IsFMulAdd && isa<VPWidenCallRecipe>(WidenRecipe))) &&
"Expected to replace a VPWidenSC");
FirstOpId = 0;
}
@@ -9669,8 +9411,20 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
auto *CondOp = CM.foldTailByMasking()
? RecipeBuilder.createBlockInMask(R->getParent(), Plan)
: nullptr;
- VPReductionRecipe *RedRecipe = new VPReductionRecipe(
- &RdxDesc, R, ChainOp, VecOp, CondOp, TTI);
+
+ if (IsFMulAdd) {
+ // If the instruction is a call to the llvm.fmuladd intrinsic then we
+ // need to create an fmul recipe to use as the vector operand for the
+ // fadd reduction.
+ VPInstruction *FMulRecipe = new VPInstruction(
+ Instruction::FMul, {VecOp, Plan->getVPValue(R->getOperand(1))});
+ FMulRecipe->setFastMathFlags(R->getFastMathFlags());
+ WidenRecipe->getParent()->insert(FMulRecipe,
+ WidenRecipe->getIterator());
+ VecOp = FMulRecipe;
+ }
+ VPReductionRecipe *RedRecipe =
+ new VPReductionRecipe(&RdxDesc, R, ChainOp, VecOp, CondOp, TTI);
WidenRecipe->getVPSingleValue()->replaceAllUsesWith(RedRecipe);
Plan->removeVPValueFor(R);
Plan->addVPValue(R, RedRecipe);
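
Conceptually (a hedged sketch of the underlying identity, not the recipe code itself), the rewrite splits the fused operation so the reduction chain only sees an fadd:

    #include <cstdio>

    // fmuladd(a, b, c) is modelled as fadd(c, fmul(a, b)): the new FMul
    // VPInstruction produces the product, and the fadd becomes the in-loop
    // reduction. With exactly representable values the two forms agree.
    int main() {
      float a = 2.0f, b = 3.0f, c = 4.0f;
      float fused = a * b + c;   // llvm.fmuladd semantics (may fuse)
      float split = c + (a * b); // FMul recipe feeding the fadd reduction
      std::printf("%f %f\n", fused, split); // both print 10.000000
    }
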
@@ -9744,18 +9498,218 @@ void VPWidenCallRecipe::execute(VPTransformState &State) {
}
void VPWidenSelectRecipe::execute(VPTransformState &State) {
- State.ILV->widenSelectInstruction(*cast<SelectInst>(getUnderlyingInstr()),
- this, *this, InvariantCond, State);
+ auto &I = *cast<SelectInst>(getUnderlyingInstr());
+ State.ILV->setDebugLocFromInst(&I);
+
+ // The condition can be loop invariant but still defined inside the
+ // loop. This means that we can't just use the original 'cond' value.
+ // We have to take the 'vectorized' value and pick the first lane.
+ // Instcombine will make this a no-op.
+ auto *InvarCond =
+ InvariantCond ? State.get(getOperand(0), VPIteration(0, 0)) : nullptr;
+
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ Value *Cond = InvarCond ? InvarCond : State.get(getOperand(0), Part);
+ Value *Op0 = State.get(getOperand(1), Part);
+ Value *Op1 = State.get(getOperand(2), Part);
+ Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
+ State.set(this, Sel, Part);
+ State.ILV->addMetadata(Sel, &I);
+ }
}
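
The comment above is easiest to see on a hypothetical source loop (not from the patch) whose select condition is loop-invariant yet defined inside the loop body; the widened code then reads lane 0 of the condition's vectorized value:

    // Illustrative sketch: 'big' is invariant but computed in-loop.
    void clamp(float *a, float t, int n) {
      for (int i = 0; i < n; ++i) {
        bool big = t > 1.0f;   // loop-invariant, defined inside the loop
        a[i] = big ? t : a[i]; // select widened with lane 0 of 'big'
      }
    }
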
void VPWidenRecipe::execute(VPTransformState &State) {
- State.ILV->widenInstruction(*getUnderlyingInstr(), this, *this, State);
+ auto &I = *cast<Instruction>(getUnderlyingValue());
+ auto &Builder = State.Builder;
+ switch (I.getOpcode()) {
+ case Instruction::Call:
+ case Instruction::Br:
+ case Instruction::PHI:
+ case Instruction::GetElementPtr:
+ case Instruction::Select:
+ llvm_unreachable("This instruction is handled by a different recipe.");
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::SRem:
+ case Instruction::URem:
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::FNeg:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ // Just widen unops and binops.
+ State.ILV->setDebugLocFromInst(&I);
+
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ SmallVector<Value *, 2> Ops;
+ for (VPValue *VPOp : operands())
+ Ops.push_back(State.get(VPOp, Part));
+
+ Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops);
+
+ if (auto *VecOp = dyn_cast<Instruction>(V)) {
+ VecOp->copyIRFlags(&I);
+
+ // If the instruction is vectorized and was in a basic block that needed
+ // predication, we can't propagate poison-generating flags (nuw/nsw,
+ // exact, etc.). The control flow has been linearized and the
+        // instruction is no longer guarded by the predicate, so the flag
+        // properties may no longer hold.
+ if (State.MayGeneratePoisonRecipes.count(this) > 0)
+ VecOp->dropPoisonGeneratingFlags();
+ }
+
+ // Use this vector value for all users of the original instruction.
+ State.set(this, V, Part);
+ State.ILV->addMetadata(V, &I);
+ }
+
+ break;
+ }
+ case Instruction::ICmp:
+ case Instruction::FCmp: {
+ // Widen compares. Generate vector compares.
+ bool FCmp = (I.getOpcode() == Instruction::FCmp);
+ auto *Cmp = cast<CmpInst>(&I);
+ State.ILV->setDebugLocFromInst(Cmp);
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ Value *A = State.get(getOperand(0), Part);
+ Value *B = State.get(getOperand(1), Part);
+ Value *C = nullptr;
+ if (FCmp) {
+ // Propagate fast math flags.
+ IRBuilder<>::FastMathFlagGuard FMFG(Builder);
+ Builder.setFastMathFlags(Cmp->getFastMathFlags());
+ C = Builder.CreateFCmp(Cmp->getPredicate(), A, B);
+ } else {
+ C = Builder.CreateICmp(Cmp->getPredicate(), A, B);
+ }
+ State.set(this, C, Part);
+ State.ILV->addMetadata(C, &I);
+ }
+
+ break;
+ }
+
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast: {
+ auto *CI = cast<CastInst>(&I);
+ State.ILV->setDebugLocFromInst(CI);
+
+    // Vectorize casts.
+ Type *DestTy = (State.VF.isScalar())
+ ? CI->getType()
+ : VectorType::get(CI->getType(), State.VF);
+
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ Value *A = State.get(getOperand(0), Part);
+ Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy);
+ State.set(this, Cast, Part);
+ State.ILV->addMetadata(Cast, &I);
+ }
+ break;
+ }
+ default:
+ // This instruction is not vectorized by simple widening.
+ LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I);
+ llvm_unreachable("Unhandled instruction!");
+ } // end of switch.
}
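
A sketch of the hazard the MayGeneratePoisonRecipes check guards against (hypothetical source, not from the patch): in the scalar loop the flagged operation only runs under its guard, but after linearization every lane evaluates it:

    // Illustrative sketch: 'a[i] + 1' (add nsw in IR) is guarded here, so
    // overflow cannot occur in executed iterations. Once the vector control
    // flow is linearized, masked-out lanes evaluate the add too, and keeping
    // nsw could introduce poison in those lanes; hence the flags are dropped.
    int sum_guarded(const int *a, const unsigned char *cond, int n) {
      int s = 0;
      for (int i = 0; i < n; ++i)
        if (cond[i])
          s += a[i] + 1;
      return s;
    }
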
void VPWidenGEPRecipe::execute(VPTransformState &State) {
- State.ILV->widenGEP(cast<GetElementPtrInst>(getUnderlyingInstr()), this,
- *this, State.UF, State.VF, IsPtrLoopInvariant,
- IsIndexLoopInvariant, State);
+ auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
+ // Construct a vector GEP by widening the operands of the scalar GEP as
+ // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
+ // results in a vector of pointers when at least one operand of the GEP
+ // is vector-typed. Thus, to keep the representation compact, we only use
+ // vector-typed operands for loop-varying values.
+
+ if (State.VF.isVector() && IsPtrLoopInvariant && IsIndexLoopInvariant.all()) {
+ // If we are vectorizing, but the GEP has only loop-invariant operands,
+ // the GEP we build (by only using vector-typed operands for
+ // loop-varying values) would be a scalar pointer. Thus, to ensure we
+ // produce a vector of pointers, we need to either arbitrarily pick an
+ // operand to broadcast, or broadcast a clone of the original GEP.
+ // Here, we broadcast a clone of the original.
+ //
+ // TODO: If at some point we decide to scalarize instructions having
+ // loop-invariant operands, this special case will no longer be
+ // required. We would add the scalarization decision to
+ // collectLoopScalars() and teach getVectorValue() to broadcast
+ // the lane-zero scalar value.
+ auto *Clone = State.Builder.Insert(GEP->clone());
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, Clone);
+ State.set(this, EntryPart, Part);
+ State.ILV->addMetadata(EntryPart, GEP);
+ }
+ } else {
+ // If the GEP has at least one loop-varying operand, we are sure to
+ // produce a vector of pointers. But if we are only unrolling, we want
+ // to produce a scalar GEP for each unroll part. Thus, the GEP we
+ // produce with the code below will be scalar (if VF == 1) or vector
+ // (otherwise). Note that for the unroll-only case, we still maintain
+ // values in the vector mapping with initVector, as we do for other
+ // instructions.
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ // The pointer operand of the new GEP. If it's loop-invariant, we
+ // won't broadcast it.
+ auto *Ptr = IsPtrLoopInvariant
+ ? State.get(getOperand(0), VPIteration(0, 0))
+ : State.get(getOperand(0), Part);
+
+ // Collect all the indices for the new GEP. If any index is
+ // loop-invariant, we won't broadcast it.
+ SmallVector<Value *, 4> Indices;
+ for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
+ VPValue *Operand = getOperand(I);
+ if (IsIndexLoopInvariant[I - 1])
+ Indices.push_back(State.get(Operand, VPIteration(0, 0)));
+ else
+ Indices.push_back(State.get(Operand, Part));
+ }
+
+ // If the GEP instruction is vectorized and was in a basic block that
+ // needed predication, we can't propagate the poison-generating 'inbounds'
+ // flag. The control flow has been linearized and the GEP is no longer
+      // guarded by the predicate, so the 'inbounds' property may no longer
+      // hold.
+ bool IsInBounds =
+ GEP->isInBounds() && State.MayGeneratePoisonRecipes.count(this) == 0;
+
+ // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
+ // but it should be a vector, otherwise.
+ auto *NewGEP = IsInBounds
+ ? State.Builder.CreateInBoundsGEP(
+ GEP->getSourceElementType(), Ptr, Indices)
+ : State.Builder.CreateGEP(GEP->getSourceElementType(),
+ Ptr, Indices);
+ assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
+ "NewGEP is not a pointer vector");
+ State.set(this, NewGEP, Part);
+ State.ILV->addMetadata(NewGEP, GEP);
+ }
+ }
}
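
For intuition, a hypothetical loop (not from the patch) whose GEP has a loop-invariant base and a loop-varying index; only the index is widened, and the resulting GEP yields a vector of lane-wise pointers:

    // Illustrative sketch: &a[idx[i]] becomes a GEP with the scalar base 'a'
    // and a vector of indices, i.e. a vector of pointers, one per lane.
    float gather_sum(const float *a, const int *idx, int n) {
      float s = 0.0f;
      for (int i = 0; i < n; ++i)
        s += a[idx[i]];
      return s;
    }
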
void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
@@ -9867,8 +9821,8 @@ void VPReductionRecipe::execute(VPTransformState &State) {
void VPReplicateRecipe::execute(VPTransformState &State) {
if (State.Instance) { // Generate a single instance.
assert(!State.VF.isScalable() && "Can't scalarize a scalable vector");
- State.ILV->scalarizeInstruction(getUnderlyingInstr(), this, *this,
- *State.Instance, IsPredicated, State);
+ State.ILV->scalarizeInstruction(getUnderlyingInstr(), this, *State.Instance,
+ IsPredicated, State);
// Insert scalar instance packing it into a vector.
if (AlsoPack && State.VF.isVector()) {
// If we're constructing lane 0, initialize to start from poison.
@@ -9891,7 +9845,7 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
"Can't scalarize a scalable vector");
for (unsigned Part = 0; Part < State.UF; ++Part)
for (unsigned Lane = 0; Lane < EndLane; ++Lane)
- State.ILV->scalarizeInstruction(getUnderlyingInstr(), this, *this,
+ State.ILV->scalarizeInstruction(getUnderlyingInstr(), this,
VPIteration(Part, Lane), IsPredicated,
State);
}
@@ -9970,9 +9924,129 @@ void VPPredInstPHIRecipe::execute(VPTransformState &State) {
void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
VPValue *StoredValue = isStore() ? getStoredValue() : nullptr;
- State.ILV->vectorizeMemoryInstruction(
- &Ingredient, State, StoredValue ? nullptr : getVPSingleValue(), getAddr(),
- StoredValue, getMask(), Consecutive, Reverse);
+
+ // Attempt to issue a wide load.
+ LoadInst *LI = dyn_cast<LoadInst>(&Ingredient);
+ StoreInst *SI = dyn_cast<StoreInst>(&Ingredient);
+
+ assert((LI || SI) && "Invalid Load/Store instruction");
+ assert((!SI || StoredValue) && "No stored value provided for widened store");
+ assert((!LI || !StoredValue) && "Stored value provided for widened load");
+
+ Type *ScalarDataTy = getLoadStoreType(&Ingredient);
+
+ auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
+ const Align Alignment = getLoadStoreAlignment(&Ingredient);
+ bool CreateGatherScatter = !Consecutive;
+
+ auto &Builder = State.Builder;
+ InnerLoopVectorizer::VectorParts BlockInMaskParts(State.UF);
+ bool isMaskRequired = getMask();
+ if (isMaskRequired)
+ for (unsigned Part = 0; Part < State.UF; ++Part)
+ BlockInMaskParts[Part] = State.get(getMask(), Part);
+
+ const auto CreateVecPtr = [&](unsigned Part, Value *Ptr) -> Value * {
+ // Calculate the pointer for the specific unroll-part.
+ GetElementPtrInst *PartPtr = nullptr;
+
+ bool InBounds = false;
+ if (auto *gep = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts()))
+ InBounds = gep->isInBounds();
+ if (Reverse) {
+ // If the address is consecutive but reversed, then the
+ // wide store needs to start at the last vector element.
+ // RunTimeVF = VScale * VF.getKnownMinValue()
+      // For fixed-width vectors, VScale is 1, so RunTimeVF = VF.getKnownMinValue().
+ Value *RunTimeVF = getRuntimeVF(Builder, Builder.getInt32Ty(), State.VF);
+ // NumElt = -Part * RunTimeVF
+ Value *NumElt = Builder.CreateMul(Builder.getInt32(-Part), RunTimeVF);
+ // LastLane = 1 - RunTimeVF
+ Value *LastLane = Builder.CreateSub(Builder.getInt32(1), RunTimeVF);
+ PartPtr =
+ cast<GetElementPtrInst>(Builder.CreateGEP(ScalarDataTy, Ptr, NumElt));
+ PartPtr->setIsInBounds(InBounds);
+ PartPtr = cast<GetElementPtrInst>(
+ Builder.CreateGEP(ScalarDataTy, PartPtr, LastLane));
+ PartPtr->setIsInBounds(InBounds);
+ if (isMaskRequired) // Reverse of a null all-one mask is a null mask.
+ BlockInMaskParts[Part] =
+ Builder.CreateVectorReverse(BlockInMaskParts[Part], "reverse");
+ } else {
+ Value *Increment =
+ createStepForVF(Builder, Builder.getInt32Ty(), State.VF, Part);
+ PartPtr = cast<GetElementPtrInst>(
+ Builder.CreateGEP(ScalarDataTy, Ptr, Increment));
+ PartPtr->setIsInBounds(InBounds);
+ }
+
+ unsigned AddressSpace = Ptr->getType()->getPointerAddressSpace();
+ return Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace));
+ };
+
+ // Handle Stores:
+ if (SI) {
+ State.ILV->setDebugLocFromInst(SI);
+
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ Instruction *NewSI = nullptr;
+ Value *StoredVal = State.get(StoredValue, Part);
+ if (CreateGatherScatter) {
+ Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
+ Value *VectorGep = State.get(getAddr(), Part);
+ NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment,
+ MaskPart);
+ } else {
+ if (Reverse) {
+ // If we store to reverse consecutive memory locations, then we need
+ // to reverse the order of elements in the stored value.
+ StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
+ // We don't want to update the value in the map as it might be used in
+ // another expression. So don't call resetVectorValue(StoredVal).
+ }
+ auto *VecPtr =
+ CreateVecPtr(Part, State.get(getAddr(), VPIteration(0, 0)));
+ if (isMaskRequired)
+ NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
+ BlockInMaskParts[Part]);
+ else
+ NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
+ }
+ State.ILV->addMetadata(NewSI, SI);
+ }
+ return;
+ }
+
+ // Handle loads.
+ assert(LI && "Must have a load instruction");
+ State.ILV->setDebugLocFromInst(LI);
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ Value *NewLI;
+ if (CreateGatherScatter) {
+ Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
+ Value *VectorGep = State.get(getAddr(), Part);
+ NewLI = Builder.CreateMaskedGather(DataTy, VectorGep, Alignment, MaskPart,
+ nullptr, "wide.masked.gather");
+ State.ILV->addMetadata(NewLI, LI);
+ } else {
+ auto *VecPtr =
+ CreateVecPtr(Part, State.get(getAddr(), VPIteration(0, 0)));
+ if (isMaskRequired)
+ NewLI = Builder.CreateMaskedLoad(
+ DataTy, VecPtr, Alignment, BlockInMaskParts[Part],
+ PoisonValue::get(DataTy), "wide.masked.load");
+ else
+ NewLI =
+ Builder.CreateAlignedLoad(DataTy, VecPtr, Alignment, "wide.load");
+
+      // Add metadata to the load, but set the vector value to the reverse
+      // shuffle.
+ State.ILV->addMetadata(NewLI, LI);
+ if (Reverse)
+ NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
+ }
+
+ State.set(getVPSingleValue(), NewLI, Part);
+ }
}
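
To make the reverse-addressing arithmetic in CreateVecPtr concrete, a standalone sketch assuming a fixed-width VF (vscale == 1, so RunTimeVF == VF); the values are illustrative only:

    #include <cstdio>

    // For each unroll part, the widened pointer is offset by
    // NumElt = -Part * RunTimeVF and then LastLane = 1 - RunTimeVF, so the
    // part's window is addressed from its lowest element.
    int main() {
      const int VF = 4, UF = 2;
      for (int Part = 0; Part < UF; ++Part) {
        int NumElt = -Part * VF;
        int LastLane = 1 - VF;
        std::printf("part %d: window [%d, %d]\n", Part, NumElt + LastLane,
                    NumElt);
      }
      // Prints: part 0: window [-3, 0]
      //         part 1: window [-7, -4]
    }
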
// Determine how to lower the scalar epilogue, which depends on 1) optimising
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e3ef0b794f68..95061e9053fa 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -283,6 +283,26 @@ static bool isCommutative(Instruction *I) {
return false;
}
+/// Checks if the given value is actually an undefined constant vector.
+static bool isUndefVector(const Value *V) {
+ if (isa<UndefValue>(V))
+ return true;
+ auto *C = dyn_cast<Constant>(V);
+ if (!C)
+ return false;
+ if (!C->containsUndefOrPoisonElement())
+ return false;
+ auto *VecTy = dyn_cast<FixedVectorType>(C->getType());
+ if (!VecTy)
+ return false;
+ for (unsigned I = 0, E = VecTy->getNumElements(); I != E; ++I) {
+ if (Constant *Elem = C->getAggregateElement(I))
+ if (!isa<UndefValue>(Elem))
+ return false;
+ }
+ return true;
+}
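
A plain-C++ analogue of the predicate (a sketch; the real code inspects Constant aggregate elements rather than optionals):

    #include <array>
    #include <cstddef>
    #include <optional>

    // A vector constant counts as undef only if no element is defined.
    template <std::size_t N>
    bool isUndefVector(const std::array<std::optional<int>, N> &Vec) {
      for (const auto &Elem : Vec)
        if (Elem.has_value())
          return false; // one defined element makes the vector defined
      return true;
    }
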
+
/// Checks if the vector of instructions can be represented as a shuffle, like:
/// %x0 = extractelement <4 x i8> %x, i32 0
/// %x3 = extractelement <4 x i8> %x, i32 3
@@ -327,7 +347,11 @@ static bool isCommutative(Instruction *I) {
/// TargetTransformInfo::getInstructionThroughput?
static Optional<TargetTransformInfo::ShuffleKind>
isFixedVectorShuffle(ArrayRef<Value *> VL, SmallVectorImpl<int> &Mask) {
- auto *EI0 = cast<ExtractElementInst>(VL[0]);
+ const auto *It =
+ find_if(VL, [](Value *V) { return isa<ExtractElementInst>(V); });
+ if (It == VL.end())
+ return None;
+ auto *EI0 = cast<ExtractElementInst>(*It);
if (isa<ScalableVectorType>(EI0->getVectorOperandType()))
return None;
unsigned Size =
@@ -336,33 +360,41 @@ isFixedVectorShuffle(ArrayRef<Value *> VL, SmallVectorImpl<int> &Mask) {
Value *Vec2 = nullptr;
enum ShuffleMode { Unknown, Select, Permute };
ShuffleMode CommonShuffleMode = Unknown;
+ Mask.assign(VL.size(), UndefMaskElem);
for (unsigned I = 0, E = VL.size(); I < E; ++I) {
+ // Undef can be represented as an undef element in a vector.
+ if (isa<UndefValue>(VL[I]))
+ continue;
auto *EI = cast<ExtractElementInst>(VL[I]);
+ if (isa<ScalableVectorType>(EI->getVectorOperandType()))
+ return None;
auto *Vec = EI->getVectorOperand();
+    // We can extractelement from an undef or poison vector.
+ if (isUndefVector(Vec))
+ continue;
// All vector operands must have the same number of vector elements.
if (cast<FixedVectorType>(Vec->getType())->getNumElements() != Size)
return None;
+ if (isa<UndefValue>(EI->getIndexOperand()))
+ continue;
auto *Idx = dyn_cast<ConstantInt>(EI->getIndexOperand());
if (!Idx)
return None;
// Undefined behavior if Idx is negative or >= Size.
- if (Idx->getValue().uge(Size)) {
- Mask.push_back(UndefMaskElem);
+ if (Idx->getValue().uge(Size))
continue;
- }
unsigned IntIdx = Idx->getValue().getZExtValue();
- Mask.push_back(IntIdx);
- // We can extractelement from undef or poison vector.
- if (isa<UndefValue>(Vec))
- continue;
+ Mask[I] = IntIdx;
// For correct shuffling we have to have at most 2 different vector operands
// in all extractelement instructions.
- if (!Vec1 || Vec1 == Vec)
+ if (!Vec1 || Vec1 == Vec) {
Vec1 = Vec;
- else if (!Vec2 || Vec2 == Vec)
+ } else if (!Vec2 || Vec2 == Vec) {
Vec2 = Vec;
- else
+ Mask[I] += Size;
+ } else {
return None;
+ }
if (CommonShuffleMode == Permute)
continue;
// If the extract index is not the same as the operation number, it is a
@@ -1680,6 +1712,28 @@ private:
return IsSame(Scalars, ReuseShuffleIndices);
}
+ /// \returns true if current entry has same operands as \p TE.
+ bool hasEqualOperands(const TreeEntry &TE) const {
+ if (TE.getNumOperands() != getNumOperands())
+ return false;
+ SmallBitVector Used(getNumOperands());
+ for (unsigned I = 0, E = getNumOperands(); I < E; ++I) {
+ unsigned PrevCount = Used.count();
+ for (unsigned K = 0; K < E; ++K) {
+ if (Used.test(K))
+ continue;
+ if (getOperand(K) == TE.getOperand(I)) {
+ Used.set(K);
+ break;
+ }
+ }
+ // Check if we actually found the matching operand.
+ if (PrevCount == Used.count())
+ return false;
+ }
+ return true;
+ }
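
The matching scheme above is an order-insensitive multiset comparison; a self-contained analogue (illustrative, using ints in place of operand lists):

    #include <cstddef>
    #include <vector>

    // Two operand lists match when they are equal as multisets: every
    // element of B consumes one unused matching element of A.
    bool sameOperandsUpToOrder(const std::vector<int> &A,
                               const std::vector<int> &B) {
      if (A.size() != B.size())
        return false;
      std::vector<bool> Used(A.size(), false);
      for (int X : B) {
        bool Found = false;
        for (std::size_t K = 0; K < A.size(); ++K)
          if (!Used[K] && A[K] == X) {
            Used[K] = Found = true;
            break;
          }
        if (!Found)
          return false;
      }
      return true;
    }
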
+
/// \return Final vectorization factor for the node. Defined by the total
/// number of vectorized scalars, including those, used several times in the
/// entry and counted in the \a ReuseShuffleIndices, if any.
@@ -1773,6 +1827,12 @@ private:
return Operands[OpIdx];
}
+ /// \returns the \p OpIdx operand of this TreeEntry.
+ ArrayRef<Value *> getOperand(unsigned OpIdx) const {
+ assert(OpIdx < Operands.size() && "Off bounds");
+ return Operands[OpIdx];
+ }
+
/// \returns the number of operands.
unsigned getNumOperands() const { return Operands.size(); }
@@ -2078,7 +2138,7 @@ private:
SmallPtrSet<const Value *, 32> EphValues;
/// Holds all of the instructions that we gathered.
- SetVector<Instruction *> GatherSeq;
+ SetVector<Instruction *> GatherShuffleSeq;
/// A list of blocks that we are going to CSE.
SetVector<BasicBlock *> CSEBlocks;
@@ -4386,15 +4446,19 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
bool IsGather) {
DenseMap<Value *, int> ExtractVectorsTys;
for (auto *V : VL) {
+ if (isa<UndefValue>(V))
+ continue;
// If all users of instruction are going to be vectorized and this
// instruction itself is not going to be vectorized, consider this
// instruction as dead and remove its cost from the final cost of the
// vectorized tree.
- if (!areAllUsersVectorized(cast<Instruction>(V), VectorizedVals) ||
- (IsGather && ScalarToTreeEntry.count(V)))
+ if (!areAllUsersVectorized(cast<Instruction>(V), VectorizedVals))
continue;
auto *EE = cast<ExtractElementInst>(V);
- unsigned Idx = *getExtractIndex(EE);
+ Optional<unsigned> EEIdx = getExtractIndex(EE);
+ if (!EEIdx)
+ continue;
+ unsigned Idx = *EEIdx;
if (TTIRef.getNumberOfParts(VecTy) !=
TTIRef.getNumberOfParts(EE->getVectorOperandType())) {
auto It =
@@ -4426,6 +4490,8 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
for (const auto &Data : ExtractVectorsTys) {
auto *EEVTy = cast<FixedVectorType>(Data.first->getType());
unsigned NumElts = VecTy->getNumElements();
+ if (Data.second % NumElts == 0)
+ continue;
if (TTIRef.getNumberOfParts(EEVTy) > TTIRef.getNumberOfParts(VecTy)) {
unsigned Idx = (Data.second / NumElts) * NumElts;
unsigned EENumElts = EEVTy->getNumElements();
@@ -4488,10 +4554,12 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
// broadcast.
return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy);
}
- if (E->getOpcode() == Instruction::ExtractElement && allSameType(VL) &&
- allSameBlock(VL) &&
- !isa<ScalableVectorType>(
- cast<ExtractElementInst>(E->getMainOp())->getVectorOperandType())) {
+ if ((E->getOpcode() == Instruction::ExtractElement ||
+ all_of(E->Scalars,
+ [](Value *V) {
+ return isa<ExtractElementInst, UndefValue>(V);
+ })) &&
+ allSameType(VL)) {
// Check that gather of extractelements can be represented as just a
// shuffle of a single/two vectors the scalars are extracted from.
SmallVector<int> Mask;
@@ -4738,7 +4806,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
return !is_contained(E->Scalars,
cast<Instruction>(V)->getOperand(0));
}));
- if (isa<UndefValue>(FirstInsert->getOperand(0))) {
+ if (isUndefVector(FirstInsert->getOperand(0))) {
Cost += TTI->getShuffleCost(TTI::SK_PermuteSingleSrc, SrcVecTy, Mask);
} else {
SmallVector<int> InsertMask(NumElts);
@@ -5016,7 +5084,30 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
// VecCost is equal to sum of the cost of creating 2 vectors
// and the cost of creating shuffle.
InstructionCost VecCost = 0;
- if (Instruction::isBinaryOp(E->getOpcode())) {
+ // Try to find the previous shuffle node with the same operands and same
+ // main/alternate ops.
+ auto &&TryFindNodeWithEqualOperands = [this, E]() {
+ for (const std::unique_ptr<TreeEntry> &TE : VectorizableTree) {
+ if (TE.get() == E)
+ break;
+ if (TE->isAltShuffle() &&
+ ((TE->getOpcode() == E->getOpcode() &&
+ TE->getAltOpcode() == E->getAltOpcode()) ||
+ (TE->getOpcode() == E->getAltOpcode() &&
+ TE->getAltOpcode() == E->getOpcode())) &&
+ TE->hasEqualOperands(*E))
+ return true;
+ }
+ return false;
+ };
+ if (TryFindNodeWithEqualOperands()) {
+ LLVM_DEBUG({
+ dbgs() << "SLP: diamond match for alternate node found.\n";
+ E->dump();
+ });
+ // No need to add new vector costs here since we're going to reuse
+      // the same main/alternate vector ops; we just do different shuffling.
+ } else if (Instruction::isBinaryOp(E->getOpcode())) {
VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind);
VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy,
CostKind);
@@ -5060,7 +5151,11 @@ bool BoUpSLP::isFullyVectorizableTinyTree(bool ForReduction) const {
[this](Value *V) { return EphValues.contains(V); }) &&
(allConstant(TE->Scalars) || isSplat(TE->Scalars) ||
TE->Scalars.size() < Limit ||
- (TE->getOpcode() == Instruction::ExtractElement &&
+ ((TE->getOpcode() == Instruction::ExtractElement ||
+ all_of(TE->Scalars,
+ [](Value *V) {
+ return isa<ExtractElementInst, UndefValue>(V);
+ })) &&
isFixedVectorShuffle(TE->Scalars, Mask)) ||
(TE->State == TreeEntry::NeedToGather &&
TE->getOpcode() == Instruction::Load && !TE->isAltShuffle()));
@@ -5280,6 +5375,42 @@ InstructionCost BoUpSLP::getSpillCost() const {
return Cost;
}
+/// Check if two insertelement instructions are from the same buildvector.
+static bool areTwoInsertFromSameBuildVector(InsertElementInst *VU,
+ InsertElementInst *V) {
+  // Instructions must be from the same basic block.
+ if (VU->getParent() != V->getParent())
+ return false;
+  // The inserts must build vectors of the same type.
+ if (VU->getType() != V->getType())
+ return false;
+  // Inserts with multiple uses are treated as separate nodes.
+ if (!VU->hasOneUse() && !V->hasOneUse())
+ return false;
+ auto *IE1 = VU;
+ auto *IE2 = V;
+  // Walk the vector operands of both insertelement chains, trying to find
+  // either VU as the original vector of IE2 or V as the original vector of
+  // IE1.
+ do {
+ if (IE2 == VU || IE1 == V)
+ return true;
+ if (IE1) {
+ if (IE1 != VU && !IE1->hasOneUse())
+ IE1 = nullptr;
+ else
+ IE1 = dyn_cast<InsertElementInst>(IE1->getOperand(0));
+ }
+ if (IE2) {
+ if (IE2 != V && !IE2->hasOneUse())
+ IE2 = nullptr;
+ else
+ IE2 = dyn_cast<InsertElementInst>(IE2->getOperand(0));
+ }
+ } while (IE1 || IE2);
+ return false;
+}
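
A simplified analogue of the chain walk (a sketch that drops the hasOneUse pruning; Insert::Vec stands in for getOperand(0)):

    // Two inserts belong to one buildvector chain when walking either
    // chain through its vector operand reaches the other instruction.
    struct Insert {
      Insert *Vec; // the previous insertelement in the chain, or null
    };

    bool fromSameBuildVector(Insert *VU, Insert *V) {
      Insert *IE1 = VU, *IE2 = V;
      while (IE1 || IE2) {
        if (IE2 == VU || IE1 == V)
          return true;
        if (IE1)
          IE1 = IE1->Vec;
        if (IE2)
          IE2 = IE2->Vec;
      }
      return false;
    }
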
+
InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
InstructionCost Cost = 0;
LLVM_DEBUG(dbgs() << "SLP: Calculating cost for tree of size "
@@ -5306,7 +5437,8 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
SmallVector<APInt> DemandedElts;
for (ExternalUser &EU : ExternalUses) {
// We only add extract cost once for the same scalar.
- if (!ExtractCostCalculated.insert(EU.Scalar).second)
+ if (!isa_and_nonnull<InsertElementInst>(EU.User) &&
+ !ExtractCostCalculated.insert(EU.Scalar).second)
continue;
// Uses by ephemeral values are free (because the ephemeral value will be
@@ -5326,35 +5458,35 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
// If found user is an insertelement, do not calculate extract cost but try
// to detect it as a final shuffled/identity match.
- if (isa_and_nonnull<InsertElementInst>(EU.User)) {
- if (auto *FTy = dyn_cast<FixedVectorType>(EU.User->getType())) {
- Optional<int> InsertIdx = getInsertIndex(EU.User, 0);
+ if (auto *VU = dyn_cast_or_null<InsertElementInst>(EU.User)) {
+ if (auto *FTy = dyn_cast<FixedVectorType>(VU->getType())) {
+ Optional<int> InsertIdx = getInsertIndex(VU, 0);
if (!InsertIdx || *InsertIdx == UndefMaskElem)
continue;
- Value *VU = EU.User;
auto *It = find_if(FirstUsers, [VU](Value *V) {
- // Checks if 2 insertelements are from the same buildvector.
- if (VU->getType() != V->getType())
- return false;
- auto *IE1 = cast<InsertElementInst>(VU);
- auto *IE2 = cast<InsertElementInst>(V);
- // Go through of insertelement instructions trying to find either VU
- // as the original vector for IE2 or V as the original vector for IE1.
- do {
- if (IE1 == VU || IE2 == V)
- return true;
- if (IE1)
- IE1 = dyn_cast<InsertElementInst>(IE1->getOperand(0));
- if (IE2)
- IE2 = dyn_cast<InsertElementInst>(IE2->getOperand(0));
- } while (IE1 || IE2);
- return false;
+ return areTwoInsertFromSameBuildVector(VU,
+ cast<InsertElementInst>(V));
});
int VecId = -1;
if (It == FirstUsers.end()) {
VF.push_back(FTy->getNumElements());
ShuffleMask.emplace_back(VF.back(), UndefMaskElem);
- FirstUsers.push_back(EU.User);
+        // Find the insert vector that is vectorized in the tree, if any.
+ Value *Base = VU;
+ while (isa<InsertElementInst>(Base)) {
+ // Build the mask for the vectorized insertelement instructions.
+ if (const TreeEntry *E = getTreeEntry(Base)) {
+ VU = cast<InsertElementInst>(Base);
+ do {
+ int Idx = E->findLaneForValue(Base);
+ ShuffleMask.back()[Idx] = Idx;
+ Base = cast<InsertElementInst>(Base)->getOperand(0);
+ } while (E == getTreeEntry(Base));
+ break;
+ }
+ Base = cast<InsertElementInst>(Base)->getOperand(0);
+ }
+ FirstUsers.push_back(VU);
DemandedElts.push_back(APInt::getZero(VF.back()));
VecId = FirstUsers.size() - 1;
} else {
@@ -5363,6 +5495,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
int Idx = *InsertIdx;
ShuffleMask[VecId][Idx] = EU.Lane;
DemandedElts[VecId].setBit(Idx);
+ continue;
}
}
@@ -5386,47 +5519,86 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
InstructionCost SpillCost = getSpillCost();
Cost += SpillCost + ExtractCost;
- for (int I = 0, E = FirstUsers.size(); I < E; ++I) {
- // For the very first element - simple shuffle of the source vector.
- int Limit = ShuffleMask[I].size() * 2;
- if (I == 0 &&
- all_of(ShuffleMask[I], [Limit](int Idx) { return Idx < Limit; }) &&
- !ShuffleVectorInst::isIdentityMask(ShuffleMask[I])) {
+ if (FirstUsers.size() == 1) {
+ int Limit = ShuffleMask.front().size() * 2;
+ if (all_of(ShuffleMask.front(), [Limit](int Idx) { return Idx < Limit; }) &&
+ !ShuffleVectorInst::isIdentityMask(ShuffleMask.front())) {
InstructionCost C = TTI->getShuffleCost(
TTI::SK_PermuteSingleSrc,
- cast<FixedVectorType>(FirstUsers[I]->getType()), ShuffleMask[I]);
+ cast<FixedVectorType>(FirstUsers.front()->getType()),
+ ShuffleMask.front());
LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
<< " for final shuffle of insertelement external users "
<< *VectorizableTree.front()->Scalars.front() << ".\n"
<< "SLP: Current total cost = " << Cost << "\n");
Cost += C;
- continue;
}
- // Other elements - permutation of 2 vectors (the initial one and the next
- // Ith incoming vector).
- unsigned VF = ShuffleMask[I].size();
- for (unsigned Idx = 0; Idx < VF; ++Idx) {
- int &Mask = ShuffleMask[I][Idx];
- Mask = Mask == UndefMaskElem ? Idx : VF + Mask;
- }
- InstructionCost C = TTI->getShuffleCost(
- TTI::SK_PermuteTwoSrc, cast<FixedVectorType>(FirstUsers[I]->getType()),
- ShuffleMask[I]);
- LLVM_DEBUG(
- dbgs()
- << "SLP: Adding cost " << C
- << " for final shuffle of vector node and external insertelement users "
- << *VectorizableTree.front()->Scalars.front() << ".\n"
- << "SLP: Current total cost = " << Cost << "\n");
- Cost += C;
InstructionCost InsertCost = TTI->getScalarizationOverhead(
- cast<FixedVectorType>(FirstUsers[I]->getType()), DemandedElts[I],
- /*Insert*/ true,
- /*Extract*/ false);
+ cast<FixedVectorType>(FirstUsers.front()->getType()),
+ DemandedElts.front(), /*Insert*/ true, /*Extract*/ false);
+ LLVM_DEBUG(dbgs() << "SLP: subtracting the cost " << InsertCost
+ << " for insertelements gather.\n"
+ << "SLP: Current total cost = " << Cost << "\n");
Cost -= InsertCost;
+ } else if (FirstUsers.size() >= 2) {
+ unsigned MaxVF = *std::max_element(VF.begin(), VF.end());
+ // Combined masks of the first 2 vectors.
+ SmallVector<int> CombinedMask(MaxVF, UndefMaskElem);
+ copy(ShuffleMask.front(), CombinedMask.begin());
+ APInt CombinedDemandedElts = DemandedElts.front().zextOrSelf(MaxVF);
+ auto *VecTy = FixedVectorType::get(
+ cast<VectorType>(FirstUsers.front()->getType())->getElementType(),
+ MaxVF);
+ for (int I = 0, E = ShuffleMask[1].size(); I < E; ++I) {
+ if (ShuffleMask[1][I] != UndefMaskElem) {
+ CombinedMask[I] = ShuffleMask[1][I] + MaxVF;
+ CombinedDemandedElts.setBit(I);
+ }
+ }
+ InstructionCost C =
+ TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, CombinedMask);
+ LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
+ << " for final shuffle of vector node and external "
+ "insertelement users "
+ << *VectorizableTree.front()->Scalars.front() << ".\n"
+ << "SLP: Current total cost = " << Cost << "\n");
+ Cost += C;
+ InstructionCost InsertCost = TTI->getScalarizationOverhead(
+ VecTy, CombinedDemandedElts, /*Insert*/ true, /*Extract*/ false);
LLVM_DEBUG(dbgs() << "SLP: subtracting the cost " << InsertCost
<< " for insertelements gather.\n"
<< "SLP: Current total cost = " << Cost << "\n");
+ Cost -= InsertCost;
+ for (int I = 2, E = FirstUsers.size(); I < E; ++I) {
+ // Other elements - permutation of 2 vectors (the initial one and the
+ // next Ith incoming vector).
+ unsigned VF = ShuffleMask[I].size();
+ for (unsigned Idx = 0; Idx < VF; ++Idx) {
+ int Mask = ShuffleMask[I][Idx];
+ if (Mask != UndefMaskElem)
+ CombinedMask[Idx] = MaxVF + Mask;
+ else if (CombinedMask[Idx] != UndefMaskElem)
+ CombinedMask[Idx] = Idx;
+ }
+ for (unsigned Idx = VF; Idx < MaxVF; ++Idx)
+ if (CombinedMask[Idx] != UndefMaskElem)
+ CombinedMask[Idx] = Idx;
+ InstructionCost C =
+ TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, CombinedMask);
+ LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
+ << " for final shuffle of vector node and external "
+ "insertelement users "
+ << *VectorizableTree.front()->Scalars.front() << ".\n"
+ << "SLP: Current total cost = " << Cost << "\n");
+ Cost += C;
+ InstructionCost InsertCost = TTI->getScalarizationOverhead(
+ cast<FixedVectorType>(FirstUsers[I]->getType()), DemandedElts[I],
+ /*Insert*/ true, /*Extract*/ false);
+ LLVM_DEBUG(dbgs() << "SLP: subtracting the cost " << InsertCost
+ << " for insertelements gather.\n"
+ << "SLP: Current total cost = " << Cost << "\n");
+ Cost -= InsertCost;
+ }
}
#ifndef NDEBUG
@@ -5728,7 +5900,7 @@ Value *BoUpSLP::gather(ArrayRef<Value *> VL) {
auto *InsElt = dyn_cast<InsertElementInst>(Vec);
if (!InsElt)
return Vec;
- GatherSeq.insert(InsElt);
+ GatherShuffleSeq.insert(InsElt);
CSEBlocks.insert(InsElt->getParent());
// Add to our 'need-to-extract' list.
if (TreeEntry *Entry = getTreeEntry(V)) {
@@ -5771,10 +5943,17 @@ class ShuffleInstructionBuilder {
const unsigned VF = 0;
bool IsFinalized = false;
SmallVector<int, 4> Mask;
+ /// Holds all of the instructions that we gathered.
+ SetVector<Instruction *> &GatherShuffleSeq;
+ /// A list of blocks that we are going to CSE.
+ SetVector<BasicBlock *> &CSEBlocks;
public:
- ShuffleInstructionBuilder(IRBuilderBase &Builder, unsigned VF)
- : Builder(Builder), VF(VF) {}
+ ShuffleInstructionBuilder(IRBuilderBase &Builder, unsigned VF,
+ SetVector<Instruction *> &GatherShuffleSeq,
+ SetVector<BasicBlock *> &CSEBlocks)
+ : Builder(Builder), VF(VF), GatherShuffleSeq(GatherShuffleSeq),
+ CSEBlocks(CSEBlocks) {}
/// Adds a mask, inverting it before applying.
void addInversedMask(ArrayRef<unsigned> SubMask) {
@@ -5804,7 +5983,12 @@ public:
if (VF == ValueVF && ShuffleVectorInst::isIdentityMask(Mask))
return V;
- return Builder.CreateShuffleVector(V, Mask, "shuffle");
+ Value *Vec = Builder.CreateShuffleVector(V, Mask, "shuffle");
+ if (auto *I = dyn_cast<Instruction>(Vec)) {
+ GatherShuffleSeq.insert(I);
+ CSEBlocks.insert(I->getParent());
+ }
+ return Vec;
}
~ShuffleInstructionBuilder() {
@@ -5862,6 +6046,10 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
std::iota(UniformMask.begin(), UniformMask.end(), 0);
V = Builder.CreateShuffleVector(V, UniformMask, "shrink.shuffle");
}
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ GatherShuffleSeq.insert(I);
+ CSEBlocks.insert(I->getParent());
+ }
}
return V;
}
@@ -5909,15 +6097,12 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
VL = UniqueValues;
}
- ShuffleInstructionBuilder ShuffleBuilder(Builder, VF);
+ ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleSeq,
+ CSEBlocks);
Value *Vec = gather(VL);
if (!ReuseShuffleIndicies.empty()) {
ShuffleBuilder.addMask(ReuseShuffleIndicies);
Vec = ShuffleBuilder.finalize(Vec);
- if (auto *I = dyn_cast<Instruction>(Vec)) {
- GatherSeq.insert(I);
- CSEBlocks.insert(I->getParent());
- }
}
return Vec;
}
@@ -5932,7 +6117,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
unsigned VF = E->getVectorFactor();
- ShuffleInstructionBuilder ShuffleBuilder(Builder, VF);
+ ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleSeq,
+ CSEBlocks);
if (E->State == TreeEntry::NeedToGather) {
if (E->getMainOp())
setInsertPointAfterBundle(E);
@@ -5946,16 +6132,16 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
"Expected shuffle of 1 or 2 entries.");
Vec = Builder.CreateShuffleVector(Entries.front()->VectorizedValue,
Entries.back()->VectorizedValue, Mask);
+ if (auto *I = dyn_cast<Instruction>(Vec)) {
+ GatherShuffleSeq.insert(I);
+ CSEBlocks.insert(I->getParent());
+ }
} else {
Vec = gather(E->Scalars);
}
if (NeedToShuffleReuses) {
ShuffleBuilder.addMask(E->ReuseShuffleIndices);
Vec = ShuffleBuilder.finalize(Vec);
- if (auto *I = dyn_cast<Instruction>(Vec)) {
- GatherSeq.insert(I);
- CSEBlocks.insert(I->getParent());
- }
}
E->VectorizedValue = Vec;
return Vec;
@@ -6072,11 +6258,16 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
IsIdentity &= *InsertIdx - Offset == I;
Mask[*InsertIdx - Offset] = I;
}
- if (!IsIdentity || NumElts != NumScalars)
+ if (!IsIdentity || NumElts != NumScalars) {
V = Builder.CreateShuffleVector(V, Mask);
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ GatherShuffleSeq.insert(I);
+ CSEBlocks.insert(I->getParent());
+ }
+ }
if ((!IsIdentity || Offset != 0 ||
- !isa<UndefValue>(FirstInsert->getOperand(0))) &&
+ !isUndefVector(FirstInsert->getOperand(0))) &&
NumElts != NumScalars) {
SmallVector<int> InsertMask(NumElts);
std::iota(InsertMask.begin(), InsertMask.end(), 0);
@@ -6088,6 +6279,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
V = Builder.CreateShuffleVector(
FirstInsert->getOperand(0), V, InsertMask,
cast<Instruction>(E->Scalars.back())->getName());
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ GatherShuffleSeq.insert(I);
+ CSEBlocks.insert(I->getParent());
+ }
}
++NumVectorInstructions;
@@ -6444,6 +6639,14 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
V1 = Builder.CreateCast(
static_cast<Instruction::CastOps>(E->getAltOpcode()), LHS, VecTy);
}
+  // Add V0 and V1 to later analysis so that matching instructions, if any,
+  // can be found and removed.
+ for (Value *V : {V0, V1}) {
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ GatherShuffleSeq.insert(I);
+ CSEBlocks.insert(I->getParent());
+ }
+ }
// Create shuffle to take alternate operations from the vector.
// Also, gather up main and alt scalar ops to propagate IR flags to
@@ -6462,8 +6665,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
propagateIRFlags(V1, AltScalars);
Value *V = Builder.CreateShuffleVector(V0, V1, Mask);
- if (Instruction *I = dyn_cast<Instruction>(V))
+ if (auto *I = dyn_cast<Instruction>(V)) {
V = propagateMetadata(I, E->Scalars);
+ GatherShuffleSeq.insert(I);
+ CSEBlocks.insert(I->getParent());
+ }
V = ShuffleBuilder.finalize(V);
E->VectorizedValue = V;
@@ -6657,10 +6863,10 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
}
void BoUpSLP::optimizeGatherSequence() {
- LLVM_DEBUG(dbgs() << "SLP: Optimizing " << GatherSeq.size()
+ LLVM_DEBUG(dbgs() << "SLP: Optimizing " << GatherShuffleSeq.size()
<< " gather sequences instructions.\n");
// LICM InsertElementInst sequences.
- for (Instruction *I : GatherSeq) {
+ for (Instruction *I : GatherShuffleSeq) {
if (isDeleted(I))
continue;
@@ -6677,11 +6883,10 @@ void BoUpSLP::optimizeGatherSequence() {
// If the vector or the element that we insert into it are
// instructions that are defined in this basic block then we can't
// hoist this instruction.
- auto *Op0 = dyn_cast<Instruction>(I->getOperand(0));
- auto *Op1 = dyn_cast<Instruction>(I->getOperand(1));
- if (Op0 && L->contains(Op0))
- continue;
- if (Op1 && L->contains(Op1))
+ if (any_of(I->operands(), [L](Value *V) {
+ auto *OpI = dyn_cast<Instruction>(V);
+ return OpI && L->contains(OpI);
+ }))
continue;
// We can hoist this instruction. Move it to the pre-header.
@@ -6705,7 +6910,50 @@ void BoUpSLP::optimizeGatherSequence() {
return A->getDFSNumIn() < B->getDFSNumIn();
});
- // Perform O(N^2) search over the gather sequences and merge identical
+  // A less defined shuffle can be replaced by its more defined copy. Of two
+  // shuffles with the same vector operands, one is less defined if each of
+  // its mask indices is either undef or equal to the corresponding index in
+  // the other. E.g. shuffle %0, poison, <0, 0, 0, undef> is less defined
+  // than shuffle %0, poison, <0, 0, 0, 0>.
+ auto &&IsIdenticalOrLessDefined = [this](Instruction *I1, Instruction *I2,
+ SmallVectorImpl<int> &NewMask) {
+ if (I1->getType() != I2->getType())
+ return false;
+ auto *SI1 = dyn_cast<ShuffleVectorInst>(I1);
+ auto *SI2 = dyn_cast<ShuffleVectorInst>(I2);
+ if (!SI1 || !SI2)
+ return I1->isIdenticalTo(I2);
+ if (SI1->isIdenticalTo(SI2))
+ return true;
+ for (int I = 0, E = SI1->getNumOperands(); I < E; ++I)
+ if (SI1->getOperand(I) != SI2->getOperand(I))
+ return false;
+ // Check if the second instruction is more defined than the first one.
+ NewMask.assign(SI2->getShuffleMask().begin(), SI2->getShuffleMask().end());
+ ArrayRef<int> SM1 = SI1->getShuffleMask();
+ // Count trailing undefs in the mask to check the final number of used
+ // registers.
+ unsigned LastUndefsCnt = 0;
+ for (int I = 0, E = NewMask.size(); I < E; ++I) {
+ if (SM1[I] == UndefMaskElem)
+ ++LastUndefsCnt;
+ else
+ LastUndefsCnt = 0;
+ if (NewMask[I] != UndefMaskElem && SM1[I] != UndefMaskElem &&
+ NewMask[I] != SM1[I])
+ return false;
+ if (NewMask[I] == UndefMaskElem)
+ NewMask[I] = SM1[I];
+ }
+ // Check if the last undefs actually change the final number of used vector
+ // registers.
+ return SM1.size() - LastUndefsCnt > 1 &&
+ TTI->getNumberOfParts(SI1->getType()) ==
+ TTI->getNumberOfParts(
+ FixedVectorType::get(SI1->getType()->getElementType(),
+ SM1.size() - LastUndefsCnt));
+ };
+ // Perform O(N^2) search over the gather/shuffle sequences and merge identical
// instructions. TODO: We can further optimize this scan if we split the
// instructions into different buckets based on the insert lane.
SmallVector<Instruction *, 16> Visited;
@@ -6719,17 +6967,35 @@ void BoUpSLP::optimizeGatherSequence() {
if (isDeleted(&In))
continue;
if (!isa<InsertElementInst>(&In) && !isa<ExtractElementInst>(&In) &&
- !isa<ShuffleVectorInst>(&In))
+ !isa<ShuffleVectorInst>(&In) && !GatherShuffleSeq.contains(&In))
continue;
// Check if we can replace this instruction with any of the
// visited instructions.
bool Replaced = false;
- for (Instruction *v : Visited) {
- if (In.isIdenticalTo(v) &&
- DT->dominates(v->getParent(), In.getParent())) {
- In.replaceAllUsesWith(v);
+ for (Instruction *&V : Visited) {
+ SmallVector<int> NewMask;
+ if (IsIdenticalOrLessDefined(&In, V, NewMask) &&
+ DT->dominates(V->getParent(), In.getParent())) {
+ In.replaceAllUsesWith(V);
eraseInstruction(&In);
+ if (auto *SI = dyn_cast<ShuffleVectorInst>(V))
+ if (!NewMask.empty())
+ SI->setShuffleMask(NewMask);
+ Replaced = true;
+ break;
+ }
+ if (isa<ShuffleVectorInst>(In) && isa<ShuffleVectorInst>(V) &&
+ GatherShuffleSeq.contains(V) &&
+ IsIdenticalOrLessDefined(V, &In, NewMask) &&
+ DT->dominates(In.getParent(), V->getParent())) {
+ In.moveAfter(V);
+ V->replaceAllUsesWith(&In);
+ eraseInstruction(V);
+ if (auto *SI = dyn_cast<ShuffleVectorInst>(&In))
+ if (!NewMask.empty())
+ SI->setShuffleMask(NewMask);
+ V = &In;
Replaced = true;
break;
}
@@ -6741,7 +7007,7 @@ void BoUpSLP::optimizeGatherSequence() {
}
}
CSEBlocks.clear();
- GatherSeq.clear();
+ GatherShuffleSeq.clear();
}
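
A self-contained analogue of the less-defined mask test used above (a sketch: it keeps only the per-lane merging rule and omits the operand and register-count checks; -1 plays the role of UndefMaskElem):

    #include <cstddef>
    #include <vector>

    // M1 may be replaced by M2 when every defined lane of M1 agrees with
    // M2; undef lanes of M2 are filled from M1 to form the merged mask.
    bool isIdenticalOrLessDefined(const std::vector<int> &M1,
                                  const std::vector<int> &M2,
                                  std::vector<int> &NewMask) {
      if (M1.size() != M2.size())
        return false;
      NewMask = M2;
      for (std::size_t I = 0; I < M1.size(); ++I) {
        if (NewMask[I] != -1 && M1[I] != -1 && NewMask[I] != M1[I])
          return false; // conflicting defined lanes
        if (NewMask[I] == -1)
          NewMask[I] = M1[I]; // adopt the more defined lane
      }
      return true;
    }
    // e.g. M1 = {0, 0, 0, -1} is less defined than M2 = {0, 0, 0, 0}.
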
 // Groups the instructions into a bundle (which is then a single scheduling entity)
@@ -8791,6 +9057,8 @@ private:
assert(VectorizedValue && "Need to have a vectorized tree node");
assert(isPowerOf2_32(ReduxWidth) &&
"We only handle power-of-two reductions for now");
+ assert(RdxKind != RecurKind::FMulAdd &&
+ "A call to the llvm.fmuladd intrinsic is not handled yet");
++NumVectorInstructions;
return createSimpleTargetReduction(Builder, TTI, VectorizedValue, RdxKind,
@@ -9123,8 +9391,9 @@ bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI,
SmallVector<Value *, 16> BuildVectorOpds;
SmallVector<int> Mask;
if (!findBuildAggregate(IEI, TTI, BuildVectorOpds, BuildVectorInsts) ||
- (llvm::all_of(BuildVectorOpds,
- [](Value *V) { return isa<ExtractElementInst>(V); }) &&
+ (llvm::all_of(
+ BuildVectorOpds,
+ [](Value *V) { return isa<ExtractElementInst, UndefValue>(V); }) &&
isFixedVectorShuffle(BuildVectorOpds, Mask)))
return false;
@@ -9132,44 +9401,6 @@ bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI,
return tryToVectorizeList(BuildVectorInsts, R);
}
-bool SLPVectorizerPass::vectorizeSimpleInstructions(
- SmallVectorImpl<Instruction *> &Instructions, BasicBlock *BB, BoUpSLP &R,
- bool AtTerminator) {
- bool OpsChanged = false;
- SmallVector<Instruction *, 4> PostponedCmps;
- for (auto *I : reverse(Instructions)) {
- if (R.isDeleted(I))
- continue;
- if (auto *LastInsertValue = dyn_cast<InsertValueInst>(I))
- OpsChanged |= vectorizeInsertValueInst(LastInsertValue, BB, R);
- else if (auto *LastInsertElem = dyn_cast<InsertElementInst>(I))
- OpsChanged |= vectorizeInsertElementInst(LastInsertElem, BB, R);
- else if (isa<CmpInst>(I))
- PostponedCmps.push_back(I);
- }
- if (AtTerminator) {
- // Try to find reductions first.
- for (Instruction *I : PostponedCmps) {
- if (R.isDeleted(I))
- continue;
- for (Value *Op : I->operands())
- OpsChanged |= vectorizeRootInstruction(nullptr, Op, BB, R, TTI);
- }
- // Try to vectorize operands as vector bundles.
- for (Instruction *I : PostponedCmps) {
- if (R.isDeleted(I))
- continue;
- OpsChanged |= tryToVectorize(I, R);
- }
- Instructions.clear();
- } else {
- // Insert in reverse order since the PostponedCmps vector was filled in
- // reverse order.
- Instructions.assign(PostponedCmps.rbegin(), PostponedCmps.rend());
- }
- return OpsChanged;
-}
-
template <typename T>
static bool
tryToVectorizeSequence(SmallVectorImpl<T *> &Incoming,
@@ -9242,6 +9473,101 @@ tryToVectorizeSequence(SmallVectorImpl<T *> &Incoming,
return Changed;
}
+bool SLPVectorizerPass::vectorizeSimpleInstructions(
+ SmallVectorImpl<Instruction *> &Instructions, BasicBlock *BB, BoUpSLP &R,
+ bool AtTerminator) {
+ bool OpsChanged = false;
+ SmallVector<Instruction *, 4> PostponedCmps;
+ for (auto *I : reverse(Instructions)) {
+ if (R.isDeleted(I))
+ continue;
+ if (auto *LastInsertValue = dyn_cast<InsertValueInst>(I))
+ OpsChanged |= vectorizeInsertValueInst(LastInsertValue, BB, R);
+ else if (auto *LastInsertElem = dyn_cast<InsertElementInst>(I))
+ OpsChanged |= vectorizeInsertElementInst(LastInsertElem, BB, R);
+ else if (isa<CmpInst>(I))
+ PostponedCmps.push_back(I);
+ }
+ if (AtTerminator) {
+ // Try to find reductions first.
+ for (Instruction *I : PostponedCmps) {
+ if (R.isDeleted(I))
+ continue;
+ for (Value *Op : I->operands())
+ OpsChanged |= vectorizeRootInstruction(nullptr, Op, BB, R, TTI);
+ }
+ // Try to vectorize operands as vector bundles.
+ for (Instruction *I : PostponedCmps) {
+ if (R.isDeleted(I))
+ continue;
+ OpsChanged |= tryToVectorize(I, R);
+ }
+ // Try to vectorize a list of compares.
+ // Sort by type, compare predicate, etc.
+ // TODO: Add analysis on the operand opcodes (profitable to vectorize
+ // instructions with same/alternate opcodes/const values).
+ auto &&CompareSorter = [&R](Value *V, Value *V2) {
+ auto *CI1 = cast<CmpInst>(V);
+ auto *CI2 = cast<CmpInst>(V2);
+ if (R.isDeleted(CI2) || !isValidElementType(CI2->getType()))
+ return false;
+ if (CI1->getOperand(0)->getType()->getTypeID() <
+ CI2->getOperand(0)->getType()->getTypeID())
+ return true;
+ if (CI1->getOperand(0)->getType()->getTypeID() >
+ CI2->getOperand(0)->getType()->getTypeID())
+ return false;
+ return CI1->getPredicate() < CI2->getPredicate() ||
+ (CI1->getPredicate() > CI2->getPredicate() &&
+ CI1->getPredicate() <
+ CmpInst::getSwappedPredicate(CI2->getPredicate()));
+ };
+
+ auto &&AreCompatibleCompares = [&R](Value *V1, Value *V2) {
+ if (V1 == V2)
+ return true;
+ auto *CI1 = cast<CmpInst>(V1);
+ auto *CI2 = cast<CmpInst>(V2);
+ if (R.isDeleted(CI2) || !isValidElementType(CI2->getType()))
+ return false;
+ if (CI1->getOperand(0)->getType() != CI2->getOperand(0)->getType())
+ return false;
+ return CI1->getPredicate() == CI2->getPredicate() ||
+ CI1->getPredicate() ==
+ CmpInst::getSwappedPredicate(CI2->getPredicate());
+ };
+ auto Limit = [&R](Value *V) {
+ unsigned EltSize = R.getVectorElementSize(V);
+ return std::max(2U, R.getMaxVecRegSize() / EltSize);
+ };
+
+ SmallVector<Value *> Vals(PostponedCmps.begin(), PostponedCmps.end());
+ OpsChanged |= tryToVectorizeSequence<Value>(
+ Vals, Limit, CompareSorter, AreCompatibleCompares,
+ [this, &R](ArrayRef<Value *> Candidates, bool LimitForRegisterSize) {
+ // Exclude possible reductions from other blocks.
+ bool ArePossiblyReducedInOtherBlock =
+ any_of(Candidates, [](Value *V) {
+ return any_of(V->users(), [V](User *U) {
+ return isa<SelectInst>(U) &&
+ cast<SelectInst>(U)->getParent() !=
+ cast<Instruction>(V)->getParent();
+ });
+ });
+ if (ArePossiblyReducedInOtherBlock)
+ return false;
+ return tryToVectorizeList(Candidates, R, LimitForRegisterSize);
+ },
+ /*LimitForRegisterSize=*/true);
+ Instructions.clear();
+ } else {
+ // Insert in reverse order since the PostponedCmps vector was filled in
+ // reverse order.
+ Instructions.assign(PostponedCmps.rbegin(), PostponedCmps.rend());
+ }
+ return OpsChanged;
+}
+
bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
bool Changed = false;
SmallVector<Value *, 4> Incoming;
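The AreCompatibleCompares lambda above accepts a compare and its operand-swapped form as candidates for the same vector; a minimal sketch of that equivalence with a simplified predicate enum, not the LLVM CmpInst API:

  // 'a < b' is 'b > a' with operands exchanged, so two compares whose
  // predicates are equal or swapped forms can share vector lanes.
  enum Pred { LT, GT, LE, GE, EQ, NE };

  static Pred swappedPredicate(Pred P) {
    switch (P) {
    case LT: return GT;
    case GT: return LT;
    case LE: return GE;
    case GE: return LE;
    default: return P; // EQ and NE are symmetric under operand swap.
    }
  }

  static bool areCompatiblePredicates(Pred A, Pred B) {
    return A == B || A == swappedPredicate(B);
  }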
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 638467f94e1c..44b5e1df0839 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -718,6 +718,8 @@ void VPInstruction::generateInstruction(VPTransformState &State,
void VPInstruction::execute(VPTransformState &State) {
assert(!State.Instance && "VPInstruction executing an Instance");
+ IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
+ State.Builder.setFastMathFlags(FMF);
for (unsigned Part = 0; Part < State.UF; ++Part)
generateInstruction(State, Part);
}
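FastMathFlagGuard is an RAII helper on IRBuilderBase; a small sketch of the pattern (the wrapper function is hypothetical, the guard and setter are the real LLVM API):

  #include "llvm/IR/IRBuilder.h"

  // The guard saves the builder's current fast-math flags on construction
  // and restores them on destruction, so FMF only applies to instructions
  // created inside this scope.
  static void emitWithFlags(llvm::IRBuilderBase &Builder,
                            llvm::FastMathFlags FMF) {
    llvm::IRBuilderBase::FastMathFlagGuard Guard(Builder);
    Builder.setFastMathFlags(FMF);
    // ... emit floating-point instructions that inherit FMF ...
  } // Previous flags restored here.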
@@ -760,6 +762,8 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
O << Instruction::getOpcodeName(getOpcode());
}
+ O << FMF;
+
for (const VPValue *Operand : operands()) {
O << " ";
Operand->printAsOperand(O, SlotTracker);
@@ -767,6 +771,16 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
}
#endif
+void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) {
+ // Make sure the VPInstruction is a floating-point operation.
+ assert((Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
+ Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
+ Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
+ Opcode == Instruction::FCmp) &&
+ "this op can't take fast-math flags");
+ FMF = FMFNew;
+}
+
/// Generate the code inside the body of the vectorized loop. Assumes a single
/// LoopVectorBody basic-block was created for this. Introduce additional
/// basic-blocks as needed, and fill them all.
@@ -1196,8 +1210,10 @@ void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
printAsOperand(O, SlotTracker);
O << " = ";
getChainOp()->printAsOperand(O, SlotTracker);
- O << " + reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode())
- << " (";
+ O << " +";
+ if (isa<FPMathOperator>(getUnderlyingInstr()))
+ O << getUnderlyingInstr()->getFastMathFlags();
+ O << " reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode()) << " (";
getVecOp()->printAsOperand(O, SlotTracker);
if (getCondOp()) {
O << ", ";
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 00ee31007cb7..810dd5030f95 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -59,6 +59,7 @@ class Value;
class VPBasicBlock;
class VPRegionBlock;
class VPlan;
+class VPReplicateRecipe;
class VPlanSlp;
/// Returns a calculation for the total number of elements for a given \p VF.
@@ -346,6 +347,10 @@ struct VPTransformState {
/// Pointer to the VPlan code is generated for.
VPlan *Plan;
+
+ /// Holds recipes that may generate a poison value that is used after
+ /// vectorization, even when their operands are not poison.
+ SmallPtrSet<VPRecipeBase *, 16> MayGeneratePoisonRecipes;
};
/// VPUsers instance used by VPBlockBase to manage CondBit and the block
@@ -789,6 +794,7 @@ public:
private:
typedef unsigned char OpcodeTy;
OpcodeTy Opcode;
+ FastMathFlags FMF;
/// Utility method serving execute(): generates a single instance of the
/// modeled instruction.
@@ -802,13 +808,6 @@ public:
: VPRecipeBase(VPRecipeBase::VPInstructionSC, Operands),
VPValue(VPValue::VPVInstructionSC, nullptr, this), Opcode(Opcode) {}
- VPInstruction(unsigned Opcode, ArrayRef<VPInstruction *> Operands)
- : VPRecipeBase(VPRecipeBase::VPInstructionSC, {}),
- VPValue(VPValue::VPVInstructionSC, nullptr, this), Opcode(Opcode) {
- for (auto *I : Operands)
- addOperand(I->getVPSingleValue());
- }
-
VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands)
: VPInstruction(Opcode, ArrayRef<VPValue *>(Operands)) {}
@@ -870,6 +869,9 @@ public:
return true;
}
}
+
+ /// Set the fast-math flags.
+ void setFastMathFlags(FastMathFlags FMFNew);
};
/// VPWidenRecipe is a recipe for producing a copy of vector type its
@@ -1511,7 +1513,7 @@ public:
/// - For store: Address, stored value, optional mask
/// TODO: We currently execute only per-part unless a specific instance is
/// provided.
-class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
+class VPWidenMemoryInstructionRecipe : public VPRecipeBase, public VPValue {
Instruction &Ingredient;
// Whether the loaded-from / stored-to addresses are consecutive.
@@ -1533,10 +1535,10 @@ class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
public:
VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
bool Consecutive, bool Reverse)
- : VPRecipeBase(VPWidenMemoryInstructionSC, {Addr}), Ingredient(Load),
+ : VPRecipeBase(VPWidenMemoryInstructionSC, {Addr}),
+ VPValue(VPValue::VPVMemoryInstructionSC, &Load, this), Ingredient(Load),
Consecutive(Consecutive), Reverse(Reverse) {
assert((Consecutive || !Reverse) && "Reverse implies consecutive");
- new VPValue(VPValue::VPVMemoryInstructionSC, &Load, this);
setMask(Mask);
}
@@ -1544,6 +1546,7 @@ public:
VPValue *StoredValue, VPValue *Mask,
bool Consecutive, bool Reverse)
: VPRecipeBase(VPWidenMemoryInstructionSC, {Addr, StoredValue}),
+ VPValue(VPValue::VPVMemoryInstructionSC, &Store, this),
Ingredient(Store), Consecutive(Consecutive), Reverse(Reverse) {
assert((Consecutive || !Reverse) && "Reverse implies consecutive");
setMask(Mask);
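The refactoring above folds the separately allocated VPValue into the recipe via inheritance, so the recipe itself is the value it defines; a schematic of the before/after shape with illustrative names, not the VPlan classes:

  struct Recipe {};
  struct Value {
    Value(void *UnderlyingIR, Recipe *Def) { (void)UnderlyingIR; (void)Def; }
  };

  // After the change: the value base is initialized in the member
  // initializer list with 'this' as its defining recipe, replacing the
  // old 'new VPValue(..., this)' in the constructor body.
  struct WidenMemoryRecipe : Recipe, Value {
    explicit WidenMemoryRecipe(void *LoadOrStore)
        : Recipe(), Value(LoadOrStore, this) {}
  };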
diff --git a/llvm/tools/llvm-cov/CodeCoverage.cpp b/llvm/tools/llvm-cov/CodeCoverage.cpp
index 5c9ff41a2d5d..d357ad7c9e10 100644
--- a/llvm/tools/llvm-cov/CodeCoverage.cpp
+++ b/llvm/tools/llvm-cov/CodeCoverage.cpp
@@ -176,8 +176,8 @@ private:
std::vector<std::pair<std::string, std::unique_ptr<MemoryBuffer>>>
LoadedSourceFiles;
- /// Whitelist from -name-whitelist to be used for filtering.
- std::unique_ptr<SpecialCaseList> NameWhitelist;
+ /// Allowlist from -name-allowlist to be used for filtering.
+ std::unique_ptr<SpecialCaseList> NameAllowlist;
};
}
@@ -668,11 +668,18 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
cl::ZeroOrMore, cl::cat(FilteringCategory));
cl::list<std::string> NameFilterFiles(
- "name-whitelist", cl::Optional,
+ "name-allowlist", cl::Optional,
cl::desc("Show code coverage only for functions listed in the given "
"file"),
cl::ZeroOrMore, cl::cat(FilteringCategory));
+ // Allow for accepting the previous option name.
+ cl::list<std::string> NameFilterFilesDeprecated(
+ "name-whitelist", cl::Optional, cl::Hidden,
+ cl::desc("Show code coverage only for functions listed in the given "
+ "file. Deprecated, use -name-allowlist instead"),
+ cl::ZeroOrMore, cl::cat(FilteringCategory));
+
cl::list<std::string> NameRegexFilters(
"name-regex", cl::Optional,
cl::desc("Show code coverage only for functions that match the given "
@@ -809,23 +816,34 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
ViewOpts.DemanglerOpts.swap(DemanglerOpts);
}
- // Read in -name-whitelist files.
- if (!NameFilterFiles.empty()) {
+ // Read in -name-allowlist files.
+ if (!NameFilterFiles.empty() || !NameFilterFilesDeprecated.empty()) {
std::string SpecialCaseListErr;
- NameWhitelist = SpecialCaseList::create(
- NameFilterFiles, *vfs::getRealFileSystem(), SpecialCaseListErr);
- if (!NameWhitelist)
+ if (!NameFilterFiles.empty())
+ NameAllowlist = SpecialCaseList::create(
+ NameFilterFiles, *vfs::getRealFileSystem(), SpecialCaseListErr);
+ if (!NameFilterFilesDeprecated.empty())
+ NameAllowlist = SpecialCaseList::create(NameFilterFilesDeprecated,
+ *vfs::getRealFileSystem(),
+ SpecialCaseListErr);
+
+ if (!NameAllowlist)
error(SpecialCaseListErr);
}
// Create the function filters
- if (!NameFilters.empty() || NameWhitelist || !NameRegexFilters.empty()) {
+ if (!NameFilters.empty() || NameAllowlist || !NameRegexFilters.empty()) {
auto NameFilterer = std::make_unique<CoverageFilters>();
for (const auto &Name : NameFilters)
NameFilterer->push_back(std::make_unique<NameCoverageFilter>(Name));
- if (NameWhitelist)
- NameFilterer->push_back(
- std::make_unique<NameWhitelistCoverageFilter>(*NameWhitelist));
+ if (NameAllowlist) {
+ if (!NameFilterFiles.empty())
+ NameFilterer->push_back(
+ std::make_unique<NameAllowlistCoverageFilter>(*NameAllowlist));
+ if (!NameFilterFilesDeprecated.empty())
+ NameFilterer->push_back(
+ std::make_unique<NameWhitelistCoverageFilter>(*NameAllowlist));
+ }
for (const auto &Regex : NameRegexFilters)
NameFilterer->push_back(
std::make_unique<NameRegexCoverageFilter>(Regex));
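For reference, the new filter reads the standard SpecialCaseList format; judging from the inSection("llvmcov", "allowlist_fun", ...) lookup in CoverageFilters.cpp below, a file passed via -name-allowlist would plausibly contain entries like these (function patterns are illustrative):

  [llvmcov]
  allowlist_fun:main
  allowlist_fun:DoCoverage*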
diff --git a/llvm/tools/llvm-cov/CoverageFilters.cpp b/llvm/tools/llvm-cov/CoverageFilters.cpp
index fac7518d7da2..b7998647cc57 100644
--- a/llvm/tools/llvm-cov/CoverageFilters.cpp
+++ b/llvm/tools/llvm-cov/CoverageFilters.cpp
@@ -34,6 +34,13 @@ bool NameRegexCoverageFilter::matchesFilename(StringRef Filename) const {
return llvm::Regex(Regex).match(Filename);
}
+bool NameAllowlistCoverageFilter::matches(
+ const coverage::CoverageMapping &,
+ const coverage::FunctionRecord &Function) const {
+ return Allowlist.inSection("llvmcov", "allowlist_fun", Function.Name);
+}
+
+// TODO: remove this when -name-whitelist option is removed.
bool NameWhitelistCoverageFilter::matches(
const coverage::CoverageMapping &,
const coverage::FunctionRecord &Function) const {
diff --git a/llvm/tools/llvm-cov/CoverageFilters.h b/llvm/tools/llvm-cov/CoverageFilters.h
index 33fd9929c59a..3040fe74f7cf 100644
--- a/llvm/tools/llvm-cov/CoverageFilters.h
+++ b/llvm/tools/llvm-cov/CoverageFilters.h
@@ -67,7 +67,19 @@ public:
};
/// Matches functions whose name appears in a SpecialCaseList in the
-/// whitelist_fun section.
+/// allowlist_fun section.
+class NameAllowlistCoverageFilter : public CoverageFilter {
+ const SpecialCaseList &Allowlist;
+
+public:
+ NameAllowlistCoverageFilter(const SpecialCaseList &Allowlist)
+ : Allowlist(Allowlist) {}
+
+ bool matches(const coverage::CoverageMapping &CM,
+ const coverage::FunctionRecord &Function) const override;
+};
+
+// TODO: Remove this class when -name-whitelist option is removed.
class NameWhitelistCoverageFilter : public CoverageFilter {
const SpecialCaseList &Whitelist;
diff --git a/llvm/tools/llvm-diff/lib/DifferenceEngine.cpp b/llvm/tools/llvm-diff/lib/DifferenceEngine.cpp
index eb746cd2a865..4bdefcdc1758 100644
--- a/llvm/tools/llvm-diff/lib/DifferenceEngine.cpp
+++ b/llvm/tools/llvm-diff/lib/DifferenceEngine.cpp
@@ -269,15 +269,35 @@ class FunctionDifferenceEngine {
} else if (isa<CallInst>(L)) {
return diffCallSites(cast<CallInst>(*L), cast<CallInst>(*R), Complain);
} else if (isa<PHINode>(L)) {
- // FIXME: implement.
+ const PHINode &LI = cast<PHINode>(*L);
+ const PHINode &RI = cast<PHINode>(*R);
// This is really weird; type uniquing is broken?
- if (L->getType() != R->getType()) {
- if (!L->getType()->isPointerTy() || !R->getType()->isPointerTy()) {
+ if (LI.getType() != RI.getType()) {
+ if (!LI.getType()->isPointerTy() || !RI.getType()->isPointerTy()) {
if (Complain) Engine.log("different phi types");
return true;
}
}
+
+ if (LI.getNumIncomingValues() != RI.getNumIncomingValues()) {
+ if (Complain)
+ Engine.log("PHI node # of incoming values differ");
+ return true;
+ }
+
+ for (unsigned I = 0; I < LI.getNumIncomingValues(); ++I) {
+ if (TryUnify)
+ tryUnify(LI.getIncomingBlock(I), RI.getIncomingBlock(I));
+
+ if (!equivalentAsOperands(LI.getIncomingValue(I),
+ RI.getIncomingValue(I))) {
+ if (Complain)
+ Engine.log("PHI node incoming values differ");
+ return true;
+ }
+ }
+
return false;
// Terminators.
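A condensed restatement of the PHI rule added above, as a standalone sketch with hypothetical placeholder types rather than the llvm-diff classes: incoming counts must match, and each (block, value) pair must be equivalent position by position, with blocks unified first when tryUnify is allowed.

  #include <cstddef>
  #include <utility>
  #include <vector>

  using BlockId = int; // Placeholder for unified basic-block identities.
  using ValueId = int; // Placeholder for operand equivalence classes.
  using Phi = std::vector<std::pair<BlockId, ValueId>>;

  // Two PHIs diff as equal only when every incoming (block, value) pair
  // matches at the same position; a count mismatch fails immediately.
  static bool phisMatch(const Phi &L, const Phi &R) {
    if (L.size() != R.size())
      return false;
    for (std::size_t I = 0; I != L.size(); ++I)
      if (L[I] != R[I])
        return false;
    return true;
  }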
diff --git a/llvm/tools/llvm-dwarfdump/Statistics.cpp b/llvm/tools/llvm-dwarfdump/Statistics.cpp
index b237e014038d..5c08e43b4b09 100644
--- a/llvm/tools/llvm-dwarfdump/Statistics.cpp
+++ b/llvm/tools/llvm-dwarfdump/Statistics.cpp
@@ -60,6 +60,19 @@ struct SaturatingUINT64 {
}
};
+/// Utility struct to store the full location of a DIE - its CU and offset.
+struct DIELocation {
+ DWARFUnit *DwUnit;
+ uint64_t DIEOffset;
+ DIELocation(DWARFUnit *_DwUnit, uint64_t _DIEOffset)
+ : DwUnit(_DwUnit), DIEOffset(_DIEOffset) {}
+};
+/// This represents DWARF locations of CrossCU referencing DIEs.
+using CrossCUReferencingDIELocationTy = llvm::SmallVector<DIELocation>;
+
+/// This maps function DIE offset to its DWARF CU.
+using FunctionDIECUTyMap = llvm::DenseMap<uint64_t, DWARFUnit *>;
+
/// Holds statistics for one function (or other entity that has a PC range and
/// contains variables, such as a compile unit).
struct PerFunctionStats {
@@ -450,15 +463,18 @@ static void collectStatsForDie(DWARFDie Die, const std::string &FnPrefix,
/// Recursively collect variables from subprogram with DW_AT_inline attribute.
static void collectAbstractOriginFnInfo(
DWARFDie Die, uint64_t SPOffset,
- AbstractOriginVarsTyMap &GlobalAbstractOriginFnInfo) {
+ AbstractOriginVarsTyMap &GlobalAbstractOriginFnInfo,
+ AbstractOriginVarsTyMap &LocalAbstractOriginFnInfo) {
DWARFDie Child = Die.getFirstChild();
while (Child) {
const dwarf::Tag ChildTag = Child.getTag();
if (ChildTag == dwarf::DW_TAG_formal_parameter ||
- ChildTag == dwarf::DW_TAG_variable)
+ ChildTag == dwarf::DW_TAG_variable) {
GlobalAbstractOriginFnInfo[SPOffset].push_back(Child.getOffset());
- else if (ChildTag == dwarf::DW_TAG_lexical_block)
- collectAbstractOriginFnInfo(Child, SPOffset, GlobalAbstractOriginFnInfo);
+ LocalAbstractOriginFnInfo[SPOffset].push_back(Child.getOffset());
+ } else if (ChildTag == dwarf::DW_TAG_lexical_block)
+ collectAbstractOriginFnInfo(Child, SPOffset, GlobalAbstractOriginFnInfo,
+ LocalAbstractOriginFnInfo);
Child = Child.getSibling();
}
}
@@ -468,8 +484,9 @@ static void collectStatsRecursive(
DWARFDie Die, std::string FnPrefix, std::string VarPrefix,
uint64_t BytesInScope, uint32_t InlineDepth,
StringMap<PerFunctionStats> &FnStatMap, GlobalStats &GlobalStats,
- LocationStats &LocStats,
+ LocationStats &LocStats, FunctionDIECUTyMap &AbstractOriginFnCUs,
AbstractOriginVarsTyMap &GlobalAbstractOriginFnInfo,
+ AbstractOriginVarsTyMap &LocalAbstractOriginFnInfo,
FunctionsWithAbstractOriginTy &FnsWithAbstractOriginToBeProcessed,
AbstractOriginVarsTy *AbstractOriginVarsPtr = nullptr) {
// Skip NULL nodes.
@@ -499,11 +516,12 @@ static void collectStatsRecursive(
auto OffsetFn = Die.find(dwarf::DW_AT_abstract_origin);
if (OffsetFn) {
uint64_t OffsetOfInlineFnCopy = (*OffsetFn).getRawUValue();
- if (GlobalAbstractOriginFnInfo.count(OffsetOfInlineFnCopy)) {
- AbstractOriginVars = GlobalAbstractOriginFnInfo[OffsetOfInlineFnCopy];
+ if (LocalAbstractOriginFnInfo.count(OffsetOfInlineFnCopy)) {
+ AbstractOriginVars = LocalAbstractOriginFnInfo[OffsetOfInlineFnCopy];
AbstractOriginVarsPtr = &AbstractOriginVars;
} else {
- // This means that the DW_AT_inline fn copy is out of order,
+ // This means that the DW_AT_inline fn copy is out of order
+ // or that the abstract_origin references another CU,
// so this abstract origin instance will be processed later.
FnsWithAbstractOriginToBeProcessed.push_back(Die.getOffset());
AbstractOriginVarsPtr = nullptr;
@@ -543,7 +561,9 @@ static void collectStatsRecursive(
// for inlined instances.
if (Die.find(dwarf::DW_AT_inline)) {
uint64_t SPOffset = Die.getOffset();
- collectAbstractOriginFnInfo(Die, SPOffset, GlobalAbstractOriginFnInfo);
+ AbstractOriginFnCUs[SPOffset] = Die.getDwarfUnit();
+ collectAbstractOriginFnInfo(Die, SPOffset, GlobalAbstractOriginFnInfo,
+ LocalAbstractOriginFnInfo);
return;
}
@@ -597,8 +617,9 @@ static void collectStatsRecursive(
collectStatsRecursive(
Child, FnPrefix, ChildVarPrefix, BytesInScope, InlineDepth, FnStatMap,
- GlobalStats, LocStats, GlobalAbstractOriginFnInfo,
- FnsWithAbstractOriginToBeProcessed, AbstractOriginVarsPtr);
+ GlobalStats, LocStats, AbstractOriginFnCUs, GlobalAbstractOriginFnInfo,
+ LocalAbstractOriginFnInfo, FnsWithAbstractOriginToBeProcessed,
+ AbstractOriginVarsPtr);
Child = Child.getSibling();
}
@@ -733,16 +754,24 @@ static void updateVarsWithAbstractOriginLocCovInfo(
/// the DW_TAG_subprogram) with an abstract_origin attribute.
static void collectZeroLocCovForVarsWithAbstractOrigin(
DWARFUnit *DwUnit, GlobalStats &GlobalStats, LocationStats &LocStats,
- AbstractOriginVarsTyMap &GlobalAbstractOriginFnInfo,
+ AbstractOriginVarsTyMap &LocalAbstractOriginFnInfo,
FunctionsWithAbstractOriginTy &FnsWithAbstractOriginToBeProcessed) {
+ // The next variable is used to filter out functions that have been processed,
+ // leaving FnsWithAbstractOriginToBeProcessed with just CrossCU references.
+ FunctionsWithAbstractOriginTy ProcessedFns;
for (auto FnOffset : FnsWithAbstractOriginToBeProcessed) {
DWARFDie FnDieWithAbstractOrigin = DwUnit->getDIEForOffset(FnOffset);
auto FnCopy = FnDieWithAbstractOrigin.find(dwarf::DW_AT_abstract_origin);
AbstractOriginVarsTy AbstractOriginVars;
if (!FnCopy)
continue;
-
- AbstractOriginVars = GlobalAbstractOriginFnInfo[(*FnCopy).getRawUValue()];
+ uint64_t FnCopyRawUValue = (*FnCopy).getRawUValue();
+ // If there is no entry within LocalAbstractOriginFnInfo for the given
+ // FnCopyRawUValue, the function isn't out-of-order in DWARF. Rather, we have
+ // CrossCU referencing.
+ if (!LocalAbstractOriginFnInfo.count(FnCopyRawUValue))
+ continue;
+ AbstractOriginVars = LocalAbstractOriginFnInfo[FnCopyRawUValue];
updateVarsWithAbstractOriginLocCovInfo(FnDieWithAbstractOrigin,
AbstractOriginVars);
@@ -758,6 +787,46 @@ static void collectZeroLocCovForVarsWithAbstractOrigin(
LocStats.LocalVarLocStats[ZeroCoverageBucket]++;
}
}
+ ProcessedFns.push_back(FnOffset);
+ }
+ for (auto ProcessedFn : ProcessedFns)
+ llvm::erase_value(FnsWithAbstractOriginToBeProcessed, ProcessedFn);
+}
+
+/// Collect zero location coverage for inlined variables which refer to
+/// a DW_AT_inline copy of subprogram that is in a different CU.
+static void collectZeroLocCovForVarsWithCrossCUReferencingAbstractOrigin(
+ LocationStats &LocStats, FunctionDIECUTyMap AbstractOriginFnCUs,
+ AbstractOriginVarsTyMap &GlobalAbstractOriginFnInfo,
+ CrossCUReferencingDIELocationTy &CrossCUReferencesToBeResolved) {
+ for (const auto &CrossCUReferenceToBeResolved :
+ CrossCUReferencesToBeResolved) {
+ DWARFUnit *DwUnit = CrossCUReferenceToBeResolved.DwUnit;
+ DWARFDie FnDIEWithCrossCUReferencing =
+ DwUnit->getDIEForOffset(CrossCUReferenceToBeResolved.DIEOffset);
+ auto FnCopy =
+ FnDIEWithCrossCUReferencing.find(dwarf::DW_AT_abstract_origin);
+ if (!FnCopy)
+ continue;
+ uint64_t FnCopyRawUValue = (*FnCopy).getRawUValue();
+ AbstractOriginVarsTy AbstractOriginVars =
+ GlobalAbstractOriginFnInfo[FnCopyRawUValue];
+ updateVarsWithAbstractOriginLocCovInfo(FnDIEWithCrossCUReferencing,
+ AbstractOriginVars);
+ for (auto Offset : AbstractOriginVars) {
+ LocStats.NumVarParam++;
+ LocStats.VarParamLocStats[ZeroCoverageBucket]++;
+ auto Tag = (AbstractOriginFnCUs[FnCopyRawUValue])
+ ->getDIEForOffset(Offset)
+ .getTag();
+ if (Tag == dwarf::DW_TAG_formal_parameter) {
+ LocStats.NumParam++;
+ LocStats.ParamLocStats[ZeroCoverageBucket]++;
+ } else if (Tag == dwarf::DW_TAG_variable) {
+ LocStats.NumVar++;
+ LocStats.LocalVarLocStats[ZeroCoverageBucket]++;
+ }
+ }
}
}
@@ -778,28 +847,46 @@ bool dwarfdump::collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
GlobalStats GlobalStats;
LocationStats LocStats;
StringMap<PerFunctionStats> Statistics;
+ // This variable holds variable information for functions with
+ // abstract_origin globally, across all CUs.
+ AbstractOriginVarsTyMap GlobalAbstractOriginFnInfo;
+ // This variable holds information about the CU of a function with
+ // abstract_origin.
+ FunctionDIECUTyMap AbstractOriginFnCUs;
+ CrossCUReferencingDIELocationTy CrossCUReferencesToBeResolved;
for (const auto &CU : static_cast<DWARFContext *>(&DICtx)->compile_units()) {
if (DWARFDie CUDie = CU->getNonSkeletonUnitDIE(false)) {
- // These variables are being reset for each CU, since there could be
- // a situation where we have two subprogram DIEs with the same offsets
- // in two different CUs, and we can end up using wrong variables info
- // when trying to resolve abstract_origin attribute.
- // TODO: Handle LTO cases where the abstract origin of
- // the function is in a different CU than the one it's
- // referenced from or inlined into.
- AbstractOriginVarsTyMap GlobalAbstractOriginFnInfo;
+ // This variable holds variable information for functions with
+ // abstract_origin, but just for the current CU.
+ AbstractOriginVarsTyMap LocalAbstractOriginFnInfo;
FunctionsWithAbstractOriginTy FnsWithAbstractOriginToBeProcessed;
- collectStatsRecursive(CUDie, "/", "g", 0, 0, Statistics, GlobalStats,
- LocStats, GlobalAbstractOriginFnInfo,
- FnsWithAbstractOriginToBeProcessed);
+ collectStatsRecursive(
+ CUDie, "/", "g", 0, 0, Statistics, GlobalStats, LocStats,
+ AbstractOriginFnCUs, GlobalAbstractOriginFnInfo,
+ LocalAbstractOriginFnInfo, FnsWithAbstractOriginToBeProcessed);
+ // collectZeroLocCovForVarsWithAbstractOrigin will filter out all
+ // out-of-order DWARF functions that have been processed within it,
+ // leaving FnsWithAbstractOriginToBeProcessed with only CrossCU
+ // references.
collectZeroLocCovForVarsWithAbstractOrigin(
CUDie.getDwarfUnit(), GlobalStats, LocStats,
- GlobalAbstractOriginFnInfo, FnsWithAbstractOriginToBeProcessed);
+ LocalAbstractOriginFnInfo, FnsWithAbstractOriginToBeProcessed);
+
+ // Collect all CrossCU references into CrossCUReferencesToBeResolved.
+ for (auto CrossCUReferencingDIEOffset :
+ FnsWithAbstractOriginToBeProcessed)
+ CrossCUReferencesToBeResolved.push_back(
+ DIELocation(CUDie.getDwarfUnit(), CrossCUReferencingDIEOffset));
}
}
+ /// Resolve CrossCU references.
+ collectZeroLocCovForVarsWithCrossCUReferencingAbstractOrigin(
+ LocStats, AbstractOriginFnCUs, GlobalAbstractOriginFnInfo,
+ CrossCUReferencesToBeResolved);
+
/// Collect the sizes of debug sections.
SectionSizes Sizes;
calculateSectionSizes(Obj, Sizes, Filename);
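The restructuring above turns abstract-origin resolution into two passes: a per-CU pass that consults a local map and defers anything missing, then a final pass that resolves the deferred cross-CU references against a global map spanning all CUs. A simplified control-flow sketch with hypothetical types, not the dwarfdump ones:

  #include <cstdint>
  #include <map>
  #include <vector>

  using Offset = uint64_t;
  using VarList = std::vector<Offset>;

  // Pass 1 (per CU): entries missing from the local map are cross-CU
  // references and are deferred instead of being dropped.
  static void resolveLocal(const std::map<Offset, VarList> &LocalInfo,
                           const std::vector<Offset> &Pending,
                           std::vector<Offset> &CrossCU) {
    for (Offset O : Pending) {
      auto It = LocalInfo.find(O);
      if (It == LocalInfo.end()) {
        CrossCU.push_back(O); // Defer: the origin lives in another CU.
        continue;
      }
      // ... update zero-location-coverage stats from It->second ...
    }
  }

  // Pass 2 (after all CUs): the deferred entries are resolved against the
  // global map, which was populated across every CU.
  static void resolveCrossCU(const std::map<Offset, VarList> &GlobalInfo,
                             const std::vector<Offset> &CrossCU) {
    for (Offset O : CrossCU)
      if (auto It = GlobalInfo.find(O); It != GlobalInfo.end()) {
        (void)It; // ... update stats from It->second ...
      }
  }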
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index fd67cac3cdd2..7208011c9866 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -17,6 +17,7 @@
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/ProfileData/InstrProfWriter.h"
#include "llvm/ProfileData/ProfileCommon.h"
+#include "llvm/ProfileData/RawMemProfReader.h"
#include "llvm/ProfileData/SampleProfReader.h"
#include "llvm/ProfileData/SampleProfWriter.h"
#include "llvm/Support/CommandLine.h"
@@ -80,8 +81,8 @@ static void exitWithError(Error E, StringRef Whence = "") {
instrprof_error instrError = IPE.get();
StringRef Hint = "";
if (instrError == instrprof_error::unrecognized_format) {
- // Hint for common error of forgetting --sample for sample profiles.
- Hint = "Perhaps you forgot to use the --sample option?";
+ // Hint in case user missed specifying the profile type.
+ Hint = "Perhaps you forgot to use the --sample or --memory option?";
}
exitWithError(IPE.message(), std::string(Whence), std::string(Hint));
});
@@ -95,7 +96,7 @@ static void exitWithErrorCode(std::error_code EC, StringRef Whence = "") {
}
namespace {
-enum ProfileKinds { instr, sample };
+enum ProfileKinds { instr, sample, memory };
enum FailureMode { failIfAnyAreInvalid, failIfAllAreInvalid };
}
@@ -2447,6 +2448,17 @@ static int showSampleProfile(const std::string &Filename, bool ShowCounts,
return 0;
}
+static int showMemProfProfile(const std::string &Filename, raw_fd_ostream &OS) {
+ auto ReaderOr = llvm::memprof::RawMemProfReader::create(Filename);
+ if (Error E = ReaderOr.takeError())
+ exitWithError(std::move(E), Filename);
+
+ std::unique_ptr<llvm::memprof::RawMemProfReader> Reader(
+ ReaderOr.get().release());
+ Reader->printSummaries(OS);
+ return 0;
+}
+
static int show_main(int argc, const char *argv[]) {
cl::opt<std::string> Filename(cl::Positional, cl::Required,
cl::desc("<profdata-file>"));
@@ -2487,7 +2499,8 @@ static int show_main(int argc, const char *argv[]) {
cl::opt<ProfileKinds> ProfileKind(
cl::desc("Profile kind:"), cl::init(instr),
cl::values(clEnumVal(instr, "Instrumentation profile (default)"),
- clEnumVal(sample, "Sample profile")));
+ clEnumVal(sample, "Sample profile"),
+ clEnumVal(memory, "MemProf memory access profile")));
cl::opt<uint32_t> TopNFunctions(
"topn", cl::init(0),
cl::desc("Show the list of functions with the largest internal counts"));
@@ -2532,11 +2545,12 @@ static int show_main(int argc, const char *argv[]) {
ShowMemOPSizes, ShowDetailedSummary, DetailedSummaryCutoffs,
ShowAllFunctions, ShowCS, ValueCutoff, OnlyListBelow, ShowFunction,
TextFormat, ShowBinaryIds, OS);
- else
+ if (ProfileKind == sample)
return showSampleProfile(Filename, ShowCounts, TopNFunctions,
ShowAllFunctions, ShowDetailedSummary,
ShowFunction, ShowProfileSymbolList,
ShowSectionInfoOnly, ShowHotFuncList, OS);
+ return showMemProfProfile(Filename, OS);
}
int main(int argc, const char *argv[]) {
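With the memory enum value registered above, a MemProf raw profile would be displayed along these lines (the flag name follows from clEnumVal(memory, ...); the file name is illustrative):

  llvm-profdata show --memory app.memprofraw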
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index 4abea0b1d23d..9dd777dd98e7 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -5333,6 +5333,13 @@ const NoteType FreeBSDNoteTypes[] = {
"NT_FREEBSD_FEATURE_CTL (FreeBSD feature control)"},
};
+const NoteType NetBSDCoreNoteTypes[] = {
+ {ELF::NT_NETBSDCORE_PROCINFO,
+ "NT_NETBSDCORE_PROCINFO (procinfo structure)"},
+ {ELF::NT_NETBSDCORE_AUXV, "NT_NETBSDCORE_AUXV (ELF auxiliary vector data)"},
+ {ELF::NT_NETBSDCORE_LWPSTATUS, "PT_LWPSTATUS (ptrace_lwpstatus structure)"},
+};
+
const NoteType OpenBSDCoreNoteTypes[] = {
{ELF::NT_OPENBSD_PROCINFO, "NT_OPENBSD_PROCINFO (procinfo structure)"},
{ELF::NT_OPENBSD_AUXV, "NT_OPENBSD_AUXV (ELF auxiliary vector data)"},
@@ -5453,6 +5460,12 @@ StringRef getNoteTypeName(const typename ELFT::Note &Note, unsigned ELFType) {
return FindNote(FreeBSDNoteTypes);
}
}
+ if (ELFType == ELF::ET_CORE && Name.startswith("NetBSD-CORE")) {
+ StringRef Result = FindNote(NetBSDCoreNoteTypes);
+ if (!Result.empty())
+ return Result;
+ return FindNote(CoreNoteTypes);
+ }
if (Name.startswith("OpenBSD") && ELFType == ELF::ET_CORE) {
// OpenBSD also places the generic core notes in the OpenBSD namespace.
StringRef Result = FindNote(OpenBSDCoreNoteTypes);
diff --git a/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp b/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp
index bf25efc0b0bd..4a69f96a597a 100644
--- a/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp
+++ b/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp
@@ -39,7 +39,7 @@ enum ID {
#include "Opts.inc"
#undef PREFIX
-const opt::OptTable::Info InfoTable[] = {
+static const opt::OptTable::Info InfoTable[] = {
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
HELPTEXT, METAVAR, VALUES) \
{ \
@@ -55,7 +55,7 @@ class TLICheckerOptTable : public opt::OptTable {
public:
TLICheckerOptTable() : OptTable(InfoTable) {}
};
-} // namespace
+} // end anonymous namespace
// We have three levels of reporting.
enum class ReportKind {
@@ -66,13 +66,14 @@ enum class ReportKind {
};
// Most of the ObjectFile interfaces return an Expected<T>, so make it easy
-// to ignore those.
-template <typename T> T unwrapIgnoreError(Expected<T> E) {
+// to ignore errors.
+template <typename T>
+static T unwrapIgnoreError(Expected<T> E, T Default = T()) {
if (E)
return std::move(*E);
// Sink the error and return the default value.
consumeError(E.takeError());
- return T();
+ return Default;
}
static void fail(const Twine &Message) {
@@ -99,13 +100,14 @@ static void reportArchiveChildIssue(const object::Archive::Child &C, int Index,
}
// Return Name, and if Name is mangled, append "aka" and the demangled name.
-static std::string PrintableName(StringRef Name) {
+static std::string getPrintableName(StringRef Name) {
std::string OutputName = "'";
OutputName += Name;
OutputName += "'";
- if (Name.startswith("_Z") || Name.startswith("??")) {
+ std::string DemangledName(demangle(Name.str()));
+ if (Name != DemangledName) {
OutputName += " aka ";
- OutputName += demangle(Name.str());
+ OutputName += DemangledName;
}
return OutputName;
}
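The rewrite relies on llvm::demangle() returning its input unchanged when the name is not in any known mangling scheme, so comparing input and output replaces the old _Z/?? prefix checks; a small sketch:

  #include "llvm/Demangle/Demangle.h"
  #include <string>

  // demangle() falls back to returning the original string, so a changed
  // result is exactly the "this name was mangled" signal.
  static bool wasMangled(const std::string &Name) {
    return llvm::demangle(Name) != Name;
  }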
@@ -119,7 +121,7 @@ struct TLINameList : std::vector<std::pair<StringRef, bool>> {
// Print out what we found.
void dump();
};
-TLINameList TLINames;
+static TLINameList TLINames;
void TLINameList::initialize(StringRef TargetTriple) {
Triple T(TargetTriple);
@@ -146,7 +148,7 @@ void TLINameList::dump() {
// output as a header. So, for example, no need to repeat the triple.
for (auto &TLIName : TLINames) {
outs() << (TLIName.second ? " " : "not ")
- << "available: " << PrintableName(TLIName.first) << '\n';
+ << "available: " << getPrintableName(TLIName.first) << '\n';
}
}
@@ -159,24 +161,27 @@ class SDKNameMap : public StringMap<bool> {
public:
void populateFromFile(StringRef LibDir, StringRef LibName);
};
-SDKNameMap SDKNames;
+static SDKNameMap SDKNames;
// Given an ObjectFile, extract the global function symbols.
void SDKNameMap::populateFromObject(ObjectFile *O) {
- // FIXME: Support COFF.
+ // FIXME: Support other formats.
if (!O->isELF()) {
- WithColor::warning() << "Only ELF-format files are supported\n";
+ WithColor::warning() << O->getFileName()
+ << ": only ELF-format files are supported\n";
return;
}
- auto *ELF = cast<const ELFObjectFileBase>(O);
+ const auto *ELF = cast<ELFObjectFileBase>(O);
- for (auto I = ELF->getDynamicSymbolIterators().begin();
- I != ELF->getDynamicSymbolIterators().end(); ++I) {
- // We want only global function symbols.
- SymbolRef::Type Type = unwrapIgnoreError(I->getType());
- uint32_t Flags = unwrapIgnoreError(I->getFlags());
- StringRef Name = unwrapIgnoreError(I->getName());
- if (Type == SymbolRef::ST_Function && (Flags & SymbolRef::SF_Global))
+ for (auto &S : ELF->getDynamicSymbolIterators()) {
+ // We want only defined global function symbols.
+ SymbolRef::Type Type = unwrapIgnoreError(S.getType());
+ uint32_t Flags = unwrapIgnoreError(S.getFlags());
+ section_iterator Section = unwrapIgnoreError(S.getSection(),
+ /*Default=*/O->section_end());
+ StringRef Name = unwrapIgnoreError(S.getName());
+ if (Type == SymbolRef::ST_Function && (Flags & SymbolRef::SF_Global) &&
+ Section != O->section_end())
insert({Name, true});
}
}
@@ -211,7 +216,7 @@ void SDKNameMap::populateFromFile(StringRef LibDir, StringRef LibName) {
SmallString<255> Filepath(LibDir);
sys::path::append(Filepath, LibName);
if (!sys::fs::exists(Filepath)) {
- WithColor::warning() << "Could not find '" << StringRef(Filepath) << "'\n";
+ WithColor::warning() << StringRef(Filepath) << ": not found\n";
return;
}
outs() << "\nLooking for symbols in '" << StringRef(Filepath) << "'\n";
@@ -229,13 +234,12 @@ void SDKNameMap::populateFromFile(StringRef LibDir, StringRef LibName) {
else if (ObjectFile *O = dyn_cast<ObjectFile>(&Binary))
populateFromObject(O);
else {
- WithColor::warning() << "Not an Archive or ObjectFile: '"
- << StringRef(Filepath) << "'\n";
+ WithColor::warning() << StringRef(Filepath)
+ << ": not an archive or object file\n";
return;
}
if (Precount == size())
- WithColor::warning() << "No symbols found in '" << StringRef(Filepath)
- << "'\n";
+ WithColor::warning() << StringRef(Filepath) << ": no symbols found\n";
else
outs() << "Found " << size() - Precount << " global function symbols in '"
<< StringRef(Filepath) << "'\n";
@@ -268,10 +272,8 @@ int main(int argc, char *argv[]) {
}
std::vector<std::string> LibList = Args.getAllArgValues(OPT_INPUT);
- if (LibList.empty()) {
- WithColor::error() << "No input files\n";
- exit(EXIT_FAILURE);
- }
+ if (LibList.empty())
+ fail("no input files\n");
StringRef LibDir = Args.getLastArgValue(OPT_libdir_EQ);
bool SeparateMode = Args.hasArg(OPT_separate);
@@ -283,10 +285,8 @@ int main(int argc, char *argv[]) {
.Case("discrepancy", ReportKind::Discrepancy)
.Case("full", ReportKind::Full)
.Default(ReportKind::Error);
- if (ReportLevel == ReportKind::Error) {
- WithColor::error() << "invalid option for --report: " << A->getValue();
- exit(EXIT_FAILURE);
- }
+ if (ReportLevel == ReportKind::Error)
+ fail(Twine("invalid option for --report: ", StringRef(A->getValue())));
}
for (size_t I = 0; I < LibList.size(); ++I) {
@@ -330,7 +330,8 @@ int main(int argc, char *argv[]) {
constexpr char YesNo[2][4] = {"no ", "yes"};
constexpr char Indicator[4][3] = {"!!", ">>", "<<", "=="};
outs() << Indicator[Which] << " TLI " << YesNo[TLIHas] << " SDK "
- << YesNo[SDKHas] << ": " << PrintableName(TLIName.first) << '\n';
+ << YesNo[SDKHas] << ": " << getPrintableName(TLIName.first)
+ << '\n';
}
}
diff --git a/llvm/utils/TableGen/AsmWriterInst.cpp b/llvm/utils/TableGen/AsmWriterInst.cpp
index cf24f79334ca..887abbac9d3b 100644
--- a/llvm/utils/TableGen/AsmWriterInst.cpp
+++ b/llvm/utils/TableGen/AsmWriterInst.cpp
@@ -147,8 +147,7 @@ AsmWriterInst::AsmWriterInst(const CodeGenInstruction &CGI, unsigned CGIIndex,
std::string::size_type ModifierStart = VarEnd;
while (VarEnd < AsmString.size() && isIdentChar(AsmString[VarEnd]))
++VarEnd;
- Modifier = std::string(AsmString.begin()+ModifierStart,
- AsmString.begin()+VarEnd);
+ Modifier = AsmString.substr(ModifierStart, VarEnd - ModifierStart);
if (Modifier.empty())
PrintFatalError(CGI.TheDef->getLoc(),
"Bad operand modifier name in '" +
diff --git a/llvm/utils/TableGen/CodeGenTarget.cpp b/llvm/utils/TableGen/CodeGenTarget.cpp
index 137f99078faf..d3beaf61989e 100644
--- a/llvm/utils/TableGen/CodeGenTarget.cpp
+++ b/llvm/utils/TableGen/CodeGenTarget.cpp
@@ -676,12 +676,11 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R,
isSpeculatable = false;
hasSideEffects = false;
- if (DefName.size() <= 4 ||
- std::string(DefName.begin(), DefName.begin() + 4) != "int_")
+ if (DefName.size() <= 4 || DefName.substr(0, 4) != "int_")
PrintFatalError(DefLoc,
"Intrinsic '" + DefName + "' does not start with 'int_'!");
- EnumName = std::string(DefName.begin()+4, DefName.end());
+ EnumName = DefName.substr(4);
if (R->getValue("GCCBuiltinName")) // Ignore a missing GCCBuiltinName field.
GCCBuiltinName = std::string(R->getValueAsString("GCCBuiltinName"));
@@ -699,8 +698,7 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R,
Name += (EnumName[i] == '_') ? '.' : EnumName[i];
} else {
// Verify it starts with "llvm.".
- if (Name.size() <= 5 ||
- std::string(Name.begin(), Name.begin() + 5) != "llvm.")
+ if (Name.size() <= 5 || Name.substr(0, 5) != "llvm.")
PrintFatalError(DefLoc, "Intrinsic '" + DefName +
"'s name does not start with 'llvm.'!");
}
@@ -709,8 +707,7 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R,
// "llvm.<targetprefix>.".
if (!TargetPrefix.empty()) {
if (Name.size() < 6+TargetPrefix.size() ||
- std::string(Name.begin() + 5, Name.begin() + 6 + TargetPrefix.size())
- != (TargetPrefix + "."))
+ Name.substr(5, 1 + TargetPrefix.size()) != (TargetPrefix + "."))
PrintFatalError(DefLoc, "Intrinsic '" + DefName +
"' does not start with 'llvm." +
TargetPrefix + ".'!");